/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
 * Copyright 2022 MNX Cloud, Inc.
 * Copyright 2022 Oxide Computer Company
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/priv.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/inline.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/brand.h>
#include <sys/sobject.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/var.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/session.h>
#include <sys/pcb.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/ts.h>
#include <sys/bitmap.h>
#include <sys/poll.h>
#include <sys/shm_impl.h>
#include <sys/fault.h>
#include <sys/syscall.h>
#include <sys/procfs.h>
#include <sys/processor.h>
#include <sys/cpuvar.h>
#include <sys/copyops.h>
#include <sys/time.h>
#include <sys/msacct.h>
#include <sys/flock_impl.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/pathname.h>
#include <sys/mode.h>
#include <sys/socketvar.h>
#include <sys/autoconf.h>
#include <sys/dtrace.h>
#include <sys/timod.h>
#include <sys/fs/namenode.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <inet/cc.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_spt.h>
#include <vm/page.h>
#include <sys/vmparam.h>
#include <sys/swap.h>
#include <fs/proc/prdata.h>
#include <sys/task.h>
#include <sys/project.h>
#include <sys/contract_impl.h>
#include <sys/contract/process.h>
#include <sys/contract/process_impl.h>
#include <sys/schedctl.h>
#include <sys/pool.h>
#include <sys/zone.h>
#include <sys/atomic.h>
#include <sys/sdt.h>

#define	MAX_ITERS_SPIN	5

typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

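/*
 * Illustrative sketch (an assumption about the pagev allocator later in
 * this file, not a definition): pg_protv and pg_incore are parallel
 * vectors, one protection word and one incore flag per page, so a
 * prpagev_t covering npages costs roughly
 *
 *	npages = MIN(npages, pagev_lim);
 *	pg_protv = kmem_alloc(npages * sizeof (uint_t), KM_SLEEP);
 *	pg_incore = kmem_alloc(npages * sizeof (char), KM_SLEEP);
 *
 * pagev_lim therefore bounds the transient kmem footprint of a single
 * address-space walk, no matter how large the segment.
 */
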
extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);

/*
 * Choose an lwp from the complete set of lwps for the process.
 * This is called for any operation applied to the process
 * file descriptor that requires an lwp to operate upon.
 *
 * Returns a pointer to the thread for the selected LWP,
 * and with the dispatcher lock held for the thread.
 *
 * The algorithm for choosing an lwp is critical for /proc semantics;
 * don't touch this code unless you know all of the implications.
 */
kthread_t *
prchoose(proc_t *p)
{
	kthread_t *t;
	kthread_t *t_onproc = NULL;	/* running on processor */
	kthread_t *t_run = NULL;	/* runnable, on disp queue */
	kthread_t *t_sleep = NULL;	/* sleeping */
	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
	kthread_t *t_susp = NULL;	/* suspended stop */
	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
	kthread_t *t_req = NULL;	/* requested stop */
	kthread_t *t_istop = NULL;	/* event-of-interest stop */
	kthread_t *t_dtrace = NULL;	/* DTrace stop */

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * If the agent lwp exists, it takes precedence over all others.
	 */
	if ((t = p->p_agenttp) != NULL) {
		thread_lock(t);
		return (t);
	}

	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
		return (t);
	do {		/* for each lwp in the process */
		if (VSTOPPED(t)) {	/* virtually stopped */
			if (t_req == NULL)
				t_req = t;
			continue;
		}

		/* If this is a process kernel thread, ignore it. */
		if ((t->t_proc_flag & TP_KTHREAD) != 0) {
			continue;
		}

		thread_lock(t);		/* make sure thread is in good state */
		switch (t->t_state) {
		default:
			panic("prchoose: bad thread state %d, thread 0x%p",
			    t->t_state, (void *)t);
			/*NOTREACHED*/
		case TS_SLEEP:
			/* this is filthy */
			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
			    t->t_wchan0 == NULL) {
				if (t_hold == NULL)
					t_hold = t;
			} else {
				if (t_sleep == NULL)
					t_sleep = t;
			}
			break;
		case TS_RUN:
		case TS_WAIT:
			if (t_run == NULL)
				t_run = t;
			break;
		case TS_ONPROC:
			if (t_onproc == NULL)
				t_onproc = t;
			break;
		case TS_ZOMB:		/* last possible choice */
			break;
		case TS_STOPPED:
			switch (t->t_whystop) {
			case PR_SUSPENDED:
				if (t_susp == NULL)
					t_susp = t;
				break;
			case PR_JOBCONTROL:
				if (t->t_proc_flag & TP_PRSTOP) {
					if (t_jdstop == NULL)
						t_jdstop = t;
				} else {
					if (t_jstop == NULL)
						t_jstop = t;
				}
				break;
			case PR_REQUESTED:
				if (t->t_dtrace_stop && t_dtrace == NULL)
					t_dtrace = t;
				else if (t_req == NULL)
					t_req = t;
				break;
			case PR_SYSENTRY:
			case PR_SYSEXIT:
			case PR_SIGNALLED:
			case PR_FAULTED:
				/*
				 * Make an lwp calling exit() be the
				 * last lwp seen in the process.
				 */
				if (t_istop == NULL ||
				    (t_istop->t_whystop == PR_SYSENTRY &&
				    t_istop->t_whatstop == SYS_exit))
					t_istop = t;
				break;
			case PR_CHECKPOINT:	/* can't happen? */
				break;
			default:
				panic("prchoose: bad t_whystop %d, thread 0x%p",
				    t->t_whystop, (void *)t);
				/*NOTREACHED*/
			}
			break;
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);

	if (t_onproc)
		t = t_onproc;
	else if (t_run)
		t = t_run;
	else if (t_sleep)
		t = t_sleep;
	else if (t_jstop)
		t = t_jstop;
	else if (t_jdstop)
		t = t_jdstop;
	else if (t_istop)
		t = t_istop;
	else if (t_dtrace)
		t = t_dtrace;
	else if (t_req)
		t = t_req;
	else if (t_hold)
		t = t_hold;
	else if (t_susp)
		t = t_susp;
	else			/* TS_ZOMB */
		t = p->p_tlist;

	if (t != NULL)
		thread_lock(t);
	return (t);
}

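/*
 * Illustrative sketch: the canonical caller pattern (it mirrors what
 * prgetstatus() does below).  The chosen thread is returned with its
 * dispatcher lock held, and the caller must drop that lock itself:
 *
 *	ASSERT(MUTEX_HELD(&p->p_lock));
 *	t = prchoose(p);	// returns locked thread, or NULL
 *	if (t != NULL) {
 *		... record the lwp of interest ...
 *		thread_unlock(t);
 *	}
 */
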
/*
 * Wake up anyone sleeping on the /proc vnode for the process/lwp to stop.
 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
 * on the /proc file descriptor.  Called from stop() when a traced
 * process stops on an event of interest.  Also called from exit()
 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
 */
void
prnotify(struct vnode *vp)
{
	prcommon_t *pcp = VTOP(vp)->pr_common;

	mutex_enter(&pcp->prc_mutex);
	cv_broadcast(&pcp->prc_wait);
	mutex_exit(&pcp->prc_mutex);
	if (pcp->prc_flags & PRC_POLL) {
		/*
		 * We call pollwakeup() with POLLHUP to ensure that
		 * the pollers are awakened even if they are polling
		 * for nothing (i.e., waiting for the process to exit).
		 * This enables the use of the PRC_POLL flag for optimization
		 * (we can turn off PRC_POLL only if we know no pollers remain).
		 */
		pcp->prc_flags &= ~PRC_POLL;
		pollwakeup(&pcp->prc_pollhead, POLLHUP);
	}
}

/* called immediately below, in prfree() */
static void
prfreenotify(vnode_t *vp)
{
	prnode_t *pnp;
	prcommon_t *pcp;

	while (vp != NULL) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL);
		pcp->prc_proc = NULL;
		/*
		 * We can't call prnotify() here because we are holding
		 * pidlock.  We assert that there is no need to.
		 */
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		ASSERT(!(pcp->prc_flags & PRC_POLL));

		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}
}

/*
 * Called from a hook in freeproc() when a traced process is removed
 * from the process table.  The proc-table pointers of all associated
 * /proc vnodes are cleared to indicate that the process has gone away.
 */
void
prfree(proc_t *p)
{
	uint_t slot = p->p_slot;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Block the process against /proc so it can be freed.
	 * It cannot be freed while locked by some controlling process.
	 * Lock ordering:
	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
	 */
	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		mutex_exit(&pr_pidlock);
		cv_wait(&pr_pid_cv[slot], &p->p_lock);
		mutex_exit(&p->p_lock);
		mutex_enter(&pr_pidlock);
		mutex_enter(&p->p_lock);
	}

	ASSERT(p->p_tlist == NULL);

	prfreenotify(p->p_plist);
	p->p_plist = NULL;

	prfreenotify(p->p_trace);
	p->p_trace = NULL;

	/*
	 * We broadcast to wake up everyone waiting for this process.
	 * No one can reach this process from this point on.
	 */
	cv_broadcast(&pr_pid_cv[slot]);

	mutex_exit(&p->p_lock);
	mutex_exit(&pr_pidlock);
}

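/*
 * Illustrative sketch of the lock ordering documented in prfree() above
 * (a restatement, not a new code path): a thread taking more than one of
 * these locks must acquire them in this order to avoid deadlock:
 *
 *	mutex_enter(&pidlock);		// process table
 *	mutex_enter(&pr_pidlock);	// protects pcp->prc_proc
 *	mutex_enter(&p->p_lock);	// per-process lock
 *	mutex_enter(&pcp->prc_mutex);	// per-prcommon lock
 *
 * prfree() starts at pr_pidlock because its caller already holds pidlock.
 */
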
/*
 * Called from a hook in exit() when a traced process is becoming a zombie.
 */
void
prexit(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
	if (p->p_trace) {
		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
		prnotify(p->p_trace);
	}
	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
}

/*
 * Called when a thread calls lwp_exit().
 */
void
prlwpexit(kthread_t *t)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	proc_t *p = ttoproc(t);
	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;

	ASSERT(t == curthread);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		if (pcp->prc_thread == t) {
			pcp->prc_thread = NULL;
			pcp->prc_flags |= PRC_DESTROY;
		}
	}

	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		pcp->prc_thread = NULL;
		pcp->prc_flags |= PRC_DESTROY;
		prnotify(vp);
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}

/*
 * Called when a zombie thread is joined or when a
 * detached lwp exits.  Called from lwp_hash_out().
 */
void
prlwpfree(proc_t *p, lwpent_t *lep)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	vp = lep->le_trace;
	lep->le_trace = NULL;
	while (vp) {
		prnotify(vp);
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL &&
		    (pcp->prc_flags & PRC_DESTROY));
		pcp->prc_tslot = -1;
		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}

/*
 * Called from a hook in exec() when a thread starts exec().
 */
void
prexecstart(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);

	/*
	 * The P_PR_EXEC flag blocks /proc operations for
	 * the duration of the exec().
	 * We can't start exec() while the process is
	 * locked by /proc, so we call prbarrier().
	 * lwp_nostop keeps the process from being stopped
	 * via job control for the duration of the exec().
	 */

	ASSERT(MUTEX_HELD(&p->p_lock));
	prbarrier(p);
	lwp->lwp_nostop++;
	p->p_proc_flag |= P_PR_EXEC;
}

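/*
 * Illustrative sketch (assumed exec-side usage, inferred from the comment
 * above and from prexecend() below): exec() brackets its critical region
 * so that /proc consumers in prlock() wait for it to finish:
 *
 *	mutex_enter(&p->p_lock);
 *	prexecstart();		// prbarrier(), then set P_PR_EXEC
 *	mutex_exit(&p->p_lock);
 *	... replace the address space ...
 *	mutex_enter(&p->p_lock);
 *	prexecend();		// clear P_PR_EXEC, wake prc_wait sleepers
 *	mutex_exit(&p->p_lock);
 */
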
/*
 * Called from a hook in exec() when a thread finishes exec().
 * The thread may or may not have succeeded.  Some other thread
 * may have beat it to the punch.
 */
void
prexecend(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	model_t model = p->p_model;
	id_t tid = curthread->t_tid;
	int tslot = curthread->t_dslot;

	ASSERT(MUTEX_HELD(&p->p_lock));

	lwp->lwp_nostop--;
	if (p->p_flag & SEXITLWPS) {
		/*
		 * We are on our way to exiting because some
		 * other thread beat us in the race to exec().
		 * Don't clear the P_PR_EXEC flag in this case.
		 */
		return;
	}

	/*
	 * Wake up anyone waiting in /proc for the process to complete exec().
	 */
	p->p_proc_flag &= ~P_PR_EXEC;
	if ((vp = p->p_trace) != NULL) {
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pnp->pr_common->prc_datamodel = model;
		}
	}
	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
		/*
		 * We dealt with the process common above.
		 */
		ASSERT(p->p_trace != NULL);
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pcp = pnp->pr_common;
			pcp->prc_datamodel = model;
			pcp->prc_tid = tid;
			pcp->prc_tslot = tslot;
		}
	}
}

/*
 * Called from a hook in relvm() just before freeing the address space.
 * We free all the watched areas now.
 */
void
prrelvm(void)
{
	proc_t *p = ttoproc(curthread);

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */
	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	mutex_exit(&p->p_lock);
	pr_free_watched_pages(p);
}

/*
 * Called from hooks in exec-related code when a traced process
 * attempts to exec(2) a setuid/setgid program or an unreadable
 * file.  Rather than fail the exec we invalidate the associated
 * /proc vnodes so that subsequent attempts to use them will fail.
 *
 * All /proc vnodes, except directory vnodes, are retained on a linked
 * list (rooted at p_plist in the process structure) until last close.
 *
 * A controlling process must re-open the /proc files in order to
 * regain control.
 */
void
prinvalidate(struct user *up)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	vnode_t *vp;
	prnode_t *pnp;
	int writers = 0;

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */

	/*
	 * At this moment, there can be only one lwp in the process.
	 */
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);

	/*
	 * Invalidate any currently active /proc vnodes.
	 */
	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		switch (pnp->pr_type) {
		case PR_PSINFO:		/* these files can be read by anyone */
		case PR_LPSINFO:
		case PR_LWPSINFO:
		case PR_LWPDIR:
		case PR_LWPIDDIR:
		case PR_USAGE:
		case PR_LUSAGE:
		case PR_LWPUSAGE:
			break;
		default:
			pnp->pr_flags |= PR_INVAL;
			break;
		}
	}
	/*
	 * Wake up anyone waiting for the process or lwp.
	 * p->p_trace is guaranteed to be non-NULL if there
	 * are any open /proc files for this process.
	 */
	if ((vp = p->p_trace) != NULL) {
		prcommon_t *pcp = VTOP(vp)->pr_pcommon;

		prnotify(vp);
		/*
		 * Are there any writers?
		 */
		if ((writers = pcp->prc_writers) != 0) {
			/*
			 * Clear the exclusive open flag (old /proc interface).
			 * Set prc_selfopens equal to prc_writers so that
			 * the next O_EXCL|O_WRITE open will succeed
			 * even with existing (though invalid) writers.
			 * prclose() must decrement prc_selfopens when
			 * the invalid files are closed.
			 */
			pcp->prc_flags &= ~PRC_EXCL;
			ASSERT(pcp->prc_selfopens <= writers);
			pcp->prc_selfopens = writers;
		}
	}
	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
	while (vp != NULL) {
		/*
		 * We should not invalidate the lwpiddir vnodes,
		 * but the necessities of maintaining the old
		 * ioctl()-based version of /proc require it.
		 */
		pnp = VTOP(vp);
		pnp->pr_flags |= PR_INVAL;
		prnotify(vp);
		vp = pnp->pr_next;
	}

	/*
	 * If any tracing flags are in effect and any vnodes are open for
	 * writing then set the requested-stop and run-on-last-close flags.
	 * Otherwise, clear all tracing flags.
	 */
	t->t_proc_flag &= ~TP_PAUSE;
	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
		t->t_proc_flag |= TP_PRSTOP;
		aston(t);	/* so ISSIG will see the flag */
		p->p_proc_flag |= P_PR_RUNLCL;
	} else {
		premptyset(&up->u_entrymask);		/* syscalls */
		premptyset(&up->u_exitmask);
		up->u_systrap = 0;
		premptyset(&p->p_sigmask);		/* signals */
		premptyset(&p->p_fltmask);		/* faults */
		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
		prnostep(ttolwp(t));
	}

	mutex_exit(&p->p_lock);
}

/*
 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
 * Return with pr_pidlock held in all cases.
 * Return with p_lock held if the process still exists.
 * Return value is the process pointer if the process still exists, else NULL.
 * If we lock the process, give ourselves kernel priority to avoid deadlocks;
 * this is undone in prunlock().
 */
proc_t *
pr_p_lock(prnode_t *pnp)
{
	proc_t *p;
	prcommon_t *pcp;

	mutex_enter(&pr_pidlock);
	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
		return (NULL);
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * This cv/mutex pair is persistent even if
		 * the process disappears while we sleep.
		 */
		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
		kmutex_t *mp = &p->p_lock;

		mutex_exit(&pr_pidlock);
		cv_wait(cv, mp);
		mutex_exit(mp);
		mutex_enter(&pr_pidlock);
		if (pcp->prc_proc == NULL)
			return (NULL);
		ASSERT(p == pcp->prc_proc);
		mutex_enter(&p->p_lock);
	}
	p->p_proc_flag |= P_PR_LOCK;
	return (p);
}

/*
 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
 * This prevents any lwp of the process from disappearing and
 * blocks most operations that a process can perform on itself.
 * Returns 0 on success, a non-zero error number on failure.
 *
 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
 *
 * error returns:
 *	ENOENT: process or lwp has disappeared or process is exiting
 *		(or has become a zombie and zdisp == ZNO).
 *	EAGAIN: procfs vnode has become invalid.
 *	EINTR:  signal arrived while waiting for exec to complete.
 */
int
prlock(prnode_t *pnp, int zdisp)
{
	prcommon_t *pcp;
	proc_t *p;

again:
	pcp = pnp->pr_common;
	p = pr_p_lock(pnp);
	mutex_exit(&pr_pidlock);

	/*
	 * Return ENOENT immediately if there is no process.
	 */
	if (p == NULL)
		return (ENOENT);

	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);

	/*
	 * Return ENOENT if process entered zombie state or is exiting
	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
	 */
	if (zdisp == ZNO &&
	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
		prunlock(pnp);
		return (ENOENT);
	}

	/*
	 * If lwp-specific, check to see if lwp has disappeared.
	 */
	if (pcp->prc_flags & PRC_LWP) {
		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
		    pcp->prc_tslot == -1) {
			prunlock(pnp);
			return (ENOENT);
		}
	}

	/*
	 * Return EAGAIN if we have encountered a security violation.
	 * (The process exec'd a set-id or unreadable executable file.)
	 */
	if (pnp->pr_flags & PR_INVAL) {
		prunlock(pnp);
		return (EAGAIN);
	}

	/*
	 * If process is undergoing an exec(), wait for
	 * completion and then start all over again.
	 */
	if (p->p_proc_flag & P_PR_EXEC) {
		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
		mutex_enter(&pcp->prc_mutex);
		prunlock(pnp);
		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
			mutex_exit(&pcp->prc_mutex);
			return (EINTR);
		}
		mutex_exit(&pcp->prc_mutex);
		goto again;
	}

	/*
	 * We return holding p->p_lock.
	 */
	return (0);
}

/*
 * Undo prlock() and pr_p_lock().
 * p->p_lock is still held; pr_pidlock is no longer held.
 *
 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
 * if any, waiting for the flag to be dropped; it retains p->p_lock.
 *
 * prunlock() calls prunmark() and then drops p->p_lock.
 */
void
prunmark(proc_t *p)
{
	ASSERT(p->p_proc_flag & P_PR_LOCK);
	ASSERT(MUTEX_HELD(&p->p_lock));

	cv_signal(&pr_pid_cv[p->p_slot]);
	p->p_proc_flag &= ~P_PR_LOCK;
}

void
prunlock(prnode_t *pnp)
{
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;

	/*
	 * If we (or someone) gave it a SIGKILL, and it is not
	 * already a zombie, set it running unconditionally.
	 */
	if ((p->p_flag & SKILLED) &&
	    !(p->p_flag & SEXITING) &&
	    !(pcp->prc_flags & PRC_DESTROY) &&
	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
		(void) pr_setrun(pnp, 0);
	prunmark(p);
	mutex_exit(&p->p_lock);
}

/*
 * Called while holding p->p_lock to delay until the process is unlocked.
 * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
 * The process cannot become locked again until p->p_lock is dropped.
 */
void
prbarrier(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (p->p_proc_flag & P_PR_LOCK) {
		/* The process is locked; delay until not locked */
		uint_t slot = p->p_slot;

		while (p->p_proc_flag & P_PR_LOCK)
			cv_wait(&pr_pid_cv[slot], &p->p_lock);
		cv_signal(&pr_pid_cv[slot]);
	}
}

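/*
 * Illustrative sketch (a typical /proc operation; 'pnp' is the prnode
 * being operated upon): control and status operations bracket their work
 * with prlock()/prunlock() and return prlock()'s error on failure:
 *
 *	int error;
 *
 *	if ((error = prlock(pnp, ZNO)) != 0)
 *		return (error);		// ENOENT, EAGAIN or EINTR
 *	... act on pnp->pr_common->prc_proc with p->p_lock held ...
 *	prunlock(pnp);
 */
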
/*
 * Return process/lwp status.
 * The u-block is mapped in by this routine and unmapped at the end.
 */
void
prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus() does the rest */
	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
	sp->pr_brksize = p->p_brksize;
	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
	sp->pr_stksize = p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}

/*
 * Query mask of held signals for a given thread.
 *
 * This makes use of schedctl_sigblock() to query if userspace has requested
 * that all maskable signals be held.  While it would be tempting to call
 * schedctl_finish_sigblock() and apply that update to t->t_hold, it cannot be
 * done safely without the risk of racing with the thread under consideration.
 */
void
prgethold(kthread_t *t, sigset_t *sp)
{
	k_sigset_t set;

	if (schedctl_sigblock(t)) {
		set.__sigbits[0] = FILLSET0 & ~CANTMASK0;
		set.__sigbits[1] = FILLSET1 & ~CANTMASK1;
		set.__sigbits[2] = FILLSET2 & ~CANTMASK2;
	} else {
		set = t->t_hold;
	}
	sigktou(&set, sp);
}

#ifdef _SYSCALL32_IMPL
void
prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	prgethold(t, &sp->pr_lwphold);
	if (t->t_whystop == PR_FAULTED) {
		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
		if (t->t_whatstop == FLTPAGE)
			sp->pr_info.si_addr =
			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
	} else if (lwp->lwp_curinfo)
		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack.ss_sp =
	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts32(usr, &sp->pr_utime);
	hrt2ts32(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = (uint32_t)instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall32_args(lwp,
		    (int *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs32(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		long r1, r2;
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
		if (sp->pr_errno == 0) {
			sp->pr_rval1 = (int32_t)r1;
			sp->pr_rval2 = (int32_t)r2;
			sp->pr_errpriv = PRIV_NONE;
		} else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (caddr32_t)
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs32(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

void
prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus32() does the rest */
	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
	sp->pr_brksize = (uint32_t)p->p_brksize;
	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
	sp->pr_stksize = (uint32_t)p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus32(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Return lwp status.
 */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	if (p->p_pgidp->pid_pgorphaned)
		flags |= PR_ORPHAN;
	if (p->p_pidflag & CLDNOSIGCHLD)
		flags |= PR_NOSIGCHLD;
	if (p->p_pidflag & CLDWAITPID)
		flags |= PR_WAITPID;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	prgethold(t, &sp->pr_lwphold);
	if (t->t_whystop == PR_FAULTED)
		bcopy(&lwp->lwp_siginfo,
		    &sp->pr_info, sizeof (k_siginfo_t));
	else if (lwp->lwp_curinfo)
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &sp->pr_info, sizeof (k_siginfo_t));
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
		    (long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

/*
 * Get the sigaction structure for the specified signal.  The u-block
 * must already have been mapped in by the caller.
 */
void
prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
{
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}

#ifdef _SYSCALL32_IMPL
void
prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
{
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Count the number of segments in this process's address space.
 */
uint_t
prnsegs(struct as *as, int reserved)
{
	uint_t n = 0;
	struct seg *seg;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr != naddr) {
				n++;
				/*
				 * prnsegs() was formerly designated to return
				 * an 'int' despite having no ability or use
				 * for negative results.  As part of changing
				 * it to 'uint_t', keep the old effective limit
				 * of INT_MAX in place.
				 */
				if (n == INT_MAX) {
					pr_getprot_done(&tmp);
					ASSERT(tmp == NULL);
					return (n);
				}
			}
		}

		ASSERT(tmp == NULL);
	}

	return (n);
}

/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char cbuf[11];		/* 32-bit unsigned integer fits in 10 digits */
	char *cp = cbuf;
	char *end = s + len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	while (s < end)		/* optional pad */
		*s++ = '\0';

	return (len);
}

/*
 * Convert uint64_t to decimal string w/o leading zeros.
 * Return the string length.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char cbuf[21];		/* 64-bit unsigned integer fits in 20 digits */
	char *cp = cbuf;
	int len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	return (len);
}

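/*
 * Worked example: pr_u32tos(2038, buf, 8) stores "2038" followed by four
 * pad '\0' bytes and returns 4 (the string length, not counting the pad);
 * pr_u64tos(0, buf) stores the single character '0' and returns 1.  Note
 * that pr_u64tos() does not null-terminate; pr_object_name() below
 * supplies the terminator itself.
 */
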
/*
 * Similar to getf() / getf_gen(), but for the specified process.  On success,
 * returns the fp with fp->f_count incremented.  The caller MUST call
 * closef(fp) on the returned fp after completing any actions using that fp.
 * We return a reference-held (fp->f_count bumped) file_t so no other closef()
 * can invoke destructive VOP_CLOSE actions while we're inspecting the
 * process's FD.
 *
 * Returns NULL for errors: either an empty process-table slot post-fi_lock
 * and UF_ENTER, or too many mutex_tryenter() failures on the file_t's f_tlock.
 * Both failure modes have DTrace probes.
 *
 * The current design of the procfs "close" code path uses the following lock
 * order of:
 *
 *	1: (file_t) f_tlock
 *	2: (proc_t) p_lock AND setting p->p_proc_flag's P_PR_LOCK
 *
 * That happens because closef() holds f_tlock while calling fop_close(),
 * which can be prclose(), which currently waits on and sets P_PR_LOCK at its
 * beginning.
 *
 * That lock order creates a challenge for pr_getf, which needs to take those
 * locks in the opposite order when the fd points to a procfs file descriptor.
 * The solution chosen here is to use mutex_tryenter on f_tlock and retry some
 * (limited) number of times, failing if we don't get both locks.
 *
 * The cases where this can fail are rare, and all involve a procfs caller
 * asking for info (eg. FDINFO) on another procfs FD.  In these cases,
 * returning EBADF (which results from a NULL return from pr_getf()) is
 * acceptable.
 *
 * One can increase the number of tries in pr_getf_maxtries if one is worried
 * about the contentious case.
 */

uint64_t pr_getf_tryfails;	/* Bumped for statistic purposes. */
int pr_getf_maxtries = 3;	/* So you can tune it from /etc/system */

file_t *
pr_getf(proc_t *p, uint_t fd, short *flag)
{
	uf_entry_t *ufp;
	uf_info_t *fip;
	file_t *fp;
	int tries = 0;

	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));

retry:
	fip = P_FINFO(p);

	if (fd >= fip->fi_nfiles)
		return (NULL);

	mutex_exit(&p->p_lock);
	mutex_enter(&fip->fi_lock);
	UF_ENTER(ufp, fip, fd);
	if ((fp = ufp->uf_file) != NULL && fp->f_count > 0) {
		if (mutex_tryenter(&fp->f_tlock)) {
			ASSERT(fp->f_count > 0);
			fp->f_count++;
			mutex_exit(&fp->f_tlock);
			if (flag != NULL)
				*flag = ufp->uf_flag;
		} else {
			/*
			 * Note the number of mutex_trylock attempts.
			 *
			 * The exit path will catch this and try again if we
			 * are below the retry threshold (pr_getf_maxtries).
			 */
			tries++;
			pr_getf_tryfails++;
			/*
			 * If we hit pr_getf_maxtries, we'll return NULL.
			 * DTrace scripts looking for this sort of failure
			 * should check when arg1 is pr_getf_maxtries.
			 */
			DTRACE_PROBE2(pr_getf_tryfail, file_t *, fp, int,
			    tries);
			fp = NULL;
		}
	} else {
		fp = NULL;
		/* If we fail here, someone else closed this FD. */
		DTRACE_PROBE1(pr_getf_emptyslot, int, tries);
		tries = pr_getf_maxtries;	/* Don't bother retrying. */
	}
	UF_EXIT(ufp);
	mutex_exit(&fip->fi_lock);
	mutex_enter(&p->p_lock);

	/* Use goto instead of tail-recursion so we can keep "tries" around. */
	if (fp == NULL) {
		/* "tries" starts at 1. */
		if (tries < pr_getf_maxtries)
			goto retry;
	} else {
		/*
		 * Probes here will detect successes after arg1's number of
		 * mutex_tryenter() calls.
		 */
		DTRACE_PROBE2(pr_getf_trysuccess, file_t *, fp, int, tries + 1);
	}

	return (fp);
}

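/*
 * Illustrative sketch (a typical consumer, e.g. an FDINFO-style handler;
 * the locals are hypothetical): the returned file_t is reference-held and
 * is released with pr_releasef() below, which falls back to closef() only
 * if the process closed the fd while we held it:
 *
 *	short flag;
 *	file_t *fp;
 *
 *	if ((fp = pr_getf(p, fd, &flag)) == NULL)
 *		return (EBADF);		// closed, or f_tlock contention
 *	... inspect fp->f_vnode, fp->f_flag, flag ...
 *	pr_releasef(fp);
 */
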
1642 */ 1643 void 1644 pr_releasef(file_t *fp) 1645 { 1646 mutex_enter(&fp->f_tlock); 1647 if (fp->f_count > 1) { 1648 /* 1649 * This is the most common case: The file is still held open by 1650 * the process, and we simply need to release our hold by 1651 * decrementing f_count 1652 */ 1653 fp->f_count--; 1654 mutex_exit(&fp->f_tlock); 1655 } else { 1656 /* 1657 * A rare occasion: The process snuck a close() of this file 1658 * while we were doing our business in procfs. Given that 1659 * f_count == 1, we are the only one with a reference to the 1660 * file_t and need to take a trip through closef() to free it. 1661 */ 1662 mutex_exit(&fp->f_tlock); 1663 (void) closef(fp); 1664 } 1665 } 1666 1667 void 1668 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr) 1669 { 1670 char *s = name; 1671 struct vfs *vfsp; 1672 struct vfssw *vfsswp; 1673 1674 if ((vfsp = vp->v_vfsp) != NULL && 1675 ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) && 1676 *vfsswp->vsw_name) { 1677 (void) strcpy(s, vfsswp->vsw_name); 1678 s += strlen(s); 1679 *s++ = '.'; 1680 } 1681 s += pr_u32tos(getmajor(vattr->va_fsid), s, 0); 1682 *s++ = '.'; 1683 s += pr_u32tos(getminor(vattr->va_fsid), s, 0); 1684 *s++ = '.'; 1685 s += pr_u64tos(vattr->va_nodeid, s); 1686 *s++ = '\0'; 1687 } 1688 1689 struct seg * 1690 break_seg(proc_t *p) 1691 { 1692 caddr_t addr = p->p_brkbase; 1693 struct seg *seg; 1694 struct vnode *vp; 1695 1696 if (p->p_brksize != 0) 1697 addr += p->p_brksize - 1; 1698 seg = as_segat(p->p_as, addr); 1699 if (seg != NULL && seg->s_ops == &segvn_ops && 1700 (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL)) 1701 return (seg); 1702 return (NULL); 1703 } 1704 1705 /* 1706 * Implementation of service functions to handle procfs generic chained 1707 * copyout buffers. 1708 */ 1709 typedef struct pr_iobuf_list { 1710 list_node_t piol_link; /* buffer linkage */ 1711 size_t piol_size; /* total size (header + data) */ 1712 size_t piol_usedsize; /* amount to copy out from this buf */ 1713 } piol_t; 1714 1715 #define MAPSIZE (64 * 1024) 1716 #define PIOL_DATABUF(iol) ((void *)(&(iol)[1])) 1717 1718 void 1719 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n) 1720 { 1721 piol_t *iol; 1722 size_t initial_size = MIN(1, n) * itemsize; 1723 1724 list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link)); 1725 1726 ASSERT(list_head(iolhead) == NULL); 1727 ASSERT(itemsize < MAPSIZE - sizeof (*iol)); 1728 ASSERT(initial_size > 0); 1729 1730 /* 1731 * Someone creating chained copyout buffers may ask for less than 1732 * MAPSIZE if the amount of data to be buffered is known to be 1733 * smaller than that. 1734 * But in order to prevent involuntary self-denial of service, 1735 * the requested input size is clamped at MAPSIZE. 1736 */ 1737 initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol)); 1738 iol = kmem_alloc(initial_size, KM_SLEEP); 1739 list_insert_head(iolhead, iol); 1740 iol->piol_usedsize = 0; 1741 iol->piol_size = initial_size; 1742 } 1743 1744 void * 1745 pr_iol_newbuf(list_t *iolhead, size_t itemsize) 1746 { 1747 piol_t *iol; 1748 char *new; 1749 1750 ASSERT(itemsize < MAPSIZE - sizeof (*iol)); 1751 ASSERT(list_head(iolhead) != NULL); 1752 1753 iol = (piol_t *)list_tail(iolhead); 1754 1755 if (iol->piol_size < 1756 iol->piol_usedsize + sizeof (*iol) + itemsize) { 1757 /* 1758 * Out of space in the current buffer. Allocate more. 
struct seg *
break_seg(proc_t *p)
{
	caddr_t addr = p->p_brkbase;
	struct seg *seg;
	struct vnode *vp;

	if (p->p_brksize != 0)
		addr += p->p_brksize - 1;
	seg = as_segat(p->p_as, addr);
	if (seg != NULL && seg->s_ops == &segvn_ops &&
	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
		return (seg);
	return (NULL);
}

/*
 * Implementation of service functions to handle procfs generic chained
 * copyout buffers.
 */
typedef struct pr_iobuf_list {
	list_node_t piol_link;	/* buffer linkage */
	size_t piol_size;	/* total size (header + data) */
	size_t piol_usedsize;	/* amount to copy out from this buf */
} piol_t;

#define	MAPSIZE	(64 * 1024)
#define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))

void
pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
{
	piol_t *iol;
	size_t initial_size = MIN(1, n) * itemsize;

	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));

	ASSERT(list_head(iolhead) == NULL);
	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
	ASSERT(initial_size > 0);

	/*
	 * Someone creating chained copyout buffers may ask for less than
	 * MAPSIZE if the amount of data to be buffered is known to be
	 * smaller than that.
	 * But in order to prevent involuntary self-denial of service,
	 * the requested input size is clamped at MAPSIZE.
	 */
	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
	iol = kmem_alloc(initial_size, KM_SLEEP);
	list_insert_head(iolhead, iol);
	iol->piol_usedsize = 0;
	iol->piol_size = initial_size;
}

void *
pr_iol_newbuf(list_t *iolhead, size_t itemsize)
{
	piol_t *iol;
	char *new;

	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
	ASSERT(list_head(iolhead) != NULL);

	iol = (piol_t *)list_tail(iolhead);

	if (iol->piol_size <
	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
		/*
		 * Out of space in the current buffer. Allocate more.
		 */
		piol_t *newiol;

		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
		newiol->piol_size = MAPSIZE;
		newiol->piol_usedsize = 0;

		list_insert_after(iolhead, iol, newiol);
		iol = list_next(iolhead, iol);
		ASSERT(iol == newiol);
	}
	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
	iol->piol_usedsize += itemsize;
	bzero(new, itemsize);
	return (new);
}

void
pr_iol_freelist(list_t *iolhead)
{
	piol_t *iol;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);
}

int
pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
{
	int error = errin;
	piol_t *iol;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		if (!error) {
			if (copyout(PIOL_DATABUF(iol), *tgt,
			    iol->piol_usedsize))
				error = EFAULT;
			*tgt += iol->piol_usedsize;
		}
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);

	return (error);
}

int
pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
{
	offset_t off = uiop->uio_offset;
	char *base;
	size_t size;
	piol_t *iol;
	int error = errin;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		base = PIOL_DATABUF(iol);
		size = iol->piol_usedsize;
		if (off <= size && error == 0 && uiop->uio_resid > 0)
			error = uiomove(base + off, size - off,
			    UIO_READ, uiop);
		off = MAX(0, off - (offset_t)size);
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);

	return (error);
}

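/*
 * Illustrative sketch (the pattern used by prgetmap() below; the locals
 * are hypothetical): producers build up a chain of buffers, then drain it
 * with exactly one of the _and_free variants, which also destroys the list:
 *
 *	list_t iolhead;
 *	prmap_t *mp;
 *
 *	pr_iol_initlist(&iolhead, sizeof (*mp), expected_count);
 *	while (... more items ...) {
 *		mp = pr_iol_newbuf(&iolhead, sizeof (*mp));
 *		... fill in *mp ...
 *	}
 *	error = pr_iol_copyout_and_free(&iolhead, &cmaddr, error);
 */
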
/*
 * Return an array of structures with memory map information.
 * We allocate here; the caller must deallocate.
 */
int
prgetmap(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (uintptr_t)saddr;
			mp->pr_size = naddr - saddr;
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					mp->pr_vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_size = (uintptr_t)naddr -
					    mp->pr_vaddr;
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}

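/*
 * Illustrative sketch (assumed caller, mirroring the locking contract
 * asserted above): the address space must be write-locked across the
 * walk, and the chained buffers are drained after the walk completes:
 *
 *	struct as *as = p->p_as;
 *	list_t iolhead;
 *
 *	AS_LOCK_ENTER(as, RW_WRITER);
 *	error = prgetmap(p, 0, &iolhead);
 *	AS_LOCK_EXIT(as);
 *	error = pr_iol_copyout_and_free(&iolhead, &cmaddr, error);
 */
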
1968 */ 1969 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 1970 1971 if ((seg = AS_SEGFIRST(as)) == NULL) 1972 return (0); 1973 1974 brkseg = break_seg(p); 1975 stkseg = as_segat(as, prgetstackbase(p)); 1976 1977 do { 1978 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved); 1979 caddr_t saddr, naddr; 1980 void *tmp = NULL; 1981 1982 if ((seg->s_flags & S_HOLE) != 0) { 1983 continue; 1984 } 1985 1986 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1987 prot = pr_getprot(seg, reserved, &tmp, 1988 &saddr, &naddr, eaddr); 1989 if (saddr == naddr) 1990 continue; 1991 1992 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 1993 1994 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 1995 mp->pr_size = (size32_t)(naddr - saddr); 1996 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 1997 mp->pr_mflags = 0; 1998 if (prot & PROT_READ) 1999 mp->pr_mflags |= MA_READ; 2000 if (prot & PROT_WRITE) 2001 mp->pr_mflags |= MA_WRITE; 2002 if (prot & PROT_EXEC) 2003 mp->pr_mflags |= MA_EXEC; 2004 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 2005 mp->pr_mflags |= MA_SHARED; 2006 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 2007 mp->pr_mflags |= MA_NORESERVE; 2008 if (seg->s_ops == &segspt_shmops || 2009 (seg->s_ops == &segvn_ops && 2010 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 2011 mp->pr_mflags |= MA_ANON; 2012 if (seg == brkseg) 2013 mp->pr_mflags |= MA_BREAK; 2014 else if (seg == stkseg) { 2015 mp->pr_mflags |= MA_STACK; 2016 if (reserved) { 2017 size_t maxstack = 2018 ((size_t)p->p_stk_ctl + 2019 PAGEOFFSET) & PAGEMASK; 2020 uintptr_t vaddr = 2021 (uintptr_t)prgetstackbase(p) + 2022 p->p_stksize - maxstack; 2023 mp->pr_vaddr = (caddr32_t)vaddr; 2024 mp->pr_size = (size32_t) 2025 ((uintptr_t)naddr - vaddr); 2026 } 2027 } 2028 if (seg->s_ops == &segspt_shmops) 2029 mp->pr_mflags |= MA_ISM | MA_SHM; 2030 mp->pr_pagesize = PAGESIZE; 2031 2032 /* 2033 * Manufacture a filename for the "object" directory. 2034 */ 2035 vattr.va_mask = AT_FSID|AT_NODEID; 2036 if (seg->s_ops == &segvn_ops && 2037 SEGOP_GETVP(seg, saddr, &vp) == 0 && 2038 vp != NULL && vp->v_type == VREG && 2039 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 2040 if (vp == p->p_exec) 2041 (void) strcpy(mp->pr_mapname, "a.out"); 2042 else 2043 pr_object_name(mp->pr_mapname, 2044 vp, &vattr); 2045 } 2046 2047 /* 2048 * Get the SysV shared memory id, if any. 2049 */ 2050 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct && 2051 (mp->pr_shmid = shmgetid(p, seg->s_base)) != 2052 SHMID_NONE) { 2053 if (mp->pr_shmid == SHMID_FREE) 2054 mp->pr_shmid = -1; 2055 2056 mp->pr_mflags |= MA_SHM; 2057 } else { 2058 mp->pr_shmid = -1; 2059 } 2060 } 2061 ASSERT(tmp == NULL); 2062 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2063 2064 return (0); 2065 } 2066 #endif /* _SYSCALL32_IMPL */ 2067 2068 /* 2069 * Return the size of the /proc page data file. 
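 * The estimate charges, for each address range of every non-hole
 * segment, a prasmap_t header plus one status byte per page (rounded
 * up to an 8-byte boundary), on top of the single leading
 * prpageheader_t.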
 */
size_t
prpdsize(struct as *as)
{
	struct seg *seg;
	size_t size;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	size = sizeof (prpageheader_t);
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;
		size_t npage;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
				size += sizeof (prasmap_t) + round8(npage);
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (size);
}

#ifdef _SYSCALL32_IMPL
size_t
prpdsize32(struct as *as)
{
	struct seg *seg;
	size_t size;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	size = sizeof (prpageheader32_t);
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;
		size_t npage;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
				size += sizeof (prasmap32_t) + round8(npage);
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (size);
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Read page data information.
 */
int
prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader_t *php;
	prasmap_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as);
		return (0);
	}
	size = prpdsize(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as);
		return (E2BIG);
	}

	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader_t *)buf;
	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));

	hrt2ts(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (uintptr_t)saddr;
			pmp->pr_npage = npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
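			 * shmgetid() returns SHMID_NONE for mappings that
			 * are not SysV shared memory; SHMID_FREE (an id
			 * removed while attachments remain) is reported as
			 * -1 below, though the mapping keeps MA_SHM.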
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as);

	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}

#ifdef _SYSCALL32_IMPL
int
prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader32_t *php;
	prasmap32_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as);
		return (0);
	}
	size = prpdsize32(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as);
		return (E2BIG);
	}

	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader32_t *)buf;
	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));

	hrt2ts32(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
2362 */ 2363 if (next > (uintptr_t)buf + size) { 2364 pr_getprot_done(&tmp); 2365 AS_LOCK_EXIT(as); 2366 2367 kmem_free(buf, size); 2368 2369 if (ISSIG(curthread, JUSTLOOKING)) 2370 return (EINTR); 2371 2372 goto again; 2373 } 2374 2375 php->pr_nmap++; 2376 php->pr_npage += npage; 2377 pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 2378 pmp->pr_npage = (size32_t)npage; 2379 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 2380 pmp->pr_mflags = 0; 2381 if (prot & PROT_READ) 2382 pmp->pr_mflags |= MA_READ; 2383 if (prot & PROT_WRITE) 2384 pmp->pr_mflags |= MA_WRITE; 2385 if (prot & PROT_EXEC) 2386 pmp->pr_mflags |= MA_EXEC; 2387 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 2388 pmp->pr_mflags |= MA_SHARED; 2389 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 2390 pmp->pr_mflags |= MA_NORESERVE; 2391 if (seg->s_ops == &segspt_shmops || 2392 (seg->s_ops == &segvn_ops && 2393 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 2394 pmp->pr_mflags |= MA_ANON; 2395 if (seg->s_ops == &segspt_shmops) 2396 pmp->pr_mflags |= MA_ISM | MA_SHM; 2397 pmp->pr_pagesize = PAGESIZE; 2398 /* 2399 * Manufacture a filename for the "object" directory. 2400 */ 2401 vattr.va_mask = AT_FSID|AT_NODEID; 2402 if (seg->s_ops == &segvn_ops && 2403 SEGOP_GETVP(seg, saddr, &vp) == 0 && 2404 vp != NULL && vp->v_type == VREG && 2405 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 2406 if (vp == p->p_exec) 2407 (void) strcpy(pmp->pr_mapname, "a.out"); 2408 else 2409 pr_object_name(pmp->pr_mapname, 2410 vp, &vattr); 2411 } 2412 2413 /* 2414 * Get the SysV shared memory id, if any. 2415 */ 2416 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct && 2417 (pmp->pr_shmid = shmgetid(p, seg->s_base)) != 2418 SHMID_NONE) { 2419 if (pmp->pr_shmid == SHMID_FREE) 2420 pmp->pr_shmid = -1; 2421 2422 pmp->pr_mflags |= MA_SHM; 2423 } else { 2424 pmp->pr_shmid = -1; 2425 } 2426 2427 hat_getstat(as, saddr, len, hatid, 2428 (char *)(pmp + 1), HAT_SYNC_ZERORM); 2429 pmp = (prasmap32_t *)next; 2430 } 2431 ASSERT(tmp == NULL); 2432 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2433 2434 AS_LOCK_EXIT(as); 2435 2436 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size); 2437 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop); 2438 kmem_free(buf, size); 2439 2440 return (error); 2441 } 2442 #endif /* _SYSCALL32_IMPL */ 2443 2444 ushort_t 2445 prgetpctcpu(uint64_t pct) 2446 { 2447 /* 2448 * The value returned will be relevant in the zone of the examiner, 2449 * which may not be the same as the zone which performed the procfs 2450 * mount. 2451 */ 2452 int nonline = zone_ncpus_online_get(curproc->p_zone); 2453 2454 /* 2455 * Prorate over online cpus so we don't exceed 100% 2456 */ 2457 if (nonline > 1) 2458 pct /= nonline; 2459 pct >>= 16; /* convert to 16-bit scaled integer */ 2460 if (pct > 0x8000) /* might happen, due to rounding */ 2461 pct = 0x8000; 2462 return ((ushort_t)pct); 2463 } 2464 2465 /* 2466 * Return information used by ps(1). 2467 */ 2468 void 2469 prgetpsinfo(proc_t *p, psinfo_t *psp) 2470 { 2471 kthread_t *t; 2472 struct cred *cred; 2473 hrtime_t hrutime, hrstime; 2474 2475 ASSERT(MUTEX_HELD(&p->p_lock)); 2476 2477 if ((t = prchoose(p)) == NULL) /* returns locked thread */ 2478 bzero(psp, sizeof (*psp)); 2479 else { 2480 thread_unlock(t); 2481 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp)); 2482 } 2483 2484 /* 2485 * only export SSYS and SMSACCT; everything else is off-limits to 2486 * userland apps. 
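	 * (SSYS marks a system process; SMSACCT means microstate
	 * accounting is enabled. Neither reveals anything sensitive.)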
2487 */ 2488 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 2489 psp->pr_nlwp = p->p_lwpcnt; 2490 psp->pr_nzomb = p->p_zombcnt; 2491 mutex_enter(&p->p_crlock); 2492 cred = p->p_cred; 2493 psp->pr_uid = crgetruid(cred); 2494 psp->pr_euid = crgetuid(cred); 2495 psp->pr_gid = crgetrgid(cred); 2496 psp->pr_egid = crgetgid(cred); 2497 mutex_exit(&p->p_crlock); 2498 psp->pr_pid = p->p_pid; 2499 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 2500 (p->p_flag & SZONETOP)) { 2501 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 2502 /* 2503 * Inside local zones, fake zsched's pid as parent pids for 2504 * processes which reference processes outside of the zone. 2505 */ 2506 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 2507 } else { 2508 psp->pr_ppid = p->p_ppid; 2509 } 2510 psp->pr_pgid = p->p_pgrp; 2511 psp->pr_sid = p->p_sessp->s_sid; 2512 psp->pr_taskid = p->p_task->tk_tkid; 2513 psp->pr_projid = p->p_task->tk_proj->kpj_id; 2514 psp->pr_poolid = p->p_pool->pool_id; 2515 psp->pr_zoneid = p->p_zone->zone_id; 2516 if ((psp->pr_contract = PRCTID(p)) == 0) 2517 psp->pr_contract = -1; 2518 psp->pr_addr = (uintptr_t)prgetpsaddr(p); 2519 switch (p->p_model) { 2520 case DATAMODEL_ILP32: 2521 psp->pr_dmodel = PR_MODEL_ILP32; 2522 break; 2523 case DATAMODEL_LP64: 2524 psp->pr_dmodel = PR_MODEL_LP64; 2525 break; 2526 } 2527 hrutime = mstate_aggr_state(p, LMS_USER); 2528 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 2529 hrt2ts((hrutime + hrstime), &psp->pr_time); 2530 TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime); 2531 2532 if (t == NULL) { 2533 int wcode = p->p_wcode; /* must be atomic read */ 2534 2535 if (wcode) 2536 psp->pr_wstat = wstat(wcode, p->p_wdata); 2537 psp->pr_ttydev = PRNODEV; 2538 psp->pr_lwp.pr_state = SZOMB; 2539 psp->pr_lwp.pr_sname = 'Z'; 2540 psp->pr_lwp.pr_bindpro = PBIND_NONE; 2541 psp->pr_lwp.pr_bindpset = PS_NONE; 2542 } else { 2543 user_t *up = PTOU(p); 2544 struct as *as; 2545 dev_t d; 2546 extern dev_t rwsconsdev, rconsdev, uconsdev; 2547 2548 d = cttydev(p); 2549 /* 2550 * If the controlling terminal is the real 2551 * or workstation console device, map to what the 2552 * user thinks is the console device. Handle case when 2553 * rwsconsdev or rconsdev is set to NODEV for Starfire. 2554 */ 2555 if ((d == rwsconsdev || d == rconsdev) && d != NODEV) 2556 d = uconsdev; 2557 psp->pr_ttydev = (d == NODEV) ? 
PRNODEV : d; 2558 psp->pr_start = up->u_start; 2559 bcopy(up->u_comm, psp->pr_fname, 2560 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); 2561 bcopy(up->u_psargs, psp->pr_psargs, 2562 MIN(PRARGSZ-1, PSARGSZ)); 2563 psp->pr_argc = up->u_argc; 2564 psp->pr_argv = up->u_argv; 2565 psp->pr_envp = up->u_envp; 2566 2567 /* get the chosen lwp's lwpsinfo */ 2568 prgetlwpsinfo(t, &psp->pr_lwp); 2569 2570 /* compute %cpu for the process */ 2571 if (p->p_lwpcnt == 1) 2572 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; 2573 else { 2574 uint64_t pct = 0; 2575 hrtime_t cur_time = gethrtime_unscaled(); 2576 2577 t = p->p_tlist; 2578 do { 2579 pct += cpu_update_pct(t, cur_time); 2580 } while ((t = t->t_forw) != p->p_tlist); 2581 2582 psp->pr_pctcpu = prgetpctcpu(pct); 2583 } 2584 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { 2585 psp->pr_size = 0; 2586 psp->pr_rssize = 0; 2587 } else { 2588 mutex_exit(&p->p_lock); 2589 AS_LOCK_ENTER(as, RW_READER); 2590 psp->pr_size = btopr(as->a_resvsize) * 2591 (PAGESIZE / 1024); 2592 psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024); 2593 psp->pr_pctmem = rm_pctmemory(as); 2594 AS_LOCK_EXIT(as); 2595 mutex_enter(&p->p_lock); 2596 } 2597 } 2598 } 2599 2600 static size_t 2601 prfdinfomisc(list_t *data, uint_t type, const void *val, size_t vlen) 2602 { 2603 pr_misc_header_t *misc; 2604 size_t len; 2605 2606 len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen); 2607 2608 if (data != NULL) { 2609 misc = pr_iol_newbuf(data, len); 2610 misc->pr_misc_type = type; 2611 misc->pr_misc_size = len; 2612 misc++; 2613 bcopy((char *)val, (char *)misc, vlen); 2614 } 2615 2616 return (len); 2617 } 2618 2619 /* 2620 * There's no elegant way to determine if a character device 2621 * supports TLI, so just check a hardcoded list of known TLI 2622 * devices. 2623 */ 2624 2625 static boolean_t 2626 pristli(vnode_t *vp) 2627 { 2628 static const char *tlidevs[] = { 2629 "udp", "udp6", "tcp", "tcp6" 2630 }; 2631 char *devname; 2632 uint_t i; 2633 2634 ASSERT(vp != NULL); 2635 2636 if (vp->v_type != VCHR || vp->v_stream == NULL || vp->v_rdev == 0) 2637 return (B_FALSE); 2638 2639 if ((devname = mod_major_to_name(getmajor(vp->v_rdev))) == NULL) 2640 return (B_FALSE); 2641 2642 for (i = 0; i < ARRAY_SIZE(tlidevs); i++) { 2643 if (strcmp(devname, tlidevs[i]) == 0) 2644 return (B_TRUE); 2645 } 2646 2647 return (B_FALSE); 2648 } 2649 2650 static size_t 2651 prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred) 2652 { 2653 char *pathname; 2654 size_t pathlen; 2655 size_t sz = 0; 2656 2657 /* 2658 * The global zone's path to a file in a non-global zone can exceed 2659 * MAXPATHLEN. 
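	 * Allow for a zone-root prefix on top of an in-zone path by
	 * sizing the buffer at twice MAXPATHLEN, plus a terminating NUL.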
2660 */ 2661 pathlen = MAXPATHLEN * 2 + 1; 2662 pathname = kmem_alloc(pathlen, KM_SLEEP); 2663 2664 if (vnodetopath(NULL, vp, pathname, pathlen, cred) == 0) { 2665 sz += prfdinfomisc(data, PR_PATHNAME, 2666 pathname, strlen(pathname) + 1); 2667 } 2668 2669 kmem_free(pathname, pathlen); 2670 2671 return (sz); 2672 } 2673 2674 static size_t 2675 prfdinfotlisockopt(vnode_t *vp, list_t *data, cred_t *cred) 2676 { 2677 strcmd_t strcmd; 2678 int32_t rval; 2679 size_t sz = 0; 2680 2681 strcmd.sc_cmd = TI_GETMYNAME; 2682 strcmd.sc_timeout = 1; 2683 strcmd.sc_len = STRCMDBUFSIZE; 2684 2685 if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred, 2686 &rval, NULL) == 0 && strcmd.sc_len > 0) { 2687 sz += prfdinfomisc(data, PR_SOCKETNAME, strcmd.sc_buf, 2688 strcmd.sc_len); 2689 } 2690 2691 strcmd.sc_cmd = TI_GETPEERNAME; 2692 strcmd.sc_timeout = 1; 2693 strcmd.sc_len = STRCMDBUFSIZE; 2694 2695 if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred, 2696 &rval, NULL) == 0 && strcmd.sc_len > 0) { 2697 sz += prfdinfomisc(data, PR_PEERSOCKNAME, strcmd.sc_buf, 2698 strcmd.sc_len); 2699 } 2700 2701 return (sz); 2702 } 2703 2704 static size_t 2705 prfdinfosockopt(vnode_t *vp, list_t *data, cred_t *cred) 2706 { 2707 sonode_t *so; 2708 socklen_t vlen; 2709 size_t sz = 0; 2710 uint_t i; 2711 2712 if (vp->v_stream != NULL) { 2713 so = VTOSO(vp->v_stream->sd_vnode); 2714 2715 if (so->so_version == SOV_STREAM) 2716 so = NULL; 2717 } else { 2718 so = VTOSO(vp); 2719 } 2720 2721 if (so == NULL) 2722 return (0); 2723 2724 DTRACE_PROBE1(sonode, sonode_t *, so); 2725 2726 /* prmisc - PR_SOCKETNAME */ 2727 2728 struct sockaddr_storage buf; 2729 struct sockaddr *name = (struct sockaddr *)&buf; 2730 2731 vlen = sizeof (buf); 2732 if (SOP_GETSOCKNAME(so, name, &vlen, cred) == 0 && vlen > 0) 2733 sz += prfdinfomisc(data, PR_SOCKETNAME, name, vlen); 2734 2735 /* prmisc - PR_PEERSOCKNAME */ 2736 2737 vlen = sizeof (buf); 2738 if (SOP_GETPEERNAME(so, name, &vlen, B_FALSE, cred) == 0 && vlen > 0) 2739 sz += prfdinfomisc(data, PR_PEERSOCKNAME, name, vlen); 2740 2741 /* prmisc - PR_SOCKOPTS_BOOL_OPTS */ 2742 2743 static struct boolopt { 2744 int level; 2745 int opt; 2746 int bopt; 2747 } boolopts[] = { 2748 { SOL_SOCKET, SO_DEBUG, PR_SO_DEBUG }, 2749 { SOL_SOCKET, SO_REUSEADDR, PR_SO_REUSEADDR }, 2750 #ifdef SO_REUSEPORT 2751 /* SmartOS and OmniOS have SO_REUSEPORT */ 2752 { SOL_SOCKET, SO_REUSEPORT, PR_SO_REUSEPORT }, 2753 #endif 2754 { SOL_SOCKET, SO_KEEPALIVE, PR_SO_KEEPALIVE }, 2755 { SOL_SOCKET, SO_DONTROUTE, PR_SO_DONTROUTE }, 2756 { SOL_SOCKET, SO_BROADCAST, PR_SO_BROADCAST }, 2757 { SOL_SOCKET, SO_OOBINLINE, PR_SO_OOBINLINE }, 2758 { SOL_SOCKET, SO_DGRAM_ERRIND, PR_SO_DGRAM_ERRIND }, 2759 { SOL_SOCKET, SO_ALLZONES, PR_SO_ALLZONES }, 2760 { SOL_SOCKET, SO_MAC_EXEMPT, PR_SO_MAC_EXEMPT }, 2761 { SOL_SOCKET, SO_MAC_IMPLICIT, PR_SO_MAC_IMPLICIT }, 2762 { SOL_SOCKET, SO_EXCLBIND, PR_SO_EXCLBIND }, 2763 { SOL_SOCKET, SO_VRRP, PR_SO_VRRP }, 2764 { IPPROTO_UDP, UDP_NAT_T_ENDPOINT, 2765 PR_UDP_NAT_T_ENDPOINT } 2766 }; 2767 prsockopts_bool_opts_t opts; 2768 int val; 2769 2770 if (data != NULL) { 2771 opts.prsock_bool_opts = 0; 2772 2773 for (i = 0; i < ARRAY_SIZE(boolopts); i++) { 2774 vlen = sizeof (val); 2775 if (SOP_GETSOCKOPT(so, boolopts[i].level, 2776 boolopts[i].opt, &val, &vlen, 0, cred) == 0 && 2777 val != 0) { 2778 opts.prsock_bool_opts |= boolopts[i].bopt; 2779 } 2780 } 2781 } 2782 2783 sz += prfdinfomisc(data, PR_SOCKOPTS_BOOL_OPTS, &opts, sizeof (opts)); 2784 2785 /* prmisc - PR_SOCKOPT_LINGER */ 2786 2787 
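	/*
	 * As with the other options below, the linger value is fetched
	 * with SOP_GETSOCKOPT() under the caller's credentials and, when
	 * present, appended as a PRFDINFO_ROUNDUP()-aligned pr_misc
	 * record by prfdinfomisc().
	 */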
struct linger l; 2788 2789 vlen = sizeof (l); 2790 if (SOP_GETSOCKOPT(so, SOL_SOCKET, SO_LINGER, &l, &vlen, 2791 0, cred) == 0 && vlen > 0) { 2792 sz += prfdinfomisc(data, PR_SOCKOPT_LINGER, &l, vlen); 2793 } 2794 2795 /* prmisc - PR_SOCKOPT_* int types */ 2796 2797 static struct sopt { 2798 int level; 2799 int opt; 2800 int bopt; 2801 } sopts[] = { 2802 { SOL_SOCKET, SO_TYPE, PR_SOCKOPT_TYPE }, 2803 { SOL_SOCKET, SO_SNDBUF, PR_SOCKOPT_SNDBUF }, 2804 { SOL_SOCKET, SO_RCVBUF, PR_SOCKOPT_RCVBUF } 2805 }; 2806 2807 for (i = 0; i < ARRAY_SIZE(sopts); i++) { 2808 vlen = sizeof (val); 2809 if (SOP_GETSOCKOPT(so, sopts[i].level, sopts[i].opt, 2810 &val, &vlen, 0, cred) == 0 && vlen > 0) { 2811 sz += prfdinfomisc(data, sopts[i].bopt, &val, vlen); 2812 } 2813 } 2814 2815 /* prmisc - PR_SOCKOPT_IP_NEXTHOP */ 2816 2817 in_addr_t nexthop_val; 2818 2819 vlen = sizeof (nexthop_val); 2820 if (SOP_GETSOCKOPT(so, IPPROTO_IP, IP_NEXTHOP, 2821 &nexthop_val, &vlen, 0, cred) == 0 && vlen > 0) { 2822 sz += prfdinfomisc(data, PR_SOCKOPT_IP_NEXTHOP, 2823 &nexthop_val, vlen); 2824 } 2825 2826 /* prmisc - PR_SOCKOPT_IPV6_NEXTHOP */ 2827 2828 struct sockaddr_in6 nexthop6_val; 2829 2830 vlen = sizeof (nexthop6_val); 2831 if (SOP_GETSOCKOPT(so, IPPROTO_IPV6, IPV6_NEXTHOP, 2832 &nexthop6_val, &vlen, 0, cred) == 0 && vlen > 0) { 2833 sz += prfdinfomisc(data, PR_SOCKOPT_IPV6_NEXTHOP, 2834 &nexthop6_val, vlen); 2835 } 2836 2837 /* prmisc - PR_SOCKOPT_TCP_CONGESTION */ 2838 2839 char cong[CC_ALGO_NAME_MAX]; 2840 2841 vlen = sizeof (cong); 2842 if (SOP_GETSOCKOPT(so, IPPROTO_TCP, TCP_CONGESTION, 2843 &cong, &vlen, 0, cred) == 0 && vlen > 0) { 2844 sz += prfdinfomisc(data, PR_SOCKOPT_TCP_CONGESTION, cong, vlen); 2845 } 2846 2847 /* prmisc - PR_SOCKFILTERS_PRIV */ 2848 2849 struct fil_info fi; 2850 2851 vlen = sizeof (fi); 2852 if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST, 2853 &fi, &vlen, 0, cred) == 0 && vlen != 0) { 2854 pr_misc_header_t *misc; 2855 size_t len; 2856 2857 /* 2858 * We limit the number of returned filters to 32. 2859 * This is the maximum number that pfiles will print 2860 * anyway. 2861 */ 2862 vlen = MIN(32, fi.fi_pos + 1); 2863 vlen *= sizeof (fi); 2864 2865 len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen); 2866 sz += len; 2867 2868 if (data != NULL) { 2869 /* 2870 * So that the filter list can be built incrementally, 2871 * prfdinfomisc() is not used here. Instead we 2872 * allocate a buffer directly on the copyout list using 2873 * pr_iol_newbuf() 2874 */ 2875 misc = pr_iol_newbuf(data, len); 2876 misc->pr_misc_type = PR_SOCKFILTERS_PRIV; 2877 misc->pr_misc_size = len; 2878 misc++; 2879 len = vlen; 2880 if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST, 2881 misc, &vlen, 0, cred) == 0) { 2882 /* 2883 * In case the number of filters has reduced 2884 * since the first call, explicitly zero out 2885 * any unpopulated space. 
2886 */ 2887 if (vlen < len) 2888 bzero(misc + vlen, len - vlen); 2889 } else { 2890 /* Something went wrong, zero out the result */ 2891 bzero(misc, vlen); 2892 } 2893 } 2894 } 2895 2896 return (sz); 2897 } 2898 2899 typedef struct prfdinfo_nm_path_cbdata { 2900 proc_t *nmp_p; 2901 u_offset_t nmp_sz; 2902 list_t *nmp_data; 2903 } prfdinfo_nm_path_cbdata_t; 2904 2905 static int 2906 prfdinfo_nm_path(const struct namenode *np, cred_t *cred, void *arg) 2907 { 2908 prfdinfo_nm_path_cbdata_t *cb = arg; 2909 2910 cb->nmp_sz += prfdinfopath(cb->nmp_p, np->nm_vnode, cb->nmp_data, cred); 2911 2912 return (0); 2913 } 2914 2915 u_offset_t 2916 prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred) 2917 { 2918 u_offset_t sz; 2919 2920 /* 2921 * All fdinfo files will be at least this big - 2922 * sizeof fdinfo struct + zero length trailer 2923 */ 2924 sz = offsetof(prfdinfo_t, pr_misc) + sizeof (pr_misc_header_t); 2925 2926 /* Pathname */ 2927 switch (vp->v_type) { 2928 case VDOOR: { 2929 prfdinfo_nm_path_cbdata_t cb = { 2930 .nmp_p = p, 2931 .nmp_data = NULL, 2932 .nmp_sz = 0 2933 }; 2934 2935 (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb); 2936 sz += cb.nmp_sz; 2937 break; 2938 } 2939 case VSOCK: 2940 break; 2941 default: 2942 sz += prfdinfopath(p, vp, NULL, cred); 2943 } 2944 2945 /* Socket options */ 2946 if (vp->v_type == VSOCK) 2947 sz += prfdinfosockopt(vp, NULL, cred); 2948 2949 /* TLI/XTI sockets */ 2950 if (pristli(vp)) 2951 sz += prfdinfotlisockopt(vp, NULL, cred); 2952 2953 return (sz); 2954 } 2955 2956 int 2957 prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfo_t *fdinfo, cred_t *cred, 2958 cred_t *file_cred, list_t *data) 2959 { 2960 vattr_t vattr; 2961 int error; 2962 2963 /* 2964 * The buffer has been initialised to zero by pr_iol_newbuf(). 2965 * Initialise defaults for any values that should not default to zero. 2966 */ 2967 fdinfo->pr_uid = (uid_t)-1; 2968 fdinfo->pr_gid = (gid_t)-1; 2969 fdinfo->pr_size = -1; 2970 fdinfo->pr_locktype = F_UNLCK; 2971 fdinfo->pr_lockpid = -1; 2972 fdinfo->pr_locksysid = -1; 2973 fdinfo->pr_peerpid = -1; 2974 2975 /* Offset */ 2976 2977 /* 2978 * pr_offset has already been set from the underlying file_t. 2979 * Check if it is plausible and reset to -1 if not. 2980 */ 2981 if (fdinfo->pr_offset != -1 && 2982 VOP_SEEK(vp, 0, (offset_t *)&fdinfo->pr_offset, NULL) != 0) 2983 fdinfo->pr_offset = -1; 2984 2985 /* 2986 * Attributes 2987 * 2988 * We have two cred_t structures available here. 2989 * 'cred' is the caller's credential, and 'file_cred' is the credential 2990 * for the file being inspected. 2991 * 2992 * When looking up the file attributes, file_cred is used in order 2993 * that the correct ownership is set for doors and FIFOs. Since the 2994 * caller has permission to read the fdinfo file in proc, this does 2995 * not expose any additional information. 
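	 * The caller's credential is still used for the lock and peer
	 * queries below, where we act as an observer of the file rather
	 * than as its owner.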
2996 */ 2997 vattr.va_mask = AT_STAT; 2998 if (VOP_GETATTR(vp, &vattr, 0, file_cred, NULL) == 0) { 2999 fdinfo->pr_major = getmajor(vattr.va_fsid); 3000 fdinfo->pr_minor = getminor(vattr.va_fsid); 3001 fdinfo->pr_rmajor = getmajor(vattr.va_rdev); 3002 fdinfo->pr_rminor = getminor(vattr.va_rdev); 3003 fdinfo->pr_ino = (ino64_t)vattr.va_nodeid; 3004 fdinfo->pr_size = (off64_t)vattr.va_size; 3005 fdinfo->pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode; 3006 fdinfo->pr_uid = vattr.va_uid; 3007 fdinfo->pr_gid = vattr.va_gid; 3008 if (vp->v_type == VSOCK) 3009 fdinfo->pr_fileflags |= sock_getfasync(vp); 3010 } 3011 3012 /* locks */ 3013 3014 flock64_t bf; 3015 3016 bzero(&bf, sizeof (bf)); 3017 bf.l_type = F_WRLCK; 3018 3019 if (VOP_FRLOCK(vp, F_GETLK, &bf, 3020 (uint16_t)(fdinfo->pr_fileflags & 0xffff), 0, NULL, 3021 cred, NULL) == 0 && bf.l_type != F_UNLCK) { 3022 fdinfo->pr_locktype = bf.l_type; 3023 fdinfo->pr_lockpid = bf.l_pid; 3024 fdinfo->pr_locksysid = bf.l_sysid; 3025 } 3026 3027 /* peer cred */ 3028 3029 k_peercred_t kpc; 3030 3031 switch (vp->v_type) { 3032 case VFIFO: 3033 case VSOCK: { 3034 int32_t rval; 3035 3036 error = VOP_IOCTL(vp, _I_GETPEERCRED, (intptr_t)&kpc, 3037 FKIOCTL, cred, &rval, NULL); 3038 break; 3039 } 3040 case VCHR: { 3041 struct strioctl strioc; 3042 int32_t rval; 3043 3044 if (vp->v_stream == NULL) { 3045 error = ENOTSUP; 3046 break; 3047 } 3048 strioc.ic_cmd = _I_GETPEERCRED; 3049 strioc.ic_timout = INFTIM; 3050 strioc.ic_len = (int)sizeof (k_peercred_t); 3051 strioc.ic_dp = (char *)&kpc; 3052 3053 error = strdoioctl(vp->v_stream, &strioc, FNATIVE | FKIOCTL, 3054 STR_NOSIG | K_TO_K, cred, &rval); 3055 break; 3056 } 3057 default: 3058 error = ENOTSUP; 3059 break; 3060 } 3061 3062 if (error == 0 && kpc.pc_cr != NULL) { 3063 proc_t *peerp; 3064 3065 fdinfo->pr_peerpid = kpc.pc_cpid; 3066 3067 crfree(kpc.pc_cr); 3068 3069 mutex_enter(&pidlock); 3070 if ((peerp = prfind(fdinfo->pr_peerpid)) != NULL) { 3071 user_t *up; 3072 3073 mutex_enter(&peerp->p_lock); 3074 mutex_exit(&pidlock); 3075 3076 up = PTOU(peerp); 3077 bcopy(up->u_comm, fdinfo->pr_peername, 3078 MIN(sizeof (up->u_comm), 3079 sizeof (fdinfo->pr_peername) - 1)); 3080 3081 mutex_exit(&peerp->p_lock); 3082 } else { 3083 mutex_exit(&pidlock); 3084 } 3085 } 3086 3087 /* pathname */ 3088 3089 switch (vp->v_type) { 3090 case VDOOR: { 3091 prfdinfo_nm_path_cbdata_t cb = { 3092 .nmp_p = p, 3093 .nmp_data = data, 3094 .nmp_sz = 0 3095 }; 3096 3097 (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb); 3098 break; 3099 } 3100 case VSOCK: 3101 /* 3102 * Don't attempt to determine the path for a socket as the 3103 * vnode has no associated v_path. It will cause a linear scan 3104 * of the dnlc table and result in no path being found. 3105 */ 3106 break; 3107 default: 3108 (void) prfdinfopath(p, vp, data, cred); 3109 } 3110 3111 /* socket options */ 3112 if (vp->v_type == VSOCK) 3113 (void) prfdinfosockopt(vp, data, cred); 3114 3115 /* TLI/XTI stream sockets */ 3116 if (pristli(vp)) 3117 (void) prfdinfotlisockopt(vp, data, cred); 3118 3119 /* 3120 * Add a terminating header with a zero size. 
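	 * Consumers walk the pr_misc records by their embedded sizes, so
	 * a zero pr_misc_size serves as the end-of-list sentinel.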
3121 */ 3122 pr_misc_header_t *misc; 3123 3124 misc = pr_iol_newbuf(data, sizeof (*misc)); 3125 misc->pr_misc_size = 0; 3126 misc->pr_misc_type = (uint_t)-1; 3127 3128 return (0); 3129 } 3130 3131 #ifdef _SYSCALL32_IMPL 3132 void 3133 prgetpsinfo32(proc_t *p, psinfo32_t *psp) 3134 { 3135 kthread_t *t; 3136 struct cred *cred; 3137 hrtime_t hrutime, hrstime; 3138 3139 ASSERT(MUTEX_HELD(&p->p_lock)); 3140 3141 if ((t = prchoose(p)) == NULL) /* returns locked thread */ 3142 bzero(psp, sizeof (*psp)); 3143 else { 3144 thread_unlock(t); 3145 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp)); 3146 } 3147 3148 /* 3149 * only export SSYS and SMSACCT; everything else is off-limits to 3150 * userland apps. 3151 */ 3152 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 3153 psp->pr_nlwp = p->p_lwpcnt; 3154 psp->pr_nzomb = p->p_zombcnt; 3155 mutex_enter(&p->p_crlock); 3156 cred = p->p_cred; 3157 psp->pr_uid = crgetruid(cred); 3158 psp->pr_euid = crgetuid(cred); 3159 psp->pr_gid = crgetrgid(cred); 3160 psp->pr_egid = crgetgid(cred); 3161 mutex_exit(&p->p_crlock); 3162 psp->pr_pid = p->p_pid; 3163 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 3164 (p->p_flag & SZONETOP)) { 3165 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 3166 /* 3167 * Inside local zones, fake zsched's pid as parent pids for 3168 * processes which reference processes outside of the zone. 3169 */ 3170 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 3171 } else { 3172 psp->pr_ppid = p->p_ppid; 3173 } 3174 psp->pr_pgid = p->p_pgrp; 3175 psp->pr_sid = p->p_sessp->s_sid; 3176 psp->pr_taskid = p->p_task->tk_tkid; 3177 psp->pr_projid = p->p_task->tk_proj->kpj_id; 3178 psp->pr_poolid = p->p_pool->pool_id; 3179 psp->pr_zoneid = p->p_zone->zone_id; 3180 if ((psp->pr_contract = PRCTID(p)) == 0) 3181 psp->pr_contract = -1; 3182 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 3183 switch (p->p_model) { 3184 case DATAMODEL_ILP32: 3185 psp->pr_dmodel = PR_MODEL_ILP32; 3186 break; 3187 case DATAMODEL_LP64: 3188 psp->pr_dmodel = PR_MODEL_LP64; 3189 break; 3190 } 3191 hrutime = mstate_aggr_state(p, LMS_USER); 3192 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 3193 hrt2ts32(hrutime + hrstime, &psp->pr_time); 3194 TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime); 3195 3196 if (t == NULL) { 3197 extern int wstat(int, int); /* needs a header file */ 3198 int wcode = p->p_wcode; /* must be atomic read */ 3199 3200 if (wcode) 3201 psp->pr_wstat = wstat(wcode, p->p_wdata); 3202 psp->pr_ttydev = PRNODEV32; 3203 psp->pr_lwp.pr_state = SZOMB; 3204 psp->pr_lwp.pr_sname = 'Z'; 3205 } else { 3206 user_t *up = PTOU(p); 3207 struct as *as; 3208 dev_t d; 3209 extern dev_t rwsconsdev, rconsdev, uconsdev; 3210 3211 d = cttydev(p); 3212 /* 3213 * If the controlling terminal is the real 3214 * or workstation console device, map to what the 3215 * user thinks is the console device. Handle case when 3216 * rwsconsdev or rconsdev is set to NODEV for Starfire. 
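	 * The (possibly remapped) device is then compressed into a
	 * 32-bit dev_t with cmpldev().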
3217 */ 3218 if ((d == rwsconsdev || d == rconsdev) && d != NODEV) 3219 d = uconsdev; 3220 (void) cmpldev(&psp->pr_ttydev, d); 3221 TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start); 3222 bcopy(up->u_comm, psp->pr_fname, 3223 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); 3224 bcopy(up->u_psargs, psp->pr_psargs, 3225 MIN(PRARGSZ-1, PSARGSZ)); 3226 psp->pr_argc = up->u_argc; 3227 psp->pr_argv = (caddr32_t)up->u_argv; 3228 psp->pr_envp = (caddr32_t)up->u_envp; 3229 3230 /* get the chosen lwp's lwpsinfo */ 3231 prgetlwpsinfo32(t, &psp->pr_lwp); 3232 3233 /* compute %cpu for the process */ 3234 if (p->p_lwpcnt == 1) 3235 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; 3236 else { 3237 uint64_t pct = 0; 3238 hrtime_t cur_time; 3239 3240 t = p->p_tlist; 3241 cur_time = gethrtime_unscaled(); 3242 do { 3243 pct += cpu_update_pct(t, cur_time); 3244 } while ((t = t->t_forw) != p->p_tlist); 3245 3246 psp->pr_pctcpu = prgetpctcpu(pct); 3247 } 3248 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { 3249 psp->pr_size = 0; 3250 psp->pr_rssize = 0; 3251 } else { 3252 mutex_exit(&p->p_lock); 3253 AS_LOCK_ENTER(as, RW_READER); 3254 psp->pr_size = (size32_t) 3255 (btopr(as->a_resvsize) * (PAGESIZE / 1024)); 3256 psp->pr_rssize = (size32_t) 3257 (rm_asrss(as) * (PAGESIZE / 1024)); 3258 psp->pr_pctmem = rm_pctmemory(as); 3259 AS_LOCK_EXIT(as); 3260 mutex_enter(&p->p_lock); 3261 } 3262 } 3263 3264 /* 3265 * If we are looking at an LP64 process, zero out 3266 * the fields that cannot be represented in ILP32. 3267 */ 3268 if (p->p_model != DATAMODEL_ILP32) { 3269 psp->pr_size = 0; 3270 psp->pr_rssize = 0; 3271 psp->pr_argv = 0; 3272 psp->pr_envp = 0; 3273 } 3274 } 3275 3276 #endif /* _SYSCALL32_IMPL */ 3277 3278 void 3279 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp) 3280 { 3281 klwp_t *lwp = ttolwp(t); 3282 sobj_ops_t *sobj; 3283 char c, state; 3284 uint64_t pct; 3285 int retval, niceval; 3286 hrtime_t hrutime, hrstime; 3287 3288 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 3289 3290 bzero(psp, sizeof (*psp)); 3291 3292 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 3293 psp->pr_lwpid = t->t_tid; 3294 psp->pr_addr = (uintptr_t)t; 3295 psp->pr_wchan = (uintptr_t)t->t_wchan; 3296 3297 /* map the thread state enum into a process state enum */ 3298 state = VSTOPPED(t) ? 
TS_STOPPED : t->t_state; 3299 switch (state) { 3300 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 3301 case TS_RUN: state = SRUN; c = 'R'; break; 3302 case TS_ONPROC: state = SONPROC; c = 'O'; break; 3303 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 3304 case TS_STOPPED: state = SSTOP; c = 'T'; break; 3305 case TS_WAIT: state = SWAIT; c = 'W'; break; 3306 default: state = 0; c = '?'; break; 3307 } 3308 psp->pr_state = state; 3309 psp->pr_sname = c; 3310 if ((sobj = t->t_sobj_ops) != NULL) 3311 psp->pr_stype = SOBJ_TYPE(sobj); 3312 retval = CL_DONICE(t, NULL, 0, &niceval); 3313 if (retval == 0) { 3314 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 3315 psp->pr_nice = niceval + NZERO; 3316 } 3317 psp->pr_syscall = t->t_sysnum; 3318 psp->pr_pri = t->t_pri; 3319 psp->pr_start.tv_sec = t->t_start; 3320 psp->pr_start.tv_nsec = 0L; 3321 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 3322 scalehrtime(&hrutime); 3323 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 3324 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 3325 scalehrtime(&hrstime); 3326 hrt2ts(hrutime + hrstime, &psp->pr_time); 3327 /* compute %cpu for the lwp */ 3328 pct = cpu_update_pct(t, gethrtime_unscaled()); 3329 psp->pr_pctcpu = prgetpctcpu(pct); 3330 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 3331 if (psp->pr_cpu > 99) 3332 psp->pr_cpu = 99; 3333 3334 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 3335 sizeof (psp->pr_clname) - 1); 3336 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 3337 psp->pr_onpro = t->t_cpu->cpu_id; 3338 psp->pr_bindpro = t->t_bind_cpu; 3339 psp->pr_bindpset = t->t_bind_pset; 3340 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 3341 } 3342 3343 #ifdef _SYSCALL32_IMPL 3344 void 3345 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp) 3346 { 3347 klwp_t *lwp = ttolwp(t); 3348 sobj_ops_t *sobj; 3349 char c, state; 3350 uint64_t pct; 3351 int retval, niceval; 3352 hrtime_t hrutime, hrstime; 3353 3354 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 3355 3356 bzero(psp, sizeof (*psp)); 3357 3358 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 3359 psp->pr_lwpid = t->t_tid; 3360 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 3361 psp->pr_wchan = 0; /* cannot represent 64-bit addr in 32 bits */ 3362 3363 /* map the thread state enum into a process state enum */ 3364 state = VSTOPPED(t) ? 
TS_STOPPED : t->t_state; 3365 switch (state) { 3366 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 3367 case TS_RUN: state = SRUN; c = 'R'; break; 3368 case TS_ONPROC: state = SONPROC; c = 'O'; break; 3369 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 3370 case TS_STOPPED: state = SSTOP; c = 'T'; break; 3371 case TS_WAIT: state = SWAIT; c = 'W'; break; 3372 default: state = 0; c = '?'; break; 3373 } 3374 psp->pr_state = state; 3375 psp->pr_sname = c; 3376 if ((sobj = t->t_sobj_ops) != NULL) 3377 psp->pr_stype = SOBJ_TYPE(sobj); 3378 retval = CL_DONICE(t, NULL, 0, &niceval); 3379 if (retval == 0) { 3380 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 3381 psp->pr_nice = niceval + NZERO; 3382 } else { 3383 psp->pr_oldpri = 0; 3384 psp->pr_nice = 0; 3385 } 3386 psp->pr_syscall = t->t_sysnum; 3387 psp->pr_pri = t->t_pri; 3388 psp->pr_start.tv_sec = (time32_t)t->t_start; 3389 psp->pr_start.tv_nsec = 0L; 3390 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 3391 scalehrtime(&hrutime); 3392 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 3393 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 3394 scalehrtime(&hrstime); 3395 hrt2ts32(hrutime + hrstime, &psp->pr_time); 3396 /* compute %cpu for the lwp */ 3397 pct = cpu_update_pct(t, gethrtime_unscaled()); 3398 psp->pr_pctcpu = prgetpctcpu(pct); 3399 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 3400 if (psp->pr_cpu > 99) 3401 psp->pr_cpu = 99; 3402 3403 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 3404 sizeof (psp->pr_clname) - 1); 3405 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 3406 psp->pr_onpro = t->t_cpu->cpu_id; 3407 psp->pr_bindpro = t->t_bind_cpu; 3408 psp->pr_bindpset = t->t_bind_pset; 3409 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 3410 } 3411 #endif /* _SYSCALL32_IMPL */ 3412 3413 #ifdef _SYSCALL32_IMPL 3414 3415 #define PR_COPY_FIELD(s, d, field) d->field = s->field 3416 3417 #define PR_COPY_FIELD_ILP32(s, d, field) \ 3418 if (s->pr_dmodel == PR_MODEL_ILP32) { \ 3419 d->field = s->field; \ 3420 } 3421 3422 #define PR_COPY_TIMESPEC(s, d, field) \ 3423 TIMESPEC_TO_TIMESPEC32(&d->field, &s->field); 3424 3425 #define PR_COPY_BUF(s, d, field) \ 3426 bcopy(s->field, d->field, sizeof (d->field)); 3427 3428 #define PR_IGNORE_FIELD(s, d, field) 3429 3430 void 3431 lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest) 3432 { 3433 bzero(dest, sizeof (*dest)); 3434 3435 PR_COPY_FIELD(src, dest, pr_flag); 3436 PR_COPY_FIELD(src, dest, pr_lwpid); 3437 PR_IGNORE_FIELD(src, dest, pr_addr); 3438 PR_IGNORE_FIELD(src, dest, pr_wchan); 3439 PR_COPY_FIELD(src, dest, pr_stype); 3440 PR_COPY_FIELD(src, dest, pr_state); 3441 PR_COPY_FIELD(src, dest, pr_sname); 3442 PR_COPY_FIELD(src, dest, pr_nice); 3443 PR_COPY_FIELD(src, dest, pr_syscall); 3444 PR_COPY_FIELD(src, dest, pr_oldpri); 3445 PR_COPY_FIELD(src, dest, pr_cpu); 3446 PR_COPY_FIELD(src, dest, pr_pri); 3447 PR_COPY_FIELD(src, dest, pr_pctcpu); 3448 PR_COPY_TIMESPEC(src, dest, pr_start); 3449 PR_COPY_BUF(src, dest, pr_clname); 3450 PR_COPY_BUF(src, dest, pr_name); 3451 PR_COPY_FIELD(src, dest, pr_onpro); 3452 PR_COPY_FIELD(src, dest, pr_bindpro); 3453 PR_COPY_FIELD(src, dest, pr_bindpset); 3454 PR_COPY_FIELD(src, dest, pr_lgrp); 3455 } 3456 3457 void 3458 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest) 3459 { 3460 bzero(dest, sizeof (*dest)); 3461 3462 PR_COPY_FIELD(src, dest, pr_flag); 3463 PR_COPY_FIELD(src, dest, pr_nlwp); 3464 PR_COPY_FIELD(src, dest, pr_pid); 3465 PR_COPY_FIELD(src, dest, pr_ppid); 3466 PR_COPY_FIELD(src, dest, pr_pgid); 3467 
	PR_COPY_FIELD(src, dest, pr_sid);
	PR_COPY_FIELD(src, dest, pr_uid);
	PR_COPY_FIELD(src, dest, pr_euid);
	PR_COPY_FIELD(src, dest, pr_gid);
	PR_COPY_FIELD(src, dest, pr_egid);
	PR_IGNORE_FIELD(src, dest, pr_addr);
	PR_COPY_FIELD_ILP32(src, dest, pr_size);
	PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
	PR_COPY_FIELD(src, dest, pr_ttydev);
	PR_COPY_FIELD(src, dest, pr_pctcpu);
	PR_COPY_FIELD(src, dest, pr_pctmem);
	PR_COPY_TIMESPEC(src, dest, pr_start);
	PR_COPY_TIMESPEC(src, dest, pr_time);
	PR_COPY_TIMESPEC(src, dest, pr_ctime);
	PR_COPY_BUF(src, dest, pr_fname);
	PR_COPY_BUF(src, dest, pr_psargs);
	PR_COPY_FIELD(src, dest, pr_wstat);
	PR_COPY_FIELD(src, dest, pr_argc);
	PR_COPY_FIELD_ILP32(src, dest, pr_argv);
	PR_COPY_FIELD_ILP32(src, dest, pr_envp);
	PR_COPY_FIELD(src, dest, pr_dmodel);
	PR_COPY_FIELD(src, dest, pr_taskid);
	PR_COPY_FIELD(src, dest, pr_projid);
	PR_COPY_FIELD(src, dest, pr_nzomb);
	PR_COPY_FIELD(src, dest, pr_poolid);
	PR_COPY_FIELD(src, dest, pr_contract);

	lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
}

#undef	PR_COPY_FIELD
#undef	PR_COPY_FIELD_ILP32
#undef	PR_COPY_TIMESPEC
#undef	PR_COPY_BUF
#undef	PR_IGNORE_FIELD

#endif	/* _SYSCALL32_IMPL */

/*
 * This used to get called when microstate accounting was disabled but
 * microstate information was requested. Since microstate accounting is on
 * regardless of the proc flags, this simply makes it appear to procfs that
 * microstate accounting is on. This is relatively meaningless since you
 * can't turn it off, but this is here for the sake of appearances.
 */

/*ARGSUSED*/
void
estimate_msacct(kthread_t *t, hrtime_t curtime)
{
	proc_t *p;

	if (t == NULL)
		return;

	p = ttoproc(t);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * A system process (p0) could be referenced if the thread is
	 * in the process of exiting. Don't turn on microstate accounting
	 * in that case.
	 */
	if (p->p_flag & SSYS)
		return;

	/*
	 * Loop through all the LWPs (kernel threads) in the process.
	 */
	t = p->p_tlist;
	do {
		t->t_proc_flag |= TP_MSACCT;
	} while ((t = t->t_forw) != p->p_tlist);

	p->p_flag |= SMSACCT;		/* set process-wide MSACCT */
}

/*
 * It's not really possible to disable microstate accounting anymore.
 * However, this routine simply turns off the ms accounting flags in a
 * process. This way procfs can still pretend to turn microstate
 * accounting on and off for a process, but it actually doesn't do
 * anything. This is a neutered form of preemptive idiot-proofing.
 */
void
disable_msacct(proc_t *p)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	p->p_flag &= ~SMSACCT;		/* clear process-wide MSACCT */
	/*
	 * Loop through all the LWPs (kernel threads) in the process.
	 */
	if ((t = p->p_tlist) != NULL) {
		do {
			/* clear per-thread flag */
			t->t_proc_flag &= ~TP_MSACCT;
		} while ((t = t->t_forw) != p->p_tlist);
	}
}

/*
 * Return resource usage information.
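 * The snapshot combines the lwp's accumulated microstate buckets with
 * adjustments for the still-open current microstate and for time spent
 * waiting on a dispatch queue, scaling everything from unscaled
 * high-resolution time to nanoseconds.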
3574 */ 3575 void 3576 prgetusage(kthread_t *t, prhusage_t *pup) 3577 { 3578 klwp_t *lwp = ttolwp(t); 3579 hrtime_t *mstimep; 3580 struct mstate *ms = &lwp->lwp_mstate; 3581 int state; 3582 int i; 3583 hrtime_t curtime; 3584 hrtime_t waitrq; 3585 hrtime_t tmp1; 3586 3587 curtime = gethrtime_unscaled(); 3588 3589 pup->pr_lwpid = t->t_tid; 3590 pup->pr_count = 1; 3591 pup->pr_create = ms->ms_start; 3592 pup->pr_term = ms->ms_term; 3593 scalehrtime(&pup->pr_create); 3594 scalehrtime(&pup->pr_term); 3595 if (ms->ms_term == 0) { 3596 pup->pr_rtime = curtime - ms->ms_start; 3597 scalehrtime(&pup->pr_rtime); 3598 } else { 3599 pup->pr_rtime = ms->ms_term - ms->ms_start; 3600 scalehrtime(&pup->pr_rtime); 3601 } 3602 3603 3604 pup->pr_utime = ms->ms_acct[LMS_USER]; 3605 pup->pr_stime = ms->ms_acct[LMS_SYSTEM]; 3606 pup->pr_ttime = ms->ms_acct[LMS_TRAP]; 3607 pup->pr_tftime = ms->ms_acct[LMS_TFAULT]; 3608 pup->pr_dftime = ms->ms_acct[LMS_DFAULT]; 3609 pup->pr_kftime = ms->ms_acct[LMS_KFAULT]; 3610 pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 3611 pup->pr_slptime = ms->ms_acct[LMS_SLEEP]; 3612 pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 3613 pup->pr_stoptime = ms->ms_acct[LMS_STOPPED]; 3614 3615 prscaleusage(pup); 3616 3617 /* 3618 * Adjust for time waiting in the dispatcher queue. 3619 */ 3620 waitrq = t->t_waitrq; /* hopefully atomic */ 3621 if (waitrq != 0) { 3622 if (waitrq > curtime) { 3623 curtime = gethrtime_unscaled(); 3624 } 3625 tmp1 = curtime - waitrq; 3626 scalehrtime(&tmp1); 3627 pup->pr_wtime += tmp1; 3628 curtime = waitrq; 3629 } 3630 3631 /* 3632 * Adjust for time spent in current microstate. 3633 */ 3634 if (ms->ms_state_start > curtime) { 3635 curtime = gethrtime_unscaled(); 3636 } 3637 3638 i = 0; 3639 do { 3640 switch (state = t->t_mstate) { 3641 case LMS_SLEEP: 3642 /* 3643 * Update the timer for the current sleep state. 3644 */ 3645 switch (state = ms->ms_prev) { 3646 case LMS_TFAULT: 3647 case LMS_DFAULT: 3648 case LMS_KFAULT: 3649 case LMS_USER_LOCK: 3650 break; 3651 default: 3652 state = LMS_SLEEP; 3653 break; 3654 } 3655 break; 3656 case LMS_TFAULT: 3657 case LMS_DFAULT: 3658 case LMS_KFAULT: 3659 case LMS_USER_LOCK: 3660 state = LMS_SYSTEM; 3661 break; 3662 } 3663 switch (state) { 3664 case LMS_USER: mstimep = &pup->pr_utime; break; 3665 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 3666 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 3667 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 3668 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 3669 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 3670 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 3671 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 3672 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 3673 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 3674 default: panic("prgetusage: unknown microstate"); 3675 } 3676 tmp1 = curtime - ms->ms_state_start; 3677 if (tmp1 < 0) { 3678 curtime = gethrtime_unscaled(); 3679 i++; 3680 continue; 3681 } 3682 scalehrtime(&tmp1); 3683 } while (tmp1 < 0 && i < MAX_ITERS_SPIN); 3684 3685 *mstimep += tmp1; 3686 3687 /* update pup timestamp */ 3688 pup->pr_tstamp = curtime; 3689 scalehrtime(&pup->pr_tstamp); 3690 3691 /* 3692 * Resource usage counters. 
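	 * These come straight from the lwp's lwp_ru counters and, unlike
	 * the times above, need no scaling.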
3693 */ 3694 pup->pr_minf = lwp->lwp_ru.minflt; 3695 pup->pr_majf = lwp->lwp_ru.majflt; 3696 pup->pr_nswap = lwp->lwp_ru.nswap; 3697 pup->pr_inblk = lwp->lwp_ru.inblock; 3698 pup->pr_oublk = lwp->lwp_ru.oublock; 3699 pup->pr_msnd = lwp->lwp_ru.msgsnd; 3700 pup->pr_mrcv = lwp->lwp_ru.msgrcv; 3701 pup->pr_sigs = lwp->lwp_ru.nsignals; 3702 pup->pr_vctx = lwp->lwp_ru.nvcsw; 3703 pup->pr_ictx = lwp->lwp_ru.nivcsw; 3704 pup->pr_sysc = lwp->lwp_ru.sysc; 3705 pup->pr_ioch = lwp->lwp_ru.ioch; 3706 } 3707 3708 /* 3709 * Convert ms_acct stats from unscaled high-res time to nanoseconds 3710 */ 3711 void 3712 prscaleusage(prhusage_t *usg) 3713 { 3714 scalehrtime(&usg->pr_utime); 3715 scalehrtime(&usg->pr_stime); 3716 scalehrtime(&usg->pr_ttime); 3717 scalehrtime(&usg->pr_tftime); 3718 scalehrtime(&usg->pr_dftime); 3719 scalehrtime(&usg->pr_kftime); 3720 scalehrtime(&usg->pr_ltime); 3721 scalehrtime(&usg->pr_slptime); 3722 scalehrtime(&usg->pr_wtime); 3723 scalehrtime(&usg->pr_stoptime); 3724 } 3725 3726 3727 /* 3728 * Sum resource usage information. 3729 */ 3730 void 3731 praddusage(kthread_t *t, prhusage_t *pup) 3732 { 3733 klwp_t *lwp = ttolwp(t); 3734 hrtime_t *mstimep; 3735 struct mstate *ms = &lwp->lwp_mstate; 3736 int state; 3737 int i; 3738 hrtime_t curtime; 3739 hrtime_t waitrq; 3740 hrtime_t tmp; 3741 prhusage_t conv; 3742 3743 curtime = gethrtime_unscaled(); 3744 3745 if (ms->ms_term == 0) { 3746 tmp = curtime - ms->ms_start; 3747 scalehrtime(&tmp); 3748 pup->pr_rtime += tmp; 3749 } else { 3750 tmp = ms->ms_term - ms->ms_start; 3751 scalehrtime(&tmp); 3752 pup->pr_rtime += tmp; 3753 } 3754 3755 conv.pr_utime = ms->ms_acct[LMS_USER]; 3756 conv.pr_stime = ms->ms_acct[LMS_SYSTEM]; 3757 conv.pr_ttime = ms->ms_acct[LMS_TRAP]; 3758 conv.pr_tftime = ms->ms_acct[LMS_TFAULT]; 3759 conv.pr_dftime = ms->ms_acct[LMS_DFAULT]; 3760 conv.pr_kftime = ms->ms_acct[LMS_KFAULT]; 3761 conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 3762 conv.pr_slptime = ms->ms_acct[LMS_SLEEP]; 3763 conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 3764 conv.pr_stoptime = ms->ms_acct[LMS_STOPPED]; 3765 3766 prscaleusage(&conv); 3767 3768 pup->pr_utime += conv.pr_utime; 3769 pup->pr_stime += conv.pr_stime; 3770 pup->pr_ttime += conv.pr_ttime; 3771 pup->pr_tftime += conv.pr_tftime; 3772 pup->pr_dftime += conv.pr_dftime; 3773 pup->pr_kftime += conv.pr_kftime; 3774 pup->pr_ltime += conv.pr_ltime; 3775 pup->pr_slptime += conv.pr_slptime; 3776 pup->pr_wtime += conv.pr_wtime; 3777 pup->pr_stoptime += conv.pr_stoptime; 3778 3779 /* 3780 * Adjust for time waiting in the dispatcher queue. 3781 */ 3782 waitrq = t->t_waitrq; /* hopefully atomic */ 3783 if (waitrq != 0) { 3784 if (waitrq > curtime) { 3785 curtime = gethrtime_unscaled(); 3786 } 3787 tmp = curtime - waitrq; 3788 scalehrtime(&tmp); 3789 pup->pr_wtime += tmp; 3790 curtime = waitrq; 3791 } 3792 3793 /* 3794 * Adjust for time spent in current microstate. 3795 */ 3796 if (ms->ms_state_start > curtime) { 3797 curtime = gethrtime_unscaled(); 3798 } 3799 3800 i = 0; 3801 do { 3802 switch (state = t->t_mstate) { 3803 case LMS_SLEEP: 3804 /* 3805 * Update the timer for the current sleep state. 
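			 * A sleep that began while handling a fault or
			 * waiting for a user-level lock stays charged to
			 * that finer-grained state; only other sleeps
			 * accrue to LMS_SLEEP.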
3806 */ 3807 switch (state = ms->ms_prev) { 3808 case LMS_TFAULT: 3809 case LMS_DFAULT: 3810 case LMS_KFAULT: 3811 case LMS_USER_LOCK: 3812 break; 3813 default: 3814 state = LMS_SLEEP; 3815 break; 3816 } 3817 break; 3818 case LMS_TFAULT: 3819 case LMS_DFAULT: 3820 case LMS_KFAULT: 3821 case LMS_USER_LOCK: 3822 state = LMS_SYSTEM; 3823 break; 3824 } 3825 switch (state) { 3826 case LMS_USER: mstimep = &pup->pr_utime; break; 3827 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 3828 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 3829 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 3830 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 3831 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 3832 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 3833 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 3834 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 3835 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 3836 default: panic("praddusage: unknown microstate"); 3837 } 3838 tmp = curtime - ms->ms_state_start; 3839 if (tmp < 0) { 3840 curtime = gethrtime_unscaled(); 3841 i++; 3842 continue; 3843 } 3844 scalehrtime(&tmp); 3845 } while (tmp < 0 && i < MAX_ITERS_SPIN); 3846 3847 *mstimep += tmp; 3848 3849 /* update pup timestamp */ 3850 pup->pr_tstamp = curtime; 3851 scalehrtime(&pup->pr_tstamp); 3852 3853 /* 3854 * Resource usage counters. 3855 */ 3856 pup->pr_minf += lwp->lwp_ru.minflt; 3857 pup->pr_majf += lwp->lwp_ru.majflt; 3858 pup->pr_nswap += lwp->lwp_ru.nswap; 3859 pup->pr_inblk += lwp->lwp_ru.inblock; 3860 pup->pr_oublk += lwp->lwp_ru.oublock; 3861 pup->pr_msnd += lwp->lwp_ru.msgsnd; 3862 pup->pr_mrcv += lwp->lwp_ru.msgrcv; 3863 pup->pr_sigs += lwp->lwp_ru.nsignals; 3864 pup->pr_vctx += lwp->lwp_ru.nvcsw; 3865 pup->pr_ictx += lwp->lwp_ru.nivcsw; 3866 pup->pr_sysc += lwp->lwp_ru.sysc; 3867 pup->pr_ioch += lwp->lwp_ru.ioch; 3868 } 3869 3870 /* 3871 * Convert a prhusage_t to a prusage_t. 3872 * This means convert each hrtime_t to a timestruc_t 3873 * and copy the count fields uint64_t => ulong_t. 
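 * The counts are copied as a single block: pr_minf is taken to be the
 * first of 22 consecutive uint64_t counters in prhusage_t, matching
 * the ulong_t fields that begin at pr_minf in prusage_t.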
3874 */ 3875 void 3876 prcvtusage(prhusage_t *pup, prusage_t *upup) 3877 { 3878 uint64_t *ullp; 3879 ulong_t *ulp; 3880 int i; 3881 3882 upup->pr_lwpid = pup->pr_lwpid; 3883 upup->pr_count = pup->pr_count; 3884 3885 hrt2ts(pup->pr_tstamp, &upup->pr_tstamp); 3886 hrt2ts(pup->pr_create, &upup->pr_create); 3887 hrt2ts(pup->pr_term, &upup->pr_term); 3888 hrt2ts(pup->pr_rtime, &upup->pr_rtime); 3889 hrt2ts(pup->pr_utime, &upup->pr_utime); 3890 hrt2ts(pup->pr_stime, &upup->pr_stime); 3891 hrt2ts(pup->pr_ttime, &upup->pr_ttime); 3892 hrt2ts(pup->pr_tftime, &upup->pr_tftime); 3893 hrt2ts(pup->pr_dftime, &upup->pr_dftime); 3894 hrt2ts(pup->pr_kftime, &upup->pr_kftime); 3895 hrt2ts(pup->pr_ltime, &upup->pr_ltime); 3896 hrt2ts(pup->pr_slptime, &upup->pr_slptime); 3897 hrt2ts(pup->pr_wtime, &upup->pr_wtime); 3898 hrt2ts(pup->pr_stoptime, &upup->pr_stoptime); 3899 bzero(upup->filltime, sizeof (upup->filltime)); 3900 3901 ullp = &pup->pr_minf; 3902 ulp = &upup->pr_minf; 3903 for (i = 0; i < 22; i++) 3904 *ulp++ = (ulong_t)*ullp++; 3905 } 3906 3907 #ifdef _SYSCALL32_IMPL 3908 void 3909 prcvtusage32(prhusage_t *pup, prusage32_t *upup) 3910 { 3911 uint64_t *ullp; 3912 uint32_t *ulp; 3913 int i; 3914 3915 upup->pr_lwpid = pup->pr_lwpid; 3916 upup->pr_count = pup->pr_count; 3917 3918 hrt2ts32(pup->pr_tstamp, &upup->pr_tstamp); 3919 hrt2ts32(pup->pr_create, &upup->pr_create); 3920 hrt2ts32(pup->pr_term, &upup->pr_term); 3921 hrt2ts32(pup->pr_rtime, &upup->pr_rtime); 3922 hrt2ts32(pup->pr_utime, &upup->pr_utime); 3923 hrt2ts32(pup->pr_stime, &upup->pr_stime); 3924 hrt2ts32(pup->pr_ttime, &upup->pr_ttime); 3925 hrt2ts32(pup->pr_tftime, &upup->pr_tftime); 3926 hrt2ts32(pup->pr_dftime, &upup->pr_dftime); 3927 hrt2ts32(pup->pr_kftime, &upup->pr_kftime); 3928 hrt2ts32(pup->pr_ltime, &upup->pr_ltime); 3929 hrt2ts32(pup->pr_slptime, &upup->pr_slptime); 3930 hrt2ts32(pup->pr_wtime, &upup->pr_wtime); 3931 hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime); 3932 bzero(upup->filltime, sizeof (upup->filltime)); 3933 3934 ullp = &pup->pr_minf; 3935 ulp = &upup->pr_minf; 3936 for (i = 0; i < 22; i++) 3937 *ulp++ = (uint32_t)*ullp++; 3938 } 3939 #endif /* _SYSCALL32_IMPL */ 3940 3941 /* 3942 * Determine whether a set is empty. 3943 */ 3944 int 3945 setisempty(uint32_t *sp, uint_t n) 3946 { 3947 while (n--) 3948 if (*sp++) 3949 return (0); 3950 return (1); 3951 } 3952 3953 /* 3954 * Utility routine for establishing a watched area in the process. 3955 * Keep the list of watched areas sorted by virtual address. 3956 */ 3957 int 3958 set_watched_area(proc_t *p, struct watched_area *pwa) 3959 { 3960 caddr_t vaddr = pwa->wa_vaddr; 3961 caddr_t eaddr = pwa->wa_eaddr; 3962 ulong_t flags = pwa->wa_flags; 3963 struct watched_area *target; 3964 avl_index_t where; 3965 int error = 0; 3966 3967 /* we must not be holding p->p_lock, but the process must be locked */ 3968 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 3969 ASSERT(p->p_proc_flag & P_PR_LOCK); 3970 3971 /* 3972 * If this is our first watchpoint, enable watchpoints for the process. 3973 */ 3974 if (!pr_watch_active(p)) { 3975 kthread_t *t; 3976 3977 mutex_enter(&p->p_lock); 3978 if ((t = p->p_tlist) != NULL) { 3979 do { 3980 watch_enable(t); 3981 } while ((t = t->t_forw) != p->p_tlist); 3982 } 3983 mutex_exit(&p->p_lock); 3984 } 3985 3986 target = pr_find_watched_area(p, pwa, &where); 3987 if (target != NULL) { 3988 /* 3989 * We discovered an existing, overlapping watched area. 3990 * Allow it only if it is an exact match. 
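		 * An exact match that differs only in flags is treated as
		 * a flags change on the existing area; any other overlap
		 * is rejected with EINVAL.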

/*
 * Utility routine for establishing a watched area in the process.
 * Keep the list of watched areas sorted by virtual address.
 */
int
set_watched_area(proc_t *p, struct watched_area *pwa)
{
	caddr_t vaddr = pwa->wa_vaddr;
	caddr_t eaddr = pwa->wa_eaddr;
	ulong_t flags = pwa->wa_flags;
	struct watched_area *target;
	avl_index_t where;
	int error = 0;

	/* we must not be holding p->p_lock, but the process must be locked */
	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
	ASSERT(p->p_proc_flag & P_PR_LOCK);

	/*
	 * If this is our first watchpoint, enable watchpoints for the process.
	 */
	if (!pr_watch_active(p)) {
		kthread_t *t;

		mutex_enter(&p->p_lock);
		if ((t = p->p_tlist) != NULL) {
			do {
				watch_enable(t);
			} while ((t = t->t_forw) != p->p_tlist);
		}
		mutex_exit(&p->p_lock);
	}

	target = pr_find_watched_area(p, pwa, &where);
	if (target != NULL) {
		/*
		 * We discovered an existing, overlapping watched area.
		 * Allow it only if it is an exact match.
		 */
		if (target->wa_vaddr != vaddr ||
		    target->wa_eaddr != eaddr)
			error = EINVAL;
		else if (target->wa_flags != flags) {
			error = set_watched_page(p, vaddr, eaddr,
			    flags, target->wa_flags);
			target->wa_flags = flags;
		}
		kmem_free(pwa, sizeof (struct watched_area));
	} else {
		avl_insert(&p->p_warea, pwa, where);
		error = set_watched_page(p, vaddr, eaddr, flags, 0);
	}

	return (error);
}

/*
 * Utility routine for clearing a watched area in the process.
 * Must be an exact match of the virtual address.
 * The size and flags don't matter.
 */
int
clear_watched_area(proc_t *p, struct watched_area *pwa)
{
	struct watched_area *found;

	/* we must not be holding p->p_lock, but the process must be locked */
	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
	ASSERT(p->p_proc_flag & P_PR_LOCK);

	if (!pr_watch_active(p)) {
		kmem_free(pwa, sizeof (struct watched_area));
		return (0);
	}

	/*
	 * Look for a matching address in the watched areas.  If a match is
	 * found, clear the old watched area and adjust the watched page(s).
	 * It is not an error if there is no match.
	 */
	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
	    found->wa_vaddr == pwa->wa_vaddr) {
		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
		    found->wa_flags);
		avl_remove(&p->p_warea, found);
		kmem_free(found, sizeof (struct watched_area));
	}

	kmem_free(pwa, sizeof (struct watched_area));

	/*
	 * If we removed the last watched area from the process, disable
	 * watchpoints.
	 */
	if (!pr_watch_active(p)) {
		kthread_t *t;

		mutex_enter(&p->p_lock);
		if ((t = p->p_tlist) != NULL) {
			do {
				watch_disable(t);
			} while ((t = t->t_forw) != p->p_tlist);
		}
		mutex_exit(&p->p_lock);
	}

	return (0);
}

/*
 * Frees all the watched_area structures.
 */
void
pr_free_watchpoints(proc_t *p)
{
	struct watched_area *delp;
	void *cookie;

	cookie = NULL;
	while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
		kmem_free(delp, sizeof (struct watched_area));

	avl_destroy(&p->p_warea);
}
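
/*
 * Illustrative sketch, not part of the build, assuming a hypothetical
 * caller that holds the process /proc-locked (P_PR_LOCK) but does not
 * hold p->p_lock, as the ASSERTs above require.  Note that both
 * set_watched_area() and clear_watched_area() consume the structure
 * they are passed, freeing it as appropriate; vaddr and size here are
 * hypothetical page-aligned values:
 *
 *	struct watched_area *pwa;
 *	int error;
 *
 *	pwa = kmem_alloc(sizeof (struct watched_area), KM_SLEEP);
 *	pwa->wa_vaddr = vaddr;
 *	pwa->wa_eaddr = vaddr + size;
 *	pwa->wa_flags = WA_WRITE;
 *	error = set_watched_area(p, pwa);
 */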

/*
 * This one is called by the traced process to unwatch all the
 * pages while deallocating the list of watched_page structs.
 */
void
pr_free_watched_pages(proc_t *p)
{
	struct as *as = p->p_as;
	struct watched_page *pwp;
	uint_t prot;
	int retrycnt, err;
	void *cookie;

	if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
		return;

	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
	AS_LOCK_ENTER(as, RW_WRITER);

	pwp = avl_first(&as->a_wpage);

	cookie = NULL;
	while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
		retrycnt = 0;
		if ((prot = pwp->wp_oprot) != 0) {
			caddr_t addr = pwp->wp_vaddr;
			struct seg *seg;
		retry:

			if ((pwp->wp_prot != prot ||
			    (pwp->wp_flags & WP_NOWATCH)) &&
			    (seg = as_segat(as, addr)) != NULL) {
				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
				if (err == IE_RETRY) {
					ASSERT(retrycnt == 0);
					retrycnt++;
					goto retry;
				}
			}
		}
		kmem_free(pwp, sizeof (struct watched_page));
	}

	avl_destroy(&as->a_wpage);
	p->p_wprot = NULL;

	AS_LOCK_EXIT(as);
}
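
/*
 * Illustrative note, not part of the build: the protection bits that
 * watchpoints remove, as applied by set_watched_page() and
 * clear_watched_page() below.  Read and exec watchpoints must catch
 * every access, so all permissions are stripped; a write watchpoint
 * only needs to strip PROT_WRITE:
 *
 *	prot = pwp->wp_oprot;
 *	if (pwp->wp_read)
 *		prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
 *	if (pwp->wp_write)
 *		prot &= ~PROT_WRITE;
 *	if (pwp->wp_exec)
 *		prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
 */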

/*
 * Insert a watched area into the list of watched pages.
 * If oflags is zero then we are adding a new watched area.
 * Otherwise we are changing the flags of an existing watched area.
 */
static int
set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
    ulong_t flags, ulong_t oflags)
{
	struct as *as = p->p_as;
	avl_tree_t *pwp_tree;
	struct watched_page *pwp, *newpwp;
	struct watched_page tpw;
	avl_index_t where;
	struct seg *seg;
	uint_t prot;
	caddr_t addr;

	/*
	 * We need to pre-allocate a list of structures before we grab the
	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
	 * held.
	 */
	newpwp = NULL;
	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	    addr < eaddr; addr += PAGESIZE) {
		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
		pwp->wp_list = newpwp;
		newpwp = pwp;
	}

	AS_LOCK_ENTER(as, RW_WRITER);

	/*
	 * Search for an existing watched page to contain the watched area.
	 * If none is found, grab a new one from the available list
	 * and insert it in the active list, keeping the list sorted
	 * by user-level virtual address.
	 */
	if (p->p_flag & SVFWAIT)
		pwp_tree = &p->p_wpage;
	else
		pwp_tree = &as->a_wpage;

again:
	if (avl_numnodes(pwp_tree) > prnwatch) {
		AS_LOCK_EXIT(as);
		while (newpwp != NULL) {
			pwp = newpwp->wp_list;
			kmem_free(newpwp, sizeof (struct watched_page));
			newpwp = pwp;
		}
		return (E2BIG);
	}

	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
		pwp = newpwp;
		newpwp = newpwp->wp_list;
		pwp->wp_list = NULL;
		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
		    (uintptr_t)PAGEMASK);
		avl_insert(pwp_tree, pwp, where);
	}

	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);

	if (oflags & WA_READ)
		pwp->wp_read--;
	if (oflags & WA_WRITE)
		pwp->wp_write--;
	if (oflags & WA_EXEC)
		pwp->wp_exec--;

	ASSERT(pwp->wp_read >= 0);
	ASSERT(pwp->wp_write >= 0);
	ASSERT(pwp->wp_exec >= 0);

	if (flags & WA_READ)
		pwp->wp_read++;
	if (flags & WA_WRITE)
		pwp->wp_write++;
	if (flags & WA_EXEC)
		pwp->wp_exec++;

	if (!(p->p_flag & SVFWAIT)) {
		vaddr = pwp->wp_vaddr;
		if (pwp->wp_oprot == 0 &&
		    (seg = as_segat(as, vaddr)) != NULL) {
			SEGOP_GETPROT(seg, vaddr, 0, &prot);
			pwp->wp_oprot = (uchar_t)prot;
			pwp->wp_prot = (uchar_t)prot;
		}
		if (pwp->wp_oprot != 0) {
			prot = pwp->wp_oprot;
			if (pwp->wp_read)
				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
			if (pwp->wp_write)
				prot &= ~PROT_WRITE;
			if (pwp->wp_exec)
				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
			if (!(pwp->wp_flags & WP_NOWATCH) &&
			    pwp->wp_prot != prot &&
			    (pwp->wp_flags & WP_SETPROT) == 0) {
				pwp->wp_flags |= WP_SETPROT;
				pwp->wp_list = p->p_wprot;
				p->p_wprot = pwp;
			}
			pwp->wp_prot = (uchar_t)prot;
		}
	}

	/*
	 * If the watched area extends into the next page then do
	 * it over again with the virtual address of the next page.
	 */
	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
		goto again;

	AS_LOCK_EXIT(as);

	/*
	 * Free any pages we may have over-allocated
	 */
	while (newpwp != NULL) {
		pwp = newpwp->wp_list;
		kmem_free(newpwp, sizeof (struct watched_page));
		newpwp = pwp;
	}

	return (0);
}
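
/*
 * Illustrative note, not part of the build: wp_read, wp_write and
 * wp_exec are reference counts, so overlapping watched areas compose
 * naturally.  Two WA_WRITE areas covering the same page leave wp_write
 * at 2, and the page regains PROT_WRITE only when both are cleared;
 * a1/e1 and a2/e2 below are hypothetical page bounds:
 *
 *	(void) set_watched_page(p, a1, e1, WA_WRITE, 0);
 *	(void) set_watched_page(p, a2, e2, WA_WRITE, 0);
 *	clear_watched_page(p, a1, e1, WA_WRITE);
 *	clear_watched_page(p, a2, e2, WA_WRITE);
 */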

/*
 * Remove a watched area from the list of watched pages.
 * A watched area may extend over more than one page.
 */
static void
clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
{
	struct as *as = p->p_as;
	struct watched_page *pwp;
	struct watched_page tpw;
	avl_tree_t *tree;
	avl_index_t where;

	AS_LOCK_ENTER(as, RW_WRITER);

	if (p->p_flag & SVFWAIT)
		tree = &p->p_wpage;
	else
		tree = &as->a_wpage;

	tpw.wp_vaddr = vaddr =
	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	pwp = avl_find(tree, &tpw, &where);
	if (pwp == NULL)
		pwp = avl_nearest(tree, where, AVL_AFTER);

	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
		ASSERT(vaddr <= pwp->wp_vaddr);

		if (flags & WA_READ)
			pwp->wp_read--;
		if (flags & WA_WRITE)
			pwp->wp_write--;
		if (flags & WA_EXEC)
			pwp->wp_exec--;

		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
			/*
			 * Reset the hat layer's protections on this page.
			 */
			if (pwp->wp_oprot != 0) {
				uint_t prot = pwp->wp_oprot;

				if (pwp->wp_read)
					prot &=
					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
				if (pwp->wp_write)
					prot &= ~PROT_WRITE;
				if (pwp->wp_exec)
					prot &=
					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
				if (!(pwp->wp_flags & WP_NOWATCH) &&
				    pwp->wp_prot != prot &&
				    (pwp->wp_flags & WP_SETPROT) == 0) {
					pwp->wp_flags |= WP_SETPROT;
					pwp->wp_list = p->p_wprot;
					p->p_wprot = pwp;
				}
				pwp->wp_prot = (uchar_t)prot;
			}
		} else {
			/*
			 * No watched areas remain in this page.
			 * Reset everything to normal.
			 */
			if (pwp->wp_oprot != 0) {
				pwp->wp_prot = pwp->wp_oprot;
				if ((pwp->wp_flags & WP_SETPROT) == 0) {
					pwp->wp_flags |= WP_SETPROT;
					pwp->wp_list = p->p_wprot;
					p->p_wprot = pwp;
				}
			}
		}

		pwp = AVL_NEXT(tree, pwp);
	}

	AS_LOCK_EXIT(as);
}

/*
 * Return the original protections for the specified page.
 */
static void
getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
{
	struct watched_page *pwp;
	struct watched_page tpw;

	ASSERT(AS_LOCK_HELD(as));

	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
		*prot = pwp->wp_oprot;
}

static prpagev_t *
pr_pagev_create(struct seg *seg, int check_noreserve)
{
	prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
	size_t total_pages = seg_pages(seg);

	/*
	 * Limit the size of our vectors to pagev_lim pages at a time.  We need
	 * 4 or 5 bytes of storage per page, so this means we limit ourselves
	 * to about a megabyte of kernel heap by default.
	 */
	pagev->pg_npages = MIN(total_pages, pagev_lim);
	pagev->pg_pnbase = 0;

	pagev->pg_protv =
	    kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);

	if (check_noreserve)
		pagev->pg_incore =
		    kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
	else
		pagev->pg_incore = NULL;

	return (pagev);
}

static void
pr_pagev_destroy(prpagev_t *pagev)
{
	if (pagev->pg_incore != NULL)
		kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));

	kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
	kmem_free(pagev, sizeof (prpagev_t));
}
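
/*
 * Illustrative note, not part of the build: with the default pagev_lim
 * of 256K pages, pg_protv costs at most 1MB (4 bytes per page) and
 * pg_incore, when MAP_NORESERVE checking is requested, another 256KB
 * (1 byte per page).  The vectors follow a simple create/fill/destroy
 * lifecycle:
 *
 *	prpagev_t *pagev = pr_pagev_create(seg, check_noreserve);
 *	caddr_t addr = pr_pagev_fill(pagev, seg, seg->s_base, eaddr);
 *	...
 *	pr_pagev_destroy(pagev);
 */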

static caddr_t
pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
{
	ulong_t lastpg = seg_page(seg, eaddr - 1);
	ulong_t pn, pnlim;
	caddr_t saddr;
	size_t len;

	ASSERT(addr >= seg->s_base && addr <= eaddr);

	if (addr == eaddr)
		return (eaddr);

refill:
	ASSERT(addr < eaddr);
	pagev->pg_pnbase = seg_page(seg, addr);
	pnlim = pagev->pg_pnbase + pagev->pg_npages;
	saddr = addr;

	if (lastpg < pnlim)
		len = (size_t)(eaddr - addr);
	else
		len = pagev->pg_npages * PAGESIZE;

	if (pagev->pg_incore != NULL) {
		/*
		 * INCORE cleverly has different semantics than GETPROT:
		 * it returns info on pages up to but NOT including addr + len.
		 */
		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
		pn = pagev->pg_pnbase;

		do {
			/*
			 * Guilty knowledge here:  We know that segvn_incore
			 * returns more than just the low-order bit that
			 * indicates the page is actually in memory.  If any
			 * bits are set, then the page has backing store.
			 */
			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
				goto out;

		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);

		/*
		 * If we examined all the pages in the vector but we're not
		 * at the end of the segment, take another lap.
		 */
		if (addr < eaddr)
			goto refill;
	}

	/*
	 * Need to take len - 1 because addr + len is the address of the
	 * first byte of the page just past the end of what we want.
	 */
out:
	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
	return (addr);
}
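
/*
 * Illustrative sketch, not part of the build: pr_getprot() below drives
 * pr_pagev_fill() and pr_pagev_nextprot() roughly like this to walk a
 * segment as a series of maximal ranges of identical protections:
 *
 *	saddr = seg->s_base;
 *	pagev = pr_pagev_create(seg, check_noreserve);
 *	saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);
 *	while (saddr < eaddr) {
 *		naddr = pr_pagev_nextprot(pagev, seg, &saddr, eaddr, &prot);
 *		... the range [saddr, naddr) has protections prot ...
 *		saddr = naddr;
 *	}
 *	pr_pagev_destroy(pagev);
 */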

static caddr_t
pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
    caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
{
	/*
	 * Our starting address is either the specified address, or the base
	 * address from the start of the pagev.  If the latter is greater,
	 * this means a previous call to pr_pagev_fill has already scanned
	 * further than the end of the previous mapping.
	 */
	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
	caddr_t addr = MAX(*saddrp, base);
	ulong_t pn = seg_page(seg, addr);
	uint_t prot, nprot;

	/*
	 * If we're dealing with noreserve pages, then advance addr to
	 * the address of the next page which has backing store.
	 */
	if (pagev->pg_incore != NULL) {
		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
			if ((addr += PAGESIZE) == eaddr) {
				*saddrp = addr;
				prot = 0;
				goto out;
			}
			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
				if (addr == eaddr) {
					*saddrp = addr;
					prot = 0;
					goto out;
				}
				pn = seg_page(seg, addr);
			}
		}
	}

	/*
	 * Get the protections on the page corresponding to addr.
	 */
	pn = seg_page(seg, addr);
	ASSERT(pn >= pagev->pg_pnbase);
	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));

	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
	getwatchprot(seg->s_as, addr, &prot);
	*saddrp = addr;

	/*
	 * Now loop until we find a backed page with different protections
	 * or we reach the end of this segment.
	 */
	while ((addr += PAGESIZE) < eaddr) {
		/*
		 * If pn has advanced to the page number following what we
		 * have information on, refill the page vector and reset
		 * addr and pn.  If pr_pagev_fill does not return the
		 * address of the next page, we have a discontiguity and
		 * thus have reached the end of the current mapping.
		 */
		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
			if (naddr != addr)
				goto out;
			pn = seg_page(seg, addr);
		}

		/*
		 * The previous page's protections are in prot, and it has
		 * backing.  If this page is MAP_NORESERVE and has no backing,
		 * then end this mapping and return the previous protections.
		 */
		if (pagev->pg_incore != NULL &&
		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
			break;

		/*
		 * Otherwise end the mapping if this page's protections (nprot)
		 * are different from those in the previous page (prot).
		 */
		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
		getwatchprot(seg->s_as, addr, &nprot);

		if (nprot != prot)
			break;
	}

out:
	*protp = prot;
	return (addr);
}

size_t
pr_getsegsize(struct seg *seg, int reserved)
{
	size_t size = seg->s_size;

	/*
	 * If we're interested in the reserved space, return the size of the
	 * segment itself.  Everything else in this function is a special case
	 * to determine the actual underlying size of various segment types.
	 */
	if (reserved)
		return (size);

	/*
	 * If this is a segvn mapping of a regular file, return the smaller
	 * of the segment size and the remaining size of the file beyond
	 * the file offset corresponding to seg->s_base.
	 */
	if (seg->s_ops == &segvn_ops) {
		vattr_t vattr;
		vnode_t *vp;

		vattr.va_mask = AT_SIZE;

		if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
		    vp != NULL && vp->v_type == VREG &&
		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {

			u_offset_t fsize = vattr.va_size;
			u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);

			if (fsize < offset)
				fsize = 0;
			else
				fsize -= offset;

			fsize = roundup(fsize, (u_offset_t)PAGESIZE);

			if (fsize < (u_offset_t)size)
				size = (size_t)fsize;
		}

		return (size);
	}

	/*
	 * If this is an ISM shared segment, don't include pages that are
	 * beyond the real size of the spt segment that backs it.
	 */
	if (seg->s_ops == &segspt_shmops)
		return (MIN(spt_realsize(seg), size));

	/*
	 * If this segment is a mapping from /dev/null, then this is a
	 * reservation of virtual address space and has no actual size.
	 * Such segments are backed by segdev and have type set to neither
	 * MAP_SHARED nor MAP_PRIVATE.
	 */
	if (seg->s_ops == &segdev_ops &&
	    ((SEGOP_GETTYPE(seg, seg->s_base) &
	    (MAP_SHARED | MAP_PRIVATE)) == 0))
		return (0);

	/*
	 * If this segment doesn't match one of the special types we handle,
	 * just return the size of the segment itself.
	 */
	return (size);
}
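
/*
 * Illustrative example, not part of the build: if a segvn segment maps
 * four pages of a regular file but only 100 bytes of the file remain
 * past the mapped offset, pr_getsegsize(seg, 0) returns PAGESIZE (the
 * 100 bytes rounded up to a page boundary), while pr_getsegsize(seg, 1)
 * returns the full four-page seg->s_size reservation.
 */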

uint_t
pr_getprot(struct seg *seg, int reserved, void **tmp,
    caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
{
	struct as *as = seg->s_as;

	caddr_t saddr = *saddrp;
	caddr_t naddr;

	int check_noreserve;
	uint_t prot;

	union {
		struct segvn_data *svd;
		struct segdev_data *sdp;
		void *data;
	} s;

	s.data = seg->s_data;

	ASSERT(AS_WRITE_HELD(as));
	ASSERT(saddr >= seg->s_base && saddr < eaddr);
	ASSERT(eaddr <= seg->s_base + seg->s_size);

	/*
	 * Don't include MAP_NORESERVE pages in the address range
	 * unless their mappings have actually materialized.
	 * We cheat by knowing that segvn is the only segment
	 * driver that supports MAP_NORESERVE.
	 */
	check_noreserve =
	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
	    (s.svd->flags & MAP_NORESERVE));

	/*
	 * Examine every page only as a last resort.  We use guilty knowledge
	 * of segvn and segdev to avoid this: if there are no per-page
	 * protections present in the segment and we don't care about
	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
	 */
	if (!check_noreserve && saddr == seg->s_base &&
	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
		prot = s.svd->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
	    s.sdp != NULL && s.sdp->pageprot == 0) {
		prot = s.sdp->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else {
		prpagev_t *pagev;

		/*
		 * If addr is sitting at the start of the segment, then
		 * create a page vector to store protection and incore
		 * information for pages in the segment, and fill it.
		 * Otherwise, we expect *tmp to address the prpagev_t
		 * allocated by a previous call to this function.
		 */
		if (saddr == seg->s_base) {
			pagev = pr_pagev_create(seg, check_noreserve);
			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);

			ASSERT(*tmp == NULL);
			*tmp = pagev;

			ASSERT(saddr <= eaddr);
			*saddrp = saddr;

			if (saddr == eaddr) {
				naddr = saddr;
				prot = 0;
				goto out;
			}

		} else {
			ASSERT(*tmp != NULL);
			pagev = (prpagev_t *)*tmp;
		}

		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
		ASSERT(naddr <= eaddr);
	}

out:
	if (naddr == eaddr)
		pr_getprot_done(tmp);
	*naddrp = naddr;
	return (prot);
}

void
pr_getprot_done(void **tmp)
{
	if (*tmp != NULL) {
		pr_pagev_destroy((prpagev_t *)*tmp);
		*tmp = NULL;
	}
}

/*
 * Return true iff the vnode is a /proc file from the object directory.
 */
int
pr_isobject(vnode_t *vp)
{
	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
}

/*
 * Return true iff the vnode is a /proc file opened by the process itself.
 */
int
pr_isself(vnode_t *vp)
{
	/*
	 * XXX: To retain binary compatibility with the old
	 * ioctl()-based version of /proc, we exempt self-opens
	 * of /proc/<pid> from being marked close-on-exec.
	 */
	return (vn_matchops(vp, prvnodeops) &&
	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
	    VTOP(vp)->pr_type != PR_PIDDIR);
}

static ssize_t
pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
{
	ssize_t pagesize, hatsize;

	ASSERT(AS_WRITE_HELD(seg->s_as));
	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
	ASSERT(saddr < eaddr);

	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
	ASSERT(pagesize != 0);

	if (pagesize == -1)
		pagesize = PAGESIZE;

	saddr += P2NPHASE((uintptr_t)saddr, pagesize);

	while (saddr < eaddr) {
		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
			break;
		ASSERT(IS_P2ALIGNED(saddr, pagesize));
		saddr += pagesize;
	}

	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
	return (hatsize);
}
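
/*
 * Illustrative sketch, not part of the build: prgetxmap() below uses
 * pr_getpagesize() to subdivide each constant-protection range into
 * ranges mapped with a single hat page size; a return value of -1
 * means no translation exists at saddr:
 *
 *	for (; saddr < baddr; saddr = naddr) {
 *		psz = pr_getpagesize(seg, saddr, &naddr, baddr);
 *		... emit one map entry for [saddr, naddr) ...
 *	}
 */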

/*
 * Return an array of structures with extended memory map information.
 * We allocate here; the caller must deallocate.
 */
int
prgetxmap(proc_t *p, list_t *iolhead)
{
	struct as *as = p->p_as;
	prxmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr, baddr;
		void *tmp = NULL;
		ssize_t psz;
		char *parr;
		uint64_t npages;
		uint64_t pagenum;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/*
		 * Segment loop part one: iterate from the base of the segment
		 * to its end, pausing at each address boundary (baddr) between
		 * ranges that have different virtual memory protections.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
			ASSERT(baddr >= saddr && baddr <= eaddr);

			/*
			 * Segment loop part two: iterate from the current
			 * position to the end of the protection boundary,
			 * pausing at each address boundary (naddr) between
			 * ranges that have different underlying page sizes.
			 */
			for (; saddr < baddr; saddr = naddr) {
				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
				ASSERT(naddr >= saddr && naddr <= baddr);

				mp = pr_iol_newbuf(iolhead, sizeof (*mp));

				mp->pr_vaddr = (uintptr_t)saddr;
				mp->pr_size = naddr - saddr;
				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
				mp->pr_mflags = 0;
				if (prot & PROT_READ)
					mp->pr_mflags |= MA_READ;
				if (prot & PROT_WRITE)
					mp->pr_mflags |= MA_WRITE;
				if (prot & PROT_EXEC)
					mp->pr_mflags |= MA_EXEC;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
					mp->pr_mflags |= MA_SHARED;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
					mp->pr_mflags |= MA_NORESERVE;
				if (seg->s_ops == &segspt_shmops ||
				    (seg->s_ops == &segvn_ops &&
				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
				    vp == NULL)))
					mp->pr_mflags |= MA_ANON;
				if (seg == brkseg)
					mp->pr_mflags |= MA_BREAK;
				else if (seg == stkseg)
					mp->pr_mflags |= MA_STACK;
				if (seg->s_ops == &segspt_shmops)
					mp->pr_mflags |= MA_ISM | MA_SHM;

				mp->pr_pagesize = PAGESIZE;
				if (psz == -1) {
					mp->pr_hatpagesize = 0;
				} else {
					mp->pr_hatpagesize = psz;
				}

				/*
				 * Manufacture a filename for the "object" dir.
				 */
				mp->pr_dev = PRNODEV;
				vattr.va_mask = AT_FSID|AT_NODEID;
				if (seg->s_ops == &segvn_ops &&
				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
				    vp != NULL && vp->v_type == VREG &&
				    VOP_GETATTR(vp, &vattr, 0, CRED(),
				    NULL) == 0) {
					mp->pr_dev = vattr.va_fsid;
					mp->pr_ino = vattr.va_nodeid;
					if (vp == p->p_exec)
						(void) strcpy(mp->pr_mapname,
						    "a.out");
					else
						pr_object_name(mp->pr_mapname,
						    vp, &vattr);
				}

				/*
				 * Get the SysV shared memory id, if any.
				 */
				if ((mp->pr_mflags & MA_SHARED) &&
				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
				    seg->s_base)) != SHMID_NONE) {
					if (mp->pr_shmid == SHMID_FREE)
						mp->pr_shmid = -1;

					mp->pr_mflags |= MA_SHM;
				} else {
					mp->pr_shmid = -1;
				}

				npages = ((uintptr_t)(naddr - saddr)) >>
				    PAGESHIFT;
				parr = kmem_zalloc(npages, KM_SLEEP);

				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);

				for (pagenum = 0; pagenum < npages; pagenum++) {
					if (parr[pagenum] & SEG_PAGE_INCORE)
						mp->pr_rss++;
					if (parr[pagenum] & SEG_PAGE_ANON)
						mp->pr_anon++;
					if (parr[pagenum] & SEG_PAGE_LOCKED)
						mp->pr_locked++;
				}
				kmem_free(parr, npages);
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}

/*
 * Return the process's credentials.  We don't need a 32-bit equivalent of
 * this function because prcred_t and prcred32_t are actually the same.
 */
void
prgetcred(proc_t *p, prcred_t *pcrp)
{
	mutex_enter(&p->p_crlock);
	cred2prcred(p->p_cred, pcrp);
	mutex_exit(&p->p_crlock);
}

void
prgetsecflags(proc_t *p, prsecflags_t *psfp)
{
	ASSERT(psfp != NULL);

	bzero(psfp, sizeof (*psfp));
	psfp->pr_version = PRSECFLAGS_VERSION_CURRENT;
	psfp->pr_lower = p->p_secflags.psf_lower;
	psfp->pr_upper = p->p_secflags.psf_upper;
	psfp->pr_effective = p->p_secflags.psf_effective;
	psfp->pr_inherit = p->p_secflags.psf_inherit;
}

/*
 * Compute actual size of the prpriv_t structure.
 */
size_t
prgetprivsize(void)
{
	return (priv_prgetprivsize(NULL));
}

/*
 * Return the process's privileges.  We don't need a 32-bit equivalent of
 * this function because prpriv_t and prpriv32_t are actually the same.
 */
void
prgetpriv(proc_t *p, prpriv_t *pprp)
{
	mutex_enter(&p->p_crlock);
	cred2prpriv(p->p_cred, pprp);
	mutex_exit(&p->p_crlock);
}

#ifdef _SYSCALL32_IMPL
/*
 * Return an array of structures with HAT memory map information.
 * We allocate here; the caller must deallocate.
 */
int
prgetxmap32(proc_t *p, list_t *iolhead)
{
	struct as *as = p->p_as;
	prxmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr, baddr;
		void *tmp = NULL;
		ssize_t psz;
		char *parr;
		uint64_t npages;
		uint64_t pagenum;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/*
		 * Segment loop part one: iterate from the base of the segment
		 * to its end, pausing at each address boundary (baddr) between
		 * ranges that have different virtual memory protections.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
			ASSERT(baddr >= saddr && baddr <= eaddr);

			/*
			 * Segment loop part two: iterate from the current
			 * position to the end of the protection boundary,
			 * pausing at each address boundary (naddr) between
			 * ranges that have different underlying page sizes.
			 */
			for (; saddr < baddr; saddr = naddr) {
				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
				ASSERT(naddr >= saddr && naddr <= baddr);

				mp = pr_iol_newbuf(iolhead, sizeof (*mp));

				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
				mp->pr_size = (size32_t)(naddr - saddr);
				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
				mp->pr_mflags = 0;
				if (prot & PROT_READ)
					mp->pr_mflags |= MA_READ;
				if (prot & PROT_WRITE)
					mp->pr_mflags |= MA_WRITE;
				if (prot & PROT_EXEC)
					mp->pr_mflags |= MA_EXEC;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
					mp->pr_mflags |= MA_SHARED;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
					mp->pr_mflags |= MA_NORESERVE;
				if (seg->s_ops == &segspt_shmops ||
				    (seg->s_ops == &segvn_ops &&
				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
				    vp == NULL)))
					mp->pr_mflags |= MA_ANON;
				if (seg == brkseg)
					mp->pr_mflags |= MA_BREAK;
				else if (seg == stkseg)
					mp->pr_mflags |= MA_STACK;
				if (seg->s_ops == &segspt_shmops)
					mp->pr_mflags |= MA_ISM | MA_SHM;

				mp->pr_pagesize = PAGESIZE;
				if (psz == -1) {
					mp->pr_hatpagesize = 0;
				} else {
					mp->pr_hatpagesize = psz;
				}

				/*
				 * Manufacture a filename for the "object" dir.
				 */
				mp->pr_dev = PRNODEV32;
				vattr.va_mask = AT_FSID|AT_NODEID;
				if (seg->s_ops == &segvn_ops &&
				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
				    vp != NULL && vp->v_type == VREG &&
				    VOP_GETATTR(vp, &vattr, 0, CRED(),
				    NULL) == 0) {
					(void) cmpldev(&mp->pr_dev,
					    vattr.va_fsid);
					mp->pr_ino = vattr.va_nodeid;
					if (vp == p->p_exec)
						(void) strcpy(mp->pr_mapname,
						    "a.out");
					else
						pr_object_name(mp->pr_mapname,
						    vp, &vattr);
				}

				/*
				 * Get the SysV shared memory id, if any.
				 */
				if ((mp->pr_mflags & MA_SHARED) &&
				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
				    seg->s_base)) != SHMID_NONE) {
					if (mp->pr_shmid == SHMID_FREE)
						mp->pr_shmid = -1;

					mp->pr_mflags |= MA_SHM;
				} else {
					mp->pr_shmid = -1;
				}

				npages = ((uintptr_t)(naddr - saddr)) >>
				    PAGESHIFT;
				parr = kmem_zalloc(npages, KM_SLEEP);

				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);

				for (pagenum = 0; pagenum < npages; pagenum++) {
					if (parr[pagenum] & SEG_PAGE_INCORE)
						mp->pr_rss++;
					if (parr[pagenum] & SEG_PAGE_ANON)
						mp->pr_anon++;
					if (parr[pagenum] & SEG_PAGE_LOCKED)
						mp->pr_locked++;
				}
				kmem_free(parr, npages);
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
#endif /* _SYSCALL32_IMPL */
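
/*
 * Illustrative note, not part of the build: prgetxmap32() above differs
 * from prgetxmap() mainly in that addresses and sizes are narrowed to
 * 32 bits and the device is compressed with cmpldev(), since a 64-bit
 * dev_t does not fit in prxmap32_t's pr_dev field.
 */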