/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
 * Copyright 2022 MNX Cloud, Inc.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/priv.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/inline.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/brand.h>
#include <sys/sobject.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/var.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/session.h>
#include <sys/pcb.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/ts.h>
#include <sys/bitmap.h>
#include <sys/poll.h>
#include <sys/shm_impl.h>
#include <sys/fault.h>
#include <sys/syscall.h>
#include <sys/procfs.h>
#include <sys/processor.h>
#include <sys/cpuvar.h>
#include <sys/copyops.h>
#include <sys/time.h>
#include <sys/msacct.h>
#include <sys/flock_impl.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/pathname.h>
#include <sys/mode.h>
#include <sys/socketvar.h>
#include <sys/autoconf.h>
#include <sys/dtrace.h>
#include <sys/timod.h>
#include <sys/fs/namenode.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <inet/cc.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_spt.h>
#include <vm/page.h>
#include <sys/vmparam.h>
#include <sys/swap.h>
#include <fs/proc/prdata.h>
#include <sys/task.h>
#include <sys/project.h>
#include <sys/contract_impl.h>
#include <sys/contract/process.h>
#include <sys/contract/process_impl.h>
#include <sys/schedctl.h>
#include <sys/pool.h>
#include <sys/zone.h>
#include <sys/atomic.h>
#include <sys/sdt.h>

#define	MAX_ITERS_SPIN	5

typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);

/*
 * Choose an lwp from the complete set of lwps for the process.
 * This is called for any operation applied to the process
 * file descriptor that requires an lwp to operate upon.
 *
 * Returns a pointer to the thread for the selected LWP,
 * and with the dispatcher lock held for the thread.
 *
 * The algorithm for choosing an lwp is critical for /proc semantics;
 * don't touch this code unless you know all of the implications.
 */
kthread_t *
prchoose(proc_t *p)
{
	kthread_t *t;
	kthread_t *t_onproc = NULL;	/* running on processor */
	kthread_t *t_run = NULL;	/* runnable, on disp queue */
	kthread_t *t_sleep = NULL;	/* sleeping */
	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
	kthread_t *t_susp = NULL;	/* suspended stop */
	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
	kthread_t *t_req = NULL;	/* requested stop */
	kthread_t *t_istop = NULL;	/* event-of-interest stop */
	kthread_t *t_dtrace = NULL;	/* DTrace stop */

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * If the agent lwp exists, it takes precedence over all others.
	 */
	if ((t = p->p_agenttp) != NULL) {
		thread_lock(t);
		return (t);
	}

	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
		return (t);
	do {		/* for each lwp in the process */
		if (VSTOPPED(t)) {	/* virtually stopped */
			if (t_req == NULL)
				t_req = t;
			continue;
		}

		/* If this is a process kernel thread, ignore it. */
		if ((t->t_proc_flag & TP_KTHREAD) != 0) {
			continue;
		}

		thread_lock(t);		/* make sure thread is in good state */
		switch (t->t_state) {
		default:
			panic("prchoose: bad thread state %d, thread 0x%p",
			    t->t_state, (void *)t);
			/*NOTREACHED*/
		case TS_SLEEP:
			/* this is filthy */
			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
			    t->t_wchan0 == NULL) {
				if (t_hold == NULL)
					t_hold = t;
			} else {
				if (t_sleep == NULL)
					t_sleep = t;
			}
			break;
		case TS_RUN:
		case TS_WAIT:
			if (t_run == NULL)
				t_run = t;
			break;
		case TS_ONPROC:
			if (t_onproc == NULL)
				t_onproc = t;
			break;
		case TS_ZOMB:		/* last possible choice */
			break;
		case TS_STOPPED:
			switch (t->t_whystop) {
			case PR_SUSPENDED:
				if (t_susp == NULL)
					t_susp = t;
				break;
			case PR_JOBCONTROL:
				if (t->t_proc_flag & TP_PRSTOP) {
					if (t_jdstop == NULL)
						t_jdstop = t;
				} else {
					if (t_jstop == NULL)
						t_jstop = t;
				}
				break;
			case PR_REQUESTED:
				if (t->t_dtrace_stop && t_dtrace == NULL)
					t_dtrace = t;
				else if (t_req == NULL)
					t_req = t;
				break;
			case PR_SYSENTRY:
			case PR_SYSEXIT:
			case PR_SIGNALLED:
			case PR_FAULTED:
				/*
				 * Make an lwp calling exit() be the
				 * last lwp seen in the process.
				 */
				if (t_istop == NULL ||
				    (t_istop->t_whystop == PR_SYSENTRY &&
				    t_istop->t_whatstop == SYS_exit))
					t_istop = t;
				break;
			case PR_CHECKPOINT:	/* can't happen? */
				break;
			default:
				panic("prchoose: bad t_whystop %d, thread 0x%p",
				    t->t_whystop, (void *)t);
				/*NOTREACHED*/
			}
			break;
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);

	if (t_onproc)
		t = t_onproc;
	else if (t_run)
		t = t_run;
	else if (t_sleep)
		t = t_sleep;
	else if (t_jstop)
		t = t_jstop;
	else if (t_jdstop)
		t = t_jdstop;
	else if (t_istop)
		t = t_istop;
	else if (t_dtrace)
		t = t_dtrace;
	else if (t_req)
		t = t_req;
	else if (t_hold)
		t = t_hold;
	else if (t_susp)
		t = t_susp;
	else			/* TS_ZOMB */
		t = p->p_tlist;

	if (t != NULL)
		thread_lock(t);
	return (t);
}
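
/*
 * A minimal sketch of the caller contract for prchoose(): the chosen
 * thread is returned with its dispatcher lock held, so the caller must
 * thread_unlock() it once the thread has been examined, as prgetstatus()
 * below does:
 *
 *	mutex_enter(&p->p_lock);
 *	t = prchoose(p);		// returns locked thread
 *	if (t != NULL) {
 *		// ... examine t->t_state, t->t_whystop, etc. ...
 *		thread_unlock(t);
 *	}
 *	mutex_exit(&p->p_lock);
 */
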
/*
 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
 * on the /proc file descriptor.  Called from stop() when a traced
 * process stops on an event of interest.  Also called from exit()
 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
 */
void
prnotify(struct vnode *vp)
{
	prcommon_t *pcp = VTOP(vp)->pr_common;

	mutex_enter(&pcp->prc_mutex);
	cv_broadcast(&pcp->prc_wait);
	mutex_exit(&pcp->prc_mutex);
	if (pcp->prc_flags & PRC_POLL) {
		/*
		 * We call pollwakeup() with POLLHUP to ensure that
		 * the pollers are awakened even if they are polling
		 * for nothing (i.e., waiting for the process to exit).
		 * This enables the use of the PRC_POLL flag for optimization
		 * (we can turn off PRC_POLL only if we know no pollers remain).
		 */
		pcp->prc_flags &= ~PRC_POLL;
		pollwakeup(&pcp->prc_pollhead, POLLHUP);
	}
}
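
/*
 * For illustration, the waiting side of this broadcast is the pattern
 * prlock() uses further below when it sleeps on prc_wait for an exec()
 * to complete; a hypothetical waiter looks roughly like:
 *
 *	mutex_enter(&pcp->prc_mutex);
 *	while (!condition_of_interest(pcp))
 *		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex))
 *			break;		// interrupted by a signal
 *	mutex_exit(&pcp->prc_mutex);
 *
 * condition_of_interest() is a stand-in for this sketch, not a real
 * function in this file.
 */
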
/* called immediately below, in prfree() */
static void
prfreenotify(vnode_t *vp)
{
	prnode_t *pnp;
	prcommon_t *pcp;

	while (vp != NULL) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL);
		pcp->prc_proc = NULL;
		/*
		 * We can't call prnotify() here because we are holding
		 * pidlock.  We assert that there is no need to.
		 */
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		ASSERT(!(pcp->prc_flags & PRC_POLL));

		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}
}

/*
 * Called from a hook in freeproc() when a traced process is removed
 * from the process table.  The proc-table pointers of all associated
 * /proc vnodes are cleared to indicate that the process has gone away.
 */
void
prfree(proc_t *p)
{
	uint_t slot = p->p_slot;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Block the process against /proc so it can be freed.
	 * It cannot be freed while locked by some controlling process.
	 * Lock ordering:
	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
	 */
	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		mutex_exit(&pr_pidlock);
		cv_wait(&pr_pid_cv[slot], &p->p_lock);
		mutex_exit(&p->p_lock);
		mutex_enter(&pr_pidlock);
		mutex_enter(&p->p_lock);
	}

	ASSERT(p->p_tlist == NULL);

	prfreenotify(p->p_plist);
	p->p_plist = NULL;

	prfreenotify(p->p_trace);
	p->p_trace = NULL;

	/*
	 * We broadcast to wake up everyone waiting for this process.
	 * No one can reach this process from this point on.
	 */
	cv_broadcast(&pr_pid_cv[slot]);

	mutex_exit(&p->p_lock);
	mutex_exit(&pr_pidlock);
}

/*
 * Called from a hook in exit() when a traced process is becoming a zombie.
 */
void
prexit(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
	if (p->p_trace) {
		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
		prnotify(p->p_trace);
	}
	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
}

/*
 * Called when a thread calls lwp_exit().
 */
void
prlwpexit(kthread_t *t)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	proc_t *p = ttoproc(t);
	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;

	ASSERT(t == curthread);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		if (pcp->prc_thread == t) {
			pcp->prc_thread = NULL;
			pcp->prc_flags |= PRC_DESTROY;
		}
	}

	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		pcp->prc_thread = NULL;
		pcp->prc_flags |= PRC_DESTROY;
		prnotify(vp);
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}

/*
 * Called when a zombie thread is joined or when a
 * detached lwp exits.  Called from lwp_hash_out().
 */
void
prlwpfree(proc_t *p, lwpent_t *lep)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	vp = lep->le_trace;
	lep->le_trace = NULL;
	while (vp) {
		prnotify(vp);
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL &&
		    (pcp->prc_flags & PRC_DESTROY));
		pcp->prc_tslot = -1;
		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}

/*
 * Called from a hook in exec() when a thread starts exec().
 */
void
prexecstart(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);

	/*
	 * The P_PR_EXEC flag blocks /proc operations for
	 * the duration of the exec().
	 * We can't start exec() while the process is
	 * locked by /proc, so we call prbarrier().
	 * lwp_nostop keeps the process from being stopped
	 * via job control for the duration of the exec().
	 */

	ASSERT(MUTEX_HELD(&p->p_lock));
	prbarrier(p);
	lwp->lwp_nostop++;
	p->p_proc_flag |= P_PR_EXEC;
}

/*
 * Called from a hook in exec() when a thread finishes exec().
 * The thread may or may not have succeeded.  Some other thread
 * may have beat it to the punch.
 */
void
prexecend(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	model_t model = p->p_model;
	id_t tid = curthread->t_tid;
	int tslot = curthread->t_dslot;

	ASSERT(MUTEX_HELD(&p->p_lock));

	lwp->lwp_nostop--;
	if (p->p_flag & SEXITLWPS) {
		/*
		 * We are on our way to exiting because some
		 * other thread beat us in the race to exec().
		 * Don't clear the P_PR_EXEC flag in this case.
		 */
		return;
	}

	/*
	 * Wake up anyone waiting in /proc for the process to complete exec().
	 */
	p->p_proc_flag &= ~P_PR_EXEC;
	if ((vp = p->p_trace) != NULL) {
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pnp->pr_common->prc_datamodel = model;
		}
	}
	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
		/*
		 * We dealt with the process common above.
		 */
		ASSERT(p->p_trace != NULL);
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pcp = pnp->pr_common;
			pcp->prc_datamodel = model;
			pcp->prc_tid = tid;
			pcp->prc_tslot = tslot;
		}
	}
}

/*
 * Called from a hook in relvm() just before freeing the address space.
 * We free all the watched areas now.
 */
void
prrelvm(void)
{
	proc_t *p = ttoproc(curthread);

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */
	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	mutex_exit(&p->p_lock);
	pr_free_watched_pages(p);
}
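
/*
 * The sequence just above is the standard pattern for a traced process
 * acting on itself: take p_lock, then prbarrier() to wait out any
 * controlling process that holds P_PR_LOCK, then do the work.  A sketch
 * of the pattern, with do_self_directed_work() as a hypothetical
 * stand-in:
 *
 *	mutex_enter(&p->p_lock);
 *	prbarrier(p);		// block all other /proc operations
 *	do_self_directed_work(p);
 *	mutex_exit(&p->p_lock);
 */
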
/*
 * Called from hooks in exec-related code when a traced process
 * attempts to exec(2) a setuid/setgid program or an unreadable
 * file.  Rather than fail the exec we invalidate the associated
 * /proc vnodes so that subsequent attempts to use them will fail.
 *
 * All /proc vnodes, except directory vnodes, are retained on a linked
 * list (rooted at p_plist in the process structure) until last close.
 *
 * A controlling process must re-open the /proc files in order to
 * regain control.
 */
void
prinvalidate(struct user *up)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	vnode_t *vp;
	prnode_t *pnp;
	int writers = 0;

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */

	/*
	 * At this moment, there can be only one lwp in the process.
	 */
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);

	/*
	 * Invalidate any currently active /proc vnodes.
	 */
	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		switch (pnp->pr_type) {
		case PR_PSINFO:		/* these files can be read by anyone */
		case PR_LPSINFO:
		case PR_LWPSINFO:
		case PR_LWPDIR:
		case PR_LWPIDDIR:
		case PR_USAGE:
		case PR_LUSAGE:
		case PR_LWPUSAGE:
			break;
		default:
			pnp->pr_flags |= PR_INVAL;
			break;
		}
	}
	/*
	 * Wake up anyone waiting for the process or lwp.
	 * p->p_trace is guaranteed to be non-NULL if there
	 * are any open /proc files for this process.
	 */
	if ((vp = p->p_trace) != NULL) {
		prcommon_t *pcp = VTOP(vp)->pr_pcommon;

		prnotify(vp);
		/*
		 * Are there any writers?
		 */
		if ((writers = pcp->prc_writers) != 0) {
			/*
			 * Clear the exclusive open flag (old /proc interface).
			 * Set prc_selfopens equal to prc_writers so that
			 * the next O_EXCL|O_WRITE open will succeed
			 * even with existing (though invalid) writers.
			 * prclose() must decrement prc_selfopens when
			 * the invalid files are closed.
			 */
			pcp->prc_flags &= ~PRC_EXCL;
			ASSERT(pcp->prc_selfopens <= writers);
			pcp->prc_selfopens = writers;
		}
	}
	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
	while (vp != NULL) {
		/*
		 * We should not invalidate the lwpiddir vnodes,
		 * but the necessities of maintaining the old
		 * ioctl()-based version of /proc require it.
		 */
		pnp = VTOP(vp);
		pnp->pr_flags |= PR_INVAL;
		prnotify(vp);
		vp = pnp->pr_next;
	}

	/*
	 * If any tracing flags are in effect and any vnodes are open for
	 * writing then set the requested-stop and run-on-last-close flags.
	 * Otherwise, clear all tracing flags.
	 */
	t->t_proc_flag &= ~TP_PAUSE;
	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
		t->t_proc_flag |= TP_PRSTOP;
		aston(t);	/* so ISSIG will see the flag */
		p->p_proc_flag |= P_PR_RUNLCL;
	} else {
		premptyset(&up->u_entrymask);		/* syscalls */
		premptyset(&up->u_exitmask);
		up->u_systrap = 0;
		premptyset(&p->p_sigmask);		/* signals */
		premptyset(&p->p_fltmask);		/* faults */
		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
		prnostep(ttolwp(t));
	}

	mutex_exit(&p->p_lock);
}

/*
 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
 * Return with pr_pidlock held in all cases.
 * Return with p_lock held if the process still exists.
 * Return value is the process pointer if the process still exists, else NULL.
 * If we lock the process, give ourselves kernel priority to avoid deadlocks;
 * this is undone in prunlock().
 */
proc_t *
pr_p_lock(prnode_t *pnp)
{
	proc_t *p;
	prcommon_t *pcp;

	mutex_enter(&pr_pidlock);
	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
		return (NULL);
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * This cv/mutex pair is persistent even if
		 * the process disappears while we sleep.
		 */
		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
		kmutex_t *mp = &p->p_lock;

		mutex_exit(&pr_pidlock);
		cv_wait(cv, mp);
		mutex_exit(mp);
		mutex_enter(&pr_pidlock);
		if (pcp->prc_proc == NULL)
			return (NULL);
		ASSERT(p == pcp->prc_proc);
		mutex_enter(&p->p_lock);
	}
	p->p_proc_flag |= P_PR_LOCK;
	return (p);
}

/*
 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
 * This prevents any lwp of the process from disappearing and
 * blocks most operations that a process can perform on itself.
 * Returns 0 on success, a non-zero error number on failure.
 *
 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
 *
 * error returns:
 *	ENOENT: process or lwp has disappeared or process is exiting
 *		(or has become a zombie and zdisp == ZNO).
 *	EAGAIN: procfs vnode has become invalid.
 *	EINTR:  signal arrived while waiting for exec to complete.
 */
int
prlock(prnode_t *pnp, int zdisp)
{
	prcommon_t *pcp;
	proc_t *p;

again:
	pcp = pnp->pr_common;
	p = pr_p_lock(pnp);
	mutex_exit(&pr_pidlock);

	/*
	 * Return ENOENT immediately if there is no process.
	 */
	if (p == NULL)
		return (ENOENT);

	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);

	/*
	 * Return ENOENT if process entered zombie state or is exiting
	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
	 */
	if (zdisp == ZNO &&
	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
		prunlock(pnp);
		return (ENOENT);
	}

	/*
	 * If lwp-specific, check to see if lwp has disappeared.
	 */
	if (pcp->prc_flags & PRC_LWP) {
		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
		    pcp->prc_tslot == -1) {
			prunlock(pnp);
			return (ENOENT);
		}
	}

	/*
	 * Return EAGAIN if we have encountered a security violation.
	 * (The process exec'd a set-id or unreadable executable file.)
	 */
	if (pnp->pr_flags & PR_INVAL) {
		prunlock(pnp);
		return (EAGAIN);
	}

	/*
	 * If process is undergoing an exec(), wait for
	 * completion and then start all over again.
	 */
	if (p->p_proc_flag & P_PR_EXEC) {
		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
		mutex_enter(&pcp->prc_mutex);
		prunlock(pnp);
		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
			mutex_exit(&pcp->prc_mutex);
			return (EINTR);
		}
		mutex_exit(&pcp->prc_mutex);
		goto again;
	}

	/*
	 * We return holding p->p_lock.
	 */
	return (0);
}

/*
 * Undo prlock() and pr_p_lock().
 * p->p_lock is still held; pr_pidlock is no longer held.
 *
 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
 * if any, waiting for the flag to be dropped; it retains p->p_lock.
 *
 * prunlock() calls prunmark() and then drops p->p_lock.
 */
void
prunmark(proc_t *p)
{
	ASSERT(p->p_proc_flag & P_PR_LOCK);
	ASSERT(MUTEX_HELD(&p->p_lock));

	cv_signal(&pr_pid_cv[p->p_slot]);
	p->p_proc_flag &= ~P_PR_LOCK;
}

void
prunlock(prnode_t *pnp)
{
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;

	/*
	 * If we (or someone) gave it a SIGKILL, and it is not
	 * already a zombie, set it running unconditionally.
	 */
	if ((p->p_flag & SKILLED) &&
	    !(p->p_flag & SEXITING) &&
	    !(pcp->prc_flags & PRC_DESTROY) &&
	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
		(void) pr_setrun(pnp, 0);
	prunmark(p);
	mutex_exit(&p->p_lock);
}

/*
 * Called while holding p->p_lock to delay until the process is unlocked.
 * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
 * The process cannot become locked again until p->p_lock is dropped.
 */
void
prbarrier(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (p->p_proc_flag & P_PR_LOCK) {
		/* The process is locked; delay until not locked */
		uint_t slot = p->p_slot;

		while (p->p_proc_flag & P_PR_LOCK)
			cv_wait(&pr_pid_cv[slot], &p->p_lock);
		cv_signal(&pr_pid_cv[slot]);
	}
}
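
/*
 * A minimal sketch of the consumer side of prlock()/prunlock(), in the
 * style of the procfs vnode operations that use them; the error handling
 * mirrors the return codes documented above prlock().  pr_operate_on()
 * is a hypothetical operation, not a function in this file:
 *
 *	int
 *	pr_operate_on(prnode_t *pnp)
 *	{
 *		int error;
 *
 *		if ((error = prlock(pnp, ZNO)) != 0)
 *			return (error);	// ENOENT, EAGAIN or EINTR
 *		// ... operate; p->p_lock is held and P_PR_LOCK is set ...
 *		prunlock(pnp);
 *		return (0);
 *	}
 */
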
/*
 * Return process/lwp status.
 * The u-block is mapped in by this routine and unmapped at the end.
 */
void
prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus() does the rest */
	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
	sp->pr_brksize = p->p_brksize;
	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
	sp->pr_stksize = p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}

/*
 * Query mask of held signals for a given thread.
 *
 * This makes use of schedctl_sigblock() to query if userspace has requested
 * that all maskable signals be held.  While it would be tempting to call
 * schedctl_finish_sigblock() and apply that update to t->t_hold, it cannot be
 * done safely without the risk of racing with the thread under consideration.
 */
void
prgethold(kthread_t *t, sigset_t *sp)
{
	k_sigset_t set;

	if (schedctl_sigblock(t)) {
		set.__sigbits[0] = FILLSET0 & ~CANTMASK0;
		set.__sigbits[1] = FILLSET1 & ~CANTMASK1;
		set.__sigbits[2] = FILLSET2 & ~CANTMASK2;
	} else {
		set = t->t_hold;
	}
	sigktou(&set, sp);
}

#ifdef _SYSCALL32_IMPL
void
prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	prgethold(t, &sp->pr_lwphold);
	if (t->t_whystop == PR_FAULTED) {
		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
		if (t->t_whatstop == FLTPAGE)
			sp->pr_info.si_addr =
			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
	} else if (lwp->lwp_curinfo)
		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack.ss_sp =
	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts32(usr, &sp->pr_utime);
	hrt2ts32(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = (uint32_t)instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall32_args(lwp,
		    (int *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs32(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		long r1, r2;
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
		if (sp->pr_errno == 0) {
			sp->pr_rval1 = (int32_t)r1;
			sp->pr_rval2 = (int32_t)r2;
			sp->pr_errpriv = PRIV_NONE;
		} else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (caddr32_t)
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs32(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

void
prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus32() does the rest */
	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
	sp->pr_brksize = (uint32_t)p->p_brksize;
	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
	sp->pr_stksize = (uint32_t)p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus32(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Return lwp status.
 */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	if (p->p_pgidp->pid_pgorphaned)
		flags |= PR_ORPHAN;
	if (p->p_pidflag & CLDNOSIGCHLD)
		flags |= PR_NOSIGCHLD;
	if (p->p_pidflag & CLDWAITPID)
		flags |= PR_WAITPID;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	prgethold(t, &sp->pr_lwphold);
	if (t->t_whystop == PR_FAULTED)
		bcopy(&lwp->lwp_siginfo,
		    &sp->pr_info, sizeof (k_siginfo_t));
	else if (lwp->lwp_curinfo)
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &sp->pr_info, sizeof (k_siginfo_t));
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
		    (long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

/*
 * Get the sigaction structure for the specified signal.  The u-block
 * must already have been mapped in by the caller.
 */
void
prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
{
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}

#ifdef _SYSCALL32_IMPL
void
prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
{
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Count the number of segments in this process's address space.
 */
uint_t
prnsegs(struct as *as, int reserved)
{
	uint_t n = 0;
	struct seg *seg;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr != naddr) {
				n++;
				/*
				 * prnsegs() was formerly designated to return
				 * an 'int' despite having no ability or use
				 * for negative results.  As part of changing
				 * it to 'uint_t', keep the old effective limit
				 * of INT_MAX in place.
				 */
				if (n == INT_MAX) {
					pr_getprot_done(&tmp);
					ASSERT(tmp == NULL);
					return (n);
				}
			}
		}

		ASSERT(tmp == NULL);
	}

	return (n);
}

/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char cbuf[11];		/* 32-bit unsigned integer fits in 10 digits */
	char *cp = cbuf;
	char *end = s + len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	while (s < end)		/* optional pad */
		*s++ = '\0';

	return (len);
}

/*
 * Convert uint64_t to decimal string w/o leading zeros.
 * Return the string length.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char cbuf[21];		/* 64-bit unsigned integer fits in 20 digits */
	char *cp = cbuf;
	int len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	return (len);
}
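
/*
 * For illustration: pr_object_name() below composes these converters'
 * output into a map-object name of the form
 * <fsname>.<major>.<minor>.<nodeid>.  Assuming, say, a ufs file on a
 * device with major 136 and minor 7, and inode number 12345 (numbers
 * chosen purely for this example), the generated name would be:
 *
 *	ufs.136.7.12345
 */
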
/*
 * Similar to getf() / getf_gen(), but for the specified process.  On success,
 * returns the fp with fp->f_count incremented.  The caller MUST call
 * closef(fp) on the returned fp after completing any actions using that fp.
 * We return a reference-held (fp->f_count bumped) file_t so no other closef()
 * can invoke destructive VOP_CLOSE actions while we're inspecting the
 * process's FD.
 *
 * Returns NULL for errors: either an empty process-table slot post-fi_lock
 * and UF_ENTER, or too many mutex_tryenter() failures on the file_t's f_tlock.
 * Both failure modes have DTrace probes.
 *
 * The current design of the procfs "close" code path uses the following lock
 * order of:
 *
 *	1: (file_t) f_tlock
 *	2: (proc_t) p_lock AND setting p->p_proc_flag's P_PR_LOCK
 *
 * That happens because closef() holds f_tlock while calling fop_close(),
 * which can be prclose(), which currently waits on and sets P_PR_LOCK at its
 * beginning.
 *
 * That lock order creates a challenge for pr_getf, which needs to take those
 * locks in the opposite order when the fd points to a procfs file descriptor.
 * The solution chosen here is to use mutex_tryenter on f_tlock and retry some
 * (limited) number of times, failing if we don't get both locks.
 *
 * The cases where this can fail are rare, and all involve a procfs caller
 * asking for info (e.g. FDINFO) on another procfs FD.  In these cases,
 * returning EBADF (which results from a NULL return from pr_getf()) is
 * acceptable.
 *
 * One can increase the number of tries in pr_getf_maxtries if one is worried
 * about the contentious case.
 */

uint64_t pr_getf_tryfails;	/* Bumped for statistical purposes. */
int pr_getf_maxtries = 3;	/* So you can tune it from /etc/system */

file_t *
pr_getf(proc_t *p, uint_t fd, short *flag)
{
	uf_entry_t *ufp;
	uf_info_t *fip;
	file_t *fp;
	int tries = 0;

	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));

retry:
	fip = P_FINFO(p);

	if (fd >= fip->fi_nfiles)
		return (NULL);

	mutex_exit(&p->p_lock);
	mutex_enter(&fip->fi_lock);
	UF_ENTER(ufp, fip, fd);
	if ((fp = ufp->uf_file) != NULL && fp->f_count > 0) {
		if (mutex_tryenter(&fp->f_tlock)) {
			ASSERT(fp->f_count > 0);
			fp->f_count++;
			mutex_exit(&fp->f_tlock);
			if (flag != NULL)
				*flag = ufp->uf_flag;
		} else {
			/*
			 * Note the number of mutex_tryenter() attempts.
			 *
			 * The exit path will catch this and try again if we
			 * are below the retry threshold (pr_getf_maxtries).
			 */
			tries++;
			pr_getf_tryfails++;
			/*
			 * If we hit pr_getf_maxtries, we'll return NULL.
			 * DTrace scripts looking for this sort of failure
			 * should check when arg1 is pr_getf_maxtries.
			 */
			DTRACE_PROBE2(pr_getf_tryfail, file_t *, fp, int,
			    tries);
			fp = NULL;
		}
	} else {
		fp = NULL;
		/* If we fail here, someone else closed this FD. */
		DTRACE_PROBE1(pr_getf_emptyslot, int, tries);
		tries = pr_getf_maxtries;	/* Don't bother retrying. */
	}
	UF_EXIT(ufp);
	mutex_exit(&fip->fi_lock);
	mutex_enter(&p->p_lock);

	/* Use goto instead of tail-recursion so we can keep "tries" around. */
	if (fp == NULL) {
		/* "tries" starts at 1. */
		if (tries < pr_getf_maxtries)
			goto retry;
	} else {
		/*
		 * Probes here will detect successes after arg1's number of
		 * mutex_tryenter() calls.
		 */
		DTRACE_PROBE2(pr_getf_trysuccess, file_t *, fp, int, tries + 1);
	}

	return (fp);
}
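
/*
 * A minimal sketch of the pr_getf() contract described above, after the
 * fashion of the FDINFO code path; pr_inspect_file() is a hypothetical
 * helper, not part of this file:
 *
 *	file_t *fp;
 *	short flag;
 *
 *	// p is prlock()ed here: p_lock held and P_PR_LOCK set
 *	if ((fp = pr_getf(p, fd, &flag)) == NULL)
 *		return (EBADF);
 *	mutex_exit(&p->p_lock);
 *	pr_inspect_file(fp, flag);
 *	(void) closef(fp);	// releases the f_count hold
 *	mutex_enter(&p->p_lock);
 */
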
void
pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
{
	char *s = name;
	struct vfs *vfsp;
	struct vfssw *vfsswp;

	if ((vfsp = vp->v_vfsp) != NULL &&
	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
	    *vfsswp->vsw_name) {
		(void) strcpy(s, vfsswp->vsw_name);
		s += strlen(s);
		*s++ = '.';
	}
	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
	*s++ = '.';
	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
	*s++ = '.';
	s += pr_u64tos(vattr->va_nodeid, s);
	*s++ = '\0';
}

struct seg *
break_seg(proc_t *p)
{
	caddr_t addr = p->p_brkbase;
	struct seg *seg;
	struct vnode *vp;

	if (p->p_brksize != 0)
		addr += p->p_brksize - 1;
	seg = as_segat(p->p_as, addr);
	if (seg != NULL && seg->s_ops == &segvn_ops &&
	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
		return (seg);
	return (NULL);
}

/*
 * Implementation of service functions to handle procfs generic chained
 * copyout buffers.
 */
typedef struct pr_iobuf_list {
	list_node_t	piol_link;	/* buffer linkage */
	size_t		piol_size;	/* total size (header + data) */
	size_t		piol_usedsize;	/* amount to copy out from this buf */
} piol_t;

#define	MAPSIZE	(64 * 1024)
#define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))

void
pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
{
	piol_t	*iol;
	size_t	initial_size = MAX(1, n) * itemsize;

	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));

	ASSERT(list_head(iolhead) == NULL);
	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
	ASSERT(initial_size > 0);

	/*
	 * Someone creating chained copyout buffers may ask for less than
	 * MAPSIZE if the amount of data to be buffered is known to be
	 * smaller than that.
	 * But in order to prevent involuntary self-denial of service,
	 * the requested input size is clamped at MAPSIZE.
	 */
	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
	iol = kmem_alloc(initial_size, KM_SLEEP);
	list_insert_head(iolhead, iol);
	iol->piol_usedsize = 0;
	iol->piol_size = initial_size;
}

void *
pr_iol_newbuf(list_t *iolhead, size_t itemsize)
{
	piol_t	*iol;
	char	*new;

	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
	ASSERT(list_head(iolhead) != NULL);

	iol = (piol_t *)list_tail(iolhead);

	if (iol->piol_size <
	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
		/*
		 * Out of space in the current buffer. Allocate more.
		 */
		piol_t *newiol;

		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
		newiol->piol_size = MAPSIZE;
		newiol->piol_usedsize = 0;

		list_insert_after(iolhead, iol, newiol);
		iol = list_next(iolhead, iol);
		ASSERT(iol == newiol);
	}
	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
	iol->piol_usedsize += itemsize;
	bzero(new, itemsize);
	return (new);
}

void
pr_iol_freelist(list_t *iolhead)
{
	piol_t	*iol;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);
}

int
pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
{
	int error = errin;
	piol_t	*iol;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		if (!error) {
			if (copyout(PIOL_DATABUF(iol), *tgt,
			    iol->piol_usedsize))
				error = EFAULT;
			*tgt += iol->piol_usedsize;
		}
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);

	return (error);
}

int
pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
{
	offset_t off = uiop->uio_offset;
	char *base;
	size_t size;
	piol_t *iol;
	int error = errin;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		base = PIOL_DATABUF(iol);
		size = iol->piol_usedsize;
		if (off <= size && error == 0 && uiop->uio_resid > 0)
			error = uiomove(base + off, size - off,
			    UIO_READ, uiop);
		off = MAX(0, off - (offset_t)size);
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);

	return (error);
}
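
/*
 * Taken together, these service functions follow the pattern that
 * prgetmap() below uses; a condensed sketch, where nitems, item_t,
 * have_more_items(), fill_item() and cmaddr are hypothetical stand-ins
 * for this illustration only:
 *
 *	list_t iolhead;
 *	item_t *ip;
 *
 *	pr_iol_initlist(&iolhead, sizeof (item_t), nitems);
 *	while (have_more_items()) {
 *		ip = pr_iol_newbuf(&iolhead, sizeof (item_t));
 *		fill_item(ip);
 *	}
 *	// copies out and frees the chain, preserving any earlier error
 *	error = pr_iol_copyout_and_free(&iolhead, &cmaddr, error);
 */
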
/*
 * Return an array of structures with memory map information.
 * We allocate here; the caller must deallocate.
 */
int
prgetmap(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (uintptr_t)saddr;
			mp->pr_size = naddr - saddr;
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					mp->pr_vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_size = (uintptr_t)naddr -
					    mp->pr_vaddr;
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}

#ifdef _SYSCALL32_IMPL
int
prgetmap32(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			mp->pr_size = (size32_t)(naddr - saddr);
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					uintptr_t vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_vaddr = (caddr32_t)vaddr;
					mp->pr_size = (size32_t)
					    ((uintptr_t)naddr - vaddr);
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}
2000 */ 2001 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct && 2002 (mp->pr_shmid = shmgetid(p, seg->s_base)) != 2003 SHMID_NONE) { 2004 if (mp->pr_shmid == SHMID_FREE) 2005 mp->pr_shmid = -1; 2006 2007 mp->pr_mflags |= MA_SHM; 2008 } else { 2009 mp->pr_shmid = -1; 2010 } 2011 } 2012 ASSERT(tmp == NULL); 2013 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2014 2015 return (0); 2016 } 2017 #endif /* _SYSCALL32_IMPL */ 2018 2019 /* 2020 * Return the size of the /proc page data file. 2021 */ 2022 size_t 2023 prpdsize(struct as *as) 2024 { 2025 struct seg *seg; 2026 size_t size; 2027 2028 ASSERT(as != &kas && AS_WRITE_HELD(as)); 2029 2030 if ((seg = AS_SEGFIRST(as)) == NULL) 2031 return (0); 2032 2033 size = sizeof (prpageheader_t); 2034 do { 2035 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 2036 caddr_t saddr, naddr; 2037 void *tmp = NULL; 2038 size_t npage; 2039 2040 if ((seg->s_flags & S_HOLE) != 0) { 2041 continue; 2042 } 2043 2044 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2045 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 2046 if ((npage = (naddr - saddr) / PAGESIZE) != 0) 2047 size += sizeof (prasmap_t) + round8(npage); 2048 } 2049 ASSERT(tmp == NULL); 2050 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2051 2052 return (size); 2053 } 2054 2055 #ifdef _SYSCALL32_IMPL 2056 size_t 2057 prpdsize32(struct as *as) 2058 { 2059 struct seg *seg; 2060 size_t size; 2061 2062 ASSERT(as != &kas && AS_WRITE_HELD(as)); 2063 2064 if ((seg = AS_SEGFIRST(as)) == NULL) 2065 return (0); 2066 2067 size = sizeof (prpageheader32_t); 2068 do { 2069 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 2070 caddr_t saddr, naddr; 2071 void *tmp = NULL; 2072 size_t npage; 2073 2074 if ((seg->s_flags & S_HOLE) != 0) { 2075 continue; 2076 } 2077 2078 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2079 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 2080 if ((npage = (naddr - saddr) / PAGESIZE) != 0) 2081 size += sizeof (prasmap32_t) + round8(npage); 2082 } 2083 ASSERT(tmp == NULL); 2084 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2085 2086 return (size); 2087 } 2088 #endif /* _SYSCALL32_IMPL */ 2089 2090 /* 2091 * Read page data information. 
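 *
 * The page data file consists of a prpageheader_t followed, for each
 * mapping, by a prasmap_t and an 8-byte-aligned array of per-page data
 * bytes.  A userland consumer (an illustrative sketch only; the open
 * and read of the buffer, and all error handling, are omitted) would
 * walk the buffer like this:
 *
 *	prpageheader_t *php = (prpageheader_t *)buf;
 *	prasmap_t *pmp = (prasmap_t *)(php + 1);
 *	for (i = 0; i < php->pr_nmap; i++) {
 *		char *pagedata = (char *)(pmp + 1);
 *		pmp = (prasmap_t *)((uintptr_t)(pmp + 1) +
 *		    ((pmp->pr_npage + 7) & ~7UL));
 *	}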
 */
int
prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader_t *php;
	prasmap_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as);
		return (0);
	}
	size = prpdsize(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as);
		return (E2BIG);
	}

	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader_t *)buf;
	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));

	hrt2ts(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (uintptr_t)saddr;
			pmp->pr_npage = npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as);

	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}

#ifdef _SYSCALL32_IMPL
int
prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader32_t *php;
	prasmap32_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as);
		return (0);
	}
	size = prpdsize32(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as);
		return (E2BIG);
	}

	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader32_t *)buf;
	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));

	hrt2ts32(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
2313 */ 2314 if (next > (uintptr_t)buf + size) { 2315 pr_getprot_done(&tmp); 2316 AS_LOCK_EXIT(as); 2317 2318 kmem_free(buf, size); 2319 2320 if (ISSIG(curthread, JUSTLOOKING)) 2321 return (EINTR); 2322 2323 goto again; 2324 } 2325 2326 php->pr_nmap++; 2327 php->pr_npage += npage; 2328 pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 2329 pmp->pr_npage = (size32_t)npage; 2330 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 2331 pmp->pr_mflags = 0; 2332 if (prot & PROT_READ) 2333 pmp->pr_mflags |= MA_READ; 2334 if (prot & PROT_WRITE) 2335 pmp->pr_mflags |= MA_WRITE; 2336 if (prot & PROT_EXEC) 2337 pmp->pr_mflags |= MA_EXEC; 2338 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 2339 pmp->pr_mflags |= MA_SHARED; 2340 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 2341 pmp->pr_mflags |= MA_NORESERVE; 2342 if (seg->s_ops == &segspt_shmops || 2343 (seg->s_ops == &segvn_ops && 2344 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 2345 pmp->pr_mflags |= MA_ANON; 2346 if (seg->s_ops == &segspt_shmops) 2347 pmp->pr_mflags |= MA_ISM | MA_SHM; 2348 pmp->pr_pagesize = PAGESIZE; 2349 /* 2350 * Manufacture a filename for the "object" directory. 2351 */ 2352 vattr.va_mask = AT_FSID|AT_NODEID; 2353 if (seg->s_ops == &segvn_ops && 2354 SEGOP_GETVP(seg, saddr, &vp) == 0 && 2355 vp != NULL && vp->v_type == VREG && 2356 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 2357 if (vp == p->p_exec) 2358 (void) strcpy(pmp->pr_mapname, "a.out"); 2359 else 2360 pr_object_name(pmp->pr_mapname, 2361 vp, &vattr); 2362 } 2363 2364 /* 2365 * Get the SysV shared memory id, if any. 2366 */ 2367 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct && 2368 (pmp->pr_shmid = shmgetid(p, seg->s_base)) != 2369 SHMID_NONE) { 2370 if (pmp->pr_shmid == SHMID_FREE) 2371 pmp->pr_shmid = -1; 2372 2373 pmp->pr_mflags |= MA_SHM; 2374 } else { 2375 pmp->pr_shmid = -1; 2376 } 2377 2378 hat_getstat(as, saddr, len, hatid, 2379 (char *)(pmp + 1), HAT_SYNC_ZERORM); 2380 pmp = (prasmap32_t *)next; 2381 } 2382 ASSERT(tmp == NULL); 2383 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2384 2385 AS_LOCK_EXIT(as); 2386 2387 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size); 2388 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop); 2389 kmem_free(buf, size); 2390 2391 return (error); 2392 } 2393 #endif /* _SYSCALL32_IMPL */ 2394 2395 ushort_t 2396 prgetpctcpu(uint64_t pct) 2397 { 2398 /* 2399 * The value returned will be relevant in the zone of the examiner, 2400 * which may not be the same as the zone which performed the procfs 2401 * mount. 2402 */ 2403 int nonline = zone_ncpus_online_get(curproc->p_zone); 2404 2405 /* 2406 * Prorate over online cpus so we don't exceed 100% 2407 */ 2408 if (nonline > 1) 2409 pct /= nonline; 2410 pct >>= 16; /* convert to 16-bit scaled integer */ 2411 if (pct > 0x8000) /* might happen, due to rounding */ 2412 pct = 0x8000; 2413 return ((ushort_t)pct); 2414 } 2415 2416 /* 2417 * Return information used by ps(1). 2418 */ 2419 void 2420 prgetpsinfo(proc_t *p, psinfo_t *psp) 2421 { 2422 kthread_t *t; 2423 struct cred *cred; 2424 hrtime_t hrutime, hrstime; 2425 2426 ASSERT(MUTEX_HELD(&p->p_lock)); 2427 2428 if ((t = prchoose(p)) == NULL) /* returns locked thread */ 2429 bzero(psp, sizeof (*psp)); 2430 else { 2431 thread_unlock(t); 2432 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp)); 2433 } 2434 2435 /* 2436 * only export SSYS and SMSACCT; everything else is off-limits to 2437 * userland apps. 
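	 *
	 * The structure is consumed by reading /proc directly; a
	 * hypothetical example (pid 1234 chosen purely for illustration):
	 *
	 *	psinfo_t ps;
	 *	int fd = open("/proc/1234/psinfo", O_RDONLY);
	 *	if (fd != -1 && read(fd, &ps, sizeof (ps)) == sizeof (ps))
	 *		(void) printf("%s\n", ps.pr_fname);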
2438 */ 2439 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 2440 psp->pr_nlwp = p->p_lwpcnt; 2441 psp->pr_nzomb = p->p_zombcnt; 2442 mutex_enter(&p->p_crlock); 2443 cred = p->p_cred; 2444 psp->pr_uid = crgetruid(cred); 2445 psp->pr_euid = crgetuid(cred); 2446 psp->pr_gid = crgetrgid(cred); 2447 psp->pr_egid = crgetgid(cred); 2448 mutex_exit(&p->p_crlock); 2449 psp->pr_pid = p->p_pid; 2450 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 2451 (p->p_flag & SZONETOP)) { 2452 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 2453 /* 2454 * Inside local zones, fake zsched's pid as parent pids for 2455 * processes which reference processes outside of the zone. 2456 */ 2457 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 2458 } else { 2459 psp->pr_ppid = p->p_ppid; 2460 } 2461 psp->pr_pgid = p->p_pgrp; 2462 psp->pr_sid = p->p_sessp->s_sid; 2463 psp->pr_taskid = p->p_task->tk_tkid; 2464 psp->pr_projid = p->p_task->tk_proj->kpj_id; 2465 psp->pr_poolid = p->p_pool->pool_id; 2466 psp->pr_zoneid = p->p_zone->zone_id; 2467 if ((psp->pr_contract = PRCTID(p)) == 0) 2468 psp->pr_contract = -1; 2469 psp->pr_addr = (uintptr_t)prgetpsaddr(p); 2470 switch (p->p_model) { 2471 case DATAMODEL_ILP32: 2472 psp->pr_dmodel = PR_MODEL_ILP32; 2473 break; 2474 case DATAMODEL_LP64: 2475 psp->pr_dmodel = PR_MODEL_LP64; 2476 break; 2477 } 2478 hrutime = mstate_aggr_state(p, LMS_USER); 2479 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 2480 hrt2ts((hrutime + hrstime), &psp->pr_time); 2481 TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime); 2482 2483 if (t == NULL) { 2484 int wcode = p->p_wcode; /* must be atomic read */ 2485 2486 if (wcode) 2487 psp->pr_wstat = wstat(wcode, p->p_wdata); 2488 psp->pr_ttydev = PRNODEV; 2489 psp->pr_lwp.pr_state = SZOMB; 2490 psp->pr_lwp.pr_sname = 'Z'; 2491 psp->pr_lwp.pr_bindpro = PBIND_NONE; 2492 psp->pr_lwp.pr_bindpset = PS_NONE; 2493 } else { 2494 user_t *up = PTOU(p); 2495 struct as *as; 2496 dev_t d; 2497 extern dev_t rwsconsdev, rconsdev, uconsdev; 2498 2499 d = cttydev(p); 2500 /* 2501 * If the controlling terminal is the real 2502 * or workstation console device, map to what the 2503 * user thinks is the console device. Handle case when 2504 * rwsconsdev or rconsdev is set to NODEV for Starfire. 2505 */ 2506 if ((d == rwsconsdev || d == rconsdev) && d != NODEV) 2507 d = uconsdev; 2508 psp->pr_ttydev = (d == NODEV) ? 
PRNODEV : d; 2509 psp->pr_start = up->u_start; 2510 bcopy(up->u_comm, psp->pr_fname, 2511 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); 2512 bcopy(up->u_psargs, psp->pr_psargs, 2513 MIN(PRARGSZ-1, PSARGSZ)); 2514 psp->pr_argc = up->u_argc; 2515 psp->pr_argv = up->u_argv; 2516 psp->pr_envp = up->u_envp; 2517 2518 /* get the chosen lwp's lwpsinfo */ 2519 prgetlwpsinfo(t, &psp->pr_lwp); 2520 2521 /* compute %cpu for the process */ 2522 if (p->p_lwpcnt == 1) 2523 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; 2524 else { 2525 uint64_t pct = 0; 2526 hrtime_t cur_time = gethrtime_unscaled(); 2527 2528 t = p->p_tlist; 2529 do { 2530 pct += cpu_update_pct(t, cur_time); 2531 } while ((t = t->t_forw) != p->p_tlist); 2532 2533 psp->pr_pctcpu = prgetpctcpu(pct); 2534 } 2535 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { 2536 psp->pr_size = 0; 2537 psp->pr_rssize = 0; 2538 } else { 2539 mutex_exit(&p->p_lock); 2540 AS_LOCK_ENTER(as, RW_READER); 2541 psp->pr_size = btopr(as->a_resvsize) * 2542 (PAGESIZE / 1024); 2543 psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024); 2544 psp->pr_pctmem = rm_pctmemory(as); 2545 AS_LOCK_EXIT(as); 2546 mutex_enter(&p->p_lock); 2547 } 2548 } 2549 } 2550 2551 static size_t 2552 prfdinfomisc(list_t *data, uint_t type, const void *val, size_t vlen) 2553 { 2554 pr_misc_header_t *misc; 2555 size_t len; 2556 2557 len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen); 2558 2559 if (data != NULL) { 2560 misc = pr_iol_newbuf(data, len); 2561 misc->pr_misc_type = type; 2562 misc->pr_misc_size = len; 2563 misc++; 2564 bcopy((char *)val, (char *)misc, vlen); 2565 } 2566 2567 return (len); 2568 } 2569 2570 /* 2571 * There's no elegant way to determine if a character device 2572 * supports TLI, so just check a hardcoded list of known TLI 2573 * devices. 2574 */ 2575 2576 static boolean_t 2577 pristli(vnode_t *vp) 2578 { 2579 static const char *tlidevs[] = { 2580 "udp", "udp6", "tcp", "tcp6" 2581 }; 2582 char *devname; 2583 uint_t i; 2584 2585 ASSERT(vp != NULL); 2586 2587 if (vp->v_type != VCHR || vp->v_stream == NULL || vp->v_rdev == 0) 2588 return (B_FALSE); 2589 2590 if ((devname = mod_major_to_name(getmajor(vp->v_rdev))) == NULL) 2591 return (B_FALSE); 2592 2593 for (i = 0; i < ARRAY_SIZE(tlidevs); i++) { 2594 if (strcmp(devname, tlidevs[i]) == 0) 2595 return (B_TRUE); 2596 } 2597 2598 return (B_FALSE); 2599 } 2600 2601 static size_t 2602 prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred) 2603 { 2604 char *pathname; 2605 size_t pathlen; 2606 size_t sz = 0; 2607 2608 /* 2609 * The global zone's path to a file in a non-global zone can exceed 2610 * MAXPATHLEN. 
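	 * Allow for a zone root prefix and an intra-zone path, each of
	 * which can approach MAXPATHLEN, plus the terminating NUL.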
2611 */ 2612 pathlen = MAXPATHLEN * 2 + 1; 2613 pathname = kmem_alloc(pathlen, KM_SLEEP); 2614 2615 if (vnodetopath(NULL, vp, pathname, pathlen, cred) == 0) { 2616 sz += prfdinfomisc(data, PR_PATHNAME, 2617 pathname, strlen(pathname) + 1); 2618 } 2619 2620 kmem_free(pathname, pathlen); 2621 2622 return (sz); 2623 } 2624 2625 static size_t 2626 prfdinfotlisockopt(vnode_t *vp, list_t *data, cred_t *cred) 2627 { 2628 strcmd_t strcmd; 2629 int32_t rval; 2630 size_t sz = 0; 2631 2632 strcmd.sc_cmd = TI_GETMYNAME; 2633 strcmd.sc_timeout = 1; 2634 strcmd.sc_len = STRCMDBUFSIZE; 2635 2636 if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred, 2637 &rval, NULL) == 0 && strcmd.sc_len > 0) { 2638 sz += prfdinfomisc(data, PR_SOCKETNAME, strcmd.sc_buf, 2639 strcmd.sc_len); 2640 } 2641 2642 strcmd.sc_cmd = TI_GETPEERNAME; 2643 strcmd.sc_timeout = 1; 2644 strcmd.sc_len = STRCMDBUFSIZE; 2645 2646 if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred, 2647 &rval, NULL) == 0 && strcmd.sc_len > 0) { 2648 sz += prfdinfomisc(data, PR_PEERSOCKNAME, strcmd.sc_buf, 2649 strcmd.sc_len); 2650 } 2651 2652 return (sz); 2653 } 2654 2655 static size_t 2656 prfdinfosockopt(vnode_t *vp, list_t *data, cred_t *cred) 2657 { 2658 sonode_t *so; 2659 socklen_t vlen; 2660 size_t sz = 0; 2661 uint_t i; 2662 2663 if (vp->v_stream != NULL) { 2664 so = VTOSO(vp->v_stream->sd_vnode); 2665 2666 if (so->so_version == SOV_STREAM) 2667 so = NULL; 2668 } else { 2669 so = VTOSO(vp); 2670 } 2671 2672 if (so == NULL) 2673 return (0); 2674 2675 DTRACE_PROBE1(sonode, sonode_t *, so); 2676 2677 /* prmisc - PR_SOCKETNAME */ 2678 2679 struct sockaddr_storage buf; 2680 struct sockaddr *name = (struct sockaddr *)&buf; 2681 2682 vlen = sizeof (buf); 2683 if (SOP_GETSOCKNAME(so, name, &vlen, cred) == 0 && vlen > 0) 2684 sz += prfdinfomisc(data, PR_SOCKETNAME, name, vlen); 2685 2686 /* prmisc - PR_PEERSOCKNAME */ 2687 2688 vlen = sizeof (buf); 2689 if (SOP_GETPEERNAME(so, name, &vlen, B_FALSE, cred) == 0 && vlen > 0) 2690 sz += prfdinfomisc(data, PR_PEERSOCKNAME, name, vlen); 2691 2692 /* prmisc - PR_SOCKOPTS_BOOL_OPTS */ 2693 2694 static struct boolopt { 2695 int level; 2696 int opt; 2697 int bopt; 2698 } boolopts[] = { 2699 { SOL_SOCKET, SO_DEBUG, PR_SO_DEBUG }, 2700 { SOL_SOCKET, SO_REUSEADDR, PR_SO_REUSEADDR }, 2701 #ifdef SO_REUSEPORT 2702 /* SmartOS and OmniOS have SO_REUSEPORT */ 2703 { SOL_SOCKET, SO_REUSEPORT, PR_SO_REUSEPORT }, 2704 #endif 2705 { SOL_SOCKET, SO_KEEPALIVE, PR_SO_KEEPALIVE }, 2706 { SOL_SOCKET, SO_DONTROUTE, PR_SO_DONTROUTE }, 2707 { SOL_SOCKET, SO_BROADCAST, PR_SO_BROADCAST }, 2708 { SOL_SOCKET, SO_OOBINLINE, PR_SO_OOBINLINE }, 2709 { SOL_SOCKET, SO_DGRAM_ERRIND, PR_SO_DGRAM_ERRIND }, 2710 { SOL_SOCKET, SO_ALLZONES, PR_SO_ALLZONES }, 2711 { SOL_SOCKET, SO_MAC_EXEMPT, PR_SO_MAC_EXEMPT }, 2712 { SOL_SOCKET, SO_MAC_IMPLICIT, PR_SO_MAC_IMPLICIT }, 2713 { SOL_SOCKET, SO_EXCLBIND, PR_SO_EXCLBIND }, 2714 { SOL_SOCKET, SO_VRRP, PR_SO_VRRP }, 2715 { IPPROTO_UDP, UDP_NAT_T_ENDPOINT, 2716 PR_UDP_NAT_T_ENDPOINT } 2717 }; 2718 prsockopts_bool_opts_t opts; 2719 int val; 2720 2721 if (data != NULL) { 2722 opts.prsock_bool_opts = 0; 2723 2724 for (i = 0; i < ARRAY_SIZE(boolopts); i++) { 2725 vlen = sizeof (val); 2726 if (SOP_GETSOCKOPT(so, boolopts[i].level, 2727 boolopts[i].opt, &val, &vlen, 0, cred) == 0 && 2728 val != 0) { 2729 opts.prsock_bool_opts |= boolopts[i].bopt; 2730 } 2731 } 2732 } 2733 2734 sz += prfdinfomisc(data, PR_SOCKOPTS_BOOL_OPTS, &opts, sizeof (opts)); 2735 2736 /* prmisc - PR_SOCKOPT_LINGER */ 2737 2738 
struct linger l; 2739 2740 vlen = sizeof (l); 2741 if (SOP_GETSOCKOPT(so, SOL_SOCKET, SO_LINGER, &l, &vlen, 2742 0, cred) == 0 && vlen > 0) { 2743 sz += prfdinfomisc(data, PR_SOCKOPT_LINGER, &l, vlen); 2744 } 2745 2746 /* prmisc - PR_SOCKOPT_* int types */ 2747 2748 static struct sopt { 2749 int level; 2750 int opt; 2751 int bopt; 2752 } sopts[] = { 2753 { SOL_SOCKET, SO_TYPE, PR_SOCKOPT_TYPE }, 2754 { SOL_SOCKET, SO_SNDBUF, PR_SOCKOPT_SNDBUF }, 2755 { SOL_SOCKET, SO_RCVBUF, PR_SOCKOPT_RCVBUF } 2756 }; 2757 2758 for (i = 0; i < ARRAY_SIZE(sopts); i++) { 2759 vlen = sizeof (val); 2760 if (SOP_GETSOCKOPT(so, sopts[i].level, sopts[i].opt, 2761 &val, &vlen, 0, cred) == 0 && vlen > 0) { 2762 sz += prfdinfomisc(data, sopts[i].bopt, &val, vlen); 2763 } 2764 } 2765 2766 /* prmisc - PR_SOCKOPT_IP_NEXTHOP */ 2767 2768 in_addr_t nexthop_val; 2769 2770 vlen = sizeof (nexthop_val); 2771 if (SOP_GETSOCKOPT(so, IPPROTO_IP, IP_NEXTHOP, 2772 &nexthop_val, &vlen, 0, cred) == 0 && vlen > 0) { 2773 sz += prfdinfomisc(data, PR_SOCKOPT_IP_NEXTHOP, 2774 &nexthop_val, vlen); 2775 } 2776 2777 /* prmisc - PR_SOCKOPT_IPV6_NEXTHOP */ 2778 2779 struct sockaddr_in6 nexthop6_val; 2780 2781 vlen = sizeof (nexthop6_val); 2782 if (SOP_GETSOCKOPT(so, IPPROTO_IPV6, IPV6_NEXTHOP, 2783 &nexthop6_val, &vlen, 0, cred) == 0 && vlen > 0) { 2784 sz += prfdinfomisc(data, PR_SOCKOPT_IPV6_NEXTHOP, 2785 &nexthop6_val, vlen); 2786 } 2787 2788 /* prmisc - PR_SOCKOPT_TCP_CONGESTION */ 2789 2790 char cong[CC_ALGO_NAME_MAX]; 2791 2792 vlen = sizeof (cong); 2793 if (SOP_GETSOCKOPT(so, IPPROTO_TCP, TCP_CONGESTION, 2794 &cong, &vlen, 0, cred) == 0 && vlen > 0) { 2795 sz += prfdinfomisc(data, PR_SOCKOPT_TCP_CONGESTION, cong, vlen); 2796 } 2797 2798 /* prmisc - PR_SOCKFILTERS_PRIV */ 2799 2800 struct fil_info fi; 2801 2802 vlen = sizeof (fi); 2803 if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST, 2804 &fi, &vlen, 0, cred) == 0 && vlen != 0) { 2805 pr_misc_header_t *misc; 2806 size_t len; 2807 2808 /* 2809 * We limit the number of returned filters to 32. 2810 * This is the maximum number that pfiles will print 2811 * anyway. 2812 */ 2813 vlen = MIN(32, fi.fi_pos + 1); 2814 vlen *= sizeof (fi); 2815 2816 len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen); 2817 sz += len; 2818 2819 if (data != NULL) { 2820 /* 2821 * So that the filter list can be built incrementally, 2822 * prfdinfomisc() is not used here. Instead we 2823 * allocate a buffer directly on the copyout list using 2824 * pr_iol_newbuf() 2825 */ 2826 misc = pr_iol_newbuf(data, len); 2827 misc->pr_misc_type = PR_SOCKFILTERS_PRIV; 2828 misc->pr_misc_size = len; 2829 misc++; 2830 len = vlen; 2831 if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST, 2832 misc, &vlen, 0, cred) == 0) { 2833 /* 2834 * In case the number of filters has reduced 2835 * since the first call, explicitly zero out 2836 * any unpopulated space. 
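				 * Note that both 'len' and 'vlen' are byte
				 * counts at this point, so the zeroing below
				 * must use byte, not struct, arithmetic.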
				 */
				if (vlen < len)
					bzero((char *)misc + vlen,
					    len - vlen);
			} else {
				/* Something went wrong, zero out the result */
				bzero(misc, vlen);
			}
		}
	}

	return (sz);
}

typedef struct prfdinfo_nm_path_cbdata {
	proc_t		*nmp_p;
	u_offset_t	nmp_sz;
	list_t		*nmp_data;
} prfdinfo_nm_path_cbdata_t;

static int
prfdinfo_nm_path(const struct namenode *np, cred_t *cred, void *arg)
{
	prfdinfo_nm_path_cbdata_t *cb = arg;

	cb->nmp_sz += prfdinfopath(cb->nmp_p, np->nm_vnode, cb->nmp_data, cred);

	return (0);
}

u_offset_t
prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred)
{
	u_offset_t sz;

	/*
	 * All fdinfo files will be at least this big -
	 * sizeof fdinfo struct + zero length trailer
	 */
	sz = offsetof(prfdinfo_t, pr_misc) + sizeof (pr_misc_header_t);

	/* Pathname */
	switch (vp->v_type) {
	case VDOOR: {
		prfdinfo_nm_path_cbdata_t cb = {
			.nmp_p = p,
			.nmp_data = NULL,
			.nmp_sz = 0
		};

		(void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb);
		sz += cb.nmp_sz;
		break;
	}
	case VSOCK:
		break;
	default:
		sz += prfdinfopath(p, vp, NULL, cred);
	}

	/* Socket options */
	if (vp->v_type == VSOCK)
		sz += prfdinfosockopt(vp, NULL, cred);

	/* TLI/XTI sockets */
	if (pristli(vp))
		sz += prfdinfotlisockopt(vp, NULL, cred);

	return (sz);
}

int
prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfo_t *fdinfo, cred_t *cred,
    cred_t *file_cred, list_t *data)
{
	vattr_t vattr;
	int error;

	/*
	 * The buffer has been initialised to zero by pr_iol_newbuf().
	 * Initialise defaults for any values that should not default to zero.
	 */
	fdinfo->pr_uid = (uid_t)-1;
	fdinfo->pr_gid = (gid_t)-1;
	fdinfo->pr_size = -1;
	fdinfo->pr_locktype = F_UNLCK;
	fdinfo->pr_lockpid = -1;
	fdinfo->pr_locksysid = -1;
	fdinfo->pr_peerpid = -1;

	/* Offset */

	/*
	 * pr_offset has already been set from the underlying file_t.
	 * Check if it is plausible and reset to -1 if not.
	 */
	if (fdinfo->pr_offset != -1 &&
	    VOP_SEEK(vp, 0, (offset_t *)&fdinfo->pr_offset, NULL) != 0)
		fdinfo->pr_offset = -1;

	/*
	 * Attributes
	 *
	 * We have two cred_t structures available here.
	 * 'cred' is the caller's credential, and 'file_cred' is the
	 * credential for the file being inspected.
	 *
	 * When looking up the file attributes, file_cred is used in order
	 * that the correct ownership is set for doors and FIFOs. Since the
	 * caller has permission to read the fdinfo file in proc, this does
	 * not expose any additional information.
2947 */ 2948 vattr.va_mask = AT_STAT; 2949 if (VOP_GETATTR(vp, &vattr, 0, file_cred, NULL) == 0) { 2950 fdinfo->pr_major = getmajor(vattr.va_fsid); 2951 fdinfo->pr_minor = getminor(vattr.va_fsid); 2952 fdinfo->pr_rmajor = getmajor(vattr.va_rdev); 2953 fdinfo->pr_rminor = getminor(vattr.va_rdev); 2954 fdinfo->pr_ino = (ino64_t)vattr.va_nodeid; 2955 fdinfo->pr_size = (off64_t)vattr.va_size; 2956 fdinfo->pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode; 2957 fdinfo->pr_uid = vattr.va_uid; 2958 fdinfo->pr_gid = vattr.va_gid; 2959 if (vp->v_type == VSOCK) 2960 fdinfo->pr_fileflags |= sock_getfasync(vp); 2961 } 2962 2963 /* locks */ 2964 2965 flock64_t bf; 2966 2967 bzero(&bf, sizeof (bf)); 2968 bf.l_type = F_WRLCK; 2969 2970 if (VOP_FRLOCK(vp, F_GETLK, &bf, 2971 (uint16_t)(fdinfo->pr_fileflags & 0xffff), 0, NULL, 2972 cred, NULL) == 0 && bf.l_type != F_UNLCK) { 2973 fdinfo->pr_locktype = bf.l_type; 2974 fdinfo->pr_lockpid = bf.l_pid; 2975 fdinfo->pr_locksysid = bf.l_sysid; 2976 } 2977 2978 /* peer cred */ 2979 2980 k_peercred_t kpc; 2981 2982 switch (vp->v_type) { 2983 case VFIFO: 2984 case VSOCK: { 2985 int32_t rval; 2986 2987 error = VOP_IOCTL(vp, _I_GETPEERCRED, (intptr_t)&kpc, 2988 FKIOCTL, cred, &rval, NULL); 2989 break; 2990 } 2991 case VCHR: { 2992 struct strioctl strioc; 2993 int32_t rval; 2994 2995 if (vp->v_stream == NULL) { 2996 error = ENOTSUP; 2997 break; 2998 } 2999 strioc.ic_cmd = _I_GETPEERCRED; 3000 strioc.ic_timout = INFTIM; 3001 strioc.ic_len = (int)sizeof (k_peercred_t); 3002 strioc.ic_dp = (char *)&kpc; 3003 3004 error = strdoioctl(vp->v_stream, &strioc, FNATIVE | FKIOCTL, 3005 STR_NOSIG | K_TO_K, cred, &rval); 3006 break; 3007 } 3008 default: 3009 error = ENOTSUP; 3010 break; 3011 } 3012 3013 if (error == 0 && kpc.pc_cr != NULL) { 3014 proc_t *peerp; 3015 3016 fdinfo->pr_peerpid = kpc.pc_cpid; 3017 3018 crfree(kpc.pc_cr); 3019 3020 mutex_enter(&pidlock); 3021 if ((peerp = prfind(fdinfo->pr_peerpid)) != NULL) { 3022 user_t *up; 3023 3024 mutex_enter(&peerp->p_lock); 3025 mutex_exit(&pidlock); 3026 3027 up = PTOU(peerp); 3028 bcopy(up->u_comm, fdinfo->pr_peername, 3029 MIN(sizeof (up->u_comm), 3030 sizeof (fdinfo->pr_peername) - 1)); 3031 3032 mutex_exit(&peerp->p_lock); 3033 } else { 3034 mutex_exit(&pidlock); 3035 } 3036 } 3037 3038 /* pathname */ 3039 3040 switch (vp->v_type) { 3041 case VDOOR: { 3042 prfdinfo_nm_path_cbdata_t cb = { 3043 .nmp_p = p, 3044 .nmp_data = data, 3045 .nmp_sz = 0 3046 }; 3047 3048 (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb); 3049 break; 3050 } 3051 case VSOCK: 3052 /* 3053 * Don't attempt to determine the path for a socket as the 3054 * vnode has no associated v_path. It will cause a linear scan 3055 * of the dnlc table and result in no path being found. 3056 */ 3057 break; 3058 default: 3059 (void) prfdinfopath(p, vp, data, cred); 3060 } 3061 3062 /* socket options */ 3063 if (vp->v_type == VSOCK) 3064 (void) prfdinfosockopt(vp, data, cred); 3065 3066 /* TLI/XTI stream sockets */ 3067 if (pristli(vp)) 3068 (void) prfdinfotlisockopt(vp, data, cred); 3069 3070 /* 3071 * Add a terminating header with a zero size. 
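	 * A consumer walking the misc items (an illustrative sketch;
	 * each item's pr_misc_size includes its own header) stops at
	 * that terminator:
	 *
	 *	while (misc->pr_misc_size != 0) {
	 *		misc = (pr_misc_header_t *)
	 *		    ((char *)misc + misc->pr_misc_size);
	 *	}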
3072 */ 3073 pr_misc_header_t *misc; 3074 3075 misc = pr_iol_newbuf(data, sizeof (*misc)); 3076 misc->pr_misc_size = 0; 3077 misc->pr_misc_type = (uint_t)-1; 3078 3079 return (0); 3080 } 3081 3082 #ifdef _SYSCALL32_IMPL 3083 void 3084 prgetpsinfo32(proc_t *p, psinfo32_t *psp) 3085 { 3086 kthread_t *t; 3087 struct cred *cred; 3088 hrtime_t hrutime, hrstime; 3089 3090 ASSERT(MUTEX_HELD(&p->p_lock)); 3091 3092 if ((t = prchoose(p)) == NULL) /* returns locked thread */ 3093 bzero(psp, sizeof (*psp)); 3094 else { 3095 thread_unlock(t); 3096 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp)); 3097 } 3098 3099 /* 3100 * only export SSYS and SMSACCT; everything else is off-limits to 3101 * userland apps. 3102 */ 3103 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 3104 psp->pr_nlwp = p->p_lwpcnt; 3105 psp->pr_nzomb = p->p_zombcnt; 3106 mutex_enter(&p->p_crlock); 3107 cred = p->p_cred; 3108 psp->pr_uid = crgetruid(cred); 3109 psp->pr_euid = crgetuid(cred); 3110 psp->pr_gid = crgetrgid(cred); 3111 psp->pr_egid = crgetgid(cred); 3112 mutex_exit(&p->p_crlock); 3113 psp->pr_pid = p->p_pid; 3114 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 3115 (p->p_flag & SZONETOP)) { 3116 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 3117 /* 3118 * Inside local zones, fake zsched's pid as parent pids for 3119 * processes which reference processes outside of the zone. 3120 */ 3121 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 3122 } else { 3123 psp->pr_ppid = p->p_ppid; 3124 } 3125 psp->pr_pgid = p->p_pgrp; 3126 psp->pr_sid = p->p_sessp->s_sid; 3127 psp->pr_taskid = p->p_task->tk_tkid; 3128 psp->pr_projid = p->p_task->tk_proj->kpj_id; 3129 psp->pr_poolid = p->p_pool->pool_id; 3130 psp->pr_zoneid = p->p_zone->zone_id; 3131 if ((psp->pr_contract = PRCTID(p)) == 0) 3132 psp->pr_contract = -1; 3133 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 3134 switch (p->p_model) { 3135 case DATAMODEL_ILP32: 3136 psp->pr_dmodel = PR_MODEL_ILP32; 3137 break; 3138 case DATAMODEL_LP64: 3139 psp->pr_dmodel = PR_MODEL_LP64; 3140 break; 3141 } 3142 hrutime = mstate_aggr_state(p, LMS_USER); 3143 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 3144 hrt2ts32(hrutime + hrstime, &psp->pr_time); 3145 TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime); 3146 3147 if (t == NULL) { 3148 extern int wstat(int, int); /* needs a header file */ 3149 int wcode = p->p_wcode; /* must be atomic read */ 3150 3151 if (wcode) 3152 psp->pr_wstat = wstat(wcode, p->p_wdata); 3153 psp->pr_ttydev = PRNODEV32; 3154 psp->pr_lwp.pr_state = SZOMB; 3155 psp->pr_lwp.pr_sname = 'Z'; 3156 } else { 3157 user_t *up = PTOU(p); 3158 struct as *as; 3159 dev_t d; 3160 extern dev_t rwsconsdev, rconsdev, uconsdev; 3161 3162 d = cttydev(p); 3163 /* 3164 * If the controlling terminal is the real 3165 * or workstation console device, map to what the 3166 * user thinks is the console device. Handle case when 3167 * rwsconsdev or rconsdev is set to NODEV for Starfire. 
3168 */ 3169 if ((d == rwsconsdev || d == rconsdev) && d != NODEV) 3170 d = uconsdev; 3171 (void) cmpldev(&psp->pr_ttydev, d); 3172 TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start); 3173 bcopy(up->u_comm, psp->pr_fname, 3174 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); 3175 bcopy(up->u_psargs, psp->pr_psargs, 3176 MIN(PRARGSZ-1, PSARGSZ)); 3177 psp->pr_argc = up->u_argc; 3178 psp->pr_argv = (caddr32_t)up->u_argv; 3179 psp->pr_envp = (caddr32_t)up->u_envp; 3180 3181 /* get the chosen lwp's lwpsinfo */ 3182 prgetlwpsinfo32(t, &psp->pr_lwp); 3183 3184 /* compute %cpu for the process */ 3185 if (p->p_lwpcnt == 1) 3186 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; 3187 else { 3188 uint64_t pct = 0; 3189 hrtime_t cur_time; 3190 3191 t = p->p_tlist; 3192 cur_time = gethrtime_unscaled(); 3193 do { 3194 pct += cpu_update_pct(t, cur_time); 3195 } while ((t = t->t_forw) != p->p_tlist); 3196 3197 psp->pr_pctcpu = prgetpctcpu(pct); 3198 } 3199 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { 3200 psp->pr_size = 0; 3201 psp->pr_rssize = 0; 3202 } else { 3203 mutex_exit(&p->p_lock); 3204 AS_LOCK_ENTER(as, RW_READER); 3205 psp->pr_size = (size32_t) 3206 (btopr(as->a_resvsize) * (PAGESIZE / 1024)); 3207 psp->pr_rssize = (size32_t) 3208 (rm_asrss(as) * (PAGESIZE / 1024)); 3209 psp->pr_pctmem = rm_pctmemory(as); 3210 AS_LOCK_EXIT(as); 3211 mutex_enter(&p->p_lock); 3212 } 3213 } 3214 3215 /* 3216 * If we are looking at an LP64 process, zero out 3217 * the fields that cannot be represented in ILP32. 3218 */ 3219 if (p->p_model != DATAMODEL_ILP32) { 3220 psp->pr_size = 0; 3221 psp->pr_rssize = 0; 3222 psp->pr_argv = 0; 3223 psp->pr_envp = 0; 3224 } 3225 } 3226 3227 #endif /* _SYSCALL32_IMPL */ 3228 3229 void 3230 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp) 3231 { 3232 klwp_t *lwp = ttolwp(t); 3233 sobj_ops_t *sobj; 3234 char c, state; 3235 uint64_t pct; 3236 int retval, niceval; 3237 hrtime_t hrutime, hrstime; 3238 3239 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 3240 3241 bzero(psp, sizeof (*psp)); 3242 3243 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 3244 psp->pr_lwpid = t->t_tid; 3245 psp->pr_addr = (uintptr_t)t; 3246 psp->pr_wchan = (uintptr_t)t->t_wchan; 3247 3248 /* map the thread state enum into a process state enum */ 3249 state = VSTOPPED(t) ? 
TS_STOPPED : t->t_state; 3250 switch (state) { 3251 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 3252 case TS_RUN: state = SRUN; c = 'R'; break; 3253 case TS_ONPROC: state = SONPROC; c = 'O'; break; 3254 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 3255 case TS_STOPPED: state = SSTOP; c = 'T'; break; 3256 case TS_WAIT: state = SWAIT; c = 'W'; break; 3257 default: state = 0; c = '?'; break; 3258 } 3259 psp->pr_state = state; 3260 psp->pr_sname = c; 3261 if ((sobj = t->t_sobj_ops) != NULL) 3262 psp->pr_stype = SOBJ_TYPE(sobj); 3263 retval = CL_DONICE(t, NULL, 0, &niceval); 3264 if (retval == 0) { 3265 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 3266 psp->pr_nice = niceval + NZERO; 3267 } 3268 psp->pr_syscall = t->t_sysnum; 3269 psp->pr_pri = t->t_pri; 3270 psp->pr_start.tv_sec = t->t_start; 3271 psp->pr_start.tv_nsec = 0L; 3272 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 3273 scalehrtime(&hrutime); 3274 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 3275 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 3276 scalehrtime(&hrstime); 3277 hrt2ts(hrutime + hrstime, &psp->pr_time); 3278 /* compute %cpu for the lwp */ 3279 pct = cpu_update_pct(t, gethrtime_unscaled()); 3280 psp->pr_pctcpu = prgetpctcpu(pct); 3281 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 3282 if (psp->pr_cpu > 99) 3283 psp->pr_cpu = 99; 3284 3285 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 3286 sizeof (psp->pr_clname) - 1); 3287 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 3288 psp->pr_onpro = t->t_cpu->cpu_id; 3289 psp->pr_bindpro = t->t_bind_cpu; 3290 psp->pr_bindpset = t->t_bind_pset; 3291 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 3292 } 3293 3294 #ifdef _SYSCALL32_IMPL 3295 void 3296 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp) 3297 { 3298 klwp_t *lwp = ttolwp(t); 3299 sobj_ops_t *sobj; 3300 char c, state; 3301 uint64_t pct; 3302 int retval, niceval; 3303 hrtime_t hrutime, hrstime; 3304 3305 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 3306 3307 bzero(psp, sizeof (*psp)); 3308 3309 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 3310 psp->pr_lwpid = t->t_tid; 3311 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 3312 psp->pr_wchan = 0; /* cannot represent 64-bit addr in 32 bits */ 3313 3314 /* map the thread state enum into a process state enum */ 3315 state = VSTOPPED(t) ? 
TS_STOPPED : t->t_state; 3316 switch (state) { 3317 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 3318 case TS_RUN: state = SRUN; c = 'R'; break; 3319 case TS_ONPROC: state = SONPROC; c = 'O'; break; 3320 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 3321 case TS_STOPPED: state = SSTOP; c = 'T'; break; 3322 case TS_WAIT: state = SWAIT; c = 'W'; break; 3323 default: state = 0; c = '?'; break; 3324 } 3325 psp->pr_state = state; 3326 psp->pr_sname = c; 3327 if ((sobj = t->t_sobj_ops) != NULL) 3328 psp->pr_stype = SOBJ_TYPE(sobj); 3329 retval = CL_DONICE(t, NULL, 0, &niceval); 3330 if (retval == 0) { 3331 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 3332 psp->pr_nice = niceval + NZERO; 3333 } else { 3334 psp->pr_oldpri = 0; 3335 psp->pr_nice = 0; 3336 } 3337 psp->pr_syscall = t->t_sysnum; 3338 psp->pr_pri = t->t_pri; 3339 psp->pr_start.tv_sec = (time32_t)t->t_start; 3340 psp->pr_start.tv_nsec = 0L; 3341 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 3342 scalehrtime(&hrutime); 3343 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 3344 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 3345 scalehrtime(&hrstime); 3346 hrt2ts32(hrutime + hrstime, &psp->pr_time); 3347 /* compute %cpu for the lwp */ 3348 pct = cpu_update_pct(t, gethrtime_unscaled()); 3349 psp->pr_pctcpu = prgetpctcpu(pct); 3350 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 3351 if (psp->pr_cpu > 99) 3352 psp->pr_cpu = 99; 3353 3354 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 3355 sizeof (psp->pr_clname) - 1); 3356 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 3357 psp->pr_onpro = t->t_cpu->cpu_id; 3358 psp->pr_bindpro = t->t_bind_cpu; 3359 psp->pr_bindpset = t->t_bind_pset; 3360 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 3361 } 3362 #endif /* _SYSCALL32_IMPL */ 3363 3364 #ifdef _SYSCALL32_IMPL 3365 3366 #define PR_COPY_FIELD(s, d, field) d->field = s->field 3367 3368 #define PR_COPY_FIELD_ILP32(s, d, field) \ 3369 if (s->pr_dmodel == PR_MODEL_ILP32) { \ 3370 d->field = s->field; \ 3371 } 3372 3373 #define PR_COPY_TIMESPEC(s, d, field) \ 3374 TIMESPEC_TO_TIMESPEC32(&d->field, &s->field); 3375 3376 #define PR_COPY_BUF(s, d, field) \ 3377 bcopy(s->field, d->field, sizeof (d->field)); 3378 3379 #define PR_IGNORE_FIELD(s, d, field) 3380 3381 void 3382 lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest) 3383 { 3384 bzero(dest, sizeof (*dest)); 3385 3386 PR_COPY_FIELD(src, dest, pr_flag); 3387 PR_COPY_FIELD(src, dest, pr_lwpid); 3388 PR_IGNORE_FIELD(src, dest, pr_addr); 3389 PR_IGNORE_FIELD(src, dest, pr_wchan); 3390 PR_COPY_FIELD(src, dest, pr_stype); 3391 PR_COPY_FIELD(src, dest, pr_state); 3392 PR_COPY_FIELD(src, dest, pr_sname); 3393 PR_COPY_FIELD(src, dest, pr_nice); 3394 PR_COPY_FIELD(src, dest, pr_syscall); 3395 PR_COPY_FIELD(src, dest, pr_oldpri); 3396 PR_COPY_FIELD(src, dest, pr_cpu); 3397 PR_COPY_FIELD(src, dest, pr_pri); 3398 PR_COPY_FIELD(src, dest, pr_pctcpu); 3399 PR_COPY_TIMESPEC(src, dest, pr_start); 3400 PR_COPY_BUF(src, dest, pr_clname); 3401 PR_COPY_BUF(src, dest, pr_name); 3402 PR_COPY_FIELD(src, dest, pr_onpro); 3403 PR_COPY_FIELD(src, dest, pr_bindpro); 3404 PR_COPY_FIELD(src, dest, pr_bindpset); 3405 PR_COPY_FIELD(src, dest, pr_lgrp); 3406 } 3407 3408 void 3409 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest) 3410 { 3411 bzero(dest, sizeof (*dest)); 3412 3413 PR_COPY_FIELD(src, dest, pr_flag); 3414 PR_COPY_FIELD(src, dest, pr_nlwp); 3415 PR_COPY_FIELD(src, dest, pr_pid); 3416 PR_COPY_FIELD(src, dest, pr_ppid); 3417 PR_COPY_FIELD(src, dest, pr_pgid); 3418 
	PR_COPY_FIELD(src, dest, pr_sid);
	PR_COPY_FIELD(src, dest, pr_uid);
	PR_COPY_FIELD(src, dest, pr_euid);
	PR_COPY_FIELD(src, dest, pr_gid);
	PR_COPY_FIELD(src, dest, pr_egid);
	PR_IGNORE_FIELD(src, dest, pr_addr);
	PR_COPY_FIELD_ILP32(src, dest, pr_size);
	PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
	PR_COPY_FIELD(src, dest, pr_ttydev);
	PR_COPY_FIELD(src, dest, pr_pctcpu);
	PR_COPY_FIELD(src, dest, pr_pctmem);
	PR_COPY_TIMESPEC(src, dest, pr_start);
	PR_COPY_TIMESPEC(src, dest, pr_time);
	PR_COPY_TIMESPEC(src, dest, pr_ctime);
	PR_COPY_BUF(src, dest, pr_fname);
	PR_COPY_BUF(src, dest, pr_psargs);
	PR_COPY_FIELD(src, dest, pr_wstat);
	PR_COPY_FIELD(src, dest, pr_argc);
	PR_COPY_FIELD_ILP32(src, dest, pr_argv);
	PR_COPY_FIELD_ILP32(src, dest, pr_envp);
	PR_COPY_FIELD(src, dest, pr_dmodel);
	PR_COPY_FIELD(src, dest, pr_taskid);
	PR_COPY_FIELD(src, dest, pr_projid);
	PR_COPY_FIELD(src, dest, pr_nzomb);
	PR_COPY_FIELD(src, dest, pr_poolid);
	PR_COPY_FIELD(src, dest, pr_contract);

	lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
}

#undef PR_COPY_FIELD
#undef PR_COPY_FIELD_ILP32
#undef PR_COPY_TIMESPEC
#undef PR_COPY_BUF
#undef PR_IGNORE_FIELD

#endif	/* _SYSCALL32_IMPL */

/*
 * This used to get called when microstate accounting was disabled but
 * microstate information was requested. Since microstate accounting is on
 * regardless of the proc flags, this simply makes it appear to procfs that
 * microstate accounting is on. This is relatively meaningless since you
 * can't turn it off, but this is here for the sake of appearances.
 */

/*ARGSUSED*/
void
estimate_msacct(kthread_t *t, hrtime_t curtime)
{
	proc_t *p;

	if (t == NULL)
		return;

	p = ttoproc(t);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * A system process (p0) could be referenced if the thread is
	 * in the process of exiting. Don't turn on microstate accounting
	 * in that case.
	 */
	if (p->p_flag & SSYS)
		return;

	/*
	 * Loop through all the LWPs (kernel threads) in the process.
	 */
	t = p->p_tlist;
	do {
		t->t_proc_flag |= TP_MSACCT;
	} while ((t = t->t_forw) != p->p_tlist);

	p->p_flag |= SMSACCT;		/* set process-wide MSACCT */
}

/*
 * It's not really possible to disable microstate accounting anymore.
 * However, this routine simply turns off the ms accounting flags in a
 * process.  This way procfs can still pretend to turn microstate accounting
 * on and off for a process, but it actually doesn't do anything.  This is
 * a neutered form of preemptive idiot-proofing.
 */
void
disable_msacct(proc_t *p)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	p->p_flag &= ~SMSACCT;		/* clear process-wide MSACCT */
	/*
	 * Loop through all the LWPs (kernel threads) in the process.
	 */
	if ((t = p->p_tlist) != NULL) {
		do {
			/* clear per-thread flag */
			t->t_proc_flag &= ~TP_MSACCT;
		} while ((t = t->t_forw) != p->p_tlist);
	}
}

/*
 * Return resource usage information.
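 * The prhusage_t filled in here is the kernel's high-resolution form;
 * it is converted to the exported prusage_t by prcvtusage() below and
 * is what a consumer ultimately sees when reading /proc/<pid>/usage or
 * /proc/<pid>/lwp/<lwpid>/lwpusage.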
3525 */ 3526 void 3527 prgetusage(kthread_t *t, prhusage_t *pup) 3528 { 3529 klwp_t *lwp = ttolwp(t); 3530 hrtime_t *mstimep; 3531 struct mstate *ms = &lwp->lwp_mstate; 3532 int state; 3533 int i; 3534 hrtime_t curtime; 3535 hrtime_t waitrq; 3536 hrtime_t tmp1; 3537 3538 curtime = gethrtime_unscaled(); 3539 3540 pup->pr_lwpid = t->t_tid; 3541 pup->pr_count = 1; 3542 pup->pr_create = ms->ms_start; 3543 pup->pr_term = ms->ms_term; 3544 scalehrtime(&pup->pr_create); 3545 scalehrtime(&pup->pr_term); 3546 if (ms->ms_term == 0) { 3547 pup->pr_rtime = curtime - ms->ms_start; 3548 scalehrtime(&pup->pr_rtime); 3549 } else { 3550 pup->pr_rtime = ms->ms_term - ms->ms_start; 3551 scalehrtime(&pup->pr_rtime); 3552 } 3553 3554 3555 pup->pr_utime = ms->ms_acct[LMS_USER]; 3556 pup->pr_stime = ms->ms_acct[LMS_SYSTEM]; 3557 pup->pr_ttime = ms->ms_acct[LMS_TRAP]; 3558 pup->pr_tftime = ms->ms_acct[LMS_TFAULT]; 3559 pup->pr_dftime = ms->ms_acct[LMS_DFAULT]; 3560 pup->pr_kftime = ms->ms_acct[LMS_KFAULT]; 3561 pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 3562 pup->pr_slptime = ms->ms_acct[LMS_SLEEP]; 3563 pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 3564 pup->pr_stoptime = ms->ms_acct[LMS_STOPPED]; 3565 3566 prscaleusage(pup); 3567 3568 /* 3569 * Adjust for time waiting in the dispatcher queue. 3570 */ 3571 waitrq = t->t_waitrq; /* hopefully atomic */ 3572 if (waitrq != 0) { 3573 if (waitrq > curtime) { 3574 curtime = gethrtime_unscaled(); 3575 } 3576 tmp1 = curtime - waitrq; 3577 scalehrtime(&tmp1); 3578 pup->pr_wtime += tmp1; 3579 curtime = waitrq; 3580 } 3581 3582 /* 3583 * Adjust for time spent in current microstate. 3584 */ 3585 if (ms->ms_state_start > curtime) { 3586 curtime = gethrtime_unscaled(); 3587 } 3588 3589 i = 0; 3590 do { 3591 switch (state = t->t_mstate) { 3592 case LMS_SLEEP: 3593 /* 3594 * Update the timer for the current sleep state. 3595 */ 3596 switch (state = ms->ms_prev) { 3597 case LMS_TFAULT: 3598 case LMS_DFAULT: 3599 case LMS_KFAULT: 3600 case LMS_USER_LOCK: 3601 break; 3602 default: 3603 state = LMS_SLEEP; 3604 break; 3605 } 3606 break; 3607 case LMS_TFAULT: 3608 case LMS_DFAULT: 3609 case LMS_KFAULT: 3610 case LMS_USER_LOCK: 3611 state = LMS_SYSTEM; 3612 break; 3613 } 3614 switch (state) { 3615 case LMS_USER: mstimep = &pup->pr_utime; break; 3616 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 3617 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 3618 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 3619 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 3620 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 3621 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 3622 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 3623 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 3624 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 3625 default: panic("prgetusage: unknown microstate"); 3626 } 3627 tmp1 = curtime - ms->ms_state_start; 3628 if (tmp1 < 0) { 3629 curtime = gethrtime_unscaled(); 3630 i++; 3631 continue; 3632 } 3633 scalehrtime(&tmp1); 3634 } while (tmp1 < 0 && i < MAX_ITERS_SPIN); 3635 3636 *mstimep += tmp1; 3637 3638 /* update pup timestamp */ 3639 pup->pr_tstamp = curtime; 3640 scalehrtime(&pup->pr_tstamp); 3641 3642 /* 3643 * Resource usage counters. 
3644 */ 3645 pup->pr_minf = lwp->lwp_ru.minflt; 3646 pup->pr_majf = lwp->lwp_ru.majflt; 3647 pup->pr_nswap = lwp->lwp_ru.nswap; 3648 pup->pr_inblk = lwp->lwp_ru.inblock; 3649 pup->pr_oublk = lwp->lwp_ru.oublock; 3650 pup->pr_msnd = lwp->lwp_ru.msgsnd; 3651 pup->pr_mrcv = lwp->lwp_ru.msgrcv; 3652 pup->pr_sigs = lwp->lwp_ru.nsignals; 3653 pup->pr_vctx = lwp->lwp_ru.nvcsw; 3654 pup->pr_ictx = lwp->lwp_ru.nivcsw; 3655 pup->pr_sysc = lwp->lwp_ru.sysc; 3656 pup->pr_ioch = lwp->lwp_ru.ioch; 3657 } 3658 3659 /* 3660 * Convert ms_acct stats from unscaled high-res time to nanoseconds 3661 */ 3662 void 3663 prscaleusage(prhusage_t *usg) 3664 { 3665 scalehrtime(&usg->pr_utime); 3666 scalehrtime(&usg->pr_stime); 3667 scalehrtime(&usg->pr_ttime); 3668 scalehrtime(&usg->pr_tftime); 3669 scalehrtime(&usg->pr_dftime); 3670 scalehrtime(&usg->pr_kftime); 3671 scalehrtime(&usg->pr_ltime); 3672 scalehrtime(&usg->pr_slptime); 3673 scalehrtime(&usg->pr_wtime); 3674 scalehrtime(&usg->pr_stoptime); 3675 } 3676 3677 3678 /* 3679 * Sum resource usage information. 3680 */ 3681 void 3682 praddusage(kthread_t *t, prhusage_t *pup) 3683 { 3684 klwp_t *lwp = ttolwp(t); 3685 hrtime_t *mstimep; 3686 struct mstate *ms = &lwp->lwp_mstate; 3687 int state; 3688 int i; 3689 hrtime_t curtime; 3690 hrtime_t waitrq; 3691 hrtime_t tmp; 3692 prhusage_t conv; 3693 3694 curtime = gethrtime_unscaled(); 3695 3696 if (ms->ms_term == 0) { 3697 tmp = curtime - ms->ms_start; 3698 scalehrtime(&tmp); 3699 pup->pr_rtime += tmp; 3700 } else { 3701 tmp = ms->ms_term - ms->ms_start; 3702 scalehrtime(&tmp); 3703 pup->pr_rtime += tmp; 3704 } 3705 3706 conv.pr_utime = ms->ms_acct[LMS_USER]; 3707 conv.pr_stime = ms->ms_acct[LMS_SYSTEM]; 3708 conv.pr_ttime = ms->ms_acct[LMS_TRAP]; 3709 conv.pr_tftime = ms->ms_acct[LMS_TFAULT]; 3710 conv.pr_dftime = ms->ms_acct[LMS_DFAULT]; 3711 conv.pr_kftime = ms->ms_acct[LMS_KFAULT]; 3712 conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 3713 conv.pr_slptime = ms->ms_acct[LMS_SLEEP]; 3714 conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 3715 conv.pr_stoptime = ms->ms_acct[LMS_STOPPED]; 3716 3717 prscaleusage(&conv); 3718 3719 pup->pr_utime += conv.pr_utime; 3720 pup->pr_stime += conv.pr_stime; 3721 pup->pr_ttime += conv.pr_ttime; 3722 pup->pr_tftime += conv.pr_tftime; 3723 pup->pr_dftime += conv.pr_dftime; 3724 pup->pr_kftime += conv.pr_kftime; 3725 pup->pr_ltime += conv.pr_ltime; 3726 pup->pr_slptime += conv.pr_slptime; 3727 pup->pr_wtime += conv.pr_wtime; 3728 pup->pr_stoptime += conv.pr_stoptime; 3729 3730 /* 3731 * Adjust for time waiting in the dispatcher queue. 3732 */ 3733 waitrq = t->t_waitrq; /* hopefully atomic */ 3734 if (waitrq != 0) { 3735 if (waitrq > curtime) { 3736 curtime = gethrtime_unscaled(); 3737 } 3738 tmp = curtime - waitrq; 3739 scalehrtime(&tmp); 3740 pup->pr_wtime += tmp; 3741 curtime = waitrq; 3742 } 3743 3744 /* 3745 * Adjust for time spent in current microstate. 3746 */ 3747 if (ms->ms_state_start > curtime) { 3748 curtime = gethrtime_unscaled(); 3749 } 3750 3751 i = 0; 3752 do { 3753 switch (state = t->t_mstate) { 3754 case LMS_SLEEP: 3755 /* 3756 * Update the timer for the current sleep state. 
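			 * An lwp that sleeps while servicing a fault (or
			 * while waiting for a user-level lock) is charged
			 * to that state rather than to ordinary sleep; the
			 * fault and lock states themselves are folded into
			 * system time below.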
3757 */ 3758 switch (state = ms->ms_prev) { 3759 case LMS_TFAULT: 3760 case LMS_DFAULT: 3761 case LMS_KFAULT: 3762 case LMS_USER_LOCK: 3763 break; 3764 default: 3765 state = LMS_SLEEP; 3766 break; 3767 } 3768 break; 3769 case LMS_TFAULT: 3770 case LMS_DFAULT: 3771 case LMS_KFAULT: 3772 case LMS_USER_LOCK: 3773 state = LMS_SYSTEM; 3774 break; 3775 } 3776 switch (state) { 3777 case LMS_USER: mstimep = &pup->pr_utime; break; 3778 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 3779 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 3780 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 3781 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 3782 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 3783 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 3784 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 3785 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 3786 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 3787 default: panic("praddusage: unknown microstate"); 3788 } 3789 tmp = curtime - ms->ms_state_start; 3790 if (tmp < 0) { 3791 curtime = gethrtime_unscaled(); 3792 i++; 3793 continue; 3794 } 3795 scalehrtime(&tmp); 3796 } while (tmp < 0 && i < MAX_ITERS_SPIN); 3797 3798 *mstimep += tmp; 3799 3800 /* update pup timestamp */ 3801 pup->pr_tstamp = curtime; 3802 scalehrtime(&pup->pr_tstamp); 3803 3804 /* 3805 * Resource usage counters. 3806 */ 3807 pup->pr_minf += lwp->lwp_ru.minflt; 3808 pup->pr_majf += lwp->lwp_ru.majflt; 3809 pup->pr_nswap += lwp->lwp_ru.nswap; 3810 pup->pr_inblk += lwp->lwp_ru.inblock; 3811 pup->pr_oublk += lwp->lwp_ru.oublock; 3812 pup->pr_msnd += lwp->lwp_ru.msgsnd; 3813 pup->pr_mrcv += lwp->lwp_ru.msgrcv; 3814 pup->pr_sigs += lwp->lwp_ru.nsignals; 3815 pup->pr_vctx += lwp->lwp_ru.nvcsw; 3816 pup->pr_ictx += lwp->lwp_ru.nivcsw; 3817 pup->pr_sysc += lwp->lwp_ru.sysc; 3818 pup->pr_ioch += lwp->lwp_ru.ioch; 3819 } 3820 3821 /* 3822 * Convert a prhusage_t to a prusage_t. 3823 * This means convert each hrtime_t to a timestruc_t 3824 * and copy the count fields uint64_t => ulong_t. 
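 *
 * The loop below copies the 22 consecutive counter words starting at
 * pr_minf (pr_minf through pr_ioch plus the structure's trailing filler
 * words), relying on those fields being laid out identically, and
 * contiguously, in both structures.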
3825 */ 3826 void 3827 prcvtusage(prhusage_t *pup, prusage_t *upup) 3828 { 3829 uint64_t *ullp; 3830 ulong_t *ulp; 3831 int i; 3832 3833 upup->pr_lwpid = pup->pr_lwpid; 3834 upup->pr_count = pup->pr_count; 3835 3836 hrt2ts(pup->pr_tstamp, &upup->pr_tstamp); 3837 hrt2ts(pup->pr_create, &upup->pr_create); 3838 hrt2ts(pup->pr_term, &upup->pr_term); 3839 hrt2ts(pup->pr_rtime, &upup->pr_rtime); 3840 hrt2ts(pup->pr_utime, &upup->pr_utime); 3841 hrt2ts(pup->pr_stime, &upup->pr_stime); 3842 hrt2ts(pup->pr_ttime, &upup->pr_ttime); 3843 hrt2ts(pup->pr_tftime, &upup->pr_tftime); 3844 hrt2ts(pup->pr_dftime, &upup->pr_dftime); 3845 hrt2ts(pup->pr_kftime, &upup->pr_kftime); 3846 hrt2ts(pup->pr_ltime, &upup->pr_ltime); 3847 hrt2ts(pup->pr_slptime, &upup->pr_slptime); 3848 hrt2ts(pup->pr_wtime, &upup->pr_wtime); 3849 hrt2ts(pup->pr_stoptime, &upup->pr_stoptime); 3850 bzero(upup->filltime, sizeof (upup->filltime)); 3851 3852 ullp = &pup->pr_minf; 3853 ulp = &upup->pr_minf; 3854 for (i = 0; i < 22; i++) 3855 *ulp++ = (ulong_t)*ullp++; 3856 } 3857 3858 #ifdef _SYSCALL32_IMPL 3859 void 3860 prcvtusage32(prhusage_t *pup, prusage32_t *upup) 3861 { 3862 uint64_t *ullp; 3863 uint32_t *ulp; 3864 int i; 3865 3866 upup->pr_lwpid = pup->pr_lwpid; 3867 upup->pr_count = pup->pr_count; 3868 3869 hrt2ts32(pup->pr_tstamp, &upup->pr_tstamp); 3870 hrt2ts32(pup->pr_create, &upup->pr_create); 3871 hrt2ts32(pup->pr_term, &upup->pr_term); 3872 hrt2ts32(pup->pr_rtime, &upup->pr_rtime); 3873 hrt2ts32(pup->pr_utime, &upup->pr_utime); 3874 hrt2ts32(pup->pr_stime, &upup->pr_stime); 3875 hrt2ts32(pup->pr_ttime, &upup->pr_ttime); 3876 hrt2ts32(pup->pr_tftime, &upup->pr_tftime); 3877 hrt2ts32(pup->pr_dftime, &upup->pr_dftime); 3878 hrt2ts32(pup->pr_kftime, &upup->pr_kftime); 3879 hrt2ts32(pup->pr_ltime, &upup->pr_ltime); 3880 hrt2ts32(pup->pr_slptime, &upup->pr_slptime); 3881 hrt2ts32(pup->pr_wtime, &upup->pr_wtime); 3882 hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime); 3883 bzero(upup->filltime, sizeof (upup->filltime)); 3884 3885 ullp = &pup->pr_minf; 3886 ulp = &upup->pr_minf; 3887 for (i = 0; i < 22; i++) 3888 *ulp++ = (uint32_t)*ullp++; 3889 } 3890 #endif /* _SYSCALL32_IMPL */ 3891 3892 /* 3893 * Determine whether a set is empty. 3894 */ 3895 int 3896 setisempty(uint32_t *sp, uint_t n) 3897 { 3898 while (n--) 3899 if (*sp++) 3900 return (0); 3901 return (1); 3902 } 3903 3904 /* 3905 * Utility routine for establishing a watched area in the process. 3906 * Keep the list of watched areas sorted by virtual address. 3907 */ 3908 int 3909 set_watched_area(proc_t *p, struct watched_area *pwa) 3910 { 3911 caddr_t vaddr = pwa->wa_vaddr; 3912 caddr_t eaddr = pwa->wa_eaddr; 3913 ulong_t flags = pwa->wa_flags; 3914 struct watched_area *target; 3915 avl_index_t where; 3916 int error = 0; 3917 3918 /* we must not be holding p->p_lock, but the process must be locked */ 3919 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 3920 ASSERT(p->p_proc_flag & P_PR_LOCK); 3921 3922 /* 3923 * If this is our first watchpoint, enable watchpoints for the process. 3924 */ 3925 if (!pr_watch_active(p)) { 3926 kthread_t *t; 3927 3928 mutex_enter(&p->p_lock); 3929 if ((t = p->p_tlist) != NULL) { 3930 do { 3931 watch_enable(t); 3932 } while ((t = t->t_forw) != p->p_tlist); 3933 } 3934 mutex_exit(&p->p_lock); 3935 } 3936 3937 target = pr_find_watched_area(p, pwa, &where); 3938 if (target != NULL) { 3939 /* 3940 * We discovered an existing, overlapping watched area. 3941 * Allow it only if it is an exact match. 
3942 */ 3943 if (target->wa_vaddr != vaddr || 3944 target->wa_eaddr != eaddr) 3945 error = EINVAL; 3946 else if (target->wa_flags != flags) { 3947 error = set_watched_page(p, vaddr, eaddr, 3948 flags, target->wa_flags); 3949 target->wa_flags = flags; 3950 } 3951 kmem_free(pwa, sizeof (struct watched_area)); 3952 } else { 3953 avl_insert(&p->p_warea, pwa, where); 3954 error = set_watched_page(p, vaddr, eaddr, flags, 0); 3955 } 3956 3957 return (error); 3958 } 3959 3960 /* 3961 * Utility routine for clearing a watched area in the process. 3962 * Must be an exact match of the virtual address. 3963 * size and flags don't matter. 3964 */ 3965 int 3966 clear_watched_area(proc_t *p, struct watched_area *pwa) 3967 { 3968 struct watched_area *found; 3969 3970 /* we must not be holding p->p_lock, but the process must be locked */ 3971 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 3972 ASSERT(p->p_proc_flag & P_PR_LOCK); 3973 3974 3975 if (!pr_watch_active(p)) { 3976 kmem_free(pwa, sizeof (struct watched_area)); 3977 return (0); 3978 } 3979 3980 /* 3981 * Look for a matching address in the watched areas. If a match is 3982 * found, clear the old watched area and adjust the watched page(s). It 3983 * is not an error if there is no match. 3984 */ 3985 if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL && 3986 found->wa_vaddr == pwa->wa_vaddr) { 3987 clear_watched_page(p, found->wa_vaddr, found->wa_eaddr, 3988 found->wa_flags); 3989 avl_remove(&p->p_warea, found); 3990 kmem_free(found, sizeof (struct watched_area)); 3991 } 3992 3993 kmem_free(pwa, sizeof (struct watched_area)); 3994 3995 /* 3996 * If we removed the last watched area from the process, disable 3997 * watchpoints. 3998 */ 3999 if (!pr_watch_active(p)) { 4000 kthread_t *t; 4001 4002 mutex_enter(&p->p_lock); 4003 if ((t = p->p_tlist) != NULL) { 4004 do { 4005 watch_disable(t); 4006 } while ((t = t->t_forw) != p->p_tlist); 4007 } 4008 mutex_exit(&p->p_lock); 4009 } 4010 4011 return (0); 4012 } 4013 4014 /* 4015 * Frees all the watched_area structures 4016 */ 4017 void 4018 pr_free_watchpoints(proc_t *p) 4019 { 4020 struct watched_area *delp; 4021 void *cookie; 4022 4023 cookie = NULL; 4024 while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL) 4025 kmem_free(delp, sizeof (struct watched_area)); 4026 4027 avl_destroy(&p->p_warea); 4028 } 4029 4030 /* 4031 * This one is called by the traced process to unwatch all the 4032 * pages while deallocating the list of watched_page structs. 
4033 */ 4034 void 4035 pr_free_watched_pages(proc_t *p) 4036 { 4037 struct as *as = p->p_as; 4038 struct watched_page *pwp; 4039 uint_t prot; 4040 int retrycnt, err; 4041 void *cookie; 4042 4043 if (as == NULL || avl_numnodes(&as->a_wpage) == 0) 4044 return; 4045 4046 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 4047 AS_LOCK_ENTER(as, RW_WRITER); 4048 4049 pwp = avl_first(&as->a_wpage); 4050 4051 cookie = NULL; 4052 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) { 4053 retrycnt = 0; 4054 if ((prot = pwp->wp_oprot) != 0) { 4055 caddr_t addr = pwp->wp_vaddr; 4056 struct seg *seg; 4057 retry: 4058 4059 if ((pwp->wp_prot != prot || 4060 (pwp->wp_flags & WP_NOWATCH)) && 4061 (seg = as_segat(as, addr)) != NULL) { 4062 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot); 4063 if (err == IE_RETRY) { 4064 ASSERT(retrycnt == 0); 4065 retrycnt++; 4066 goto retry; 4067 } 4068 } 4069 } 4070 kmem_free(pwp, sizeof (struct watched_page)); 4071 } 4072 4073 avl_destroy(&as->a_wpage); 4074 p->p_wprot = NULL; 4075 4076 AS_LOCK_EXIT(as); 4077 } 4078 4079 /* 4080 * Insert a watched area into the list of watched pages. 4081 * If oflags is zero then we are adding a new watched area. 4082 * Otherwise we are changing the flags of an existing watched area. 4083 */ 4084 static int 4085 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, 4086 ulong_t flags, ulong_t oflags) 4087 { 4088 struct as *as = p->p_as; 4089 avl_tree_t *pwp_tree; 4090 struct watched_page *pwp, *newpwp; 4091 struct watched_page tpw; 4092 avl_index_t where; 4093 struct seg *seg; 4094 uint_t prot; 4095 caddr_t addr; 4096 4097 /* 4098 * We need to pre-allocate a list of structures before we grab the 4099 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks 4100 * held. 4101 */ 4102 newpwp = NULL; 4103 for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 4104 addr < eaddr; addr += PAGESIZE) { 4105 pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP); 4106 pwp->wp_list = newpwp; 4107 newpwp = pwp; 4108 } 4109 4110 AS_LOCK_ENTER(as, RW_WRITER); 4111 4112 /* 4113 * Search for an existing watched page to contain the watched area. 4114 * If none is found, grab a new one from the available list 4115 * and insert it in the active list, keeping the list sorted 4116 * by user-level virtual address. 
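 *
 * The tree chosen below depends on SVFWAIT: while a vfork parent is
 * waiting for its child, its address space is on loan to the child,
 * so the parent's watched-page bookkeeping is parked on the proc
 * (p_wpage) rather than on the shared as (a_wpage) until the address
 * space is returned; this mirrors the vfork handling elsewhere in
 * /proc.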
4117 */ 4118 if (p->p_flag & SVFWAIT) 4119 pwp_tree = &p->p_wpage; 4120 else 4121 pwp_tree = &as->a_wpage; 4122 4123 again: 4124 if (avl_numnodes(pwp_tree) > prnwatch) { 4125 AS_LOCK_EXIT(as); 4126 while (newpwp != NULL) { 4127 pwp = newpwp->wp_list; 4128 kmem_free(newpwp, sizeof (struct watched_page)); 4129 newpwp = pwp; 4130 } 4131 return (E2BIG); 4132 } 4133 4134 tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 4135 if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) { 4136 pwp = newpwp; 4137 newpwp = newpwp->wp_list; 4138 pwp->wp_list = NULL; 4139 pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr & 4140 (uintptr_t)PAGEMASK); 4141 avl_insert(pwp_tree, pwp, where); 4142 } 4143 4144 ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE); 4145 4146 if (oflags & WA_READ) 4147 pwp->wp_read--; 4148 if (oflags & WA_WRITE) 4149 pwp->wp_write--; 4150 if (oflags & WA_EXEC) 4151 pwp->wp_exec--; 4152 4153 ASSERT(pwp->wp_read >= 0); 4154 ASSERT(pwp->wp_write >= 0); 4155 ASSERT(pwp->wp_exec >= 0); 4156 4157 if (flags & WA_READ) 4158 pwp->wp_read++; 4159 if (flags & WA_WRITE) 4160 pwp->wp_write++; 4161 if (flags & WA_EXEC) 4162 pwp->wp_exec++; 4163 4164 if (!(p->p_flag & SVFWAIT)) { 4165 vaddr = pwp->wp_vaddr; 4166 if (pwp->wp_oprot == 0 && 4167 (seg = as_segat(as, vaddr)) != NULL) { 4168 SEGOP_GETPROT(seg, vaddr, 0, &prot); 4169 pwp->wp_oprot = (uchar_t)prot; 4170 pwp->wp_prot = (uchar_t)prot; 4171 } 4172 if (pwp->wp_oprot != 0) { 4173 prot = pwp->wp_oprot; 4174 if (pwp->wp_read) 4175 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 4176 if (pwp->wp_write) 4177 prot &= ~PROT_WRITE; 4178 if (pwp->wp_exec) 4179 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 4180 if (!(pwp->wp_flags & WP_NOWATCH) && 4181 pwp->wp_prot != prot && 4182 (pwp->wp_flags & WP_SETPROT) == 0) { 4183 pwp->wp_flags |= WP_SETPROT; 4184 pwp->wp_list = p->p_wprot; 4185 p->p_wprot = pwp; 4186 } 4187 pwp->wp_prot = (uchar_t)prot; 4188 } 4189 } 4190 4191 /* 4192 * If the watched area extends into the next page then do 4193 * it over again with the virtual address of the next page. 4194 */ 4195 if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr) 4196 goto again; 4197 4198 AS_LOCK_EXIT(as); 4199 4200 /* 4201 * Free any pages we may have over-allocated 4202 */ 4203 while (newpwp != NULL) { 4204 pwp = newpwp->wp_list; 4205 kmem_free(newpwp, sizeof (struct watched_page)); 4206 newpwp = pwp; 4207 } 4208 4209 return (0); 4210 } 4211 4212 /* 4213 * Remove a watched area from the list of watched pages. 4214 * A watched area may extend over more than one page. 4215 */ 4216 static void 4217 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags) 4218 { 4219 struct as *as = p->p_as; 4220 struct watched_page *pwp; 4221 struct watched_page tpw; 4222 avl_tree_t *tree; 4223 avl_index_t where; 4224 4225 AS_LOCK_ENTER(as, RW_WRITER); 4226 4227 if (p->p_flag & SVFWAIT) 4228 tree = &p->p_wpage; 4229 else 4230 tree = &as->a_wpage; 4231 4232 tpw.wp_vaddr = vaddr = 4233 (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 4234 pwp = avl_find(tree, &tpw, &where); 4235 if (pwp == NULL) 4236 pwp = avl_nearest(tree, where, AVL_AFTER); 4237 4238 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 4239 ASSERT(vaddr <= pwp->wp_vaddr); 4240 4241 if (flags & WA_READ) 4242 pwp->wp_read--; 4243 if (flags & WA_WRITE) 4244 pwp->wp_write--; 4245 if (flags & WA_EXEC) 4246 pwp->wp_exec--; 4247 4248 if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) { 4249 /* 4250 * Reset the hat layer's protections on this page. 
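 *
 * The recomputation below follows the same scheme set_watched_page()
 * uses, roughly:
 *
 *	wp_read != 0	remove all access (any reference includes
 *			a read)
 *	wp_write != 0	remove PROT_WRITE only
 *	wp_exec != 0	remove all access (an instruction fetch
 *			cannot be isolated from a data reference
 *			by the protection hardware)
 *
 * so pages watched for read or exec fault on every touch, while pages
 * watched only for write still satisfy reads at full speed.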
4251 */ 4252 if (pwp->wp_oprot != 0) { 4253 uint_t prot = pwp->wp_oprot; 4254 4255 if (pwp->wp_read) 4256 prot &= 4257 ~(PROT_READ|PROT_WRITE|PROT_EXEC); 4258 if (pwp->wp_write) 4259 prot &= ~PROT_WRITE; 4260 if (pwp->wp_exec) 4261 prot &= 4262 ~(PROT_READ|PROT_WRITE|PROT_EXEC); 4263 if (!(pwp->wp_flags & WP_NOWATCH) && 4264 pwp->wp_prot != prot && 4265 (pwp->wp_flags & WP_SETPROT) == 0) { 4266 pwp->wp_flags |= WP_SETPROT; 4267 pwp->wp_list = p->p_wprot; 4268 p->p_wprot = pwp; 4269 } 4270 pwp->wp_prot = (uchar_t)prot; 4271 } 4272 } else { 4273 /* 4274 * No watched areas remain in this page. 4275 * Reset everything to normal. 4276 */ 4277 if (pwp->wp_oprot != 0) { 4278 pwp->wp_prot = pwp->wp_oprot; 4279 if ((pwp->wp_flags & WP_SETPROT) == 0) { 4280 pwp->wp_flags |= WP_SETPROT; 4281 pwp->wp_list = p->p_wprot; 4282 p->p_wprot = pwp; 4283 } 4284 } 4285 } 4286 4287 pwp = AVL_NEXT(tree, pwp); 4288 } 4289 4290 AS_LOCK_EXIT(as); 4291 } 4292 4293 /* 4294 * Return the original protections for the specified page. 4295 */ 4296 static void 4297 getwatchprot(struct as *as, caddr_t addr, uint_t *prot) 4298 { 4299 struct watched_page *pwp; 4300 struct watched_page tpw; 4301 4302 ASSERT(AS_LOCK_HELD(as)); 4303 4304 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 4305 if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL) 4306 *prot = pwp->wp_oprot; 4307 } 4308 4309 static prpagev_t * 4310 pr_pagev_create(struct seg *seg, int check_noreserve) 4311 { 4312 prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP); 4313 size_t total_pages = seg_pages(seg); 4314 4315 /* 4316 * Limit the size of our vectors to pagev_lim pages at a time. We need 4317 * 4 or 5 bytes of storage per page, so this means we limit ourselves 4318 * to about a megabyte of kernel heap by default. 4319 */ 4320 pagev->pg_npages = MIN(total_pages, pagev_lim); 4321 pagev->pg_pnbase = 0; 4322 4323 pagev->pg_protv = 4324 kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP); 4325 4326 if (check_noreserve) 4327 pagev->pg_incore = 4328 kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP); 4329 else 4330 pagev->pg_incore = NULL; 4331 4332 return (pagev); 4333 } 4334 4335 static void 4336 pr_pagev_destroy(prpagev_t *pagev) 4337 { 4338 if (pagev->pg_incore != NULL) 4339 kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char)); 4340 4341 kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t)); 4342 kmem_free(pagev, sizeof (prpagev_t)); 4343 } 4344 4345 static caddr_t 4346 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr) 4347 { 4348 ulong_t lastpg = seg_page(seg, eaddr - 1); 4349 ulong_t pn, pnlim; 4350 caddr_t saddr; 4351 size_t len; 4352 4353 ASSERT(addr >= seg->s_base && addr <= eaddr); 4354 4355 if (addr == eaddr) 4356 return (eaddr); 4357 4358 refill: 4359 ASSERT(addr < eaddr); 4360 pagev->pg_pnbase = seg_page(seg, addr); 4361 pnlim = pagev->pg_pnbase + pagev->pg_npages; 4362 saddr = addr; 4363 4364 if (lastpg < pnlim) 4365 len = (size_t)(eaddr - addr); 4366 else 4367 len = pagev->pg_npages * PAGESIZE; 4368 4369 if (pagev->pg_incore != NULL) { 4370 /* 4371 * INCORE cleverly has different semantics than GETPROT: 4372 * it returns info on pages up to but NOT including addr + len. 4373 */ 4374 SEGOP_INCORE(seg, addr, len, pagev->pg_incore); 4375 pn = pagev->pg_pnbase; 4376 4377 do { 4378 /* 4379 * Guilty knowledge here: We know that segvn_incore 4380 * returns more than just the low-order bit that 4381 * indicates the page is actually in memory.
If any 4382 * bits are set, then the page has backing store. 4383 */ 4384 if (pagev->pg_incore[pn++ - pagev->pg_pnbase]) 4385 goto out; 4386 4387 } while ((addr += PAGESIZE) < eaddr && pn < pnlim); 4388 4389 /* 4390 * If we examined all the pages in the vector but we're not 4391 * at the end of the segment, take another lap. 4392 */ 4393 if (addr < eaddr) 4394 goto refill; 4395 } 4396 4397 /* 4398 * Need to take len - 1 because addr + len is the address of the 4399 * first byte of the page just past the end of what we want. 4400 */ 4401 out: 4402 SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv); 4403 return (addr); 4404 } 4405 4406 static caddr_t 4407 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg, 4408 caddr_t *saddrp, caddr_t eaddr, uint_t *protp) 4409 { 4410 /* 4411 * Our starting address is either the specified address, or the base 4412 * address from the start of the pagev. If the latter is greater, 4413 * this means a previous call to pr_pagev_fill has already scanned 4414 * further than the end of the previous mapping. 4415 */ 4416 caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE; 4417 caddr_t addr = MAX(*saddrp, base); 4418 ulong_t pn = seg_page(seg, addr); 4419 uint_t prot, nprot; 4420 4421 /* 4422 * If we're dealing with noreserve pages, then advance addr to 4423 * the address of the next page which has backing store. 4424 */ 4425 if (pagev->pg_incore != NULL) { 4426 while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) { 4427 if ((addr += PAGESIZE) == eaddr) { 4428 *saddrp = addr; 4429 prot = 0; 4430 goto out; 4431 } 4432 if (++pn == pagev->pg_pnbase + pagev->pg_npages) { 4433 addr = pr_pagev_fill(pagev, seg, addr, eaddr); 4434 if (addr == eaddr) { 4435 *saddrp = addr; 4436 prot = 0; 4437 goto out; 4438 } 4439 pn = seg_page(seg, addr); 4440 } 4441 } 4442 } 4443 4444 /* 4445 * Get the protections on the page corresponding to addr. 4446 */ 4447 pn = seg_page(seg, addr); 4448 ASSERT(pn >= pagev->pg_pnbase); 4449 ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages)); 4450 4451 prot = pagev->pg_protv[pn - pagev->pg_pnbase]; 4452 getwatchprot(seg->s_as, addr, &prot); 4453 *saddrp = addr; 4454 4455 /* 4456 * Now loop until we find a backed page with different protections 4457 * or we reach the end of this segment. 4458 */ 4459 while ((addr += PAGESIZE) < eaddr) { 4460 /* 4461 * If pn has advanced to the page number following what we 4462 * have information on, refill the page vector and reset 4463 * addr and pn. If pr_pagev_fill does not return the 4464 * address of the next page, we have a discontiguity and 4465 * thus have reached the end of the current mapping. 4466 */ 4467 if (++pn == pagev->pg_pnbase + pagev->pg_npages) { 4468 caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr); 4469 if (naddr != addr) 4470 goto out; 4471 pn = seg_page(seg, addr); 4472 } 4473 4474 /* 4475 * The previous page's protections are in prot, and it has 4476 * backing. If this page is MAP_NORESERVE and has no backing, 4477 * then end this mapping and return the previous protections. 4478 */ 4479 if (pagev->pg_incore != NULL && 4480 pagev->pg_incore[pn - pagev->pg_pnbase] == 0) 4481 break; 4482 4483 /* 4484 * Otherwise end the mapping if this page's protections (nprot) 4485 * are different than those in the previous page (prot). 
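 *
 * To illustrate the contract: for four consecutive backed pages
 * whose protections are rw, rw, r, r, one call reports the first
 * two pages as a single [*saddrp, return value) range with
 * PROT_READ|PROT_WRITE, and the next call reports the remaining
 * two with PROT_READ.  Watched pages, folded in via getwatchprot()
 * above, can subdivide these ranges further.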
4486 */ 4487 nprot = pagev->pg_protv[pn - pagev->pg_pnbase]; 4488 getwatchprot(seg->s_as, addr, &nprot); 4489 4490 if (nprot != prot) 4491 break; 4492 } 4493 4494 out: 4495 *protp = prot; 4496 return (addr); 4497 } 4498 4499 size_t 4500 pr_getsegsize(struct seg *seg, int reserved) 4501 { 4502 size_t size = seg->s_size; 4503 4504 /* 4505 * If we're interested in the reserved space, return the size of the 4506 * segment itself. Everything else in this function is a special case 4507 * to determine the actual underlying size of various segment types. 4508 */ 4509 if (reserved) 4510 return (size); 4511 4512 /* 4513 * If this is a segvn mapping of a regular file, return the smaller 4514 * of the segment size and the remaining size of the file beyond 4515 * the file offset corresponding to seg->s_base. 4516 */ 4517 if (seg->s_ops == &segvn_ops) { 4518 vattr_t vattr; 4519 vnode_t *vp; 4520 4521 vattr.va_mask = AT_SIZE; 4522 4523 if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 && 4524 vp != NULL && vp->v_type == VREG && 4525 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 4526 4527 u_offset_t fsize = vattr.va_size; 4528 u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base); 4529 4530 if (fsize < offset) 4531 fsize = 0; 4532 else 4533 fsize -= offset; 4534 4535 fsize = roundup(fsize, (u_offset_t)PAGESIZE); 4536 4537 if (fsize < (u_offset_t)size) 4538 size = (size_t)fsize; 4539 } 4540 4541 return (size); 4542 } 4543 4544 /* 4545 * If this is an ISM shared segment, don't include pages that are 4546 * beyond the real size of the spt segment that backs it. 4547 */ 4548 if (seg->s_ops == &segspt_shmops) 4549 return (MIN(spt_realsize(seg), size)); 4550 4551 /* 4552 * If this segment is a mapping from /dev/null, then this is a 4553 * reservation of virtual address space and has no actual size. 4554 * Such segments are backed by segdev and have type set to neither 4555 * MAP_SHARED nor MAP_PRIVATE. 4556 */ 4557 if (seg->s_ops == &segdev_ops && 4558 ((SEGOP_GETTYPE(seg, seg->s_base) & 4559 (MAP_SHARED | MAP_PRIVATE)) == 0)) 4560 return (0); 4561 4562 /* 4563 * If this segment doesn't match one of the special types we handle, 4564 * just return the size of the segment itself. 4565 */ 4566 return (size); 4567 } 4568 4569 uint_t 4570 pr_getprot(struct seg *seg, int reserved, void **tmp, 4571 caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr) 4572 { 4573 struct as *as = seg->s_as; 4574 4575 caddr_t saddr = *saddrp; 4576 caddr_t naddr; 4577 4578 int check_noreserve; 4579 uint_t prot; 4580 4581 union { 4582 struct segvn_data *svd; 4583 struct segdev_data *sdp; 4584 void *data; 4585 } s; 4586 4587 s.data = seg->s_data; 4588 4589 ASSERT(AS_WRITE_HELD(as)); 4590 ASSERT(saddr >= seg->s_base && saddr < eaddr); 4591 ASSERT(eaddr <= seg->s_base + seg->s_size); 4592 4593 /* 4594 * Don't include MAP_NORESERVE pages in the address range 4595 * unless their mappings have actually materialized. 4596 * We cheat by knowing that segvn is the only segment 4597 * driver that supports MAP_NORESERVE. 4598 */ 4599 check_noreserve = 4600 (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL && 4601 (s.svd->vp == NULL || s.svd->vp->v_type != VREG) && 4602 (s.svd->flags & MAP_NORESERVE)); 4603 4604 /* 4605 * Examine every page only as a last resort. We use guilty knowledge 4606 * of segvn and segdev to avoid this: if there are no per-page 4607 * protections present in the segment and we don't care about 4608 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
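 *
 * In that common case the entire [saddr, eaddr) range is reported in
 * one iteration: the fast paths below set naddr to eaddr without ever
 * allocating a page vector.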
4609 */ 4610 if (!check_noreserve && saddr == seg->s_base && 4611 seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) { 4612 prot = s.svd->prot; 4613 getwatchprot(as, saddr, &prot); 4614 naddr = eaddr; 4615 4616 } else if (saddr == seg->s_base && seg->s_ops == &segdev_ops && 4617 s.sdp != NULL && s.sdp->pageprot == 0) { 4618 prot = s.sdp->prot; 4619 getwatchprot(as, saddr, &prot); 4620 naddr = eaddr; 4621 4622 } else { 4623 prpagev_t *pagev; 4624 4625 /* 4626 * If addr is sitting at the start of the segment, then 4627 * create a page vector to store protection and incore 4628 * information for pages in the segment, and fill it. 4629 * Otherwise, we expect *tmp to address the prpagev_t 4630 * allocated by a previous call to this function. 4631 */ 4632 if (saddr == seg->s_base) { 4633 pagev = pr_pagev_create(seg, check_noreserve); 4634 saddr = pr_pagev_fill(pagev, seg, saddr, eaddr); 4635 4636 ASSERT(*tmp == NULL); 4637 *tmp = pagev; 4638 4639 ASSERT(saddr <= eaddr); 4640 *saddrp = saddr; 4641 4642 if (saddr == eaddr) { 4643 naddr = saddr; 4644 prot = 0; 4645 goto out; 4646 } 4647 4648 } else { 4649 ASSERT(*tmp != NULL); 4650 pagev = (prpagev_t *)*tmp; 4651 } 4652 4653 naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot); 4654 ASSERT(naddr <= eaddr); 4655 } 4656 4657 out: 4658 if (naddr == eaddr) 4659 pr_getprot_done(tmp); 4660 *naddrp = naddr; 4661 return (prot); 4662 } 4663 4664 void 4665 pr_getprot_done(void **tmp) 4666 { 4667 if (*tmp != NULL) { 4668 pr_pagev_destroy((prpagev_t *)*tmp); 4669 *tmp = NULL; 4670 } 4671 } 4672 4673 /* 4674 * Return true iff the vnode is a /proc file from the object directory. 4675 */ 4676 int 4677 pr_isobject(vnode_t *vp) 4678 { 4679 return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT); 4680 } 4681 4682 /* 4683 * Return true iff the vnode is a /proc file opened by the process itself. 4684 */ 4685 int 4686 pr_isself(vnode_t *vp) 4687 { 4688 /* 4689 * XXX: To retain binary compatibility with the old 4690 * ioctl()-based version of /proc, we exempt self-opens 4691 * of /proc/<pid> from being marked close-on-exec. 4692 */ 4693 return (vn_matchops(vp, prvnodeops) && 4694 (VTOP(vp)->pr_flags & PR_ISSELF) && 4695 VTOP(vp)->pr_type != PR_PIDDIR); 4696 } 4697 4698 static ssize_t 4699 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr) 4700 { 4701 ssize_t pagesize, hatsize; 4702 4703 ASSERT(AS_WRITE_HELD(seg->s_as)); 4704 ASSERT(IS_P2ALIGNED(saddr, PAGESIZE)); 4705 ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE)); 4706 ASSERT(saddr < eaddr); 4707 4708 pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr); 4709 ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize)); 4710 ASSERT(pagesize != 0); 4711 4712 if (pagesize == -1) 4713 pagesize = PAGESIZE; 4714 4715 saddr += P2NPHASE((uintptr_t)saddr, pagesize); 4716 4717 while (saddr < eaddr) { 4718 if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr)) 4719 break; 4720 ASSERT(IS_P2ALIGNED(saddr, pagesize)); 4721 saddr += pagesize; 4722 } 4723 4724 *naddrp = ((saddr < eaddr) ? saddr : eaddr); 4725 return (hatsize); 4726 } 4727 4728 /* 4729 * Return an array of structures with extended memory map information. 4730 * We allocate here; the caller must deallocate. 
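 *
 * The list built here is what a consumer reads back from
 * /proc/<pid>/xmap as an array of prxmap_t structures.  A minimal
 * user-level sketch of the read side (illustrative only; a robust
 * consumer would retry if the mappings change between the fstat()
 * and the read(), and a 32-bit consumer sees the prxmap32_t form
 * produced by prgetxmap32() below):
 *
 *	#include <sys/stat.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *	#include <procfs.h>
 *
 *	prxmap_t *
 *	read_xmap(pid_t pid, size_t *nmapp)
 *	{
 *		char path[64];
 *		struct stat st;
 *		prxmap_t *buf = NULL;
 *		int fd;
 *
 *		(void) snprintf(path, sizeof (path),
 *		    "/proc/%d/xmap", (int)pid);
 *		if ((fd = open(path, O_RDONLY)) == -1)
 *			return (NULL);
 *		if (fstat(fd, &st) != 0 ||
 *		    (buf = malloc(st.st_size)) == NULL ||
 *		    read(fd, buf, st.st_size) != st.st_size) {
 *			free(buf);
 *			buf = NULL;
 *		} else {
 *			*nmapp = st.st_size / sizeof (prxmap_t);
 *		}
 *		(void) close(fd);
 *		return (buf);
 *	}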
4731 */ 4732 int 4733 prgetxmap(proc_t *p, list_t *iolhead) 4734 { 4735 struct as *as = p->p_as; 4736 prxmap_t *mp; 4737 struct seg *seg; 4738 struct seg *brkseg, *stkseg; 4739 struct vnode *vp; 4740 struct vattr vattr; 4741 uint_t prot; 4742 4743 ASSERT(as != &kas && AS_WRITE_HELD(as)); 4744 4745 /* 4746 * Request an initial buffer size that doesn't waste memory 4747 * if the address space has only a small number of segments. 4748 */ 4749 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 4750 4751 if ((seg = AS_SEGFIRST(as)) == NULL) 4752 return (0); 4753 4754 brkseg = break_seg(p); 4755 stkseg = as_segat(as, prgetstackbase(p)); 4756 4757 do { 4758 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 4759 caddr_t saddr, naddr, baddr; 4760 void *tmp = NULL; 4761 ssize_t psz; 4762 char *parr; 4763 uint64_t npages; 4764 uint64_t pagenum; 4765 4766 if ((seg->s_flags & S_HOLE) != 0) { 4767 continue; 4768 } 4769 /* 4770 * Segment loop part one: iterate from the base of the segment 4771 * to its end, pausing at each address boundary (baddr) between 4772 * ranges that have different virtual memory protections. 4773 */ 4774 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { 4775 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); 4776 ASSERT(baddr >= saddr && baddr <= eaddr); 4777 4778 /* 4779 * Segment loop part two: iterate from the current 4780 * position to the end of the protection boundary, 4781 * pausing at each address boundary (naddr) between 4782 * ranges that have different underlying page sizes. 4783 */ 4784 for (; saddr < baddr; saddr = naddr) { 4785 psz = pr_getpagesize(seg, saddr, &naddr, baddr); 4786 ASSERT(naddr >= saddr && naddr <= baddr); 4787 4788 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 4789 4790 mp->pr_vaddr = (uintptr_t)saddr; 4791 mp->pr_size = naddr - saddr; 4792 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 4793 mp->pr_mflags = 0; 4794 if (prot & PROT_READ) 4795 mp->pr_mflags |= MA_READ; 4796 if (prot & PROT_WRITE) 4797 mp->pr_mflags |= MA_WRITE; 4798 if (prot & PROT_EXEC) 4799 mp->pr_mflags |= MA_EXEC; 4800 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 4801 mp->pr_mflags |= MA_SHARED; 4802 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 4803 mp->pr_mflags |= MA_NORESERVE; 4804 if (seg->s_ops == &segspt_shmops || 4805 (seg->s_ops == &segvn_ops && 4806 (SEGOP_GETVP(seg, saddr, &vp) != 0 || 4807 vp == NULL))) 4808 mp->pr_mflags |= MA_ANON; 4809 if (seg == brkseg) 4810 mp->pr_mflags |= MA_BREAK; 4811 else if (seg == stkseg) 4812 mp->pr_mflags |= MA_STACK; 4813 if (seg->s_ops == &segspt_shmops) 4814 mp->pr_mflags |= MA_ISM | MA_SHM; 4815 4816 mp->pr_pagesize = PAGESIZE; 4817 if (psz == -1) { 4818 mp->pr_hatpagesize = 0; 4819 } else { 4820 mp->pr_hatpagesize = psz; 4821 } 4822 4823 /* 4824 * Manufacture a filename for the "object" dir. 4825 */ 4826 mp->pr_dev = PRNODEV; 4827 vattr.va_mask = AT_FSID|AT_NODEID; 4828 if (seg->s_ops == &segvn_ops && 4829 SEGOP_GETVP(seg, saddr, &vp) == 0 && 4830 vp != NULL && vp->v_type == VREG && 4831 VOP_GETATTR(vp, &vattr, 0, CRED(), 4832 NULL) == 0) { 4833 mp->pr_dev = vattr.va_fsid; 4834 mp->pr_ino = vattr.va_nodeid; 4835 if (vp == p->p_exec) 4836 (void) strcpy(mp->pr_mapname, 4837 "a.out"); 4838 else 4839 pr_object_name(mp->pr_mapname, 4840 vp, &vattr); 4841 } 4842 4843 /* 4844 * Get the SysV shared memory id, if any. 
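 *
 * shmgetid() yields SHMID_NONE when the mapping is not System V
 * shared memory at all, and SHMID_FREE when the identifier has been
 * removed (IPC_RMID) while the mapping persists.  Both cases surface
 * to the consumer as pr_shmid == -1; only the latter still carries
 * the MA_SHM flag.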
4845 */ 4846 if ((mp->pr_mflags & MA_SHARED) && 4847 p->p_segacct && (mp->pr_shmid = shmgetid(p, 4848 seg->s_base)) != SHMID_NONE) { 4849 if (mp->pr_shmid == SHMID_FREE) 4850 mp->pr_shmid = -1; 4851 4852 mp->pr_mflags |= MA_SHM; 4853 } else { 4854 mp->pr_shmid = -1; 4855 } 4856 4857 npages = ((uintptr_t)(naddr - saddr)) >> 4858 PAGESHIFT; 4859 parr = kmem_zalloc(npages, KM_SLEEP); 4860 4861 SEGOP_INCORE(seg, saddr, naddr - saddr, parr); 4862 4863 for (pagenum = 0; pagenum < npages; pagenum++) { 4864 if (parr[pagenum] & SEG_PAGE_INCORE) 4865 mp->pr_rss++; 4866 if (parr[pagenum] & SEG_PAGE_ANON) 4867 mp->pr_anon++; 4868 if (parr[pagenum] & SEG_PAGE_LOCKED) 4869 mp->pr_locked++; 4870 } 4871 kmem_free(parr, npages); 4872 } 4873 } 4874 ASSERT(tmp == NULL); 4875 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 4876 4877 return (0); 4878 } 4879 4880 /* 4881 * Return the process's credentials. We don't need a 32-bit equivalent of 4882 * this function because prcred_t and prcred32_t are actually the same. 4883 */ 4884 void 4885 prgetcred(proc_t *p, prcred_t *pcrp) 4886 { 4887 mutex_enter(&p->p_crlock); 4888 cred2prcred(p->p_cred, pcrp); 4889 mutex_exit(&p->p_crlock); 4890 } 4891 4892 void 4893 prgetsecflags(proc_t *p, prsecflags_t *psfp) 4894 { 4895 ASSERT(psfp != NULL); 4896 4897 psfp->pr_version = PRSECFLAGS_VERSION_CURRENT; 4898 psfp->pr_lower = p->p_secflags.psf_lower; 4899 psfp->pr_upper = p->p_secflags.psf_upper; 4900 psfp->pr_effective = p->p_secflags.psf_effective; 4901 psfp->pr_inherit = p->p_secflags.psf_inherit; 4902 } 4903 4904 /* 4905 * Compute actual size of the prpriv_t structure. 4906 */ 4907 4908 size_t 4909 prgetprivsize(void) 4910 { 4911 return (priv_prgetprivsize(NULL)); 4912 } 4913 4914 /* 4915 * Return the process's privileges. We don't need a 32-bit equivalent of 4916 * this function because prpriv_t and prpriv32_t are actually the same. 4917 */ 4918 void 4919 prgetpriv(proc_t *p, prpriv_t *pprp) 4920 { 4921 mutex_enter(&p->p_crlock); 4922 cred2prpriv(p->p_cred, pprp); 4923 mutex_exit(&p->p_crlock); 4924 } 4925 4926 #ifdef _SYSCALL32_IMPL 4927 /* 4928 * Return an array of structures with HAT memory map information. 4929 * We allocate here; the caller must deallocate. 4930 */ 4931 int 4932 prgetxmap32(proc_t *p, list_t *iolhead) 4933 { 4934 struct as *as = p->p_as; 4935 prxmap32_t *mp; 4936 struct seg *seg; 4937 struct seg *brkseg, *stkseg; 4938 struct vnode *vp; 4939 struct vattr vattr; 4940 uint_t prot; 4941 4942 ASSERT(as != &kas && AS_WRITE_HELD(as)); 4943 4944 /* 4945 * Request an initial buffer size that doesn't waste memory 4946 * if the address space has only a small number of segments. 4947 */ 4948 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 4949 4950 if ((seg = AS_SEGFIRST(as)) == NULL) 4951 return (0); 4952 4953 brkseg = break_seg(p); 4954 stkseg = as_segat(as, prgetstackbase(p)); 4955 4956 do { 4957 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 4958 caddr_t saddr, naddr, baddr; 4959 void *tmp = NULL; 4960 ssize_t psz; 4961 char *parr; 4962 uint64_t npages; 4963 uint64_t pagenum; 4964 4965 if ((seg->s_flags & S_HOLE) != 0) { 4966 continue; 4967 } 4968 4969 /* 4970 * Segment loop part one: iterate from the base of the segment 4971 * to its end, pausing at each address boundary (baddr) between 4972 * ranges that have different virtual memory protections. 
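 *
 * This is the 32-bit twin of the loop in prgetxmap() above; the only
 * substantive differences are the narrowed pr_vaddr/pr_size fields
 * and the compressed device number produced by cmpldev() below.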
4973 */ 4974 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { 4975 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); 4976 ASSERT(baddr >= saddr && baddr <= eaddr); 4977 4978 /* 4979 * Segment loop part two: iterate from the current 4980 * position to the end of the protection boundary, 4981 * pausing at each address boundary (naddr) between 4982 * ranges that have different underlying page sizes. 4983 */ 4984 for (; saddr < baddr; saddr = naddr) { 4985 psz = pr_getpagesize(seg, saddr, &naddr, baddr); 4986 ASSERT(naddr >= saddr && naddr <= baddr); 4987 4988 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 4989 4990 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 4991 mp->pr_size = (size32_t)(naddr - saddr); 4992 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 4993 mp->pr_mflags = 0; 4994 if (prot & PROT_READ) 4995 mp->pr_mflags |= MA_READ; 4996 if (prot & PROT_WRITE) 4997 mp->pr_mflags |= MA_WRITE; 4998 if (prot & PROT_EXEC) 4999 mp->pr_mflags |= MA_EXEC; 5000 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 5001 mp->pr_mflags |= MA_SHARED; 5002 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 5003 mp->pr_mflags |= MA_NORESERVE; 5004 if (seg->s_ops == &segspt_shmops || 5005 (seg->s_ops == &segvn_ops && 5006 (SEGOP_GETVP(seg, saddr, &vp) != 0 || 5007 vp == NULL))) 5008 mp->pr_mflags |= MA_ANON; 5009 if (seg == brkseg) 5010 mp->pr_mflags |= MA_BREAK; 5011 else if (seg == stkseg) 5012 mp->pr_mflags |= MA_STACK; 5013 if (seg->s_ops == &segspt_shmops) 5014 mp->pr_mflags |= MA_ISM | MA_SHM; 5015 5016 mp->pr_pagesize = PAGESIZE; 5017 if (psz == -1) { 5018 mp->pr_hatpagesize = 0; 5019 } else { 5020 mp->pr_hatpagesize = psz; 5021 } 5022 5023 /* 5024 * Manufacture a filename for the "object" dir. 5025 */ 5026 mp->pr_dev = PRNODEV32; 5027 vattr.va_mask = AT_FSID|AT_NODEID; 5028 if (seg->s_ops == &segvn_ops && 5029 SEGOP_GETVP(seg, saddr, &vp) == 0 && 5030 vp != NULL && vp->v_type == VREG && 5031 VOP_GETATTR(vp, &vattr, 0, CRED(), 5032 NULL) == 0) { 5033 (void) cmpldev(&mp->pr_dev, 5034 vattr.va_fsid); 5035 mp->pr_ino = vattr.va_nodeid; 5036 if (vp == p->p_exec) 5037 (void) strcpy(mp->pr_mapname, 5038 "a.out"); 5039 else 5040 pr_object_name(mp->pr_mapname, 5041 vp, &vattr); 5042 } 5043 5044 /* 5045 * Get the SysV shared memory id, if any. 5046 */ 5047 if ((mp->pr_mflags & MA_SHARED) && 5048 p->p_segacct && (mp->pr_shmid = shmgetid(p, 5049 seg->s_base)) != SHMID_NONE) { 5050 if (mp->pr_shmid == SHMID_FREE) 5051 mp->pr_shmid = -1; 5052 5053 mp->pr_mflags |= MA_SHM; 5054 } else { 5055 mp->pr_shmid = -1; 5056 } 5057 5058 npages = ((uintptr_t)(naddr - saddr)) >> 5059 PAGESHIFT; 5060 parr = kmem_zalloc(npages, KM_SLEEP); 5061 5062 SEGOP_INCORE(seg, saddr, naddr - saddr, parr); 5063 5064 for (pagenum = 0; pagenum < npages; pagenum++) { 5065 if (parr[pagenum] & SEG_PAGE_INCORE) 5066 mp->pr_rss++; 5067 if (parr[pagenum] & SEG_PAGE_ANON) 5068 mp->pr_anon++; 5069 if (parr[pagenum] & SEG_PAGE_LOCKED) 5070 mp->pr_locked++; 5071 } 5072 kmem_free(parr, npages); 5073 } 5074 } 5075 ASSERT(tmp == NULL); 5076 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 5077 5078 return (0); 5079 } 5080 #endif /* _SYSCALL32_IMPL */ 5081