/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
 * Copyright 2022 MNX Cloud, Inc.
 * Copyright 2025 Oxide Computer Company
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/priv.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/inline.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/brand.h>
#include <sys/sobject.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/var.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/session.h>
#include <sys/pcb.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/ts.h>
#include <sys/bitmap.h>
#include <sys/poll.h>
#include <sys/shm_impl.h>
#include <sys/fault.h>
#include <sys/syscall.h>
#include <sys/procfs.h>
#include <sys/processor.h>
#include <sys/cpuvar.h>
#include <sys/copyops.h>
#include <sys/time.h>
#include <sys/msacct.h>
#include <sys/flock_impl.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/pathname.h>
#include <sys/mode.h>
#include <sys/socketvar.h>
#include <sys/autoconf.h>
#include <sys/dtrace.h>
#include <sys/timod.h>
#include <sys/fs/namenode.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <inet/cc.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_spt.h>
#include <vm/page.h>
#include <sys/vmparam.h>
#include <sys/swap.h>
#include <fs/proc/prdata.h>
#include <sys/task.h>
#include <sys/project.h>
#include <sys/contract_impl.h>
#include <sys/contract/process.h>
#include <sys/contract/process_impl.h>
#include <sys/schedctl.h>
#include <sys/pool.h>
#include <sys/zone.h>
#include <sys/atomic.h>
#include <sys/sdt.h>

#define	MAX_ITERS_SPIN	5

typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);

/*
 * Choose an lwp from the complete set of lwps for the process.
 * This is called for any operation applied to the process
 * file descriptor that requires an lwp to operate upon.
 *
 * Returns a pointer to the thread for the selected lwp,
 * with the dispatcher lock held for that thread.
 *
 * The algorithm for choosing an lwp is critical for /proc semantics;
 * don't touch this code unless you know all of the implications.
 */
kthread_t *
prchoose(proc_t *p)
{
	kthread_t *t;
	kthread_t *t_onproc = NULL;	/* running on processor */
	kthread_t *t_run = NULL;	/* runnable, on disp queue */
	kthread_t *t_sleep = NULL;	/* sleeping */
	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
	kthread_t *t_susp = NULL;	/* suspended stop */
	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
	kthread_t *t_req = NULL;	/* requested stop */
	kthread_t *t_istop = NULL;	/* event-of-interest stop */
	kthread_t *t_dtrace = NULL;	/* DTrace stop */

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * If the agent lwp exists, it takes precedence over all others.
	 */
	if ((t = p->p_agenttp) != NULL) {
		thread_lock(t);
		return (t);
	}

	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
		return (t);
	do {		/* for each lwp in the process */
		if (VSTOPPED(t)) {	/* virtually stopped */
			if (t_req == NULL)
				t_req = t;
			continue;
		}

		/* If this is a process kernel thread, ignore it. */
		if ((t->t_proc_flag & TP_KTHREAD) != 0) {
			continue;
		}

		thread_lock(t);		/* make sure thread is in good state */
		switch (t->t_state) {
		default:
			panic("prchoose: bad thread state %d, thread 0x%p",
			    t->t_state, (void *)t);
			/*NOTREACHED*/
		case TS_SLEEP:
			/* this is filthy */
			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
			    t->t_wchan0 == NULL) {
				if (t_hold == NULL)
					t_hold = t;
			} else {
				if (t_sleep == NULL)
					t_sleep = t;
			}
			break;
		case TS_RUN:
		case TS_WAIT:
			if (t_run == NULL)
				t_run = t;
			break;
		case TS_ONPROC:
			if (t_onproc == NULL)
				t_onproc = t;
			break;
		case TS_ZOMB:		/* last possible choice */
			break;
		case TS_STOPPED:
			switch (t->t_whystop) {
			case PR_SUSPENDED:
				if (t_susp == NULL)
					t_susp = t;
				break;
			case PR_JOBCONTROL:
				if (t->t_proc_flag & TP_PRSTOP) {
					if (t_jdstop == NULL)
						t_jdstop = t;
				} else {
					if (t_jstop == NULL)
						t_jstop = t;
				}
				break;
			case PR_REQUESTED:
				if (t->t_dtrace_stop && t_dtrace == NULL)
					t_dtrace = t;
				else if (t_req == NULL)
					t_req = t;
				break;
			case PR_SYSENTRY:
			case PR_SYSEXIT:
			case PR_SIGNALLED:
			case PR_FAULTED:
				/*
				 * Make an lwp calling exit() be the
				 * last lwp seen in the process.
				 */
				if (t_istop == NULL ||
				    (t_istop->t_whystop == PR_SYSENTRY &&
				    t_istop->t_whatstop == SYS_exit))
					t_istop = t;
				break;
			case PR_CHECKPOINT:	/* can't happen? */
				break;
			default:
				panic("prchoose: bad t_whystop %d, thread 0x%p",
				    t->t_whystop, (void *)t);
				/*NOTREACHED*/
			}
			break;
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);

	if (t_onproc)
		t = t_onproc;
	else if (t_run)
		t = t_run;
	else if (t_sleep)
		t = t_sleep;
	else if (t_jstop)
		t = t_jstop;
	else if (t_jdstop)
		t = t_jdstop;
	else if (t_istop)
		t = t_istop;
	else if (t_dtrace)
		t = t_dtrace;
	else if (t_req)
		t = t_req;
	else if (t_hold)
		t = t_hold;
	else if (t_susp)
		t = t_susp;
	else			/* TS_ZOMB */
		t = p->p_tlist;

	if (t != NULL)
		thread_lock(t);
	return (t);
}
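
/*
 * A small usage sketch (comment only, not live code): the thread returned
 * by prchoose() comes back with its dispatcher lock held, so callers such
 * as prgetstatus() below drop that lock as soon as they have the reference:
 *
 *	t = prchoose(p);	(returns locked thread)
 *	ASSERT(t != NULL);
 *	thread_unlock(t);
 */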

/*
 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
 * on the /proc file descriptor.  Called from stop() when a traced
 * process stops on an event of interest.  Also called from exit()
 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
 */
void
prnotify(struct vnode *vp)
{
	prcommon_t *pcp = VTOP(vp)->pr_common;

	mutex_enter(&pcp->prc_mutex);
	cv_broadcast(&pcp->prc_wait);
	mutex_exit(&pcp->prc_mutex);
	if (pcp->prc_flags & PRC_POLL) {
		/*
		 * We call pollwakeup() with POLLHUP to ensure that
		 * the pollers are awakened even if they are polling
		 * for nothing (i.e., waiting for the process to exit).
		 * This enables the use of the PRC_POLL flag for optimization
		 * (we can turn off PRC_POLL only if we know no pollers remain).
		 */
		pcp->prc_flags &= ~PRC_POLL;
		pollwakeup(&pcp->prc_pollhead, POLLHUP);
	}
}

/* called immediately below, in prfree() */
static void
prfreenotify(vnode_t *vp)
{
	prnode_t *pnp;
	prcommon_t *pcp;

	while (vp != NULL) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL);
		pcp->prc_proc = NULL;
		/*
		 * We can't call prnotify() here because we are holding
		 * pidlock.  We assert that there is no need to.
		 */
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		ASSERT(!(pcp->prc_flags & PRC_POLL));

		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}
}

/*
 * Called from a hook in freeproc() when a traced process is removed
 * from the process table.  The proc-table pointers of all associated
 * /proc vnodes are cleared to indicate that the process has gone away.
 */
void
prfree(proc_t *p)
{
	uint_t slot = p->p_slot;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Block the process against /proc so it can be freed.
	 * It cannot be freed while locked by some controlling process.
	 * Lock ordering:
	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
	 */
	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		mutex_exit(&pr_pidlock);
		cv_wait(&pr_pid_cv[slot], &p->p_lock);
		mutex_exit(&p->p_lock);
		mutex_enter(&pr_pidlock);
		mutex_enter(&p->p_lock);
	}

	ASSERT(p->p_tlist == NULL);

	prfreenotify(p->p_plist);
	p->p_plist = NULL;

	prfreenotify(p->p_trace);
	p->p_trace = NULL;

	/*
	 * We broadcast to wake up everyone waiting for this process.
	 * No one can reach this process from this point on.
	 */
	cv_broadcast(&pr_pid_cv[slot]);

	mutex_exit(&p->p_lock);
	mutex_exit(&pr_pidlock);
}

/*
 * Called from a hook in exit() when a traced process is becoming a zombie.
 */
void
prexit(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
	if (p->p_trace) {
		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
		prnotify(p->p_trace);
	}
	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
}

/*
 * Called when a thread calls lwp_exit().
 */
void
prlwpexit(kthread_t *t)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	proc_t *p = ttoproc(t);
	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;

	ASSERT(t == curthread);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		if (pcp->prc_thread == t) {
			pcp->prc_thread = NULL;
			pcp->prc_flags |= PRC_DESTROY;
		}
	}

	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		pcp->prc_thread = NULL;
		pcp->prc_flags |= PRC_DESTROY;
		prnotify(vp);
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}

/*
 * Called when a zombie thread is joined or when a
 * detached lwp exits.  Called from lwp_hash_out().
 */
void
prlwpfree(proc_t *p, lwpent_t *lep)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	vp = lep->le_trace;
	lep->le_trace = NULL;
	while (vp) {
		prnotify(vp);
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL &&
		    (pcp->prc_flags & PRC_DESTROY));
		pcp->prc_tslot = -1;
		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}

/*
 * Called from a hook in exec() when a thread starts exec().
 */
void
prexecstart(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);

	/*
	 * The P_PR_EXEC flag blocks /proc operations for
	 * the duration of the exec().
	 * We can't start exec() while the process is
	 * locked by /proc, so we call prbarrier().
	 * lwp_nostop keeps the process from being stopped
	 * via job control for the duration of the exec().
	 */

	ASSERT(MUTEX_HELD(&p->p_lock));
	prbarrier(p);
	lwp->lwp_nostop++;
	p->p_proc_flag |= P_PR_EXEC;
}

/*
 * Called from a hook in exec() when a thread finishes exec().
 * The thread may or may not have succeeded.  Some other thread
 * may have beat it to the punch.
 */
void
prexecend(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	model_t model = p->p_model;
	id_t tid = curthread->t_tid;
	int tslot = curthread->t_dslot;

	ASSERT(MUTEX_HELD(&p->p_lock));

	lwp->lwp_nostop--;
	if (p->p_flag & SEXITLWPS) {
		/*
		 * We are on our way to exiting because some
		 * other thread beat us in the race to exec().
		 * Don't clear the P_PR_EXEC flag in this case.
		 */
		return;
	}

	/*
	 * Wake up anyone waiting in /proc for the process to complete exec().
	 */
	p->p_proc_flag &= ~P_PR_EXEC;
	if ((vp = p->p_trace) != NULL) {
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pnp->pr_common->prc_datamodel = model;
		}
	}
	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
		/*
		 * We dealt with the process common above.
		 */
		ASSERT(p->p_trace != NULL);
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pcp = pnp->pr_common;
			pcp->prc_datamodel = model;
			pcp->prc_tid = tid;
			pcp->prc_tslot = tslot;
		}
	}
}

/*
 * Called from a hook in relvm() just before freeing the address space.
 * We free all the watched areas now.
 */
void
prrelvm(void)
{
	proc_t *p = ttoproc(curthread);

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */
	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	mutex_exit(&p->p_lock);
	pr_free_watched_pages(p);
}

/*
 * Called from hooks in exec-related code when a traced process
 * attempts to exec(2) a setuid/setgid program or an unreadable
 * file.  Rather than fail the exec we invalidate the associated
 * /proc vnodes so that subsequent attempts to use them will fail.
 *
 * All /proc vnodes, except directory vnodes, are retained on a linked
 * list (rooted at p_plist in the process structure) until last close.
 *
 * A controlling process must re-open the /proc files in order to
 * regain control.
 */
void
prinvalidate(struct user *up)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	vnode_t *vp;
	prnode_t *pnp;
	int writers = 0;

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */

	/*
	 * At this moment, there can be only one lwp in the process.
	 */
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);

	/*
	 * Invalidate any currently active /proc vnodes.
	 */
	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		switch (pnp->pr_type) {
		case PR_PSINFO:		/* these files can be read by anyone */
		case PR_LPSINFO:
		case PR_LWPSINFO:
		case PR_LWPDIR:
		case PR_LWPIDDIR:
		case PR_USAGE:
		case PR_LUSAGE:
		case PR_LWPUSAGE:
			break;
		default:
			pnp->pr_flags |= PR_INVAL;
			break;
		}
	}
	/*
	 * Wake up anyone waiting for the process or lwp.
	 * p->p_trace is guaranteed to be non-NULL if there
	 * are any open /proc files for this process.
	 */
	if ((vp = p->p_trace) != NULL) {
		prcommon_t *pcp = VTOP(vp)->pr_pcommon;

		prnotify(vp);
		/*
		 * Are there any writers?
		 */
		if ((writers = pcp->prc_writers) != 0) {
			/*
			 * Clear the exclusive open flag (old /proc interface).
			 * Set prc_selfopens equal to prc_writers so that
			 * the next O_EXCL|O_WRITE open will succeed
			 * even with existing (though invalid) writers.
			 * prclose() must decrement prc_selfopens when
			 * the invalid files are closed.
			 */
			pcp->prc_flags &= ~PRC_EXCL;
			ASSERT(pcp->prc_selfopens <= writers);
			pcp->prc_selfopens = writers;
		}
	}
	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
	while (vp != NULL) {
		/*
		 * We should not invalidate the lwpiddir vnodes,
		 * but the necessities of maintaining the old
		 * ioctl()-based version of /proc require it.
		 */
		pnp = VTOP(vp);
		pnp->pr_flags |= PR_INVAL;
		prnotify(vp);
		vp = pnp->pr_next;
	}

	/*
	 * If any tracing flags are in effect and any vnodes are open for
	 * writing then set the requested-stop and run-on-last-close flags.
	 * Otherwise, clear all tracing flags.
	 */
	t->t_proc_flag &= ~TP_PAUSE;
	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
		t->t_proc_flag |= TP_PRSTOP;
		aston(t);	/* so ISSIG will see the flag */
		p->p_proc_flag |= P_PR_RUNLCL;
	} else {
		premptyset(&up->u_entrymask);		/* syscalls */
		premptyset(&up->u_exitmask);
		up->u_systrap = 0;
		premptyset(&p->p_sigmask);		/* signals */
		premptyset(&p->p_fltmask);		/* faults */
		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
		prnostep(ttolwp(t));
	}

	mutex_exit(&p->p_lock);
}

/*
 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
 * Return with pr_pidlock held in all cases.
 * Return with p_lock held if the process still exists.
 * Return value is the process pointer if the process still exists, else NULL.
 * If we lock the process, give ourselves kernel priority to avoid deadlocks;
 * this is undone in prunlock().
 */
proc_t *
pr_p_lock(prnode_t *pnp)
{
	proc_t *p;
	prcommon_t *pcp;

	mutex_enter(&pr_pidlock);
	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
		return (NULL);
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * This cv/mutex pair is persistent even if
		 * the process disappears while we sleep.
		 */
		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
		kmutex_t *mp = &p->p_lock;

		mutex_exit(&pr_pidlock);
		cv_wait(cv, mp);
		mutex_exit(mp);
		mutex_enter(&pr_pidlock);
		if (pcp->prc_proc == NULL)
			return (NULL);
		ASSERT(p == pcp->prc_proc);
		mutex_enter(&p->p_lock);
	}
	p->p_proc_flag |= P_PR_LOCK;
	return (p);
}

/*
 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
 * This prevents any lwp of the process from disappearing and
 * blocks most operations that a process can perform on itself.
 * Returns 0 on success, a non-zero error number on failure.
 *
 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
 *
 * error returns:
 *	ENOENT: process or lwp has disappeared or process is exiting
 *		(or has become a zombie and zdisp == ZNO).
 *	EAGAIN: procfs vnode has become invalid.
 *	EINTR:  signal arrived while waiting for exec to complete.
 */
int
prlock(prnode_t *pnp, int zdisp)
{
	prcommon_t *pcp;
	proc_t *p;

again:
	pcp = pnp->pr_common;
	p = pr_p_lock(pnp);
	mutex_exit(&pr_pidlock);

	/*
	 * Return ENOENT immediately if there is no process.
	 */
	if (p == NULL)
		return (ENOENT);

	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);

	/*
	 * Return ENOENT if process entered zombie state or is exiting
	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
	 */
	if (zdisp == ZNO &&
	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
		prunlock(pnp);
		return (ENOENT);
	}

	/*
	 * If lwp-specific, check to see if lwp has disappeared.
	 */
	if (pcp->prc_flags & PRC_LWP) {
		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
		    pcp->prc_tslot == -1) {
			prunlock(pnp);
			return (ENOENT);
		}
	}

	/*
	 * Return EAGAIN if we have encountered a security violation.
	 * (The process exec'd a set-id or unreadable executable file.)
	 */
	if (pnp->pr_flags & PR_INVAL) {
		prunlock(pnp);
		return (EAGAIN);
	}

	/*
	 * If process is undergoing an exec(), wait for
	 * completion and then start all over again.
	 */
	if (p->p_proc_flag & P_PR_EXEC) {
		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
		mutex_enter(&pcp->prc_mutex);
		prunlock(pnp);
		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
			mutex_exit(&pcp->prc_mutex);
			return (EINTR);
		}
		mutex_exit(&pcp->prc_mutex);
		goto again;
	}

	/*
	 * We return holding p->p_lock.
	 */
	return (0);
}
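
/*
 * A minimal sketch of how prlock() pairs with prunlock() (defined below);
 * error handling is abbreviated, and the real callers are the /proc
 * read/write/ioctl paths:
 *
 *	if ((error = prlock(pnp, ZNO)) != 0)
 *		return (error);
 *	...operate on the process; p->p_lock is held and P_PR_LOCK is set...
 *	prunlock(pnp);
 */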

/*
 * Undo prlock() and pr_p_lock().
 * p->p_lock is still held; pr_pidlock is no longer held.
 *
 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
 * if any, waiting for the flag to be dropped; it retains p->p_lock.
 *
 * prunlock() calls prunmark() and then drops p->p_lock.
 */
void
prunmark(proc_t *p)
{
	ASSERT(p->p_proc_flag & P_PR_LOCK);
	ASSERT(MUTEX_HELD(&p->p_lock));

	cv_signal(&pr_pid_cv[p->p_slot]);
	p->p_proc_flag &= ~P_PR_LOCK;
}

void
prunlock(prnode_t *pnp)
{
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;

	/*
	 * If we (or someone) gave it a SIGKILL, and it is not
	 * already a zombie, set it running unconditionally.
	 */
	if ((p->p_flag & SKILLED) &&
	    !(p->p_flag & SEXITING) &&
	    !(pcp->prc_flags & PRC_DESTROY) &&
	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1)) {
		int err = pr_setrun(pnp, 0);
		/*
		 * EBUSY here means either the process was not stopped by /proc
		 * or there is an agent lwp.  If there's an agent lwp, we don't
		 * need to do anything as it will run and witness the SIGKILL.
		 * However, if there's no agent lwp and the process was not
		 * stopped by /proc, it may have been stopped by SIGSTOP; try
		 * getting lwps running with TS_XSTART to undo SIGSTOP effect.
		 *
		 * Notably, other TS_* bits are inappropriate here:
		 * * Do not set TS_PSTART; pr_setrun() above would have already
		 *   set this if it did anything for this process.
		 * * Do not set TS_CSTART or TS_UNPAUSE; lwps may be stopped by
		 *   PR_SUSPEND for many reasons.  In some cases, like
		 *   holdlwps(), the suspender will resume the process before
		 *   the corresponding syscall returns.  In other cases, like
		 *   dumping core, the suspender will tear down the lwps as it
		 *   completes.
		 * * Do not set TS_RESUME out of caution; not sure about the
		 *   consequences of a process going away during CPR resume and
		 *   CPR should set the process running eventually.
		 * * Do not set TS_CREATE because lwp creation expects threads
		 *   to remain paused until lwp completes.
		 */
		if (err == EBUSY && p->p_agenttp == NULL) {
			runlwps(p, TS_XSTART);
		}
	}
	prunmark(p);
	mutex_exit(&p->p_lock);
}

/*
 * Called while holding p->p_lock to delay until the process is unlocked.
 * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
 * The process cannot become locked again until p->p_lock is dropped.
 */
void
prbarrier(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (p->p_proc_flag & P_PR_LOCK) {
		/* The process is locked; delay until not locked */
		uint_t slot = p->p_slot;

		while (p->p_proc_flag & P_PR_LOCK)
			cv_wait(&pr_pid_cv[slot], &p->p_lock);
		cv_signal(&pr_pid_cv[slot]);
	}
}

/*
 * Return process/lwp status.
 * The u-block is mapped in by this routine and unmapped at the end.
 */
void
prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus() does the rest */
	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
	sp->pr_brksize = p->p_brksize;
	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
	sp->pr_stksize = p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}

/*
 * Query mask of held signals for a given thread.
 *
 * This makes use of schedctl_sigblock() to query if userspace has requested
 * that all maskable signals be held.  While it would be tempting to call
 * schedctl_finish_sigblock() and apply that update to t->t_hold, it cannot be
 * done safely without the risk of racing with the thread under consideration.
 */
void
prgethold(kthread_t *t, sigset_t *sp)
{
	k_sigset_t set;

	if (schedctl_sigblock(t)) {
		set.__sigbits[0] = FILLSET0 & ~CANTMASK0;
		set.__sigbits[1] = FILLSET1 & ~CANTMASK1;
		set.__sigbits[2] = FILLSET2 & ~CANTMASK2;
	} else {
		set = t->t_hold;
	}
	sigktou(&set, sp);
}

#ifdef _SYSCALL32_IMPL
void
prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	prgethold(t, &sp->pr_lwphold);
	if (t->t_whystop == PR_FAULTED) {
		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
		if (t->t_whatstop == FLTPAGE)
			sp->pr_info.si_addr =
			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
	} else if (lwp->lwp_curinfo)
		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack.ss_sp =
	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts32(usr, &sp->pr_utime);
	hrt2ts32(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = (uint32_t)instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall32_args(lwp,
		    (int *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs32(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		long r1, r2;
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
		if (sp->pr_errno == 0) {
			sp->pr_rval1 = (int32_t)r1;
			sp->pr_rval2 = (int32_t)r2;
			sp->pr_errpriv = PRIV_NONE;
		} else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
			sp->pr_sysarg[3] = 0;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (caddr32_t)
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs32(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

void
prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus32() does the rest */
	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
	sp->pr_brksize = (uint32_t)p->p_brksize;
	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
	sp->pr_stksize = (uint32_t)p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus32(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Return lwp status.
 */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	if (p->p_pgidp->pid_pgorphaned)
		flags |= PR_ORPHAN;
	if (p->p_pidflag & CLDNOSIGCHLD)
		flags |= PR_NOSIGCHLD;
	if (p->p_pidflag & CLDWAITPID)
		flags |= PR_WAITPID;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	prgethold(t, &sp->pr_lwphold);
	if (t->t_whystop == PR_FAULTED)
		bcopy(&lwp->lwp_siginfo,
		    &sp->pr_info, sizeof (k_siginfo_t));
	else if (lwp->lwp_curinfo)
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &sp->pr_info, sizeof (k_siginfo_t));
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
		    (long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			sp->pr_sysarg[3] = 0;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

/*
 * Get the sigaction structure for the specified signal.  The u-block
 * must already have been mapped in by the caller.
 */
void
prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
{
	int nsig = PROC_IS_BRANDED(curproc) ?
	    BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}

#ifdef _SYSCALL32_IMPL
void
prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
{
	int nsig = PROC_IS_BRANDED(curproc) ? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Count the number of segments in this process's address space.
 */
uint_t
prnsegs(struct as *as, int reserved)
{
	uint_t n = 0;
	struct seg *seg;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr != naddr) {
				n++;
				/*
				 * prnsegs() was formerly designated to return
				 * an 'int' despite having no ability or use
				 * for negative results.  As part of changing
				 * it to 'uint_t', keep the old effective limit
				 * of INT_MAX in place.
				 */
				if (n == INT_MAX) {
					pr_getprot_done(&tmp);
					ASSERT(tmp == NULL);
					return (n);
				}
			}
		}

		ASSERT(tmp == NULL);
	}

	return (n);
}

/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char cbuf[11];		/* 32-bit unsigned integer fits in 10 digits */
	char *cp = cbuf;
	char *end = s + len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	while (s < end)		/* optional pad */
		*s++ = '\0';

	return (len);
}

/*
 * Convert uint64_t to decimal string w/o leading zeros.
 * Return the string length.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char cbuf[21];		/* 64-bit unsigned integer fits in 20 digits */
	char *cp = cbuf;
	int len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	return (len);
}
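
/*
 * A worked example of the two converters above (comment only, a sketch):
 * pr_u32tos(305, buf, 8) stores "305" followed by five '\0' pad bytes and
 * returns 3, while pr_u64tos(305, buf) stores just the three digit
 * characters (no terminator, no padding) and likewise returns 3.
 */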

/*
 * Similar to getf() / getf_gen(), but for the specified process.  On success,
 * returns the fp with fp->f_count incremented.  The caller MUST call
 * pr_releasef(fp) on the returned fp after completing any actions using that
 * fp.  We return a reference-held (fp->f_count bumped) file_t so no other
 * closef() can invoke destructive VOP_CLOSE actions while we're inspecting
 * the process's FD.
 *
 * Returns NULL for errors: either an empty process-table slot post-fi_lock
 * and UF_ENTER, or too many mutex_tryenter() failures on the file_t's f_tlock.
 * Both failure modes have DTrace probes.
 *
 * The current design of the procfs "close" code path uses the following lock
 * order of:
 *
 *	1: (file_t) f_tlock
 *	2: (proc_t) p_lock AND setting p->p_proc_flag's P_PR_LOCK
 *
 * That happens because closef() holds f_tlock while calling fop_close(),
 * which can be prclose(), which currently waits on and sets P_PR_LOCK at its
 * beginning.
 *
 * That lock order creates a challenge for pr_getf, which needs to take those
 * locks in the opposite order when the fd points to a procfs file descriptor.
 * The solution chosen here is to use mutex_tryenter on f_tlock and retry some
 * (limited) number of times, failing if we don't get both locks.
 *
 * The cases where this can fail are rare, and all involve a procfs caller
 * asking for info (eg. FDINFO) on another procfs FD.  In these cases,
 * returning EBADF (which results from a NULL return from pr_getf()) is
 * acceptable.
 *
 * One can increase the number of tries in pr_getf_maxtries if one is worried
 * about the contentious case.
 */

uint64_t pr_getf_tryfails;	/* Bumped for statistics purposes. */
int pr_getf_maxtries = 3;	/* So you can tune it from /etc/system */

file_t *
pr_getf(proc_t *p, uint_t fd, short *flag)
{
	uf_entry_t *ufp;
	uf_info_t *fip;
	file_t *fp;
	int tries = 0;

	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));

retry:
	fip = P_FINFO(p);

	if (fd >= fip->fi_nfiles)
		return (NULL);

	mutex_exit(&p->p_lock);
	mutex_enter(&fip->fi_lock);
	UF_ENTER(ufp, fip, fd);
	if ((fp = ufp->uf_file) != NULL && fp->f_count > 0) {
		if (mutex_tryenter(&fp->f_tlock)) {
			ASSERT(fp->f_count > 0);
			fp->f_count++;
			mutex_exit(&fp->f_tlock);
			if (flag != NULL)
				*flag = ufp->uf_flag;
		} else {
			/*
			 * Note the number of mutex_trylock attempts.
			 *
			 * The exit path will catch this and try again if we
			 * are below the retry threshold (pr_getf_maxtries).
			 */
			tries++;
			pr_getf_tryfails++;
			/*
			 * If we hit pr_getf_maxtries, we'll return NULL.
			 * DTrace scripts looking for this sort of failure
			 * should check when arg1 is pr_getf_maxtries.
			 */
			DTRACE_PROBE2(pr_getf_tryfail, file_t *, fp, int,
			    tries);
			fp = NULL;
		}
	} else {
		fp = NULL;
		/* If we fail here, someone else closed this FD. */
		DTRACE_PROBE1(pr_getf_emptyslot, int, tries);
		tries = pr_getf_maxtries;	/* Don't bother retrying. */
	}
	UF_EXIT(ufp);
	mutex_exit(&fip->fi_lock);
	mutex_enter(&p->p_lock);

	/* Use goto instead of tail-recursion so we can keep "tries" around. */
	if (fp == NULL) {
		/* "tries" starts at 1. */
		if (tries < pr_getf_maxtries)
			goto retry;
	} else {
		/*
		 * Probes here will detect successes after arg1's number of
		 * mutex_tryenter() calls.
		 */
		DTRACE_PROBE2(pr_getf_trysuccess, file_t *, fp, int, tries + 1);
	}

	return (fp);
}
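
/*
 * A minimal sketch of the intended pairing (the target process must
 * already be locked, so that P_PR_LOCK is set, before calling pr_getf()):
 *
 *	if ((fp = pr_getf(p, fd, NULL)) == NULL)
 *		return (EBADF);
 *	...inspect fp and fp->f_vnode...
 *	pr_releasef(fp);
 */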

/*
 * Just as pr_getf() is a little unusual in how it goes about making the file_t
 * safe for procfs consumers to access it, so too is pr_releasef() for safely
 * releasing that "hold".  The "hold" is unlike normal file descriptor activity
 * -- procfs is just an interloper here, wanting access to the vnode_t without
 * risk of a racing close() disrupting the state.  Just as pr_getf() avoids
 * some of the typical file_t behavior (such as auditing) when establishing
 * its hold, so too should pr_releasef().  It should not go through the
 * motions of closef() (since it is not a true close()) unless racing activity
 * causes it to be the last actor holding the refcount above zero.
 *
 * Under normal circumstances, we expect to find file_t`f_count > 1 after
 * the successful pr_getf() call.  We are, after all, accessing a resource
 * already held by the process in question.  We would also expect to rarely
 * race with a close() of the underlying fd, meaning that file_t`f_count > 1
 * would still hold at pr_releasef() time.  That would mean we only need to
 * decrement f_count, leaving it to the process to later close the fd (thus
 * triggering VOP_CLOSE(), etc).
 *
 * It is only when that process manages to close() the fd while we have it
 * "held" in procfs that we must make a trip through the traditional closef()
 * logic to ensure proper tear-down of the file_t.
 */
void
pr_releasef(file_t *fp)
{
	mutex_enter(&fp->f_tlock);
	if (fp->f_count > 1) {
		/*
		 * This is the most common case: The file is still held open by
		 * the process, and we simply need to release our hold by
		 * decrementing f_count
		 */
		fp->f_count--;
		mutex_exit(&fp->f_tlock);
	} else {
		/*
		 * A rare occasion: The process snuck a close() of this file
		 * while we were doing our business in procfs.  Given that
		 * f_count == 1, we are the only one with a reference to the
		 * file_t and need to take a trip through closef() to free it.
		 */
		mutex_exit(&fp->f_tlock);
		(void) closef(fp);
	}
}

void
pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
{
	char *s = name;
	struct vfs *vfsp;
	struct vfssw *vfsswp;

	if ((vfsp = vp->v_vfsp) != NULL &&
	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
	    *vfsswp->vsw_name) {
		(void) strcpy(s, vfsswp->vsw_name);
		s += strlen(s);
		*s++ = '.';
	}
	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
	*s++ = '.';
	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
	*s++ = '.';
	s += pr_u64tos(vattr->va_nodeid, s);
	*s++ = '\0';
}

struct seg *
break_seg(proc_t *p)
{
	caddr_t addr = p->p_brkbase;
	struct seg *seg;
	struct vnode *vp;

	if (p->p_brksize != 0)
		addr += p->p_brksize - 1;
	seg = as_segat(p->p_as, addr);
	if (seg != NULL && seg->s_ops == &segvn_ops &&
	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
		return (seg);
	return (NULL);
}

/*
 * Implementation of service functions to handle procfs generic chained
 * copyout buffers.
 */
typedef struct pr_iobuf_list {
	list_node_t	piol_link;	/* buffer linkage */
	size_t		piol_size;	/* total size (header + data) */
	size_t		piol_usedsize;	/* amount to copy out from this buf */
} piol_t;

#define	MAPSIZE	(64 * 1024)
#define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))

void
pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
{
	piol_t	*iol;
	size_t	initial_size = MIN(1, n) * itemsize;

	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));

	ASSERT(list_head(iolhead) == NULL);
	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
	ASSERT(initial_size > 0);

	/*
	 * Someone creating chained copyout buffers may ask for less than
	 * MAPSIZE if the amount of data to be buffered is known to be
	 * smaller than that.
	 * But in order to prevent involuntary self-denial of service,
	 * the requested input size is clamped at MAPSIZE.
	 */
	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
	iol = kmem_alloc(initial_size, KM_SLEEP);
	list_insert_head(iolhead, iol);
	iol->piol_usedsize = 0;
	iol->piol_size = initial_size;
}
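
/*
 * A sketch of the chained-buffer pattern as used by prgetmap() and friends
 * below (the item count passed to pr_iol_initlist() is only an estimate):
 * initialize the list, append one fixed-size item at a time, then hand the
 * whole chain to one of the copyout/uiomove routines, which also frees it:
 *
 *	list_t head;
 *
 *	pr_iol_initlist(&head, sizeof (prmap_t), nitems_estimate);
 *	mp = pr_iol_newbuf(&head, sizeof (prmap_t));	(repeat per item)
 *	error = pr_iol_copyout_and_free(&head, &addr, error);
 */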

void *
pr_iol_newbuf(list_t *iolhead, size_t itemsize)
{
	piol_t	*iol;
	char	*new;

	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
	ASSERT(list_head(iolhead) != NULL);

	iol = (piol_t *)list_tail(iolhead);

	if (iol->piol_size <
	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
		/*
		 * Out of space in the current buffer.  Allocate more.
		 */
		piol_t *newiol;

		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
		newiol->piol_size = MAPSIZE;
		newiol->piol_usedsize = 0;

		list_insert_after(iolhead, iol, newiol);
		iol = list_next(iolhead, iol);
		ASSERT(iol == newiol);
	}
	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
	iol->piol_usedsize += itemsize;
	bzero(new, itemsize);
	return (new);
}

void
pr_iol_freelist(list_t *iolhead)
{
	piol_t	*iol;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);
}

int
pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
{
	int error = errin;
	piol_t	*iol;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		if (!error) {
			if (copyout(PIOL_DATABUF(iol), *tgt,
			    iol->piol_usedsize))
				error = EFAULT;
			*tgt += iol->piol_usedsize;
		}
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);

	return (error);
}

int
pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
{
	offset_t	off = uiop->uio_offset;
	char		*base;
	size_t		size;
	piol_t		*iol;
	int		error = errin;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		base = PIOL_DATABUF(iol);
		size = iol->piol_usedsize;
		if (off <= size && error == 0 && uiop->uio_resid > 0)
			error = uiomove(base + off, size - off,
			    UIO_READ, uiop);
		off = MAX(0, off - (offset_t)size);
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);

	return (error);
}

/*
 * Return an array of structures with memory map information.
 * We allocate here; the caller must deallocate.
 */
int
prgetmap(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (uintptr_t)saddr;
			mp->pr_size = naddr - saddr;
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					mp->pr_vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_size = (uintptr_t)naddr -
					    mp->pr_vaddr;
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}

#ifdef _SYSCALL32_IMPL
int
prgetmap32(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
1996 */ 1997 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 1998 1999 if ((seg = AS_SEGFIRST(as)) == NULL) 2000 return (0); 2001 2002 brkseg = break_seg(p); 2003 stkseg = as_segat(as, prgetstackbase(p)); 2004 2005 do { 2006 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved); 2007 caddr_t saddr, naddr; 2008 void *tmp = NULL; 2009 2010 if ((seg->s_flags & S_HOLE) != 0) { 2011 continue; 2012 } 2013 2014 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2015 prot = pr_getprot(seg, reserved, &tmp, 2016 &saddr, &naddr, eaddr); 2017 if (saddr == naddr) 2018 continue; 2019 2020 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 2021 2022 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 2023 mp->pr_size = (size32_t)(naddr - saddr); 2024 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 2025 mp->pr_mflags = 0; 2026 if (prot & PROT_READ) 2027 mp->pr_mflags |= MA_READ; 2028 if (prot & PROT_WRITE) 2029 mp->pr_mflags |= MA_WRITE; 2030 if (prot & PROT_EXEC) 2031 mp->pr_mflags |= MA_EXEC; 2032 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 2033 mp->pr_mflags |= MA_SHARED; 2034 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 2035 mp->pr_mflags |= MA_NORESERVE; 2036 if (seg->s_ops == &segspt_shmops || 2037 (seg->s_ops == &segvn_ops && 2038 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 2039 mp->pr_mflags |= MA_ANON; 2040 if (seg == brkseg) 2041 mp->pr_mflags |= MA_BREAK; 2042 else if (seg == stkseg) { 2043 mp->pr_mflags |= MA_STACK; 2044 if (reserved) { 2045 size_t maxstack = 2046 ((size_t)p->p_stk_ctl + 2047 PAGEOFFSET) & PAGEMASK; 2048 uintptr_t vaddr = 2049 (uintptr_t)prgetstackbase(p) + 2050 p->p_stksize - maxstack; 2051 mp->pr_vaddr = (caddr32_t)vaddr; 2052 mp->pr_size = (size32_t) 2053 ((uintptr_t)naddr - vaddr); 2054 } 2055 } 2056 if (seg->s_ops == &segspt_shmops) 2057 mp->pr_mflags |= MA_ISM | MA_SHM; 2058 mp->pr_pagesize = PAGESIZE; 2059 2060 /* 2061 * Manufacture a filename for the "object" directory. 2062 */ 2063 vattr.va_mask = AT_FSID|AT_NODEID; 2064 if (seg->s_ops == &segvn_ops && 2065 SEGOP_GETVP(seg, saddr, &vp) == 0 && 2066 vp != NULL && vp->v_type == VREG && 2067 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 2068 if (vp == p->p_exec) 2069 (void) strcpy(mp->pr_mapname, "a.out"); 2070 else 2071 pr_object_name(mp->pr_mapname, 2072 vp, &vattr); 2073 } 2074 2075 /* 2076 * Get the SysV shared memory id, if any. 2077 */ 2078 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct && 2079 (mp->pr_shmid = shmgetid(p, seg->s_base)) != 2080 SHMID_NONE) { 2081 if (mp->pr_shmid == SHMID_FREE) 2082 mp->pr_shmid = -1; 2083 2084 mp->pr_mflags |= MA_SHM; 2085 } else { 2086 mp->pr_shmid = -1; 2087 } 2088 } 2089 ASSERT(tmp == NULL); 2090 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2091 2092 return (0); 2093 } 2094 #endif /* _SYSCALL32_IMPL */ 2095 2096 /* 2097 * Return the size of the /proc page data file. 
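 *
 * The file is laid out as a prpageheader_t followed, for each
 * non-empty address range, by a prasmap_t plus one byte of page data
 * per page, rounded up to an 8-byte boundary.  For example, a 5-page
 * range contributes sizeof (prasmap_t) + round8(5), i.e. 8 bytes of
 * page data, to the total computed below.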
2098 */ 2099 size_t 2100 prpdsize(struct as *as) 2101 { 2102 struct seg *seg; 2103 size_t size; 2104 2105 ASSERT(as != &kas && AS_WRITE_HELD(as)); 2106 2107 if ((seg = AS_SEGFIRST(as)) == NULL) 2108 return (0); 2109 2110 size = sizeof (prpageheader_t); 2111 do { 2112 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 2113 caddr_t saddr, naddr; 2114 void *tmp = NULL; 2115 size_t npage; 2116 2117 if ((seg->s_flags & S_HOLE) != 0) { 2118 continue; 2119 } 2120 2121 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2122 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 2123 if ((npage = (naddr - saddr) / PAGESIZE) != 0) 2124 size += sizeof (prasmap_t) + round8(npage); 2125 } 2126 ASSERT(tmp == NULL); 2127 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2128 2129 return (size); 2130 } 2131 2132 #ifdef _SYSCALL32_IMPL 2133 size_t 2134 prpdsize32(struct as *as) 2135 { 2136 struct seg *seg; 2137 size_t size; 2138 2139 ASSERT(as != &kas && AS_WRITE_HELD(as)); 2140 2141 if ((seg = AS_SEGFIRST(as)) == NULL) 2142 return (0); 2143 2144 size = sizeof (prpageheader32_t); 2145 do { 2146 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 2147 caddr_t saddr, naddr; 2148 void *tmp = NULL; 2149 size_t npage; 2150 2151 if ((seg->s_flags & S_HOLE) != 0) { 2152 continue; 2153 } 2154 2155 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2156 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 2157 if ((npage = (naddr - saddr) / PAGESIZE) != 0) 2158 size += sizeof (prasmap32_t) + round8(npage); 2159 } 2160 ASSERT(tmp == NULL); 2161 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2162 2163 return (size); 2164 } 2165 #endif /* _SYSCALL32_IMPL */ 2166 2167 /* 2168 * Read page data information. 2169 */ 2170 int 2171 prpdread(proc_t *p, uint_t hatid, struct uio *uiop) 2172 { 2173 struct as *as = p->p_as; 2174 caddr_t buf; 2175 size_t size; 2176 prpageheader_t *php; 2177 prasmap_t *pmp; 2178 struct seg *seg; 2179 int error; 2180 2181 again: 2182 AS_LOCK_ENTER(as, RW_WRITER); 2183 2184 if ((seg = AS_SEGFIRST(as)) == NULL) { 2185 AS_LOCK_EXIT(as); 2186 return (0); 2187 } 2188 size = prpdsize(as); 2189 if (uiop->uio_resid < size) { 2190 AS_LOCK_EXIT(as); 2191 return (E2BIG); 2192 } 2193 2194 buf = kmem_zalloc(size, KM_SLEEP); 2195 php = (prpageheader_t *)buf; 2196 pmp = (prasmap_t *)(buf + sizeof (prpageheader_t)); 2197 2198 hrt2ts(gethrtime(), &php->pr_tstamp); 2199 php->pr_nmap = 0; 2200 php->pr_npage = 0; 2201 do { 2202 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 2203 caddr_t saddr, naddr; 2204 void *tmp = NULL; 2205 2206 if ((seg->s_flags & S_HOLE) != 0) { 2207 continue; 2208 } 2209 2210 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2211 struct vnode *vp; 2212 struct vattr vattr; 2213 size_t len; 2214 size_t npage; 2215 uint_t prot; 2216 uintptr_t next; 2217 2218 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 2219 if ((len = (size_t)(naddr - saddr)) == 0) 2220 continue; 2221 npage = len / PAGESIZE; 2222 next = (uintptr_t)(pmp + 1) + round8(npage); 2223 /* 2224 * It's possible that the address space can change 2225 * subtly even though we're holding as->a_lock 2226 * due to the nondeterminism of page_exists() in 2227 * the presence of asynchronously flushed pages or 2228 * mapped files whose sizes are changing. 2229 * page_exists() may be called indirectly from 2230 * pr_getprot() by a SEGOP_INCORE() routine.
2231 * If this happens we need to make sure we don't 2232 * overrun the buffer whose size we computed based 2233 * on the initial iteration through the segments. 2234 * Once we've detected an overflow, we need to clean 2235 * up the temporary memory allocated in pr_getprot() 2236 * and retry. If there's a pending signal, we return 2237 * EINTR so that this thread can be dislodged if 2238 * a latent bug causes us to spin indefinitely. 2239 */ 2240 if (next > (uintptr_t)buf + size) { 2241 pr_getprot_done(&tmp); 2242 AS_LOCK_EXIT(as); 2243 2244 kmem_free(buf, size); 2245 2246 if (ISSIG(curthread, JUSTLOOKING)) 2247 return (EINTR); 2248 2249 goto again; 2250 } 2251 2252 php->pr_nmap++; 2253 php->pr_npage += npage; 2254 pmp->pr_vaddr = (uintptr_t)saddr; 2255 pmp->pr_npage = npage; 2256 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 2257 pmp->pr_mflags = 0; 2258 if (prot & PROT_READ) 2259 pmp->pr_mflags |= MA_READ; 2260 if (prot & PROT_WRITE) 2261 pmp->pr_mflags |= MA_WRITE; 2262 if (prot & PROT_EXEC) 2263 pmp->pr_mflags |= MA_EXEC; 2264 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 2265 pmp->pr_mflags |= MA_SHARED; 2266 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 2267 pmp->pr_mflags |= MA_NORESERVE; 2268 if (seg->s_ops == &segspt_shmops || 2269 (seg->s_ops == &segvn_ops && 2270 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 2271 pmp->pr_mflags |= MA_ANON; 2272 if (seg->s_ops == &segspt_shmops) 2273 pmp->pr_mflags |= MA_ISM | MA_SHM; 2274 pmp->pr_pagesize = PAGESIZE; 2275 /* 2276 * Manufacture a filename for the "object" directory. 2277 */ 2278 vattr.va_mask = AT_FSID|AT_NODEID; 2279 if (seg->s_ops == &segvn_ops && 2280 SEGOP_GETVP(seg, saddr, &vp) == 0 && 2281 vp != NULL && vp->v_type == VREG && 2282 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 2283 if (vp == p->p_exec) 2284 (void) strcpy(pmp->pr_mapname, "a.out"); 2285 else 2286 pr_object_name(pmp->pr_mapname, 2287 vp, &vattr); 2288 } 2289 2290 /* 2291 * Get the SysV shared memory id, if any. 
2292 */ 2293 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct && 2294 (pmp->pr_shmid = shmgetid(p, seg->s_base)) != 2295 SHMID_NONE) { 2296 if (pmp->pr_shmid == SHMID_FREE) 2297 pmp->pr_shmid = -1; 2298 2299 pmp->pr_mflags |= MA_SHM; 2300 } else { 2301 pmp->pr_shmid = -1; 2302 } 2303 2304 hat_getstat(as, saddr, len, hatid, 2305 (char *)(pmp + 1), HAT_SYNC_ZERORM); 2306 pmp = (prasmap_t *)next; 2307 } 2308 ASSERT(tmp == NULL); 2309 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2310 2311 AS_LOCK_EXIT(as); 2312 2313 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size); 2314 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop); 2315 kmem_free(buf, size); 2316 2317 return (error); 2318 } 2319 2320 #ifdef _SYSCALL32_IMPL 2321 int 2322 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop) 2323 { 2324 struct as *as = p->p_as; 2325 caddr_t buf; 2326 size_t size; 2327 prpageheader32_t *php; 2328 prasmap32_t *pmp; 2329 struct seg *seg; 2330 int error; 2331 2332 again: 2333 AS_LOCK_ENTER(as, RW_WRITER); 2334 2335 if ((seg = AS_SEGFIRST(as)) == NULL) { 2336 AS_LOCK_EXIT(as); 2337 return (0); 2338 } 2339 size = prpdsize32(as); 2340 if (uiop->uio_resid < size) { 2341 AS_LOCK_EXIT(as); 2342 return (E2BIG); 2343 } 2344 2345 buf = kmem_zalloc(size, KM_SLEEP); 2346 php = (prpageheader32_t *)buf; 2347 pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t)); 2348 2349 hrt2ts32(gethrtime(), &php->pr_tstamp); 2350 php->pr_nmap = 0; 2351 php->pr_npage = 0; 2352 do { 2353 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 2354 caddr_t saddr, naddr; 2355 void *tmp = NULL; 2356 2357 if ((seg->s_flags & S_HOLE) != 0) { 2358 continue; 2359 } 2360 2361 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2362 struct vnode *vp; 2363 struct vattr vattr; 2364 size_t len; 2365 size_t npage; 2366 uint_t prot; 2367 uintptr_t next; 2368 2369 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 2370 if ((len = (size_t)(naddr - saddr)) == 0) 2371 continue; 2372 npage = len / PAGESIZE; 2373 next = (uintptr_t)(pmp + 1) + round8(npage); 2374 /* 2375 * It's possible that the address space can change 2376 * subtly even though we're holding as->a_lock 2377 * due to the nondeterminism of page_exists() in 2378 * the presence of asynchronously flushed pages or 2379 * mapped files whose sizes are changing. 2380 * page_exists() may be called indirectly from 2381 * pr_getprot() by a SEGOP_INCORE() routine. 2382 * If this happens we need to make sure we don't 2383 * overrun the buffer whose size we computed based 2384 * on the initial iteration through the segments. 2385 * Once we've detected an overflow, we need to clean 2386 * up the temporary memory allocated in pr_getprot() 2387 * and retry. If there's a pending signal, we return 2388 * EINTR so that this thread can be dislodged if 2389 * a latent bug causes us to spin indefinitely.
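 *
 * In outline, the protocol implemented below is:
 *
 *	again:
 *		lock the address space, compute the size, allocate;
 *		walk the segments, filling in the buffer;
 *		on overflow: unlock, free, fail with EINTR if a
 *		signal is pending, otherwise goto again.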
2390 */ 2391 if (next > (uintptr_t)buf + size) { 2392 pr_getprot_done(&tmp); 2393 AS_LOCK_EXIT(as); 2394 2395 kmem_free(buf, size); 2396 2397 if (ISSIG(curthread, JUSTLOOKING)) 2398 return (EINTR); 2399 2400 goto again; 2401 } 2402 2403 php->pr_nmap++; 2404 php->pr_npage += npage; 2405 pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 2406 pmp->pr_npage = (size32_t)npage; 2407 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 2408 pmp->pr_mflags = 0; 2409 if (prot & PROT_READ) 2410 pmp->pr_mflags |= MA_READ; 2411 if (prot & PROT_WRITE) 2412 pmp->pr_mflags |= MA_WRITE; 2413 if (prot & PROT_EXEC) 2414 pmp->pr_mflags |= MA_EXEC; 2415 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 2416 pmp->pr_mflags |= MA_SHARED; 2417 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 2418 pmp->pr_mflags |= MA_NORESERVE; 2419 if (seg->s_ops == &segspt_shmops || 2420 (seg->s_ops == &segvn_ops && 2421 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 2422 pmp->pr_mflags |= MA_ANON; 2423 if (seg->s_ops == &segspt_shmops) 2424 pmp->pr_mflags |= MA_ISM | MA_SHM; 2425 pmp->pr_pagesize = PAGESIZE; 2426 /* 2427 * Manufacture a filename for the "object" directory. 2428 */ 2429 vattr.va_mask = AT_FSID|AT_NODEID; 2430 if (seg->s_ops == &segvn_ops && 2431 SEGOP_GETVP(seg, saddr, &vp) == 0 && 2432 vp != NULL && vp->v_type == VREG && 2433 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 2434 if (vp == p->p_exec) 2435 (void) strcpy(pmp->pr_mapname, "a.out"); 2436 else 2437 pr_object_name(pmp->pr_mapname, 2438 vp, &vattr); 2439 } 2440 2441 /* 2442 * Get the SysV shared memory id, if any. 2443 */ 2444 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct && 2445 (pmp->pr_shmid = shmgetid(p, seg->s_base)) != 2446 SHMID_NONE) { 2447 if (pmp->pr_shmid == SHMID_FREE) 2448 pmp->pr_shmid = -1; 2449 2450 pmp->pr_mflags |= MA_SHM; 2451 } else { 2452 pmp->pr_shmid = -1; 2453 } 2454 2455 hat_getstat(as, saddr, len, hatid, 2456 (char *)(pmp + 1), HAT_SYNC_ZERORM); 2457 pmp = (prasmap32_t *)next; 2458 } 2459 ASSERT(tmp == NULL); 2460 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2461 2462 AS_LOCK_EXIT(as); 2463 2464 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size); 2465 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop); 2466 kmem_free(buf, size); 2467 2468 return (error); 2469 } 2470 #endif /* _SYSCALL32_IMPL */ 2471 2472 ushort_t 2473 prgetpctcpu(uint64_t pct) 2474 { 2475 /* 2476 * The value returned will be relevant in the zone of the examiner, 2477 * which may not be the same as the zone which performed the procfs 2478 * mount. 2479 */ 2480 int nonline = zone_ncpus_online_get(curproc->p_zone); 2481 2482 /* 2483 * Prorate over online cpus so we don't exceed 100% 2484 */ 2485 if (nonline > 1) 2486 pct /= nonline; 2487 pct >>= 16; /* convert to 16-bit scaled integer */ 2488 if (pct > 0x8000) /* might happen, due to rounding */ 2489 pct = 0x8000; 2490 return ((ushort_t)pct); 2491 } 2492 2493 /* 2494 * Return information used by ps(1). 2495 */ 2496 void 2497 prgetpsinfo(proc_t *p, psinfo_t *psp) 2498 { 2499 kthread_t *t; 2500 struct cred *cred; 2501 hrtime_t hrutime, hrstime; 2502 2503 ASSERT(MUTEX_HELD(&p->p_lock)); 2504 2505 if ((t = prchoose(p)) == NULL) /* returns locked thread */ 2506 bzero(psp, sizeof (*psp)); 2507 else { 2508 thread_unlock(t); 2509 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp)); 2510 } 2511 2512 /* 2513 * only export SSYS and SMSACCT; everything else is off-limits to 2514 * userland apps. 
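 *
 * For reference, a userland consumer typically obtains this structure
 * by reading /proc/<pid>/psinfo directly; an illustrative sketch
 * (error handling elided):
 *
 *	psinfo_t ps;
 *	int fd = open("/proc/1234/psinfo", O_RDONLY);
 *	if (fd >= 0 && read(fd, &ps, sizeof (ps)) == sizeof (ps))
 *		(examine ps.pr_fname, ps.pr_pctcpu, ...);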
2515 */ 2516 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 2517 psp->pr_nlwp = p->p_lwpcnt; 2518 psp->pr_nzomb = p->p_zombcnt; 2519 mutex_enter(&p->p_crlock); 2520 cred = p->p_cred; 2521 psp->pr_uid = crgetruid(cred); 2522 psp->pr_euid = crgetuid(cred); 2523 psp->pr_gid = crgetrgid(cred); 2524 psp->pr_egid = crgetgid(cred); 2525 mutex_exit(&p->p_crlock); 2526 psp->pr_pid = p->p_pid; 2527 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 2528 (p->p_flag & SZONETOP)) { 2529 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 2530 /* 2531 * Inside local zones, fake zsched's pid as parent pids for 2532 * processes which reference processes outside of the zone. 2533 */ 2534 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 2535 } else { 2536 psp->pr_ppid = p->p_ppid; 2537 } 2538 psp->pr_pgid = p->p_pgrp; 2539 psp->pr_sid = p->p_sessp->s_sid; 2540 psp->pr_taskid = p->p_task->tk_tkid; 2541 psp->pr_projid = p->p_task->tk_proj->kpj_id; 2542 psp->pr_poolid = p->p_pool->pool_id; 2543 psp->pr_zoneid = p->p_zone->zone_id; 2544 if ((psp->pr_contract = PRCTID(p)) == 0) 2545 psp->pr_contract = -1; 2546 psp->pr_addr = (uintptr_t)prgetpsaddr(p); 2547 switch (p->p_model) { 2548 case DATAMODEL_ILP32: 2549 psp->pr_dmodel = PR_MODEL_ILP32; 2550 break; 2551 case DATAMODEL_LP64: 2552 psp->pr_dmodel = PR_MODEL_LP64; 2553 break; 2554 } 2555 hrutime = mstate_aggr_state(p, LMS_USER); 2556 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 2557 hrt2ts((hrutime + hrstime), &psp->pr_time); 2558 TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime); 2559 2560 if (t == NULL) { 2561 int wcode = p->p_wcode; /* must be atomic read */ 2562 2563 if (wcode) 2564 psp->pr_wstat = wstat(wcode, p->p_wdata); 2565 psp->pr_ttydev = PRNODEV; 2566 psp->pr_lwp.pr_state = SZOMB; 2567 psp->pr_lwp.pr_sname = 'Z'; 2568 psp->pr_lwp.pr_bindpro = PBIND_NONE; 2569 psp->pr_lwp.pr_bindpset = PS_NONE; 2570 } else { 2571 user_t *up = PTOU(p); 2572 struct as *as; 2573 dev_t d; 2574 extern dev_t rwsconsdev, rconsdev, uconsdev; 2575 2576 d = cttydev(p); 2577 /* 2578 * If the controlling terminal is the real 2579 * or workstation console device, map to what the 2580 * user thinks is the console device. Handle case when 2581 * rwsconsdev or rconsdev is set to NODEV for Starfire. 2582 */ 2583 if ((d == rwsconsdev || d == rconsdev) && d != NODEV) 2584 d = uconsdev; 2585 psp->pr_ttydev = (d == NODEV) ? 
PRNODEV : d; 2586 psp->pr_start = up->u_start; 2587 bcopy(up->u_comm, psp->pr_fname, 2588 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); 2589 bcopy(up->u_psargs, psp->pr_psargs, 2590 MIN(PRARGSZ-1, PSARGSZ)); 2591 psp->pr_argc = up->u_argc; 2592 psp->pr_argv = up->u_argv; 2593 psp->pr_envp = up->u_envp; 2594 2595 /* get the chosen lwp's lwpsinfo */ 2596 prgetlwpsinfo(t, &psp->pr_lwp); 2597 2598 /* compute %cpu for the process */ 2599 if (p->p_lwpcnt == 1) 2600 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; 2601 else { 2602 uint64_t pct = 0; 2603 hrtime_t cur_time = gethrtime_unscaled(); 2604 2605 t = p->p_tlist; 2606 do { 2607 pct += cpu_update_pct(t, cur_time); 2608 } while ((t = t->t_forw) != p->p_tlist); 2609 2610 psp->pr_pctcpu = prgetpctcpu(pct); 2611 } 2612 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { 2613 psp->pr_size = 0; 2614 psp->pr_rssize = 0; 2615 } else { 2616 mutex_exit(&p->p_lock); 2617 AS_LOCK_ENTER(as, RW_READER); 2618 psp->pr_size = btopr(as->a_resvsize) * 2619 (PAGESIZE / 1024); 2620 psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024); 2621 psp->pr_pctmem = rm_pctmemory(as); 2622 AS_LOCK_EXIT(as); 2623 mutex_enter(&p->p_lock); 2624 } 2625 } 2626 } 2627 2628 static size_t 2629 prfdinfomisc(list_t *data, uint_t type, const void *val, size_t vlen) 2630 { 2631 pr_misc_header_t *misc; 2632 size_t len; 2633 2634 len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen); 2635 2636 if (data != NULL) { 2637 misc = pr_iol_newbuf(data, len); 2638 misc->pr_misc_type = type; 2639 misc->pr_misc_size = len; 2640 misc++; 2641 bcopy((char *)val, (char *)misc, vlen); 2642 } 2643 2644 return (len); 2645 } 2646 2647 /* 2648 * There's no elegant way to determine if a character device 2649 * supports TLI, so just check a hardcoded list of known TLI 2650 * devices. 2651 */ 2652 2653 static boolean_t 2654 pristli(vnode_t *vp) 2655 { 2656 static const char *tlidevs[] = { 2657 "udp", "udp6", "tcp", "tcp6" 2658 }; 2659 char *devname; 2660 uint_t i; 2661 2662 ASSERT(vp != NULL); 2663 2664 if (vp->v_type != VCHR || vp->v_stream == NULL || vp->v_rdev == 0) 2665 return (B_FALSE); 2666 2667 if ((devname = mod_major_to_name(getmajor(vp->v_rdev))) == NULL) 2668 return (B_FALSE); 2669 2670 for (i = 0; i < ARRAY_SIZE(tlidevs); i++) { 2671 if (strcmp(devname, tlidevs[i]) == 0) 2672 return (B_TRUE); 2673 } 2674 2675 return (B_FALSE); 2676 } 2677 2678 static size_t 2679 prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred) 2680 { 2681 char *pathname; 2682 size_t pathlen; 2683 size_t sz = 0; 2684 2685 /* 2686 * The global zone's path to a file in a non-global zone can exceed 2687 * MAXPATHLEN. 
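 *
 * For example, a file that a zone sees as /var/tmp/f is visible to
 * the global zone as <zoneroot>/var/tmp/f, where the zone root prefix
 * may itself approach MAXPATHLEN; hence the doubled buffer below.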
2688 */ 2689 pathlen = MAXPATHLEN * 2 + 1; 2690 pathname = kmem_alloc(pathlen, KM_SLEEP); 2691 2692 if (vnodetopath(NULL, vp, pathname, pathlen, cred) == 0) { 2693 sz += prfdinfomisc(data, PR_PATHNAME, 2694 pathname, strlen(pathname) + 1); 2695 } 2696 2697 kmem_free(pathname, pathlen); 2698 2699 return (sz); 2700 } 2701 2702 static size_t 2703 prfdinfotlisockopt(vnode_t *vp, list_t *data, cred_t *cred) 2704 { 2705 strcmd_t strcmd; 2706 int32_t rval; 2707 size_t sz = 0; 2708 2709 strcmd.sc_cmd = TI_GETMYNAME; 2710 strcmd.sc_timeout = 1; 2711 strcmd.sc_len = STRCMDBUFSIZE; 2712 2713 if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred, 2714 &rval, NULL) == 0 && strcmd.sc_len > 0) { 2715 sz += prfdinfomisc(data, PR_SOCKETNAME, strcmd.sc_buf, 2716 strcmd.sc_len); 2717 } 2718 2719 strcmd.sc_cmd = TI_GETPEERNAME; 2720 strcmd.sc_timeout = 1; 2721 strcmd.sc_len = STRCMDBUFSIZE; 2722 2723 if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred, 2724 &rval, NULL) == 0 && strcmd.sc_len > 0) { 2725 sz += prfdinfomisc(data, PR_PEERSOCKNAME, strcmd.sc_buf, 2726 strcmd.sc_len); 2727 } 2728 2729 return (sz); 2730 } 2731 2732 static size_t 2733 prfdinfosockopt(vnode_t *vp, list_t *data, cred_t *cred) 2734 { 2735 sonode_t *so; 2736 socklen_t vlen; 2737 size_t sz = 0; 2738 uint_t i; 2739 2740 if (vp->v_stream != NULL) { 2741 so = VTOSO(vp->v_stream->sd_vnode); 2742 2743 if (so->so_version == SOV_STREAM) 2744 so = NULL; 2745 } else { 2746 so = VTOSO(vp); 2747 } 2748 2749 if (so == NULL) 2750 return (0); 2751 2752 DTRACE_PROBE1(sonode, sonode_t *, so); 2753 2754 /* prmisc - PR_SOCKETNAME */ 2755 2756 struct sockaddr_storage buf; 2757 struct sockaddr *name = (struct sockaddr *)&buf; 2758 2759 vlen = sizeof (buf); 2760 if (SOP_GETSOCKNAME(so, name, &vlen, cred) == 0 && vlen > 0) 2761 sz += prfdinfomisc(data, PR_SOCKETNAME, name, vlen); 2762 2763 /* prmisc - PR_PEERSOCKNAME */ 2764 2765 vlen = sizeof (buf); 2766 if (SOP_GETPEERNAME(so, name, &vlen, B_FALSE, cred) == 0 && vlen > 0) 2767 sz += prfdinfomisc(data, PR_PEERSOCKNAME, name, vlen); 2768 2769 /* prmisc - PR_SOCKOPTS_BOOL_OPTS */ 2770 2771 static struct boolopt { 2772 int level; 2773 int opt; 2774 int bopt; 2775 } boolopts[] = { 2776 { SOL_SOCKET, SO_DEBUG, PR_SO_DEBUG }, 2777 { SOL_SOCKET, SO_REUSEADDR, PR_SO_REUSEADDR }, 2778 #ifdef SO_REUSEPORT 2779 /* SmartOS and OmniOS have SO_REUSEPORT */ 2780 { SOL_SOCKET, SO_REUSEPORT, PR_SO_REUSEPORT }, 2781 #endif 2782 { SOL_SOCKET, SO_KEEPALIVE, PR_SO_KEEPALIVE }, 2783 { SOL_SOCKET, SO_DONTROUTE, PR_SO_DONTROUTE }, 2784 { SOL_SOCKET, SO_BROADCAST, PR_SO_BROADCAST }, 2785 { SOL_SOCKET, SO_OOBINLINE, PR_SO_OOBINLINE }, 2786 { SOL_SOCKET, SO_DGRAM_ERRIND, PR_SO_DGRAM_ERRIND }, 2787 { SOL_SOCKET, SO_ALLZONES, PR_SO_ALLZONES }, 2788 { SOL_SOCKET, SO_MAC_EXEMPT, PR_SO_MAC_EXEMPT }, 2789 { SOL_SOCKET, SO_MAC_IMPLICIT, PR_SO_MAC_IMPLICIT }, 2790 { SOL_SOCKET, SO_EXCLBIND, PR_SO_EXCLBIND }, 2791 { SOL_SOCKET, SO_VRRP, PR_SO_VRRP }, 2792 { IPPROTO_UDP, UDP_NAT_T_ENDPOINT, 2793 PR_UDP_NAT_T_ENDPOINT } 2794 }; 2795 prsockopts_bool_opts_t opts; 2796 int val; 2797 2798 if (data != NULL) { 2799 opts.prsock_bool_opts = 0; 2800 2801 for (i = 0; i < ARRAY_SIZE(boolopts); i++) { 2802 vlen = sizeof (val); 2803 if (SOP_GETSOCKOPT(so, boolopts[i].level, 2804 boolopts[i].opt, &val, &vlen, 0, cred) == 0 && 2805 val != 0) { 2806 opts.prsock_bool_opts |= boolopts[i].bopt; 2807 } 2808 } 2809 } 2810 2811 sz += prfdinfomisc(data, PR_SOCKOPTS_BOOL_OPTS, &opts, sizeof (opts)); 2812 2813 /* prmisc - PR_SOCKOPT_LINGER */ 2814 2815 
struct linger l; 2816 2817 vlen = sizeof (l); 2818 if (SOP_GETSOCKOPT(so, SOL_SOCKET, SO_LINGER, &l, &vlen, 2819 0, cred) == 0 && vlen > 0) { 2820 sz += prfdinfomisc(data, PR_SOCKOPT_LINGER, &l, vlen); 2821 } 2822 2823 /* prmisc - PR_SOCKOPT_* int types */ 2824 2825 static struct sopt { 2826 int level; 2827 int opt; 2828 int bopt; 2829 } sopts[] = { 2830 { SOL_SOCKET, SO_TYPE, PR_SOCKOPT_TYPE }, 2831 { SOL_SOCKET, SO_SNDBUF, PR_SOCKOPT_SNDBUF }, 2832 { SOL_SOCKET, SO_RCVBUF, PR_SOCKOPT_RCVBUF } 2833 }; 2834 2835 for (i = 0; i < ARRAY_SIZE(sopts); i++) { 2836 vlen = sizeof (val); 2837 if (SOP_GETSOCKOPT(so, sopts[i].level, sopts[i].opt, 2838 &val, &vlen, 0, cred) == 0 && vlen > 0) { 2839 sz += prfdinfomisc(data, sopts[i].bopt, &val, vlen); 2840 } 2841 } 2842 2843 /* prmisc - PR_SOCKOPT_IP_NEXTHOP */ 2844 2845 in_addr_t nexthop_val; 2846 2847 vlen = sizeof (nexthop_val); 2848 if (SOP_GETSOCKOPT(so, IPPROTO_IP, IP_NEXTHOP, 2849 &nexthop_val, &vlen, 0, cred) == 0 && vlen > 0) { 2850 sz += prfdinfomisc(data, PR_SOCKOPT_IP_NEXTHOP, 2851 &nexthop_val, vlen); 2852 } 2853 2854 /* prmisc - PR_SOCKOPT_IPV6_NEXTHOP */ 2855 2856 struct sockaddr_in6 nexthop6_val; 2857 2858 vlen = sizeof (nexthop6_val); 2859 if (SOP_GETSOCKOPT(so, IPPROTO_IPV6, IPV6_NEXTHOP, 2860 &nexthop6_val, &vlen, 0, cred) == 0 && vlen > 0) { 2861 sz += prfdinfomisc(data, PR_SOCKOPT_IPV6_NEXTHOP, 2862 &nexthop6_val, vlen); 2863 } 2864 2865 /* prmisc - PR_SOCKOPT_TCP_CONGESTION */ 2866 2867 char cong[CC_ALGO_NAME_MAX]; 2868 2869 vlen = sizeof (cong); 2870 if (SOP_GETSOCKOPT(so, IPPROTO_TCP, TCP_CONGESTION, 2871 &cong, &vlen, 0, cred) == 0 && vlen > 0) { 2872 sz += prfdinfomisc(data, PR_SOCKOPT_TCP_CONGESTION, cong, vlen); 2873 } 2874 2875 /* prmisc - PR_SOCKFILTERS_PRIV */ 2876 2877 struct fil_info fi; 2878 2879 vlen = sizeof (fi); 2880 if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST, 2881 &fi, &vlen, 0, cred) == 0 && vlen != 0) { 2882 pr_misc_header_t *misc; 2883 size_t len; 2884 2885 /* 2886 * We limit the number of returned filters to 32. 2887 * This is the maximum number that pfiles will print 2888 * anyway. 2889 */ 2890 vlen = MIN(32, fi.fi_pos + 1); 2891 vlen *= sizeof (fi); 2892 2893 len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen); 2894 sz += len; 2895 2896 if (data != NULL) { 2897 /* 2898 * So that the filter list can be built incrementally, 2899 * prfdinfomisc() is not used here. Instead we 2900 * allocate a buffer directly on the copyout list using 2901 * pr_iol_newbuf() 2902 */ 2903 misc = pr_iol_newbuf(data, len); 2904 misc->pr_misc_type = PR_SOCKFILTERS_PRIV; 2905 misc->pr_misc_size = len; 2906 misc++; 2907 len = vlen; 2908 if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST, 2909 misc, &vlen, 0, cred) == 0) { 2910 /* 2911 * In case the number of filters has reduced 2912 * since the first call, explicitly zero out 2913 * any unpopulated space. 
2914 */ 2915 if (vlen < len) 2916 bzero((char *)misc + vlen, len - vlen); 2917 } else { 2918 /* Something went wrong, zero out the result */ 2919 bzero(misc, vlen); 2920 } 2921 } 2922 } 2923 2924 return (sz); 2925 } 2926 2927 typedef struct prfdinfo_nm_path_cbdata { 2928 proc_t *nmp_p; 2929 u_offset_t nmp_sz; 2930 list_t *nmp_data; 2931 } prfdinfo_nm_path_cbdata_t; 2932 2933 static int 2934 prfdinfo_nm_path(const struct namenode *np, cred_t *cred, void *arg) 2935 { 2936 prfdinfo_nm_path_cbdata_t *cb = arg; 2937 2938 cb->nmp_sz += prfdinfopath(cb->nmp_p, np->nm_vnode, cb->nmp_data, cred); 2939 2940 return (0); 2941 } 2942 2943 u_offset_t 2944 prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred) 2945 { 2946 u_offset_t sz; 2947 2948 /* 2949 * All fdinfo files will be at least this big - 2950 * sizeof fdinfo struct + zero length trailer 2951 */ 2952 sz = offsetof(prfdinfo_t, pr_misc) + sizeof (pr_misc_header_t); 2953 2954 /* Pathname */ 2955 switch (vp->v_type) { 2956 case VDOOR: { 2957 prfdinfo_nm_path_cbdata_t cb = { 2958 .nmp_p = p, 2959 .nmp_data = NULL, 2960 .nmp_sz = 0 2961 }; 2962 2963 (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb); 2964 sz += cb.nmp_sz; 2965 break; 2966 } 2967 case VSOCK: 2968 break; 2969 default: 2970 sz += prfdinfopath(p, vp, NULL, cred); 2971 } 2972 2973 /* Socket options */ 2974 if (vp->v_type == VSOCK) 2975 sz += prfdinfosockopt(vp, NULL, cred); 2976 2977 /* TLI/XTI sockets */ 2978 if (pristli(vp)) 2979 sz += prfdinfotlisockopt(vp, NULL, cred); 2980 2981 return (sz); 2982 } 2983 2984 int 2985 prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfo_t *fdinfo, cred_t *cred, 2986 cred_t *file_cred, list_t *data) 2987 { 2988 vattr_t vattr; 2989 int error; 2990 2991 /* 2992 * The buffer has been initialised to zero by pr_iol_newbuf(). 2993 * Initialise defaults for any values that should not default to zero. 2994 */ 2995 fdinfo->pr_uid = (uid_t)-1; 2996 fdinfo->pr_gid = (gid_t)-1; 2997 fdinfo->pr_size = -1; 2998 fdinfo->pr_locktype = F_UNLCK; 2999 fdinfo->pr_lockpid = -1; 3000 fdinfo->pr_locksysid = -1; 3001 fdinfo->pr_peerpid = -1; 3002 3003 /* Offset */ 3004 3005 /* 3006 * pr_offset has already been set from the underlying file_t. 3007 * Check if it is plausible and reset to -1 if not. 3008 */ 3009 if (fdinfo->pr_offset != -1 && 3010 VOP_SEEK(vp, 0, (offset_t *)&fdinfo->pr_offset, NULL) != 0) 3011 fdinfo->pr_offset = -1; 3012 3013 /* 3014 * Attributes 3015 * 3016 * We have two cred_t structures available here. 3017 * 'cred' is the caller's credential, and 'file_cred' is the credential 3018 * for the file being inspected. 3019 * 3020 * When looking up the file attributes, file_cred is used in order 3021 * that the correct ownership is set for doors and FIFOs. Since the 3022 * caller has permission to read the fdinfo file in proc, this does 3023 * not expose any additional information.
3024 */ 3025 vattr.va_mask = AT_STAT; 3026 if (VOP_GETATTR(vp, &vattr, 0, file_cred, NULL) == 0) { 3027 fdinfo->pr_major = getmajor(vattr.va_fsid); 3028 fdinfo->pr_minor = getminor(vattr.va_fsid); 3029 fdinfo->pr_rmajor = getmajor(vattr.va_rdev); 3030 fdinfo->pr_rminor = getminor(vattr.va_rdev); 3031 fdinfo->pr_ino = (ino64_t)vattr.va_nodeid; 3032 fdinfo->pr_size = (off64_t)vattr.va_size; 3033 fdinfo->pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode; 3034 fdinfo->pr_uid = vattr.va_uid; 3035 fdinfo->pr_gid = vattr.va_gid; 3036 if (vp->v_type == VSOCK) 3037 fdinfo->pr_fileflags |= sock_getfasync(vp); 3038 } 3039 3040 /* locks */ 3041 3042 flock64_t bf; 3043 3044 bzero(&bf, sizeof (bf)); 3045 bf.l_type = F_WRLCK; 3046 3047 if (VOP_FRLOCK(vp, F_GETLK, &bf, 3048 (uint16_t)(fdinfo->pr_fileflags & 0xffff), 0, NULL, 3049 cred, NULL) == 0 && bf.l_type != F_UNLCK) { 3050 fdinfo->pr_locktype = bf.l_type; 3051 fdinfo->pr_lockpid = bf.l_pid; 3052 fdinfo->pr_locksysid = bf.l_sysid; 3053 } 3054 3055 /* peer cred */ 3056 3057 k_peercred_t kpc; 3058 3059 switch (vp->v_type) { 3060 case VFIFO: 3061 case VSOCK: { 3062 int32_t rval; 3063 3064 error = VOP_IOCTL(vp, _I_GETPEERCRED, (intptr_t)&kpc, 3065 FKIOCTL, cred, &rval, NULL); 3066 break; 3067 } 3068 case VCHR: { 3069 struct strioctl strioc; 3070 int32_t rval; 3071 3072 if (vp->v_stream == NULL) { 3073 error = ENOTSUP; 3074 break; 3075 } 3076 strioc.ic_cmd = _I_GETPEERCRED; 3077 strioc.ic_timout = INFTIM; 3078 strioc.ic_len = (int)sizeof (k_peercred_t); 3079 strioc.ic_dp = (char *)&kpc; 3080 3081 error = strdoioctl(vp->v_stream, &strioc, FNATIVE | FKIOCTL, 3082 STR_NOSIG | K_TO_K, cred, &rval); 3083 break; 3084 } 3085 default: 3086 error = ENOTSUP; 3087 break; 3088 } 3089 3090 if (error == 0 && kpc.pc_cr != NULL) { 3091 proc_t *peerp; 3092 3093 fdinfo->pr_peerpid = kpc.pc_cpid; 3094 3095 crfree(kpc.pc_cr); 3096 3097 mutex_enter(&pidlock); 3098 if ((peerp = prfind(fdinfo->pr_peerpid)) != NULL) { 3099 user_t *up; 3100 3101 mutex_enter(&peerp->p_lock); 3102 mutex_exit(&pidlock); 3103 3104 up = PTOU(peerp); 3105 bcopy(up->u_comm, fdinfo->pr_peername, 3106 MIN(sizeof (up->u_comm), 3107 sizeof (fdinfo->pr_peername) - 1)); 3108 3109 mutex_exit(&peerp->p_lock); 3110 } else { 3111 mutex_exit(&pidlock); 3112 } 3113 } 3114 3115 /* pathname */ 3116 3117 switch (vp->v_type) { 3118 case VDOOR: { 3119 prfdinfo_nm_path_cbdata_t cb = { 3120 .nmp_p = p, 3121 .nmp_data = data, 3122 .nmp_sz = 0 3123 }; 3124 3125 (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb); 3126 break; 3127 } 3128 case VSOCK: 3129 /* 3130 * Don't attempt to determine the path for a socket as the 3131 * vnode has no associated v_path. It will cause a linear scan 3132 * of the dnlc table and result in no path being found. 3133 */ 3134 break; 3135 default: 3136 (void) prfdinfopath(p, vp, data, cred); 3137 } 3138 3139 /* socket options */ 3140 if (vp->v_type == VSOCK) 3141 (void) prfdinfosockopt(vp, data, cred); 3142 3143 /* TLI/XTI stream sockets */ 3144 if (pristli(vp)) 3145 (void) prfdinfotlisockopt(vp, data, cred); 3146 3147 /* 3148 * Add a terminating header with a zero size. 
3149 */ 3150 pr_misc_header_t *misc; 3151 3152 misc = pr_iol_newbuf(data, sizeof (*misc)); 3153 misc->pr_misc_size = 0; 3154 misc->pr_misc_type = (uint_t)-1; 3155 3156 return (0); 3157 } 3158 3159 #ifdef _SYSCALL32_IMPL 3160 void 3161 prgetpsinfo32(proc_t *p, psinfo32_t *psp) 3162 { 3163 kthread_t *t; 3164 struct cred *cred; 3165 hrtime_t hrutime, hrstime; 3166 3167 ASSERT(MUTEX_HELD(&p->p_lock)); 3168 3169 if ((t = prchoose(p)) == NULL) /* returns locked thread */ 3170 bzero(psp, sizeof (*psp)); 3171 else { 3172 thread_unlock(t); 3173 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp)); 3174 } 3175 3176 /* 3177 * only export SSYS and SMSACCT; everything else is off-limits to 3178 * userland apps. 3179 */ 3180 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 3181 psp->pr_nlwp = p->p_lwpcnt; 3182 psp->pr_nzomb = p->p_zombcnt; 3183 mutex_enter(&p->p_crlock); 3184 cred = p->p_cred; 3185 psp->pr_uid = crgetruid(cred); 3186 psp->pr_euid = crgetuid(cred); 3187 psp->pr_gid = crgetrgid(cred); 3188 psp->pr_egid = crgetgid(cred); 3189 mutex_exit(&p->p_crlock); 3190 psp->pr_pid = p->p_pid; 3191 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 3192 (p->p_flag & SZONETOP)) { 3193 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 3194 /* 3195 * Inside local zones, fake zsched's pid as parent pids for 3196 * processes which reference processes outside of the zone. 3197 */ 3198 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 3199 } else { 3200 psp->pr_ppid = p->p_ppid; 3201 } 3202 psp->pr_pgid = p->p_pgrp; 3203 psp->pr_sid = p->p_sessp->s_sid; 3204 psp->pr_taskid = p->p_task->tk_tkid; 3205 psp->pr_projid = p->p_task->tk_proj->kpj_id; 3206 psp->pr_poolid = p->p_pool->pool_id; 3207 psp->pr_zoneid = p->p_zone->zone_id; 3208 if ((psp->pr_contract = PRCTID(p)) == 0) 3209 psp->pr_contract = -1; 3210 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 3211 switch (p->p_model) { 3212 case DATAMODEL_ILP32: 3213 psp->pr_dmodel = PR_MODEL_ILP32; 3214 break; 3215 case DATAMODEL_LP64: 3216 psp->pr_dmodel = PR_MODEL_LP64; 3217 break; 3218 } 3219 hrutime = mstate_aggr_state(p, LMS_USER); 3220 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 3221 hrt2ts32(hrutime + hrstime, &psp->pr_time); 3222 TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime); 3223 3224 if (t == NULL) { 3225 extern int wstat(int, int); /* needs a header file */ 3226 int wcode = p->p_wcode; /* must be atomic read */ 3227 3228 if (wcode) 3229 psp->pr_wstat = wstat(wcode, p->p_wdata); 3230 psp->pr_ttydev = PRNODEV32; 3231 psp->pr_lwp.pr_state = SZOMB; 3232 psp->pr_lwp.pr_sname = 'Z'; 3233 } else { 3234 user_t *up = PTOU(p); 3235 struct as *as; 3236 dev_t d; 3237 extern dev_t rwsconsdev, rconsdev, uconsdev; 3238 3239 d = cttydev(p); 3240 /* 3241 * If the controlling terminal is the real 3242 * or workstation console device, map to what the 3243 * user thinks is the console device. Handle case when 3244 * rwsconsdev or rconsdev is set to NODEV for Starfire. 
3245 */ 3246 if ((d == rwsconsdev || d == rconsdev) && d != NODEV) 3247 d = uconsdev; 3248 (void) cmpldev(&psp->pr_ttydev, d); 3249 TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start); 3250 bcopy(up->u_comm, psp->pr_fname, 3251 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); 3252 bcopy(up->u_psargs, psp->pr_psargs, 3253 MIN(PRARGSZ-1, PSARGSZ)); 3254 psp->pr_argc = up->u_argc; 3255 psp->pr_argv = (caddr32_t)up->u_argv; 3256 psp->pr_envp = (caddr32_t)up->u_envp; 3257 3258 /* get the chosen lwp's lwpsinfo */ 3259 prgetlwpsinfo32(t, &psp->pr_lwp); 3260 3261 /* compute %cpu for the process */ 3262 if (p->p_lwpcnt == 1) 3263 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; 3264 else { 3265 uint64_t pct = 0; 3266 hrtime_t cur_time; 3267 3268 t = p->p_tlist; 3269 cur_time = gethrtime_unscaled(); 3270 do { 3271 pct += cpu_update_pct(t, cur_time); 3272 } while ((t = t->t_forw) != p->p_tlist); 3273 3274 psp->pr_pctcpu = prgetpctcpu(pct); 3275 } 3276 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { 3277 psp->pr_size = 0; 3278 psp->pr_rssize = 0; 3279 } else { 3280 mutex_exit(&p->p_lock); 3281 AS_LOCK_ENTER(as, RW_READER); 3282 psp->pr_size = (size32_t) 3283 (btopr(as->a_resvsize) * (PAGESIZE / 1024)); 3284 psp->pr_rssize = (size32_t) 3285 (rm_asrss(as) * (PAGESIZE / 1024)); 3286 psp->pr_pctmem = rm_pctmemory(as); 3287 AS_LOCK_EXIT(as); 3288 mutex_enter(&p->p_lock); 3289 } 3290 } 3291 3292 /* 3293 * If we are looking at an LP64 process, zero out 3294 * the fields that cannot be represented in ILP32. 3295 */ 3296 if (p->p_model != DATAMODEL_ILP32) { 3297 psp->pr_size = 0; 3298 psp->pr_rssize = 0; 3299 psp->pr_argv = 0; 3300 psp->pr_envp = 0; 3301 } 3302 } 3303 3304 #endif /* _SYSCALL32_IMPL */ 3305 3306 void 3307 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp) 3308 { 3309 klwp_t *lwp = ttolwp(t); 3310 sobj_ops_t *sobj; 3311 char c, state; 3312 uint64_t pct; 3313 int retval, niceval; 3314 hrtime_t hrutime, hrstime; 3315 3316 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 3317 3318 bzero(psp, sizeof (*psp)); 3319 3320 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 3321 psp->pr_lwpid = t->t_tid; 3322 psp->pr_addr = (uintptr_t)t; 3323 psp->pr_wchan = (uintptr_t)t->t_wchan; 3324 3325 /* map the thread state enum into a process state enum */ 3326 state = VSTOPPED(t) ? 
TS_STOPPED : t->t_state; 3327 switch (state) { 3328 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 3329 case TS_RUN: state = SRUN; c = 'R'; break; 3330 case TS_ONPROC: state = SONPROC; c = 'O'; break; 3331 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 3332 case TS_STOPPED: state = SSTOP; c = 'T'; break; 3333 case TS_WAIT: state = SWAIT; c = 'W'; break; 3334 default: state = 0; c = '?'; break; 3335 } 3336 psp->pr_state = state; 3337 psp->pr_sname = c; 3338 if ((sobj = t->t_sobj_ops) != NULL) 3339 psp->pr_stype = SOBJ_TYPE(sobj); 3340 retval = CL_DONICE(t, NULL, 0, &niceval); 3341 if (retval == 0) { 3342 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 3343 psp->pr_nice = niceval + NZERO; 3344 } 3345 psp->pr_syscall = t->t_sysnum; 3346 psp->pr_pri = t->t_pri; 3347 psp->pr_start.tv_sec = t->t_start; 3348 psp->pr_start.tv_nsec = 0L; 3349 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 3350 scalehrtime(&hrutime); 3351 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 3352 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 3353 scalehrtime(&hrstime); 3354 hrt2ts(hrutime + hrstime, &psp->pr_time); 3355 /* compute %cpu for the lwp */ 3356 pct = cpu_update_pct(t, gethrtime_unscaled()); 3357 psp->pr_pctcpu = prgetpctcpu(pct); 3358 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 3359 if (psp->pr_cpu > 99) 3360 psp->pr_cpu = 99; 3361 3362 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 3363 sizeof (psp->pr_clname) - 1); 3364 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 3365 psp->pr_onpro = t->t_cpu->cpu_id; 3366 psp->pr_bindpro = t->t_bind_cpu; 3367 psp->pr_bindpset = t->t_bind_pset; 3368 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 3369 } 3370 3371 #ifdef _SYSCALL32_IMPL 3372 void 3373 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp) 3374 { 3375 klwp_t *lwp = ttolwp(t); 3376 sobj_ops_t *sobj; 3377 char c, state; 3378 uint64_t pct; 3379 int retval, niceval; 3380 hrtime_t hrutime, hrstime; 3381 3382 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 3383 3384 bzero(psp, sizeof (*psp)); 3385 3386 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 3387 psp->pr_lwpid = t->t_tid; 3388 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 3389 psp->pr_wchan = 0; /* cannot represent 64-bit addr in 32 bits */ 3390 3391 /* map the thread state enum into a process state enum */ 3392 state = VSTOPPED(t) ? 
TS_STOPPED : t->t_state; 3393 switch (state) { 3394 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 3395 case TS_RUN: state = SRUN; c = 'R'; break; 3396 case TS_ONPROC: state = SONPROC; c = 'O'; break; 3397 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 3398 case TS_STOPPED: state = SSTOP; c = 'T'; break; 3399 case TS_WAIT: state = SWAIT; c = 'W'; break; 3400 default: state = 0; c = '?'; break; 3401 } 3402 psp->pr_state = state; 3403 psp->pr_sname = c; 3404 if ((sobj = t->t_sobj_ops) != NULL) 3405 psp->pr_stype = SOBJ_TYPE(sobj); 3406 retval = CL_DONICE(t, NULL, 0, &niceval); 3407 if (retval == 0) { 3408 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 3409 psp->pr_nice = niceval + NZERO; 3410 } else { 3411 psp->pr_oldpri = 0; 3412 psp->pr_nice = 0; 3413 } 3414 psp->pr_syscall = t->t_sysnum; 3415 psp->pr_pri = t->t_pri; 3416 psp->pr_start.tv_sec = (time32_t)t->t_start; 3417 psp->pr_start.tv_nsec = 0L; 3418 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 3419 scalehrtime(&hrutime); 3420 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 3421 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 3422 scalehrtime(&hrstime); 3423 hrt2ts32(hrutime + hrstime, &psp->pr_time); 3424 /* compute %cpu for the lwp */ 3425 pct = cpu_update_pct(t, gethrtime_unscaled()); 3426 psp->pr_pctcpu = prgetpctcpu(pct); 3427 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 3428 if (psp->pr_cpu > 99) 3429 psp->pr_cpu = 99; 3430 3431 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 3432 sizeof (psp->pr_clname) - 1); 3433 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 3434 psp->pr_onpro = t->t_cpu->cpu_id; 3435 psp->pr_bindpro = t->t_bind_cpu; 3436 psp->pr_bindpset = t->t_bind_pset; 3437 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 3438 } 3439 #endif /* _SYSCALL32_IMPL */ 3440 3441 #ifdef _SYSCALL32_IMPL 3442 3443 #define PR_COPY_FIELD(s, d, field) d->field = s->field 3444 3445 #define PR_COPY_FIELD_ILP32(s, d, field) \ 3446 if (s->pr_dmodel == PR_MODEL_ILP32) { \ 3447 d->field = s->field; \ 3448 } 3449 3450 #define PR_COPY_TIMESPEC(s, d, field) \ 3451 TIMESPEC_TO_TIMESPEC32(&d->field, &s->field); 3452 3453 #define PR_COPY_BUF(s, d, field) \ 3454 bcopy(s->field, d->field, sizeof (d->field)); 3455 3456 #define PR_IGNORE_FIELD(s, d, field) 3457 3458 void 3459 lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest) 3460 { 3461 bzero(dest, sizeof (*dest)); 3462 3463 PR_COPY_FIELD(src, dest, pr_flag); 3464 PR_COPY_FIELD(src, dest, pr_lwpid); 3465 PR_IGNORE_FIELD(src, dest, pr_addr); 3466 PR_IGNORE_FIELD(src, dest, pr_wchan); 3467 PR_COPY_FIELD(src, dest, pr_stype); 3468 PR_COPY_FIELD(src, dest, pr_state); 3469 PR_COPY_FIELD(src, dest, pr_sname); 3470 PR_COPY_FIELD(src, dest, pr_nice); 3471 PR_COPY_FIELD(src, dest, pr_syscall); 3472 PR_COPY_FIELD(src, dest, pr_oldpri); 3473 PR_COPY_FIELD(src, dest, pr_cpu); 3474 PR_COPY_FIELD(src, dest, pr_pri); 3475 PR_COPY_FIELD(src, dest, pr_pctcpu); 3476 PR_COPY_TIMESPEC(src, dest, pr_start); 3477 PR_COPY_BUF(src, dest, pr_clname); 3478 PR_COPY_BUF(src, dest, pr_name); 3479 PR_COPY_FIELD(src, dest, pr_onpro); 3480 PR_COPY_FIELD(src, dest, pr_bindpro); 3481 PR_COPY_FIELD(src, dest, pr_bindpset); 3482 PR_COPY_FIELD(src, dest, pr_lgrp); 3483 } 3484 3485 void 3486 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest) 3487 { 3488 bzero(dest, sizeof (*dest)); 3489 3490 PR_COPY_FIELD(src, dest, pr_flag); 3491 PR_COPY_FIELD(src, dest, pr_nlwp); 3492 PR_COPY_FIELD(src, dest, pr_pid); 3493 PR_COPY_FIELD(src, dest, pr_ppid); 3494 PR_COPY_FIELD(src, dest, pr_pgid); 3495 
PR_COPY_FIELD(src, dest, pr_sid); 3496 PR_COPY_FIELD(src, dest, pr_uid); 3497 PR_COPY_FIELD(src, dest, pr_euid); 3498 PR_COPY_FIELD(src, dest, pr_gid); 3499 PR_COPY_FIELD(src, dest, pr_egid); 3500 PR_IGNORE_FIELD(src, dest, pr_addr); 3501 PR_COPY_FIELD_ILP32(src, dest, pr_size); 3502 PR_COPY_FIELD_ILP32(src, dest, pr_rssize); 3503 PR_COPY_FIELD(src, dest, pr_ttydev); 3504 PR_COPY_FIELD(src, dest, pr_pctcpu); 3505 PR_COPY_FIELD(src, dest, pr_pctmem); 3506 PR_COPY_TIMESPEC(src, dest, pr_start); 3507 PR_COPY_TIMESPEC(src, dest, pr_time); 3508 PR_COPY_TIMESPEC(src, dest, pr_ctime); 3509 PR_COPY_BUF(src, dest, pr_fname); 3510 PR_COPY_BUF(src, dest, pr_psargs); 3511 PR_COPY_FIELD(src, dest, pr_wstat); 3512 PR_COPY_FIELD(src, dest, pr_argc); 3513 PR_COPY_FIELD_ILP32(src, dest, pr_argv); 3514 PR_COPY_FIELD_ILP32(src, dest, pr_envp); 3515 PR_COPY_FIELD(src, dest, pr_dmodel); 3516 PR_COPY_FIELD(src, dest, pr_taskid); 3517 PR_COPY_FIELD(src, dest, pr_projid); 3518 PR_COPY_FIELD(src, dest, pr_nzomb); 3519 PR_COPY_FIELD(src, dest, pr_poolid); 3520 PR_COPY_FIELD(src, dest, pr_contract); 3523 3524 lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp); 3525 } 3526 3527 #undef PR_COPY_FIELD 3528 #undef PR_COPY_FIELD_ILP32 3529 #undef PR_COPY_TIMESPEC 3530 #undef PR_COPY_BUF 3531 #undef PR_IGNORE_FIELD 3532 3533 #endif /* _SYSCALL32_IMPL */ 3534 3535 /* 3536 * This used to get called when microstate accounting was disabled but 3537 * microstate information was requested. Since microstate accounting is on 3538 * regardless of the proc flags, this simply makes it appear to procfs that 3539 * microstate accounting is on. This is relatively meaningless since you 3540 * can't turn it off, but this is here for the sake of appearances. 3541 */ 3542 3543 /*ARGSUSED*/ 3544 void 3545 estimate_msacct(kthread_t *t, hrtime_t curtime) 3546 { 3547 proc_t *p; 3548 3549 if (t == NULL) 3550 return; 3551 3552 p = ttoproc(t); 3553 ASSERT(MUTEX_HELD(&p->p_lock)); 3554 3555 /* 3556 * A system process (p0) could be referenced if the thread is 3557 * in the process of exiting. Don't turn on microstate accounting 3558 * in that case. 3559 */ 3560 if (p->p_flag & SSYS) 3561 return; 3562 3563 /* 3564 * Loop through all the LWPs (kernel threads) in the process. 3565 */ 3566 t = p->p_tlist; 3567 do { 3568 t->t_proc_flag |= TP_MSACCT; 3569 } while ((t = t->t_forw) != p->p_tlist); 3570 3571 p->p_flag |= SMSACCT; /* set process-wide MSACCT */ 3572 } 3573 3574 /* 3575 * It's not really possible to disable microstate accounting anymore. 3576 * However, this routine simply turns off the ms accounting flags in a 3577 * process. This way procfs can still pretend to turn microstate accounting 3578 * on and off for a process, but it actually doesn't do anything. This is 3579 * a neutered form of preemptive idiot-proofing. 3580 */ 3581 void 3582 disable_msacct(proc_t *p) 3583 { 3584 kthread_t *t; 3585 3586 ASSERT(MUTEX_HELD(&p->p_lock)); 3587 3588 p->p_flag &= ~SMSACCT; /* clear process-wide MSACCT */ 3589 /* 3590 * Loop through all the LWPs (kernel threads) in the process. 3591 */ 3592 if ((t = p->p_tlist) != NULL) { 3593 do { 3594 /* clear per-thread flag */ 3595 t->t_proc_flag &= ~TP_MSACCT; 3596 } while ((t = t->t_forw) != p->p_tlist); 3597 } 3598 } 3599 3600 /* 3601 * Return resource usage information.
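 *
 * The microstate times gathered below partition the lwp's elapsed
 * time, so that, approximately (modulo scaling and concurrent
 * updates):
 *
 *	pr_rtime ~= pr_utime + pr_stime + pr_ttime + pr_tftime +
 *	    pr_dftime + pr_kftime + pr_ltime + pr_slptime +
 *	    pr_wtime + pr_stoptime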
3602 */ 3603 void 3604 prgetusage(kthread_t *t, prhusage_t *pup) 3605 { 3606 klwp_t *lwp = ttolwp(t); 3607 hrtime_t *mstimep; 3608 struct mstate *ms = &lwp->lwp_mstate; 3609 int state; 3610 int i; 3611 hrtime_t curtime; 3612 hrtime_t waitrq; 3613 hrtime_t tmp1; 3614 3615 curtime = gethrtime_unscaled(); 3616 3617 pup->pr_lwpid = t->t_tid; 3618 pup->pr_count = 1; 3619 pup->pr_create = ms->ms_start; 3620 pup->pr_term = ms->ms_term; 3621 scalehrtime(&pup->pr_create); 3622 scalehrtime(&pup->pr_term); 3623 if (ms->ms_term == 0) { 3624 pup->pr_rtime = curtime - ms->ms_start; 3625 scalehrtime(&pup->pr_rtime); 3626 } else { 3627 pup->pr_rtime = ms->ms_term - ms->ms_start; 3628 scalehrtime(&pup->pr_rtime); 3629 } 3630 3631 3632 pup->pr_utime = ms->ms_acct[LMS_USER]; 3633 pup->pr_stime = ms->ms_acct[LMS_SYSTEM]; 3634 pup->pr_ttime = ms->ms_acct[LMS_TRAP]; 3635 pup->pr_tftime = ms->ms_acct[LMS_TFAULT]; 3636 pup->pr_dftime = ms->ms_acct[LMS_DFAULT]; 3637 pup->pr_kftime = ms->ms_acct[LMS_KFAULT]; 3638 pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 3639 pup->pr_slptime = ms->ms_acct[LMS_SLEEP]; 3640 pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 3641 pup->pr_stoptime = ms->ms_acct[LMS_STOPPED]; 3642 3643 prscaleusage(pup); 3644 3645 /* 3646 * Adjust for time waiting in the dispatcher queue. 3647 */ 3648 waitrq = t->t_waitrq; /* hopefully atomic */ 3649 if (waitrq != 0) { 3650 if (waitrq > curtime) { 3651 curtime = gethrtime_unscaled(); 3652 } 3653 tmp1 = curtime - waitrq; 3654 scalehrtime(&tmp1); 3655 pup->pr_wtime += tmp1; 3656 curtime = waitrq; 3657 } 3658 3659 /* 3660 * Adjust for time spent in current microstate. 3661 */ 3662 if (ms->ms_state_start > curtime) { 3663 curtime = gethrtime_unscaled(); 3664 } 3665 3666 i = 0; 3667 do { 3668 switch (state = t->t_mstate) { 3669 case LMS_SLEEP: 3670 /* 3671 * Update the timer for the current sleep state. 3672 */ 3673 switch (state = ms->ms_prev) { 3674 case LMS_TFAULT: 3675 case LMS_DFAULT: 3676 case LMS_KFAULT: 3677 case LMS_USER_LOCK: 3678 break; 3679 default: 3680 state = LMS_SLEEP; 3681 break; 3682 } 3683 break; 3684 case LMS_TFAULT: 3685 case LMS_DFAULT: 3686 case LMS_KFAULT: 3687 case LMS_USER_LOCK: 3688 state = LMS_SYSTEM; 3689 break; 3690 } 3691 switch (state) { 3692 case LMS_USER: mstimep = &pup->pr_utime; break; 3693 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 3694 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 3695 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 3696 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 3697 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 3698 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 3699 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 3700 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 3701 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 3702 default: panic("prgetusage: unknown microstate"); 3703 } 3704 tmp1 = curtime - ms->ms_state_start; 3705 if (tmp1 < 0) { 3706 curtime = gethrtime_unscaled(); 3707 i++; 3708 continue; 3709 } 3710 scalehrtime(&tmp1); 3711 } while (tmp1 < 0 && i < MAX_ITERS_SPIN); 3712 3713 *mstimep += tmp1; 3714 3715 /* update pup timestamp */ 3716 pup->pr_tstamp = curtime; 3717 scalehrtime(&pup->pr_tstamp); 3718 3719 /* 3720 * Resource usage counters. 
3721 */ 3722 pup->pr_minf = lwp->lwp_ru.minflt; 3723 pup->pr_majf = lwp->lwp_ru.majflt; 3724 pup->pr_nswap = lwp->lwp_ru.nswap; 3725 pup->pr_inblk = lwp->lwp_ru.inblock; 3726 pup->pr_oublk = lwp->lwp_ru.oublock; 3727 pup->pr_msnd = lwp->lwp_ru.msgsnd; 3728 pup->pr_mrcv = lwp->lwp_ru.msgrcv; 3729 pup->pr_sigs = lwp->lwp_ru.nsignals; 3730 pup->pr_vctx = lwp->lwp_ru.nvcsw; 3731 pup->pr_ictx = lwp->lwp_ru.nivcsw; 3732 pup->pr_sysc = lwp->lwp_ru.sysc; 3733 pup->pr_ioch = lwp->lwp_ru.ioch; 3734 } 3735 3736 /* 3737 * Convert ms_acct stats from unscaled high-res time to nanoseconds 3738 */ 3739 void 3740 prscaleusage(prhusage_t *usg) 3741 { 3742 scalehrtime(&usg->pr_utime); 3743 scalehrtime(&usg->pr_stime); 3744 scalehrtime(&usg->pr_ttime); 3745 scalehrtime(&usg->pr_tftime); 3746 scalehrtime(&usg->pr_dftime); 3747 scalehrtime(&usg->pr_kftime); 3748 scalehrtime(&usg->pr_ltime); 3749 scalehrtime(&usg->pr_slptime); 3750 scalehrtime(&usg->pr_wtime); 3751 scalehrtime(&usg->pr_stoptime); 3752 } 3753 3754 3755 /* 3756 * Sum resource usage information. 3757 */ 3758 void 3759 praddusage(kthread_t *t, prhusage_t *pup) 3760 { 3761 klwp_t *lwp = ttolwp(t); 3762 hrtime_t *mstimep; 3763 struct mstate *ms = &lwp->lwp_mstate; 3764 int state; 3765 int i; 3766 hrtime_t curtime; 3767 hrtime_t waitrq; 3768 hrtime_t tmp; 3769 prhusage_t conv; 3770 3771 curtime = gethrtime_unscaled(); 3772 3773 if (ms->ms_term == 0) { 3774 tmp = curtime - ms->ms_start; 3775 scalehrtime(&tmp); 3776 pup->pr_rtime += tmp; 3777 } else { 3778 tmp = ms->ms_term - ms->ms_start; 3779 scalehrtime(&tmp); 3780 pup->pr_rtime += tmp; 3781 } 3782 3783 conv.pr_utime = ms->ms_acct[LMS_USER]; 3784 conv.pr_stime = ms->ms_acct[LMS_SYSTEM]; 3785 conv.pr_ttime = ms->ms_acct[LMS_TRAP]; 3786 conv.pr_tftime = ms->ms_acct[LMS_TFAULT]; 3787 conv.pr_dftime = ms->ms_acct[LMS_DFAULT]; 3788 conv.pr_kftime = ms->ms_acct[LMS_KFAULT]; 3789 conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 3790 conv.pr_slptime = ms->ms_acct[LMS_SLEEP]; 3791 conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 3792 conv.pr_stoptime = ms->ms_acct[LMS_STOPPED]; 3793 3794 prscaleusage(&conv); 3795 3796 pup->pr_utime += conv.pr_utime; 3797 pup->pr_stime += conv.pr_stime; 3798 pup->pr_ttime += conv.pr_ttime; 3799 pup->pr_tftime += conv.pr_tftime; 3800 pup->pr_dftime += conv.pr_dftime; 3801 pup->pr_kftime += conv.pr_kftime; 3802 pup->pr_ltime += conv.pr_ltime; 3803 pup->pr_slptime += conv.pr_slptime; 3804 pup->pr_wtime += conv.pr_wtime; 3805 pup->pr_stoptime += conv.pr_stoptime; 3806 3807 /* 3808 * Adjust for time waiting in the dispatcher queue. 3809 */ 3810 waitrq = t->t_waitrq; /* hopefully atomic */ 3811 if (waitrq != 0) { 3812 if (waitrq > curtime) { 3813 curtime = gethrtime_unscaled(); 3814 } 3815 tmp = curtime - waitrq; 3816 scalehrtime(&tmp); 3817 pup->pr_wtime += tmp; 3818 curtime = waitrq; 3819 } 3820 3821 /* 3822 * Adjust for time spent in current microstate. 3823 */ 3824 if (ms->ms_state_start > curtime) { 3825 curtime = gethrtime_unscaled(); 3826 } 3827 3828 i = 0; 3829 do { 3830 switch (state = t->t_mstate) { 3831 case LMS_SLEEP: 3832 /* 3833 * Update the timer for the current sleep state. 
3834 */ 3835 switch (state = ms->ms_prev) { 3836 case LMS_TFAULT: 3837 case LMS_DFAULT: 3838 case LMS_KFAULT: 3839 case LMS_USER_LOCK: 3840 break; 3841 default: 3842 state = LMS_SLEEP; 3843 break; 3844 } 3845 break; 3846 case LMS_TFAULT: 3847 case LMS_DFAULT: 3848 case LMS_KFAULT: 3849 case LMS_USER_LOCK: 3850 state = LMS_SYSTEM; 3851 break; 3852 } 3853 switch (state) { 3854 case LMS_USER: mstimep = &pup->pr_utime; break; 3855 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 3856 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 3857 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 3858 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 3859 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 3860 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 3861 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 3862 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 3863 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 3864 default: panic("praddusage: unknown microstate"); 3865 } 3866 tmp = curtime - ms->ms_state_start; 3867 if (tmp < 0) { 3868 curtime = gethrtime_unscaled(); 3869 i++; 3870 continue; 3871 } 3872 scalehrtime(&tmp); 3873 } while (tmp < 0 && i < MAX_ITERS_SPIN); 3874 3875 *mstimep += tmp; 3876 3877 /* update pup timestamp */ 3878 pup->pr_tstamp = curtime; 3879 scalehrtime(&pup->pr_tstamp); 3880 3881 /* 3882 * Resource usage counters. 3883 */ 3884 pup->pr_minf += lwp->lwp_ru.minflt; 3885 pup->pr_majf += lwp->lwp_ru.majflt; 3886 pup->pr_nswap += lwp->lwp_ru.nswap; 3887 pup->pr_inblk += lwp->lwp_ru.inblock; 3888 pup->pr_oublk += lwp->lwp_ru.oublock; 3889 pup->pr_msnd += lwp->lwp_ru.msgsnd; 3890 pup->pr_mrcv += lwp->lwp_ru.msgrcv; 3891 pup->pr_sigs += lwp->lwp_ru.nsignals; 3892 pup->pr_vctx += lwp->lwp_ru.nvcsw; 3893 pup->pr_ictx += lwp->lwp_ru.nivcsw; 3894 pup->pr_sysc += lwp->lwp_ru.sysc; 3895 pup->pr_ioch += lwp->lwp_ru.ioch; 3896 } 3897 3898 /* 3899 * Convert a prhusage_t to a prusage_t. 3900 * This means convert each hrtime_t to a timestruc_t 3901 * and copy the count fields uint64_t => ulong_t. 
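 *
 * hrt2ts() splits a nanosecond count into seconds and nanoseconds;
 * for example, 3500000000 becomes { tv_sec = 3, tv_nsec = 500000000 }.
 * The trailing loop narrows the 22 counters from pr_minf onward in
 * one pass, which relies on their remaining contiguous and in the
 * same order in both structures.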
3902 */ 3903 void 3904 prcvtusage(prhusage_t *pup, prusage_t *upup) 3905 { 3906 uint64_t *ullp; 3907 ulong_t *ulp; 3908 int i; 3909 3910 upup->pr_lwpid = pup->pr_lwpid; 3911 upup->pr_count = pup->pr_count; 3912 3913 hrt2ts(pup->pr_tstamp, &upup->pr_tstamp); 3914 hrt2ts(pup->pr_create, &upup->pr_create); 3915 hrt2ts(pup->pr_term, &upup->pr_term); 3916 hrt2ts(pup->pr_rtime, &upup->pr_rtime); 3917 hrt2ts(pup->pr_utime, &upup->pr_utime); 3918 hrt2ts(pup->pr_stime, &upup->pr_stime); 3919 hrt2ts(pup->pr_ttime, &upup->pr_ttime); 3920 hrt2ts(pup->pr_tftime, &upup->pr_tftime); 3921 hrt2ts(pup->pr_dftime, &upup->pr_dftime); 3922 hrt2ts(pup->pr_kftime, &upup->pr_kftime); 3923 hrt2ts(pup->pr_ltime, &upup->pr_ltime); 3924 hrt2ts(pup->pr_slptime, &upup->pr_slptime); 3925 hrt2ts(pup->pr_wtime, &upup->pr_wtime); 3926 hrt2ts(pup->pr_stoptime, &upup->pr_stoptime); 3927 bzero(upup->filltime, sizeof (upup->filltime)); 3928 3929 ullp = &pup->pr_minf; 3930 ulp = &upup->pr_minf; 3931 for (i = 0; i < 22; i++) 3932 *ulp++ = (ulong_t)*ullp++; 3933 } 3934 3935 #ifdef _SYSCALL32_IMPL 3936 void 3937 prcvtusage32(prhusage_t *pup, prusage32_t *upup) 3938 { 3939 uint64_t *ullp; 3940 uint32_t *ulp; 3941 int i; 3942 3943 upup->pr_lwpid = pup->pr_lwpid; 3944 upup->pr_count = pup->pr_count; 3945 3946 hrt2ts32(pup->pr_tstamp, &upup->pr_tstamp); 3947 hrt2ts32(pup->pr_create, &upup->pr_create); 3948 hrt2ts32(pup->pr_term, &upup->pr_term); 3949 hrt2ts32(pup->pr_rtime, &upup->pr_rtime); 3950 hrt2ts32(pup->pr_utime, &upup->pr_utime); 3951 hrt2ts32(pup->pr_stime, &upup->pr_stime); 3952 hrt2ts32(pup->pr_ttime, &upup->pr_ttime); 3953 hrt2ts32(pup->pr_tftime, &upup->pr_tftime); 3954 hrt2ts32(pup->pr_dftime, &upup->pr_dftime); 3955 hrt2ts32(pup->pr_kftime, &upup->pr_kftime); 3956 hrt2ts32(pup->pr_ltime, &upup->pr_ltime); 3957 hrt2ts32(pup->pr_slptime, &upup->pr_slptime); 3958 hrt2ts32(pup->pr_wtime, &upup->pr_wtime); 3959 hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime); 3960 bzero(upup->filltime, sizeof (upup->filltime)); 3961 3962 ullp = &pup->pr_minf; 3963 ulp = &upup->pr_minf; 3964 for (i = 0; i < 22; i++) 3965 *ulp++ = (uint32_t)*ullp++; 3966 } 3967 #endif /* _SYSCALL32_IMPL */ 3968 3969 /* 3970 * Determine whether a set is empty. 3971 */ 3972 int 3973 setisempty(uint32_t *sp, uint_t n) 3974 { 3975 while (n--) 3976 if (*sp++) 3977 return (0); 3978 return (1); 3979 } 3980 3981 /* 3982 * Utility routine for establishing a watched area in the process. 3983 * Keep the list of watched areas sorted by virtual address. 3984 */ 3985 int 3986 set_watched_area(proc_t *p, struct watched_area *pwa) 3987 { 3988 caddr_t vaddr = pwa->wa_vaddr; 3989 caddr_t eaddr = pwa->wa_eaddr; 3990 ulong_t flags = pwa->wa_flags; 3991 struct watched_area *target; 3992 avl_index_t where; 3993 int error = 0; 3994 3995 /* we must not be holding p->p_lock, but the process must be locked */ 3996 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 3997 ASSERT(p->p_proc_flag & P_PR_LOCK); 3998 3999 /* 4000 * If this is our first watchpoint, enable watchpoints for the process. 4001 */ 4002 if (!pr_watch_active(p)) { 4003 kthread_t *t; 4004 4005 mutex_enter(&p->p_lock); 4006 if ((t = p->p_tlist) != NULL) { 4007 do { 4008 watch_enable(t); 4009 } while ((t = t->t_forw) != p->p_tlist); 4010 } 4011 mutex_exit(&p->p_lock); 4012 } 4013 4014 target = pr_find_watched_area(p, pwa, &where); 4015 if (target != NULL) { 4016 /* 4017 * We discovered an existing, overlapping watched area. 4018 * Allow it only if it is an exact match. 
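 *
 * For example, re-tracing the same [vaddr, eaddr) range with new
 * flags (say, WA_WRITE changed to WA_READ|WA_WRITE) updates the
 * existing area in place, while a request that only partially
 * overlaps an existing area fails with EINVAL.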

/*
 * Utility routine for establishing a watched area in the process.
 * Keep the list of watched areas sorted by virtual address.
 */
int
set_watched_area(proc_t *p, struct watched_area *pwa)
{
	caddr_t vaddr = pwa->wa_vaddr;
	caddr_t eaddr = pwa->wa_eaddr;
	ulong_t flags = pwa->wa_flags;
	struct watched_area *target;
	avl_index_t where;
	int error = 0;

	/* we must not be holding p->p_lock, but the process must be locked */
	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
	ASSERT(p->p_proc_flag & P_PR_LOCK);

	/*
	 * If this is our first watchpoint, enable watchpoints for the process.
	 */
	if (!pr_watch_active(p)) {
		kthread_t *t;

		mutex_enter(&p->p_lock);
		if ((t = p->p_tlist) != NULL) {
			do {
				watch_enable(t);
			} while ((t = t->t_forw) != p->p_tlist);
		}
		mutex_exit(&p->p_lock);
	}

	target = pr_find_watched_area(p, pwa, &where);
	if (target != NULL) {
		/*
		 * We discovered an existing, overlapping watched area.
		 * Allow it only if it is an exact match.
		 */
		if (target->wa_vaddr != vaddr ||
		    target->wa_eaddr != eaddr)
			error = EINVAL;
		else if (target->wa_flags != flags) {
			error = set_watched_page(p, vaddr, eaddr,
			    flags, target->wa_flags);
			target->wa_flags = flags;
		}
		kmem_free(pwa, sizeof (struct watched_area));
	} else {
		avl_insert(&p->p_warea, pwa, where);
		error = set_watched_page(p, vaddr, eaddr, flags, 0);
	}

	return (error);
}

/*
 * Utility routine for clearing a watched area in the process.
 * Must be an exact match of the virtual address.
 * The size and flags don't matter.
 */
int
clear_watched_area(proc_t *p, struct watched_area *pwa)
{
	struct watched_area *found;

	/* we must not be holding p->p_lock, but the process must be locked */
	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
	ASSERT(p->p_proc_flag & P_PR_LOCK);

	if (!pr_watch_active(p)) {
		kmem_free(pwa, sizeof (struct watched_area));
		return (0);
	}

	/*
	 * Look for a matching address in the watched areas.  If a match is
	 * found, clear the old watched area and adjust the watched page(s).
	 * It is not an error if there is no match.
	 */
	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
	    found->wa_vaddr == pwa->wa_vaddr) {
		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
		    found->wa_flags);
		avl_remove(&p->p_warea, found);
		kmem_free(found, sizeof (struct watched_area));
	}

	kmem_free(pwa, sizeof (struct watched_area));

	/*
	 * If we removed the last watched area from the process, disable
	 * watchpoints.
	 */
	if (!pr_watch_active(p)) {
		kthread_t *t;

		mutex_enter(&p->p_lock);
		if ((t = p->p_tlist) != NULL) {
			do {
				watch_disable(t);
			} while ((t = t->t_forw) != p->p_tlist);
		}
		mutex_exit(&p->p_lock);
	}

	return (0);
}

/*
 * Frees all the watched_area structures.
 */
void
pr_free_watchpoints(proc_t *p)
{
	struct watched_area *delp;
	void *cookie;

	cookie = NULL;
	while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
		kmem_free(delp, sizeof (struct watched_area));

	avl_destroy(&p->p_warea);
}
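
/*
 * Two AVL trees are involved in watchpoint bookkeeping: p_warea holds
 * one watched_area per user-requested range, while the address space's
 * a_wpage tree holds one watched_page per page, with wp_read, wp_write
 * and wp_exec counting the watched areas that overlap that page.  The
 * routines above maintain both trees; the routines below tear the
 * per-page state down and restore the original page protections.
 */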

/*
 * Called by the traced process itself to unwatch all the
 * pages while deallocating the list of watched_page structs.
 */
void
pr_free_watched_pages(proc_t *p)
{
	struct as *as = p->p_as;
	struct watched_page *pwp;
	uint_t prot;
	int retrycnt, err;
	void *cookie;

	if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
		return;

	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
	AS_LOCK_ENTER(as, RW_WRITER);

	cookie = NULL;
	while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
		retrycnt = 0;
		if ((prot = pwp->wp_oprot) != 0) {
			caddr_t addr = pwp->wp_vaddr;
			struct seg *seg;
retry:
			if ((pwp->wp_prot != prot ||
			    (pwp->wp_flags & WP_NOWATCH)) &&
			    (seg = as_segat(as, addr)) != NULL) {
				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
				if (err == IE_RETRY) {
					ASSERT(retrycnt == 0);
					retrycnt++;
					goto retry;
				}
			}
		}
		kmem_free(pwp, sizeof (struct watched_page));
	}

	avl_destroy(&as->a_wpage);
	p->p_wprot = NULL;

	AS_LOCK_EXIT(as);
}

/*
 * Insert a watched area into the list of watched pages.
 * If oflags is zero then we are adding a new watched area.
 * Otherwise we are changing the flags of an existing watched area.
 */
static int
set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
    ulong_t flags, ulong_t oflags)
{
	struct as *as = p->p_as;
	avl_tree_t *pwp_tree;
	struct watched_page *pwp, *newpwp;
	struct watched_page tpw;
	avl_index_t where;
	struct seg *seg;
	uint_t prot;
	caddr_t addr;

	/*
	 * We need to pre-allocate a list of structures before we grab the
	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
	 * held.
	 */
	newpwp = NULL;
	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	    addr < eaddr; addr += PAGESIZE) {
		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
		pwp->wp_list = newpwp;
		newpwp = pwp;
	}

	AS_LOCK_ENTER(as, RW_WRITER);

	/*
	 * Search for an existing watched page to contain the watched area.
	 * If none is found, grab a new one from the available list
	 * and insert it in the active list, keeping the list sorted
	 * by user-level virtual address.
	 */
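	/*
	 * While the process is in vfork(), i.e. SVFWAIT is set and the
	 * child has borrowed this address space, remember watched pages
	 * on the process's own p_wpage tree rather than on the shared
	 * address space's a_wpage tree, so they can be reinstated when
	 * the address space is given back.
	 */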
	if (p->p_flag & SVFWAIT)
		pwp_tree = &p->p_wpage;
	else
		pwp_tree = &as->a_wpage;

again:
	if (avl_numnodes(pwp_tree) > prnwatch) {
		AS_LOCK_EXIT(as);
		while (newpwp != NULL) {
			pwp = newpwp->wp_list;
			kmem_free(newpwp, sizeof (struct watched_page));
			newpwp = pwp;
		}
		return (E2BIG);
	}

	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
		pwp = newpwp;
		newpwp = newpwp->wp_list;
		pwp->wp_list = NULL;
		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
		    (uintptr_t)PAGEMASK);
		avl_insert(pwp_tree, pwp, where);
	}

	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);

	if (oflags & WA_READ)
		pwp->wp_read--;
	if (oflags & WA_WRITE)
		pwp->wp_write--;
	if (oflags & WA_EXEC)
		pwp->wp_exec--;

	ASSERT(pwp->wp_read >= 0);
	ASSERT(pwp->wp_write >= 0);
	ASSERT(pwp->wp_exec >= 0);

	if (flags & WA_READ)
		pwp->wp_read++;
	if (flags & WA_WRITE)
		pwp->wp_write++;
	if (flags & WA_EXEC)
		pwp->wp_exec++;

	if (!(p->p_flag & SVFWAIT)) {
		vaddr = pwp->wp_vaddr;
		if (pwp->wp_oprot == 0 &&
		    (seg = as_segat(as, vaddr)) != NULL) {
			SEGOP_GETPROT(seg, vaddr, 0, &prot);
			pwp->wp_oprot = (uchar_t)prot;
			pwp->wp_prot = (uchar_t)prot;
		}
		if (pwp->wp_oprot != 0) {
			prot = pwp->wp_oprot;
			if (pwp->wp_read)
				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
			if (pwp->wp_write)
				prot &= ~PROT_WRITE;
			if (pwp->wp_exec)
				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
			if (!(pwp->wp_flags & WP_NOWATCH) &&
			    pwp->wp_prot != prot &&
			    (pwp->wp_flags & WP_SETPROT) == 0) {
				pwp->wp_flags |= WP_SETPROT;
				pwp->wp_list = p->p_wprot;
				p->p_wprot = pwp;
			}
			pwp->wp_prot = (uchar_t)prot;
		}
	}

	/*
	 * If the watched area extends into the next page then do
	 * it over again with the virtual address of the next page.
	 */
	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
		goto again;

	AS_LOCK_EXIT(as);

	/*
	 * Free any pages we may have over-allocated.
	 */
	while (newpwp != NULL) {
		pwp = newpwp->wp_list;
		kmem_free(newpwp, sizeof (struct watched_page));
		newpwp = pwp;
	}

	return (0);
}

/*
 * Remove a watched area from the list of watched pages.
 * A watched area may extend over more than one page.
 */
static void
clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
{
	struct as *as = p->p_as;
	struct watched_page *pwp;
	struct watched_page tpw;
	avl_tree_t *tree;
	avl_index_t where;

	AS_LOCK_ENTER(as, RW_WRITER);

	if (p->p_flag & SVFWAIT)
		tree = &p->p_wpage;
	else
		tree = &as->a_wpage;

	tpw.wp_vaddr = vaddr =
	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	pwp = avl_find(tree, &tpw, &where);
	if (pwp == NULL)
		pwp = avl_nearest(tree, where, AVL_AFTER);

	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
		ASSERT(vaddr <= pwp->wp_vaddr);

		if (flags & WA_READ)
			pwp->wp_read--;
		if (flags & WA_WRITE)
			pwp->wp_write--;
		if (flags & WA_EXEC)
			pwp->wp_exec--;

		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
			/*
			 * Reset the hat layer's protections on this page.
			 */
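			/*
			 * Note how the watch counts map onto page
			 * protections, here and in set_watched_page()
			 * above: a read watchpoint must intercept every
			 * reference, so it revokes read, write and execute
			 * permission alike; a write watchpoint revokes only
			 * write permission; an execute watchpoint also
			 * revokes all access, presumably because instruction
			 * fetch cannot be trapped in isolation on every
			 * platform.
			 */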
			if (pwp->wp_oprot != 0) {
				uint_t prot = pwp->wp_oprot;

				if (pwp->wp_read)
					prot &=
					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
				if (pwp->wp_write)
					prot &= ~PROT_WRITE;
				if (pwp->wp_exec)
					prot &=
					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
				if (!(pwp->wp_flags & WP_NOWATCH) &&
				    pwp->wp_prot != prot &&
				    (pwp->wp_flags & WP_SETPROT) == 0) {
					pwp->wp_flags |= WP_SETPROT;
					pwp->wp_list = p->p_wprot;
					p->p_wprot = pwp;
				}
				pwp->wp_prot = (uchar_t)prot;
			}
		} else {
			/*
			 * No watched areas remain in this page.
			 * Reset everything to normal.
			 */
			if (pwp->wp_oprot != 0) {
				pwp->wp_prot = pwp->wp_oprot;
				if ((pwp->wp_flags & WP_SETPROT) == 0) {
					pwp->wp_flags |= WP_SETPROT;
					pwp->wp_list = p->p_wprot;
					p->p_wprot = pwp;
				}
			}
		}

		pwp = AVL_NEXT(tree, pwp);
	}

	AS_LOCK_EXIT(as);
}

/*
 * Return the original protections for the specified page.
 */
static void
getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
{
	struct watched_page *pwp;
	struct watched_page tpw;

	ASSERT(AS_LOCK_HELD(as));

	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
		*prot = pwp->wp_oprot;
}

static prpagev_t *
pr_pagev_create(struct seg *seg, int check_noreserve)
{
	prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
	size_t total_pages = seg_pages(seg);

	/*
	 * Limit the size of our vectors to pagev_lim pages at a time.  We
	 * need 4 or 5 bytes of storage per page, so this means we limit
	 * ourselves to about a megabyte of kernel heap by default.
	 */
	pagev->pg_npages = MIN(total_pages, pagev_lim);
	pagev->pg_pnbase = 0;

	pagev->pg_protv =
	    kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);

	if (check_noreserve)
		pagev->pg_incore =
		    kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
	else
		pagev->pg_incore = NULL;

	return (pagev);
}

static void
pr_pagev_destroy(prpagev_t *pagev)
{
	if (pagev->pg_incore != NULL)
		kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));

	kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
	kmem_free(pagev, sizeof (prpagev_t));
}
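
/*
 * Fill the page vector with protection (and, if requested, incore)
 * information for the pages of seg starting at addr, stopping at eaddr
 * or at the capacity of the vector, whichever comes first.  When
 * NORESERVE checking is in effect, pages without backing store are
 * skipped, so the address returned is that of the first backed page at
 * or beyond addr, or eaddr if no backed page remains in the range.
 */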
static caddr_t
pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
{
	ulong_t lastpg = seg_page(seg, eaddr - 1);
	ulong_t pn, pnlim;
	caddr_t saddr;
	size_t len;

	ASSERT(addr >= seg->s_base && addr <= eaddr);

	if (addr == eaddr)
		return (eaddr);

refill:
	ASSERT(addr < eaddr);
	pagev->pg_pnbase = seg_page(seg, addr);
	pnlim = pagev->pg_pnbase + pagev->pg_npages;
	saddr = addr;

	if (lastpg < pnlim)
		len = (size_t)(eaddr - addr);
	else
		len = pagev->pg_npages * PAGESIZE;

	if (pagev->pg_incore != NULL) {
		/*
		 * INCORE cleverly has different semantics than GETPROT:
		 * it returns info on pages up to but NOT including addr + len.
		 */
		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
		pn = pagev->pg_pnbase;

		do {
			/*
			 * Guilty knowledge here:  We know that segvn_incore
			 * returns more than just the low-order bit that
			 * indicates the page is actually in memory.  If any
			 * bits are set, then the page has backing store.
			 */
			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
				goto out;

		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);

		/*
		 * If we examined all the pages in the vector but we're not
		 * at the end of the segment, take another lap.
		 */
		if (addr < eaddr)
			goto refill;
	}

	/*
	 * Need to take len - 1 because addr + len is the address of the
	 * first byte of the page just past the end of what we want.
	 */
out:
	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
	return (addr);
}

static caddr_t
pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
    caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
{
	/*
	 * Our starting address is either the specified address, or the base
	 * address from the start of the pagev.  If the latter is greater,
	 * this means a previous call to pr_pagev_fill has already scanned
	 * further than the end of the previous mapping.
	 */
	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
	caddr_t addr = MAX(*saddrp, base);
	ulong_t pn = seg_page(seg, addr);
	uint_t prot, nprot;

	/*
	 * If we're dealing with noreserve pages, then advance addr to
	 * the address of the next page which has backing store.
	 */
	if (pagev->pg_incore != NULL) {
		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
			if ((addr += PAGESIZE) == eaddr) {
				*saddrp = addr;
				prot = 0;
				goto out;
			}
			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
				if (addr == eaddr) {
					*saddrp = addr;
					prot = 0;
					goto out;
				}
				pn = seg_page(seg, addr);
			}
		}
	}

	/*
	 * Get the protections on the page corresponding to addr.
	 */
	pn = seg_page(seg, addr);
	ASSERT(pn >= pagev->pg_pnbase);
	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));

	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
	getwatchprot(seg->s_as, addr, &prot);
	*saddrp = addr;

	/*
	 * Now loop until we find a backed page with different protections
	 * or we reach the end of this segment.
	 */
	while ((addr += PAGESIZE) < eaddr) {
		/*
		 * If pn has advanced to the page number following what we
		 * have information on, refill the page vector and reset
		 * addr and pn.  If pr_pagev_fill does not return the
		 * address of the next page, we have a discontiguity and
		 * thus have reached the end of the current mapping.
		 */
		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
			if (naddr != addr)
				goto out;
			pn = seg_page(seg, addr);
		}

		/*
		 * The previous page's protections are in prot, and it has
		 * backing.  If this page is MAP_NORESERVE and has no backing,
		 * then end this mapping and return the previous protections.
		 */
		if (pagev->pg_incore != NULL &&
		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
			break;

		/*
		 * Otherwise end the mapping if this page's protections (nprot)
		 * are different than those in the previous page (prot).
		 */
		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
		getwatchprot(seg->s_as, addr, &nprot);

		if (nprot != prot)
			break;
	}

out:
	*protp = prot;
	return (addr);
}
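
/*
 * Return the effective size of a segment for /proc reporting purposes:
 * the full reservation when reserved is non-zero (as for the rmap
 * file), otherwise the size of the object actually backing the
 * segment, as computed by the special cases below.
 */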
size_t
pr_getsegsize(struct seg *seg, int reserved)
{
	size_t size = seg->s_size;

	/*
	 * If we're interested in the reserved space, return the size of the
	 * segment itself.  Everything else in this function is a special case
	 * to determine the actual underlying size of various segment types.
	 */
	if (reserved)
		return (size);

	/*
	 * If this is a segvn mapping of a regular file, return the smaller
	 * of the segment size and the remaining size of the file beyond
	 * the file offset corresponding to seg->s_base.
	 */
	if (seg->s_ops == &segvn_ops) {
		vattr_t vattr;
		vnode_t *vp;

		vattr.va_mask = AT_SIZE;

		if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
		    vp != NULL && vp->v_type == VREG &&
		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {

			u_offset_t fsize = vattr.va_size;
			u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);

			if (fsize < offset)
				fsize = 0;
			else
				fsize -= offset;

			fsize = roundup(fsize, (u_offset_t)PAGESIZE);

			if (fsize < (u_offset_t)size)
				size = (size_t)fsize;
		}

		return (size);
	}

	/*
	 * If this is an ISM shared segment, don't include pages that are
	 * beyond the real size of the spt segment that backs it.
	 */
	if (seg->s_ops == &segspt_shmops)
		return (MIN(spt_realsize(seg), size));

	/*
	 * If this segment is a mapping from /dev/null, then this is a
	 * reservation of virtual address space and has no actual size.
	 * Such segments are backed by segdev and have type set to neither
	 * MAP_SHARED nor MAP_PRIVATE.
	 */
	if (seg->s_ops == &segdev_ops &&
	    ((SEGOP_GETTYPE(seg, seg->s_base) &
	    (MAP_SHARED | MAP_PRIVATE)) == 0))
		return (0);

	/*
	 * If this segment doesn't match one of the special types we handle,
	 * just return the size of the segment itself.
	 */
	return (size);
}

uint_t
pr_getprot(struct seg *seg, int reserved, void **tmp,
    caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
{
	struct as *as = seg->s_as;

	caddr_t saddr = *saddrp;
	caddr_t naddr;

	int check_noreserve;
	uint_t prot;

	union {
		struct segvn_data *svd;
		struct segdev_data *sdp;
		void *data;
	} s;

	s.data = seg->s_data;

	ASSERT(AS_WRITE_HELD(as));
	ASSERT(saddr >= seg->s_base && saddr < eaddr);
	ASSERT(eaddr <= seg->s_base + seg->s_size);

	/*
	 * Don't include MAP_NORESERVE pages in the address range
	 * unless their mappings have actually materialized.
	 * We cheat by knowing that segvn is the only segment
	 * driver that supports MAP_NORESERVE.
	 */
	check_noreserve =
	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
	    (s.svd->flags & MAP_NORESERVE));

	/*
	 * Examine every page only as a last resort.  We use guilty knowledge
	 * of segvn and segdev to avoid this: if there are no per-page
	 * protections present in the segment and we don't care about
	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
	 */
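	/*
	 * In the fast paths below we report the entire remainder of the
	 * segment, [saddr, eaddr), as a single range; otherwise we fall
	 * into the page-vector code, which parcels out the segment one
	 * protection boundary per call, carrying the prpagev_t in *tmp
	 * across calls until pr_getprot_done() is reached at eaddr.
	 */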
	if (!check_noreserve && saddr == seg->s_base &&
	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
		prot = s.svd->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
	    s.sdp != NULL && s.sdp->pageprot == 0) {
		prot = s.sdp->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else {
		prpagev_t *pagev;

		/*
		 * If addr is sitting at the start of the segment, then
		 * create a page vector to store protection and incore
		 * information for pages in the segment, and fill it.
		 * Otherwise, we expect *tmp to address the prpagev_t
		 * allocated by a previous call to this function.
		 */
		if (saddr == seg->s_base) {
			pagev = pr_pagev_create(seg, check_noreserve);
			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);

			ASSERT(*tmp == NULL);
			*tmp = pagev;

			ASSERT(saddr <= eaddr);
			*saddrp = saddr;

			if (saddr == eaddr) {
				naddr = saddr;
				prot = 0;
				goto out;
			}

		} else {
			ASSERT(*tmp != NULL);
			pagev = (prpagev_t *)*tmp;
		}

		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
		ASSERT(naddr <= eaddr);
	}

out:
	if (naddr == eaddr)
		pr_getprot_done(tmp);
	*naddrp = naddr;
	return (prot);
}

void
pr_getprot_done(void **tmp)
{
	if (*tmp != NULL) {
		pr_pagev_destroy((prpagev_t *)*tmp);
		*tmp = NULL;
	}
}

/*
 * Return true iff the vnode is a /proc file from the object directory.
 */
int
pr_isobject(vnode_t *vp)
{
	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
}

/*
 * Return true iff the vnode is a /proc file opened by the process itself.
 */
int
pr_isself(vnode_t *vp)
{
	/*
	 * XXX: To retain binary compatibility with the old
	 * ioctl()-based version of /proc, we exempt self-opens
	 * of /proc/<pid> from being marked close-on-exec.
	 */
	return (vn_matchops(vp, prvnodeops) &&
	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
	    VTOP(vp)->pr_type != PR_PIDDIR);
}

static ssize_t
pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
{
	ssize_t pagesize, hatsize;

	ASSERT(AS_WRITE_HELD(seg->s_as));
	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
	ASSERT(saddr < eaddr);

	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
	ASSERT(pagesize != 0);

	if (pagesize == -1)
		pagesize = PAGESIZE;

	saddr += P2NPHASE((uintptr_t)saddr, pagesize);

	while (saddr < eaddr) {
		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
			break;
		ASSERT(IS_P2ALIGNED(saddr, pagesize));
		saddr += pagesize;
	}

	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
	return (hatsize);
}
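
/*
 * The xmap code below backs reads of the /proc/<pid>/xmap file; each
 * prxmap_t describes one run of pages with uniform protections and a
 * uniform HAT page size.  A consumer reads the file and walks the
 * resulting array; an illustrative sketch only (NXMAP is an arbitrary
 * buffer size chosen by the consumer):
 *
 *	prxmap_t xmap[NXMAP];
 *	ssize_t n = pread(xmapfd, xmap, sizeof (xmap), 0);
 *	int i;
 *
 *	for (i = 0; i < n / sizeof (prxmap_t); i++)
 *		if (xmap[i].pr_mflags & MA_WRITE)
 *			... writable mapping at xmap[i].pr_vaddr ...
 */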

/*
 * Return an array of structures with extended memory map information.
 * We allocate here; the caller must deallocate.
 */
int
prgetxmap(proc_t *p, list_t *iolhead)
{
	struct as *as = p->p_as;
	prxmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr, baddr;
		void *tmp = NULL;
		ssize_t psz;
		char *parr;
		uint64_t npages;
		uint64_t pagenum;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/*
		 * Segment loop part one: iterate from the base of the segment
		 * to its end, pausing at each address boundary (baddr) between
		 * ranges that have different virtual memory protections.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
			ASSERT(baddr >= saddr && baddr <= eaddr);

			/*
			 * Segment loop part two: iterate from the current
			 * position to the end of the protection boundary,
			 * pausing at each address boundary (naddr) between
			 * ranges that have different underlying page sizes.
			 */
			for (; saddr < baddr; saddr = naddr) {
				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
				ASSERT(naddr >= saddr && naddr <= baddr);

				mp = pr_iol_newbuf(iolhead, sizeof (*mp));

				mp->pr_vaddr = (uintptr_t)saddr;
				mp->pr_size = naddr - saddr;
				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
				mp->pr_mflags = 0;
				if (prot & PROT_READ)
					mp->pr_mflags |= MA_READ;
				if (prot & PROT_WRITE)
					mp->pr_mflags |= MA_WRITE;
				if (prot & PROT_EXEC)
					mp->pr_mflags |= MA_EXEC;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
					mp->pr_mflags |= MA_SHARED;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
					mp->pr_mflags |= MA_NORESERVE;
				if (seg->s_ops == &segspt_shmops ||
				    (seg->s_ops == &segvn_ops &&
				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
				    vp == NULL)))
					mp->pr_mflags |= MA_ANON;
				if (seg == brkseg)
					mp->pr_mflags |= MA_BREAK;
				else if (seg == stkseg)
					mp->pr_mflags |= MA_STACK;
				if (seg->s_ops == &segspt_shmops)
					mp->pr_mflags |= MA_ISM | MA_SHM;

				mp->pr_pagesize = PAGESIZE;
				if (psz == -1) {
					mp->pr_hatpagesize = 0;
				} else {
					mp->pr_hatpagesize = psz;
				}

				/*
				 * Manufacture a filename for the "object" dir.
				 */
				mp->pr_dev = PRNODEV;
				vattr.va_mask = AT_FSID|AT_NODEID;
				if (seg->s_ops == &segvn_ops &&
				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
				    vp != NULL && vp->v_type == VREG &&
				    VOP_GETATTR(vp, &vattr, 0, CRED(),
				    NULL) == 0) {
					mp->pr_dev = vattr.va_fsid;
					mp->pr_ino = vattr.va_nodeid;
					if (vp == p->p_exec)
						(void) strcpy(mp->pr_mapname,
						    "a.out");
					else
						pr_object_name(mp->pr_mapname,
						    vp, &vattr);
				}

				/*
				 * Get the SysV shared memory id, if any.
				 */
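				/*
				 * shmgetid() returns SHMID_NONE when the
				 * segment is not SysV shared memory and
				 * SHMID_FREE when the identifier has been
				 * removed; the latter is reported as a
				 * shmid of -1 that still carries MA_SHM.
				 */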
				if ((mp->pr_mflags & MA_SHARED) &&
				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
				    seg->s_base)) != SHMID_NONE) {
					if (mp->pr_shmid == SHMID_FREE)
						mp->pr_shmid = -1;

					mp->pr_mflags |= MA_SHM;
				} else {
					mp->pr_shmid = -1;
				}

				npages = ((uintptr_t)(naddr - saddr)) >>
				    PAGESHIFT;
				parr = kmem_zalloc(npages, KM_SLEEP);

				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);

				for (pagenum = 0; pagenum < npages; pagenum++) {
					if (parr[pagenum] & SEG_PAGE_INCORE)
						mp->pr_rss++;
					if (parr[pagenum] & SEG_PAGE_ANON)
						mp->pr_anon++;
					if (parr[pagenum] & SEG_PAGE_LOCKED)
						mp->pr_locked++;
				}
				kmem_free(parr, npages);
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}

/*
 * Return the process's credentials.  We don't need a 32-bit equivalent of
 * this function because prcred_t and prcred32_t are actually the same.
 */
void
prgetcred(proc_t *p, prcred_t *pcrp)
{
	mutex_enter(&p->p_crlock);
	cred2prcred(p->p_cred, pcrp);
	mutex_exit(&p->p_crlock);
}

void
prgetsecflags(proc_t *p, prsecflags_t *psfp)
{
	ASSERT(psfp != NULL);

	bzero(psfp, sizeof (*psfp));
	psfp->pr_version = PRSECFLAGS_VERSION_CURRENT;
	psfp->pr_lower = p->p_secflags.psf_lower;
	psfp->pr_upper = p->p_secflags.psf_upper;
	psfp->pr_effective = p->p_secflags.psf_effective;
	psfp->pr_inherit = p->p_secflags.psf_inherit;
}

/*
 * Compute the actual size of the prpriv_t structure.
 */
size_t
prgetprivsize(void)
{
	return (priv_prgetprivsize(NULL));
}

/*
 * Return the process's privileges.  We don't need a 32-bit equivalent of
 * this function because prpriv_t and prpriv32_t are actually the same.
 */
void
prgetpriv(proc_t *p, prpriv_t *pprp)
{
	mutex_enter(&p->p_crlock);
	cred2prpriv(p->p_cred, pprp);
	mutex_exit(&p->p_crlock);
}
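
/*
 * What follows is the _SYSCALL32_IMPL mirror of prgetxmap() for 32-bit
 * consumers of /proc: the logic is identical, but addresses and sizes
 * are truncated to the prxmap32_t field widths and the device number
 * is compressed with cmpldev().
 */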
#ifdef _SYSCALL32_IMPL
/*
 * Return an array of structures with extended memory map information.
 * We allocate here; the caller must deallocate.
 */
int
prgetxmap32(proc_t *p, list_t *iolhead)
{
	struct as *as = p->p_as;
	prxmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr, baddr;
		void *tmp = NULL;
		ssize_t psz;
		char *parr;
		uint64_t npages;
		uint64_t pagenum;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/*
		 * Segment loop part one: iterate from the base of the segment
		 * to its end, pausing at each address boundary (baddr) between
		 * ranges that have different virtual memory protections.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
			ASSERT(baddr >= saddr && baddr <= eaddr);

			/*
			 * Segment loop part two: iterate from the current
			 * position to the end of the protection boundary,
			 * pausing at each address boundary (naddr) between
			 * ranges that have different underlying page sizes.
			 */
			for (; saddr < baddr; saddr = naddr) {
				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
				ASSERT(naddr >= saddr && naddr <= baddr);

				mp = pr_iol_newbuf(iolhead, sizeof (*mp));

				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
				mp->pr_size = (size32_t)(naddr - saddr);
				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
				mp->pr_mflags = 0;
				if (prot & PROT_READ)
					mp->pr_mflags |= MA_READ;
				if (prot & PROT_WRITE)
					mp->pr_mflags |= MA_WRITE;
				if (prot & PROT_EXEC)
					mp->pr_mflags |= MA_EXEC;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
					mp->pr_mflags |= MA_SHARED;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
					mp->pr_mflags |= MA_NORESERVE;
				if (seg->s_ops == &segspt_shmops ||
				    (seg->s_ops == &segvn_ops &&
				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
				    vp == NULL)))
					mp->pr_mflags |= MA_ANON;
				if (seg == brkseg)
					mp->pr_mflags |= MA_BREAK;
				else if (seg == stkseg)
					mp->pr_mflags |= MA_STACK;
				if (seg->s_ops == &segspt_shmops)
					mp->pr_mflags |= MA_ISM | MA_SHM;

				mp->pr_pagesize = PAGESIZE;
				if (psz == -1) {
					mp->pr_hatpagesize = 0;
				} else {
					mp->pr_hatpagesize = psz;
				}

				/*
				 * Manufacture a filename for the "object" dir.
				 */
				mp->pr_dev = PRNODEV32;
				vattr.va_mask = AT_FSID|AT_NODEID;
				if (seg->s_ops == &segvn_ops &&
				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
				    vp != NULL && vp->v_type == VREG &&
				    VOP_GETATTR(vp, &vattr, 0, CRED(),
				    NULL) == 0) {
					(void) cmpldev(&mp->pr_dev,
					    vattr.va_fsid);
					mp->pr_ino = vattr.va_nodeid;
					if (vp == p->p_exec)
						(void) strcpy(mp->pr_mapname,
						    "a.out");
					else
						pr_object_name(mp->pr_mapname,
						    vp, &vattr);
				}

				/*
				 * Get the SysV shared memory id, if any.
				 */
				if ((mp->pr_mflags & MA_SHARED) &&
				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
				    seg->s_base)) != SHMID_NONE) {
					if (mp->pr_shmid == SHMID_FREE)
						mp->pr_shmid = -1;

					mp->pr_mflags |= MA_SHM;
				} else {
					mp->pr_shmid = -1;
				}

				npages = ((uintptr_t)(naddr - saddr)) >>
				    PAGESHIFT;
				parr = kmem_zalloc(npages, KM_SLEEP);

				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);

				for (pagenum = 0; pagenum < npages; pagenum++) {
					if (parr[pagenum] & SEG_PAGE_INCORE)
						mp->pr_rss++;
					if (parr[pagenum] & SEG_PAGE_ANON)
						mp->pr_anon++;
					if (parr[pagenum] & SEG_PAGE_LOCKED)
						mp->pr_locked++;
				}
				kmem_free(parr, npages);
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
#endif	/* _SYSCALL32_IMPL */