/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
 * Copyright 2022 MNX Cloud, Inc.
 * Copyright 2022 Oxide Computer Company
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/priv.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/inline.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/brand.h>
#include <sys/sobject.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/var.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/session.h>
#include <sys/pcb.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/ts.h>
#include <sys/bitmap.h>
#include <sys/poll.h>
#include <sys/shm_impl.h>
#include <sys/fault.h>
#include <sys/syscall.h>
#include <sys/procfs.h>
#include <sys/processor.h>
#include <sys/cpuvar.h>
#include <sys/copyops.h>
#include <sys/time.h>
#include <sys/msacct.h>
#include <sys/flock_impl.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/pathname.h>
#include <sys/mode.h>
#include <sys/socketvar.h>
#include <sys/autoconf.h>
#include <sys/dtrace.h>
#include <sys/timod.h>
#include <sys/fs/namenode.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <inet/cc.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_spt.h>
#include <vm/page.h>
#include <sys/vmparam.h>
#include <sys/swap.h>
#include <fs/proc/prdata.h>
#include <sys/task.h>
#include <sys/project.h>
#include <sys/contract_impl.h>
#include <sys/contract/process.h>
#include <sys/contract/process_impl.h>
#include <sys/schedctl.h>
#include <sys/pool.h>
#include <sys/zone.h>
#include <sys/atomic.h>
#include <sys/sdt.h>

#define	MAX_ITERS_SPIN	5

typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

static int set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static void clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);

/*
 * Choose an lwp from the complete set of lwps for the process.
 * This is called for any operation applied to the process
 * file descriptor that requires an lwp to operate upon.
 *
 * Returns a pointer to the thread for the selected LWP,
 * and with the dispatcher lock held for the thread.
 *
 * The algorithm for choosing an lwp is critical for /proc semantics;
 * don't touch this code unless you know all of the implications.
 */
kthread_t *
prchoose(proc_t *p)
{
	kthread_t *t;
	kthread_t *t_onproc = NULL;	/* running on processor */
	kthread_t *t_run = NULL;	/* runnable, on disp queue */
	kthread_t *t_sleep = NULL;	/* sleeping */
	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
	kthread_t *t_susp = NULL;	/* suspended stop */
	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
	kthread_t *t_req = NULL;	/* requested stop */
	kthread_t *t_istop = NULL;	/* event-of-interest stop */
	kthread_t *t_dtrace = NULL;	/* DTrace stop */

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * If the agent lwp exists, it takes precedence over all others.
	 */
	if ((t = p->p_agenttp) != NULL) {
		thread_lock(t);
		return (t);
	}

	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
		return (t);
	do {		/* for each lwp in the process */
		if (VSTOPPED(t)) {	/* virtually stopped */
			if (t_req == NULL)
				t_req = t;
			continue;
		}

		/* If this is a process kernel thread, ignore it. */
		if ((t->t_proc_flag & TP_KTHREAD) != 0) {
			continue;
		}

		thread_lock(t);		/* make sure thread is in good state */
		switch (t->t_state) {
		default:
			panic("prchoose: bad thread state %d, thread 0x%p",
			    t->t_state, (void *)t);
			/*NOTREACHED*/
		case TS_SLEEP:
			/* this is filthy */
			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
			    t->t_wchan0 == NULL) {
				if (t_hold == NULL)
					t_hold = t;
			} else {
				if (t_sleep == NULL)
					t_sleep = t;
			}
			break;
		case TS_RUN:
		case TS_WAIT:
			if (t_run == NULL)
				t_run = t;
			break;
		case TS_ONPROC:
			if (t_onproc == NULL)
				t_onproc = t;
			break;
		case TS_ZOMB:		/* last possible choice */
			break;
		case TS_STOPPED:
			switch (t->t_whystop) {
			case PR_SUSPENDED:
				if (t_susp == NULL)
					t_susp = t;
				break;
			case PR_JOBCONTROL:
				if (t->t_proc_flag & TP_PRSTOP) {
					if (t_jdstop == NULL)
						t_jdstop = t;
				} else {
					if (t_jstop == NULL)
						t_jstop = t;
				}
				break;
			case PR_REQUESTED:
				if (t->t_dtrace_stop && t_dtrace == NULL)
					t_dtrace = t;
				else if (t_req == NULL)
					t_req = t;
				break;
			case PR_SYSENTRY:
			case PR_SYSEXIT:
			case PR_SIGNALLED:
			case PR_FAULTED:
				/*
				 * Make an lwp calling exit() be the
				 * last lwp seen in the process.
				 */
				if (t_istop == NULL ||
				    (t_istop->t_whystop == PR_SYSENTRY &&
				    t_istop->t_whatstop == SYS_exit))
					t_istop = t;
				break;
			case PR_CHECKPOINT:	/* can't happen? */
				break;
			default:
				panic("prchoose: bad t_whystop %d, thread 0x%p",
				    t->t_whystop, (void *)t);
				/*NOTREACHED*/
			}
			break;
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);

	if (t_onproc)
		t = t_onproc;
	else if (t_run)
		t = t_run;
	else if (t_sleep)
		t = t_sleep;
	else if (t_jstop)
		t = t_jstop;
	else if (t_jdstop)
		t = t_jdstop;
	else if (t_istop)
		t = t_istop;
	else if (t_dtrace)
		t = t_dtrace;
	else if (t_req)
		t = t_req;
	else if (t_hold)
		t = t_hold;
	else if (t_susp)
		t = t_susp;
	else			/* TS_ZOMB */
		t = p->p_tlist;

	if (t != NULL)
		thread_lock(t);
	return (t);
}

/*
 * Wake up anyone sleeping on the /proc vnode for the process/lwp to stop.
 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
 * on the /proc file descriptor.  Called from stop() when a traced
 * process stops on an event of interest.  Also called from exit()
 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
 */
void
prnotify(struct vnode *vp)
{
	prcommon_t *pcp = VTOP(vp)->pr_common;

	mutex_enter(&pcp->prc_mutex);
	cv_broadcast(&pcp->prc_wait);
	mutex_exit(&pcp->prc_mutex);
	if (pcp->prc_flags & PRC_POLL) {
		/*
		 * We call pollwakeup() with POLLHUP to ensure that
		 * the pollers are awakened even if they are polling
		 * for nothing (i.e., waiting for the process to exit).
		 * This enables the use of the PRC_POLL flag for optimization
		 * (we can turn off PRC_POLL only if we know no pollers remain).
		 */
		pcp->prc_flags &= ~PRC_POLL;
		pollwakeup(&pcp->prc_pollhead, POLLHUP);
	}
}

/* called immediately below, in prfree() */
static void
prfreenotify(vnode_t *vp)
{
	prnode_t *pnp;
	prcommon_t *pcp;

	while (vp != NULL) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL);
		pcp->prc_proc = NULL;
		/*
		 * We can't call prnotify() here because we are holding
		 * pidlock.  We assert that there is no need to.
		 */
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		ASSERT(!(pcp->prc_flags & PRC_POLL));

		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}
}

/*
 * Called from a hook in freeproc() when a traced process is removed
 * from the process table.  The proc-table pointers of all associated
 * /proc vnodes are cleared to indicate that the process has gone away.
 */
void
prfree(proc_t *p)
{
	uint_t slot = p->p_slot;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Block the process against /proc so it can be freed.
	 * It cannot be freed while locked by some controlling process.
	 * Lock ordering:
	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
	 */
	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		mutex_exit(&pr_pidlock);
		cv_wait(&pr_pid_cv[slot], &p->p_lock);
		mutex_exit(&p->p_lock);
		mutex_enter(&pr_pidlock);
		mutex_enter(&p->p_lock);
	}

	ASSERT(p->p_tlist == NULL);

	prfreenotify(p->p_plist);
	p->p_plist = NULL;

	prfreenotify(p->p_trace);
	p->p_trace = NULL;

	/*
	 * We broadcast to wake up everyone waiting for this process.
	 * No one can reach this process from this point on.
	 */
367 */ 368 cv_broadcast(&pr_pid_cv[slot]); 369 370 mutex_exit(&p->p_lock); 371 mutex_exit(&pr_pidlock); 372 } 373 374 /* 375 * Called from a hook in exit() when a traced process is becoming a zombie. 376 */ 377 void 378 prexit(proc_t *p) 379 { 380 ASSERT(MUTEX_HELD(&p->p_lock)); 381 382 if (pr_watch_active(p)) { 383 pr_free_watchpoints(p); 384 watch_disable(curthread); 385 } 386 /* pr_free_watched_pages() is called in exit(), after dropping p_lock */ 387 if (p->p_trace) { 388 VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY; 389 prnotify(p->p_trace); 390 } 391 cv_broadcast(&pr_pid_cv[p->p_slot]); /* pauselwps() */ 392 } 393 394 /* 395 * Called when a thread calls lwp_exit(). 396 */ 397 void 398 prlwpexit(kthread_t *t) 399 { 400 vnode_t *vp; 401 prnode_t *pnp; 402 prcommon_t *pcp; 403 proc_t *p = ttoproc(t); 404 lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry; 405 406 ASSERT(t == curthread); 407 ASSERT(MUTEX_HELD(&p->p_lock)); 408 409 /* 410 * The process must be blocked against /proc to do this safely. 411 * The lwp must not disappear while the process is marked P_PR_LOCK. 412 * It is the caller's responsibility to have called prbarrier(p). 413 */ 414 ASSERT(!(p->p_proc_flag & P_PR_LOCK)); 415 416 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) { 417 pnp = VTOP(vp); 418 pcp = pnp->pr_common; 419 if (pcp->prc_thread == t) { 420 pcp->prc_thread = NULL; 421 pcp->prc_flags |= PRC_DESTROY; 422 } 423 } 424 425 for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) { 426 pnp = VTOP(vp); 427 pcp = pnp->pr_common; 428 pcp->prc_thread = NULL; 429 pcp->prc_flags |= PRC_DESTROY; 430 prnotify(vp); 431 } 432 433 if (p->p_trace) 434 prnotify(p->p_trace); 435 } 436 437 /* 438 * Called when a zombie thread is joined or when a 439 * detached lwp exits. Called from lwp_hash_out(). 440 */ 441 void 442 prlwpfree(proc_t *p, lwpent_t *lep) 443 { 444 vnode_t *vp; 445 prnode_t *pnp; 446 prcommon_t *pcp; 447 448 ASSERT(MUTEX_HELD(&p->p_lock)); 449 450 /* 451 * The process must be blocked against /proc to do this safely. 452 * The lwp must not disappear while the process is marked P_PR_LOCK. 453 * It is the caller's responsibility to have called prbarrier(p). 454 */ 455 ASSERT(!(p->p_proc_flag & P_PR_LOCK)); 456 457 vp = lep->le_trace; 458 lep->le_trace = NULL; 459 while (vp) { 460 prnotify(vp); 461 pnp = VTOP(vp); 462 pcp = pnp->pr_common; 463 ASSERT(pcp->prc_thread == NULL && 464 (pcp->prc_flags & PRC_DESTROY)); 465 pcp->prc_tslot = -1; 466 vp = pnp->pr_next; 467 pnp->pr_next = NULL; 468 } 469 470 if (p->p_trace) 471 prnotify(p->p_trace); 472 } 473 474 /* 475 * Called from a hook in exec() when a thread starts exec(). 476 */ 477 void 478 prexecstart(void) 479 { 480 proc_t *p = ttoproc(curthread); 481 klwp_t *lwp = ttolwp(curthread); 482 483 /* 484 * The P_PR_EXEC flag blocks /proc operations for 485 * the duration of the exec(). 486 * We can't start exec() while the process is 487 * locked by /proc, so we call prbarrier(). 488 * lwp_nostop keeps the process from being stopped 489 * via job control for the duration of the exec(). 490 */ 491 492 ASSERT(MUTEX_HELD(&p->p_lock)); 493 prbarrier(p); 494 lwp->lwp_nostop++; 495 p->p_proc_flag |= P_PR_EXEC; 496 } 497 498 /* 499 * Called from a hook in exec() when a thread finishes exec(). 500 * The thread may or may not have succeeded. Some other thread 501 * may have beat it to the punch. 
502 */ 503 void 504 prexecend(void) 505 { 506 proc_t *p = ttoproc(curthread); 507 klwp_t *lwp = ttolwp(curthread); 508 vnode_t *vp; 509 prnode_t *pnp; 510 prcommon_t *pcp; 511 model_t model = p->p_model; 512 id_t tid = curthread->t_tid; 513 int tslot = curthread->t_dslot; 514 515 ASSERT(MUTEX_HELD(&p->p_lock)); 516 517 lwp->lwp_nostop--; 518 if (p->p_flag & SEXITLWPS) { 519 /* 520 * We are on our way to exiting because some 521 * other thread beat us in the race to exec(). 522 * Don't clear the P_PR_EXEC flag in this case. 523 */ 524 return; 525 } 526 527 /* 528 * Wake up anyone waiting in /proc for the process to complete exec(). 529 */ 530 p->p_proc_flag &= ~P_PR_EXEC; 531 if ((vp = p->p_trace) != NULL) { 532 pcp = VTOP(vp)->pr_common; 533 mutex_enter(&pcp->prc_mutex); 534 cv_broadcast(&pcp->prc_wait); 535 mutex_exit(&pcp->prc_mutex); 536 for (; vp != NULL; vp = pnp->pr_next) { 537 pnp = VTOP(vp); 538 pnp->pr_common->prc_datamodel = model; 539 } 540 } 541 if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) { 542 /* 543 * We dealt with the process common above. 544 */ 545 ASSERT(p->p_trace != NULL); 546 pcp = VTOP(vp)->pr_common; 547 mutex_enter(&pcp->prc_mutex); 548 cv_broadcast(&pcp->prc_wait); 549 mutex_exit(&pcp->prc_mutex); 550 for (; vp != NULL; vp = pnp->pr_next) { 551 pnp = VTOP(vp); 552 pcp = pnp->pr_common; 553 pcp->prc_datamodel = model; 554 pcp->prc_tid = tid; 555 pcp->prc_tslot = tslot; 556 } 557 } 558 } 559 560 /* 561 * Called from a hook in relvm() just before freeing the address space. 562 * We free all the watched areas now. 563 */ 564 void 565 prrelvm(void) 566 { 567 proc_t *p = ttoproc(curthread); 568 569 mutex_enter(&p->p_lock); 570 prbarrier(p); /* block all other /proc operations */ 571 if (pr_watch_active(p)) { 572 pr_free_watchpoints(p); 573 watch_disable(curthread); 574 } 575 mutex_exit(&p->p_lock); 576 pr_free_watched_pages(p); 577 } 578 579 /* 580 * Called from hooks in exec-related code when a traced process 581 * attempts to exec(2) a setuid/setgid program or an unreadable 582 * file. Rather than fail the exec we invalidate the associated 583 * /proc vnodes so that subsequent attempts to use them will fail. 584 * 585 * All /proc vnodes, except directory vnodes, are retained on a linked 586 * list (rooted at p_plist in the process structure) until last close. 587 * 588 * A controlling process must re-open the /proc files in order to 589 * regain control. 590 */ 591 void 592 prinvalidate(struct user *up) 593 { 594 kthread_t *t = curthread; 595 proc_t *p = ttoproc(t); 596 vnode_t *vp; 597 prnode_t *pnp; 598 int writers = 0; 599 600 mutex_enter(&p->p_lock); 601 prbarrier(p); /* block all other /proc operations */ 602 603 /* 604 * At this moment, there can be only one lwp in the process. 605 */ 606 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 607 608 /* 609 * Invalidate any currently active /proc vnodes. 610 */ 611 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) { 612 pnp = VTOP(vp); 613 switch (pnp->pr_type) { 614 case PR_PSINFO: /* these files can read by anyone */ 615 case PR_LPSINFO: 616 case PR_LWPSINFO: 617 case PR_LWPDIR: 618 case PR_LWPIDDIR: 619 case PR_USAGE: 620 case PR_LUSAGE: 621 case PR_LWPUSAGE: 622 break; 623 default: 624 pnp->pr_flags |= PR_INVAL; 625 break; 626 } 627 } 628 /* 629 * Wake up anyone waiting for the process or lwp. 630 * p->p_trace is guaranteed to be non-NULL if there 631 * are any open /proc files for this process. 
632 */ 633 if ((vp = p->p_trace) != NULL) { 634 prcommon_t *pcp = VTOP(vp)->pr_pcommon; 635 636 prnotify(vp); 637 /* 638 * Are there any writers? 639 */ 640 if ((writers = pcp->prc_writers) != 0) { 641 /* 642 * Clear the exclusive open flag (old /proc interface). 643 * Set prc_selfopens equal to prc_writers so that 644 * the next O_EXCL|O_WRITE open will succeed 645 * even with existing (though invalid) writers. 646 * prclose() must decrement prc_selfopens when 647 * the invalid files are closed. 648 */ 649 pcp->prc_flags &= ~PRC_EXCL; 650 ASSERT(pcp->prc_selfopens <= writers); 651 pcp->prc_selfopens = writers; 652 } 653 } 654 vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace; 655 while (vp != NULL) { 656 /* 657 * We should not invalidate the lwpiddir vnodes, 658 * but the necessities of maintaining the old 659 * ioctl()-based version of /proc require it. 660 */ 661 pnp = VTOP(vp); 662 pnp->pr_flags |= PR_INVAL; 663 prnotify(vp); 664 vp = pnp->pr_next; 665 } 666 667 /* 668 * If any tracing flags are in effect and any vnodes are open for 669 * writing then set the requested-stop and run-on-last-close flags. 670 * Otherwise, clear all tracing flags. 671 */ 672 t->t_proc_flag &= ~TP_PAUSE; 673 if ((p->p_proc_flag & P_PR_TRACE) && writers) { 674 t->t_proc_flag |= TP_PRSTOP; 675 aston(t); /* so ISSIG will see the flag */ 676 p->p_proc_flag |= P_PR_RUNLCL; 677 } else { 678 premptyset(&up->u_entrymask); /* syscalls */ 679 premptyset(&up->u_exitmask); 680 up->u_systrap = 0; 681 premptyset(&p->p_sigmask); /* signals */ 682 premptyset(&p->p_fltmask); /* faults */ 683 t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING); 684 p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE); 685 prnostep(ttolwp(t)); 686 } 687 688 mutex_exit(&p->p_lock); 689 } 690 691 /* 692 * Acquire the controlled process's p_lock and mark it P_PR_LOCK. 693 * Return with pr_pidlock held in all cases. 694 * Return with p_lock held if the the process still exists. 695 * Return value is the process pointer if the process still exists, else NULL. 696 * If we lock the process, give ourself kernel priority to avoid deadlocks; 697 * this is undone in prunlock(). 698 */ 699 proc_t * 700 pr_p_lock(prnode_t *pnp) 701 { 702 proc_t *p; 703 prcommon_t *pcp; 704 705 mutex_enter(&pr_pidlock); 706 if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL) 707 return (NULL); 708 mutex_enter(&p->p_lock); 709 while (p->p_proc_flag & P_PR_LOCK) { 710 /* 711 * This cv/mutex pair is persistent even if 712 * the process disappears while we sleep. 713 */ 714 kcondvar_t *cv = &pr_pid_cv[p->p_slot]; 715 kmutex_t *mp = &p->p_lock; 716 717 mutex_exit(&pr_pidlock); 718 cv_wait(cv, mp); 719 mutex_exit(mp); 720 mutex_enter(&pr_pidlock); 721 if (pcp->prc_proc == NULL) 722 return (NULL); 723 ASSERT(p == pcp->prc_proc); 724 mutex_enter(&p->p_lock); 725 } 726 p->p_proc_flag |= P_PR_LOCK; 727 return (p); 728 } 729 730 /* 731 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock. 732 * This prevents any lwp of the process from disappearing and 733 * blocks most operations that a process can perform on itself. 734 * Returns 0 on success, a non-zero error number on failure. 735 * 736 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when 737 * the subject process is a zombie (ZYES) or fail for zombies (ZNO). 738 * 739 * error returns: 740 * ENOENT: process or lwp has disappeared or process is exiting 741 * (or has become a zombie and zdisp == ZNO). 742 * EAGAIN: procfs vnode has become invalid. 
 *	EINTR: signal arrived while waiting for exec to complete.
 */
int
prlock(prnode_t *pnp, int zdisp)
{
	prcommon_t *pcp;
	proc_t *p;

again:
	pcp = pnp->pr_common;
	p = pr_p_lock(pnp);
	mutex_exit(&pr_pidlock);

	/*
	 * Return ENOENT immediately if there is no process.
	 */
	if (p == NULL)
		return (ENOENT);

	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);

	/*
	 * Return ENOENT if process entered zombie state or is exiting
	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
	 */
	if (zdisp == ZNO &&
	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
		prunlock(pnp);
		return (ENOENT);
	}

	/*
	 * If lwp-specific, check to see if lwp has disappeared.
	 */
	if (pcp->prc_flags & PRC_LWP) {
		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
		    pcp->prc_tslot == -1) {
			prunlock(pnp);
			return (ENOENT);
		}
	}

	/*
	 * Return EAGAIN if we have encountered a security violation.
	 * (The process exec'd a set-id or unreadable executable file.)
	 */
	if (pnp->pr_flags & PR_INVAL) {
		prunlock(pnp);
		return (EAGAIN);
	}

	/*
	 * If process is undergoing an exec(), wait for
	 * completion and then start all over again.
	 */
	if (p->p_proc_flag & P_PR_EXEC) {
		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
		mutex_enter(&pcp->prc_mutex);
		prunlock(pnp);
		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
			mutex_exit(&pcp->prc_mutex);
			return (EINTR);
		}
		mutex_exit(&pcp->prc_mutex);
		goto again;
	}

	/*
	 * We return holding p->p_lock.
	 */
	return (0);
}

/*
 * Undo prlock() and pr_p_lock().
 * p->p_lock is still held; pr_pidlock is no longer held.
 *
 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
 * if any, waiting for the flag to be dropped; it retains p->p_lock.
 *
 * prunlock() calls prunmark() and then drops p->p_lock.
 */
void
prunmark(proc_t *p)
{
	ASSERT(p->p_proc_flag & P_PR_LOCK);
	ASSERT(MUTEX_HELD(&p->p_lock));

	cv_signal(&pr_pid_cv[p->p_slot]);
	p->p_proc_flag &= ~P_PR_LOCK;
}

void
prunlock(prnode_t *pnp)
{
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;

	/*
	 * If we (or someone) gave it a SIGKILL, and it is not
	 * already a zombie, set it running unconditionally.
	 */
	if ((p->p_flag & SKILLED) &&
	    !(p->p_flag & SEXITING) &&
	    !(pcp->prc_flags & PRC_DESTROY) &&
	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
		(void) pr_setrun(pnp, 0);
	prunmark(p);
	mutex_exit(&p->p_lock);
}

/*
 * Called while holding p->p_lock to delay until the process is unlocked.
 * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
 * The process cannot become locked again until p->p_lock is dropped.
 */
void
prbarrier(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (p->p_proc_flag & P_PR_LOCK) {
		/* The process is locked; delay until not locked */
		uint_t slot = p->p_slot;

		while (p->p_proc_flag & P_PR_LOCK)
			cv_wait(&pr_pid_cv[slot], &p->p_lock);
		cv_signal(&pr_pid_cv[slot]);
	}
}
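
/*
 * Illustrative sketch, not a caller from this file: consumers of
 * prlock()/prunlock() follow this pattern (the real callers live
 * elsewhere in procfs):
 *
 *	if ((error = prlock(pnp, ZNO)) != 0)
 *		return (error);		subject gone, invalid, or EINTR
 *	p = pnp->pr_common->prc_proc;
 *	...inspect or control p with p->p_lock held and P_PR_LOCK set...
 *	prunlock(pnp);
 */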
877 */ 878 void 879 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp) 880 { 881 kthread_t *t; 882 883 ASSERT(MUTEX_HELD(&p->p_lock)); 884 885 t = prchoose(p); /* returns locked thread */ 886 ASSERT(t != NULL); 887 thread_unlock(t); 888 889 /* just bzero the process part, prgetlwpstatus() does the rest */ 890 bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t)); 891 sp->pr_nlwp = p->p_lwpcnt; 892 sp->pr_nzomb = p->p_zombcnt; 893 prassignset(&sp->pr_sigpend, &p->p_sig); 894 sp->pr_brkbase = (uintptr_t)p->p_brkbase; 895 sp->pr_brksize = p->p_brksize; 896 sp->pr_stkbase = (uintptr_t)prgetstackbase(p); 897 sp->pr_stksize = p->p_stksize; 898 sp->pr_pid = p->p_pid; 899 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 900 (p->p_flag & SZONETOP)) { 901 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 902 /* 903 * Inside local zones, fake zsched's pid as parent pids for 904 * processes which reference processes outside of the zone. 905 */ 906 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 907 } else { 908 sp->pr_ppid = p->p_ppid; 909 } 910 sp->pr_pgid = p->p_pgrp; 911 sp->pr_sid = p->p_sessp->s_sid; 912 sp->pr_taskid = p->p_task->tk_tkid; 913 sp->pr_projid = p->p_task->tk_proj->kpj_id; 914 sp->pr_zoneid = p->p_zone->zone_id; 915 hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime); 916 hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime); 917 TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime); 918 TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime); 919 prassignset(&sp->pr_sigtrace, &p->p_sigmask); 920 prassignset(&sp->pr_flttrace, &p->p_fltmask); 921 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask); 922 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask); 923 switch (p->p_model) { 924 case DATAMODEL_ILP32: 925 sp->pr_dmodel = PR_MODEL_ILP32; 926 break; 927 case DATAMODEL_LP64: 928 sp->pr_dmodel = PR_MODEL_LP64; 929 break; 930 } 931 if (p->p_agenttp) 932 sp->pr_agentid = p->p_agenttp->t_tid; 933 934 /* get the chosen lwp's status */ 935 prgetlwpstatus(t, &sp->pr_lwp, zp); 936 937 /* replicate the flags */ 938 sp->pr_flags = sp->pr_lwp.pr_flags; 939 } 940 941 /* 942 * Query mask of held signals for a given thread. 943 * 944 * This makes use of schedctl_sigblock() to query if userspace has requested 945 * that all maskable signals be held. While it would be tempting to call 946 * schedctl_finish_sigblock() and apply that update to t->t_hold, it cannot be 947 * done safely without the risk of racing with the thread under consideration. 
948 */ 949 void 950 prgethold(kthread_t *t, sigset_t *sp) 951 { 952 k_sigset_t set; 953 954 if (schedctl_sigblock(t)) { 955 set.__sigbits[0] = FILLSET0 & ~CANTMASK0; 956 set.__sigbits[1] = FILLSET1 & ~CANTMASK1; 957 set.__sigbits[2] = FILLSET2 & ~CANTMASK2; 958 } else { 959 set = t->t_hold; 960 } 961 sigktou(&set, sp); 962 } 963 964 #ifdef _SYSCALL32_IMPL 965 void 966 prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp) 967 { 968 proc_t *p = ttoproc(t); 969 klwp_t *lwp = ttolwp(t); 970 struct mstate *ms = &lwp->lwp_mstate; 971 hrtime_t usr, sys; 972 int flags; 973 ulong_t instr; 974 975 ASSERT(MUTEX_HELD(&p->p_lock)); 976 977 bzero(sp, sizeof (*sp)); 978 flags = 0L; 979 if (t->t_state == TS_STOPPED) { 980 flags |= PR_STOPPED; 981 if ((t->t_schedflag & TS_PSTART) == 0) 982 flags |= PR_ISTOP; 983 } else if (VSTOPPED(t)) { 984 flags |= PR_STOPPED|PR_ISTOP; 985 } 986 if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP)) 987 flags |= PR_DSTOP; 988 if (lwp->lwp_asleep) 989 flags |= PR_ASLEEP; 990 if (t == p->p_agenttp) 991 flags |= PR_AGENT; 992 if (!(t->t_proc_flag & TP_TWAIT)) 993 flags |= PR_DETACH; 994 if (t->t_proc_flag & TP_DAEMON) 995 flags |= PR_DAEMON; 996 if (p->p_proc_flag & P_PR_FORK) 997 flags |= PR_FORK; 998 if (p->p_proc_flag & P_PR_RUNLCL) 999 flags |= PR_RLC; 1000 if (p->p_proc_flag & P_PR_KILLCL) 1001 flags |= PR_KLC; 1002 if (p->p_proc_flag & P_PR_ASYNC) 1003 flags |= PR_ASYNC; 1004 if (p->p_proc_flag & P_PR_BPTADJ) 1005 flags |= PR_BPTADJ; 1006 if (p->p_proc_flag & P_PR_PTRACE) 1007 flags |= PR_PTRACE; 1008 if (p->p_flag & SMSACCT) 1009 flags |= PR_MSACCT; 1010 if (p->p_flag & SMSFORK) 1011 flags |= PR_MSFORK; 1012 if (p->p_flag & SVFWAIT) 1013 flags |= PR_VFORKP; 1014 sp->pr_flags = flags; 1015 if (VSTOPPED(t)) { 1016 sp->pr_why = PR_REQUESTED; 1017 sp->pr_what = 0; 1018 } else { 1019 sp->pr_why = t->t_whystop; 1020 sp->pr_what = t->t_whatstop; 1021 } 1022 sp->pr_lwpid = t->t_tid; 1023 sp->pr_cursig = lwp->lwp_cursig; 1024 prassignset(&sp->pr_lwppend, &t->t_sig); 1025 prgethold(t, &sp->pr_lwphold); 1026 if (t->t_whystop == PR_FAULTED) { 1027 siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info); 1028 if (t->t_whatstop == FLTPAGE) 1029 sp->pr_info.si_addr = 1030 (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr; 1031 } else if (lwp->lwp_curinfo) 1032 siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info); 1033 if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID && 1034 sp->pr_info.si_zoneid != zp->zone_id) { 1035 sp->pr_info.si_pid = zp->zone_zsched->p_pid; 1036 sp->pr_info.si_uid = 0; 1037 sp->pr_info.si_ctid = -1; 1038 sp->pr_info.si_zoneid = zp->zone_id; 1039 } 1040 sp->pr_altstack.ss_sp = 1041 (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp; 1042 sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size; 1043 sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags; 1044 prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action); 1045 sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext; 1046 sp->pr_ustack = (caddr32_t)lwp->lwp_ustack; 1047 (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name, 1048 sizeof (sp->pr_clname) - 1); 1049 if (flags & PR_STOPPED) 1050 hrt2ts32(t->t_stoptime, &sp->pr_tstamp); 1051 usr = ms->ms_acct[LMS_USER]; 1052 sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP]; 1053 scalehrtime(&usr); 1054 scalehrtime(&sys); 1055 hrt2ts32(usr, &sp->pr_utime); 1056 hrt2ts32(sys, &sp->pr_stime); 1057 1058 /* 1059 * Fetch the current instruction, if not a system process. 1060 * We don't attempt this unless the lwp is stopped. 
1061 */ 1062 if ((p->p_flag & SSYS) || p->p_as == &kas) 1063 sp->pr_flags |= (PR_ISSYS|PR_PCINVAL); 1064 else if (!(flags & PR_STOPPED)) 1065 sp->pr_flags |= PR_PCINVAL; 1066 else if (!prfetchinstr(lwp, &instr)) 1067 sp->pr_flags |= PR_PCINVAL; 1068 else 1069 sp->pr_instr = (uint32_t)instr; 1070 1071 /* 1072 * Drop p_lock while touching the lwp's stack. 1073 */ 1074 mutex_exit(&p->p_lock); 1075 if (prisstep(lwp)) 1076 sp->pr_flags |= PR_STEP; 1077 if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) { 1078 int i; 1079 1080 sp->pr_syscall = get_syscall32_args(lwp, 1081 (int *)sp->pr_sysarg, &i); 1082 sp->pr_nsysarg = (ushort_t)i; 1083 } 1084 if ((flags & PR_STOPPED) || t == curthread) 1085 prgetprregs32(lwp, sp->pr_reg); 1086 if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) || 1087 (flags & PR_VFORKP)) { 1088 long r1, r2; 1089 user_t *up; 1090 auxv_t *auxp; 1091 int i; 1092 1093 sp->pr_errno = prgetrvals(lwp, &r1, &r2); 1094 if (sp->pr_errno == 0) { 1095 sp->pr_rval1 = (int32_t)r1; 1096 sp->pr_rval2 = (int32_t)r2; 1097 sp->pr_errpriv = PRIV_NONE; 1098 } else 1099 sp->pr_errpriv = lwp->lwp_badpriv; 1100 1101 if (t->t_sysnum == SYS_execve) { 1102 up = PTOU(p); 1103 sp->pr_sysarg[0] = 0; 1104 sp->pr_sysarg[1] = (caddr32_t)up->u_argv; 1105 sp->pr_sysarg[2] = (caddr32_t)up->u_envp; 1106 sp->pr_sysarg[3] = 0; 1107 for (i = 0, auxp = up->u_auxv; 1108 i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]); 1109 i++, auxp++) { 1110 if (auxp->a_type == AT_SUN_EXECNAME) { 1111 sp->pr_sysarg[0] = 1112 (caddr32_t) 1113 (uintptr_t)auxp->a_un.a_ptr; 1114 break; 1115 } 1116 } 1117 } 1118 } 1119 if (prhasfp()) 1120 prgetprfpregs32(lwp, &sp->pr_fpreg); 1121 mutex_enter(&p->p_lock); 1122 } 1123 1124 void 1125 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp) 1126 { 1127 kthread_t *t; 1128 1129 ASSERT(MUTEX_HELD(&p->p_lock)); 1130 1131 t = prchoose(p); /* returns locked thread */ 1132 ASSERT(t != NULL); 1133 thread_unlock(t); 1134 1135 /* just bzero the process part, prgetlwpstatus32() does the rest */ 1136 bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t)); 1137 sp->pr_nlwp = p->p_lwpcnt; 1138 sp->pr_nzomb = p->p_zombcnt; 1139 prassignset(&sp->pr_sigpend, &p->p_sig); 1140 sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase; 1141 sp->pr_brksize = (uint32_t)p->p_brksize; 1142 sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p); 1143 sp->pr_stksize = (uint32_t)p->p_stksize; 1144 sp->pr_pid = p->p_pid; 1145 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 1146 (p->p_flag & SZONETOP)) { 1147 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 1148 /* 1149 * Inside local zones, fake zsched's pid as parent pids for 1150 * processes which reference processes outside of the zone. 
1151 */ 1152 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 1153 } else { 1154 sp->pr_ppid = p->p_ppid; 1155 } 1156 sp->pr_pgid = p->p_pgrp; 1157 sp->pr_sid = p->p_sessp->s_sid; 1158 sp->pr_taskid = p->p_task->tk_tkid; 1159 sp->pr_projid = p->p_task->tk_proj->kpj_id; 1160 sp->pr_zoneid = p->p_zone->zone_id; 1161 hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime); 1162 hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime); 1163 TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime); 1164 TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime); 1165 prassignset(&sp->pr_sigtrace, &p->p_sigmask); 1166 prassignset(&sp->pr_flttrace, &p->p_fltmask); 1167 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask); 1168 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask); 1169 switch (p->p_model) { 1170 case DATAMODEL_ILP32: 1171 sp->pr_dmodel = PR_MODEL_ILP32; 1172 break; 1173 case DATAMODEL_LP64: 1174 sp->pr_dmodel = PR_MODEL_LP64; 1175 break; 1176 } 1177 if (p->p_agenttp) 1178 sp->pr_agentid = p->p_agenttp->t_tid; 1179 1180 /* get the chosen lwp's status */ 1181 prgetlwpstatus32(t, &sp->pr_lwp, zp); 1182 1183 /* replicate the flags */ 1184 sp->pr_flags = sp->pr_lwp.pr_flags; 1185 } 1186 #endif /* _SYSCALL32_IMPL */ 1187 1188 /* 1189 * Return lwp status. 1190 */ 1191 void 1192 prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp) 1193 { 1194 proc_t *p = ttoproc(t); 1195 klwp_t *lwp = ttolwp(t); 1196 struct mstate *ms = &lwp->lwp_mstate; 1197 hrtime_t usr, sys; 1198 int flags; 1199 ulong_t instr; 1200 1201 ASSERT(MUTEX_HELD(&p->p_lock)); 1202 1203 bzero(sp, sizeof (*sp)); 1204 flags = 0L; 1205 if (t->t_state == TS_STOPPED) { 1206 flags |= PR_STOPPED; 1207 if ((t->t_schedflag & TS_PSTART) == 0) 1208 flags |= PR_ISTOP; 1209 } else if (VSTOPPED(t)) { 1210 flags |= PR_STOPPED|PR_ISTOP; 1211 } 1212 if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP)) 1213 flags |= PR_DSTOP; 1214 if (lwp->lwp_asleep) 1215 flags |= PR_ASLEEP; 1216 if (t == p->p_agenttp) 1217 flags |= PR_AGENT; 1218 if (!(t->t_proc_flag & TP_TWAIT)) 1219 flags |= PR_DETACH; 1220 if (t->t_proc_flag & TP_DAEMON) 1221 flags |= PR_DAEMON; 1222 if (p->p_proc_flag & P_PR_FORK) 1223 flags |= PR_FORK; 1224 if (p->p_proc_flag & P_PR_RUNLCL) 1225 flags |= PR_RLC; 1226 if (p->p_proc_flag & P_PR_KILLCL) 1227 flags |= PR_KLC; 1228 if (p->p_proc_flag & P_PR_ASYNC) 1229 flags |= PR_ASYNC; 1230 if (p->p_proc_flag & P_PR_BPTADJ) 1231 flags |= PR_BPTADJ; 1232 if (p->p_proc_flag & P_PR_PTRACE) 1233 flags |= PR_PTRACE; 1234 if (p->p_flag & SMSACCT) 1235 flags |= PR_MSACCT; 1236 if (p->p_flag & SMSFORK) 1237 flags |= PR_MSFORK; 1238 if (p->p_flag & SVFWAIT) 1239 flags |= PR_VFORKP; 1240 if (p->p_pgidp->pid_pgorphaned) 1241 flags |= PR_ORPHAN; 1242 if (p->p_pidflag & CLDNOSIGCHLD) 1243 flags |= PR_NOSIGCHLD; 1244 if (p->p_pidflag & CLDWAITPID) 1245 flags |= PR_WAITPID; 1246 sp->pr_flags = flags; 1247 if (VSTOPPED(t)) { 1248 sp->pr_why = PR_REQUESTED; 1249 sp->pr_what = 0; 1250 } else { 1251 sp->pr_why = t->t_whystop; 1252 sp->pr_what = t->t_whatstop; 1253 } 1254 sp->pr_lwpid = t->t_tid; 1255 sp->pr_cursig = lwp->lwp_cursig; 1256 prassignset(&sp->pr_lwppend, &t->t_sig); 1257 prgethold(t, &sp->pr_lwphold); 1258 if (t->t_whystop == PR_FAULTED) 1259 bcopy(&lwp->lwp_siginfo, 1260 &sp->pr_info, sizeof (k_siginfo_t)); 1261 else if (lwp->lwp_curinfo) 1262 bcopy(&lwp->lwp_curinfo->sq_info, 1263 &sp->pr_info, sizeof (k_siginfo_t)); 1264 if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID && 1265 sp->pr_info.si_zoneid != zp->zone_id) { 1266 
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
		    (long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			sp->pr_sysarg[3] = 0;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

/*
 * Get the sigaction structure for the specified signal.  The u-block
 * must already have been mapped in by the caller.
 */
void
prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
{
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}

#ifdef _SYSCALL32_IMPL
void
prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
{
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Count the number of segments in this process's address space.
 */
uint_t
prnsegs(struct as *as, int reserved)
{
	uint_t n = 0;
	struct seg *seg;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr != naddr) {
				n++;
				/*
				 * prnsegs() was formerly designated to return
				 * an 'int' despite having no ability or use
				 * for negative results.  As part of changing
				 * it to 'uint_t', keep the old effective limit
				 * of INT_MAX in place.
				 */
				if (n == INT_MAX) {
					pr_getprot_done(&tmp);
					ASSERT(tmp == NULL);
					return (n);
				}
			}
		}

		ASSERT(tmp == NULL);
	}

	return (n);
}

/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 */
1462 */ 1463 int 1464 pr_u32tos(uint32_t n, char *s, int len) 1465 { 1466 char cbuf[11]; /* 32-bit unsigned integer fits in 10 digits */ 1467 char *cp = cbuf; 1468 char *end = s + len; 1469 1470 do { 1471 *cp++ = (char)(n % 10 + '0'); 1472 n /= 10; 1473 } while (n); 1474 1475 len = (int)(cp - cbuf); 1476 1477 do { 1478 *s++ = *--cp; 1479 } while (cp > cbuf); 1480 1481 while (s < end) /* optional pad */ 1482 *s++ = '\0'; 1483 1484 return (len); 1485 } 1486 1487 /* 1488 * Convert uint64_t to decimal string w/o leading zeros. 1489 * Return the string length. 1490 */ 1491 static int 1492 pr_u64tos(uint64_t n, char *s) 1493 { 1494 char cbuf[21]; /* 64-bit unsigned integer fits in 20 digits */ 1495 char *cp = cbuf; 1496 int len; 1497 1498 do { 1499 *cp++ = (char)(n % 10 + '0'); 1500 n /= 10; 1501 } while (n); 1502 1503 len = (int)(cp - cbuf); 1504 1505 do { 1506 *s++ = *--cp; 1507 } while (cp > cbuf); 1508 1509 return (len); 1510 } 1511 1512 /* 1513 * Similar to getf() / getf_gen(), but for the specified process. On success, 1514 * returns the fp with fp->f_count incremented. The caller MUST call 1515 * closef(fp) on the returned fp after completing any actions using that fp. 1516 * We return a reference-held (fp->f_count bumped) file_t so no other closef() 1517 * can invoke destructive VOP_CLOSE actions while we're inspecting the 1518 * process's FD. 1519 * 1520 * Returns NULL for errors: either an empty process-table slot post-fi_lock 1521 * and UF_ENTER, or too many mutex_tryenter() failures on the file_t's f_tlock. 1522 * Both failure modes have DTrace probes. 1523 * 1524 * The current design of the procfs "close" code path uses the following lock 1525 * order of: 1526 * 1527 * 1: (file_t) f_tlock 1528 * 2: (proc_t) p_lock AND setting p->p_proc_flag's P_PR_LOCK 1529 * 1530 * That happens because closef() holds f_tlock while calling fop_close(), 1531 * which can be prclose(), which currently waits on and sets P_PR_LOCK at its 1532 * beginning. 1533 * 1534 * That lock order creates a challenge for pr_getf, which needs to take those 1535 * locks in the opposite order when the fd points to a procfs file descriptor. 1536 * The solution chosen here is to use mutex_tryenter on f_tlock and retry some 1537 * (limited) number of times, failing if we don't get both locks. 1538 * 1539 * The cases where this can fail are rare, and all involve a procfs caller 1540 * asking for info (eg. FDINFO) on another procfs FD. In these cases, 1541 * returning EBADF (which results from a NULL return from pr_getf()) is 1542 * acceptable. 1543 * 1544 * One can increase the number of tries in pr_getf_maxtries if one is worried 1545 * about the contentuous case. 1546 */ 1547 1548 uint64_t pr_getf_tryfails; /* Bumped for statistic purposes. 
/*
 * Similar to getf() / getf_gen(), but for the specified process.  On success,
 * returns the fp with fp->f_count incremented.  The caller MUST call
 * pr_releasef(fp) on the returned fp after completing any actions using that
 * fp.  We return a reference-held (fp->f_count bumped) file_t so no other
 * closef() can invoke destructive VOP_CLOSE actions while we're inspecting
 * the process's FD.
 *
 * Returns NULL for errors: either an empty process-table slot post-fi_lock
 * and UF_ENTER, or too many mutex_tryenter() failures on the file_t's f_tlock.
 * Both failure modes have DTrace probes.
 *
 * The current design of the procfs "close" code path uses the following lock
 * order of:
 *
 *	1: (file_t) f_tlock
 *	2: (proc_t) p_lock AND setting p->p_proc_flag's P_PR_LOCK
 *
 * That happens because closef() holds f_tlock while calling fop_close(),
 * which can be prclose(), which currently waits on and sets P_PR_LOCK at its
 * beginning.
 *
 * That lock order creates a challenge for pr_getf, which needs to take those
 * locks in the opposite order when the fd points to a procfs file descriptor.
 * The solution chosen here is to use mutex_tryenter on f_tlock and retry some
 * (limited) number of times, failing if we don't get both locks.
 *
 * The cases where this can fail are rare, and all involve a procfs caller
 * asking for info (e.g. FDINFO) on another procfs FD.  In these cases,
 * returning EBADF (which results from a NULL return from pr_getf()) is
 * acceptable.
 *
 * One can increase the number of tries in pr_getf_maxtries if one is worried
 * about the contentious case.
 */

uint64_t pr_getf_tryfails;	/* Bumped for statistic purposes. */
int pr_getf_maxtries = 3;	/* So you can tune it from /etc/system */

file_t *
pr_getf(proc_t *p, uint_t fd, short *flag)
{
	uf_entry_t *ufp;
	uf_info_t *fip;
	file_t *fp;
	int tries = 0;

	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));

retry:
	fip = P_FINFO(p);

	if (fd >= fip->fi_nfiles)
		return (NULL);

	mutex_exit(&p->p_lock);
	mutex_enter(&fip->fi_lock);
	UF_ENTER(ufp, fip, fd);
	if ((fp = ufp->uf_file) != NULL && fp->f_count > 0) {
		if (mutex_tryenter(&fp->f_tlock)) {
			ASSERT(fp->f_count > 0);
			fp->f_count++;
			mutex_exit(&fp->f_tlock);
			if (flag != NULL)
				*flag = ufp->uf_flag;
		} else {
			/*
			 * Note the number of mutex_trylock attempts.
			 *
			 * The exit path will catch this and try again if we
			 * are below the retry threshold (pr_getf_maxtries).
			 */
			tries++;
			pr_getf_tryfails++;
			/*
			 * If we hit pr_getf_maxtries, we'll return NULL.
			 * DTrace scripts looking for this sort of failure
			 * should check when arg1 is pr_getf_maxtries.
			 */
			DTRACE_PROBE2(pr_getf_tryfail, file_t *, fp, int,
			    tries);
			fp = NULL;
		}
	} else {
		fp = NULL;
		/* If we fail here, someone else closed this FD. */
		DTRACE_PROBE1(pr_getf_emptyslot, int, tries);
		tries = pr_getf_maxtries;	/* Don't bother retrying. */
	}
	UF_EXIT(ufp);
	mutex_exit(&fip->fi_lock);
	mutex_enter(&p->p_lock);

	/* Use goto instead of tail-recursion so we can keep "tries" around. */
	if (fp == NULL) {
		/* "tries" starts at 1. */
		if (tries < pr_getf_maxtries)
			goto retry;
	} else {
		/*
		 * Probes here will detect successes after arg1's number of
		 * mutex_tryenter() calls.
		 */
		DTRACE_PROBE2(pr_getf_trysuccess, file_t *, fp, int, tries + 1);
	}

	return (fp);
}
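
/*
 * Illustrative sketch of a typical pr_getf() consumer, with the process
 * already locked via prlock() (this is a minimal sketch, not a caller
 * from this file):
 *
 *	file_t *fp;
 *	short flag;
 *
 *	if ((fp = pr_getf(p, fd, &flag)) == NULL)
 *		return (EBADF);
 *	...inspect fp->f_vnode, fp->f_offset, etc....
 *	pr_releasef(fp);
 */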
1644 */ 1645 void 1646 pr_releasef(file_t *fp) 1647 { 1648 mutex_enter(&fp->f_tlock); 1649 if (fp->f_count > 1) { 1650 /* 1651 * This is the most common case: The file is still held open by 1652 * the process, and we simply need to release our hold by 1653 * decrementing f_count 1654 */ 1655 fp->f_count--; 1656 mutex_exit(&fp->f_tlock); 1657 } else { 1658 /* 1659 * A rare occasion: The process snuck a close() of this file 1660 * while we were doing our business in procfs. Given that 1661 * f_count == 1, we are the only one with a reference to the 1662 * file_t and need to take a trip through closef() to free it. 1663 */ 1664 mutex_exit(&fp->f_tlock); 1665 (void) closef(fp); 1666 } 1667 } 1668 1669 void 1670 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr) 1671 { 1672 char *s = name; 1673 struct vfs *vfsp; 1674 struct vfssw *vfsswp; 1675 1676 if ((vfsp = vp->v_vfsp) != NULL && 1677 ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) && 1678 *vfsswp->vsw_name) { 1679 (void) strcpy(s, vfsswp->vsw_name); 1680 s += strlen(s); 1681 *s++ = '.'; 1682 } 1683 s += pr_u32tos(getmajor(vattr->va_fsid), s, 0); 1684 *s++ = '.'; 1685 s += pr_u32tos(getminor(vattr->va_fsid), s, 0); 1686 *s++ = '.'; 1687 s += pr_u64tos(vattr->va_nodeid, s); 1688 *s++ = '\0'; 1689 } 1690 1691 struct seg * 1692 break_seg(proc_t *p) 1693 { 1694 caddr_t addr = p->p_brkbase; 1695 struct seg *seg; 1696 struct vnode *vp; 1697 1698 if (p->p_brksize != 0) 1699 addr += p->p_brksize - 1; 1700 seg = as_segat(p->p_as, addr); 1701 if (seg != NULL && seg->s_ops == &segvn_ops && 1702 (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL)) 1703 return (seg); 1704 return (NULL); 1705 } 1706 1707 /* 1708 * Implementation of service functions to handle procfs generic chained 1709 * copyout buffers. 1710 */ 1711 typedef struct pr_iobuf_list { 1712 list_node_t piol_link; /* buffer linkage */ 1713 size_t piol_size; /* total size (header + data) */ 1714 size_t piol_usedsize; /* amount to copy out from this buf */ 1715 } piol_t; 1716 1717 #define MAPSIZE (64 * 1024) 1718 #define PIOL_DATABUF(iol) ((void *)(&(iol)[1])) 1719 1720 void 1721 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n) 1722 { 1723 piol_t *iol; 1724 size_t initial_size = MIN(1, n) * itemsize; 1725 1726 list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link)); 1727 1728 ASSERT(list_head(iolhead) == NULL); 1729 ASSERT(itemsize < MAPSIZE - sizeof (*iol)); 1730 ASSERT(initial_size > 0); 1731 1732 /* 1733 * Someone creating chained copyout buffers may ask for less than 1734 * MAPSIZE if the amount of data to be buffered is known to be 1735 * smaller than that. 1736 * But in order to prevent involuntary self-denial of service, 1737 * the requested input size is clamped at MAPSIZE. 1738 */ 1739 initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol)); 1740 iol = kmem_alloc(initial_size, KM_SLEEP); 1741 list_insert_head(iolhead, iol); 1742 iol->piol_usedsize = 0; 1743 iol->piol_size = initial_size; 1744 } 1745 1746 void * 1747 pr_iol_newbuf(list_t *iolhead, size_t itemsize) 1748 { 1749 piol_t *iol; 1750 char *new; 1751 1752 ASSERT(itemsize < MAPSIZE - sizeof (*iol)); 1753 ASSERT(list_head(iolhead) != NULL); 1754 1755 iol = (piol_t *)list_tail(iolhead); 1756 1757 if (iol->piol_size < 1758 iol->piol_usedsize + sizeof (*iol) + itemsize) { 1759 /* 1760 * Out of space in the current buffer. Allocate more. 
1761 */ 1762 piol_t *newiol; 1763 1764 newiol = kmem_alloc(MAPSIZE, KM_SLEEP); 1765 newiol->piol_size = MAPSIZE; 1766 newiol->piol_usedsize = 0; 1767 1768 list_insert_after(iolhead, iol, newiol); 1769 iol = list_next(iolhead, iol); 1770 ASSERT(iol == newiol); 1771 } 1772 new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize; 1773 iol->piol_usedsize += itemsize; 1774 bzero(new, itemsize); 1775 return (new); 1776 } 1777 1778 void 1779 pr_iol_freelist(list_t *iolhead) 1780 { 1781 piol_t *iol; 1782 1783 while ((iol = list_head(iolhead)) != NULL) { 1784 list_remove(iolhead, iol); 1785 kmem_free(iol, iol->piol_size); 1786 } 1787 list_destroy(iolhead); 1788 } 1789 1790 int 1791 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin) 1792 { 1793 int error = errin; 1794 piol_t *iol; 1795 1796 while ((iol = list_head(iolhead)) != NULL) { 1797 list_remove(iolhead, iol); 1798 if (!error) { 1799 if (copyout(PIOL_DATABUF(iol), *tgt, 1800 iol->piol_usedsize)) 1801 error = EFAULT; 1802 *tgt += iol->piol_usedsize; 1803 } 1804 kmem_free(iol, iol->piol_size); 1805 } 1806 list_destroy(iolhead); 1807 1808 return (error); 1809 } 1810 1811 int 1812 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin) 1813 { 1814 offset_t off = uiop->uio_offset; 1815 char *base; 1816 size_t size; 1817 piol_t *iol; 1818 int error = errin; 1819 1820 while ((iol = list_head(iolhead)) != NULL) { 1821 list_remove(iolhead, iol); 1822 base = PIOL_DATABUF(iol); 1823 size = iol->piol_usedsize; 1824 if (off <= size && error == 0 && uiop->uio_resid > 0) 1825 error = uiomove(base + off, size - off, 1826 UIO_READ, uiop); 1827 off = MAX(0, off - (offset_t)size); 1828 kmem_free(iol, iol->piol_size); 1829 } 1830 list_destroy(iolhead); 1831 1832 return (error); 1833 } 1834 1835 /* 1836 * Return an array of structures with memory map information. 1837 * We allocate here; the caller must deallocate. 1838 */ 1839 int 1840 prgetmap(proc_t *p, int reserved, list_t *iolhead) 1841 { 1842 struct as *as = p->p_as; 1843 prmap_t *mp; 1844 struct seg *seg; 1845 struct seg *brkseg, *stkseg; 1846 struct vnode *vp; 1847 struct vattr vattr; 1848 uint_t prot; 1849 1850 ASSERT(as != &kas && AS_WRITE_HELD(as)); 1851 1852 /* 1853 * Request an initial buffer size that doesn't waste memory 1854 * if the address space has only a small number of segments. 
1855 */ 1856 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 1857 1858 if ((seg = AS_SEGFIRST(as)) == NULL) 1859 return (0); 1860 1861 brkseg = break_seg(p); 1862 stkseg = as_segat(as, prgetstackbase(p)); 1863 1864 do { 1865 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved); 1866 caddr_t saddr, naddr; 1867 void *tmp = NULL; 1868 1869 if ((seg->s_flags & S_HOLE) != 0) { 1870 continue; 1871 } 1872 1873 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1874 prot = pr_getprot(seg, reserved, &tmp, 1875 &saddr, &naddr, eaddr); 1876 if (saddr == naddr) 1877 continue; 1878 1879 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 1880 1881 mp->pr_vaddr = (uintptr_t)saddr; 1882 mp->pr_size = naddr - saddr; 1883 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 1884 mp->pr_mflags = 0; 1885 if (prot & PROT_READ) 1886 mp->pr_mflags |= MA_READ; 1887 if (prot & PROT_WRITE) 1888 mp->pr_mflags |= MA_WRITE; 1889 if (prot & PROT_EXEC) 1890 mp->pr_mflags |= MA_EXEC; 1891 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 1892 mp->pr_mflags |= MA_SHARED; 1893 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 1894 mp->pr_mflags |= MA_NORESERVE; 1895 if (seg->s_ops == &segspt_shmops || 1896 (seg->s_ops == &segvn_ops && 1897 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 1898 mp->pr_mflags |= MA_ANON; 1899 if (seg == brkseg) 1900 mp->pr_mflags |= MA_BREAK; 1901 else if (seg == stkseg) { 1902 mp->pr_mflags |= MA_STACK; 1903 if (reserved) { 1904 size_t maxstack = 1905 ((size_t)p->p_stk_ctl + 1906 PAGEOFFSET) & PAGEMASK; 1907 mp->pr_vaddr = 1908 (uintptr_t)prgetstackbase(p) + 1909 p->p_stksize - maxstack; 1910 mp->pr_size = (uintptr_t)naddr - 1911 mp->pr_vaddr; 1912 } 1913 } 1914 if (seg->s_ops == &segspt_shmops) 1915 mp->pr_mflags |= MA_ISM | MA_SHM; 1916 mp->pr_pagesize = PAGESIZE; 1917 1918 /* 1919 * Manufacture a filename for the "object" directory. 1920 */ 1921 vattr.va_mask = AT_FSID|AT_NODEID; 1922 if (seg->s_ops == &segvn_ops && 1923 SEGOP_GETVP(seg, saddr, &vp) == 0 && 1924 vp != NULL && vp->v_type == VREG && 1925 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 1926 if (vp == p->p_exec) 1927 (void) strcpy(mp->pr_mapname, "a.out"); 1928 else 1929 pr_object_name(mp->pr_mapname, 1930 vp, &vattr); 1931 } 1932 1933 /* 1934 * Get the SysV shared memory id, if any. 1935 */ 1936 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct && 1937 (mp->pr_shmid = shmgetid(p, seg->s_base)) != 1938 SHMID_NONE) { 1939 if (mp->pr_shmid == SHMID_FREE) 1940 mp->pr_shmid = -1; 1941 1942 mp->pr_mflags |= MA_SHM; 1943 } else { 1944 mp->pr_shmid = -1; 1945 } 1946 } 1947 ASSERT(tmp == NULL); 1948 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 1949 1950 return (0); 1951 } 1952 1953 #ifdef _SYSCALL32_IMPL 1954 int 1955 prgetmap32(proc_t *p, int reserved, list_t *iolhead) 1956 { 1957 struct as *as = p->p_as; 1958 prmap32_t *mp; 1959 struct seg *seg; 1960 struct seg *brkseg, *stkseg; 1961 struct vnode *vp; 1962 struct vattr vattr; 1963 uint_t prot; 1964 1965 ASSERT(as != &kas && AS_WRITE_HELD(as)); 1966 1967 /* 1968 * Request an initial buffer size that doesn't waste memory 1969 * if the address space has only a small number of segments. 
1970 */ 1971 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 1972 1973 if ((seg = AS_SEGFIRST(as)) == NULL) 1974 return (0); 1975 1976 brkseg = break_seg(p); 1977 stkseg = as_segat(as, prgetstackbase(p)); 1978 1979 do { 1980 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved); 1981 caddr_t saddr, naddr; 1982 void *tmp = NULL; 1983 1984 if ((seg->s_flags & S_HOLE) != 0) { 1985 continue; 1986 } 1987 1988 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1989 prot = pr_getprot(seg, reserved, &tmp, 1990 &saddr, &naddr, eaddr); 1991 if (saddr == naddr) 1992 continue; 1993 1994 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 1995 1996 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 1997 mp->pr_size = (size32_t)(naddr - saddr); 1998 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 1999 mp->pr_mflags = 0; 2000 if (prot & PROT_READ) 2001 mp->pr_mflags |= MA_READ; 2002 if (prot & PROT_WRITE) 2003 mp->pr_mflags |= MA_WRITE; 2004 if (prot & PROT_EXEC) 2005 mp->pr_mflags |= MA_EXEC; 2006 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 2007 mp->pr_mflags |= MA_SHARED; 2008 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 2009 mp->pr_mflags |= MA_NORESERVE; 2010 if (seg->s_ops == &segspt_shmops || 2011 (seg->s_ops == &segvn_ops && 2012 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 2013 mp->pr_mflags |= MA_ANON; 2014 if (seg == brkseg) 2015 mp->pr_mflags |= MA_BREAK; 2016 else if (seg == stkseg) { 2017 mp->pr_mflags |= MA_STACK; 2018 if (reserved) { 2019 size_t maxstack = 2020 ((size_t)p->p_stk_ctl + 2021 PAGEOFFSET) & PAGEMASK; 2022 uintptr_t vaddr = 2023 (uintptr_t)prgetstackbase(p) + 2024 p->p_stksize - maxstack; 2025 mp->pr_vaddr = (caddr32_t)vaddr; 2026 mp->pr_size = (size32_t) 2027 ((uintptr_t)naddr - vaddr); 2028 } 2029 } 2030 if (seg->s_ops == &segspt_shmops) 2031 mp->pr_mflags |= MA_ISM | MA_SHM; 2032 mp->pr_pagesize = PAGESIZE; 2033 2034 /* 2035 * Manufacture a filename for the "object" directory. 2036 */ 2037 vattr.va_mask = AT_FSID|AT_NODEID; 2038 if (seg->s_ops == &segvn_ops && 2039 SEGOP_GETVP(seg, saddr, &vp) == 0 && 2040 vp != NULL && vp->v_type == VREG && 2041 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 2042 if (vp == p->p_exec) 2043 (void) strcpy(mp->pr_mapname, "a.out"); 2044 else 2045 pr_object_name(mp->pr_mapname, 2046 vp, &vattr); 2047 } 2048 2049 /* 2050 * Get the SysV shared memory id, if any. 2051 */ 2052 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct && 2053 (mp->pr_shmid = shmgetid(p, seg->s_base)) != 2054 SHMID_NONE) { 2055 if (mp->pr_shmid == SHMID_FREE) 2056 mp->pr_shmid = -1; 2057 2058 mp->pr_mflags |= MA_SHM; 2059 } else { 2060 mp->pr_shmid = -1; 2061 } 2062 } 2063 ASSERT(tmp == NULL); 2064 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2065 2066 return (0); 2067 } 2068 #endif /* _SYSCALL32_IMPL */ 2069 2070 /* 2071 * Return the size of the /proc page data file. 
2072 */ 2073 size_t 2074 prpdsize(struct as *as) 2075 { 2076 struct seg *seg; 2077 size_t size; 2078 2079 ASSERT(as != &kas && AS_WRITE_HELD(as)); 2080 2081 if ((seg = AS_SEGFIRST(as)) == NULL) 2082 return (0); 2083 2084 size = sizeof (prpageheader_t); 2085 do { 2086 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 2087 caddr_t saddr, naddr; 2088 void *tmp = NULL; 2089 size_t npage; 2090 2091 if ((seg->s_flags & S_HOLE) != 0) { 2092 continue; 2093 } 2094 2095 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2096 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 2097 if ((npage = (naddr - saddr) / PAGESIZE) != 0) 2098 size += sizeof (prasmap_t) + round8(npage); 2099 } 2100 ASSERT(tmp == NULL); 2101 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2102 2103 return (size); 2104 } 2105 2106 #ifdef _SYSCALL32_IMPL 2107 size_t 2108 prpdsize32(struct as *as) 2109 { 2110 struct seg *seg; 2111 size_t size; 2112 2113 ASSERT(as != &kas && AS_WRITE_HELD(as)); 2114 2115 if ((seg = AS_SEGFIRST(as)) == NULL) 2116 return (0); 2117 2118 size = sizeof (prpageheader32_t); 2119 do { 2120 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 2121 caddr_t saddr, naddr; 2122 void *tmp = NULL; 2123 size_t npage; 2124 2125 if ((seg->s_flags & S_HOLE) != 0) { 2126 continue; 2127 } 2128 2129 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2130 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 2131 if ((npage = (naddr - saddr) / PAGESIZE) != 0) 2132 size += sizeof (prasmap32_t) + round8(npage); 2133 } 2134 ASSERT(tmp == NULL); 2135 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2136 2137 return (size); 2138 } 2139 #endif /* _SYSCALL32_IMPL */ 2140 2141 /* 2142 * Read page data information. 2143 */ 2144 int 2145 prpdread(proc_t *p, uint_t hatid, struct uio *uiop) 2146 { 2147 struct as *as = p->p_as; 2148 caddr_t buf; 2149 size_t size; 2150 prpageheader_t *php; 2151 prasmap_t *pmp; 2152 struct seg *seg; 2153 int error; 2154 2155 again: 2156 AS_LOCK_ENTER(as, RW_WRITER); 2157 2158 if ((seg = AS_SEGFIRST(as)) == NULL) { 2159 AS_LOCK_EXIT(as); 2160 return (0); 2161 } 2162 size = prpdsize(as); 2163 if (uiop->uio_resid < size) { 2164 AS_LOCK_EXIT(as); 2165 return (E2BIG); 2166 } 2167 2168 buf = kmem_zalloc(size, KM_SLEEP); 2169 php = (prpageheader_t *)buf; 2170 pmp = (prasmap_t *)(buf + sizeof (prpageheader_t)); 2171 2172 hrt2ts(gethrtime(), &php->pr_tstamp); 2173 php->pr_nmap = 0; 2174 php->pr_npage = 0; 2175 do { 2176 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 2177 caddr_t saddr, naddr; 2178 void *tmp = NULL; 2179 2180 if ((seg->s_flags & S_HOLE) != 0) { 2181 continue; 2182 } 2183 2184 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2185 struct vnode *vp; 2186 struct vattr vattr; 2187 size_t len; 2188 size_t npage; 2189 uint_t prot; 2190 uintptr_t next; 2191 2192 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 2193 if ((len = (size_t)(naddr - saddr)) == 0) 2194 continue; 2195 npage = len / PAGESIZE; 2196 next = (uintptr_t)(pmp + 1) + round8(npage); 2197 /* 2198 * It's possible that the address space can change 2199 * subtlely even though we're holding as->a_lock 2200 * due to the nondeterminism of page_exists() in 2201 * the presence of asychronously flushed pages or 2202 * mapped files whose sizes are changing. 2203 * page_exists() may be called indirectly from 2204 * pr_getprot() by a SEGOP_INCORE() routine. 
2205 * If this happens we need to make sure we don't 2206 * overrun the buffer whose size we computed based 2207 * on the initial iteration through the segments. 2208 * Once we've detected an overflow, we need to clean 2209 * up the temporary memory allocated in pr_getprot() 2210 * and retry. If there's a pending signal, we return 2211 * EINTR so that this thread can be dislodged if 2212 * a latent bug causes us to spin indefinitely. 2213 */ 2214 if (next > (uintptr_t)buf + size) { 2215 pr_getprot_done(&tmp); 2216 AS_LOCK_EXIT(as); 2217 2218 kmem_free(buf, size); 2219 2220 if (ISSIG(curthread, JUSTLOOKING)) 2221 return (EINTR); 2222 2223 goto again; 2224 } 2225 2226 php->pr_nmap++; 2227 php->pr_npage += npage; 2228 pmp->pr_vaddr = (uintptr_t)saddr; 2229 pmp->pr_npage = npage; 2230 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 2231 pmp->pr_mflags = 0; 2232 if (prot & PROT_READ) 2233 pmp->pr_mflags |= MA_READ; 2234 if (prot & PROT_WRITE) 2235 pmp->pr_mflags |= MA_WRITE; 2236 if (prot & PROT_EXEC) 2237 pmp->pr_mflags |= MA_EXEC; 2238 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 2239 pmp->pr_mflags |= MA_SHARED; 2240 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 2241 pmp->pr_mflags |= MA_NORESERVE; 2242 if (seg->s_ops == &segspt_shmops || 2243 (seg->s_ops == &segvn_ops && 2244 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 2245 pmp->pr_mflags |= MA_ANON; 2246 if (seg->s_ops == &segspt_shmops) 2247 pmp->pr_mflags |= MA_ISM | MA_SHM; 2248 pmp->pr_pagesize = PAGESIZE; 2249 /* 2250 * Manufacture a filename for the "object" directory. 2251 */ 2252 vattr.va_mask = AT_FSID|AT_NODEID; 2253 if (seg->s_ops == &segvn_ops && 2254 SEGOP_GETVP(seg, saddr, &vp) == 0 && 2255 vp != NULL && vp->v_type == VREG && 2256 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 2257 if (vp == p->p_exec) 2258 (void) strcpy(pmp->pr_mapname, "a.out"); 2259 else 2260 pr_object_name(pmp->pr_mapname, 2261 vp, &vattr); 2262 } 2263 2264 /* 2265 * Get the SysV shared memory id, if any. 
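			 * shmgetid() returns SHMID_NONE for a segment that is
			 * not SysV shared memory; SHMID_FREE (the id has since
			 * been removed) is reported to userland as -1, with
			 * MA_SHM still set.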
2266 */ 2267 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct && 2268 (pmp->pr_shmid = shmgetid(p, seg->s_base)) != 2269 SHMID_NONE) { 2270 if (pmp->pr_shmid == SHMID_FREE) 2271 pmp->pr_shmid = -1; 2272 2273 pmp->pr_mflags |= MA_SHM; 2274 } else { 2275 pmp->pr_shmid = -1; 2276 } 2277 2278 hat_getstat(as, saddr, len, hatid, 2279 (char *)(pmp + 1), HAT_SYNC_ZERORM); 2280 pmp = (prasmap_t *)next; 2281 } 2282 ASSERT(tmp == NULL); 2283 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2284 2285 AS_LOCK_EXIT(as); 2286 2287 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size); 2288 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop); 2289 kmem_free(buf, size); 2290 2291 return (error); 2292 } 2293 2294 #ifdef _SYSCALL32_IMPL 2295 int 2296 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop) 2297 { 2298 struct as *as = p->p_as; 2299 caddr_t buf; 2300 size_t size; 2301 prpageheader32_t *php; 2302 prasmap32_t *pmp; 2303 struct seg *seg; 2304 int error; 2305 2306 again: 2307 AS_LOCK_ENTER(as, RW_WRITER); 2308 2309 if ((seg = AS_SEGFIRST(as)) == NULL) { 2310 AS_LOCK_EXIT(as); 2311 return (0); 2312 } 2313 size = prpdsize32(as); 2314 if (uiop->uio_resid < size) { 2315 AS_LOCK_EXIT(as); 2316 return (E2BIG); 2317 } 2318 2319 buf = kmem_zalloc(size, KM_SLEEP); 2320 php = (prpageheader32_t *)buf; 2321 pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t)); 2322 2323 hrt2ts32(gethrtime(), &php->pr_tstamp); 2324 php->pr_nmap = 0; 2325 php->pr_npage = 0; 2326 do { 2327 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 2328 caddr_t saddr, naddr; 2329 void *tmp = NULL; 2330 2331 if ((seg->s_flags & S_HOLE) != 0) { 2332 continue; 2333 } 2334 2335 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2336 struct vnode *vp; 2337 struct vattr vattr; 2338 size_t len; 2339 size_t npage; 2340 uint_t prot; 2341 uintptr_t next; 2342 2343 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 2344 if ((len = (size_t)(naddr - saddr)) == 0) 2345 continue; 2346 npage = len / PAGESIZE; 2347 next = (uintptr_t)(pmp + 1) + round8(npage); 2348 /* 2349 * It's possible that the address space can change 2350 * subtlely even though we're holding as->a_lock 2351 * due to the nondeterminism of page_exists() in 2352 * the presence of asychronously flushed pages or 2353 * mapped files whose sizes are changing. 2354 * page_exists() may be called indirectly from 2355 * pr_getprot() by a SEGOP_INCORE() routine. 2356 * If this happens we need to make sure we don't 2357 * overrun the buffer whose size we computed based 2358 * on the initial iteration through the segments. 2359 * Once we've detected an overflow, we need to clean 2360 * up the temporary memory allocated in pr_getprot() 2361 * and retry. If there's a pending signal, we return 2362 * EINTR so that this thread can be dislodged if 2363 * a latent bug causes us to spin indefinitely. 
2364 */ 2365 if (next > (uintptr_t)buf + size) { 2366 pr_getprot_done(&tmp); 2367 AS_LOCK_EXIT(as); 2368 2369 kmem_free(buf, size); 2370 2371 if (ISSIG(curthread, JUSTLOOKING)) 2372 return (EINTR); 2373 2374 goto again; 2375 } 2376 2377 php->pr_nmap++; 2378 php->pr_npage += npage; 2379 pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 2380 pmp->pr_npage = (size32_t)npage; 2381 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 2382 pmp->pr_mflags = 0; 2383 if (prot & PROT_READ) 2384 pmp->pr_mflags |= MA_READ; 2385 if (prot & PROT_WRITE) 2386 pmp->pr_mflags |= MA_WRITE; 2387 if (prot & PROT_EXEC) 2388 pmp->pr_mflags |= MA_EXEC; 2389 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 2390 pmp->pr_mflags |= MA_SHARED; 2391 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 2392 pmp->pr_mflags |= MA_NORESERVE; 2393 if (seg->s_ops == &segspt_shmops || 2394 (seg->s_ops == &segvn_ops && 2395 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 2396 pmp->pr_mflags |= MA_ANON; 2397 if (seg->s_ops == &segspt_shmops) 2398 pmp->pr_mflags |= MA_ISM | MA_SHM; 2399 pmp->pr_pagesize = PAGESIZE; 2400 /* 2401 * Manufacture a filename for the "object" directory. 2402 */ 2403 vattr.va_mask = AT_FSID|AT_NODEID; 2404 if (seg->s_ops == &segvn_ops && 2405 SEGOP_GETVP(seg, saddr, &vp) == 0 && 2406 vp != NULL && vp->v_type == VREG && 2407 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 2408 if (vp == p->p_exec) 2409 (void) strcpy(pmp->pr_mapname, "a.out"); 2410 else 2411 pr_object_name(pmp->pr_mapname, 2412 vp, &vattr); 2413 } 2414 2415 /* 2416 * Get the SysV shared memory id, if any. 2417 */ 2418 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct && 2419 (pmp->pr_shmid = shmgetid(p, seg->s_base)) != 2420 SHMID_NONE) { 2421 if (pmp->pr_shmid == SHMID_FREE) 2422 pmp->pr_shmid = -1; 2423 2424 pmp->pr_mflags |= MA_SHM; 2425 } else { 2426 pmp->pr_shmid = -1; 2427 } 2428 2429 hat_getstat(as, saddr, len, hatid, 2430 (char *)(pmp + 1), HAT_SYNC_ZERORM); 2431 pmp = (prasmap32_t *)next; 2432 } 2433 ASSERT(tmp == NULL); 2434 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2435 2436 AS_LOCK_EXIT(as); 2437 2438 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size); 2439 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop); 2440 kmem_free(buf, size); 2441 2442 return (error); 2443 } 2444 #endif /* _SYSCALL32_IMPL */ 2445 2446 ushort_t 2447 prgetpctcpu(uint64_t pct) 2448 { 2449 /* 2450 * The value returned will be relevant in the zone of the examiner, 2451 * which may not be the same as the zone which performed the procfs 2452 * mount. 2453 */ 2454 int nonline = zone_ncpus_online_get(curproc->p_zone); 2455 2456 /* 2457 * Prorate over online cpus so we don't exceed 100% 2458 */ 2459 if (nonline > 1) 2460 pct /= nonline; 2461 pct >>= 16; /* convert to 16-bit scaled integer */ 2462 if (pct > 0x8000) /* might happen, due to rounding */ 2463 pct = 0x8000; 2464 return ((ushort_t)pct); 2465 } 2466 2467 /* 2468 * Return information used by ps(1). 2469 */ 2470 void 2471 prgetpsinfo(proc_t *p, psinfo_t *psp) 2472 { 2473 kthread_t *t; 2474 struct cred *cred; 2475 hrtime_t hrutime, hrstime; 2476 2477 ASSERT(MUTEX_HELD(&p->p_lock)); 2478 2479 if ((t = prchoose(p)) == NULL) /* returns locked thread */ 2480 bzero(psp, sizeof (*psp)); 2481 else { 2482 thread_unlock(t); 2483 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp)); 2484 } 2485 2486 /* 2487 * only export SSYS and SMSACCT; everything else is off-limits to 2488 * userland apps. 
2489 */ 2490 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 2491 psp->pr_nlwp = p->p_lwpcnt; 2492 psp->pr_nzomb = p->p_zombcnt; 2493 mutex_enter(&p->p_crlock); 2494 cred = p->p_cred; 2495 psp->pr_uid = crgetruid(cred); 2496 psp->pr_euid = crgetuid(cred); 2497 psp->pr_gid = crgetrgid(cred); 2498 psp->pr_egid = crgetgid(cred); 2499 mutex_exit(&p->p_crlock); 2500 psp->pr_pid = p->p_pid; 2501 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 2502 (p->p_flag & SZONETOP)) { 2503 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 2504 /* 2505 * Inside local zones, fake zsched's pid as parent pids for 2506 * processes which reference processes outside of the zone. 2507 */ 2508 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 2509 } else { 2510 psp->pr_ppid = p->p_ppid; 2511 } 2512 psp->pr_pgid = p->p_pgrp; 2513 psp->pr_sid = p->p_sessp->s_sid; 2514 psp->pr_taskid = p->p_task->tk_tkid; 2515 psp->pr_projid = p->p_task->tk_proj->kpj_id; 2516 psp->pr_poolid = p->p_pool->pool_id; 2517 psp->pr_zoneid = p->p_zone->zone_id; 2518 if ((psp->pr_contract = PRCTID(p)) == 0) 2519 psp->pr_contract = -1; 2520 psp->pr_addr = (uintptr_t)prgetpsaddr(p); 2521 switch (p->p_model) { 2522 case DATAMODEL_ILP32: 2523 psp->pr_dmodel = PR_MODEL_ILP32; 2524 break; 2525 case DATAMODEL_LP64: 2526 psp->pr_dmodel = PR_MODEL_LP64; 2527 break; 2528 } 2529 hrutime = mstate_aggr_state(p, LMS_USER); 2530 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 2531 hrt2ts((hrutime + hrstime), &psp->pr_time); 2532 TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime); 2533 2534 if (t == NULL) { 2535 int wcode = p->p_wcode; /* must be atomic read */ 2536 2537 if (wcode) 2538 psp->pr_wstat = wstat(wcode, p->p_wdata); 2539 psp->pr_ttydev = PRNODEV; 2540 psp->pr_lwp.pr_state = SZOMB; 2541 psp->pr_lwp.pr_sname = 'Z'; 2542 psp->pr_lwp.pr_bindpro = PBIND_NONE; 2543 psp->pr_lwp.pr_bindpset = PS_NONE; 2544 } else { 2545 user_t *up = PTOU(p); 2546 struct as *as; 2547 dev_t d; 2548 extern dev_t rwsconsdev, rconsdev, uconsdev; 2549 2550 d = cttydev(p); 2551 /* 2552 * If the controlling terminal is the real 2553 * or workstation console device, map to what the 2554 * user thinks is the console device. Handle case when 2555 * rwsconsdev or rconsdev is set to NODEV for Starfire. 2556 */ 2557 if ((d == rwsconsdev || d == rconsdev) && d != NODEV) 2558 d = uconsdev; 2559 psp->pr_ttydev = (d == NODEV) ? 
PRNODEV : d; 2560 psp->pr_start = up->u_start; 2561 bcopy(up->u_comm, psp->pr_fname, 2562 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); 2563 bcopy(up->u_psargs, psp->pr_psargs, 2564 MIN(PRARGSZ-1, PSARGSZ)); 2565 psp->pr_argc = up->u_argc; 2566 psp->pr_argv = up->u_argv; 2567 psp->pr_envp = up->u_envp; 2568 2569 /* get the chosen lwp's lwpsinfo */ 2570 prgetlwpsinfo(t, &psp->pr_lwp); 2571 2572 /* compute %cpu for the process */ 2573 if (p->p_lwpcnt == 1) 2574 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; 2575 else { 2576 uint64_t pct = 0; 2577 hrtime_t cur_time = gethrtime_unscaled(); 2578 2579 t = p->p_tlist; 2580 do { 2581 pct += cpu_update_pct(t, cur_time); 2582 } while ((t = t->t_forw) != p->p_tlist); 2583 2584 psp->pr_pctcpu = prgetpctcpu(pct); 2585 } 2586 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { 2587 psp->pr_size = 0; 2588 psp->pr_rssize = 0; 2589 } else { 2590 mutex_exit(&p->p_lock); 2591 AS_LOCK_ENTER(as, RW_READER); 2592 psp->pr_size = btopr(as->a_resvsize) * 2593 (PAGESIZE / 1024); 2594 psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024); 2595 psp->pr_pctmem = rm_pctmemory(as); 2596 AS_LOCK_EXIT(as); 2597 mutex_enter(&p->p_lock); 2598 } 2599 } 2600 } 2601 2602 static size_t 2603 prfdinfomisc(list_t *data, uint_t type, const void *val, size_t vlen) 2604 { 2605 pr_misc_header_t *misc; 2606 size_t len; 2607 2608 len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen); 2609 2610 if (data != NULL) { 2611 misc = pr_iol_newbuf(data, len); 2612 misc->pr_misc_type = type; 2613 misc->pr_misc_size = len; 2614 misc++; 2615 bcopy((char *)val, (char *)misc, vlen); 2616 } 2617 2618 return (len); 2619 } 2620 2621 /* 2622 * There's no elegant way to determine if a character device 2623 * supports TLI, so just check a hardcoded list of known TLI 2624 * devices. 2625 */ 2626 2627 static boolean_t 2628 pristli(vnode_t *vp) 2629 { 2630 static const char *tlidevs[] = { 2631 "udp", "udp6", "tcp", "tcp6" 2632 }; 2633 char *devname; 2634 uint_t i; 2635 2636 ASSERT(vp != NULL); 2637 2638 if (vp->v_type != VCHR || vp->v_stream == NULL || vp->v_rdev == 0) 2639 return (B_FALSE); 2640 2641 if ((devname = mod_major_to_name(getmajor(vp->v_rdev))) == NULL) 2642 return (B_FALSE); 2643 2644 for (i = 0; i < ARRAY_SIZE(tlidevs); i++) { 2645 if (strcmp(devname, tlidevs[i]) == 0) 2646 return (B_TRUE); 2647 } 2648 2649 return (B_FALSE); 2650 } 2651 2652 static size_t 2653 prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred) 2654 { 2655 char *pathname; 2656 size_t pathlen; 2657 size_t sz = 0; 2658 2659 /* 2660 * The global zone's path to a file in a non-global zone can exceed 2661 * MAXPATHLEN. 
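	 * The zone's root prefix and the path within the zone may each
	 * approach MAXPATHLEN, hence the doubled buffer (plus a NUL
	 * terminator) below.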
2662 */ 2663 pathlen = MAXPATHLEN * 2 + 1; 2664 pathname = kmem_alloc(pathlen, KM_SLEEP); 2665 2666 if (vnodetopath(NULL, vp, pathname, pathlen, cred) == 0) { 2667 sz += prfdinfomisc(data, PR_PATHNAME, 2668 pathname, strlen(pathname) + 1); 2669 } 2670 2671 kmem_free(pathname, pathlen); 2672 2673 return (sz); 2674 } 2675 2676 static size_t 2677 prfdinfotlisockopt(vnode_t *vp, list_t *data, cred_t *cred) 2678 { 2679 strcmd_t strcmd; 2680 int32_t rval; 2681 size_t sz = 0; 2682 2683 strcmd.sc_cmd = TI_GETMYNAME; 2684 strcmd.sc_timeout = 1; 2685 strcmd.sc_len = STRCMDBUFSIZE; 2686 2687 if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred, 2688 &rval, NULL) == 0 && strcmd.sc_len > 0) { 2689 sz += prfdinfomisc(data, PR_SOCKETNAME, strcmd.sc_buf, 2690 strcmd.sc_len); 2691 } 2692 2693 strcmd.sc_cmd = TI_GETPEERNAME; 2694 strcmd.sc_timeout = 1; 2695 strcmd.sc_len = STRCMDBUFSIZE; 2696 2697 if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred, 2698 &rval, NULL) == 0 && strcmd.sc_len > 0) { 2699 sz += prfdinfomisc(data, PR_PEERSOCKNAME, strcmd.sc_buf, 2700 strcmd.sc_len); 2701 } 2702 2703 return (sz); 2704 } 2705 2706 static size_t 2707 prfdinfosockopt(vnode_t *vp, list_t *data, cred_t *cred) 2708 { 2709 sonode_t *so; 2710 socklen_t vlen; 2711 size_t sz = 0; 2712 uint_t i; 2713 2714 if (vp->v_stream != NULL) { 2715 so = VTOSO(vp->v_stream->sd_vnode); 2716 2717 if (so->so_version == SOV_STREAM) 2718 so = NULL; 2719 } else { 2720 so = VTOSO(vp); 2721 } 2722 2723 if (so == NULL) 2724 return (0); 2725 2726 DTRACE_PROBE1(sonode, sonode_t *, so); 2727 2728 /* prmisc - PR_SOCKETNAME */ 2729 2730 struct sockaddr_storage buf; 2731 struct sockaddr *name = (struct sockaddr *)&buf; 2732 2733 vlen = sizeof (buf); 2734 if (SOP_GETSOCKNAME(so, name, &vlen, cred) == 0 && vlen > 0) 2735 sz += prfdinfomisc(data, PR_SOCKETNAME, name, vlen); 2736 2737 /* prmisc - PR_PEERSOCKNAME */ 2738 2739 vlen = sizeof (buf); 2740 if (SOP_GETPEERNAME(so, name, &vlen, B_FALSE, cred) == 0 && vlen > 0) 2741 sz += prfdinfomisc(data, PR_PEERSOCKNAME, name, vlen); 2742 2743 /* prmisc - PR_SOCKOPTS_BOOL_OPTS */ 2744 2745 static struct boolopt { 2746 int level; 2747 int opt; 2748 int bopt; 2749 } boolopts[] = { 2750 { SOL_SOCKET, SO_DEBUG, PR_SO_DEBUG }, 2751 { SOL_SOCKET, SO_REUSEADDR, PR_SO_REUSEADDR }, 2752 #ifdef SO_REUSEPORT 2753 /* SmartOS and OmniOS have SO_REUSEPORT */ 2754 { SOL_SOCKET, SO_REUSEPORT, PR_SO_REUSEPORT }, 2755 #endif 2756 { SOL_SOCKET, SO_KEEPALIVE, PR_SO_KEEPALIVE }, 2757 { SOL_SOCKET, SO_DONTROUTE, PR_SO_DONTROUTE }, 2758 { SOL_SOCKET, SO_BROADCAST, PR_SO_BROADCAST }, 2759 { SOL_SOCKET, SO_OOBINLINE, PR_SO_OOBINLINE }, 2760 { SOL_SOCKET, SO_DGRAM_ERRIND, PR_SO_DGRAM_ERRIND }, 2761 { SOL_SOCKET, SO_ALLZONES, PR_SO_ALLZONES }, 2762 { SOL_SOCKET, SO_MAC_EXEMPT, PR_SO_MAC_EXEMPT }, 2763 { SOL_SOCKET, SO_MAC_IMPLICIT, PR_SO_MAC_IMPLICIT }, 2764 { SOL_SOCKET, SO_EXCLBIND, PR_SO_EXCLBIND }, 2765 { SOL_SOCKET, SO_VRRP, PR_SO_VRRP }, 2766 { IPPROTO_UDP, UDP_NAT_T_ENDPOINT, 2767 PR_UDP_NAT_T_ENDPOINT } 2768 }; 2769 prsockopts_bool_opts_t opts; 2770 int val; 2771 2772 if (data != NULL) { 2773 opts.prsock_bool_opts = 0; 2774 2775 for (i = 0; i < ARRAY_SIZE(boolopts); i++) { 2776 vlen = sizeof (val); 2777 if (SOP_GETSOCKOPT(so, boolopts[i].level, 2778 boolopts[i].opt, &val, &vlen, 0, cred) == 0 && 2779 val != 0) { 2780 opts.prsock_bool_opts |= boolopts[i].bopt; 2781 } 2782 } 2783 } 2784 2785 sz += prfdinfomisc(data, PR_SOCKOPTS_BOOL_OPTS, &opts, sizeof (opts)); 2786 2787 /* prmisc - PR_SOCKOPT_LINGER */ 2788 2789 
struct linger l; 2790 2791 vlen = sizeof (l); 2792 if (SOP_GETSOCKOPT(so, SOL_SOCKET, SO_LINGER, &l, &vlen, 2793 0, cred) == 0 && vlen > 0) { 2794 sz += prfdinfomisc(data, PR_SOCKOPT_LINGER, &l, vlen); 2795 } 2796 2797 /* prmisc - PR_SOCKOPT_* int types */ 2798 2799 static struct sopt { 2800 int level; 2801 int opt; 2802 int bopt; 2803 } sopts[] = { 2804 { SOL_SOCKET, SO_TYPE, PR_SOCKOPT_TYPE }, 2805 { SOL_SOCKET, SO_SNDBUF, PR_SOCKOPT_SNDBUF }, 2806 { SOL_SOCKET, SO_RCVBUF, PR_SOCKOPT_RCVBUF } 2807 }; 2808 2809 for (i = 0; i < ARRAY_SIZE(sopts); i++) { 2810 vlen = sizeof (val); 2811 if (SOP_GETSOCKOPT(so, sopts[i].level, sopts[i].opt, 2812 &val, &vlen, 0, cred) == 0 && vlen > 0) { 2813 sz += prfdinfomisc(data, sopts[i].bopt, &val, vlen); 2814 } 2815 } 2816 2817 /* prmisc - PR_SOCKOPT_IP_NEXTHOP */ 2818 2819 in_addr_t nexthop_val; 2820 2821 vlen = sizeof (nexthop_val); 2822 if (SOP_GETSOCKOPT(so, IPPROTO_IP, IP_NEXTHOP, 2823 &nexthop_val, &vlen, 0, cred) == 0 && vlen > 0) { 2824 sz += prfdinfomisc(data, PR_SOCKOPT_IP_NEXTHOP, 2825 &nexthop_val, vlen); 2826 } 2827 2828 /* prmisc - PR_SOCKOPT_IPV6_NEXTHOP */ 2829 2830 struct sockaddr_in6 nexthop6_val; 2831 2832 vlen = sizeof (nexthop6_val); 2833 if (SOP_GETSOCKOPT(so, IPPROTO_IPV6, IPV6_NEXTHOP, 2834 &nexthop6_val, &vlen, 0, cred) == 0 && vlen > 0) { 2835 sz += prfdinfomisc(data, PR_SOCKOPT_IPV6_NEXTHOP, 2836 &nexthop6_val, vlen); 2837 } 2838 2839 /* prmisc - PR_SOCKOPT_TCP_CONGESTION */ 2840 2841 char cong[CC_ALGO_NAME_MAX]; 2842 2843 vlen = sizeof (cong); 2844 if (SOP_GETSOCKOPT(so, IPPROTO_TCP, TCP_CONGESTION, 2845 &cong, &vlen, 0, cred) == 0 && vlen > 0) { 2846 sz += prfdinfomisc(data, PR_SOCKOPT_TCP_CONGESTION, cong, vlen); 2847 } 2848 2849 /* prmisc - PR_SOCKFILTERS_PRIV */ 2850 2851 struct fil_info fi; 2852 2853 vlen = sizeof (fi); 2854 if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST, 2855 &fi, &vlen, 0, cred) == 0 && vlen != 0) { 2856 pr_misc_header_t *misc; 2857 size_t len; 2858 2859 /* 2860 * We limit the number of returned filters to 32. 2861 * This is the maximum number that pfiles will print 2862 * anyway. 2863 */ 2864 vlen = MIN(32, fi.fi_pos + 1); 2865 vlen *= sizeof (fi); 2866 2867 len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen); 2868 sz += len; 2869 2870 if (data != NULL) { 2871 /* 2872 * So that the filter list can be built incrementally, 2873 * prfdinfomisc() is not used here. Instead we 2874 * allocate a buffer directly on the copyout list using 2875 * pr_iol_newbuf() 2876 */ 2877 misc = pr_iol_newbuf(data, len); 2878 misc->pr_misc_type = PR_SOCKFILTERS_PRIV; 2879 misc->pr_misc_size = len; 2880 misc++; 2881 len = vlen; 2882 if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST, 2883 misc, &vlen, 0, cred) == 0) { 2884 /* 2885 * In case the number of filters has reduced 2886 * since the first call, explicitly zero out 2887 * any unpopulated space. 
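				 * The lengths here are byte counts, so the
				 * zeroing is done on a (char *) view of the
				 * filter buffer.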
2888 */ 2889 if (vlen < len) 2890 bzero(misc + vlen, len - vlen); 2891 } else { 2892 /* Something went wrong, zero out the result */ 2893 bzero(misc, vlen); 2894 } 2895 } 2896 } 2897 2898 return (sz); 2899 } 2900 2901 typedef struct prfdinfo_nm_path_cbdata { 2902 proc_t *nmp_p; 2903 u_offset_t nmp_sz; 2904 list_t *nmp_data; 2905 } prfdinfo_nm_path_cbdata_t; 2906 2907 static int 2908 prfdinfo_nm_path(const struct namenode *np, cred_t *cred, void *arg) 2909 { 2910 prfdinfo_nm_path_cbdata_t *cb = arg; 2911 2912 cb->nmp_sz += prfdinfopath(cb->nmp_p, np->nm_vnode, cb->nmp_data, cred); 2913 2914 return (0); 2915 } 2916 2917 u_offset_t 2918 prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred) 2919 { 2920 u_offset_t sz; 2921 2922 /* 2923 * All fdinfo files will be at least this big - 2924 * sizeof fdinfo struct + zero length trailer 2925 */ 2926 sz = offsetof(prfdinfo_t, pr_misc) + sizeof (pr_misc_header_t); 2927 2928 /* Pathname */ 2929 switch (vp->v_type) { 2930 case VDOOR: { 2931 prfdinfo_nm_path_cbdata_t cb = { 2932 .nmp_p = p, 2933 .nmp_data = NULL, 2934 .nmp_sz = 0 2935 }; 2936 2937 (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb); 2938 sz += cb.nmp_sz; 2939 break; 2940 } 2941 case VSOCK: 2942 break; 2943 default: 2944 sz += prfdinfopath(p, vp, NULL, cred); 2945 } 2946 2947 /* Socket options */ 2948 if (vp->v_type == VSOCK) 2949 sz += prfdinfosockopt(vp, NULL, cred); 2950 2951 /* TLI/XTI sockets */ 2952 if (pristli(vp)) 2953 sz += prfdinfotlisockopt(vp, NULL, cred); 2954 2955 return (sz); 2956 } 2957 2958 int 2959 prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfo_t *fdinfo, cred_t *cred, 2960 cred_t *file_cred, list_t *data) 2961 { 2962 vattr_t vattr; 2963 int error; 2964 2965 /* 2966 * The buffer has been initialised to zero by pr_iol_newbuf(). 2967 * Initialise defaults for any values that should not default to zero. 2968 */ 2969 fdinfo->pr_uid = (uid_t)-1; 2970 fdinfo->pr_gid = (gid_t)-1; 2971 fdinfo->pr_size = -1; 2972 fdinfo->pr_locktype = F_UNLCK; 2973 fdinfo->pr_lockpid = -1; 2974 fdinfo->pr_locksysid = -1; 2975 fdinfo->pr_peerpid = -1; 2976 2977 /* Offset */ 2978 2979 /* 2980 * pr_offset has already been set from the underlying file_t. 2981 * Check if it is plausible and reset to -1 if not. 2982 */ 2983 if (fdinfo->pr_offset != -1 && 2984 VOP_SEEK(vp, 0, (offset_t *)&fdinfo->pr_offset, NULL) != 0) 2985 fdinfo->pr_offset = -1; 2986 2987 /* 2988 * Attributes 2989 * 2990 * We have two cred_t structures available here. 2991 * 'cred' is the caller's credential, and 'file_cred' is the credential 2992 * for the file being inspected. 2993 * 2994 * When looking up the file attributes, file_cred is used in order 2995 * that the correct ownership is set for doors and FIFOs. Since the 2996 * caller has permission to read the fdinfo file in proc, this does 2997 * not expose any additional information. 
2998 */ 2999 vattr.va_mask = AT_STAT; 3000 if (VOP_GETATTR(vp, &vattr, 0, file_cred, NULL) == 0) { 3001 fdinfo->pr_major = getmajor(vattr.va_fsid); 3002 fdinfo->pr_minor = getminor(vattr.va_fsid); 3003 fdinfo->pr_rmajor = getmajor(vattr.va_rdev); 3004 fdinfo->pr_rminor = getminor(vattr.va_rdev); 3005 fdinfo->pr_ino = (ino64_t)vattr.va_nodeid; 3006 fdinfo->pr_size = (off64_t)vattr.va_size; 3007 fdinfo->pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode; 3008 fdinfo->pr_uid = vattr.va_uid; 3009 fdinfo->pr_gid = vattr.va_gid; 3010 if (vp->v_type == VSOCK) 3011 fdinfo->pr_fileflags |= sock_getfasync(vp); 3012 } 3013 3014 /* locks */ 3015 3016 flock64_t bf; 3017 3018 bzero(&bf, sizeof (bf)); 3019 bf.l_type = F_WRLCK; 3020 3021 if (VOP_FRLOCK(vp, F_GETLK, &bf, 3022 (uint16_t)(fdinfo->pr_fileflags & 0xffff), 0, NULL, 3023 cred, NULL) == 0 && bf.l_type != F_UNLCK) { 3024 fdinfo->pr_locktype = bf.l_type; 3025 fdinfo->pr_lockpid = bf.l_pid; 3026 fdinfo->pr_locksysid = bf.l_sysid; 3027 } 3028 3029 /* peer cred */ 3030 3031 k_peercred_t kpc; 3032 3033 switch (vp->v_type) { 3034 case VFIFO: 3035 case VSOCK: { 3036 int32_t rval; 3037 3038 error = VOP_IOCTL(vp, _I_GETPEERCRED, (intptr_t)&kpc, 3039 FKIOCTL, cred, &rval, NULL); 3040 break; 3041 } 3042 case VCHR: { 3043 struct strioctl strioc; 3044 int32_t rval; 3045 3046 if (vp->v_stream == NULL) { 3047 error = ENOTSUP; 3048 break; 3049 } 3050 strioc.ic_cmd = _I_GETPEERCRED; 3051 strioc.ic_timout = INFTIM; 3052 strioc.ic_len = (int)sizeof (k_peercred_t); 3053 strioc.ic_dp = (char *)&kpc; 3054 3055 error = strdoioctl(vp->v_stream, &strioc, FNATIVE | FKIOCTL, 3056 STR_NOSIG | K_TO_K, cred, &rval); 3057 break; 3058 } 3059 default: 3060 error = ENOTSUP; 3061 break; 3062 } 3063 3064 if (error == 0 && kpc.pc_cr != NULL) { 3065 proc_t *peerp; 3066 3067 fdinfo->pr_peerpid = kpc.pc_cpid; 3068 3069 crfree(kpc.pc_cr); 3070 3071 mutex_enter(&pidlock); 3072 if ((peerp = prfind(fdinfo->pr_peerpid)) != NULL) { 3073 user_t *up; 3074 3075 mutex_enter(&peerp->p_lock); 3076 mutex_exit(&pidlock); 3077 3078 up = PTOU(peerp); 3079 bcopy(up->u_comm, fdinfo->pr_peername, 3080 MIN(sizeof (up->u_comm), 3081 sizeof (fdinfo->pr_peername) - 1)); 3082 3083 mutex_exit(&peerp->p_lock); 3084 } else { 3085 mutex_exit(&pidlock); 3086 } 3087 } 3088 3089 /* pathname */ 3090 3091 switch (vp->v_type) { 3092 case VDOOR: { 3093 prfdinfo_nm_path_cbdata_t cb = { 3094 .nmp_p = p, 3095 .nmp_data = data, 3096 .nmp_sz = 0 3097 }; 3098 3099 (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb); 3100 break; 3101 } 3102 case VSOCK: 3103 /* 3104 * Don't attempt to determine the path for a socket as the 3105 * vnode has no associated v_path. It will cause a linear scan 3106 * of the dnlc table and result in no path being found. 3107 */ 3108 break; 3109 default: 3110 (void) prfdinfopath(p, vp, data, cred); 3111 } 3112 3113 /* socket options */ 3114 if (vp->v_type == VSOCK) 3115 (void) prfdinfosockopt(vp, data, cred); 3116 3117 /* TLI/XTI stream sockets */ 3118 if (pristli(vp)) 3119 (void) prfdinfotlisockopt(vp, data, cred); 3120 3121 /* 3122 * Add a terminating header with a zero size. 
3123 */ 3124 pr_misc_header_t *misc; 3125 3126 misc = pr_iol_newbuf(data, sizeof (*misc)); 3127 misc->pr_misc_size = 0; 3128 misc->pr_misc_type = (uint_t)-1; 3129 3130 return (0); 3131 } 3132 3133 #ifdef _SYSCALL32_IMPL 3134 void 3135 prgetpsinfo32(proc_t *p, psinfo32_t *psp) 3136 { 3137 kthread_t *t; 3138 struct cred *cred; 3139 hrtime_t hrutime, hrstime; 3140 3141 ASSERT(MUTEX_HELD(&p->p_lock)); 3142 3143 if ((t = prchoose(p)) == NULL) /* returns locked thread */ 3144 bzero(psp, sizeof (*psp)); 3145 else { 3146 thread_unlock(t); 3147 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp)); 3148 } 3149 3150 /* 3151 * only export SSYS and SMSACCT; everything else is off-limits to 3152 * userland apps. 3153 */ 3154 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 3155 psp->pr_nlwp = p->p_lwpcnt; 3156 psp->pr_nzomb = p->p_zombcnt; 3157 mutex_enter(&p->p_crlock); 3158 cred = p->p_cred; 3159 psp->pr_uid = crgetruid(cred); 3160 psp->pr_euid = crgetuid(cred); 3161 psp->pr_gid = crgetrgid(cred); 3162 psp->pr_egid = crgetgid(cred); 3163 mutex_exit(&p->p_crlock); 3164 psp->pr_pid = p->p_pid; 3165 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 3166 (p->p_flag & SZONETOP)) { 3167 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 3168 /* 3169 * Inside local zones, fake zsched's pid as parent pids for 3170 * processes which reference processes outside of the zone. 3171 */ 3172 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 3173 } else { 3174 psp->pr_ppid = p->p_ppid; 3175 } 3176 psp->pr_pgid = p->p_pgrp; 3177 psp->pr_sid = p->p_sessp->s_sid; 3178 psp->pr_taskid = p->p_task->tk_tkid; 3179 psp->pr_projid = p->p_task->tk_proj->kpj_id; 3180 psp->pr_poolid = p->p_pool->pool_id; 3181 psp->pr_zoneid = p->p_zone->zone_id; 3182 if ((psp->pr_contract = PRCTID(p)) == 0) 3183 psp->pr_contract = -1; 3184 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 3185 switch (p->p_model) { 3186 case DATAMODEL_ILP32: 3187 psp->pr_dmodel = PR_MODEL_ILP32; 3188 break; 3189 case DATAMODEL_LP64: 3190 psp->pr_dmodel = PR_MODEL_LP64; 3191 break; 3192 } 3193 hrutime = mstate_aggr_state(p, LMS_USER); 3194 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 3195 hrt2ts32(hrutime + hrstime, &psp->pr_time); 3196 TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime); 3197 3198 if (t == NULL) { 3199 extern int wstat(int, int); /* needs a header file */ 3200 int wcode = p->p_wcode; /* must be atomic read */ 3201 3202 if (wcode) 3203 psp->pr_wstat = wstat(wcode, p->p_wdata); 3204 psp->pr_ttydev = PRNODEV32; 3205 psp->pr_lwp.pr_state = SZOMB; 3206 psp->pr_lwp.pr_sname = 'Z'; 3207 } else { 3208 user_t *up = PTOU(p); 3209 struct as *as; 3210 dev_t d; 3211 extern dev_t rwsconsdev, rconsdev, uconsdev; 3212 3213 d = cttydev(p); 3214 /* 3215 * If the controlling terminal is the real 3216 * or workstation console device, map to what the 3217 * user thinks is the console device. Handle case when 3218 * rwsconsdev or rconsdev is set to NODEV for Starfire. 
3219 */ 3220 if ((d == rwsconsdev || d == rconsdev) && d != NODEV) 3221 d = uconsdev; 3222 (void) cmpldev(&psp->pr_ttydev, d); 3223 TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start); 3224 bcopy(up->u_comm, psp->pr_fname, 3225 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); 3226 bcopy(up->u_psargs, psp->pr_psargs, 3227 MIN(PRARGSZ-1, PSARGSZ)); 3228 psp->pr_argc = up->u_argc; 3229 psp->pr_argv = (caddr32_t)up->u_argv; 3230 psp->pr_envp = (caddr32_t)up->u_envp; 3231 3232 /* get the chosen lwp's lwpsinfo */ 3233 prgetlwpsinfo32(t, &psp->pr_lwp); 3234 3235 /* compute %cpu for the process */ 3236 if (p->p_lwpcnt == 1) 3237 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; 3238 else { 3239 uint64_t pct = 0; 3240 hrtime_t cur_time; 3241 3242 t = p->p_tlist; 3243 cur_time = gethrtime_unscaled(); 3244 do { 3245 pct += cpu_update_pct(t, cur_time); 3246 } while ((t = t->t_forw) != p->p_tlist); 3247 3248 psp->pr_pctcpu = prgetpctcpu(pct); 3249 } 3250 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { 3251 psp->pr_size = 0; 3252 psp->pr_rssize = 0; 3253 } else { 3254 mutex_exit(&p->p_lock); 3255 AS_LOCK_ENTER(as, RW_READER); 3256 psp->pr_size = (size32_t) 3257 (btopr(as->a_resvsize) * (PAGESIZE / 1024)); 3258 psp->pr_rssize = (size32_t) 3259 (rm_asrss(as) * (PAGESIZE / 1024)); 3260 psp->pr_pctmem = rm_pctmemory(as); 3261 AS_LOCK_EXIT(as); 3262 mutex_enter(&p->p_lock); 3263 } 3264 } 3265 3266 /* 3267 * If we are looking at an LP64 process, zero out 3268 * the fields that cannot be represented in ILP32. 3269 */ 3270 if (p->p_model != DATAMODEL_ILP32) { 3271 psp->pr_size = 0; 3272 psp->pr_rssize = 0; 3273 psp->pr_argv = 0; 3274 psp->pr_envp = 0; 3275 } 3276 } 3277 3278 #endif /* _SYSCALL32_IMPL */ 3279 3280 void 3281 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp) 3282 { 3283 klwp_t *lwp = ttolwp(t); 3284 sobj_ops_t *sobj; 3285 char c, state; 3286 uint64_t pct; 3287 int retval, niceval; 3288 hrtime_t hrutime, hrstime; 3289 3290 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 3291 3292 bzero(psp, sizeof (*psp)); 3293 3294 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 3295 psp->pr_lwpid = t->t_tid; 3296 psp->pr_addr = (uintptr_t)t; 3297 psp->pr_wchan = (uintptr_t)t->t_wchan; 3298 3299 /* map the thread state enum into a process state enum */ 3300 state = VSTOPPED(t) ? 
TS_STOPPED : t->t_state; 3301 switch (state) { 3302 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 3303 case TS_RUN: state = SRUN; c = 'R'; break; 3304 case TS_ONPROC: state = SONPROC; c = 'O'; break; 3305 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 3306 case TS_STOPPED: state = SSTOP; c = 'T'; break; 3307 case TS_WAIT: state = SWAIT; c = 'W'; break; 3308 default: state = 0; c = '?'; break; 3309 } 3310 psp->pr_state = state; 3311 psp->pr_sname = c; 3312 if ((sobj = t->t_sobj_ops) != NULL) 3313 psp->pr_stype = SOBJ_TYPE(sobj); 3314 retval = CL_DONICE(t, NULL, 0, &niceval); 3315 if (retval == 0) { 3316 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 3317 psp->pr_nice = niceval + NZERO; 3318 } 3319 psp->pr_syscall = t->t_sysnum; 3320 psp->pr_pri = t->t_pri; 3321 psp->pr_start.tv_sec = t->t_start; 3322 psp->pr_start.tv_nsec = 0L; 3323 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 3324 scalehrtime(&hrutime); 3325 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 3326 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 3327 scalehrtime(&hrstime); 3328 hrt2ts(hrutime + hrstime, &psp->pr_time); 3329 /* compute %cpu for the lwp */ 3330 pct = cpu_update_pct(t, gethrtime_unscaled()); 3331 psp->pr_pctcpu = prgetpctcpu(pct); 3332 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 3333 if (psp->pr_cpu > 99) 3334 psp->pr_cpu = 99; 3335 3336 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 3337 sizeof (psp->pr_clname) - 1); 3338 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 3339 psp->pr_onpro = t->t_cpu->cpu_id; 3340 psp->pr_bindpro = t->t_bind_cpu; 3341 psp->pr_bindpset = t->t_bind_pset; 3342 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 3343 } 3344 3345 #ifdef _SYSCALL32_IMPL 3346 void 3347 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp) 3348 { 3349 klwp_t *lwp = ttolwp(t); 3350 sobj_ops_t *sobj; 3351 char c, state; 3352 uint64_t pct; 3353 int retval, niceval; 3354 hrtime_t hrutime, hrstime; 3355 3356 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 3357 3358 bzero(psp, sizeof (*psp)); 3359 3360 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 3361 psp->pr_lwpid = t->t_tid; 3362 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 3363 psp->pr_wchan = 0; /* cannot represent 64-bit addr in 32 bits */ 3364 3365 /* map the thread state enum into a process state enum */ 3366 state = VSTOPPED(t) ? 
TS_STOPPED : t->t_state; 3367 switch (state) { 3368 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 3369 case TS_RUN: state = SRUN; c = 'R'; break; 3370 case TS_ONPROC: state = SONPROC; c = 'O'; break; 3371 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 3372 case TS_STOPPED: state = SSTOP; c = 'T'; break; 3373 case TS_WAIT: state = SWAIT; c = 'W'; break; 3374 default: state = 0; c = '?'; break; 3375 } 3376 psp->pr_state = state; 3377 psp->pr_sname = c; 3378 if ((sobj = t->t_sobj_ops) != NULL) 3379 psp->pr_stype = SOBJ_TYPE(sobj); 3380 retval = CL_DONICE(t, NULL, 0, &niceval); 3381 if (retval == 0) { 3382 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 3383 psp->pr_nice = niceval + NZERO; 3384 } else { 3385 psp->pr_oldpri = 0; 3386 psp->pr_nice = 0; 3387 } 3388 psp->pr_syscall = t->t_sysnum; 3389 psp->pr_pri = t->t_pri; 3390 psp->pr_start.tv_sec = (time32_t)t->t_start; 3391 psp->pr_start.tv_nsec = 0L; 3392 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 3393 scalehrtime(&hrutime); 3394 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 3395 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 3396 scalehrtime(&hrstime); 3397 hrt2ts32(hrutime + hrstime, &psp->pr_time); 3398 /* compute %cpu for the lwp */ 3399 pct = cpu_update_pct(t, gethrtime_unscaled()); 3400 psp->pr_pctcpu = prgetpctcpu(pct); 3401 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 3402 if (psp->pr_cpu > 99) 3403 psp->pr_cpu = 99; 3404 3405 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 3406 sizeof (psp->pr_clname) - 1); 3407 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 3408 psp->pr_onpro = t->t_cpu->cpu_id; 3409 psp->pr_bindpro = t->t_bind_cpu; 3410 psp->pr_bindpset = t->t_bind_pset; 3411 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 3412 } 3413 #endif /* _SYSCALL32_IMPL */ 3414 3415 #ifdef _SYSCALL32_IMPL 3416 3417 #define PR_COPY_FIELD(s, d, field) d->field = s->field 3418 3419 #define PR_COPY_FIELD_ILP32(s, d, field) \ 3420 if (s->pr_dmodel == PR_MODEL_ILP32) { \ 3421 d->field = s->field; \ 3422 } 3423 3424 #define PR_COPY_TIMESPEC(s, d, field) \ 3425 TIMESPEC_TO_TIMESPEC32(&d->field, &s->field); 3426 3427 #define PR_COPY_BUF(s, d, field) \ 3428 bcopy(s->field, d->field, sizeof (d->field)); 3429 3430 #define PR_IGNORE_FIELD(s, d, field) 3431 3432 void 3433 lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest) 3434 { 3435 bzero(dest, sizeof (*dest)); 3436 3437 PR_COPY_FIELD(src, dest, pr_flag); 3438 PR_COPY_FIELD(src, dest, pr_lwpid); 3439 PR_IGNORE_FIELD(src, dest, pr_addr); 3440 PR_IGNORE_FIELD(src, dest, pr_wchan); 3441 PR_COPY_FIELD(src, dest, pr_stype); 3442 PR_COPY_FIELD(src, dest, pr_state); 3443 PR_COPY_FIELD(src, dest, pr_sname); 3444 PR_COPY_FIELD(src, dest, pr_nice); 3445 PR_COPY_FIELD(src, dest, pr_syscall); 3446 PR_COPY_FIELD(src, dest, pr_oldpri); 3447 PR_COPY_FIELD(src, dest, pr_cpu); 3448 PR_COPY_FIELD(src, dest, pr_pri); 3449 PR_COPY_FIELD(src, dest, pr_pctcpu); 3450 PR_COPY_TIMESPEC(src, dest, pr_start); 3451 PR_COPY_BUF(src, dest, pr_clname); 3452 PR_COPY_BUF(src, dest, pr_name); 3453 PR_COPY_FIELD(src, dest, pr_onpro); 3454 PR_COPY_FIELD(src, dest, pr_bindpro); 3455 PR_COPY_FIELD(src, dest, pr_bindpset); 3456 PR_COPY_FIELD(src, dest, pr_lgrp); 3457 } 3458 3459 void 3460 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest) 3461 { 3462 bzero(dest, sizeof (*dest)); 3463 3464 PR_COPY_FIELD(src, dest, pr_flag); 3465 PR_COPY_FIELD(src, dest, pr_nlwp); 3466 PR_COPY_FIELD(src, dest, pr_pid); 3467 PR_COPY_FIELD(src, dest, pr_ppid); 3468 PR_COPY_FIELD(src, dest, pr_pgid); 3469 
	PR_COPY_FIELD(src, dest, pr_sid);
	PR_COPY_FIELD(src, dest, pr_uid);
	PR_COPY_FIELD(src, dest, pr_euid);
	PR_COPY_FIELD(src, dest, pr_gid);
	PR_COPY_FIELD(src, dest, pr_egid);
	PR_IGNORE_FIELD(src, dest, pr_addr);
	PR_COPY_FIELD_ILP32(src, dest, pr_size);
	PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
	PR_COPY_FIELD(src, dest, pr_ttydev);
	PR_COPY_FIELD(src, dest, pr_pctcpu);
	PR_COPY_FIELD(src, dest, pr_pctmem);
	PR_COPY_TIMESPEC(src, dest, pr_start);
	PR_COPY_TIMESPEC(src, dest, pr_time);
	PR_COPY_TIMESPEC(src, dest, pr_ctime);
	PR_COPY_BUF(src, dest, pr_fname);
	PR_COPY_BUF(src, dest, pr_psargs);
	PR_COPY_FIELD(src, dest, pr_wstat);
	PR_COPY_FIELD(src, dest, pr_argc);
	PR_COPY_FIELD_ILP32(src, dest, pr_argv);
	PR_COPY_FIELD_ILP32(src, dest, pr_envp);
	PR_COPY_FIELD(src, dest, pr_dmodel);
	PR_COPY_FIELD(src, dest, pr_taskid);
	PR_COPY_FIELD(src, dest, pr_projid);
	PR_COPY_FIELD(src, dest, pr_nzomb);
	PR_COPY_FIELD(src, dest, pr_poolid);
	PR_COPY_FIELD(src, dest, pr_contract);

	lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
}

#undef	PR_COPY_FIELD
#undef	PR_COPY_FIELD_ILP32
#undef	PR_COPY_TIMESPEC
#undef	PR_COPY_BUF
#undef	PR_IGNORE_FIELD

#endif	/* _SYSCALL32_IMPL */

/*
 * This used to get called when microstate accounting was disabled but
 * microstate information was requested.  Since microstate accounting is on
 * regardless of the proc flags, this simply makes it appear to procfs that
 * microstate accounting is on.  This is relatively meaningless since you
 * can't turn it off, but this is here for the sake of appearances.
 */

/*ARGSUSED*/
void
estimate_msacct(kthread_t *t, hrtime_t curtime)
{
	proc_t *p;

	if (t == NULL)
		return;

	p = ttoproc(t);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * A system process (p0) could be referenced if the thread is
	 * in the process of exiting.  Don't turn on microstate accounting
	 * in that case.
	 */
	if (p->p_flag & SSYS)
		return;

	/*
	 * Loop through all the LWPs (kernel threads) in the process.
	 */
	t = p->p_tlist;
	do {
		t->t_proc_flag |= TP_MSACCT;
	} while ((t = t->t_forw) != p->p_tlist);

	p->p_flag |= SMSACCT;			/* set process-wide MSACCT */
}

/*
 * It's not really possible to disable microstate accounting anymore.
 * However, this routine simply turns off the ms accounting flags in a
 * process.  This way procfs can still pretend to turn microstate accounting
 * on and off for a process, but it actually doesn't do anything.  This is
 * a neutered form of preemptive idiot-proofing.
 */
void
disable_msacct(proc_t *p)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	p->p_flag &= ~SMSACCT;		/* clear process-wide MSACCT */
	/*
	 * Loop through all the LWPs (kernel threads) in the process.
	 */
	if ((t = p->p_tlist) != NULL) {
		do {
			/* clear per-thread flag */
			t->t_proc_flag &= ~TP_MSACCT;
		} while ((t = t->t_forw) != p->p_tlist);
	}
}

/*
 * Return resource usage information.
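 *
 * All times are reported in nanoseconds: the raw ms_acct values are
 * scaled by prscaleusage(), and the totals are then adjusted for time
 * spent waiting on a dispatch queue and in the current microstate.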
3576 */ 3577 void 3578 prgetusage(kthread_t *t, prhusage_t *pup) 3579 { 3580 klwp_t *lwp = ttolwp(t); 3581 hrtime_t *mstimep; 3582 struct mstate *ms = &lwp->lwp_mstate; 3583 int state; 3584 int i; 3585 hrtime_t curtime; 3586 hrtime_t waitrq; 3587 hrtime_t tmp1; 3588 3589 curtime = gethrtime_unscaled(); 3590 3591 pup->pr_lwpid = t->t_tid; 3592 pup->pr_count = 1; 3593 pup->pr_create = ms->ms_start; 3594 pup->pr_term = ms->ms_term; 3595 scalehrtime(&pup->pr_create); 3596 scalehrtime(&pup->pr_term); 3597 if (ms->ms_term == 0) { 3598 pup->pr_rtime = curtime - ms->ms_start; 3599 scalehrtime(&pup->pr_rtime); 3600 } else { 3601 pup->pr_rtime = ms->ms_term - ms->ms_start; 3602 scalehrtime(&pup->pr_rtime); 3603 } 3604 3605 3606 pup->pr_utime = ms->ms_acct[LMS_USER]; 3607 pup->pr_stime = ms->ms_acct[LMS_SYSTEM]; 3608 pup->pr_ttime = ms->ms_acct[LMS_TRAP]; 3609 pup->pr_tftime = ms->ms_acct[LMS_TFAULT]; 3610 pup->pr_dftime = ms->ms_acct[LMS_DFAULT]; 3611 pup->pr_kftime = ms->ms_acct[LMS_KFAULT]; 3612 pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 3613 pup->pr_slptime = ms->ms_acct[LMS_SLEEP]; 3614 pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 3615 pup->pr_stoptime = ms->ms_acct[LMS_STOPPED]; 3616 3617 prscaleusage(pup); 3618 3619 /* 3620 * Adjust for time waiting in the dispatcher queue. 3621 */ 3622 waitrq = t->t_waitrq; /* hopefully atomic */ 3623 if (waitrq != 0) { 3624 if (waitrq > curtime) { 3625 curtime = gethrtime_unscaled(); 3626 } 3627 tmp1 = curtime - waitrq; 3628 scalehrtime(&tmp1); 3629 pup->pr_wtime += tmp1; 3630 curtime = waitrq; 3631 } 3632 3633 /* 3634 * Adjust for time spent in current microstate. 3635 */ 3636 if (ms->ms_state_start > curtime) { 3637 curtime = gethrtime_unscaled(); 3638 } 3639 3640 i = 0; 3641 do { 3642 switch (state = t->t_mstate) { 3643 case LMS_SLEEP: 3644 /* 3645 * Update the timer for the current sleep state. 3646 */ 3647 switch (state = ms->ms_prev) { 3648 case LMS_TFAULT: 3649 case LMS_DFAULT: 3650 case LMS_KFAULT: 3651 case LMS_USER_LOCK: 3652 break; 3653 default: 3654 state = LMS_SLEEP; 3655 break; 3656 } 3657 break; 3658 case LMS_TFAULT: 3659 case LMS_DFAULT: 3660 case LMS_KFAULT: 3661 case LMS_USER_LOCK: 3662 state = LMS_SYSTEM; 3663 break; 3664 } 3665 switch (state) { 3666 case LMS_USER: mstimep = &pup->pr_utime; break; 3667 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 3668 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 3669 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 3670 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 3671 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 3672 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 3673 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 3674 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 3675 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 3676 default: panic("prgetusage: unknown microstate"); 3677 } 3678 tmp1 = curtime - ms->ms_state_start; 3679 if (tmp1 < 0) { 3680 curtime = gethrtime_unscaled(); 3681 i++; 3682 continue; 3683 } 3684 scalehrtime(&tmp1); 3685 } while (tmp1 < 0 && i < MAX_ITERS_SPIN); 3686 3687 *mstimep += tmp1; 3688 3689 /* update pup timestamp */ 3690 pup->pr_tstamp = curtime; 3691 scalehrtime(&pup->pr_tstamp); 3692 3693 /* 3694 * Resource usage counters. 
3695 */ 3696 pup->pr_minf = lwp->lwp_ru.minflt; 3697 pup->pr_majf = lwp->lwp_ru.majflt; 3698 pup->pr_nswap = lwp->lwp_ru.nswap; 3699 pup->pr_inblk = lwp->lwp_ru.inblock; 3700 pup->pr_oublk = lwp->lwp_ru.oublock; 3701 pup->pr_msnd = lwp->lwp_ru.msgsnd; 3702 pup->pr_mrcv = lwp->lwp_ru.msgrcv; 3703 pup->pr_sigs = lwp->lwp_ru.nsignals; 3704 pup->pr_vctx = lwp->lwp_ru.nvcsw; 3705 pup->pr_ictx = lwp->lwp_ru.nivcsw; 3706 pup->pr_sysc = lwp->lwp_ru.sysc; 3707 pup->pr_ioch = lwp->lwp_ru.ioch; 3708 } 3709 3710 /* 3711 * Convert ms_acct stats from unscaled high-res time to nanoseconds 3712 */ 3713 void 3714 prscaleusage(prhusage_t *usg) 3715 { 3716 scalehrtime(&usg->pr_utime); 3717 scalehrtime(&usg->pr_stime); 3718 scalehrtime(&usg->pr_ttime); 3719 scalehrtime(&usg->pr_tftime); 3720 scalehrtime(&usg->pr_dftime); 3721 scalehrtime(&usg->pr_kftime); 3722 scalehrtime(&usg->pr_ltime); 3723 scalehrtime(&usg->pr_slptime); 3724 scalehrtime(&usg->pr_wtime); 3725 scalehrtime(&usg->pr_stoptime); 3726 } 3727 3728 3729 /* 3730 * Sum resource usage information. 3731 */ 3732 void 3733 praddusage(kthread_t *t, prhusage_t *pup) 3734 { 3735 klwp_t *lwp = ttolwp(t); 3736 hrtime_t *mstimep; 3737 struct mstate *ms = &lwp->lwp_mstate; 3738 int state; 3739 int i; 3740 hrtime_t curtime; 3741 hrtime_t waitrq; 3742 hrtime_t tmp; 3743 prhusage_t conv; 3744 3745 curtime = gethrtime_unscaled(); 3746 3747 if (ms->ms_term == 0) { 3748 tmp = curtime - ms->ms_start; 3749 scalehrtime(&tmp); 3750 pup->pr_rtime += tmp; 3751 } else { 3752 tmp = ms->ms_term - ms->ms_start; 3753 scalehrtime(&tmp); 3754 pup->pr_rtime += tmp; 3755 } 3756 3757 conv.pr_utime = ms->ms_acct[LMS_USER]; 3758 conv.pr_stime = ms->ms_acct[LMS_SYSTEM]; 3759 conv.pr_ttime = ms->ms_acct[LMS_TRAP]; 3760 conv.pr_tftime = ms->ms_acct[LMS_TFAULT]; 3761 conv.pr_dftime = ms->ms_acct[LMS_DFAULT]; 3762 conv.pr_kftime = ms->ms_acct[LMS_KFAULT]; 3763 conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 3764 conv.pr_slptime = ms->ms_acct[LMS_SLEEP]; 3765 conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 3766 conv.pr_stoptime = ms->ms_acct[LMS_STOPPED]; 3767 3768 prscaleusage(&conv); 3769 3770 pup->pr_utime += conv.pr_utime; 3771 pup->pr_stime += conv.pr_stime; 3772 pup->pr_ttime += conv.pr_ttime; 3773 pup->pr_tftime += conv.pr_tftime; 3774 pup->pr_dftime += conv.pr_dftime; 3775 pup->pr_kftime += conv.pr_kftime; 3776 pup->pr_ltime += conv.pr_ltime; 3777 pup->pr_slptime += conv.pr_slptime; 3778 pup->pr_wtime += conv.pr_wtime; 3779 pup->pr_stoptime += conv.pr_stoptime; 3780 3781 /* 3782 * Adjust for time waiting in the dispatcher queue. 3783 */ 3784 waitrq = t->t_waitrq; /* hopefully atomic */ 3785 if (waitrq != 0) { 3786 if (waitrq > curtime) { 3787 curtime = gethrtime_unscaled(); 3788 } 3789 tmp = curtime - waitrq; 3790 scalehrtime(&tmp); 3791 pup->pr_wtime += tmp; 3792 curtime = waitrq; 3793 } 3794 3795 /* 3796 * Adjust for time spent in current microstate. 3797 */ 3798 if (ms->ms_state_start > curtime) { 3799 curtime = gethrtime_unscaled(); 3800 } 3801 3802 i = 0; 3803 do { 3804 switch (state = t->t_mstate) { 3805 case LMS_SLEEP: 3806 /* 3807 * Update the timer for the current sleep state. 
3808 */ 3809 switch (state = ms->ms_prev) { 3810 case LMS_TFAULT: 3811 case LMS_DFAULT: 3812 case LMS_KFAULT: 3813 case LMS_USER_LOCK: 3814 break; 3815 default: 3816 state = LMS_SLEEP; 3817 break; 3818 } 3819 break; 3820 case LMS_TFAULT: 3821 case LMS_DFAULT: 3822 case LMS_KFAULT: 3823 case LMS_USER_LOCK: 3824 state = LMS_SYSTEM; 3825 break; 3826 } 3827 switch (state) { 3828 case LMS_USER: mstimep = &pup->pr_utime; break; 3829 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 3830 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 3831 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 3832 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 3833 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 3834 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 3835 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 3836 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 3837 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 3838 default: panic("praddusage: unknown microstate"); 3839 } 3840 tmp = curtime - ms->ms_state_start; 3841 if (tmp < 0) { 3842 curtime = gethrtime_unscaled(); 3843 i++; 3844 continue; 3845 } 3846 scalehrtime(&tmp); 3847 } while (tmp < 0 && i < MAX_ITERS_SPIN); 3848 3849 *mstimep += tmp; 3850 3851 /* update pup timestamp */ 3852 pup->pr_tstamp = curtime; 3853 scalehrtime(&pup->pr_tstamp); 3854 3855 /* 3856 * Resource usage counters. 3857 */ 3858 pup->pr_minf += lwp->lwp_ru.minflt; 3859 pup->pr_majf += lwp->lwp_ru.majflt; 3860 pup->pr_nswap += lwp->lwp_ru.nswap; 3861 pup->pr_inblk += lwp->lwp_ru.inblock; 3862 pup->pr_oublk += lwp->lwp_ru.oublock; 3863 pup->pr_msnd += lwp->lwp_ru.msgsnd; 3864 pup->pr_mrcv += lwp->lwp_ru.msgrcv; 3865 pup->pr_sigs += lwp->lwp_ru.nsignals; 3866 pup->pr_vctx += lwp->lwp_ru.nvcsw; 3867 pup->pr_ictx += lwp->lwp_ru.nivcsw; 3868 pup->pr_sysc += lwp->lwp_ru.sysc; 3869 pup->pr_ioch += lwp->lwp_ru.ioch; 3870 } 3871 3872 /* 3873 * Convert a prhusage_t to a prusage_t. 3874 * This means convert each hrtime_t to a timestruc_t 3875 * and copy the count fields uint64_t => ulong_t. 
3876 */ 3877 void 3878 prcvtusage(prhusage_t *pup, prusage_t *upup) 3879 { 3880 uint64_t *ullp; 3881 ulong_t *ulp; 3882 int i; 3883 3884 upup->pr_lwpid = pup->pr_lwpid; 3885 upup->pr_count = pup->pr_count; 3886 3887 hrt2ts(pup->pr_tstamp, &upup->pr_tstamp); 3888 hrt2ts(pup->pr_create, &upup->pr_create); 3889 hrt2ts(pup->pr_term, &upup->pr_term); 3890 hrt2ts(pup->pr_rtime, &upup->pr_rtime); 3891 hrt2ts(pup->pr_utime, &upup->pr_utime); 3892 hrt2ts(pup->pr_stime, &upup->pr_stime); 3893 hrt2ts(pup->pr_ttime, &upup->pr_ttime); 3894 hrt2ts(pup->pr_tftime, &upup->pr_tftime); 3895 hrt2ts(pup->pr_dftime, &upup->pr_dftime); 3896 hrt2ts(pup->pr_kftime, &upup->pr_kftime); 3897 hrt2ts(pup->pr_ltime, &upup->pr_ltime); 3898 hrt2ts(pup->pr_slptime, &upup->pr_slptime); 3899 hrt2ts(pup->pr_wtime, &upup->pr_wtime); 3900 hrt2ts(pup->pr_stoptime, &upup->pr_stoptime); 3901 bzero(upup->filltime, sizeof (upup->filltime)); 3902 3903 ullp = &pup->pr_minf; 3904 ulp = &upup->pr_minf; 3905 for (i = 0; i < 22; i++) 3906 *ulp++ = (ulong_t)*ullp++; 3907 } 3908 3909 #ifdef _SYSCALL32_IMPL 3910 void 3911 prcvtusage32(prhusage_t *pup, prusage32_t *upup) 3912 { 3913 uint64_t *ullp; 3914 uint32_t *ulp; 3915 int i; 3916 3917 upup->pr_lwpid = pup->pr_lwpid; 3918 upup->pr_count = pup->pr_count; 3919 3920 hrt2ts32(pup->pr_tstamp, &upup->pr_tstamp); 3921 hrt2ts32(pup->pr_create, &upup->pr_create); 3922 hrt2ts32(pup->pr_term, &upup->pr_term); 3923 hrt2ts32(pup->pr_rtime, &upup->pr_rtime); 3924 hrt2ts32(pup->pr_utime, &upup->pr_utime); 3925 hrt2ts32(pup->pr_stime, &upup->pr_stime); 3926 hrt2ts32(pup->pr_ttime, &upup->pr_ttime); 3927 hrt2ts32(pup->pr_tftime, &upup->pr_tftime); 3928 hrt2ts32(pup->pr_dftime, &upup->pr_dftime); 3929 hrt2ts32(pup->pr_kftime, &upup->pr_kftime); 3930 hrt2ts32(pup->pr_ltime, &upup->pr_ltime); 3931 hrt2ts32(pup->pr_slptime, &upup->pr_slptime); 3932 hrt2ts32(pup->pr_wtime, &upup->pr_wtime); 3933 hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime); 3934 bzero(upup->filltime, sizeof (upup->filltime)); 3935 3936 ullp = &pup->pr_minf; 3937 ulp = &upup->pr_minf; 3938 for (i = 0; i < 22; i++) 3939 *ulp++ = (uint32_t)*ullp++; 3940 } 3941 #endif /* _SYSCALL32_IMPL */ 3942 3943 /* 3944 * Determine whether a set is empty. 3945 */ 3946 int 3947 setisempty(uint32_t *sp, uint_t n) 3948 { 3949 while (n--) 3950 if (*sp++) 3951 return (0); 3952 return (1); 3953 } 3954 3955 /* 3956 * Utility routine for establishing a watched area in the process. 3957 * Keep the list of watched areas sorted by virtual address. 3958 */ 3959 int 3960 set_watched_area(proc_t *p, struct watched_area *pwa) 3961 { 3962 caddr_t vaddr = pwa->wa_vaddr; 3963 caddr_t eaddr = pwa->wa_eaddr; 3964 ulong_t flags = pwa->wa_flags; 3965 struct watched_area *target; 3966 avl_index_t where; 3967 int error = 0; 3968 3969 /* we must not be holding p->p_lock, but the process must be locked */ 3970 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 3971 ASSERT(p->p_proc_flag & P_PR_LOCK); 3972 3973 /* 3974 * If this is our first watchpoint, enable watchpoints for the process. 3975 */ 3976 if (!pr_watch_active(p)) { 3977 kthread_t *t; 3978 3979 mutex_enter(&p->p_lock); 3980 if ((t = p->p_tlist) != NULL) { 3981 do { 3982 watch_enable(t); 3983 } while ((t = t->t_forw) != p->p_tlist); 3984 } 3985 mutex_exit(&p->p_lock); 3986 } 3987 3988 target = pr_find_watched_area(p, pwa, &where); 3989 if (target != NULL) { 3990 /* 3991 * We discovered an existing, overlapping watched area. 3992 * Allow it only if it is an exact match. 
3993 */ 3994 if (target->wa_vaddr != vaddr || 3995 target->wa_eaddr != eaddr) 3996 error = EINVAL; 3997 else if (target->wa_flags != flags) { 3998 error = set_watched_page(p, vaddr, eaddr, 3999 flags, target->wa_flags); 4000 target->wa_flags = flags; 4001 } 4002 kmem_free(pwa, sizeof (struct watched_area)); 4003 } else { 4004 avl_insert(&p->p_warea, pwa, where); 4005 error = set_watched_page(p, vaddr, eaddr, flags, 0); 4006 } 4007 4008 return (error); 4009 } 4010 4011 /* 4012 * Utility routine for clearing a watched area in the process. 4013 * Must be an exact match of the virtual address. 4014 * size and flags don't matter. 4015 */ 4016 int 4017 clear_watched_area(proc_t *p, struct watched_area *pwa) 4018 { 4019 struct watched_area *found; 4020 4021 /* we must not be holding p->p_lock, but the process must be locked */ 4022 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 4023 ASSERT(p->p_proc_flag & P_PR_LOCK); 4024 4025 4026 if (!pr_watch_active(p)) { 4027 kmem_free(pwa, sizeof (struct watched_area)); 4028 return (0); 4029 } 4030 4031 /* 4032 * Look for a matching address in the watched areas. If a match is 4033 * found, clear the old watched area and adjust the watched page(s). It 4034 * is not an error if there is no match. 4035 */ 4036 if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL && 4037 found->wa_vaddr == pwa->wa_vaddr) { 4038 clear_watched_page(p, found->wa_vaddr, found->wa_eaddr, 4039 found->wa_flags); 4040 avl_remove(&p->p_warea, found); 4041 kmem_free(found, sizeof (struct watched_area)); 4042 } 4043 4044 kmem_free(pwa, sizeof (struct watched_area)); 4045 4046 /* 4047 * If we removed the last watched area from the process, disable 4048 * watchpoints. 4049 */ 4050 if (!pr_watch_active(p)) { 4051 kthread_t *t; 4052 4053 mutex_enter(&p->p_lock); 4054 if ((t = p->p_tlist) != NULL) { 4055 do { 4056 watch_disable(t); 4057 } while ((t = t->t_forw) != p->p_tlist); 4058 } 4059 mutex_exit(&p->p_lock); 4060 } 4061 4062 return (0); 4063 } 4064 4065 /* 4066 * Frees all the watched_area structures 4067 */ 4068 void 4069 pr_free_watchpoints(proc_t *p) 4070 { 4071 struct watched_area *delp; 4072 void *cookie; 4073 4074 cookie = NULL; 4075 while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL) 4076 kmem_free(delp, sizeof (struct watched_area)); 4077 4078 avl_destroy(&p->p_warea); 4079 } 4080 4081 /* 4082 * This one is called by the traced process to unwatch all the 4083 * pages while deallocating the list of watched_page structs. 
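 * Any page whose protections were changed for watching is restored
 * to its original protections (wp_oprot) via SEGOP_SETPROT before
 * its watched_page struct is freed.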
4084 */ 4085 void 4086 pr_free_watched_pages(proc_t *p) 4087 { 4088 struct as *as = p->p_as; 4089 struct watched_page *pwp; 4090 uint_t prot; 4091 int retrycnt, err; 4092 void *cookie; 4093 4094 if (as == NULL || avl_numnodes(&as->a_wpage) == 0) 4095 return; 4096 4097 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 4098 AS_LOCK_ENTER(as, RW_WRITER); 4099 4100 pwp = avl_first(&as->a_wpage); 4101 4102 cookie = NULL; 4103 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) { 4104 retrycnt = 0; 4105 if ((prot = pwp->wp_oprot) != 0) { 4106 caddr_t addr = pwp->wp_vaddr; 4107 struct seg *seg; 4108 retry: 4109 4110 if ((pwp->wp_prot != prot || 4111 (pwp->wp_flags & WP_NOWATCH)) && 4112 (seg = as_segat(as, addr)) != NULL) { 4113 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot); 4114 if (err == IE_RETRY) { 4115 ASSERT(retrycnt == 0); 4116 retrycnt++; 4117 goto retry; 4118 } 4119 } 4120 } 4121 kmem_free(pwp, sizeof (struct watched_page)); 4122 } 4123 4124 avl_destroy(&as->a_wpage); 4125 p->p_wprot = NULL; 4126 4127 AS_LOCK_EXIT(as); 4128 } 4129 4130 /* 4131 * Insert a watched area into the list of watched pages. 4132 * If oflags is zero then we are adding a new watched area. 4133 * Otherwise we are changing the flags of an existing watched area. 4134 */ 4135 static int 4136 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, 4137 ulong_t flags, ulong_t oflags) 4138 { 4139 struct as *as = p->p_as; 4140 avl_tree_t *pwp_tree; 4141 struct watched_page *pwp, *newpwp; 4142 struct watched_page tpw; 4143 avl_index_t where; 4144 struct seg *seg; 4145 uint_t prot; 4146 caddr_t addr; 4147 4148 /* 4149 * We need to pre-allocate a list of structures before we grab the 4150 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks 4151 * held. 4152 */ 4153 newpwp = NULL; 4154 for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 4155 addr < eaddr; addr += PAGESIZE) { 4156 pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP); 4157 pwp->wp_list = newpwp; 4158 newpwp = pwp; 4159 } 4160 4161 AS_LOCK_ENTER(as, RW_WRITER); 4162 4163 /* 4164 * Search for an existing watched page to contain the watched area. 4165 * If none is found, grab a new one from the available list 4166 * and insert it in the active list, keeping the list sorted 4167 * by user-level virtual address. 
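 * Note that a process waiting in vfork() (SVFWAIT), whose address
 * space is on loan to the child, does its bookkeeping in p_wpage
 * rather than in the address space's a_wpage tree.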
4168 */ 4169 if (p->p_flag & SVFWAIT) 4170 pwp_tree = &p->p_wpage; 4171 else 4172 pwp_tree = &as->a_wpage; 4173 4174 again: 4175 if (avl_numnodes(pwp_tree) > prnwatch) { 4176 AS_LOCK_EXIT(as); 4177 while (newpwp != NULL) { 4178 pwp = newpwp->wp_list; 4179 kmem_free(newpwp, sizeof (struct watched_page)); 4180 newpwp = pwp; 4181 } 4182 return (E2BIG); 4183 } 4184 4185 tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 4186 if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) { 4187 pwp = newpwp; 4188 newpwp = newpwp->wp_list; 4189 pwp->wp_list = NULL; 4190 pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr & 4191 (uintptr_t)PAGEMASK); 4192 avl_insert(pwp_tree, pwp, where); 4193 } 4194 4195 ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE); 4196 4197 if (oflags & WA_READ) 4198 pwp->wp_read--; 4199 if (oflags & WA_WRITE) 4200 pwp->wp_write--; 4201 if (oflags & WA_EXEC) 4202 pwp->wp_exec--; 4203 4204 ASSERT(pwp->wp_read >= 0); 4205 ASSERT(pwp->wp_write >= 0); 4206 ASSERT(pwp->wp_exec >= 0); 4207 4208 if (flags & WA_READ) 4209 pwp->wp_read++; 4210 if (flags & WA_WRITE) 4211 pwp->wp_write++; 4212 if (flags & WA_EXEC) 4213 pwp->wp_exec++; 4214 4215 if (!(p->p_flag & SVFWAIT)) { 4216 vaddr = pwp->wp_vaddr; 4217 if (pwp->wp_oprot == 0 && 4218 (seg = as_segat(as, vaddr)) != NULL) { 4219 SEGOP_GETPROT(seg, vaddr, 0, &prot); 4220 pwp->wp_oprot = (uchar_t)prot; 4221 pwp->wp_prot = (uchar_t)prot; 4222 } 4223 if (pwp->wp_oprot != 0) { 4224 prot = pwp->wp_oprot; 4225 if (pwp->wp_read) 4226 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 4227 if (pwp->wp_write) 4228 prot &= ~PROT_WRITE; 4229 if (pwp->wp_exec) 4230 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 4231 if (!(pwp->wp_flags & WP_NOWATCH) && 4232 pwp->wp_prot != prot && 4233 (pwp->wp_flags & WP_SETPROT) == 0) { 4234 pwp->wp_flags |= WP_SETPROT; 4235 pwp->wp_list = p->p_wprot; 4236 p->p_wprot = pwp; 4237 } 4238 pwp->wp_prot = (uchar_t)prot; 4239 } 4240 } 4241 4242 /* 4243 * If the watched area extends into the next page then do 4244 * it over again with the virtual address of the next page. 4245 */ 4246 if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr) 4247 goto again; 4248 4249 AS_LOCK_EXIT(as); 4250 4251 /* 4252 * Free any pages we may have over-allocated 4253 */ 4254 while (newpwp != NULL) { 4255 pwp = newpwp->wp_list; 4256 kmem_free(newpwp, sizeof (struct watched_page)); 4257 newpwp = pwp; 4258 } 4259 4260 return (0); 4261 } 4262 4263 /* 4264 * Remove a watched area from the list of watched pages. 4265 * A watched area may extend over more than one page. 4266 */ 4267 static void 4268 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags) 4269 { 4270 struct as *as = p->p_as; 4271 struct watched_page *pwp; 4272 struct watched_page tpw; 4273 avl_tree_t *tree; 4274 avl_index_t where; 4275 4276 AS_LOCK_ENTER(as, RW_WRITER); 4277 4278 if (p->p_flag & SVFWAIT) 4279 tree = &p->p_wpage; 4280 else 4281 tree = &as->a_wpage; 4282 4283 tpw.wp_vaddr = vaddr = 4284 (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 4285 pwp = avl_find(tree, &tpw, &where); 4286 if (pwp == NULL) 4287 pwp = avl_nearest(tree, where, AVL_AFTER); 4288 4289 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 4290 ASSERT(vaddr <= pwp->wp_vaddr); 4291 4292 if (flags & WA_READ) 4293 pwp->wp_read--; 4294 if (flags & WA_WRITE) 4295 pwp->wp_write--; 4296 if (flags & WA_EXEC) 4297 pwp->wp_exec--; 4298 4299 if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) { 4300 /* 4301 * Reset the hat layer's protections on this page. 
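 * The protections are re-derived from wp_oprot: a remaining read
 * watch strips read, write and execute access, a write watch strips
 * only write access, and an exec watch again strips all three. A
 * page whose protections change is queued on p_wprot so its hat
 * protections can be updated later.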
4302 */
4303 if (pwp->wp_oprot != 0) {
4304 uint_t prot = pwp->wp_oprot;
4305
4306 if (pwp->wp_read)
4307 prot &=
4308 ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4309 if (pwp->wp_write)
4310 prot &= ~PROT_WRITE;
4311 if (pwp->wp_exec)
4312 prot &=
4313 ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4314 if (!(pwp->wp_flags & WP_NOWATCH) &&
4315 pwp->wp_prot != prot &&
4316 (pwp->wp_flags & WP_SETPROT) == 0) {
4317 pwp->wp_flags |= WP_SETPROT;
4318 pwp->wp_list = p->p_wprot;
4319 p->p_wprot = pwp;
4320 }
4321 pwp->wp_prot = (uchar_t)prot;
4322 }
4323 } else {
4324 /*
4325 * No watched areas remain in this page.
4326 * Reset everything to normal.
4327 */
4328 if (pwp->wp_oprot != 0) {
4329 pwp->wp_prot = pwp->wp_oprot;
4330 if ((pwp->wp_flags & WP_SETPROT) == 0) {
4331 pwp->wp_flags |= WP_SETPROT;
4332 pwp->wp_list = p->p_wprot;
4333 p->p_wprot = pwp;
4334 }
4335 }
4336 }
4337
4338 pwp = AVL_NEXT(tree, pwp);
4339 }
4340
4341 AS_LOCK_EXIT(as);
4342 }
4343
4344 /*
4345 * Return the original protections for the specified page.
4346 */
4347 static void
4348 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
4349 {
4350 struct watched_page *pwp;
4351 struct watched_page tpw;
4352
4353 ASSERT(AS_LOCK_HELD(as));
4354
4355 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
4356 if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
4357 *prot = pwp->wp_oprot;
4358 }
4359
4360 static prpagev_t *
4361 pr_pagev_create(struct seg *seg, int check_noreserve)
4362 {
4363 prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
4364 size_t total_pages = seg_pages(seg);
4365
4366 /*
4367 * Limit the size of our vectors to pagev_lim pages at a time. We need
4368 * 4 or 5 bytes of storage per page, so this means we limit ourselves
4369 * to about a megabyte of kernel heap by default.
4370 */
4371 pagev->pg_npages = MIN(total_pages, pagev_lim);
4372 pagev->pg_pnbase = 0;
4373
4374 pagev->pg_protv =
4375 kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
4376
4377 if (check_noreserve)
4378 pagev->pg_incore =
4379 kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
4380 else
4381 pagev->pg_incore = NULL;
4382
4383 return (pagev);
4384 }
4385
4386 static void
4387 pr_pagev_destroy(prpagev_t *pagev)
4388 {
4389 if (pagev->pg_incore != NULL)
4390 kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
4391
4392 kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
4393 kmem_free(pagev, sizeof (prpagev_t));
4394 }
4395
4396 static caddr_t
4397 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
4398 {
4399 ulong_t lastpg = seg_page(seg, eaddr - 1);
4400 ulong_t pn, pnlim;
4401 caddr_t saddr;
4402 size_t len;
4403
4404 ASSERT(addr >= seg->s_base && addr <= eaddr);
4405
4406 if (addr == eaddr)
4407 return (eaddr);
4408
4409 refill:
4410 ASSERT(addr < eaddr);
4411 pagev->pg_pnbase = seg_page(seg, addr);
4412 pnlim = pagev->pg_pnbase + pagev->pg_npages;
4413 saddr = addr;
4414
4415 if (lastpg < pnlim)
4416 len = (size_t)(eaddr - addr);
4417 else
4418 len = pagev->pg_npages * PAGESIZE;
4419
4420 if (pagev->pg_incore != NULL) {
4421 /*
4422 * INCORE cleverly has different semantics than GETPROT:
4423 * it returns info on pages up to but NOT including addr + len.
4424 */
4425 SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
4426 pn = pagev->pg_pnbase;
4427
4428 do {
4429 /*
4430 * Guilty knowledge here: We know that segvn_incore
4431 * returns more than just the low-order bit that
4432 * indicates the page is actually in memory.
If any 4433 * bits are set, then the page has backing store. 4434 */ 4435 if (pagev->pg_incore[pn++ - pagev->pg_pnbase]) 4436 goto out; 4437 4438 } while ((addr += PAGESIZE) < eaddr && pn < pnlim); 4439 4440 /* 4441 * If we examined all the pages in the vector but we're not 4442 * at the end of the segment, take another lap. 4443 */ 4444 if (addr < eaddr) 4445 goto refill; 4446 } 4447 4448 /* 4449 * Need to take len - 1 because addr + len is the address of the 4450 * first byte of the page just past the end of what we want. 4451 */ 4452 out: 4453 SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv); 4454 return (addr); 4455 } 4456 4457 static caddr_t 4458 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg, 4459 caddr_t *saddrp, caddr_t eaddr, uint_t *protp) 4460 { 4461 /* 4462 * Our starting address is either the specified address, or the base 4463 * address from the start of the pagev. If the latter is greater, 4464 * this means a previous call to pr_pagev_fill has already scanned 4465 * further than the end of the previous mapping. 4466 */ 4467 caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE; 4468 caddr_t addr = MAX(*saddrp, base); 4469 ulong_t pn = seg_page(seg, addr); 4470 uint_t prot, nprot; 4471 4472 /* 4473 * If we're dealing with noreserve pages, then advance addr to 4474 * the address of the next page which has backing store. 4475 */ 4476 if (pagev->pg_incore != NULL) { 4477 while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) { 4478 if ((addr += PAGESIZE) == eaddr) { 4479 *saddrp = addr; 4480 prot = 0; 4481 goto out; 4482 } 4483 if (++pn == pagev->pg_pnbase + pagev->pg_npages) { 4484 addr = pr_pagev_fill(pagev, seg, addr, eaddr); 4485 if (addr == eaddr) { 4486 *saddrp = addr; 4487 prot = 0; 4488 goto out; 4489 } 4490 pn = seg_page(seg, addr); 4491 } 4492 } 4493 } 4494 4495 /* 4496 * Get the protections on the page corresponding to addr. 4497 */ 4498 pn = seg_page(seg, addr); 4499 ASSERT(pn >= pagev->pg_pnbase); 4500 ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages)); 4501 4502 prot = pagev->pg_protv[pn - pagev->pg_pnbase]; 4503 getwatchprot(seg->s_as, addr, &prot); 4504 *saddrp = addr; 4505 4506 /* 4507 * Now loop until we find a backed page with different protections 4508 * or we reach the end of this segment. 4509 */ 4510 while ((addr += PAGESIZE) < eaddr) { 4511 /* 4512 * If pn has advanced to the page number following what we 4513 * have information on, refill the page vector and reset 4514 * addr and pn. If pr_pagev_fill does not return the 4515 * address of the next page, we have a discontiguity and 4516 * thus have reached the end of the current mapping. 4517 */ 4518 if (++pn == pagev->pg_pnbase + pagev->pg_npages) { 4519 caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr); 4520 if (naddr != addr) 4521 goto out; 4522 pn = seg_page(seg, addr); 4523 } 4524 4525 /* 4526 * The previous page's protections are in prot, and it has 4527 * backing. If this page is MAP_NORESERVE and has no backing, 4528 * then end this mapping and return the previous protections. 4529 */ 4530 if (pagev->pg_incore != NULL && 4531 pagev->pg_incore[pn - pagev->pg_pnbase] == 0) 4532 break; 4533 4534 /* 4535 * Otherwise end the mapping if this page's protections (nprot) 4536 * are different than those in the previous page (prot). 
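 * (getwatchprot() substitutes the saved pre-watchpoint protections,
 * so an active watchpoint does not artificially split the mapping.)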
4537 */
4538 nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
4539 getwatchprot(seg->s_as, addr, &nprot);
4540
4541 if (nprot != prot)
4542 break;
4543 }
4544
4545 out:
4546 *protp = prot;
4547 return (addr);
4548 }
4549
4550 size_t
4551 pr_getsegsize(struct seg *seg, int reserved)
4552 {
4553 size_t size = seg->s_size;
4554
4555 /*
4556 * If we're interested in the reserved space, return the size of the
4557 * segment itself. Everything else in this function is a special case
4558 * to determine the actual underlying size of various segment types.
4559 */
4560 if (reserved)
4561 return (size);
4562
4563 /*
4564 * If this is a segvn mapping of a regular file, return the smaller
4565 * of the segment size and the remaining size of the file beyond
4566 * the file offset corresponding to seg->s_base.
4567 */
4568 if (seg->s_ops == &segvn_ops) {
4569 vattr_t vattr;
4570 vnode_t *vp;
4571
4572 vattr.va_mask = AT_SIZE;
4573
4574 if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
4575 vp != NULL && vp->v_type == VREG &&
4576 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
4577
4578 u_offset_t fsize = vattr.va_size;
4579 u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
4580
4581 if (fsize < offset)
4582 fsize = 0;
4583 else
4584 fsize -= offset;
4585
4586 fsize = roundup(fsize, (u_offset_t)PAGESIZE);
4587
4588 if (fsize < (u_offset_t)size)
4589 size = (size_t)fsize;
4590 }
4591
4592 return (size);
4593 }
4594
4595 /*
4596 * If this is an ISM shared segment, don't include pages that are
4597 * beyond the real size of the spt segment that backs it.
4598 */
4599 if (seg->s_ops == &segspt_shmops)
4600 return (MIN(spt_realsize(seg), size));
4601
4602 /*
4603 * If this segment is a mapping from /dev/null, then it is a
4604 * reservation of virtual address space and has no actual size.
4605 * Such segments are backed by segdev and have type set to neither
4606 * MAP_SHARED nor MAP_PRIVATE.
4607 */
4608 if (seg->s_ops == &segdev_ops &&
4609 ((SEGOP_GETTYPE(seg, seg->s_base) &
4610 (MAP_SHARED | MAP_PRIVATE)) == 0))
4611 return (0);
4612
4613 /*
4614 * If this segment doesn't match one of the special types we handle,
4615 * just return the size of the segment itself.
4616 */
4617 return (size);
4618 }
4619
4620 uint_t
4621 pr_getprot(struct seg *seg, int reserved, void **tmp,
4622 caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
4623 {
4624 struct as *as = seg->s_as;
4625
4626 caddr_t saddr = *saddrp;
4627 caddr_t naddr;
4628
4629 int check_noreserve;
4630 uint_t prot;
4631
4632 union {
4633 struct segvn_data *svd;
4634 struct segdev_data *sdp;
4635 void *data;
4636 } s;
4637
4638 s.data = seg->s_data;
4639
4640 ASSERT(AS_WRITE_HELD(as));
4641 ASSERT(saddr >= seg->s_base && saddr < eaddr);
4642 ASSERT(eaddr <= seg->s_base + seg->s_size);
4643
4644 /*
4645 * Don't include MAP_NORESERVE pages in the address range
4646 * unless their mappings have actually materialized.
4647 * We cheat by knowing that segvn is the only segment
4648 * driver that supports MAP_NORESERVE.
4649 */
4650 check_noreserve =
4651 (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
4652 (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
4653 (s.svd->flags & MAP_NORESERVE));
4654
4655 /*
4656 * Examine every page only as a last resort. We use guilty knowledge
4657 * of segvn and segdev to avoid this: if there are no per-page
4658 * protections present in the segment and we don't care about
4659 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
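 * In that case the whole range up to eaddr can be reported as a
 * single run of identical protections without building a page
 * vector at all.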
4660 */ 4661 if (!check_noreserve && saddr == seg->s_base && 4662 seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) { 4663 prot = s.svd->prot; 4664 getwatchprot(as, saddr, &prot); 4665 naddr = eaddr; 4666 4667 } else if (saddr == seg->s_base && seg->s_ops == &segdev_ops && 4668 s.sdp != NULL && s.sdp->pageprot == 0) { 4669 prot = s.sdp->prot; 4670 getwatchprot(as, saddr, &prot); 4671 naddr = eaddr; 4672 4673 } else { 4674 prpagev_t *pagev; 4675 4676 /* 4677 * If addr is sitting at the start of the segment, then 4678 * create a page vector to store protection and incore 4679 * information for pages in the segment, and fill it. 4680 * Otherwise, we expect *tmp to address the prpagev_t 4681 * allocated by a previous call to this function. 4682 */ 4683 if (saddr == seg->s_base) { 4684 pagev = pr_pagev_create(seg, check_noreserve); 4685 saddr = pr_pagev_fill(pagev, seg, saddr, eaddr); 4686 4687 ASSERT(*tmp == NULL); 4688 *tmp = pagev; 4689 4690 ASSERT(saddr <= eaddr); 4691 *saddrp = saddr; 4692 4693 if (saddr == eaddr) { 4694 naddr = saddr; 4695 prot = 0; 4696 goto out; 4697 } 4698 4699 } else { 4700 ASSERT(*tmp != NULL); 4701 pagev = (prpagev_t *)*tmp; 4702 } 4703 4704 naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot); 4705 ASSERT(naddr <= eaddr); 4706 } 4707 4708 out: 4709 if (naddr == eaddr) 4710 pr_getprot_done(tmp); 4711 *naddrp = naddr; 4712 return (prot); 4713 } 4714 4715 void 4716 pr_getprot_done(void **tmp) 4717 { 4718 if (*tmp != NULL) { 4719 pr_pagev_destroy((prpagev_t *)*tmp); 4720 *tmp = NULL; 4721 } 4722 } 4723 4724 /* 4725 * Return true iff the vnode is a /proc file from the object directory. 4726 */ 4727 int 4728 pr_isobject(vnode_t *vp) 4729 { 4730 return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT); 4731 } 4732 4733 /* 4734 * Return true iff the vnode is a /proc file opened by the process itself. 4735 */ 4736 int 4737 pr_isself(vnode_t *vp) 4738 { 4739 /* 4740 * XXX: To retain binary compatibility with the old 4741 * ioctl()-based version of /proc, we exempt self-opens 4742 * of /proc/<pid> from being marked close-on-exec. 4743 */ 4744 return (vn_matchops(vp, prvnodeops) && 4745 (VTOP(vp)->pr_flags & PR_ISSELF) && 4746 VTOP(vp)->pr_type != PR_PIDDIR); 4747 } 4748 4749 static ssize_t 4750 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr) 4751 { 4752 ssize_t pagesize, hatsize; 4753 4754 ASSERT(AS_WRITE_HELD(seg->s_as)); 4755 ASSERT(IS_P2ALIGNED(saddr, PAGESIZE)); 4756 ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE)); 4757 ASSERT(saddr < eaddr); 4758 4759 pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr); 4760 ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize)); 4761 ASSERT(pagesize != 0); 4762 4763 if (pagesize == -1) 4764 pagesize = PAGESIZE; 4765 4766 saddr += P2NPHASE((uintptr_t)saddr, pagesize); 4767 4768 while (saddr < eaddr) { 4769 if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr)) 4770 break; 4771 ASSERT(IS_P2ALIGNED(saddr, pagesize)); 4772 saddr += pagesize; 4773 } 4774 4775 *naddrp = ((saddr < eaddr) ? saddr : eaddr); 4776 return (hatsize); 4777 } 4778 4779 /* 4780 * Return an array of structures with extended memory map information. 4781 * We allocate here; the caller must deallocate. 
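 * Entries are accumulated on iolhead via pr_iol_newbuf(); once the
 * address space lock is dropped, the caller is expected to copy the
 * list out and free it (typically via the pr_iol_* copyout-and-free
 * helpers).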
4782 */ 4783 int 4784 prgetxmap(proc_t *p, list_t *iolhead) 4785 { 4786 struct as *as = p->p_as; 4787 prxmap_t *mp; 4788 struct seg *seg; 4789 struct seg *brkseg, *stkseg; 4790 struct vnode *vp; 4791 struct vattr vattr; 4792 uint_t prot; 4793 4794 ASSERT(as != &kas && AS_WRITE_HELD(as)); 4795 4796 /* 4797 * Request an initial buffer size that doesn't waste memory 4798 * if the address space has only a small number of segments. 4799 */ 4800 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 4801 4802 if ((seg = AS_SEGFIRST(as)) == NULL) 4803 return (0); 4804 4805 brkseg = break_seg(p); 4806 stkseg = as_segat(as, prgetstackbase(p)); 4807 4808 do { 4809 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 4810 caddr_t saddr, naddr, baddr; 4811 void *tmp = NULL; 4812 ssize_t psz; 4813 char *parr; 4814 uint64_t npages; 4815 uint64_t pagenum; 4816 4817 if ((seg->s_flags & S_HOLE) != 0) { 4818 continue; 4819 } 4820 /* 4821 * Segment loop part one: iterate from the base of the segment 4822 * to its end, pausing at each address boundary (baddr) between 4823 * ranges that have different virtual memory protections. 4824 */ 4825 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { 4826 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); 4827 ASSERT(baddr >= saddr && baddr <= eaddr); 4828 4829 /* 4830 * Segment loop part two: iterate from the current 4831 * position to the end of the protection boundary, 4832 * pausing at each address boundary (naddr) between 4833 * ranges that have different underlying page sizes. 4834 */ 4835 for (; saddr < baddr; saddr = naddr) { 4836 psz = pr_getpagesize(seg, saddr, &naddr, baddr); 4837 ASSERT(naddr >= saddr && naddr <= baddr); 4838 4839 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 4840 4841 mp->pr_vaddr = (uintptr_t)saddr; 4842 mp->pr_size = naddr - saddr; 4843 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 4844 mp->pr_mflags = 0; 4845 if (prot & PROT_READ) 4846 mp->pr_mflags |= MA_READ; 4847 if (prot & PROT_WRITE) 4848 mp->pr_mflags |= MA_WRITE; 4849 if (prot & PROT_EXEC) 4850 mp->pr_mflags |= MA_EXEC; 4851 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 4852 mp->pr_mflags |= MA_SHARED; 4853 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 4854 mp->pr_mflags |= MA_NORESERVE; 4855 if (seg->s_ops == &segspt_shmops || 4856 (seg->s_ops == &segvn_ops && 4857 (SEGOP_GETVP(seg, saddr, &vp) != 0 || 4858 vp == NULL))) 4859 mp->pr_mflags |= MA_ANON; 4860 if (seg == brkseg) 4861 mp->pr_mflags |= MA_BREAK; 4862 else if (seg == stkseg) 4863 mp->pr_mflags |= MA_STACK; 4864 if (seg->s_ops == &segspt_shmops) 4865 mp->pr_mflags |= MA_ISM | MA_SHM; 4866 4867 mp->pr_pagesize = PAGESIZE; 4868 if (psz == -1) { 4869 mp->pr_hatpagesize = 0; 4870 } else { 4871 mp->pr_hatpagesize = psz; 4872 } 4873 4874 /* 4875 * Manufacture a filename for the "object" dir. 4876 */ 4877 mp->pr_dev = PRNODEV; 4878 vattr.va_mask = AT_FSID|AT_NODEID; 4879 if (seg->s_ops == &segvn_ops && 4880 SEGOP_GETVP(seg, saddr, &vp) == 0 && 4881 vp != NULL && vp->v_type == VREG && 4882 VOP_GETATTR(vp, &vattr, 0, CRED(), 4883 NULL) == 0) { 4884 mp->pr_dev = vattr.va_fsid; 4885 mp->pr_ino = vattr.va_nodeid; 4886 if (vp == p->p_exec) 4887 (void) strcpy(mp->pr_mapname, 4888 "a.out"); 4889 else 4890 pr_object_name(mp->pr_mapname, 4891 vp, &vattr); 4892 } 4893 4894 /* 4895 * Get the SysV shared memory id, if any. 
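 * shmgetid() yields SHMID_NONE when the mapping is not SysV shared
 * memory at all; SHMID_FREE means the id has been removed while the
 * mapping persists, which is reported to the consumer as -1.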
4896 */ 4897 if ((mp->pr_mflags & MA_SHARED) && 4898 p->p_segacct && (mp->pr_shmid = shmgetid(p, 4899 seg->s_base)) != SHMID_NONE) { 4900 if (mp->pr_shmid == SHMID_FREE) 4901 mp->pr_shmid = -1; 4902 4903 mp->pr_mflags |= MA_SHM; 4904 } else { 4905 mp->pr_shmid = -1; 4906 } 4907 4908 npages = ((uintptr_t)(naddr - saddr)) >> 4909 PAGESHIFT; 4910 parr = kmem_zalloc(npages, KM_SLEEP); 4911 4912 SEGOP_INCORE(seg, saddr, naddr - saddr, parr); 4913 4914 for (pagenum = 0; pagenum < npages; pagenum++) { 4915 if (parr[pagenum] & SEG_PAGE_INCORE) 4916 mp->pr_rss++; 4917 if (parr[pagenum] & SEG_PAGE_ANON) 4918 mp->pr_anon++; 4919 if (parr[pagenum] & SEG_PAGE_LOCKED) 4920 mp->pr_locked++; 4921 } 4922 kmem_free(parr, npages); 4923 } 4924 } 4925 ASSERT(tmp == NULL); 4926 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 4927 4928 return (0); 4929 } 4930 4931 /* 4932 * Return the process's credentials. We don't need a 32-bit equivalent of 4933 * this function because prcred_t and prcred32_t are actually the same. 4934 */ 4935 void 4936 prgetcred(proc_t *p, prcred_t *pcrp) 4937 { 4938 mutex_enter(&p->p_crlock); 4939 cred2prcred(p->p_cred, pcrp); 4940 mutex_exit(&p->p_crlock); 4941 } 4942 4943 void 4944 prgetsecflags(proc_t *p, prsecflags_t *psfp) 4945 { 4946 ASSERT(psfp != NULL); 4947 4948 bzero(psfp, sizeof (*psfp)); 4949 psfp->pr_version = PRSECFLAGS_VERSION_CURRENT; 4950 psfp->pr_lower = p->p_secflags.psf_lower; 4951 psfp->pr_upper = p->p_secflags.psf_upper; 4952 psfp->pr_effective = p->p_secflags.psf_effective; 4953 psfp->pr_inherit = p->p_secflags.psf_inherit; 4954 } 4955 4956 /* 4957 * Compute actual size of the prpriv_t structure. 4958 */ 4959 4960 size_t 4961 prgetprivsize(void) 4962 { 4963 return (priv_prgetprivsize(NULL)); 4964 } 4965 4966 /* 4967 * Return the process's privileges. We don't need a 32-bit equivalent of 4968 * this function because prpriv_t and prpriv32_t are actually the same. 4969 */ 4970 void 4971 prgetpriv(proc_t *p, prpriv_t *pprp) 4972 { 4973 mutex_enter(&p->p_crlock); 4974 cred2prpriv(p->p_cred, pprp); 4975 mutex_exit(&p->p_crlock); 4976 } 4977 4978 #ifdef _SYSCALL32_IMPL 4979 /* 4980 * Return an array of structures with HAT memory map information. 4981 * We allocate here; the caller must deallocate. 4982 */ 4983 int 4984 prgetxmap32(proc_t *p, list_t *iolhead) 4985 { 4986 struct as *as = p->p_as; 4987 prxmap32_t *mp; 4988 struct seg *seg; 4989 struct seg *brkseg, *stkseg; 4990 struct vnode *vp; 4991 struct vattr vattr; 4992 uint_t prot; 4993 4994 ASSERT(as != &kas && AS_WRITE_HELD(as)); 4995 4996 /* 4997 * Request an initial buffer size that doesn't waste memory 4998 * if the address space has only a small number of segments. 4999 */ 5000 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 5001 5002 if ((seg = AS_SEGFIRST(as)) == NULL) 5003 return (0); 5004 5005 brkseg = break_seg(p); 5006 stkseg = as_segat(as, prgetstackbase(p)); 5007 5008 do { 5009 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 5010 caddr_t saddr, naddr, baddr; 5011 void *tmp = NULL; 5012 ssize_t psz; 5013 char *parr; 5014 uint64_t npages; 5015 uint64_t pagenum; 5016 5017 if ((seg->s_flags & S_HOLE) != 0) { 5018 continue; 5019 } 5020 5021 /* 5022 * Segment loop part one: iterate from the base of the segment 5023 * to its end, pausing at each address boundary (baddr) between 5024 * ranges that have different virtual memory protections. 
5025 */ 5026 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { 5027 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); 5028 ASSERT(baddr >= saddr && baddr <= eaddr); 5029 5030 /* 5031 * Segment loop part two: iterate from the current 5032 * position to the end of the protection boundary, 5033 * pausing at each address boundary (naddr) between 5034 * ranges that have different underlying page sizes. 5035 */ 5036 for (; saddr < baddr; saddr = naddr) { 5037 psz = pr_getpagesize(seg, saddr, &naddr, baddr); 5038 ASSERT(naddr >= saddr && naddr <= baddr); 5039 5040 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 5041 5042 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 5043 mp->pr_size = (size32_t)(naddr - saddr); 5044 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 5045 mp->pr_mflags = 0; 5046 if (prot & PROT_READ) 5047 mp->pr_mflags |= MA_READ; 5048 if (prot & PROT_WRITE) 5049 mp->pr_mflags |= MA_WRITE; 5050 if (prot & PROT_EXEC) 5051 mp->pr_mflags |= MA_EXEC; 5052 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 5053 mp->pr_mflags |= MA_SHARED; 5054 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 5055 mp->pr_mflags |= MA_NORESERVE; 5056 if (seg->s_ops == &segspt_shmops || 5057 (seg->s_ops == &segvn_ops && 5058 (SEGOP_GETVP(seg, saddr, &vp) != 0 || 5059 vp == NULL))) 5060 mp->pr_mflags |= MA_ANON; 5061 if (seg == brkseg) 5062 mp->pr_mflags |= MA_BREAK; 5063 else if (seg == stkseg) 5064 mp->pr_mflags |= MA_STACK; 5065 if (seg->s_ops == &segspt_shmops) 5066 mp->pr_mflags |= MA_ISM | MA_SHM; 5067 5068 mp->pr_pagesize = PAGESIZE; 5069 if (psz == -1) { 5070 mp->pr_hatpagesize = 0; 5071 } else { 5072 mp->pr_hatpagesize = psz; 5073 } 5074 5075 /* 5076 * Manufacture a filename for the "object" dir. 5077 */ 5078 mp->pr_dev = PRNODEV32; 5079 vattr.va_mask = AT_FSID|AT_NODEID; 5080 if (seg->s_ops == &segvn_ops && 5081 SEGOP_GETVP(seg, saddr, &vp) == 0 && 5082 vp != NULL && vp->v_type == VREG && 5083 VOP_GETATTR(vp, &vattr, 0, CRED(), 5084 NULL) == 0) { 5085 (void) cmpldev(&mp->pr_dev, 5086 vattr.va_fsid); 5087 mp->pr_ino = vattr.va_nodeid; 5088 if (vp == p->p_exec) 5089 (void) strcpy(mp->pr_mapname, 5090 "a.out"); 5091 else 5092 pr_object_name(mp->pr_mapname, 5093 vp, &vattr); 5094 } 5095 5096 /* 5097 * Get the SysV shared memory id, if any. 5098 */ 5099 if ((mp->pr_mflags & MA_SHARED) && 5100 p->p_segacct && (mp->pr_shmid = shmgetid(p, 5101 seg->s_base)) != SHMID_NONE) { 5102 if (mp->pr_shmid == SHMID_FREE) 5103 mp->pr_shmid = -1; 5104 5105 mp->pr_mflags |= MA_SHM; 5106 } else { 5107 mp->pr_shmid = -1; 5108 } 5109 5110 npages = ((uintptr_t)(naddr - saddr)) >> 5111 PAGESHIFT; 5112 parr = kmem_zalloc(npages, KM_SLEEP); 5113 5114 SEGOP_INCORE(seg, saddr, naddr - saddr, parr); 5115 5116 for (pagenum = 0; pagenum < npages; pagenum++) { 5117 if (parr[pagenum] & SEG_PAGE_INCORE) 5118 mp->pr_rss++; 5119 if (parr[pagenum] & SEG_PAGE_ANON) 5120 mp->pr_anon++; 5121 if (parr[pagenum] & SEG_PAGE_LOCKED) 5122 mp->pr_locked++; 5123 } 5124 kmem_free(parr, npages); 5125 } 5126 } 5127 ASSERT(tmp == NULL); 5128 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 5129 5130 return (0); 5131 } 5132 #endif /* _SYSCALL32_IMPL */ 5133
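/*
 * Illustrative only: a minimal userland sketch, not part of this file,
 * showing how the prusage_t produced above reaches a consumer through
 * the /proc/<pid>/usage file described in proc(5). The function name
 * print_usage() is hypothetical.
 *
 *	#include <sys/types.h>
 *	#include <stdio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <procfs.h>
 *
 *	int
 *	print_usage(pid_t pid)
 *	{
 *		char path[64];
 *		prusage_t pru;
 *		int fd;
 *
 *		(void) snprintf(path, sizeof (path), "/proc/%d/usage",
 *		    (int)pid);
 *		if ((fd = open(path, O_RDONLY)) < 0)
 *			return (-1);
 *		if (read(fd, &pru, sizeof (pru)) != sizeof (pru)) {
 *			(void) close(fd);
 *			return (-1);
 *		}
 *		(void) close(fd);
 *		(void) printf("minf %lu majf %lu vctx %lu ictx %lu\n",
 *		    pru.pr_minf, pru.pr_majf, pru.pr_vctx, pru.pr_ictx);
 *		return (0);
 *	}
 */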