/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/priv.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/inline.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/sobject.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/var.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/session.h>
#include <sys/pcb.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/ts.h>
#include <sys/bitmap.h>
#include <sys/poll.h>
#include <sys/shm_impl.h>
#include <sys/fault.h>
#include <sys/syscall.h>
#include <sys/procfs.h>
#include <sys/processor.h>
#include <sys/cpuvar.h>
#include <sys/copyops.h>
#include <sys/time.h>
#include <sys/msacct.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_spt.h>
#include <vm/page.h>
#include <sys/vmparam.h>
#include <sys/swap.h>
#include <fs/proc/prdata.h>
#include <sys/task.h>
#include <sys/project.h>
#include <sys/contract_impl.h>
#include <sys/contract/process.h>
#include <sys/contract/process_impl.h>
#include <sys/schedctl.h>
#include <sys/pool.h>
#include <sys/zone.h>
#include <sys/atomic.h>

#define	MAX_ITERS_SPIN	5

typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

static int set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static void clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);

/*
 * Choose an lwp from the complete set of lwps for the process.
 * This is called for any operation applied to the process
 * file descriptor that requires an lwp to operate upon.
 *
 * Returns a pointer to the thread for the selected lwp,
 * with the dispatcher lock held for that thread.
 *
 * The algorithm for choosing an lwp is critical for /proc semantics;
 * don't touch this code unless you know all of the implications.
 */
kthread_t *
prchoose(proc_t *p)
{
	kthread_t *t;
	kthread_t *t_onproc = NULL;	/* running on processor */
	kthread_t *t_run = NULL;	/* runnable, on disp queue */
	kthread_t *t_sleep = NULL;	/* sleeping */
	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
	kthread_t *t_susp = NULL;	/* suspended stop */
	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
	kthread_t *t_req = NULL;	/* requested stop */
	kthread_t *t_istop = NULL;	/* event-of-interest stop */

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * If the agent lwp exists, it takes precedence over all others.
	 */
	if ((t = p->p_agenttp) != NULL) {
		thread_lock(t);
		return (t);
	}

	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
		return (t);
	do {		/* for each lwp in the process */
		if (VSTOPPED(t)) {	/* virtually stopped */
			if (t_req == NULL)
				t_req = t;
			continue;
		}

		thread_lock(t);		/* make sure thread is in good state */
		switch (t->t_state) {
		default:
			panic("prchoose: bad thread state %d, thread 0x%p",
			    t->t_state, (void *)t);
			/*NOTREACHED*/
		case TS_SLEEP:
			/* this is filthy */
			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
			    t->t_wchan0 == NULL) {
				if (t_hold == NULL)
					t_hold = t;
			} else {
				if (t_sleep == NULL)
					t_sleep = t;
			}
			break;
		case TS_RUN:
			if (t_run == NULL)
				t_run = t;
			break;
		case TS_ONPROC:
			if (t_onproc == NULL)
				t_onproc = t;
			break;
		case TS_ZOMB:		/* last possible choice */
			break;
		case TS_STOPPED:
			switch (t->t_whystop) {
			case PR_SUSPENDED:
				if (t_susp == NULL)
					t_susp = t;
				break;
			case PR_JOBCONTROL:
				if (t->t_proc_flag & TP_PRSTOP) {
					if (t_jdstop == NULL)
						t_jdstop = t;
				} else {
					if (t_jstop == NULL)
						t_jstop = t;
				}
				break;
			case PR_REQUESTED:
				if (t_req == NULL)
					t_req = t;
				break;
			case PR_SYSENTRY:
			case PR_SYSEXIT:
			case PR_SIGNALLED:
			case PR_FAULTED:
				/*
				 * Make an lwp calling exit() be the
				 * last lwp seen in the process.
				 */
				if (t_istop == NULL ||
				    (t_istop->t_whystop == PR_SYSENTRY &&
				    t_istop->t_whatstop == SYS_exit))
					t_istop = t;
				break;
			case PR_CHECKPOINT:	/* can't happen? */
				break;
			default:
				panic("prchoose: bad t_whystop %d, thread 0x%p",
				    t->t_whystop, (void *)t);
				/*NOTREACHED*/
			}
			break;
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);

	if (t_onproc)
		t = t_onproc;
	else if (t_run)
		t = t_run;
	else if (t_sleep)
		t = t_sleep;
	else if (t_jstop)
		t = t_jstop;
	else if (t_jdstop)
		t = t_jdstop;
	else if (t_istop)
		t = t_istop;
	else if (t_req)
		t = t_req;
	else if (t_hold)
		t = t_hold;
	else if (t_susp)
		t = t_susp;
	else			/* TS_ZOMB */
		t = p->p_tlist;

	if (t != NULL)
		thread_lock(t);
	return (t);
}
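/*
 * Illustrative sketch (not compiled into this file): the fallback chain
 * above is a fixed priority order over the first-seen candidate of each
 * thread class.  A minimal user-level model of the same selection logic;
 * the enum values and function names are hypothetical stand-ins for the
 * kernel's TS_* / PR_* classification:
 */
#if 0
enum pick_class {		/* highest priority first */
	PICK_ONPROC, PICK_RUN, PICK_SLEEP, PICK_JSTOP, PICK_JDSTOP,
	PICK_ISTOP, PICK_REQ, PICK_HOLD, PICK_SUSP, PICK_ZOMB, PICK_NCLASS
};

/* Return the first-seen member of the highest-priority nonempty class. */
static int
pick_best(const enum pick_class *classv, int n)
{
	int firstseen[PICK_NCLASS];
	int c, i;

	for (c = 0; c < PICK_NCLASS; c++)
		firstseen[c] = -1;
	for (i = 0; i < n; i++)		/* one walk, like prchoose() */
		if (firstseen[classv[i]] == -1)
			firstseen[classv[i]] = i;
	for (c = 0; c < PICK_NCLASS; c++)
		if (firstseen[c] != -1)
			return (firstseen[c]);
	return (-1);			/* no threads at all */
}
#endif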
/*
 * Wake up anyone sleeping on the /proc vnode for the process/lwp to stop.
 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
 * on the /proc file descriptor.  Called from stop() when a traced
 * process stops on an event of interest.  Also called from exit()
 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
 */
void
prnotify(struct vnode *vp)
{
	prcommon_t *pcp = VTOP(vp)->pr_common;

	mutex_enter(&pcp->prc_mutex);
	cv_broadcast(&pcp->prc_wait);
	mutex_exit(&pcp->prc_mutex);
	if (pcp->prc_flags & PRC_POLL) {
		/*
		 * We call pollwakeup() with POLLHUP to ensure that
		 * the pollers are awakened even if they are polling
		 * for nothing (i.e., waiting for the process to exit).
		 * This enables the use of the PRC_POLL flag for optimization
		 * (we can turn off PRC_POLL only if we know no pollers remain).
		 */
		pcp->prc_flags &= ~PRC_POLL;
		pollwakeup(&pcp->prc_pollhead, POLLHUP);
	}
}

/* called immediately below, in prfree() */
static void
prfreenotify(vnode_t *vp)
{
	prnode_t *pnp;
	prcommon_t *pcp;

	while (vp != NULL) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL);
		pcp->prc_proc = NULL;
		/*
		 * We can't call prnotify() here because we are holding
		 * pidlock.  We assert that there is no need to.
		 */
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		ASSERT(!(pcp->prc_flags & PRC_POLL));

		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}
}

/*
 * Called from a hook in freeproc() when a traced process is removed
 * from the process table.  The proc-table pointers of all associated
 * /proc vnodes are cleared to indicate that the process has gone away.
 */
void
prfree(proc_t *p)
{
	uint_t slot = p->p_slot;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Block the process against /proc so it can be freed.
	 * It cannot be freed while locked by some controlling process.
	 * Lock ordering:
	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
	 */
	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		mutex_exit(&pr_pidlock);
		cv_wait(&pr_pid_cv[slot], &p->p_lock);
		mutex_exit(&p->p_lock);
		mutex_enter(&pr_pidlock);
		mutex_enter(&p->p_lock);
	}

	ASSERT(p->p_tlist == NULL);

	prfreenotify(p->p_plist);
	p->p_plist = NULL;

	prfreenotify(p->p_trace);
	p->p_trace = NULL;

	/*
	 * We broadcast to wake up everyone waiting for this process.
	 * No one can reach this process from this point on.
	 */
	cv_broadcast(&pr_pid_cv[slot]);

	mutex_exit(&p->p_lock);
	mutex_exit(&pr_pidlock);
}

/*
 * Called from a hook in exit() when a traced process is becoming a zombie.
 */
void
prexit(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
	if (p->p_trace) {
		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
		prnotify(p->p_trace);
	}
	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
}
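/*
 * Illustrative sketch (not compiled into this file): the P_PR_LOCK wait
 * loop used by prfree() above (and by prbarrier() below) is the classic
 * "wait until a flag clears" condition-variable pattern.  A self-contained
 * user-level model using POSIX threads; the names pr_lock_flag, pr_cv and
 * pr_mtx are hypothetical:
 */
#if 0
#include <pthread.h>

static pthread_mutex_t pr_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t pr_cv = PTHREAD_COND_INITIALIZER;
static int pr_lock_flag;		/* models P_PR_LOCK */

static void
pr_barrier_model(void)			/* models the waiting side */
{
	pthread_mutex_lock(&pr_mtx);
	while (pr_lock_flag)		/* re-test after every wakeup */
		pthread_cond_wait(&pr_cv, &pr_mtx);
	/* flag is clear and we hold the mutex, as prfree() requires */
	pthread_mutex_unlock(&pr_mtx);
}

static void
pr_unmark_model(void)			/* models dropping the flag */
{
	pthread_mutex_lock(&pr_mtx);
	pr_lock_flag = 0;
	pthread_cond_broadcast(&pr_cv);	/* prunmark() itself uses cv_signal() */
	pthread_mutex_unlock(&pr_mtx);
}
#endif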
/*
 * Called when a thread calls lwp_exit().
 */
void
prlwpexit(kthread_t *t)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	proc_t *p = ttoproc(t);
	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;

	ASSERT(t == curthread);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		if (pcp->prc_thread == t) {
			pcp->prc_thread = NULL;
			pcp->prc_flags |= PRC_DESTROY;
		}
	}

	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		pcp->prc_thread = NULL;
		pcp->prc_flags |= PRC_DESTROY;
		prnotify(vp);
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}

/*
 * Called when a zombie thread is joined or when a
 * detached lwp exits.  Called from lwp_hash_out().
 */
void
prlwpfree(proc_t *p, lwpent_t *lep)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	vp = lep->le_trace;
	lep->le_trace = NULL;
	while (vp) {
		prnotify(vp);
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL &&
		    (pcp->prc_flags & PRC_DESTROY));
		pcp->prc_tslot = -1;
		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}

/*
 * Called from a hook in exec() when a thread starts exec().
 */
void
prexecstart(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);

	/*
	 * The P_PR_EXEC flag blocks /proc operations for
	 * the duration of the exec().
	 * We can't start exec() while the process is
	 * locked by /proc, so we call prbarrier().
	 * lwp_nostop keeps the process from being stopped
	 * via job control for the duration of the exec().
	 */

	ASSERT(MUTEX_HELD(&p->p_lock));
	prbarrier(p);
	lwp->lwp_nostop++;
	p->p_proc_flag |= P_PR_EXEC;
}

/*
 * Called from a hook in exec() when a thread finishes exec().
 * The thread may or may not have succeeded.  Some other thread
 * may have beat it to the punch.
 */
void
prexecend(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	model_t model = p->p_model;
	id_t tid = curthread->t_tid;
	int tslot = curthread->t_dslot;

	ASSERT(MUTEX_HELD(&p->p_lock));

	lwp->lwp_nostop--;
	if (p->p_flag & SEXITLWPS) {
		/*
		 * We are on our way to exiting because some
		 * other thread beat us in the race to exec().
		 * Don't clear the P_PR_EXEC flag in this case.
		 */
		return;
	}

	/*
	 * Wake up anyone waiting in /proc for the process to complete exec().
	 */
	p->p_proc_flag &= ~P_PR_EXEC;
	if ((vp = p->p_trace) != NULL) {
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pnp->pr_common->prc_datamodel = model;
		}
	}
	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
		/*
		 * We dealt with the process common above.
		 */
		ASSERT(p->p_trace != NULL);
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pcp = pnp->pr_common;
			pcp->prc_datamodel = model;
			pcp->prc_tid = tid;
			pcp->prc_tslot = tslot;
		}
	}
}

/*
 * Called from a hook in relvm() just before freeing the address space.
 * We free all the watched areas now.
 */
void
prrelvm(void)
{
	proc_t *p = ttoproc(curthread);

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */
	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	mutex_exit(&p->p_lock);
	pr_free_watched_pages(p);
}

/*
 * Called from hooks in exec-related code when a traced process
 * attempts to exec(2) a setuid/setgid program or an unreadable
 * file.  Rather than fail the exec we invalidate the associated
 * /proc vnodes so that subsequent attempts to use them will fail.
 *
 * All /proc vnodes, except directory vnodes, are retained on a linked
 * list (rooted at p_plist in the process structure) until last close.
 *
 * A controlling process must re-open the /proc files in order to
 * regain control.
 */
void
prinvalidate(struct user *up)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	vnode_t *vp;
	prnode_t *pnp;
	int writers = 0;

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */

	/*
	 * At this moment, there can be only one lwp in the process.
	 */
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);

	/*
	 * Invalidate any currently active /proc vnodes.
	 */
	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		switch (pnp->pr_type) {
		case PR_PSINFO:		/* these files can be read by anyone */
		case PR_LPSINFO:
		case PR_LWPSINFO:
		case PR_LWPDIR:
		case PR_LWPIDDIR:
		case PR_USAGE:
		case PR_LUSAGE:
		case PR_LWPUSAGE:
			break;
		default:
			pnp->pr_flags |= PR_INVAL;
			break;
		}
	}
	/*
	 * Wake up anyone waiting for the process or lwp.
	 * p->p_trace is guaranteed to be non-NULL if there
	 * are any open /proc files for this process.
	 */
	if ((vp = p->p_trace) != NULL) {
		prcommon_t *pcp = VTOP(vp)->pr_pcommon;

		prnotify(vp);
		/*
		 * Are there any writers?
		 */
		if ((writers = pcp->prc_writers) != 0) {
			/*
			 * Clear the exclusive open flag (old /proc interface).
			 * Set prc_selfopens equal to prc_writers so that
			 * the next O_EXCL|O_WRITE open will succeed
			 * even with existing (though invalid) writers.
			 * prclose() must decrement prc_selfopens when
			 * the invalid files are closed.
			 */
			pcp->prc_flags &= ~PRC_EXCL;
			ASSERT(pcp->prc_selfopens <= writers);
			pcp->prc_selfopens = writers;
		}
	}
	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
	while (vp != NULL) {
		/*
		 * We should not invalidate the lwpiddir vnodes,
		 * but the necessities of maintaining the old
		 * ioctl()-based version of /proc require it.
		 */
		pnp = VTOP(vp);
		pnp->pr_flags |= PR_INVAL;
		prnotify(vp);
		vp = pnp->pr_next;
	}

	/*
	 * If any tracing flags are in effect and any vnodes are open for
	 * writing then set the requested-stop and run-on-last-close flags.
	 * Otherwise, clear all tracing flags.
	 */
	t->t_proc_flag &= ~TP_PAUSE;
	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
		t->t_proc_flag |= TP_PRSTOP;
		aston(t);	/* so ISSIG will see the flag */
		p->p_proc_flag |= P_PR_RUNLCL;
	} else {
		premptyset(&up->u_entrymask);		/* syscalls */
		premptyset(&up->u_exitmask);
		up->u_systrap = 0;
		premptyset(&p->p_sigmask);		/* signals */
		premptyset(&p->p_fltmask);		/* faults */
		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
		prnostep(ttolwp(t));
	}

	mutex_exit(&p->p_lock);
}

/*
 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
 * Return with pr_pidlock held in all cases.
 * Return with p_lock held if the process still exists.
 * Return value is the process pointer if the process still exists, else NULL.
 * If we lock the process, give ourselves kernel priority to avoid deadlocks;
 * this is undone in prunlock().
 */
proc_t *
pr_p_lock(prnode_t *pnp)
{
	proc_t *p;
	prcommon_t *pcp;

	mutex_enter(&pr_pidlock);
	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
		return (NULL);
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * This cv/mutex pair is persistent even if
		 * the process disappears while we sleep.
		 */
		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
		kmutex_t *mp = &p->p_lock;

		mutex_exit(&pr_pidlock);
		cv_wait(cv, mp);
		mutex_exit(mp);
		mutex_enter(&pr_pidlock);
		if (pcp->prc_proc == NULL)
			return (NULL);
		ASSERT(p == pcp->prc_proc);
		mutex_enter(&p->p_lock);
	}
	p->p_proc_flag |= P_PR_LOCK;
	THREAD_KPRI_REQUEST();
	return (p);
}

/*
 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
 * This prevents any lwp of the process from disappearing and
 * blocks most operations that a process can perform on itself.
 * Returns 0 on success, a non-zero error number on failure.
 *
 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
 *
 * error returns:
 *	ENOENT: process or lwp has disappeared or process is exiting
 *		(or has become a zombie and zdisp == ZNO).
 *	EAGAIN: procfs vnode has become invalid.
 *	EINTR:  signal arrived while waiting for exec to complete.
 */
int
prlock(prnode_t *pnp, int zdisp)
{
	prcommon_t *pcp;
	proc_t *p;

again:
	pcp = pnp->pr_common;
	p = pr_p_lock(pnp);
	mutex_exit(&pr_pidlock);

	/*
	 * Return ENOENT immediately if there is no process.
	 */
	if (p == NULL)
		return (ENOENT);

	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);

	/*
	 * Return ENOENT if process entered zombie state or is exiting
	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
	 */
	if (zdisp == ZNO &&
	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
		prunlock(pnp);
		return (ENOENT);
	}

	/*
	 * If lwp-specific, check to see if lwp has disappeared.
	 */
	if (pcp->prc_flags & PRC_LWP) {
		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
		    pcp->prc_tslot == -1) {
			prunlock(pnp);
			return (ENOENT);
		}
	}

	/*
	 * Return EAGAIN if we have encountered a security violation.
	 * (The process exec'd a set-id or unreadable executable file.)
	 */
	if (pnp->pr_flags & PR_INVAL) {
		prunlock(pnp);
		return (EAGAIN);
	}

	/*
	 * If process is undergoing an exec(), wait for
	 * completion and then start all over again.
	 */
	if (p->p_proc_flag & P_PR_EXEC) {
		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
		mutex_enter(&pcp->prc_mutex);
		prunlock(pnp);
		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
			mutex_exit(&pcp->prc_mutex);
			return (EINTR);
		}
		mutex_exit(&pcp->prc_mutex);
		goto again;
	}

	/*
	 * We return holding p->p_lock.
	 */
	return (0);
}

/*
 * Undo prlock() and pr_p_lock().
 * p->p_lock is still held; pr_pidlock is no longer held.
 *
 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
 * if any, waiting for the flag to be dropped; it retains p->p_lock.
 *
 * prunlock() calls prunmark() and then drops p->p_lock.
 */
void
prunmark(proc_t *p)
{
	ASSERT(p->p_proc_flag & P_PR_LOCK);
	ASSERT(MUTEX_HELD(&p->p_lock));

	cv_signal(&pr_pid_cv[p->p_slot]);
	p->p_proc_flag &= ~P_PR_LOCK;
	THREAD_KPRI_RELEASE();
}

void
prunlock(prnode_t *pnp)
{
	proc_t *p = pnp->pr_pcommon->prc_proc;

	prunmark(p);
	mutex_exit(&p->p_lock);
}

/*
 * Called while holding p->p_lock to delay until the process is unlocked.
 * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
 * The process cannot become locked again until p->p_lock is dropped.
 */
void
prbarrier(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (p->p_proc_flag & P_PR_LOCK) {
		/* The process is locked; delay until not locked */
		uint_t slot = p->p_slot;

		while (p->p_proc_flag & P_PR_LOCK)
			cv_wait(&pr_pid_cv[slot], &p->p_lock);
		cv_signal(&pr_pid_cv[slot]);
	}
}
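/*
 * Illustrative sketch (not compiled into this file): the usual shape of a
 * procfs operation built on prlock()/prunlock().  The operation body and
 * the pr_do_something() name are hypothetical; the error handling mirrors
 * the contract documented above prlock().
 */
#if 0
static int
pr_do_something(prnode_t *pnp)
{
	int error;

	/* ENOENT, EAGAIN and EINTR propagate to the /proc caller */
	if ((error = prlock(pnp, ZNO)) != 0)
		return (error);

	/*
	 * The target is held: P_PR_LOCK is set, p->p_lock is held,
	 * and no lwp can disappear until prunlock() is called.
	 */
	/* ... operate on pnp->pr_common->prc_proc ... */

	prunlock(pnp);
	return (0);
}
#endif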
/*
 * Return process/lwp status.
 * The u-block is mapped in by this routine and unmapped at the end.
 */
void
prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus() does the rest */
	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
	sp->pr_brksize = p->p_brksize;
	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
	sp->pr_stksize = p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}
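/*
 * Illustrative sketch (not compiled into this file): prgetstatus() is what
 * services a read(2) of /proc/<pid>/status.  A minimal Solaris/illumos
 * user-level consumer, assuming only the documented proc(4) interface:
 */
#if 0
#include <sys/types.h>
#include <procfs.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

int
main(void)
{
	pstatus_t st;
	int fd = open("/proc/self/status", O_RDONLY);

	if (fd == -1 || read(fd, &st, sizeof (st)) != sizeof (st))
		return (1);
	(void) printf("pid %d: %d lwp(s), %d zombie(s), data model %d\n",
	    (int)st.pr_pid, st.pr_nlwp, st.pr_nzomb, (int)st.pr_dmodel);
	(void) close(fd);
	return (0);
}
#endif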
#ifdef _SYSCALL32_IMPL
void
prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	schedctl_finish_sigblock(t);
	prassignset(&sp->pr_lwphold, &t->t_hold);
	if (t->t_whystop == PR_FAULTED) {
		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
		if (t->t_whatstop == FLTPAGE)
			sp->pr_info.si_addr =
			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
	} else if (lwp->lwp_curinfo)
		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack.ss_sp =
	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts32(usr, &sp->pr_utime);
	hrt2ts32(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = (uint32_t)instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall32_args(lwp,
		    (int *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs32(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		long r1, r2;
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
		if (sp->pr_errno == 0) {
			sp->pr_rval1 = (int32_t)r1;
			sp->pr_rval2 = (int32_t)r2;
			sp->pr_errpriv = PRIV_NONE;
		} else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_exec || t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (caddr32_t)(uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs32(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

void
prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus32() does the rest */
	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
	sp->pr_brksize = (uint32_t)p->p_brksize;
	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
	sp->pr_stksize = (uint32_t)p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus32(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Return lwp status.
 */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	if (p->p_pgidp->pid_pgorphaned)
		flags |= PR_ORPHAN;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	schedctl_finish_sigblock(t);
	prassignset(&sp->pr_lwphold, &t->t_hold);
	if (t->t_whystop == PR_FAULTED)
		bcopy(&lwp->lwp_siginfo,
		    &sp->pr_info, sizeof (k_siginfo_t));
	else if (lwp->lwp_curinfo)
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &sp->pr_info, sizeof (k_siginfo_t));
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
		    (long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_exec || t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

/*
 * Get the sigaction structure for the specified signal.  The u-block
 * must already have been mapped in by the caller.
 */
void
prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
{
	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < NSIG) {
		sp->sa_handler = up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}

#ifdef _SYSCALL32_IMPL
void
prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
{
	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < NSIG) {
		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Count the number of segments in this process's address space.
 */
int
prnsegs(struct as *as, int reserved)
{
	int n = 0;
	struct seg *seg;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr != naddr)
				n++;
		}

		ASSERT(tmp == NULL);
	}

	return (n);
}

/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char cbuf[11];		/* 32-bit unsigned integer fits in 10 digits */
	char *cp = cbuf;
	char *end = s + len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	while (s < end)		/* optional pad */
		*s++ = '\0';

	return (len);
}
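/*
 * Illustrative sketch (not compiled into this file): pr_u32tos() emits the
 * digits least-significant first into a scratch buffer, then reverses them
 * into place.  A self-contained user-level copy of the same logic (with a
 * terminating NUL instead of the optional pad) and a quick sanity check:
 */
#if 0
#include <stdint.h>
#include <string.h>
#include <assert.h>

static int
u32tos(uint32_t n, char *s)
{
	char cbuf[11];		/* 32-bit unsigned fits in 10 digits */
	char *cp = cbuf;
	int len;

	do {
		*cp++ = (char)(n % 10 + '0');	/* digits, low to high */
		n /= 10;
	} while (n);
	len = (int)(cp - cbuf);
	do {
		*s++ = *--cp;			/* reverse into place */
	} while (cp > cbuf);
	*s = '\0';
	return (len);
}

int
main(void)
{
	char buf[16];

	assert(u32tos(0, buf) == 1 && strcmp(buf, "0") == 0);
	assert(u32tos(4294967295U, buf) == 10 &&
	    strcmp(buf, "4294967295") == 0);
	return (0);
}
#endif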
/*
 * Convert uint64_t to decimal string w/o leading zeros.
 * Return the string length.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char cbuf[21];		/* 64-bit unsigned integer fits in 20 digits */
	char *cp = cbuf;
	int len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	return (len);
}

void
pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
{
	char *s = name;
	struct vfs *vfsp;
	struct vfssw *vfsswp;

	if ((vfsp = vp->v_vfsp) != NULL &&
	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
	    *vfsswp->vsw_name) {
		(void) strcpy(s, vfsswp->vsw_name);
		s += strlen(s);
		*s++ = '.';
	}
	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
	*s++ = '.';
	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
	*s++ = '.';
	s += pr_u64tos(vattr->va_nodeid, s);
	*s++ = '\0';
}

struct seg *
break_seg(proc_t *p)
{
	caddr_t addr = p->p_brkbase;
	struct seg *seg;
	struct vnode *vp;

	if (p->p_brksize != 0)
		addr += p->p_brksize - 1;
	seg = as_segat(p->p_as, addr);
	if (seg != NULL && seg->s_ops == &segvn_ops &&
	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
		return (seg);
	return (NULL);
}
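/*
 * Illustrative sketch (not compiled into this file): pr_object_name()
 * manufactures names of the form "<fstype>.<major>.<minor>.<nodeid>" for
 * entries in the /proc object directory.  A user-level rendering of the
 * same convention; the sample values below are hypothetical:
 */
#if 0
#include <stdio.h>

int
main(void)
{
	const char *fstype = "ufs";	/* vfssw name, if known */
	unsigned major = 136, minor = 7;
	unsigned long long nodeid = 12345;
	char name[64];

	(void) snprintf(name, sizeof (name), "%s.%u.%u.%llu",
	    fstype, major, minor, nodeid);
	(void) printf("%s\n", name);	/* prints "ufs.136.7.12345" */
	return (0);
}
#endif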
/*
 * Return an array of structures with memory map information.
 * We allocate here; the caller must deallocate.
 */
#define	INITIAL_MAPSIZE	65536
#define	MAPSIZE		8192
int
prgetmap(proc_t *p, int reserved, prmap_t **prmapp, size_t *sizep)
{
	struct as *as = p->p_as;
	int nmaps = 0;
	prmap_t *mp;
	size_t size;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	/* initial allocation */
	*sizep = size = INITIAL_MAPSIZE;
	*prmapp = mp = kmem_alloc(size, KM_SLEEP);

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;
			/* reallocate if necessary */
			if ((nmaps + 1) * sizeof (prmap_t) > size) {
				size_t newsize = size + 3 * size / 16;
				prmap_t *newmp = kmem_alloc(newsize, KM_SLEEP);

				bcopy(*prmapp, newmp, nmaps * sizeof (prmap_t));
				kmem_free(*prmapp, size);
				*sizep = size = newsize;
				*prmapp = newmp;
				mp = newmp + nmaps;
			}
			bzero(mp, sizeof (*mp));
			mp->pr_vaddr = (uintptr_t)saddr;
			mp->pr_size = naddr - saddr;
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					mp->pr_vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_size = (uintptr_t)naddr -
					    mp->pr_vaddr;
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}

			mp++;
			nmaps++;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (nmaps);
}

#ifdef _SYSCALL32_IMPL
int
prgetmap32(proc_t *p, int reserved, prmap32_t **prmapp, size_t *sizep)
{
	struct as *as = p->p_as;
	int nmaps = 0;
	prmap32_t *mp;
	size_t size;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	/* initial allocation */
	*sizep = size = INITIAL_MAPSIZE;
	*prmapp = mp = kmem_alloc(size, KM_SLEEP);

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;
			/* reallocate if necessary */
			if ((nmaps + 1) * sizeof (prmap32_t) > size) {
				size_t newsize = size + 3 * size / 16;
				prmap32_t *newmp =
				    kmem_alloc(newsize, KM_SLEEP);

				bcopy(*prmapp, newmp,
				    nmaps * sizeof (prmap32_t));
				kmem_free(*prmapp, size);
				*sizep = size = newsize;
				*prmapp = newmp;
				mp = newmp + nmaps;
			}
			bzero(mp, sizeof (*mp));
			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			mp->pr_size = (size32_t)(naddr - saddr);
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					uintptr_t vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_vaddr = (caddr32_t)vaddr;
					mp->pr_size = (size32_t)
					    ((uintptr_t)naddr - vaddr);
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}

			mp++;
			nmaps++;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (nmaps);
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Return the size of the /proc page data file.
 */
size_t
prpdsize(struct as *as)
{
	struct seg *seg;
	size_t size;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	size = sizeof (prpageheader_t);
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;
		size_t npage;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
				size += sizeof (prasmap_t) + round8(npage);
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (size);
}

#ifdef _SYSCALL32_IMPL
size_t
prpdsize32(struct as *as)
{
	struct seg *seg;
	size_t size;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	size = sizeof (prpageheader32_t);
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;
		size_t npage;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
				size += sizeof (prasmap32_t) + round8(npage);
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (size);
}
#endif	/* _SYSCALL32_IMPL */
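/*
 * Illustrative sketch (not compiled into this file): the page data file
 * sized above is a header followed, per mapping, by a prasmap record and
 * one byte per page, rounded up to an 8-byte boundary.  A user-level model
 * of the same size computation; the record sizes and mapping lengths are
 * hypothetical stand-ins:
 */
#if 0
#include <stdio.h>

#define	ROUND8(x)	(((x) + 7UL) & ~7UL)

int
main(void)
{
	/* stand-ins for sizeof (prpageheader_t) and sizeof (prasmap_t) */
	unsigned long hdrsz = 32, mapsz = 64;
	unsigned long npages[] = { 3, 17, 1 };	/* hypothetical mappings */
	unsigned long size = hdrsz;
	int i;

	for (i = 0; i < 3; i++)
		size += mapsz + ROUND8(npages[i]);	/* 1 byte per page */
	(void) printf("page data file size: %lu bytes\n", size);
	return (0);
}
#endif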
/*
 * Read page data information.
 */
int
prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader_t *php;
	prasmap_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (0);
	}
	size = prpdsize(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (E2BIG);
	}

	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader_t *)buf;
	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));

	hrt2ts(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry.  If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as, &as->a_lock);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (uintptr_t)saddr;
			pmp->pr_npage = npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as, &as->a_lock);

	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}

#ifdef _SYSCALL32_IMPL
int
prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader32_t *php;
	prasmap32_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (0);
	}
	size = prpdsize32(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (E2BIG);
	}

	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader32_t *)buf;
	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));

	hrt2ts32(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry.  If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
2001 */ 2002 if (next > (uintptr_t)buf + size) { 2003 pr_getprot_done(&tmp); 2004 AS_LOCK_EXIT(as, &as->a_lock); 2005 2006 kmem_free(buf, size); 2007 2008 if (ISSIG(curthread, JUSTLOOKING)) 2009 return (EINTR); 2010 2011 goto again; 2012 } 2013 2014 php->pr_nmap++; 2015 php->pr_npage += npage; 2016 pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 2017 pmp->pr_npage = (size32_t)npage; 2018 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 2019 pmp->pr_mflags = 0; 2020 if (prot & PROT_READ) 2021 pmp->pr_mflags |= MA_READ; 2022 if (prot & PROT_WRITE) 2023 pmp->pr_mflags |= MA_WRITE; 2024 if (prot & PROT_EXEC) 2025 pmp->pr_mflags |= MA_EXEC; 2026 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 2027 pmp->pr_mflags |= MA_SHARED; 2028 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 2029 pmp->pr_mflags |= MA_NORESERVE; 2030 if (seg->s_ops == &segspt_shmops || 2031 (seg->s_ops == &segvn_ops && 2032 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 2033 pmp->pr_mflags |= MA_ANON; 2034 if (seg->s_ops == &segspt_shmops) 2035 pmp->pr_mflags |= MA_ISM | MA_SHM; 2036 pmp->pr_pagesize = PAGESIZE; 2037 /* 2038 * Manufacture a filename for the "object" directory. 2039 */ 2040 vattr.va_mask = AT_FSID|AT_NODEID; 2041 if (seg->s_ops == &segvn_ops && 2042 SEGOP_GETVP(seg, saddr, &vp) == 0 && 2043 vp != NULL && vp->v_type == VREG && 2044 VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) { 2045 if (vp == p->p_exec) 2046 (void) strcpy(pmp->pr_mapname, "a.out"); 2047 else 2048 pr_object_name(pmp->pr_mapname, 2049 vp, &vattr); 2050 } 2051 2052 /* 2053 * Get the SysV shared memory id, if any. 2054 */ 2055 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct && 2056 (pmp->pr_shmid = shmgetid(p, seg->s_base)) != 2057 SHMID_NONE) { 2058 if (pmp->pr_shmid == SHMID_FREE) 2059 pmp->pr_shmid = -1; 2060 2061 pmp->pr_mflags |= MA_SHM; 2062 } else { 2063 pmp->pr_shmid = -1; 2064 } 2065 2066 hat_getstat(as, saddr, len, hatid, 2067 (char *)(pmp + 1), HAT_SYNC_ZERORM); 2068 pmp = (prasmap32_t *)next; 2069 } 2070 ASSERT(tmp == NULL); 2071 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2072 2073 AS_LOCK_EXIT(as, &as->a_lock); 2074 2075 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size); 2076 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop); 2077 kmem_free(buf, size); 2078 2079 return (error); 2080 } 2081 #endif /* _SYSCALL32_IMPL */ 2082 2083 ushort_t 2084 prgetpctcpu(uint64_t pct) 2085 { 2086 /* 2087 * The value returned will be relevant in the zone of the examiner, 2088 * which may not be the same as the zone which performed the procfs 2089 * mount. 2090 */ 2091 int nonline = zone_ncpus_online_get(curproc->p_zone); 2092 2093 /* 2094 * Prorate over online cpus so we don't exceed 100% 2095 */ 2096 if (nonline > 1) 2097 pct /= nonline; 2098 pct >>= 16; /* convert to 16-bit scaled integer */ 2099 if (pct > 0x8000) /* might happen, due to rounding */ 2100 pct = 0x8000; 2101 return ((ushort_t)pct); 2102 } 2103 2104 /* 2105 * Return information used by ps(1). 2106 */ 2107 void 2108 prgetpsinfo(proc_t *p, psinfo_t *psp) 2109 { 2110 kthread_t *t; 2111 struct cred *cred; 2112 hrtime_t hrutime, hrstime; 2113 2114 ASSERT(MUTEX_HELD(&p->p_lock)); 2115 2116 if ((t = prchoose(p)) == NULL) /* returns locked thread */ 2117 bzero(psp, sizeof (*psp)); 2118 else { 2119 thread_unlock(t); 2120 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp)); 2121 } 2122 2123 /* 2124 * only export SSYS and SMSACCT; everything else is off-limits to 2125 * userland apps. 
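     * (Added gloss: SSYS marks a system process such as pageout;
     * SMSACCT indicates that microstate accounting is in effect --
     * see estimate_msacct() later in this file.)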
2126 */ 2127 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 2128 psp->pr_nlwp = p->p_lwpcnt; 2129 psp->pr_nzomb = p->p_zombcnt; 2130 mutex_enter(&p->p_crlock); 2131 cred = p->p_cred; 2132 psp->pr_uid = crgetruid(cred); 2133 psp->pr_euid = crgetuid(cred); 2134 psp->pr_gid = crgetrgid(cred); 2135 psp->pr_egid = crgetgid(cred); 2136 mutex_exit(&p->p_crlock); 2137 psp->pr_pid = p->p_pid; 2138 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 2139 (p->p_flag & SZONETOP)) { 2140 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 2141 /* 2142 * Inside local zones, fake zsched's pid as parent pids for 2143 * processes which reference processes outside of the zone. 2144 */ 2145 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 2146 } else { 2147 psp->pr_ppid = p->p_ppid; 2148 } 2149 psp->pr_pgid = p->p_pgrp; 2150 psp->pr_sid = p->p_sessp->s_sid; 2151 psp->pr_taskid = p->p_task->tk_tkid; 2152 psp->pr_projid = p->p_task->tk_proj->kpj_id; 2153 psp->pr_poolid = p->p_pool->pool_id; 2154 psp->pr_zoneid = p->p_zone->zone_id; 2155 if ((psp->pr_contract = PRCTID(p)) == 0) 2156 psp->pr_contract = -1; 2157 psp->pr_addr = (uintptr_t)prgetpsaddr(p); 2158 switch (p->p_model) { 2159 case DATAMODEL_ILP32: 2160 psp->pr_dmodel = PR_MODEL_ILP32; 2161 break; 2162 case DATAMODEL_LP64: 2163 psp->pr_dmodel = PR_MODEL_LP64; 2164 break; 2165 } 2166 hrutime = mstate_aggr_state(p, LMS_USER); 2167 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 2168 hrt2ts((hrutime + hrstime), &psp->pr_time); 2169 TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime); 2170 2171 if (t == NULL) { 2172 int wcode = p->p_wcode; /* must be atomic read */ 2173 2174 if (wcode) 2175 psp->pr_wstat = wstat(wcode, p->p_wdata); 2176 psp->pr_ttydev = PRNODEV; 2177 psp->pr_lwp.pr_state = SZOMB; 2178 psp->pr_lwp.pr_sname = 'Z'; 2179 psp->pr_lwp.pr_bindpro = PBIND_NONE; 2180 psp->pr_lwp.pr_bindpset = PS_NONE; 2181 } else { 2182 user_t *up = PTOU(p); 2183 struct as *as; 2184 dev_t d; 2185 extern dev_t rwsconsdev, rconsdev, uconsdev; 2186 2187 d = cttydev(p); 2188 /* 2189 * If the controlling terminal is the real 2190 * or workstation console device, map to what the 2191 * user thinks is the console device. 2192 */ 2193 if (d == rwsconsdev || d == rconsdev) 2194 d = uconsdev; 2195 psp->pr_ttydev = (d == NODEV) ? 
PRNODEV : d; 2196 psp->pr_start = up->u_start; 2197 bcopy(up->u_comm, psp->pr_fname, 2198 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); 2199 bcopy(up->u_psargs, psp->pr_psargs, 2200 MIN(PRARGSZ-1, PSARGSZ)); 2201 psp->pr_argc = up->u_argc; 2202 psp->pr_argv = up->u_argv; 2203 psp->pr_envp = up->u_envp; 2204 2205 /* get the chosen lwp's lwpsinfo */ 2206 prgetlwpsinfo(t, &psp->pr_lwp); 2207 2208 /* compute %cpu for the process */ 2209 if (p->p_lwpcnt == 1) 2210 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; 2211 else { 2212 uint64_t pct = 0; 2213 hrtime_t cur_time = gethrtime_unscaled(); 2214 2215 t = p->p_tlist; 2216 do { 2217 pct += cpu_update_pct(t, cur_time); 2218 } while ((t = t->t_forw) != p->p_tlist); 2219 2220 psp->pr_pctcpu = prgetpctcpu(pct); 2221 } 2222 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { 2223 psp->pr_size = 0; 2224 psp->pr_rssize = 0; 2225 } else { 2226 mutex_exit(&p->p_lock); 2227 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2228 psp->pr_size = btopr(rm_assize(as)) * (PAGESIZE / 1024); 2229 psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024); 2230 psp->pr_pctmem = rm_pctmemory(as); 2231 AS_LOCK_EXIT(as, &as->a_lock); 2232 mutex_enter(&p->p_lock); 2233 } 2234 } 2235 } 2236 2237 #ifdef _SYSCALL32_IMPL 2238 void 2239 prgetpsinfo32(proc_t *p, psinfo32_t *psp) 2240 { 2241 kthread_t *t; 2242 struct cred *cred; 2243 hrtime_t hrutime, hrstime; 2244 2245 ASSERT(MUTEX_HELD(&p->p_lock)); 2246 2247 if ((t = prchoose(p)) == NULL) /* returns locked thread */ 2248 bzero(psp, sizeof (*psp)); 2249 else { 2250 thread_unlock(t); 2251 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp)); 2252 } 2253 2254 /* 2255 * only export SSYS and SMSACCT; everything else is off-limits to 2256 * userland apps. 2257 */ 2258 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 2259 psp->pr_nlwp = p->p_lwpcnt; 2260 psp->pr_nzomb = p->p_zombcnt; 2261 mutex_enter(&p->p_crlock); 2262 cred = p->p_cred; 2263 psp->pr_uid = crgetruid(cred); 2264 psp->pr_euid = crgetuid(cred); 2265 psp->pr_gid = crgetrgid(cred); 2266 psp->pr_egid = crgetgid(cred); 2267 mutex_exit(&p->p_crlock); 2268 psp->pr_pid = p->p_pid; 2269 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 2270 (p->p_flag & SZONETOP)) { 2271 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 2272 /* 2273 * Inside local zones, fake zsched's pid as parent pids for 2274 * processes which reference processes outside of the zone. 
2275 */ 2276 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 2277 } else { 2278 psp->pr_ppid = p->p_ppid; 2279 } 2280 psp->pr_pgid = p->p_pgrp; 2281 psp->pr_sid = p->p_sessp->s_sid; 2282 psp->pr_taskid = p->p_task->tk_tkid; 2283 psp->pr_projid = p->p_task->tk_proj->kpj_id; 2284 psp->pr_poolid = p->p_pool->pool_id; 2285 psp->pr_zoneid = p->p_zone->zone_id; 2286 if ((psp->pr_contract = PRCTID(p)) == 0) 2287 psp->pr_contract = -1; 2288 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 2289 switch (p->p_model) { 2290 case DATAMODEL_ILP32: 2291 psp->pr_dmodel = PR_MODEL_ILP32; 2292 break; 2293 case DATAMODEL_LP64: 2294 psp->pr_dmodel = PR_MODEL_LP64; 2295 break; 2296 } 2297 hrutime = mstate_aggr_state(p, LMS_USER); 2298 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 2299 hrt2ts32(hrutime + hrstime, &psp->pr_time); 2300 TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime); 2301 2302 if (t == NULL) { 2303 extern int wstat(int, int); /* needs a header file */ 2304 int wcode = p->p_wcode; /* must be atomic read */ 2305 2306 if (wcode) 2307 psp->pr_wstat = wstat(wcode, p->p_wdata); 2308 psp->pr_ttydev = PRNODEV32; 2309 psp->pr_lwp.pr_state = SZOMB; 2310 psp->pr_lwp.pr_sname = 'Z'; 2311 } else { 2312 user_t *up = PTOU(p); 2313 struct as *as; 2314 dev_t d; 2315 extern dev_t rwsconsdev, rconsdev, uconsdev; 2316 2317 d = cttydev(p); 2318 /* 2319 * If the controlling terminal is the real 2320 * or workstation console device, map to what the 2321 * user thinks is the console device. 2322 */ 2323 if (d == rwsconsdev || d == rconsdev) 2324 d = uconsdev; 2325 (void) cmpldev(&psp->pr_ttydev, d); 2326 TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start); 2327 bcopy(up->u_comm, psp->pr_fname, 2328 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); 2329 bcopy(up->u_psargs, psp->pr_psargs, 2330 MIN(PRARGSZ-1, PSARGSZ)); 2331 psp->pr_argc = up->u_argc; 2332 psp->pr_argv = (caddr32_t)up->u_argv; 2333 psp->pr_envp = (caddr32_t)up->u_envp; 2334 2335 /* get the chosen lwp's lwpsinfo */ 2336 prgetlwpsinfo32(t, &psp->pr_lwp); 2337 2338 /* compute %cpu for the process */ 2339 if (p->p_lwpcnt == 1) 2340 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; 2341 else { 2342 uint64_t pct = 0; 2343 hrtime_t cur_time; 2344 2345 t = p->p_tlist; 2346 cur_time = gethrtime_unscaled(); 2347 do { 2348 pct += cpu_update_pct(t, cur_time); 2349 } while ((t = t->t_forw) != p->p_tlist); 2350 2351 psp->pr_pctcpu = prgetpctcpu(pct); 2352 } 2353 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { 2354 psp->pr_size = 0; 2355 psp->pr_rssize = 0; 2356 } else { 2357 mutex_exit(&p->p_lock); 2358 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2359 psp->pr_size = (size32_t) 2360 (btopr(rm_assize(as)) * (PAGESIZE / 1024)); 2361 psp->pr_rssize = (size32_t) 2362 (rm_asrss(as) * (PAGESIZE / 1024)); 2363 psp->pr_pctmem = rm_pctmemory(as); 2364 AS_LOCK_EXIT(as, &as->a_lock); 2365 mutex_enter(&p->p_lock); 2366 } 2367 } 2368 2369 /* 2370 * If we are looking at an LP64 process, zero out 2371 * the fields that cannot be represented in ILP32. 
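     * The fields zeroed below are pr_size, pr_rssize, pr_argv and
     * pr_envp: the sizes may not fit in a size32_t, and the argv/envp
     * values are 64-bit user addresses.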
2372 */ 2373 if (p->p_model != DATAMODEL_ILP32) { 2374 psp->pr_size = 0; 2375 psp->pr_rssize = 0; 2376 psp->pr_argv = 0; 2377 psp->pr_envp = 0; 2378 } 2379 } 2380 #endif /* _SYSCALL32_IMPL */ 2381 2382 void 2383 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp) 2384 { 2385 klwp_t *lwp = ttolwp(t); 2386 sobj_ops_t *sobj; 2387 char c, state; 2388 uint64_t pct; 2389 int retval, niceval; 2390 hrtime_t hrutime, hrstime; 2391 2392 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 2393 2394 bzero(psp, sizeof (*psp)); 2395 2396 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 2397 psp->pr_lwpid = t->t_tid; 2398 psp->pr_addr = (uintptr_t)t; 2399 psp->pr_wchan = (uintptr_t)t->t_wchan; 2400 2401 /* map the thread state enum into a process state enum */ 2402 state = VSTOPPED(t) ? TS_STOPPED : t->t_state; 2403 switch (state) { 2404 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 2405 case TS_RUN: state = SRUN; c = 'R'; break; 2406 case TS_ONPROC: state = SONPROC; c = 'O'; break; 2407 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 2408 case TS_STOPPED: state = SSTOP; c = 'T'; break; 2409 default: state = 0; c = '?'; break; 2410 } 2411 psp->pr_state = state; 2412 psp->pr_sname = c; 2413 if ((sobj = t->t_sobj_ops) != NULL) 2414 psp->pr_stype = SOBJ_TYPE(sobj); 2415 retval = CL_DONICE(t, NULL, 0, &niceval); 2416 if (retval == 0) { 2417 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 2418 psp->pr_nice = niceval + NZERO; 2419 } 2420 psp->pr_syscall = t->t_sysnum; 2421 psp->pr_pri = t->t_pri; 2422 psp->pr_start.tv_sec = t->t_start; 2423 psp->pr_start.tv_nsec = 0L; 2424 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 2425 scalehrtime(&hrutime); 2426 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 2427 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 2428 scalehrtime(&hrstime); 2429 hrt2ts(hrutime + hrstime, &psp->pr_time); 2430 /* compute %cpu for the lwp */ 2431 pct = cpu_update_pct(t, gethrtime_unscaled()); 2432 psp->pr_pctcpu = prgetpctcpu(pct); 2433 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 2434 if (psp->pr_cpu > 99) 2435 psp->pr_cpu = 99; 2436 2437 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 2438 sizeof (psp->pr_clname) - 1); 2439 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 2440 psp->pr_onpro = t->t_cpu->cpu_id; 2441 psp->pr_bindpro = t->t_bind_cpu; 2442 psp->pr_bindpset = t->t_bind_pset; 2443 } 2444 2445 #ifdef _SYSCALL32_IMPL 2446 void 2447 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp) 2448 { 2449 proc_t *p = ttoproc(t); 2450 klwp_t *lwp = ttolwp(t); 2451 sobj_ops_t *sobj; 2452 char c, state; 2453 uint64_t pct; 2454 int retval, niceval; 2455 hrtime_t hrutime, hrstime; 2456 2457 ASSERT(MUTEX_HELD(&p->p_lock)); 2458 2459 bzero(psp, sizeof (*psp)); 2460 2461 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 2462 psp->pr_lwpid = t->t_tid; 2463 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 2464 psp->pr_wchan = 0; /* cannot represent 64-bit addr in 32 bits */ 2465 2466 /* map the thread state enum into a process state enum */ 2467 state = VSTOPPED(t) ? 
TS_STOPPED : t->t_state;
2468     switch (state) {
2469     case TS_SLEEP:      state = SSLEEP;     c = 'S';    break;
2470     case TS_RUN:        state = SRUN;       c = 'R';    break;
2471     case TS_ONPROC:     state = SONPROC;    c = 'O';    break;
2472     case TS_ZOMB:       state = SZOMB;      c = 'Z';    break;
2473     case TS_STOPPED:    state = SSTOP;      c = 'T';    break;
2474     default:            state = 0;          c = '?';    break;
2475     }
2476     psp->pr_state = state;
2477     psp->pr_sname = c;
2478     if ((sobj = t->t_sobj_ops) != NULL)
2479         psp->pr_stype = SOBJ_TYPE(sobj);
2480     retval = CL_DONICE(t, NULL, 0, &niceval);
2481     if (retval == 0) {
2482         psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2483         psp->pr_nice = niceval + NZERO;
2484     } else {
2485         psp->pr_oldpri = 0;
2486         psp->pr_nice = 0;
2487     }
2488     psp->pr_syscall = t->t_sysnum;
2489     psp->pr_pri = t->t_pri;
2490     psp->pr_start.tv_sec = (time32_t)t->t_start;
2491     psp->pr_start.tv_nsec = 0L;
2492     hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2493     scalehrtime(&hrutime);
2494     hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2495         lwp->lwp_mstate.ms_acct[LMS_TRAP];
2496     scalehrtime(&hrstime);
2497     hrt2ts32(hrutime + hrstime, &psp->pr_time);
2498     /* compute %cpu for the lwp */
2499     pct = cpu_update_pct(t, gethrtime_unscaled());
2500     psp->pr_pctcpu = prgetpctcpu(pct);
2501     psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;  /* [0..99] */
2502     if (psp->pr_cpu > 99)
2503         psp->pr_cpu = 99;
2504 
2505     (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2506         sizeof (psp->pr_clname) - 1);
2507     bzero(psp->pr_name, sizeof (psp->pr_name));  /* XXX ??? */
2508     psp->pr_onpro = t->t_cpu->cpu_id;
2509     psp->pr_bindpro = t->t_bind_cpu;
2510     psp->pr_bindpset = t->t_bind_pset;
2511 }
2512 #endif  /* _SYSCALL32_IMPL */
2513 
2514 /*
2515  * This used to get called when microstate accounting was disabled but
2516  * microstate information was requested.  Since microstate accounting is on
2517  * regardless of the proc flags, this simply makes it appear to procfs that
2518  * microstate accounting is on.  This is relatively meaningless since you
2519  * can't turn it off, but this is here for the sake of appearances.
2520  */
2521 
2522 /*ARGSUSED*/
2523 void
2524 estimate_msacct(kthread_t *t, hrtime_t curtime)
2525 {
2526     proc_t *p;
2527 
2528     if (t == NULL)
2529         return;
2530 
2531     p = ttoproc(t);
2532     ASSERT(MUTEX_HELD(&p->p_lock));
2533 
2534     /*
2535      * A system process (p0) could be referenced if the thread is
2536      * in the process of exiting.  Don't turn on microstate accounting
2537      * in that case.
2538      */
2539     if (p->p_flag & SSYS)
2540         return;
2541 
2542     /*
2543      * Loop through all the LWPs (kernel threads) in the process.
2544      */
2545     t = p->p_tlist;
2546     do {
2547         t->t_proc_flag |= TP_MSACCT;
2548     } while ((t = t->t_forw) != p->p_tlist);
2549 
2550     p->p_flag |= SMSACCT;           /* set process-wide MSACCT */
2551 }
2552 
2553 /*
2554  * It's not really possible to disable microstate accounting anymore.
2555  * However, this routine simply turns off the ms accounting flags in a process.
2556  * This way procfs can still pretend to turn microstate accounting on and
2557  * off for a process, but it actually doesn't do anything.  This is
2558  * a neutered form of preemptive idiot-proofing.
2559  */
2560 void
2561 disable_msacct(proc_t *p)
2562 {
2563     kthread_t *t;
2564 
2565     ASSERT(MUTEX_HELD(&p->p_lock));
2566 
2567     p->p_flag &= ~SMSACCT;          /* clear process-wide MSACCT */
2568     /*
2569      * Loop through all the LWPs (kernel threads) in the process.
2570 */ 2571 if ((t = p->p_tlist) != NULL) { 2572 do { 2573 /* clear per-thread flag */ 2574 t->t_proc_flag &= ~TP_MSACCT; 2575 } while ((t = t->t_forw) != p->p_tlist); 2576 } 2577 } 2578 2579 /* 2580 * Return resource usage information. 2581 */ 2582 void 2583 prgetusage(kthread_t *t, prhusage_t *pup) 2584 { 2585 klwp_t *lwp = ttolwp(t); 2586 hrtime_t *mstimep; 2587 struct mstate *ms = &lwp->lwp_mstate; 2588 int state; 2589 int i; 2590 hrtime_t curtime; 2591 hrtime_t waitrq; 2592 hrtime_t tmp1; 2593 2594 curtime = gethrtime_unscaled(); 2595 2596 pup->pr_lwpid = t->t_tid; 2597 pup->pr_count = 1; 2598 pup->pr_create = ms->ms_start; 2599 pup->pr_term = ms->ms_term; 2600 scalehrtime(&pup->pr_create); 2601 scalehrtime(&pup->pr_term); 2602 if (ms->ms_term == 0) { 2603 pup->pr_rtime = curtime - ms->ms_start; 2604 scalehrtime(&pup->pr_rtime); 2605 } else { 2606 pup->pr_rtime = ms->ms_term - ms->ms_start; 2607 scalehrtime(&pup->pr_rtime); 2608 } 2609 2610 2611 pup->pr_utime = ms->ms_acct[LMS_USER]; 2612 pup->pr_stime = ms->ms_acct[LMS_SYSTEM]; 2613 pup->pr_ttime = ms->ms_acct[LMS_TRAP]; 2614 pup->pr_tftime = ms->ms_acct[LMS_TFAULT]; 2615 pup->pr_dftime = ms->ms_acct[LMS_DFAULT]; 2616 pup->pr_kftime = ms->ms_acct[LMS_KFAULT]; 2617 pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 2618 pup->pr_slptime = ms->ms_acct[LMS_SLEEP]; 2619 pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 2620 pup->pr_stoptime = ms->ms_acct[LMS_STOPPED]; 2621 2622 prscaleusage(pup); 2623 2624 /* 2625 * Adjust for time waiting in the dispatcher queue. 2626 */ 2627 waitrq = t->t_waitrq; /* hopefully atomic */ 2628 if (waitrq != 0) { 2629 tmp1 = curtime - waitrq; 2630 scalehrtime(&tmp1); 2631 pup->pr_wtime += tmp1; 2632 curtime = waitrq; 2633 } 2634 2635 /* 2636 * Adjust for time spent in current microstate. 2637 */ 2638 if (ms->ms_state_start > curtime) { 2639 curtime = gethrtime_unscaled(); 2640 } 2641 2642 i = 0; 2643 do { 2644 switch (state = t->t_mstate) { 2645 case LMS_SLEEP: 2646 /* 2647 * Update the timer for the current sleep state. 2648 */ 2649 switch (state = ms->ms_prev) { 2650 case LMS_TFAULT: 2651 case LMS_DFAULT: 2652 case LMS_KFAULT: 2653 case LMS_USER_LOCK: 2654 break; 2655 default: 2656 state = LMS_SLEEP; 2657 break; 2658 } 2659 break; 2660 case LMS_TFAULT: 2661 case LMS_DFAULT: 2662 case LMS_KFAULT: 2663 case LMS_USER_LOCK: 2664 state = LMS_SYSTEM; 2665 break; 2666 } 2667 switch (state) { 2668 case LMS_USER: mstimep = &pup->pr_utime; break; 2669 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 2670 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 2671 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 2672 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 2673 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 2674 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 2675 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 2676 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 2677 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 2678 default: panic("prgetusage: unknown microstate"); 2679 } 2680 tmp1 = curtime - ms->ms_state_start; 2681 if (tmp1 <= 0) { 2682 curtime = gethrtime_unscaled(); 2683 tmp1 = 0; 2684 i++; 2685 continue; 2686 } 2687 scalehrtime(&tmp1); 2688 } while (tmp1 <= 0 && i < MAX_ITERS_SPIN); 2689 2690 *mstimep += tmp1; 2691 2692 /* update pup timestamp */ 2693 pup->pr_tstamp = curtime; 2694 scalehrtime(&pup->pr_tstamp); 2695 2696 /* 2697 * Resource usage counters. 
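     * These are copied straight from the lwp's lwp_ru and are
     * cumulative over the life of the lwp.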
2698 */ 2699 pup->pr_minf = lwp->lwp_ru.minflt; 2700 pup->pr_majf = lwp->lwp_ru.majflt; 2701 pup->pr_nswap = lwp->lwp_ru.nswap; 2702 pup->pr_inblk = lwp->lwp_ru.inblock; 2703 pup->pr_oublk = lwp->lwp_ru.oublock; 2704 pup->pr_msnd = lwp->lwp_ru.msgsnd; 2705 pup->pr_mrcv = lwp->lwp_ru.msgrcv; 2706 pup->pr_sigs = lwp->lwp_ru.nsignals; 2707 pup->pr_vctx = lwp->lwp_ru.nvcsw; 2708 pup->pr_ictx = lwp->lwp_ru.nivcsw; 2709 pup->pr_sysc = lwp->lwp_ru.sysc; 2710 pup->pr_ioch = lwp->lwp_ru.ioch; 2711 } 2712 2713 /* 2714 * Convert ms_acct stats from unscaled high-res time to nanoseconds 2715 */ 2716 void 2717 prscaleusage(prhusage_t *usg) 2718 { 2719 scalehrtime(&usg->pr_utime); 2720 scalehrtime(&usg->pr_stime); 2721 scalehrtime(&usg->pr_ttime); 2722 scalehrtime(&usg->pr_tftime); 2723 scalehrtime(&usg->pr_dftime); 2724 scalehrtime(&usg->pr_kftime); 2725 scalehrtime(&usg->pr_ltime); 2726 scalehrtime(&usg->pr_slptime); 2727 scalehrtime(&usg->pr_wtime); 2728 scalehrtime(&usg->pr_stoptime); 2729 } 2730 2731 2732 /* 2733 * Sum resource usage information. 2734 */ 2735 void 2736 praddusage(kthread_t *t, prhusage_t *pup) 2737 { 2738 klwp_t *lwp = ttolwp(t); 2739 hrtime_t *mstimep; 2740 struct mstate *ms = &lwp->lwp_mstate; 2741 int state; 2742 int i; 2743 hrtime_t curtime; 2744 hrtime_t waitrq; 2745 hrtime_t tmp; 2746 prhusage_t conv; 2747 2748 curtime = gethrtime_unscaled(); 2749 2750 if (ms->ms_term == 0) { 2751 tmp = curtime - ms->ms_start; 2752 scalehrtime(&tmp); 2753 pup->pr_rtime += tmp; 2754 } else { 2755 tmp = ms->ms_term - ms->ms_start; 2756 scalehrtime(&tmp); 2757 pup->pr_rtime += tmp; 2758 } 2759 2760 conv.pr_utime = ms->ms_acct[LMS_USER]; 2761 conv.pr_stime = ms->ms_acct[LMS_SYSTEM]; 2762 conv.pr_ttime = ms->ms_acct[LMS_TRAP]; 2763 conv.pr_tftime = ms->ms_acct[LMS_TFAULT]; 2764 conv.pr_dftime = ms->ms_acct[LMS_DFAULT]; 2765 conv.pr_kftime = ms->ms_acct[LMS_KFAULT]; 2766 conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 2767 conv.pr_slptime = ms->ms_acct[LMS_SLEEP]; 2768 conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 2769 conv.pr_stoptime = ms->ms_acct[LMS_STOPPED]; 2770 2771 prscaleusage(&conv); 2772 2773 pup->pr_utime += conv.pr_utime; 2774 pup->pr_stime += conv.pr_stime; 2775 pup->pr_ttime += conv.pr_ttime; 2776 pup->pr_tftime += conv.pr_tftime; 2777 pup->pr_dftime += conv.pr_dftime; 2778 pup->pr_kftime += conv.pr_kftime; 2779 pup->pr_ltime += conv.pr_ltime; 2780 pup->pr_slptime += conv.pr_slptime; 2781 pup->pr_wtime += conv.pr_wtime; 2782 pup->pr_stoptime += conv.pr_stoptime; 2783 2784 /* 2785 * Adjust for time waiting in the dispatcher queue. 2786 */ 2787 waitrq = t->t_waitrq; /* hopefully atomic */ 2788 if (waitrq != 0) { 2789 tmp = curtime - waitrq; 2790 scalehrtime(&tmp); 2791 pup->pr_wtime += tmp; 2792 curtime = waitrq; 2793 } 2794 2795 /* 2796 * Adjust for time spent in current microstate. 2797 */ 2798 if (ms->ms_state_start > curtime) { 2799 curtime = gethrtime_unscaled(); 2800 } 2801 2802 i = 0; 2803 do { 2804 switch (state = t->t_mstate) { 2805 case LMS_SLEEP: 2806 /* 2807 * Update the timer for the current sleep state. 
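             * If the lwp went to sleep from one of the fault
             * or user-lock microstates (ms_prev below), the
             * time keeps accruing to that state; otherwise it
             * is charged to ordinary sleep.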
2808 */ 2809 switch (state = ms->ms_prev) { 2810 case LMS_TFAULT: 2811 case LMS_DFAULT: 2812 case LMS_KFAULT: 2813 case LMS_USER_LOCK: 2814 break; 2815 default: 2816 state = LMS_SLEEP; 2817 break; 2818 } 2819 break; 2820 case LMS_TFAULT: 2821 case LMS_DFAULT: 2822 case LMS_KFAULT: 2823 case LMS_USER_LOCK: 2824 state = LMS_SYSTEM; 2825 break; 2826 } 2827 switch (state) { 2828 case LMS_USER: mstimep = &pup->pr_utime; break; 2829 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 2830 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 2831 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 2832 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 2833 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 2834 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 2835 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 2836 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 2837 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 2838 default: panic("praddusage: unknown microstate"); 2839 } 2840 tmp = curtime - ms->ms_state_start; 2841 if (tmp <= 0) { 2842 curtime = gethrtime_unscaled(); 2843 tmp = 0; 2844 i++; 2845 continue; 2846 } 2847 scalehrtime(&tmp); 2848 } while (tmp <= 0 && i < MAX_ITERS_SPIN); 2849 2850 *mstimep += tmp; 2851 2852 /* update pup timestamp */ 2853 pup->pr_tstamp = curtime; 2854 scalehrtime(&pup->pr_tstamp); 2855 2856 /* 2857 * Resource usage counters. 2858 */ 2859 pup->pr_minf += lwp->lwp_ru.minflt; 2860 pup->pr_majf += lwp->lwp_ru.majflt; 2861 pup->pr_nswap += lwp->lwp_ru.nswap; 2862 pup->pr_inblk += lwp->lwp_ru.inblock; 2863 pup->pr_oublk += lwp->lwp_ru.oublock; 2864 pup->pr_msnd += lwp->lwp_ru.msgsnd; 2865 pup->pr_mrcv += lwp->lwp_ru.msgrcv; 2866 pup->pr_sigs += lwp->lwp_ru.nsignals; 2867 pup->pr_vctx += lwp->lwp_ru.nvcsw; 2868 pup->pr_ictx += lwp->lwp_ru.nivcsw; 2869 pup->pr_sysc += lwp->lwp_ru.sysc; 2870 pup->pr_ioch += lwp->lwp_ru.ioch; 2871 } 2872 2873 /* 2874 * Convert a prhusage_t to a prusage_t. 2875 * This means convert each hrtime_t to a timestruc_t 2876 * and copy the count fields uint64_t => ulong_t. 
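 * The count fields are copied by walking the two structures in
 * parallel, which relies on pr_minf being the first of 22 consecutive
 * counter (and filler) fields laid out identically in both.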
2877 */ 2878 void 2879 prcvtusage(prhusage_t *pup, prusage_t *upup) 2880 { 2881 uint64_t *ullp; 2882 ulong_t *ulp; 2883 int i; 2884 2885 upup->pr_lwpid = pup->pr_lwpid; 2886 upup->pr_count = pup->pr_count; 2887 2888 hrt2ts(pup->pr_tstamp, &upup->pr_tstamp); 2889 hrt2ts(pup->pr_create, &upup->pr_create); 2890 hrt2ts(pup->pr_term, &upup->pr_term); 2891 hrt2ts(pup->pr_rtime, &upup->pr_rtime); 2892 hrt2ts(pup->pr_utime, &upup->pr_utime); 2893 hrt2ts(pup->pr_stime, &upup->pr_stime); 2894 hrt2ts(pup->pr_ttime, &upup->pr_ttime); 2895 hrt2ts(pup->pr_tftime, &upup->pr_tftime); 2896 hrt2ts(pup->pr_dftime, &upup->pr_dftime); 2897 hrt2ts(pup->pr_kftime, &upup->pr_kftime); 2898 hrt2ts(pup->pr_ltime, &upup->pr_ltime); 2899 hrt2ts(pup->pr_slptime, &upup->pr_slptime); 2900 hrt2ts(pup->pr_wtime, &upup->pr_wtime); 2901 hrt2ts(pup->pr_stoptime, &upup->pr_stoptime); 2902 bzero(upup->filltime, sizeof (upup->filltime)); 2903 2904 ullp = &pup->pr_minf; 2905 ulp = &upup->pr_minf; 2906 for (i = 0; i < 22; i++) 2907 *ulp++ = (ulong_t)*ullp++; 2908 } 2909 2910 #ifdef _SYSCALL32_IMPL 2911 void 2912 prcvtusage32(prhusage_t *pup, prusage32_t *upup) 2913 { 2914 uint64_t *ullp; 2915 uint32_t *ulp; 2916 int i; 2917 2918 upup->pr_lwpid = pup->pr_lwpid; 2919 upup->pr_count = pup->pr_count; 2920 2921 hrt2ts32(pup->pr_tstamp, &upup->pr_tstamp); 2922 hrt2ts32(pup->pr_create, &upup->pr_create); 2923 hrt2ts32(pup->pr_term, &upup->pr_term); 2924 hrt2ts32(pup->pr_rtime, &upup->pr_rtime); 2925 hrt2ts32(pup->pr_utime, &upup->pr_utime); 2926 hrt2ts32(pup->pr_stime, &upup->pr_stime); 2927 hrt2ts32(pup->pr_ttime, &upup->pr_ttime); 2928 hrt2ts32(pup->pr_tftime, &upup->pr_tftime); 2929 hrt2ts32(pup->pr_dftime, &upup->pr_dftime); 2930 hrt2ts32(pup->pr_kftime, &upup->pr_kftime); 2931 hrt2ts32(pup->pr_ltime, &upup->pr_ltime); 2932 hrt2ts32(pup->pr_slptime, &upup->pr_slptime); 2933 hrt2ts32(pup->pr_wtime, &upup->pr_wtime); 2934 hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime); 2935 bzero(upup->filltime, sizeof (upup->filltime)); 2936 2937 ullp = &pup->pr_minf; 2938 ulp = &upup->pr_minf; 2939 for (i = 0; i < 22; i++) 2940 *ulp++ = (uint32_t)*ullp++; 2941 } 2942 #endif /* _SYSCALL32_IMPL */ 2943 2944 /* 2945 * Determine whether a set is empty. 2946 */ 2947 int 2948 setisempty(uint32_t *sp, uint_t n) 2949 { 2950 while (n--) 2951 if (*sp++) 2952 return (0); 2953 return (1); 2954 } 2955 2956 /* 2957 * Utility routine for establishing a watched area in the process. 2958 * Keep the list of watched areas sorted by virtual address. 2959 */ 2960 int 2961 set_watched_area(proc_t *p, struct watched_area *pwa) 2962 { 2963 caddr_t vaddr = pwa->wa_vaddr; 2964 caddr_t eaddr = pwa->wa_eaddr; 2965 ulong_t flags = pwa->wa_flags; 2966 struct watched_area *target; 2967 avl_index_t where; 2968 int error = 0; 2969 2970 /* we must not be holding p->p_lock, but the process must be locked */ 2971 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 2972 ASSERT(p->p_proc_flag & P_PR_LOCK); 2973 2974 /* 2975 * If this is our first watchpoint, enable watchpoints for the process. 2976 */ 2977 if (!pr_watch_active(p)) { 2978 kthread_t *t; 2979 2980 mutex_enter(&p->p_lock); 2981 if ((t = p->p_tlist) != NULL) { 2982 do { 2983 watch_enable(t); 2984 } while ((t = t->t_forw) != p->p_tlist); 2985 } 2986 mutex_exit(&p->p_lock); 2987 } 2988 2989 target = pr_find_watched_area(p, pwa, &where); 2990 if (target != NULL) { 2991 /* 2992 * We discovered an existing, overlapping watched area. 2993 * Allow it only if it is an exact match. 
2994 */ 2995 if (target->wa_vaddr != vaddr || 2996 target->wa_eaddr != eaddr) 2997 error = EINVAL; 2998 else if (target->wa_flags != flags) { 2999 error = set_watched_page(p, vaddr, eaddr, 3000 flags, target->wa_flags); 3001 target->wa_flags = flags; 3002 } 3003 kmem_free(pwa, sizeof (struct watched_area)); 3004 } else { 3005 avl_insert(&p->p_warea, pwa, where); 3006 error = set_watched_page(p, vaddr, eaddr, flags, 0); 3007 } 3008 3009 return (error); 3010 } 3011 3012 /* 3013 * Utility routine for clearing a watched area in the process. 3014 * Must be an exact match of the virtual address. 3015 * size and flags don't matter. 3016 */ 3017 int 3018 clear_watched_area(proc_t *p, struct watched_area *pwa) 3019 { 3020 struct watched_area *found; 3021 3022 /* we must not be holding p->p_lock, but the process must be locked */ 3023 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 3024 ASSERT(p->p_proc_flag & P_PR_LOCK); 3025 3026 3027 if (!pr_watch_active(p)) { 3028 kmem_free(pwa, sizeof (struct watched_area)); 3029 return (0); 3030 } 3031 3032 /* 3033 * Look for a matching address in the watched areas. If a match is 3034 * found, clear the old watched area and adjust the watched page(s). It 3035 * is not an error if there is no match. 3036 */ 3037 if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL && 3038 found->wa_vaddr == pwa->wa_vaddr) { 3039 clear_watched_page(p, found->wa_vaddr, found->wa_eaddr, 3040 found->wa_flags); 3041 avl_remove(&p->p_warea, found); 3042 kmem_free(found, sizeof (struct watched_area)); 3043 } 3044 3045 kmem_free(pwa, sizeof (struct watched_area)); 3046 3047 /* 3048 * If we removed the last watched area from the process, disable 3049 * watchpoints. 3050 */ 3051 if (!pr_watch_active(p)) { 3052 kthread_t *t; 3053 3054 mutex_enter(&p->p_lock); 3055 if ((t = p->p_tlist) != NULL) { 3056 do { 3057 watch_disable(t); 3058 } while ((t = t->t_forw) != p->p_tlist); 3059 } 3060 mutex_exit(&p->p_lock); 3061 } 3062 3063 return (0); 3064 } 3065 3066 /* 3067 * Frees all the watched_area structures 3068 */ 3069 void 3070 pr_free_watchpoints(proc_t *p) 3071 { 3072 struct watched_area *delp; 3073 void *cookie; 3074 3075 cookie = NULL; 3076 while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL) 3077 kmem_free(delp, sizeof (struct watched_area)); 3078 3079 avl_destroy(&p->p_warea); 3080 } 3081 3082 /* 3083 * This one is called by the traced process to unwatch all the 3084 * pages while deallocating the list of watched_page structs. 
3085 */ 3086 void 3087 pr_free_watched_pages(proc_t *p) 3088 { 3089 struct as *as = p->p_as; 3090 struct watched_page *pwp; 3091 uint_t prot; 3092 int retrycnt, err; 3093 void *cookie; 3094 3095 if (as == NULL || avl_numnodes(&as->a_wpage) == 0) 3096 return; 3097 3098 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 3099 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3100 3101 pwp = avl_first(&as->a_wpage); 3102 3103 cookie = NULL; 3104 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) { 3105 retrycnt = 0; 3106 if ((prot = pwp->wp_oprot) != 0) { 3107 caddr_t addr = pwp->wp_vaddr; 3108 struct seg *seg; 3109 retry: 3110 3111 if ((pwp->wp_prot != prot || 3112 (pwp->wp_flags & WP_NOWATCH)) && 3113 (seg = as_segat(as, addr)) != NULL) { 3114 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot); 3115 if (err == IE_RETRY) { 3116 ASSERT(retrycnt == 0); 3117 retrycnt++; 3118 goto retry; 3119 } 3120 } 3121 } 3122 kmem_free(pwp, sizeof (struct watched_page)); 3123 } 3124 3125 avl_destroy(&as->a_wpage); 3126 p->p_wprot = NULL; 3127 3128 AS_LOCK_EXIT(as, &as->a_lock); 3129 } 3130 3131 /* 3132 * Insert a watched area into the list of watched pages. 3133 * If oflags is zero then we are adding a new watched area. 3134 * Otherwise we are changing the flags of an existing watched area. 3135 */ 3136 static int 3137 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, 3138 ulong_t flags, ulong_t oflags) 3139 { 3140 struct as *as = p->p_as; 3141 avl_tree_t *pwp_tree; 3142 struct watched_page *pwp, *newpwp; 3143 struct watched_page tpw; 3144 avl_index_t where; 3145 struct seg *seg; 3146 uint_t prot; 3147 caddr_t addr; 3148 3149 /* 3150 * We need to pre-allocate a list of structures before we grab the 3151 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks 3152 * held. 3153 */ 3154 newpwp = NULL; 3155 for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3156 addr < eaddr; addr += PAGESIZE) { 3157 pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP); 3158 pwp->wp_list = newpwp; 3159 newpwp = pwp; 3160 } 3161 3162 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3163 3164 /* 3165 * Search for an existing watched page to contain the watched area. 3166 * If none is found, grab a new one from the available list 3167 * and insert it in the active list, keeping the list sorted 3168 * by user-level virtual address. 
3169 */ 3170 if (p->p_flag & SVFWAIT) 3171 pwp_tree = &p->p_wpage; 3172 else 3173 pwp_tree = &as->a_wpage; 3174 3175 again: 3176 if (avl_numnodes(pwp_tree) > prnwatch) { 3177 AS_LOCK_EXIT(as, &as->a_lock); 3178 while (newpwp != NULL) { 3179 pwp = newpwp->wp_list; 3180 kmem_free(newpwp, sizeof (struct watched_page)); 3181 newpwp = pwp; 3182 } 3183 return (E2BIG); 3184 } 3185 3186 tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3187 if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) { 3188 pwp = newpwp; 3189 newpwp = newpwp->wp_list; 3190 pwp->wp_list = NULL; 3191 pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr & 3192 (uintptr_t)PAGEMASK); 3193 avl_insert(pwp_tree, pwp, where); 3194 } 3195 3196 ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE); 3197 3198 if (oflags & WA_READ) 3199 pwp->wp_read--; 3200 if (oflags & WA_WRITE) 3201 pwp->wp_write--; 3202 if (oflags & WA_EXEC) 3203 pwp->wp_exec--; 3204 3205 ASSERT(pwp->wp_read >= 0); 3206 ASSERT(pwp->wp_write >= 0); 3207 ASSERT(pwp->wp_exec >= 0); 3208 3209 if (flags & WA_READ) 3210 pwp->wp_read++; 3211 if (flags & WA_WRITE) 3212 pwp->wp_write++; 3213 if (flags & WA_EXEC) 3214 pwp->wp_exec++; 3215 3216 if (!(p->p_flag & SVFWAIT)) { 3217 vaddr = pwp->wp_vaddr; 3218 if (pwp->wp_oprot == 0 && 3219 (seg = as_segat(as, vaddr)) != NULL) { 3220 SEGOP_GETPROT(seg, vaddr, 0, &prot); 3221 pwp->wp_oprot = (uchar_t)prot; 3222 pwp->wp_prot = (uchar_t)prot; 3223 } 3224 if (pwp->wp_oprot != 0) { 3225 prot = pwp->wp_oprot; 3226 if (pwp->wp_read) 3227 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3228 if (pwp->wp_write) 3229 prot &= ~PROT_WRITE; 3230 if (pwp->wp_exec) 3231 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3232 if (!(pwp->wp_flags & WP_NOWATCH) && 3233 pwp->wp_prot != prot && 3234 (pwp->wp_flags & WP_SETPROT) == 0) { 3235 pwp->wp_flags |= WP_SETPROT; 3236 pwp->wp_list = p->p_wprot; 3237 p->p_wprot = pwp; 3238 } 3239 pwp->wp_prot = (uchar_t)prot; 3240 } 3241 } 3242 3243 /* 3244 * If the watched area extends into the next page then do 3245 * it over again with the virtual address of the next page. 3246 */ 3247 if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr) 3248 goto again; 3249 3250 AS_LOCK_EXIT(as, &as->a_lock); 3251 3252 /* 3253 * Free any pages we may have over-allocated 3254 */ 3255 while (newpwp != NULL) { 3256 pwp = newpwp->wp_list; 3257 kmem_free(newpwp, sizeof (struct watched_page)); 3258 newpwp = pwp; 3259 } 3260 3261 return (0); 3262 } 3263 3264 /* 3265 * Remove a watched area from the list of watched pages. 3266 * A watched area may extend over more than one page. 3267 */ 3268 static void 3269 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags) 3270 { 3271 struct as *as = p->p_as; 3272 struct watched_page *pwp; 3273 struct watched_page tpw; 3274 avl_tree_t *tree; 3275 avl_index_t where; 3276 3277 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3278 3279 if (p->p_flag & SVFWAIT) 3280 tree = &p->p_wpage; 3281 else 3282 tree = &as->a_wpage; 3283 3284 tpw.wp_vaddr = vaddr = 3285 (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3286 pwp = avl_find(tree, &tpw, &where); 3287 if (pwp == NULL) 3288 pwp = avl_nearest(tree, where, AVL_AFTER); 3289 3290 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 3291 ASSERT(vaddr <= pwp->wp_vaddr); 3292 3293 if (flags & WA_READ) 3294 pwp->wp_read--; 3295 if (flags & WA_WRITE) 3296 pwp->wp_write--; 3297 if (flags & WA_EXEC) 3298 pwp->wp_exec--; 3299 3300 if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) { 3301 /* 3302 * Reset the hat layer's protections on this page. 
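             * Some watched areas still cover this page, so
             * recompute the reduced protections implied by the
             * remaining wp_read/wp_write/wp_exec counts.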
3303              */
3304             if (pwp->wp_oprot != 0) {
3305                 uint_t prot = pwp->wp_oprot;
3306 
3307                 if (pwp->wp_read)
3308                     prot &=
3309                         ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3310                 if (pwp->wp_write)
3311                     prot &= ~PROT_WRITE;
3312                 if (pwp->wp_exec)
3313                     prot &=
3314                         ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3315                 if (!(pwp->wp_flags & WP_NOWATCH) &&
3316                     pwp->wp_prot != prot &&
3317                     (pwp->wp_flags & WP_SETPROT) == 0) {
3318                     pwp->wp_flags |= WP_SETPROT;
3319                     pwp->wp_list = p->p_wprot;
3320                     p->p_wprot = pwp;
3321                 }
3322                 pwp->wp_prot = (uchar_t)prot;
3323             }
3324         } else {
3325             /*
3326              * No watched areas remain in this page.
3327              * Reset everything to normal.
3328              */
3329             if (pwp->wp_oprot != 0) {
3330                 pwp->wp_prot = pwp->wp_oprot;
3331                 if ((pwp->wp_flags & WP_SETPROT) == 0) {
3332                     pwp->wp_flags |= WP_SETPROT;
3333                     pwp->wp_list = p->p_wprot;
3334                     p->p_wprot = pwp;
3335                 }
3336             }
3337         }
3338 
3339         pwp = AVL_NEXT(tree, pwp);
3340     }
3341 
3342     AS_LOCK_EXIT(as, &as->a_lock);
3343 }
3344 
3345 /*
3346  * Return the original protections for the specified page.
3347  */
3348 static void
3349 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
3350 {
3351     struct watched_page *pwp;
3352     struct watched_page tpw;
3353 
3354     ASSERT(AS_LOCK_HELD(as, &as->a_lock));
3355 
3356     tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3357     if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
3358         *prot = pwp->wp_oprot;
3359 }
3360 
3361 static prpagev_t *
3362 pr_pagev_create(struct seg *seg, int check_noreserve)
3363 {
3364     prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
3365     size_t total_pages = seg_pages(seg);
3366 
3367     /*
3368      * Limit the size of our vectors to pagev_lim pages at a time.  We need
3369      * 4 or 5 bytes of storage per page, so this means we limit ourselves
3370      * to about a megabyte of kernel heap by default.
3371      */
3372     pagev->pg_npages = MIN(total_pages, pagev_lim);
3373     pagev->pg_pnbase = 0;
3374 
3375     pagev->pg_protv =
3376         kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
3377 
3378     if (check_noreserve)
3379         pagev->pg_incore =
3380             kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
3381     else
3382         pagev->pg_incore = NULL;
3383 
3384     return (pagev);
3385 }
3386 
3387 static void
3388 pr_pagev_destroy(prpagev_t *pagev)
3389 {
3390     if (pagev->pg_incore != NULL)
3391         kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
3392 
3393     kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
3394     kmem_free(pagev, sizeof (prpagev_t));
3395 }
3396 
3397 static caddr_t
3398 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
3399 {
3400     ulong_t lastpg = seg_page(seg, eaddr - 1);
3401     ulong_t pn, pnlim;
3402     caddr_t saddr;
3403     size_t len;
3404 
3405     ASSERT(addr >= seg->s_base && addr <= eaddr);
3406 
3407     if (addr == eaddr)
3408         return (eaddr);
3409 
3410 refill:
3411     ASSERT(addr < eaddr);
3412     pagev->pg_pnbase = seg_page(seg, addr);
3413     pnlim = pagev->pg_pnbase + pagev->pg_npages;
3414     saddr = addr;
3415 
3416     if (lastpg < pnlim)
3417         len = (size_t)(eaddr - addr);
3418     else
3419         len = pagev->pg_npages * PAGESIZE;
3420 
3421     if (pagev->pg_incore != NULL) {
3422         /*
3423          * INCORE cleverly has different semantics than GETPROT:
3424          * it returns info on pages up to but NOT including addr + len.
3425          */
3426         SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
3427         pn = pagev->pg_pnbase;
3428 
3429         do {
3430             /*
3431              * Guilty knowledge here:  We know that segvn_incore
3432              * returns more than just the low-order bit that
3433              * indicates the page is actually in memory.
If any 3434 * bits are set, then the page has backing store. 3435 */ 3436 if (pagev->pg_incore[pn++ - pagev->pg_pnbase]) 3437 goto out; 3438 3439 } while ((addr += PAGESIZE) < eaddr && pn < pnlim); 3440 3441 /* 3442 * If we examined all the pages in the vector but we're not 3443 * at the end of the segment, take another lap. 3444 */ 3445 if (addr < eaddr) 3446 goto refill; 3447 } 3448 3449 /* 3450 * Need to take len - 1 because addr + len is the address of the 3451 * first byte of the page just past the end of what we want. 3452 */ 3453 out: 3454 SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv); 3455 return (addr); 3456 } 3457 3458 static caddr_t 3459 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg, 3460 caddr_t *saddrp, caddr_t eaddr, uint_t *protp) 3461 { 3462 /* 3463 * Our starting address is either the specified address, or the base 3464 * address from the start of the pagev. If the latter is greater, 3465 * this means a previous call to pr_pagev_fill has already scanned 3466 * further than the end of the previous mapping. 3467 */ 3468 caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE; 3469 caddr_t addr = MAX(*saddrp, base); 3470 ulong_t pn = seg_page(seg, addr); 3471 uint_t prot, nprot; 3472 3473 /* 3474 * If we're dealing with noreserve pages, then advance addr to 3475 * the address of the next page which has backing store. 3476 */ 3477 if (pagev->pg_incore != NULL) { 3478 while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) { 3479 if ((addr += PAGESIZE) == eaddr) { 3480 *saddrp = addr; 3481 prot = 0; 3482 goto out; 3483 } 3484 if (++pn == pagev->pg_pnbase + pagev->pg_npages) { 3485 addr = pr_pagev_fill(pagev, seg, addr, eaddr); 3486 if (addr == eaddr) { 3487 *saddrp = addr; 3488 prot = 0; 3489 goto out; 3490 } 3491 pn = seg_page(seg, addr); 3492 } 3493 } 3494 } 3495 3496 /* 3497 * Get the protections on the page corresponding to addr. 3498 */ 3499 pn = seg_page(seg, addr); 3500 ASSERT(pn >= pagev->pg_pnbase); 3501 ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages)); 3502 3503 prot = pagev->pg_protv[pn - pagev->pg_pnbase]; 3504 getwatchprot(seg->s_as, addr, &prot); 3505 *saddrp = addr; 3506 3507 /* 3508 * Now loop until we find a backed page with different protections 3509 * or we reach the end of this segment. 3510 */ 3511 while ((addr += PAGESIZE) < eaddr) { 3512 /* 3513 * If pn has advanced to the page number following what we 3514 * have information on, refill the page vector and reset 3515 * addr and pn. If pr_pagev_fill does not return the 3516 * address of the next page, we have a discontiguity and 3517 * thus have reached the end of the current mapping. 3518 */ 3519 if (++pn == pagev->pg_pnbase + pagev->pg_npages) { 3520 caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr); 3521 if (naddr != addr) 3522 goto out; 3523 pn = seg_page(seg, addr); 3524 } 3525 3526 /* 3527 * The previous page's protections are in prot, and it has 3528 * backing. If this page is MAP_NORESERVE and has no backing, 3529 * then end this mapping and return the previous protections. 3530 */ 3531 if (pagev->pg_incore != NULL && 3532 pagev->pg_incore[pn - pagev->pg_pnbase] == 0) 3533 break; 3534 3535 /* 3536 * Otherwise end the mapping if this page's protections (nprot) 3537 * are different than those in the previous page (prot). 
3538          */
3539         nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
3540         getwatchprot(seg->s_as, addr, &nprot);
3541 
3542         if (nprot != prot)
3543             break;
3544     }
3545 
3546 out:
3547     *protp = prot;
3548     return (addr);
3549 }
3550 
3551 size_t
3552 pr_getsegsize(struct seg *seg, int reserved)
3553 {
3554     size_t size = seg->s_size;
3555 
3556     /*
3557      * If we're interested in the reserved space, return the size of the
3558      * segment itself.  Everything else in this function is a special case
3559      * to determine the actual underlying size of various segment types.
3560      */
3561     if (reserved)
3562         return (size);
3563 
3564     /*
3565      * If this is a segvn mapping of a regular file, return the smaller
3566      * of the segment size and the remaining size of the file beyond
3567      * the file offset corresponding to seg->s_base.
3568      */
3569     if (seg->s_ops == &segvn_ops) {
3570         vattr_t vattr;
3571         vnode_t *vp;
3572 
3573         vattr.va_mask = AT_SIZE;
3574 
3575         if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
3576             vp != NULL && vp->v_type == VREG &&
3577             VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
3578 
3579             u_offset_t fsize = vattr.va_size;
3580             u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
3581 
3582             if (fsize < offset)
3583                 fsize = 0;
3584             else
3585                 fsize -= offset;
3586 
3587             fsize = roundup(fsize, (u_offset_t)PAGESIZE);
3588 
3589             if (fsize < (u_offset_t)size)
3590                 size = (size_t)fsize;
3591         }
3592 
3593         return (size);
3594     }
3595 
3596     /*
3597      * If this is an ISM shared segment, don't include pages that are
3598      * beyond the real size of the spt segment that backs it.
3599      */
3600     if (seg->s_ops == &segspt_shmops)
3601         return (MIN(spt_realsize(seg), size));
3602 
3603     /*
3604      * If this segment is a mapping from /dev/null, then this is a
3605      * reservation of virtual address space and has no actual size.
3606      * Such segments are backed by segdev and have type set to neither
3607      * MAP_SHARED nor MAP_PRIVATE.
3608      */
3609     if (seg->s_ops == &segdev_ops &&
3610         ((SEGOP_GETTYPE(seg, seg->s_base) &
3611         (MAP_SHARED | MAP_PRIVATE)) == 0))
3612         return (0);
3613 
3614     /*
3615      * If this segment doesn't match one of the special types we handle,
3616      * just return the size of the segment itself.
3617      */
3618     return (size);
3619 }
3620 
3621 uint_t
3622 pr_getprot(struct seg *seg, int reserved, void **tmp,
3623     caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
3624 {
3625     struct as *as = seg->s_as;
3626 
3627     caddr_t saddr = *saddrp;
3628     caddr_t naddr;
3629 
3630     int check_noreserve;
3631     uint_t prot;
3632 
3633     union {
3634         struct segvn_data *svd;
3635         struct segdev_data *sdp;
3636         void *data;
3637     } s;
3638 
3639     s.data = seg->s_data;
3640 
3641     ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3642     ASSERT(saddr >= seg->s_base && saddr < eaddr);
3643     ASSERT(eaddr <= seg->s_base + seg->s_size);
3644 
3645     /*
3646      * Don't include MAP_NORESERVE pages in the address range
3647      * unless their mappings have actually materialized.
3648      * We cheat by knowing that segvn is the only segment
3649      * driver that supports MAP_NORESERVE.
3650      */
3651     check_noreserve =
3652         (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
3653         (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
3654         (s.svd->flags & MAP_NORESERVE));
3655 
3656     /*
3657      * Examine every page only as a last resort.  We use guilty knowledge
3658      * of segvn and segdev to avoid this:  if there are no per-page
3659      * protections present in the segment and we don't care about
3660      * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
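     * In that common case we can report the entire range
     * [saddr, eaddr) in one shot, without allocating a page vector.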
3661 */ 3662 if (!check_noreserve && saddr == seg->s_base && 3663 seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) { 3664 prot = s.svd->prot; 3665 getwatchprot(as, saddr, &prot); 3666 naddr = eaddr; 3667 3668 } else if (saddr == seg->s_base && seg->s_ops == &segdev_ops && 3669 s.sdp != NULL && s.sdp->pageprot == 0) { 3670 prot = s.sdp->prot; 3671 getwatchprot(as, saddr, &prot); 3672 naddr = eaddr; 3673 3674 } else { 3675 prpagev_t *pagev; 3676 3677 /* 3678 * If addr is sitting at the start of the segment, then 3679 * create a page vector to store protection and incore 3680 * information for pages in the segment, and fill it. 3681 * Otherwise, we expect *tmp to address the prpagev_t 3682 * allocated by a previous call to this function. 3683 */ 3684 if (saddr == seg->s_base) { 3685 pagev = pr_pagev_create(seg, check_noreserve); 3686 saddr = pr_pagev_fill(pagev, seg, saddr, eaddr); 3687 3688 ASSERT(*tmp == NULL); 3689 *tmp = pagev; 3690 3691 ASSERT(saddr <= eaddr); 3692 *saddrp = saddr; 3693 3694 if (saddr == eaddr) { 3695 naddr = saddr; 3696 prot = 0; 3697 goto out; 3698 } 3699 3700 } else { 3701 ASSERT(*tmp != NULL); 3702 pagev = (prpagev_t *)*tmp; 3703 } 3704 3705 naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot); 3706 ASSERT(naddr <= eaddr); 3707 } 3708 3709 out: 3710 if (naddr == eaddr) 3711 pr_getprot_done(tmp); 3712 *naddrp = naddr; 3713 return (prot); 3714 } 3715 3716 void 3717 pr_getprot_done(void **tmp) 3718 { 3719 if (*tmp != NULL) { 3720 pr_pagev_destroy((prpagev_t *)*tmp); 3721 *tmp = NULL; 3722 } 3723 } 3724 3725 /* 3726 * Return true iff the vnode is a /proc file from the object directory. 3727 */ 3728 int 3729 pr_isobject(vnode_t *vp) 3730 { 3731 return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT); 3732 } 3733 3734 /* 3735 * Return true iff the vnode is a /proc file opened by the process itself. 3736 */ 3737 int 3738 pr_isself(vnode_t *vp) 3739 { 3740 /* 3741 * XXX: To retain binary compatibility with the old 3742 * ioctl()-based version of /proc, we exempt self-opens 3743 * of /proc/<pid> from being marked close-on-exec. 3744 */ 3745 return (vn_matchops(vp, prvnodeops) && 3746 (VTOP(vp)->pr_flags & PR_ISSELF) && 3747 VTOP(vp)->pr_type != PR_PIDDIR); 3748 } 3749 3750 static ssize_t 3751 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr) 3752 { 3753 ssize_t pagesize, hatsize; 3754 3755 ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 3756 ASSERT(IS_P2ALIGNED(saddr, PAGESIZE)); 3757 ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE)); 3758 ASSERT(saddr < eaddr); 3759 3760 pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr); 3761 ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize)); 3762 ASSERT(pagesize != 0); 3763 3764 if (pagesize == -1) 3765 pagesize = PAGESIZE; 3766 3767 saddr += P2NPHASE((uintptr_t)saddr, pagesize); 3768 3769 while (saddr < eaddr) { 3770 if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr)) 3771 break; 3772 ASSERT(IS_P2ALIGNED(saddr, pagesize)); 3773 saddr += pagesize; 3774 } 3775 3776 *naddrp = ((saddr < eaddr) ? saddr : eaddr); 3777 return (hatsize); 3778 } 3779 3780 /* 3781 * Return an array of structures with extended memory map information. 3782 * We allocate here; the caller must deallocate. 
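 * The return value is the number of prxmap_t entries filled in, and
 * *sizep is set to the size of the allocation so that the caller can
 * pass it to kmem_free().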
3783 */ 3784 int 3785 prgetxmap(proc_t *p, prxmap_t **prxmapp, size_t *sizep) 3786 { 3787 struct as *as = p->p_as; 3788 int nmaps = 0; 3789 prxmap_t *mp; 3790 size_t size; 3791 struct seg *seg; 3792 struct seg *brkseg, *stkseg; 3793 struct vnode *vp; 3794 struct vattr vattr; 3795 uint_t prot; 3796 3797 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 3798 3799 /* initial allocation */ 3800 *sizep = size = INITIAL_MAPSIZE; 3801 *prxmapp = mp = kmem_alloc(size, KM_SLEEP); 3802 3803 if ((seg = AS_SEGFIRST(as)) == NULL) 3804 return (0); 3805 3806 brkseg = break_seg(p); 3807 stkseg = as_segat(as, prgetstackbase(p)); 3808 3809 do { 3810 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 3811 caddr_t saddr, naddr, baddr; 3812 void *tmp = NULL; 3813 ssize_t psz; 3814 char *parr; 3815 uint64_t npages; 3816 uint64_t pagenum; 3817 3818 /* 3819 * Segment loop part one: iterate from the base of the segment 3820 * to its end, pausing at each address boundary (baddr) between 3821 * ranges that have different virtual memory protections. 3822 */ 3823 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { 3824 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); 3825 ASSERT(baddr >= saddr && baddr <= eaddr); 3826 3827 /* 3828 * Segment loop part two: iterate from the current 3829 * position to the end of the protection boundary, 3830 * pausing at each address boundary (naddr) between 3831 * ranges that have different underlying page sizes. 3832 */ 3833 for (; saddr < baddr; saddr = naddr) { 3834 psz = pr_getpagesize(seg, saddr, &naddr, baddr); 3835 ASSERT(naddr >= saddr && naddr <= baddr); 3836 3837 /* reallocate if necessary */ 3838 if ((nmaps + 1) * sizeof (prxmap_t) > size) { 3839 size_t newsize = size + 3 * size / 16; 3840 prxmap_t *newmp = 3841 kmem_alloc(newsize, KM_SLEEP); 3842 3843 bcopy(*prxmapp, newmp, 3844 nmaps * sizeof (prxmap_t)); 3845 kmem_free(*prxmapp, size); 3846 *sizep = size = newsize; 3847 *prxmapp = newmp; 3848 mp = newmp + nmaps; 3849 } 3850 3851 bzero(mp, sizeof (*mp)); 3852 mp->pr_vaddr = (uintptr_t)saddr; 3853 mp->pr_size = naddr - saddr; 3854 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 3855 mp->pr_mflags = 0; 3856 if (prot & PROT_READ) 3857 mp->pr_mflags |= MA_READ; 3858 if (prot & PROT_WRITE) 3859 mp->pr_mflags |= MA_WRITE; 3860 if (prot & PROT_EXEC) 3861 mp->pr_mflags |= MA_EXEC; 3862 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 3863 mp->pr_mflags |= MA_SHARED; 3864 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 3865 mp->pr_mflags |= MA_NORESERVE; 3866 if (seg->s_ops == &segspt_shmops || 3867 (seg->s_ops == &segvn_ops && 3868 (SEGOP_GETVP(seg, saddr, &vp) != 0 || 3869 vp == NULL))) 3870 mp->pr_mflags |= MA_ANON; 3871 if (seg == brkseg) 3872 mp->pr_mflags |= MA_BREAK; 3873 else if (seg == stkseg) 3874 mp->pr_mflags |= MA_STACK; 3875 if (seg->s_ops == &segspt_shmops) 3876 mp->pr_mflags |= MA_ISM | MA_SHM; 3877 3878 mp->pr_pagesize = PAGESIZE; 3879 if (psz == -1) { 3880 mp->pr_hatpagesize = 0; 3881 } else { 3882 mp->pr_hatpagesize = psz; 3883 } 3884 3885 /* 3886 * Manufacture a filename for the "object" dir. 
3887 */ 3888 mp->pr_dev = PRNODEV; 3889 vattr.va_mask = AT_FSID|AT_NODEID; 3890 if (seg->s_ops == &segvn_ops && 3891 SEGOP_GETVP(seg, saddr, &vp) == 0 && 3892 vp != NULL && vp->v_type == VREG && 3893 VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) { 3894 mp->pr_dev = vattr.va_fsid; 3895 mp->pr_ino = vattr.va_nodeid; 3896 if (vp == p->p_exec) 3897 (void) strcpy(mp->pr_mapname, 3898 "a.out"); 3899 else 3900 pr_object_name(mp->pr_mapname, 3901 vp, &vattr); 3902 } 3903 3904 /* 3905 * Get the SysV shared memory id, if any. 3906 */ 3907 if ((mp->pr_mflags & MA_SHARED) && 3908 p->p_segacct && (mp->pr_shmid = shmgetid(p, 3909 seg->s_base)) != SHMID_NONE) { 3910 if (mp->pr_shmid == SHMID_FREE) 3911 mp->pr_shmid = -1; 3912 3913 mp->pr_mflags |= MA_SHM; 3914 } else { 3915 mp->pr_shmid = -1; 3916 } 3917 3918 npages = ((uintptr_t)(naddr - saddr)) >> 3919 PAGESHIFT; 3920 parr = kmem_zalloc(npages, KM_SLEEP); 3921 3922 SEGOP_INCORE(seg, saddr, naddr - saddr, parr); 3923 3924 for (pagenum = 0; pagenum < npages; pagenum++) { 3925 if (parr[pagenum] & SEG_PAGE_INCORE) 3926 mp->pr_rss++; 3927 if (parr[pagenum] & SEG_PAGE_ANON) 3928 mp->pr_anon++; 3929 if (parr[pagenum] & SEG_PAGE_LOCKED) 3930 mp->pr_locked++; 3931 } 3932 kmem_free(parr, npages); 3933 mp++; 3934 nmaps++; 3935 } 3936 } 3937 ASSERT(tmp == NULL); 3938 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 3939 3940 return (nmaps); 3941 } 3942 3943 /* 3944 * Return the process's credentials. We don't need a 32-bit equivalent of 3945 * this function because prcred_t and prcred32_t are actually the same. 3946 */ 3947 void 3948 prgetcred(proc_t *p, prcred_t *pcrp) 3949 { 3950 mutex_enter(&p->p_crlock); 3951 cred2prcred(p->p_cred, pcrp); 3952 mutex_exit(&p->p_crlock); 3953 } 3954 3955 /* 3956 * Compute actual size of the prpriv_t structure. 3957 */ 3958 3959 size_t 3960 prgetprivsize(void) 3961 { 3962 return (priv_prgetprivsize(NULL)); 3963 } 3964 3965 /* 3966 * Return the process's privileges. We don't need a 32-bit equivalent of 3967 * this function because prpriv_t and prpriv32_t are actually the same. 3968 */ 3969 void 3970 prgetpriv(proc_t *p, prpriv_t *pprp) 3971 { 3972 mutex_enter(&p->p_crlock); 3973 cred2prpriv(p->p_cred, pprp); 3974 mutex_exit(&p->p_crlock); 3975 } 3976 3977 #ifdef _SYSCALL32_IMPL 3978 /* 3979 * Return an array of structures with HAT memory map information. 3980 * We allocate here; the caller must deallocate. 3981 */ 3982 int 3983 prgetxmap32(proc_t *p, prxmap32_t **prxmapp, size_t *sizep) 3984 { 3985 struct as *as = p->p_as; 3986 int nmaps = 0; 3987 prxmap32_t *mp; 3988 size_t size; 3989 struct seg *seg; 3990 struct seg *brkseg, *stkseg; 3991 struct vnode *vp; 3992 struct vattr vattr; 3993 uint_t prot; 3994 3995 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 3996 3997 /* initial allocation */ 3998 *sizep = size = INITIAL_MAPSIZE; 3999 *prxmapp = mp = kmem_alloc(size, KM_SLEEP); 4000 4001 if ((seg = AS_SEGFIRST(as)) == NULL) 4002 return (0); 4003 4004 brkseg = break_seg(p); 4005 stkseg = as_segat(as, prgetstackbase(p)); 4006 4007 do { 4008 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 4009 caddr_t saddr, naddr, baddr; 4010 void *tmp = NULL; 4011 ssize_t psz; 4012 char *parr; 4013 uint64_t npages; 4014 uint64_t pagenum; 4015 4016 /* 4017 * Segment loop part one: iterate from the base of the segment 4018 * to its end, pausing at each address boundary (baddr) between 4019 * ranges that have different virtual memory protections. 
4020 */ 4021 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { 4022 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); 4023 ASSERT(baddr >= saddr && baddr <= eaddr); 4024 4025 /* 4026 * Segment loop part two: iterate from the current 4027 * position to the end of the protection boundary, 4028 * pausing at each address boundary (naddr) between 4029 * ranges that have different underlying page sizes. 4030 */ 4031 for (; saddr < baddr; saddr = naddr) { 4032 psz = pr_getpagesize(seg, saddr, &naddr, baddr); 4033 ASSERT(naddr >= saddr && naddr <= baddr); 4034 4035 /* reallocate if necessary */ 4036 if ((nmaps + 1) * sizeof (prxmap32_t) > size) { 4037 size_t newsize = size + 3 * size / 16; 4038 prxmap32_t *newmp = 4039 kmem_alloc(newsize, KM_SLEEP); 4040 4041 bcopy(*prxmapp, newmp, 4042 nmaps * sizeof (prxmap32_t)); 4043 kmem_free(*prxmapp, size); 4044 *sizep = size = newsize; 4045 *prxmapp = newmp; 4046 mp = newmp + nmaps; 4047 } 4048 4049 bzero(mp, sizeof (*mp)); 4050 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 4051 mp->pr_size = (size32_t)(naddr - saddr); 4052 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 4053 mp->pr_mflags = 0; 4054 if (prot & PROT_READ) 4055 mp->pr_mflags |= MA_READ; 4056 if (prot & PROT_WRITE) 4057 mp->pr_mflags |= MA_WRITE; 4058 if (prot & PROT_EXEC) 4059 mp->pr_mflags |= MA_EXEC; 4060 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 4061 mp->pr_mflags |= MA_SHARED; 4062 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 4063 mp->pr_mflags |= MA_NORESERVE; 4064 if (seg->s_ops == &segspt_shmops || 4065 (seg->s_ops == &segvn_ops && 4066 (SEGOP_GETVP(seg, saddr, &vp) != 0 || 4067 vp == NULL))) 4068 mp->pr_mflags |= MA_ANON; 4069 if (seg == brkseg) 4070 mp->pr_mflags |= MA_BREAK; 4071 else if (seg == stkseg) 4072 mp->pr_mflags |= MA_STACK; 4073 if (seg->s_ops == &segspt_shmops) 4074 mp->pr_mflags |= MA_ISM | MA_SHM; 4075 4076 mp->pr_pagesize = PAGESIZE; 4077 if (psz == -1) { 4078 mp->pr_hatpagesize = 0; 4079 } else { 4080 mp->pr_hatpagesize = psz; 4081 } 4082 4083 /* 4084 * Manufacture a filename for the "object" dir. 4085 */ 4086 mp->pr_dev = PRNODEV32; 4087 vattr.va_mask = AT_FSID|AT_NODEID; 4088 if (seg->s_ops == &segvn_ops && 4089 SEGOP_GETVP(seg, saddr, &vp) == 0 && 4090 vp != NULL && vp->v_type == VREG && 4091 VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) { 4092 (void) cmpldev(&mp->pr_dev, 4093 vattr.va_fsid); 4094 mp->pr_ino = vattr.va_nodeid; 4095 if (vp == p->p_exec) 4096 (void) strcpy(mp->pr_mapname, 4097 "a.out"); 4098 else 4099 pr_object_name(mp->pr_mapname, 4100 vp, &vattr); 4101 } 4102 4103 /* 4104 * Get the SysV shared memory id, if any. 4105 */ 4106 if ((mp->pr_mflags & MA_SHARED) && 4107 p->p_segacct && (mp->pr_shmid = shmgetid(p, 4108 seg->s_base)) != SHMID_NONE) { 4109 if (mp->pr_shmid == SHMID_FREE) 4110 mp->pr_shmid = -1; 4111 4112 mp->pr_mflags |= MA_SHM; 4113 } else { 4114 mp->pr_shmid = -1; 4115 } 4116 4117 npages = ((uintptr_t)(naddr - saddr)) >> 4118 PAGESHIFT; 4119 parr = kmem_zalloc(npages, KM_SLEEP); 4120 4121 SEGOP_INCORE(seg, saddr, naddr - saddr, parr); 4122 4123 for (pagenum = 0; pagenum < npages; pagenum++) { 4124 if (parr[pagenum] & SEG_PAGE_INCORE) 4125 mp->pr_rss++; 4126 if (parr[pagenum] & SEG_PAGE_ANON) 4127 mp->pr_anon++; 4128 if (parr[pagenum] & SEG_PAGE_LOCKED) 4129 mp->pr_locked++; 4130 } 4131 kmem_free(parr, npages); 4132 mp++; 4133 nmaps++; 4134 } 4135 } 4136 ASSERT(tmp == NULL); 4137 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 4138 4139 return (nmaps); 4140 } 4141 #endif /* _SYSCALL32_IMPL */ 4142
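
/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * in-kernel consumer of prgetxmap() would follow the locking and
 * deallocation contract described above roughly as follows:
 *
 *	struct as *as = p->p_as;
 *	prxmap_t *xmap;
 *	size_t size;
 *	int i, nmaps;
 *
 *	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 *	nmaps = prgetxmap(p, &xmap, &size);
 *	AS_LOCK_EXIT(as, &as->a_lock);
 *
 *	for (i = 0; i < nmaps; i++)
 *		... examine xmap[i] ...
 *
 *	kmem_free(xmap, size);	(note: size, not nmaps * sizeof (prxmap_t))
 */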