1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/cmn_err.h> 33 #include <sys/cred.h> 34 #include <sys/priv.h> 35 #include <sys/debug.h> 36 #include <sys/errno.h> 37 #include <sys/inline.h> 38 #include <sys/kmem.h> 39 #include <sys/mman.h> 40 #include <sys/proc.h> 41 #include <sys/brand.h> 42 #include <sys/sobject.h> 43 #include <sys/sysmacros.h> 44 #include <sys/systm.h> 45 #include <sys/uio.h> 46 #include <sys/var.h> 47 #include <sys/vfs.h> 48 #include <sys/vnode.h> 49 #include <sys/session.h> 50 #include <sys/pcb.h> 51 #include <sys/signal.h> 52 #include <sys/user.h> 53 #include <sys/disp.h> 54 #include <sys/class.h> 55 #include <sys/ts.h> 56 #include <sys/bitmap.h> 57 #include <sys/poll.h> 58 #include <sys/shm_impl.h> 59 #include <sys/fault.h> 60 #include <sys/syscall.h> 61 #include <sys/procfs.h> 62 #include <sys/processor.h> 63 #include <sys/cpuvar.h> 64 #include <sys/copyops.h> 65 #include 
<sys/time.h> 66 #include <sys/msacct.h> 67 #include <vm/as.h> 68 #include <vm/rm.h> 69 #include <vm/seg.h> 70 #include <vm/seg_vn.h> 71 #include <vm/seg_dev.h> 72 #include <vm/seg_spt.h> 73 #include <vm/page.h> 74 #include <sys/vmparam.h> 75 #include <sys/swap.h> 76 #include <fs/proc/prdata.h> 77 #include <sys/task.h> 78 #include <sys/project.h> 79 #include <sys/contract_impl.h> 80 #include <sys/contract/process.h> 81 #include <sys/contract/process_impl.h> 82 #include <sys/schedctl.h> 83 #include <sys/pool.h> 84 #include <sys/zone.h> 85 #include <sys/atomic.h> 86 #include <sys/sdt.h> 87 88 #define MAX_ITERS_SPIN 5 89 90 typedef struct prpagev { 91 uint_t *pg_protv; /* vector of page permissions */ 92 char *pg_incore; /* vector of incore flags */ 93 size_t pg_npages; /* number of pages in protv and incore */ 94 ulong_t pg_pnbase; /* pn within segment of first protv element */ 95 } prpagev_t; 96 97 size_t pagev_lim = 256 * 1024; /* limit on number of pages in prpagev_t */ 98 99 extern struct seg_ops segdev_ops; /* needs a header file */ 100 extern struct seg_ops segspt_shmops; /* needs a header file */ 101 102 static int set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t); 103 static void clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t); 104 105 /* 106 * Choose an lwp from the complete set of lwps for the process. 107 * This is called for any operation applied to the process 108 * file descriptor that requires an lwp to operate upon. 109 * 110 * Returns a pointer to the thread for the selected LWP, 111 * and with the dispatcher lock held for the thread. 112 * 113 * The algorithm for choosing an lwp is critical for /proc semantics; 114 * don't touch this code unless you know all of the implications. 
115 */ 116 kthread_t * 117 prchoose(proc_t *p) 118 { 119 kthread_t *t; 120 kthread_t *t_onproc = NULL; /* running on processor */ 121 kthread_t *t_run = NULL; /* runnable, on disp queue */ 122 kthread_t *t_sleep = NULL; /* sleeping */ 123 kthread_t *t_hold = NULL; /* sleeping, performing hold */ 124 kthread_t *t_susp = NULL; /* suspended stop */ 125 kthread_t *t_jstop = NULL; /* jobcontrol stop, w/o directed stop */ 126 kthread_t *t_jdstop = NULL; /* jobcontrol stop with directed stop */ 127 kthread_t *t_req = NULL; /* requested stop */ 128 kthread_t *t_istop = NULL; /* event-of-interest stop */ 129 kthread_t *t_dtrace = NULL; /* DTrace stop */ 130 131 ASSERT(MUTEX_HELD(&p->p_lock)); 132 133 /* 134 * If the agent lwp exists, it takes precedence over all others. 135 */ 136 if ((t = p->p_agenttp) != NULL) { 137 thread_lock(t); 138 return (t); 139 } 140 141 if ((t = p->p_tlist) == NULL) /* start at the head of the list */ 142 return (t); 143 do { /* for eacn lwp in the process */ 144 if (VSTOPPED(t)) { /* virtually stopped */ 145 if (t_req == NULL) 146 t_req = t; 147 continue; 148 } 149 150 thread_lock(t); /* make sure thread is in good state */ 151 switch (t->t_state) { 152 default: 153 panic("prchoose: bad thread state %d, thread 0x%p", 154 t->t_state, (void *)t); 155 /*NOTREACHED*/ 156 case TS_SLEEP: 157 /* this is filthy */ 158 if (t->t_wchan == (caddr_t)&p->p_holdlwps && 159 t->t_wchan0 == NULL) { 160 if (t_hold == NULL) 161 t_hold = t; 162 } else { 163 if (t_sleep == NULL) 164 t_sleep = t; 165 } 166 break; 167 case TS_RUN: 168 case TS_WAIT: 169 if (t_run == NULL) 170 t_run = t; 171 break; 172 case TS_ONPROC: 173 if (t_onproc == NULL) 174 t_onproc = t; 175 break; 176 case TS_ZOMB: /* last possible choice */ 177 break; 178 case TS_STOPPED: 179 switch (t->t_whystop) { 180 case PR_SUSPENDED: 181 if (t_susp == NULL) 182 t_susp = t; 183 break; 184 case PR_JOBCONTROL: 185 if (t->t_proc_flag & TP_PRSTOP) { 186 if (t_jdstop == NULL) 187 t_jdstop = t; 188 } else { 189 if 
(t_jstop == NULL) 190 t_jstop = t; 191 } 192 break; 193 case PR_REQUESTED: 194 if (t->t_dtrace_stop && t_dtrace == NULL) 195 t_dtrace = t; 196 else if (t_req == NULL) 197 t_req = t; 198 break; 199 case PR_SYSENTRY: 200 case PR_SYSEXIT: 201 case PR_SIGNALLED: 202 case PR_FAULTED: 203 /* 204 * Make an lwp calling exit() be the 205 * last lwp seen in the process. 206 */ 207 if (t_istop == NULL || 208 (t_istop->t_whystop == PR_SYSENTRY && 209 t_istop->t_whatstop == SYS_exit)) 210 t_istop = t; 211 break; 212 case PR_CHECKPOINT: /* can't happen? */ 213 break; 214 default: 215 panic("prchoose: bad t_whystop %d, thread 0x%p", 216 t->t_whystop, (void *)t); 217 /*NOTREACHED*/ 218 } 219 break; 220 } 221 thread_unlock(t); 222 } while ((t = t->t_forw) != p->p_tlist); 223 224 if (t_onproc) 225 t = t_onproc; 226 else if (t_run) 227 t = t_run; 228 else if (t_sleep) 229 t = t_sleep; 230 else if (t_jstop) 231 t = t_jstop; 232 else if (t_jdstop) 233 t = t_jdstop; 234 else if (t_istop) 235 t = t_istop; 236 else if (t_dtrace) 237 t = t_dtrace; 238 else if (t_req) 239 t = t_req; 240 else if (t_hold) 241 t = t_hold; 242 else if (t_susp) 243 t = t_susp; 244 else /* TS_ZOMB */ 245 t = p->p_tlist; 246 247 if (t != NULL) 248 thread_lock(t); 249 return (t); 250 } 251 252 /* 253 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop. 254 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI 255 * on the /proc file descriptor. Called from stop() when a traced 256 * process stops on an event of interest. Also called from exit() 257 * and prinvalidate() to indicate POLLHUP and POLLERR respectively. 
258 */ 259 void 260 prnotify(struct vnode *vp) 261 { 262 prcommon_t *pcp = VTOP(vp)->pr_common; 263 264 mutex_enter(&pcp->prc_mutex); 265 cv_broadcast(&pcp->prc_wait); 266 mutex_exit(&pcp->prc_mutex); 267 if (pcp->prc_flags & PRC_POLL) { 268 /* 269 * We call pollwakeup() with POLLHUP to ensure that 270 * the pollers are awakened even if they are polling 271 * for nothing (i.e., waiting for the process to exit). 272 * This enables the use of the PRC_POLL flag for optimization 273 * (we can turn off PRC_POLL only if we know no pollers remain). 274 */ 275 pcp->prc_flags &= ~PRC_POLL; 276 pollwakeup(&pcp->prc_pollhead, POLLHUP); 277 } 278 } 279 280 /* called immediately below, in prfree() */ 281 static void 282 prfreenotify(vnode_t *vp) 283 { 284 prnode_t *pnp; 285 prcommon_t *pcp; 286 287 while (vp != NULL) { 288 pnp = VTOP(vp); 289 pcp = pnp->pr_common; 290 ASSERT(pcp->prc_thread == NULL); 291 pcp->prc_proc = NULL; 292 /* 293 * We can't call prnotify() here because we are holding 294 * pidlock. We assert that there is no need to. 295 */ 296 mutex_enter(&pcp->prc_mutex); 297 cv_broadcast(&pcp->prc_wait); 298 mutex_exit(&pcp->prc_mutex); 299 ASSERT(!(pcp->prc_flags & PRC_POLL)); 300 301 vp = pnp->pr_next; 302 pnp->pr_next = NULL; 303 } 304 } 305 306 /* 307 * Called from a hook in freeproc() when a traced process is removed 308 * from the process table. The proc-table pointers of all associated 309 * /proc vnodes are cleared to indicate that the process has gone away. 310 */ 311 void 312 prfree(proc_t *p) 313 { 314 uint_t slot = p->p_slot; 315 316 ASSERT(MUTEX_HELD(&pidlock)); 317 318 /* 319 * Block the process against /proc so it can be freed. 320 * It cannot be freed while locked by some controlling process. 
321 * Lock ordering: 322 * pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex 323 */ 324 mutex_enter(&pr_pidlock); /* protects pcp->prc_proc */ 325 mutex_enter(&p->p_lock); 326 while (p->p_proc_flag & P_PR_LOCK) { 327 mutex_exit(&pr_pidlock); 328 cv_wait(&pr_pid_cv[slot], &p->p_lock); 329 mutex_exit(&p->p_lock); 330 mutex_enter(&pr_pidlock); 331 mutex_enter(&p->p_lock); 332 } 333 334 ASSERT(p->p_tlist == NULL); 335 336 prfreenotify(p->p_plist); 337 p->p_plist = NULL; 338 339 prfreenotify(p->p_trace); 340 p->p_trace = NULL; 341 342 /* 343 * We broadcast to wake up everyone waiting for this process. 344 * No one can reach this process from this point on. 345 */ 346 cv_broadcast(&pr_pid_cv[slot]); 347 348 mutex_exit(&p->p_lock); 349 mutex_exit(&pr_pidlock); 350 } 351 352 /* 353 * Called from a hook in exit() when a traced process is becoming a zombie. 354 */ 355 void 356 prexit(proc_t *p) 357 { 358 ASSERT(MUTEX_HELD(&p->p_lock)); 359 360 if (pr_watch_active(p)) { 361 pr_free_watchpoints(p); 362 watch_disable(curthread); 363 } 364 /* pr_free_watched_pages() is called in exit(), after dropping p_lock */ 365 if (p->p_trace) { 366 VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY; 367 prnotify(p->p_trace); 368 } 369 cv_broadcast(&pr_pid_cv[p->p_slot]); /* pauselwps() */ 370 } 371 372 /* 373 * Called when a thread calls lwp_exit(). 374 */ 375 void 376 prlwpexit(kthread_t *t) 377 { 378 vnode_t *vp; 379 prnode_t *pnp; 380 prcommon_t *pcp; 381 proc_t *p = ttoproc(t); 382 lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry; 383 384 ASSERT(t == curthread); 385 ASSERT(MUTEX_HELD(&p->p_lock)); 386 387 /* 388 * The process must be blocked against /proc to do this safely. 389 * The lwp must not disappear while the process is marked P_PR_LOCK. 390 * It is the caller's responsibility to have called prbarrier(p). 
391 */ 392 ASSERT(!(p->p_proc_flag & P_PR_LOCK)); 393 394 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) { 395 pnp = VTOP(vp); 396 pcp = pnp->pr_common; 397 if (pcp->prc_thread == t) { 398 pcp->prc_thread = NULL; 399 pcp->prc_flags |= PRC_DESTROY; 400 } 401 } 402 403 for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) { 404 pnp = VTOP(vp); 405 pcp = pnp->pr_common; 406 pcp->prc_thread = NULL; 407 pcp->prc_flags |= PRC_DESTROY; 408 prnotify(vp); 409 } 410 411 if (p->p_trace) 412 prnotify(p->p_trace); 413 } 414 415 /* 416 * Called when a zombie thread is joined or when a 417 * detached lwp exits. Called from lwp_hash_out(). 418 */ 419 void 420 prlwpfree(proc_t *p, lwpent_t *lep) 421 { 422 vnode_t *vp; 423 prnode_t *pnp; 424 prcommon_t *pcp; 425 426 ASSERT(MUTEX_HELD(&p->p_lock)); 427 428 /* 429 * The process must be blocked against /proc to do this safely. 430 * The lwp must not disappear while the process is marked P_PR_LOCK. 431 * It is the caller's responsibility to have called prbarrier(p). 432 */ 433 ASSERT(!(p->p_proc_flag & P_PR_LOCK)); 434 435 vp = lep->le_trace; 436 lep->le_trace = NULL; 437 while (vp) { 438 prnotify(vp); 439 pnp = VTOP(vp); 440 pcp = pnp->pr_common; 441 ASSERT(pcp->prc_thread == NULL && 442 (pcp->prc_flags & PRC_DESTROY)); 443 pcp->prc_tslot = -1; 444 vp = pnp->pr_next; 445 pnp->pr_next = NULL; 446 } 447 448 if (p->p_trace) 449 prnotify(p->p_trace); 450 } 451 452 /* 453 * Called from a hook in exec() when a thread starts exec(). 454 */ 455 void 456 prexecstart(void) 457 { 458 proc_t *p = ttoproc(curthread); 459 klwp_t *lwp = ttolwp(curthread); 460 461 /* 462 * The P_PR_EXEC flag blocks /proc operations for 463 * the duration of the exec(). 464 * We can't start exec() while the process is 465 * locked by /proc, so we call prbarrier(). 466 * lwp_nostop keeps the process from being stopped 467 * via job control for the duration of the exec(). 
468 */ 469 470 ASSERT(MUTEX_HELD(&p->p_lock)); 471 prbarrier(p); 472 lwp->lwp_nostop++; 473 p->p_proc_flag |= P_PR_EXEC; 474 } 475 476 /* 477 * Called from a hook in exec() when a thread finishes exec(). 478 * The thread may or may not have succeeded. Some other thread 479 * may have beat it to the punch. 480 */ 481 void 482 prexecend(void) 483 { 484 proc_t *p = ttoproc(curthread); 485 klwp_t *lwp = ttolwp(curthread); 486 vnode_t *vp; 487 prnode_t *pnp; 488 prcommon_t *pcp; 489 model_t model = p->p_model; 490 id_t tid = curthread->t_tid; 491 int tslot = curthread->t_dslot; 492 493 ASSERT(MUTEX_HELD(&p->p_lock)); 494 495 lwp->lwp_nostop--; 496 if (p->p_flag & SEXITLWPS) { 497 /* 498 * We are on our way to exiting because some 499 * other thread beat us in the race to exec(). 500 * Don't clear the P_PR_EXEC flag in this case. 501 */ 502 return; 503 } 504 505 /* 506 * Wake up anyone waiting in /proc for the process to complete exec(). 507 */ 508 p->p_proc_flag &= ~P_PR_EXEC; 509 if ((vp = p->p_trace) != NULL) { 510 pcp = VTOP(vp)->pr_common; 511 mutex_enter(&pcp->prc_mutex); 512 cv_broadcast(&pcp->prc_wait); 513 mutex_exit(&pcp->prc_mutex); 514 for (; vp != NULL; vp = pnp->pr_next) { 515 pnp = VTOP(vp); 516 pnp->pr_common->prc_datamodel = model; 517 } 518 } 519 if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) { 520 /* 521 * We dealt with the process common above. 522 */ 523 ASSERT(p->p_trace != NULL); 524 pcp = VTOP(vp)->pr_common; 525 mutex_enter(&pcp->prc_mutex); 526 cv_broadcast(&pcp->prc_wait); 527 mutex_exit(&pcp->prc_mutex); 528 for (; vp != NULL; vp = pnp->pr_next) { 529 pnp = VTOP(vp); 530 pcp = pnp->pr_common; 531 pcp->prc_datamodel = model; 532 pcp->prc_tid = tid; 533 pcp->prc_tslot = tslot; 534 } 535 } 536 } 537 538 /* 539 * Called from a hook in relvm() just before freeing the address space. 540 * We free all the watched areas now. 
541 */ 542 void 543 prrelvm(void) 544 { 545 proc_t *p = ttoproc(curthread); 546 547 mutex_enter(&p->p_lock); 548 prbarrier(p); /* block all other /proc operations */ 549 if (pr_watch_active(p)) { 550 pr_free_watchpoints(p); 551 watch_disable(curthread); 552 } 553 mutex_exit(&p->p_lock); 554 pr_free_watched_pages(p); 555 } 556 557 /* 558 * Called from hooks in exec-related code when a traced process 559 * attempts to exec(2) a setuid/setgid program or an unreadable 560 * file. Rather than fail the exec we invalidate the associated 561 * /proc vnodes so that subsequent attempts to use them will fail. 562 * 563 * All /proc vnodes, except directory vnodes, are retained on a linked 564 * list (rooted at p_plist in the process structure) until last close. 565 * 566 * A controlling process must re-open the /proc files in order to 567 * regain control. 568 */ 569 void 570 prinvalidate(struct user *up) 571 { 572 kthread_t *t = curthread; 573 proc_t *p = ttoproc(t); 574 vnode_t *vp; 575 prnode_t *pnp; 576 int writers = 0; 577 578 mutex_enter(&p->p_lock); 579 prbarrier(p); /* block all other /proc operations */ 580 581 /* 582 * At this moment, there can be only one lwp in the process. 583 */ 584 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 585 586 /* 587 * Invalidate any currently active /proc vnodes. 588 */ 589 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) { 590 pnp = VTOP(vp); 591 switch (pnp->pr_type) { 592 case PR_PSINFO: /* these files can read by anyone */ 593 case PR_LPSINFO: 594 case PR_LWPSINFO: 595 case PR_LWPDIR: 596 case PR_LWPIDDIR: 597 case PR_USAGE: 598 case PR_LUSAGE: 599 case PR_LWPUSAGE: 600 break; 601 default: 602 pnp->pr_flags |= PR_INVAL; 603 break; 604 } 605 } 606 /* 607 * Wake up anyone waiting for the process or lwp. 608 * p->p_trace is guaranteed to be non-NULL if there 609 * are any open /proc files for this process. 
610 */ 611 if ((vp = p->p_trace) != NULL) { 612 prcommon_t *pcp = VTOP(vp)->pr_pcommon; 613 614 prnotify(vp); 615 /* 616 * Are there any writers? 617 */ 618 if ((writers = pcp->prc_writers) != 0) { 619 /* 620 * Clear the exclusive open flag (old /proc interface). 621 * Set prc_selfopens equal to prc_writers so that 622 * the next O_EXCL|O_WRITE open will succeed 623 * even with existing (though invalid) writers. 624 * prclose() must decrement prc_selfopens when 625 * the invalid files are closed. 626 */ 627 pcp->prc_flags &= ~PRC_EXCL; 628 ASSERT(pcp->prc_selfopens <= writers); 629 pcp->prc_selfopens = writers; 630 } 631 } 632 vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace; 633 while (vp != NULL) { 634 /* 635 * We should not invalidate the lwpiddir vnodes, 636 * but the necessities of maintaining the old 637 * ioctl()-based version of /proc require it. 638 */ 639 pnp = VTOP(vp); 640 pnp->pr_flags |= PR_INVAL; 641 prnotify(vp); 642 vp = pnp->pr_next; 643 } 644 645 /* 646 * If any tracing flags are in effect and any vnodes are open for 647 * writing then set the requested-stop and run-on-last-close flags. 648 * Otherwise, clear all tracing flags. 649 */ 650 t->t_proc_flag &= ~TP_PAUSE; 651 if ((p->p_proc_flag & P_PR_TRACE) && writers) { 652 t->t_proc_flag |= TP_PRSTOP; 653 aston(t); /* so ISSIG will see the flag */ 654 p->p_proc_flag |= P_PR_RUNLCL; 655 } else { 656 premptyset(&up->u_entrymask); /* syscalls */ 657 premptyset(&up->u_exitmask); 658 up->u_systrap = 0; 659 premptyset(&p->p_sigmask); /* signals */ 660 premptyset(&p->p_fltmask); /* faults */ 661 t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING); 662 p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE); 663 prnostep(ttolwp(t)); 664 } 665 666 mutex_exit(&p->p_lock); 667 } 668 669 /* 670 * Acquire the controlled process's p_lock and mark it P_PR_LOCK. 671 * Return with pr_pidlock held in all cases. 672 * Return with p_lock held if the the process still exists. 
673 * Return value is the process pointer if the process still exists, else NULL. 674 * If we lock the process, give ourself kernel priority to avoid deadlocks; 675 * this is undone in prunlock(). 676 */ 677 proc_t * 678 pr_p_lock(prnode_t *pnp) 679 { 680 proc_t *p; 681 prcommon_t *pcp; 682 683 mutex_enter(&pr_pidlock); 684 if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL) 685 return (NULL); 686 mutex_enter(&p->p_lock); 687 while (p->p_proc_flag & P_PR_LOCK) { 688 /* 689 * This cv/mutex pair is persistent even if 690 * the process disappears while we sleep. 691 */ 692 kcondvar_t *cv = &pr_pid_cv[p->p_slot]; 693 kmutex_t *mp = &p->p_lock; 694 695 mutex_exit(&pr_pidlock); 696 cv_wait(cv, mp); 697 mutex_exit(mp); 698 mutex_enter(&pr_pidlock); 699 if (pcp->prc_proc == NULL) 700 return (NULL); 701 ASSERT(p == pcp->prc_proc); 702 mutex_enter(&p->p_lock); 703 } 704 p->p_proc_flag |= P_PR_LOCK; 705 THREAD_KPRI_REQUEST(); 706 return (p); 707 } 708 709 /* 710 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock. 711 * This prevents any lwp of the process from disappearing and 712 * blocks most operations that a process can perform on itself. 713 * Returns 0 on success, a non-zero error number on failure. 714 * 715 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when 716 * the subject process is a zombie (ZYES) or fail for zombies (ZNO). 717 * 718 * error returns: 719 * ENOENT: process or lwp has disappeared or process is exiting 720 * (or has become a zombie and zdisp == ZNO). 721 * EAGAIN: procfs vnode has become invalid. 722 * EINTR: signal arrived while waiting for exec to complete. 723 */ 724 int 725 prlock(prnode_t *pnp, int zdisp) 726 { 727 prcommon_t *pcp; 728 proc_t *p; 729 730 again: 731 pcp = pnp->pr_common; 732 p = pr_p_lock(pnp); 733 mutex_exit(&pr_pidlock); 734 735 /* 736 * Return ENOENT immediately if there is no process. 
737 */ 738 if (p == NULL) 739 return (ENOENT); 740 741 ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL); 742 743 /* 744 * Return ENOENT if process entered zombie state or is exiting 745 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies. 746 */ 747 if (zdisp == ZNO && 748 ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) { 749 prunlock(pnp); 750 return (ENOENT); 751 } 752 753 /* 754 * If lwp-specific, check to see if lwp has disappeared. 755 */ 756 if (pcp->prc_flags & PRC_LWP) { 757 if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) || 758 pcp->prc_tslot == -1) { 759 prunlock(pnp); 760 return (ENOENT); 761 } 762 } 763 764 /* 765 * Return EAGAIN if we have encountered a security violation. 766 * (The process exec'd a set-id or unreadable executable file.) 767 */ 768 if (pnp->pr_flags & PR_INVAL) { 769 prunlock(pnp); 770 return (EAGAIN); 771 } 772 773 /* 774 * If process is undergoing an exec(), wait for 775 * completion and then start all over again. 776 */ 777 if (p->p_proc_flag & P_PR_EXEC) { 778 pcp = pnp->pr_pcommon; /* Put on the correct sleep queue */ 779 mutex_enter(&pcp->prc_mutex); 780 prunlock(pnp); 781 if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) { 782 mutex_exit(&pcp->prc_mutex); 783 return (EINTR); 784 } 785 mutex_exit(&pcp->prc_mutex); 786 goto again; 787 } 788 789 /* 790 * We return holding p->p_lock. 791 */ 792 return (0); 793 } 794 795 /* 796 * Undo prlock() and pr_p_lock(). 797 * p->p_lock is still held; pr_pidlock is no longer held. 798 * 799 * prunmark() drops the P_PR_LOCK flag and wakes up another thread, 800 * if any, waiting for the flag to be dropped; it retains p->p_lock. 801 * 802 * prunlock() calls prunmark() and then drops p->p_lock. 
803 */ 804 void 805 prunmark(proc_t *p) 806 { 807 ASSERT(p->p_proc_flag & P_PR_LOCK); 808 ASSERT(MUTEX_HELD(&p->p_lock)); 809 810 cv_signal(&pr_pid_cv[p->p_slot]); 811 p->p_proc_flag &= ~P_PR_LOCK; 812 THREAD_KPRI_RELEASE(); 813 } 814 815 void 816 prunlock(prnode_t *pnp) 817 { 818 prcommon_t *pcp = pnp->pr_common; 819 proc_t *p = pcp->prc_proc; 820 821 /* 822 * If we (or someone) gave it a SIGKILL, and it is not 823 * already a zombie, set it running unconditionally. 824 */ 825 if ((p->p_flag & SKILLED) && 826 !(p->p_flag & SEXITING) && 827 !(pcp->prc_flags & PRC_DESTROY) && 828 !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1)) 829 (void) pr_setrun(pnp, 0); 830 prunmark(p); 831 mutex_exit(&p->p_lock); 832 } 833 834 /* 835 * Called while holding p->p_lock to delay until the process is unlocked. 836 * We enter holding p->p_lock; p->p_lock is dropped and reacquired. 837 * The process cannot become locked again until p->p_lock is dropped. 838 */ 839 void 840 prbarrier(proc_t *p) 841 { 842 ASSERT(MUTEX_HELD(&p->p_lock)); 843 844 if (p->p_proc_flag & P_PR_LOCK) { 845 /* The process is locked; delay until not locked */ 846 uint_t slot = p->p_slot; 847 848 while (p->p_proc_flag & P_PR_LOCK) 849 cv_wait(&pr_pid_cv[slot], &p->p_lock); 850 cv_signal(&pr_pid_cv[slot]); 851 } 852 } 853 854 /* 855 * Return process/lwp status. 856 * The u-block is mapped in by this routine and unmapped at the end. 
857 */ 858 void 859 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp) 860 { 861 kthread_t *t; 862 863 ASSERT(MUTEX_HELD(&p->p_lock)); 864 865 t = prchoose(p); /* returns locked thread */ 866 ASSERT(t != NULL); 867 thread_unlock(t); 868 869 /* just bzero the process part, prgetlwpstatus() does the rest */ 870 bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t)); 871 sp->pr_nlwp = p->p_lwpcnt; 872 sp->pr_nzomb = p->p_zombcnt; 873 prassignset(&sp->pr_sigpend, &p->p_sig); 874 sp->pr_brkbase = (uintptr_t)p->p_brkbase; 875 sp->pr_brksize = p->p_brksize; 876 sp->pr_stkbase = (uintptr_t)prgetstackbase(p); 877 sp->pr_stksize = p->p_stksize; 878 sp->pr_pid = p->p_pid; 879 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 880 (p->p_flag & SZONETOP)) { 881 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 882 /* 883 * Inside local zones, fake zsched's pid as parent pids for 884 * processes which reference processes outside of the zone. 885 */ 886 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 887 } else { 888 sp->pr_ppid = p->p_ppid; 889 } 890 sp->pr_pgid = p->p_pgrp; 891 sp->pr_sid = p->p_sessp->s_sid; 892 sp->pr_taskid = p->p_task->tk_tkid; 893 sp->pr_projid = p->p_task->tk_proj->kpj_id; 894 sp->pr_zoneid = p->p_zone->zone_id; 895 hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime); 896 hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime); 897 TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime); 898 TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime); 899 prassignset(&sp->pr_sigtrace, &p->p_sigmask); 900 prassignset(&sp->pr_flttrace, &p->p_fltmask); 901 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask); 902 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask); 903 switch (p->p_model) { 904 case DATAMODEL_ILP32: 905 sp->pr_dmodel = PR_MODEL_ILP32; 906 break; 907 case DATAMODEL_LP64: 908 sp->pr_dmodel = PR_MODEL_LP64; 909 break; 910 } 911 if (p->p_agenttp) 912 sp->pr_agentid = p->p_agenttp->t_tid; 913 914 /* get the chosen lwp's status */ 915 prgetlwpstatus(t, &sp->pr_lwp, zp); 916 
917 /* replicate the flags */ 918 sp->pr_flags = sp->pr_lwp.pr_flags; 919 } 920 921 #ifdef _SYSCALL32_IMPL 922 void 923 prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp) 924 { 925 proc_t *p = ttoproc(t); 926 klwp_t *lwp = ttolwp(t); 927 struct mstate *ms = &lwp->lwp_mstate; 928 hrtime_t usr, sys; 929 int flags; 930 ulong_t instr; 931 932 ASSERT(MUTEX_HELD(&p->p_lock)); 933 934 bzero(sp, sizeof (*sp)); 935 flags = 0L; 936 if (t->t_state == TS_STOPPED) { 937 flags |= PR_STOPPED; 938 if ((t->t_schedflag & TS_PSTART) == 0) 939 flags |= PR_ISTOP; 940 } else if (VSTOPPED(t)) { 941 flags |= PR_STOPPED|PR_ISTOP; 942 } 943 if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP)) 944 flags |= PR_DSTOP; 945 if (lwp->lwp_asleep) 946 flags |= PR_ASLEEP; 947 if (t == p->p_agenttp) 948 flags |= PR_AGENT; 949 if (!(t->t_proc_flag & TP_TWAIT)) 950 flags |= PR_DETACH; 951 if (t->t_proc_flag & TP_DAEMON) 952 flags |= PR_DAEMON; 953 if (p->p_proc_flag & P_PR_FORK) 954 flags |= PR_FORK; 955 if (p->p_proc_flag & P_PR_RUNLCL) 956 flags |= PR_RLC; 957 if (p->p_proc_flag & P_PR_KILLCL) 958 flags |= PR_KLC; 959 if (p->p_proc_flag & P_PR_ASYNC) 960 flags |= PR_ASYNC; 961 if (p->p_proc_flag & P_PR_BPTADJ) 962 flags |= PR_BPTADJ; 963 if (p->p_proc_flag & P_PR_PTRACE) 964 flags |= PR_PTRACE; 965 if (p->p_flag & SMSACCT) 966 flags |= PR_MSACCT; 967 if (p->p_flag & SMSFORK) 968 flags |= PR_MSFORK; 969 if (p->p_flag & SVFWAIT) 970 flags |= PR_VFORKP; 971 sp->pr_flags = flags; 972 if (VSTOPPED(t)) { 973 sp->pr_why = PR_REQUESTED; 974 sp->pr_what = 0; 975 } else { 976 sp->pr_why = t->t_whystop; 977 sp->pr_what = t->t_whatstop; 978 } 979 sp->pr_lwpid = t->t_tid; 980 sp->pr_cursig = lwp->lwp_cursig; 981 prassignset(&sp->pr_lwppend, &t->t_sig); 982 schedctl_finish_sigblock(t); 983 prassignset(&sp->pr_lwphold, &t->t_hold); 984 if (t->t_whystop == PR_FAULTED) { 985 siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info); 986 if (t->t_whatstop == FLTPAGE) 987 sp->pr_info.si_addr = 988 
(caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr; 989 } else if (lwp->lwp_curinfo) 990 siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info); 991 if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID && 992 sp->pr_info.si_zoneid != zp->zone_id) { 993 sp->pr_info.si_pid = zp->zone_zsched->p_pid; 994 sp->pr_info.si_uid = 0; 995 sp->pr_info.si_ctid = -1; 996 sp->pr_info.si_zoneid = zp->zone_id; 997 } 998 sp->pr_altstack.ss_sp = 999 (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp; 1000 sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size; 1001 sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags; 1002 prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action); 1003 sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext; 1004 sp->pr_ustack = (caddr32_t)lwp->lwp_ustack; 1005 (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name, 1006 sizeof (sp->pr_clname) - 1); 1007 if (flags & PR_STOPPED) 1008 hrt2ts32(t->t_stoptime, &sp->pr_tstamp); 1009 usr = ms->ms_acct[LMS_USER]; 1010 sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP]; 1011 scalehrtime(&usr); 1012 scalehrtime(&sys); 1013 hrt2ts32(usr, &sp->pr_utime); 1014 hrt2ts32(sys, &sp->pr_stime); 1015 1016 /* 1017 * Fetch the current instruction, if not a system process. 1018 * We don't attempt this unless the lwp is stopped. 1019 */ 1020 if ((p->p_flag & SSYS) || p->p_as == &kas) 1021 sp->pr_flags |= (PR_ISSYS|PR_PCINVAL); 1022 else if (!(flags & PR_STOPPED)) 1023 sp->pr_flags |= PR_PCINVAL; 1024 else if (!prfetchinstr(lwp, &instr)) 1025 sp->pr_flags |= PR_PCINVAL; 1026 else 1027 sp->pr_instr = (uint32_t)instr; 1028 1029 /* 1030 * Drop p_lock while touching the lwp's stack. 
1031 */ 1032 mutex_exit(&p->p_lock); 1033 if (prisstep(lwp)) 1034 sp->pr_flags |= PR_STEP; 1035 if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) { 1036 int i; 1037 1038 sp->pr_syscall = get_syscall32_args(lwp, 1039 (int *)sp->pr_sysarg, &i); 1040 sp->pr_nsysarg = (ushort_t)i; 1041 } 1042 if ((flags & PR_STOPPED) || t == curthread) 1043 prgetprregs32(lwp, sp->pr_reg); 1044 if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) || 1045 (flags & PR_VFORKP)) { 1046 long r1, r2; 1047 user_t *up; 1048 auxv_t *auxp; 1049 int i; 1050 1051 sp->pr_errno = prgetrvals(lwp, &r1, &r2); 1052 if (sp->pr_errno == 0) { 1053 sp->pr_rval1 = (int32_t)r1; 1054 sp->pr_rval2 = (int32_t)r2; 1055 sp->pr_errpriv = PRIV_NONE; 1056 } else 1057 sp->pr_errpriv = lwp->lwp_badpriv; 1058 1059 if (t->t_sysnum == SYS_execve) { 1060 up = PTOU(p); 1061 sp->pr_sysarg[0] = 0; 1062 sp->pr_sysarg[1] = (caddr32_t)up->u_argv; 1063 sp->pr_sysarg[2] = (caddr32_t)up->u_envp; 1064 for (i = 0, auxp = up->u_auxv; 1065 i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]); 1066 i++, auxp++) { 1067 if (auxp->a_type == AT_SUN_EXECNAME) { 1068 sp->pr_sysarg[0] = 1069 (caddr32_t) 1070 (uintptr_t)auxp->a_un.a_ptr; 1071 break; 1072 } 1073 } 1074 } 1075 } 1076 if (prhasfp()) 1077 prgetprfpregs32(lwp, &sp->pr_fpreg); 1078 mutex_enter(&p->p_lock); 1079 } 1080 1081 void 1082 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp) 1083 { 1084 kthread_t *t; 1085 1086 ASSERT(MUTEX_HELD(&p->p_lock)); 1087 1088 t = prchoose(p); /* returns locked thread */ 1089 ASSERT(t != NULL); 1090 thread_unlock(t); 1091 1092 /* just bzero the process part, prgetlwpstatus32() does the rest */ 1093 bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t)); 1094 sp->pr_nlwp = p->p_lwpcnt; 1095 sp->pr_nzomb = p->p_zombcnt; 1096 prassignset(&sp->pr_sigpend, &p->p_sig); 1097 sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase; 1098 sp->pr_brksize = (uint32_t)p->p_brksize; 1099 sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p); 1100 
sp->pr_stksize = (uint32_t)p->p_stksize; 1101 sp->pr_pid = p->p_pid; 1102 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 1103 (p->p_flag & SZONETOP)) { 1104 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 1105 /* 1106 * Inside local zones, fake zsched's pid as parent pids for 1107 * processes which reference processes outside of the zone. 1108 */ 1109 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 1110 } else { 1111 sp->pr_ppid = p->p_ppid; 1112 } 1113 sp->pr_pgid = p->p_pgrp; 1114 sp->pr_sid = p->p_sessp->s_sid; 1115 sp->pr_taskid = p->p_task->tk_tkid; 1116 sp->pr_projid = p->p_task->tk_proj->kpj_id; 1117 sp->pr_zoneid = p->p_zone->zone_id; 1118 hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime); 1119 hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime); 1120 TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime); 1121 TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime); 1122 prassignset(&sp->pr_sigtrace, &p->p_sigmask); 1123 prassignset(&sp->pr_flttrace, &p->p_fltmask); 1124 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask); 1125 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask); 1126 switch (p->p_model) { 1127 case DATAMODEL_ILP32: 1128 sp->pr_dmodel = PR_MODEL_ILP32; 1129 break; 1130 case DATAMODEL_LP64: 1131 sp->pr_dmodel = PR_MODEL_LP64; 1132 break; 1133 } 1134 if (p->p_agenttp) 1135 sp->pr_agentid = p->p_agenttp->t_tid; 1136 1137 /* get the chosen lwp's status */ 1138 prgetlwpstatus32(t, &sp->pr_lwp, zp); 1139 1140 /* replicate the flags */ 1141 sp->pr_flags = sp->pr_lwp.pr_flags; 1142 } 1143 #endif /* _SYSCALL32_IMPL */ 1144 1145 /* 1146 * Return lwp status. 
 *
 * Fills in *sp with the status of the lwp whose thread is t.  The caller
 * must hold the owning process's p_lock; the lock is dropped part-way
 * through (while the lwp's stack and registers are examined) and is
 * re-acquired before returning.  zp is the zone on whose behalf the
 * information is gathered: for a non-global zp, signal information that
 * carries a different zone id is rewritten to name zp's zsched process.
 */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	/* Translate thread and process state into the exported PR_* flags. */
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	if (p->p_pgidp->pid_pgorphaned)
		flags |= PR_ORPHAN;
	if (p->p_pidflag & CLDNOSIGCHLD)
		flags |= PR_NOSIGCHLD;
	if (p->p_pidflag & CLDWAITPID)
		flags |= PR_WAITPID;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	/* NOTE(review): presumably brings t_hold up to date -- confirm */
	schedctl_finish_sigblock(t);
	prassignset(&sp->pr_lwphold, &t->t_hold);
	/* pr_info: fault siginfo if stopped on a fault, else current signal */
	if (t->t_whystop == PR_FAULTED)
		bcopy(&lwp->lwp_siginfo,
		    &sp->pr_info, sizeof (k_siginfo_t));
	else if (lwp->lwp_curinfo)
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &sp->pr_info, sizeof (k_siginfo_t));
	/*
	 * For a non-global zp, replace the sender's identity in siginfo
	 * that carries another zone's id with zp's zsched pid, uid 0 and
	 * contract id -1.
	 */
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	/* *sp was zeroed above, so copying size - 1 bytes leaves a NUL */
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	/* system time reported here includes time accounted as LMS_TRAP */
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
		    (long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		/*
		 * For execve, report the exec name found in the aux vector
		 * (0 if there is none) together with the saved argv and
		 * envp as the system call arguments.
		 */
		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

/*
 * Get the sigaction structure for the specified signal.  The u-block
 * must already have been mapped in by the caller.
 *
 * *sp is zeroed first and is left zeroed when sig is 0 or is not below
 * the signal limit (the brand's b_nsig when curproc is branded, otherwise
 * NSIG).  Otherwise the handler, mask and SA_* flags are reconstructed
 * from the per-signal state kept in *up and *p.
 */
void
prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
{
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		/* the per-signal arrays are indexed by sig - 1 */
		sp->sa_handler = up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		/* the SIGCLD-only flags come from process-wide flag bits */
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}

#ifdef _SYSCALL32_IMPL
/*
 * 32-bit variant of prgetaction(): identical logic, but fills in a
 * struct sigaction32 and narrows the handler address to a caddr32_t.
 */
void
prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
{
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Count the number of segments in this process's address space.
1371 */ 1372 int 1373 prnsegs(struct as *as, int reserved) 1374 { 1375 int n = 0; 1376 struct seg *seg; 1377 1378 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 1379 1380 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { 1381 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved); 1382 caddr_t saddr, naddr; 1383 void *tmp = NULL; 1384 1385 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1386 (void) pr_getprot(seg, reserved, &tmp, 1387 &saddr, &naddr, eaddr); 1388 if (saddr != naddr) 1389 n++; 1390 } 1391 1392 ASSERT(tmp == NULL); 1393 } 1394 1395 return (n); 1396 } 1397 1398 /* 1399 * Convert uint32_t to decimal string w/o leading zeros. 1400 * Add trailing null characters if 'len' is greater than string length. 1401 * Return the string length. 1402 */ 1403 int 1404 pr_u32tos(uint32_t n, char *s, int len) 1405 { 1406 char cbuf[11]; /* 32-bit unsigned integer fits in 10 digits */ 1407 char *cp = cbuf; 1408 char *end = s + len; 1409 1410 do { 1411 *cp++ = (char)(n % 10 + '0'); 1412 n /= 10; 1413 } while (n); 1414 1415 len = (int)(cp - cbuf); 1416 1417 do { 1418 *s++ = *--cp; 1419 } while (cp > cbuf); 1420 1421 while (s < end) /* optional pad */ 1422 *s++ = '\0'; 1423 1424 return (len); 1425 } 1426 1427 /* 1428 * Convert uint64_t to decimal string w/o leading zeros. 1429 * Return the string length. 
1430 */ 1431 static int 1432 pr_u64tos(uint64_t n, char *s) 1433 { 1434 char cbuf[21]; /* 64-bit unsigned integer fits in 20 digits */ 1435 char *cp = cbuf; 1436 int len; 1437 1438 do { 1439 *cp++ = (char)(n % 10 + '0'); 1440 n /= 10; 1441 } while (n); 1442 1443 len = (int)(cp - cbuf); 1444 1445 do { 1446 *s++ = *--cp; 1447 } while (cp > cbuf); 1448 1449 return (len); 1450 } 1451 1452 void 1453 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr) 1454 { 1455 char *s = name; 1456 struct vfs *vfsp; 1457 struct vfssw *vfsswp; 1458 1459 if ((vfsp = vp->v_vfsp) != NULL && 1460 ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) && 1461 *vfsswp->vsw_name) { 1462 (void) strcpy(s, vfsswp->vsw_name); 1463 s += strlen(s); 1464 *s++ = '.'; 1465 } 1466 s += pr_u32tos(getmajor(vattr->va_fsid), s, 0); 1467 *s++ = '.'; 1468 s += pr_u32tos(getminor(vattr->va_fsid), s, 0); 1469 *s++ = '.'; 1470 s += pr_u64tos(vattr->va_nodeid, s); 1471 *s++ = '\0'; 1472 } 1473 1474 struct seg * 1475 break_seg(proc_t *p) 1476 { 1477 caddr_t addr = p->p_brkbase; 1478 struct seg *seg; 1479 struct vnode *vp; 1480 1481 if (p->p_brksize != 0) 1482 addr += p->p_brksize - 1; 1483 seg = as_segat(p->p_as, addr); 1484 if (seg != NULL && seg->s_ops == &segvn_ops && 1485 (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL)) 1486 return (seg); 1487 return (NULL); 1488 } 1489 1490 /* 1491 * Implementation of service functions to handle procfs generic chained 1492 * copyout buffers. 
1493 */ 1494 typedef struct pr_iobuf_list { 1495 list_node_t piol_link; /* buffer linkage */ 1496 size_t piol_size; /* total size (header + data) */ 1497 size_t piol_usedsize; /* amount to copy out from this buf */ 1498 } piol_t; 1499 1500 #define MAPSIZE (64 * 1024) 1501 #define PIOL_DATABUF(iol) ((void *)(&(iol)[1])) 1502 1503 void 1504 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n) 1505 { 1506 piol_t *iol; 1507 size_t initial_size = MIN(1, n) * itemsize; 1508 1509 list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link)); 1510 1511 ASSERT(list_head(iolhead) == NULL); 1512 ASSERT(itemsize < MAPSIZE - sizeof (*iol)); 1513 ASSERT(initial_size > 0); 1514 1515 /* 1516 * Someone creating chained copyout buffers may ask for less than 1517 * MAPSIZE if the amount of data to be buffered is known to be 1518 * smaller than that. 1519 * But in order to prevent involuntary self-denial of service, 1520 * the requested input size is clamped at MAPSIZE. 1521 */ 1522 initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol)); 1523 iol = kmem_alloc(initial_size, KM_SLEEP); 1524 list_insert_head(iolhead, iol); 1525 iol->piol_usedsize = 0; 1526 iol->piol_size = initial_size; 1527 } 1528 1529 void * 1530 pr_iol_newbuf(list_t *iolhead, size_t itemsize) 1531 { 1532 piol_t *iol; 1533 char *new; 1534 1535 ASSERT(itemsize < MAPSIZE - sizeof (*iol)); 1536 ASSERT(list_head(iolhead) != NULL); 1537 1538 iol = (piol_t *)list_tail(iolhead); 1539 1540 if (iol->piol_size < 1541 iol->piol_usedsize + sizeof (*iol) + itemsize) { 1542 /* 1543 * Out of space in the current buffer. Allocate more. 
1544 */ 1545 piol_t *newiol; 1546 1547 newiol = kmem_alloc(MAPSIZE, KM_SLEEP); 1548 newiol->piol_size = MAPSIZE; 1549 newiol->piol_usedsize = 0; 1550 1551 list_insert_after(iolhead, iol, newiol); 1552 iol = list_next(iolhead, iol); 1553 ASSERT(iol == newiol); 1554 } 1555 new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize; 1556 iol->piol_usedsize += itemsize; 1557 bzero(new, itemsize); 1558 return (new); 1559 } 1560 1561 int 1562 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin) 1563 { 1564 int error = errin; 1565 piol_t *iol; 1566 1567 while ((iol = list_head(iolhead)) != NULL) { 1568 list_remove(iolhead, iol); 1569 if (!error) { 1570 if (copyout(PIOL_DATABUF(iol), *tgt, 1571 iol->piol_usedsize)) 1572 error = EFAULT; 1573 *tgt += iol->piol_usedsize; 1574 } 1575 kmem_free(iol, iol->piol_size); 1576 } 1577 list_destroy(iolhead); 1578 1579 return (error); 1580 } 1581 1582 int 1583 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin) 1584 { 1585 offset_t off = uiop->uio_offset; 1586 char *base; 1587 size_t size; 1588 piol_t *iol; 1589 int error = errin; 1590 1591 while ((iol = list_head(iolhead)) != NULL) { 1592 list_remove(iolhead, iol); 1593 base = PIOL_DATABUF(iol); 1594 size = iol->piol_usedsize; 1595 if (off <= size && error == 0 && uiop->uio_resid > 0) 1596 error = uiomove(base + off, size - off, 1597 UIO_READ, uiop); 1598 off = MAX(0, off - (offset_t)size); 1599 kmem_free(iol, iol->piol_size); 1600 } 1601 list_destroy(iolhead); 1602 1603 return (error); 1604 } 1605 1606 /* 1607 * Return an array of structures with memory map information. 1608 * We allocate here; the caller must deallocate. 
 *
 * One prmap_t is appended to the buffer chain 'iolhead' (which is
 * initialized here) for every non-empty address range that pr_getprot()
 * reports while walking each segment of p's address space.  'reserved'
 * is handed to pr_getsegsize() and pr_getprot() and, when nonzero, makes
 * the stack mapping cover the whole stack size limit.  The caller must
 * hold the address space lock as writer.  Always returns 0.
 */
int
prgetmap(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			/* pr_iol_newbuf() hands back a zeroed entry */
			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (uintptr_t)saddr;
			mp->pr_size = naddr - saddr;
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			/* anonymous: shm segment, or segvn without a vnode */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					/*
					 * Report the stack as starting
					 * maxstack (the page-rounded
					 * p_stk_ctl) bytes below its
					 * current end.
					 */
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					mp->pr_vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_size = (uintptr_t)naddr -
					    mp->pr_vaddr;
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}

#ifdef _SYSCALL32_IMPL
/*
 * 32-bit variant of prgetmap(): identical logic, but the chain is filled
 * with prmap32_t entries and addresses and sizes are narrowed to 32 bits.
 */
int
prgetmap32(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			mp->pr_size = (size32_t)(naddr - saddr);
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					uintptr_t vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_vaddr = (caddr32_t)vaddr;
					mp->pr_size = (size32_t)
					    ((uintptr_t)naddr - vaddr);
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Return the size of the /proc page data file.
 *
 * That is one prpageheader_t plus, for every address range of at least
 * one page reported by pr_getprot(), one prasmap_t followed by one byte
 * per page rounded up with round8().  Returns 0 for an address space
 * without segments.  The caller must hold the address space lock as
 * writer.
 */
size_t
prpdsize(struct as *as)
{
	struct seg *seg;
	size_t size;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	size = sizeof (prpageheader_t);
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;
		size_t npage;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
				size += sizeof (prasmap_t) + round8(npage);
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (size);
}

#ifdef _SYSCALL32_IMPL
/*
 * 32-bit variant of prpdsize(), computed with the sizes of
 * prpageheader32_t and prasmap32_t.
 */
size_t
prpdsize32(struct as *as)
{
	struct seg *seg;
	size_t size;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	size = sizeof (prpageheader32_t);
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;
		size_t npage;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
				size += sizeof (prasmap32_t) + round8(npage);
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (size);
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Read page data information.
 *
 * Builds, in a temporary kernel buffer, a prpageheader_t followed by one
 * prasmap_t plus per-page data (obtained from hat_getstat() with 'hatid')
 * for every mapping of p's address space, and moves the result to the
 * caller through 'uiop'.  The address space lock is taken as writer here
 * and released before returning.
 *
 * Returns 0 if the address space has no segments, E2BIG if uio_resid is
 * smaller than the size computed by prpdsize(), EINTR if a signal is
 * pending when a retry becomes necessary, and otherwise the result of
 * uiomove().
 */
int
prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader_t *php;
	prasmap_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (0);
	}
	size = prpdsize(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (E2BIG);
	}

	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader_t *)buf;
	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));

	hrt2ts(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			/* where the entry after this one would start */
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as, &as->a_lock);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (uintptr_t)saddr;
			pmp->pr_npage = npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/* the per-page data directly follows the prasmap_t */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as, &as->a_lock);

	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	/* only the part of the buffer actually filled in is moved out */
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}

#ifdef _SYSCALL32_IMPL
/*
 * 32-bit variant of prpdread(): identical logic, but the data is laid out
 * as a prpageheader32_t followed by prasmap32_t entries, sized with
 * prpdsize32().
 */
int
prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader32_t *php;
	prasmap32_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (0);
	}
	size = prpdsize32(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (E2BIG);
	}

	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader32_t *)buf;
	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));

	hrt2ts32(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			/* where the entry after this one would start */
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as, &as->a_lock);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			pmp->pr_npage = (size32_t)npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/* the per-page data directly follows the prasmap32_t */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap32_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as, &as->a_lock);

	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	/* only the part of the buffer actually filled in is moved out */
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Convert an accumulated cpu usage value into the 16-bit scaled form
 * exported through /proc: the value is divided by the number of online
 * cpus in the calling process's zone (when more than one), shifted right
 * by 16 bits, and capped at 0x8000.
 * NOTE(review): presumably 0x8000 represents 100% -- confirm against the
 * definition of pr_pctcpu.
 */
ushort_t
prgetpctcpu(uint64_t pct)
{
	/*
	 * The value returned will be relevant in the zone of the examiner,
	 * which may not be the same as the zone which performed the procfs
	 * mount.
	 */
	int nonline = zone_ncpus_online_get(curproc->p_zone);

	/*
	 * Prorate over online cpus so we don't exceed 100%
	 */
	if (nonline > 1)
		pct /= nonline;
	pct >>= 16;		/* convert to 16-bit scaled integer */
	if (pct > 0x8000)	/* might happen, due to rounding */
		pct = 0x8000;
	return ((ushort_t)pct);
}

/*
 * Return information used by ps(1).
 *
 * Fills in *psp for process p.  The caller must hold p->p_lock; for a
 * process that has a representative lwp and is not a system process the
 * lock is dropped and re-acquired while the address space is examined.
 * If prchoose() finds no lwp, the process is reported as a zombie
 * (state SZOMB, name 'Z') and only the process-wide fields are filled in.
 */
void
prgetpsinfo(proc_t *p, psinfo_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/* with an lwp, pr_lwp is left for prgetlwpsinfo() to fill in */
	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}

	/*
	 * only export SSYS and SMSACCT; everything else is off-limits to
	 * userland apps.
	 */
	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
	psp->pr_nlwp = p->p_lwpcnt;
	psp->pr_nzomb = p->p_zombcnt;
	mutex_enter(&p->p_crlock);
	cred = p->p_cred;
	psp->pr_uid = crgetruid(cred);
	psp->pr_euid = crgetuid(cred);
	psp->pr_gid = crgetrgid(cred);
	psp->pr_egid = crgetgid(cred);
	mutex_exit(&p->p_crlock);
	psp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		psp->pr_ppid = p->p_ppid;
	}
	psp->pr_pgid = p->p_pgrp;
	psp->pr_sid = p->p_sessp->s_sid;
	psp->pr_taskid = p->p_task->tk_tkid;
	psp->pr_projid = p->p_task->tk_proj->kpj_id;
	psp->pr_poolid = p->p_pool->pool_id;
	psp->pr_zoneid = p->p_zone->zone_id;
	/* a contract id of 0 is reported as -1 */
	if ((psp->pr_contract = PRCTID(p)) == 0)
		psp->pr_contract = -1;
	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		psp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		psp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	/* pr_time is user + system time; pr_ctime the same for children */
	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	hrt2ts((hrutime + hrstime), &psp->pr_time);
	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);

	if (t == NULL) {
		int wcode = p->p_wcode;		/* must be atomic read */

		if (wcode)
			psp->pr_wstat = wstat(wcode, p->p_wdata);
		psp->pr_ttydev = PRNODEV;
		psp->pr_lwp.pr_state = SZOMB;
		psp->pr_lwp.pr_sname = 'Z';
		psp->pr_lwp.pr_bindpro = PBIND_NONE;
		psp->pr_lwp.pr_bindpset = PS_NONE;
	} else {
		user_t *up = PTOU(p);
		struct as *as;
		dev_t d;
		extern dev_t rwsconsdev, rconsdev, uconsdev;

		d = cttydev(p);
		/*
		 * If the controlling terminal is the real
		 * or workstation console device, map to what the
		 * user thinks is the console device. Handle case when
		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
		 */
		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
			d = uconsdev;
		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
		psp->pr_start = up->u_start;
		/*
		 * The destinations were zeroed above; the copy lengths are
		 * capped one byte short of the destination sizes.
		 */
		bcopy(up->u_comm, psp->pr_fname,
		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
		bcopy(up->u_psargs, psp->pr_psargs,
		    MIN(PRARGSZ-1, PSARGSZ));
		psp->pr_argc = up->u_argc;
		psp->pr_argv = up->u_argv;
		psp->pr_envp = up->u_envp;

		/* get the chosen lwp's lwpsinfo */
		prgetlwpsinfo(t, &psp->pr_lwp);

		/* compute %cpu for the process */
		if (p->p_lwpcnt == 1)
			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
		else {
			uint64_t pct = 0;
			hrtime_t cur_time = gethrtime_unscaled();

			/* sum over every thread on the process's list */
			t = p->p_tlist;
			do {
				pct += cpu_update_pct(t, cur_time);
			} while ((t = t->t_forw) != p->p_tlist);

			psp->pr_pctcpu = prgetpctcpu(pct);
		}
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
			psp->pr_size = 0;
			psp->pr_rssize = 0;
		} else {
			/* p_lock is dropped while the as lock is held */
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
			/* sizes are expressed in units of 1024 bytes */
			psp->pr_size = btopr(as->a_resvsize) *
			    (PAGESIZE / 1024);
			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
			psp->pr_pctmem = rm_pctmemory(as);
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
	}
}

#ifdef _SYSCALL32_IMPL
void
prgetpsinfo32(proc_t *p, psinfo32_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}
2365 2366 /* 2367 * only export SSYS and SMSACCT; everything else is off-limits to 2368 * userland apps. 2369 */ 2370 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 2371 psp->pr_nlwp = p->p_lwpcnt; 2372 psp->pr_nzomb = p->p_zombcnt; 2373 mutex_enter(&p->p_crlock); 2374 cred = p->p_cred; 2375 psp->pr_uid = crgetruid(cred); 2376 psp->pr_euid = crgetuid(cred); 2377 psp->pr_gid = crgetrgid(cred); 2378 psp->pr_egid = crgetgid(cred); 2379 mutex_exit(&p->p_crlock); 2380 psp->pr_pid = p->p_pid; 2381 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 2382 (p->p_flag & SZONETOP)) { 2383 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 2384 /* 2385 * Inside local zones, fake zsched's pid as parent pids for 2386 * processes which reference processes outside of the zone. 2387 */ 2388 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 2389 } else { 2390 psp->pr_ppid = p->p_ppid; 2391 } 2392 psp->pr_pgid = p->p_pgrp; 2393 psp->pr_sid = p->p_sessp->s_sid; 2394 psp->pr_taskid = p->p_task->tk_tkid; 2395 psp->pr_projid = p->p_task->tk_proj->kpj_id; 2396 psp->pr_poolid = p->p_pool->pool_id; 2397 psp->pr_zoneid = p->p_zone->zone_id; 2398 if ((psp->pr_contract = PRCTID(p)) == 0) 2399 psp->pr_contract = -1; 2400 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 2401 switch (p->p_model) { 2402 case DATAMODEL_ILP32: 2403 psp->pr_dmodel = PR_MODEL_ILP32; 2404 break; 2405 case DATAMODEL_LP64: 2406 psp->pr_dmodel = PR_MODEL_LP64; 2407 break; 2408 } 2409 hrutime = mstate_aggr_state(p, LMS_USER); 2410 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 2411 hrt2ts32(hrutime + hrstime, &psp->pr_time); 2412 TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime); 2413 2414 if (t == NULL) { 2415 extern int wstat(int, int); /* needs a header file */ 2416 int wcode = p->p_wcode; /* must be atomic read */ 2417 2418 if (wcode) 2419 psp->pr_wstat = wstat(wcode, p->p_wdata); 2420 psp->pr_ttydev = PRNODEV32; 2421 psp->pr_lwp.pr_state = SZOMB; 2422 psp->pr_lwp.pr_sname = 'Z'; 2423 } else { 2424 
user_t *up = PTOU(p); 2425 struct as *as; 2426 dev_t d; 2427 extern dev_t rwsconsdev, rconsdev, uconsdev; 2428 2429 d = cttydev(p); 2430 /* 2431 * If the controlling terminal is the real 2432 * or workstation console device, map to what the 2433 * user thinks is the console device. Handle case when 2434 * rwsconsdev or rconsdev is set to NODEV for Starfire. 2435 */ 2436 if ((d == rwsconsdev || d == rconsdev) && d != NODEV) 2437 d = uconsdev; 2438 (void) cmpldev(&psp->pr_ttydev, d); 2439 TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start); 2440 bcopy(up->u_comm, psp->pr_fname, 2441 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); 2442 bcopy(up->u_psargs, psp->pr_psargs, 2443 MIN(PRARGSZ-1, PSARGSZ)); 2444 psp->pr_argc = up->u_argc; 2445 psp->pr_argv = (caddr32_t)up->u_argv; 2446 psp->pr_envp = (caddr32_t)up->u_envp; 2447 2448 /* get the chosen lwp's lwpsinfo */ 2449 prgetlwpsinfo32(t, &psp->pr_lwp); 2450 2451 /* compute %cpu for the process */ 2452 if (p->p_lwpcnt == 1) 2453 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; 2454 else { 2455 uint64_t pct = 0; 2456 hrtime_t cur_time; 2457 2458 t = p->p_tlist; 2459 cur_time = gethrtime_unscaled(); 2460 do { 2461 pct += cpu_update_pct(t, cur_time); 2462 } while ((t = t->t_forw) != p->p_tlist); 2463 2464 psp->pr_pctcpu = prgetpctcpu(pct); 2465 } 2466 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { 2467 psp->pr_size = 0; 2468 psp->pr_rssize = 0; 2469 } else { 2470 mutex_exit(&p->p_lock); 2471 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2472 psp->pr_size = (size32_t) 2473 (btopr(as->a_resvsize) * (PAGESIZE / 1024)); 2474 psp->pr_rssize = (size32_t) 2475 (rm_asrss(as) * (PAGESIZE / 1024)); 2476 psp->pr_pctmem = rm_pctmemory(as); 2477 AS_LOCK_EXIT(as, &as->a_lock); 2478 mutex_enter(&p->p_lock); 2479 } 2480 } 2481 2482 /* 2483 * If we are looking at an LP64 process, zero out 2484 * the fields that cannot be represented in ILP32. 
2485 */ 2486 if (p->p_model != DATAMODEL_ILP32) { 2487 psp->pr_size = 0; 2488 psp->pr_rssize = 0; 2489 psp->pr_argv = 0; 2490 psp->pr_envp = 0; 2491 } 2492 } 2493 #endif /* _SYSCALL32_IMPL */ 2494 2495 void 2496 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp) 2497 { 2498 klwp_t *lwp = ttolwp(t); 2499 sobj_ops_t *sobj; 2500 char c, state; 2501 uint64_t pct; 2502 int retval, niceval; 2503 hrtime_t hrutime, hrstime; 2504 2505 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 2506 2507 bzero(psp, sizeof (*psp)); 2508 2509 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 2510 psp->pr_lwpid = t->t_tid; 2511 psp->pr_addr = (uintptr_t)t; 2512 psp->pr_wchan = (uintptr_t)t->t_wchan; 2513 2514 /* map the thread state enum into a process state enum */ 2515 state = VSTOPPED(t) ? TS_STOPPED : t->t_state; 2516 switch (state) { 2517 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 2518 case TS_RUN: state = SRUN; c = 'R'; break; 2519 case TS_ONPROC: state = SONPROC; c = 'O'; break; 2520 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 2521 case TS_STOPPED: state = SSTOP; c = 'T'; break; 2522 case TS_WAIT: state = SWAIT; c = 'W'; break; 2523 default: state = 0; c = '?'; break; 2524 } 2525 psp->pr_state = state; 2526 psp->pr_sname = c; 2527 if ((sobj = t->t_sobj_ops) != NULL) 2528 psp->pr_stype = SOBJ_TYPE(sobj); 2529 retval = CL_DONICE(t, NULL, 0, &niceval); 2530 if (retval == 0) { 2531 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 2532 psp->pr_nice = niceval + NZERO; 2533 } 2534 psp->pr_syscall = t->t_sysnum; 2535 psp->pr_pri = t->t_pri; 2536 psp->pr_start.tv_sec = t->t_start; 2537 psp->pr_start.tv_nsec = 0L; 2538 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 2539 scalehrtime(&hrutime); 2540 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 2541 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 2542 scalehrtime(&hrstime); 2543 hrt2ts(hrutime + hrstime, &psp->pr_time); 2544 /* compute %cpu for the lwp */ 2545 pct = cpu_update_pct(t, gethrtime_unscaled()); 2546 psp->pr_pctcpu = prgetpctcpu(pct); 2547 psp->pr_cpu = 
(psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 2548 if (psp->pr_cpu > 99) 2549 psp->pr_cpu = 99; 2550 2551 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 2552 sizeof (psp->pr_clname) - 1); 2553 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 2554 psp->pr_onpro = t->t_cpu->cpu_id; 2555 psp->pr_bindpro = t->t_bind_cpu; 2556 psp->pr_bindpset = t->t_bind_pset; 2557 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 2558 } 2559 2560 #ifdef _SYSCALL32_IMPL 2561 void 2562 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp) 2563 { 2564 proc_t *p = ttoproc(t); 2565 klwp_t *lwp = ttolwp(t); 2566 sobj_ops_t *sobj; 2567 char c, state; 2568 uint64_t pct; 2569 int retval, niceval; 2570 hrtime_t hrutime, hrstime; 2571 2572 ASSERT(MUTEX_HELD(&p->p_lock)); 2573 2574 bzero(psp, sizeof (*psp)); 2575 2576 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 2577 psp->pr_lwpid = t->t_tid; 2578 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 2579 psp->pr_wchan = 0; /* cannot represent 64-bit addr in 32 bits */ 2580 2581 /* map the thread state enum into a process state enum */ 2582 state = VSTOPPED(t) ? 
TS_STOPPED : t->t_state; 2583 switch (state) { 2584 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 2585 case TS_RUN: state = SRUN; c = 'R'; break; 2586 case TS_ONPROC: state = SONPROC; c = 'O'; break; 2587 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 2588 case TS_STOPPED: state = SSTOP; c = 'T'; break; 2589 case TS_WAIT: state = SWAIT; c = 'W'; break; 2590 default: state = 0; c = '?'; break; 2591 } 2592 psp->pr_state = state; 2593 psp->pr_sname = c; 2594 if ((sobj = t->t_sobj_ops) != NULL) 2595 psp->pr_stype = SOBJ_TYPE(sobj); 2596 retval = CL_DONICE(t, NULL, 0, &niceval); 2597 if (retval == 0) { 2598 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 2599 psp->pr_nice = niceval + NZERO; 2600 } else { 2601 psp->pr_oldpri = 0; 2602 psp->pr_nice = 0; 2603 } 2604 psp->pr_syscall = t->t_sysnum; 2605 psp->pr_pri = t->t_pri; 2606 psp->pr_start.tv_sec = (time32_t)t->t_start; 2607 psp->pr_start.tv_nsec = 0L; 2608 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 2609 scalehrtime(&hrutime); 2610 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 2611 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 2612 scalehrtime(&hrstime); 2613 hrt2ts32(hrutime + hrstime, &psp->pr_time); 2614 /* compute %cpu for the lwp */ 2615 pct = cpu_update_pct(t, gethrtime_unscaled()); 2616 psp->pr_pctcpu = prgetpctcpu(pct); 2617 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 2618 if (psp->pr_cpu > 99) 2619 psp->pr_cpu = 99; 2620 2621 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 2622 sizeof (psp->pr_clname) - 1); 2623 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 2624 psp->pr_onpro = t->t_cpu->cpu_id; 2625 psp->pr_bindpro = t->t_bind_cpu; 2626 psp->pr_bindpset = t->t_bind_pset; 2627 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 2628 } 2629 #endif /* _SYSCALL32_IMPL */ 2630 2631 /* 2632 * This used to get called when microstate accounting was disabled but 2633 * microstate information was requested. 
Since Microstate accounting is on 2634 * regardless of the proc flags, this simply makes it appear to procfs that 2635 * microstate accounting is on. This is relatively meaningless since you 2636 * can't turn it off, but this is here for the sake of appearances. 2637 */ 2638 2639 /*ARGSUSED*/ 2640 void 2641 estimate_msacct(kthread_t *t, hrtime_t curtime) 2642 { 2643 proc_t *p; 2644 2645 if (t == NULL) 2646 return; 2647 2648 p = ttoproc(t); 2649 ASSERT(MUTEX_HELD(&p->p_lock)); 2650 2651 /* 2652 * A system process (p0) could be referenced if the thread is 2653 * in the process of exiting. Don't turn on microstate accounting 2654 * in that case. 2655 */ 2656 if (p->p_flag & SSYS) 2657 return; 2658 2659 /* 2660 * Loop through all the LWPs (kernel threads) in the process. 2661 */ 2662 t = p->p_tlist; 2663 do { 2664 t->t_proc_flag |= TP_MSACCT; 2665 } while ((t = t->t_forw) != p->p_tlist); 2666 2667 p->p_flag |= SMSACCT; /* set process-wide MSACCT */ 2668 } 2669 2670 /* 2671 * It's not really possible to disable microstate accounting anymore. 2672 * However, this routine simply turns off the ms accounting flags in a process 2673 * This way procfs can still pretend to turn microstate accounting on and 2674 * off for a process, but it actually doesn't do anything. This is 2675 * a neutered form of preemptive idiot-proofing. 2676 */ 2677 void 2678 disable_msacct(proc_t *p) 2679 { 2680 kthread_t *t; 2681 2682 ASSERT(MUTEX_HELD(&p->p_lock)); 2683 2684 p->p_flag &= ~SMSACCT; /* clear process-wide MSACCT */ 2685 /* 2686 * Loop through all the LWPs (kernel threads) in the process. 2687 */ 2688 if ((t = p->p_tlist) != NULL) { 2689 do { 2690 /* clear per-thread flag */ 2691 t->t_proc_flag &= ~TP_MSACCT; 2692 } while ((t = t->t_forw) != p->p_tlist); 2693 } 2694 } 2695 2696 /* 2697 * Return resource usage information. 
2698 */ 2699 void 2700 prgetusage(kthread_t *t, prhusage_t *pup) 2701 { 2702 klwp_t *lwp = ttolwp(t); 2703 hrtime_t *mstimep; 2704 struct mstate *ms = &lwp->lwp_mstate; 2705 int state; 2706 int i; 2707 hrtime_t curtime; 2708 hrtime_t waitrq; 2709 hrtime_t tmp1; 2710 2711 curtime = gethrtime_unscaled(); 2712 2713 pup->pr_lwpid = t->t_tid; 2714 pup->pr_count = 1; 2715 pup->pr_create = ms->ms_start; 2716 pup->pr_term = ms->ms_term; 2717 scalehrtime(&pup->pr_create); 2718 scalehrtime(&pup->pr_term); 2719 if (ms->ms_term == 0) { 2720 pup->pr_rtime = curtime - ms->ms_start; 2721 scalehrtime(&pup->pr_rtime); 2722 } else { 2723 pup->pr_rtime = ms->ms_term - ms->ms_start; 2724 scalehrtime(&pup->pr_rtime); 2725 } 2726 2727 2728 pup->pr_utime = ms->ms_acct[LMS_USER]; 2729 pup->pr_stime = ms->ms_acct[LMS_SYSTEM]; 2730 pup->pr_ttime = ms->ms_acct[LMS_TRAP]; 2731 pup->pr_tftime = ms->ms_acct[LMS_TFAULT]; 2732 pup->pr_dftime = ms->ms_acct[LMS_DFAULT]; 2733 pup->pr_kftime = ms->ms_acct[LMS_KFAULT]; 2734 pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 2735 pup->pr_slptime = ms->ms_acct[LMS_SLEEP]; 2736 pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 2737 pup->pr_stoptime = ms->ms_acct[LMS_STOPPED]; 2738 2739 prscaleusage(pup); 2740 2741 /* 2742 * Adjust for time waiting in the dispatcher queue. 2743 */ 2744 waitrq = t->t_waitrq; /* hopefully atomic */ 2745 if (waitrq != 0) { 2746 if (waitrq > curtime) { 2747 curtime = gethrtime_unscaled(); 2748 } 2749 tmp1 = curtime - waitrq; 2750 scalehrtime(&tmp1); 2751 pup->pr_wtime += tmp1; 2752 curtime = waitrq; 2753 } 2754 2755 /* 2756 * Adjust for time spent in current microstate. 2757 */ 2758 if (ms->ms_state_start > curtime) { 2759 curtime = gethrtime_unscaled(); 2760 } 2761 2762 i = 0; 2763 do { 2764 switch (state = t->t_mstate) { 2765 case LMS_SLEEP: 2766 /* 2767 * Update the timer for the current sleep state. 
2768 */ 2769 switch (state = ms->ms_prev) { 2770 case LMS_TFAULT: 2771 case LMS_DFAULT: 2772 case LMS_KFAULT: 2773 case LMS_USER_LOCK: 2774 break; 2775 default: 2776 state = LMS_SLEEP; 2777 break; 2778 } 2779 break; 2780 case LMS_TFAULT: 2781 case LMS_DFAULT: 2782 case LMS_KFAULT: 2783 case LMS_USER_LOCK: 2784 state = LMS_SYSTEM; 2785 break; 2786 } 2787 switch (state) { 2788 case LMS_USER: mstimep = &pup->pr_utime; break; 2789 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 2790 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 2791 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 2792 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 2793 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 2794 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 2795 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 2796 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 2797 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 2798 default: panic("prgetusage: unknown microstate"); 2799 } 2800 tmp1 = curtime - ms->ms_state_start; 2801 if (tmp1 < 0) { 2802 curtime = gethrtime_unscaled(); 2803 i++; 2804 continue; 2805 } 2806 scalehrtime(&tmp1); 2807 } while (tmp1 < 0 && i < MAX_ITERS_SPIN); 2808 2809 *mstimep += tmp1; 2810 2811 /* update pup timestamp */ 2812 pup->pr_tstamp = curtime; 2813 scalehrtime(&pup->pr_tstamp); 2814 2815 /* 2816 * Resource usage counters. 
2817 */ 2818 pup->pr_minf = lwp->lwp_ru.minflt; 2819 pup->pr_majf = lwp->lwp_ru.majflt; 2820 pup->pr_nswap = lwp->lwp_ru.nswap; 2821 pup->pr_inblk = lwp->lwp_ru.inblock; 2822 pup->pr_oublk = lwp->lwp_ru.oublock; 2823 pup->pr_msnd = lwp->lwp_ru.msgsnd; 2824 pup->pr_mrcv = lwp->lwp_ru.msgrcv; 2825 pup->pr_sigs = lwp->lwp_ru.nsignals; 2826 pup->pr_vctx = lwp->lwp_ru.nvcsw; 2827 pup->pr_ictx = lwp->lwp_ru.nivcsw; 2828 pup->pr_sysc = lwp->lwp_ru.sysc; 2829 pup->pr_ioch = lwp->lwp_ru.ioch; 2830 } 2831 2832 /* 2833 * Convert ms_acct stats from unscaled high-res time to nanoseconds 2834 */ 2835 void 2836 prscaleusage(prhusage_t *usg) 2837 { 2838 scalehrtime(&usg->pr_utime); 2839 scalehrtime(&usg->pr_stime); 2840 scalehrtime(&usg->pr_ttime); 2841 scalehrtime(&usg->pr_tftime); 2842 scalehrtime(&usg->pr_dftime); 2843 scalehrtime(&usg->pr_kftime); 2844 scalehrtime(&usg->pr_ltime); 2845 scalehrtime(&usg->pr_slptime); 2846 scalehrtime(&usg->pr_wtime); 2847 scalehrtime(&usg->pr_stoptime); 2848 } 2849 2850 2851 /* 2852 * Sum resource usage information. 
2853 */ 2854 void 2855 praddusage(kthread_t *t, prhusage_t *pup) 2856 { 2857 klwp_t *lwp = ttolwp(t); 2858 hrtime_t *mstimep; 2859 struct mstate *ms = &lwp->lwp_mstate; 2860 int state; 2861 int i; 2862 hrtime_t curtime; 2863 hrtime_t waitrq; 2864 hrtime_t tmp; 2865 prhusage_t conv; 2866 2867 curtime = gethrtime_unscaled(); 2868 2869 if (ms->ms_term == 0) { 2870 tmp = curtime - ms->ms_start; 2871 scalehrtime(&tmp); 2872 pup->pr_rtime += tmp; 2873 } else { 2874 tmp = ms->ms_term - ms->ms_start; 2875 scalehrtime(&tmp); 2876 pup->pr_rtime += tmp; 2877 } 2878 2879 conv.pr_utime = ms->ms_acct[LMS_USER]; 2880 conv.pr_stime = ms->ms_acct[LMS_SYSTEM]; 2881 conv.pr_ttime = ms->ms_acct[LMS_TRAP]; 2882 conv.pr_tftime = ms->ms_acct[LMS_TFAULT]; 2883 conv.pr_dftime = ms->ms_acct[LMS_DFAULT]; 2884 conv.pr_kftime = ms->ms_acct[LMS_KFAULT]; 2885 conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 2886 conv.pr_slptime = ms->ms_acct[LMS_SLEEP]; 2887 conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 2888 conv.pr_stoptime = ms->ms_acct[LMS_STOPPED]; 2889 2890 prscaleusage(&conv); 2891 2892 pup->pr_utime += conv.pr_utime; 2893 pup->pr_stime += conv.pr_stime; 2894 pup->pr_ttime += conv.pr_ttime; 2895 pup->pr_tftime += conv.pr_tftime; 2896 pup->pr_dftime += conv.pr_dftime; 2897 pup->pr_kftime += conv.pr_kftime; 2898 pup->pr_ltime += conv.pr_ltime; 2899 pup->pr_slptime += conv.pr_slptime; 2900 pup->pr_wtime += conv.pr_wtime; 2901 pup->pr_stoptime += conv.pr_stoptime; 2902 2903 /* 2904 * Adjust for time waiting in the dispatcher queue. 2905 */ 2906 waitrq = t->t_waitrq; /* hopefully atomic */ 2907 if (waitrq != 0) { 2908 if (waitrq > curtime) { 2909 curtime = gethrtime_unscaled(); 2910 } 2911 tmp = curtime - waitrq; 2912 scalehrtime(&tmp); 2913 pup->pr_wtime += tmp; 2914 curtime = waitrq; 2915 } 2916 2917 /* 2918 * Adjust for time spent in current microstate. 
2919 */ 2920 if (ms->ms_state_start > curtime) { 2921 curtime = gethrtime_unscaled(); 2922 } 2923 2924 i = 0; 2925 do { 2926 switch (state = t->t_mstate) { 2927 case LMS_SLEEP: 2928 /* 2929 * Update the timer for the current sleep state. 2930 */ 2931 switch (state = ms->ms_prev) { 2932 case LMS_TFAULT: 2933 case LMS_DFAULT: 2934 case LMS_KFAULT: 2935 case LMS_USER_LOCK: 2936 break; 2937 default: 2938 state = LMS_SLEEP; 2939 break; 2940 } 2941 break; 2942 case LMS_TFAULT: 2943 case LMS_DFAULT: 2944 case LMS_KFAULT: 2945 case LMS_USER_LOCK: 2946 state = LMS_SYSTEM; 2947 break; 2948 } 2949 switch (state) { 2950 case LMS_USER: mstimep = &pup->pr_utime; break; 2951 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 2952 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 2953 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 2954 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 2955 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 2956 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 2957 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 2958 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 2959 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 2960 default: panic("praddusage: unknown microstate"); 2961 } 2962 tmp = curtime - ms->ms_state_start; 2963 if (tmp < 0) { 2964 curtime = gethrtime_unscaled(); 2965 i++; 2966 continue; 2967 } 2968 scalehrtime(&tmp); 2969 } while (tmp < 0 && i < MAX_ITERS_SPIN); 2970 2971 *mstimep += tmp; 2972 2973 /* update pup timestamp */ 2974 pup->pr_tstamp = curtime; 2975 scalehrtime(&pup->pr_tstamp); 2976 2977 /* 2978 * Resource usage counters. 
2979 */ 2980 pup->pr_minf += lwp->lwp_ru.minflt; 2981 pup->pr_majf += lwp->lwp_ru.majflt; 2982 pup->pr_nswap += lwp->lwp_ru.nswap; 2983 pup->pr_inblk += lwp->lwp_ru.inblock; 2984 pup->pr_oublk += lwp->lwp_ru.oublock; 2985 pup->pr_msnd += lwp->lwp_ru.msgsnd; 2986 pup->pr_mrcv += lwp->lwp_ru.msgrcv; 2987 pup->pr_sigs += lwp->lwp_ru.nsignals; 2988 pup->pr_vctx += lwp->lwp_ru.nvcsw; 2989 pup->pr_ictx += lwp->lwp_ru.nivcsw; 2990 pup->pr_sysc += lwp->lwp_ru.sysc; 2991 pup->pr_ioch += lwp->lwp_ru.ioch; 2992 } 2993 2994 /* 2995 * Convert a prhusage_t to a prusage_t. 2996 * This means convert each hrtime_t to a timestruc_t 2997 * and copy the count fields uint64_t => ulong_t. 2998 */ 2999 void 3000 prcvtusage(prhusage_t *pup, prusage_t *upup) 3001 { 3002 uint64_t *ullp; 3003 ulong_t *ulp; 3004 int i; 3005 3006 upup->pr_lwpid = pup->pr_lwpid; 3007 upup->pr_count = pup->pr_count; 3008 3009 hrt2ts(pup->pr_tstamp, &upup->pr_tstamp); 3010 hrt2ts(pup->pr_create, &upup->pr_create); 3011 hrt2ts(pup->pr_term, &upup->pr_term); 3012 hrt2ts(pup->pr_rtime, &upup->pr_rtime); 3013 hrt2ts(pup->pr_utime, &upup->pr_utime); 3014 hrt2ts(pup->pr_stime, &upup->pr_stime); 3015 hrt2ts(pup->pr_ttime, &upup->pr_ttime); 3016 hrt2ts(pup->pr_tftime, &upup->pr_tftime); 3017 hrt2ts(pup->pr_dftime, &upup->pr_dftime); 3018 hrt2ts(pup->pr_kftime, &upup->pr_kftime); 3019 hrt2ts(pup->pr_ltime, &upup->pr_ltime); 3020 hrt2ts(pup->pr_slptime, &upup->pr_slptime); 3021 hrt2ts(pup->pr_wtime, &upup->pr_wtime); 3022 hrt2ts(pup->pr_stoptime, &upup->pr_stoptime); 3023 bzero(upup->filltime, sizeof (upup->filltime)); 3024 3025 ullp = &pup->pr_minf; 3026 ulp = &upup->pr_minf; 3027 for (i = 0; i < 22; i++) 3028 *ulp++ = (ulong_t)*ullp++; 3029 } 3030 3031 #ifdef _SYSCALL32_IMPL 3032 void 3033 prcvtusage32(prhusage_t *pup, prusage32_t *upup) 3034 { 3035 uint64_t *ullp; 3036 uint32_t *ulp; 3037 int i; 3038 3039 upup->pr_lwpid = pup->pr_lwpid; 3040 upup->pr_count = pup->pr_count; 3041 3042 hrt2ts32(pup->pr_tstamp, 
&upup->pr_tstamp); 3043 hrt2ts32(pup->pr_create, &upup->pr_create); 3044 hrt2ts32(pup->pr_term, &upup->pr_term); 3045 hrt2ts32(pup->pr_rtime, &upup->pr_rtime); 3046 hrt2ts32(pup->pr_utime, &upup->pr_utime); 3047 hrt2ts32(pup->pr_stime, &upup->pr_stime); 3048 hrt2ts32(pup->pr_ttime, &upup->pr_ttime); 3049 hrt2ts32(pup->pr_tftime, &upup->pr_tftime); 3050 hrt2ts32(pup->pr_dftime, &upup->pr_dftime); 3051 hrt2ts32(pup->pr_kftime, &upup->pr_kftime); 3052 hrt2ts32(pup->pr_ltime, &upup->pr_ltime); 3053 hrt2ts32(pup->pr_slptime, &upup->pr_slptime); 3054 hrt2ts32(pup->pr_wtime, &upup->pr_wtime); 3055 hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime); 3056 bzero(upup->filltime, sizeof (upup->filltime)); 3057 3058 ullp = &pup->pr_minf; 3059 ulp = &upup->pr_minf; 3060 for (i = 0; i < 22; i++) 3061 *ulp++ = (uint32_t)*ullp++; 3062 } 3063 #endif /* _SYSCALL32_IMPL */ 3064 3065 /* 3066 * Determine whether a set is empty. 3067 */ 3068 int 3069 setisempty(uint32_t *sp, uint_t n) 3070 { 3071 while (n--) 3072 if (*sp++) 3073 return (0); 3074 return (1); 3075 } 3076 3077 /* 3078 * Utility routine for establishing a watched area in the process. 3079 * Keep the list of watched areas sorted by virtual address. 3080 */ 3081 int 3082 set_watched_area(proc_t *p, struct watched_area *pwa) 3083 { 3084 caddr_t vaddr = pwa->wa_vaddr; 3085 caddr_t eaddr = pwa->wa_eaddr; 3086 ulong_t flags = pwa->wa_flags; 3087 struct watched_area *target; 3088 avl_index_t where; 3089 int error = 0; 3090 3091 /* we must not be holding p->p_lock, but the process must be locked */ 3092 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 3093 ASSERT(p->p_proc_flag & P_PR_LOCK); 3094 3095 /* 3096 * If this is our first watchpoint, enable watchpoints for the process. 
3097 */ 3098 if (!pr_watch_active(p)) { 3099 kthread_t *t; 3100 3101 mutex_enter(&p->p_lock); 3102 if ((t = p->p_tlist) != NULL) { 3103 do { 3104 watch_enable(t); 3105 } while ((t = t->t_forw) != p->p_tlist); 3106 } 3107 mutex_exit(&p->p_lock); 3108 } 3109 3110 target = pr_find_watched_area(p, pwa, &where); 3111 if (target != NULL) { 3112 /* 3113 * We discovered an existing, overlapping watched area. 3114 * Allow it only if it is an exact match. 3115 */ 3116 if (target->wa_vaddr != vaddr || 3117 target->wa_eaddr != eaddr) 3118 error = EINVAL; 3119 else if (target->wa_flags != flags) { 3120 error = set_watched_page(p, vaddr, eaddr, 3121 flags, target->wa_flags); 3122 target->wa_flags = flags; 3123 } 3124 kmem_free(pwa, sizeof (struct watched_area)); 3125 } else { 3126 avl_insert(&p->p_warea, pwa, where); 3127 error = set_watched_page(p, vaddr, eaddr, flags, 0); 3128 } 3129 3130 return (error); 3131 } 3132 3133 /* 3134 * Utility routine for clearing a watched area in the process. 3135 * Must be an exact match of the virtual address. 3136 * size and flags don't matter. 3137 */ 3138 int 3139 clear_watched_area(proc_t *p, struct watched_area *pwa) 3140 { 3141 struct watched_area *found; 3142 3143 /* we must not be holding p->p_lock, but the process must be locked */ 3144 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 3145 ASSERT(p->p_proc_flag & P_PR_LOCK); 3146 3147 3148 if (!pr_watch_active(p)) { 3149 kmem_free(pwa, sizeof (struct watched_area)); 3150 return (0); 3151 } 3152 3153 /* 3154 * Look for a matching address in the watched areas. If a match is 3155 * found, clear the old watched area and adjust the watched page(s). It 3156 * is not an error if there is no match. 
3157 */ 3158 if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL && 3159 found->wa_vaddr == pwa->wa_vaddr) { 3160 clear_watched_page(p, found->wa_vaddr, found->wa_eaddr, 3161 found->wa_flags); 3162 avl_remove(&p->p_warea, found); 3163 kmem_free(found, sizeof (struct watched_area)); 3164 } 3165 3166 kmem_free(pwa, sizeof (struct watched_area)); 3167 3168 /* 3169 * If we removed the last watched area from the process, disable 3170 * watchpoints. 3171 */ 3172 if (!pr_watch_active(p)) { 3173 kthread_t *t; 3174 3175 mutex_enter(&p->p_lock); 3176 if ((t = p->p_tlist) != NULL) { 3177 do { 3178 watch_disable(t); 3179 } while ((t = t->t_forw) != p->p_tlist); 3180 } 3181 mutex_exit(&p->p_lock); 3182 } 3183 3184 return (0); 3185 } 3186 3187 /* 3188 * Frees all the watched_area structures 3189 */ 3190 void 3191 pr_free_watchpoints(proc_t *p) 3192 { 3193 struct watched_area *delp; 3194 void *cookie; 3195 3196 cookie = NULL; 3197 while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL) 3198 kmem_free(delp, sizeof (struct watched_area)); 3199 3200 avl_destroy(&p->p_warea); 3201 } 3202 3203 /* 3204 * This one is called by the traced process to unwatch all the 3205 * pages while deallocating the list of watched_page structs. 
3206 */ 3207 void 3208 pr_free_watched_pages(proc_t *p) 3209 { 3210 struct as *as = p->p_as; 3211 struct watched_page *pwp; 3212 uint_t prot; 3213 int retrycnt, err; 3214 void *cookie; 3215 3216 if (as == NULL || avl_numnodes(&as->a_wpage) == 0) 3217 return; 3218 3219 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 3220 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3221 3222 pwp = avl_first(&as->a_wpage); 3223 3224 cookie = NULL; 3225 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) { 3226 retrycnt = 0; 3227 if ((prot = pwp->wp_oprot) != 0) { 3228 caddr_t addr = pwp->wp_vaddr; 3229 struct seg *seg; 3230 retry: 3231 3232 if ((pwp->wp_prot != prot || 3233 (pwp->wp_flags & WP_NOWATCH)) && 3234 (seg = as_segat(as, addr)) != NULL) { 3235 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot); 3236 if (err == IE_RETRY) { 3237 ASSERT(retrycnt == 0); 3238 retrycnt++; 3239 goto retry; 3240 } 3241 } 3242 } 3243 kmem_free(pwp, sizeof (struct watched_page)); 3244 } 3245 3246 avl_destroy(&as->a_wpage); 3247 p->p_wprot = NULL; 3248 3249 AS_LOCK_EXIT(as, &as->a_lock); 3250 } 3251 3252 /* 3253 * Insert a watched area into the list of watched pages. 3254 * If oflags is zero then we are adding a new watched area. 3255 * Otherwise we are changing the flags of an existing watched area. 3256 */ 3257 static int 3258 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, 3259 ulong_t flags, ulong_t oflags) 3260 { 3261 struct as *as = p->p_as; 3262 avl_tree_t *pwp_tree; 3263 struct watched_page *pwp, *newpwp; 3264 struct watched_page tpw; 3265 avl_index_t where; 3266 struct seg *seg; 3267 uint_t prot; 3268 caddr_t addr; 3269 3270 /* 3271 * We need to pre-allocate a list of structures before we grab the 3272 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks 3273 * held. 
3274 */ 3275 newpwp = NULL; 3276 for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3277 addr < eaddr; addr += PAGESIZE) { 3278 pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP); 3279 pwp->wp_list = newpwp; 3280 newpwp = pwp; 3281 } 3282 3283 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3284 3285 /* 3286 * Search for an existing watched page to contain the watched area. 3287 * If none is found, grab a new one from the available list 3288 * and insert it in the active list, keeping the list sorted 3289 * by user-level virtual address. 3290 */ 3291 if (p->p_flag & SVFWAIT) 3292 pwp_tree = &p->p_wpage; 3293 else 3294 pwp_tree = &as->a_wpage; 3295 3296 again: 3297 if (avl_numnodes(pwp_tree) > prnwatch) { 3298 AS_LOCK_EXIT(as, &as->a_lock); 3299 while (newpwp != NULL) { 3300 pwp = newpwp->wp_list; 3301 kmem_free(newpwp, sizeof (struct watched_page)); 3302 newpwp = pwp; 3303 } 3304 return (E2BIG); 3305 } 3306 3307 tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3308 if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) { 3309 pwp = newpwp; 3310 newpwp = newpwp->wp_list; 3311 pwp->wp_list = NULL; 3312 pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr & 3313 (uintptr_t)PAGEMASK); 3314 avl_insert(pwp_tree, pwp, where); 3315 } 3316 3317 ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE); 3318 3319 if (oflags & WA_READ) 3320 pwp->wp_read--; 3321 if (oflags & WA_WRITE) 3322 pwp->wp_write--; 3323 if (oflags & WA_EXEC) 3324 pwp->wp_exec--; 3325 3326 ASSERT(pwp->wp_read >= 0); 3327 ASSERT(pwp->wp_write >= 0); 3328 ASSERT(pwp->wp_exec >= 0); 3329 3330 if (flags & WA_READ) 3331 pwp->wp_read++; 3332 if (flags & WA_WRITE) 3333 pwp->wp_write++; 3334 if (flags & WA_EXEC) 3335 pwp->wp_exec++; 3336 3337 if (!(p->p_flag & SVFWAIT)) { 3338 vaddr = pwp->wp_vaddr; 3339 if (pwp->wp_oprot == 0 && 3340 (seg = as_segat(as, vaddr)) != NULL) { 3341 SEGOP_GETPROT(seg, vaddr, 0, &prot); 3342 pwp->wp_oprot = (uchar_t)prot; 3343 pwp->wp_prot = 
(uchar_t)prot; 3344 } 3345 if (pwp->wp_oprot != 0) { 3346 prot = pwp->wp_oprot; 3347 if (pwp->wp_read) 3348 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3349 if (pwp->wp_write) 3350 prot &= ~PROT_WRITE; 3351 if (pwp->wp_exec) 3352 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3353 if (!(pwp->wp_flags & WP_NOWATCH) && 3354 pwp->wp_prot != prot && 3355 (pwp->wp_flags & WP_SETPROT) == 0) { 3356 pwp->wp_flags |= WP_SETPROT; 3357 pwp->wp_list = p->p_wprot; 3358 p->p_wprot = pwp; 3359 } 3360 pwp->wp_prot = (uchar_t)prot; 3361 } 3362 } 3363 3364 /* 3365 * If the watched area extends into the next page then do 3366 * it over again with the virtual address of the next page. 3367 */ 3368 if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr) 3369 goto again; 3370 3371 AS_LOCK_EXIT(as, &as->a_lock); 3372 3373 /* 3374 * Free any pages we may have over-allocated 3375 */ 3376 while (newpwp != NULL) { 3377 pwp = newpwp->wp_list; 3378 kmem_free(newpwp, sizeof (struct watched_page)); 3379 newpwp = pwp; 3380 } 3381 3382 return (0); 3383 } 3384 3385 /* 3386 * Remove a watched area from the list of watched pages. 3387 * A watched area may extend over more than one page. 
3388 */ 3389 static void 3390 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags) 3391 { 3392 struct as *as = p->p_as; 3393 struct watched_page *pwp; 3394 struct watched_page tpw; 3395 avl_tree_t *tree; 3396 avl_index_t where; 3397 3398 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3399 3400 if (p->p_flag & SVFWAIT) 3401 tree = &p->p_wpage; 3402 else 3403 tree = &as->a_wpage; 3404 3405 tpw.wp_vaddr = vaddr = 3406 (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3407 pwp = avl_find(tree, &tpw, &where); 3408 if (pwp == NULL) 3409 pwp = avl_nearest(tree, where, AVL_AFTER); 3410 3411 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 3412 ASSERT(vaddr <= pwp->wp_vaddr); 3413 3414 if (flags & WA_READ) 3415 pwp->wp_read--; 3416 if (flags & WA_WRITE) 3417 pwp->wp_write--; 3418 if (flags & WA_EXEC) 3419 pwp->wp_exec--; 3420 3421 if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) { 3422 /* 3423 * Reset the hat layer's protections on this page. 3424 */ 3425 if (pwp->wp_oprot != 0) { 3426 uint_t prot = pwp->wp_oprot; 3427 3428 if (pwp->wp_read) 3429 prot &= 3430 ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3431 if (pwp->wp_write) 3432 prot &= ~PROT_WRITE; 3433 if (pwp->wp_exec) 3434 prot &= 3435 ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3436 if (!(pwp->wp_flags & WP_NOWATCH) && 3437 pwp->wp_prot != prot && 3438 (pwp->wp_flags & WP_SETPROT) == 0) { 3439 pwp->wp_flags |= WP_SETPROT; 3440 pwp->wp_list = p->p_wprot; 3441 p->p_wprot = pwp; 3442 } 3443 pwp->wp_prot = (uchar_t)prot; 3444 } 3445 } else { 3446 /* 3447 * No watched areas remain in this page. 3448 * Reset everything to normal. 
3449 */ 3450 if (pwp->wp_oprot != 0) { 3451 pwp->wp_prot = pwp->wp_oprot; 3452 if ((pwp->wp_flags & WP_SETPROT) == 0) { 3453 pwp->wp_flags |= WP_SETPROT; 3454 pwp->wp_list = p->p_wprot; 3455 p->p_wprot = pwp; 3456 } 3457 } 3458 } 3459 3460 pwp = AVL_NEXT(tree, pwp); 3461 } 3462 3463 AS_LOCK_EXIT(as, &as->a_lock); 3464 } 3465 3466 /* 3467 * Return the original protections for the specified page. 3468 */ 3469 static void 3470 getwatchprot(struct as *as, caddr_t addr, uint_t *prot) 3471 { 3472 struct watched_page *pwp; 3473 struct watched_page tpw; 3474 3475 ASSERT(AS_LOCK_HELD(as, &as->a_lock)); 3476 3477 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 3478 if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL) 3479 *prot = pwp->wp_oprot; 3480 } 3481 3482 static prpagev_t * 3483 pr_pagev_create(struct seg *seg, int check_noreserve) 3484 { 3485 prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP); 3486 size_t total_pages = seg_pages(seg); 3487 3488 /* 3489 * Limit the size of our vectors to pagev_lim pages at a time. We need 3490 * 4 or 5 bytes of storage per page, so this means we limit ourself 3491 * to about a megabyte of kernel heap by default. 
3492 */ 3493 pagev->pg_npages = MIN(total_pages, pagev_lim); 3494 pagev->pg_pnbase = 0; 3495 3496 pagev->pg_protv = 3497 kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP); 3498 3499 if (check_noreserve) 3500 pagev->pg_incore = 3501 kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP); 3502 else 3503 pagev->pg_incore = NULL; 3504 3505 return (pagev); 3506 } 3507 3508 static void 3509 pr_pagev_destroy(prpagev_t *pagev) 3510 { 3511 if (pagev->pg_incore != NULL) 3512 kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char)); 3513 3514 kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t)); 3515 kmem_free(pagev, sizeof (prpagev_t)); 3516 } 3517 3518 static caddr_t 3519 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr) 3520 { 3521 ulong_t lastpg = seg_page(seg, eaddr - 1); 3522 ulong_t pn, pnlim; 3523 caddr_t saddr; 3524 size_t len; 3525 3526 ASSERT(addr >= seg->s_base && addr <= eaddr); 3527 3528 if (addr == eaddr) 3529 return (eaddr); 3530 3531 refill: 3532 ASSERT(addr < eaddr); 3533 pagev->pg_pnbase = seg_page(seg, addr); 3534 pnlim = pagev->pg_pnbase + pagev->pg_npages; 3535 saddr = addr; 3536 3537 if (lastpg < pnlim) 3538 len = (size_t)(eaddr - addr); 3539 else 3540 len = pagev->pg_npages * PAGESIZE; 3541 3542 if (pagev->pg_incore != NULL) { 3543 /* 3544 * INCORE cleverly has different semantics than GETPROT: 3545 * it returns info on pages up to but NOT including addr + len. 3546 */ 3547 SEGOP_INCORE(seg, addr, len, pagev->pg_incore); 3548 pn = pagev->pg_pnbase; 3549 3550 do { 3551 /* 3552 * Guilty knowledge here: We know that segvn_incore 3553 * returns more than just the low-order bit that 3554 * indicates the page is actually in memory. If any 3555 * bits are set, then the page has backing store. 
3556 */ 3557 if (pagev->pg_incore[pn++ - pagev->pg_pnbase]) 3558 goto out; 3559 3560 } while ((addr += PAGESIZE) < eaddr && pn < pnlim); 3561 3562 /* 3563 * If we examined all the pages in the vector but we're not 3564 * at the end of the segment, take another lap. 3565 */ 3566 if (addr < eaddr) 3567 goto refill; 3568 } 3569 3570 /* 3571 * Need to take len - 1 because addr + len is the address of the 3572 * first byte of the page just past the end of what we want. 3573 */ 3574 out: 3575 SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv); 3576 return (addr); 3577 } 3578 3579 static caddr_t 3580 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg, 3581 caddr_t *saddrp, caddr_t eaddr, uint_t *protp) 3582 { 3583 /* 3584 * Our starting address is either the specified address, or the base 3585 * address from the start of the pagev. If the latter is greater, 3586 * this means a previous call to pr_pagev_fill has already scanned 3587 * further than the end of the previous mapping. 3588 */ 3589 caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE; 3590 caddr_t addr = MAX(*saddrp, base); 3591 ulong_t pn = seg_page(seg, addr); 3592 uint_t prot, nprot; 3593 3594 /* 3595 * If we're dealing with noreserve pages, then advance addr to 3596 * the address of the next page which has backing store. 3597 */ 3598 if (pagev->pg_incore != NULL) { 3599 while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) { 3600 if ((addr += PAGESIZE) == eaddr) { 3601 *saddrp = addr; 3602 prot = 0; 3603 goto out; 3604 } 3605 if (++pn == pagev->pg_pnbase + pagev->pg_npages) { 3606 addr = pr_pagev_fill(pagev, seg, addr, eaddr); 3607 if (addr == eaddr) { 3608 *saddrp = addr; 3609 prot = 0; 3610 goto out; 3611 } 3612 pn = seg_page(seg, addr); 3613 } 3614 } 3615 } 3616 3617 /* 3618 * Get the protections on the page corresponding to addr. 
3619 */ 3620 pn = seg_page(seg, addr); 3621 ASSERT(pn >= pagev->pg_pnbase); 3622 ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages)); 3623 3624 prot = pagev->pg_protv[pn - pagev->pg_pnbase]; 3625 getwatchprot(seg->s_as, addr, &prot); 3626 *saddrp = addr; 3627 3628 /* 3629 * Now loop until we find a backed page with different protections 3630 * or we reach the end of this segment. 3631 */ 3632 while ((addr += PAGESIZE) < eaddr) { 3633 /* 3634 * If pn has advanced to the page number following what we 3635 * have information on, refill the page vector and reset 3636 * addr and pn. If pr_pagev_fill does not return the 3637 * address of the next page, we have a discontiguity and 3638 * thus have reached the end of the current mapping. 3639 */ 3640 if (++pn == pagev->pg_pnbase + pagev->pg_npages) { 3641 caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr); 3642 if (naddr != addr) 3643 goto out; 3644 pn = seg_page(seg, addr); 3645 } 3646 3647 /* 3648 * The previous page's protections are in prot, and it has 3649 * backing. If this page is MAP_NORESERVE and has no backing, 3650 * then end this mapping and return the previous protections. 3651 */ 3652 if (pagev->pg_incore != NULL && 3653 pagev->pg_incore[pn - pagev->pg_pnbase] == 0) 3654 break; 3655 3656 /* 3657 * Otherwise end the mapping if this page's protections (nprot) 3658 * are different than those in the previous page (prot). 3659 */ 3660 nprot = pagev->pg_protv[pn - pagev->pg_pnbase]; 3661 getwatchprot(seg->s_as, addr, &nprot); 3662 3663 if (nprot != prot) 3664 break; 3665 } 3666 3667 out: 3668 *protp = prot; 3669 return (addr); 3670 } 3671 3672 size_t 3673 pr_getsegsize(struct seg *seg, int reserved) 3674 { 3675 size_t size = seg->s_size; 3676 3677 /* 3678 * If we're interested in the reserved space, return the size of the 3679 * segment itself. Everything else in this function is a special case 3680 * to determine the actual underlying size of various segment types. 
3681 */ 3682 if (reserved) 3683 return (size); 3684 3685 /* 3686 * If this is a segvn mapping of a regular file, return the smaller 3687 * of the segment size and the remaining size of the file beyond 3688 * the file offset corresponding to seg->s_base. 3689 */ 3690 if (seg->s_ops == &segvn_ops) { 3691 vattr_t vattr; 3692 vnode_t *vp; 3693 3694 vattr.va_mask = AT_SIZE; 3695 3696 if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 && 3697 vp != NULL && vp->v_type == VREG && 3698 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 3699 3700 u_offset_t fsize = vattr.va_size; 3701 u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base); 3702 3703 if (fsize < offset) 3704 fsize = 0; 3705 else 3706 fsize -= offset; 3707 3708 fsize = roundup(fsize, (u_offset_t)PAGESIZE); 3709 3710 if (fsize < (u_offset_t)size) 3711 size = (size_t)fsize; 3712 } 3713 3714 return (size); 3715 } 3716 3717 /* 3718 * If this is an ISM shared segment, don't include pages that are 3719 * beyond the real size of the spt segment that backs it. 3720 */ 3721 if (seg->s_ops == &segspt_shmops) 3722 return (MIN(spt_realsize(seg), size)); 3723 3724 /* 3725 * If this is segment is a mapping from /dev/null, then this is a 3726 * reservation of virtual address space and has no actual size. 3727 * Such segments are backed by segdev and have type set to neither 3728 * MAP_SHARED nor MAP_PRIVATE. 3729 */ 3730 if (seg->s_ops == &segdev_ops && 3731 ((SEGOP_GETTYPE(seg, seg->s_base) & 3732 (MAP_SHARED | MAP_PRIVATE)) == 0)) 3733 return (0); 3734 3735 /* 3736 * If this segment doesn't match one of the special types we handle, 3737 * just return the size of the segment itself. 
3738 */ 3739 return (size); 3740 } 3741 3742 uint_t 3743 pr_getprot(struct seg *seg, int reserved, void **tmp, 3744 caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr) 3745 { 3746 struct as *as = seg->s_as; 3747 3748 caddr_t saddr = *saddrp; 3749 caddr_t naddr; 3750 3751 int check_noreserve; 3752 uint_t prot; 3753 3754 union { 3755 struct segvn_data *svd; 3756 struct segdev_data *sdp; 3757 void *data; 3758 } s; 3759 3760 s.data = seg->s_data; 3761 3762 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 3763 ASSERT(saddr >= seg->s_base && saddr < eaddr); 3764 ASSERT(eaddr <= seg->s_base + seg->s_size); 3765 3766 /* 3767 * Don't include MAP_NORESERVE pages in the address range 3768 * unless their mappings have actually materialized. 3769 * We cheat by knowing that segvn is the only segment 3770 * driver that supports MAP_NORESERVE. 3771 */ 3772 check_noreserve = 3773 (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL && 3774 (s.svd->vp == NULL || s.svd->vp->v_type != VREG) && 3775 (s.svd->flags & MAP_NORESERVE)); 3776 3777 /* 3778 * Examine every page only as a last resort. We use guilty knowledge 3779 * of segvn and segdev to avoid this: if there are no per-page 3780 * protections present in the segment and we don't care about 3781 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment. 3782 */ 3783 if (!check_noreserve && saddr == seg->s_base && 3784 seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) { 3785 prot = s.svd->prot; 3786 getwatchprot(as, saddr, &prot); 3787 naddr = eaddr; 3788 3789 } else if (saddr == seg->s_base && seg->s_ops == &segdev_ops && 3790 s.sdp != NULL && s.sdp->pageprot == 0) { 3791 prot = s.sdp->prot; 3792 getwatchprot(as, saddr, &prot); 3793 naddr = eaddr; 3794 3795 } else { 3796 prpagev_t *pagev; 3797 3798 /* 3799 * If addr is sitting at the start of the segment, then 3800 * create a page vector to store protection and incore 3801 * information for pages in the segment, and fill it. 
3802 * Otherwise, we expect *tmp to address the prpagev_t 3803 * allocated by a previous call to this function. 3804 */ 3805 if (saddr == seg->s_base) { 3806 pagev = pr_pagev_create(seg, check_noreserve); 3807 saddr = pr_pagev_fill(pagev, seg, saddr, eaddr); 3808 3809 ASSERT(*tmp == NULL); 3810 *tmp = pagev; 3811 3812 ASSERT(saddr <= eaddr); 3813 *saddrp = saddr; 3814 3815 if (saddr == eaddr) { 3816 naddr = saddr; 3817 prot = 0; 3818 goto out; 3819 } 3820 3821 } else { 3822 ASSERT(*tmp != NULL); 3823 pagev = (prpagev_t *)*tmp; 3824 } 3825 3826 naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot); 3827 ASSERT(naddr <= eaddr); 3828 } 3829 3830 out: 3831 if (naddr == eaddr) 3832 pr_getprot_done(tmp); 3833 *naddrp = naddr; 3834 return (prot); 3835 } 3836 3837 void 3838 pr_getprot_done(void **tmp) 3839 { 3840 if (*tmp != NULL) { 3841 pr_pagev_destroy((prpagev_t *)*tmp); 3842 *tmp = NULL; 3843 } 3844 } 3845 3846 /* 3847 * Return true iff the vnode is a /proc file from the object directory. 3848 */ 3849 int 3850 pr_isobject(vnode_t *vp) 3851 { 3852 return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT); 3853 } 3854 3855 /* 3856 * Return true iff the vnode is a /proc file opened by the process itself. 3857 */ 3858 int 3859 pr_isself(vnode_t *vp) 3860 { 3861 /* 3862 * XXX: To retain binary compatibility with the old 3863 * ioctl()-based version of /proc, we exempt self-opens 3864 * of /proc/<pid> from being marked close-on-exec. 
3865 */ 3866 return (vn_matchops(vp, prvnodeops) && 3867 (VTOP(vp)->pr_flags & PR_ISSELF) && 3868 VTOP(vp)->pr_type != PR_PIDDIR); 3869 } 3870 3871 static ssize_t 3872 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr) 3873 { 3874 ssize_t pagesize, hatsize; 3875 3876 ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 3877 ASSERT(IS_P2ALIGNED(saddr, PAGESIZE)); 3878 ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE)); 3879 ASSERT(saddr < eaddr); 3880 3881 pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr); 3882 ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize)); 3883 ASSERT(pagesize != 0); 3884 3885 if (pagesize == -1) 3886 pagesize = PAGESIZE; 3887 3888 saddr += P2NPHASE((uintptr_t)saddr, pagesize); 3889 3890 while (saddr < eaddr) { 3891 if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr)) 3892 break; 3893 ASSERT(IS_P2ALIGNED(saddr, pagesize)); 3894 saddr += pagesize; 3895 } 3896 3897 *naddrp = ((saddr < eaddr) ? saddr : eaddr); 3898 return (hatsize); 3899 } 3900 3901 /* 3902 * Return an array of structures with extended memory map information. 3903 * We allocate here; the caller must deallocate. 3904 */ 3905 int 3906 prgetxmap(proc_t *p, list_t *iolhead) 3907 { 3908 struct as *as = p->p_as; 3909 prxmap_t *mp; 3910 struct seg *seg; 3911 struct seg *brkseg, *stkseg; 3912 struct vnode *vp; 3913 struct vattr vattr; 3914 uint_t prot; 3915 3916 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 3917 3918 /* 3919 * Request an initial buffer size that doesn't waste memory 3920 * if the address space has only a small number of segments. 
3921 */ 3922 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 3923 3924 if ((seg = AS_SEGFIRST(as)) == NULL) 3925 return (0); 3926 3927 brkseg = break_seg(p); 3928 stkseg = as_segat(as, prgetstackbase(p)); 3929 3930 do { 3931 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 3932 caddr_t saddr, naddr, baddr; 3933 void *tmp = NULL; 3934 ssize_t psz; 3935 char *parr; 3936 uint64_t npages; 3937 uint64_t pagenum; 3938 3939 /* 3940 * Segment loop part one: iterate from the base of the segment 3941 * to its end, pausing at each address boundary (baddr) between 3942 * ranges that have different virtual memory protections. 3943 */ 3944 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { 3945 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); 3946 ASSERT(baddr >= saddr && baddr <= eaddr); 3947 3948 /* 3949 * Segment loop part two: iterate from the current 3950 * position to the end of the protection boundary, 3951 * pausing at each address boundary (naddr) between 3952 * ranges that have different underlying page sizes. 
3953 */ 3954 for (; saddr < baddr; saddr = naddr) { 3955 psz = pr_getpagesize(seg, saddr, &naddr, baddr); 3956 ASSERT(naddr >= saddr && naddr <= baddr); 3957 3958 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 3959 3960 mp->pr_vaddr = (uintptr_t)saddr; 3961 mp->pr_size = naddr - saddr; 3962 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 3963 mp->pr_mflags = 0; 3964 if (prot & PROT_READ) 3965 mp->pr_mflags |= MA_READ; 3966 if (prot & PROT_WRITE) 3967 mp->pr_mflags |= MA_WRITE; 3968 if (prot & PROT_EXEC) 3969 mp->pr_mflags |= MA_EXEC; 3970 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 3971 mp->pr_mflags |= MA_SHARED; 3972 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 3973 mp->pr_mflags |= MA_NORESERVE; 3974 if (seg->s_ops == &segspt_shmops || 3975 (seg->s_ops == &segvn_ops && 3976 (SEGOP_GETVP(seg, saddr, &vp) != 0 || 3977 vp == NULL))) 3978 mp->pr_mflags |= MA_ANON; 3979 if (seg == brkseg) 3980 mp->pr_mflags |= MA_BREAK; 3981 else if (seg == stkseg) 3982 mp->pr_mflags |= MA_STACK; 3983 if (seg->s_ops == &segspt_shmops) 3984 mp->pr_mflags |= MA_ISM | MA_SHM; 3985 3986 mp->pr_pagesize = PAGESIZE; 3987 if (psz == -1) { 3988 mp->pr_hatpagesize = 0; 3989 } else { 3990 mp->pr_hatpagesize = psz; 3991 } 3992 3993 /* 3994 * Manufacture a filename for the "object" dir. 3995 */ 3996 mp->pr_dev = PRNODEV; 3997 vattr.va_mask = AT_FSID|AT_NODEID; 3998 if (seg->s_ops == &segvn_ops && 3999 SEGOP_GETVP(seg, saddr, &vp) == 0 && 4000 vp != NULL && vp->v_type == VREG && 4001 VOP_GETATTR(vp, &vattr, 0, CRED(), 4002 NULL) == 0) { 4003 mp->pr_dev = vattr.va_fsid; 4004 mp->pr_ino = vattr.va_nodeid; 4005 if (vp == p->p_exec) 4006 (void) strcpy(mp->pr_mapname, 4007 "a.out"); 4008 else 4009 pr_object_name(mp->pr_mapname, 4010 vp, &vattr); 4011 } 4012 4013 /* 4014 * Get the SysV shared memory id, if any. 
4015 */ 4016 if ((mp->pr_mflags & MA_SHARED) && 4017 p->p_segacct && (mp->pr_shmid = shmgetid(p, 4018 seg->s_base)) != SHMID_NONE) { 4019 if (mp->pr_shmid == SHMID_FREE) 4020 mp->pr_shmid = -1; 4021 4022 mp->pr_mflags |= MA_SHM; 4023 } else { 4024 mp->pr_shmid = -1; 4025 } 4026 4027 npages = ((uintptr_t)(naddr - saddr)) >> 4028 PAGESHIFT; 4029 parr = kmem_zalloc(npages, KM_SLEEP); 4030 4031 SEGOP_INCORE(seg, saddr, naddr - saddr, parr); 4032 4033 for (pagenum = 0; pagenum < npages; pagenum++) { 4034 if (parr[pagenum] & SEG_PAGE_INCORE) 4035 mp->pr_rss++; 4036 if (parr[pagenum] & SEG_PAGE_ANON) 4037 mp->pr_anon++; 4038 if (parr[pagenum] & SEG_PAGE_LOCKED) 4039 mp->pr_locked++; 4040 } 4041 kmem_free(parr, npages); 4042 } 4043 } 4044 ASSERT(tmp == NULL); 4045 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 4046 4047 return (0); 4048 } 4049 4050 /* 4051 * Return the process's credentials. We don't need a 32-bit equivalent of 4052 * this function because prcred_t and prcred32_t are actually the same. 4053 */ 4054 void 4055 prgetcred(proc_t *p, prcred_t *pcrp) 4056 { 4057 mutex_enter(&p->p_crlock); 4058 cred2prcred(p->p_cred, pcrp); 4059 mutex_exit(&p->p_crlock); 4060 } 4061 4062 /* 4063 * Compute actual size of the prpriv_t structure. 4064 */ 4065 4066 size_t 4067 prgetprivsize(void) 4068 { 4069 return (priv_prgetprivsize(NULL)); 4070 } 4071 4072 /* 4073 * Return the process's privileges. We don't need a 32-bit equivalent of 4074 * this function because prpriv_t and prpriv32_t are actually the same. 4075 */ 4076 void 4077 prgetpriv(proc_t *p, prpriv_t *pprp) 4078 { 4079 mutex_enter(&p->p_crlock); 4080 cred2prpriv(p->p_cred, pprp); 4081 mutex_exit(&p->p_crlock); 4082 } 4083 4084 #ifdef _SYSCALL32_IMPL 4085 /* 4086 * Return an array of structures with HAT memory map information. 4087 * We allocate here; the caller must deallocate. 
4088 */ 4089 int 4090 prgetxmap32(proc_t *p, list_t *iolhead) 4091 { 4092 struct as *as = p->p_as; 4093 prxmap32_t *mp; 4094 struct seg *seg; 4095 struct seg *brkseg, *stkseg; 4096 struct vnode *vp; 4097 struct vattr vattr; 4098 uint_t prot; 4099 4100 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 4101 4102 /* 4103 * Request an initial buffer size that doesn't waste memory 4104 * if the address space has only a small number of segments. 4105 */ 4106 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 4107 4108 if ((seg = AS_SEGFIRST(as)) == NULL) 4109 return (0); 4110 4111 brkseg = break_seg(p); 4112 stkseg = as_segat(as, prgetstackbase(p)); 4113 4114 do { 4115 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 4116 caddr_t saddr, naddr, baddr; 4117 void *tmp = NULL; 4118 ssize_t psz; 4119 char *parr; 4120 uint64_t npages; 4121 uint64_t pagenum; 4122 4123 /* 4124 * Segment loop part one: iterate from the base of the segment 4125 * to its end, pausing at each address boundary (baddr) between 4126 * ranges that have different virtual memory protections. 4127 */ 4128 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { 4129 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); 4130 ASSERT(baddr >= saddr && baddr <= eaddr); 4131 4132 /* 4133 * Segment loop part two: iterate from the current 4134 * position to the end of the protection boundary, 4135 * pausing at each address boundary (naddr) between 4136 * ranges that have different underlying page sizes. 
4137 */ 4138 for (; saddr < baddr; saddr = naddr) { 4139 psz = pr_getpagesize(seg, saddr, &naddr, baddr); 4140 ASSERT(naddr >= saddr && naddr <= baddr); 4141 4142 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 4143 4144 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 4145 mp->pr_size = (size32_t)(naddr - saddr); 4146 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 4147 mp->pr_mflags = 0; 4148 if (prot & PROT_READ) 4149 mp->pr_mflags |= MA_READ; 4150 if (prot & PROT_WRITE) 4151 mp->pr_mflags |= MA_WRITE; 4152 if (prot & PROT_EXEC) 4153 mp->pr_mflags |= MA_EXEC; 4154 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 4155 mp->pr_mflags |= MA_SHARED; 4156 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 4157 mp->pr_mflags |= MA_NORESERVE; 4158 if (seg->s_ops == &segspt_shmops || 4159 (seg->s_ops == &segvn_ops && 4160 (SEGOP_GETVP(seg, saddr, &vp) != 0 || 4161 vp == NULL))) 4162 mp->pr_mflags |= MA_ANON; 4163 if (seg == brkseg) 4164 mp->pr_mflags |= MA_BREAK; 4165 else if (seg == stkseg) 4166 mp->pr_mflags |= MA_STACK; 4167 if (seg->s_ops == &segspt_shmops) 4168 mp->pr_mflags |= MA_ISM | MA_SHM; 4169 4170 mp->pr_pagesize = PAGESIZE; 4171 if (psz == -1) { 4172 mp->pr_hatpagesize = 0; 4173 } else { 4174 mp->pr_hatpagesize = psz; 4175 } 4176 4177 /* 4178 * Manufacture a filename for the "object" dir. 4179 */ 4180 mp->pr_dev = PRNODEV32; 4181 vattr.va_mask = AT_FSID|AT_NODEID; 4182 if (seg->s_ops == &segvn_ops && 4183 SEGOP_GETVP(seg, saddr, &vp) == 0 && 4184 vp != NULL && vp->v_type == VREG && 4185 VOP_GETATTR(vp, &vattr, 0, CRED(), 4186 NULL) == 0) { 4187 (void) cmpldev(&mp->pr_dev, 4188 vattr.va_fsid); 4189 mp->pr_ino = vattr.va_nodeid; 4190 if (vp == p->p_exec) 4191 (void) strcpy(mp->pr_mapname, 4192 "a.out"); 4193 else 4194 pr_object_name(mp->pr_mapname, 4195 vp, &vattr); 4196 } 4197 4198 /* 4199 * Get the SysV shared memory id, if any. 
4200 */ 4201 if ((mp->pr_mflags & MA_SHARED) && 4202 p->p_segacct && (mp->pr_shmid = shmgetid(p, 4203 seg->s_base)) != SHMID_NONE) { 4204 if (mp->pr_shmid == SHMID_FREE) 4205 mp->pr_shmid = -1; 4206 4207 mp->pr_mflags |= MA_SHM; 4208 } else { 4209 mp->pr_shmid = -1; 4210 } 4211 4212 npages = ((uintptr_t)(naddr - saddr)) >> 4213 PAGESHIFT; 4214 parr = kmem_zalloc(npages, KM_SLEEP); 4215 4216 SEGOP_INCORE(seg, saddr, naddr - saddr, parr); 4217 4218 for (pagenum = 0; pagenum < npages; pagenum++) { 4219 if (parr[pagenum] & SEG_PAGE_INCORE) 4220 mp->pr_rss++; 4221 if (parr[pagenum] & SEG_PAGE_ANON) 4222 mp->pr_anon++; 4223 if (parr[pagenum] & SEG_PAGE_LOCKED) 4224 mp->pr_locked++; 4225 } 4226 kmem_free(parr, npages); 4227 } 4228 } 4229 ASSERT(tmp == NULL); 4230 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 4231 4232 return (0); 4233 } 4234 #endif /* _SYSCALL32_IMPL */ 4235