/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/priv.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/inline.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/brand.h>
#include <sys/sobject.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/var.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/session.h>
#include <sys/pcb.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/ts.h>
#include <sys/bitmap.h>
#include <sys/poll.h>
#include <sys/shm_impl.h>
#include <sys/fault.h>
#include <sys/syscall.h>
#include <sys/procfs.h>
#include <sys/processor.h>
#include <sys/cpuvar.h>
#include <sys/copyops.h>
#include <sys/time.h>
#include <sys/msacct.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_spt.h>
#include <vm/page.h>
#include <sys/vmparam.h>
#include <sys/swap.h>
#include <fs/proc/prdata.h>
#include <sys/task.h>
#include <sys/project.h>
#include <sys/contract_impl.h>
#include <sys/contract/process.h>
#include <sys/contract/process_impl.h>
#include <sys/schedctl.h>
#include <sys/pool.h>
#include <sys/zone.h>
#include <sys/atomic.h>
#include <sys/sdt.h>

/*
 * NOTE(review): not referenced in the part of the file reviewed here;
 * presumably an upper bound on retry/spin iterations used further down.
 * Confirm against the user before changing.
 */
#define	MAX_ITERS_SPIN	5

/*
 * Page vector: two parallel per-page arrays (protections and in-core
 * flags) describing pg_npages pages, the first of which is page number
 * pg_pnbase within its segment.
 */
typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

/* Tunable (non-static global), expressed in pages rather than bytes. */
size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

/*
 * Watched-page helpers; the definitions are not in the part of the file
 * reviewed here.
 */
static int set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static void clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);

/*
 * Choose an lwp from the complete set of lwps for the process.
 * This is called for any operation applied to the process
 * file descriptor that requires an lwp to operate upon.
 *
 * Returns a pointer to the thread for the selected LWP,
 * and with the dispatcher lock held for the thread.
 *
 * The algorithm for choosing an lwp is critical for /proc semantics;
 * don't touch this code unless you know all of the implications.
 */
kthread_t *
prchoose(proc_t *p)
{
	kthread_t *t;
	/*
	 * One "first seen" candidate per category.  Each slot keeps the
	 * first matching lwp encountered while walking p->p_tlist (t_istop
	 * is the one exception, see below).
	 */
	kthread_t *t_onproc = NULL;	/* running on processor */
	kthread_t *t_run = NULL;	/* runnable, on disp queue */
	kthread_t *t_sleep = NULL;	/* sleeping */
	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
	kthread_t *t_susp = NULL;	/* suspended stop */
	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
	kthread_t *t_req = NULL;	/* requested stop */
	kthread_t *t_istop = NULL;	/* event-of-interest stop */
	kthread_t *t_dtrace = NULL;	/* DTrace stop */

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * If the agent lwp exists, it takes precedence over all others.
	 */
	if ((t = p->p_agenttp) != NULL) {
		thread_lock(t);
		return (t);
	}

	/* No lwps at all: return NULL, with no thread lock held. */
	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
		return (t);
	do {				/* for each lwp in the process */
		/*
		 * Virtually stopped lwps are classified as requested stops
		 * without taking the thread lock.
		 */
		if (VSTOPPED(t)) {	/* virtually stopped */
			if (t_req == NULL)
				t_req = t;
			continue;
		}

		thread_lock(t);		/* make sure thread is in good state */
		switch (t->t_state) {
		default:
			panic("prchoose: bad thread state %d, thread 0x%p",
			    t->t_state, (void *)t);
			/*NOTREACHED*/
		case TS_SLEEP:
			/* this is filthy */
			/*
			 * A sleeper on &p->p_holdlwps with no t_wchan0 is
			 * counted as "holding" rather than as an ordinary
			 * sleeper.
			 */
			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
			    t->t_wchan0 == NULL) {
				if (t_hold == NULL)
					t_hold = t;
			} else {
				if (t_sleep == NULL)
					t_sleep = t;
			}
			break;
		case TS_RUN:
		case TS_WAIT:
			if (t_run == NULL)
				t_run = t;
			break;
		case TS_ONPROC:
			if (t_onproc == NULL)
				t_onproc = t;
			break;
		case TS_ZOMB:		/* last possible choice */
			break;
		case TS_STOPPED:
			/* Sub-classify stopped lwps by the reason they stopped. */
			switch (t->t_whystop) {
			case PR_SUSPENDED:
				if (t_susp == NULL)
					t_susp = t;
				break;
			case PR_JOBCONTROL:
				if (t->t_proc_flag & TP_PRSTOP) {
					if (t_jdstop == NULL)
						t_jdstop = t;
				} else {
					if (t_jstop == NULL)
						t_jstop = t;
				}
				break;
			case PR_REQUESTED:
				if (t->t_dtrace_stop && t_dtrace == NULL)
					t_dtrace = t;
				else if (t_req == NULL)
					t_req = t;
				break;
			case PR_SYSENTRY:
			case PR_SYSEXIT:
			case PR_SIGNALLED:
			case PR_FAULTED:
				/*
				 * Make an lwp calling exit() be the
				 * last lwp seen in the process.
				 */
				/*
				 * I.e. a previously recorded candidate that
				 * is stopped on entry to exit() is replaced
				 * by any later event-of-interest stop.
				 */
				if (t_istop == NULL ||
				    (t_istop->t_whystop == PR_SYSENTRY &&
				    t_istop->t_whatstop == SYS_exit))
					t_istop = t;
				break;
			case PR_CHECKPOINT:	/* can't happen? */
				break;
			default:
				panic("prchoose: bad t_whystop %d, thread 0x%p",
				    t->t_whystop, (void *)t);
				/*NOTREACHED*/
			}
			break;
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);

	/*
	 * Pick the winner.  Precedence, highest first: on-processor,
	 * runnable, sleeping, job-control stop, job-control stop with a
	 * directed stop pending, event-of-interest stop, DTrace stop,
	 * requested stop, holding, suspended.  If nothing was recorded
	 * (only zombies or checkpoint stops were seen) fall back to the
	 * head of the lwp list.
	 */
	if (t_onproc)
		t = t_onproc;
	else if (t_run)
		t = t_run;
	else if (t_sleep)
		t = t_sleep;
	else if (t_jstop)
		t = t_jstop;
	else if (t_jdstop)
		t = t_jdstop;
	else if (t_istop)
		t = t_istop;
	else if (t_dtrace)
		t = t_dtrace;
	else if (t_req)
		t = t_req;
	else if (t_hold)
		t = t_hold;
	else if (t_susp)
		t = t_susp;
	else			/* TS_ZOMB */
		t = p->p_tlist;

	/* The chosen thread is returned with its thread lock held. */
	if (t != NULL)
		thread_lock(t);
	return (t);
}

/*
 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
 * on the /proc file descriptor. Called from stop() when a traced
 * process stops on an event of interest. Also called from exit()
 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
259 */ 260 void 261 prnotify(struct vnode *vp) 262 { 263 prcommon_t *pcp = VTOP(vp)->pr_common; 264 265 mutex_enter(&pcp->prc_mutex); 266 cv_broadcast(&pcp->prc_wait); 267 mutex_exit(&pcp->prc_mutex); 268 if (pcp->prc_flags & PRC_POLL) { 269 /* 270 * We call pollwakeup() with POLLHUP to ensure that 271 * the pollers are awakened even if they are polling 272 * for nothing (i.e., waiting for the process to exit). 273 * This enables the use of the PRC_POLL flag for optimization 274 * (we can turn off PRC_POLL only if we know no pollers remain). 275 */ 276 pcp->prc_flags &= ~PRC_POLL; 277 pollwakeup(&pcp->prc_pollhead, POLLHUP); 278 } 279 } 280 281 /* called immediately below, in prfree() */ 282 static void 283 prfreenotify(vnode_t *vp) 284 { 285 prnode_t *pnp; 286 prcommon_t *pcp; 287 288 while (vp != NULL) { 289 pnp = VTOP(vp); 290 pcp = pnp->pr_common; 291 ASSERT(pcp->prc_thread == NULL); 292 pcp->prc_proc = NULL; 293 /* 294 * We can't call prnotify() here because we are holding 295 * pidlock. We assert that there is no need to. 296 */ 297 mutex_enter(&pcp->prc_mutex); 298 cv_broadcast(&pcp->prc_wait); 299 mutex_exit(&pcp->prc_mutex); 300 ASSERT(!(pcp->prc_flags & PRC_POLL)); 301 302 vp = pnp->pr_next; 303 pnp->pr_next = NULL; 304 } 305 } 306 307 /* 308 * Called from a hook in freeproc() when a traced process is removed 309 * from the process table. The proc-table pointers of all associated 310 * /proc vnodes are cleared to indicate that the process has gone away. 311 */ 312 void 313 prfree(proc_t *p) 314 { 315 uint_t slot = p->p_slot; 316 317 ASSERT(MUTEX_HELD(&pidlock)); 318 319 /* 320 * Block the process against /proc so it can be freed. 321 * It cannot be freed while locked by some controlling process. 
322 * Lock ordering: 323 * pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex 324 */ 325 mutex_enter(&pr_pidlock); /* protects pcp->prc_proc */ 326 mutex_enter(&p->p_lock); 327 while (p->p_proc_flag & P_PR_LOCK) { 328 mutex_exit(&pr_pidlock); 329 cv_wait(&pr_pid_cv[slot], &p->p_lock); 330 mutex_exit(&p->p_lock); 331 mutex_enter(&pr_pidlock); 332 mutex_enter(&p->p_lock); 333 } 334 335 ASSERT(p->p_tlist == NULL); 336 337 prfreenotify(p->p_plist); 338 p->p_plist = NULL; 339 340 prfreenotify(p->p_trace); 341 p->p_trace = NULL; 342 343 /* 344 * We broadcast to wake up everyone waiting for this process. 345 * No one can reach this process from this point on. 346 */ 347 cv_broadcast(&pr_pid_cv[slot]); 348 349 mutex_exit(&p->p_lock); 350 mutex_exit(&pr_pidlock); 351 } 352 353 /* 354 * Called from a hook in exit() when a traced process is becoming a zombie. 355 */ 356 void 357 prexit(proc_t *p) 358 { 359 ASSERT(MUTEX_HELD(&p->p_lock)); 360 361 if (pr_watch_active(p)) { 362 pr_free_watchpoints(p); 363 watch_disable(curthread); 364 } 365 /* pr_free_watched_pages() is called in exit(), after dropping p_lock */ 366 if (p->p_trace) { 367 VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY; 368 prnotify(p->p_trace); 369 } 370 cv_broadcast(&pr_pid_cv[p->p_slot]); /* pauselwps() */ 371 } 372 373 /* 374 * Called when a thread calls lwp_exit(). 375 */ 376 void 377 prlwpexit(kthread_t *t) 378 { 379 vnode_t *vp; 380 prnode_t *pnp; 381 prcommon_t *pcp; 382 proc_t *p = ttoproc(t); 383 lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry; 384 385 ASSERT(t == curthread); 386 ASSERT(MUTEX_HELD(&p->p_lock)); 387 388 /* 389 * The process must be blocked against /proc to do this safely. 390 * The lwp must not disappear while the process is marked P_PR_LOCK. 391 * It is the caller's responsibility to have called prbarrier(p). 
392 */ 393 ASSERT(!(p->p_proc_flag & P_PR_LOCK)); 394 395 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) { 396 pnp = VTOP(vp); 397 pcp = pnp->pr_common; 398 if (pcp->prc_thread == t) { 399 pcp->prc_thread = NULL; 400 pcp->prc_flags |= PRC_DESTROY; 401 } 402 } 403 404 for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) { 405 pnp = VTOP(vp); 406 pcp = pnp->pr_common; 407 pcp->prc_thread = NULL; 408 pcp->prc_flags |= PRC_DESTROY; 409 prnotify(vp); 410 } 411 412 if (p->p_trace) 413 prnotify(p->p_trace); 414 } 415 416 /* 417 * Called when a zombie thread is joined or when a 418 * detached lwp exits. Called from lwp_hash_out(). 419 */ 420 void 421 prlwpfree(proc_t *p, lwpent_t *lep) 422 { 423 vnode_t *vp; 424 prnode_t *pnp; 425 prcommon_t *pcp; 426 427 ASSERT(MUTEX_HELD(&p->p_lock)); 428 429 /* 430 * The process must be blocked against /proc to do this safely. 431 * The lwp must not disappear while the process is marked P_PR_LOCK. 432 * It is the caller's responsibility to have called prbarrier(p). 433 */ 434 ASSERT(!(p->p_proc_flag & P_PR_LOCK)); 435 436 vp = lep->le_trace; 437 lep->le_trace = NULL; 438 while (vp) { 439 prnotify(vp); 440 pnp = VTOP(vp); 441 pcp = pnp->pr_common; 442 ASSERT(pcp->prc_thread == NULL && 443 (pcp->prc_flags & PRC_DESTROY)); 444 pcp->prc_tslot = -1; 445 vp = pnp->pr_next; 446 pnp->pr_next = NULL; 447 } 448 449 if (p->p_trace) 450 prnotify(p->p_trace); 451 } 452 453 /* 454 * Called from a hook in exec() when a thread starts exec(). 455 */ 456 void 457 prexecstart(void) 458 { 459 proc_t *p = ttoproc(curthread); 460 klwp_t *lwp = ttolwp(curthread); 461 462 /* 463 * The P_PR_EXEC flag blocks /proc operations for 464 * the duration of the exec(). 465 * We can't start exec() while the process is 466 * locked by /proc, so we call prbarrier(). 467 * lwp_nostop keeps the process from being stopped 468 * via job control for the duration of the exec(). 
469 */ 470 471 ASSERT(MUTEX_HELD(&p->p_lock)); 472 prbarrier(p); 473 lwp->lwp_nostop++; 474 p->p_proc_flag |= P_PR_EXEC; 475 } 476 477 /* 478 * Called from a hook in exec() when a thread finishes exec(). 479 * The thread may or may not have succeeded. Some other thread 480 * may have beat it to the punch. 481 */ 482 void 483 prexecend(void) 484 { 485 proc_t *p = ttoproc(curthread); 486 klwp_t *lwp = ttolwp(curthread); 487 vnode_t *vp; 488 prnode_t *pnp; 489 prcommon_t *pcp; 490 model_t model = p->p_model; 491 id_t tid = curthread->t_tid; 492 int tslot = curthread->t_dslot; 493 494 ASSERT(MUTEX_HELD(&p->p_lock)); 495 496 lwp->lwp_nostop--; 497 if (p->p_flag & SEXITLWPS) { 498 /* 499 * We are on our way to exiting because some 500 * other thread beat us in the race to exec(). 501 * Don't clear the P_PR_EXEC flag in this case. 502 */ 503 return; 504 } 505 506 /* 507 * Wake up anyone waiting in /proc for the process to complete exec(). 508 */ 509 p->p_proc_flag &= ~P_PR_EXEC; 510 if ((vp = p->p_trace) != NULL) { 511 pcp = VTOP(vp)->pr_common; 512 mutex_enter(&pcp->prc_mutex); 513 cv_broadcast(&pcp->prc_wait); 514 mutex_exit(&pcp->prc_mutex); 515 for (; vp != NULL; vp = pnp->pr_next) { 516 pnp = VTOP(vp); 517 pnp->pr_common->prc_datamodel = model; 518 } 519 } 520 if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) { 521 /* 522 * We dealt with the process common above. 523 */ 524 ASSERT(p->p_trace != NULL); 525 pcp = VTOP(vp)->pr_common; 526 mutex_enter(&pcp->prc_mutex); 527 cv_broadcast(&pcp->prc_wait); 528 mutex_exit(&pcp->prc_mutex); 529 for (; vp != NULL; vp = pnp->pr_next) { 530 pnp = VTOP(vp); 531 pcp = pnp->pr_common; 532 pcp->prc_datamodel = model; 533 pcp->prc_tid = tid; 534 pcp->prc_tslot = tslot; 535 } 536 } 537 } 538 539 /* 540 * Called from a hook in relvm() just before freeing the address space. 541 * We free all the watched areas now. 
542 */ 543 void 544 prrelvm(void) 545 { 546 proc_t *p = ttoproc(curthread); 547 548 mutex_enter(&p->p_lock); 549 prbarrier(p); /* block all other /proc operations */ 550 if (pr_watch_active(p)) { 551 pr_free_watchpoints(p); 552 watch_disable(curthread); 553 } 554 mutex_exit(&p->p_lock); 555 pr_free_watched_pages(p); 556 } 557 558 /* 559 * Called from hooks in exec-related code when a traced process 560 * attempts to exec(2) a setuid/setgid program or an unreadable 561 * file. Rather than fail the exec we invalidate the associated 562 * /proc vnodes so that subsequent attempts to use them will fail. 563 * 564 * All /proc vnodes, except directory vnodes, are retained on a linked 565 * list (rooted at p_plist in the process structure) until last close. 566 * 567 * A controlling process must re-open the /proc files in order to 568 * regain control. 569 */ 570 void 571 prinvalidate(struct user *up) 572 { 573 kthread_t *t = curthread; 574 proc_t *p = ttoproc(t); 575 vnode_t *vp; 576 prnode_t *pnp; 577 int writers = 0; 578 579 mutex_enter(&p->p_lock); 580 prbarrier(p); /* block all other /proc operations */ 581 582 /* 583 * At this moment, there can be only one lwp in the process. 584 */ 585 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 586 587 /* 588 * Invalidate any currently active /proc vnodes. 589 */ 590 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) { 591 pnp = VTOP(vp); 592 switch (pnp->pr_type) { 593 case PR_PSINFO: /* these files can read by anyone */ 594 case PR_LPSINFO: 595 case PR_LWPSINFO: 596 case PR_LWPDIR: 597 case PR_LWPIDDIR: 598 case PR_USAGE: 599 case PR_LUSAGE: 600 case PR_LWPUSAGE: 601 break; 602 default: 603 pnp->pr_flags |= PR_INVAL; 604 break; 605 } 606 } 607 /* 608 * Wake up anyone waiting for the process or lwp. 609 * p->p_trace is guaranteed to be non-NULL if there 610 * are any open /proc files for this process. 
611 */ 612 if ((vp = p->p_trace) != NULL) { 613 prcommon_t *pcp = VTOP(vp)->pr_pcommon; 614 615 prnotify(vp); 616 /* 617 * Are there any writers? 618 */ 619 if ((writers = pcp->prc_writers) != 0) { 620 /* 621 * Clear the exclusive open flag (old /proc interface). 622 * Set prc_selfopens equal to prc_writers so that 623 * the next O_EXCL|O_WRITE open will succeed 624 * even with existing (though invalid) writers. 625 * prclose() must decrement prc_selfopens when 626 * the invalid files are closed. 627 */ 628 pcp->prc_flags &= ~PRC_EXCL; 629 ASSERT(pcp->prc_selfopens <= writers); 630 pcp->prc_selfopens = writers; 631 } 632 } 633 vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace; 634 while (vp != NULL) { 635 /* 636 * We should not invalidate the lwpiddir vnodes, 637 * but the necessities of maintaining the old 638 * ioctl()-based version of /proc require it. 639 */ 640 pnp = VTOP(vp); 641 pnp->pr_flags |= PR_INVAL; 642 prnotify(vp); 643 vp = pnp->pr_next; 644 } 645 646 /* 647 * If any tracing flags are in effect and any vnodes are open for 648 * writing then set the requested-stop and run-on-last-close flags. 649 * Otherwise, clear all tracing flags. 650 */ 651 t->t_proc_flag &= ~TP_PAUSE; 652 if ((p->p_proc_flag & P_PR_TRACE) && writers) { 653 t->t_proc_flag |= TP_PRSTOP; 654 aston(t); /* so ISSIG will see the flag */ 655 p->p_proc_flag |= P_PR_RUNLCL; 656 } else { 657 premptyset(&up->u_entrymask); /* syscalls */ 658 premptyset(&up->u_exitmask); 659 up->u_systrap = 0; 660 premptyset(&p->p_sigmask); /* signals */ 661 premptyset(&p->p_fltmask); /* faults */ 662 t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING); 663 p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE); 664 prnostep(ttolwp(t)); 665 } 666 667 mutex_exit(&p->p_lock); 668 } 669 670 /* 671 * Acquire the controlled process's p_lock and mark it P_PR_LOCK. 672 * Return with pr_pidlock held in all cases. 673 * Return with p_lock held if the the process still exists. 
 * Return value is the process pointer if the process still exists, else NULL.
 * If we lock the process, give ourself kernel priority to avoid deadlocks;
 * this is undone in prunlock().
 */
proc_t *
pr_p_lock(prnode_t *pnp)
{
	proc_t *p;
	prcommon_t *pcp;

	mutex_enter(&pr_pidlock);
	/*
	 * Both NULL returns below (and the one inside the loop) leave
	 * pr_pidlock held; the caller is responsible for dropping it.
	 */
	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
		return (NULL);
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * This cv/mutex pair is persistent even if
		 * the process disappears while we sleep.
		 */
		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
		kmutex_t *mp = &p->p_lock;

		/*
		 * Sleep holding only p_lock.  After waking, p_lock is
		 * dropped so that pr_pidlock can be retaken first, and
		 * prc_proc is re-read under pr_pidlock before p is
		 * touched again.
		 */
		mutex_exit(&pr_pidlock);
		cv_wait(cv, mp);
		mutex_exit(mp);
		mutex_enter(&pr_pidlock);
		if (pcp->prc_proc == NULL)
			return (NULL);
		ASSERT(p == pcp->prc_proc);
		mutex_enter(&p->p_lock);
	}
	/* We own the /proc lock on p from here until prunmark(). */
	p->p_proc_flag |= P_PR_LOCK;
	THREAD_KPRI_REQUEST();
	return (p);
}

/*
 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
 * This prevents any lwp of the process from disappearing and
 * blocks most operations that a process can perform on itself.
 * Returns 0 on success, a non-zero error number on failure.
 *
 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
 *
 * error returns:
 *	ENOENT: process or lwp has disappeared or process is exiting
 *		(or has become a zombie and zdisp == ZNO).
 *	EAGAIN: procfs vnode has become invalid.
 *	EINTR:  signal arrived while waiting for exec to complete.
 *
 * On every error return the process has been unlocked again (or was
 * never locked); only the success return leaves p->p_lock held.
 */
int
prlock(prnode_t *pnp, int zdisp)
{
	prcommon_t *pcp;
	proc_t *p;

again:
	pcp = pnp->pr_common;
	p = pr_p_lock(pnp);
	/* pr_p_lock() returns with pr_pidlock held whether or not p is NULL. */
	mutex_exit(&pr_pidlock);

	/*
	 * Return ENOENT immediately if there is no process.
	 */
	if (p == NULL)
		return (ENOENT);

	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);

	/*
	 * Return ENOENT if process entered zombie state or is exiting
	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
	 */
	if (zdisp == ZNO &&
	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
		prunlock(pnp);
		return (ENOENT);
	}

	/*
	 * If lwp-specific, check to see if lwp has disappeared.
	 */
	if (pcp->prc_flags & PRC_LWP) {
		/* A slot of -1 fails regardless of zdisp. */
		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
		    pcp->prc_tslot == -1) {
			prunlock(pnp);
			return (ENOENT);
		}
	}

	/*
	 * Return EAGAIN if we have encountered a security violation.
	 * (The process exec'd a set-id or unreadable executable file.)
	 */
	if (pnp->pr_flags & PR_INVAL) {
		prunlock(pnp);
		return (EAGAIN);
	}

	/*
	 * If process is undergoing an exec(), wait for
	 * completion and then start all over again.
	 */
	if (p->p_proc_flag & P_PR_EXEC) {
		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
		/*
		 * prc_mutex is taken before the process is unlocked, so
		 * the exec-completion broadcast (which is issued under
		 * prc_mutex) cannot slip in between the unlock and the
		 * wait below.
		 */
		mutex_enter(&pcp->prc_mutex);
		prunlock(pnp);
		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
			mutex_exit(&pcp->prc_mutex);
			return (EINTR);
		}
		mutex_exit(&pcp->prc_mutex);
		goto again;
	}

	/*
	 * We return holding p->p_lock.
	 */
	return (0);
}

/*
 * Undo prlock() and pr_p_lock().
 * p->p_lock is still held; pr_pidlock is no longer held.
 *
 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
 * if any, waiting for the flag to be dropped; it retains p->p_lock.
 *
 * prunlock() calls prunmark() and then drops p->p_lock.
804 */ 805 void 806 prunmark(proc_t *p) 807 { 808 ASSERT(p->p_proc_flag & P_PR_LOCK); 809 ASSERT(MUTEX_HELD(&p->p_lock)); 810 811 cv_signal(&pr_pid_cv[p->p_slot]); 812 p->p_proc_flag &= ~P_PR_LOCK; 813 THREAD_KPRI_RELEASE(); 814 } 815 816 void 817 prunlock(prnode_t *pnp) 818 { 819 prcommon_t *pcp = pnp->pr_common; 820 proc_t *p = pcp->prc_proc; 821 822 /* 823 * If we (or someone) gave it a SIGKILL, and it is not 824 * already a zombie, set it running unconditionally. 825 */ 826 if ((p->p_flag & SKILLED) && 827 !(p->p_flag & SEXITING) && 828 !(pcp->prc_flags & PRC_DESTROY) && 829 !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1)) 830 (void) pr_setrun(pnp, 0); 831 prunmark(p); 832 mutex_exit(&p->p_lock); 833 } 834 835 /* 836 * Called while holding p->p_lock to delay until the process is unlocked. 837 * We enter holding p->p_lock; p->p_lock is dropped and reacquired. 838 * The process cannot become locked again until p->p_lock is dropped. 839 */ 840 void 841 prbarrier(proc_t *p) 842 { 843 ASSERT(MUTEX_HELD(&p->p_lock)); 844 845 if (p->p_proc_flag & P_PR_LOCK) { 846 /* The process is locked; delay until not locked */ 847 uint_t slot = p->p_slot; 848 849 while (p->p_proc_flag & P_PR_LOCK) 850 cv_wait(&pr_pid_cv[slot], &p->p_lock); 851 cv_signal(&pr_pid_cv[slot]); 852 } 853 } 854 855 /* 856 * Return process/lwp status. 857 * The u-block is mapped in by this routine and unmapped at the end. 
858 */ 859 void 860 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp) 861 { 862 kthread_t *t; 863 864 ASSERT(MUTEX_HELD(&p->p_lock)); 865 866 t = prchoose(p); /* returns locked thread */ 867 ASSERT(t != NULL); 868 thread_unlock(t); 869 870 /* just bzero the process part, prgetlwpstatus() does the rest */ 871 bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t)); 872 sp->pr_nlwp = p->p_lwpcnt; 873 sp->pr_nzomb = p->p_zombcnt; 874 prassignset(&sp->pr_sigpend, &p->p_sig); 875 sp->pr_brkbase = (uintptr_t)p->p_brkbase; 876 sp->pr_brksize = p->p_brksize; 877 sp->pr_stkbase = (uintptr_t)prgetstackbase(p); 878 sp->pr_stksize = p->p_stksize; 879 sp->pr_pid = p->p_pid; 880 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 881 (p->p_flag & SZONETOP)) { 882 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 883 /* 884 * Inside local zones, fake zsched's pid as parent pids for 885 * processes which reference processes outside of the zone. 886 */ 887 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 888 } else { 889 sp->pr_ppid = p->p_ppid; 890 } 891 sp->pr_pgid = p->p_pgrp; 892 sp->pr_sid = p->p_sessp->s_sid; 893 sp->pr_taskid = p->p_task->tk_tkid; 894 sp->pr_projid = p->p_task->tk_proj->kpj_id; 895 sp->pr_zoneid = p->p_zone->zone_id; 896 hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime); 897 hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime); 898 TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime); 899 TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime); 900 prassignset(&sp->pr_sigtrace, &p->p_sigmask); 901 prassignset(&sp->pr_flttrace, &p->p_fltmask); 902 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask); 903 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask); 904 switch (p->p_model) { 905 case DATAMODEL_ILP32: 906 sp->pr_dmodel = PR_MODEL_ILP32; 907 break; 908 case DATAMODEL_LP64: 909 sp->pr_dmodel = PR_MODEL_LP64; 910 break; 911 } 912 if (p->p_agenttp) 913 sp->pr_agentid = p->p_agenttp->t_tid; 914 915 /* get the chosen lwp's status */ 916 prgetlwpstatus(t, &sp->pr_lwp, zp); 917 
918 /* replicate the flags */ 919 sp->pr_flags = sp->pr_lwp.pr_flags; 920 } 921 922 #ifdef _SYSCALL32_IMPL 923 void 924 prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp) 925 { 926 proc_t *p = ttoproc(t); 927 klwp_t *lwp = ttolwp(t); 928 struct mstate *ms = &lwp->lwp_mstate; 929 hrtime_t usr, sys; 930 int flags; 931 ulong_t instr; 932 933 ASSERT(MUTEX_HELD(&p->p_lock)); 934 935 bzero(sp, sizeof (*sp)); 936 flags = 0L; 937 if (t->t_state == TS_STOPPED) { 938 flags |= PR_STOPPED; 939 if ((t->t_schedflag & TS_PSTART) == 0) 940 flags |= PR_ISTOP; 941 } else if (VSTOPPED(t)) { 942 flags |= PR_STOPPED|PR_ISTOP; 943 } 944 if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP)) 945 flags |= PR_DSTOP; 946 if (lwp->lwp_asleep) 947 flags |= PR_ASLEEP; 948 if (t == p->p_agenttp) 949 flags |= PR_AGENT; 950 if (!(t->t_proc_flag & TP_TWAIT)) 951 flags |= PR_DETACH; 952 if (t->t_proc_flag & TP_DAEMON) 953 flags |= PR_DAEMON; 954 if (p->p_proc_flag & P_PR_FORK) 955 flags |= PR_FORK; 956 if (p->p_proc_flag & P_PR_RUNLCL) 957 flags |= PR_RLC; 958 if (p->p_proc_flag & P_PR_KILLCL) 959 flags |= PR_KLC; 960 if (p->p_proc_flag & P_PR_ASYNC) 961 flags |= PR_ASYNC; 962 if (p->p_proc_flag & P_PR_BPTADJ) 963 flags |= PR_BPTADJ; 964 if (p->p_proc_flag & P_PR_PTRACE) 965 flags |= PR_PTRACE; 966 if (p->p_flag & SMSACCT) 967 flags |= PR_MSACCT; 968 if (p->p_flag & SMSFORK) 969 flags |= PR_MSFORK; 970 if (p->p_flag & SVFWAIT) 971 flags |= PR_VFORKP; 972 sp->pr_flags = flags; 973 if (VSTOPPED(t)) { 974 sp->pr_why = PR_REQUESTED; 975 sp->pr_what = 0; 976 } else { 977 sp->pr_why = t->t_whystop; 978 sp->pr_what = t->t_whatstop; 979 } 980 sp->pr_lwpid = t->t_tid; 981 sp->pr_cursig = lwp->lwp_cursig; 982 prassignset(&sp->pr_lwppend, &t->t_sig); 983 schedctl_finish_sigblock(t); 984 prassignset(&sp->pr_lwphold, &t->t_hold); 985 if (t->t_whystop == PR_FAULTED) { 986 siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info); 987 if (t->t_whatstop == FLTPAGE) 988 sp->pr_info.si_addr = 989 
(caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr; 990 } else if (lwp->lwp_curinfo) 991 siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info); 992 if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID && 993 sp->pr_info.si_zoneid != zp->zone_id) { 994 sp->pr_info.si_pid = zp->zone_zsched->p_pid; 995 sp->pr_info.si_uid = 0; 996 sp->pr_info.si_ctid = -1; 997 sp->pr_info.si_zoneid = zp->zone_id; 998 } 999 sp->pr_altstack.ss_sp = 1000 (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp; 1001 sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size; 1002 sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags; 1003 prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action); 1004 sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext; 1005 sp->pr_ustack = (caddr32_t)lwp->lwp_ustack; 1006 (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name, 1007 sizeof (sp->pr_clname) - 1); 1008 if (flags & PR_STOPPED) 1009 hrt2ts32(t->t_stoptime, &sp->pr_tstamp); 1010 usr = ms->ms_acct[LMS_USER]; 1011 sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP]; 1012 scalehrtime(&usr); 1013 scalehrtime(&sys); 1014 hrt2ts32(usr, &sp->pr_utime); 1015 hrt2ts32(sys, &sp->pr_stime); 1016 1017 /* 1018 * Fetch the current instruction, if not a system process. 1019 * We don't attempt this unless the lwp is stopped. 1020 */ 1021 if ((p->p_flag & SSYS) || p->p_as == &kas) 1022 sp->pr_flags |= (PR_ISSYS|PR_PCINVAL); 1023 else if (!(flags & PR_STOPPED)) 1024 sp->pr_flags |= PR_PCINVAL; 1025 else if (!prfetchinstr(lwp, &instr)) 1026 sp->pr_flags |= PR_PCINVAL; 1027 else 1028 sp->pr_instr = (uint32_t)instr; 1029 1030 /* 1031 * Drop p_lock while touching the lwp's stack. 
1032 */ 1033 mutex_exit(&p->p_lock); 1034 if (prisstep(lwp)) 1035 sp->pr_flags |= PR_STEP; 1036 if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) { 1037 int i; 1038 1039 sp->pr_syscall = get_syscall32_args(lwp, 1040 (int *)sp->pr_sysarg, &i); 1041 sp->pr_nsysarg = (ushort_t)i; 1042 } 1043 if ((flags & PR_STOPPED) || t == curthread) 1044 prgetprregs32(lwp, sp->pr_reg); 1045 if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) || 1046 (flags & PR_VFORKP)) { 1047 long r1, r2; 1048 user_t *up; 1049 auxv_t *auxp; 1050 int i; 1051 1052 sp->pr_errno = prgetrvals(lwp, &r1, &r2); 1053 if (sp->pr_errno == 0) { 1054 sp->pr_rval1 = (int32_t)r1; 1055 sp->pr_rval2 = (int32_t)r2; 1056 sp->pr_errpriv = PRIV_NONE; 1057 } else 1058 sp->pr_errpriv = lwp->lwp_badpriv; 1059 1060 if (t->t_sysnum == SYS_execve) { 1061 up = PTOU(p); 1062 sp->pr_sysarg[0] = 0; 1063 sp->pr_sysarg[1] = (caddr32_t)up->u_argv; 1064 sp->pr_sysarg[2] = (caddr32_t)up->u_envp; 1065 for (i = 0, auxp = up->u_auxv; 1066 i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]); 1067 i++, auxp++) { 1068 if (auxp->a_type == AT_SUN_EXECNAME) { 1069 sp->pr_sysarg[0] = 1070 (caddr32_t) 1071 (uintptr_t)auxp->a_un.a_ptr; 1072 break; 1073 } 1074 } 1075 } 1076 } 1077 if (prhasfp()) 1078 prgetprfpregs32(lwp, &sp->pr_fpreg); 1079 mutex_enter(&p->p_lock); 1080 } 1081 1082 void 1083 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp) 1084 { 1085 kthread_t *t; 1086 1087 ASSERT(MUTEX_HELD(&p->p_lock)); 1088 1089 t = prchoose(p); /* returns locked thread */ 1090 ASSERT(t != NULL); 1091 thread_unlock(t); 1092 1093 /* just bzero the process part, prgetlwpstatus32() does the rest */ 1094 bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t)); 1095 sp->pr_nlwp = p->p_lwpcnt; 1096 sp->pr_nzomb = p->p_zombcnt; 1097 prassignset(&sp->pr_sigpend, &p->p_sig); 1098 sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase; 1099 sp->pr_brksize = (uint32_t)p->p_brksize; 1100 sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p); 1101 
sp->pr_stksize = (uint32_t)p->p_stksize; 1102 sp->pr_pid = p->p_pid; 1103 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 1104 (p->p_flag & SZONETOP)) { 1105 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 1106 /* 1107 * Inside local zones, fake zsched's pid as parent pids for 1108 * processes which reference processes outside of the zone. 1109 */ 1110 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 1111 } else { 1112 sp->pr_ppid = p->p_ppid; 1113 } 1114 sp->pr_pgid = p->p_pgrp; 1115 sp->pr_sid = p->p_sessp->s_sid; 1116 sp->pr_taskid = p->p_task->tk_tkid; 1117 sp->pr_projid = p->p_task->tk_proj->kpj_id; 1118 sp->pr_zoneid = p->p_zone->zone_id; 1119 hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime); 1120 hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime); 1121 TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime); 1122 TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime); 1123 prassignset(&sp->pr_sigtrace, &p->p_sigmask); 1124 prassignset(&sp->pr_flttrace, &p->p_fltmask); 1125 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask); 1126 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask); 1127 switch (p->p_model) { 1128 case DATAMODEL_ILP32: 1129 sp->pr_dmodel = PR_MODEL_ILP32; 1130 break; 1131 case DATAMODEL_LP64: 1132 sp->pr_dmodel = PR_MODEL_LP64; 1133 break; 1134 } 1135 if (p->p_agenttp) 1136 sp->pr_agentid = p->p_agenttp->t_tid; 1137 1138 /* get the chosen lwp's status */ 1139 prgetlwpstatus32(t, &sp->pr_lwp, zp); 1140 1141 /* replicate the flags */ 1142 sp->pr_flags = sp->pr_lwp.pr_flags; 1143 } 1144 #endif /* _SYSCALL32_IMPL */ 1145 1146 /* 1147 * Return lwp status. 
 *
 * Fills in *sp for the lwp whose thread is 't'.  The caller must hold
 * p->p_lock for the owning process; the lock is dropped and re-acquired
 * inside this function while registers and system call arguments are
 * fetched, so it is held again on return.  'zp' is the zone used to
 * sanitize the siginfo sender fields.
 */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));

	/* Build pr_flags from the thread's and the process's state bits. */
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	if (p->p_pgidp->pid_pgorphaned)
		flags |= PR_ORPHAN;
	if (p->p_pidflag & CLDNOSIGCHLD)
		flags |= PR_NOSIGCHLD;
	if (p->p_pidflag & CLDWAITPID)
		flags |= PR_WAITPID;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	/* called before t_hold is sampled just below */
	schedctl_finish_sigblock(t);
	prassignset(&sp->pr_lwphold, &t->t_hold);
	if (t->t_whystop == PR_FAULTED)
		bcopy(&lwp->lwp_siginfo,
		    &sp->pr_info, sizeof (k_siginfo_t));
	else if (lwp->lwp_curinfo)
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &sp->pr_info, sizeof (k_siginfo_t));
	/*
	 * For a non-global zone 'zp', if the copied siginfo carries a
	 * different zone id, overwrite the sender identification with
	 * zp's zsched pid, uid 0, ctid -1 and zp's own zone id.
	 */
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	/*
	 * *sp was zeroed above and at most sizeof - 1 bytes are copied,
	 * so pr_clname always ends up NUL-terminated.
	 */
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	/* system time reported here is LMS_SYSTEM plus LMS_TRAP */
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
		    (long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		/*
		 * For execve, report the argv and envp recorded in the
		 * u-area, and the AT_SUN_EXECNAME aux vector entry (0 if
		 * there is none) as the first argument.
		 */
		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

/*
 * Get the sigaction structure for the specified signal.  The u-block
 * must already have been mapped in by the caller.
 *
 * *sp is always zeroed first and is left zeroed when 'sig' is 0 or is
 * not below the signal limit (the brand's b_nsig when curproc is
 * branded, otherwise NSIG).
 */
void
prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
{
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		/* the per-signal arrays are indexed by sig - 1 */
		sp->sa_handler = up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}

#ifdef _SYSCALL32_IMPL
/*
 * Same as prgetaction(), but fills in a struct sigaction32; the handler
 * address is narrowed to caddr32_t.
 */
void
prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
{
	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

	bzero(sp, sizeof (*sp));

	if (sig != 0 && (unsigned)sig < nsig) {
		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
		if (sigismember(&up->u_sigonstack, sig))
			sp->sa_flags |= SA_ONSTACK;
		if (sigismember(&up->u_sigresethand, sig))
			sp->sa_flags |= SA_RESETHAND;
		if (sigismember(&up->u_sigrestart, sig))
			sp->sa_flags |= SA_RESTART;
		if (sigismember(&p->p_siginfo, sig))
			sp->sa_flags |= SA_SIGINFO;
		if (sigismember(&up->u_signodefer, sig))
			sp->sa_flags |= SA_NODEFER;
		if (sig == SIGCLD) {
			if (p->p_flag & SNOWAIT)
				sp->sa_flags |= SA_NOCLDWAIT;
			if ((p->p_flag & SJCTL) == 0)
				sp->sa_flags |= SA_NOCLDSTOP;
		}
	}
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Count the number of segments in this process's address space.
1372 */ 1373 int 1374 prnsegs(struct as *as, int reserved) 1375 { 1376 int n = 0; 1377 struct seg *seg; 1378 1379 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 1380 1381 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { 1382 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved); 1383 caddr_t saddr, naddr; 1384 void *tmp = NULL; 1385 1386 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1387 (void) pr_getprot(seg, reserved, &tmp, 1388 &saddr, &naddr, eaddr); 1389 if (saddr != naddr) 1390 n++; 1391 } 1392 1393 ASSERT(tmp == NULL); 1394 } 1395 1396 return (n); 1397 } 1398 1399 /* 1400 * Convert uint32_t to decimal string w/o leading zeros. 1401 * Add trailing null characters if 'len' is greater than string length. 1402 * Return the string length. 1403 */ 1404 int 1405 pr_u32tos(uint32_t n, char *s, int len) 1406 { 1407 char cbuf[11]; /* 32-bit unsigned integer fits in 10 digits */ 1408 char *cp = cbuf; 1409 char *end = s + len; 1410 1411 do { 1412 *cp++ = (char)(n % 10 + '0'); 1413 n /= 10; 1414 } while (n); 1415 1416 len = (int)(cp - cbuf); 1417 1418 do { 1419 *s++ = *--cp; 1420 } while (cp > cbuf); 1421 1422 while (s < end) /* optional pad */ 1423 *s++ = '\0'; 1424 1425 return (len); 1426 } 1427 1428 /* 1429 * Convert uint64_t to decimal string w/o leading zeros. 1430 * Return the string length. 
1431 */ 1432 static int 1433 pr_u64tos(uint64_t n, char *s) 1434 { 1435 char cbuf[21]; /* 64-bit unsigned integer fits in 20 digits */ 1436 char *cp = cbuf; 1437 int len; 1438 1439 do { 1440 *cp++ = (char)(n % 10 + '0'); 1441 n /= 10; 1442 } while (n); 1443 1444 len = (int)(cp - cbuf); 1445 1446 do { 1447 *s++ = *--cp; 1448 } while (cp > cbuf); 1449 1450 return (len); 1451 } 1452 1453 void 1454 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr) 1455 { 1456 char *s = name; 1457 struct vfs *vfsp; 1458 struct vfssw *vfsswp; 1459 1460 if ((vfsp = vp->v_vfsp) != NULL && 1461 ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) && 1462 *vfsswp->vsw_name) { 1463 (void) strcpy(s, vfsswp->vsw_name); 1464 s += strlen(s); 1465 *s++ = '.'; 1466 } 1467 s += pr_u32tos(getmajor(vattr->va_fsid), s, 0); 1468 *s++ = '.'; 1469 s += pr_u32tos(getminor(vattr->va_fsid), s, 0); 1470 *s++ = '.'; 1471 s += pr_u64tos(vattr->va_nodeid, s); 1472 *s++ = '\0'; 1473 } 1474 1475 struct seg * 1476 break_seg(proc_t *p) 1477 { 1478 caddr_t addr = p->p_brkbase; 1479 struct seg *seg; 1480 struct vnode *vp; 1481 1482 if (p->p_brksize != 0) 1483 addr += p->p_brksize - 1; 1484 seg = as_segat(p->p_as, addr); 1485 if (seg != NULL && seg->s_ops == &segvn_ops && 1486 (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL)) 1487 return (seg); 1488 return (NULL); 1489 } 1490 1491 /* 1492 * Implementation of service functions to handle procfs generic chained 1493 * copyout buffers. 
1494 */ 1495 typedef struct pr_iobuf_list { 1496 list_node_t piol_link; /* buffer linkage */ 1497 size_t piol_size; /* total size (header + data) */ 1498 size_t piol_usedsize; /* amount to copy out from this buf */ 1499 } piol_t; 1500 1501 #define MAPSIZE (64 * 1024) 1502 #define PIOL_DATABUF(iol) ((void *)(&(iol)[1])) 1503 1504 void 1505 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n) 1506 { 1507 piol_t *iol; 1508 size_t initial_size = MIN(1, n) * itemsize; 1509 1510 list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link)); 1511 1512 ASSERT(list_head(iolhead) == NULL); 1513 ASSERT(itemsize < MAPSIZE - sizeof (*iol)); 1514 ASSERT(initial_size > 0); 1515 1516 /* 1517 * Someone creating chained copyout buffers may ask for less than 1518 * MAPSIZE if the amount of data to be buffered is known to be 1519 * smaller than that. 1520 * But in order to prevent involuntary self-denial of service, 1521 * the requested input size is clamped at MAPSIZE. 1522 */ 1523 initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol)); 1524 iol = kmem_alloc(initial_size, KM_SLEEP); 1525 list_insert_head(iolhead, iol); 1526 iol->piol_usedsize = 0; 1527 iol->piol_size = initial_size; 1528 } 1529 1530 void * 1531 pr_iol_newbuf(list_t *iolhead, size_t itemsize) 1532 { 1533 piol_t *iol; 1534 char *new; 1535 1536 ASSERT(itemsize < MAPSIZE - sizeof (*iol)); 1537 ASSERT(list_head(iolhead) != NULL); 1538 1539 iol = (piol_t *)list_tail(iolhead); 1540 1541 if (iol->piol_size < 1542 iol->piol_usedsize + sizeof (*iol) + itemsize) { 1543 /* 1544 * Out of space in the current buffer. Allocate more. 
1545 */ 1546 piol_t *newiol; 1547 1548 newiol = kmem_alloc(MAPSIZE, KM_SLEEP); 1549 newiol->piol_size = MAPSIZE; 1550 newiol->piol_usedsize = 0; 1551 1552 list_insert_after(iolhead, iol, newiol); 1553 iol = list_next(iolhead, iol); 1554 ASSERT(iol == newiol); 1555 } 1556 new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize; 1557 iol->piol_usedsize += itemsize; 1558 bzero(new, itemsize); 1559 return (new); 1560 } 1561 1562 int 1563 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin) 1564 { 1565 int error = errin; 1566 piol_t *iol; 1567 1568 while ((iol = list_head(iolhead)) != NULL) { 1569 list_remove(iolhead, iol); 1570 if (!error) { 1571 if (copyout(PIOL_DATABUF(iol), *tgt, 1572 iol->piol_usedsize)) 1573 error = EFAULT; 1574 *tgt += iol->piol_usedsize; 1575 } 1576 kmem_free(iol, iol->piol_size); 1577 } 1578 list_destroy(iolhead); 1579 1580 return (error); 1581 } 1582 1583 int 1584 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin) 1585 { 1586 offset_t off = uiop->uio_offset; 1587 char *base; 1588 size_t size; 1589 piol_t *iol; 1590 int error = errin; 1591 1592 while ((iol = list_head(iolhead)) != NULL) { 1593 list_remove(iolhead, iol); 1594 base = PIOL_DATABUF(iol); 1595 size = iol->piol_usedsize; 1596 if (off <= size && error == 0 && uiop->uio_resid > 0) 1597 error = uiomove(base + off, size - off, 1598 UIO_READ, uiop); 1599 off = MAX(0, off - (offset_t)size); 1600 kmem_free(iol, iol->piol_size); 1601 } 1602 list_destroy(iolhead); 1603 1604 return (error); 1605 } 1606 1607 /* 1608 * Return an array of structures with memory map information. 1609 * We allocate here; the caller must deallocate. 
 *
 * The address space must not be &kas and must be write-locked by the
 * caller.  One prmap_t is appended to 'iolhead' for every non-empty
 * range reported by pr_getprot().  'reserved' is passed through to
 * pr_getsegsize() and pr_getprot().  Always returns 0.
 */
int
prgetmap(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			/* the new entry comes back zero-filled */
			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (uintptr_t)saddr;
			mp->pr_size = naddr - saddr;
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					/*
					 * Report the stack mapping as
					 * starting at stack base +
					 * p_stksize - the page-rounded
					 * p_stk_ctl.
					 */
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					mp->pr_vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_size = (uintptr_t)naddr -
					    mp->pr_vaddr;
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}

#ifdef _SYSCALL32_IMPL
/*
 * Same as prgetmap(), but produces prmap32_t entries; addresses and
 * sizes are narrowed to caddr32_t/size32_t.
 */
int
prgetmap32(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			mp->pr_size = (size32_t)(naddr - saddr);
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					uintptr_t vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_vaddr = (caddr32_t)vaddr;
					mp->pr_size = (size32_t)
					    ((uintptr_t)naddr - vaddr);
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Return the size of the /proc page data file.
 *
 * The size is one prpageheader_t plus, for every range of at least one
 * page reported by pr_getprot(), one prasmap_t followed by one byte per
 * page rounded up by round8().  Returns 0 for an address space with no
 * segments.
 */
size_t
prpdsize(struct as *as)
{
	struct seg *seg;
	size_t size;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	size = sizeof (prpageheader_t);
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;
		size_t npage;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
				size += sizeof (prasmap_t) + round8(npage);
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (size);
}

#ifdef _SYSCALL32_IMPL
/*
 * Same as prpdsize(), but computed with the 32-bit structures
 * prpageheader32_t and prasmap32_t.
 */
size_t
prpdsize32(struct as *as)
{
	struct seg *seg;
	size_t size;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	size = sizeof (prpageheader32_t);
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;
		size_t npage;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
				size += sizeof (prasmap32_t) + round8(npage);
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (size);
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Read page data information.
 *
 * Takes the address space lock as writer itself.  Returns 0 without
 * transferring anything if the address space has no segments, E2BIG if
 * uiop->uio_resid is smaller than the size computed by prpdsize(), EINTR
 * if a signal is pending when a retry is needed, otherwise the result of
 * uiomove().
 */
int
prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader_t *php;
	prasmap_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (0);
	}
	size = prpdsize(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (E2BIG);
	}

	/* header first, then the prasmap_t entries packed behind it */
	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader_t *)buf;
	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));

	hrt2ts(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			/* where the entry after this one would start */
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry.  If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as, &as->a_lock);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (uintptr_t)saddr;
			pmp->pr_npage = npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/* per-page data goes right after the prasmap_t */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as, &as->a_lock);

	/* only the part of the buffer actually filled in is transferred */
	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}

#ifdef _SYSCALL32_IMPL
/*
 * Same as prpdread(), but produces the 32-bit structures
 * prpageheader32_t and prasmap32_t.
 */
int
prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader32_t *php;
	prasmap32_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (0);
	}
	size = prpdsize32(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (E2BIG);
	}

	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader32_t *)buf;
	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));

	hrt2ts32(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry.  If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as, &as->a_lock);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			pmp->pr_npage = (size32_t)npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap32_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as, &as->a_lock);

	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Convert an accumulated cpu percentage 'pct' into the 16-bit scaled
 * value reported through /proc, capped at 0x8000.
 */
ushort_t
prgetpctcpu(uint64_t pct)
{
	/*
	 * The value returned will be relevant in the zone of the examiner,
	 * which may not be the same as the zone which performed the procfs
	 * mount.
	 */
	int nonline = zone_ncpus_online_get(curproc->p_zone);

	/*
	 * Prorate over online cpus so we don't exceed 100%
	 */
	if (nonline > 1)
		pct /= nonline;
	pct >>= 16;		/* convert to 16-bit scaled integer */
	if (pct > 0x8000)	/* might happen, due to rounding */
		pct = 0x8000;
	return ((ushort_t)pct);
}

/*
 * Return information used by ps(1).
 *
 * The caller must hold p->p_lock.  For a process that is not a system
 * process, the lock is dropped and re-acquired while the address space
 * sizes are read.  If prchoose() finds no lwp, the process is reported
 * as a zombie.
 */
void
prgetpsinfo(proc_t *p, psinfo_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		/* pr_lwp is handed to prgetlwpsinfo() further down */
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}

	/*
	 * only export SSYS and SMSACCT; everything else is off-limits to
	 * userland apps.
	 */
	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
	psp->pr_nlwp = p->p_lwpcnt;
	psp->pr_nzomb = p->p_zombcnt;
	mutex_enter(&p->p_crlock);
	cred = p->p_cred;
	psp->pr_uid = crgetruid(cred);
	psp->pr_euid = crgetuid(cred);
	psp->pr_gid = crgetrgid(cred);
	psp->pr_egid = crgetgid(cred);
	mutex_exit(&p->p_crlock);
	psp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		psp->pr_ppid = p->p_ppid;
	}
	psp->pr_pgid = p->p_pgrp;
	psp->pr_sid = p->p_sessp->s_sid;
	psp->pr_taskid = p->p_task->tk_tkid;
	psp->pr_projid = p->p_task->tk_proj->kpj_id;
	psp->pr_poolid = p->p_pool->pool_id;
	psp->pr_zoneid = p->p_zone->zone_id;
	/* a contract id of 0 is reported as -1 */
	if ((psp->pr_contract = PRCTID(p)) == 0)
		psp->pr_contract = -1;
	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		psp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		psp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	/* pr_time is user plus system time; pr_ctime sums the children's */
	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	hrt2ts((hrutime + hrstime), &psp->pr_time);
	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);

	if (t == NULL) {
		int wcode = p->p_wcode;		/* must be atomic read */

		if (wcode)
			psp->pr_wstat = wstat(wcode, p->p_wdata);
		psp->pr_ttydev = PRNODEV;
		psp->pr_lwp.pr_state = SZOMB;
		psp->pr_lwp.pr_sname = 'Z';
		psp->pr_lwp.pr_bindpro = PBIND_NONE;
		psp->pr_lwp.pr_bindpset = PS_NONE;
	} else {
		user_t *up = PTOU(p);
		struct as *as;
		dev_t d;
		extern dev_t rwsconsdev, rconsdev, uconsdev;

		d = cttydev(p);
		/*
		 * If the controlling terminal is the real
		 * or workstation console device, map to what the
		 * user thinks is the console device. Handle case when
		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
		 */
		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
			d = uconsdev;
		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
		psp->pr_start = up->u_start;
		/*
		 * Both copies stop at least one byte short of the
		 * destination, which was zeroed above, so the strings
		 * stay NUL-terminated.
		 */
		bcopy(up->u_comm, psp->pr_fname,
		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
		bcopy(up->u_psargs, psp->pr_psargs,
		    MIN(PRARGSZ-1, PSARGSZ));
		psp->pr_argc = up->u_argc;
		psp->pr_argv = up->u_argv;
		psp->pr_envp = up->u_envp;

		/* get the chosen lwp's lwpsinfo */
		prgetlwpsinfo(t, &psp->pr_lwp);

		/* compute %cpu for the process */
		if (p->p_lwpcnt == 1)
			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
		else {
			uint64_t pct = 0;
			hrtime_t cur_time = gethrtime_unscaled();

			/* sum over every thread on the circular p_tlist */
			t = p->p_tlist;
			do {
				pct += cpu_update_pct(t, cur_time);
			} while ((t = t->t_forw) != p->p_tlist);

			psp->pr_pctcpu = prgetpctcpu(pct);
		}
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
			psp->pr_size = 0;
			psp->pr_rssize = 0;
		} else {
			/* p_lock is dropped while the as lock is held */
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
			/* sizes are reported in units of 1024 bytes */
			psp->pr_size = btopr(as->a_resvsize) *
			    (PAGESIZE / 1024);
			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
			psp->pr_pctmem = rm_pctmemory(as);
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
	}
}

#ifdef _SYSCALL32_IMPL
void
prgetpsinfo32(proc_t *p, psinfo32_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}
2366 2367 /* 2368 * only export SSYS and SMSACCT; everything else is off-limits to 2369 * userland apps. 2370 */ 2371 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 2372 psp->pr_nlwp = p->p_lwpcnt; 2373 psp->pr_nzomb = p->p_zombcnt; 2374 mutex_enter(&p->p_crlock); 2375 cred = p->p_cred; 2376 psp->pr_uid = crgetruid(cred); 2377 psp->pr_euid = crgetuid(cred); 2378 psp->pr_gid = crgetrgid(cred); 2379 psp->pr_egid = crgetgid(cred); 2380 mutex_exit(&p->p_crlock); 2381 psp->pr_pid = p->p_pid; 2382 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 2383 (p->p_flag & SZONETOP)) { 2384 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 2385 /* 2386 * Inside local zones, fake zsched's pid as parent pids for 2387 * processes which reference processes outside of the zone. 2388 */ 2389 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 2390 } else { 2391 psp->pr_ppid = p->p_ppid; 2392 } 2393 psp->pr_pgid = p->p_pgrp; 2394 psp->pr_sid = p->p_sessp->s_sid; 2395 psp->pr_taskid = p->p_task->tk_tkid; 2396 psp->pr_projid = p->p_task->tk_proj->kpj_id; 2397 psp->pr_poolid = p->p_pool->pool_id; 2398 psp->pr_zoneid = p->p_zone->zone_id; 2399 if ((psp->pr_contract = PRCTID(p)) == 0) 2400 psp->pr_contract = -1; 2401 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 2402 switch (p->p_model) { 2403 case DATAMODEL_ILP32: 2404 psp->pr_dmodel = PR_MODEL_ILP32; 2405 break; 2406 case DATAMODEL_LP64: 2407 psp->pr_dmodel = PR_MODEL_LP64; 2408 break; 2409 } 2410 hrutime = mstate_aggr_state(p, LMS_USER); 2411 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 2412 hrt2ts32(hrutime + hrstime, &psp->pr_time); 2413 TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime); 2414 2415 if (t == NULL) { 2416 extern int wstat(int, int); /* needs a header file */ 2417 int wcode = p->p_wcode; /* must be atomic read */ 2418 2419 if (wcode) 2420 psp->pr_wstat = wstat(wcode, p->p_wdata); 2421 psp->pr_ttydev = PRNODEV32; 2422 psp->pr_lwp.pr_state = SZOMB; 2423 psp->pr_lwp.pr_sname = 'Z'; 2424 } else { 2425 
user_t *up = PTOU(p); 2426 struct as *as; 2427 dev_t d; 2428 extern dev_t rwsconsdev, rconsdev, uconsdev; 2429 2430 d = cttydev(p); 2431 /* 2432 * If the controlling terminal is the real 2433 * or workstation console device, map to what the 2434 * user thinks is the console device. Handle case when 2435 * rwsconsdev or rconsdev is set to NODEV for Starfire. 2436 */ 2437 if ((d == rwsconsdev || d == rconsdev) && d != NODEV) 2438 d = uconsdev; 2439 (void) cmpldev(&psp->pr_ttydev, d); 2440 TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start); 2441 bcopy(up->u_comm, psp->pr_fname, 2442 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); 2443 bcopy(up->u_psargs, psp->pr_psargs, 2444 MIN(PRARGSZ-1, PSARGSZ)); 2445 psp->pr_argc = up->u_argc; 2446 psp->pr_argv = (caddr32_t)up->u_argv; 2447 psp->pr_envp = (caddr32_t)up->u_envp; 2448 2449 /* get the chosen lwp's lwpsinfo */ 2450 prgetlwpsinfo32(t, &psp->pr_lwp); 2451 2452 /* compute %cpu for the process */ 2453 if (p->p_lwpcnt == 1) 2454 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; 2455 else { 2456 uint64_t pct = 0; 2457 hrtime_t cur_time; 2458 2459 t = p->p_tlist; 2460 cur_time = gethrtime_unscaled(); 2461 do { 2462 pct += cpu_update_pct(t, cur_time); 2463 } while ((t = t->t_forw) != p->p_tlist); 2464 2465 psp->pr_pctcpu = prgetpctcpu(pct); 2466 } 2467 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { 2468 psp->pr_size = 0; 2469 psp->pr_rssize = 0; 2470 } else { 2471 mutex_exit(&p->p_lock); 2472 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2473 psp->pr_size = (size32_t) 2474 (btopr(as->a_resvsize) * (PAGESIZE / 1024)); 2475 psp->pr_rssize = (size32_t) 2476 (rm_asrss(as) * (PAGESIZE / 1024)); 2477 psp->pr_pctmem = rm_pctmemory(as); 2478 AS_LOCK_EXIT(as, &as->a_lock); 2479 mutex_enter(&p->p_lock); 2480 } 2481 } 2482 2483 /* 2484 * If we are looking at an LP64 process, zero out 2485 * the fields that cannot be represented in ILP32. 
2486 */ 2487 if (p->p_model != DATAMODEL_ILP32) { 2488 psp->pr_size = 0; 2489 psp->pr_rssize = 0; 2490 psp->pr_argv = 0; 2491 psp->pr_envp = 0; 2492 } 2493 } 2494 #endif /* _SYSCALL32_IMPL */ 2495 2496 void 2497 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp) 2498 { 2499 klwp_t *lwp = ttolwp(t); 2500 sobj_ops_t *sobj; 2501 char c, state; 2502 uint64_t pct; 2503 int retval, niceval; 2504 hrtime_t hrutime, hrstime; 2505 2506 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 2507 2508 bzero(psp, sizeof (*psp)); 2509 2510 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 2511 psp->pr_lwpid = t->t_tid; 2512 psp->pr_addr = (uintptr_t)t; 2513 psp->pr_wchan = (uintptr_t)t->t_wchan; 2514 2515 /* map the thread state enum into a process state enum */ 2516 state = VSTOPPED(t) ? TS_STOPPED : t->t_state; 2517 switch (state) { 2518 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 2519 case TS_RUN: state = SRUN; c = 'R'; break; 2520 case TS_ONPROC: state = SONPROC; c = 'O'; break; 2521 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 2522 case TS_STOPPED: state = SSTOP; c = 'T'; break; 2523 case TS_WAIT: state = SWAIT; c = 'W'; break; 2524 default: state = 0; c = '?'; break; 2525 } 2526 psp->pr_state = state; 2527 psp->pr_sname = c; 2528 if ((sobj = t->t_sobj_ops) != NULL) 2529 psp->pr_stype = SOBJ_TYPE(sobj); 2530 retval = CL_DONICE(t, NULL, 0, &niceval); 2531 if (retval == 0) { 2532 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 2533 psp->pr_nice = niceval + NZERO; 2534 } 2535 psp->pr_syscall = t->t_sysnum; 2536 psp->pr_pri = t->t_pri; 2537 psp->pr_start.tv_sec = t->t_start; 2538 psp->pr_start.tv_nsec = 0L; 2539 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 2540 scalehrtime(&hrutime); 2541 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 2542 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 2543 scalehrtime(&hrstime); 2544 hrt2ts(hrutime + hrstime, &psp->pr_time); 2545 /* compute %cpu for the lwp */ 2546 pct = cpu_update_pct(t, gethrtime_unscaled()); 2547 psp->pr_pctcpu = prgetpctcpu(pct); 2548 psp->pr_cpu = 
(psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 2549 if (psp->pr_cpu > 99) 2550 psp->pr_cpu = 99; 2551 2552 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 2553 sizeof (psp->pr_clname) - 1); 2554 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 2555 psp->pr_onpro = t->t_cpu->cpu_id; 2556 psp->pr_bindpro = t->t_bind_cpu; 2557 psp->pr_bindpset = t->t_bind_pset; 2558 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 2559 } 2560 2561 #ifdef _SYSCALL32_IMPL 2562 void 2563 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp) 2564 { 2565 proc_t *p = ttoproc(t); 2566 klwp_t *lwp = ttolwp(t); 2567 sobj_ops_t *sobj; 2568 char c, state; 2569 uint64_t pct; 2570 int retval, niceval; 2571 hrtime_t hrutime, hrstime; 2572 2573 ASSERT(MUTEX_HELD(&p->p_lock)); 2574 2575 bzero(psp, sizeof (*psp)); 2576 2577 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 2578 psp->pr_lwpid = t->t_tid; 2579 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 2580 psp->pr_wchan = 0; /* cannot represent 64-bit addr in 32 bits */ 2581 2582 /* map the thread state enum into a process state enum */ 2583 state = VSTOPPED(t) ? 
TS_STOPPED : t->t_state; 2584 switch (state) { 2585 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 2586 case TS_RUN: state = SRUN; c = 'R'; break; 2587 case TS_ONPROC: state = SONPROC; c = 'O'; break; 2588 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 2589 case TS_STOPPED: state = SSTOP; c = 'T'; break; 2590 case TS_WAIT: state = SWAIT; c = 'W'; break; 2591 default: state = 0; c = '?'; break; 2592 } 2593 psp->pr_state = state; 2594 psp->pr_sname = c; 2595 if ((sobj = t->t_sobj_ops) != NULL) 2596 psp->pr_stype = SOBJ_TYPE(sobj); 2597 retval = CL_DONICE(t, NULL, 0, &niceval); 2598 if (retval == 0) { 2599 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 2600 psp->pr_nice = niceval + NZERO; 2601 } else { 2602 psp->pr_oldpri = 0; 2603 psp->pr_nice = 0; 2604 } 2605 psp->pr_syscall = t->t_sysnum; 2606 psp->pr_pri = t->t_pri; 2607 psp->pr_start.tv_sec = (time32_t)t->t_start; 2608 psp->pr_start.tv_nsec = 0L; 2609 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 2610 scalehrtime(&hrutime); 2611 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 2612 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 2613 scalehrtime(&hrstime); 2614 hrt2ts32(hrutime + hrstime, &psp->pr_time); 2615 /* compute %cpu for the lwp */ 2616 pct = cpu_update_pct(t, gethrtime_unscaled()); 2617 psp->pr_pctcpu = prgetpctcpu(pct); 2618 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 2619 if (psp->pr_cpu > 99) 2620 psp->pr_cpu = 99; 2621 2622 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 2623 sizeof (psp->pr_clname) - 1); 2624 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 2625 psp->pr_onpro = t->t_cpu->cpu_id; 2626 psp->pr_bindpro = t->t_bind_cpu; 2627 psp->pr_bindpset = t->t_bind_pset; 2628 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 2629 } 2630 #endif /* _SYSCALL32_IMPL */ 2631 2632 /* 2633 * This used to get called when microstate accounting was disabled but 2634 * microstate information was requested. 
Since Microstate accounting is on 2635 * regardless of the proc flags, this simply makes it appear to procfs that 2636 * microstate accounting is on. This is relatively meaningless since you 2637 * can't turn it off, but this is here for the sake of appearances. 2638 */ 2639 2640 /*ARGSUSED*/ 2641 void 2642 estimate_msacct(kthread_t *t, hrtime_t curtime) 2643 { 2644 proc_t *p; 2645 2646 if (t == NULL) 2647 return; 2648 2649 p = ttoproc(t); 2650 ASSERT(MUTEX_HELD(&p->p_lock)); 2651 2652 /* 2653 * A system process (p0) could be referenced if the thread is 2654 * in the process of exiting. Don't turn on microstate accounting 2655 * in that case. 2656 */ 2657 if (p->p_flag & SSYS) 2658 return; 2659 2660 /* 2661 * Loop through all the LWPs (kernel threads) in the process. 2662 */ 2663 t = p->p_tlist; 2664 do { 2665 t->t_proc_flag |= TP_MSACCT; 2666 } while ((t = t->t_forw) != p->p_tlist); 2667 2668 p->p_flag |= SMSACCT; /* set process-wide MSACCT */ 2669 } 2670 2671 /* 2672 * It's not really possible to disable microstate accounting anymore. 2673 * However, this routine simply turns off the ms accounting flags in a process 2674 * This way procfs can still pretend to turn microstate accounting on and 2675 * off for a process, but it actually doesn't do anything. This is 2676 * a neutered form of preemptive idiot-proofing. 2677 */ 2678 void 2679 disable_msacct(proc_t *p) 2680 { 2681 kthread_t *t; 2682 2683 ASSERT(MUTEX_HELD(&p->p_lock)); 2684 2685 p->p_flag &= ~SMSACCT; /* clear process-wide MSACCT */ 2686 /* 2687 * Loop through all the LWPs (kernel threads) in the process. 2688 */ 2689 if ((t = p->p_tlist) != NULL) { 2690 do { 2691 /* clear per-thread flag */ 2692 t->t_proc_flag &= ~TP_MSACCT; 2693 } while ((t = t->t_forw) != p->p_tlist); 2694 } 2695 } 2696 2697 /* 2698 * Return resource usage information. 
2699 */ 2700 void 2701 prgetusage(kthread_t *t, prhusage_t *pup) 2702 { 2703 klwp_t *lwp = ttolwp(t); 2704 hrtime_t *mstimep; 2705 struct mstate *ms = &lwp->lwp_mstate; 2706 int state; 2707 int i; 2708 hrtime_t curtime; 2709 hrtime_t waitrq; 2710 hrtime_t tmp1; 2711 2712 curtime = gethrtime_unscaled(); 2713 2714 pup->pr_lwpid = t->t_tid; 2715 pup->pr_count = 1; 2716 pup->pr_create = ms->ms_start; 2717 pup->pr_term = ms->ms_term; 2718 scalehrtime(&pup->pr_create); 2719 scalehrtime(&pup->pr_term); 2720 if (ms->ms_term == 0) { 2721 pup->pr_rtime = curtime - ms->ms_start; 2722 scalehrtime(&pup->pr_rtime); 2723 } else { 2724 pup->pr_rtime = ms->ms_term - ms->ms_start; 2725 scalehrtime(&pup->pr_rtime); 2726 } 2727 2728 2729 pup->pr_utime = ms->ms_acct[LMS_USER]; 2730 pup->pr_stime = ms->ms_acct[LMS_SYSTEM]; 2731 pup->pr_ttime = ms->ms_acct[LMS_TRAP]; 2732 pup->pr_tftime = ms->ms_acct[LMS_TFAULT]; 2733 pup->pr_dftime = ms->ms_acct[LMS_DFAULT]; 2734 pup->pr_kftime = ms->ms_acct[LMS_KFAULT]; 2735 pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 2736 pup->pr_slptime = ms->ms_acct[LMS_SLEEP]; 2737 pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 2738 pup->pr_stoptime = ms->ms_acct[LMS_STOPPED]; 2739 2740 prscaleusage(pup); 2741 2742 /* 2743 * Adjust for time waiting in the dispatcher queue. 2744 */ 2745 waitrq = t->t_waitrq; /* hopefully atomic */ 2746 if (waitrq != 0) { 2747 tmp1 = curtime - waitrq; 2748 scalehrtime(&tmp1); 2749 pup->pr_wtime += tmp1; 2750 curtime = waitrq; 2751 } 2752 2753 /* 2754 * Adjust for time spent in current microstate. 2755 */ 2756 if (ms->ms_state_start > curtime) { 2757 curtime = gethrtime_unscaled(); 2758 } 2759 2760 i = 0; 2761 do { 2762 switch (state = t->t_mstate) { 2763 case LMS_SLEEP: 2764 /* 2765 * Update the timer for the current sleep state. 
2766 */ 2767 switch (state = ms->ms_prev) { 2768 case LMS_TFAULT: 2769 case LMS_DFAULT: 2770 case LMS_KFAULT: 2771 case LMS_USER_LOCK: 2772 break; 2773 default: 2774 state = LMS_SLEEP; 2775 break; 2776 } 2777 break; 2778 case LMS_TFAULT: 2779 case LMS_DFAULT: 2780 case LMS_KFAULT: 2781 case LMS_USER_LOCK: 2782 state = LMS_SYSTEM; 2783 break; 2784 } 2785 switch (state) { 2786 case LMS_USER: mstimep = &pup->pr_utime; break; 2787 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 2788 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 2789 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 2790 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 2791 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 2792 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 2793 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 2794 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 2795 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 2796 default: panic("prgetusage: unknown microstate"); 2797 } 2798 tmp1 = curtime - ms->ms_state_start; 2799 if (tmp1 < 0) { 2800 curtime = gethrtime_unscaled(); 2801 i++; 2802 continue; 2803 } 2804 scalehrtime(&tmp1); 2805 } while (tmp1 < 0 && i < MAX_ITERS_SPIN); 2806 2807 *mstimep += tmp1; 2808 2809 /* update pup timestamp */ 2810 pup->pr_tstamp = curtime; 2811 scalehrtime(&pup->pr_tstamp); 2812 2813 /* 2814 * Resource usage counters. 
2815 */ 2816 pup->pr_minf = lwp->lwp_ru.minflt; 2817 pup->pr_majf = lwp->lwp_ru.majflt; 2818 pup->pr_nswap = lwp->lwp_ru.nswap; 2819 pup->pr_inblk = lwp->lwp_ru.inblock; 2820 pup->pr_oublk = lwp->lwp_ru.oublock; 2821 pup->pr_msnd = lwp->lwp_ru.msgsnd; 2822 pup->pr_mrcv = lwp->lwp_ru.msgrcv; 2823 pup->pr_sigs = lwp->lwp_ru.nsignals; 2824 pup->pr_vctx = lwp->lwp_ru.nvcsw; 2825 pup->pr_ictx = lwp->lwp_ru.nivcsw; 2826 pup->pr_sysc = lwp->lwp_ru.sysc; 2827 pup->pr_ioch = lwp->lwp_ru.ioch; 2828 } 2829 2830 /* 2831 * Convert ms_acct stats from unscaled high-res time to nanoseconds 2832 */ 2833 void 2834 prscaleusage(prhusage_t *usg) 2835 { 2836 scalehrtime(&usg->pr_utime); 2837 scalehrtime(&usg->pr_stime); 2838 scalehrtime(&usg->pr_ttime); 2839 scalehrtime(&usg->pr_tftime); 2840 scalehrtime(&usg->pr_dftime); 2841 scalehrtime(&usg->pr_kftime); 2842 scalehrtime(&usg->pr_ltime); 2843 scalehrtime(&usg->pr_slptime); 2844 scalehrtime(&usg->pr_wtime); 2845 scalehrtime(&usg->pr_stoptime); 2846 } 2847 2848 2849 /* 2850 * Sum resource usage information. 
2851 */ 2852 void 2853 praddusage(kthread_t *t, prhusage_t *pup) 2854 { 2855 klwp_t *lwp = ttolwp(t); 2856 hrtime_t *mstimep; 2857 struct mstate *ms = &lwp->lwp_mstate; 2858 int state; 2859 int i; 2860 hrtime_t curtime; 2861 hrtime_t waitrq; 2862 hrtime_t tmp; 2863 prhusage_t conv; 2864 2865 curtime = gethrtime_unscaled(); 2866 2867 if (ms->ms_term == 0) { 2868 tmp = curtime - ms->ms_start; 2869 scalehrtime(&tmp); 2870 pup->pr_rtime += tmp; 2871 } else { 2872 tmp = ms->ms_term - ms->ms_start; 2873 scalehrtime(&tmp); 2874 pup->pr_rtime += tmp; 2875 } 2876 2877 conv.pr_utime = ms->ms_acct[LMS_USER]; 2878 conv.pr_stime = ms->ms_acct[LMS_SYSTEM]; 2879 conv.pr_ttime = ms->ms_acct[LMS_TRAP]; 2880 conv.pr_tftime = ms->ms_acct[LMS_TFAULT]; 2881 conv.pr_dftime = ms->ms_acct[LMS_DFAULT]; 2882 conv.pr_kftime = ms->ms_acct[LMS_KFAULT]; 2883 conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 2884 conv.pr_slptime = ms->ms_acct[LMS_SLEEP]; 2885 conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 2886 conv.pr_stoptime = ms->ms_acct[LMS_STOPPED]; 2887 2888 prscaleusage(&conv); 2889 2890 pup->pr_utime += conv.pr_utime; 2891 pup->pr_stime += conv.pr_stime; 2892 pup->pr_ttime += conv.pr_ttime; 2893 pup->pr_tftime += conv.pr_tftime; 2894 pup->pr_dftime += conv.pr_dftime; 2895 pup->pr_kftime += conv.pr_kftime; 2896 pup->pr_ltime += conv.pr_ltime; 2897 pup->pr_slptime += conv.pr_slptime; 2898 pup->pr_wtime += conv.pr_wtime; 2899 pup->pr_stoptime += conv.pr_stoptime; 2900 2901 /* 2902 * Adjust for time waiting in the dispatcher queue. 2903 */ 2904 waitrq = t->t_waitrq; /* hopefully atomic */ 2905 if (waitrq != 0) { 2906 tmp = curtime - waitrq; 2907 scalehrtime(&tmp); 2908 pup->pr_wtime += tmp; 2909 curtime = waitrq; 2910 } 2911 2912 /* 2913 * Adjust for time spent in current microstate. 
2914 */ 2915 if (ms->ms_state_start > curtime) { 2916 curtime = gethrtime_unscaled(); 2917 } 2918 2919 i = 0; 2920 do { 2921 switch (state = t->t_mstate) { 2922 case LMS_SLEEP: 2923 /* 2924 * Update the timer for the current sleep state. 2925 */ 2926 switch (state = ms->ms_prev) { 2927 case LMS_TFAULT: 2928 case LMS_DFAULT: 2929 case LMS_KFAULT: 2930 case LMS_USER_LOCK: 2931 break; 2932 default: 2933 state = LMS_SLEEP; 2934 break; 2935 } 2936 break; 2937 case LMS_TFAULT: 2938 case LMS_DFAULT: 2939 case LMS_KFAULT: 2940 case LMS_USER_LOCK: 2941 state = LMS_SYSTEM; 2942 break; 2943 } 2944 switch (state) { 2945 case LMS_USER: mstimep = &pup->pr_utime; break; 2946 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 2947 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 2948 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 2949 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 2950 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 2951 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 2952 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 2953 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 2954 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 2955 default: panic("praddusage: unknown microstate"); 2956 } 2957 tmp = curtime - ms->ms_state_start; 2958 if (tmp < 0) { 2959 curtime = gethrtime_unscaled(); 2960 i++; 2961 continue; 2962 } 2963 scalehrtime(&tmp); 2964 } while (tmp < 0 && i < MAX_ITERS_SPIN); 2965 2966 *mstimep += tmp; 2967 2968 /* update pup timestamp */ 2969 pup->pr_tstamp = curtime; 2970 scalehrtime(&pup->pr_tstamp); 2971 2972 /* 2973 * Resource usage counters. 
2974 */ 2975 pup->pr_minf += lwp->lwp_ru.minflt; 2976 pup->pr_majf += lwp->lwp_ru.majflt; 2977 pup->pr_nswap += lwp->lwp_ru.nswap; 2978 pup->pr_inblk += lwp->lwp_ru.inblock; 2979 pup->pr_oublk += lwp->lwp_ru.oublock; 2980 pup->pr_msnd += lwp->lwp_ru.msgsnd; 2981 pup->pr_mrcv += lwp->lwp_ru.msgrcv; 2982 pup->pr_sigs += lwp->lwp_ru.nsignals; 2983 pup->pr_vctx += lwp->lwp_ru.nvcsw; 2984 pup->pr_ictx += lwp->lwp_ru.nivcsw; 2985 pup->pr_sysc += lwp->lwp_ru.sysc; 2986 pup->pr_ioch += lwp->lwp_ru.ioch; 2987 } 2988 2989 /* 2990 * Convert a prhusage_t to a prusage_t. 2991 * This means convert each hrtime_t to a timestruc_t 2992 * and copy the count fields uint64_t => ulong_t. 2993 */ 2994 void 2995 prcvtusage(prhusage_t *pup, prusage_t *upup) 2996 { 2997 uint64_t *ullp; 2998 ulong_t *ulp; 2999 int i; 3000 3001 upup->pr_lwpid = pup->pr_lwpid; 3002 upup->pr_count = pup->pr_count; 3003 3004 hrt2ts(pup->pr_tstamp, &upup->pr_tstamp); 3005 hrt2ts(pup->pr_create, &upup->pr_create); 3006 hrt2ts(pup->pr_term, &upup->pr_term); 3007 hrt2ts(pup->pr_rtime, &upup->pr_rtime); 3008 hrt2ts(pup->pr_utime, &upup->pr_utime); 3009 hrt2ts(pup->pr_stime, &upup->pr_stime); 3010 hrt2ts(pup->pr_ttime, &upup->pr_ttime); 3011 hrt2ts(pup->pr_tftime, &upup->pr_tftime); 3012 hrt2ts(pup->pr_dftime, &upup->pr_dftime); 3013 hrt2ts(pup->pr_kftime, &upup->pr_kftime); 3014 hrt2ts(pup->pr_ltime, &upup->pr_ltime); 3015 hrt2ts(pup->pr_slptime, &upup->pr_slptime); 3016 hrt2ts(pup->pr_wtime, &upup->pr_wtime); 3017 hrt2ts(pup->pr_stoptime, &upup->pr_stoptime); 3018 bzero(upup->filltime, sizeof (upup->filltime)); 3019 3020 ullp = &pup->pr_minf; 3021 ulp = &upup->pr_minf; 3022 for (i = 0; i < 22; i++) 3023 *ulp++ = (ulong_t)*ullp++; 3024 } 3025 3026 #ifdef _SYSCALL32_IMPL 3027 void 3028 prcvtusage32(prhusage_t *pup, prusage32_t *upup) 3029 { 3030 uint64_t *ullp; 3031 uint32_t *ulp; 3032 int i; 3033 3034 upup->pr_lwpid = pup->pr_lwpid; 3035 upup->pr_count = pup->pr_count; 3036 3037 hrt2ts32(pup->pr_tstamp, 
&upup->pr_tstamp); 3038 hrt2ts32(pup->pr_create, &upup->pr_create); 3039 hrt2ts32(pup->pr_term, &upup->pr_term); 3040 hrt2ts32(pup->pr_rtime, &upup->pr_rtime); 3041 hrt2ts32(pup->pr_utime, &upup->pr_utime); 3042 hrt2ts32(pup->pr_stime, &upup->pr_stime); 3043 hrt2ts32(pup->pr_ttime, &upup->pr_ttime); 3044 hrt2ts32(pup->pr_tftime, &upup->pr_tftime); 3045 hrt2ts32(pup->pr_dftime, &upup->pr_dftime); 3046 hrt2ts32(pup->pr_kftime, &upup->pr_kftime); 3047 hrt2ts32(pup->pr_ltime, &upup->pr_ltime); 3048 hrt2ts32(pup->pr_slptime, &upup->pr_slptime); 3049 hrt2ts32(pup->pr_wtime, &upup->pr_wtime); 3050 hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime); 3051 bzero(upup->filltime, sizeof (upup->filltime)); 3052 3053 ullp = &pup->pr_minf; 3054 ulp = &upup->pr_minf; 3055 for (i = 0; i < 22; i++) 3056 *ulp++ = (uint32_t)*ullp++; 3057 } 3058 #endif /* _SYSCALL32_IMPL */ 3059 3060 /* 3061 * Determine whether a set is empty. 3062 */ 3063 int 3064 setisempty(uint32_t *sp, uint_t n) 3065 { 3066 while (n--) 3067 if (*sp++) 3068 return (0); 3069 return (1); 3070 } 3071 3072 /* 3073 * Utility routine for establishing a watched area in the process. 3074 * Keep the list of watched areas sorted by virtual address. 3075 */ 3076 int 3077 set_watched_area(proc_t *p, struct watched_area *pwa) 3078 { 3079 caddr_t vaddr = pwa->wa_vaddr; 3080 caddr_t eaddr = pwa->wa_eaddr; 3081 ulong_t flags = pwa->wa_flags; 3082 struct watched_area *target; 3083 avl_index_t where; 3084 int error = 0; 3085 3086 /* we must not be holding p->p_lock, but the process must be locked */ 3087 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 3088 ASSERT(p->p_proc_flag & P_PR_LOCK); 3089 3090 /* 3091 * If this is our first watchpoint, enable watchpoints for the process. 
3092 */ 3093 if (!pr_watch_active(p)) { 3094 kthread_t *t; 3095 3096 mutex_enter(&p->p_lock); 3097 if ((t = p->p_tlist) != NULL) { 3098 do { 3099 watch_enable(t); 3100 } while ((t = t->t_forw) != p->p_tlist); 3101 } 3102 mutex_exit(&p->p_lock); 3103 } 3104 3105 target = pr_find_watched_area(p, pwa, &where); 3106 if (target != NULL) { 3107 /* 3108 * We discovered an existing, overlapping watched area. 3109 * Allow it only if it is an exact match. 3110 */ 3111 if (target->wa_vaddr != vaddr || 3112 target->wa_eaddr != eaddr) 3113 error = EINVAL; 3114 else if (target->wa_flags != flags) { 3115 error = set_watched_page(p, vaddr, eaddr, 3116 flags, target->wa_flags); 3117 target->wa_flags = flags; 3118 } 3119 kmem_free(pwa, sizeof (struct watched_area)); 3120 } else { 3121 avl_insert(&p->p_warea, pwa, where); 3122 error = set_watched_page(p, vaddr, eaddr, flags, 0); 3123 } 3124 3125 return (error); 3126 } 3127 3128 /* 3129 * Utility routine for clearing a watched area in the process. 3130 * Must be an exact match of the virtual address. 3131 * size and flags don't matter. 3132 */ 3133 int 3134 clear_watched_area(proc_t *p, struct watched_area *pwa) 3135 { 3136 struct watched_area *found; 3137 3138 /* we must not be holding p->p_lock, but the process must be locked */ 3139 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 3140 ASSERT(p->p_proc_flag & P_PR_LOCK); 3141 3142 3143 if (!pr_watch_active(p)) { 3144 kmem_free(pwa, sizeof (struct watched_area)); 3145 return (0); 3146 } 3147 3148 /* 3149 * Look for a matching address in the watched areas. If a match is 3150 * found, clear the old watched area and adjust the watched page(s). It 3151 * is not an error if there is no match. 
3152 */ 3153 if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL && 3154 found->wa_vaddr == pwa->wa_vaddr) { 3155 clear_watched_page(p, found->wa_vaddr, found->wa_eaddr, 3156 found->wa_flags); 3157 avl_remove(&p->p_warea, found); 3158 kmem_free(found, sizeof (struct watched_area)); 3159 } 3160 3161 kmem_free(pwa, sizeof (struct watched_area)); 3162 3163 /* 3164 * If we removed the last watched area from the process, disable 3165 * watchpoints. 3166 */ 3167 if (!pr_watch_active(p)) { 3168 kthread_t *t; 3169 3170 mutex_enter(&p->p_lock); 3171 if ((t = p->p_tlist) != NULL) { 3172 do { 3173 watch_disable(t); 3174 } while ((t = t->t_forw) != p->p_tlist); 3175 } 3176 mutex_exit(&p->p_lock); 3177 } 3178 3179 return (0); 3180 } 3181 3182 /* 3183 * Frees all the watched_area structures 3184 */ 3185 void 3186 pr_free_watchpoints(proc_t *p) 3187 { 3188 struct watched_area *delp; 3189 void *cookie; 3190 3191 cookie = NULL; 3192 while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL) 3193 kmem_free(delp, sizeof (struct watched_area)); 3194 3195 avl_destroy(&p->p_warea); 3196 } 3197 3198 /* 3199 * This one is called by the traced process to unwatch all the 3200 * pages while deallocating the list of watched_page structs. 
3201 */ 3202 void 3203 pr_free_watched_pages(proc_t *p) 3204 { 3205 struct as *as = p->p_as; 3206 struct watched_page *pwp; 3207 uint_t prot; 3208 int retrycnt, err; 3209 void *cookie; 3210 3211 if (as == NULL || avl_numnodes(&as->a_wpage) == 0) 3212 return; 3213 3214 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 3215 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3216 3217 pwp = avl_first(&as->a_wpage); 3218 3219 cookie = NULL; 3220 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) { 3221 retrycnt = 0; 3222 if ((prot = pwp->wp_oprot) != 0) { 3223 caddr_t addr = pwp->wp_vaddr; 3224 struct seg *seg; 3225 retry: 3226 3227 if ((pwp->wp_prot != prot || 3228 (pwp->wp_flags & WP_NOWATCH)) && 3229 (seg = as_segat(as, addr)) != NULL) { 3230 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot); 3231 if (err == IE_RETRY) { 3232 ASSERT(retrycnt == 0); 3233 retrycnt++; 3234 goto retry; 3235 } 3236 } 3237 } 3238 kmem_free(pwp, sizeof (struct watched_page)); 3239 } 3240 3241 avl_destroy(&as->a_wpage); 3242 p->p_wprot = NULL; 3243 3244 AS_LOCK_EXIT(as, &as->a_lock); 3245 } 3246 3247 /* 3248 * Insert a watched area into the list of watched pages. 3249 * If oflags is zero then we are adding a new watched area. 3250 * Otherwise we are changing the flags of an existing watched area. 3251 */ 3252 static int 3253 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, 3254 ulong_t flags, ulong_t oflags) 3255 { 3256 struct as *as = p->p_as; 3257 avl_tree_t *pwp_tree; 3258 struct watched_page *pwp, *newpwp; 3259 struct watched_page tpw; 3260 avl_index_t where; 3261 struct seg *seg; 3262 uint_t prot; 3263 caddr_t addr; 3264 3265 /* 3266 * We need to pre-allocate a list of structures before we grab the 3267 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks 3268 * held. 
3269 */ 3270 newpwp = NULL; 3271 for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3272 addr < eaddr; addr += PAGESIZE) { 3273 pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP); 3274 pwp->wp_list = newpwp; 3275 newpwp = pwp; 3276 } 3277 3278 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3279 3280 /* 3281 * Search for an existing watched page to contain the watched area. 3282 * If none is found, grab a new one from the available list 3283 * and insert it in the active list, keeping the list sorted 3284 * by user-level virtual address. 3285 */ 3286 if (p->p_flag & SVFWAIT) 3287 pwp_tree = &p->p_wpage; 3288 else 3289 pwp_tree = &as->a_wpage; 3290 3291 again: 3292 if (avl_numnodes(pwp_tree) > prnwatch) { 3293 AS_LOCK_EXIT(as, &as->a_lock); 3294 while (newpwp != NULL) { 3295 pwp = newpwp->wp_list; 3296 kmem_free(newpwp, sizeof (struct watched_page)); 3297 newpwp = pwp; 3298 } 3299 return (E2BIG); 3300 } 3301 3302 tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3303 if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) { 3304 pwp = newpwp; 3305 newpwp = newpwp->wp_list; 3306 pwp->wp_list = NULL; 3307 pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr & 3308 (uintptr_t)PAGEMASK); 3309 avl_insert(pwp_tree, pwp, where); 3310 } 3311 3312 ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE); 3313 3314 if (oflags & WA_READ) 3315 pwp->wp_read--; 3316 if (oflags & WA_WRITE) 3317 pwp->wp_write--; 3318 if (oflags & WA_EXEC) 3319 pwp->wp_exec--; 3320 3321 ASSERT(pwp->wp_read >= 0); 3322 ASSERT(pwp->wp_write >= 0); 3323 ASSERT(pwp->wp_exec >= 0); 3324 3325 if (flags & WA_READ) 3326 pwp->wp_read++; 3327 if (flags & WA_WRITE) 3328 pwp->wp_write++; 3329 if (flags & WA_EXEC) 3330 pwp->wp_exec++; 3331 3332 if (!(p->p_flag & SVFWAIT)) { 3333 vaddr = pwp->wp_vaddr; 3334 if (pwp->wp_oprot == 0 && 3335 (seg = as_segat(as, vaddr)) != NULL) { 3336 SEGOP_GETPROT(seg, vaddr, 0, &prot); 3337 pwp->wp_oprot = (uchar_t)prot; 3338 pwp->wp_prot = 
(uchar_t)prot; 3339 } 3340 if (pwp->wp_oprot != 0) { 3341 prot = pwp->wp_oprot; 3342 if (pwp->wp_read) 3343 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3344 if (pwp->wp_write) 3345 prot &= ~PROT_WRITE; 3346 if (pwp->wp_exec) 3347 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3348 if (!(pwp->wp_flags & WP_NOWATCH) && 3349 pwp->wp_prot != prot && 3350 (pwp->wp_flags & WP_SETPROT) == 0) { 3351 pwp->wp_flags |= WP_SETPROT; 3352 pwp->wp_list = p->p_wprot; 3353 p->p_wprot = pwp; 3354 } 3355 pwp->wp_prot = (uchar_t)prot; 3356 } 3357 } 3358 3359 /* 3360 * If the watched area extends into the next page then do 3361 * it over again with the virtual address of the next page. 3362 */ 3363 if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr) 3364 goto again; 3365 3366 AS_LOCK_EXIT(as, &as->a_lock); 3367 3368 /* 3369 * Free any pages we may have over-allocated 3370 */ 3371 while (newpwp != NULL) { 3372 pwp = newpwp->wp_list; 3373 kmem_free(newpwp, sizeof (struct watched_page)); 3374 newpwp = pwp; 3375 } 3376 3377 return (0); 3378 } 3379 3380 /* 3381 * Remove a watched area from the list of watched pages. 3382 * A watched area may extend over more than one page. 
3383 */ 3384 static void 3385 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags) 3386 { 3387 struct as *as = p->p_as; 3388 struct watched_page *pwp; 3389 struct watched_page tpw; 3390 avl_tree_t *tree; 3391 avl_index_t where; 3392 3393 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3394 3395 if (p->p_flag & SVFWAIT) 3396 tree = &p->p_wpage; 3397 else 3398 tree = &as->a_wpage; 3399 3400 tpw.wp_vaddr = vaddr = 3401 (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3402 pwp = avl_find(tree, &tpw, &where); 3403 if (pwp == NULL) 3404 pwp = avl_nearest(tree, where, AVL_AFTER); 3405 3406 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 3407 ASSERT(vaddr <= pwp->wp_vaddr); 3408 3409 if (flags & WA_READ) 3410 pwp->wp_read--; 3411 if (flags & WA_WRITE) 3412 pwp->wp_write--; 3413 if (flags & WA_EXEC) 3414 pwp->wp_exec--; 3415 3416 if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) { 3417 /* 3418 * Reset the hat layer's protections on this page. 3419 */ 3420 if (pwp->wp_oprot != 0) { 3421 uint_t prot = pwp->wp_oprot; 3422 3423 if (pwp->wp_read) 3424 prot &= 3425 ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3426 if (pwp->wp_write) 3427 prot &= ~PROT_WRITE; 3428 if (pwp->wp_exec) 3429 prot &= 3430 ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3431 if (!(pwp->wp_flags & WP_NOWATCH) && 3432 pwp->wp_prot != prot && 3433 (pwp->wp_flags & WP_SETPROT) == 0) { 3434 pwp->wp_flags |= WP_SETPROT; 3435 pwp->wp_list = p->p_wprot; 3436 p->p_wprot = pwp; 3437 } 3438 pwp->wp_prot = (uchar_t)prot; 3439 } 3440 } else { 3441 /* 3442 * No watched areas remain in this page. 3443 * Reset everything to normal. 
3444 */ 3445 if (pwp->wp_oprot != 0) { 3446 pwp->wp_prot = pwp->wp_oprot; 3447 if ((pwp->wp_flags & WP_SETPROT) == 0) { 3448 pwp->wp_flags |= WP_SETPROT; 3449 pwp->wp_list = p->p_wprot; 3450 p->p_wprot = pwp; 3451 } 3452 } 3453 } 3454 3455 pwp = AVL_NEXT(tree, pwp); 3456 } 3457 3458 AS_LOCK_EXIT(as, &as->a_lock); 3459 } 3460 3461 /* 3462 * Return the original protections for the specified page. 3463 */ 3464 static void 3465 getwatchprot(struct as *as, caddr_t addr, uint_t *prot) 3466 { 3467 struct watched_page *pwp; 3468 struct watched_page tpw; 3469 3470 ASSERT(AS_LOCK_HELD(as, &as->a_lock)); 3471 3472 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 3473 if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL) 3474 *prot = pwp->wp_oprot; 3475 } 3476 3477 static prpagev_t * 3478 pr_pagev_create(struct seg *seg, int check_noreserve) 3479 { 3480 prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP); 3481 size_t total_pages = seg_pages(seg); 3482 3483 /* 3484 * Limit the size of our vectors to pagev_lim pages at a time. We need 3485 * 4 or 5 bytes of storage per page, so this means we limit ourself 3486 * to about a megabyte of kernel heap by default. 
3487 */ 3488 pagev->pg_npages = MIN(total_pages, pagev_lim); 3489 pagev->pg_pnbase = 0; 3490 3491 pagev->pg_protv = 3492 kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP); 3493 3494 if (check_noreserve) 3495 pagev->pg_incore = 3496 kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP); 3497 else 3498 pagev->pg_incore = NULL; 3499 3500 return (pagev); 3501 } 3502 3503 static void 3504 pr_pagev_destroy(prpagev_t *pagev) 3505 { 3506 if (pagev->pg_incore != NULL) 3507 kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char)); 3508 3509 kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t)); 3510 kmem_free(pagev, sizeof (prpagev_t)); 3511 } 3512 3513 static caddr_t 3514 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr) 3515 { 3516 ulong_t lastpg = seg_page(seg, eaddr - 1); 3517 ulong_t pn, pnlim; 3518 caddr_t saddr; 3519 size_t len; 3520 3521 ASSERT(addr >= seg->s_base && addr <= eaddr); 3522 3523 if (addr == eaddr) 3524 return (eaddr); 3525 3526 refill: 3527 ASSERT(addr < eaddr); 3528 pagev->pg_pnbase = seg_page(seg, addr); 3529 pnlim = pagev->pg_pnbase + pagev->pg_npages; 3530 saddr = addr; 3531 3532 if (lastpg < pnlim) 3533 len = (size_t)(eaddr - addr); 3534 else 3535 len = pagev->pg_npages * PAGESIZE; 3536 3537 if (pagev->pg_incore != NULL) { 3538 /* 3539 * INCORE cleverly has different semantics than GETPROT: 3540 * it returns info on pages up to but NOT including addr + len. 3541 */ 3542 SEGOP_INCORE(seg, addr, len, pagev->pg_incore); 3543 pn = pagev->pg_pnbase; 3544 3545 do { 3546 /* 3547 * Guilty knowledge here: We know that segvn_incore 3548 * returns more than just the low-order bit that 3549 * indicates the page is actually in memory. If any 3550 * bits are set, then the page has backing store. 
3551 */ 3552 if (pagev->pg_incore[pn++ - pagev->pg_pnbase]) 3553 goto out; 3554 3555 } while ((addr += PAGESIZE) < eaddr && pn < pnlim); 3556 3557 /* 3558 * If we examined all the pages in the vector but we're not 3559 * at the end of the segment, take another lap. 3560 */ 3561 if (addr < eaddr) 3562 goto refill; 3563 } 3564 3565 /* 3566 * Need to take len - 1 because addr + len is the address of the 3567 * first byte of the page just past the end of what we want. 3568 */ 3569 out: 3570 SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv); 3571 return (addr); 3572 } 3573 3574 static caddr_t 3575 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg, 3576 caddr_t *saddrp, caddr_t eaddr, uint_t *protp) 3577 { 3578 /* 3579 * Our starting address is either the specified address, or the base 3580 * address from the start of the pagev. If the latter is greater, 3581 * this means a previous call to pr_pagev_fill has already scanned 3582 * further than the end of the previous mapping. 3583 */ 3584 caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE; 3585 caddr_t addr = MAX(*saddrp, base); 3586 ulong_t pn = seg_page(seg, addr); 3587 uint_t prot, nprot; 3588 3589 /* 3590 * If we're dealing with noreserve pages, then advance addr to 3591 * the address of the next page which has backing store. 3592 */ 3593 if (pagev->pg_incore != NULL) { 3594 while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) { 3595 if ((addr += PAGESIZE) == eaddr) { 3596 *saddrp = addr; 3597 prot = 0; 3598 goto out; 3599 } 3600 if (++pn == pagev->pg_pnbase + pagev->pg_npages) { 3601 addr = pr_pagev_fill(pagev, seg, addr, eaddr); 3602 if (addr == eaddr) { 3603 *saddrp = addr; 3604 prot = 0; 3605 goto out; 3606 } 3607 pn = seg_page(seg, addr); 3608 } 3609 } 3610 } 3611 3612 /* 3613 * Get the protections on the page corresponding to addr. 
3614 */ 3615 pn = seg_page(seg, addr); 3616 ASSERT(pn >= pagev->pg_pnbase); 3617 ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages)); 3618 3619 prot = pagev->pg_protv[pn - pagev->pg_pnbase]; 3620 getwatchprot(seg->s_as, addr, &prot); 3621 *saddrp = addr; 3622 3623 /* 3624 * Now loop until we find a backed page with different protections 3625 * or we reach the end of this segment. 3626 */ 3627 while ((addr += PAGESIZE) < eaddr) { 3628 /* 3629 * If pn has advanced to the page number following what we 3630 * have information on, refill the page vector and reset 3631 * addr and pn. If pr_pagev_fill does not return the 3632 * address of the next page, we have a discontiguity and 3633 * thus have reached the end of the current mapping. 3634 */ 3635 if (++pn == pagev->pg_pnbase + pagev->pg_npages) { 3636 caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr); 3637 if (naddr != addr) 3638 goto out; 3639 pn = seg_page(seg, addr); 3640 } 3641 3642 /* 3643 * The previous page's protections are in prot, and it has 3644 * backing. If this page is MAP_NORESERVE and has no backing, 3645 * then end this mapping and return the previous protections. 3646 */ 3647 if (pagev->pg_incore != NULL && 3648 pagev->pg_incore[pn - pagev->pg_pnbase] == 0) 3649 break; 3650 3651 /* 3652 * Otherwise end the mapping if this page's protections (nprot) 3653 * are different than those in the previous page (prot). 3654 */ 3655 nprot = pagev->pg_protv[pn - pagev->pg_pnbase]; 3656 getwatchprot(seg->s_as, addr, &nprot); 3657 3658 if (nprot != prot) 3659 break; 3660 } 3661 3662 out: 3663 *protp = prot; 3664 return (addr); 3665 } 3666 3667 size_t 3668 pr_getsegsize(struct seg *seg, int reserved) 3669 { 3670 size_t size = seg->s_size; 3671 3672 /* 3673 * If we're interested in the reserved space, return the size of the 3674 * segment itself. Everything else in this function is a special case 3675 * to determine the actual underlying size of various segment types. 
3676 */ 3677 if (reserved) 3678 return (size); 3679 3680 /* 3681 * If this is a segvn mapping of a regular file, return the smaller 3682 * of the segment size and the remaining size of the file beyond 3683 * the file offset corresponding to seg->s_base. 3684 */ 3685 if (seg->s_ops == &segvn_ops) { 3686 vattr_t vattr; 3687 vnode_t *vp; 3688 3689 vattr.va_mask = AT_SIZE; 3690 3691 if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 && 3692 vp != NULL && vp->v_type == VREG && 3693 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 3694 3695 u_offset_t fsize = vattr.va_size; 3696 u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base); 3697 3698 if (fsize < offset) 3699 fsize = 0; 3700 else 3701 fsize -= offset; 3702 3703 fsize = roundup(fsize, (u_offset_t)PAGESIZE); 3704 3705 if (fsize < (u_offset_t)size) 3706 size = (size_t)fsize; 3707 } 3708 3709 return (size); 3710 } 3711 3712 /* 3713 * If this is an ISM shared segment, don't include pages that are 3714 * beyond the real size of the spt segment that backs it. 3715 */ 3716 if (seg->s_ops == &segspt_shmops) 3717 return (MIN(spt_realsize(seg), size)); 3718 3719 /* 3720 * If this is segment is a mapping from /dev/null, then this is a 3721 * reservation of virtual address space and has no actual size. 3722 * Such segments are backed by segdev and have type set to neither 3723 * MAP_SHARED nor MAP_PRIVATE. 3724 */ 3725 if (seg->s_ops == &segdev_ops && 3726 ((SEGOP_GETTYPE(seg, seg->s_base) & 3727 (MAP_SHARED | MAP_PRIVATE)) == 0)) 3728 return (0); 3729 3730 /* 3731 * If this segment doesn't match one of the special types we handle, 3732 * just return the size of the segment itself. 
3733 */ 3734 return (size); 3735 } 3736 3737 uint_t 3738 pr_getprot(struct seg *seg, int reserved, void **tmp, 3739 caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr) 3740 { 3741 struct as *as = seg->s_as; 3742 3743 caddr_t saddr = *saddrp; 3744 caddr_t naddr; 3745 3746 int check_noreserve; 3747 uint_t prot; 3748 3749 union { 3750 struct segvn_data *svd; 3751 struct segdev_data *sdp; 3752 void *data; 3753 } s; 3754 3755 s.data = seg->s_data; 3756 3757 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 3758 ASSERT(saddr >= seg->s_base && saddr < eaddr); 3759 ASSERT(eaddr <= seg->s_base + seg->s_size); 3760 3761 /* 3762 * Don't include MAP_NORESERVE pages in the address range 3763 * unless their mappings have actually materialized. 3764 * We cheat by knowing that segvn is the only segment 3765 * driver that supports MAP_NORESERVE. 3766 */ 3767 check_noreserve = 3768 (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL && 3769 (s.svd->vp == NULL || s.svd->vp->v_type != VREG) && 3770 (s.svd->flags & MAP_NORESERVE)); 3771 3772 /* 3773 * Examine every page only as a last resort. We use guilty knowledge 3774 * of segvn and segdev to avoid this: if there are no per-page 3775 * protections present in the segment and we don't care about 3776 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment. 3777 */ 3778 if (!check_noreserve && saddr == seg->s_base && 3779 seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) { 3780 prot = s.svd->prot; 3781 getwatchprot(as, saddr, &prot); 3782 naddr = eaddr; 3783 3784 } else if (saddr == seg->s_base && seg->s_ops == &segdev_ops && 3785 s.sdp != NULL && s.sdp->pageprot == 0) { 3786 prot = s.sdp->prot; 3787 getwatchprot(as, saddr, &prot); 3788 naddr = eaddr; 3789 3790 } else { 3791 prpagev_t *pagev; 3792 3793 /* 3794 * If addr is sitting at the start of the segment, then 3795 * create a page vector to store protection and incore 3796 * information for pages in the segment, and fill it. 
3797 * Otherwise, we expect *tmp to address the prpagev_t 3798 * allocated by a previous call to this function. 3799 */ 3800 if (saddr == seg->s_base) { 3801 pagev = pr_pagev_create(seg, check_noreserve); 3802 saddr = pr_pagev_fill(pagev, seg, saddr, eaddr); 3803 3804 ASSERT(*tmp == NULL); 3805 *tmp = pagev; 3806 3807 ASSERT(saddr <= eaddr); 3808 *saddrp = saddr; 3809 3810 if (saddr == eaddr) { 3811 naddr = saddr; 3812 prot = 0; 3813 goto out; 3814 } 3815 3816 } else { 3817 ASSERT(*tmp != NULL); 3818 pagev = (prpagev_t *)*tmp; 3819 } 3820 3821 naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot); 3822 ASSERT(naddr <= eaddr); 3823 } 3824 3825 out: 3826 if (naddr == eaddr) 3827 pr_getprot_done(tmp); 3828 *naddrp = naddr; 3829 return (prot); 3830 } 3831 3832 void 3833 pr_getprot_done(void **tmp) 3834 { 3835 if (*tmp != NULL) { 3836 pr_pagev_destroy((prpagev_t *)*tmp); 3837 *tmp = NULL; 3838 } 3839 } 3840 3841 /* 3842 * Return true iff the vnode is a /proc file from the object directory. 3843 */ 3844 int 3845 pr_isobject(vnode_t *vp) 3846 { 3847 return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT); 3848 } 3849 3850 /* 3851 * Return true iff the vnode is a /proc file opened by the process itself. 3852 */ 3853 int 3854 pr_isself(vnode_t *vp) 3855 { 3856 /* 3857 * XXX: To retain binary compatibility with the old 3858 * ioctl()-based version of /proc, we exempt self-opens 3859 * of /proc/<pid> from being marked close-on-exec. 
3860 */ 3861 return (vn_matchops(vp, prvnodeops) && 3862 (VTOP(vp)->pr_flags & PR_ISSELF) && 3863 VTOP(vp)->pr_type != PR_PIDDIR); 3864 } 3865 3866 static ssize_t 3867 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr) 3868 { 3869 ssize_t pagesize, hatsize; 3870 3871 ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 3872 ASSERT(IS_P2ALIGNED(saddr, PAGESIZE)); 3873 ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE)); 3874 ASSERT(saddr < eaddr); 3875 3876 pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr); 3877 ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize)); 3878 ASSERT(pagesize != 0); 3879 3880 if (pagesize == -1) 3881 pagesize = PAGESIZE; 3882 3883 saddr += P2NPHASE((uintptr_t)saddr, pagesize); 3884 3885 while (saddr < eaddr) { 3886 if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr)) 3887 break; 3888 ASSERT(IS_P2ALIGNED(saddr, pagesize)); 3889 saddr += pagesize; 3890 } 3891 3892 *naddrp = ((saddr < eaddr) ? saddr : eaddr); 3893 return (hatsize); 3894 } 3895 3896 /* 3897 * Return an array of structures with extended memory map information. 3898 * We allocate here; the caller must deallocate. 3899 */ 3900 int 3901 prgetxmap(proc_t *p, list_t *iolhead) 3902 { 3903 struct as *as = p->p_as; 3904 prxmap_t *mp; 3905 struct seg *seg; 3906 struct seg *brkseg, *stkseg; 3907 struct vnode *vp; 3908 struct vattr vattr; 3909 uint_t prot; 3910 3911 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 3912 3913 /* 3914 * Request an initial buffer size that doesn't waste memory 3915 * if the address space has only a small number of segments. 
3916 */ 3917 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 3918 3919 if ((seg = AS_SEGFIRST(as)) == NULL) 3920 return (0); 3921 3922 brkseg = break_seg(p); 3923 stkseg = as_segat(as, prgetstackbase(p)); 3924 3925 do { 3926 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 3927 caddr_t saddr, naddr, baddr; 3928 void *tmp = NULL; 3929 ssize_t psz; 3930 char *parr; 3931 uint64_t npages; 3932 uint64_t pagenum; 3933 3934 /* 3935 * Segment loop part one: iterate from the base of the segment 3936 * to its end, pausing at each address boundary (baddr) between 3937 * ranges that have different virtual memory protections. 3938 */ 3939 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { 3940 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); 3941 ASSERT(baddr >= saddr && baddr <= eaddr); 3942 3943 /* 3944 * Segment loop part two: iterate from the current 3945 * position to the end of the protection boundary, 3946 * pausing at each address boundary (naddr) between 3947 * ranges that have different underlying page sizes. 
3948 */ 3949 for (; saddr < baddr; saddr = naddr) { 3950 psz = pr_getpagesize(seg, saddr, &naddr, baddr); 3951 ASSERT(naddr >= saddr && naddr <= baddr); 3952 3953 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 3954 3955 mp->pr_vaddr = (uintptr_t)saddr; 3956 mp->pr_size = naddr - saddr; 3957 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 3958 mp->pr_mflags = 0; 3959 if (prot & PROT_READ) 3960 mp->pr_mflags |= MA_READ; 3961 if (prot & PROT_WRITE) 3962 mp->pr_mflags |= MA_WRITE; 3963 if (prot & PROT_EXEC) 3964 mp->pr_mflags |= MA_EXEC; 3965 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 3966 mp->pr_mflags |= MA_SHARED; 3967 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 3968 mp->pr_mflags |= MA_NORESERVE; 3969 if (seg->s_ops == &segspt_shmops || 3970 (seg->s_ops == &segvn_ops && 3971 (SEGOP_GETVP(seg, saddr, &vp) != 0 || 3972 vp == NULL))) 3973 mp->pr_mflags |= MA_ANON; 3974 if (seg == brkseg) 3975 mp->pr_mflags |= MA_BREAK; 3976 else if (seg == stkseg) 3977 mp->pr_mflags |= MA_STACK; 3978 if (seg->s_ops == &segspt_shmops) 3979 mp->pr_mflags |= MA_ISM | MA_SHM; 3980 3981 mp->pr_pagesize = PAGESIZE; 3982 if (psz == -1) { 3983 mp->pr_hatpagesize = 0; 3984 } else { 3985 mp->pr_hatpagesize = psz; 3986 } 3987 3988 /* 3989 * Manufacture a filename for the "object" dir. 3990 */ 3991 mp->pr_dev = PRNODEV; 3992 vattr.va_mask = AT_FSID|AT_NODEID; 3993 if (seg->s_ops == &segvn_ops && 3994 SEGOP_GETVP(seg, saddr, &vp) == 0 && 3995 vp != NULL && vp->v_type == VREG && 3996 VOP_GETATTR(vp, &vattr, 0, CRED(), 3997 NULL) == 0) { 3998 mp->pr_dev = vattr.va_fsid; 3999 mp->pr_ino = vattr.va_nodeid; 4000 if (vp == p->p_exec) 4001 (void) strcpy(mp->pr_mapname, 4002 "a.out"); 4003 else 4004 pr_object_name(mp->pr_mapname, 4005 vp, &vattr); 4006 } 4007 4008 /* 4009 * Get the SysV shared memory id, if any. 
4010 */ 4011 if ((mp->pr_mflags & MA_SHARED) && 4012 p->p_segacct && (mp->pr_shmid = shmgetid(p, 4013 seg->s_base)) != SHMID_NONE) { 4014 if (mp->pr_shmid == SHMID_FREE) 4015 mp->pr_shmid = -1; 4016 4017 mp->pr_mflags |= MA_SHM; 4018 } else { 4019 mp->pr_shmid = -1; 4020 } 4021 4022 npages = ((uintptr_t)(naddr - saddr)) >> 4023 PAGESHIFT; 4024 parr = kmem_zalloc(npages, KM_SLEEP); 4025 4026 SEGOP_INCORE(seg, saddr, naddr - saddr, parr); 4027 4028 for (pagenum = 0; pagenum < npages; pagenum++) { 4029 if (parr[pagenum] & SEG_PAGE_INCORE) 4030 mp->pr_rss++; 4031 if (parr[pagenum] & SEG_PAGE_ANON) 4032 mp->pr_anon++; 4033 if (parr[pagenum] & SEG_PAGE_LOCKED) 4034 mp->pr_locked++; 4035 } 4036 kmem_free(parr, npages); 4037 } 4038 } 4039 ASSERT(tmp == NULL); 4040 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 4041 4042 return (0); 4043 } 4044 4045 /* 4046 * Return the process's credentials. We don't need a 32-bit equivalent of 4047 * this function because prcred_t and prcred32_t are actually the same. 4048 */ 4049 void 4050 prgetcred(proc_t *p, prcred_t *pcrp) 4051 { 4052 mutex_enter(&p->p_crlock); 4053 cred2prcred(p->p_cred, pcrp); 4054 mutex_exit(&p->p_crlock); 4055 } 4056 4057 /* 4058 * Compute actual size of the prpriv_t structure. 4059 */ 4060 4061 size_t 4062 prgetprivsize(void) 4063 { 4064 return (priv_prgetprivsize(NULL)); 4065 } 4066 4067 /* 4068 * Return the process's privileges. We don't need a 32-bit equivalent of 4069 * this function because prpriv_t and prpriv32_t are actually the same. 4070 */ 4071 void 4072 prgetpriv(proc_t *p, prpriv_t *pprp) 4073 { 4074 mutex_enter(&p->p_crlock); 4075 cred2prpriv(p->p_cred, pprp); 4076 mutex_exit(&p->p_crlock); 4077 } 4078 4079 #ifdef _SYSCALL32_IMPL 4080 /* 4081 * Return an array of structures with HAT memory map information. 4082 * We allocate here; the caller must deallocate. 
4083 */ 4084 int 4085 prgetxmap32(proc_t *p, list_t *iolhead) 4086 { 4087 struct as *as = p->p_as; 4088 prxmap32_t *mp; 4089 struct seg *seg; 4090 struct seg *brkseg, *stkseg; 4091 struct vnode *vp; 4092 struct vattr vattr; 4093 uint_t prot; 4094 4095 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 4096 4097 /* 4098 * Request an initial buffer size that doesn't waste memory 4099 * if the address space has only a small number of segments. 4100 */ 4101 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 4102 4103 if ((seg = AS_SEGFIRST(as)) == NULL) 4104 return (0); 4105 4106 brkseg = break_seg(p); 4107 stkseg = as_segat(as, prgetstackbase(p)); 4108 4109 do { 4110 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 4111 caddr_t saddr, naddr, baddr; 4112 void *tmp = NULL; 4113 ssize_t psz; 4114 char *parr; 4115 uint64_t npages; 4116 uint64_t pagenum; 4117 4118 /* 4119 * Segment loop part one: iterate from the base of the segment 4120 * to its end, pausing at each address boundary (baddr) between 4121 * ranges that have different virtual memory protections. 4122 */ 4123 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { 4124 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); 4125 ASSERT(baddr >= saddr && baddr <= eaddr); 4126 4127 /* 4128 * Segment loop part two: iterate from the current 4129 * position to the end of the protection boundary, 4130 * pausing at each address boundary (naddr) between 4131 * ranges that have different underlying page sizes. 
4132 */ 4133 for (; saddr < baddr; saddr = naddr) { 4134 psz = pr_getpagesize(seg, saddr, &naddr, baddr); 4135 ASSERT(naddr >= saddr && naddr <= baddr); 4136 4137 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 4138 4139 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 4140 mp->pr_size = (size32_t)(naddr - saddr); 4141 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 4142 mp->pr_mflags = 0; 4143 if (prot & PROT_READ) 4144 mp->pr_mflags |= MA_READ; 4145 if (prot & PROT_WRITE) 4146 mp->pr_mflags |= MA_WRITE; 4147 if (prot & PROT_EXEC) 4148 mp->pr_mflags |= MA_EXEC; 4149 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 4150 mp->pr_mflags |= MA_SHARED; 4151 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 4152 mp->pr_mflags |= MA_NORESERVE; 4153 if (seg->s_ops == &segspt_shmops || 4154 (seg->s_ops == &segvn_ops && 4155 (SEGOP_GETVP(seg, saddr, &vp) != 0 || 4156 vp == NULL))) 4157 mp->pr_mflags |= MA_ANON; 4158 if (seg == brkseg) 4159 mp->pr_mflags |= MA_BREAK; 4160 else if (seg == stkseg) 4161 mp->pr_mflags |= MA_STACK; 4162 if (seg->s_ops == &segspt_shmops) 4163 mp->pr_mflags |= MA_ISM | MA_SHM; 4164 4165 mp->pr_pagesize = PAGESIZE; 4166 if (psz == -1) { 4167 mp->pr_hatpagesize = 0; 4168 } else { 4169 mp->pr_hatpagesize = psz; 4170 } 4171 4172 /* 4173 * Manufacture a filename for the "object" dir. 4174 */ 4175 mp->pr_dev = PRNODEV32; 4176 vattr.va_mask = AT_FSID|AT_NODEID; 4177 if (seg->s_ops == &segvn_ops && 4178 SEGOP_GETVP(seg, saddr, &vp) == 0 && 4179 vp != NULL && vp->v_type == VREG && 4180 VOP_GETATTR(vp, &vattr, 0, CRED(), 4181 NULL) == 0) { 4182 (void) cmpldev(&mp->pr_dev, 4183 vattr.va_fsid); 4184 mp->pr_ino = vattr.va_nodeid; 4185 if (vp == p->p_exec) 4186 (void) strcpy(mp->pr_mapname, 4187 "a.out"); 4188 else 4189 pr_object_name(mp->pr_mapname, 4190 vp, &vattr); 4191 } 4192 4193 /* 4194 * Get the SysV shared memory id, if any. 
4195 */ 4196 if ((mp->pr_mflags & MA_SHARED) && 4197 p->p_segacct && (mp->pr_shmid = shmgetid(p, 4198 seg->s_base)) != SHMID_NONE) { 4199 if (mp->pr_shmid == SHMID_FREE) 4200 mp->pr_shmid = -1; 4201 4202 mp->pr_mflags |= MA_SHM; 4203 } else { 4204 mp->pr_shmid = -1; 4205 } 4206 4207 npages = ((uintptr_t)(naddr - saddr)) >> 4208 PAGESHIFT; 4209 parr = kmem_zalloc(npages, KM_SLEEP); 4210 4211 SEGOP_INCORE(seg, saddr, naddr - saddr, parr); 4212 4213 for (pagenum = 0; pagenum < npages; pagenum++) { 4214 if (parr[pagenum] & SEG_PAGE_INCORE) 4215 mp->pr_rss++; 4216 if (parr[pagenum] & SEG_PAGE_ANON) 4217 mp->pr_anon++; 4218 if (parr[pagenum] & SEG_PAGE_LOCKED) 4219 mp->pr_locked++; 4220 } 4221 kmem_free(parr, npages); 4222 } 4223 } 4224 ASSERT(tmp == NULL); 4225 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 4226 4227 return (0); 4228 } 4229 #endif /* _SYSCALL32_IMPL */ 4230