1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <sys/types.h> 33 #include <sys/t_lock.h> 34 #include <sys/param.h> 35 #include <sys/cmn_err.h> 36 #include <sys/cred.h> 37 #include <sys/priv.h> 38 #include <sys/debug.h> 39 #include <sys/errno.h> 40 #include <sys/inline.h> 41 #include <sys/kmem.h> 42 #include <sys/mman.h> 43 #include <sys/proc.h> 44 #include <sys/sobject.h> 45 #include <sys/sysmacros.h> 46 #include <sys/systm.h> 47 #include <sys/uio.h> 48 #include <sys/var.h> 49 #include <sys/vfs.h> 50 #include <sys/vnode.h> 51 #include <sys/session.h> 52 #include <sys/pcb.h> 53 #include <sys/signal.h> 54 #include <sys/user.h> 55 #include <sys/disp.h> 56 #include <sys/class.h> 57 #include <sys/ts.h> 58 #include <sys/bitmap.h> 59 #include <sys/poll.h> 60 #include <sys/shm_impl.h> 61 #include <sys/fault.h> 62 #include <sys/syscall.h> 63 #include <sys/procfs.h> 64 #include <sys/processor.h> 65 #include <sys/cpuvar.h> 66 
#include <sys/copyops.h>
#include <sys/time.h>
#include <sys/msacct.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_spt.h>
#include <vm/page.h>
#include <sys/vmparam.h>
#include <sys/swap.h>
#include <fs/proc/prdata.h>
#include <sys/task.h>
#include <sys/project.h>
#include <sys/contract_impl.h>
#include <sys/contract/process.h>
#include <sys/contract/process_impl.h>
#include <sys/schedctl.h>
#include <sys/pool.h>
#include <sys/zone.h>
#include <sys/atomic.h>
#include <sys/sdt.h>

#define	MAX_ITERS_SPIN	5

/*
 * Page vector: two parallel per-page arrays (protections and incore
 * flags) together with their length and the segment-relative page
 * number that the first element corresponds to.
 */
typedef struct prpagev {
	uint_t *pg_protv;	/* per-page permissions */
	char *pg_incore;	/* per-page incore flags */
	size_t pg_npages;	/* element count of protv and incore */
	ulong_t pg_pnbase;	/* page number in segment of protv[0] */
} prpagev_t;

/* upper bound on the number of pages described by one prpagev_t */
size_t pagev_lim = 256 * 1024;

/* these two seg_ops vectors still lack a header file to declare them */
extern struct seg_ops segdev_ops;
extern struct seg_ops segspt_shmops;

static int set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static void clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);

/*
 * Choose an lwp from the complete set of lwps for the process.
 * This is called for any operation applied to the process
 * file descriptor that requires an lwp to operate upon.
 *
 * Returns a pointer to the thread for the selected LWP,
 * and with the dispatcher lock held for the thread.
 *
 * The algorithm for choosing an lwp is critical for /proc semantics;
 * don't touch this code unless you know all of the implications.
117 */ 118 kthread_t * 119 prchoose(proc_t *p) 120 { 121 kthread_t *t; 122 kthread_t *t_onproc = NULL; /* running on processor */ 123 kthread_t *t_run = NULL; /* runnable, on disp queue */ 124 kthread_t *t_sleep = NULL; /* sleeping */ 125 kthread_t *t_hold = NULL; /* sleeping, performing hold */ 126 kthread_t *t_susp = NULL; /* suspended stop */ 127 kthread_t *t_jstop = NULL; /* jobcontrol stop, w/o directed stop */ 128 kthread_t *t_jdstop = NULL; /* jobcontrol stop with directed stop */ 129 kthread_t *t_req = NULL; /* requested stop */ 130 kthread_t *t_istop = NULL; /* event-of-interest stop */ 131 132 ASSERT(MUTEX_HELD(&p->p_lock)); 133 134 /* 135 * If the agent lwp exists, it takes precedence over all others. 136 */ 137 if ((t = p->p_agenttp) != NULL) { 138 thread_lock(t); 139 return (t); 140 } 141 142 if ((t = p->p_tlist) == NULL) /* start at the head of the list */ 143 return (t); 144 do { /* for eacn lwp in the process */ 145 if (VSTOPPED(t)) { /* virtually stopped */ 146 if (t_req == NULL) 147 t_req = t; 148 continue; 149 } 150 151 thread_lock(t); /* make sure thread is in good state */ 152 switch (t->t_state) { 153 default: 154 panic("prchoose: bad thread state %d, thread 0x%p", 155 t->t_state, (void *)t); 156 /*NOTREACHED*/ 157 case TS_SLEEP: 158 /* this is filthy */ 159 if (t->t_wchan == (caddr_t)&p->p_holdlwps && 160 t->t_wchan0 == NULL) { 161 if (t_hold == NULL) 162 t_hold = t; 163 } else { 164 if (t_sleep == NULL) 165 t_sleep = t; 166 } 167 break; 168 case TS_RUN: 169 if (t_run == NULL) 170 t_run = t; 171 break; 172 case TS_ONPROC: 173 if (t_onproc == NULL) 174 t_onproc = t; 175 break; 176 case TS_ZOMB: /* last possible choice */ 177 break; 178 case TS_STOPPED: 179 switch (t->t_whystop) { 180 case PR_SUSPENDED: 181 if (t_susp == NULL) 182 t_susp = t; 183 break; 184 case PR_JOBCONTROL: 185 if (t->t_proc_flag & TP_PRSTOP) { 186 if (t_jdstop == NULL) 187 t_jdstop = t; 188 } else { 189 if (t_jstop == NULL) 190 t_jstop = t; 191 } 192 break; 193 case 
PR_REQUESTED: 194 if (t_req == NULL) 195 t_req = t; 196 break; 197 case PR_SYSENTRY: 198 case PR_SYSEXIT: 199 case PR_SIGNALLED: 200 case PR_FAULTED: 201 /* 202 * Make an lwp calling exit() be the 203 * last lwp seen in the process. 204 */ 205 if (t_istop == NULL || 206 (t_istop->t_whystop == PR_SYSENTRY && 207 t_istop->t_whatstop == SYS_exit)) 208 t_istop = t; 209 break; 210 case PR_CHECKPOINT: /* can't happen? */ 211 break; 212 default: 213 panic("prchoose: bad t_whystop %d, thread 0x%p", 214 t->t_whystop, (void *)t); 215 /*NOTREACHED*/ 216 } 217 break; 218 } 219 thread_unlock(t); 220 } while ((t = t->t_forw) != p->p_tlist); 221 222 if (t_onproc) 223 t = t_onproc; 224 else if (t_run) 225 t = t_run; 226 else if (t_sleep) 227 t = t_sleep; 228 else if (t_jstop) 229 t = t_jstop; 230 else if (t_jdstop) 231 t = t_jdstop; 232 else if (t_istop) 233 t = t_istop; 234 else if (t_req) 235 t = t_req; 236 else if (t_hold) 237 t = t_hold; 238 else if (t_susp) 239 t = t_susp; 240 else /* TS_ZOMB */ 241 t = p->p_tlist; 242 243 if (t != NULL) 244 thread_lock(t); 245 return (t); 246 } 247 248 /* 249 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop. 250 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI 251 * on the /proc file descriptor. Called from stop() when a traced 252 * process stops on an event of interest. Also called from exit() 253 * and prinvalidate() to indicate POLLHUP and POLLERR respectively. 254 */ 255 void 256 prnotify(struct vnode *vp) 257 { 258 prcommon_t *pcp = VTOP(vp)->pr_common; 259 260 mutex_enter(&pcp->prc_mutex); 261 cv_broadcast(&pcp->prc_wait); 262 mutex_exit(&pcp->prc_mutex); 263 if (pcp->prc_flags & PRC_POLL) { 264 /* 265 * We call pollwakeup() with POLLHUP to ensure that 266 * the pollers are awakened even if they are polling 267 * for nothing (i.e., waiting for the process to exit). 
268 * This enables the use of the PRC_POLL flag for optimization 269 * (we can turn off PRC_POLL only if we know no pollers remain). 270 */ 271 pcp->prc_flags &= ~PRC_POLL; 272 pollwakeup(&pcp->prc_pollhead, POLLHUP); 273 } 274 } 275 276 /* called immediately below, in prfree() */ 277 static void 278 prfreenotify(vnode_t *vp) 279 { 280 prnode_t *pnp; 281 prcommon_t *pcp; 282 283 while (vp != NULL) { 284 pnp = VTOP(vp); 285 pcp = pnp->pr_common; 286 ASSERT(pcp->prc_thread == NULL); 287 pcp->prc_proc = NULL; 288 /* 289 * We can't call prnotify() here because we are holding 290 * pidlock. We assert that there is no need to. 291 */ 292 mutex_enter(&pcp->prc_mutex); 293 cv_broadcast(&pcp->prc_wait); 294 mutex_exit(&pcp->prc_mutex); 295 ASSERT(!(pcp->prc_flags & PRC_POLL)); 296 297 vp = pnp->pr_next; 298 pnp->pr_next = NULL; 299 } 300 } 301 302 /* 303 * Called from a hook in freeproc() when a traced process is removed 304 * from the process table. The proc-table pointers of all associated 305 * /proc vnodes are cleared to indicate that the process has gone away. 306 */ 307 void 308 prfree(proc_t *p) 309 { 310 uint_t slot = p->p_slot; 311 312 ASSERT(MUTEX_HELD(&pidlock)); 313 314 /* 315 * Block the process against /proc so it can be freed. 316 * It cannot be freed while locked by some controlling process. 317 * Lock ordering: 318 * pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex 319 */ 320 mutex_enter(&pr_pidlock); /* protects pcp->prc_proc */ 321 mutex_enter(&p->p_lock); 322 while (p->p_proc_flag & P_PR_LOCK) { 323 mutex_exit(&pr_pidlock); 324 cv_wait(&pr_pid_cv[slot], &p->p_lock); 325 mutex_exit(&p->p_lock); 326 mutex_enter(&pr_pidlock); 327 mutex_enter(&p->p_lock); 328 } 329 330 ASSERT(p->p_tlist == NULL); 331 332 prfreenotify(p->p_plist); 333 p->p_plist = NULL; 334 335 prfreenotify(p->p_trace); 336 p->p_trace = NULL; 337 338 /* 339 * We broadcast to wake up everyone waiting for this process. 340 * No one can reach this process from this point on. 
341 */ 342 cv_broadcast(&pr_pid_cv[slot]); 343 344 mutex_exit(&p->p_lock); 345 mutex_exit(&pr_pidlock); 346 } 347 348 /* 349 * Called from a hook in exit() when a traced process is becoming a zombie. 350 */ 351 void 352 prexit(proc_t *p) 353 { 354 ASSERT(MUTEX_HELD(&p->p_lock)); 355 356 if (pr_watch_active(p)) { 357 pr_free_watchpoints(p); 358 watch_disable(curthread); 359 } 360 /* pr_free_watched_pages() is called in exit(), after dropping p_lock */ 361 if (p->p_trace) { 362 VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY; 363 prnotify(p->p_trace); 364 } 365 cv_broadcast(&pr_pid_cv[p->p_slot]); /* pauselwps() */ 366 } 367 368 /* 369 * Called when a thread calls lwp_exit(). 370 */ 371 void 372 prlwpexit(kthread_t *t) 373 { 374 vnode_t *vp; 375 prnode_t *pnp; 376 prcommon_t *pcp; 377 proc_t *p = ttoproc(t); 378 lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry; 379 380 ASSERT(t == curthread); 381 ASSERT(MUTEX_HELD(&p->p_lock)); 382 383 /* 384 * The process must be blocked against /proc to do this safely. 385 * The lwp must not disappear while the process is marked P_PR_LOCK. 386 * It is the caller's responsibility to have called prbarrier(p). 387 */ 388 ASSERT(!(p->p_proc_flag & P_PR_LOCK)); 389 390 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) { 391 pnp = VTOP(vp); 392 pcp = pnp->pr_common; 393 if (pcp->prc_thread == t) { 394 pcp->prc_thread = NULL; 395 pcp->prc_flags |= PRC_DESTROY; 396 } 397 } 398 399 for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) { 400 pnp = VTOP(vp); 401 pcp = pnp->pr_common; 402 pcp->prc_thread = NULL; 403 pcp->prc_flags |= PRC_DESTROY; 404 prnotify(vp); 405 } 406 407 if (p->p_trace) 408 prnotify(p->p_trace); 409 } 410 411 /* 412 * Called when a zombie thread is joined or when a 413 * detached lwp exits. Called from lwp_hash_out(). 
414 */ 415 void 416 prlwpfree(proc_t *p, lwpent_t *lep) 417 { 418 vnode_t *vp; 419 prnode_t *pnp; 420 prcommon_t *pcp; 421 422 ASSERT(MUTEX_HELD(&p->p_lock)); 423 424 /* 425 * The process must be blocked against /proc to do this safely. 426 * The lwp must not disappear while the process is marked P_PR_LOCK. 427 * It is the caller's responsibility to have called prbarrier(p). 428 */ 429 ASSERT(!(p->p_proc_flag & P_PR_LOCK)); 430 431 vp = lep->le_trace; 432 lep->le_trace = NULL; 433 while (vp) { 434 prnotify(vp); 435 pnp = VTOP(vp); 436 pcp = pnp->pr_common; 437 ASSERT(pcp->prc_thread == NULL && 438 (pcp->prc_flags & PRC_DESTROY)); 439 pcp->prc_tslot = -1; 440 vp = pnp->pr_next; 441 pnp->pr_next = NULL; 442 } 443 444 if (p->p_trace) 445 prnotify(p->p_trace); 446 } 447 448 /* 449 * Called from a hook in exec() when a thread starts exec(). 450 */ 451 void 452 prexecstart(void) 453 { 454 proc_t *p = ttoproc(curthread); 455 klwp_t *lwp = ttolwp(curthread); 456 457 /* 458 * The P_PR_EXEC flag blocks /proc operations for 459 * the duration of the exec(). 460 * We can't start exec() while the process is 461 * locked by /proc, so we call prbarrier(). 462 * lwp_nostop keeps the process from being stopped 463 * via job control for the duration of the exec(). 464 */ 465 466 ASSERT(MUTEX_HELD(&p->p_lock)); 467 prbarrier(p); 468 lwp->lwp_nostop++; 469 p->p_proc_flag |= P_PR_EXEC; 470 } 471 472 /* 473 * Called from a hook in exec() when a thread finishes exec(). 474 * The thread may or may not have succeeded. Some other thread 475 * may have beat it to the punch. 
476 */ 477 void 478 prexecend(void) 479 { 480 proc_t *p = ttoproc(curthread); 481 klwp_t *lwp = ttolwp(curthread); 482 vnode_t *vp; 483 prnode_t *pnp; 484 prcommon_t *pcp; 485 model_t model = p->p_model; 486 id_t tid = curthread->t_tid; 487 int tslot = curthread->t_dslot; 488 489 ASSERT(MUTEX_HELD(&p->p_lock)); 490 491 lwp->lwp_nostop--; 492 if (p->p_flag & SEXITLWPS) { 493 /* 494 * We are on our way to exiting because some 495 * other thread beat us in the race to exec(). 496 * Don't clear the P_PR_EXEC flag in this case. 497 */ 498 return; 499 } 500 501 /* 502 * Wake up anyone waiting in /proc for the process to complete exec(). 503 */ 504 p->p_proc_flag &= ~P_PR_EXEC; 505 if ((vp = p->p_trace) != NULL) { 506 pcp = VTOP(vp)->pr_common; 507 mutex_enter(&pcp->prc_mutex); 508 cv_broadcast(&pcp->prc_wait); 509 mutex_exit(&pcp->prc_mutex); 510 for (; vp != NULL; vp = pnp->pr_next) { 511 pnp = VTOP(vp); 512 pnp->pr_common->prc_datamodel = model; 513 } 514 } 515 if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) { 516 /* 517 * We dealt with the process common above. 518 */ 519 ASSERT(p->p_trace != NULL); 520 pcp = VTOP(vp)->pr_common; 521 mutex_enter(&pcp->prc_mutex); 522 cv_broadcast(&pcp->prc_wait); 523 mutex_exit(&pcp->prc_mutex); 524 for (; vp != NULL; vp = pnp->pr_next) { 525 pnp = VTOP(vp); 526 pcp = pnp->pr_common; 527 pcp->prc_datamodel = model; 528 pcp->prc_tid = tid; 529 pcp->prc_tslot = tslot; 530 } 531 } 532 } 533 534 /* 535 * Called from a hook in relvm() just before freeing the address space. 536 * We free all the watched areas now. 
537 */ 538 void 539 prrelvm(void) 540 { 541 proc_t *p = ttoproc(curthread); 542 543 mutex_enter(&p->p_lock); 544 prbarrier(p); /* block all other /proc operations */ 545 if (pr_watch_active(p)) { 546 pr_free_watchpoints(p); 547 watch_disable(curthread); 548 } 549 mutex_exit(&p->p_lock); 550 pr_free_watched_pages(p); 551 } 552 553 /* 554 * Called from hooks in exec-related code when a traced process 555 * attempts to exec(2) a setuid/setgid program or an unreadable 556 * file. Rather than fail the exec we invalidate the associated 557 * /proc vnodes so that subsequent attempts to use them will fail. 558 * 559 * All /proc vnodes, except directory vnodes, are retained on a linked 560 * list (rooted at p_plist in the process structure) until last close. 561 * 562 * A controlling process must re-open the /proc files in order to 563 * regain control. 564 */ 565 void 566 prinvalidate(struct user *up) 567 { 568 kthread_t *t = curthread; 569 proc_t *p = ttoproc(t); 570 vnode_t *vp; 571 prnode_t *pnp; 572 int writers = 0; 573 574 mutex_enter(&p->p_lock); 575 prbarrier(p); /* block all other /proc operations */ 576 577 /* 578 * At this moment, there can be only one lwp in the process. 579 */ 580 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 581 582 /* 583 * Invalidate any currently active /proc vnodes. 584 */ 585 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) { 586 pnp = VTOP(vp); 587 switch (pnp->pr_type) { 588 case PR_PSINFO: /* these files can read by anyone */ 589 case PR_LPSINFO: 590 case PR_LWPSINFO: 591 case PR_LWPDIR: 592 case PR_LWPIDDIR: 593 case PR_USAGE: 594 case PR_LUSAGE: 595 case PR_LWPUSAGE: 596 break; 597 default: 598 pnp->pr_flags |= PR_INVAL; 599 break; 600 } 601 } 602 /* 603 * Wake up anyone waiting for the process or lwp. 604 * p->p_trace is guaranteed to be non-NULL if there 605 * are any open /proc files for this process. 
606 */ 607 if ((vp = p->p_trace) != NULL) { 608 prcommon_t *pcp = VTOP(vp)->pr_pcommon; 609 610 prnotify(vp); 611 /* 612 * Are there any writers? 613 */ 614 if ((writers = pcp->prc_writers) != 0) { 615 /* 616 * Clear the exclusive open flag (old /proc interface). 617 * Set prc_selfopens equal to prc_writers so that 618 * the next O_EXCL|O_WRITE open will succeed 619 * even with existing (though invalid) writers. 620 * prclose() must decrement prc_selfopens when 621 * the invalid files are closed. 622 */ 623 pcp->prc_flags &= ~PRC_EXCL; 624 ASSERT(pcp->prc_selfopens <= writers); 625 pcp->prc_selfopens = writers; 626 } 627 } 628 vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace; 629 while (vp != NULL) { 630 /* 631 * We should not invalidate the lwpiddir vnodes, 632 * but the necessities of maintaining the old 633 * ioctl()-based version of /proc require it. 634 */ 635 pnp = VTOP(vp); 636 pnp->pr_flags |= PR_INVAL; 637 prnotify(vp); 638 vp = pnp->pr_next; 639 } 640 641 /* 642 * If any tracing flags are in effect and any vnodes are open for 643 * writing then set the requested-stop and run-on-last-close flags. 644 * Otherwise, clear all tracing flags. 645 */ 646 t->t_proc_flag &= ~TP_PAUSE; 647 if ((p->p_proc_flag & P_PR_TRACE) && writers) { 648 t->t_proc_flag |= TP_PRSTOP; 649 aston(t); /* so ISSIG will see the flag */ 650 p->p_proc_flag |= P_PR_RUNLCL; 651 } else { 652 premptyset(&up->u_entrymask); /* syscalls */ 653 premptyset(&up->u_exitmask); 654 up->u_systrap = 0; 655 premptyset(&p->p_sigmask); /* signals */ 656 premptyset(&p->p_fltmask); /* faults */ 657 t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING); 658 p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE); 659 prnostep(ttolwp(t)); 660 } 661 662 mutex_exit(&p->p_lock); 663 } 664 665 /* 666 * Acquire the controlled process's p_lock and mark it P_PR_LOCK. 667 * Return with pr_pidlock held in all cases. 668 * Return with p_lock held if the the process still exists. 
669 * Return value is the process pointer if the process still exists, else NULL. 670 * If we lock the process, give ourself kernel priority to avoid deadlocks; 671 * this is undone in prunlock(). 672 */ 673 proc_t * 674 pr_p_lock(prnode_t *pnp) 675 { 676 proc_t *p; 677 prcommon_t *pcp; 678 679 mutex_enter(&pr_pidlock); 680 if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL) 681 return (NULL); 682 mutex_enter(&p->p_lock); 683 while (p->p_proc_flag & P_PR_LOCK) { 684 /* 685 * This cv/mutex pair is persistent even if 686 * the process disappears while we sleep. 687 */ 688 kcondvar_t *cv = &pr_pid_cv[p->p_slot]; 689 kmutex_t *mp = &p->p_lock; 690 691 mutex_exit(&pr_pidlock); 692 cv_wait(cv, mp); 693 mutex_exit(mp); 694 mutex_enter(&pr_pidlock); 695 if (pcp->prc_proc == NULL) 696 return (NULL); 697 ASSERT(p == pcp->prc_proc); 698 mutex_enter(&p->p_lock); 699 } 700 p->p_proc_flag |= P_PR_LOCK; 701 THREAD_KPRI_REQUEST(); 702 return (p); 703 } 704 705 /* 706 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock. 707 * This prevents any lwp of the process from disappearing and 708 * blocks most operations that a process can perform on itself. 709 * Returns 0 on success, a non-zero error number on failure. 710 * 711 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when 712 * the subject process is a zombie (ZYES) or fail for zombies (ZNO). 713 * 714 * error returns: 715 * ENOENT: process or lwp has disappeared or process is exiting 716 * (or has become a zombie and zdisp == ZNO). 717 * EAGAIN: procfs vnode has become invalid. 718 * EINTR: signal arrived while waiting for exec to complete. 719 */ 720 int 721 prlock(prnode_t *pnp, int zdisp) 722 { 723 prcommon_t *pcp; 724 proc_t *p; 725 726 again: 727 pcp = pnp->pr_common; 728 p = pr_p_lock(pnp); 729 mutex_exit(&pr_pidlock); 730 731 /* 732 * Return ENOENT immediately if there is no process. 
733 */ 734 if (p == NULL) 735 return (ENOENT); 736 737 ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL); 738 739 /* 740 * Return ENOENT if process entered zombie state or is exiting 741 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies. 742 */ 743 if (zdisp == ZNO && 744 ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) { 745 prunlock(pnp); 746 return (ENOENT); 747 } 748 749 /* 750 * If lwp-specific, check to see if lwp has disappeared. 751 */ 752 if (pcp->prc_flags & PRC_LWP) { 753 if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) || 754 pcp->prc_tslot == -1) { 755 prunlock(pnp); 756 return (ENOENT); 757 } 758 } 759 760 /* 761 * Return EAGAIN if we have encountered a security violation. 762 * (The process exec'd a set-id or unreadable executable file.) 763 */ 764 if (pnp->pr_flags & PR_INVAL) { 765 prunlock(pnp); 766 return (EAGAIN); 767 } 768 769 /* 770 * If process is undergoing an exec(), wait for 771 * completion and then start all over again. 772 */ 773 if (p->p_proc_flag & P_PR_EXEC) { 774 pcp = pnp->pr_pcommon; /* Put on the correct sleep queue */ 775 mutex_enter(&pcp->prc_mutex); 776 prunlock(pnp); 777 if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) { 778 mutex_exit(&pcp->prc_mutex); 779 return (EINTR); 780 } 781 mutex_exit(&pcp->prc_mutex); 782 goto again; 783 } 784 785 /* 786 * We return holding p->p_lock. 787 */ 788 return (0); 789 } 790 791 /* 792 * Undo prlock() and pr_p_lock(). 793 * p->p_lock is still held; pr_pidlock is no longer held. 794 * 795 * prunmark() drops the P_PR_LOCK flag and wakes up another thread, 796 * if any, waiting for the flag to be dropped; it retains p->p_lock. 797 * 798 * prunlock() calls prunmark() and then drops p->p_lock. 
799 */ 800 void 801 prunmark(proc_t *p) 802 { 803 ASSERT(p->p_proc_flag & P_PR_LOCK); 804 ASSERT(MUTEX_HELD(&p->p_lock)); 805 806 cv_signal(&pr_pid_cv[p->p_slot]); 807 p->p_proc_flag &= ~P_PR_LOCK; 808 THREAD_KPRI_RELEASE(); 809 } 810 811 void 812 prunlock(prnode_t *pnp) 813 { 814 prcommon_t *pcp = pnp->pr_common; 815 proc_t *p = pcp->prc_proc; 816 817 /* 818 * If we (or someone) gave it a SIGKILL, and it is not 819 * already a zombie, set it running unconditionally. 820 */ 821 if ((p->p_flag & SKILLED) && 822 !(p->p_flag & SEXITING) && 823 !(pcp->prc_flags & PRC_DESTROY) && 824 !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1)) 825 (void) pr_setrun(pnp, 0); 826 prunmark(p); 827 mutex_exit(&p->p_lock); 828 } 829 830 /* 831 * Called while holding p->p_lock to delay until the process is unlocked. 832 * We enter holding p->p_lock; p->p_lock is dropped and reacquired. 833 * The process cannot become locked again until p->p_lock is dropped. 834 */ 835 void 836 prbarrier(proc_t *p) 837 { 838 ASSERT(MUTEX_HELD(&p->p_lock)); 839 840 if (p->p_proc_flag & P_PR_LOCK) { 841 /* The process is locked; delay until not locked */ 842 uint_t slot = p->p_slot; 843 844 while (p->p_proc_flag & P_PR_LOCK) 845 cv_wait(&pr_pid_cv[slot], &p->p_lock); 846 cv_signal(&pr_pid_cv[slot]); 847 } 848 } 849 850 /* 851 * Return process/lwp status. 852 * The u-block is mapped in by this routine and unmapped at the end. 
853 */ 854 void 855 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp) 856 { 857 kthread_t *t; 858 859 ASSERT(MUTEX_HELD(&p->p_lock)); 860 861 t = prchoose(p); /* returns locked thread */ 862 ASSERT(t != NULL); 863 thread_unlock(t); 864 865 /* just bzero the process part, prgetlwpstatus() does the rest */ 866 bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t)); 867 sp->pr_nlwp = p->p_lwpcnt; 868 sp->pr_nzomb = p->p_zombcnt; 869 prassignset(&sp->pr_sigpend, &p->p_sig); 870 sp->pr_brkbase = (uintptr_t)p->p_brkbase; 871 sp->pr_brksize = p->p_brksize; 872 sp->pr_stkbase = (uintptr_t)prgetstackbase(p); 873 sp->pr_stksize = p->p_stksize; 874 sp->pr_pid = p->p_pid; 875 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 876 (p->p_flag & SZONETOP)) { 877 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 878 /* 879 * Inside local zones, fake zsched's pid as parent pids for 880 * processes which reference processes outside of the zone. 881 */ 882 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 883 } else { 884 sp->pr_ppid = p->p_ppid; 885 } 886 sp->pr_pgid = p->p_pgrp; 887 sp->pr_sid = p->p_sessp->s_sid; 888 sp->pr_taskid = p->p_task->tk_tkid; 889 sp->pr_projid = p->p_task->tk_proj->kpj_id; 890 sp->pr_zoneid = p->p_zone->zone_id; 891 hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime); 892 hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime); 893 TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime); 894 TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime); 895 prassignset(&sp->pr_sigtrace, &p->p_sigmask); 896 prassignset(&sp->pr_flttrace, &p->p_fltmask); 897 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask); 898 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask); 899 switch (p->p_model) { 900 case DATAMODEL_ILP32: 901 sp->pr_dmodel = PR_MODEL_ILP32; 902 break; 903 case DATAMODEL_LP64: 904 sp->pr_dmodel = PR_MODEL_LP64; 905 break; 906 } 907 if (p->p_agenttp) 908 sp->pr_agentid = p->p_agenttp->t_tid; 909 910 /* get the chosen lwp's status */ 911 prgetlwpstatus(t, &sp->pr_lwp, zp); 912 
913 /* replicate the flags */ 914 sp->pr_flags = sp->pr_lwp.pr_flags; 915 } 916 917 #ifdef _SYSCALL32_IMPL 918 void 919 prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp) 920 { 921 proc_t *p = ttoproc(t); 922 klwp_t *lwp = ttolwp(t); 923 struct mstate *ms = &lwp->lwp_mstate; 924 hrtime_t usr, sys; 925 int flags; 926 ulong_t instr; 927 928 ASSERT(MUTEX_HELD(&p->p_lock)); 929 930 bzero(sp, sizeof (*sp)); 931 flags = 0L; 932 if (t->t_state == TS_STOPPED) { 933 flags |= PR_STOPPED; 934 if ((t->t_schedflag & TS_PSTART) == 0) 935 flags |= PR_ISTOP; 936 } else if (VSTOPPED(t)) { 937 flags |= PR_STOPPED|PR_ISTOP; 938 } 939 if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP)) 940 flags |= PR_DSTOP; 941 if (lwp->lwp_asleep) 942 flags |= PR_ASLEEP; 943 if (t == p->p_agenttp) 944 flags |= PR_AGENT; 945 if (!(t->t_proc_flag & TP_TWAIT)) 946 flags |= PR_DETACH; 947 if (t->t_proc_flag & TP_DAEMON) 948 flags |= PR_DAEMON; 949 if (p->p_proc_flag & P_PR_FORK) 950 flags |= PR_FORK; 951 if (p->p_proc_flag & P_PR_RUNLCL) 952 flags |= PR_RLC; 953 if (p->p_proc_flag & P_PR_KILLCL) 954 flags |= PR_KLC; 955 if (p->p_proc_flag & P_PR_ASYNC) 956 flags |= PR_ASYNC; 957 if (p->p_proc_flag & P_PR_BPTADJ) 958 flags |= PR_BPTADJ; 959 if (p->p_proc_flag & P_PR_PTRACE) 960 flags |= PR_PTRACE; 961 if (p->p_flag & SMSACCT) 962 flags |= PR_MSACCT; 963 if (p->p_flag & SMSFORK) 964 flags |= PR_MSFORK; 965 if (p->p_flag & SVFWAIT) 966 flags |= PR_VFORKP; 967 sp->pr_flags = flags; 968 if (VSTOPPED(t)) { 969 sp->pr_why = PR_REQUESTED; 970 sp->pr_what = 0; 971 } else { 972 sp->pr_why = t->t_whystop; 973 sp->pr_what = t->t_whatstop; 974 } 975 sp->pr_lwpid = t->t_tid; 976 sp->pr_cursig = lwp->lwp_cursig; 977 prassignset(&sp->pr_lwppend, &t->t_sig); 978 schedctl_finish_sigblock(t); 979 prassignset(&sp->pr_lwphold, &t->t_hold); 980 if (t->t_whystop == PR_FAULTED) { 981 siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info); 982 if (t->t_whatstop == FLTPAGE) 983 sp->pr_info.si_addr = 984 
(caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr; 985 } else if (lwp->lwp_curinfo) 986 siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info); 987 if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID && 988 sp->pr_info.si_zoneid != zp->zone_id) { 989 sp->pr_info.si_pid = zp->zone_zsched->p_pid; 990 sp->pr_info.si_uid = 0; 991 sp->pr_info.si_ctid = -1; 992 sp->pr_info.si_zoneid = zp->zone_id; 993 } 994 sp->pr_altstack.ss_sp = 995 (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp; 996 sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size; 997 sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags; 998 prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action); 999 sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext; 1000 sp->pr_ustack = (caddr32_t)lwp->lwp_ustack; 1001 (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name, 1002 sizeof (sp->pr_clname) - 1); 1003 if (flags & PR_STOPPED) 1004 hrt2ts32(t->t_stoptime, &sp->pr_tstamp); 1005 usr = ms->ms_acct[LMS_USER]; 1006 sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP]; 1007 scalehrtime(&usr); 1008 scalehrtime(&sys); 1009 hrt2ts32(usr, &sp->pr_utime); 1010 hrt2ts32(sys, &sp->pr_stime); 1011 1012 /* 1013 * Fetch the current instruction, if not a system process. 1014 * We don't attempt this unless the lwp is stopped. 1015 */ 1016 if ((p->p_flag & SSYS) || p->p_as == &kas) 1017 sp->pr_flags |= (PR_ISSYS|PR_PCINVAL); 1018 else if (!(flags & PR_STOPPED)) 1019 sp->pr_flags |= PR_PCINVAL; 1020 else if (!prfetchinstr(lwp, &instr)) 1021 sp->pr_flags |= PR_PCINVAL; 1022 else 1023 sp->pr_instr = (uint32_t)instr; 1024 1025 /* 1026 * Drop p_lock while touching the lwp's stack. 
1027 */ 1028 mutex_exit(&p->p_lock); 1029 if (prisstep(lwp)) 1030 sp->pr_flags |= PR_STEP; 1031 if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) { 1032 int i; 1033 1034 sp->pr_syscall = get_syscall32_args(lwp, 1035 (int *)sp->pr_sysarg, &i); 1036 sp->pr_nsysarg = (ushort_t)i; 1037 } 1038 if ((flags & PR_STOPPED) || t == curthread) 1039 prgetprregs32(lwp, sp->pr_reg); 1040 if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) || 1041 (flags & PR_VFORKP)) { 1042 long r1, r2; 1043 user_t *up; 1044 auxv_t *auxp; 1045 int i; 1046 1047 sp->pr_errno = prgetrvals(lwp, &r1, &r2); 1048 if (sp->pr_errno == 0) { 1049 sp->pr_rval1 = (int32_t)r1; 1050 sp->pr_rval2 = (int32_t)r2; 1051 sp->pr_errpriv = PRIV_NONE; 1052 } else 1053 sp->pr_errpriv = lwp->lwp_badpriv; 1054 1055 if (t->t_sysnum == SYS_exec || t->t_sysnum == SYS_execve) { 1056 up = PTOU(p); 1057 sp->pr_sysarg[0] = 0; 1058 sp->pr_sysarg[1] = (caddr32_t)up->u_argv; 1059 sp->pr_sysarg[2] = (caddr32_t)up->u_envp; 1060 for (i = 0, auxp = up->u_auxv; 1061 i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]); 1062 i++, auxp++) { 1063 if (auxp->a_type == AT_SUN_EXECNAME) { 1064 sp->pr_sysarg[0] = 1065 (caddr32_t)(uintptr_t)auxp->a_un.a_ptr; 1066 break; 1067 } 1068 } 1069 } 1070 } 1071 if (prhasfp()) 1072 prgetprfpregs32(lwp, &sp->pr_fpreg); 1073 mutex_enter(&p->p_lock); 1074 } 1075 1076 void 1077 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp) 1078 { 1079 kthread_t *t; 1080 1081 ASSERT(MUTEX_HELD(&p->p_lock)); 1082 1083 t = prchoose(p); /* returns locked thread */ 1084 ASSERT(t != NULL); 1085 thread_unlock(t); 1086 1087 /* just bzero the process part, prgetlwpstatus32() does the rest */ 1088 bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t)); 1089 sp->pr_nlwp = p->p_lwpcnt; 1090 sp->pr_nzomb = p->p_zombcnt; 1091 prassignset(&sp->pr_sigpend, &p->p_sig); 1092 sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase; 1093 sp->pr_brksize = (uint32_t)p->p_brksize; 1094 sp->pr_stkbase = 
(uint32_t)(uintptr_t)prgetstackbase(p); 1095 sp->pr_stksize = (uint32_t)p->p_stksize; 1096 sp->pr_pid = p->p_pid; 1097 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 1098 (p->p_flag & SZONETOP)) { 1099 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 1100 /* 1101 * Inside local zones, fake zsched's pid as parent pids for 1102 * processes which reference processes outside of the zone. 1103 */ 1104 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 1105 } else { 1106 sp->pr_ppid = p->p_ppid; 1107 } 1108 sp->pr_pgid = p->p_pgrp; 1109 sp->pr_sid = p->p_sessp->s_sid; 1110 sp->pr_taskid = p->p_task->tk_tkid; 1111 sp->pr_projid = p->p_task->tk_proj->kpj_id; 1112 sp->pr_zoneid = p->p_zone->zone_id; 1113 hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime); 1114 hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime); 1115 TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime); 1116 TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime); 1117 prassignset(&sp->pr_sigtrace, &p->p_sigmask); 1118 prassignset(&sp->pr_flttrace, &p->p_fltmask); 1119 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask); 1120 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask); 1121 switch (p->p_model) { 1122 case DATAMODEL_ILP32: 1123 sp->pr_dmodel = PR_MODEL_ILP32; 1124 break; 1125 case DATAMODEL_LP64: 1126 sp->pr_dmodel = PR_MODEL_LP64; 1127 break; 1128 } 1129 if (p->p_agenttp) 1130 sp->pr_agentid = p->p_agenttp->t_tid; 1131 1132 /* get the chosen lwp's status */ 1133 prgetlwpstatus32(t, &sp->pr_lwp, zp); 1134 1135 /* replicate the flags */ 1136 sp->pr_flags = sp->pr_lwp.pr_flags; 1137 } 1138 #endif /* _SYSCALL32_IMPL */ 1139 1140 /* 1141 * Return lwp status. 
1142 */ 1143 void 1144 prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp) 1145 { 1146 proc_t *p = ttoproc(t); 1147 klwp_t *lwp = ttolwp(t); 1148 struct mstate *ms = &lwp->lwp_mstate; 1149 hrtime_t usr, sys; 1150 int flags; 1151 ulong_t instr; 1152 1153 ASSERT(MUTEX_HELD(&p->p_lock)); 1154 1155 bzero(sp, sizeof (*sp)); 1156 flags = 0L; 1157 if (t->t_state == TS_STOPPED) { 1158 flags |= PR_STOPPED; 1159 if ((t->t_schedflag & TS_PSTART) == 0) 1160 flags |= PR_ISTOP; 1161 } else if (VSTOPPED(t)) { 1162 flags |= PR_STOPPED|PR_ISTOP; 1163 } 1164 if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP)) 1165 flags |= PR_DSTOP; 1166 if (lwp->lwp_asleep) 1167 flags |= PR_ASLEEP; 1168 if (t == p->p_agenttp) 1169 flags |= PR_AGENT; 1170 if (!(t->t_proc_flag & TP_TWAIT)) 1171 flags |= PR_DETACH; 1172 if (t->t_proc_flag & TP_DAEMON) 1173 flags |= PR_DAEMON; 1174 if (p->p_proc_flag & P_PR_FORK) 1175 flags |= PR_FORK; 1176 if (p->p_proc_flag & P_PR_RUNLCL) 1177 flags |= PR_RLC; 1178 if (p->p_proc_flag & P_PR_KILLCL) 1179 flags |= PR_KLC; 1180 if (p->p_proc_flag & P_PR_ASYNC) 1181 flags |= PR_ASYNC; 1182 if (p->p_proc_flag & P_PR_BPTADJ) 1183 flags |= PR_BPTADJ; 1184 if (p->p_proc_flag & P_PR_PTRACE) 1185 flags |= PR_PTRACE; 1186 if (p->p_flag & SMSACCT) 1187 flags |= PR_MSACCT; 1188 if (p->p_flag & SMSFORK) 1189 flags |= PR_MSFORK; 1190 if (p->p_flag & SVFWAIT) 1191 flags |= PR_VFORKP; 1192 if (p->p_pgidp->pid_pgorphaned) 1193 flags |= PR_ORPHAN; 1194 if (p->p_pidflag & CLDNOSIGCHLD) 1195 flags |= PR_NOSIGCHLD; 1196 if (p->p_pidflag & CLDWAITPID) 1197 flags |= PR_WAITPID; 1198 sp->pr_flags = flags; 1199 if (VSTOPPED(t)) { 1200 sp->pr_why = PR_REQUESTED; 1201 sp->pr_what = 0; 1202 } else { 1203 sp->pr_why = t->t_whystop; 1204 sp->pr_what = t->t_whatstop; 1205 } 1206 sp->pr_lwpid = t->t_tid; 1207 sp->pr_cursig = lwp->lwp_cursig; 1208 prassignset(&sp->pr_lwppend, &t->t_sig); 1209 schedctl_finish_sigblock(t); 1210 prassignset(&sp->pr_lwphold, &t->t_hold); 1211 if (t->t_whystop 
== PR_FAULTED) 1212 bcopy(&lwp->lwp_siginfo, 1213 &sp->pr_info, sizeof (k_siginfo_t)); 1214 else if (lwp->lwp_curinfo) 1215 bcopy(&lwp->lwp_curinfo->sq_info, 1216 &sp->pr_info, sizeof (k_siginfo_t)); 1217 if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID && 1218 sp->pr_info.si_zoneid != zp->zone_id) { 1219 sp->pr_info.si_pid = zp->zone_zsched->p_pid; 1220 sp->pr_info.si_uid = 0; 1221 sp->pr_info.si_ctid = -1; 1222 sp->pr_info.si_zoneid = zp->zone_id; 1223 } 1224 sp->pr_altstack = lwp->lwp_sigaltstack; 1225 prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action); 1226 sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext; 1227 sp->pr_ustack = lwp->lwp_ustack; 1228 (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name, 1229 sizeof (sp->pr_clname) - 1); 1230 if (flags & PR_STOPPED) 1231 hrt2ts(t->t_stoptime, &sp->pr_tstamp); 1232 usr = ms->ms_acct[LMS_USER]; 1233 sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP]; 1234 scalehrtime(&usr); 1235 scalehrtime(&sys); 1236 hrt2ts(usr, &sp->pr_utime); 1237 hrt2ts(sys, &sp->pr_stime); 1238 1239 /* 1240 * Fetch the current instruction, if not a system process. 1241 * We don't attempt this unless the lwp is stopped. 1242 */ 1243 if ((p->p_flag & SSYS) || p->p_as == &kas) 1244 sp->pr_flags |= (PR_ISSYS|PR_PCINVAL); 1245 else if (!(flags & PR_STOPPED)) 1246 sp->pr_flags |= PR_PCINVAL; 1247 else if (!prfetchinstr(lwp, &instr)) 1248 sp->pr_flags |= PR_PCINVAL; 1249 else 1250 sp->pr_instr = instr; 1251 1252 /* 1253 * Drop p_lock while touching the lwp's stack. 
1254 */ 1255 mutex_exit(&p->p_lock); 1256 if (prisstep(lwp)) 1257 sp->pr_flags |= PR_STEP; 1258 if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) { 1259 int i; 1260 1261 sp->pr_syscall = get_syscall_args(lwp, 1262 (long *)sp->pr_sysarg, &i); 1263 sp->pr_nsysarg = (ushort_t)i; 1264 } 1265 if ((flags & PR_STOPPED) || t == curthread) 1266 prgetprregs(lwp, sp->pr_reg); 1267 if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) || 1268 (flags & PR_VFORKP)) { 1269 user_t *up; 1270 auxv_t *auxp; 1271 int i; 1272 1273 sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2); 1274 if (sp->pr_errno == 0) 1275 sp->pr_errpriv = PRIV_NONE; 1276 else 1277 sp->pr_errpriv = lwp->lwp_badpriv; 1278 1279 if (t->t_sysnum == SYS_exec || t->t_sysnum == SYS_execve) { 1280 up = PTOU(p); 1281 sp->pr_sysarg[0] = 0; 1282 sp->pr_sysarg[1] = (uintptr_t)up->u_argv; 1283 sp->pr_sysarg[2] = (uintptr_t)up->u_envp; 1284 for (i = 0, auxp = up->u_auxv; 1285 i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]); 1286 i++, auxp++) { 1287 if (auxp->a_type == AT_SUN_EXECNAME) { 1288 sp->pr_sysarg[0] = 1289 (uintptr_t)auxp->a_un.a_ptr; 1290 break; 1291 } 1292 } 1293 } 1294 } 1295 if (prhasfp()) 1296 prgetprfpregs(lwp, &sp->pr_fpreg); 1297 mutex_enter(&p->p_lock); 1298 } 1299 1300 /* 1301 * Get the sigaction structure for the specified signal. The u-block 1302 * must already have been mapped in by the caller. 
1303 */ 1304 void 1305 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp) 1306 { 1307 bzero(sp, sizeof (*sp)); 1308 1309 if (sig != 0 && (unsigned)sig < NSIG) { 1310 sp->sa_handler = up->u_signal[sig-1]; 1311 prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]); 1312 if (sigismember(&up->u_sigonstack, sig)) 1313 sp->sa_flags |= SA_ONSTACK; 1314 if (sigismember(&up->u_sigresethand, sig)) 1315 sp->sa_flags |= SA_RESETHAND; 1316 if (sigismember(&up->u_sigrestart, sig)) 1317 sp->sa_flags |= SA_RESTART; 1318 if (sigismember(&p->p_siginfo, sig)) 1319 sp->sa_flags |= SA_SIGINFO; 1320 if (sigismember(&up->u_signodefer, sig)) 1321 sp->sa_flags |= SA_NODEFER; 1322 if (sig == SIGCLD) { 1323 if (p->p_flag & SNOWAIT) 1324 sp->sa_flags |= SA_NOCLDWAIT; 1325 if ((p->p_flag & SJCTL) == 0) 1326 sp->sa_flags |= SA_NOCLDSTOP; 1327 } 1328 } 1329 } 1330 1331 #ifdef _SYSCALL32_IMPL 1332 void 1333 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp) 1334 { 1335 bzero(sp, sizeof (*sp)); 1336 1337 if (sig != 0 && (unsigned)sig < NSIG) { 1338 sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1]; 1339 prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]); 1340 if (sigismember(&up->u_sigonstack, sig)) 1341 sp->sa_flags |= SA_ONSTACK; 1342 if (sigismember(&up->u_sigresethand, sig)) 1343 sp->sa_flags |= SA_RESETHAND; 1344 if (sigismember(&up->u_sigrestart, sig)) 1345 sp->sa_flags |= SA_RESTART; 1346 if (sigismember(&p->p_siginfo, sig)) 1347 sp->sa_flags |= SA_SIGINFO; 1348 if (sigismember(&up->u_signodefer, sig)) 1349 sp->sa_flags |= SA_NODEFER; 1350 if (sig == SIGCLD) { 1351 if (p->p_flag & SNOWAIT) 1352 sp->sa_flags |= SA_NOCLDWAIT; 1353 if ((p->p_flag & SJCTL) == 0) 1354 sp->sa_flags |= SA_NOCLDSTOP; 1355 } 1356 } 1357 } 1358 #endif /* _SYSCALL32_IMPL */ 1359 1360 /* 1361 * Count the number of segments in this process's address space. 
1362 */ 1363 int 1364 prnsegs(struct as *as, int reserved) 1365 { 1366 int n = 0; 1367 struct seg *seg; 1368 1369 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 1370 1371 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { 1372 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved); 1373 caddr_t saddr, naddr; 1374 void *tmp = NULL; 1375 1376 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1377 (void) pr_getprot(seg, reserved, &tmp, 1378 &saddr, &naddr, eaddr); 1379 if (saddr != naddr) 1380 n++; 1381 } 1382 1383 ASSERT(tmp == NULL); 1384 } 1385 1386 return (n); 1387 } 1388 1389 /* 1390 * Convert uint32_t to decimal string w/o leading zeros. 1391 * Add trailing null characters if 'len' is greater than string length. 1392 * Return the string length. 1393 */ 1394 int 1395 pr_u32tos(uint32_t n, char *s, int len) 1396 { 1397 char cbuf[11]; /* 32-bit unsigned integer fits in 10 digits */ 1398 char *cp = cbuf; 1399 char *end = s + len; 1400 1401 do { 1402 *cp++ = (char)(n % 10 + '0'); 1403 n /= 10; 1404 } while (n); 1405 1406 len = (int)(cp - cbuf); 1407 1408 do { 1409 *s++ = *--cp; 1410 } while (cp > cbuf); 1411 1412 while (s < end) /* optional pad */ 1413 *s++ = '\0'; 1414 1415 return (len); 1416 } 1417 1418 /* 1419 * Convert uint64_t to decimal string w/o leading zeros. 1420 * Return the string length. 
1421 */ 1422 static int 1423 pr_u64tos(uint64_t n, char *s) 1424 { 1425 char cbuf[21]; /* 64-bit unsigned integer fits in 20 digits */ 1426 char *cp = cbuf; 1427 int len; 1428 1429 do { 1430 *cp++ = (char)(n % 10 + '0'); 1431 n /= 10; 1432 } while (n); 1433 1434 len = (int)(cp - cbuf); 1435 1436 do { 1437 *s++ = *--cp; 1438 } while (cp > cbuf); 1439 1440 return (len); 1441 } 1442 1443 void 1444 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr) 1445 { 1446 char *s = name; 1447 struct vfs *vfsp; 1448 struct vfssw *vfsswp; 1449 1450 if ((vfsp = vp->v_vfsp) != NULL && 1451 ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) && 1452 *vfsswp->vsw_name) { 1453 (void) strcpy(s, vfsswp->vsw_name); 1454 s += strlen(s); 1455 *s++ = '.'; 1456 } 1457 s += pr_u32tos(getmajor(vattr->va_fsid), s, 0); 1458 *s++ = '.'; 1459 s += pr_u32tos(getminor(vattr->va_fsid), s, 0); 1460 *s++ = '.'; 1461 s += pr_u64tos(vattr->va_nodeid, s); 1462 *s++ = '\0'; 1463 } 1464 1465 struct seg * 1466 break_seg(proc_t *p) 1467 { 1468 caddr_t addr = p->p_brkbase; 1469 struct seg *seg; 1470 struct vnode *vp; 1471 1472 if (p->p_brksize != 0) 1473 addr += p->p_brksize - 1; 1474 seg = as_segat(p->p_as, addr); 1475 if (seg != NULL && seg->s_ops == &segvn_ops && 1476 (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL)) 1477 return (seg); 1478 return (NULL); 1479 } 1480 1481 /* 1482 * Implementation of service functions to handle procfs generic chained 1483 * copyout buffers. 
1484 */ 1485 typedef struct pr_iobuf_list { 1486 list_node_t piol_link; /* buffer linkage */ 1487 size_t piol_size; /* total size (header + data) */ 1488 size_t piol_usedsize; /* amount to copy out from this buf */ 1489 } piol_t; 1490 1491 #define MAPSIZE (64 * 1024) 1492 #define PIOL_DATABUF(iol) ((void *)(&(iol)[1])) 1493 1494 void 1495 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n) 1496 { 1497 piol_t *iol; 1498 size_t initial_size = MIN(1, n) * itemsize; 1499 1500 list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link)); 1501 1502 ASSERT(list_head(iolhead) == NULL); 1503 ASSERT(itemsize < MAPSIZE - sizeof (*iol)); 1504 ASSERT(initial_size > 0); 1505 1506 /* 1507 * Someone creating chained copyout buffers may ask for less than 1508 * MAPSIZE if the amount of data to be buffered is known to be 1509 * smaller than that. 1510 * But in order to prevent involuntary self-denial of service, 1511 * the requested input size is clamped at MAPSIZE. 1512 */ 1513 initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol)); 1514 iol = kmem_alloc(initial_size, KM_SLEEP); 1515 list_insert_head(iolhead, iol); 1516 iol->piol_usedsize = 0; 1517 iol->piol_size = initial_size; 1518 } 1519 1520 void * 1521 pr_iol_newbuf(list_t *iolhead, size_t itemsize) 1522 { 1523 piol_t *iol; 1524 char *new; 1525 1526 ASSERT(itemsize < MAPSIZE - sizeof (*iol)); 1527 ASSERT(list_head(iolhead) != NULL); 1528 1529 iol = (piol_t *)list_tail(iolhead); 1530 1531 if (iol->piol_size < 1532 iol->piol_usedsize + sizeof (*iol) + itemsize) { 1533 /* 1534 * Out of space in the current buffer. Allocate more. 
1535 */ 1536 piol_t *newiol; 1537 1538 newiol = kmem_alloc(MAPSIZE, KM_SLEEP); 1539 newiol->piol_size = MAPSIZE; 1540 newiol->piol_usedsize = 0; 1541 1542 list_insert_after(iolhead, iol, newiol); 1543 iol = list_next(iolhead, iol); 1544 ASSERT(iol == newiol); 1545 } 1546 new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize; 1547 iol->piol_usedsize += itemsize; 1548 bzero(new, itemsize); 1549 return (new); 1550 } 1551 1552 int 1553 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin) 1554 { 1555 int error = errin; 1556 piol_t *iol; 1557 1558 while ((iol = list_head(iolhead)) != NULL) { 1559 list_remove(iolhead, iol); 1560 if (!error) { 1561 if (copyout(PIOL_DATABUF(iol), *tgt, 1562 iol->piol_usedsize)) 1563 error = EFAULT; 1564 *tgt += iol->piol_usedsize; 1565 } 1566 kmem_free(iol, iol->piol_size); 1567 } 1568 list_destroy(iolhead); 1569 1570 return (error); 1571 } 1572 1573 int 1574 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin) 1575 { 1576 offset_t off = uiop->uio_offset; 1577 char *base; 1578 size_t size; 1579 piol_t *iol; 1580 int error = errin; 1581 1582 while ((iol = list_head(iolhead)) != NULL) { 1583 list_remove(iolhead, iol); 1584 base = PIOL_DATABUF(iol); 1585 size = iol->piol_usedsize; 1586 if (off <= size && error == 0 && uiop->uio_resid > 0) 1587 error = uiomove(base + off, size - off, 1588 UIO_READ, uiop); 1589 off = MAX(0, off - (offset_t)size); 1590 kmem_free(iol, iol->piol_size); 1591 } 1592 list_destroy(iolhead); 1593 1594 return (error); 1595 } 1596 1597 /* 1598 * Return an array of structures with memory map information. 1599 * We allocate here; the caller must deallocate. 
*/
/*
 * prgetmap() appends one prmap_t to the chained buffer list 'iolhead' for
 * every non-empty address range that pr_getprot() reports within each
 * segment of p's address space.  The list is initialized here; it is left
 * initialized (with no entries) when the address space has no segments.
 * The caller must hold the address space lock as writer.  Always
 * returns 0.
 */
int
prgetmap(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/* remember the heap and stack segments so they can be flagged */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			/* empty range: nothing to report */
			if (saddr == naddr)
				continue;

			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (uintptr_t)saddr;
			mp->pr_size = naddr - saddr;
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			/* anonymous: ISM, or segvn with no vnode behind it */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					/*
					 * Report the stack from the lowest
					 * address allowed by p_stk_ctl
					 * (rounded up to a page) up to naddr.
					 */
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					mp->pr_vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_size = (uintptr_t)naddr -
					    mp->pr_vaddr;
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}

#ifdef _SYSCALL32_IMPL
/*
 * 32-bit flavor of prgetmap(): the same walk, producing prmap32_t entries
 * with addresses and sizes narrowed to 32 bits.
 */
int
prgetmap32(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/* remember the heap and stack segments so they can be flagged */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			/* empty range: nothing to report */
			if (saddr == naddr)
				continue;

			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			mp->pr_size = (size32_t)(naddr - saddr);
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			/* anonymous: ISM, or segvn with no vnode behind it */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					/*
					 * Report the stack from the lowest
					 * address allowed by p_stk_ctl
					 * (rounded up to a page) up to naddr.
					 */
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					uintptr_t vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_vaddr = (caddr32_t)vaddr;
					mp->pr_size = (size32_t)
					    ((uintptr_t)naddr - vaddr);
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Return the size of the /proc page data file.
 *
 * The size is one prpageheader_t plus, for every range of at least one
 * page reported by pr_getprot(), one prasmap_t followed by one byte per
 * page rounded up by round8().  Returns 0 for an address space with no
 * segments.  The caller must hold the address space lock as writer.
 */
size_t
prpdsize(struct as *as)
{
	struct seg *seg;
	size_t size;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	size = sizeof (prpageheader_t);
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;
		size_t npage;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
				size += sizeof (prasmap_t) + round8(npage);
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (size);
}

#ifdef _SYSCALL32_IMPL
/*
 * 32-bit flavor of prpdsize(), sized with prpageheader32_t and
 * prasmap32_t.
 */
size_t
prpdsize32(struct as *as)
{
	struct seg *seg;
	size_t size;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	size = sizeof (prpageheader32_t);
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;
		size_t npage;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
				size += sizeof (prasmap32_t) + round8(npage);
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (size);
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Read page data information.
 *
 * Builds, in a temporary kernel buffer, a prpageheader_t followed by one
 * prasmap_t plus per-page data bytes (filled in by hat_getstat()) for each
 * mapping, and moves the result to the caller with uiomove().
 *
 * Returns 0 when the address space has no segments, E2BIG when
 * uiop->uio_resid is smaller than the size computed by prpdsize(), EINTR
 * when a signal is pending at the time a retry would be needed, and
 * otherwise the result of uiomove().  The address space lock is taken
 * and released here.
 */
int
prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader_t *php;
	prasmap_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (0);
	}
	size = prpdsize(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (E2BIG);
	}

	/* header first, mapping records packed right behind it */
	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader_t *)buf;
	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));

	hrt2ts(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			/* where the record after this one would start */
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as, &as->a_lock);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (uintptr_t)saddr;
			pmp->pr_npage = npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			/* anonymous: ISM, or segvn with no vnode behind it */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/* per-page data goes right after the prasmap_t */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as, &as->a_lock);

	/* only the part of the buffer actually filled in is moved out */
	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}

#ifdef _SYSCALL32_IMPL
/*
 * 32-bit flavor of prpdread(): the same procedure using
 * prpageheader32_t, prasmap32_t and prpdsize32().
 */
int
prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader32_t *php;
	prasmap32_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (0);
	}
	size = prpdsize32(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (E2BIG);
	}

	/* header first, mapping records packed right behind it */
	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader32_t *)buf;
	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));

	hrt2ts32(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			/* where the record after this one would start */
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as, &as->a_lock);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			pmp->pr_npage = (size32_t)npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			/* anonymous: ISM, or segvn with no vnode behind it */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/* per-page data goes right after the prasmap32_t */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap32_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as, &as->a_lock);

	/* only the part of the buffer actually filled in is moved out */
	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}
#endif	/* _SYSCALL32_IMPL */

/*
 * Convert an accumulated cpu percentage 'pct' into the 16-bit scaled
 * form returned to callers: the value is divided by the number of cpus
 * online in the caller's zone (when more than one), shifted right by 16
 * bits and capped at 0x8000.
 */
ushort_t
prgetpctcpu(uint64_t pct)
{
	/*
	 * The value returned will be relevant in the zone of the examiner,
	 * which may not be the same as the zone which performed the procfs
	 * mount.
	 */
	int nonline = zone_ncpus_online_get(curproc->p_zone);

	/*
	 * Prorate over online cpus so we don't exceed 100%
	 */
	if (nonline > 1)
		pct /= nonline;
	pct >>= 16;		/* convert to 16-bit scaled integer */
	if (pct > 0x8000)	/* might happen, due to rounding */
		pct = 0x8000;
	return ((ushort_t)pct);
}

/*
 * Return information used by ps(1).
 *
 * The caller must hold p->p_lock.  For a process with an address space
 * of its own the lock is dropped while the address space is examined and
 * is re-acquired before returning.  When prchoose() finds no lwp the
 * process is reported as a zombie.
 */
void
prgetpsinfo(proc_t *p, psinfo_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * With an lwp available only the process part is zeroed here;
	 * prgetlwpsinfo() is called further down to fill in pr_lwp.
	 */
	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}

	/*
	 * only export SSYS and SMSACCT; everything else is off-limits to
	 * userland apps.
	 */
	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
	psp->pr_nlwp = p->p_lwpcnt;
	psp->pr_nzomb = p->p_zombcnt;
	/* read the credential ids under p_crlock */
	mutex_enter(&p->p_crlock);
	cred = p->p_cred;
	psp->pr_uid = crgetruid(cred);
	psp->pr_euid = crgetuid(cred);
	psp->pr_gid = crgetrgid(cred);
	psp->pr_egid = crgetgid(cred);
	mutex_exit(&p->p_crlock);
	psp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		psp->pr_ppid = p->p_ppid;
	}
	psp->pr_pgid = p->p_pgrp;
	psp->pr_sid = p->p_sessp->s_sid;
	psp->pr_taskid = p->p_task->tk_tkid;
	psp->pr_projid = p->p_task->tk_proj->kpj_id;
	psp->pr_poolid = p->p_pool->pool_id;
	psp->pr_zoneid = p->p_zone->zone_id;
	/* a contract id of 0 is reported as -1 */
	if ((psp->pr_contract = PRCTID(p)) == 0)
		psp->pr_contract = -1;
	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		psp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		psp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	/* pr_time is user plus system time; pr_ctime likewise for children */
	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	hrt2ts((hrutime + hrstime), &psp->pr_time);
	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);

	if (t == NULL) {
		/* no lwp to report on: describe the process as a zombie */
		int wcode = p->p_wcode;		/* must be atomic read */

		if (wcode)
			psp->pr_wstat = wstat(wcode, p->p_wdata);
		psp->pr_ttydev = PRNODEV;
		psp->pr_lwp.pr_state = SZOMB;
		psp->pr_lwp.pr_sname = 'Z';
		psp->pr_lwp.pr_bindpro = PBIND_NONE;
		psp->pr_lwp.pr_bindpset = PS_NONE;
	} else {
		user_t *up = PTOU(p);
		struct as *as;
		dev_t d;
		extern dev_t rwsconsdev, rconsdev, uconsdev;

		d = cttydev(p);
		/*
		 * If the controlling terminal is the real
		 * or workstation console device, map to what the
		 * user thinks is the console device.
		 */
		if (d == rwsconsdev || d == rconsdev)
			d = uconsdev;
		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
		psp->pr_start = up->u_start;
		/*
		 * Both copies stop short of the last byte of the
		 * destination, which was zeroed above.
		 */
		bcopy(up->u_comm, psp->pr_fname,
		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
		bcopy(up->u_psargs, psp->pr_psargs,
		    MIN(PRARGSZ-1, PSARGSZ));
		psp->pr_argc = up->u_argc;
		psp->pr_argv = up->u_argv;
		psp->pr_envp = up->u_envp;

		/* get the chosen lwp's lwpsinfo */
		prgetlwpsinfo(t, &psp->pr_lwp);

		/* compute %cpu for the process */
		if (p->p_lwpcnt == 1)
			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
		else {
			uint64_t pct = 0;
			hrtime_t cur_time = gethrtime_unscaled();

			/* sum over every thread on the process's list */
			t = p->p_tlist;
			do {
				pct += cpu_update_pct(t, cur_time);
			} while ((t = t->t_forw) != p->p_tlist);

			psp->pr_pctcpu = prgetpctcpu(pct);
		}
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
			psp->pr_size = 0;
			psp->pr_rssize = 0;
		} else {
			/*
			 * p_lock is released before the address space
			 * lock is taken and re-acquired afterwards.
			 */
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
			psp->pr_size = btopr(rm_assize(as)) * (PAGESIZE / 1024);
			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
			psp->pr_pctmem = rm_pctmemory(as);
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
	}
}

#ifdef _SYSCALL32_IMPL
void
prgetpsinfo32(proc_t *p, psinfo32_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}

	/*
	 * only export SSYS and SMSACCT; everything else is off-limits to
	 * userland
apps. 2358 */ 2359 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 2360 psp->pr_nlwp = p->p_lwpcnt; 2361 psp->pr_nzomb = p->p_zombcnt; 2362 mutex_enter(&p->p_crlock); 2363 cred = p->p_cred; 2364 psp->pr_uid = crgetruid(cred); 2365 psp->pr_euid = crgetuid(cred); 2366 psp->pr_gid = crgetrgid(cred); 2367 psp->pr_egid = crgetgid(cred); 2368 mutex_exit(&p->p_crlock); 2369 psp->pr_pid = p->p_pid; 2370 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 2371 (p->p_flag & SZONETOP)) { 2372 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 2373 /* 2374 * Inside local zones, fake zsched's pid as parent pids for 2375 * processes which reference processes outside of the zone. 2376 */ 2377 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 2378 } else { 2379 psp->pr_ppid = p->p_ppid; 2380 } 2381 psp->pr_pgid = p->p_pgrp; 2382 psp->pr_sid = p->p_sessp->s_sid; 2383 psp->pr_taskid = p->p_task->tk_tkid; 2384 psp->pr_projid = p->p_task->tk_proj->kpj_id; 2385 psp->pr_poolid = p->p_pool->pool_id; 2386 psp->pr_zoneid = p->p_zone->zone_id; 2387 if ((psp->pr_contract = PRCTID(p)) == 0) 2388 psp->pr_contract = -1; 2389 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 2390 switch (p->p_model) { 2391 case DATAMODEL_ILP32: 2392 psp->pr_dmodel = PR_MODEL_ILP32; 2393 break; 2394 case DATAMODEL_LP64: 2395 psp->pr_dmodel = PR_MODEL_LP64; 2396 break; 2397 } 2398 hrutime = mstate_aggr_state(p, LMS_USER); 2399 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 2400 hrt2ts32(hrutime + hrstime, &psp->pr_time); 2401 TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime); 2402 2403 if (t == NULL) { 2404 extern int wstat(int, int); /* needs a header file */ 2405 int wcode = p->p_wcode; /* must be atomic read */ 2406 2407 if (wcode) 2408 psp->pr_wstat = wstat(wcode, p->p_wdata); 2409 psp->pr_ttydev = PRNODEV32; 2410 psp->pr_lwp.pr_state = SZOMB; 2411 psp->pr_lwp.pr_sname = 'Z'; 2412 } else { 2413 user_t *up = PTOU(p); 2414 struct as *as; 2415 dev_t d; 2416 extern dev_t rwsconsdev, rconsdev, 
uconsdev; 2417 2418 d = cttydev(p); 2419 /* 2420 * If the controlling terminal is the real 2421 * or workstation console device, map to what the 2422 * user thinks is the console device. 2423 */ 2424 if (d == rwsconsdev || d == rconsdev) 2425 d = uconsdev; 2426 (void) cmpldev(&psp->pr_ttydev, d); 2427 TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start); 2428 bcopy(up->u_comm, psp->pr_fname, 2429 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); 2430 bcopy(up->u_psargs, psp->pr_psargs, 2431 MIN(PRARGSZ-1, PSARGSZ)); 2432 psp->pr_argc = up->u_argc; 2433 psp->pr_argv = (caddr32_t)up->u_argv; 2434 psp->pr_envp = (caddr32_t)up->u_envp; 2435 2436 /* get the chosen lwp's lwpsinfo */ 2437 prgetlwpsinfo32(t, &psp->pr_lwp); 2438 2439 /* compute %cpu for the process */ 2440 if (p->p_lwpcnt == 1) 2441 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; 2442 else { 2443 uint64_t pct = 0; 2444 hrtime_t cur_time; 2445 2446 t = p->p_tlist; 2447 cur_time = gethrtime_unscaled(); 2448 do { 2449 pct += cpu_update_pct(t, cur_time); 2450 } while ((t = t->t_forw) != p->p_tlist); 2451 2452 psp->pr_pctcpu = prgetpctcpu(pct); 2453 } 2454 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { 2455 psp->pr_size = 0; 2456 psp->pr_rssize = 0; 2457 } else { 2458 mutex_exit(&p->p_lock); 2459 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2460 psp->pr_size = (size32_t) 2461 (btopr(rm_assize(as)) * (PAGESIZE / 1024)); 2462 psp->pr_rssize = (size32_t) 2463 (rm_asrss(as) * (PAGESIZE / 1024)); 2464 psp->pr_pctmem = rm_pctmemory(as); 2465 AS_LOCK_EXIT(as, &as->a_lock); 2466 mutex_enter(&p->p_lock); 2467 } 2468 } 2469 2470 /* 2471 * If we are looking at an LP64 process, zero out 2472 * the fields that cannot be represented in ILP32. 
2473 */ 2474 if (p->p_model != DATAMODEL_ILP32) { 2475 psp->pr_size = 0; 2476 psp->pr_rssize = 0; 2477 psp->pr_argv = 0; 2478 psp->pr_envp = 0; 2479 } 2480 } 2481 #endif /* _SYSCALL32_IMPL */ 2482 2483 void 2484 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp) 2485 { 2486 klwp_t *lwp = ttolwp(t); 2487 sobj_ops_t *sobj; 2488 char c, state; 2489 uint64_t pct; 2490 int retval, niceval; 2491 hrtime_t hrutime, hrstime; 2492 2493 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 2494 2495 bzero(psp, sizeof (*psp)); 2496 2497 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 2498 psp->pr_lwpid = t->t_tid; 2499 psp->pr_addr = (uintptr_t)t; 2500 psp->pr_wchan = (uintptr_t)t->t_wchan; 2501 2502 /* map the thread state enum into a process state enum */ 2503 state = VSTOPPED(t) ? TS_STOPPED : t->t_state; 2504 switch (state) { 2505 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 2506 case TS_RUN: state = SRUN; c = 'R'; break; 2507 case TS_ONPROC: state = SONPROC; c = 'O'; break; 2508 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 2509 case TS_STOPPED: state = SSTOP; c = 'T'; break; 2510 default: state = 0; c = '?'; break; 2511 } 2512 psp->pr_state = state; 2513 psp->pr_sname = c; 2514 if ((sobj = t->t_sobj_ops) != NULL) 2515 psp->pr_stype = SOBJ_TYPE(sobj); 2516 retval = CL_DONICE(t, NULL, 0, &niceval); 2517 if (retval == 0) { 2518 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 2519 psp->pr_nice = niceval + NZERO; 2520 } 2521 psp->pr_syscall = t->t_sysnum; 2522 psp->pr_pri = t->t_pri; 2523 psp->pr_start.tv_sec = t->t_start; 2524 psp->pr_start.tv_nsec = 0L; 2525 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 2526 scalehrtime(&hrutime); 2527 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 2528 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 2529 scalehrtime(&hrstime); 2530 hrt2ts(hrutime + hrstime, &psp->pr_time); 2531 /* compute %cpu for the lwp */ 2532 pct = cpu_update_pct(t, gethrtime_unscaled()); 2533 psp->pr_pctcpu = prgetpctcpu(pct); 2534 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] 
*/ 2535 if (psp->pr_cpu > 99) 2536 psp->pr_cpu = 99; 2537 2538 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 2539 sizeof (psp->pr_clname) - 1); 2540 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 2541 psp->pr_onpro = t->t_cpu->cpu_id; 2542 psp->pr_bindpro = t->t_bind_cpu; 2543 psp->pr_bindpset = t->t_bind_pset; 2544 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 2545 } 2546 2547 #ifdef _SYSCALL32_IMPL 2548 void 2549 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp) 2550 { 2551 proc_t *p = ttoproc(t); 2552 klwp_t *lwp = ttolwp(t); 2553 sobj_ops_t *sobj; 2554 char c, state; 2555 uint64_t pct; 2556 int retval, niceval; 2557 hrtime_t hrutime, hrstime; 2558 2559 ASSERT(MUTEX_HELD(&p->p_lock)); 2560 2561 bzero(psp, sizeof (*psp)); 2562 2563 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 2564 psp->pr_lwpid = t->t_tid; 2565 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 2566 psp->pr_wchan = 0; /* cannot represent 64-bit addr in 32 bits */ 2567 2568 /* map the thread state enum into a process state enum */ 2569 state = VSTOPPED(t) ? 
TS_STOPPED : t->t_state; 2570 switch (state) { 2571 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 2572 case TS_RUN: state = SRUN; c = 'R'; break; 2573 case TS_ONPROC: state = SONPROC; c = 'O'; break; 2574 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 2575 case TS_STOPPED: state = SSTOP; c = 'T'; break; 2576 default: state = 0; c = '?'; break; 2577 } 2578 psp->pr_state = state; 2579 psp->pr_sname = c; 2580 if ((sobj = t->t_sobj_ops) != NULL) 2581 psp->pr_stype = SOBJ_TYPE(sobj); 2582 retval = CL_DONICE(t, NULL, 0, &niceval); 2583 if (retval == 0) { 2584 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 2585 psp->pr_nice = niceval + NZERO; 2586 } else { 2587 psp->pr_oldpri = 0; 2588 psp->pr_nice = 0; 2589 } 2590 psp->pr_syscall = t->t_sysnum; 2591 psp->pr_pri = t->t_pri; 2592 psp->pr_start.tv_sec = (time32_t)t->t_start; 2593 psp->pr_start.tv_nsec = 0L; 2594 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 2595 scalehrtime(&hrutime); 2596 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 2597 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 2598 scalehrtime(&hrstime); 2599 hrt2ts32(hrutime + hrstime, &psp->pr_time); 2600 /* compute %cpu for the lwp */ 2601 pct = cpu_update_pct(t, gethrtime_unscaled()); 2602 psp->pr_pctcpu = prgetpctcpu(pct); 2603 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 2604 if (psp->pr_cpu > 99) 2605 psp->pr_cpu = 99; 2606 2607 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 2608 sizeof (psp->pr_clname) - 1); 2609 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 2610 psp->pr_onpro = t->t_cpu->cpu_id; 2611 psp->pr_bindpro = t->t_bind_cpu; 2612 psp->pr_bindpset = t->t_bind_pset; 2613 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 2614 } 2615 #endif /* _SYSCALL32_IMPL */ 2616 2617 /* 2618 * This used to get called when microstate accounting was disabled but 2619 * microstate information was requested. Since Microstate accounting is on 2620 * regardless of the proc flags, this simply makes it appear to procfs that 2621 * microstate accounting is on. 
This is relatively meaningless since you 2622 * can't turn it off, but this is here for the sake of appearances. 2623 */ 2624 2625 /*ARGSUSED*/ 2626 void 2627 estimate_msacct(kthread_t *t, hrtime_t curtime) 2628 { 2629 proc_t *p; 2630 2631 if (t == NULL) 2632 return; 2633 2634 p = ttoproc(t); 2635 ASSERT(MUTEX_HELD(&p->p_lock)); 2636 2637 /* 2638 * A system process (p0) could be referenced if the thread is 2639 * in the process of exiting. Don't turn on microstate accounting 2640 * in that case. 2641 */ 2642 if (p->p_flag & SSYS) 2643 return; 2644 2645 /* 2646 * Loop through all the LWPs (kernel threads) in the process. 2647 */ 2648 t = p->p_tlist; 2649 do { 2650 t->t_proc_flag |= TP_MSACCT; 2651 } while ((t = t->t_forw) != p->p_tlist); 2652 2653 p->p_flag |= SMSACCT; /* set process-wide MSACCT */ 2654 } 2655 2656 /* 2657 * It's not really possible to disable microstate accounting anymore. 2658 * However, this routine simply turns off the ms accounting flags in a process 2659 * This way procfs can still pretend to turn microstate accounting on and 2660 * off for a process, but it actually doesn't do anything. This is 2661 * a neutered form of preemptive idiot-proofing. 2662 */ 2663 void 2664 disable_msacct(proc_t *p) 2665 { 2666 kthread_t *t; 2667 2668 ASSERT(MUTEX_HELD(&p->p_lock)); 2669 2670 p->p_flag &= ~SMSACCT; /* clear process-wide MSACCT */ 2671 /* 2672 * Loop through all the LWPs (kernel threads) in the process. 2673 */ 2674 if ((t = p->p_tlist) != NULL) { 2675 do { 2676 /* clear per-thread flag */ 2677 t->t_proc_flag &= ~TP_MSACCT; 2678 } while ((t = t->t_forw) != p->p_tlist); 2679 } 2680 } 2681 2682 /* 2683 * Return resource usage information. 
2684 */ 2685 void 2686 prgetusage(kthread_t *t, prhusage_t *pup) 2687 { 2688 klwp_t *lwp = ttolwp(t); 2689 hrtime_t *mstimep; 2690 struct mstate *ms = &lwp->lwp_mstate; 2691 int state; 2692 int i; 2693 hrtime_t curtime; 2694 hrtime_t waitrq; 2695 hrtime_t tmp1; 2696 2697 curtime = gethrtime_unscaled(); 2698 2699 pup->pr_lwpid = t->t_tid; 2700 pup->pr_count = 1; 2701 pup->pr_create = ms->ms_start; 2702 pup->pr_term = ms->ms_term; 2703 scalehrtime(&pup->pr_create); 2704 scalehrtime(&pup->pr_term); 2705 if (ms->ms_term == 0) { 2706 pup->pr_rtime = curtime - ms->ms_start; 2707 scalehrtime(&pup->pr_rtime); 2708 } else { 2709 pup->pr_rtime = ms->ms_term - ms->ms_start; 2710 scalehrtime(&pup->pr_rtime); 2711 } 2712 2713 2714 pup->pr_utime = ms->ms_acct[LMS_USER]; 2715 pup->pr_stime = ms->ms_acct[LMS_SYSTEM]; 2716 pup->pr_ttime = ms->ms_acct[LMS_TRAP]; 2717 pup->pr_tftime = ms->ms_acct[LMS_TFAULT]; 2718 pup->pr_dftime = ms->ms_acct[LMS_DFAULT]; 2719 pup->pr_kftime = ms->ms_acct[LMS_KFAULT]; 2720 pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 2721 pup->pr_slptime = ms->ms_acct[LMS_SLEEP]; 2722 pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 2723 pup->pr_stoptime = ms->ms_acct[LMS_STOPPED]; 2724 2725 prscaleusage(pup); 2726 2727 /* 2728 * Adjust for time waiting in the dispatcher queue. 2729 */ 2730 waitrq = t->t_waitrq; /* hopefully atomic */ 2731 if (waitrq != 0) { 2732 tmp1 = curtime - waitrq; 2733 scalehrtime(&tmp1); 2734 pup->pr_wtime += tmp1; 2735 curtime = waitrq; 2736 } 2737 2738 /* 2739 * Adjust for time spent in current microstate. 2740 */ 2741 if (ms->ms_state_start > curtime) { 2742 curtime = gethrtime_unscaled(); 2743 } 2744 2745 i = 0; 2746 do { 2747 switch (state = t->t_mstate) { 2748 case LMS_SLEEP: 2749 /* 2750 * Update the timer for the current sleep state. 
2751 */ 2752 switch (state = ms->ms_prev) { 2753 case LMS_TFAULT: 2754 case LMS_DFAULT: 2755 case LMS_KFAULT: 2756 case LMS_USER_LOCK: 2757 break; 2758 default: 2759 state = LMS_SLEEP; 2760 break; 2761 } 2762 break; 2763 case LMS_TFAULT: 2764 case LMS_DFAULT: 2765 case LMS_KFAULT: 2766 case LMS_USER_LOCK: 2767 state = LMS_SYSTEM; 2768 break; 2769 } 2770 switch (state) { 2771 case LMS_USER: mstimep = &pup->pr_utime; break; 2772 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 2773 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 2774 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 2775 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 2776 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 2777 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 2778 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 2779 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 2780 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 2781 default: panic("prgetusage: unknown microstate"); 2782 } 2783 tmp1 = curtime - ms->ms_state_start; 2784 if (tmp1 < 0) { 2785 curtime = gethrtime_unscaled(); 2786 i++; 2787 continue; 2788 } 2789 scalehrtime(&tmp1); 2790 } while (tmp1 < 0 && i < MAX_ITERS_SPIN); 2791 2792 *mstimep += tmp1; 2793 2794 /* update pup timestamp */ 2795 pup->pr_tstamp = curtime; 2796 scalehrtime(&pup->pr_tstamp); 2797 2798 /* 2799 * Resource usage counters. 
2800 */ 2801 pup->pr_minf = lwp->lwp_ru.minflt; 2802 pup->pr_majf = lwp->lwp_ru.majflt; 2803 pup->pr_nswap = lwp->lwp_ru.nswap; 2804 pup->pr_inblk = lwp->lwp_ru.inblock; 2805 pup->pr_oublk = lwp->lwp_ru.oublock; 2806 pup->pr_msnd = lwp->lwp_ru.msgsnd; 2807 pup->pr_mrcv = lwp->lwp_ru.msgrcv; 2808 pup->pr_sigs = lwp->lwp_ru.nsignals; 2809 pup->pr_vctx = lwp->lwp_ru.nvcsw; 2810 pup->pr_ictx = lwp->lwp_ru.nivcsw; 2811 pup->pr_sysc = lwp->lwp_ru.sysc; 2812 pup->pr_ioch = lwp->lwp_ru.ioch; 2813 } 2814 2815 /* 2816 * Convert ms_acct stats from unscaled high-res time to nanoseconds 2817 */ 2818 void 2819 prscaleusage(prhusage_t *usg) 2820 { 2821 scalehrtime(&usg->pr_utime); 2822 scalehrtime(&usg->pr_stime); 2823 scalehrtime(&usg->pr_ttime); 2824 scalehrtime(&usg->pr_tftime); 2825 scalehrtime(&usg->pr_dftime); 2826 scalehrtime(&usg->pr_kftime); 2827 scalehrtime(&usg->pr_ltime); 2828 scalehrtime(&usg->pr_slptime); 2829 scalehrtime(&usg->pr_wtime); 2830 scalehrtime(&usg->pr_stoptime); 2831 } 2832 2833 2834 /* 2835 * Sum resource usage information. 
2836 */ 2837 void 2838 praddusage(kthread_t *t, prhusage_t *pup) 2839 { 2840 klwp_t *lwp = ttolwp(t); 2841 hrtime_t *mstimep; 2842 struct mstate *ms = &lwp->lwp_mstate; 2843 int state; 2844 int i; 2845 hrtime_t curtime; 2846 hrtime_t waitrq; 2847 hrtime_t tmp; 2848 prhusage_t conv; 2849 2850 curtime = gethrtime_unscaled(); 2851 2852 if (ms->ms_term == 0) { 2853 tmp = curtime - ms->ms_start; 2854 scalehrtime(&tmp); 2855 pup->pr_rtime += tmp; 2856 } else { 2857 tmp = ms->ms_term - ms->ms_start; 2858 scalehrtime(&tmp); 2859 pup->pr_rtime += tmp; 2860 } 2861 2862 conv.pr_utime = ms->ms_acct[LMS_USER]; 2863 conv.pr_stime = ms->ms_acct[LMS_SYSTEM]; 2864 conv.pr_ttime = ms->ms_acct[LMS_TRAP]; 2865 conv.pr_tftime = ms->ms_acct[LMS_TFAULT]; 2866 conv.pr_dftime = ms->ms_acct[LMS_DFAULT]; 2867 conv.pr_kftime = ms->ms_acct[LMS_KFAULT]; 2868 conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 2869 conv.pr_slptime = ms->ms_acct[LMS_SLEEP]; 2870 conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 2871 conv.pr_stoptime = ms->ms_acct[LMS_STOPPED]; 2872 2873 prscaleusage(&conv); 2874 2875 pup->pr_utime += conv.pr_utime; 2876 pup->pr_stime += conv.pr_stime; 2877 pup->pr_ttime += conv.pr_ttime; 2878 pup->pr_tftime += conv.pr_tftime; 2879 pup->pr_dftime += conv.pr_dftime; 2880 pup->pr_kftime += conv.pr_kftime; 2881 pup->pr_ltime += conv.pr_ltime; 2882 pup->pr_slptime += conv.pr_slptime; 2883 pup->pr_wtime += conv.pr_wtime; 2884 pup->pr_stoptime += conv.pr_stoptime; 2885 2886 /* 2887 * Adjust for time waiting in the dispatcher queue. 2888 */ 2889 waitrq = t->t_waitrq; /* hopefully atomic */ 2890 if (waitrq != 0) { 2891 tmp = curtime - waitrq; 2892 scalehrtime(&tmp); 2893 pup->pr_wtime += tmp; 2894 curtime = waitrq; 2895 } 2896 2897 /* 2898 * Adjust for time spent in current microstate. 
2899 */ 2900 if (ms->ms_state_start > curtime) { 2901 curtime = gethrtime_unscaled(); 2902 } 2903 2904 i = 0; 2905 do { 2906 switch (state = t->t_mstate) { 2907 case LMS_SLEEP: 2908 /* 2909 * Update the timer for the current sleep state. 2910 */ 2911 switch (state = ms->ms_prev) { 2912 case LMS_TFAULT: 2913 case LMS_DFAULT: 2914 case LMS_KFAULT: 2915 case LMS_USER_LOCK: 2916 break; 2917 default: 2918 state = LMS_SLEEP; 2919 break; 2920 } 2921 break; 2922 case LMS_TFAULT: 2923 case LMS_DFAULT: 2924 case LMS_KFAULT: 2925 case LMS_USER_LOCK: 2926 state = LMS_SYSTEM; 2927 break; 2928 } 2929 switch (state) { 2930 case LMS_USER: mstimep = &pup->pr_utime; break; 2931 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 2932 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 2933 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 2934 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 2935 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 2936 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 2937 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 2938 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 2939 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 2940 default: panic("praddusage: unknown microstate"); 2941 } 2942 tmp = curtime - ms->ms_state_start; 2943 if (tmp < 0) { 2944 curtime = gethrtime_unscaled(); 2945 i++; 2946 continue; 2947 } 2948 scalehrtime(&tmp); 2949 } while (tmp < 0 && i < MAX_ITERS_SPIN); 2950 2951 *mstimep += tmp; 2952 2953 /* update pup timestamp */ 2954 pup->pr_tstamp = curtime; 2955 scalehrtime(&pup->pr_tstamp); 2956 2957 /* 2958 * Resource usage counters. 
2959 */ 2960 pup->pr_minf += lwp->lwp_ru.minflt; 2961 pup->pr_majf += lwp->lwp_ru.majflt; 2962 pup->pr_nswap += lwp->lwp_ru.nswap; 2963 pup->pr_inblk += lwp->lwp_ru.inblock; 2964 pup->pr_oublk += lwp->lwp_ru.oublock; 2965 pup->pr_msnd += lwp->lwp_ru.msgsnd; 2966 pup->pr_mrcv += lwp->lwp_ru.msgrcv; 2967 pup->pr_sigs += lwp->lwp_ru.nsignals; 2968 pup->pr_vctx += lwp->lwp_ru.nvcsw; 2969 pup->pr_ictx += lwp->lwp_ru.nivcsw; 2970 pup->pr_sysc += lwp->lwp_ru.sysc; 2971 pup->pr_ioch += lwp->lwp_ru.ioch; 2972 } 2973 2974 /* 2975 * Convert a prhusage_t to a prusage_t. 2976 * This means convert each hrtime_t to a timestruc_t 2977 * and copy the count fields uint64_t => ulong_t. 2978 */ 2979 void 2980 prcvtusage(prhusage_t *pup, prusage_t *upup) 2981 { 2982 uint64_t *ullp; 2983 ulong_t *ulp; 2984 int i; 2985 2986 upup->pr_lwpid = pup->pr_lwpid; 2987 upup->pr_count = pup->pr_count; 2988 2989 hrt2ts(pup->pr_tstamp, &upup->pr_tstamp); 2990 hrt2ts(pup->pr_create, &upup->pr_create); 2991 hrt2ts(pup->pr_term, &upup->pr_term); 2992 hrt2ts(pup->pr_rtime, &upup->pr_rtime); 2993 hrt2ts(pup->pr_utime, &upup->pr_utime); 2994 hrt2ts(pup->pr_stime, &upup->pr_stime); 2995 hrt2ts(pup->pr_ttime, &upup->pr_ttime); 2996 hrt2ts(pup->pr_tftime, &upup->pr_tftime); 2997 hrt2ts(pup->pr_dftime, &upup->pr_dftime); 2998 hrt2ts(pup->pr_kftime, &upup->pr_kftime); 2999 hrt2ts(pup->pr_ltime, &upup->pr_ltime); 3000 hrt2ts(pup->pr_slptime, &upup->pr_slptime); 3001 hrt2ts(pup->pr_wtime, &upup->pr_wtime); 3002 hrt2ts(pup->pr_stoptime, &upup->pr_stoptime); 3003 bzero(upup->filltime, sizeof (upup->filltime)); 3004 3005 ullp = &pup->pr_minf; 3006 ulp = &upup->pr_minf; 3007 for (i = 0; i < 22; i++) 3008 *ulp++ = (ulong_t)*ullp++; 3009 } 3010 3011 #ifdef _SYSCALL32_IMPL 3012 void 3013 prcvtusage32(prhusage_t *pup, prusage32_t *upup) 3014 { 3015 uint64_t *ullp; 3016 uint32_t *ulp; 3017 int i; 3018 3019 upup->pr_lwpid = pup->pr_lwpid; 3020 upup->pr_count = pup->pr_count; 3021 3022 hrt2ts32(pup->pr_tstamp, 
&upup->pr_tstamp); 3023 hrt2ts32(pup->pr_create, &upup->pr_create); 3024 hrt2ts32(pup->pr_term, &upup->pr_term); 3025 hrt2ts32(pup->pr_rtime, &upup->pr_rtime); 3026 hrt2ts32(pup->pr_utime, &upup->pr_utime); 3027 hrt2ts32(pup->pr_stime, &upup->pr_stime); 3028 hrt2ts32(pup->pr_ttime, &upup->pr_ttime); 3029 hrt2ts32(pup->pr_tftime, &upup->pr_tftime); 3030 hrt2ts32(pup->pr_dftime, &upup->pr_dftime); 3031 hrt2ts32(pup->pr_kftime, &upup->pr_kftime); 3032 hrt2ts32(pup->pr_ltime, &upup->pr_ltime); 3033 hrt2ts32(pup->pr_slptime, &upup->pr_slptime); 3034 hrt2ts32(pup->pr_wtime, &upup->pr_wtime); 3035 hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime); 3036 bzero(upup->filltime, sizeof (upup->filltime)); 3037 3038 ullp = &pup->pr_minf; 3039 ulp = &upup->pr_minf; 3040 for (i = 0; i < 22; i++) 3041 *ulp++ = (uint32_t)*ullp++; 3042 } 3043 #endif /* _SYSCALL32_IMPL */ 3044 3045 /* 3046 * Determine whether a set is empty. 3047 */ 3048 int 3049 setisempty(uint32_t *sp, uint_t n) 3050 { 3051 while (n--) 3052 if (*sp++) 3053 return (0); 3054 return (1); 3055 } 3056 3057 /* 3058 * Utility routine for establishing a watched area in the process. 3059 * Keep the list of watched areas sorted by virtual address. 3060 */ 3061 int 3062 set_watched_area(proc_t *p, struct watched_area *pwa) 3063 { 3064 caddr_t vaddr = pwa->wa_vaddr; 3065 caddr_t eaddr = pwa->wa_eaddr; 3066 ulong_t flags = pwa->wa_flags; 3067 struct watched_area *target; 3068 avl_index_t where; 3069 int error = 0; 3070 3071 /* we must not be holding p->p_lock, but the process must be locked */ 3072 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 3073 ASSERT(p->p_proc_flag & P_PR_LOCK); 3074 3075 /* 3076 * If this is our first watchpoint, enable watchpoints for the process. 
3077 */ 3078 if (!pr_watch_active(p)) { 3079 kthread_t *t; 3080 3081 mutex_enter(&p->p_lock); 3082 if ((t = p->p_tlist) != NULL) { 3083 do { 3084 watch_enable(t); 3085 } while ((t = t->t_forw) != p->p_tlist); 3086 } 3087 mutex_exit(&p->p_lock); 3088 } 3089 3090 target = pr_find_watched_area(p, pwa, &where); 3091 if (target != NULL) { 3092 /* 3093 * We discovered an existing, overlapping watched area. 3094 * Allow it only if it is an exact match. 3095 */ 3096 if (target->wa_vaddr != vaddr || 3097 target->wa_eaddr != eaddr) 3098 error = EINVAL; 3099 else if (target->wa_flags != flags) { 3100 error = set_watched_page(p, vaddr, eaddr, 3101 flags, target->wa_flags); 3102 target->wa_flags = flags; 3103 } 3104 kmem_free(pwa, sizeof (struct watched_area)); 3105 } else { 3106 avl_insert(&p->p_warea, pwa, where); 3107 error = set_watched_page(p, vaddr, eaddr, flags, 0); 3108 } 3109 3110 return (error); 3111 } 3112 3113 /* 3114 * Utility routine for clearing a watched area in the process. 3115 * Must be an exact match of the virtual address. 3116 * size and flags don't matter. 3117 */ 3118 int 3119 clear_watched_area(proc_t *p, struct watched_area *pwa) 3120 { 3121 struct watched_area *found; 3122 3123 /* we must not be holding p->p_lock, but the process must be locked */ 3124 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 3125 ASSERT(p->p_proc_flag & P_PR_LOCK); 3126 3127 3128 if (!pr_watch_active(p)) { 3129 kmem_free(pwa, sizeof (struct watched_area)); 3130 return (0); 3131 } 3132 3133 /* 3134 * Look for a matching address in the watched areas. If a match is 3135 * found, clear the old watched area and adjust the watched page(s). It 3136 * is not an error if there is no match. 
3137 */ 3138 if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL && 3139 found->wa_vaddr == pwa->wa_vaddr) { 3140 clear_watched_page(p, found->wa_vaddr, found->wa_eaddr, 3141 found->wa_flags); 3142 avl_remove(&p->p_warea, found); 3143 kmem_free(found, sizeof (struct watched_area)); 3144 } 3145 3146 kmem_free(pwa, sizeof (struct watched_area)); 3147 3148 /* 3149 * If we removed the last watched area from the process, disable 3150 * watchpoints. 3151 */ 3152 if (!pr_watch_active(p)) { 3153 kthread_t *t; 3154 3155 mutex_enter(&p->p_lock); 3156 if ((t = p->p_tlist) != NULL) { 3157 do { 3158 watch_disable(t); 3159 } while ((t = t->t_forw) != p->p_tlist); 3160 } 3161 mutex_exit(&p->p_lock); 3162 } 3163 3164 return (0); 3165 } 3166 3167 /* 3168 * Frees all the watched_area structures 3169 */ 3170 void 3171 pr_free_watchpoints(proc_t *p) 3172 { 3173 struct watched_area *delp; 3174 void *cookie; 3175 3176 cookie = NULL; 3177 while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL) 3178 kmem_free(delp, sizeof (struct watched_area)); 3179 3180 avl_destroy(&p->p_warea); 3181 } 3182 3183 /* 3184 * This one is called by the traced process to unwatch all the 3185 * pages while deallocating the list of watched_page structs. 
3186 */ 3187 void 3188 pr_free_watched_pages(proc_t *p) 3189 { 3190 struct as *as = p->p_as; 3191 struct watched_page *pwp; 3192 uint_t prot; 3193 int retrycnt, err; 3194 void *cookie; 3195 3196 if (as == NULL || avl_numnodes(&as->a_wpage) == 0) 3197 return; 3198 3199 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 3200 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3201 3202 pwp = avl_first(&as->a_wpage); 3203 3204 cookie = NULL; 3205 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) { 3206 retrycnt = 0; 3207 if ((prot = pwp->wp_oprot) != 0) { 3208 caddr_t addr = pwp->wp_vaddr; 3209 struct seg *seg; 3210 retry: 3211 3212 if ((pwp->wp_prot != prot || 3213 (pwp->wp_flags & WP_NOWATCH)) && 3214 (seg = as_segat(as, addr)) != NULL) { 3215 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot); 3216 if (err == IE_RETRY) { 3217 ASSERT(retrycnt == 0); 3218 retrycnt++; 3219 goto retry; 3220 } 3221 } 3222 } 3223 kmem_free(pwp, sizeof (struct watched_page)); 3224 } 3225 3226 avl_destroy(&as->a_wpage); 3227 p->p_wprot = NULL; 3228 3229 AS_LOCK_EXIT(as, &as->a_lock); 3230 } 3231 3232 /* 3233 * Insert a watched area into the list of watched pages. 3234 * If oflags is zero then we are adding a new watched area. 3235 * Otherwise we are changing the flags of an existing watched area. 3236 */ 3237 static int 3238 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, 3239 ulong_t flags, ulong_t oflags) 3240 { 3241 struct as *as = p->p_as; 3242 avl_tree_t *pwp_tree; 3243 struct watched_page *pwp, *newpwp; 3244 struct watched_page tpw; 3245 avl_index_t where; 3246 struct seg *seg; 3247 uint_t prot; 3248 caddr_t addr; 3249 3250 /* 3251 * We need to pre-allocate a list of structures before we grab the 3252 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks 3253 * held. 
3254 */ 3255 newpwp = NULL; 3256 for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3257 addr < eaddr; addr += PAGESIZE) { 3258 pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP); 3259 pwp->wp_list = newpwp; 3260 newpwp = pwp; 3261 } 3262 3263 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3264 3265 /* 3266 * Search for an existing watched page to contain the watched area. 3267 * If none is found, grab a new one from the available list 3268 * and insert it in the active list, keeping the list sorted 3269 * by user-level virtual address. 3270 */ 3271 if (p->p_flag & SVFWAIT) 3272 pwp_tree = &p->p_wpage; 3273 else 3274 pwp_tree = &as->a_wpage; 3275 3276 again: 3277 if (avl_numnodes(pwp_tree) > prnwatch) { 3278 AS_LOCK_EXIT(as, &as->a_lock); 3279 while (newpwp != NULL) { 3280 pwp = newpwp->wp_list; 3281 kmem_free(newpwp, sizeof (struct watched_page)); 3282 newpwp = pwp; 3283 } 3284 return (E2BIG); 3285 } 3286 3287 tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3288 if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) { 3289 pwp = newpwp; 3290 newpwp = newpwp->wp_list; 3291 pwp->wp_list = NULL; 3292 pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr & 3293 (uintptr_t)PAGEMASK); 3294 avl_insert(pwp_tree, pwp, where); 3295 } 3296 3297 ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE); 3298 3299 if (oflags & WA_READ) 3300 pwp->wp_read--; 3301 if (oflags & WA_WRITE) 3302 pwp->wp_write--; 3303 if (oflags & WA_EXEC) 3304 pwp->wp_exec--; 3305 3306 ASSERT(pwp->wp_read >= 0); 3307 ASSERT(pwp->wp_write >= 0); 3308 ASSERT(pwp->wp_exec >= 0); 3309 3310 if (flags & WA_READ) 3311 pwp->wp_read++; 3312 if (flags & WA_WRITE) 3313 pwp->wp_write++; 3314 if (flags & WA_EXEC) 3315 pwp->wp_exec++; 3316 3317 if (!(p->p_flag & SVFWAIT)) { 3318 vaddr = pwp->wp_vaddr; 3319 if (pwp->wp_oprot == 0 && 3320 (seg = as_segat(as, vaddr)) != NULL) { 3321 SEGOP_GETPROT(seg, vaddr, 0, &prot); 3322 pwp->wp_oprot = (uchar_t)prot; 3323 pwp->wp_prot = 
(uchar_t)prot; 3324 } 3325 if (pwp->wp_oprot != 0) { 3326 prot = pwp->wp_oprot; 3327 if (pwp->wp_read) 3328 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3329 if (pwp->wp_write) 3330 prot &= ~PROT_WRITE; 3331 if (pwp->wp_exec) 3332 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3333 if (!(pwp->wp_flags & WP_NOWATCH) && 3334 pwp->wp_prot != prot && 3335 (pwp->wp_flags & WP_SETPROT) == 0) { 3336 pwp->wp_flags |= WP_SETPROT; 3337 pwp->wp_list = p->p_wprot; 3338 p->p_wprot = pwp; 3339 } 3340 pwp->wp_prot = (uchar_t)prot; 3341 } 3342 } 3343 3344 /* 3345 * If the watched area extends into the next page then do 3346 * it over again with the virtual address of the next page. 3347 */ 3348 if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr) 3349 goto again; 3350 3351 AS_LOCK_EXIT(as, &as->a_lock); 3352 3353 /* 3354 * Free any pages we may have over-allocated 3355 */ 3356 while (newpwp != NULL) { 3357 pwp = newpwp->wp_list; 3358 kmem_free(newpwp, sizeof (struct watched_page)); 3359 newpwp = pwp; 3360 } 3361 3362 return (0); 3363 } 3364 3365 /* 3366 * Remove a watched area from the list of watched pages. 3367 * A watched area may extend over more than one page. 
3368 */ 3369 static void 3370 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags) 3371 { 3372 struct as *as = p->p_as; 3373 struct watched_page *pwp; 3374 struct watched_page tpw; 3375 avl_tree_t *tree; 3376 avl_index_t where; 3377 3378 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3379 3380 if (p->p_flag & SVFWAIT) 3381 tree = &p->p_wpage; 3382 else 3383 tree = &as->a_wpage; 3384 3385 tpw.wp_vaddr = vaddr = 3386 (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3387 pwp = avl_find(tree, &tpw, &where); 3388 if (pwp == NULL) 3389 pwp = avl_nearest(tree, where, AVL_AFTER); 3390 3391 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 3392 ASSERT(vaddr <= pwp->wp_vaddr); 3393 3394 if (flags & WA_READ) 3395 pwp->wp_read--; 3396 if (flags & WA_WRITE) 3397 pwp->wp_write--; 3398 if (flags & WA_EXEC) 3399 pwp->wp_exec--; 3400 3401 if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) { 3402 /* 3403 * Reset the hat layer's protections on this page. 3404 */ 3405 if (pwp->wp_oprot != 0) { 3406 uint_t prot = pwp->wp_oprot; 3407 3408 if (pwp->wp_read) 3409 prot &= 3410 ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3411 if (pwp->wp_write) 3412 prot &= ~PROT_WRITE; 3413 if (pwp->wp_exec) 3414 prot &= 3415 ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3416 if (!(pwp->wp_flags & WP_NOWATCH) && 3417 pwp->wp_prot != prot && 3418 (pwp->wp_flags & WP_SETPROT) == 0) { 3419 pwp->wp_flags |= WP_SETPROT; 3420 pwp->wp_list = p->p_wprot; 3421 p->p_wprot = pwp; 3422 } 3423 pwp->wp_prot = (uchar_t)prot; 3424 } 3425 } else { 3426 /* 3427 * No watched areas remain in this page. 3428 * Reset everything to normal. 
3429 */ 3430 if (pwp->wp_oprot != 0) { 3431 pwp->wp_prot = pwp->wp_oprot; 3432 if ((pwp->wp_flags & WP_SETPROT) == 0) { 3433 pwp->wp_flags |= WP_SETPROT; 3434 pwp->wp_list = p->p_wprot; 3435 p->p_wprot = pwp; 3436 } 3437 } 3438 } 3439 3440 pwp = AVL_NEXT(tree, pwp); 3441 } 3442 3443 AS_LOCK_EXIT(as, &as->a_lock); 3444 } 3445 3446 /* 3447 * Return the original protections for the specified page. 3448 */ 3449 static void 3450 getwatchprot(struct as *as, caddr_t addr, uint_t *prot) 3451 { 3452 struct watched_page *pwp; 3453 struct watched_page tpw; 3454 3455 ASSERT(AS_LOCK_HELD(as, &as->a_lock)); 3456 3457 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 3458 if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL) 3459 *prot = pwp->wp_oprot; 3460 } 3461 3462 static prpagev_t * 3463 pr_pagev_create(struct seg *seg, int check_noreserve) 3464 { 3465 prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP); 3466 size_t total_pages = seg_pages(seg); 3467 3468 /* 3469 * Limit the size of our vectors to pagev_lim pages at a time. We need 3470 * 4 or 5 bytes of storage per page, so this means we limit ourself 3471 * to about a megabyte of kernel heap by default. 
3472 */ 3473 pagev->pg_npages = MIN(total_pages, pagev_lim); 3474 pagev->pg_pnbase = 0; 3475 3476 pagev->pg_protv = 3477 kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP); 3478 3479 if (check_noreserve) 3480 pagev->pg_incore = 3481 kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP); 3482 else 3483 pagev->pg_incore = NULL; 3484 3485 return (pagev); 3486 } 3487 3488 static void 3489 pr_pagev_destroy(prpagev_t *pagev) 3490 { 3491 if (pagev->pg_incore != NULL) 3492 kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char)); 3493 3494 kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t)); 3495 kmem_free(pagev, sizeof (prpagev_t)); 3496 } 3497 3498 static caddr_t 3499 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr) 3500 { 3501 ulong_t lastpg = seg_page(seg, eaddr - 1); 3502 ulong_t pn, pnlim; 3503 caddr_t saddr; 3504 size_t len; 3505 3506 ASSERT(addr >= seg->s_base && addr <= eaddr); 3507 3508 if (addr == eaddr) 3509 return (eaddr); 3510 3511 refill: 3512 ASSERT(addr < eaddr); 3513 pagev->pg_pnbase = seg_page(seg, addr); 3514 pnlim = pagev->pg_pnbase + pagev->pg_npages; 3515 saddr = addr; 3516 3517 if (lastpg < pnlim) 3518 len = (size_t)(eaddr - addr); 3519 else 3520 len = pagev->pg_npages * PAGESIZE; 3521 3522 if (pagev->pg_incore != NULL) { 3523 /* 3524 * INCORE cleverly has different semantics than GETPROT: 3525 * it returns info on pages up to but NOT including addr + len. 3526 */ 3527 SEGOP_INCORE(seg, addr, len, pagev->pg_incore); 3528 pn = pagev->pg_pnbase; 3529 3530 do { 3531 /* 3532 * Guilty knowledge here: We know that segvn_incore 3533 * returns more than just the low-order bit that 3534 * indicates the page is actually in memory. If any 3535 * bits are set, then the page has backing store. 
3536 */ 3537 if (pagev->pg_incore[pn++ - pagev->pg_pnbase]) 3538 goto out; 3539 3540 } while ((addr += PAGESIZE) < eaddr && pn < pnlim); 3541 3542 /* 3543 * If we examined all the pages in the vector but we're not 3544 * at the end of the segment, take another lap. 3545 */ 3546 if (addr < eaddr) 3547 goto refill; 3548 } 3549 3550 /* 3551 * Need to take len - 1 because addr + len is the address of the 3552 * first byte of the page just past the end of what we want. 3553 */ 3554 out: 3555 SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv); 3556 return (addr); 3557 } 3558 3559 static caddr_t 3560 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg, 3561 caddr_t *saddrp, caddr_t eaddr, uint_t *protp) 3562 { 3563 /* 3564 * Our starting address is either the specified address, or the base 3565 * address from the start of the pagev. If the latter is greater, 3566 * this means a previous call to pr_pagev_fill has already scanned 3567 * further than the end of the previous mapping. 3568 */ 3569 caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE; 3570 caddr_t addr = MAX(*saddrp, base); 3571 ulong_t pn = seg_page(seg, addr); 3572 uint_t prot, nprot; 3573 3574 /* 3575 * If we're dealing with noreserve pages, then advance addr to 3576 * the address of the next page which has backing store. 3577 */ 3578 if (pagev->pg_incore != NULL) { 3579 while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) { 3580 if ((addr += PAGESIZE) == eaddr) { 3581 *saddrp = addr; 3582 prot = 0; 3583 goto out; 3584 } 3585 if (++pn == pagev->pg_pnbase + pagev->pg_npages) { 3586 addr = pr_pagev_fill(pagev, seg, addr, eaddr); 3587 if (addr == eaddr) { 3588 *saddrp = addr; 3589 prot = 0; 3590 goto out; 3591 } 3592 pn = seg_page(seg, addr); 3593 } 3594 } 3595 } 3596 3597 /* 3598 * Get the protections on the page corresponding to addr. 
3599 */ 3600 pn = seg_page(seg, addr); 3601 ASSERT(pn >= pagev->pg_pnbase); 3602 ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages)); 3603 3604 prot = pagev->pg_protv[pn - pagev->pg_pnbase]; 3605 getwatchprot(seg->s_as, addr, &prot); 3606 *saddrp = addr; 3607 3608 /* 3609 * Now loop until we find a backed page with different protections 3610 * or we reach the end of this segment. 3611 */ 3612 while ((addr += PAGESIZE) < eaddr) { 3613 /* 3614 * If pn has advanced to the page number following what we 3615 * have information on, refill the page vector and reset 3616 * addr and pn. If pr_pagev_fill does not return the 3617 * address of the next page, we have a discontiguity and 3618 * thus have reached the end of the current mapping. 3619 */ 3620 if (++pn == pagev->pg_pnbase + pagev->pg_npages) { 3621 caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr); 3622 if (naddr != addr) 3623 goto out; 3624 pn = seg_page(seg, addr); 3625 } 3626 3627 /* 3628 * The previous page's protections are in prot, and it has 3629 * backing. If this page is MAP_NORESERVE and has no backing, 3630 * then end this mapping and return the previous protections. 3631 */ 3632 if (pagev->pg_incore != NULL && 3633 pagev->pg_incore[pn - pagev->pg_pnbase] == 0) 3634 break; 3635 3636 /* 3637 * Otherwise end the mapping if this page's protections (nprot) 3638 * are different than those in the previous page (prot). 3639 */ 3640 nprot = pagev->pg_protv[pn - pagev->pg_pnbase]; 3641 getwatchprot(seg->s_as, addr, &nprot); 3642 3643 if (nprot != prot) 3644 break; 3645 } 3646 3647 out: 3648 *protp = prot; 3649 return (addr); 3650 } 3651 3652 size_t 3653 pr_getsegsize(struct seg *seg, int reserved) 3654 { 3655 size_t size = seg->s_size; 3656 3657 /* 3658 * If we're interested in the reserved space, return the size of the 3659 * segment itself. Everything else in this function is a special case 3660 * to determine the actual underlying size of various segment types. 
3661 */ 3662 if (reserved) 3663 return (size); 3664 3665 /* 3666 * If this is a segvn mapping of a regular file, return the smaller 3667 * of the segment size and the remaining size of the file beyond 3668 * the file offset corresponding to seg->s_base. 3669 */ 3670 if (seg->s_ops == &segvn_ops) { 3671 vattr_t vattr; 3672 vnode_t *vp; 3673 3674 vattr.va_mask = AT_SIZE; 3675 3676 if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 && 3677 vp != NULL && vp->v_type == VREG && 3678 VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) { 3679 3680 u_offset_t fsize = vattr.va_size; 3681 u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base); 3682 3683 if (fsize < offset) 3684 fsize = 0; 3685 else 3686 fsize -= offset; 3687 3688 fsize = roundup(fsize, (u_offset_t)PAGESIZE); 3689 3690 if (fsize < (u_offset_t)size) 3691 size = (size_t)fsize; 3692 } 3693 3694 return (size); 3695 } 3696 3697 /* 3698 * If this is an ISM shared segment, don't include pages that are 3699 * beyond the real size of the spt segment that backs it. 3700 */ 3701 if (seg->s_ops == &segspt_shmops) 3702 return (MIN(spt_realsize(seg), size)); 3703 3704 /* 3705 * If this is segment is a mapping from /dev/null, then this is a 3706 * reservation of virtual address space and has no actual size. 3707 * Such segments are backed by segdev and have type set to neither 3708 * MAP_SHARED nor MAP_PRIVATE. 3709 */ 3710 if (seg->s_ops == &segdev_ops && 3711 ((SEGOP_GETTYPE(seg, seg->s_base) & 3712 (MAP_SHARED | MAP_PRIVATE)) == 0)) 3713 return (0); 3714 3715 /* 3716 * If this segment doesn't match one of the special types we handle, 3717 * just return the size of the segment itself. 
3718 */ 3719 return (size); 3720 } 3721 3722 uint_t 3723 pr_getprot(struct seg *seg, int reserved, void **tmp, 3724 caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr) 3725 { 3726 struct as *as = seg->s_as; 3727 3728 caddr_t saddr = *saddrp; 3729 caddr_t naddr; 3730 3731 int check_noreserve; 3732 uint_t prot; 3733 3734 union { 3735 struct segvn_data *svd; 3736 struct segdev_data *sdp; 3737 void *data; 3738 } s; 3739 3740 s.data = seg->s_data; 3741 3742 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 3743 ASSERT(saddr >= seg->s_base && saddr < eaddr); 3744 ASSERT(eaddr <= seg->s_base + seg->s_size); 3745 3746 /* 3747 * Don't include MAP_NORESERVE pages in the address range 3748 * unless their mappings have actually materialized. 3749 * We cheat by knowing that segvn is the only segment 3750 * driver that supports MAP_NORESERVE. 3751 */ 3752 check_noreserve = 3753 (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL && 3754 (s.svd->vp == NULL || s.svd->vp->v_type != VREG) && 3755 (s.svd->flags & MAP_NORESERVE)); 3756 3757 /* 3758 * Examine every page only as a last resort. We use guilty knowledge 3759 * of segvn and segdev to avoid this: if there are no per-page 3760 * protections present in the segment and we don't care about 3761 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment. 3762 */ 3763 if (!check_noreserve && saddr == seg->s_base && 3764 seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) { 3765 prot = s.svd->prot; 3766 getwatchprot(as, saddr, &prot); 3767 naddr = eaddr; 3768 3769 } else if (saddr == seg->s_base && seg->s_ops == &segdev_ops && 3770 s.sdp != NULL && s.sdp->pageprot == 0) { 3771 prot = s.sdp->prot; 3772 getwatchprot(as, saddr, &prot); 3773 naddr = eaddr; 3774 3775 } else { 3776 prpagev_t *pagev; 3777 3778 /* 3779 * If addr is sitting at the start of the segment, then 3780 * create a page vector to store protection and incore 3781 * information for pages in the segment, and fill it. 
3782 * Otherwise, we expect *tmp to address the prpagev_t 3783 * allocated by a previous call to this function. 3784 */ 3785 if (saddr == seg->s_base) { 3786 pagev = pr_pagev_create(seg, check_noreserve); 3787 saddr = pr_pagev_fill(pagev, seg, saddr, eaddr); 3788 3789 ASSERT(*tmp == NULL); 3790 *tmp = pagev; 3791 3792 ASSERT(saddr <= eaddr); 3793 *saddrp = saddr; 3794 3795 if (saddr == eaddr) { 3796 naddr = saddr; 3797 prot = 0; 3798 goto out; 3799 } 3800 3801 } else { 3802 ASSERT(*tmp != NULL); 3803 pagev = (prpagev_t *)*tmp; 3804 } 3805 3806 naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot); 3807 ASSERT(naddr <= eaddr); 3808 } 3809 3810 out: 3811 if (naddr == eaddr) 3812 pr_getprot_done(tmp); 3813 *naddrp = naddr; 3814 return (prot); 3815 } 3816 3817 void 3818 pr_getprot_done(void **tmp) 3819 { 3820 if (*tmp != NULL) { 3821 pr_pagev_destroy((prpagev_t *)*tmp); 3822 *tmp = NULL; 3823 } 3824 } 3825 3826 /* 3827 * Return true iff the vnode is a /proc file from the object directory. 3828 */ 3829 int 3830 pr_isobject(vnode_t *vp) 3831 { 3832 return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT); 3833 } 3834 3835 /* 3836 * Return true iff the vnode is a /proc file opened by the process itself. 3837 */ 3838 int 3839 pr_isself(vnode_t *vp) 3840 { 3841 /* 3842 * XXX: To retain binary compatibility with the old 3843 * ioctl()-based version of /proc, we exempt self-opens 3844 * of /proc/<pid> from being marked close-on-exec. 
3845 */ 3846 return (vn_matchops(vp, prvnodeops) && 3847 (VTOP(vp)->pr_flags & PR_ISSELF) && 3848 VTOP(vp)->pr_type != PR_PIDDIR); 3849 } 3850 3851 static ssize_t 3852 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr) 3853 { 3854 ssize_t pagesize, hatsize; 3855 3856 ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 3857 ASSERT(IS_P2ALIGNED(saddr, PAGESIZE)); 3858 ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE)); 3859 ASSERT(saddr < eaddr); 3860 3861 pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr); 3862 ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize)); 3863 ASSERT(pagesize != 0); 3864 3865 if (pagesize == -1) 3866 pagesize = PAGESIZE; 3867 3868 saddr += P2NPHASE((uintptr_t)saddr, pagesize); 3869 3870 while (saddr < eaddr) { 3871 if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr)) 3872 break; 3873 ASSERT(IS_P2ALIGNED(saddr, pagesize)); 3874 saddr += pagesize; 3875 } 3876 3877 *naddrp = ((saddr < eaddr) ? saddr : eaddr); 3878 return (hatsize); 3879 } 3880 3881 /* 3882 * Return an array of structures with extended memory map information. 3883 * We allocate here; the caller must deallocate. 3884 */ 3885 int 3886 prgetxmap(proc_t *p, list_t *iolhead) 3887 { 3888 struct as *as = p->p_as; 3889 prxmap_t *mp; 3890 struct seg *seg; 3891 struct seg *brkseg, *stkseg; 3892 struct vnode *vp; 3893 struct vattr vattr; 3894 uint_t prot; 3895 3896 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 3897 3898 /* 3899 * Request an initial buffer size that doesn't waste memory 3900 * if the address space has only a small number of segments. 
3901 */ 3902 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 3903 3904 if ((seg = AS_SEGFIRST(as)) == NULL) 3905 return (0); 3906 3907 brkseg = break_seg(p); 3908 stkseg = as_segat(as, prgetstackbase(p)); 3909 3910 do { 3911 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 3912 caddr_t saddr, naddr, baddr; 3913 void *tmp = NULL; 3914 ssize_t psz; 3915 char *parr; 3916 uint64_t npages; 3917 uint64_t pagenum; 3918 3919 /* 3920 * Segment loop part one: iterate from the base of the segment 3921 * to its end, pausing at each address boundary (baddr) between 3922 * ranges that have different virtual memory protections. 3923 */ 3924 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { 3925 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); 3926 ASSERT(baddr >= saddr && baddr <= eaddr); 3927 3928 /* 3929 * Segment loop part two: iterate from the current 3930 * position to the end of the protection boundary, 3931 * pausing at each address boundary (naddr) between 3932 * ranges that have different underlying page sizes. 
3933 */ 3934 for (; saddr < baddr; saddr = naddr) { 3935 psz = pr_getpagesize(seg, saddr, &naddr, baddr); 3936 ASSERT(naddr >= saddr && naddr <= baddr); 3937 3938 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 3939 3940 mp->pr_vaddr = (uintptr_t)saddr; 3941 mp->pr_size = naddr - saddr; 3942 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 3943 mp->pr_mflags = 0; 3944 if (prot & PROT_READ) 3945 mp->pr_mflags |= MA_READ; 3946 if (prot & PROT_WRITE) 3947 mp->pr_mflags |= MA_WRITE; 3948 if (prot & PROT_EXEC) 3949 mp->pr_mflags |= MA_EXEC; 3950 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 3951 mp->pr_mflags |= MA_SHARED; 3952 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 3953 mp->pr_mflags |= MA_NORESERVE; 3954 if (seg->s_ops == &segspt_shmops || 3955 (seg->s_ops == &segvn_ops && 3956 (SEGOP_GETVP(seg, saddr, &vp) != 0 || 3957 vp == NULL))) 3958 mp->pr_mflags |= MA_ANON; 3959 if (seg == brkseg) 3960 mp->pr_mflags |= MA_BREAK; 3961 else if (seg == stkseg) 3962 mp->pr_mflags |= MA_STACK; 3963 if (seg->s_ops == &segspt_shmops) 3964 mp->pr_mflags |= MA_ISM | MA_SHM; 3965 3966 mp->pr_pagesize = PAGESIZE; 3967 if (psz == -1) { 3968 mp->pr_hatpagesize = 0; 3969 } else { 3970 mp->pr_hatpagesize = psz; 3971 } 3972 3973 /* 3974 * Manufacture a filename for the "object" dir. 3975 */ 3976 mp->pr_dev = PRNODEV; 3977 vattr.va_mask = AT_FSID|AT_NODEID; 3978 if (seg->s_ops == &segvn_ops && 3979 SEGOP_GETVP(seg, saddr, &vp) == 0 && 3980 vp != NULL && vp->v_type == VREG && 3981 VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) { 3982 mp->pr_dev = vattr.va_fsid; 3983 mp->pr_ino = vattr.va_nodeid; 3984 if (vp == p->p_exec) 3985 (void) strcpy(mp->pr_mapname, 3986 "a.out"); 3987 else 3988 pr_object_name(mp->pr_mapname, 3989 vp, &vattr); 3990 } 3991 3992 /* 3993 * Get the SysV shared memory id, if any. 
3994 */ 3995 if ((mp->pr_mflags & MA_SHARED) && 3996 p->p_segacct && (mp->pr_shmid = shmgetid(p, 3997 seg->s_base)) != SHMID_NONE) { 3998 if (mp->pr_shmid == SHMID_FREE) 3999 mp->pr_shmid = -1; 4000 4001 mp->pr_mflags |= MA_SHM; 4002 } else { 4003 mp->pr_shmid = -1; 4004 } 4005 4006 npages = ((uintptr_t)(naddr - saddr)) >> 4007 PAGESHIFT; 4008 parr = kmem_zalloc(npages, KM_SLEEP); 4009 4010 SEGOP_INCORE(seg, saddr, naddr - saddr, parr); 4011 4012 for (pagenum = 0; pagenum < npages; pagenum++) { 4013 if (parr[pagenum] & SEG_PAGE_INCORE) 4014 mp->pr_rss++; 4015 if (parr[pagenum] & SEG_PAGE_ANON) 4016 mp->pr_anon++; 4017 if (parr[pagenum] & SEG_PAGE_LOCKED) 4018 mp->pr_locked++; 4019 } 4020 kmem_free(parr, npages); 4021 } 4022 } 4023 ASSERT(tmp == NULL); 4024 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 4025 4026 return (0); 4027 } 4028 4029 /* 4030 * Return the process's credentials. We don't need a 32-bit equivalent of 4031 * this function because prcred_t and prcred32_t are actually the same. 4032 */ 4033 void 4034 prgetcred(proc_t *p, prcred_t *pcrp) 4035 { 4036 mutex_enter(&p->p_crlock); 4037 cred2prcred(p->p_cred, pcrp); 4038 mutex_exit(&p->p_crlock); 4039 } 4040 4041 /* 4042 * Compute actual size of the prpriv_t structure. 4043 */ 4044 4045 size_t 4046 prgetprivsize(void) 4047 { 4048 return (priv_prgetprivsize(NULL)); 4049 } 4050 4051 /* 4052 * Return the process's privileges. We don't need a 32-bit equivalent of 4053 * this function because prpriv_t and prpriv32_t are actually the same. 4054 */ 4055 void 4056 prgetpriv(proc_t *p, prpriv_t *pprp) 4057 { 4058 mutex_enter(&p->p_crlock); 4059 cred2prpriv(p->p_cred, pprp); 4060 mutex_exit(&p->p_crlock); 4061 } 4062 4063 #ifdef _SYSCALL32_IMPL 4064 /* 4065 * Return an array of structures with HAT memory map information. 4066 * We allocate here; the caller must deallocate. 
4067 */ 4068 int 4069 prgetxmap32(proc_t *p, list_t *iolhead) 4070 { 4071 struct as *as = p->p_as; 4072 prxmap32_t *mp; 4073 struct seg *seg; 4074 struct seg *brkseg, *stkseg; 4075 struct vnode *vp; 4076 struct vattr vattr; 4077 uint_t prot; 4078 4079 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 4080 4081 /* 4082 * Request an initial buffer size that doesn't waste memory 4083 * if the address space has only a small number of segments. 4084 */ 4085 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 4086 4087 if ((seg = AS_SEGFIRST(as)) == NULL) 4088 return (0); 4089 4090 brkseg = break_seg(p); 4091 stkseg = as_segat(as, prgetstackbase(p)); 4092 4093 do { 4094 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 4095 caddr_t saddr, naddr, baddr; 4096 void *tmp = NULL; 4097 ssize_t psz; 4098 char *parr; 4099 uint64_t npages; 4100 uint64_t pagenum; 4101 4102 /* 4103 * Segment loop part one: iterate from the base of the segment 4104 * to its end, pausing at each address boundary (baddr) between 4105 * ranges that have different virtual memory protections. 4106 */ 4107 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { 4108 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); 4109 ASSERT(baddr >= saddr && baddr <= eaddr); 4110 4111 /* 4112 * Segment loop part two: iterate from the current 4113 * position to the end of the protection boundary, 4114 * pausing at each address boundary (naddr) between 4115 * ranges that have different underlying page sizes. 
4116 */ 4117 for (; saddr < baddr; saddr = naddr) { 4118 psz = pr_getpagesize(seg, saddr, &naddr, baddr); 4119 ASSERT(naddr >= saddr && naddr <= baddr); 4120 4121 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 4122 4123 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 4124 mp->pr_size = (size32_t)(naddr - saddr); 4125 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 4126 mp->pr_mflags = 0; 4127 if (prot & PROT_READ) 4128 mp->pr_mflags |= MA_READ; 4129 if (prot & PROT_WRITE) 4130 mp->pr_mflags |= MA_WRITE; 4131 if (prot & PROT_EXEC) 4132 mp->pr_mflags |= MA_EXEC; 4133 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 4134 mp->pr_mflags |= MA_SHARED; 4135 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 4136 mp->pr_mflags |= MA_NORESERVE; 4137 if (seg->s_ops == &segspt_shmops || 4138 (seg->s_ops == &segvn_ops && 4139 (SEGOP_GETVP(seg, saddr, &vp) != 0 || 4140 vp == NULL))) 4141 mp->pr_mflags |= MA_ANON; 4142 if (seg == brkseg) 4143 mp->pr_mflags |= MA_BREAK; 4144 else if (seg == stkseg) 4145 mp->pr_mflags |= MA_STACK; 4146 if (seg->s_ops == &segspt_shmops) 4147 mp->pr_mflags |= MA_ISM | MA_SHM; 4148 4149 mp->pr_pagesize = PAGESIZE; 4150 if (psz == -1) { 4151 mp->pr_hatpagesize = 0; 4152 } else { 4153 mp->pr_hatpagesize = psz; 4154 } 4155 4156 /* 4157 * Manufacture a filename for the "object" dir. 4158 */ 4159 mp->pr_dev = PRNODEV32; 4160 vattr.va_mask = AT_FSID|AT_NODEID; 4161 if (seg->s_ops == &segvn_ops && 4162 SEGOP_GETVP(seg, saddr, &vp) == 0 && 4163 vp != NULL && vp->v_type == VREG && 4164 VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) { 4165 (void) cmpldev(&mp->pr_dev, 4166 vattr.va_fsid); 4167 mp->pr_ino = vattr.va_nodeid; 4168 if (vp == p->p_exec) 4169 (void) strcpy(mp->pr_mapname, 4170 "a.out"); 4171 else 4172 pr_object_name(mp->pr_mapname, 4173 vp, &vattr); 4174 } 4175 4176 /* 4177 * Get the SysV shared memory id, if any. 
4178 */ 4179 if ((mp->pr_mflags & MA_SHARED) && 4180 p->p_segacct && (mp->pr_shmid = shmgetid(p, 4181 seg->s_base)) != SHMID_NONE) { 4182 if (mp->pr_shmid == SHMID_FREE) 4183 mp->pr_shmid = -1; 4184 4185 mp->pr_mflags |= MA_SHM; 4186 } else { 4187 mp->pr_shmid = -1; 4188 } 4189 4190 npages = ((uintptr_t)(naddr - saddr)) >> 4191 PAGESHIFT; 4192 parr = kmem_zalloc(npages, KM_SLEEP); 4193 4194 SEGOP_INCORE(seg, saddr, naddr - saddr, parr); 4195 4196 for (pagenum = 0; pagenum < npages; pagenum++) { 4197 if (parr[pagenum] & SEG_PAGE_INCORE) 4198 mp->pr_rss++; 4199 if (parr[pagenum] & SEG_PAGE_ANON) 4200 mp->pr_anon++; 4201 if (parr[pagenum] & SEG_PAGE_LOCKED) 4202 mp->pr_locked++; 4203 } 4204 kmem_free(parr, npages); 4205 } 4206 } 4207 ASSERT(tmp == NULL); 4208 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 4209 4210 return (0); 4211 } 4212 #endif /* _SYSCALL32_IMPL */ 4213