1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <sys/types.h> 33 #include <sys/t_lock.h> 34 #include <sys/param.h> 35 #include <sys/cmn_err.h> 36 #include <sys/cred.h> 37 #include <sys/priv.h> 38 #include <sys/debug.h> 39 #include <sys/errno.h> 40 #include <sys/inline.h> 41 #include <sys/kmem.h> 42 #include <sys/mman.h> 43 #include <sys/proc.h> 44 #include <sys/sobject.h> 45 #include <sys/sysmacros.h> 46 #include <sys/systm.h> 47 #include <sys/uio.h> 48 #include <sys/var.h> 49 #include <sys/vfs.h> 50 #include <sys/vnode.h> 51 #include <sys/session.h> 52 #include <sys/pcb.h> 53 #include <sys/signal.h> 54 #include <sys/user.h> 55 #include <sys/disp.h> 56 #include <sys/class.h> 57 #include <sys/ts.h> 58 #include <sys/bitmap.h> 59 #include <sys/poll.h> 60 #include <sys/shm_impl.h> 61 #include <sys/fault.h> 62 #include <sys/syscall.h> 63 #include <sys/procfs.h> 64 #include <sys/processor.h> 65 #include <sys/cpuvar.h> 66 
#include <sys/copyops.h>
#include <sys/time.h>
#include <sys/msacct.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_spt.h>
#include <vm/page.h>
#include <sys/vmparam.h>
#include <sys/swap.h>
#include <fs/proc/prdata.h>
#include <sys/task.h>
#include <sys/project.h>
#include <sys/contract_impl.h>
#include <sys/contract/process.h>
#include <sys/contract/process_impl.h>
#include <sys/schedctl.h>
#include <sys/pool.h>
#include <sys/zone.h>
#include <sys/atomic.h>
#include <sys/sdt.h>

/*
 * NOTE(review): presumably an upper bound on some spin/retry loop; the
 * user of this constant is not in the part of the file reviewed here.
 */
#define	MAX_ITERS_SPIN	5

/*
 * Per-page vectors describing a run of pages within a segment:
 * one protection word and one in-core flag per page, pg_npages
 * entries each, starting at page number pg_pnbase of the segment.
 */
typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

/* Watched-page helpers; defined later in this file. */
static int set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static void clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);

/*
 * Choose an lwp from the complete set of lwps for the process.
 * This is called for any operation applied to the process
 * file descriptor that requires an lwp to operate upon.
 *
 * Returns a pointer to the thread for the selected LWP,
 * and with the dispatcher lock held for the thread.
 *
 * The algorithm for choosing an lwp is critical for /proc semantics;
 * don't touch this code unless you know all of the implications.
*/
kthread_t *
prchoose(proc_t *p)
{
	kthread_t *t;
	kthread_t *t_onproc = NULL;	/* running on processor */
	kthread_t *t_run = NULL;	/* runnable, on disp queue */
	kthread_t *t_sleep = NULL;	/* sleeping */
	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
	kthread_t *t_susp = NULL;	/* suspended stop */
	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
	kthread_t *t_req = NULL;	/* requested stop */
	kthread_t *t_istop = NULL;	/* event-of-interest stop */

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * If the agent lwp exists, it takes precedence over all others.
	 */
	if ((t = p->p_agenttp) != NULL) {
		thread_lock(t);
		return (t);
	}

	/* A process with no lwps yields NULL (and no thread lock held). */
	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
		return (t);
	/*
	 * Walk the circular lwp list once, remembering the first lwp
	 * seen in each category.  Each thread is locked only while its
	 * state is examined; the lock is dropped before moving on.
	 */
	do {		/* for each lwp in the process */
		if (VSTOPPED(t)) {	/* virtually stopped */
			/* classified as a requested stop without locking */
			if (t_req == NULL)
				t_req = t;
			continue;
		}

		thread_lock(t);		/* make sure thread is in good state */
		switch (t->t_state) {
		default:
			panic("prchoose: bad thread state %d, thread 0x%p",
			    t->t_state, (void *)t);
			/*NOTREACHED*/
		case TS_SLEEP:
			/* this is filthy */
			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
			    t->t_wchan0 == NULL) {
				if (t_hold == NULL)
					t_hold = t;
			} else {
				if (t_sleep == NULL)
					t_sleep = t;
			}
			break;
		case TS_RUN:
		case TS_WAIT:
			if (t_run == NULL)
				t_run = t;
			break;
		case TS_ONPROC:
			if (t_onproc == NULL)
				t_onproc = t;
			break;
		case TS_ZOMB:		/* last possible choice */
			break;
		case TS_STOPPED:
			switch (t->t_whystop) {
			case PR_SUSPENDED:
				if (t_susp == NULL)
					t_susp = t;
				break;
			case PR_JOBCONTROL:
				if (t->t_proc_flag & TP_PRSTOP) {
					if (t_jdstop == NULL)
						t_jdstop = t;
				} else {
					if (t_jstop == NULL)
						t_jstop = t;
				}
				break;
			case PR_REQUESTED:
				if (t_req == NULL)
					t_req = t;
				break;
			case PR_SYSENTRY:
			case PR_SYSEXIT:
			case PR_SIGNALLED:
			case PR_FAULTED:
				/*
				 * Make an lwp calling exit() be the
				 * last lwp seen in the process.
				 */
				if (t_istop == NULL ||
				    (t_istop->t_whystop == PR_SYSENTRY &&
				    t_istop->t_whatstop == SYS_exit))
					t_istop = t;
				break;
			case PR_CHECKPOINT:	/* can't happen? */
				break;
			default:
				panic("prchoose: bad t_whystop %d, thread 0x%p",
				    t->t_whystop, (void *)t);
				/*NOTREACHED*/
			}
			break;
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);

	/*
	 * Pick the representative lwp in strict priority order:
	 * on-processor, runnable, sleeping, job-control stopped,
	 * job-control stopped with a directed stop, stopped on an event
	 * of interest, requested stop, sleeping in a hold, suspended.
	 * If no category was populated, fall back to the list head.
	 */
	if (t_onproc)
		t = t_onproc;
	else if (t_run)
		t = t_run;
	else if (t_sleep)
		t = t_sleep;
	else if (t_jstop)
		t = t_jstop;
	else if (t_jdstop)
		t = t_jdstop;
	else if (t_istop)
		t = t_istop;
	else if (t_req)
		t = t_req;
	else if (t_hold)
		t = t_hold;
	else if (t_susp)
		t = t_susp;
	else			/* TS_ZOMB */
		t = p->p_tlist;

	/* The caller receives the chosen thread with its lock held. */
	if (t != NULL)
		thread_lock(t);
	return (t);
}

/*
 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
 * on the /proc file descriptor.  Called from stop() when a traced
 * process stops on an event of interest.  Also called from exit()
 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
 */
void
prnotify(struct vnode *vp)
{
	prcommon_t *pcp = VTOP(vp)->pr_common;

	/* wake every thread blocked on this common structure's cv */
	mutex_enter(&pcp->prc_mutex);
	cv_broadcast(&pcp->prc_wait);
	mutex_exit(&pcp->prc_mutex);
	if (pcp->prc_flags & PRC_POLL) {
		/*
		 * We call pollwakeup() with POLLHUP to ensure that
		 * the pollers are awakened even if they are polling
		 * for nothing (i.e., waiting for the process to exit).
		 * This enables the use of the PRC_POLL flag for optimization
		 * (we can turn off PRC_POLL only if we know no pollers remain).
		 */
		pcp->prc_flags &= ~PRC_POLL;
		pollwakeup(&pcp->prc_pollhead, POLLHUP);
	}
}

/*
 * called immediately below, in prfree()
 *
 * Walk the vnode chain starting at vp: clear each common structure's
 * process pointer, wake its waiters, and unlink the node (pr_next is
 * set to NULL) as the walk advances.
 */
static void
prfreenotify(vnode_t *vp)
{
	prnode_t *pnp;
	prcommon_t *pcp;

	while (vp != NULL) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL);
		pcp->prc_proc = NULL;
		/*
		 * We can't call prnotify() here because we are holding
		 * pidlock.  We assert that there is no need to.
		 */
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		ASSERT(!(pcp->prc_flags & PRC_POLL));

		/* fetch the successor before severing the link */
		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}
}

/*
 * Called from a hook in freeproc() when a traced process is removed
 * from the process table.  The proc-table pointers of all associated
 * /proc vnodes are cleared to indicate that the process has gone away.
 *
 * The caller must hold pidlock.
 */
void
prfree(proc_t *p)
{
	uint_t slot = p->p_slot;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Block the process against /proc so it can be freed.
	 * It cannot be freed while locked by some controlling process.
	 * Lock ordering:
	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
	 */
	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * Drop pr_pidlock before sleeping, then drop p_lock too
		 * so both can be retaken in the documented order.
		 */
		mutex_exit(&pr_pidlock);
		cv_wait(&pr_pid_cv[slot], &p->p_lock);
		mutex_exit(&p->p_lock);
		mutex_enter(&pr_pidlock);
		mutex_enter(&p->p_lock);
	}

	ASSERT(p->p_tlist == NULL);

	prfreenotify(p->p_plist);
	p->p_plist = NULL;

	prfreenotify(p->p_trace);
	p->p_trace = NULL;

	/*
	 * We broadcast to wake up everyone waiting for this process.
	 * No one can reach this process from this point on.
	 */
	cv_broadcast(&pr_pid_cv[slot]);

	mutex_exit(&p->p_lock);
	mutex_exit(&pr_pidlock);
}

/*
 * Called from a hook in exit() when a traced process is becoming a zombie.
 *
 * The caller must hold p->p_lock.
 */
void
prexit(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
	if (p->p_trace) {
		/* mark the process common as destroyed, then wake waiters */
		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
		prnotify(p->p_trace);
	}
	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
}

/*
 * Called when a thread calls lwp_exit().
 *
 * t must be curthread and the caller must hold p->p_lock.
 */
void
prlwpexit(kthread_t *t)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	proc_t *p = ttoproc(t);
	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;

	ASSERT(t == curthread);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	/* Disassociate this thread from any vnode on the process list. */
	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		if (pcp->prc_thread == t) {
			pcp->prc_thread = NULL;
			pcp->prc_flags |= PRC_DESTROY;
		}
	}

	/* Mark every vnode on this lwp's trace list and wake its waiters. */
	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		pcp->prc_thread = NULL;
		pcp->prc_flags |= PRC_DESTROY;
		prnotify(vp);
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}

/*
 * Called when a zombie thread is joined or when a
 * detached lwp exits.  Called from lwp_hash_out().
415 */ 416 void 417 prlwpfree(proc_t *p, lwpent_t *lep) 418 { 419 vnode_t *vp; 420 prnode_t *pnp; 421 prcommon_t *pcp; 422 423 ASSERT(MUTEX_HELD(&p->p_lock)); 424 425 /* 426 * The process must be blocked against /proc to do this safely. 427 * The lwp must not disappear while the process is marked P_PR_LOCK. 428 * It is the caller's responsibility to have called prbarrier(p). 429 */ 430 ASSERT(!(p->p_proc_flag & P_PR_LOCK)); 431 432 vp = lep->le_trace; 433 lep->le_trace = NULL; 434 while (vp) { 435 prnotify(vp); 436 pnp = VTOP(vp); 437 pcp = pnp->pr_common; 438 ASSERT(pcp->prc_thread == NULL && 439 (pcp->prc_flags & PRC_DESTROY)); 440 pcp->prc_tslot = -1; 441 vp = pnp->pr_next; 442 pnp->pr_next = NULL; 443 } 444 445 if (p->p_trace) 446 prnotify(p->p_trace); 447 } 448 449 /* 450 * Called from a hook in exec() when a thread starts exec(). 451 */ 452 void 453 prexecstart(void) 454 { 455 proc_t *p = ttoproc(curthread); 456 klwp_t *lwp = ttolwp(curthread); 457 458 /* 459 * The P_PR_EXEC flag blocks /proc operations for 460 * the duration of the exec(). 461 * We can't start exec() while the process is 462 * locked by /proc, so we call prbarrier(). 463 * lwp_nostop keeps the process from being stopped 464 * via job control for the duration of the exec(). 465 */ 466 467 ASSERT(MUTEX_HELD(&p->p_lock)); 468 prbarrier(p); 469 lwp->lwp_nostop++; 470 p->p_proc_flag |= P_PR_EXEC; 471 } 472 473 /* 474 * Called from a hook in exec() when a thread finishes exec(). 475 * The thread may or may not have succeeded. Some other thread 476 * may have beat it to the punch. 
477 */ 478 void 479 prexecend(void) 480 { 481 proc_t *p = ttoproc(curthread); 482 klwp_t *lwp = ttolwp(curthread); 483 vnode_t *vp; 484 prnode_t *pnp; 485 prcommon_t *pcp; 486 model_t model = p->p_model; 487 id_t tid = curthread->t_tid; 488 int tslot = curthread->t_dslot; 489 490 ASSERT(MUTEX_HELD(&p->p_lock)); 491 492 lwp->lwp_nostop--; 493 if (p->p_flag & SEXITLWPS) { 494 /* 495 * We are on our way to exiting because some 496 * other thread beat us in the race to exec(). 497 * Don't clear the P_PR_EXEC flag in this case. 498 */ 499 return; 500 } 501 502 /* 503 * Wake up anyone waiting in /proc for the process to complete exec(). 504 */ 505 p->p_proc_flag &= ~P_PR_EXEC; 506 if ((vp = p->p_trace) != NULL) { 507 pcp = VTOP(vp)->pr_common; 508 mutex_enter(&pcp->prc_mutex); 509 cv_broadcast(&pcp->prc_wait); 510 mutex_exit(&pcp->prc_mutex); 511 for (; vp != NULL; vp = pnp->pr_next) { 512 pnp = VTOP(vp); 513 pnp->pr_common->prc_datamodel = model; 514 } 515 } 516 if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) { 517 /* 518 * We dealt with the process common above. 519 */ 520 ASSERT(p->p_trace != NULL); 521 pcp = VTOP(vp)->pr_common; 522 mutex_enter(&pcp->prc_mutex); 523 cv_broadcast(&pcp->prc_wait); 524 mutex_exit(&pcp->prc_mutex); 525 for (; vp != NULL; vp = pnp->pr_next) { 526 pnp = VTOP(vp); 527 pcp = pnp->pr_common; 528 pcp->prc_datamodel = model; 529 pcp->prc_tid = tid; 530 pcp->prc_tslot = tslot; 531 } 532 } 533 } 534 535 /* 536 * Called from a hook in relvm() just before freeing the address space. 537 * We free all the watched areas now. 
538 */ 539 void 540 prrelvm(void) 541 { 542 proc_t *p = ttoproc(curthread); 543 544 mutex_enter(&p->p_lock); 545 prbarrier(p); /* block all other /proc operations */ 546 if (pr_watch_active(p)) { 547 pr_free_watchpoints(p); 548 watch_disable(curthread); 549 } 550 mutex_exit(&p->p_lock); 551 pr_free_watched_pages(p); 552 } 553 554 /* 555 * Called from hooks in exec-related code when a traced process 556 * attempts to exec(2) a setuid/setgid program or an unreadable 557 * file. Rather than fail the exec we invalidate the associated 558 * /proc vnodes so that subsequent attempts to use them will fail. 559 * 560 * All /proc vnodes, except directory vnodes, are retained on a linked 561 * list (rooted at p_plist in the process structure) until last close. 562 * 563 * A controlling process must re-open the /proc files in order to 564 * regain control. 565 */ 566 void 567 prinvalidate(struct user *up) 568 { 569 kthread_t *t = curthread; 570 proc_t *p = ttoproc(t); 571 vnode_t *vp; 572 prnode_t *pnp; 573 int writers = 0; 574 575 mutex_enter(&p->p_lock); 576 prbarrier(p); /* block all other /proc operations */ 577 578 /* 579 * At this moment, there can be only one lwp in the process. 580 */ 581 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 582 583 /* 584 * Invalidate any currently active /proc vnodes. 585 */ 586 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) { 587 pnp = VTOP(vp); 588 switch (pnp->pr_type) { 589 case PR_PSINFO: /* these files can read by anyone */ 590 case PR_LPSINFO: 591 case PR_LWPSINFO: 592 case PR_LWPDIR: 593 case PR_LWPIDDIR: 594 case PR_USAGE: 595 case PR_LUSAGE: 596 case PR_LWPUSAGE: 597 break; 598 default: 599 pnp->pr_flags |= PR_INVAL; 600 break; 601 } 602 } 603 /* 604 * Wake up anyone waiting for the process or lwp. 605 * p->p_trace is guaranteed to be non-NULL if there 606 * are any open /proc files for this process. 
607 */ 608 if ((vp = p->p_trace) != NULL) { 609 prcommon_t *pcp = VTOP(vp)->pr_pcommon; 610 611 prnotify(vp); 612 /* 613 * Are there any writers? 614 */ 615 if ((writers = pcp->prc_writers) != 0) { 616 /* 617 * Clear the exclusive open flag (old /proc interface). 618 * Set prc_selfopens equal to prc_writers so that 619 * the next O_EXCL|O_WRITE open will succeed 620 * even with existing (though invalid) writers. 621 * prclose() must decrement prc_selfopens when 622 * the invalid files are closed. 623 */ 624 pcp->prc_flags &= ~PRC_EXCL; 625 ASSERT(pcp->prc_selfopens <= writers); 626 pcp->prc_selfopens = writers; 627 } 628 } 629 vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace; 630 while (vp != NULL) { 631 /* 632 * We should not invalidate the lwpiddir vnodes, 633 * but the necessities of maintaining the old 634 * ioctl()-based version of /proc require it. 635 */ 636 pnp = VTOP(vp); 637 pnp->pr_flags |= PR_INVAL; 638 prnotify(vp); 639 vp = pnp->pr_next; 640 } 641 642 /* 643 * If any tracing flags are in effect and any vnodes are open for 644 * writing then set the requested-stop and run-on-last-close flags. 645 * Otherwise, clear all tracing flags. 646 */ 647 t->t_proc_flag &= ~TP_PAUSE; 648 if ((p->p_proc_flag & P_PR_TRACE) && writers) { 649 t->t_proc_flag |= TP_PRSTOP; 650 aston(t); /* so ISSIG will see the flag */ 651 p->p_proc_flag |= P_PR_RUNLCL; 652 } else { 653 premptyset(&up->u_entrymask); /* syscalls */ 654 premptyset(&up->u_exitmask); 655 up->u_systrap = 0; 656 premptyset(&p->p_sigmask); /* signals */ 657 premptyset(&p->p_fltmask); /* faults */ 658 t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING); 659 p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE); 660 prnostep(ttolwp(t)); 661 } 662 663 mutex_exit(&p->p_lock); 664 } 665 666 /* 667 * Acquire the controlled process's p_lock and mark it P_PR_LOCK. 668 * Return with pr_pidlock held in all cases. 669 * Return with p_lock held if the the process still exists. 
* Return value is the process pointer if the process still exists, else NULL.
 * If we lock the process, give ourself kernel priority to avoid deadlocks;
 * this is undone in prunlock().
 */
proc_t *
pr_p_lock(prnode_t *pnp)
{
	proc_t *p;
	prcommon_t *pcp;

	mutex_enter(&pr_pidlock);
	/* no process-level common, or the process is already gone */
	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
		return (NULL);
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * This cv/mutex pair is persistent even if
		 * the process disappears while we sleep.
		 */
		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
		kmutex_t *mp = &p->p_lock;

		/*
		 * Sleep without pr_pidlock, then release p_lock and
		 * retake pr_pidlock first before looking at the
		 * process again.
		 */
		mutex_exit(&pr_pidlock);
		cv_wait(cv, mp);
		mutex_exit(mp);
		mutex_enter(&pr_pidlock);
		/* the process may have been freed while we slept */
		if (pcp->prc_proc == NULL)
			return (NULL);
		ASSERT(p == pcp->prc_proc);
		mutex_enter(&p->p_lock);
	}
	p->p_proc_flag |= P_PR_LOCK;
	THREAD_KPRI_REQUEST();
	return (p);
}

/*
 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
 * This prevents any lwp of the process from disappearing and
 * blocks most operations that a process can perform on itself.
 * Returns 0 on success, a non-zero error number on failure.
 *
 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
 *
 * error returns:
 *	ENOENT: process or lwp has disappeared or process is exiting
 *		(or has become a zombie and zdisp == ZNO).
 *	EAGAIN: procfs vnode has become invalid.
 *	EINTR:  signal arrived while waiting for exec to complete.
 */
int
prlock(prnode_t *pnp, int zdisp)
{
	prcommon_t *pcp;
	proc_t *p;

again:
	pcp = pnp->pr_common;
	p = pr_p_lock(pnp);
	/* pr_p_lock() returns with pr_pidlock held in every case */
	mutex_exit(&pr_pidlock);

	/*
	 * Return ENOENT immediately if there is no process.
	 */
	if (p == NULL)
		return (ENOENT);

	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);

	/*
	 * Return ENOENT if process entered zombie state or is exiting
	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
	 */
	if (zdisp == ZNO &&
	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
		prunlock(pnp);
		return (ENOENT);
	}

	/*
	 * If lwp-specific, check to see if lwp has disappeared.
	 */
	if (pcp->prc_flags & PRC_LWP) {
		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
		    pcp->prc_tslot == -1) {
			prunlock(pnp);
			return (ENOENT);
		}
	}

	/*
	 * Return EAGAIN if we have encountered a security violation.
	 * (The process exec'd a set-id or unreadable executable file.)
	 */
	if (pnp->pr_flags & PR_INVAL) {
		prunlock(pnp);
		return (EAGAIN);
	}

	/*
	 * If process is undergoing an exec(), wait for
	 * completion and then start all over again.
	 */
	if (p->p_proc_flag & P_PR_EXEC) {
		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
		/*
		 * Take prc_mutex before unlocking the process, so the
		 * wait below is entered with prc_mutex already held.
		 */
		mutex_enter(&pcp->prc_mutex);
		prunlock(pnp);
		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
			mutex_exit(&pcp->prc_mutex);
			return (EINTR);
		}
		mutex_exit(&pcp->prc_mutex);
		goto again;
	}

	/*
	 * We return holding p->p_lock.
	 */
	return (0);
}

/*
 * Undo prlock() and pr_p_lock().
 * p->p_lock is still held; pr_pidlock is no longer held.
 *
 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
 * if any, waiting for the flag to be dropped; it retains p->p_lock.
 *
 * prunlock() calls prunmark() and then drops p->p_lock.
800 */ 801 void 802 prunmark(proc_t *p) 803 { 804 ASSERT(p->p_proc_flag & P_PR_LOCK); 805 ASSERT(MUTEX_HELD(&p->p_lock)); 806 807 cv_signal(&pr_pid_cv[p->p_slot]); 808 p->p_proc_flag &= ~P_PR_LOCK; 809 THREAD_KPRI_RELEASE(); 810 } 811 812 void 813 prunlock(prnode_t *pnp) 814 { 815 prcommon_t *pcp = pnp->pr_common; 816 proc_t *p = pcp->prc_proc; 817 818 /* 819 * If we (or someone) gave it a SIGKILL, and it is not 820 * already a zombie, set it running unconditionally. 821 */ 822 if ((p->p_flag & SKILLED) && 823 !(p->p_flag & SEXITING) && 824 !(pcp->prc_flags & PRC_DESTROY) && 825 !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1)) 826 (void) pr_setrun(pnp, 0); 827 prunmark(p); 828 mutex_exit(&p->p_lock); 829 } 830 831 /* 832 * Called while holding p->p_lock to delay until the process is unlocked. 833 * We enter holding p->p_lock; p->p_lock is dropped and reacquired. 834 * The process cannot become locked again until p->p_lock is dropped. 835 */ 836 void 837 prbarrier(proc_t *p) 838 { 839 ASSERT(MUTEX_HELD(&p->p_lock)); 840 841 if (p->p_proc_flag & P_PR_LOCK) { 842 /* The process is locked; delay until not locked */ 843 uint_t slot = p->p_slot; 844 845 while (p->p_proc_flag & P_PR_LOCK) 846 cv_wait(&pr_pid_cv[slot], &p->p_lock); 847 cv_signal(&pr_pid_cv[slot]); 848 } 849 } 850 851 /* 852 * Return process/lwp status. 853 * The u-block is mapped in by this routine and unmapped at the end. 
854 */ 855 void 856 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp) 857 { 858 kthread_t *t; 859 860 ASSERT(MUTEX_HELD(&p->p_lock)); 861 862 t = prchoose(p); /* returns locked thread */ 863 ASSERT(t != NULL); 864 thread_unlock(t); 865 866 /* just bzero the process part, prgetlwpstatus() does the rest */ 867 bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t)); 868 sp->pr_nlwp = p->p_lwpcnt; 869 sp->pr_nzomb = p->p_zombcnt; 870 prassignset(&sp->pr_sigpend, &p->p_sig); 871 sp->pr_brkbase = (uintptr_t)p->p_brkbase; 872 sp->pr_brksize = p->p_brksize; 873 sp->pr_stkbase = (uintptr_t)prgetstackbase(p); 874 sp->pr_stksize = p->p_stksize; 875 sp->pr_pid = p->p_pid; 876 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 877 (p->p_flag & SZONETOP)) { 878 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 879 /* 880 * Inside local zones, fake zsched's pid as parent pids for 881 * processes which reference processes outside of the zone. 882 */ 883 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 884 } else { 885 sp->pr_ppid = p->p_ppid; 886 } 887 sp->pr_pgid = p->p_pgrp; 888 sp->pr_sid = p->p_sessp->s_sid; 889 sp->pr_taskid = p->p_task->tk_tkid; 890 sp->pr_projid = p->p_task->tk_proj->kpj_id; 891 sp->pr_zoneid = p->p_zone->zone_id; 892 hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime); 893 hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime); 894 TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime); 895 TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime); 896 prassignset(&sp->pr_sigtrace, &p->p_sigmask); 897 prassignset(&sp->pr_flttrace, &p->p_fltmask); 898 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask); 899 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask); 900 switch (p->p_model) { 901 case DATAMODEL_ILP32: 902 sp->pr_dmodel = PR_MODEL_ILP32; 903 break; 904 case DATAMODEL_LP64: 905 sp->pr_dmodel = PR_MODEL_LP64; 906 break; 907 } 908 if (p->p_agenttp) 909 sp->pr_agentid = p->p_agenttp->t_tid; 910 911 /* get the chosen lwp's status */ 912 prgetlwpstatus(t, &sp->pr_lwp, zp); 913 
914 /* replicate the flags */ 915 sp->pr_flags = sp->pr_lwp.pr_flags; 916 } 917 918 #ifdef _SYSCALL32_IMPL 919 void 920 prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp) 921 { 922 proc_t *p = ttoproc(t); 923 klwp_t *lwp = ttolwp(t); 924 struct mstate *ms = &lwp->lwp_mstate; 925 hrtime_t usr, sys; 926 int flags; 927 ulong_t instr; 928 929 ASSERT(MUTEX_HELD(&p->p_lock)); 930 931 bzero(sp, sizeof (*sp)); 932 flags = 0L; 933 if (t->t_state == TS_STOPPED) { 934 flags |= PR_STOPPED; 935 if ((t->t_schedflag & TS_PSTART) == 0) 936 flags |= PR_ISTOP; 937 } else if (VSTOPPED(t)) { 938 flags |= PR_STOPPED|PR_ISTOP; 939 } 940 if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP)) 941 flags |= PR_DSTOP; 942 if (lwp->lwp_asleep) 943 flags |= PR_ASLEEP; 944 if (t == p->p_agenttp) 945 flags |= PR_AGENT; 946 if (!(t->t_proc_flag & TP_TWAIT)) 947 flags |= PR_DETACH; 948 if (t->t_proc_flag & TP_DAEMON) 949 flags |= PR_DAEMON; 950 if (p->p_proc_flag & P_PR_FORK) 951 flags |= PR_FORK; 952 if (p->p_proc_flag & P_PR_RUNLCL) 953 flags |= PR_RLC; 954 if (p->p_proc_flag & P_PR_KILLCL) 955 flags |= PR_KLC; 956 if (p->p_proc_flag & P_PR_ASYNC) 957 flags |= PR_ASYNC; 958 if (p->p_proc_flag & P_PR_BPTADJ) 959 flags |= PR_BPTADJ; 960 if (p->p_proc_flag & P_PR_PTRACE) 961 flags |= PR_PTRACE; 962 if (p->p_flag & SMSACCT) 963 flags |= PR_MSACCT; 964 if (p->p_flag & SMSFORK) 965 flags |= PR_MSFORK; 966 if (p->p_flag & SVFWAIT) 967 flags |= PR_VFORKP; 968 sp->pr_flags = flags; 969 if (VSTOPPED(t)) { 970 sp->pr_why = PR_REQUESTED; 971 sp->pr_what = 0; 972 } else { 973 sp->pr_why = t->t_whystop; 974 sp->pr_what = t->t_whatstop; 975 } 976 sp->pr_lwpid = t->t_tid; 977 sp->pr_cursig = lwp->lwp_cursig; 978 prassignset(&sp->pr_lwppend, &t->t_sig); 979 schedctl_finish_sigblock(t); 980 prassignset(&sp->pr_lwphold, &t->t_hold); 981 if (t->t_whystop == PR_FAULTED) { 982 siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info); 983 if (t->t_whatstop == FLTPAGE) 984 sp->pr_info.si_addr = 985 
(caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr; 986 } else if (lwp->lwp_curinfo) 987 siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info); 988 if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID && 989 sp->pr_info.si_zoneid != zp->zone_id) { 990 sp->pr_info.si_pid = zp->zone_zsched->p_pid; 991 sp->pr_info.si_uid = 0; 992 sp->pr_info.si_ctid = -1; 993 sp->pr_info.si_zoneid = zp->zone_id; 994 } 995 sp->pr_altstack.ss_sp = 996 (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp; 997 sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size; 998 sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags; 999 prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action); 1000 sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext; 1001 sp->pr_ustack = (caddr32_t)lwp->lwp_ustack; 1002 (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name, 1003 sizeof (sp->pr_clname) - 1); 1004 if (flags & PR_STOPPED) 1005 hrt2ts32(t->t_stoptime, &sp->pr_tstamp); 1006 usr = ms->ms_acct[LMS_USER]; 1007 sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP]; 1008 scalehrtime(&usr); 1009 scalehrtime(&sys); 1010 hrt2ts32(usr, &sp->pr_utime); 1011 hrt2ts32(sys, &sp->pr_stime); 1012 1013 /* 1014 * Fetch the current instruction, if not a system process. 1015 * We don't attempt this unless the lwp is stopped. 1016 */ 1017 if ((p->p_flag & SSYS) || p->p_as == &kas) 1018 sp->pr_flags |= (PR_ISSYS|PR_PCINVAL); 1019 else if (!(flags & PR_STOPPED)) 1020 sp->pr_flags |= PR_PCINVAL; 1021 else if (!prfetchinstr(lwp, &instr)) 1022 sp->pr_flags |= PR_PCINVAL; 1023 else 1024 sp->pr_instr = (uint32_t)instr; 1025 1026 /* 1027 * Drop p_lock while touching the lwp's stack. 
1028 */ 1029 mutex_exit(&p->p_lock); 1030 if (prisstep(lwp)) 1031 sp->pr_flags |= PR_STEP; 1032 if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) { 1033 int i; 1034 1035 sp->pr_syscall = get_syscall32_args(lwp, 1036 (int *)sp->pr_sysarg, &i); 1037 sp->pr_nsysarg = (ushort_t)i; 1038 } 1039 if ((flags & PR_STOPPED) || t == curthread) 1040 prgetprregs32(lwp, sp->pr_reg); 1041 if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) || 1042 (flags & PR_VFORKP)) { 1043 long r1, r2; 1044 user_t *up; 1045 auxv_t *auxp; 1046 int i; 1047 1048 sp->pr_errno = prgetrvals(lwp, &r1, &r2); 1049 if (sp->pr_errno == 0) { 1050 sp->pr_rval1 = (int32_t)r1; 1051 sp->pr_rval2 = (int32_t)r2; 1052 sp->pr_errpriv = PRIV_NONE; 1053 } else 1054 sp->pr_errpriv = lwp->lwp_badpriv; 1055 1056 if (t->t_sysnum == SYS_exec || t->t_sysnum == SYS_execve) { 1057 up = PTOU(p); 1058 sp->pr_sysarg[0] = 0; 1059 sp->pr_sysarg[1] = (caddr32_t)up->u_argv; 1060 sp->pr_sysarg[2] = (caddr32_t)up->u_envp; 1061 for (i = 0, auxp = up->u_auxv; 1062 i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]); 1063 i++, auxp++) { 1064 if (auxp->a_type == AT_SUN_EXECNAME) { 1065 sp->pr_sysarg[0] = 1066 (caddr32_t) 1067 (uintptr_t)auxp->a_un.a_ptr; 1068 break; 1069 } 1070 } 1071 } 1072 } 1073 if (prhasfp()) 1074 prgetprfpregs32(lwp, &sp->pr_fpreg); 1075 mutex_enter(&p->p_lock); 1076 } 1077 1078 void 1079 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp) 1080 { 1081 kthread_t *t; 1082 1083 ASSERT(MUTEX_HELD(&p->p_lock)); 1084 1085 t = prchoose(p); /* returns locked thread */ 1086 ASSERT(t != NULL); 1087 thread_unlock(t); 1088 1089 /* just bzero the process part, prgetlwpstatus32() does the rest */ 1090 bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t)); 1091 sp->pr_nlwp = p->p_lwpcnt; 1092 sp->pr_nzomb = p->p_zombcnt; 1093 prassignset(&sp->pr_sigpend, &p->p_sig); 1094 sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase; 1095 sp->pr_brksize = (uint32_t)p->p_brksize; 1096 sp->pr_stkbase = 
(uint32_t)(uintptr_t)prgetstackbase(p); 1097 sp->pr_stksize = (uint32_t)p->p_stksize; 1098 sp->pr_pid = p->p_pid; 1099 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 1100 (p->p_flag & SZONETOP)) { 1101 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 1102 /* 1103 * Inside local zones, fake zsched's pid as parent pids for 1104 * processes which reference processes outside of the zone. 1105 */ 1106 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 1107 } else { 1108 sp->pr_ppid = p->p_ppid; 1109 } 1110 sp->pr_pgid = p->p_pgrp; 1111 sp->pr_sid = p->p_sessp->s_sid; 1112 sp->pr_taskid = p->p_task->tk_tkid; 1113 sp->pr_projid = p->p_task->tk_proj->kpj_id; 1114 sp->pr_zoneid = p->p_zone->zone_id; 1115 hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime); 1116 hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime); 1117 TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime); 1118 TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime); 1119 prassignset(&sp->pr_sigtrace, &p->p_sigmask); 1120 prassignset(&sp->pr_flttrace, &p->p_fltmask); 1121 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask); 1122 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask); 1123 switch (p->p_model) { 1124 case DATAMODEL_ILP32: 1125 sp->pr_dmodel = PR_MODEL_ILP32; 1126 break; 1127 case DATAMODEL_LP64: 1128 sp->pr_dmodel = PR_MODEL_LP64; 1129 break; 1130 } 1131 if (p->p_agenttp) 1132 sp->pr_agentid = p->p_agenttp->t_tid; 1133 1134 /* get the chosen lwp's status */ 1135 prgetlwpstatus32(t, &sp->pr_lwp, zp); 1136 1137 /* replicate the flags */ 1138 sp->pr_flags = sp->pr_lwp.pr_flags; 1139 } 1140 #endif /* _SYSCALL32_IMPL */ 1141 1142 /* 1143 * Return lwp status. 
 */
/*
 * Fill in *sp with the status of the lwp whose thread is t.
 *
 * The caller must hold the owning process's p_lock (asserted).  The lock
 * is dropped part-way through, while the lwp's registers, system call
 * arguments and return values are collected, and is reacquired before
 * returning.
 *
 * zp is the zone used to filter signal sender information: when the
 * signal information came from a user process and zp is a non-global
 * zone other than the one recorded in the siginfo, the sender's pid,
 * uid, contract id and zone id are replaced so that nothing about the
 * real sender is exposed.
 */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));

	/* Build up pr_flags from the thread's and the process's state. */
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	if (p->p_pgidp->pid_pgorphaned)
		flags |= PR_ORPHAN;
	if (p->p_pidflag & CLDNOSIGCHLD)
		flags |= PR_NOSIGCHLD;
	if (p->p_pidflag & CLDWAITPID)
		flags |= PR_WAITPID;
	sp->pr_flags = flags;

	/* A VSTOPPED() thread is reported as stopped on request. */
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	schedctl_finish_sigblock(t);
	prassignset(&sp->pr_lwphold, &t->t_hold);

	/* Fault info takes precedence over the current signal's info. */
	if (t->t_whystop == PR_FAULTED)
		bcopy(&lwp->lwp_siginfo,
		    &sp->pr_info, sizeof (k_siginfo_t));
	else if (lwp->lwp_curinfo)
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &sp->pr_info, sizeof (k_siginfo_t));

	/* Replace sender details that belong to a zone other than zp. */
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	/* *sp was zeroed above, so the copied class name stays terminated */
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	/* system time includes the time accounted to LMS_TRAP */
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
		    (long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		/*
		 * For exec, report the argv and envp recorded in the
		 * u-area, plus the AT_SUN_EXECNAME aux vector entry
		 * (0 when no such entry is present).
		 */
		if (t->t_sysnum == SYS_exec || t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}

/*
 * Get the sigaction structure for the specified signal.  The u-block
 * must already have been mapped in by the caller.
1305 */ 1306 void 1307 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp) 1308 { 1309 bzero(sp, sizeof (*sp)); 1310 1311 if (sig != 0 && (unsigned)sig < NSIG) { 1312 sp->sa_handler = up->u_signal[sig-1]; 1313 prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]); 1314 if (sigismember(&up->u_sigonstack, sig)) 1315 sp->sa_flags |= SA_ONSTACK; 1316 if (sigismember(&up->u_sigresethand, sig)) 1317 sp->sa_flags |= SA_RESETHAND; 1318 if (sigismember(&up->u_sigrestart, sig)) 1319 sp->sa_flags |= SA_RESTART; 1320 if (sigismember(&p->p_siginfo, sig)) 1321 sp->sa_flags |= SA_SIGINFO; 1322 if (sigismember(&up->u_signodefer, sig)) 1323 sp->sa_flags |= SA_NODEFER; 1324 if (sig == SIGCLD) { 1325 if (p->p_flag & SNOWAIT) 1326 sp->sa_flags |= SA_NOCLDWAIT; 1327 if ((p->p_flag & SJCTL) == 0) 1328 sp->sa_flags |= SA_NOCLDSTOP; 1329 } 1330 } 1331 } 1332 1333 #ifdef _SYSCALL32_IMPL 1334 void 1335 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp) 1336 { 1337 bzero(sp, sizeof (*sp)); 1338 1339 if (sig != 0 && (unsigned)sig < NSIG) { 1340 sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1]; 1341 prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]); 1342 if (sigismember(&up->u_sigonstack, sig)) 1343 sp->sa_flags |= SA_ONSTACK; 1344 if (sigismember(&up->u_sigresethand, sig)) 1345 sp->sa_flags |= SA_RESETHAND; 1346 if (sigismember(&up->u_sigrestart, sig)) 1347 sp->sa_flags |= SA_RESTART; 1348 if (sigismember(&p->p_siginfo, sig)) 1349 sp->sa_flags |= SA_SIGINFO; 1350 if (sigismember(&up->u_signodefer, sig)) 1351 sp->sa_flags |= SA_NODEFER; 1352 if (sig == SIGCLD) { 1353 if (p->p_flag & SNOWAIT) 1354 sp->sa_flags |= SA_NOCLDWAIT; 1355 if ((p->p_flag & SJCTL) == 0) 1356 sp->sa_flags |= SA_NOCLDSTOP; 1357 } 1358 } 1359 } 1360 #endif /* _SYSCALL32_IMPL */ 1361 1362 /* 1363 * Count the number of segments in this process's address space. 
1364 */ 1365 int 1366 prnsegs(struct as *as, int reserved) 1367 { 1368 int n = 0; 1369 struct seg *seg; 1370 1371 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 1372 1373 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { 1374 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved); 1375 caddr_t saddr, naddr; 1376 void *tmp = NULL; 1377 1378 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1379 (void) pr_getprot(seg, reserved, &tmp, 1380 &saddr, &naddr, eaddr); 1381 if (saddr != naddr) 1382 n++; 1383 } 1384 1385 ASSERT(tmp == NULL); 1386 } 1387 1388 return (n); 1389 } 1390 1391 /* 1392 * Convert uint32_t to decimal string w/o leading zeros. 1393 * Add trailing null characters if 'len' is greater than string length. 1394 * Return the string length. 1395 */ 1396 int 1397 pr_u32tos(uint32_t n, char *s, int len) 1398 { 1399 char cbuf[11]; /* 32-bit unsigned integer fits in 10 digits */ 1400 char *cp = cbuf; 1401 char *end = s + len; 1402 1403 do { 1404 *cp++ = (char)(n % 10 + '0'); 1405 n /= 10; 1406 } while (n); 1407 1408 len = (int)(cp - cbuf); 1409 1410 do { 1411 *s++ = *--cp; 1412 } while (cp > cbuf); 1413 1414 while (s < end) /* optional pad */ 1415 *s++ = '\0'; 1416 1417 return (len); 1418 } 1419 1420 /* 1421 * Convert uint64_t to decimal string w/o leading zeros. 1422 * Return the string length. 
1423 */ 1424 static int 1425 pr_u64tos(uint64_t n, char *s) 1426 { 1427 char cbuf[21]; /* 64-bit unsigned integer fits in 20 digits */ 1428 char *cp = cbuf; 1429 int len; 1430 1431 do { 1432 *cp++ = (char)(n % 10 + '0'); 1433 n /= 10; 1434 } while (n); 1435 1436 len = (int)(cp - cbuf); 1437 1438 do { 1439 *s++ = *--cp; 1440 } while (cp > cbuf); 1441 1442 return (len); 1443 } 1444 1445 void 1446 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr) 1447 { 1448 char *s = name; 1449 struct vfs *vfsp; 1450 struct vfssw *vfsswp; 1451 1452 if ((vfsp = vp->v_vfsp) != NULL && 1453 ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) && 1454 *vfsswp->vsw_name) { 1455 (void) strcpy(s, vfsswp->vsw_name); 1456 s += strlen(s); 1457 *s++ = '.'; 1458 } 1459 s += pr_u32tos(getmajor(vattr->va_fsid), s, 0); 1460 *s++ = '.'; 1461 s += pr_u32tos(getminor(vattr->va_fsid), s, 0); 1462 *s++ = '.'; 1463 s += pr_u64tos(vattr->va_nodeid, s); 1464 *s++ = '\0'; 1465 } 1466 1467 struct seg * 1468 break_seg(proc_t *p) 1469 { 1470 caddr_t addr = p->p_brkbase; 1471 struct seg *seg; 1472 struct vnode *vp; 1473 1474 if (p->p_brksize != 0) 1475 addr += p->p_brksize - 1; 1476 seg = as_segat(p->p_as, addr); 1477 if (seg != NULL && seg->s_ops == &segvn_ops && 1478 (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL)) 1479 return (seg); 1480 return (NULL); 1481 } 1482 1483 /* 1484 * Implementation of service functions to handle procfs generic chained 1485 * copyout buffers. 
1486 */ 1487 typedef struct pr_iobuf_list { 1488 list_node_t piol_link; /* buffer linkage */ 1489 size_t piol_size; /* total size (header + data) */ 1490 size_t piol_usedsize; /* amount to copy out from this buf */ 1491 } piol_t; 1492 1493 #define MAPSIZE (64 * 1024) 1494 #define PIOL_DATABUF(iol) ((void *)(&(iol)[1])) 1495 1496 void 1497 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n) 1498 { 1499 piol_t *iol; 1500 size_t initial_size = MIN(1, n) * itemsize; 1501 1502 list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link)); 1503 1504 ASSERT(list_head(iolhead) == NULL); 1505 ASSERT(itemsize < MAPSIZE - sizeof (*iol)); 1506 ASSERT(initial_size > 0); 1507 1508 /* 1509 * Someone creating chained copyout buffers may ask for less than 1510 * MAPSIZE if the amount of data to be buffered is known to be 1511 * smaller than that. 1512 * But in order to prevent involuntary self-denial of service, 1513 * the requested input size is clamped at MAPSIZE. 1514 */ 1515 initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol)); 1516 iol = kmem_alloc(initial_size, KM_SLEEP); 1517 list_insert_head(iolhead, iol); 1518 iol->piol_usedsize = 0; 1519 iol->piol_size = initial_size; 1520 } 1521 1522 void * 1523 pr_iol_newbuf(list_t *iolhead, size_t itemsize) 1524 { 1525 piol_t *iol; 1526 char *new; 1527 1528 ASSERT(itemsize < MAPSIZE - sizeof (*iol)); 1529 ASSERT(list_head(iolhead) != NULL); 1530 1531 iol = (piol_t *)list_tail(iolhead); 1532 1533 if (iol->piol_size < 1534 iol->piol_usedsize + sizeof (*iol) + itemsize) { 1535 /* 1536 * Out of space in the current buffer. Allocate more. 
1537 */ 1538 piol_t *newiol; 1539 1540 newiol = kmem_alloc(MAPSIZE, KM_SLEEP); 1541 newiol->piol_size = MAPSIZE; 1542 newiol->piol_usedsize = 0; 1543 1544 list_insert_after(iolhead, iol, newiol); 1545 iol = list_next(iolhead, iol); 1546 ASSERT(iol == newiol); 1547 } 1548 new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize; 1549 iol->piol_usedsize += itemsize; 1550 bzero(new, itemsize); 1551 return (new); 1552 } 1553 1554 int 1555 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin) 1556 { 1557 int error = errin; 1558 piol_t *iol; 1559 1560 while ((iol = list_head(iolhead)) != NULL) { 1561 list_remove(iolhead, iol); 1562 if (!error) { 1563 if (copyout(PIOL_DATABUF(iol), *tgt, 1564 iol->piol_usedsize)) 1565 error = EFAULT; 1566 *tgt += iol->piol_usedsize; 1567 } 1568 kmem_free(iol, iol->piol_size); 1569 } 1570 list_destroy(iolhead); 1571 1572 return (error); 1573 } 1574 1575 int 1576 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin) 1577 { 1578 offset_t off = uiop->uio_offset; 1579 char *base; 1580 size_t size; 1581 piol_t *iol; 1582 int error = errin; 1583 1584 while ((iol = list_head(iolhead)) != NULL) { 1585 list_remove(iolhead, iol); 1586 base = PIOL_DATABUF(iol); 1587 size = iol->piol_usedsize; 1588 if (off <= size && error == 0 && uiop->uio_resid > 0) 1589 error = uiomove(base + off, size - off, 1590 UIO_READ, uiop); 1591 off = MAX(0, off - (offset_t)size); 1592 kmem_free(iol, iol->piol_size); 1593 } 1594 list_destroy(iolhead); 1595 1596 return (error); 1597 } 1598 1599 /* 1600 * Return an array of structures with memory map information. 1601 * We allocate here; the caller must deallocate. 
1602 */ 1603 int 1604 prgetmap(proc_t *p, int reserved, list_t *iolhead) 1605 { 1606 struct as *as = p->p_as; 1607 prmap_t *mp; 1608 struct seg *seg; 1609 struct seg *brkseg, *stkseg; 1610 struct vnode *vp; 1611 struct vattr vattr; 1612 uint_t prot; 1613 1614 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 1615 1616 /* 1617 * Request an initial buffer size that doesn't waste memory 1618 * if the address space has only a small number of segments. 1619 */ 1620 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 1621 1622 if ((seg = AS_SEGFIRST(as)) == NULL) 1623 return (0); 1624 1625 brkseg = break_seg(p); 1626 stkseg = as_segat(as, prgetstackbase(p)); 1627 1628 do { 1629 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved); 1630 caddr_t saddr, naddr; 1631 void *tmp = NULL; 1632 1633 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1634 prot = pr_getprot(seg, reserved, &tmp, 1635 &saddr, &naddr, eaddr); 1636 if (saddr == naddr) 1637 continue; 1638 1639 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 1640 1641 mp->pr_vaddr = (uintptr_t)saddr; 1642 mp->pr_size = naddr - saddr; 1643 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 1644 mp->pr_mflags = 0; 1645 if (prot & PROT_READ) 1646 mp->pr_mflags |= MA_READ; 1647 if (prot & PROT_WRITE) 1648 mp->pr_mflags |= MA_WRITE; 1649 if (prot & PROT_EXEC) 1650 mp->pr_mflags |= MA_EXEC; 1651 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 1652 mp->pr_mflags |= MA_SHARED; 1653 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 1654 mp->pr_mflags |= MA_NORESERVE; 1655 if (seg->s_ops == &segspt_shmops || 1656 (seg->s_ops == &segvn_ops && 1657 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 1658 mp->pr_mflags |= MA_ANON; 1659 if (seg == brkseg) 1660 mp->pr_mflags |= MA_BREAK; 1661 else if (seg == stkseg) { 1662 mp->pr_mflags |= MA_STACK; 1663 if (reserved) { 1664 size_t maxstack = 1665 ((size_t)p->p_stk_ctl + 1666 PAGEOFFSET) & PAGEMASK; 1667 mp->pr_vaddr = 1668 (uintptr_t)prgetstackbase(p) + 1669 
p->p_stksize - maxstack; 1670 mp->pr_size = (uintptr_t)naddr - 1671 mp->pr_vaddr; 1672 } 1673 } 1674 if (seg->s_ops == &segspt_shmops) 1675 mp->pr_mflags |= MA_ISM | MA_SHM; 1676 mp->pr_pagesize = PAGESIZE; 1677 1678 /* 1679 * Manufacture a filename for the "object" directory. 1680 */ 1681 vattr.va_mask = AT_FSID|AT_NODEID; 1682 if (seg->s_ops == &segvn_ops && 1683 SEGOP_GETVP(seg, saddr, &vp) == 0 && 1684 vp != NULL && vp->v_type == VREG && 1685 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 1686 if (vp == p->p_exec) 1687 (void) strcpy(mp->pr_mapname, "a.out"); 1688 else 1689 pr_object_name(mp->pr_mapname, 1690 vp, &vattr); 1691 } 1692 1693 /* 1694 * Get the SysV shared memory id, if any. 1695 */ 1696 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct && 1697 (mp->pr_shmid = shmgetid(p, seg->s_base)) != 1698 SHMID_NONE) { 1699 if (mp->pr_shmid == SHMID_FREE) 1700 mp->pr_shmid = -1; 1701 1702 mp->pr_mflags |= MA_SHM; 1703 } else { 1704 mp->pr_shmid = -1; 1705 } 1706 } 1707 ASSERT(tmp == NULL); 1708 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 1709 1710 return (0); 1711 } 1712 1713 #ifdef _SYSCALL32_IMPL 1714 int 1715 prgetmap32(proc_t *p, int reserved, list_t *iolhead) 1716 { 1717 struct as *as = p->p_as; 1718 prmap32_t *mp; 1719 struct seg *seg; 1720 struct seg *brkseg, *stkseg; 1721 struct vnode *vp; 1722 struct vattr vattr; 1723 uint_t prot; 1724 1725 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 1726 1727 /* 1728 * Request an initial buffer size that doesn't waste memory 1729 * if the address space has only a small number of segments. 
1730 */ 1731 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 1732 1733 if ((seg = AS_SEGFIRST(as)) == NULL) 1734 return (0); 1735 1736 brkseg = break_seg(p); 1737 stkseg = as_segat(as, prgetstackbase(p)); 1738 1739 do { 1740 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved); 1741 caddr_t saddr, naddr; 1742 void *tmp = NULL; 1743 1744 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1745 prot = pr_getprot(seg, reserved, &tmp, 1746 &saddr, &naddr, eaddr); 1747 if (saddr == naddr) 1748 continue; 1749 1750 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 1751 1752 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 1753 mp->pr_size = (size32_t)(naddr - saddr); 1754 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 1755 mp->pr_mflags = 0; 1756 if (prot & PROT_READ) 1757 mp->pr_mflags |= MA_READ; 1758 if (prot & PROT_WRITE) 1759 mp->pr_mflags |= MA_WRITE; 1760 if (prot & PROT_EXEC) 1761 mp->pr_mflags |= MA_EXEC; 1762 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 1763 mp->pr_mflags |= MA_SHARED; 1764 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 1765 mp->pr_mflags |= MA_NORESERVE; 1766 if (seg->s_ops == &segspt_shmops || 1767 (seg->s_ops == &segvn_ops && 1768 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 1769 mp->pr_mflags |= MA_ANON; 1770 if (seg == brkseg) 1771 mp->pr_mflags |= MA_BREAK; 1772 else if (seg == stkseg) { 1773 mp->pr_mflags |= MA_STACK; 1774 if (reserved) { 1775 size_t maxstack = 1776 ((size_t)p->p_stk_ctl + 1777 PAGEOFFSET) & PAGEMASK; 1778 uintptr_t vaddr = 1779 (uintptr_t)prgetstackbase(p) + 1780 p->p_stksize - maxstack; 1781 mp->pr_vaddr = (caddr32_t)vaddr; 1782 mp->pr_size = (size32_t) 1783 ((uintptr_t)naddr - vaddr); 1784 } 1785 } 1786 if (seg->s_ops == &segspt_shmops) 1787 mp->pr_mflags |= MA_ISM | MA_SHM; 1788 mp->pr_pagesize = PAGESIZE; 1789 1790 /* 1791 * Manufacture a filename for the "object" directory. 
1792 */ 1793 vattr.va_mask = AT_FSID|AT_NODEID; 1794 if (seg->s_ops == &segvn_ops && 1795 SEGOP_GETVP(seg, saddr, &vp) == 0 && 1796 vp != NULL && vp->v_type == VREG && 1797 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 1798 if (vp == p->p_exec) 1799 (void) strcpy(mp->pr_mapname, "a.out"); 1800 else 1801 pr_object_name(mp->pr_mapname, 1802 vp, &vattr); 1803 } 1804 1805 /* 1806 * Get the SysV shared memory id, if any. 1807 */ 1808 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct && 1809 (mp->pr_shmid = shmgetid(p, seg->s_base)) != 1810 SHMID_NONE) { 1811 if (mp->pr_shmid == SHMID_FREE) 1812 mp->pr_shmid = -1; 1813 1814 mp->pr_mflags |= MA_SHM; 1815 } else { 1816 mp->pr_shmid = -1; 1817 } 1818 } 1819 ASSERT(tmp == NULL); 1820 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 1821 1822 return (0); 1823 } 1824 #endif /* _SYSCALL32_IMPL */ 1825 1826 /* 1827 * Return the size of the /proc page data file. 1828 */ 1829 size_t 1830 prpdsize(struct as *as) 1831 { 1832 struct seg *seg; 1833 size_t size; 1834 1835 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 1836 1837 if ((seg = AS_SEGFIRST(as)) == NULL) 1838 return (0); 1839 1840 size = sizeof (prpageheader_t); 1841 do { 1842 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 1843 caddr_t saddr, naddr; 1844 void *tmp = NULL; 1845 size_t npage; 1846 1847 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1848 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 1849 if ((npage = (naddr - saddr) / PAGESIZE) != 0) 1850 size += sizeof (prasmap_t) + round8(npage); 1851 } 1852 ASSERT(tmp == NULL); 1853 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 1854 1855 return (size); 1856 } 1857 1858 #ifdef _SYSCALL32_IMPL 1859 size_t 1860 prpdsize32(struct as *as) 1861 { 1862 struct seg *seg; 1863 size_t size; 1864 1865 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 1866 1867 if ((seg = AS_SEGFIRST(as)) == NULL) 1868 return (0); 1869 1870 size = sizeof (prpageheader32_t); 1871 do { 1872 caddr_t eaddr = seg->s_base + 
pr_getsegsize(seg, 0); 1873 caddr_t saddr, naddr; 1874 void *tmp = NULL; 1875 size_t npage; 1876 1877 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1878 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 1879 if ((npage = (naddr - saddr) / PAGESIZE) != 0) 1880 size += sizeof (prasmap32_t) + round8(npage); 1881 } 1882 ASSERT(tmp == NULL); 1883 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 1884 1885 return (size); 1886 } 1887 #endif /* _SYSCALL32_IMPL */ 1888 1889 /* 1890 * Read page data information. 1891 */ 1892 int 1893 prpdread(proc_t *p, uint_t hatid, struct uio *uiop) 1894 { 1895 struct as *as = p->p_as; 1896 caddr_t buf; 1897 size_t size; 1898 prpageheader_t *php; 1899 prasmap_t *pmp; 1900 struct seg *seg; 1901 int error; 1902 1903 again: 1904 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 1905 1906 if ((seg = AS_SEGFIRST(as)) == NULL) { 1907 AS_LOCK_EXIT(as, &as->a_lock); 1908 return (0); 1909 } 1910 size = prpdsize(as); 1911 if (uiop->uio_resid < size) { 1912 AS_LOCK_EXIT(as, &as->a_lock); 1913 return (E2BIG); 1914 } 1915 1916 buf = kmem_zalloc(size, KM_SLEEP); 1917 php = (prpageheader_t *)buf; 1918 pmp = (prasmap_t *)(buf + sizeof (prpageheader_t)); 1919 1920 hrt2ts(gethrtime(), &php->pr_tstamp); 1921 php->pr_nmap = 0; 1922 php->pr_npage = 0; 1923 do { 1924 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 1925 caddr_t saddr, naddr; 1926 void *tmp = NULL; 1927 1928 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1929 struct vnode *vp; 1930 struct vattr vattr; 1931 size_t len; 1932 size_t npage; 1933 uint_t prot; 1934 uintptr_t next; 1935 1936 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 1937 if ((len = (size_t)(naddr - saddr)) == 0) 1938 continue; 1939 npage = len / PAGESIZE; 1940 next = (uintptr_t)(pmp + 1) + round8(npage); 1941 /* 1942 * It's possible that the address space can change 1943 * subtlely even though we're holding as->a_lock 1944 * due to the nondeterminism of page_exists() in 1945 * the presence of 
asychronously flushed pages or 1946 * mapped files whose sizes are changing. 1947 * page_exists() may be called indirectly from 1948 * pr_getprot() by a SEGOP_INCORE() routine. 1949 * If this happens we need to make sure we don't 1950 * overrun the buffer whose size we computed based 1951 * on the initial iteration through the segments. 1952 * Once we've detected an overflow, we need to clean 1953 * up the temporary memory allocated in pr_getprot() 1954 * and retry. If there's a pending signal, we return 1955 * EINTR so that this thread can be dislodged if 1956 * a latent bug causes us to spin indefinitely. 1957 */ 1958 if (next > (uintptr_t)buf + size) { 1959 pr_getprot_done(&tmp); 1960 AS_LOCK_EXIT(as, &as->a_lock); 1961 1962 kmem_free(buf, size); 1963 1964 if (ISSIG(curthread, JUSTLOOKING)) 1965 return (EINTR); 1966 1967 goto again; 1968 } 1969 1970 php->pr_nmap++; 1971 php->pr_npage += npage; 1972 pmp->pr_vaddr = (uintptr_t)saddr; 1973 pmp->pr_npage = npage; 1974 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 1975 pmp->pr_mflags = 0; 1976 if (prot & PROT_READ) 1977 pmp->pr_mflags |= MA_READ; 1978 if (prot & PROT_WRITE) 1979 pmp->pr_mflags |= MA_WRITE; 1980 if (prot & PROT_EXEC) 1981 pmp->pr_mflags |= MA_EXEC; 1982 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 1983 pmp->pr_mflags |= MA_SHARED; 1984 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 1985 pmp->pr_mflags |= MA_NORESERVE; 1986 if (seg->s_ops == &segspt_shmops || 1987 (seg->s_ops == &segvn_ops && 1988 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 1989 pmp->pr_mflags |= MA_ANON; 1990 if (seg->s_ops == &segspt_shmops) 1991 pmp->pr_mflags |= MA_ISM | MA_SHM; 1992 pmp->pr_pagesize = PAGESIZE; 1993 /* 1994 * Manufacture a filename for the "object" directory. 
1995 */ 1996 vattr.va_mask = AT_FSID|AT_NODEID; 1997 if (seg->s_ops == &segvn_ops && 1998 SEGOP_GETVP(seg, saddr, &vp) == 0 && 1999 vp != NULL && vp->v_type == VREG && 2000 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 2001 if (vp == p->p_exec) 2002 (void) strcpy(pmp->pr_mapname, "a.out"); 2003 else 2004 pr_object_name(pmp->pr_mapname, 2005 vp, &vattr); 2006 } 2007 2008 /* 2009 * Get the SysV shared memory id, if any. 2010 */ 2011 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct && 2012 (pmp->pr_shmid = shmgetid(p, seg->s_base)) != 2013 SHMID_NONE) { 2014 if (pmp->pr_shmid == SHMID_FREE) 2015 pmp->pr_shmid = -1; 2016 2017 pmp->pr_mflags |= MA_SHM; 2018 } else { 2019 pmp->pr_shmid = -1; 2020 } 2021 2022 hat_getstat(as, saddr, len, hatid, 2023 (char *)(pmp + 1), HAT_SYNC_ZERORM); 2024 pmp = (prasmap_t *)next; 2025 } 2026 ASSERT(tmp == NULL); 2027 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2028 2029 AS_LOCK_EXIT(as, &as->a_lock); 2030 2031 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size); 2032 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop); 2033 kmem_free(buf, size); 2034 2035 return (error); 2036 } 2037 2038 #ifdef _SYSCALL32_IMPL 2039 int 2040 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop) 2041 { 2042 struct as *as = p->p_as; 2043 caddr_t buf; 2044 size_t size; 2045 prpageheader32_t *php; 2046 prasmap32_t *pmp; 2047 struct seg *seg; 2048 int error; 2049 2050 again: 2051 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 2052 2053 if ((seg = AS_SEGFIRST(as)) == NULL) { 2054 AS_LOCK_EXIT(as, &as->a_lock); 2055 return (0); 2056 } 2057 size = prpdsize32(as); 2058 if (uiop->uio_resid < size) { 2059 AS_LOCK_EXIT(as, &as->a_lock); 2060 return (E2BIG); 2061 } 2062 2063 buf = kmem_zalloc(size, KM_SLEEP); 2064 php = (prpageheader32_t *)buf; 2065 pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t)); 2066 2067 hrt2ts32(gethrtime(), &php->pr_tstamp); 2068 php->pr_nmap = 0; 2069 php->pr_npage = 0; 2070 do { 2071 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 
0); 2072 caddr_t saddr, naddr; 2073 void *tmp = NULL; 2074 2075 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2076 struct vnode *vp; 2077 struct vattr vattr; 2078 size_t len; 2079 size_t npage; 2080 uint_t prot; 2081 uintptr_t next; 2082 2083 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 2084 if ((len = (size_t)(naddr - saddr)) == 0) 2085 continue; 2086 npage = len / PAGESIZE; 2087 next = (uintptr_t)(pmp + 1) + round8(npage); 2088 /* 2089 * It's possible that the address space can change 2090 * subtlely even though we're holding as->a_lock 2091 * due to the nondeterminism of page_exists() in 2092 * the presence of asychronously flushed pages or 2093 * mapped files whose sizes are changing. 2094 * page_exists() may be called indirectly from 2095 * pr_getprot() by a SEGOP_INCORE() routine. 2096 * If this happens we need to make sure we don't 2097 * overrun the buffer whose size we computed based 2098 * on the initial iteration through the segments. 2099 * Once we've detected an overflow, we need to clean 2100 * up the temporary memory allocated in pr_getprot() 2101 * and retry. If there's a pending signal, we return 2102 * EINTR so that this thread can be dislodged if 2103 * a latent bug causes us to spin indefinitely. 
2104 */ 2105 if (next > (uintptr_t)buf + size) { 2106 pr_getprot_done(&tmp); 2107 AS_LOCK_EXIT(as, &as->a_lock); 2108 2109 kmem_free(buf, size); 2110 2111 if (ISSIG(curthread, JUSTLOOKING)) 2112 return (EINTR); 2113 2114 goto again; 2115 } 2116 2117 php->pr_nmap++; 2118 php->pr_npage += npage; 2119 pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 2120 pmp->pr_npage = (size32_t)npage; 2121 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 2122 pmp->pr_mflags = 0; 2123 if (prot & PROT_READ) 2124 pmp->pr_mflags |= MA_READ; 2125 if (prot & PROT_WRITE) 2126 pmp->pr_mflags |= MA_WRITE; 2127 if (prot & PROT_EXEC) 2128 pmp->pr_mflags |= MA_EXEC; 2129 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 2130 pmp->pr_mflags |= MA_SHARED; 2131 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 2132 pmp->pr_mflags |= MA_NORESERVE; 2133 if (seg->s_ops == &segspt_shmops || 2134 (seg->s_ops == &segvn_ops && 2135 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 2136 pmp->pr_mflags |= MA_ANON; 2137 if (seg->s_ops == &segspt_shmops) 2138 pmp->pr_mflags |= MA_ISM | MA_SHM; 2139 pmp->pr_pagesize = PAGESIZE; 2140 /* 2141 * Manufacture a filename for the "object" directory. 2142 */ 2143 vattr.va_mask = AT_FSID|AT_NODEID; 2144 if (seg->s_ops == &segvn_ops && 2145 SEGOP_GETVP(seg, saddr, &vp) == 0 && 2146 vp != NULL && vp->v_type == VREG && 2147 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 2148 if (vp == p->p_exec) 2149 (void) strcpy(pmp->pr_mapname, "a.out"); 2150 else 2151 pr_object_name(pmp->pr_mapname, 2152 vp, &vattr); 2153 } 2154 2155 /* 2156 * Get the SysV shared memory id, if any. 
2157 */ 2158 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct && 2159 (pmp->pr_shmid = shmgetid(p, seg->s_base)) != 2160 SHMID_NONE) { 2161 if (pmp->pr_shmid == SHMID_FREE) 2162 pmp->pr_shmid = -1; 2163 2164 pmp->pr_mflags |= MA_SHM; 2165 } else { 2166 pmp->pr_shmid = -1; 2167 } 2168 2169 hat_getstat(as, saddr, len, hatid, 2170 (char *)(pmp + 1), HAT_SYNC_ZERORM); 2171 pmp = (prasmap32_t *)next; 2172 } 2173 ASSERT(tmp == NULL); 2174 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2175 2176 AS_LOCK_EXIT(as, &as->a_lock); 2177 2178 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size); 2179 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop); 2180 kmem_free(buf, size); 2181 2182 return (error); 2183 } 2184 #endif /* _SYSCALL32_IMPL */ 2185 2186 ushort_t 2187 prgetpctcpu(uint64_t pct) 2188 { 2189 /* 2190 * The value returned will be relevant in the zone of the examiner, 2191 * which may not be the same as the zone which performed the procfs 2192 * mount. 2193 */ 2194 int nonline = zone_ncpus_online_get(curproc->p_zone); 2195 2196 /* 2197 * Prorate over online cpus so we don't exceed 100% 2198 */ 2199 if (nonline > 1) 2200 pct /= nonline; 2201 pct >>= 16; /* convert to 16-bit scaled integer */ 2202 if (pct > 0x8000) /* might happen, due to rounding */ 2203 pct = 0x8000; 2204 return ((ushort_t)pct); 2205 } 2206 2207 /* 2208 * Return information used by ps(1). 2209 */ 2210 void 2211 prgetpsinfo(proc_t *p, psinfo_t *psp) 2212 { 2213 kthread_t *t; 2214 struct cred *cred; 2215 hrtime_t hrutime, hrstime; 2216 2217 ASSERT(MUTEX_HELD(&p->p_lock)); 2218 2219 if ((t = prchoose(p)) == NULL) /* returns locked thread */ 2220 bzero(psp, sizeof (*psp)); 2221 else { 2222 thread_unlock(t); 2223 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp)); 2224 } 2225 2226 /* 2227 * only export SSYS and SMSACCT; everything else is off-limits to 2228 * userland apps. 
2229 */ 2230 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 2231 psp->pr_nlwp = p->p_lwpcnt; 2232 psp->pr_nzomb = p->p_zombcnt; 2233 mutex_enter(&p->p_crlock); 2234 cred = p->p_cred; 2235 psp->pr_uid = crgetruid(cred); 2236 psp->pr_euid = crgetuid(cred); 2237 psp->pr_gid = crgetrgid(cred); 2238 psp->pr_egid = crgetgid(cred); 2239 mutex_exit(&p->p_crlock); 2240 psp->pr_pid = p->p_pid; 2241 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 2242 (p->p_flag & SZONETOP)) { 2243 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 2244 /* 2245 * Inside local zones, fake zsched's pid as parent pids for 2246 * processes which reference processes outside of the zone. 2247 */ 2248 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 2249 } else { 2250 psp->pr_ppid = p->p_ppid; 2251 } 2252 psp->pr_pgid = p->p_pgrp; 2253 psp->pr_sid = p->p_sessp->s_sid; 2254 psp->pr_taskid = p->p_task->tk_tkid; 2255 psp->pr_projid = p->p_task->tk_proj->kpj_id; 2256 psp->pr_poolid = p->p_pool->pool_id; 2257 psp->pr_zoneid = p->p_zone->zone_id; 2258 if ((psp->pr_contract = PRCTID(p)) == 0) 2259 psp->pr_contract = -1; 2260 psp->pr_addr = (uintptr_t)prgetpsaddr(p); 2261 switch (p->p_model) { 2262 case DATAMODEL_ILP32: 2263 psp->pr_dmodel = PR_MODEL_ILP32; 2264 break; 2265 case DATAMODEL_LP64: 2266 psp->pr_dmodel = PR_MODEL_LP64; 2267 break; 2268 } 2269 hrutime = mstate_aggr_state(p, LMS_USER); 2270 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 2271 hrt2ts((hrutime + hrstime), &psp->pr_time); 2272 TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime); 2273 2274 if (t == NULL) { 2275 int wcode = p->p_wcode; /* must be atomic read */ 2276 2277 if (wcode) 2278 psp->pr_wstat = wstat(wcode, p->p_wdata); 2279 psp->pr_ttydev = PRNODEV; 2280 psp->pr_lwp.pr_state = SZOMB; 2281 psp->pr_lwp.pr_sname = 'Z'; 2282 psp->pr_lwp.pr_bindpro = PBIND_NONE; 2283 psp->pr_lwp.pr_bindpset = PS_NONE; 2284 } else { 2285 user_t *up = PTOU(p); 2286 struct as *as; 2287 dev_t d; 2288 extern dev_t rwsconsdev, rconsdev, uconsdev; 2289 
#ifdef _SYSCALL32_IMPL
/*
 * Return information used by ps(1), in the ILP32 (psinfo32_t) layout.
 *
 * p	process to describe; p->p_lock must be held by the caller.
 * psp	buffer to fill in.
 *
 * p->p_lock is dropped and reacquired while the address space is examined
 * for the size/rss/pctmem fields, so the caller must tolerate that.
 *
 * If prchoose() finds no lwp the process is reported as a zombie.  When an
 * lwp is found, the trailing pr_lwp member is left for prgetlwpsinfo32()
 * to initialize (the partial bzero() assumes pr_lwp is the last member of
 * psinfo32_t -- TODO confirm against the structure definition).
 */
void
prgetpsinfo32(proc_t *p, psinfo32_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}

	/*
	 * only export SSYS and SMSACCT; everything else is off-limits to
	 * userland apps.
	 */
	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
	psp->pr_nlwp = p->p_lwpcnt;
	psp->pr_nzomb = p->p_zombcnt;

	/* p_crlock keeps p_cred stable while the ids are extracted. */
	mutex_enter(&p->p_crlock);
	cred = p->p_cred;
	psp->pr_uid = crgetruid(cred);
	psp->pr_euid = crgetuid(cred);
	psp->pr_gid = crgetrgid(cred);
	psp->pr_egid = crgetgid(cred);
	mutex_exit(&p->p_crlock);

	psp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		psp->pr_ppid = p->p_ppid;
	}
	psp->pr_pgid = p->p_pgrp;
	psp->pr_sid = p->p_sessp->s_sid;
	psp->pr_taskid = p->p_task->tk_tkid;
	psp->pr_projid = p->p_task->tk_proj->kpj_id;
	psp->pr_poolid = p->p_pool->pool_id;
	psp->pr_zoneid = p->p_zone->zone_id;
	if ((psp->pr_contract = PRCTID(p)) == 0)
		psp->pr_contract = -1;
	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		psp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		psp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	hrt2ts32(hrutime + hrstime, &psp->pr_time);
	TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);

	if (t == NULL) {
		extern int wstat(int, int);	/* needs a header file */
		int wcode = p->p_wcode;		/* must be atomic read */

		if (wcode)
			psp->pr_wstat = wstat(wcode, p->p_wdata);
		psp->pr_ttydev = PRNODEV32;
		psp->pr_lwp.pr_state = SZOMB;
		psp->pr_lwp.pr_sname = 'Z';
		/*
		 * There is no lwp, hence no binding.  Report "none" rather
		 * than the zero left by bzero(), which would read as cpu 0
		 * and pset 0.  This matches what prgetpsinfo() reports.
		 */
		psp->pr_lwp.pr_bindpro = PBIND_NONE;
		psp->pr_lwp.pr_bindpset = PS_NONE;
	} else {
		user_t *up = PTOU(p);
		struct as *as;
		dev_t d;
		extern dev_t rwsconsdev, rconsdev, uconsdev;

		d = cttydev(p);
		/*
		 * If the controlling terminal is the real
		 * or workstation console device, map to what the
		 * user thinks is the console device. Handle case when
		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
		 */
		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
			d = uconsdev;
		(void) cmpldev(&psp->pr_ttydev, d);
		TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
		/*
		 * The destination was zeroed above and the copy stops one
		 * byte short of its end, so both strings stay terminated.
		 */
		bcopy(up->u_comm, psp->pr_fname,
		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
		bcopy(up->u_psargs, psp->pr_psargs,
		    MIN(PRARGSZ-1, PSARGSZ));
		psp->pr_argc = up->u_argc;
		psp->pr_argv = (caddr32_t)up->u_argv;
		psp->pr_envp = (caddr32_t)up->u_envp;

		/* get the chosen lwp's lwpsinfo */
		prgetlwpsinfo32(t, &psp->pr_lwp);

		/* compute %cpu for the process */
		if (p->p_lwpcnt == 1)
			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
		else {
			uint64_t pct = 0;
			hrtime_t cur_time;

			/* Sum over every thread on the circular p_tlist. */
			t = p->p_tlist;
			cur_time = gethrtime_unscaled();
			do {
				pct += cpu_update_pct(t, cur_time);
			} while ((t = t->t_forw) != p->p_tlist);

			psp->pr_pctcpu = prgetpctcpu(pct);
		}
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
			psp->pr_size = 0;
			psp->pr_rssize = 0;
		} else {
			/*
			 * p_lock is released before the address space lock
			 * is taken and reacquired afterwards.
			 */
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
			psp->pr_size = (size32_t)
			    (btopr(rm_assize(as)) * (PAGESIZE / 1024));
			psp->pr_rssize = (size32_t)
			    (rm_asrss(as) * (PAGESIZE / 1024));
			psp->pr_pctmem = rm_pctmemory(as);
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
	}

	/*
	 * If we are looking at an LP64 process, zero out
	 * the fields that cannot be represented in ILP32.
	 */
	if (p->p_model != DATAMODEL_ILP32) {
		psp->pr_size = 0;
		psp->pr_rssize = 0;
		psp->pr_argv = 0;
		psp->pr_envp = 0;
	}
}
#endif	/* _SYSCALL32_IMPL */
#ifdef _SYSCALL32_IMPL
/*
 * Fill in the ILP32 (lwpsinfo32_t) ps(1) information for the lwp that
 * owns thread t.  The caller holds the p_lock of t's process.
 */
void
prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
{
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms;
	sobj_ops_t *ops;
	hrtime_t usertime, systime;
	uint64_t rawpct;
	char tstate, pstate, sname;
	int nice;

	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	bzero(psp, sizeof (*psp));

	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
	psp->pr_lwpid = t->t_tid;
	/* Kernel addresses do not fit in 32 bits, so export none. */
	psp->pr_addr = 0;
	psp->pr_wchan = 0;

	/* Translate the thread state into the process-style state + letter. */
	tstate = VSTOPPED(t) ? TS_STOPPED : t->t_state;
	switch (tstate) {
	case TS_SLEEP:
		pstate = SSLEEP;
		sname = 'S';
		break;
	case TS_RUN:
		pstate = SRUN;
		sname = 'R';
		break;
	case TS_ONPROC:
		pstate = SONPROC;
		sname = 'O';
		break;
	case TS_ZOMB:
		pstate = SZOMB;
		sname = 'Z';
		break;
	case TS_STOPPED:
		pstate = SSTOP;
		sname = 'T';
		break;
	case TS_WAIT:
		pstate = SWAIT;
		sname = 'W';
		break;
	default:
		pstate = 0;
		sname = '?';
		break;
	}
	psp->pr_state = pstate;
	psp->pr_sname = sname;

	ops = t->t_sobj_ops;
	if (ops != NULL)
		psp->pr_stype = SOBJ_TYPE(ops);

	/* Nice/old-style priority are reported only if the class has them. */
	if (CL_DONICE(t, NULL, 0, &nice) == 0) {
		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
		psp->pr_nice = nice + NZERO;
	} else {
		psp->pr_oldpri = 0;
		psp->pr_nice = 0;
	}

	psp->pr_syscall = t->t_sysnum;
	psp->pr_pri = t->t_pri;
	psp->pr_start.tv_sec = (time32_t)t->t_start;
	psp->pr_start.tv_nsec = 0L;

	/* pr_time is user time plus (system + trap) time. */
	ms = &lwp->lwp_mstate;
	usertime = ms->ms_acct[LMS_USER];
	scalehrtime(&usertime);
	systime = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&systime);
	hrt2ts32(usertime + systime, &psp->pr_time);

	/* %cpu for this lwp, then the legacy [0..99] rendition of it. */
	rawpct = cpu_update_pct(t, gethrtime_unscaled());
	psp->pr_pctcpu = prgetpctcpu(rawpct);
	psp->pr_cpu = (psp->pr_pctcpu * 100 + 0x6000) >> 15;
	if (psp->pr_cpu > 99)
		psp->pr_cpu = 99;

	/* The buffer was zeroed, so copying size-1 bytes keeps it terminated */
	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (psp->pr_clname) - 1);
	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */

	psp->pr_onpro = t->t_cpu->cpu_id;
	psp->pr_bindpro = t->t_bind_cpu;
	psp->pr_bindpset = t->t_bind_pset;
	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
}
#endif	/* _SYSCALL32_IMPL */
Since Microstate accounting is on 2626 * regardless of the proc flags, this simply makes it appear to procfs that 2627 * microstate accounting is on. This is relatively meaningless since you 2628 * can't turn it off, but this is here for the sake of appearances. 2629 */ 2630 2631 /*ARGSUSED*/ 2632 void 2633 estimate_msacct(kthread_t *t, hrtime_t curtime) 2634 { 2635 proc_t *p; 2636 2637 if (t == NULL) 2638 return; 2639 2640 p = ttoproc(t); 2641 ASSERT(MUTEX_HELD(&p->p_lock)); 2642 2643 /* 2644 * A system process (p0) could be referenced if the thread is 2645 * in the process of exiting. Don't turn on microstate accounting 2646 * in that case. 2647 */ 2648 if (p->p_flag & SSYS) 2649 return; 2650 2651 /* 2652 * Loop through all the LWPs (kernel threads) in the process. 2653 */ 2654 t = p->p_tlist; 2655 do { 2656 t->t_proc_flag |= TP_MSACCT; 2657 } while ((t = t->t_forw) != p->p_tlist); 2658 2659 p->p_flag |= SMSACCT; /* set process-wide MSACCT */ 2660 } 2661 2662 /* 2663 * It's not really possible to disable microstate accounting anymore. 2664 * However, this routine simply turns off the ms accounting flags in a process 2665 * This way procfs can still pretend to turn microstate accounting on and 2666 * off for a process, but it actually doesn't do anything. This is 2667 * a neutered form of preemptive idiot-proofing. 2668 */ 2669 void 2670 disable_msacct(proc_t *p) 2671 { 2672 kthread_t *t; 2673 2674 ASSERT(MUTEX_HELD(&p->p_lock)); 2675 2676 p->p_flag &= ~SMSACCT; /* clear process-wide MSACCT */ 2677 /* 2678 * Loop through all the LWPs (kernel threads) in the process. 2679 */ 2680 if ((t = p->p_tlist) != NULL) { 2681 do { 2682 /* clear per-thread flag */ 2683 t->t_proc_flag &= ~TP_MSACCT; 2684 } while ((t = t->t_forw) != p->p_tlist); 2685 } 2686 } 2687 2688 /* 2689 * Return resource usage information. 
/*
 * Return resource usage information.
 *
 * Fills *pup with a snapshot of the microstate times and resource counters
 * of the lwp belonging to thread t.  Values are read as unscaled hrtime
 * and converted with scalehrtime()/prscaleusage() as they are stored.
 * Every field written here is assigned (not accumulated), except that the
 * in-progress dispatcher wait and current-microstate intervals are added
 * on top of the recorded totals.
 */
void
prgetusage(kthread_t *t, prhusage_t *pup)
{
	klwp_t *lwp = ttolwp(t);
	hrtime_t *mstimep;
	struct mstate *ms = &lwp->lwp_mstate;
	int state;
	int i;
	hrtime_t curtime;
	hrtime_t waitrq;
	hrtime_t tmp1;

	curtime = gethrtime_unscaled();

	pup->pr_lwpid	= t->t_tid;
	pup->pr_count	= 1;
	pup->pr_create	= ms->ms_start;
	pup->pr_term	= ms->ms_term;
	scalehrtime(&pup->pr_create);
	scalehrtime(&pup->pr_term);
	/* Elapsed time runs to "now" unless a termination time is recorded. */
	if (ms->ms_term == 0) {
		pup->pr_rtime = curtime - ms->ms_start;
		scalehrtime(&pup->pr_rtime);
	} else {
		pup->pr_rtime = ms->ms_term - ms->ms_start;
		scalehrtime(&pup->pr_rtime);
	}


	/* Copy the recorded per-microstate totals, still unscaled. */
	pup->pr_utime    = ms->ms_acct[LMS_USER];
	pup->pr_stime    = ms->ms_acct[LMS_SYSTEM];
	pup->pr_ttime    = ms->ms_acct[LMS_TRAP];
	pup->pr_tftime   = ms->ms_acct[LMS_TFAULT];
	pup->pr_dftime   = ms->ms_acct[LMS_DFAULT];
	pup->pr_kftime   = ms->ms_acct[LMS_KFAULT];
	pup->pr_ltime    = ms->ms_acct[LMS_USER_LOCK];
	pup->pr_slptime  = ms->ms_acct[LMS_SLEEP];
	pup->pr_wtime    = ms->ms_acct[LMS_WAIT_CPU];
	pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];

	prscaleusage(pup);

	/*
	 * Adjust for time waiting in the dispatcher queue.
	 * The interval since t_waitrq is charged to wait-cpu time, and the
	 * current-microstate interval computed below then ends at t_waitrq.
	 */
	waitrq = t->t_waitrq;	/* hopefully atomic */
	if (waitrq != 0) {
		tmp1 = curtime - waitrq;
		scalehrtime(&tmp1);
		pup->pr_wtime += tmp1;
		curtime = waitrq;
	}

	/*
	 * Adjust for time spent in current microstate.
	 * If the state started after our reference time, take a new sample.
	 */
	if (ms->ms_state_start > curtime) {
		curtime = gethrtime_unscaled();
	}

	/*
	 * Pick the bucket to charge and compute the in-progress interval.
	 * The thread's state is read without synchronization here, so the
	 * interval can come out negative; in that case resample the clock
	 * and try again, at most MAX_ITERS_SPIN times.
	 *
	 * NOTE(review): if every attempt fails, the loop exits with tmp1
	 * still negative and unscaled, and that value is added below --
	 * confirm whether this is intended.
	 */
	i = 0;
	do {
		switch (state = t->t_mstate) {
		case LMS_SLEEP:
			/*
			 * Update the timer for the current sleep state.
			 * A sleep entered from a fault or user-lock state
			 * (ms_prev) is charged to that state instead.
			 */
			switch (state = ms->ms_prev) {
			case LMS_TFAULT:
			case LMS_DFAULT:
			case LMS_KFAULT:
			case LMS_USER_LOCK:
				break;
			default:
				state = LMS_SLEEP;
				break;
			}
			break;
		case LMS_TFAULT:
		case LMS_DFAULT:
		case LMS_KFAULT:
		case LMS_USER_LOCK:
			/* Not sleeping in one of these: charge system time. */
			state = LMS_SYSTEM;
			break;
		}
		switch (state) {
		case LMS_USER:		mstimep = &pup->pr_utime;	break;
		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
		default:    panic("prgetusage: unknown microstate");
		}
		tmp1 = curtime - ms->ms_state_start;
		if (tmp1 < 0) {
			curtime = gethrtime_unscaled();
			i++;
			continue;	/* jumps to the loop condition */
		}
		scalehrtime(&tmp1);
	} while (tmp1 < 0 && i < MAX_ITERS_SPIN);

	*mstimep += tmp1;

	/* update pup timestamp */
	pup->pr_tstamp = curtime;
	scalehrtime(&pup->pr_tstamp);

	/*
	 * Resource usage counters.
	 */
	pup->pr_minf  = lwp->lwp_ru.minflt;
	pup->pr_majf  = lwp->lwp_ru.majflt;
	pup->pr_nswap = lwp->lwp_ru.nswap;
	pup->pr_inblk = lwp->lwp_ru.inblock;
	pup->pr_oublk = lwp->lwp_ru.oublock;
	pup->pr_msnd  = lwp->lwp_ru.msgsnd;
	pup->pr_mrcv  = lwp->lwp_ru.msgrcv;
	pup->pr_sigs  = lwp->lwp_ru.nsignals;
	pup->pr_vctx  = lwp->lwp_ru.nvcsw;
	pup->pr_ictx  = lwp->lwp_ru.nivcsw;
	pup->pr_sysc  = lwp->lwp_ru.sysc;
	pup->pr_ioch  = lwp->lwp_ru.ioch;
}

/*
 * Convert ms_acct stats from unscaled high-res time to nanoseconds
 *
 * Each of the ten per-microstate time fields of *usg is converted in
 * place; no other field is touched.
 */
void
prscaleusage(prhusage_t *usg)
{
	scalehrtime(&usg->pr_utime);
	scalehrtime(&usg->pr_stime);
	scalehrtime(&usg->pr_ttime);
	scalehrtime(&usg->pr_tftime);
	scalehrtime(&usg->pr_dftime);
	scalehrtime(&usg->pr_kftime);
	scalehrtime(&usg->pr_ltime);
	scalehrtime(&usg->pr_slptime);
	scalehrtime(&usg->pr_wtime);
	scalehrtime(&usg->pr_stoptime);
}
/*
 * Sum resource usage information.
 *
 * Adds the microstate times and resource counters of the lwp belonging to
 * thread t into *pup, which the caller is accumulating across lwps.
 * pr_tstamp is the one field that is overwritten rather than added to.
 */
void
praddusage(kthread_t *t, prhusage_t *pup)
{
	klwp_t *lwp = ttolwp(t);
	hrtime_t *mstimep;
	struct mstate *ms = &lwp->lwp_mstate;
	int state;
	int i;
	hrtime_t curtime;
	hrtime_t waitrq;
	hrtime_t tmp;
	prhusage_t conv;	/* scratch copy so scaling can be done in bulk */

	curtime = gethrtime_unscaled();

	/* Elapsed time runs to "now" unless a termination time is recorded. */
	if (ms->ms_term == 0) {
		tmp = curtime - ms->ms_start;
		scalehrtime(&tmp);
		pup->pr_rtime += tmp;
	} else {
		tmp = ms->ms_term - ms->ms_start;
		scalehrtime(&tmp);
		pup->pr_rtime += tmp;
	}

	/* Only the ten time fields of conv are set; only they are used. */
	conv.pr_utime = ms->ms_acct[LMS_USER];
	conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
	conv.pr_ttime = ms->ms_acct[LMS_TRAP];
	conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
	conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
	conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
	conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
	conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
	conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
	conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];

	prscaleusage(&conv);

	pup->pr_utime	+= conv.pr_utime;
	pup->pr_stime	+= conv.pr_stime;
	pup->pr_ttime	+= conv.pr_ttime;
	pup->pr_tftime	+= conv.pr_tftime;
	pup->pr_dftime	+= conv.pr_dftime;
	pup->pr_kftime	+= conv.pr_kftime;
	pup->pr_ltime	+= conv.pr_ltime;
	pup->pr_slptime	+= conv.pr_slptime;
	pup->pr_wtime	+= conv.pr_wtime;
	pup->pr_stoptime += conv.pr_stoptime;

	/*
	 * Adjust for time waiting in the dispatcher queue.
	 * The interval since t_waitrq is charged to wait-cpu time, and the
	 * current-microstate interval computed below then ends at t_waitrq.
	 */
	waitrq = t->t_waitrq;	/* hopefully atomic */
	if (waitrq != 0) {
		tmp = curtime - waitrq;
		scalehrtime(&tmp);
		pup->pr_wtime += tmp;
		curtime = waitrq;
	}

	/*
	 * Adjust for time spent in current microstate.
	 * If the state started after our reference time, take a new sample.
	 */
	if (ms->ms_state_start > curtime) {
		curtime = gethrtime_unscaled();
	}

	/*
	 * Pick the bucket to charge and compute the in-progress interval.
	 * A negative interval causes the clock to be resampled and the
	 * computation retried, at most MAX_ITERS_SPIN times.
	 *
	 * NOTE(review): if every attempt fails, the loop exits with tmp
	 * still negative and unscaled, and that value is added below --
	 * confirm whether this is intended.
	 */
	i = 0;
	do {
		switch (state = t->t_mstate) {
		case LMS_SLEEP:
			/*
			 * Update the timer for the current sleep state.
			 * A sleep entered from a fault or user-lock state
			 * (ms_prev) is charged to that state instead.
			 */
			switch (state = ms->ms_prev) {
			case LMS_TFAULT:
			case LMS_DFAULT:
			case LMS_KFAULT:
			case LMS_USER_LOCK:
				break;
			default:
				state = LMS_SLEEP;
				break;
			}
			break;
		case LMS_TFAULT:
		case LMS_DFAULT:
		case LMS_KFAULT:
		case LMS_USER_LOCK:
			/* Not sleeping in one of these: charge system time. */
			state = LMS_SYSTEM;
			break;
		}
		switch (state) {
		case LMS_USER:		mstimep = &pup->pr_utime;	break;
		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
		default:    panic("praddusage: unknown microstate");
		}
		tmp = curtime - ms->ms_state_start;
		if (tmp < 0) {
			curtime = gethrtime_unscaled();
			i++;
			continue;	/* jumps to the loop condition */
		}
		scalehrtime(&tmp);
	} while (tmp < 0 && i < MAX_ITERS_SPIN);

	*mstimep += tmp;

	/* update pup timestamp */
	pup->pr_tstamp = curtime;
	scalehrtime(&pup->pr_tstamp);

	/*
	 * Resource usage counters.
	 */
	pup->pr_minf  += lwp->lwp_ru.minflt;
	pup->pr_majf  += lwp->lwp_ru.majflt;
	pup->pr_nswap += lwp->lwp_ru.nswap;
	pup->pr_inblk += lwp->lwp_ru.inblock;
	pup->pr_oublk += lwp->lwp_ru.oublock;
	pup->pr_msnd  += lwp->lwp_ru.msgsnd;
	pup->pr_mrcv  += lwp->lwp_ru.msgrcv;
	pup->pr_sigs  += lwp->lwp_ru.nsignals;
	pup->pr_vctx  += lwp->lwp_ru.nvcsw;
	pup->pr_ictx  += lwp->lwp_ru.nivcsw;
	pup->pr_sysc  += lwp->lwp_ru.sysc;
	pup->pr_ioch  += lwp->lwp_ru.ioch;
}

/*
 * Convert a prhusage_t to a prusage_t.
 * This means convert each hrtime_t to a timestruc_t
 * and copy the count fields uint64_t => ulong_t.
 *
 * pup	source, with times already scaled to nanoseconds by the producer
 *	(presumably -- hrt2ts() is applied to them directly; verify against
 *	callers).
 * upup	destination; its filltime area is zeroed.
 */
void
prcvtusage(prhusage_t *pup, prusage_t *upup)
{
	uint64_t *ullp;
	ulong_t *ulp;
	int i;

	upup->pr_lwpid = pup->pr_lwpid;
	upup->pr_count = pup->pr_count;

	hrt2ts(pup->pr_tstamp,	&upup->pr_tstamp);
	hrt2ts(pup->pr_create,	&upup->pr_create);
	hrt2ts(pup->pr_term,	&upup->pr_term);
	hrt2ts(pup->pr_rtime,	&upup->pr_rtime);
	hrt2ts(pup->pr_utime,	&upup->pr_utime);
	hrt2ts(pup->pr_stime,	&upup->pr_stime);
	hrt2ts(pup->pr_ttime,	&upup->pr_ttime);
	hrt2ts(pup->pr_tftime,	&upup->pr_tftime);
	hrt2ts(pup->pr_dftime,	&upup->pr_dftime);
	hrt2ts(pup->pr_kftime,	&upup->pr_kftime);
	hrt2ts(pup->pr_ltime,	&upup->pr_ltime);
	hrt2ts(pup->pr_slptime,	&upup->pr_slptime);
	hrt2ts(pup->pr_wtime,	&upup->pr_wtime);
	hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
	bzero(upup->filltime, sizeof (upup->filltime));

	/*
	 * Copy 22 consecutive counters starting at pr_minf, narrowing each
	 * to ulong_t.  This walks both structures as flat arrays, so it
	 * assumes the counters are laid out contiguously and in the same
	 * order in each; 22 presumably spans pr_minf through the trailing
	 * filler -- TODO confirm against the structure definitions.
	 */
	ullp = &pup->pr_minf;
	ulp = &upup->pr_minf;
	for (i = 0; i < 22; i++)
		*ulp++ = (ulong_t)*ullp++;
}
/*
 * Determine whether a set is empty.
 *
 * sp points at n 32-bit words.  Returns 1 when every word is zero (or
 * when n is 0) and 0 as soon as a non-zero word is found.
 */
int
setisempty(uint32_t *sp, uint_t n)
{
	uint_t i;

	for (i = 0; i < n; i++) {
		if (sp[i] != 0)
			return (0);
	}

	return (1);
}
3083 */ 3084 if (!pr_watch_active(p)) { 3085 kthread_t *t; 3086 3087 mutex_enter(&p->p_lock); 3088 if ((t = p->p_tlist) != NULL) { 3089 do { 3090 watch_enable(t); 3091 } while ((t = t->t_forw) != p->p_tlist); 3092 } 3093 mutex_exit(&p->p_lock); 3094 } 3095 3096 target = pr_find_watched_area(p, pwa, &where); 3097 if (target != NULL) { 3098 /* 3099 * We discovered an existing, overlapping watched area. 3100 * Allow it only if it is an exact match. 3101 */ 3102 if (target->wa_vaddr != vaddr || 3103 target->wa_eaddr != eaddr) 3104 error = EINVAL; 3105 else if (target->wa_flags != flags) { 3106 error = set_watched_page(p, vaddr, eaddr, 3107 flags, target->wa_flags); 3108 target->wa_flags = flags; 3109 } 3110 kmem_free(pwa, sizeof (struct watched_area)); 3111 } else { 3112 avl_insert(&p->p_warea, pwa, where); 3113 error = set_watched_page(p, vaddr, eaddr, flags, 0); 3114 } 3115 3116 return (error); 3117 } 3118 3119 /* 3120 * Utility routine for clearing a watched area in the process. 3121 * Must be an exact match of the virtual address. 3122 * size and flags don't matter. 3123 */ 3124 int 3125 clear_watched_area(proc_t *p, struct watched_area *pwa) 3126 { 3127 struct watched_area *found; 3128 3129 /* we must not be holding p->p_lock, but the process must be locked */ 3130 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 3131 ASSERT(p->p_proc_flag & P_PR_LOCK); 3132 3133 3134 if (!pr_watch_active(p)) { 3135 kmem_free(pwa, sizeof (struct watched_area)); 3136 return (0); 3137 } 3138 3139 /* 3140 * Look for a matching address in the watched areas. If a match is 3141 * found, clear the old watched area and adjust the watched page(s). It 3142 * is not an error if there is no match. 
3143 */ 3144 if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL && 3145 found->wa_vaddr == pwa->wa_vaddr) { 3146 clear_watched_page(p, found->wa_vaddr, found->wa_eaddr, 3147 found->wa_flags); 3148 avl_remove(&p->p_warea, found); 3149 kmem_free(found, sizeof (struct watched_area)); 3150 } 3151 3152 kmem_free(pwa, sizeof (struct watched_area)); 3153 3154 /* 3155 * If we removed the last watched area from the process, disable 3156 * watchpoints. 3157 */ 3158 if (!pr_watch_active(p)) { 3159 kthread_t *t; 3160 3161 mutex_enter(&p->p_lock); 3162 if ((t = p->p_tlist) != NULL) { 3163 do { 3164 watch_disable(t); 3165 } while ((t = t->t_forw) != p->p_tlist); 3166 } 3167 mutex_exit(&p->p_lock); 3168 } 3169 3170 return (0); 3171 } 3172 3173 /* 3174 * Frees all the watched_area structures 3175 */ 3176 void 3177 pr_free_watchpoints(proc_t *p) 3178 { 3179 struct watched_area *delp; 3180 void *cookie; 3181 3182 cookie = NULL; 3183 while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL) 3184 kmem_free(delp, sizeof (struct watched_area)); 3185 3186 avl_destroy(&p->p_warea); 3187 } 3188 3189 /* 3190 * This one is called by the traced process to unwatch all the 3191 * pages while deallocating the list of watched_page structs. 
3192 */ 3193 void 3194 pr_free_watched_pages(proc_t *p) 3195 { 3196 struct as *as = p->p_as; 3197 struct watched_page *pwp; 3198 uint_t prot; 3199 int retrycnt, err; 3200 void *cookie; 3201 3202 if (as == NULL || avl_numnodes(&as->a_wpage) == 0) 3203 return; 3204 3205 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 3206 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3207 3208 pwp = avl_first(&as->a_wpage); 3209 3210 cookie = NULL; 3211 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) { 3212 retrycnt = 0; 3213 if ((prot = pwp->wp_oprot) != 0) { 3214 caddr_t addr = pwp->wp_vaddr; 3215 struct seg *seg; 3216 retry: 3217 3218 if ((pwp->wp_prot != prot || 3219 (pwp->wp_flags & WP_NOWATCH)) && 3220 (seg = as_segat(as, addr)) != NULL) { 3221 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot); 3222 if (err == IE_RETRY) { 3223 ASSERT(retrycnt == 0); 3224 retrycnt++; 3225 goto retry; 3226 } 3227 } 3228 } 3229 kmem_free(pwp, sizeof (struct watched_page)); 3230 } 3231 3232 avl_destroy(&as->a_wpage); 3233 p->p_wprot = NULL; 3234 3235 AS_LOCK_EXIT(as, &as->a_lock); 3236 } 3237 3238 /* 3239 * Insert a watched area into the list of watched pages. 3240 * If oflags is zero then we are adding a new watched area. 3241 * Otherwise we are changing the flags of an existing watched area. 3242 */ 3243 static int 3244 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, 3245 ulong_t flags, ulong_t oflags) 3246 { 3247 struct as *as = p->p_as; 3248 avl_tree_t *pwp_tree; 3249 struct watched_page *pwp, *newpwp; 3250 struct watched_page tpw; 3251 avl_index_t where; 3252 struct seg *seg; 3253 uint_t prot; 3254 caddr_t addr; 3255 3256 /* 3257 * We need to pre-allocate a list of structures before we grab the 3258 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks 3259 * held. 
3260 */ 3261 newpwp = NULL; 3262 for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3263 addr < eaddr; addr += PAGESIZE) { 3264 pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP); 3265 pwp->wp_list = newpwp; 3266 newpwp = pwp; 3267 } 3268 3269 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3270 3271 /* 3272 * Search for an existing watched page to contain the watched area. 3273 * If none is found, grab a new one from the available list 3274 * and insert it in the active list, keeping the list sorted 3275 * by user-level virtual address. 3276 */ 3277 if (p->p_flag & SVFWAIT) 3278 pwp_tree = &p->p_wpage; 3279 else 3280 pwp_tree = &as->a_wpage; 3281 3282 again: 3283 if (avl_numnodes(pwp_tree) > prnwatch) { 3284 AS_LOCK_EXIT(as, &as->a_lock); 3285 while (newpwp != NULL) { 3286 pwp = newpwp->wp_list; 3287 kmem_free(newpwp, sizeof (struct watched_page)); 3288 newpwp = pwp; 3289 } 3290 return (E2BIG); 3291 } 3292 3293 tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3294 if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) { 3295 pwp = newpwp; 3296 newpwp = newpwp->wp_list; 3297 pwp->wp_list = NULL; 3298 pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr & 3299 (uintptr_t)PAGEMASK); 3300 avl_insert(pwp_tree, pwp, where); 3301 } 3302 3303 ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE); 3304 3305 if (oflags & WA_READ) 3306 pwp->wp_read--; 3307 if (oflags & WA_WRITE) 3308 pwp->wp_write--; 3309 if (oflags & WA_EXEC) 3310 pwp->wp_exec--; 3311 3312 ASSERT(pwp->wp_read >= 0); 3313 ASSERT(pwp->wp_write >= 0); 3314 ASSERT(pwp->wp_exec >= 0); 3315 3316 if (flags & WA_READ) 3317 pwp->wp_read++; 3318 if (flags & WA_WRITE) 3319 pwp->wp_write++; 3320 if (flags & WA_EXEC) 3321 pwp->wp_exec++; 3322 3323 if (!(p->p_flag & SVFWAIT)) { 3324 vaddr = pwp->wp_vaddr; 3325 if (pwp->wp_oprot == 0 && 3326 (seg = as_segat(as, vaddr)) != NULL) { 3327 SEGOP_GETPROT(seg, vaddr, 0, &prot); 3328 pwp->wp_oprot = (uchar_t)prot; 3329 pwp->wp_prot = 
(uchar_t)prot; 3330 } 3331 if (pwp->wp_oprot != 0) { 3332 prot = pwp->wp_oprot; 3333 if (pwp->wp_read) 3334 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3335 if (pwp->wp_write) 3336 prot &= ~PROT_WRITE; 3337 if (pwp->wp_exec) 3338 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3339 if (!(pwp->wp_flags & WP_NOWATCH) && 3340 pwp->wp_prot != prot && 3341 (pwp->wp_flags & WP_SETPROT) == 0) { 3342 pwp->wp_flags |= WP_SETPROT; 3343 pwp->wp_list = p->p_wprot; 3344 p->p_wprot = pwp; 3345 } 3346 pwp->wp_prot = (uchar_t)prot; 3347 } 3348 } 3349 3350 /* 3351 * If the watched area extends into the next page then do 3352 * it over again with the virtual address of the next page. 3353 */ 3354 if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr) 3355 goto again; 3356 3357 AS_LOCK_EXIT(as, &as->a_lock); 3358 3359 /* 3360 * Free any pages we may have over-allocated 3361 */ 3362 while (newpwp != NULL) { 3363 pwp = newpwp->wp_list; 3364 kmem_free(newpwp, sizeof (struct watched_page)); 3365 newpwp = pwp; 3366 } 3367 3368 return (0); 3369 } 3370 3371 /* 3372 * Remove a watched area from the list of watched pages. 3373 * A watched area may extend over more than one page. 
3374 */ 3375 static void 3376 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags) 3377 { 3378 struct as *as = p->p_as; 3379 struct watched_page *pwp; 3380 struct watched_page tpw; 3381 avl_tree_t *tree; 3382 avl_index_t where; 3383 3384 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3385 3386 if (p->p_flag & SVFWAIT) 3387 tree = &p->p_wpage; 3388 else 3389 tree = &as->a_wpage; 3390 3391 tpw.wp_vaddr = vaddr = 3392 (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3393 pwp = avl_find(tree, &tpw, &where); 3394 if (pwp == NULL) 3395 pwp = avl_nearest(tree, where, AVL_AFTER); 3396 3397 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 3398 ASSERT(vaddr <= pwp->wp_vaddr); 3399 3400 if (flags & WA_READ) 3401 pwp->wp_read--; 3402 if (flags & WA_WRITE) 3403 pwp->wp_write--; 3404 if (flags & WA_EXEC) 3405 pwp->wp_exec--; 3406 3407 if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) { 3408 /* 3409 * Reset the hat layer's protections on this page. 3410 */ 3411 if (pwp->wp_oprot != 0) { 3412 uint_t prot = pwp->wp_oprot; 3413 3414 if (pwp->wp_read) 3415 prot &= 3416 ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3417 if (pwp->wp_write) 3418 prot &= ~PROT_WRITE; 3419 if (pwp->wp_exec) 3420 prot &= 3421 ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3422 if (!(pwp->wp_flags & WP_NOWATCH) && 3423 pwp->wp_prot != prot && 3424 (pwp->wp_flags & WP_SETPROT) == 0) { 3425 pwp->wp_flags |= WP_SETPROT; 3426 pwp->wp_list = p->p_wprot; 3427 p->p_wprot = pwp; 3428 } 3429 pwp->wp_prot = (uchar_t)prot; 3430 } 3431 } else { 3432 /* 3433 * No watched areas remain in this page. 3434 * Reset everything to normal. 
3435 */ 3436 if (pwp->wp_oprot != 0) { 3437 pwp->wp_prot = pwp->wp_oprot; 3438 if ((pwp->wp_flags & WP_SETPROT) == 0) { 3439 pwp->wp_flags |= WP_SETPROT; 3440 pwp->wp_list = p->p_wprot; 3441 p->p_wprot = pwp; 3442 } 3443 } 3444 } 3445 3446 pwp = AVL_NEXT(tree, pwp); 3447 } 3448 3449 AS_LOCK_EXIT(as, &as->a_lock); 3450 } 3451 3452 /* 3453 * Return the original protections for the specified page. 3454 */ 3455 static void 3456 getwatchprot(struct as *as, caddr_t addr, uint_t *prot) 3457 { 3458 struct watched_page *pwp; 3459 struct watched_page tpw; 3460 3461 ASSERT(AS_LOCK_HELD(as, &as->a_lock)); 3462 3463 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 3464 if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL) 3465 *prot = pwp->wp_oprot; 3466 } 3467 3468 static prpagev_t * 3469 pr_pagev_create(struct seg *seg, int check_noreserve) 3470 { 3471 prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP); 3472 size_t total_pages = seg_pages(seg); 3473 3474 /* 3475 * Limit the size of our vectors to pagev_lim pages at a time. We need 3476 * 4 or 5 bytes of storage per page, so this means we limit ourself 3477 * to about a megabyte of kernel heap by default. 
3478 */ 3479 pagev->pg_npages = MIN(total_pages, pagev_lim); 3480 pagev->pg_pnbase = 0; 3481 3482 pagev->pg_protv = 3483 kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP); 3484 3485 if (check_noreserve) 3486 pagev->pg_incore = 3487 kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP); 3488 else 3489 pagev->pg_incore = NULL; 3490 3491 return (pagev); 3492 } 3493 3494 static void 3495 pr_pagev_destroy(prpagev_t *pagev) 3496 { 3497 if (pagev->pg_incore != NULL) 3498 kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char)); 3499 3500 kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t)); 3501 kmem_free(pagev, sizeof (prpagev_t)); 3502 } 3503 3504 static caddr_t 3505 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr) 3506 { 3507 ulong_t lastpg = seg_page(seg, eaddr - 1); 3508 ulong_t pn, pnlim; 3509 caddr_t saddr; 3510 size_t len; 3511 3512 ASSERT(addr >= seg->s_base && addr <= eaddr); 3513 3514 if (addr == eaddr) 3515 return (eaddr); 3516 3517 refill: 3518 ASSERT(addr < eaddr); 3519 pagev->pg_pnbase = seg_page(seg, addr); 3520 pnlim = pagev->pg_pnbase + pagev->pg_npages; 3521 saddr = addr; 3522 3523 if (lastpg < pnlim) 3524 len = (size_t)(eaddr - addr); 3525 else 3526 len = pagev->pg_npages * PAGESIZE; 3527 3528 if (pagev->pg_incore != NULL) { 3529 /* 3530 * INCORE cleverly has different semantics than GETPROT: 3531 * it returns info on pages up to but NOT including addr + len. 3532 */ 3533 SEGOP_INCORE(seg, addr, len, pagev->pg_incore); 3534 pn = pagev->pg_pnbase; 3535 3536 do { 3537 /* 3538 * Guilty knowledge here: We know that segvn_incore 3539 * returns more than just the low-order bit that 3540 * indicates the page is actually in memory. If any 3541 * bits are set, then the page has backing store. 
3542 */ 3543 if (pagev->pg_incore[pn++ - pagev->pg_pnbase]) 3544 goto out; 3545 3546 } while ((addr += PAGESIZE) < eaddr && pn < pnlim); 3547 3548 /* 3549 * If we examined all the pages in the vector but we're not 3550 * at the end of the segment, take another lap. 3551 */ 3552 if (addr < eaddr) 3553 goto refill; 3554 } 3555 3556 /* 3557 * Need to take len - 1 because addr + len is the address of the 3558 * first byte of the page just past the end of what we want. 3559 */ 3560 out: 3561 SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv); 3562 return (addr); 3563 } 3564 3565 static caddr_t 3566 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg, 3567 caddr_t *saddrp, caddr_t eaddr, uint_t *protp) 3568 { 3569 /* 3570 * Our starting address is either the specified address, or the base 3571 * address from the start of the pagev. If the latter is greater, 3572 * this means a previous call to pr_pagev_fill has already scanned 3573 * further than the end of the previous mapping. 3574 */ 3575 caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE; 3576 caddr_t addr = MAX(*saddrp, base); 3577 ulong_t pn = seg_page(seg, addr); 3578 uint_t prot, nprot; 3579 3580 /* 3581 * If we're dealing with noreserve pages, then advance addr to 3582 * the address of the next page which has backing store. 3583 */ 3584 if (pagev->pg_incore != NULL) { 3585 while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) { 3586 if ((addr += PAGESIZE) == eaddr) { 3587 *saddrp = addr; 3588 prot = 0; 3589 goto out; 3590 } 3591 if (++pn == pagev->pg_pnbase + pagev->pg_npages) { 3592 addr = pr_pagev_fill(pagev, seg, addr, eaddr); 3593 if (addr == eaddr) { 3594 *saddrp = addr; 3595 prot = 0; 3596 goto out; 3597 } 3598 pn = seg_page(seg, addr); 3599 } 3600 } 3601 } 3602 3603 /* 3604 * Get the protections on the page corresponding to addr. 
3605 */ 3606 pn = seg_page(seg, addr); 3607 ASSERT(pn >= pagev->pg_pnbase); 3608 ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages)); 3609 3610 prot = pagev->pg_protv[pn - pagev->pg_pnbase]; 3611 getwatchprot(seg->s_as, addr, &prot); 3612 *saddrp = addr; 3613 3614 /* 3615 * Now loop until we find a backed page with different protections 3616 * or we reach the end of this segment. 3617 */ 3618 while ((addr += PAGESIZE) < eaddr) { 3619 /* 3620 * If pn has advanced to the page number following what we 3621 * have information on, refill the page vector and reset 3622 * addr and pn. If pr_pagev_fill does not return the 3623 * address of the next page, we have a discontiguity and 3624 * thus have reached the end of the current mapping. 3625 */ 3626 if (++pn == pagev->pg_pnbase + pagev->pg_npages) { 3627 caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr); 3628 if (naddr != addr) 3629 goto out; 3630 pn = seg_page(seg, addr); 3631 } 3632 3633 /* 3634 * The previous page's protections are in prot, and it has 3635 * backing. If this page is MAP_NORESERVE and has no backing, 3636 * then end this mapping and return the previous protections. 3637 */ 3638 if (pagev->pg_incore != NULL && 3639 pagev->pg_incore[pn - pagev->pg_pnbase] == 0) 3640 break; 3641 3642 /* 3643 * Otherwise end the mapping if this page's protections (nprot) 3644 * are different than those in the previous page (prot). 3645 */ 3646 nprot = pagev->pg_protv[pn - pagev->pg_pnbase]; 3647 getwatchprot(seg->s_as, addr, &nprot); 3648 3649 if (nprot != prot) 3650 break; 3651 } 3652 3653 out: 3654 *protp = prot; 3655 return (addr); 3656 } 3657 3658 size_t 3659 pr_getsegsize(struct seg *seg, int reserved) 3660 { 3661 size_t size = seg->s_size; 3662 3663 /* 3664 * If we're interested in the reserved space, return the size of the 3665 * segment itself. Everything else in this function is a special case 3666 * to determine the actual underlying size of various segment types. 
3667 */ 3668 if (reserved) 3669 return (size); 3670 3671 /* 3672 * If this is a segvn mapping of a regular file, return the smaller 3673 * of the segment size and the remaining size of the file beyond 3674 * the file offset corresponding to seg->s_base. 3675 */ 3676 if (seg->s_ops == &segvn_ops) { 3677 vattr_t vattr; 3678 vnode_t *vp; 3679 3680 vattr.va_mask = AT_SIZE; 3681 3682 if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 && 3683 vp != NULL && vp->v_type == VREG && 3684 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { 3685 3686 u_offset_t fsize = vattr.va_size; 3687 u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base); 3688 3689 if (fsize < offset) 3690 fsize = 0; 3691 else 3692 fsize -= offset; 3693 3694 fsize = roundup(fsize, (u_offset_t)PAGESIZE); 3695 3696 if (fsize < (u_offset_t)size) 3697 size = (size_t)fsize; 3698 } 3699 3700 return (size); 3701 } 3702 3703 /* 3704 * If this is an ISM shared segment, don't include pages that are 3705 * beyond the real size of the spt segment that backs it. 3706 */ 3707 if (seg->s_ops == &segspt_shmops) 3708 return (MIN(spt_realsize(seg), size)); 3709 3710 /* 3711 * If this is segment is a mapping from /dev/null, then this is a 3712 * reservation of virtual address space and has no actual size. 3713 * Such segments are backed by segdev and have type set to neither 3714 * MAP_SHARED nor MAP_PRIVATE. 3715 */ 3716 if (seg->s_ops == &segdev_ops && 3717 ((SEGOP_GETTYPE(seg, seg->s_base) & 3718 (MAP_SHARED | MAP_PRIVATE)) == 0)) 3719 return (0); 3720 3721 /* 3722 * If this segment doesn't match one of the special types we handle, 3723 * just return the size of the segment itself. 
3724 */ 3725 return (size); 3726 } 3727 3728 uint_t 3729 pr_getprot(struct seg *seg, int reserved, void **tmp, 3730 caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr) 3731 { 3732 struct as *as = seg->s_as; 3733 3734 caddr_t saddr = *saddrp; 3735 caddr_t naddr; 3736 3737 int check_noreserve; 3738 uint_t prot; 3739 3740 union { 3741 struct segvn_data *svd; 3742 struct segdev_data *sdp; 3743 void *data; 3744 } s; 3745 3746 s.data = seg->s_data; 3747 3748 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 3749 ASSERT(saddr >= seg->s_base && saddr < eaddr); 3750 ASSERT(eaddr <= seg->s_base + seg->s_size); 3751 3752 /* 3753 * Don't include MAP_NORESERVE pages in the address range 3754 * unless their mappings have actually materialized. 3755 * We cheat by knowing that segvn is the only segment 3756 * driver that supports MAP_NORESERVE. 3757 */ 3758 check_noreserve = 3759 (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL && 3760 (s.svd->vp == NULL || s.svd->vp->v_type != VREG) && 3761 (s.svd->flags & MAP_NORESERVE)); 3762 3763 /* 3764 * Examine every page only as a last resort. We use guilty knowledge 3765 * of segvn and segdev to avoid this: if there are no per-page 3766 * protections present in the segment and we don't care about 3767 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment. 3768 */ 3769 if (!check_noreserve && saddr == seg->s_base && 3770 seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) { 3771 prot = s.svd->prot; 3772 getwatchprot(as, saddr, &prot); 3773 naddr = eaddr; 3774 3775 } else if (saddr == seg->s_base && seg->s_ops == &segdev_ops && 3776 s.sdp != NULL && s.sdp->pageprot == 0) { 3777 prot = s.sdp->prot; 3778 getwatchprot(as, saddr, &prot); 3779 naddr = eaddr; 3780 3781 } else { 3782 prpagev_t *pagev; 3783 3784 /* 3785 * If addr is sitting at the start of the segment, then 3786 * create a page vector to store protection and incore 3787 * information for pages in the segment, and fill it. 
3788 * Otherwise, we expect *tmp to address the prpagev_t 3789 * allocated by a previous call to this function. 3790 */ 3791 if (saddr == seg->s_base) { 3792 pagev = pr_pagev_create(seg, check_noreserve); 3793 saddr = pr_pagev_fill(pagev, seg, saddr, eaddr); 3794 3795 ASSERT(*tmp == NULL); 3796 *tmp = pagev; 3797 3798 ASSERT(saddr <= eaddr); 3799 *saddrp = saddr; 3800 3801 if (saddr == eaddr) { 3802 naddr = saddr; 3803 prot = 0; 3804 goto out; 3805 } 3806 3807 } else { 3808 ASSERT(*tmp != NULL); 3809 pagev = (prpagev_t *)*tmp; 3810 } 3811 3812 naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot); 3813 ASSERT(naddr <= eaddr); 3814 } 3815 3816 out: 3817 if (naddr == eaddr) 3818 pr_getprot_done(tmp); 3819 *naddrp = naddr; 3820 return (prot); 3821 } 3822 3823 void 3824 pr_getprot_done(void **tmp) 3825 { 3826 if (*tmp != NULL) { 3827 pr_pagev_destroy((prpagev_t *)*tmp); 3828 *tmp = NULL; 3829 } 3830 } 3831 3832 /* 3833 * Return true iff the vnode is a /proc file from the object directory. 3834 */ 3835 int 3836 pr_isobject(vnode_t *vp) 3837 { 3838 return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT); 3839 } 3840 3841 /* 3842 * Return true iff the vnode is a /proc file opened by the process itself. 3843 */ 3844 int 3845 pr_isself(vnode_t *vp) 3846 { 3847 /* 3848 * XXX: To retain binary compatibility with the old 3849 * ioctl()-based version of /proc, we exempt self-opens 3850 * of /proc/<pid> from being marked close-on-exec. 
3851 */ 3852 return (vn_matchops(vp, prvnodeops) && 3853 (VTOP(vp)->pr_flags & PR_ISSELF) && 3854 VTOP(vp)->pr_type != PR_PIDDIR); 3855 } 3856 3857 static ssize_t 3858 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr) 3859 { 3860 ssize_t pagesize, hatsize; 3861 3862 ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 3863 ASSERT(IS_P2ALIGNED(saddr, PAGESIZE)); 3864 ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE)); 3865 ASSERT(saddr < eaddr); 3866 3867 pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr); 3868 ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize)); 3869 ASSERT(pagesize != 0); 3870 3871 if (pagesize == -1) 3872 pagesize = PAGESIZE; 3873 3874 saddr += P2NPHASE((uintptr_t)saddr, pagesize); 3875 3876 while (saddr < eaddr) { 3877 if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr)) 3878 break; 3879 ASSERT(IS_P2ALIGNED(saddr, pagesize)); 3880 saddr += pagesize; 3881 } 3882 3883 *naddrp = ((saddr < eaddr) ? saddr : eaddr); 3884 return (hatsize); 3885 } 3886 3887 /* 3888 * Return an array of structures with extended memory map information. 3889 * We allocate here; the caller must deallocate. 3890 */ 3891 int 3892 prgetxmap(proc_t *p, list_t *iolhead) 3893 { 3894 struct as *as = p->p_as; 3895 prxmap_t *mp; 3896 struct seg *seg; 3897 struct seg *brkseg, *stkseg; 3898 struct vnode *vp; 3899 struct vattr vattr; 3900 uint_t prot; 3901 3902 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 3903 3904 /* 3905 * Request an initial buffer size that doesn't waste memory 3906 * if the address space has only a small number of segments. 
3907 */ 3908 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 3909 3910 if ((seg = AS_SEGFIRST(as)) == NULL) 3911 return (0); 3912 3913 brkseg = break_seg(p); 3914 stkseg = as_segat(as, prgetstackbase(p)); 3915 3916 do { 3917 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 3918 caddr_t saddr, naddr, baddr; 3919 void *tmp = NULL; 3920 ssize_t psz; 3921 char *parr; 3922 uint64_t npages; 3923 uint64_t pagenum; 3924 3925 /* 3926 * Segment loop part one: iterate from the base of the segment 3927 * to its end, pausing at each address boundary (baddr) between 3928 * ranges that have different virtual memory protections. 3929 */ 3930 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { 3931 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); 3932 ASSERT(baddr >= saddr && baddr <= eaddr); 3933 3934 /* 3935 * Segment loop part two: iterate from the current 3936 * position to the end of the protection boundary, 3937 * pausing at each address boundary (naddr) between 3938 * ranges that have different underlying page sizes. 
3939 */ 3940 for (; saddr < baddr; saddr = naddr) { 3941 psz = pr_getpagesize(seg, saddr, &naddr, baddr); 3942 ASSERT(naddr >= saddr && naddr <= baddr); 3943 3944 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 3945 3946 mp->pr_vaddr = (uintptr_t)saddr; 3947 mp->pr_size = naddr - saddr; 3948 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 3949 mp->pr_mflags = 0; 3950 if (prot & PROT_READ) 3951 mp->pr_mflags |= MA_READ; 3952 if (prot & PROT_WRITE) 3953 mp->pr_mflags |= MA_WRITE; 3954 if (prot & PROT_EXEC) 3955 mp->pr_mflags |= MA_EXEC; 3956 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 3957 mp->pr_mflags |= MA_SHARED; 3958 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 3959 mp->pr_mflags |= MA_NORESERVE; 3960 if (seg->s_ops == &segspt_shmops || 3961 (seg->s_ops == &segvn_ops && 3962 (SEGOP_GETVP(seg, saddr, &vp) != 0 || 3963 vp == NULL))) 3964 mp->pr_mflags |= MA_ANON; 3965 if (seg == brkseg) 3966 mp->pr_mflags |= MA_BREAK; 3967 else if (seg == stkseg) 3968 mp->pr_mflags |= MA_STACK; 3969 if (seg->s_ops == &segspt_shmops) 3970 mp->pr_mflags |= MA_ISM | MA_SHM; 3971 3972 mp->pr_pagesize = PAGESIZE; 3973 if (psz == -1) { 3974 mp->pr_hatpagesize = 0; 3975 } else { 3976 mp->pr_hatpagesize = psz; 3977 } 3978 3979 /* 3980 * Manufacture a filename for the "object" dir. 3981 */ 3982 mp->pr_dev = PRNODEV; 3983 vattr.va_mask = AT_FSID|AT_NODEID; 3984 if (seg->s_ops == &segvn_ops && 3985 SEGOP_GETVP(seg, saddr, &vp) == 0 && 3986 vp != NULL && vp->v_type == VREG && 3987 VOP_GETATTR(vp, &vattr, 0, CRED(), 3988 NULL) == 0) { 3989 mp->pr_dev = vattr.va_fsid; 3990 mp->pr_ino = vattr.va_nodeid; 3991 if (vp == p->p_exec) 3992 (void) strcpy(mp->pr_mapname, 3993 "a.out"); 3994 else 3995 pr_object_name(mp->pr_mapname, 3996 vp, &vattr); 3997 } 3998 3999 /* 4000 * Get the SysV shared memory id, if any. 
4001 */ 4002 if ((mp->pr_mflags & MA_SHARED) && 4003 p->p_segacct && (mp->pr_shmid = shmgetid(p, 4004 seg->s_base)) != SHMID_NONE) { 4005 if (mp->pr_shmid == SHMID_FREE) 4006 mp->pr_shmid = -1; 4007 4008 mp->pr_mflags |= MA_SHM; 4009 } else { 4010 mp->pr_shmid = -1; 4011 } 4012 4013 npages = ((uintptr_t)(naddr - saddr)) >> 4014 PAGESHIFT; 4015 parr = kmem_zalloc(npages, KM_SLEEP); 4016 4017 SEGOP_INCORE(seg, saddr, naddr - saddr, parr); 4018 4019 for (pagenum = 0; pagenum < npages; pagenum++) { 4020 if (parr[pagenum] & SEG_PAGE_INCORE) 4021 mp->pr_rss++; 4022 if (parr[pagenum] & SEG_PAGE_ANON) 4023 mp->pr_anon++; 4024 if (parr[pagenum] & SEG_PAGE_LOCKED) 4025 mp->pr_locked++; 4026 } 4027 kmem_free(parr, npages); 4028 } 4029 } 4030 ASSERT(tmp == NULL); 4031 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 4032 4033 return (0); 4034 } 4035 4036 /* 4037 * Return the process's credentials. We don't need a 32-bit equivalent of 4038 * this function because prcred_t and prcred32_t are actually the same. 4039 */ 4040 void 4041 prgetcred(proc_t *p, prcred_t *pcrp) 4042 { 4043 mutex_enter(&p->p_crlock); 4044 cred2prcred(p->p_cred, pcrp); 4045 mutex_exit(&p->p_crlock); 4046 } 4047 4048 /* 4049 * Compute actual size of the prpriv_t structure. 4050 */ 4051 4052 size_t 4053 prgetprivsize(void) 4054 { 4055 return (priv_prgetprivsize(NULL)); 4056 } 4057 4058 /* 4059 * Return the process's privileges. We don't need a 32-bit equivalent of 4060 * this function because prpriv_t and prpriv32_t are actually the same. 4061 */ 4062 void 4063 prgetpriv(proc_t *p, prpriv_t *pprp) 4064 { 4065 mutex_enter(&p->p_crlock); 4066 cred2prpriv(p->p_cred, pprp); 4067 mutex_exit(&p->p_crlock); 4068 } 4069 4070 #ifdef _SYSCALL32_IMPL 4071 /* 4072 * Return an array of structures with HAT memory map information. 4073 * We allocate here; the caller must deallocate. 
4074 */ 4075 int 4076 prgetxmap32(proc_t *p, list_t *iolhead) 4077 { 4078 struct as *as = p->p_as; 4079 prxmap32_t *mp; 4080 struct seg *seg; 4081 struct seg *brkseg, *stkseg; 4082 struct vnode *vp; 4083 struct vattr vattr; 4084 uint_t prot; 4085 4086 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 4087 4088 /* 4089 * Request an initial buffer size that doesn't waste memory 4090 * if the address space has only a small number of segments. 4091 */ 4092 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 4093 4094 if ((seg = AS_SEGFIRST(as)) == NULL) 4095 return (0); 4096 4097 brkseg = break_seg(p); 4098 stkseg = as_segat(as, prgetstackbase(p)); 4099 4100 do { 4101 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 4102 caddr_t saddr, naddr, baddr; 4103 void *tmp = NULL; 4104 ssize_t psz; 4105 char *parr; 4106 uint64_t npages; 4107 uint64_t pagenum; 4108 4109 /* 4110 * Segment loop part one: iterate from the base of the segment 4111 * to its end, pausing at each address boundary (baddr) between 4112 * ranges that have different virtual memory protections. 4113 */ 4114 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { 4115 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); 4116 ASSERT(baddr >= saddr && baddr <= eaddr); 4117 4118 /* 4119 * Segment loop part two: iterate from the current 4120 * position to the end of the protection boundary, 4121 * pausing at each address boundary (naddr) between 4122 * ranges that have different underlying page sizes. 
4123 */ 4124 for (; saddr < baddr; saddr = naddr) { 4125 psz = pr_getpagesize(seg, saddr, &naddr, baddr); 4126 ASSERT(naddr >= saddr && naddr <= baddr); 4127 4128 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 4129 4130 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 4131 mp->pr_size = (size32_t)(naddr - saddr); 4132 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 4133 mp->pr_mflags = 0; 4134 if (prot & PROT_READ) 4135 mp->pr_mflags |= MA_READ; 4136 if (prot & PROT_WRITE) 4137 mp->pr_mflags |= MA_WRITE; 4138 if (prot & PROT_EXEC) 4139 mp->pr_mflags |= MA_EXEC; 4140 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 4141 mp->pr_mflags |= MA_SHARED; 4142 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 4143 mp->pr_mflags |= MA_NORESERVE; 4144 if (seg->s_ops == &segspt_shmops || 4145 (seg->s_ops == &segvn_ops && 4146 (SEGOP_GETVP(seg, saddr, &vp) != 0 || 4147 vp == NULL))) 4148 mp->pr_mflags |= MA_ANON; 4149 if (seg == brkseg) 4150 mp->pr_mflags |= MA_BREAK; 4151 else if (seg == stkseg) 4152 mp->pr_mflags |= MA_STACK; 4153 if (seg->s_ops == &segspt_shmops) 4154 mp->pr_mflags |= MA_ISM | MA_SHM; 4155 4156 mp->pr_pagesize = PAGESIZE; 4157 if (psz == -1) { 4158 mp->pr_hatpagesize = 0; 4159 } else { 4160 mp->pr_hatpagesize = psz; 4161 } 4162 4163 /* 4164 * Manufacture a filename for the "object" dir. 4165 */ 4166 mp->pr_dev = PRNODEV32; 4167 vattr.va_mask = AT_FSID|AT_NODEID; 4168 if (seg->s_ops == &segvn_ops && 4169 SEGOP_GETVP(seg, saddr, &vp) == 0 && 4170 vp != NULL && vp->v_type == VREG && 4171 VOP_GETATTR(vp, &vattr, 0, CRED(), 4172 NULL) == 0) { 4173 (void) cmpldev(&mp->pr_dev, 4174 vattr.va_fsid); 4175 mp->pr_ino = vattr.va_nodeid; 4176 if (vp == p->p_exec) 4177 (void) strcpy(mp->pr_mapname, 4178 "a.out"); 4179 else 4180 pr_object_name(mp->pr_mapname, 4181 vp, &vattr); 4182 } 4183 4184 /* 4185 * Get the SysV shared memory id, if any. 
4186 */ 4187 if ((mp->pr_mflags & MA_SHARED) && 4188 p->p_segacct && (mp->pr_shmid = shmgetid(p, 4189 seg->s_base)) != SHMID_NONE) { 4190 if (mp->pr_shmid == SHMID_FREE) 4191 mp->pr_shmid = -1; 4192 4193 mp->pr_mflags |= MA_SHM; 4194 } else { 4195 mp->pr_shmid = -1; 4196 } 4197 4198 npages = ((uintptr_t)(naddr - saddr)) >> 4199 PAGESHIFT; 4200 parr = kmem_zalloc(npages, KM_SLEEP); 4201 4202 SEGOP_INCORE(seg, saddr, naddr - saddr, parr); 4203 4204 for (pagenum = 0; pagenum < npages; pagenum++) { 4205 if (parr[pagenum] & SEG_PAGE_INCORE) 4206 mp->pr_rss++; 4207 if (parr[pagenum] & SEG_PAGE_ANON) 4208 mp->pr_anon++; 4209 if (parr[pagenum] & SEG_PAGE_LOCKED) 4210 mp->pr_locked++; 4211 } 4212 kmem_free(parr, npages); 4213 } 4214 } 4215 ASSERT(tmp == NULL); 4216 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 4217 4218 return (0); 4219 } 4220 #endif /* _SYSCALL32_IMPL */ 4221