1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <sys/types.h> 34 #include <sys/t_lock.h> 35 #include <sys/param.h> 36 #include <sys/cmn_err.h> 37 #include <sys/cred.h> 38 #include <sys/priv.h> 39 #include <sys/debug.h> 40 #include <sys/errno.h> 41 #include <sys/inline.h> 42 #include <sys/kmem.h> 43 #include <sys/mman.h> 44 #include <sys/proc.h> 45 #include <sys/sobject.h> 46 #include <sys/sysmacros.h> 47 #include <sys/systm.h> 48 #include <sys/uio.h> 49 #include <sys/var.h> 50 #include <sys/vfs.h> 51 #include <sys/vnode.h> 52 #include <sys/session.h> 53 #include <sys/pcb.h> 54 #include <sys/signal.h> 55 #include <sys/user.h> 56 #include <sys/disp.h> 57 #include <sys/class.h> 58 #include <sys/ts.h> 59 #include <sys/bitmap.h> 60 #include <sys/poll.h> 61 #include <sys/shm_impl.h> 62 #include <sys/fault.h> 63 #include <sys/syscall.h> 64 #include <sys/procfs.h> 65 #include <sys/processor.h> 66 #include <sys/cpuvar.h> 67 #include <sys/copyops.h> 68 #include <sys/time.h> 69 #include <sys/msacct.h> 70 #include <vm/as.h> 71 #include <vm/rm.h> 72 #include <vm/seg.h> 73 #include <vm/seg_vn.h> 74 #include <vm/seg_dev.h> 75 #include <vm/seg_spt.h> 76 #include <vm/page.h> 77 #include <sys/vmparam.h> 78 #include <sys/swap.h> 79 #include <fs/proc/prdata.h> 80 #include <sys/task.h> 81 #include <sys/project.h> 82 #include <sys/contract_impl.h> 83 #include <sys/contract/process.h> 84 #include <sys/contract/process_impl.h> 85 #include <sys/schedctl.h> 86 #include <sys/pool.h> 87 #include <sys/zone.h> 88 #include <sys/atomic.h> 89 #include <sys/sdt.h> 90 91 #define MAX_ITERS_SPIN 5 92 93 typedef struct prpagev { 94 uint_t *pg_protv; /* vector of page permissions */ 95 char *pg_incore; /* vector of incore flags */ 96 size_t pg_npages; /* number of pages in protv and incore */ 97 ulong_t pg_pnbase; /* pn within segment of first protv element */ 98 } prpagev_t; 99 100 size_t pagev_lim = 256 * 1024; /* limit on number of pages in prpagev_t */ 101 102 extern struct seg_ops segdev_ops; /* needs a header file */ 103 extern struct seg_ops segspt_shmops; /* needs a header file */ 104 105 static int set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t); 106 static void clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t); 107 108 /* 109 * Choose an lwp from the complete set of lwps for the process. 110 * This is called for any operation applied to the process 111 * file descriptor that requires an lwp to operate upon. 112 * 113 * Returns a pointer to the thread for the selected LWP, 114 * and with the dispatcher lock held for the thread. 115 * 116 * The algorithm for choosing an lwp is critical for /proc semantics; 117 * don't touch this code unless you know all of the implications. 118 */ 119 kthread_t * 120 prchoose(proc_t *p) 121 { 122 kthread_t *t; 123 kthread_t *t_onproc = NULL; /* running on processor */ 124 kthread_t *t_run = NULL; /* runnable, on disp queue */ 125 kthread_t *t_sleep = NULL; /* sleeping */ 126 kthread_t *t_hold = NULL; /* sleeping, performing hold */ 127 kthread_t *t_susp = NULL; /* suspended stop */ 128 kthread_t *t_jstop = NULL; /* jobcontrol stop, w/o directed stop */ 129 kthread_t *t_jdstop = NULL; /* jobcontrol stop with directed stop */ 130 kthread_t *t_req = NULL; /* requested stop */ 131 kthread_t *t_istop = NULL; /* event-of-interest stop */ 132 133 ASSERT(MUTEX_HELD(&p->p_lock)); 134 135 /* 136 * If the agent lwp exists, it takes precedence over all others. 137 */ 138 if ((t = p->p_agenttp) != NULL) { 139 thread_lock(t); 140 return (t); 141 } 142 143 if ((t = p->p_tlist) == NULL) /* start at the head of the list */ 144 return (t); 145 do { /* for eacn lwp in the process */ 146 if (VSTOPPED(t)) { /* virtually stopped */ 147 if (t_req == NULL) 148 t_req = t; 149 continue; 150 } 151 152 thread_lock(t); /* make sure thread is in good state */ 153 switch (t->t_state) { 154 default: 155 panic("prchoose: bad thread state %d, thread 0x%p", 156 t->t_state, (void *)t); 157 /*NOTREACHED*/ 158 case TS_SLEEP: 159 /* this is filthy */ 160 if (t->t_wchan == (caddr_t)&p->p_holdlwps && 161 t->t_wchan0 == NULL) { 162 if (t_hold == NULL) 163 t_hold = t; 164 } else { 165 if (t_sleep == NULL) 166 t_sleep = t; 167 } 168 break; 169 case TS_RUN: 170 if (t_run == NULL) 171 t_run = t; 172 break; 173 case TS_ONPROC: 174 if (t_onproc == NULL) 175 t_onproc = t; 176 break; 177 case TS_ZOMB: /* last possible choice */ 178 break; 179 case TS_STOPPED: 180 switch (t->t_whystop) { 181 case PR_SUSPENDED: 182 if (t_susp == NULL) 183 t_susp = t; 184 break; 185 case PR_JOBCONTROL: 186 if (t->t_proc_flag & TP_PRSTOP) { 187 if (t_jdstop == NULL) 188 t_jdstop = t; 189 } else { 190 if (t_jstop == NULL) 191 t_jstop = t; 192 } 193 break; 194 case PR_REQUESTED: 195 if (t_req == NULL) 196 t_req = t; 197 break; 198 case PR_SYSENTRY: 199 case PR_SYSEXIT: 200 case PR_SIGNALLED: 201 case PR_FAULTED: 202 /* 203 * Make an lwp calling exit() be the 204 * last lwp seen in the process. 205 */ 206 if (t_istop == NULL || 207 (t_istop->t_whystop == PR_SYSENTRY && 208 t_istop->t_whatstop == SYS_exit)) 209 t_istop = t; 210 break; 211 case PR_CHECKPOINT: /* can't happen? */ 212 break; 213 default: 214 panic("prchoose: bad t_whystop %d, thread 0x%p", 215 t->t_whystop, (void *)t); 216 /*NOTREACHED*/ 217 } 218 break; 219 } 220 thread_unlock(t); 221 } while ((t = t->t_forw) != p->p_tlist); 222 223 if (t_onproc) 224 t = t_onproc; 225 else if (t_run) 226 t = t_run; 227 else if (t_sleep) 228 t = t_sleep; 229 else if (t_jstop) 230 t = t_jstop; 231 else if (t_jdstop) 232 t = t_jdstop; 233 else if (t_istop) 234 t = t_istop; 235 else if (t_req) 236 t = t_req; 237 else if (t_hold) 238 t = t_hold; 239 else if (t_susp) 240 t = t_susp; 241 else /* TS_ZOMB */ 242 t = p->p_tlist; 243 244 if (t != NULL) 245 thread_lock(t); 246 return (t); 247 } 248 249 /* 250 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop. 251 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI 252 * on the /proc file descriptor. Called from stop() when a traced 253 * process stops on an event of interest. Also called from exit() 254 * and prinvalidate() to indicate POLLHUP and POLLERR respectively. 255 */ 256 void 257 prnotify(struct vnode *vp) 258 { 259 prcommon_t *pcp = VTOP(vp)->pr_common; 260 261 mutex_enter(&pcp->prc_mutex); 262 cv_broadcast(&pcp->prc_wait); 263 mutex_exit(&pcp->prc_mutex); 264 if (pcp->prc_flags & PRC_POLL) { 265 /* 266 * We call pollwakeup() with POLLHUP to ensure that 267 * the pollers are awakened even if they are polling 268 * for nothing (i.e., waiting for the process to exit). 269 * This enables the use of the PRC_POLL flag for optimization 270 * (we can turn off PRC_POLL only if we know no pollers remain). 271 */ 272 pcp->prc_flags &= ~PRC_POLL; 273 pollwakeup(&pcp->prc_pollhead, POLLHUP); 274 } 275 } 276 277 /* called immediately below, in prfree() */ 278 static void 279 prfreenotify(vnode_t *vp) 280 { 281 prnode_t *pnp; 282 prcommon_t *pcp; 283 284 while (vp != NULL) { 285 pnp = VTOP(vp); 286 pcp = pnp->pr_common; 287 ASSERT(pcp->prc_thread == NULL); 288 pcp->prc_proc = NULL; 289 /* 290 * We can't call prnotify() here because we are holding 291 * pidlock. We assert that there is no need to. 292 */ 293 mutex_enter(&pcp->prc_mutex); 294 cv_broadcast(&pcp->prc_wait); 295 mutex_exit(&pcp->prc_mutex); 296 ASSERT(!(pcp->prc_flags & PRC_POLL)); 297 298 vp = pnp->pr_next; 299 pnp->pr_next = NULL; 300 } 301 } 302 303 /* 304 * Called from a hook in freeproc() when a traced process is removed 305 * from the process table. The proc-table pointers of all associated 306 * /proc vnodes are cleared to indicate that the process has gone away. 307 */ 308 void 309 prfree(proc_t *p) 310 { 311 uint_t slot = p->p_slot; 312 313 ASSERT(MUTEX_HELD(&pidlock)); 314 315 /* 316 * Block the process against /proc so it can be freed. 317 * It cannot be freed while locked by some controlling process. 318 * Lock ordering: 319 * pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex 320 */ 321 mutex_enter(&pr_pidlock); /* protects pcp->prc_proc */ 322 mutex_enter(&p->p_lock); 323 while (p->p_proc_flag & P_PR_LOCK) { 324 mutex_exit(&pr_pidlock); 325 cv_wait(&pr_pid_cv[slot], &p->p_lock); 326 mutex_exit(&p->p_lock); 327 mutex_enter(&pr_pidlock); 328 mutex_enter(&p->p_lock); 329 } 330 331 ASSERT(p->p_tlist == NULL); 332 333 prfreenotify(p->p_plist); 334 p->p_plist = NULL; 335 336 prfreenotify(p->p_trace); 337 p->p_trace = NULL; 338 339 /* 340 * We broadcast to wake up everyone waiting for this process. 341 * No one can reach this process from this point on. 342 */ 343 cv_broadcast(&pr_pid_cv[slot]); 344 345 mutex_exit(&p->p_lock); 346 mutex_exit(&pr_pidlock); 347 } 348 349 /* 350 * Called from a hook in exit() when a traced process is becoming a zombie. 351 */ 352 void 353 prexit(proc_t *p) 354 { 355 ASSERT(MUTEX_HELD(&p->p_lock)); 356 357 if (pr_watch_active(p)) { 358 pr_free_watchpoints(p); 359 watch_disable(curthread); 360 } 361 /* pr_free_watched_pages() is called in exit(), after dropping p_lock */ 362 if (p->p_trace) { 363 VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY; 364 prnotify(p->p_trace); 365 } 366 cv_broadcast(&pr_pid_cv[p->p_slot]); /* pauselwps() */ 367 } 368 369 /* 370 * Called when a thread calls lwp_exit(). 371 */ 372 void 373 prlwpexit(kthread_t *t) 374 { 375 vnode_t *vp; 376 prnode_t *pnp; 377 prcommon_t *pcp; 378 proc_t *p = ttoproc(t); 379 lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry; 380 381 ASSERT(t == curthread); 382 ASSERT(MUTEX_HELD(&p->p_lock)); 383 384 /* 385 * The process must be blocked against /proc to do this safely. 386 * The lwp must not disappear while the process is marked P_PR_LOCK. 387 * It is the caller's responsibility to have called prbarrier(p). 388 */ 389 ASSERT(!(p->p_proc_flag & P_PR_LOCK)); 390 391 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) { 392 pnp = VTOP(vp); 393 pcp = pnp->pr_common; 394 if (pcp->prc_thread == t) { 395 pcp->prc_thread = NULL; 396 pcp->prc_flags |= PRC_DESTROY; 397 } 398 } 399 400 for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) { 401 pnp = VTOP(vp); 402 pcp = pnp->pr_common; 403 pcp->prc_thread = NULL; 404 pcp->prc_flags |= PRC_DESTROY; 405 prnotify(vp); 406 } 407 408 if (p->p_trace) 409 prnotify(p->p_trace); 410 } 411 412 /* 413 * Called when a zombie thread is joined or when a 414 * detached lwp exits. Called from lwp_hash_out(). 415 */ 416 void 417 prlwpfree(proc_t *p, lwpent_t *lep) 418 { 419 vnode_t *vp; 420 prnode_t *pnp; 421 prcommon_t *pcp; 422 423 ASSERT(MUTEX_HELD(&p->p_lock)); 424 425 /* 426 * The process must be blocked against /proc to do this safely. 427 * The lwp must not disappear while the process is marked P_PR_LOCK. 428 * It is the caller's responsibility to have called prbarrier(p). 429 */ 430 ASSERT(!(p->p_proc_flag & P_PR_LOCK)); 431 432 vp = lep->le_trace; 433 lep->le_trace = NULL; 434 while (vp) { 435 prnotify(vp); 436 pnp = VTOP(vp); 437 pcp = pnp->pr_common; 438 ASSERT(pcp->prc_thread == NULL && 439 (pcp->prc_flags & PRC_DESTROY)); 440 pcp->prc_tslot = -1; 441 vp = pnp->pr_next; 442 pnp->pr_next = NULL; 443 } 444 445 if (p->p_trace) 446 prnotify(p->p_trace); 447 } 448 449 /* 450 * Called from a hook in exec() when a thread starts exec(). 451 */ 452 void 453 prexecstart(void) 454 { 455 proc_t *p = ttoproc(curthread); 456 klwp_t *lwp = ttolwp(curthread); 457 458 /* 459 * The P_PR_EXEC flag blocks /proc operations for 460 * the duration of the exec(). 461 * We can't start exec() while the process is 462 * locked by /proc, so we call prbarrier(). 463 * lwp_nostop keeps the process from being stopped 464 * via job control for the duration of the exec(). 465 */ 466 467 ASSERT(MUTEX_HELD(&p->p_lock)); 468 prbarrier(p); 469 lwp->lwp_nostop++; 470 p->p_proc_flag |= P_PR_EXEC; 471 } 472 473 /* 474 * Called from a hook in exec() when a thread finishes exec(). 475 * The thread may or may not have succeeded. Some other thread 476 * may have beat it to the punch. 477 */ 478 void 479 prexecend(void) 480 { 481 proc_t *p = ttoproc(curthread); 482 klwp_t *lwp = ttolwp(curthread); 483 vnode_t *vp; 484 prnode_t *pnp; 485 prcommon_t *pcp; 486 model_t model = p->p_model; 487 id_t tid = curthread->t_tid; 488 int tslot = curthread->t_dslot; 489 490 ASSERT(MUTEX_HELD(&p->p_lock)); 491 492 lwp->lwp_nostop--; 493 if (p->p_flag & SEXITLWPS) { 494 /* 495 * We are on our way to exiting because some 496 * other thread beat us in the race to exec(). 497 * Don't clear the P_PR_EXEC flag in this case. 498 */ 499 return; 500 } 501 502 /* 503 * Wake up anyone waiting in /proc for the process to complete exec(). 504 */ 505 p->p_proc_flag &= ~P_PR_EXEC; 506 if ((vp = p->p_trace) != NULL) { 507 pcp = VTOP(vp)->pr_common; 508 mutex_enter(&pcp->prc_mutex); 509 cv_broadcast(&pcp->prc_wait); 510 mutex_exit(&pcp->prc_mutex); 511 for (; vp != NULL; vp = pnp->pr_next) { 512 pnp = VTOP(vp); 513 pnp->pr_common->prc_datamodel = model; 514 } 515 } 516 if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) { 517 /* 518 * We dealt with the process common above. 519 */ 520 ASSERT(p->p_trace != NULL); 521 pcp = VTOP(vp)->pr_common; 522 mutex_enter(&pcp->prc_mutex); 523 cv_broadcast(&pcp->prc_wait); 524 mutex_exit(&pcp->prc_mutex); 525 for (; vp != NULL; vp = pnp->pr_next) { 526 pnp = VTOP(vp); 527 pcp = pnp->pr_common; 528 pcp->prc_datamodel = model; 529 pcp->prc_tid = tid; 530 pcp->prc_tslot = tslot; 531 } 532 } 533 } 534 535 /* 536 * Called from a hook in relvm() just before freeing the address space. 537 * We free all the watched areas now. 538 */ 539 void 540 prrelvm(void) 541 { 542 proc_t *p = ttoproc(curthread); 543 544 mutex_enter(&p->p_lock); 545 prbarrier(p); /* block all other /proc operations */ 546 if (pr_watch_active(p)) { 547 pr_free_watchpoints(p); 548 watch_disable(curthread); 549 } 550 mutex_exit(&p->p_lock); 551 pr_free_watched_pages(p); 552 } 553 554 /* 555 * Called from hooks in exec-related code when a traced process 556 * attempts to exec(2) a setuid/setgid program or an unreadable 557 * file. Rather than fail the exec we invalidate the associated 558 * /proc vnodes so that subsequent attempts to use them will fail. 559 * 560 * All /proc vnodes, except directory vnodes, are retained on a linked 561 * list (rooted at p_plist in the process structure) until last close. 562 * 563 * A controlling process must re-open the /proc files in order to 564 * regain control. 565 */ 566 void 567 prinvalidate(struct user *up) 568 { 569 kthread_t *t = curthread; 570 proc_t *p = ttoproc(t); 571 vnode_t *vp; 572 prnode_t *pnp; 573 int writers = 0; 574 575 mutex_enter(&p->p_lock); 576 prbarrier(p); /* block all other /proc operations */ 577 578 /* 579 * At this moment, there can be only one lwp in the process. 580 */ 581 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); 582 583 /* 584 * Invalidate any currently active /proc vnodes. 585 */ 586 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) { 587 pnp = VTOP(vp); 588 switch (pnp->pr_type) { 589 case PR_PSINFO: /* these files can read by anyone */ 590 case PR_LPSINFO: 591 case PR_LWPSINFO: 592 case PR_LWPDIR: 593 case PR_LWPIDDIR: 594 case PR_USAGE: 595 case PR_LUSAGE: 596 case PR_LWPUSAGE: 597 break; 598 default: 599 pnp->pr_flags |= PR_INVAL; 600 break; 601 } 602 } 603 /* 604 * Wake up anyone waiting for the process or lwp. 605 * p->p_trace is guaranteed to be non-NULL if there 606 * are any open /proc files for this process. 607 */ 608 if ((vp = p->p_trace) != NULL) { 609 prcommon_t *pcp = VTOP(vp)->pr_pcommon; 610 611 prnotify(vp); 612 /* 613 * Are there any writers? 614 */ 615 if ((writers = pcp->prc_writers) != 0) { 616 /* 617 * Clear the exclusive open flag (old /proc interface). 618 * Set prc_selfopens equal to prc_writers so that 619 * the next O_EXCL|O_WRITE open will succeed 620 * even with existing (though invalid) writers. 621 * prclose() must decrement prc_selfopens when 622 * the invalid files are closed. 623 */ 624 pcp->prc_flags &= ~PRC_EXCL; 625 ASSERT(pcp->prc_selfopens <= writers); 626 pcp->prc_selfopens = writers; 627 } 628 } 629 vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace; 630 while (vp != NULL) { 631 /* 632 * We should not invalidate the lwpiddir vnodes, 633 * but the necessities of maintaining the old 634 * ioctl()-based version of /proc require it. 635 */ 636 pnp = VTOP(vp); 637 pnp->pr_flags |= PR_INVAL; 638 prnotify(vp); 639 vp = pnp->pr_next; 640 } 641 642 /* 643 * If any tracing flags are in effect and any vnodes are open for 644 * writing then set the requested-stop and run-on-last-close flags. 645 * Otherwise, clear all tracing flags. 646 */ 647 t->t_proc_flag &= ~TP_PAUSE; 648 if ((p->p_proc_flag & P_PR_TRACE) && writers) { 649 t->t_proc_flag |= TP_PRSTOP; 650 aston(t); /* so ISSIG will see the flag */ 651 p->p_proc_flag |= P_PR_RUNLCL; 652 } else { 653 premptyset(&up->u_entrymask); /* syscalls */ 654 premptyset(&up->u_exitmask); 655 up->u_systrap = 0; 656 premptyset(&p->p_sigmask); /* signals */ 657 premptyset(&p->p_fltmask); /* faults */ 658 t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING); 659 p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE); 660 prnostep(ttolwp(t)); 661 } 662 663 mutex_exit(&p->p_lock); 664 } 665 666 /* 667 * Acquire the controlled process's p_lock and mark it P_PR_LOCK. 668 * Return with pr_pidlock held in all cases. 669 * Return with p_lock held if the the process still exists. 670 * Return value is the process pointer if the process still exists, else NULL. 671 * If we lock the process, give ourself kernel priority to avoid deadlocks; 672 * this is undone in prunlock(). 673 */ 674 proc_t * 675 pr_p_lock(prnode_t *pnp) 676 { 677 proc_t *p; 678 prcommon_t *pcp; 679 680 mutex_enter(&pr_pidlock); 681 if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL) 682 return (NULL); 683 mutex_enter(&p->p_lock); 684 while (p->p_proc_flag & P_PR_LOCK) { 685 /* 686 * This cv/mutex pair is persistent even if 687 * the process disappears while we sleep. 688 */ 689 kcondvar_t *cv = &pr_pid_cv[p->p_slot]; 690 kmutex_t *mp = &p->p_lock; 691 692 mutex_exit(&pr_pidlock); 693 cv_wait(cv, mp); 694 mutex_exit(mp); 695 mutex_enter(&pr_pidlock); 696 if (pcp->prc_proc == NULL) 697 return (NULL); 698 ASSERT(p == pcp->prc_proc); 699 mutex_enter(&p->p_lock); 700 } 701 p->p_proc_flag |= P_PR_LOCK; 702 THREAD_KPRI_REQUEST(); 703 return (p); 704 } 705 706 /* 707 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock. 708 * This prevents any lwp of the process from disappearing and 709 * blocks most operations that a process can perform on itself. 710 * Returns 0 on success, a non-zero error number on failure. 711 * 712 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when 713 * the subject process is a zombie (ZYES) or fail for zombies (ZNO). 714 * 715 * error returns: 716 * ENOENT: process or lwp has disappeared or process is exiting 717 * (or has become a zombie and zdisp == ZNO). 718 * EAGAIN: procfs vnode has become invalid. 719 * EINTR: signal arrived while waiting for exec to complete. 720 */ 721 int 722 prlock(prnode_t *pnp, int zdisp) 723 { 724 prcommon_t *pcp; 725 proc_t *p; 726 727 again: 728 pcp = pnp->pr_common; 729 p = pr_p_lock(pnp); 730 mutex_exit(&pr_pidlock); 731 732 /* 733 * Return ENOENT immediately if there is no process. 734 */ 735 if (p == NULL) 736 return (ENOENT); 737 738 ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL); 739 740 /* 741 * Return ENOENT if process entered zombie state or is exiting 742 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies. 743 */ 744 if (zdisp == ZNO && 745 ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) { 746 prunlock(pnp); 747 return (ENOENT); 748 } 749 750 /* 751 * If lwp-specific, check to see if lwp has disappeared. 752 */ 753 if (pcp->prc_flags & PRC_LWP) { 754 if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) || 755 pcp->prc_tslot == -1) { 756 prunlock(pnp); 757 return (ENOENT); 758 } 759 } 760 761 /* 762 * Return EAGAIN if we have encountered a security violation. 763 * (The process exec'd a set-id or unreadable executable file.) 764 */ 765 if (pnp->pr_flags & PR_INVAL) { 766 prunlock(pnp); 767 return (EAGAIN); 768 } 769 770 /* 771 * If process is undergoing an exec(), wait for 772 * completion and then start all over again. 773 */ 774 if (p->p_proc_flag & P_PR_EXEC) { 775 pcp = pnp->pr_pcommon; /* Put on the correct sleep queue */ 776 mutex_enter(&pcp->prc_mutex); 777 prunlock(pnp); 778 if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) { 779 mutex_exit(&pcp->prc_mutex); 780 return (EINTR); 781 } 782 mutex_exit(&pcp->prc_mutex); 783 goto again; 784 } 785 786 /* 787 * We return holding p->p_lock. 788 */ 789 return (0); 790 } 791 792 /* 793 * Undo prlock() and pr_p_lock(). 794 * p->p_lock is still held; pr_pidlock is no longer held. 795 * 796 * prunmark() drops the P_PR_LOCK flag and wakes up another thread, 797 * if any, waiting for the flag to be dropped; it retains p->p_lock. 798 * 799 * prunlock() calls prunmark() and then drops p->p_lock. 800 */ 801 void 802 prunmark(proc_t *p) 803 { 804 ASSERT(p->p_proc_flag & P_PR_LOCK); 805 ASSERT(MUTEX_HELD(&p->p_lock)); 806 807 cv_signal(&pr_pid_cv[p->p_slot]); 808 p->p_proc_flag &= ~P_PR_LOCK; 809 THREAD_KPRI_RELEASE(); 810 } 811 812 void 813 prunlock(prnode_t *pnp) 814 { 815 prcommon_t *pcp = pnp->pr_common; 816 proc_t *p = pcp->prc_proc; 817 818 /* 819 * If we (or someone) gave it a SIGKILL, and it is not 820 * already a zombie, set it running unconditionally. 821 */ 822 if ((p->p_flag & SKILLED) && 823 !(p->p_flag & SEXITING) && 824 !(pcp->prc_flags & PRC_DESTROY) && 825 !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1)) 826 (void) pr_setrun(pnp, 0); 827 prunmark(p); 828 mutex_exit(&p->p_lock); 829 } 830 831 /* 832 * Called while holding p->p_lock to delay until the process is unlocked. 833 * We enter holding p->p_lock; p->p_lock is dropped and reacquired. 834 * The process cannot become locked again until p->p_lock is dropped. 835 */ 836 void 837 prbarrier(proc_t *p) 838 { 839 ASSERT(MUTEX_HELD(&p->p_lock)); 840 841 if (p->p_proc_flag & P_PR_LOCK) { 842 /* The process is locked; delay until not locked */ 843 uint_t slot = p->p_slot; 844 845 while (p->p_proc_flag & P_PR_LOCK) 846 cv_wait(&pr_pid_cv[slot], &p->p_lock); 847 cv_signal(&pr_pid_cv[slot]); 848 } 849 } 850 851 /* 852 * Return process/lwp status. 853 * The u-block is mapped in by this routine and unmapped at the end. 854 */ 855 void 856 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp) 857 { 858 kthread_t *t; 859 860 ASSERT(MUTEX_HELD(&p->p_lock)); 861 862 t = prchoose(p); /* returns locked thread */ 863 ASSERT(t != NULL); 864 thread_unlock(t); 865 866 /* just bzero the process part, prgetlwpstatus() does the rest */ 867 bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t)); 868 sp->pr_nlwp = p->p_lwpcnt; 869 sp->pr_nzomb = p->p_zombcnt; 870 prassignset(&sp->pr_sigpend, &p->p_sig); 871 sp->pr_brkbase = (uintptr_t)p->p_brkbase; 872 sp->pr_brksize = p->p_brksize; 873 sp->pr_stkbase = (uintptr_t)prgetstackbase(p); 874 sp->pr_stksize = p->p_stksize; 875 sp->pr_pid = p->p_pid; 876 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 877 (p->p_flag & SZONETOP)) { 878 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 879 /* 880 * Inside local zones, fake zsched's pid as parent pids for 881 * processes which reference processes outside of the zone. 882 */ 883 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 884 } else { 885 sp->pr_ppid = p->p_ppid; 886 } 887 sp->pr_pgid = p->p_pgrp; 888 sp->pr_sid = p->p_sessp->s_sid; 889 sp->pr_taskid = p->p_task->tk_tkid; 890 sp->pr_projid = p->p_task->tk_proj->kpj_id; 891 sp->pr_zoneid = p->p_zone->zone_id; 892 hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime); 893 hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime); 894 TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime); 895 TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime); 896 prassignset(&sp->pr_sigtrace, &p->p_sigmask); 897 prassignset(&sp->pr_flttrace, &p->p_fltmask); 898 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask); 899 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask); 900 switch (p->p_model) { 901 case DATAMODEL_ILP32: 902 sp->pr_dmodel = PR_MODEL_ILP32; 903 break; 904 case DATAMODEL_LP64: 905 sp->pr_dmodel = PR_MODEL_LP64; 906 break; 907 } 908 if (p->p_agenttp) 909 sp->pr_agentid = p->p_agenttp->t_tid; 910 911 /* get the chosen lwp's status */ 912 prgetlwpstatus(t, &sp->pr_lwp, zp); 913 914 /* replicate the flags */ 915 sp->pr_flags = sp->pr_lwp.pr_flags; 916 } 917 918 #ifdef _SYSCALL32_IMPL 919 void 920 prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp) 921 { 922 proc_t *p = ttoproc(t); 923 klwp_t *lwp = ttolwp(t); 924 struct mstate *ms = &lwp->lwp_mstate; 925 hrtime_t usr, sys; 926 int flags; 927 ulong_t instr; 928 929 ASSERT(MUTEX_HELD(&p->p_lock)); 930 931 bzero(sp, sizeof (*sp)); 932 flags = 0L; 933 if (t->t_state == TS_STOPPED) { 934 flags |= PR_STOPPED; 935 if ((t->t_schedflag & TS_PSTART) == 0) 936 flags |= PR_ISTOP; 937 } else if (VSTOPPED(t)) { 938 flags |= PR_STOPPED|PR_ISTOP; 939 } 940 if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP)) 941 flags |= PR_DSTOP; 942 if (lwp->lwp_asleep) 943 flags |= PR_ASLEEP; 944 if (t == p->p_agenttp) 945 flags |= PR_AGENT; 946 if (!(t->t_proc_flag & TP_TWAIT)) 947 flags |= PR_DETACH; 948 if (t->t_proc_flag & TP_DAEMON) 949 flags |= PR_DAEMON; 950 if (p->p_proc_flag & P_PR_FORK) 951 flags |= PR_FORK; 952 if (p->p_proc_flag & P_PR_RUNLCL) 953 flags |= PR_RLC; 954 if (p->p_proc_flag & P_PR_KILLCL) 955 flags |= PR_KLC; 956 if (p->p_proc_flag & P_PR_ASYNC) 957 flags |= PR_ASYNC; 958 if (p->p_proc_flag & P_PR_BPTADJ) 959 flags |= PR_BPTADJ; 960 if (p->p_proc_flag & P_PR_PTRACE) 961 flags |= PR_PTRACE; 962 if (p->p_flag & SMSACCT) 963 flags |= PR_MSACCT; 964 if (p->p_flag & SMSFORK) 965 flags |= PR_MSFORK; 966 if (p->p_flag & SVFWAIT) 967 flags |= PR_VFORKP; 968 sp->pr_flags = flags; 969 if (VSTOPPED(t)) { 970 sp->pr_why = PR_REQUESTED; 971 sp->pr_what = 0; 972 } else { 973 sp->pr_why = t->t_whystop; 974 sp->pr_what = t->t_whatstop; 975 } 976 sp->pr_lwpid = t->t_tid; 977 sp->pr_cursig = lwp->lwp_cursig; 978 prassignset(&sp->pr_lwppend, &t->t_sig); 979 schedctl_finish_sigblock(t); 980 prassignset(&sp->pr_lwphold, &t->t_hold); 981 if (t->t_whystop == PR_FAULTED) { 982 siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info); 983 if (t->t_whatstop == FLTPAGE) 984 sp->pr_info.si_addr = 985 (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr; 986 } else if (lwp->lwp_curinfo) 987 siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info); 988 if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID && 989 sp->pr_info.si_zoneid != zp->zone_id) { 990 sp->pr_info.si_pid = zp->zone_zsched->p_pid; 991 sp->pr_info.si_uid = 0; 992 sp->pr_info.si_ctid = -1; 993 sp->pr_info.si_zoneid = zp->zone_id; 994 } 995 sp->pr_altstack.ss_sp = 996 (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp; 997 sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size; 998 sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags; 999 prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action); 1000 sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext; 1001 sp->pr_ustack = (caddr32_t)lwp->lwp_ustack; 1002 (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name, 1003 sizeof (sp->pr_clname) - 1); 1004 if (flags & PR_STOPPED) 1005 hrt2ts32(t->t_stoptime, &sp->pr_tstamp); 1006 usr = ms->ms_acct[LMS_USER]; 1007 sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP]; 1008 scalehrtime(&usr); 1009 scalehrtime(&sys); 1010 hrt2ts32(usr, &sp->pr_utime); 1011 hrt2ts32(sys, &sp->pr_stime); 1012 1013 /* 1014 * Fetch the current instruction, if not a system process. 1015 * We don't attempt this unless the lwp is stopped. 1016 */ 1017 if ((p->p_flag & SSYS) || p->p_as == &kas) 1018 sp->pr_flags |= (PR_ISSYS|PR_PCINVAL); 1019 else if (!(flags & PR_STOPPED)) 1020 sp->pr_flags |= PR_PCINVAL; 1021 else if (!prfetchinstr(lwp, &instr)) 1022 sp->pr_flags |= PR_PCINVAL; 1023 else 1024 sp->pr_instr = (uint32_t)instr; 1025 1026 /* 1027 * Drop p_lock while touching the lwp's stack. 1028 */ 1029 mutex_exit(&p->p_lock); 1030 if (prisstep(lwp)) 1031 sp->pr_flags |= PR_STEP; 1032 if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) { 1033 int i; 1034 1035 sp->pr_syscall = get_syscall32_args(lwp, 1036 (int *)sp->pr_sysarg, &i); 1037 sp->pr_nsysarg = (ushort_t)i; 1038 } 1039 if ((flags & PR_STOPPED) || t == curthread) 1040 prgetprregs32(lwp, sp->pr_reg); 1041 if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) || 1042 (flags & PR_VFORKP)) { 1043 long r1, r2; 1044 user_t *up; 1045 auxv_t *auxp; 1046 int i; 1047 1048 sp->pr_errno = prgetrvals(lwp, &r1, &r2); 1049 if (sp->pr_errno == 0) { 1050 sp->pr_rval1 = (int32_t)r1; 1051 sp->pr_rval2 = (int32_t)r2; 1052 sp->pr_errpriv = PRIV_NONE; 1053 } else 1054 sp->pr_errpriv = lwp->lwp_badpriv; 1055 1056 if (t->t_sysnum == SYS_exec || t->t_sysnum == SYS_execve) { 1057 up = PTOU(p); 1058 sp->pr_sysarg[0] = 0; 1059 sp->pr_sysarg[1] = (caddr32_t)up->u_argv; 1060 sp->pr_sysarg[2] = (caddr32_t)up->u_envp; 1061 for (i = 0, auxp = up->u_auxv; 1062 i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]); 1063 i++, auxp++) { 1064 if (auxp->a_type == AT_SUN_EXECNAME) { 1065 sp->pr_sysarg[0] = 1066 (caddr32_t)(uintptr_t)auxp->a_un.a_ptr; 1067 break; 1068 } 1069 } 1070 } 1071 } 1072 if (prhasfp()) 1073 prgetprfpregs32(lwp, &sp->pr_fpreg); 1074 mutex_enter(&p->p_lock); 1075 } 1076 1077 void 1078 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp) 1079 { 1080 kthread_t *t; 1081 1082 ASSERT(MUTEX_HELD(&p->p_lock)); 1083 1084 t = prchoose(p); /* returns locked thread */ 1085 ASSERT(t != NULL); 1086 thread_unlock(t); 1087 1088 /* just bzero the process part, prgetlwpstatus32() does the rest */ 1089 bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t)); 1090 sp->pr_nlwp = p->p_lwpcnt; 1091 sp->pr_nzomb = p->p_zombcnt; 1092 prassignset(&sp->pr_sigpend, &p->p_sig); 1093 sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase; 1094 sp->pr_brksize = (uint32_t)p->p_brksize; 1095 sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p); 1096 sp->pr_stksize = (uint32_t)p->p_stksize; 1097 sp->pr_pid = p->p_pid; 1098 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 1099 (p->p_flag & SZONETOP)) { 1100 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 1101 /* 1102 * Inside local zones, fake zsched's pid as parent pids for 1103 * processes which reference processes outside of the zone. 1104 */ 1105 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 1106 } else { 1107 sp->pr_ppid = p->p_ppid; 1108 } 1109 sp->pr_pgid = p->p_pgrp; 1110 sp->pr_sid = p->p_sessp->s_sid; 1111 sp->pr_taskid = p->p_task->tk_tkid; 1112 sp->pr_projid = p->p_task->tk_proj->kpj_id; 1113 sp->pr_zoneid = p->p_zone->zone_id; 1114 hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime); 1115 hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime); 1116 TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime); 1117 TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime); 1118 prassignset(&sp->pr_sigtrace, &p->p_sigmask); 1119 prassignset(&sp->pr_flttrace, &p->p_fltmask); 1120 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask); 1121 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask); 1122 switch (p->p_model) { 1123 case DATAMODEL_ILP32: 1124 sp->pr_dmodel = PR_MODEL_ILP32; 1125 break; 1126 case DATAMODEL_LP64: 1127 sp->pr_dmodel = PR_MODEL_LP64; 1128 break; 1129 } 1130 if (p->p_agenttp) 1131 sp->pr_agentid = p->p_agenttp->t_tid; 1132 1133 /* get the chosen lwp's status */ 1134 prgetlwpstatus32(t, &sp->pr_lwp, zp); 1135 1136 /* replicate the flags */ 1137 sp->pr_flags = sp->pr_lwp.pr_flags; 1138 } 1139 #endif /* _SYSCALL32_IMPL */ 1140 1141 /* 1142 * Return lwp status. 1143 */ 1144 void 1145 prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp) 1146 { 1147 proc_t *p = ttoproc(t); 1148 klwp_t *lwp = ttolwp(t); 1149 struct mstate *ms = &lwp->lwp_mstate; 1150 hrtime_t usr, sys; 1151 int flags; 1152 ulong_t instr; 1153 1154 ASSERT(MUTEX_HELD(&p->p_lock)); 1155 1156 bzero(sp, sizeof (*sp)); 1157 flags = 0L; 1158 if (t->t_state == TS_STOPPED) { 1159 flags |= PR_STOPPED; 1160 if ((t->t_schedflag & TS_PSTART) == 0) 1161 flags |= PR_ISTOP; 1162 } else if (VSTOPPED(t)) { 1163 flags |= PR_STOPPED|PR_ISTOP; 1164 } 1165 if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP)) 1166 flags |= PR_DSTOP; 1167 if (lwp->lwp_asleep) 1168 flags |= PR_ASLEEP; 1169 if (t == p->p_agenttp) 1170 flags |= PR_AGENT; 1171 if (!(t->t_proc_flag & TP_TWAIT)) 1172 flags |= PR_DETACH; 1173 if (t->t_proc_flag & TP_DAEMON) 1174 flags |= PR_DAEMON; 1175 if (p->p_proc_flag & P_PR_FORK) 1176 flags |= PR_FORK; 1177 if (p->p_proc_flag & P_PR_RUNLCL) 1178 flags |= PR_RLC; 1179 if (p->p_proc_flag & P_PR_KILLCL) 1180 flags |= PR_KLC; 1181 if (p->p_proc_flag & P_PR_ASYNC) 1182 flags |= PR_ASYNC; 1183 if (p->p_proc_flag & P_PR_BPTADJ) 1184 flags |= PR_BPTADJ; 1185 if (p->p_proc_flag & P_PR_PTRACE) 1186 flags |= PR_PTRACE; 1187 if (p->p_flag & SMSACCT) 1188 flags |= PR_MSACCT; 1189 if (p->p_flag & SMSFORK) 1190 flags |= PR_MSFORK; 1191 if (p->p_flag & SVFWAIT) 1192 flags |= PR_VFORKP; 1193 if (p->p_pgidp->pid_pgorphaned) 1194 flags |= PR_ORPHAN; 1195 sp->pr_flags = flags; 1196 if (VSTOPPED(t)) { 1197 sp->pr_why = PR_REQUESTED; 1198 sp->pr_what = 0; 1199 } else { 1200 sp->pr_why = t->t_whystop; 1201 sp->pr_what = t->t_whatstop; 1202 } 1203 sp->pr_lwpid = t->t_tid; 1204 sp->pr_cursig = lwp->lwp_cursig; 1205 prassignset(&sp->pr_lwppend, &t->t_sig); 1206 schedctl_finish_sigblock(t); 1207 prassignset(&sp->pr_lwphold, &t->t_hold); 1208 if (t->t_whystop == PR_FAULTED) 1209 bcopy(&lwp->lwp_siginfo, 1210 &sp->pr_info, sizeof (k_siginfo_t)); 1211 else if (lwp->lwp_curinfo) 1212 bcopy(&lwp->lwp_curinfo->sq_info, 1213 &sp->pr_info, sizeof (k_siginfo_t)); 1214 if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID && 1215 sp->pr_info.si_zoneid != zp->zone_id) { 1216 sp->pr_info.si_pid = zp->zone_zsched->p_pid; 1217 sp->pr_info.si_uid = 0; 1218 sp->pr_info.si_ctid = -1; 1219 sp->pr_info.si_zoneid = zp->zone_id; 1220 } 1221 sp->pr_altstack = lwp->lwp_sigaltstack; 1222 prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action); 1223 sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext; 1224 sp->pr_ustack = lwp->lwp_ustack; 1225 (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name, 1226 sizeof (sp->pr_clname) - 1); 1227 if (flags & PR_STOPPED) 1228 hrt2ts(t->t_stoptime, &sp->pr_tstamp); 1229 usr = ms->ms_acct[LMS_USER]; 1230 sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP]; 1231 scalehrtime(&usr); 1232 scalehrtime(&sys); 1233 hrt2ts(usr, &sp->pr_utime); 1234 hrt2ts(sys, &sp->pr_stime); 1235 1236 /* 1237 * Fetch the current instruction, if not a system process. 1238 * We don't attempt this unless the lwp is stopped. 1239 */ 1240 if ((p->p_flag & SSYS) || p->p_as == &kas) 1241 sp->pr_flags |= (PR_ISSYS|PR_PCINVAL); 1242 else if (!(flags & PR_STOPPED)) 1243 sp->pr_flags |= PR_PCINVAL; 1244 else if (!prfetchinstr(lwp, &instr)) 1245 sp->pr_flags |= PR_PCINVAL; 1246 else 1247 sp->pr_instr = instr; 1248 1249 /* 1250 * Drop p_lock while touching the lwp's stack. 1251 */ 1252 mutex_exit(&p->p_lock); 1253 if (prisstep(lwp)) 1254 sp->pr_flags |= PR_STEP; 1255 if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) { 1256 int i; 1257 1258 sp->pr_syscall = get_syscall_args(lwp, 1259 (long *)sp->pr_sysarg, &i); 1260 sp->pr_nsysarg = (ushort_t)i; 1261 } 1262 if ((flags & PR_STOPPED) || t == curthread) 1263 prgetprregs(lwp, sp->pr_reg); 1264 if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) || 1265 (flags & PR_VFORKP)) { 1266 user_t *up; 1267 auxv_t *auxp; 1268 int i; 1269 1270 sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2); 1271 if (sp->pr_errno == 0) 1272 sp->pr_errpriv = PRIV_NONE; 1273 else 1274 sp->pr_errpriv = lwp->lwp_badpriv; 1275 1276 if (t->t_sysnum == SYS_exec || t->t_sysnum == SYS_execve) { 1277 up = PTOU(p); 1278 sp->pr_sysarg[0] = 0; 1279 sp->pr_sysarg[1] = (uintptr_t)up->u_argv; 1280 sp->pr_sysarg[2] = (uintptr_t)up->u_envp; 1281 for (i = 0, auxp = up->u_auxv; 1282 i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]); 1283 i++, auxp++) { 1284 if (auxp->a_type == AT_SUN_EXECNAME) { 1285 sp->pr_sysarg[0] = 1286 (uintptr_t)auxp->a_un.a_ptr; 1287 break; 1288 } 1289 } 1290 } 1291 } 1292 if (prhasfp()) 1293 prgetprfpregs(lwp, &sp->pr_fpreg); 1294 mutex_enter(&p->p_lock); 1295 } 1296 1297 /* 1298 * Get the sigaction structure for the specified signal. The u-block 1299 * must already have been mapped in by the caller. 1300 */ 1301 void 1302 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp) 1303 { 1304 bzero(sp, sizeof (*sp)); 1305 1306 if (sig != 0 && (unsigned)sig < NSIG) { 1307 sp->sa_handler = up->u_signal[sig-1]; 1308 prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]); 1309 if (sigismember(&up->u_sigonstack, sig)) 1310 sp->sa_flags |= SA_ONSTACK; 1311 if (sigismember(&up->u_sigresethand, sig)) 1312 sp->sa_flags |= SA_RESETHAND; 1313 if (sigismember(&up->u_sigrestart, sig)) 1314 sp->sa_flags |= SA_RESTART; 1315 if (sigismember(&p->p_siginfo, sig)) 1316 sp->sa_flags |= SA_SIGINFO; 1317 if (sigismember(&up->u_signodefer, sig)) 1318 sp->sa_flags |= SA_NODEFER; 1319 if (sig == SIGCLD) { 1320 if (p->p_flag & SNOWAIT) 1321 sp->sa_flags |= SA_NOCLDWAIT; 1322 if ((p->p_flag & SJCTL) == 0) 1323 sp->sa_flags |= SA_NOCLDSTOP; 1324 } 1325 } 1326 } 1327 1328 #ifdef _SYSCALL32_IMPL 1329 void 1330 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp) 1331 { 1332 bzero(sp, sizeof (*sp)); 1333 1334 if (sig != 0 && (unsigned)sig < NSIG) { 1335 sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1]; 1336 prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]); 1337 if (sigismember(&up->u_sigonstack, sig)) 1338 sp->sa_flags |= SA_ONSTACK; 1339 if (sigismember(&up->u_sigresethand, sig)) 1340 sp->sa_flags |= SA_RESETHAND; 1341 if (sigismember(&up->u_sigrestart, sig)) 1342 sp->sa_flags |= SA_RESTART; 1343 if (sigismember(&p->p_siginfo, sig)) 1344 sp->sa_flags |= SA_SIGINFO; 1345 if (sigismember(&up->u_signodefer, sig)) 1346 sp->sa_flags |= SA_NODEFER; 1347 if (sig == SIGCLD) { 1348 if (p->p_flag & SNOWAIT) 1349 sp->sa_flags |= SA_NOCLDWAIT; 1350 if ((p->p_flag & SJCTL) == 0) 1351 sp->sa_flags |= SA_NOCLDSTOP; 1352 } 1353 } 1354 } 1355 #endif /* _SYSCALL32_IMPL */ 1356 1357 /* 1358 * Count the number of segments in this process's address space. 1359 */ 1360 int 1361 prnsegs(struct as *as, int reserved) 1362 { 1363 int n = 0; 1364 struct seg *seg; 1365 1366 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 1367 1368 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { 1369 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved); 1370 caddr_t saddr, naddr; 1371 void *tmp = NULL; 1372 1373 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1374 (void) pr_getprot(seg, reserved, &tmp, 1375 &saddr, &naddr, eaddr); 1376 if (saddr != naddr) 1377 n++; 1378 } 1379 1380 ASSERT(tmp == NULL); 1381 } 1382 1383 return (n); 1384 } 1385 1386 /* 1387 * Convert uint32_t to decimal string w/o leading zeros. 1388 * Add trailing null characters if 'len' is greater than string length. 1389 * Return the string length. 1390 */ 1391 int 1392 pr_u32tos(uint32_t n, char *s, int len) 1393 { 1394 char cbuf[11]; /* 32-bit unsigned integer fits in 10 digits */ 1395 char *cp = cbuf; 1396 char *end = s + len; 1397 1398 do { 1399 *cp++ = (char)(n % 10 + '0'); 1400 n /= 10; 1401 } while (n); 1402 1403 len = (int)(cp - cbuf); 1404 1405 do { 1406 *s++ = *--cp; 1407 } while (cp > cbuf); 1408 1409 while (s < end) /* optional pad */ 1410 *s++ = '\0'; 1411 1412 return (len); 1413 } 1414 1415 /* 1416 * Convert uint64_t to decimal string w/o leading zeros. 1417 * Return the string length. 1418 */ 1419 static int 1420 pr_u64tos(uint64_t n, char *s) 1421 { 1422 char cbuf[21]; /* 64-bit unsigned integer fits in 20 digits */ 1423 char *cp = cbuf; 1424 int len; 1425 1426 do { 1427 *cp++ = (char)(n % 10 + '0'); 1428 n /= 10; 1429 } while (n); 1430 1431 len = (int)(cp - cbuf); 1432 1433 do { 1434 *s++ = *--cp; 1435 } while (cp > cbuf); 1436 1437 return (len); 1438 } 1439 1440 void 1441 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr) 1442 { 1443 char *s = name; 1444 struct vfs *vfsp; 1445 struct vfssw *vfsswp; 1446 1447 if ((vfsp = vp->v_vfsp) != NULL && 1448 ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) && 1449 *vfsswp->vsw_name) { 1450 (void) strcpy(s, vfsswp->vsw_name); 1451 s += strlen(s); 1452 *s++ = '.'; 1453 } 1454 s += pr_u32tos(getmajor(vattr->va_fsid), s, 0); 1455 *s++ = '.'; 1456 s += pr_u32tos(getminor(vattr->va_fsid), s, 0); 1457 *s++ = '.'; 1458 s += pr_u64tos(vattr->va_nodeid, s); 1459 *s++ = '\0'; 1460 } 1461 1462 struct seg * 1463 break_seg(proc_t *p) 1464 { 1465 caddr_t addr = p->p_brkbase; 1466 struct seg *seg; 1467 struct vnode *vp; 1468 1469 if (p->p_brksize != 0) 1470 addr += p->p_brksize - 1; 1471 seg = as_segat(p->p_as, addr); 1472 if (seg != NULL && seg->s_ops == &segvn_ops && 1473 (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL)) 1474 return (seg); 1475 return (NULL); 1476 } 1477 1478 /* 1479 * Implementation of service functions to handle procfs generic chained 1480 * copyout buffers. 1481 */ 1482 typedef struct pr_iobuf_list { 1483 list_node_t piol_link; /* buffer linkage */ 1484 size_t piol_size; /* total size (header + data) */ 1485 size_t piol_usedsize; /* amount to copy out from this buf */ 1486 } piol_t; 1487 1488 #define MAPSIZE (64 * 1024) 1489 #define PIOL_DATABUF(iol) ((void *)(&(iol)[1])) 1490 1491 void 1492 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n) 1493 { 1494 piol_t *iol; 1495 size_t initial_size = MIN(1, n) * itemsize; 1496 1497 list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link)); 1498 1499 ASSERT(list_head(iolhead) == NULL); 1500 ASSERT(itemsize < MAPSIZE - sizeof (*iol)); 1501 ASSERT(initial_size > 0); 1502 1503 /* 1504 * Someone creating chained copyout buffers may ask for less than 1505 * MAPSIZE if the amount of data to be buffered is known to be 1506 * smaller than that. 1507 * But in order to prevent involuntary self-denial of service, 1508 * the requested input size is clamped at MAPSIZE. 1509 */ 1510 initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol)); 1511 iol = kmem_alloc(initial_size, KM_SLEEP); 1512 list_insert_head(iolhead, iol); 1513 iol->piol_usedsize = 0; 1514 iol->piol_size = initial_size; 1515 } 1516 1517 void * 1518 pr_iol_newbuf(list_t *iolhead, size_t itemsize) 1519 { 1520 piol_t *iol; 1521 char *new; 1522 1523 ASSERT(itemsize < MAPSIZE - sizeof (*iol)); 1524 ASSERT(list_head(iolhead) != NULL); 1525 1526 iol = (piol_t *)list_tail(iolhead); 1527 1528 if (iol->piol_size < 1529 iol->piol_usedsize + sizeof (*iol) + itemsize) { 1530 /* 1531 * Out of space in the current buffer. Allocate more. 1532 */ 1533 piol_t *newiol; 1534 1535 newiol = kmem_alloc(MAPSIZE, KM_SLEEP); 1536 newiol->piol_size = MAPSIZE; 1537 newiol->piol_usedsize = 0; 1538 1539 list_insert_after(iolhead, iol, newiol); 1540 iol = list_next(iolhead, iol); 1541 ASSERT(iol == newiol); 1542 } 1543 new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize; 1544 iol->piol_usedsize += itemsize; 1545 bzero(new, itemsize); 1546 return (new); 1547 } 1548 1549 int 1550 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin) 1551 { 1552 int error = errin; 1553 piol_t *iol; 1554 1555 while ((iol = list_head(iolhead)) != NULL) { 1556 list_remove(iolhead, iol); 1557 if (!error) { 1558 if (copyout(PIOL_DATABUF(iol), *tgt, 1559 iol->piol_usedsize)) 1560 error = EFAULT; 1561 *tgt += iol->piol_usedsize; 1562 } 1563 kmem_free(iol, iol->piol_size); 1564 } 1565 list_destroy(iolhead); 1566 1567 return (error); 1568 } 1569 1570 int 1571 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin) 1572 { 1573 offset_t off = uiop->uio_offset; 1574 char *base; 1575 size_t size; 1576 piol_t *iol; 1577 int error = errin; 1578 1579 while ((iol = list_head(iolhead)) != NULL) { 1580 list_remove(iolhead, iol); 1581 base = PIOL_DATABUF(iol); 1582 size = iol->piol_usedsize; 1583 if (off <= size && error == 0 && uiop->uio_resid > 0) 1584 error = uiomove(base + off, size - off, 1585 UIO_READ, uiop); 1586 off = MAX(0, off - (offset_t)size); 1587 kmem_free(iol, iol->piol_size); 1588 } 1589 list_destroy(iolhead); 1590 1591 return (error); 1592 } 1593 1594 /* 1595 * Return an array of structures with memory map information. 1596 * We allocate here; the caller must deallocate. 1597 */ 1598 int 1599 prgetmap(proc_t *p, int reserved, list_t *iolhead) 1600 { 1601 struct as *as = p->p_as; 1602 prmap_t *mp; 1603 struct seg *seg; 1604 struct seg *brkseg, *stkseg; 1605 struct vnode *vp; 1606 struct vattr vattr; 1607 uint_t prot; 1608 1609 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 1610 1611 /* 1612 * Request an initial buffer size that doesn't waste memory 1613 * if the address space has only a small number of segments. 1614 */ 1615 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 1616 1617 if ((seg = AS_SEGFIRST(as)) == NULL) 1618 return (0); 1619 1620 brkseg = break_seg(p); 1621 stkseg = as_segat(as, prgetstackbase(p)); 1622 1623 do { 1624 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved); 1625 caddr_t saddr, naddr; 1626 void *tmp = NULL; 1627 1628 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1629 prot = pr_getprot(seg, reserved, &tmp, 1630 &saddr, &naddr, eaddr); 1631 if (saddr == naddr) 1632 continue; 1633 1634 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 1635 1636 mp->pr_vaddr = (uintptr_t)saddr; 1637 mp->pr_size = naddr - saddr; 1638 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 1639 mp->pr_mflags = 0; 1640 if (prot & PROT_READ) 1641 mp->pr_mflags |= MA_READ; 1642 if (prot & PROT_WRITE) 1643 mp->pr_mflags |= MA_WRITE; 1644 if (prot & PROT_EXEC) 1645 mp->pr_mflags |= MA_EXEC; 1646 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 1647 mp->pr_mflags |= MA_SHARED; 1648 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 1649 mp->pr_mflags |= MA_NORESERVE; 1650 if (seg->s_ops == &segspt_shmops || 1651 (seg->s_ops == &segvn_ops && 1652 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 1653 mp->pr_mflags |= MA_ANON; 1654 if (seg == brkseg) 1655 mp->pr_mflags |= MA_BREAK; 1656 else if (seg == stkseg) { 1657 mp->pr_mflags |= MA_STACK; 1658 if (reserved) { 1659 size_t maxstack = 1660 ((size_t)p->p_stk_ctl + 1661 PAGEOFFSET) & PAGEMASK; 1662 mp->pr_vaddr = 1663 (uintptr_t)prgetstackbase(p) + 1664 p->p_stksize - maxstack; 1665 mp->pr_size = (uintptr_t)naddr - 1666 mp->pr_vaddr; 1667 } 1668 } 1669 if (seg->s_ops == &segspt_shmops) 1670 mp->pr_mflags |= MA_ISM | MA_SHM; 1671 mp->pr_pagesize = PAGESIZE; 1672 1673 /* 1674 * Manufacture a filename for the "object" directory. 1675 */ 1676 vattr.va_mask = AT_FSID|AT_NODEID; 1677 if (seg->s_ops == &segvn_ops && 1678 SEGOP_GETVP(seg, saddr, &vp) == 0 && 1679 vp != NULL && vp->v_type == VREG && 1680 VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) { 1681 if (vp == p->p_exec) 1682 (void) strcpy(mp->pr_mapname, "a.out"); 1683 else 1684 pr_object_name(mp->pr_mapname, 1685 vp, &vattr); 1686 } 1687 1688 /* 1689 * Get the SysV shared memory id, if any. 1690 */ 1691 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct && 1692 (mp->pr_shmid = shmgetid(p, seg->s_base)) != 1693 SHMID_NONE) { 1694 if (mp->pr_shmid == SHMID_FREE) 1695 mp->pr_shmid = -1; 1696 1697 mp->pr_mflags |= MA_SHM; 1698 } else { 1699 mp->pr_shmid = -1; 1700 } 1701 } 1702 ASSERT(tmp == NULL); 1703 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 1704 1705 return (0); 1706 } 1707 1708 #ifdef _SYSCALL32_IMPL 1709 int 1710 prgetmap32(proc_t *p, int reserved, list_t *iolhead) 1711 { 1712 struct as *as = p->p_as; 1713 prmap32_t *mp; 1714 struct seg *seg; 1715 struct seg *brkseg, *stkseg; 1716 struct vnode *vp; 1717 struct vattr vattr; 1718 uint_t prot; 1719 1720 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 1721 1722 /* 1723 * Request an initial buffer size that doesn't waste memory 1724 * if the address space has only a small number of segments. 1725 */ 1726 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 1727 1728 if ((seg = AS_SEGFIRST(as)) == NULL) 1729 return (0); 1730 1731 brkseg = break_seg(p); 1732 stkseg = as_segat(as, prgetstackbase(p)); 1733 1734 do { 1735 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved); 1736 caddr_t saddr, naddr; 1737 void *tmp = NULL; 1738 1739 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1740 prot = pr_getprot(seg, reserved, &tmp, 1741 &saddr, &naddr, eaddr); 1742 if (saddr == naddr) 1743 continue; 1744 1745 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 1746 1747 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 1748 mp->pr_size = (size32_t)(naddr - saddr); 1749 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 1750 mp->pr_mflags = 0; 1751 if (prot & PROT_READ) 1752 mp->pr_mflags |= MA_READ; 1753 if (prot & PROT_WRITE) 1754 mp->pr_mflags |= MA_WRITE; 1755 if (prot & PROT_EXEC) 1756 mp->pr_mflags |= MA_EXEC; 1757 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 1758 mp->pr_mflags |= MA_SHARED; 1759 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 1760 mp->pr_mflags |= MA_NORESERVE; 1761 if (seg->s_ops == &segspt_shmops || 1762 (seg->s_ops == &segvn_ops && 1763 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 1764 mp->pr_mflags |= MA_ANON; 1765 if (seg == brkseg) 1766 mp->pr_mflags |= MA_BREAK; 1767 else if (seg == stkseg) { 1768 mp->pr_mflags |= MA_STACK; 1769 if (reserved) { 1770 size_t maxstack = 1771 ((size_t)p->p_stk_ctl + 1772 PAGEOFFSET) & PAGEMASK; 1773 uintptr_t vaddr = 1774 (uintptr_t)prgetstackbase(p) + 1775 p->p_stksize - maxstack; 1776 mp->pr_vaddr = (caddr32_t)vaddr; 1777 mp->pr_size = (size32_t) 1778 ((uintptr_t)naddr - vaddr); 1779 } 1780 } 1781 if (seg->s_ops == &segspt_shmops) 1782 mp->pr_mflags |= MA_ISM | MA_SHM; 1783 mp->pr_pagesize = PAGESIZE; 1784 1785 /* 1786 * Manufacture a filename for the "object" directory. 1787 */ 1788 vattr.va_mask = AT_FSID|AT_NODEID; 1789 if (seg->s_ops == &segvn_ops && 1790 SEGOP_GETVP(seg, saddr, &vp) == 0 && 1791 vp != NULL && vp->v_type == VREG && 1792 VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) { 1793 if (vp == p->p_exec) 1794 (void) strcpy(mp->pr_mapname, "a.out"); 1795 else 1796 pr_object_name(mp->pr_mapname, 1797 vp, &vattr); 1798 } 1799 1800 /* 1801 * Get the SysV shared memory id, if any. 1802 */ 1803 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct && 1804 (mp->pr_shmid = shmgetid(p, seg->s_base)) != 1805 SHMID_NONE) { 1806 if (mp->pr_shmid == SHMID_FREE) 1807 mp->pr_shmid = -1; 1808 1809 mp->pr_mflags |= MA_SHM; 1810 } else { 1811 mp->pr_shmid = -1; 1812 } 1813 } 1814 ASSERT(tmp == NULL); 1815 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 1816 1817 return (0); 1818 } 1819 #endif /* _SYSCALL32_IMPL */ 1820 1821 /* 1822 * Return the size of the /proc page data file. 1823 */ 1824 size_t 1825 prpdsize(struct as *as) 1826 { 1827 struct seg *seg; 1828 size_t size; 1829 1830 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 1831 1832 if ((seg = AS_SEGFIRST(as)) == NULL) 1833 return (0); 1834 1835 size = sizeof (prpageheader_t); 1836 do { 1837 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 1838 caddr_t saddr, naddr; 1839 void *tmp = NULL; 1840 size_t npage; 1841 1842 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1843 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 1844 if ((npage = (naddr - saddr) / PAGESIZE) != 0) 1845 size += sizeof (prasmap_t) + round8(npage); 1846 } 1847 ASSERT(tmp == NULL); 1848 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 1849 1850 return (size); 1851 } 1852 1853 #ifdef _SYSCALL32_IMPL 1854 size_t 1855 prpdsize32(struct as *as) 1856 { 1857 struct seg *seg; 1858 size_t size; 1859 1860 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 1861 1862 if ((seg = AS_SEGFIRST(as)) == NULL) 1863 return (0); 1864 1865 size = sizeof (prpageheader32_t); 1866 do { 1867 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 1868 caddr_t saddr, naddr; 1869 void *tmp = NULL; 1870 size_t npage; 1871 1872 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1873 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 1874 if ((npage = (naddr - saddr) / PAGESIZE) != 0) 1875 size += sizeof (prasmap32_t) + round8(npage); 1876 } 1877 ASSERT(tmp == NULL); 1878 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 1879 1880 return (size); 1881 } 1882 #endif /* _SYSCALL32_IMPL */ 1883 1884 /* 1885 * Read page data information. 1886 */ 1887 int 1888 prpdread(proc_t *p, uint_t hatid, struct uio *uiop) 1889 { 1890 struct as *as = p->p_as; 1891 caddr_t buf; 1892 size_t size; 1893 prpageheader_t *php; 1894 prasmap_t *pmp; 1895 struct seg *seg; 1896 int error; 1897 1898 again: 1899 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 1900 1901 if ((seg = AS_SEGFIRST(as)) == NULL) { 1902 AS_LOCK_EXIT(as, &as->a_lock); 1903 return (0); 1904 } 1905 size = prpdsize(as); 1906 if (uiop->uio_resid < size) { 1907 AS_LOCK_EXIT(as, &as->a_lock); 1908 return (E2BIG); 1909 } 1910 1911 buf = kmem_zalloc(size, KM_SLEEP); 1912 php = (prpageheader_t *)buf; 1913 pmp = (prasmap_t *)(buf + sizeof (prpageheader_t)); 1914 1915 hrt2ts(gethrtime(), &php->pr_tstamp); 1916 php->pr_nmap = 0; 1917 php->pr_npage = 0; 1918 do { 1919 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 1920 caddr_t saddr, naddr; 1921 void *tmp = NULL; 1922 1923 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 1924 struct vnode *vp; 1925 struct vattr vattr; 1926 size_t len; 1927 size_t npage; 1928 uint_t prot; 1929 uintptr_t next; 1930 1931 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 1932 if ((len = (size_t)(naddr - saddr)) == 0) 1933 continue; 1934 npage = len / PAGESIZE; 1935 next = (uintptr_t)(pmp + 1) + round8(npage); 1936 /* 1937 * It's possible that the address space can change 1938 * subtlely even though we're holding as->a_lock 1939 * due to the nondeterminism of page_exists() in 1940 * the presence of asychronously flushed pages or 1941 * mapped files whose sizes are changing. 1942 * page_exists() may be called indirectly from 1943 * pr_getprot() by a SEGOP_INCORE() routine. 1944 * If this happens we need to make sure we don't 1945 * overrun the buffer whose size we computed based 1946 * on the initial iteration through the segments. 1947 * Once we've detected an overflow, we need to clean 1948 * up the temporary memory allocated in pr_getprot() 1949 * and retry. If there's a pending signal, we return 1950 * EINTR so that this thread can be dislodged if 1951 * a latent bug causes us to spin indefinitely. 1952 */ 1953 if (next > (uintptr_t)buf + size) { 1954 pr_getprot_done(&tmp); 1955 AS_LOCK_EXIT(as, &as->a_lock); 1956 1957 kmem_free(buf, size); 1958 1959 if (ISSIG(curthread, JUSTLOOKING)) 1960 return (EINTR); 1961 1962 goto again; 1963 } 1964 1965 php->pr_nmap++; 1966 php->pr_npage += npage; 1967 pmp->pr_vaddr = (uintptr_t)saddr; 1968 pmp->pr_npage = npage; 1969 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 1970 pmp->pr_mflags = 0; 1971 if (prot & PROT_READ) 1972 pmp->pr_mflags |= MA_READ; 1973 if (prot & PROT_WRITE) 1974 pmp->pr_mflags |= MA_WRITE; 1975 if (prot & PROT_EXEC) 1976 pmp->pr_mflags |= MA_EXEC; 1977 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 1978 pmp->pr_mflags |= MA_SHARED; 1979 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 1980 pmp->pr_mflags |= MA_NORESERVE; 1981 if (seg->s_ops == &segspt_shmops || 1982 (seg->s_ops == &segvn_ops && 1983 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 1984 pmp->pr_mflags |= MA_ANON; 1985 if (seg->s_ops == &segspt_shmops) 1986 pmp->pr_mflags |= MA_ISM | MA_SHM; 1987 pmp->pr_pagesize = PAGESIZE; 1988 /* 1989 * Manufacture a filename for the "object" directory. 1990 */ 1991 vattr.va_mask = AT_FSID|AT_NODEID; 1992 if (seg->s_ops == &segvn_ops && 1993 SEGOP_GETVP(seg, saddr, &vp) == 0 && 1994 vp != NULL && vp->v_type == VREG && 1995 VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) { 1996 if (vp == p->p_exec) 1997 (void) strcpy(pmp->pr_mapname, "a.out"); 1998 else 1999 pr_object_name(pmp->pr_mapname, 2000 vp, &vattr); 2001 } 2002 2003 /* 2004 * Get the SysV shared memory id, if any. 2005 */ 2006 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct && 2007 (pmp->pr_shmid = shmgetid(p, seg->s_base)) != 2008 SHMID_NONE) { 2009 if (pmp->pr_shmid == SHMID_FREE) 2010 pmp->pr_shmid = -1; 2011 2012 pmp->pr_mflags |= MA_SHM; 2013 } else { 2014 pmp->pr_shmid = -1; 2015 } 2016 2017 hat_getstat(as, saddr, len, hatid, 2018 (char *)(pmp + 1), HAT_SYNC_ZERORM); 2019 pmp = (prasmap_t *)next; 2020 } 2021 ASSERT(tmp == NULL); 2022 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2023 2024 AS_LOCK_EXIT(as, &as->a_lock); 2025 2026 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size); 2027 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop); 2028 kmem_free(buf, size); 2029 2030 return (error); 2031 } 2032 2033 #ifdef _SYSCALL32_IMPL 2034 int 2035 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop) 2036 { 2037 struct as *as = p->p_as; 2038 caddr_t buf; 2039 size_t size; 2040 prpageheader32_t *php; 2041 prasmap32_t *pmp; 2042 struct seg *seg; 2043 int error; 2044 2045 again: 2046 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 2047 2048 if ((seg = AS_SEGFIRST(as)) == NULL) { 2049 AS_LOCK_EXIT(as, &as->a_lock); 2050 return (0); 2051 } 2052 size = prpdsize32(as); 2053 if (uiop->uio_resid < size) { 2054 AS_LOCK_EXIT(as, &as->a_lock); 2055 return (E2BIG); 2056 } 2057 2058 buf = kmem_zalloc(size, KM_SLEEP); 2059 php = (prpageheader32_t *)buf; 2060 pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t)); 2061 2062 hrt2ts32(gethrtime(), &php->pr_tstamp); 2063 php->pr_nmap = 0; 2064 php->pr_npage = 0; 2065 do { 2066 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 2067 caddr_t saddr, naddr; 2068 void *tmp = NULL; 2069 2070 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2071 struct vnode *vp; 2072 struct vattr vattr; 2073 size_t len; 2074 size_t npage; 2075 uint_t prot; 2076 uintptr_t next; 2077 2078 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 2079 if ((len = (size_t)(naddr - saddr)) == 0) 2080 continue; 2081 npage = len / PAGESIZE; 2082 next = (uintptr_t)(pmp + 1) + round8(npage); 2083 /* 2084 * It's possible that the address space can change 2085 * subtlely even though we're holding as->a_lock 2086 * due to the nondeterminism of page_exists() in 2087 * the presence of asychronously flushed pages or 2088 * mapped files whose sizes are changing. 2089 * page_exists() may be called indirectly from 2090 * pr_getprot() by a SEGOP_INCORE() routine. 2091 * If this happens we need to make sure we don't 2092 * overrun the buffer whose size we computed based 2093 * on the initial iteration through the segments. 2094 * Once we've detected an overflow, we need to clean 2095 * up the temporary memory allocated in pr_getprot() 2096 * and retry. If there's a pending signal, we return 2097 * EINTR so that this thread can be dislodged if 2098 * a latent bug causes us to spin indefinitely. 2099 */ 2100 if (next > (uintptr_t)buf + size) { 2101 pr_getprot_done(&tmp); 2102 AS_LOCK_EXIT(as, &as->a_lock); 2103 2104 kmem_free(buf, size); 2105 2106 if (ISSIG(curthread, JUSTLOOKING)) 2107 return (EINTR); 2108 2109 goto again; 2110 } 2111 2112 php->pr_nmap++; 2113 php->pr_npage += npage; 2114 pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 2115 pmp->pr_npage = (size32_t)npage; 2116 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 2117 pmp->pr_mflags = 0; 2118 if (prot & PROT_READ) 2119 pmp->pr_mflags |= MA_READ; 2120 if (prot & PROT_WRITE) 2121 pmp->pr_mflags |= MA_WRITE; 2122 if (prot & PROT_EXEC) 2123 pmp->pr_mflags |= MA_EXEC; 2124 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 2125 pmp->pr_mflags |= MA_SHARED; 2126 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 2127 pmp->pr_mflags |= MA_NORESERVE; 2128 if (seg->s_ops == &segspt_shmops || 2129 (seg->s_ops == &segvn_ops && 2130 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) 2131 pmp->pr_mflags |= MA_ANON; 2132 if (seg->s_ops == &segspt_shmops) 2133 pmp->pr_mflags |= MA_ISM | MA_SHM; 2134 pmp->pr_pagesize = PAGESIZE; 2135 /* 2136 * Manufacture a filename for the "object" directory. 2137 */ 2138 vattr.va_mask = AT_FSID|AT_NODEID; 2139 if (seg->s_ops == &segvn_ops && 2140 SEGOP_GETVP(seg, saddr, &vp) == 0 && 2141 vp != NULL && vp->v_type == VREG && 2142 VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) { 2143 if (vp == p->p_exec) 2144 (void) strcpy(pmp->pr_mapname, "a.out"); 2145 else 2146 pr_object_name(pmp->pr_mapname, 2147 vp, &vattr); 2148 } 2149 2150 /* 2151 * Get the SysV shared memory id, if any. 2152 */ 2153 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct && 2154 (pmp->pr_shmid = shmgetid(p, seg->s_base)) != 2155 SHMID_NONE) { 2156 if (pmp->pr_shmid == SHMID_FREE) 2157 pmp->pr_shmid = -1; 2158 2159 pmp->pr_mflags |= MA_SHM; 2160 } else { 2161 pmp->pr_shmid = -1; 2162 } 2163 2164 hat_getstat(as, saddr, len, hatid, 2165 (char *)(pmp + 1), HAT_SYNC_ZERORM); 2166 pmp = (prasmap32_t *)next; 2167 } 2168 ASSERT(tmp == NULL); 2169 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 2170 2171 AS_LOCK_EXIT(as, &as->a_lock); 2172 2173 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size); 2174 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop); 2175 kmem_free(buf, size); 2176 2177 return (error); 2178 } 2179 #endif /* _SYSCALL32_IMPL */ 2180 2181 ushort_t 2182 prgetpctcpu(uint64_t pct) 2183 { 2184 /* 2185 * The value returned will be relevant in the zone of the examiner, 2186 * which may not be the same as the zone which performed the procfs 2187 * mount. 2188 */ 2189 int nonline = zone_ncpus_online_get(curproc->p_zone); 2190 2191 /* 2192 * Prorate over online cpus so we don't exceed 100% 2193 */ 2194 if (nonline > 1) 2195 pct /= nonline; 2196 pct >>= 16; /* convert to 16-bit scaled integer */ 2197 if (pct > 0x8000) /* might happen, due to rounding */ 2198 pct = 0x8000; 2199 return ((ushort_t)pct); 2200 } 2201 2202 /* 2203 * Return information used by ps(1). 2204 */ 2205 void 2206 prgetpsinfo(proc_t *p, psinfo_t *psp) 2207 { 2208 kthread_t *t; 2209 struct cred *cred; 2210 hrtime_t hrutime, hrstime; 2211 2212 ASSERT(MUTEX_HELD(&p->p_lock)); 2213 2214 if ((t = prchoose(p)) == NULL) /* returns locked thread */ 2215 bzero(psp, sizeof (*psp)); 2216 else { 2217 thread_unlock(t); 2218 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp)); 2219 } 2220 2221 /* 2222 * only export SSYS and SMSACCT; everything else is off-limits to 2223 * userland apps. 2224 */ 2225 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 2226 psp->pr_nlwp = p->p_lwpcnt; 2227 psp->pr_nzomb = p->p_zombcnt; 2228 mutex_enter(&p->p_crlock); 2229 cred = p->p_cred; 2230 psp->pr_uid = crgetruid(cred); 2231 psp->pr_euid = crgetuid(cred); 2232 psp->pr_gid = crgetrgid(cred); 2233 psp->pr_egid = crgetgid(cred); 2234 mutex_exit(&p->p_crlock); 2235 psp->pr_pid = p->p_pid; 2236 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 2237 (p->p_flag & SZONETOP)) { 2238 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 2239 /* 2240 * Inside local zones, fake zsched's pid as parent pids for 2241 * processes which reference processes outside of the zone. 2242 */ 2243 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 2244 } else { 2245 psp->pr_ppid = p->p_ppid; 2246 } 2247 psp->pr_pgid = p->p_pgrp; 2248 psp->pr_sid = p->p_sessp->s_sid; 2249 psp->pr_taskid = p->p_task->tk_tkid; 2250 psp->pr_projid = p->p_task->tk_proj->kpj_id; 2251 psp->pr_poolid = p->p_pool->pool_id; 2252 psp->pr_zoneid = p->p_zone->zone_id; 2253 if ((psp->pr_contract = PRCTID(p)) == 0) 2254 psp->pr_contract = -1; 2255 psp->pr_addr = (uintptr_t)prgetpsaddr(p); 2256 switch (p->p_model) { 2257 case DATAMODEL_ILP32: 2258 psp->pr_dmodel = PR_MODEL_ILP32; 2259 break; 2260 case DATAMODEL_LP64: 2261 psp->pr_dmodel = PR_MODEL_LP64; 2262 break; 2263 } 2264 hrutime = mstate_aggr_state(p, LMS_USER); 2265 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 2266 hrt2ts((hrutime + hrstime), &psp->pr_time); 2267 TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime); 2268 2269 if (t == NULL) { 2270 int wcode = p->p_wcode; /* must be atomic read */ 2271 2272 if (wcode) 2273 psp->pr_wstat = wstat(wcode, p->p_wdata); 2274 psp->pr_ttydev = PRNODEV; 2275 psp->pr_lwp.pr_state = SZOMB; 2276 psp->pr_lwp.pr_sname = 'Z'; 2277 psp->pr_lwp.pr_bindpro = PBIND_NONE; 2278 psp->pr_lwp.pr_bindpset = PS_NONE; 2279 } else { 2280 user_t *up = PTOU(p); 2281 struct as *as; 2282 dev_t d; 2283 extern dev_t rwsconsdev, rconsdev, uconsdev; 2284 2285 d = cttydev(p); 2286 /* 2287 * If the controlling terminal is the real 2288 * or workstation console device, map to what the 2289 * user thinks is the console device. 2290 */ 2291 if (d == rwsconsdev || d == rconsdev) 2292 d = uconsdev; 2293 psp->pr_ttydev = (d == NODEV) ? PRNODEV : d; 2294 psp->pr_start = up->u_start; 2295 bcopy(up->u_comm, psp->pr_fname, 2296 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); 2297 bcopy(up->u_psargs, psp->pr_psargs, 2298 MIN(PRARGSZ-1, PSARGSZ)); 2299 psp->pr_argc = up->u_argc; 2300 psp->pr_argv = up->u_argv; 2301 psp->pr_envp = up->u_envp; 2302 2303 /* get the chosen lwp's lwpsinfo */ 2304 prgetlwpsinfo(t, &psp->pr_lwp); 2305 2306 /* compute %cpu for the process */ 2307 if (p->p_lwpcnt == 1) 2308 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; 2309 else { 2310 uint64_t pct = 0; 2311 hrtime_t cur_time = gethrtime_unscaled(); 2312 2313 t = p->p_tlist; 2314 do { 2315 pct += cpu_update_pct(t, cur_time); 2316 } while ((t = t->t_forw) != p->p_tlist); 2317 2318 psp->pr_pctcpu = prgetpctcpu(pct); 2319 } 2320 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { 2321 psp->pr_size = 0; 2322 psp->pr_rssize = 0; 2323 } else { 2324 mutex_exit(&p->p_lock); 2325 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2326 psp->pr_size = btopr(rm_assize(as)) * (PAGESIZE / 1024); 2327 psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024); 2328 psp->pr_pctmem = rm_pctmemory(as); 2329 AS_LOCK_EXIT(as, &as->a_lock); 2330 mutex_enter(&p->p_lock); 2331 } 2332 } 2333 } 2334 2335 #ifdef _SYSCALL32_IMPL 2336 void 2337 prgetpsinfo32(proc_t *p, psinfo32_t *psp) 2338 { 2339 kthread_t *t; 2340 struct cred *cred; 2341 hrtime_t hrutime, hrstime; 2342 2343 ASSERT(MUTEX_HELD(&p->p_lock)); 2344 2345 if ((t = prchoose(p)) == NULL) /* returns locked thread */ 2346 bzero(psp, sizeof (*psp)); 2347 else { 2348 thread_unlock(t); 2349 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp)); 2350 } 2351 2352 /* 2353 * only export SSYS and SMSACCT; everything else is off-limits to 2354 * userland apps. 2355 */ 2356 psp->pr_flag = p->p_flag & (SSYS | SMSACCT); 2357 psp->pr_nlwp = p->p_lwpcnt; 2358 psp->pr_nzomb = p->p_zombcnt; 2359 mutex_enter(&p->p_crlock); 2360 cred = p->p_cred; 2361 psp->pr_uid = crgetruid(cred); 2362 psp->pr_euid = crgetuid(cred); 2363 psp->pr_gid = crgetrgid(cred); 2364 psp->pr_egid = crgetgid(cred); 2365 mutex_exit(&p->p_crlock); 2366 psp->pr_pid = p->p_pid; 2367 if (curproc->p_zone->zone_id != GLOBAL_ZONEID && 2368 (p->p_flag & SZONETOP)) { 2369 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); 2370 /* 2371 * Inside local zones, fake zsched's pid as parent pids for 2372 * processes which reference processes outside of the zone. 2373 */ 2374 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; 2375 } else { 2376 psp->pr_ppid = p->p_ppid; 2377 } 2378 psp->pr_pgid = p->p_pgrp; 2379 psp->pr_sid = p->p_sessp->s_sid; 2380 psp->pr_taskid = p->p_task->tk_tkid; 2381 psp->pr_projid = p->p_task->tk_proj->kpj_id; 2382 psp->pr_poolid = p->p_pool->pool_id; 2383 psp->pr_zoneid = p->p_zone->zone_id; 2384 if ((psp->pr_contract = PRCTID(p)) == 0) 2385 psp->pr_contract = -1; 2386 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 2387 switch (p->p_model) { 2388 case DATAMODEL_ILP32: 2389 psp->pr_dmodel = PR_MODEL_ILP32; 2390 break; 2391 case DATAMODEL_LP64: 2392 psp->pr_dmodel = PR_MODEL_LP64; 2393 break; 2394 } 2395 hrutime = mstate_aggr_state(p, LMS_USER); 2396 hrstime = mstate_aggr_state(p, LMS_SYSTEM); 2397 hrt2ts32(hrutime + hrstime, &psp->pr_time); 2398 TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime); 2399 2400 if (t == NULL) { 2401 extern int wstat(int, int); /* needs a header file */ 2402 int wcode = p->p_wcode; /* must be atomic read */ 2403 2404 if (wcode) 2405 psp->pr_wstat = wstat(wcode, p->p_wdata); 2406 psp->pr_ttydev = PRNODEV32; 2407 psp->pr_lwp.pr_state = SZOMB; 2408 psp->pr_lwp.pr_sname = 'Z'; 2409 } else { 2410 user_t *up = PTOU(p); 2411 struct as *as; 2412 dev_t d; 2413 extern dev_t rwsconsdev, rconsdev, uconsdev; 2414 2415 d = cttydev(p); 2416 /* 2417 * If the controlling terminal is the real 2418 * or workstation console device, map to what the 2419 * user thinks is the console device. 2420 */ 2421 if (d == rwsconsdev || d == rconsdev) 2422 d = uconsdev; 2423 (void) cmpldev(&psp->pr_ttydev, d); 2424 TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start); 2425 bcopy(up->u_comm, psp->pr_fname, 2426 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); 2427 bcopy(up->u_psargs, psp->pr_psargs, 2428 MIN(PRARGSZ-1, PSARGSZ)); 2429 psp->pr_argc = up->u_argc; 2430 psp->pr_argv = (caddr32_t)up->u_argv; 2431 psp->pr_envp = (caddr32_t)up->u_envp; 2432 2433 /* get the chosen lwp's lwpsinfo */ 2434 prgetlwpsinfo32(t, &psp->pr_lwp); 2435 2436 /* compute %cpu for the process */ 2437 if (p->p_lwpcnt == 1) 2438 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; 2439 else { 2440 uint64_t pct = 0; 2441 hrtime_t cur_time; 2442 2443 t = p->p_tlist; 2444 cur_time = gethrtime_unscaled(); 2445 do { 2446 pct += cpu_update_pct(t, cur_time); 2447 } while ((t = t->t_forw) != p->p_tlist); 2448 2449 psp->pr_pctcpu = prgetpctcpu(pct); 2450 } 2451 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { 2452 psp->pr_size = 0; 2453 psp->pr_rssize = 0; 2454 } else { 2455 mutex_exit(&p->p_lock); 2456 AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2457 psp->pr_size = (size32_t) 2458 (btopr(rm_assize(as)) * (PAGESIZE / 1024)); 2459 psp->pr_rssize = (size32_t) 2460 (rm_asrss(as) * (PAGESIZE / 1024)); 2461 psp->pr_pctmem = rm_pctmemory(as); 2462 AS_LOCK_EXIT(as, &as->a_lock); 2463 mutex_enter(&p->p_lock); 2464 } 2465 } 2466 2467 /* 2468 * If we are looking at an LP64 process, zero out 2469 * the fields that cannot be represented in ILP32. 2470 */ 2471 if (p->p_model != DATAMODEL_ILP32) { 2472 psp->pr_size = 0; 2473 psp->pr_rssize = 0; 2474 psp->pr_argv = 0; 2475 psp->pr_envp = 0; 2476 } 2477 } 2478 #endif /* _SYSCALL32_IMPL */ 2479 2480 void 2481 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp) 2482 { 2483 klwp_t *lwp = ttolwp(t); 2484 sobj_ops_t *sobj; 2485 char c, state; 2486 uint64_t pct; 2487 int retval, niceval; 2488 hrtime_t hrutime, hrstime; 2489 2490 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 2491 2492 bzero(psp, sizeof (*psp)); 2493 2494 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 2495 psp->pr_lwpid = t->t_tid; 2496 psp->pr_addr = (uintptr_t)t; 2497 psp->pr_wchan = (uintptr_t)t->t_wchan; 2498 2499 /* map the thread state enum into a process state enum */ 2500 state = VSTOPPED(t) ? TS_STOPPED : t->t_state; 2501 switch (state) { 2502 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 2503 case TS_RUN: state = SRUN; c = 'R'; break; 2504 case TS_ONPROC: state = SONPROC; c = 'O'; break; 2505 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 2506 case TS_STOPPED: state = SSTOP; c = 'T'; break; 2507 default: state = 0; c = '?'; break; 2508 } 2509 psp->pr_state = state; 2510 psp->pr_sname = c; 2511 if ((sobj = t->t_sobj_ops) != NULL) 2512 psp->pr_stype = SOBJ_TYPE(sobj); 2513 retval = CL_DONICE(t, NULL, 0, &niceval); 2514 if (retval == 0) { 2515 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 2516 psp->pr_nice = niceval + NZERO; 2517 } 2518 psp->pr_syscall = t->t_sysnum; 2519 psp->pr_pri = t->t_pri; 2520 psp->pr_start.tv_sec = t->t_start; 2521 psp->pr_start.tv_nsec = 0L; 2522 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 2523 scalehrtime(&hrutime); 2524 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 2525 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 2526 scalehrtime(&hrstime); 2527 hrt2ts(hrutime + hrstime, &psp->pr_time); 2528 /* compute %cpu for the lwp */ 2529 pct = cpu_update_pct(t, gethrtime_unscaled()); 2530 psp->pr_pctcpu = prgetpctcpu(pct); 2531 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 2532 if (psp->pr_cpu > 99) 2533 psp->pr_cpu = 99; 2534 2535 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 2536 sizeof (psp->pr_clname) - 1); 2537 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 2538 psp->pr_onpro = t->t_cpu->cpu_id; 2539 psp->pr_bindpro = t->t_bind_cpu; 2540 psp->pr_bindpset = t->t_bind_pset; 2541 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 2542 } 2543 2544 #ifdef _SYSCALL32_IMPL 2545 void 2546 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp) 2547 { 2548 proc_t *p = ttoproc(t); 2549 klwp_t *lwp = ttolwp(t); 2550 sobj_ops_t *sobj; 2551 char c, state; 2552 uint64_t pct; 2553 int retval, niceval; 2554 hrtime_t hrutime, hrstime; 2555 2556 ASSERT(MUTEX_HELD(&p->p_lock)); 2557 2558 bzero(psp, sizeof (*psp)); 2559 2560 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ 2561 psp->pr_lwpid = t->t_tid; 2562 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ 2563 psp->pr_wchan = 0; /* cannot represent 64-bit addr in 32 bits */ 2564 2565 /* map the thread state enum into a process state enum */ 2566 state = VSTOPPED(t) ? TS_STOPPED : t->t_state; 2567 switch (state) { 2568 case TS_SLEEP: state = SSLEEP; c = 'S'; break; 2569 case TS_RUN: state = SRUN; c = 'R'; break; 2570 case TS_ONPROC: state = SONPROC; c = 'O'; break; 2571 case TS_ZOMB: state = SZOMB; c = 'Z'; break; 2572 case TS_STOPPED: state = SSTOP; c = 'T'; break; 2573 default: state = 0; c = '?'; break; 2574 } 2575 psp->pr_state = state; 2576 psp->pr_sname = c; 2577 if ((sobj = t->t_sobj_ops) != NULL) 2578 psp->pr_stype = SOBJ_TYPE(sobj); 2579 retval = CL_DONICE(t, NULL, 0, &niceval); 2580 if (retval == 0) { 2581 psp->pr_oldpri = v.v_maxsyspri - t->t_pri; 2582 psp->pr_nice = niceval + NZERO; 2583 } else { 2584 psp->pr_oldpri = 0; 2585 psp->pr_nice = 0; 2586 } 2587 psp->pr_syscall = t->t_sysnum; 2588 psp->pr_pri = t->t_pri; 2589 psp->pr_start.tv_sec = (time32_t)t->t_start; 2590 psp->pr_start.tv_nsec = 0L; 2591 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; 2592 scalehrtime(&hrutime); 2593 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + 2594 lwp->lwp_mstate.ms_acct[LMS_TRAP]; 2595 scalehrtime(&hrstime); 2596 hrt2ts32(hrutime + hrstime, &psp->pr_time); 2597 /* compute %cpu for the lwp */ 2598 pct = cpu_update_pct(t, gethrtime_unscaled()); 2599 psp->pr_pctcpu = prgetpctcpu(pct); 2600 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ 2601 if (psp->pr_cpu > 99) 2602 psp->pr_cpu = 99; 2603 2604 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, 2605 sizeof (psp->pr_clname) - 1); 2606 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ 2607 psp->pr_onpro = t->t_cpu->cpu_id; 2608 psp->pr_bindpro = t->t_bind_cpu; 2609 psp->pr_bindpset = t->t_bind_pset; 2610 psp->pr_lgrp = t->t_lpl->lpl_lgrpid; 2611 } 2612 #endif /* _SYSCALL32_IMPL */ 2613 2614 /* 2615 * This used to get called when microstate accounting was disabled but 2616 * microstate information was requested. Since Microstate accounting is on 2617 * regardless of the proc flags, this simply makes it appear to procfs that 2618 * microstate accounting is on. This is relatively meaningless since you 2619 * can't turn it off, but this is here for the sake of appearances. 2620 */ 2621 2622 /*ARGSUSED*/ 2623 void 2624 estimate_msacct(kthread_t *t, hrtime_t curtime) 2625 { 2626 proc_t *p; 2627 2628 if (t == NULL) 2629 return; 2630 2631 p = ttoproc(t); 2632 ASSERT(MUTEX_HELD(&p->p_lock)); 2633 2634 /* 2635 * A system process (p0) could be referenced if the thread is 2636 * in the process of exiting. Don't turn on microstate accounting 2637 * in that case. 2638 */ 2639 if (p->p_flag & SSYS) 2640 return; 2641 2642 /* 2643 * Loop through all the LWPs (kernel threads) in the process. 2644 */ 2645 t = p->p_tlist; 2646 do { 2647 t->t_proc_flag |= TP_MSACCT; 2648 } while ((t = t->t_forw) != p->p_tlist); 2649 2650 p->p_flag |= SMSACCT; /* set process-wide MSACCT */ 2651 } 2652 2653 /* 2654 * It's not really possible to disable microstate accounting anymore. 2655 * However, this routine simply turns off the ms accounting flags in a process 2656 * This way procfs can still pretend to turn microstate accounting on and 2657 * off for a process, but it actually doesn't do anything. This is 2658 * a neutered form of preemptive idiot-proofing. 2659 */ 2660 void 2661 disable_msacct(proc_t *p) 2662 { 2663 kthread_t *t; 2664 2665 ASSERT(MUTEX_HELD(&p->p_lock)); 2666 2667 p->p_flag &= ~SMSACCT; /* clear process-wide MSACCT */ 2668 /* 2669 * Loop through all the LWPs (kernel threads) in the process. 2670 */ 2671 if ((t = p->p_tlist) != NULL) { 2672 do { 2673 /* clear per-thread flag */ 2674 t->t_proc_flag &= ~TP_MSACCT; 2675 } while ((t = t->t_forw) != p->p_tlist); 2676 } 2677 } 2678 2679 /* 2680 * Return resource usage information. 2681 */ 2682 void 2683 prgetusage(kthread_t *t, prhusage_t *pup) 2684 { 2685 klwp_t *lwp = ttolwp(t); 2686 hrtime_t *mstimep; 2687 struct mstate *ms = &lwp->lwp_mstate; 2688 int state; 2689 int i; 2690 hrtime_t curtime; 2691 hrtime_t waitrq; 2692 hrtime_t tmp1; 2693 2694 curtime = gethrtime_unscaled(); 2695 2696 pup->pr_lwpid = t->t_tid; 2697 pup->pr_count = 1; 2698 pup->pr_create = ms->ms_start; 2699 pup->pr_term = ms->ms_term; 2700 scalehrtime(&pup->pr_create); 2701 scalehrtime(&pup->pr_term); 2702 if (ms->ms_term == 0) { 2703 pup->pr_rtime = curtime - ms->ms_start; 2704 scalehrtime(&pup->pr_rtime); 2705 } else { 2706 pup->pr_rtime = ms->ms_term - ms->ms_start; 2707 scalehrtime(&pup->pr_rtime); 2708 } 2709 2710 2711 pup->pr_utime = ms->ms_acct[LMS_USER]; 2712 pup->pr_stime = ms->ms_acct[LMS_SYSTEM]; 2713 pup->pr_ttime = ms->ms_acct[LMS_TRAP]; 2714 pup->pr_tftime = ms->ms_acct[LMS_TFAULT]; 2715 pup->pr_dftime = ms->ms_acct[LMS_DFAULT]; 2716 pup->pr_kftime = ms->ms_acct[LMS_KFAULT]; 2717 pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 2718 pup->pr_slptime = ms->ms_acct[LMS_SLEEP]; 2719 pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 2720 pup->pr_stoptime = ms->ms_acct[LMS_STOPPED]; 2721 2722 prscaleusage(pup); 2723 2724 /* 2725 * Adjust for time waiting in the dispatcher queue. 2726 */ 2727 waitrq = t->t_waitrq; /* hopefully atomic */ 2728 if (waitrq != 0) { 2729 tmp1 = curtime - waitrq; 2730 scalehrtime(&tmp1); 2731 pup->pr_wtime += tmp1; 2732 curtime = waitrq; 2733 } 2734 2735 /* 2736 * Adjust for time spent in current microstate. 2737 */ 2738 if (ms->ms_state_start > curtime) { 2739 curtime = gethrtime_unscaled(); 2740 } 2741 2742 i = 0; 2743 do { 2744 switch (state = t->t_mstate) { 2745 case LMS_SLEEP: 2746 /* 2747 * Update the timer for the current sleep state. 2748 */ 2749 switch (state = ms->ms_prev) { 2750 case LMS_TFAULT: 2751 case LMS_DFAULT: 2752 case LMS_KFAULT: 2753 case LMS_USER_LOCK: 2754 break; 2755 default: 2756 state = LMS_SLEEP; 2757 break; 2758 } 2759 break; 2760 case LMS_TFAULT: 2761 case LMS_DFAULT: 2762 case LMS_KFAULT: 2763 case LMS_USER_LOCK: 2764 state = LMS_SYSTEM; 2765 break; 2766 } 2767 switch (state) { 2768 case LMS_USER: mstimep = &pup->pr_utime; break; 2769 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 2770 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 2771 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 2772 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 2773 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 2774 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 2775 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 2776 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 2777 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 2778 default: panic("prgetusage: unknown microstate"); 2779 } 2780 tmp1 = curtime - ms->ms_state_start; 2781 if (tmp1 < 0) { 2782 curtime = gethrtime_unscaled(); 2783 i++; 2784 continue; 2785 } 2786 scalehrtime(&tmp1); 2787 } while (tmp1 < 0 && i < MAX_ITERS_SPIN); 2788 2789 *mstimep += tmp1; 2790 2791 /* update pup timestamp */ 2792 pup->pr_tstamp = curtime; 2793 scalehrtime(&pup->pr_tstamp); 2794 2795 /* 2796 * Resource usage counters. 2797 */ 2798 pup->pr_minf = lwp->lwp_ru.minflt; 2799 pup->pr_majf = lwp->lwp_ru.majflt; 2800 pup->pr_nswap = lwp->lwp_ru.nswap; 2801 pup->pr_inblk = lwp->lwp_ru.inblock; 2802 pup->pr_oublk = lwp->lwp_ru.oublock; 2803 pup->pr_msnd = lwp->lwp_ru.msgsnd; 2804 pup->pr_mrcv = lwp->lwp_ru.msgrcv; 2805 pup->pr_sigs = lwp->lwp_ru.nsignals; 2806 pup->pr_vctx = lwp->lwp_ru.nvcsw; 2807 pup->pr_ictx = lwp->lwp_ru.nivcsw; 2808 pup->pr_sysc = lwp->lwp_ru.sysc; 2809 pup->pr_ioch = lwp->lwp_ru.ioch; 2810 } 2811 2812 /* 2813 * Convert ms_acct stats from unscaled high-res time to nanoseconds 2814 */ 2815 void 2816 prscaleusage(prhusage_t *usg) 2817 { 2818 scalehrtime(&usg->pr_utime); 2819 scalehrtime(&usg->pr_stime); 2820 scalehrtime(&usg->pr_ttime); 2821 scalehrtime(&usg->pr_tftime); 2822 scalehrtime(&usg->pr_dftime); 2823 scalehrtime(&usg->pr_kftime); 2824 scalehrtime(&usg->pr_ltime); 2825 scalehrtime(&usg->pr_slptime); 2826 scalehrtime(&usg->pr_wtime); 2827 scalehrtime(&usg->pr_stoptime); 2828 } 2829 2830 2831 /* 2832 * Sum resource usage information. 2833 */ 2834 void 2835 praddusage(kthread_t *t, prhusage_t *pup) 2836 { 2837 klwp_t *lwp = ttolwp(t); 2838 hrtime_t *mstimep; 2839 struct mstate *ms = &lwp->lwp_mstate; 2840 int state; 2841 int i; 2842 hrtime_t curtime; 2843 hrtime_t waitrq; 2844 hrtime_t tmp; 2845 prhusage_t conv; 2846 2847 curtime = gethrtime_unscaled(); 2848 2849 if (ms->ms_term == 0) { 2850 tmp = curtime - ms->ms_start; 2851 scalehrtime(&tmp); 2852 pup->pr_rtime += tmp; 2853 } else { 2854 tmp = ms->ms_term - ms->ms_start; 2855 scalehrtime(&tmp); 2856 pup->pr_rtime += tmp; 2857 } 2858 2859 conv.pr_utime = ms->ms_acct[LMS_USER]; 2860 conv.pr_stime = ms->ms_acct[LMS_SYSTEM]; 2861 conv.pr_ttime = ms->ms_acct[LMS_TRAP]; 2862 conv.pr_tftime = ms->ms_acct[LMS_TFAULT]; 2863 conv.pr_dftime = ms->ms_acct[LMS_DFAULT]; 2864 conv.pr_kftime = ms->ms_acct[LMS_KFAULT]; 2865 conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK]; 2866 conv.pr_slptime = ms->ms_acct[LMS_SLEEP]; 2867 conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; 2868 conv.pr_stoptime = ms->ms_acct[LMS_STOPPED]; 2869 2870 prscaleusage(&conv); 2871 2872 pup->pr_utime += conv.pr_utime; 2873 pup->pr_stime += conv.pr_stime; 2874 pup->pr_ttime += conv.pr_ttime; 2875 pup->pr_tftime += conv.pr_tftime; 2876 pup->pr_dftime += conv.pr_dftime; 2877 pup->pr_kftime += conv.pr_kftime; 2878 pup->pr_ltime += conv.pr_ltime; 2879 pup->pr_slptime += conv.pr_slptime; 2880 pup->pr_wtime += conv.pr_wtime; 2881 pup->pr_stoptime += conv.pr_stoptime; 2882 2883 /* 2884 * Adjust for time waiting in the dispatcher queue. 2885 */ 2886 waitrq = t->t_waitrq; /* hopefully atomic */ 2887 if (waitrq != 0) { 2888 tmp = curtime - waitrq; 2889 scalehrtime(&tmp); 2890 pup->pr_wtime += tmp; 2891 curtime = waitrq; 2892 } 2893 2894 /* 2895 * Adjust for time spent in current microstate. 2896 */ 2897 if (ms->ms_state_start > curtime) { 2898 curtime = gethrtime_unscaled(); 2899 } 2900 2901 i = 0; 2902 do { 2903 switch (state = t->t_mstate) { 2904 case LMS_SLEEP: 2905 /* 2906 * Update the timer for the current sleep state. 2907 */ 2908 switch (state = ms->ms_prev) { 2909 case LMS_TFAULT: 2910 case LMS_DFAULT: 2911 case LMS_KFAULT: 2912 case LMS_USER_LOCK: 2913 break; 2914 default: 2915 state = LMS_SLEEP; 2916 break; 2917 } 2918 break; 2919 case LMS_TFAULT: 2920 case LMS_DFAULT: 2921 case LMS_KFAULT: 2922 case LMS_USER_LOCK: 2923 state = LMS_SYSTEM; 2924 break; 2925 } 2926 switch (state) { 2927 case LMS_USER: mstimep = &pup->pr_utime; break; 2928 case LMS_SYSTEM: mstimep = &pup->pr_stime; break; 2929 case LMS_TRAP: mstimep = &pup->pr_ttime; break; 2930 case LMS_TFAULT: mstimep = &pup->pr_tftime; break; 2931 case LMS_DFAULT: mstimep = &pup->pr_dftime; break; 2932 case LMS_KFAULT: mstimep = &pup->pr_kftime; break; 2933 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; 2934 case LMS_SLEEP: mstimep = &pup->pr_slptime; break; 2935 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; 2936 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; 2937 default: panic("praddusage: unknown microstate"); 2938 } 2939 tmp = curtime - ms->ms_state_start; 2940 if (tmp < 0) { 2941 curtime = gethrtime_unscaled(); 2942 i++; 2943 continue; 2944 } 2945 scalehrtime(&tmp); 2946 } while (tmp < 0 && i < MAX_ITERS_SPIN); 2947 2948 *mstimep += tmp; 2949 2950 /* update pup timestamp */ 2951 pup->pr_tstamp = curtime; 2952 scalehrtime(&pup->pr_tstamp); 2953 2954 /* 2955 * Resource usage counters. 2956 */ 2957 pup->pr_minf += lwp->lwp_ru.minflt; 2958 pup->pr_majf += lwp->lwp_ru.majflt; 2959 pup->pr_nswap += lwp->lwp_ru.nswap; 2960 pup->pr_inblk += lwp->lwp_ru.inblock; 2961 pup->pr_oublk += lwp->lwp_ru.oublock; 2962 pup->pr_msnd += lwp->lwp_ru.msgsnd; 2963 pup->pr_mrcv += lwp->lwp_ru.msgrcv; 2964 pup->pr_sigs += lwp->lwp_ru.nsignals; 2965 pup->pr_vctx += lwp->lwp_ru.nvcsw; 2966 pup->pr_ictx += lwp->lwp_ru.nivcsw; 2967 pup->pr_sysc += lwp->lwp_ru.sysc; 2968 pup->pr_ioch += lwp->lwp_ru.ioch; 2969 } 2970 2971 /* 2972 * Convert a prhusage_t to a prusage_t. 2973 * This means convert each hrtime_t to a timestruc_t 2974 * and copy the count fields uint64_t => ulong_t. 2975 */ 2976 void 2977 prcvtusage(prhusage_t *pup, prusage_t *upup) 2978 { 2979 uint64_t *ullp; 2980 ulong_t *ulp; 2981 int i; 2982 2983 upup->pr_lwpid = pup->pr_lwpid; 2984 upup->pr_count = pup->pr_count; 2985 2986 hrt2ts(pup->pr_tstamp, &upup->pr_tstamp); 2987 hrt2ts(pup->pr_create, &upup->pr_create); 2988 hrt2ts(pup->pr_term, &upup->pr_term); 2989 hrt2ts(pup->pr_rtime, &upup->pr_rtime); 2990 hrt2ts(pup->pr_utime, &upup->pr_utime); 2991 hrt2ts(pup->pr_stime, &upup->pr_stime); 2992 hrt2ts(pup->pr_ttime, &upup->pr_ttime); 2993 hrt2ts(pup->pr_tftime, &upup->pr_tftime); 2994 hrt2ts(pup->pr_dftime, &upup->pr_dftime); 2995 hrt2ts(pup->pr_kftime, &upup->pr_kftime); 2996 hrt2ts(pup->pr_ltime, &upup->pr_ltime); 2997 hrt2ts(pup->pr_slptime, &upup->pr_slptime); 2998 hrt2ts(pup->pr_wtime, &upup->pr_wtime); 2999 hrt2ts(pup->pr_stoptime, &upup->pr_stoptime); 3000 bzero(upup->filltime, sizeof (upup->filltime)); 3001 3002 ullp = &pup->pr_minf; 3003 ulp = &upup->pr_minf; 3004 for (i = 0; i < 22; i++) 3005 *ulp++ = (ulong_t)*ullp++; 3006 } 3007 3008 #ifdef _SYSCALL32_IMPL 3009 void 3010 prcvtusage32(prhusage_t *pup, prusage32_t *upup) 3011 { 3012 uint64_t *ullp; 3013 uint32_t *ulp; 3014 int i; 3015 3016 upup->pr_lwpid = pup->pr_lwpid; 3017 upup->pr_count = pup->pr_count; 3018 3019 hrt2ts32(pup->pr_tstamp, &upup->pr_tstamp); 3020 hrt2ts32(pup->pr_create, &upup->pr_create); 3021 hrt2ts32(pup->pr_term, &upup->pr_term); 3022 hrt2ts32(pup->pr_rtime, &upup->pr_rtime); 3023 hrt2ts32(pup->pr_utime, &upup->pr_utime); 3024 hrt2ts32(pup->pr_stime, &upup->pr_stime); 3025 hrt2ts32(pup->pr_ttime, &upup->pr_ttime); 3026 hrt2ts32(pup->pr_tftime, &upup->pr_tftime); 3027 hrt2ts32(pup->pr_dftime, &upup->pr_dftime); 3028 hrt2ts32(pup->pr_kftime, &upup->pr_kftime); 3029 hrt2ts32(pup->pr_ltime, &upup->pr_ltime); 3030 hrt2ts32(pup->pr_slptime, &upup->pr_slptime); 3031 hrt2ts32(pup->pr_wtime, &upup->pr_wtime); 3032 hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime); 3033 bzero(upup->filltime, sizeof (upup->filltime)); 3034 3035 ullp = &pup->pr_minf; 3036 ulp = &upup->pr_minf; 3037 for (i = 0; i < 22; i++) 3038 *ulp++ = (uint32_t)*ullp++; 3039 } 3040 #endif /* _SYSCALL32_IMPL */ 3041 3042 /* 3043 * Determine whether a set is empty. 3044 */ 3045 int 3046 setisempty(uint32_t *sp, uint_t n) 3047 { 3048 while (n--) 3049 if (*sp++) 3050 return (0); 3051 return (1); 3052 } 3053 3054 /* 3055 * Utility routine for establishing a watched area in the process. 3056 * Keep the list of watched areas sorted by virtual address. 3057 */ 3058 int 3059 set_watched_area(proc_t *p, struct watched_area *pwa) 3060 { 3061 caddr_t vaddr = pwa->wa_vaddr; 3062 caddr_t eaddr = pwa->wa_eaddr; 3063 ulong_t flags = pwa->wa_flags; 3064 struct watched_area *target; 3065 avl_index_t where; 3066 int error = 0; 3067 3068 /* we must not be holding p->p_lock, but the process must be locked */ 3069 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 3070 ASSERT(p->p_proc_flag & P_PR_LOCK); 3071 3072 /* 3073 * If this is our first watchpoint, enable watchpoints for the process. 3074 */ 3075 if (!pr_watch_active(p)) { 3076 kthread_t *t; 3077 3078 mutex_enter(&p->p_lock); 3079 if ((t = p->p_tlist) != NULL) { 3080 do { 3081 watch_enable(t); 3082 } while ((t = t->t_forw) != p->p_tlist); 3083 } 3084 mutex_exit(&p->p_lock); 3085 } 3086 3087 target = pr_find_watched_area(p, pwa, &where); 3088 if (target != NULL) { 3089 /* 3090 * We discovered an existing, overlapping watched area. 3091 * Allow it only if it is an exact match. 3092 */ 3093 if (target->wa_vaddr != vaddr || 3094 target->wa_eaddr != eaddr) 3095 error = EINVAL; 3096 else if (target->wa_flags != flags) { 3097 error = set_watched_page(p, vaddr, eaddr, 3098 flags, target->wa_flags); 3099 target->wa_flags = flags; 3100 } 3101 kmem_free(pwa, sizeof (struct watched_area)); 3102 } else { 3103 avl_insert(&p->p_warea, pwa, where); 3104 error = set_watched_page(p, vaddr, eaddr, flags, 0); 3105 } 3106 3107 return (error); 3108 } 3109 3110 /* 3111 * Utility routine for clearing a watched area in the process. 3112 * Must be an exact match of the virtual address. 3113 * size and flags don't matter. 3114 */ 3115 int 3116 clear_watched_area(proc_t *p, struct watched_area *pwa) 3117 { 3118 struct watched_area *found; 3119 3120 /* we must not be holding p->p_lock, but the process must be locked */ 3121 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 3122 ASSERT(p->p_proc_flag & P_PR_LOCK); 3123 3124 3125 if (!pr_watch_active(p)) { 3126 kmem_free(pwa, sizeof (struct watched_area)); 3127 return (0); 3128 } 3129 3130 /* 3131 * Look for a matching address in the watched areas. If a match is 3132 * found, clear the old watched area and adjust the watched page(s). It 3133 * is not an error if there is no match. 3134 */ 3135 if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL && 3136 found->wa_vaddr == pwa->wa_vaddr) { 3137 clear_watched_page(p, found->wa_vaddr, found->wa_eaddr, 3138 found->wa_flags); 3139 avl_remove(&p->p_warea, found); 3140 kmem_free(found, sizeof (struct watched_area)); 3141 } 3142 3143 kmem_free(pwa, sizeof (struct watched_area)); 3144 3145 /* 3146 * If we removed the last watched area from the process, disable 3147 * watchpoints. 3148 */ 3149 if (!pr_watch_active(p)) { 3150 kthread_t *t; 3151 3152 mutex_enter(&p->p_lock); 3153 if ((t = p->p_tlist) != NULL) { 3154 do { 3155 watch_disable(t); 3156 } while ((t = t->t_forw) != p->p_tlist); 3157 } 3158 mutex_exit(&p->p_lock); 3159 } 3160 3161 return (0); 3162 } 3163 3164 /* 3165 * Frees all the watched_area structures 3166 */ 3167 void 3168 pr_free_watchpoints(proc_t *p) 3169 { 3170 struct watched_area *delp; 3171 void *cookie; 3172 3173 cookie = NULL; 3174 while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL) 3175 kmem_free(delp, sizeof (struct watched_area)); 3176 3177 avl_destroy(&p->p_warea); 3178 } 3179 3180 /* 3181 * This one is called by the traced process to unwatch all the 3182 * pages while deallocating the list of watched_page structs. 3183 */ 3184 void 3185 pr_free_watched_pages(proc_t *p) 3186 { 3187 struct as *as = p->p_as; 3188 struct watched_page *pwp; 3189 uint_t prot; 3190 int retrycnt, err; 3191 void *cookie; 3192 3193 if (as == NULL || avl_numnodes(&as->a_wpage) == 0) 3194 return; 3195 3196 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 3197 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3198 3199 pwp = avl_first(&as->a_wpage); 3200 3201 cookie = NULL; 3202 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) { 3203 retrycnt = 0; 3204 if ((prot = pwp->wp_oprot) != 0) { 3205 caddr_t addr = pwp->wp_vaddr; 3206 struct seg *seg; 3207 retry: 3208 3209 if ((pwp->wp_prot != prot || 3210 (pwp->wp_flags & WP_NOWATCH)) && 3211 (seg = as_segat(as, addr)) != NULL) { 3212 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot); 3213 if (err == IE_RETRY) { 3214 ASSERT(retrycnt == 0); 3215 retrycnt++; 3216 goto retry; 3217 } 3218 } 3219 } 3220 kmem_free(pwp, sizeof (struct watched_page)); 3221 } 3222 3223 avl_destroy(&as->a_wpage); 3224 p->p_wprot = NULL; 3225 3226 AS_LOCK_EXIT(as, &as->a_lock); 3227 } 3228 3229 /* 3230 * Insert a watched area into the list of watched pages. 3231 * If oflags is zero then we are adding a new watched area. 3232 * Otherwise we are changing the flags of an existing watched area. 3233 */ 3234 static int 3235 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, 3236 ulong_t flags, ulong_t oflags) 3237 { 3238 struct as *as = p->p_as; 3239 avl_tree_t *pwp_tree; 3240 struct watched_page *pwp, *newpwp; 3241 struct watched_page tpw; 3242 avl_index_t where; 3243 struct seg *seg; 3244 uint_t prot; 3245 caddr_t addr; 3246 3247 /* 3248 * We need to pre-allocate a list of structures before we grab the 3249 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks 3250 * held. 3251 */ 3252 newpwp = NULL; 3253 for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3254 addr < eaddr; addr += PAGESIZE) { 3255 pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP); 3256 pwp->wp_list = newpwp; 3257 newpwp = pwp; 3258 } 3259 3260 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3261 3262 /* 3263 * Search for an existing watched page to contain the watched area. 3264 * If none is found, grab a new one from the available list 3265 * and insert it in the active list, keeping the list sorted 3266 * by user-level virtual address. 3267 */ 3268 if (p->p_flag & SVFWAIT) 3269 pwp_tree = &p->p_wpage; 3270 else 3271 pwp_tree = &as->a_wpage; 3272 3273 again: 3274 if (avl_numnodes(pwp_tree) > prnwatch) { 3275 AS_LOCK_EXIT(as, &as->a_lock); 3276 while (newpwp != NULL) { 3277 pwp = newpwp->wp_list; 3278 kmem_free(newpwp, sizeof (struct watched_page)); 3279 newpwp = pwp; 3280 } 3281 return (E2BIG); 3282 } 3283 3284 tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3285 if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) { 3286 pwp = newpwp; 3287 newpwp = newpwp->wp_list; 3288 pwp->wp_list = NULL; 3289 pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr & 3290 (uintptr_t)PAGEMASK); 3291 avl_insert(pwp_tree, pwp, where); 3292 } 3293 3294 ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE); 3295 3296 if (oflags & WA_READ) 3297 pwp->wp_read--; 3298 if (oflags & WA_WRITE) 3299 pwp->wp_write--; 3300 if (oflags & WA_EXEC) 3301 pwp->wp_exec--; 3302 3303 ASSERT(pwp->wp_read >= 0); 3304 ASSERT(pwp->wp_write >= 0); 3305 ASSERT(pwp->wp_exec >= 0); 3306 3307 if (flags & WA_READ) 3308 pwp->wp_read++; 3309 if (flags & WA_WRITE) 3310 pwp->wp_write++; 3311 if (flags & WA_EXEC) 3312 pwp->wp_exec++; 3313 3314 if (!(p->p_flag & SVFWAIT)) { 3315 vaddr = pwp->wp_vaddr; 3316 if (pwp->wp_oprot == 0 && 3317 (seg = as_segat(as, vaddr)) != NULL) { 3318 SEGOP_GETPROT(seg, vaddr, 0, &prot); 3319 pwp->wp_oprot = (uchar_t)prot; 3320 pwp->wp_prot = (uchar_t)prot; 3321 } 3322 if (pwp->wp_oprot != 0) { 3323 prot = pwp->wp_oprot; 3324 if (pwp->wp_read) 3325 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3326 if (pwp->wp_write) 3327 prot &= ~PROT_WRITE; 3328 if (pwp->wp_exec) 3329 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3330 if (!(pwp->wp_flags & WP_NOWATCH) && 3331 pwp->wp_prot != prot && 3332 (pwp->wp_flags & WP_SETPROT) == 0) { 3333 pwp->wp_flags |= WP_SETPROT; 3334 pwp->wp_list = p->p_wprot; 3335 p->p_wprot = pwp; 3336 } 3337 pwp->wp_prot = (uchar_t)prot; 3338 } 3339 } 3340 3341 /* 3342 * If the watched area extends into the next page then do 3343 * it over again with the virtual address of the next page. 3344 */ 3345 if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr) 3346 goto again; 3347 3348 AS_LOCK_EXIT(as, &as->a_lock); 3349 3350 /* 3351 * Free any pages we may have over-allocated 3352 */ 3353 while (newpwp != NULL) { 3354 pwp = newpwp->wp_list; 3355 kmem_free(newpwp, sizeof (struct watched_page)); 3356 newpwp = pwp; 3357 } 3358 3359 return (0); 3360 } 3361 3362 /* 3363 * Remove a watched area from the list of watched pages. 3364 * A watched area may extend over more than one page. 3365 */ 3366 static void 3367 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags) 3368 { 3369 struct as *as = p->p_as; 3370 struct watched_page *pwp; 3371 struct watched_page tpw; 3372 avl_tree_t *tree; 3373 avl_index_t where; 3374 3375 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 3376 3377 if (p->p_flag & SVFWAIT) 3378 tree = &p->p_wpage; 3379 else 3380 tree = &as->a_wpage; 3381 3382 tpw.wp_vaddr = vaddr = 3383 (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); 3384 pwp = avl_find(tree, &tpw, &where); 3385 if (pwp == NULL) 3386 pwp = avl_nearest(tree, where, AVL_AFTER); 3387 3388 while (pwp != NULL && pwp->wp_vaddr < eaddr) { 3389 ASSERT(vaddr <= pwp->wp_vaddr); 3390 3391 if (flags & WA_READ) 3392 pwp->wp_read--; 3393 if (flags & WA_WRITE) 3394 pwp->wp_write--; 3395 if (flags & WA_EXEC) 3396 pwp->wp_exec--; 3397 3398 if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) { 3399 /* 3400 * Reset the hat layer's protections on this page. 3401 */ 3402 if (pwp->wp_oprot != 0) { 3403 uint_t prot = pwp->wp_oprot; 3404 3405 if (pwp->wp_read) 3406 prot &= 3407 ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3408 if (pwp->wp_write) 3409 prot &= ~PROT_WRITE; 3410 if (pwp->wp_exec) 3411 prot &= 3412 ~(PROT_READ|PROT_WRITE|PROT_EXEC); 3413 if (!(pwp->wp_flags & WP_NOWATCH) && 3414 pwp->wp_prot != prot && 3415 (pwp->wp_flags & WP_SETPROT) == 0) { 3416 pwp->wp_flags |= WP_SETPROT; 3417 pwp->wp_list = p->p_wprot; 3418 p->p_wprot = pwp; 3419 } 3420 pwp->wp_prot = (uchar_t)prot; 3421 } 3422 } else { 3423 /* 3424 * No watched areas remain in this page. 3425 * Reset everything to normal. 3426 */ 3427 if (pwp->wp_oprot != 0) { 3428 pwp->wp_prot = pwp->wp_oprot; 3429 if ((pwp->wp_flags & WP_SETPROT) == 0) { 3430 pwp->wp_flags |= WP_SETPROT; 3431 pwp->wp_list = p->p_wprot; 3432 p->p_wprot = pwp; 3433 } 3434 } 3435 } 3436 3437 pwp = AVL_NEXT(tree, pwp); 3438 } 3439 3440 AS_LOCK_EXIT(as, &as->a_lock); 3441 } 3442 3443 /* 3444 * Return the original protections for the specified page. 3445 */ 3446 static void 3447 getwatchprot(struct as *as, caddr_t addr, uint_t *prot) 3448 { 3449 struct watched_page *pwp; 3450 struct watched_page tpw; 3451 3452 ASSERT(AS_LOCK_HELD(as, &as->a_lock)); 3453 3454 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 3455 if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL) 3456 *prot = pwp->wp_oprot; 3457 } 3458 3459 static prpagev_t * 3460 pr_pagev_create(struct seg *seg, int check_noreserve) 3461 { 3462 prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP); 3463 size_t total_pages = seg_pages(seg); 3464 3465 /* 3466 * Limit the size of our vectors to pagev_lim pages at a time. We need 3467 * 4 or 5 bytes of storage per page, so this means we limit ourself 3468 * to about a megabyte of kernel heap by default. 3469 */ 3470 pagev->pg_npages = MIN(total_pages, pagev_lim); 3471 pagev->pg_pnbase = 0; 3472 3473 pagev->pg_protv = 3474 kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP); 3475 3476 if (check_noreserve) 3477 pagev->pg_incore = 3478 kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP); 3479 else 3480 pagev->pg_incore = NULL; 3481 3482 return (pagev); 3483 } 3484 3485 static void 3486 pr_pagev_destroy(prpagev_t *pagev) 3487 { 3488 if (pagev->pg_incore != NULL) 3489 kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char)); 3490 3491 kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t)); 3492 kmem_free(pagev, sizeof (prpagev_t)); 3493 } 3494 3495 static caddr_t 3496 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr) 3497 { 3498 ulong_t lastpg = seg_page(seg, eaddr - 1); 3499 ulong_t pn, pnlim; 3500 caddr_t saddr; 3501 size_t len; 3502 3503 ASSERT(addr >= seg->s_base && addr <= eaddr); 3504 3505 if (addr == eaddr) 3506 return (eaddr); 3507 3508 refill: 3509 ASSERT(addr < eaddr); 3510 pagev->pg_pnbase = seg_page(seg, addr); 3511 pnlim = pagev->pg_pnbase + pagev->pg_npages; 3512 saddr = addr; 3513 3514 if (lastpg < pnlim) 3515 len = (size_t)(eaddr - addr); 3516 else 3517 len = pagev->pg_npages * PAGESIZE; 3518 3519 if (pagev->pg_incore != NULL) { 3520 /* 3521 * INCORE cleverly has different semantics than GETPROT: 3522 * it returns info on pages up to but NOT including addr + len. 3523 */ 3524 SEGOP_INCORE(seg, addr, len, pagev->pg_incore); 3525 pn = pagev->pg_pnbase; 3526 3527 do { 3528 /* 3529 * Guilty knowledge here: We know that segvn_incore 3530 * returns more than just the low-order bit that 3531 * indicates the page is actually in memory. If any 3532 * bits are set, then the page has backing store. 3533 */ 3534 if (pagev->pg_incore[pn++ - pagev->pg_pnbase]) 3535 goto out; 3536 3537 } while ((addr += PAGESIZE) < eaddr && pn < pnlim); 3538 3539 /* 3540 * If we examined all the pages in the vector but we're not 3541 * at the end of the segment, take another lap. 3542 */ 3543 if (addr < eaddr) 3544 goto refill; 3545 } 3546 3547 /* 3548 * Need to take len - 1 because addr + len is the address of the 3549 * first byte of the page just past the end of what we want. 3550 */ 3551 out: 3552 SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv); 3553 return (addr); 3554 } 3555 3556 static caddr_t 3557 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg, 3558 caddr_t *saddrp, caddr_t eaddr, uint_t *protp) 3559 { 3560 /* 3561 * Our starting address is either the specified address, or the base 3562 * address from the start of the pagev. If the latter is greater, 3563 * this means a previous call to pr_pagev_fill has already scanned 3564 * further than the end of the previous mapping. 3565 */ 3566 caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE; 3567 caddr_t addr = MAX(*saddrp, base); 3568 ulong_t pn = seg_page(seg, addr); 3569 uint_t prot, nprot; 3570 3571 /* 3572 * If we're dealing with noreserve pages, then advance addr to 3573 * the address of the next page which has backing store. 3574 */ 3575 if (pagev->pg_incore != NULL) { 3576 while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) { 3577 if ((addr += PAGESIZE) == eaddr) { 3578 *saddrp = addr; 3579 prot = 0; 3580 goto out; 3581 } 3582 if (++pn == pagev->pg_pnbase + pagev->pg_npages) { 3583 addr = pr_pagev_fill(pagev, seg, addr, eaddr); 3584 if (addr == eaddr) { 3585 *saddrp = addr; 3586 prot = 0; 3587 goto out; 3588 } 3589 pn = seg_page(seg, addr); 3590 } 3591 } 3592 } 3593 3594 /* 3595 * Get the protections on the page corresponding to addr. 3596 */ 3597 pn = seg_page(seg, addr); 3598 ASSERT(pn >= pagev->pg_pnbase); 3599 ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages)); 3600 3601 prot = pagev->pg_protv[pn - pagev->pg_pnbase]; 3602 getwatchprot(seg->s_as, addr, &prot); 3603 *saddrp = addr; 3604 3605 /* 3606 * Now loop until we find a backed page with different protections 3607 * or we reach the end of this segment. 3608 */ 3609 while ((addr += PAGESIZE) < eaddr) { 3610 /* 3611 * If pn has advanced to the page number following what we 3612 * have information on, refill the page vector and reset 3613 * addr and pn. If pr_pagev_fill does not return the 3614 * address of the next page, we have a discontiguity and 3615 * thus have reached the end of the current mapping. 3616 */ 3617 if (++pn == pagev->pg_pnbase + pagev->pg_npages) { 3618 caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr); 3619 if (naddr != addr) 3620 goto out; 3621 pn = seg_page(seg, addr); 3622 } 3623 3624 /* 3625 * The previous page's protections are in prot, and it has 3626 * backing. If this page is MAP_NORESERVE and has no backing, 3627 * then end this mapping and return the previous protections. 3628 */ 3629 if (pagev->pg_incore != NULL && 3630 pagev->pg_incore[pn - pagev->pg_pnbase] == 0) 3631 break; 3632 3633 /* 3634 * Otherwise end the mapping if this page's protections (nprot) 3635 * are different than those in the previous page (prot). 3636 */ 3637 nprot = pagev->pg_protv[pn - pagev->pg_pnbase]; 3638 getwatchprot(seg->s_as, addr, &nprot); 3639 3640 if (nprot != prot) 3641 break; 3642 } 3643 3644 out: 3645 *protp = prot; 3646 return (addr); 3647 } 3648 3649 size_t 3650 pr_getsegsize(struct seg *seg, int reserved) 3651 { 3652 size_t size = seg->s_size; 3653 3654 /* 3655 * If we're interested in the reserved space, return the size of the 3656 * segment itself. Everything else in this function is a special case 3657 * to determine the actual underlying size of various segment types. 3658 */ 3659 if (reserved) 3660 return (size); 3661 3662 /* 3663 * If this is a segvn mapping of a regular file, return the smaller 3664 * of the segment size and the remaining size of the file beyond 3665 * the file offset corresponding to seg->s_base. 3666 */ 3667 if (seg->s_ops == &segvn_ops) { 3668 vattr_t vattr; 3669 vnode_t *vp; 3670 3671 vattr.va_mask = AT_SIZE; 3672 3673 if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 && 3674 vp != NULL && vp->v_type == VREG && 3675 VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) { 3676 3677 u_offset_t fsize = vattr.va_size; 3678 u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base); 3679 3680 if (fsize < offset) 3681 fsize = 0; 3682 else 3683 fsize -= offset; 3684 3685 fsize = roundup(fsize, (u_offset_t)PAGESIZE); 3686 3687 if (fsize < (u_offset_t)size) 3688 size = (size_t)fsize; 3689 } 3690 3691 return (size); 3692 } 3693 3694 /* 3695 * If this is an ISM shared segment, don't include pages that are 3696 * beyond the real size of the spt segment that backs it. 3697 */ 3698 if (seg->s_ops == &segspt_shmops) 3699 return (MIN(spt_realsize(seg), size)); 3700 3701 /* 3702 * If this is segment is a mapping from /dev/null, then this is a 3703 * reservation of virtual address space and has no actual size. 3704 * Such segments are backed by segdev and have type set to neither 3705 * MAP_SHARED nor MAP_PRIVATE. 3706 */ 3707 if (seg->s_ops == &segdev_ops && 3708 ((SEGOP_GETTYPE(seg, seg->s_base) & 3709 (MAP_SHARED | MAP_PRIVATE)) == 0)) 3710 return (0); 3711 3712 /* 3713 * If this segment doesn't match one of the special types we handle, 3714 * just return the size of the segment itself. 3715 */ 3716 return (size); 3717 } 3718 3719 uint_t 3720 pr_getprot(struct seg *seg, int reserved, void **tmp, 3721 caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr) 3722 { 3723 struct as *as = seg->s_as; 3724 3725 caddr_t saddr = *saddrp; 3726 caddr_t naddr; 3727 3728 int check_noreserve; 3729 uint_t prot; 3730 3731 union { 3732 struct segvn_data *svd; 3733 struct segdev_data *sdp; 3734 void *data; 3735 } s; 3736 3737 s.data = seg->s_data; 3738 3739 ASSERT(AS_WRITE_HELD(as, &as->a_lock)); 3740 ASSERT(saddr >= seg->s_base && saddr < eaddr); 3741 ASSERT(eaddr <= seg->s_base + seg->s_size); 3742 3743 /* 3744 * Don't include MAP_NORESERVE pages in the address range 3745 * unless their mappings have actually materialized. 3746 * We cheat by knowing that segvn is the only segment 3747 * driver that supports MAP_NORESERVE. 3748 */ 3749 check_noreserve = 3750 (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL && 3751 (s.svd->vp == NULL || s.svd->vp->v_type != VREG) && 3752 (s.svd->flags & MAP_NORESERVE)); 3753 3754 /* 3755 * Examine every page only as a last resort. We use guilty knowledge 3756 * of segvn and segdev to avoid this: if there are no per-page 3757 * protections present in the segment and we don't care about 3758 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment. 3759 */ 3760 if (!check_noreserve && saddr == seg->s_base && 3761 seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) { 3762 prot = s.svd->prot; 3763 getwatchprot(as, saddr, &prot); 3764 naddr = eaddr; 3765 3766 } else if (saddr == seg->s_base && seg->s_ops == &segdev_ops && 3767 s.sdp != NULL && s.sdp->pageprot == 0) { 3768 prot = s.sdp->prot; 3769 getwatchprot(as, saddr, &prot); 3770 naddr = eaddr; 3771 3772 } else { 3773 prpagev_t *pagev; 3774 3775 /* 3776 * If addr is sitting at the start of the segment, then 3777 * create a page vector to store protection and incore 3778 * information for pages in the segment, and fill it. 3779 * Otherwise, we expect *tmp to address the prpagev_t 3780 * allocated by a previous call to this function. 3781 */ 3782 if (saddr == seg->s_base) { 3783 pagev = pr_pagev_create(seg, check_noreserve); 3784 saddr = pr_pagev_fill(pagev, seg, saddr, eaddr); 3785 3786 ASSERT(*tmp == NULL); 3787 *tmp = pagev; 3788 3789 ASSERT(saddr <= eaddr); 3790 *saddrp = saddr; 3791 3792 if (saddr == eaddr) { 3793 naddr = saddr; 3794 prot = 0; 3795 goto out; 3796 } 3797 3798 } else { 3799 ASSERT(*tmp != NULL); 3800 pagev = (prpagev_t *)*tmp; 3801 } 3802 3803 naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot); 3804 ASSERT(naddr <= eaddr); 3805 } 3806 3807 out: 3808 if (naddr == eaddr) 3809 pr_getprot_done(tmp); 3810 *naddrp = naddr; 3811 return (prot); 3812 } 3813 3814 void 3815 pr_getprot_done(void **tmp) 3816 { 3817 if (*tmp != NULL) { 3818 pr_pagev_destroy((prpagev_t *)*tmp); 3819 *tmp = NULL; 3820 } 3821 } 3822 3823 /* 3824 * Return true iff the vnode is a /proc file from the object directory. 3825 */ 3826 int 3827 pr_isobject(vnode_t *vp) 3828 { 3829 return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT); 3830 } 3831 3832 /* 3833 * Return true iff the vnode is a /proc file opened by the process itself. 3834 */ 3835 int 3836 pr_isself(vnode_t *vp) 3837 { 3838 /* 3839 * XXX: To retain binary compatibility with the old 3840 * ioctl()-based version of /proc, we exempt self-opens 3841 * of /proc/<pid> from being marked close-on-exec. 3842 */ 3843 return (vn_matchops(vp, prvnodeops) && 3844 (VTOP(vp)->pr_flags & PR_ISSELF) && 3845 VTOP(vp)->pr_type != PR_PIDDIR); 3846 } 3847 3848 static ssize_t 3849 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr) 3850 { 3851 ssize_t pagesize, hatsize; 3852 3853 ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 3854 ASSERT(IS_P2ALIGNED(saddr, PAGESIZE)); 3855 ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE)); 3856 ASSERT(saddr < eaddr); 3857 3858 pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr); 3859 ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize)); 3860 ASSERT(pagesize != 0); 3861 3862 if (pagesize == -1) 3863 pagesize = PAGESIZE; 3864 3865 saddr += P2NPHASE((uintptr_t)saddr, pagesize); 3866 3867 while (saddr < eaddr) { 3868 if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr)) 3869 break; 3870 ASSERT(IS_P2ALIGNED(saddr, pagesize)); 3871 saddr += pagesize; 3872 } 3873 3874 *naddrp = ((saddr < eaddr) ? saddr : eaddr); 3875 return (hatsize); 3876 } 3877 3878 /* 3879 * Return an array of structures with extended memory map information. 3880 * We allocate here; the caller must deallocate. 3881 */ 3882 int 3883 prgetxmap(proc_t *p, list_t *iolhead) 3884 { 3885 struct as *as = p->p_as; 3886 prxmap_t *mp; 3887 struct seg *seg; 3888 struct seg *brkseg, *stkseg; 3889 struct vnode *vp; 3890 struct vattr vattr; 3891 uint_t prot; 3892 3893 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 3894 3895 /* 3896 * Request an initial buffer size that doesn't waste memory 3897 * if the address space has only a small number of segments. 3898 */ 3899 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 3900 3901 if ((seg = AS_SEGFIRST(as)) == NULL) 3902 return (0); 3903 3904 brkseg = break_seg(p); 3905 stkseg = as_segat(as, prgetstackbase(p)); 3906 3907 do { 3908 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 3909 caddr_t saddr, naddr, baddr; 3910 void *tmp = NULL; 3911 ssize_t psz; 3912 char *parr; 3913 uint64_t npages; 3914 uint64_t pagenum; 3915 3916 /* 3917 * Segment loop part one: iterate from the base of the segment 3918 * to its end, pausing at each address boundary (baddr) between 3919 * ranges that have different virtual memory protections. 3920 */ 3921 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { 3922 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); 3923 ASSERT(baddr >= saddr && baddr <= eaddr); 3924 3925 /* 3926 * Segment loop part two: iterate from the current 3927 * position to the end of the protection boundary, 3928 * pausing at each address boundary (naddr) between 3929 * ranges that have different underlying page sizes. 3930 */ 3931 for (; saddr < baddr; saddr = naddr) { 3932 psz = pr_getpagesize(seg, saddr, &naddr, baddr); 3933 ASSERT(naddr >= saddr && naddr <= baddr); 3934 3935 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 3936 3937 mp->pr_vaddr = (uintptr_t)saddr; 3938 mp->pr_size = naddr - saddr; 3939 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 3940 mp->pr_mflags = 0; 3941 if (prot & PROT_READ) 3942 mp->pr_mflags |= MA_READ; 3943 if (prot & PROT_WRITE) 3944 mp->pr_mflags |= MA_WRITE; 3945 if (prot & PROT_EXEC) 3946 mp->pr_mflags |= MA_EXEC; 3947 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 3948 mp->pr_mflags |= MA_SHARED; 3949 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 3950 mp->pr_mflags |= MA_NORESERVE; 3951 if (seg->s_ops == &segspt_shmops || 3952 (seg->s_ops == &segvn_ops && 3953 (SEGOP_GETVP(seg, saddr, &vp) != 0 || 3954 vp == NULL))) 3955 mp->pr_mflags |= MA_ANON; 3956 if (seg == brkseg) 3957 mp->pr_mflags |= MA_BREAK; 3958 else if (seg == stkseg) 3959 mp->pr_mflags |= MA_STACK; 3960 if (seg->s_ops == &segspt_shmops) 3961 mp->pr_mflags |= MA_ISM | MA_SHM; 3962 3963 mp->pr_pagesize = PAGESIZE; 3964 if (psz == -1) { 3965 mp->pr_hatpagesize = 0; 3966 } else { 3967 mp->pr_hatpagesize = psz; 3968 } 3969 3970 /* 3971 * Manufacture a filename for the "object" dir. 3972 */ 3973 mp->pr_dev = PRNODEV; 3974 vattr.va_mask = AT_FSID|AT_NODEID; 3975 if (seg->s_ops == &segvn_ops && 3976 SEGOP_GETVP(seg, saddr, &vp) == 0 && 3977 vp != NULL && vp->v_type == VREG && 3978 VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) { 3979 mp->pr_dev = vattr.va_fsid; 3980 mp->pr_ino = vattr.va_nodeid; 3981 if (vp == p->p_exec) 3982 (void) strcpy(mp->pr_mapname, 3983 "a.out"); 3984 else 3985 pr_object_name(mp->pr_mapname, 3986 vp, &vattr); 3987 } 3988 3989 /* 3990 * Get the SysV shared memory id, if any. 3991 */ 3992 if ((mp->pr_mflags & MA_SHARED) && 3993 p->p_segacct && (mp->pr_shmid = shmgetid(p, 3994 seg->s_base)) != SHMID_NONE) { 3995 if (mp->pr_shmid == SHMID_FREE) 3996 mp->pr_shmid = -1; 3997 3998 mp->pr_mflags |= MA_SHM; 3999 } else { 4000 mp->pr_shmid = -1; 4001 } 4002 4003 npages = ((uintptr_t)(naddr - saddr)) >> 4004 PAGESHIFT; 4005 parr = kmem_zalloc(npages, KM_SLEEP); 4006 4007 SEGOP_INCORE(seg, saddr, naddr - saddr, parr); 4008 4009 for (pagenum = 0; pagenum < npages; pagenum++) { 4010 if (parr[pagenum] & SEG_PAGE_INCORE) 4011 mp->pr_rss++; 4012 if (parr[pagenum] & SEG_PAGE_ANON) 4013 mp->pr_anon++; 4014 if (parr[pagenum] & SEG_PAGE_LOCKED) 4015 mp->pr_locked++; 4016 } 4017 kmem_free(parr, npages); 4018 } 4019 } 4020 ASSERT(tmp == NULL); 4021 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 4022 4023 return (0); 4024 } 4025 4026 /* 4027 * Return the process's credentials. We don't need a 32-bit equivalent of 4028 * this function because prcred_t and prcred32_t are actually the same. 4029 */ 4030 void 4031 prgetcred(proc_t *p, prcred_t *pcrp) 4032 { 4033 mutex_enter(&p->p_crlock); 4034 cred2prcred(p->p_cred, pcrp); 4035 mutex_exit(&p->p_crlock); 4036 } 4037 4038 /* 4039 * Compute actual size of the prpriv_t structure. 4040 */ 4041 4042 size_t 4043 prgetprivsize(void) 4044 { 4045 return (priv_prgetprivsize(NULL)); 4046 } 4047 4048 /* 4049 * Return the process's privileges. We don't need a 32-bit equivalent of 4050 * this function because prpriv_t and prpriv32_t are actually the same. 4051 */ 4052 void 4053 prgetpriv(proc_t *p, prpriv_t *pprp) 4054 { 4055 mutex_enter(&p->p_crlock); 4056 cred2prpriv(p->p_cred, pprp); 4057 mutex_exit(&p->p_crlock); 4058 } 4059 4060 #ifdef _SYSCALL32_IMPL 4061 /* 4062 * Return an array of structures with HAT memory map information. 4063 * We allocate here; the caller must deallocate. 4064 */ 4065 int 4066 prgetxmap32(proc_t *p, list_t *iolhead) 4067 { 4068 struct as *as = p->p_as; 4069 prxmap32_t *mp; 4070 struct seg *seg; 4071 struct seg *brkseg, *stkseg; 4072 struct vnode *vp; 4073 struct vattr vattr; 4074 uint_t prot; 4075 4076 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock)); 4077 4078 /* 4079 * Request an initial buffer size that doesn't waste memory 4080 * if the address space has only a small number of segments. 4081 */ 4082 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); 4083 4084 if ((seg = AS_SEGFIRST(as)) == NULL) 4085 return (0); 4086 4087 brkseg = break_seg(p); 4088 stkseg = as_segat(as, prgetstackbase(p)); 4089 4090 do { 4091 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 4092 caddr_t saddr, naddr, baddr; 4093 void *tmp = NULL; 4094 ssize_t psz; 4095 char *parr; 4096 uint64_t npages; 4097 uint64_t pagenum; 4098 4099 /* 4100 * Segment loop part one: iterate from the base of the segment 4101 * to its end, pausing at each address boundary (baddr) between 4102 * ranges that have different virtual memory protections. 4103 */ 4104 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { 4105 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); 4106 ASSERT(baddr >= saddr && baddr <= eaddr); 4107 4108 /* 4109 * Segment loop part two: iterate from the current 4110 * position to the end of the protection boundary, 4111 * pausing at each address boundary (naddr) between 4112 * ranges that have different underlying page sizes. 4113 */ 4114 for (; saddr < baddr; saddr = naddr) { 4115 psz = pr_getpagesize(seg, saddr, &naddr, baddr); 4116 ASSERT(naddr >= saddr && naddr <= baddr); 4117 4118 mp = pr_iol_newbuf(iolhead, sizeof (*mp)); 4119 4120 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; 4121 mp->pr_size = (size32_t)(naddr - saddr); 4122 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); 4123 mp->pr_mflags = 0; 4124 if (prot & PROT_READ) 4125 mp->pr_mflags |= MA_READ; 4126 if (prot & PROT_WRITE) 4127 mp->pr_mflags |= MA_WRITE; 4128 if (prot & PROT_EXEC) 4129 mp->pr_mflags |= MA_EXEC; 4130 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) 4131 mp->pr_mflags |= MA_SHARED; 4132 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) 4133 mp->pr_mflags |= MA_NORESERVE; 4134 if (seg->s_ops == &segspt_shmops || 4135 (seg->s_ops == &segvn_ops && 4136 (SEGOP_GETVP(seg, saddr, &vp) != 0 || 4137 vp == NULL))) 4138 mp->pr_mflags |= MA_ANON; 4139 if (seg == brkseg) 4140 mp->pr_mflags |= MA_BREAK; 4141 else if (seg == stkseg) 4142 mp->pr_mflags |= MA_STACK; 4143 if (seg->s_ops == &segspt_shmops) 4144 mp->pr_mflags |= MA_ISM | MA_SHM; 4145 4146 mp->pr_pagesize = PAGESIZE; 4147 if (psz == -1) { 4148 mp->pr_hatpagesize = 0; 4149 } else { 4150 mp->pr_hatpagesize = psz; 4151 } 4152 4153 /* 4154 * Manufacture a filename for the "object" dir. 4155 */ 4156 mp->pr_dev = PRNODEV32; 4157 vattr.va_mask = AT_FSID|AT_NODEID; 4158 if (seg->s_ops == &segvn_ops && 4159 SEGOP_GETVP(seg, saddr, &vp) == 0 && 4160 vp != NULL && vp->v_type == VREG && 4161 VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) { 4162 (void) cmpldev(&mp->pr_dev, 4163 vattr.va_fsid); 4164 mp->pr_ino = vattr.va_nodeid; 4165 if (vp == p->p_exec) 4166 (void) strcpy(mp->pr_mapname, 4167 "a.out"); 4168 else 4169 pr_object_name(mp->pr_mapname, 4170 vp, &vattr); 4171 } 4172 4173 /* 4174 * Get the SysV shared memory id, if any. 4175 */ 4176 if ((mp->pr_mflags & MA_SHARED) && 4177 p->p_segacct && (mp->pr_shmid = shmgetid(p, 4178 seg->s_base)) != SHMID_NONE) { 4179 if (mp->pr_shmid == SHMID_FREE) 4180 mp->pr_shmid = -1; 4181 4182 mp->pr_mflags |= MA_SHM; 4183 } else { 4184 mp->pr_shmid = -1; 4185 } 4186 4187 npages = ((uintptr_t)(naddr - saddr)) >> 4188 PAGESHIFT; 4189 parr = kmem_zalloc(npages, KM_SLEEP); 4190 4191 SEGOP_INCORE(seg, saddr, naddr - saddr, parr); 4192 4193 for (pagenum = 0; pagenum < npages; pagenum++) { 4194 if (parr[pagenum] & SEG_PAGE_INCORE) 4195 mp->pr_rss++; 4196 if (parr[pagenum] & SEG_PAGE_ANON) 4197 mp->pr_anon++; 4198 if (parr[pagenum] & SEG_PAGE_LOCKED) 4199 mp->pr_locked++; 4200 } 4201 kmem_free(parr, npages); 4202 } 4203 } 4204 ASSERT(tmp == NULL); 4205 } while ((seg = AS_SEGNEXT(as, seg)) != NULL); 4206 4207 return (0); 4208 } 4209 #endif /* _SYSCALL32_IMPL */ 4210