1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/sysmacros.h> 33 #include <sys/proc.h> 34 #include <sys/kmem.h> 35 #include <sys/tuneable.h> 36 #include <sys/var.h> 37 #include <sys/cred.h> 38 #include <sys/systm.h> 39 #include <sys/prsystm.h> 40 #include <sys/vnode.h> 41 #include <sys/session.h> 42 #include <sys/cpuvar.h> 43 #include <sys/cmn_err.h> 44 #include <sys/bitmap.h> 45 #include <sys/debug.h> 46 #include <c2/audit.h> 47 #include <sys/zone.h> 48 49 /* directory entries for /proc */ 50 union procent { 51 proc_t *pe_proc; 52 union procent *pe_next; 53 }; 54 55 struct pid pid0 = { 56 0, /* pid_prinactive */ 57 1, /* pid_pgorphaned */ 58 0, /* pid_padding */ 59 0, /* pid_prslot */ 60 0, /* pid_id */ 61 NULL, /* pid_pglink */ 62 NULL, /* pid_pgtail */ 63 NULL, /* pid_link */ 64 3 /* pid_ref */ 65 }; 66 67 static int pid_hashlen = 4; /* desired average hash chain length */ 68 static int pid_hashsz; /* number of buckets in the hash table */ 69 70 #define HASHPID(pid) (pidhash[((pid)&(pid_hashsz-1))]) 71 72 extern uint_t nproc; 73 extern struct kmem_cache *process_cache; 74 static void upcount_init(void); 75 76 kmutex_t pidlock; /* global process lock */ 77 kmutex_t pr_pidlock; /* /proc global process lock */ 78 kcondvar_t *pr_pid_cv; /* for /proc, one per process slot */ 79 struct plock *proc_lock; /* persistent array of p_lock's */ 80 81 /* 82 * See the comment above pid_getlockslot() for a detailed explanation of this 83 * constant. Note that a PLOCK_SHIFT of 3 implies 64-byte coherence 84 * granularity; if the coherence granularity is ever changed, this constant 85 * should be modified to reflect the change to minimize proc_lock false 86 * sharing (correctness, however, is guaranteed regardless of the coherence 87 * granularity). 88 */ 89 #define PLOCK_SHIFT 3 90 91 static kmutex_t pidlinklock; 92 static struct pid **pidhash; 93 static pid_t minpid; 94 static pid_t mpid = FAMOUS_PIDS; /* one more than the last famous pid */ 95 static union procent *procdir; 96 static union procent *procentfree; 97 98 static struct pid * 99 pid_lookup(pid_t pid) 100 { 101 struct pid *pidp; 102 103 ASSERT(MUTEX_HELD(&pidlinklock)); 104 105 for (pidp = HASHPID(pid); pidp; pidp = pidp->pid_link) { 106 if (pidp->pid_id == pid) { 107 ASSERT(pidp->pid_ref > 0); 108 break; 109 } 110 } 111 return (pidp); 112 } 113 114 struct pid * 115 pid_find(pid_t pid) 116 { 117 struct pid *pidp; 118 119 mutex_enter(&pidlinklock); 120 pidp = pid_lookup(pid); 121 mutex_exit(&pidlinklock); 122 123 return (pidp); 124 } 125 126 void 127 pid_setmin(void) 128 { 129 if (jump_pid && jump_pid > mpid) 130 minpid = mpid = jump_pid; 131 else 132 minpid = mpid; 133 } 134 135 /* 136 * When prslots are simply used as an index to determine a process' p_lock, 137 * adjacent prslots share adjacent p_locks. On machines where the size 138 * of a mutex is smaller than that of a cache line (which, as of this writing, 139 * is true for all machines on which Solaris runs), this can potentially 140 * induce false sharing. The standard solution for false sharing is to pad 141 * out one's data structures (in this case, struct plock). However, 142 * given the size and (generally) sparse use of the proc_lock array, this 143 * is suboptimal. We therefore stride through the proc_lock array with 144 * a stride of PLOCK_SHIFT. PLOCK_SHIFT should be defined as: 145 * 146 * log_2 (coherence_granularity / sizeof (kmutex_t)) 147 * 148 * Under this scheme, false sharing is still possible -- but only when 149 * the number of active processes is very large. Note that the one-to-one 150 * mapping between prslots and lockslots is maintained. 151 */ 152 static int 153 pid_getlockslot(int prslot) 154 { 155 int even = (v.v_proc >> PLOCK_SHIFT) << PLOCK_SHIFT; 156 int perlap = even >> PLOCK_SHIFT; 157 158 if (prslot >= even) 159 return (prslot); 160 161 return (((prslot % perlap) << PLOCK_SHIFT) + (prslot / perlap)); 162 } 163 164 /* 165 * This function allocates a pid structure, a free pid, and optionally a 166 * slot in the proc table for it. 167 * 168 * pid_allocate() returns the new pid on success, -1 on failure. 169 */ 170 pid_t 171 pid_allocate(proc_t *prp, pid_t pid, int flags) 172 { 173 struct pid *pidp; 174 union procent *pep; 175 pid_t newpid, startpid; 176 177 pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP); 178 179 mutex_enter(&pidlinklock); 180 if ((flags & PID_ALLOC_PROC) && (pep = procentfree) == NULL) { 181 /* 182 * ran out of /proc directory entries 183 */ 184 goto failed; 185 } 186 187 if (pid != 0) { 188 VERIFY(minpid == 0); 189 VERIFY3P(pid, <, mpid); 190 VERIFY3P(pid_lookup(pid), ==, NULL); 191 newpid = pid; 192 } else { 193 /* 194 * Allocate a pid 195 */ 196 ASSERT(minpid <= mpid && mpid < maxpid); 197 198 startpid = mpid; 199 for (;;) { 200 newpid = mpid; 201 if (++mpid == maxpid) 202 mpid = minpid; 203 204 if (pid_lookup(newpid) == NULL) 205 break; 206 207 if (mpid == startpid) 208 goto failed; 209 } 210 } 211 212 /* 213 * Put pid into the pid hash table. 214 */ 215 pidp->pid_link = HASHPID(newpid); 216 HASHPID(newpid) = pidp; 217 pidp->pid_ref = 1; 218 pidp->pid_id = newpid; 219 220 if (flags & PID_ALLOC_PROC) { 221 procentfree = pep->pe_next; 222 pidp->pid_prslot = pep - procdir; 223 pep->pe_proc = prp; 224 prp->p_pidp = pidp; 225 prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)]; 226 } else { 227 pidp->pid_prslot = 0; 228 } 229 230 mutex_exit(&pidlinklock); 231 232 return (newpid); 233 234 failed: 235 mutex_exit(&pidlinklock); 236 kmem_free(pidp, sizeof (struct pid)); 237 return (-1); 238 } 239 240 /* 241 * decrement the reference count for pid 242 */ 243 int 244 pid_rele(struct pid *pidp) 245 { 246 struct pid **pidpp; 247 248 mutex_enter(&pidlinklock); 249 ASSERT(pidp != &pid0); 250 251 pidpp = &HASHPID(pidp->pid_id); 252 for (;;) { 253 ASSERT(*pidpp != NULL); 254 if (*pidpp == pidp) 255 break; 256 pidpp = &(*pidpp)->pid_link; 257 } 258 259 *pidpp = pidp->pid_link; 260 mutex_exit(&pidlinklock); 261 262 kmem_free(pidp, sizeof (*pidp)); 263 return (0); 264 } 265 266 void 267 proc_entry_free(struct pid *pidp) 268 { 269 mutex_enter(&pidlinklock); 270 pidp->pid_prinactive = 1; 271 procdir[pidp->pid_prslot].pe_next = procentfree; 272 procentfree = &procdir[pidp->pid_prslot]; 273 mutex_exit(&pidlinklock); 274 } 275 276 void 277 pid_exit(proc_t *prp) 278 { 279 struct pid *pidp; 280 281 ASSERT(MUTEX_HELD(&pidlock)); 282 283 /* 284 * Exit process group. If it is NULL, it's because fork failed 285 * before calling pgjoin(). 286 */ 287 ASSERT(prp->p_pgidp != NULL || prp->p_stat == SIDL); 288 if (prp->p_pgidp != NULL) 289 pgexit(prp); 290 291 sess_rele(prp->p_sessp, B_TRUE); 292 293 pidp = prp->p_pidp; 294 295 proc_entry_free(pidp); 296 297 if (audit_active) 298 audit_pfree(prp); 299 300 if (practive == prp) { 301 practive = prp->p_next; 302 } 303 304 if (prp->p_next) { 305 prp->p_next->p_prev = prp->p_prev; 306 } 307 if (prp->p_prev) { 308 prp->p_prev->p_next = prp->p_next; 309 } 310 311 PID_RELE(pidp); 312 313 mutex_destroy(&prp->p_crlock); 314 kmem_cache_free(process_cache, prp); 315 nproc--; 316 } 317 318 /* 319 * Find a process visible from the specified zone given its process ID. 320 */ 321 proc_t * 322 prfind_zone(pid_t pid, zoneid_t zoneid) 323 { 324 struct pid *pidp; 325 proc_t *p; 326 327 ASSERT(MUTEX_HELD(&pidlock)); 328 329 mutex_enter(&pidlinklock); 330 pidp = pid_lookup(pid); 331 mutex_exit(&pidlinklock); 332 if (pidp != NULL && pidp->pid_prinactive == 0) { 333 p = procdir[pidp->pid_prslot].pe_proc; 334 if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) 335 return (p); 336 } 337 return (NULL); 338 } 339 340 /* 341 * Find a process given its process ID. This obeys zone restrictions, 342 * so if the caller is in a non-global zone it won't find processes 343 * associated with other zones. Use prfind_zone(pid, ALL_ZONES) to 344 * bypass this restriction. 345 */ 346 proc_t * 347 prfind(pid_t pid) 348 { 349 zoneid_t zoneid; 350 351 if (INGLOBALZONE(curproc)) 352 zoneid = ALL_ZONES; 353 else 354 zoneid = getzoneid(); 355 return (prfind_zone(pid, zoneid)); 356 } 357 358 proc_t * 359 pgfind_zone(pid_t pgid, zoneid_t zoneid) 360 { 361 struct pid *pidp; 362 363 ASSERT(MUTEX_HELD(&pidlock)); 364 365 mutex_enter(&pidlinklock); 366 pidp = pid_lookup(pgid); 367 mutex_exit(&pidlinklock); 368 if (pidp != NULL) { 369 proc_t *p = pidp->pid_pglink; 370 371 if (zoneid == ALL_ZONES || pgid == 0 || p == NULL || 372 p->p_zone->zone_id == zoneid) 373 return (p); 374 } 375 return (NULL); 376 } 377 378 /* 379 * return the head of the list of processes whose process group ID is 'pgid', 380 * or NULL, if no such process group 381 */ 382 proc_t * 383 pgfind(pid_t pgid) 384 { 385 zoneid_t zoneid; 386 387 if (INGLOBALZONE(curproc)) 388 zoneid = ALL_ZONES; 389 else 390 zoneid = getzoneid(); 391 return (pgfind_zone(pgid, zoneid)); 392 } 393 394 /* 395 * Sets P_PR_LOCK on a non-system process. Process must be fully created 396 * and not exiting to succeed. 397 * 398 * Returns 0 on success. 399 * Returns 1 if P_PR_LOCK is set. 400 * Returns -1 if proc is in invalid state. 401 */ 402 int 403 sprtrylock_proc(proc_t *p) 404 { 405 ASSERT(MUTEX_HELD(&p->p_lock)); 406 407 /* skip system and incomplete processes */ 408 if (p->p_stat == SIDL || p->p_stat == SZOMB || 409 (p->p_flag & (SSYS | SEXITING | SEXITLWPS))) { 410 return (-1); 411 } 412 413 if (p->p_proc_flag & P_PR_LOCK) 414 return (1); 415 416 p->p_proc_flag |= P_PR_LOCK; 417 THREAD_KPRI_REQUEST(); 418 419 return (0); 420 } 421 422 /* 423 * Wait for P_PR_LOCK to become clear. Returns with p_lock dropped, 424 * and the proc pointer no longer valid, as the proc may have exited. 425 */ 426 void 427 sprwaitlock_proc(proc_t *p) 428 { 429 kmutex_t *mp; 430 431 ASSERT(MUTEX_HELD(&p->p_lock)); 432 ASSERT(p->p_proc_flag & P_PR_LOCK); 433 434 /* 435 * p_lock is persistent, but p itself is not -- it could 436 * vanish during cv_wait(). Load p->p_lock now so we can 437 * drop it after cv_wait() without referencing p. 438 */ 439 mp = &p->p_lock; 440 cv_wait(&pr_pid_cv[p->p_slot], mp); 441 mutex_exit(mp); 442 } 443 444 /* 445 * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK. 446 * Returns the proc pointer on success, NULL on failure. sprlock() is 447 * really just a stripped-down version of pr_p_lock() to allow practive 448 * walkers like dofusers() and dumpsys() to synchronize with /proc. 449 */ 450 proc_t * 451 sprlock_zone(pid_t pid, zoneid_t zoneid) 452 { 453 proc_t *p; 454 int ret; 455 456 for (;;) { 457 mutex_enter(&pidlock); 458 if ((p = prfind_zone(pid, zoneid)) == NULL) { 459 mutex_exit(&pidlock); 460 return (NULL); 461 } 462 mutex_enter(&p->p_lock); 463 mutex_exit(&pidlock); 464 465 if (panicstr) 466 return (p); 467 468 ret = sprtrylock_proc(p); 469 if (ret == -1) { 470 mutex_exit(&p->p_lock); 471 return (NULL); 472 } else if (ret == 0) { 473 break; 474 } 475 sprwaitlock_proc(p); 476 } 477 return (p); 478 } 479 480 proc_t * 481 sprlock(pid_t pid) 482 { 483 zoneid_t zoneid; 484 485 if (INGLOBALZONE(curproc)) 486 zoneid = ALL_ZONES; 487 else 488 zoneid = getzoneid(); 489 return (sprlock_zone(pid, zoneid)); 490 } 491 492 void 493 sprlock_proc(proc_t *p) 494 { 495 ASSERT(MUTEX_HELD(&p->p_lock)); 496 497 while (p->p_proc_flag & P_PR_LOCK) { 498 cv_wait(&pr_pid_cv[p->p_slot], &p->p_lock); 499 } 500 501 p->p_proc_flag |= P_PR_LOCK; 502 THREAD_KPRI_REQUEST(); 503 } 504 505 void 506 sprunlock(proc_t *p) 507 { 508 if (panicstr) { 509 mutex_exit(&p->p_lock); 510 return; 511 } 512 513 ASSERT(p->p_proc_flag & P_PR_LOCK); 514 ASSERT(MUTEX_HELD(&p->p_lock)); 515 516 cv_signal(&pr_pid_cv[p->p_slot]); 517 p->p_proc_flag &= ~P_PR_LOCK; 518 mutex_exit(&p->p_lock); 519 THREAD_KPRI_RELEASE(); 520 } 521 522 void 523 pid_init(void) 524 { 525 int i; 526 527 pid_hashsz = 1 << highbit(v.v_proc / pid_hashlen); 528 529 pidhash = kmem_zalloc(sizeof (struct pid *) * pid_hashsz, KM_SLEEP); 530 procdir = kmem_alloc(sizeof (union procent) * v.v_proc, KM_SLEEP); 531 pr_pid_cv = kmem_zalloc(sizeof (kcondvar_t) * v.v_proc, KM_SLEEP); 532 proc_lock = kmem_zalloc(sizeof (struct plock) * v.v_proc, KM_SLEEP); 533 534 nproc = 1; 535 practive = proc_sched; 536 proc_sched->p_next = NULL; 537 procdir[0].pe_proc = proc_sched; 538 539 procentfree = &procdir[1]; 540 for (i = 1; i < v.v_proc - 1; i++) 541 procdir[i].pe_next = &procdir[i+1]; 542 procdir[i].pe_next = NULL; 543 544 HASHPID(0) = &pid0; 545 546 upcount_init(); 547 } 548 549 proc_t * 550 pid_entry(int slot) 551 { 552 union procent *pep; 553 proc_t *prp; 554 555 ASSERT(MUTEX_HELD(&pidlock)); 556 ASSERT(slot >= 0 && slot < v.v_proc); 557 558 pep = procdir[slot].pe_next; 559 if (pep >= procdir && pep < &procdir[v.v_proc]) 560 return (NULL); 561 prp = procdir[slot].pe_proc; 562 if (prp != 0 && prp->p_stat == SIDL) 563 return (NULL); 564 return (prp); 565 } 566 567 /* 568 * Send the specified signal to all processes whose process group ID is 569 * equal to 'pgid' 570 */ 571 572 void 573 signal(pid_t pgid, int sig) 574 { 575 struct pid *pidp; 576 proc_t *prp; 577 578 mutex_enter(&pidlock); 579 mutex_enter(&pidlinklock); 580 if (pgid == 0 || (pidp = pid_lookup(pgid)) == NULL) { 581 mutex_exit(&pidlinklock); 582 mutex_exit(&pidlock); 583 return; 584 } 585 mutex_exit(&pidlinklock); 586 for (prp = pidp->pid_pglink; prp; prp = prp->p_pglink) { 587 mutex_enter(&prp->p_lock); 588 sigtoproc(prp, NULL, sig); 589 mutex_exit(&prp->p_lock); 590 } 591 mutex_exit(&pidlock); 592 } 593 594 /* 595 * Send the specified signal to the specified process 596 */ 597 598 void 599 prsignal(struct pid *pidp, int sig) 600 { 601 if (!(pidp->pid_prinactive)) 602 psignal(procdir[pidp->pid_prslot].pe_proc, sig); 603 } 604 605 #include <sys/sunddi.h> 606 607 /* 608 * DDI/DKI interfaces for drivers to send signals to processes 609 */ 610 611 /* 612 * obtain an opaque reference to a process for signaling 613 */ 614 void * 615 proc_ref(void) 616 { 617 struct pid *pidp; 618 619 mutex_enter(&pidlock); 620 pidp = curproc->p_pidp; 621 PID_HOLD(pidp); 622 mutex_exit(&pidlock); 623 624 return (pidp); 625 } 626 627 /* 628 * release a reference to a process 629 * - a process can exit even if a driver has a reference to it 630 * - one proc_unref for every proc_ref 631 */ 632 void 633 proc_unref(void *pref) 634 { 635 mutex_enter(&pidlock); 636 PID_RELE((struct pid *)pref); 637 mutex_exit(&pidlock); 638 } 639 640 /* 641 * send a signal to a process 642 * 643 * - send the process the signal 644 * - if the process went away, return a -1 645 * - if the process is still there return 0 646 */ 647 int 648 proc_signal(void *pref, int sig) 649 { 650 struct pid *pidp = pref; 651 652 prsignal(pidp, sig); 653 return (pidp->pid_prinactive ? -1 : 0); 654 } 655 656 657 static struct upcount **upc_hash; /* a boot time allocated array */ 658 static ulong_t upc_hashmask; 659 #define UPC_HASH(x, y) ((ulong_t)(x ^ y) & upc_hashmask) 660 661 /* 662 * Get us off the ground. Called once at boot. 663 */ 664 void 665 upcount_init(void) 666 { 667 ulong_t upc_hashsize; 668 669 /* 670 * An entry per MB of memory is our current guess 671 */ 672 /* 673 * 2^20 is a meg, so shifting right by 20 - PAGESHIFT 674 * converts pages to megs (without overflowing a u_int 675 * if you have more than 4G of memory, like ptob(physmem)/1M 676 * would). 677 */ 678 upc_hashsize = (1 << highbit(physmem >> (20 - PAGESHIFT))); 679 upc_hashmask = upc_hashsize - 1; 680 upc_hash = kmem_zalloc(upc_hashsize * sizeof (struct upcount *), 681 KM_SLEEP); 682 } 683 684 /* 685 * Increment the number of processes associated with a given uid and zoneid. 686 */ 687 void 688 upcount_inc(uid_t uid, zoneid_t zoneid) 689 { 690 struct upcount **upc, **hupc; 691 struct upcount *new; 692 693 ASSERT(MUTEX_HELD(&pidlock)); 694 new = NULL; 695 hupc = &upc_hash[UPC_HASH(uid, zoneid)]; 696 top: 697 upc = hupc; 698 while ((*upc) != NULL) { 699 if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) { 700 (*upc)->up_count++; 701 if (new) { 702 /* 703 * did not need `new' afterall. 704 */ 705 kmem_free(new, sizeof (*new)); 706 } 707 return; 708 } 709 upc = &(*upc)->up_next; 710 } 711 712 /* 713 * There is no entry for this <uid,zoneid> pair. 714 * Allocate one. If we have to drop pidlock, check 715 * again. 716 */ 717 if (new == NULL) { 718 new = (struct upcount *)kmem_alloc(sizeof (*new), KM_NOSLEEP); 719 if (new == NULL) { 720 mutex_exit(&pidlock); 721 new = (struct upcount *)kmem_alloc(sizeof (*new), 722 KM_SLEEP); 723 mutex_enter(&pidlock); 724 goto top; 725 } 726 } 727 728 729 /* 730 * On the assumption that a new user is going to do some 731 * more forks, put the new upcount structure on the front. 732 */ 733 upc = hupc; 734 735 new->up_uid = uid; 736 new->up_zoneid = zoneid; 737 new->up_count = 1; 738 new->up_next = *upc; 739 740 *upc = new; 741 } 742 743 /* 744 * Decrement the number of processes a given uid and zoneid has. 745 */ 746 void 747 upcount_dec(uid_t uid, zoneid_t zoneid) 748 { 749 struct upcount **upc; 750 struct upcount *done; 751 752 ASSERT(MUTEX_HELD(&pidlock)); 753 754 upc = &upc_hash[UPC_HASH(uid, zoneid)]; 755 while ((*upc) != NULL) { 756 if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) { 757 (*upc)->up_count--; 758 if ((*upc)->up_count == 0) { 759 done = *upc; 760 *upc = (*upc)->up_next; 761 kmem_free(done, sizeof (*done)); 762 } 763 return; 764 } 765 upc = &(*upc)->up_next; 766 } 767 cmn_err(CE_PANIC, "decr_upcount-off the end"); 768 } 769 770 /* 771 * Returns the number of processes a uid has. 772 * Non-existent uid's are assumed to have no processes. 773 */ 774 int 775 upcount_get(uid_t uid, zoneid_t zoneid) 776 { 777 struct upcount *upc; 778 779 ASSERT(MUTEX_HELD(&pidlock)); 780 781 upc = upc_hash[UPC_HASH(uid, zoneid)]; 782 while (upc != NULL) { 783 if (upc->up_uid == uid && upc->up_zoneid == zoneid) { 784 return (upc->up_count); 785 } 786 upc = upc->up_next; 787 } 788 return (0); 789 } 790