1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 #include <sys/types.h> 30 #include <sys/param.h> 31 #include <sys/sysmacros.h> 32 #include <sys/proc.h> 33 #include <sys/kmem.h> 34 #include <sys/tuneable.h> 35 #include <sys/var.h> 36 #include <sys/cred.h> 37 #include <sys/systm.h> 38 #include <sys/prsystm.h> 39 #include <sys/vnode.h> 40 #include <sys/session.h> 41 #include <sys/cpuvar.h> 42 #include <sys/cmn_err.h> 43 #include <sys/bitmap.h> 44 #include <sys/debug.h> 45 #include <c2/audit.h> 46 #include <sys/zone.h> 47 48 /* directory entries for /proc */ 49 union procent { 50 proc_t *pe_proc; 51 union procent *pe_next; 52 }; 53 54 struct pid pid0 = { 55 0, /* pid_prinactive */ 56 1, /* pid_pgorphaned */ 57 0, /* pid_padding */ 58 0, /* pid_prslot */ 59 0, /* pid_id */ 60 NULL, /* pid_pglink */ 61 NULL, /* pid_pgtail */ 62 NULL, /* pid_link */ 63 3 /* pid_ref */ 64 }; 65 66 static int pid_hashlen = 4; /* desired average hash chain length */ 67 static int pid_hashsz; /* number of buckets in the hash table */ 68 69 #define HASHPID(pid) (pidhash[((pid)&(pid_hashsz-1))]) 70 71 extern uint_t nproc; 72 extern struct kmem_cache *process_cache; 73 static void upcount_init(void); 74 75 kmutex_t pidlock; /* global process lock */ 76 kmutex_t pr_pidlock; /* /proc global process lock */ 77 kcondvar_t *pr_pid_cv; /* for /proc, one per process slot */ 78 struct plock *proc_lock; /* persistent array of p_lock's */ 79 80 /* 81 * See the comment above pid_getlockslot() for a detailed explanation of this 82 * constant. Note that a PLOCK_SHIFT of 3 implies 64-byte coherence 83 * granularity; if the coherence granularity is ever changed, this constant 84 * should be modified to reflect the change to minimize proc_lock false 85 * sharing (correctness, however, is guaranteed regardless of the coherence 86 * granularity). 87 */ 88 #define PLOCK_SHIFT 3 89 90 static kmutex_t pidlinklock; 91 static struct pid **pidhash; 92 static pid_t minpid; 93 static pid_t mpid = FAMOUS_PIDS; /* one more than the last famous pid */ 94 static union procent *procdir; 95 static union procent *procentfree; 96 97 static struct pid * 98 pid_lookup(pid_t pid) 99 { 100 struct pid *pidp; 101 102 ASSERT(MUTEX_HELD(&pidlinklock)); 103 104 for (pidp = HASHPID(pid); pidp; pidp = pidp->pid_link) { 105 if (pidp->pid_id == pid) { 106 ASSERT(pidp->pid_ref > 0); 107 break; 108 } 109 } 110 return (pidp); 111 } 112 113 void 114 pid_setmin(void) 115 { 116 if (jump_pid && jump_pid > mpid) 117 minpid = mpid = jump_pid; 118 else 119 minpid = mpid; 120 } 121 122 /* 123 * When prslots are simply used as an index to determine a process' p_lock, 124 * adjacent prslots share adjacent p_locks. On machines where the size 125 * of a mutex is smaller than that of a cache line (which, as of this writing, 126 * is true for all machines on which Solaris runs), this can potentially 127 * induce false sharing. The standard solution for false sharing is to pad 128 * out one's data structures (in this case, struct plock). However, 129 * given the size and (generally) sparse use of the proc_lock array, this 130 * is suboptimal. We therefore stride through the proc_lock array with 131 * a stride of PLOCK_SHIFT. PLOCK_SHIFT should be defined as: 132 * 133 * log_2 (coherence_granularity / sizeof (kmutex_t)) 134 * 135 * Under this scheme, false sharing is still possible -- but only when 136 * the number of active processes is very large. Note that the one-to-one 137 * mapping between prslots and lockslots is maintained. 138 */ 139 static int 140 pid_getlockslot(int prslot) 141 { 142 int even = (v.v_proc >> PLOCK_SHIFT) << PLOCK_SHIFT; 143 int perlap = even >> PLOCK_SHIFT; 144 145 if (prslot >= even) 146 return (prslot); 147 148 return (((prslot % perlap) << PLOCK_SHIFT) + (prslot / perlap)); 149 } 150 151 /* 152 * This function allocates a pid structure, a free pid, and optionally a 153 * slot in the proc table for it. 154 * 155 * pid_allocate() returns the new pid on success, -1 on failure. 156 */ 157 pid_t 158 pid_allocate(proc_t *prp, pid_t pid, int flags) 159 { 160 struct pid *pidp; 161 union procent *pep; 162 pid_t newpid, startpid; 163 164 pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP); 165 166 mutex_enter(&pidlinklock); 167 if ((flags & PID_ALLOC_PROC) && (pep = procentfree) == NULL) { 168 /* 169 * ran out of /proc directory entries 170 */ 171 goto failed; 172 } 173 174 if (pid != 0) { 175 VERIFY(minpid == 0); 176 VERIFY3P(pid, <, mpid); 177 VERIFY3P(pid_lookup(pid), ==, NULL); 178 newpid = pid; 179 } else { 180 /* 181 * Allocate a pid 182 */ 183 ASSERT(minpid <= mpid && mpid < maxpid); 184 185 startpid = mpid; 186 for (;;) { 187 newpid = mpid; 188 if (++mpid == maxpid) 189 mpid = minpid; 190 191 if (pid_lookup(newpid) == NULL) 192 break; 193 194 if (mpid == startpid) 195 goto failed; 196 } 197 } 198 199 /* 200 * Put pid into the pid hash table. 201 */ 202 pidp->pid_link = HASHPID(newpid); 203 HASHPID(newpid) = pidp; 204 pidp->pid_ref = 1; 205 pidp->pid_id = newpid; 206 207 if (flags & PID_ALLOC_PROC) { 208 procentfree = pep->pe_next; 209 pidp->pid_prslot = pep - procdir; 210 pep->pe_proc = prp; 211 prp->p_pidp = pidp; 212 prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)]; 213 } else { 214 pidp->pid_prslot = 0; 215 } 216 217 mutex_exit(&pidlinklock); 218 219 return (newpid); 220 221 failed: 222 mutex_exit(&pidlinklock); 223 kmem_free(pidp, sizeof (struct pid)); 224 return (-1); 225 } 226 227 /* 228 * decrement the reference count for pid 229 */ 230 int 231 pid_rele(struct pid *pidp) 232 { 233 struct pid **pidpp; 234 235 mutex_enter(&pidlinklock); 236 ASSERT(pidp != &pid0); 237 238 pidpp = &HASHPID(pidp->pid_id); 239 for (;;) { 240 ASSERT(*pidpp != NULL); 241 if (*pidpp == pidp) 242 break; 243 pidpp = &(*pidpp)->pid_link; 244 } 245 246 *pidpp = pidp->pid_link; 247 mutex_exit(&pidlinklock); 248 249 kmem_free(pidp, sizeof (*pidp)); 250 return (0); 251 } 252 253 void 254 proc_entry_free(struct pid *pidp) 255 { 256 mutex_enter(&pidlinklock); 257 pidp->pid_prinactive = 1; 258 procdir[pidp->pid_prslot].pe_next = procentfree; 259 procentfree = &procdir[pidp->pid_prslot]; 260 mutex_exit(&pidlinklock); 261 } 262 263 void 264 pid_exit(proc_t *prp) 265 { 266 struct pid *pidp; 267 268 ASSERT(MUTEX_HELD(&pidlock)); 269 270 /* 271 * Exit process group. If it is NULL, it's because fork failed 272 * before calling pgjoin(). 273 */ 274 ASSERT(prp->p_pgidp != NULL || prp->p_stat == SIDL); 275 if (prp->p_pgidp != NULL) 276 pgexit(prp); 277 278 sess_rele(prp->p_sessp, B_TRUE); 279 280 pidp = prp->p_pidp; 281 282 proc_entry_free(pidp); 283 284 if (audit_active) 285 audit_pfree(prp); 286 287 if (practive == prp) { 288 practive = prp->p_next; 289 } 290 291 if (prp->p_next) { 292 prp->p_next->p_prev = prp->p_prev; 293 } 294 if (prp->p_prev) { 295 prp->p_prev->p_next = prp->p_next; 296 } 297 298 PID_RELE(pidp); 299 300 mutex_destroy(&prp->p_crlock); 301 kmem_cache_free(process_cache, prp); 302 nproc--; 303 } 304 305 /* 306 * Find a process visible from the specified zone given its process ID. 307 */ 308 proc_t * 309 prfind_zone(pid_t pid, zoneid_t zoneid) 310 { 311 struct pid *pidp; 312 proc_t *p; 313 314 ASSERT(MUTEX_HELD(&pidlock)); 315 316 mutex_enter(&pidlinklock); 317 pidp = pid_lookup(pid); 318 mutex_exit(&pidlinklock); 319 if (pidp != NULL && pidp->pid_prinactive == 0) { 320 p = procdir[pidp->pid_prslot].pe_proc; 321 if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) 322 return (p); 323 } 324 return (NULL); 325 } 326 327 /* 328 * Find a process given its process ID. This obeys zone restrictions, 329 * so if the caller is in a non-global zone it won't find processes 330 * associated with other zones. Use prfind_zone(pid, ALL_ZONES) to 331 * bypass this restriction. 332 */ 333 proc_t * 334 prfind(pid_t pid) 335 { 336 zoneid_t zoneid; 337 338 if (INGLOBALZONE(curproc)) 339 zoneid = ALL_ZONES; 340 else 341 zoneid = getzoneid(); 342 return (prfind_zone(pid, zoneid)); 343 } 344 345 proc_t * 346 pgfind_zone(pid_t pgid, zoneid_t zoneid) 347 { 348 struct pid *pidp; 349 350 ASSERT(MUTEX_HELD(&pidlock)); 351 352 mutex_enter(&pidlinklock); 353 pidp = pid_lookup(pgid); 354 mutex_exit(&pidlinklock); 355 if (pidp != NULL) { 356 proc_t *p = pidp->pid_pglink; 357 358 if (zoneid == ALL_ZONES || pgid == 0 || p == NULL || 359 p->p_zone->zone_id == zoneid) 360 return (p); 361 } 362 return (NULL); 363 } 364 365 /* 366 * return the head of the list of processes whose process group ID is 'pgid', 367 * or NULL, if no such process group 368 */ 369 proc_t * 370 pgfind(pid_t pgid) 371 { 372 zoneid_t zoneid; 373 374 if (INGLOBALZONE(curproc)) 375 zoneid = ALL_ZONES; 376 else 377 zoneid = getzoneid(); 378 return (pgfind_zone(pgid, zoneid)); 379 } 380 381 /* 382 * Sets P_PR_LOCK on a non-system process. Process must be fully created 383 * and not exiting to succeed. 384 * 385 * Returns 0 on success. 386 * Returns 1 if P_PR_LOCK is set. 387 * Returns -1 if proc is in invalid state. 388 */ 389 int 390 sprtrylock_proc(proc_t *p) 391 { 392 ASSERT(MUTEX_HELD(&p->p_lock)); 393 394 /* skip system and incomplete processes */ 395 if (p->p_stat == SIDL || p->p_stat == SZOMB || 396 (p->p_flag & (SSYS | SEXITING | SEXITLWPS))) { 397 return (-1); 398 } 399 400 if (p->p_proc_flag & P_PR_LOCK) 401 return (1); 402 403 p->p_proc_flag |= P_PR_LOCK; 404 THREAD_KPRI_REQUEST(); 405 406 return (0); 407 } 408 409 /* 410 * Wait for P_PR_LOCK to become clear. Returns with p_lock dropped, 411 * and the proc pointer no longer valid, as the proc may have exited. 412 */ 413 void 414 sprwaitlock_proc(proc_t *p) 415 { 416 kmutex_t *mp; 417 418 ASSERT(MUTEX_HELD(&p->p_lock)); 419 ASSERT(p->p_proc_flag & P_PR_LOCK); 420 421 /* 422 * p_lock is persistent, but p itself is not -- it could 423 * vanish during cv_wait(). Load p->p_lock now so we can 424 * drop it after cv_wait() without referencing p. 425 */ 426 mp = &p->p_lock; 427 cv_wait(&pr_pid_cv[p->p_slot], mp); 428 mutex_exit(mp); 429 } 430 431 /* 432 * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK. 433 * Returns the proc pointer on success, NULL on failure. sprlock() is 434 * really just a stripped-down version of pr_p_lock() to allow practive 435 * walkers like dofusers() and dumpsys() to synchronize with /proc. 436 */ 437 proc_t * 438 sprlock_zone(pid_t pid, zoneid_t zoneid) 439 { 440 proc_t *p; 441 int ret; 442 443 for (;;) { 444 mutex_enter(&pidlock); 445 if ((p = prfind_zone(pid, zoneid)) == NULL) { 446 mutex_exit(&pidlock); 447 return (NULL); 448 } 449 mutex_enter(&p->p_lock); 450 mutex_exit(&pidlock); 451 452 if (panicstr) 453 return (p); 454 455 ret = sprtrylock_proc(p); 456 if (ret == -1) { 457 mutex_exit(&p->p_lock); 458 return (NULL); 459 } else if (ret == 0) { 460 break; 461 } 462 sprwaitlock_proc(p); 463 } 464 return (p); 465 } 466 467 proc_t * 468 sprlock(pid_t pid) 469 { 470 zoneid_t zoneid; 471 472 if (INGLOBALZONE(curproc)) 473 zoneid = ALL_ZONES; 474 else 475 zoneid = getzoneid(); 476 return (sprlock_zone(pid, zoneid)); 477 } 478 479 void 480 sprlock_proc(proc_t *p) 481 { 482 ASSERT(MUTEX_HELD(&p->p_lock)); 483 484 while (p->p_proc_flag & P_PR_LOCK) { 485 cv_wait(&pr_pid_cv[p->p_slot], &p->p_lock); 486 } 487 488 p->p_proc_flag |= P_PR_LOCK; 489 THREAD_KPRI_REQUEST(); 490 } 491 492 void 493 sprunlock(proc_t *p) 494 { 495 if (panicstr) { 496 mutex_exit(&p->p_lock); 497 return; 498 } 499 500 ASSERT(p->p_proc_flag & P_PR_LOCK); 501 ASSERT(MUTEX_HELD(&p->p_lock)); 502 503 cv_signal(&pr_pid_cv[p->p_slot]); 504 p->p_proc_flag &= ~P_PR_LOCK; 505 mutex_exit(&p->p_lock); 506 THREAD_KPRI_RELEASE(); 507 } 508 509 void 510 pid_init(void) 511 { 512 int i; 513 514 pid_hashsz = 1 << highbit(v.v_proc / pid_hashlen); 515 516 pidhash = kmem_zalloc(sizeof (struct pid *) * pid_hashsz, KM_SLEEP); 517 procdir = kmem_alloc(sizeof (union procent) * v.v_proc, KM_SLEEP); 518 pr_pid_cv = kmem_zalloc(sizeof (kcondvar_t) * v.v_proc, KM_SLEEP); 519 proc_lock = kmem_zalloc(sizeof (struct plock) * v.v_proc, KM_SLEEP); 520 521 nproc = 1; 522 practive = proc_sched; 523 proc_sched->p_next = NULL; 524 procdir[0].pe_proc = proc_sched; 525 526 procentfree = &procdir[1]; 527 for (i = 1; i < v.v_proc - 1; i++) 528 procdir[i].pe_next = &procdir[i+1]; 529 procdir[i].pe_next = NULL; 530 531 HASHPID(0) = &pid0; 532 533 upcount_init(); 534 } 535 536 proc_t * 537 pid_entry(int slot) 538 { 539 union procent *pep; 540 proc_t *prp; 541 542 ASSERT(MUTEX_HELD(&pidlock)); 543 ASSERT(slot >= 0 && slot < v.v_proc); 544 545 pep = procdir[slot].pe_next; 546 if (pep >= procdir && pep < &procdir[v.v_proc]) 547 return (NULL); 548 prp = procdir[slot].pe_proc; 549 if (prp != 0 && prp->p_stat == SIDL) 550 return (NULL); 551 return (prp); 552 } 553 554 /* 555 * Send the specified signal to all processes whose process group ID is 556 * equal to 'pgid' 557 */ 558 559 void 560 signal(pid_t pgid, int sig) 561 { 562 struct pid *pidp; 563 proc_t *prp; 564 565 mutex_enter(&pidlock); 566 mutex_enter(&pidlinklock); 567 if (pgid == 0 || (pidp = pid_lookup(pgid)) == NULL) { 568 mutex_exit(&pidlinklock); 569 mutex_exit(&pidlock); 570 return; 571 } 572 mutex_exit(&pidlinklock); 573 for (prp = pidp->pid_pglink; prp; prp = prp->p_pglink) { 574 mutex_enter(&prp->p_lock); 575 sigtoproc(prp, NULL, sig); 576 mutex_exit(&prp->p_lock); 577 } 578 mutex_exit(&pidlock); 579 } 580 581 /* 582 * Send the specified signal to the specified process 583 */ 584 585 void 586 prsignal(struct pid *pidp, int sig) 587 { 588 if (!(pidp->pid_prinactive)) 589 psignal(procdir[pidp->pid_prslot].pe_proc, sig); 590 } 591 592 #include <sys/sunddi.h> 593 594 /* 595 * DDI/DKI interfaces for drivers to send signals to processes 596 */ 597 598 /* 599 * obtain an opaque reference to a process for signaling 600 */ 601 void * 602 proc_ref(void) 603 { 604 struct pid *pidp; 605 606 mutex_enter(&pidlock); 607 pidp = curproc->p_pidp; 608 PID_HOLD(pidp); 609 mutex_exit(&pidlock); 610 611 return (pidp); 612 } 613 614 /* 615 * release a reference to a process 616 * - a process can exit even if a driver has a reference to it 617 * - one proc_unref for every proc_ref 618 */ 619 void 620 proc_unref(void *pref) 621 { 622 mutex_enter(&pidlock); 623 PID_RELE((struct pid *)pref); 624 mutex_exit(&pidlock); 625 } 626 627 /* 628 * send a signal to a process 629 * 630 * - send the process the signal 631 * - if the process went away, return a -1 632 * - if the process is still there return 0 633 */ 634 int 635 proc_signal(void *pref, int sig) 636 { 637 struct pid *pidp = pref; 638 639 prsignal(pidp, sig); 640 return (pidp->pid_prinactive ? -1 : 0); 641 } 642 643 644 static struct upcount **upc_hash; /* a boot time allocated array */ 645 static ulong_t upc_hashmask; 646 #define UPC_HASH(x, y) ((ulong_t)(x ^ y) & upc_hashmask) 647 648 /* 649 * Get us off the ground. Called once at boot. 650 */ 651 void 652 upcount_init(void) 653 { 654 ulong_t upc_hashsize; 655 656 /* 657 * An entry per MB of memory is our current guess 658 */ 659 /* 660 * 2^20 is a meg, so shifting right by 20 - PAGESHIFT 661 * converts pages to megs (without overflowing a u_int 662 * if you have more than 4G of memory, like ptob(physmem)/1M 663 * would). 664 */ 665 upc_hashsize = (1 << highbit(physmem >> (20 - PAGESHIFT))); 666 upc_hashmask = upc_hashsize - 1; 667 upc_hash = kmem_zalloc(upc_hashsize * sizeof (struct upcount *), 668 KM_SLEEP); 669 } 670 671 /* 672 * Increment the number of processes associated with a given uid and zoneid. 673 */ 674 void 675 upcount_inc(uid_t uid, zoneid_t zoneid) 676 { 677 struct upcount **upc, **hupc; 678 struct upcount *new; 679 680 ASSERT(MUTEX_HELD(&pidlock)); 681 new = NULL; 682 hupc = &upc_hash[UPC_HASH(uid, zoneid)]; 683 top: 684 upc = hupc; 685 while ((*upc) != NULL) { 686 if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) { 687 (*upc)->up_count++; 688 if (new) { 689 /* 690 * did not need `new' afterall. 691 */ 692 kmem_free(new, sizeof (*new)); 693 } 694 return; 695 } 696 upc = &(*upc)->up_next; 697 } 698 699 /* 700 * There is no entry for this <uid,zoneid> pair. 701 * Allocate one. If we have to drop pidlock, check 702 * again. 703 */ 704 if (new == NULL) { 705 new = (struct upcount *)kmem_alloc(sizeof (*new), KM_NOSLEEP); 706 if (new == NULL) { 707 mutex_exit(&pidlock); 708 new = (struct upcount *)kmem_alloc(sizeof (*new), 709 KM_SLEEP); 710 mutex_enter(&pidlock); 711 goto top; 712 } 713 } 714 715 716 /* 717 * On the assumption that a new user is going to do some 718 * more forks, put the new upcount structure on the front. 719 */ 720 upc = hupc; 721 722 new->up_uid = uid; 723 new->up_zoneid = zoneid; 724 new->up_count = 1; 725 new->up_next = *upc; 726 727 *upc = new; 728 } 729 730 /* 731 * Decrement the number of processes a given uid and zoneid has. 732 */ 733 void 734 upcount_dec(uid_t uid, zoneid_t zoneid) 735 { 736 struct upcount **upc; 737 struct upcount *done; 738 739 ASSERT(MUTEX_HELD(&pidlock)); 740 741 upc = &upc_hash[UPC_HASH(uid, zoneid)]; 742 while ((*upc) != NULL) { 743 if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) { 744 (*upc)->up_count--; 745 if ((*upc)->up_count == 0) { 746 done = *upc; 747 *upc = (*upc)->up_next; 748 kmem_free(done, sizeof (*done)); 749 } 750 return; 751 } 752 upc = &(*upc)->up_next; 753 } 754 cmn_err(CE_PANIC, "decr_upcount-off the end"); 755 } 756 757 /* 758 * Returns the number of processes a uid has. 759 * Non-existent uid's are assumed to have no processes. 760 */ 761 int 762 upcount_get(uid_t uid, zoneid_t zoneid) 763 { 764 struct upcount *upc; 765 766 ASSERT(MUTEX_HELD(&pidlock)); 767 768 upc = upc_hash[UPC_HASH(uid, zoneid)]; 769 while (upc != NULL) { 770 if (upc->up_uid == uid && upc->up_zoneid == zoneid) { 771 return (upc->up_count); 772 } 773 upc = upc->up_next; 774 } 775 return (0); 776 } 777