/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/proc.h>
#include <sys/kmem.h>
#include <sys/tuneable.h>
#include <sys/var.h>
#include <sys/cred.h>
#include <sys/systm.h>
#include <sys/prsystm.h>
#include <sys/vnode.h>
#include <sys/session.h>
#include <sys/cpuvar.h>
#include <sys/cmn_err.h>
#include <sys/bitmap.h>
#include <sys/debug.h>
#include <c2/audit.h>
#include <sys/zone.h>

/* directory entries for /proc */
union procent {
        proc_t *pe_proc;
        union procent *pe_next;
};

struct pid pid0 = {
        0,              /* pid_prinactive */
        1,              /* pid_pgorphaned */
        0,              /* pid_padding */
        0,              /* pid_prslot */
        0,              /* pid_id */
        NULL,           /* pid_pglink */
        NULL,           /* pid_pgtail */
        NULL,           /* pid_link */
        3               /* pid_ref */
};

static int pid_hashlen = 4;     /* desired average hash chain length */
static int pid_hashsz;          /* number of buckets in the hash table */

#define HASHPID(pid)    (pidhash[((pid)&(pid_hashsz-1))])

extern uint_t nproc;
extern struct kmem_cache *process_cache;
static void upcount_init(void);

kmutex_t        pidlock;        /* global process lock */
kmutex_t        pr_pidlock;     /* /proc global process lock */
kcondvar_t      *pr_pid_cv;     /* for /proc, one per process slot */
struct plock    *proc_lock;     /* persistent array of p_lock's */

/*
 * See the comment above pid_getlockslot() for a detailed explanation of this
 * constant.  Note that a PLOCK_SHIFT of 3 implies 64-byte coherence
 * granularity; if the coherence granularity is ever changed, this constant
 * should be modified to reflect the change to minimize proc_lock false
 * sharing (correctness, however, is guaranteed regardless of the coherence
 * granularity).
 */
#define PLOCK_SHIFT     3
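
/*
 * Illustrative arithmetic, assuming the 64-byte coherence granularity and
 * the 8-byte kmutex_t implied above: the formula in the pid_getlockslot()
 * block comment gives log_2 (64 / 8) == 3, so PLOCK_SHIFT is 3 and one
 * cache line covers 1 << PLOCK_SHIFT == 8 proc_lock mutexes.
 */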

static kmutex_t pidlinklock;
static struct pid **pidhash;
static pid_t minpid;
static pid_t mpid;
static union procent *procdir;
static union procent *procentfree;

static struct pid *
pid_lookup(pid_t pid)
{
        struct pid *pidp;

        ASSERT(MUTEX_HELD(&pidlinklock));

        for (pidp = HASHPID(pid); pidp; pidp = pidp->pid_link) {
                if (pidp->pid_id == pid) {
                        ASSERT(pidp->pid_ref > 0);
                        break;
                }
        }
        return (pidp);
}

struct pid *
pid_find(pid_t pid)
{
        struct pid *pidp;

        mutex_enter(&pidlinklock);
        pidp = pid_lookup(pid);
        mutex_exit(&pidlinklock);

        return (pidp);
}

void
pid_setmin(void)
{
        if (jump_pid && jump_pid > mpid)
                minpid = mpid = jump_pid;
        else
                minpid = mpid + 1;
}

/*
 * When prslots are simply used as an index to determine a process' p_lock,
 * adjacent prslots share adjacent p_locks.  On machines where the size
 * of a mutex is smaller than that of a cache line (which, as of this writing,
 * is true for all machines on which Solaris runs), this can potentially
 * induce false sharing.  The standard solution for false sharing is to pad
 * out one's data structures (in this case, struct plock).  However,
 * given the size and (generally) sparse use of the proc_lock array, this
 * is suboptimal.  We therefore stride through the proc_lock array with
 * a stride of PLOCK_SHIFT.  PLOCK_SHIFT should be defined as:
 *
 *   log_2 (coherence_granularity / sizeof (kmutex_t))
 *
 * Under this scheme, false sharing is still possible -- but only when
 * the number of active processes is very large.  Note that the one-to-one
 * mapping between prslots and lockslots is maintained.
 */
static int
pid_getlockslot(int prslot)
{
        int even = (v.v_proc >> PLOCK_SHIFT) << PLOCK_SHIFT;
        int perlap = even >> PLOCK_SHIFT;

        if (prslot >= even)
                return (prslot);

        return (((prslot % perlap) << PLOCK_SHIFT) + (prslot / perlap));
}
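
/*
 * Worked example, using an illustrative (hypothetical) v.v_proc of 100:
 * even == 96 and perlap == 12, so
 *
 *      prslot  0 -> lockslot  0        prslot 12 -> lockslot  1
 *      prslot  1 -> lockslot  8        prslot 13 -> lockslot  9
 *      prslot  2 -> lockslot 16        prslot 96 -> lockslot 96 (identity)
 *
 * Adjacent prslots are spread 1 << PLOCK_SHIFT == 8 lockslots (one
 * coherence granule) apart, slots at or beyond `even' map to themselves,
 * and the mapping remains one-to-one.
 */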

/*
 * This function allocates a pid structure, a free pid, and optionally a
 * slot in the proc table for it.
 *
 * pid_allocate() returns the new pid on success, -1 on failure.
 */
pid_t
pid_allocate(proc_t *prp, int flags)
{
        struct pid *pidp;
        union procent *pep;
        pid_t newpid, startpid;

        pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP);

        mutex_enter(&pidlinklock);
        if ((flags & PID_ALLOC_PROC) && (pep = procentfree) == NULL) {
                /*
                 * ran out of /proc directory entries
                 */
                goto failed;
        }

        /*
         * Allocate a pid
         */
        startpid = mpid;
        do {
                newpid = (++mpid == maxpid ? mpid = minpid : mpid);
        } while (pid_lookup(newpid) && newpid != startpid);

        if (newpid == startpid && pid_lookup(newpid)) {
                /* couldn't find a free pid */
                goto failed;
        }

        /*
         * Put pid into the pid hash table.
         */
        pidp->pid_link = HASHPID(newpid);
        HASHPID(newpid) = pidp;
        pidp->pid_ref = 1;
        pidp->pid_id = newpid;

        if (flags & PID_ALLOC_PROC) {
                procentfree = pep->pe_next;
                pidp->pid_prslot = pep - procdir;
                pep->pe_proc = prp;
                prp->p_pidp = pidp;
                prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)];
        } else {
                pidp->pid_prslot = 0;
        }

        mutex_exit(&pidlinklock);

        return (newpid);

failed:
        mutex_exit(&pidlinklock);
        kmem_free(pidp, sizeof (struct pid));
        return (-1);
}

/*
 * decrement the reference count for pid
 */
int
pid_rele(struct pid *pidp)
{
        struct pid **pidpp;

        mutex_enter(&pidlinklock);
        ASSERT(pidp != &pid0);

        pidpp = &HASHPID(pidp->pid_id);
        for (;;) {
                ASSERT(*pidpp != NULL);
                if (*pidpp == pidp)
                        break;
                pidpp = &(*pidpp)->pid_link;
        }

        *pidpp = pidp->pid_link;
        mutex_exit(&pidlinklock);

        kmem_free(pidp, sizeof (*pidp));
        return (0);
}

void
proc_entry_free(struct pid *pidp)
{
        mutex_enter(&pidlinklock);
        pidp->pid_prinactive = 1;
        procdir[pidp->pid_prslot].pe_next = procentfree;
        procentfree = &procdir[pidp->pid_prslot];
        mutex_exit(&pidlinklock);
}

void
pid_exit(proc_t *prp)
{
        struct pid *pidp;

        ASSERT(MUTEX_HELD(&pidlock));

        /*
         * Exit process group.  If it is NULL, it's because fork failed
         * before calling pgjoin().
         */
        ASSERT(prp->p_pgidp != NULL || prp->p_stat == SIDL);
        if (prp->p_pgidp != NULL)
                pgexit(prp);

        sess_rele(prp->p_sessp, B_TRUE);

        pidp = prp->p_pidp;

        proc_entry_free(pidp);

#ifdef C2_AUDIT
        if (audit_active)
                audit_pfree(prp);
#endif

        if (practive == prp) {
                practive = prp->p_next;
        }

        if (prp->p_next) {
                prp->p_next->p_prev = prp->p_prev;
        }
        if (prp->p_prev) {
                prp->p_prev->p_next = prp->p_next;
        }

        PID_RELE(pidp);

        mutex_destroy(&prp->p_crlock);
        kmem_cache_free(process_cache, prp);
        nproc--;
}

/*
 * Find a process visible from the specified zone given its process ID.
 */
proc_t *
prfind_zone(pid_t pid, zoneid_t zoneid)
{
        struct pid *pidp;
        proc_t *p;

        ASSERT(MUTEX_HELD(&pidlock));

        mutex_enter(&pidlinklock);
        pidp = pid_lookup(pid);
        mutex_exit(&pidlinklock);
        if (pidp != NULL && pidp->pid_prinactive == 0) {
                p = procdir[pidp->pid_prslot].pe_proc;
                if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid)
                        return (p);
        }
        return (NULL);
}

/*
 * Find a process given its process ID.  This obeys zone restrictions,
 * so if the caller is in a non-global zone it won't find processes
 * associated with other zones.  Use prfind_zone(pid, ALL_ZONES) to
 * bypass this restriction.
 */
proc_t *
prfind(pid_t pid)
{
        zoneid_t zoneid;

        if (INGLOBALZONE(curproc))
                zoneid = ALL_ZONES;
        else
                zoneid = getzoneid();
        return (prfind_zone(pid, zoneid));
}
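
/*
 * Illustrative caller sketch (hypothetical, not a routine in this file):
 * prfind() and prfind_zone() require pidlock, and the proc_t they return
 * is only stable while pidlock is held, so callers typically grab p_lock
 * before letting go of pidlock, as sprlock_zone() does below:
 *
 *      mutex_enter(&pidlock);
 *      if ((p = prfind(pid)) != NULL)
 *              mutex_enter(&p->p_lock);
 *      mutex_exit(&pidlock);
 */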

proc_t *
pgfind_zone(pid_t pgid, zoneid_t zoneid)
{
        struct pid *pidp;

        ASSERT(MUTEX_HELD(&pidlock));

        mutex_enter(&pidlinklock);
        pidp = pid_lookup(pgid);
        mutex_exit(&pidlinklock);
        if (pidp != NULL) {
                proc_t *p = pidp->pid_pglink;

                if (zoneid == ALL_ZONES || pgid == 0 || p == NULL ||
                    p->p_zone->zone_id == zoneid)
                        return (p);
        }
        return (NULL);
}

/*
 * return the head of the list of processes whose process group ID is 'pgid',
 * or NULL, if no such process group
 */
proc_t *
pgfind(pid_t pgid)
{
        zoneid_t zoneid;

        if (INGLOBALZONE(curproc))
                zoneid = ALL_ZONES;
        else
                zoneid = getzoneid();
        return (pgfind_zone(pgid, zoneid));
}

/*
 * Sets P_PR_LOCK on a non-system process.  Process must be fully created
 * and not exiting to succeed.
 *
 * Returns 0 on success.
 * Returns 1 if P_PR_LOCK is set.
 * Returns -1 if proc is in invalid state.
 */
int
sprtrylock_proc(proc_t *p)
{
        ASSERT(MUTEX_HELD(&p->p_lock));

        /* skip system and incomplete processes */
        if (p->p_stat == SIDL || p->p_stat == SZOMB ||
            (p->p_flag & (SSYS | SEXITING | SEXITLWPS))) {
                return (-1);
        }

        if (p->p_proc_flag & P_PR_LOCK)
                return (1);

        p->p_proc_flag |= P_PR_LOCK;
        THREAD_KPRI_REQUEST();

        return (0);
}

/*
 * Wait for P_PR_LOCK to become clear.  Returns with p_lock dropped,
 * and the proc pointer no longer valid, as the proc may have exited.
 */
void
sprwaitlock_proc(proc_t *p)
{
        kmutex_t *mp;

        ASSERT(MUTEX_HELD(&p->p_lock));
        ASSERT(p->p_proc_flag & P_PR_LOCK);

        /*
         * p_lock is persistent, but p itself is not -- it could
         * vanish during cv_wait().  Load p->p_lock now so we can
         * drop it after cv_wait() without referencing p.
         */
        mp = &p->p_lock;
        cv_wait(&pr_pid_cv[p->p_slot], mp);
        mutex_exit(mp);
}

/*
 * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK.
 * Returns the proc pointer on success, NULL on failure.  sprlock() is
 * really just a stripped-down version of pr_p_lock() to allow practive
 * walkers like dofusers() and dumpsys() to synchronize with /proc.
 */
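
/*
 * Typical caller pattern (an illustrative sketch only):
 *
 *      if ((p = sprlock(pid)) != NULL) {
 *              ... operate on p: P_PR_LOCK is set and p->p_lock is held ...
 *              sprunlock(p);
 *      }
 *
 * sprunlock() clears P_PR_LOCK and drops p->p_lock.
 */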

proc_t *
sprlock_zone(pid_t pid, zoneid_t zoneid)
{
        proc_t *p;
        int ret;

        for (;;) {
                mutex_enter(&pidlock);
                if ((p = prfind_zone(pid, zoneid)) == NULL) {
                        mutex_exit(&pidlock);
                        return (NULL);
                }
                mutex_enter(&p->p_lock);
                mutex_exit(&pidlock);

                if (panicstr)
                        return (p);

                ret = sprtrylock_proc(p);
                if (ret == -1) {
                        mutex_exit(&p->p_lock);
                        return (NULL);
                } else if (ret == 0) {
                        break;
                }
                sprwaitlock_proc(p);
        }
        return (p);
}

proc_t *
sprlock(pid_t pid)
{
        zoneid_t zoneid;

        if (INGLOBALZONE(curproc))
                zoneid = ALL_ZONES;
        else
                zoneid = getzoneid();
        return (sprlock_zone(pid, zoneid));
}

void
sprlock_proc(proc_t *p)
{
        ASSERT(MUTEX_HELD(&p->p_lock));

        while (p->p_proc_flag & P_PR_LOCK) {
                cv_wait(&pr_pid_cv[p->p_slot], &p->p_lock);
        }

        p->p_proc_flag |= P_PR_LOCK;
        THREAD_KPRI_REQUEST();
}

void
sprunlock(proc_t *p)
{
        if (panicstr) {
                mutex_exit(&p->p_lock);
                return;
        }

        ASSERT(p->p_proc_flag & P_PR_LOCK);
        ASSERT(MUTEX_HELD(&p->p_lock));

        cv_signal(&pr_pid_cv[p->p_slot]);
        p->p_proc_flag &= ~P_PR_LOCK;
        mutex_exit(&p->p_lock);
        THREAD_KPRI_RELEASE();
}

void
pid_init(void)
{
        int i;

        pid_hashsz = 1 << highbit(v.v_proc / pid_hashlen);

        pidhash = kmem_zalloc(sizeof (struct pid *) * pid_hashsz, KM_SLEEP);
        procdir = kmem_alloc(sizeof (union procent) * v.v_proc, KM_SLEEP);
        pr_pid_cv = kmem_zalloc(sizeof (kcondvar_t) * v.v_proc, KM_SLEEP);
        proc_lock = kmem_zalloc(sizeof (struct plock) * v.v_proc, KM_SLEEP);

        nproc = 1;
        practive = proc_sched;
        proc_sched->p_next = NULL;
        procdir[0].pe_proc = proc_sched;

        procentfree = &procdir[1];
        for (i = 1; i < v.v_proc - 1; i++)
                procdir[i].pe_next = &procdir[i+1];
        procdir[i].pe_next = NULL;

        HASHPID(0) = &pid0;

        upcount_init();
}

proc_t *
pid_entry(int slot)
{
        union procent *pep;
        proc_t *prp;

        ASSERT(MUTEX_HELD(&pidlock));
        ASSERT(slot >= 0 && slot < v.v_proc);

        pep = procdir[slot].pe_next;
        if (pep >= procdir && pep < &procdir[v.v_proc])
                return (NULL);
        prp = procdir[slot].pe_proc;
        if (prp != 0 && prp->p_stat == SIDL)
                return (NULL);
        return (prp);
}

/*
 * Send the specified signal to all processes whose process group ID is
 * equal to 'pgid'
 */

void
signal(pid_t pgid, int sig)
{
        struct pid *pidp;
        proc_t *prp;

        mutex_enter(&pidlock);
        mutex_enter(&pidlinklock);
        if (pgid == 0 || (pidp = pid_lookup(pgid)) == NULL) {
                mutex_exit(&pidlinklock);
                mutex_exit(&pidlock);
                return;
        }
        mutex_exit(&pidlinklock);
        for (prp = pidp->pid_pglink; prp; prp = prp->p_pglink) {
                mutex_enter(&prp->p_lock);
                sigtoproc(prp, NULL, sig);
                mutex_exit(&prp->p_lock);
        }
        mutex_exit(&pidlock);
}

/*
 * Send the specified signal to the specified process
 */

void
prsignal(struct pid *pidp, int sig)
{
        if (!(pidp->pid_prinactive))
                psignal(procdir[pidp->pid_prslot].pe_proc, sig);
}

#include <sys/sunddi.h>

/*
 * DDI/DKI interfaces for drivers to send signals to processes
 */
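
/*
 * Illustrative driver sketch (hypothetical; the signal choice and call
 * sites are assumptions, not taken from this file):
 *
 *      void *pref = proc_ref();        capture curproc, e.g. at open(9E)
 *      ...
 *      if (proc_signal(pref, SIGIO) == -1) {
 *              ... the process has already exited ...
 *      }
 *      proc_unref(pref);               one unref per ref
 */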

/*
 * obtain an opaque reference to a process for signaling
 */
void *
proc_ref(void)
{
        struct pid *pidp;

        mutex_enter(&pidlock);
        pidp = curproc->p_pidp;
        PID_HOLD(pidp);
        mutex_exit(&pidlock);

        return (pidp);
}

/*
 * release a reference to a process
 * - a process can exit even if a driver has a reference to it
 * - one proc_unref for every proc_ref
 */
void
proc_unref(void *pref)
{
        mutex_enter(&pidlock);
        PID_RELE((struct pid *)pref);
        mutex_exit(&pidlock);
}

/*
 * send a signal to a process
 *
 * - send the process the signal
 * - if the process went away, return a -1
 * - if the process is still there return 0
 */
int
proc_signal(void *pref, int sig)
{
        struct pid *pidp = pref;

        prsignal(pidp, sig);
        return (pidp->pid_prinactive ? -1 : 0);
}


static struct upcount   **upc_hash;     /* a boot time allocated array */
static ulong_t          upc_hashmask;
#define UPC_HASH(x, y)  ((ulong_t)(x ^ y) & upc_hashmask)

/*
 * Get us off the ground.  Called once at boot.
 */
void
upcount_init(void)
{
        ulong_t upc_hashsize;

        /*
         * An entry per MB of memory is our current guess
         */
        /*
         * 2^20 is a meg, so shifting right by 20 - PAGESHIFT
         * converts pages to megs (without overflowing a u_int
         * if you have more than 4G of memory, like ptob(physmem)/1M
         * would).
         */
        upc_hashsize = (1 << highbit(physmem >> (20 - PAGESHIFT)));
        upc_hashmask = upc_hashsize - 1;
        upc_hash = kmem_zalloc(upc_hashsize * sizeof (struct upcount *),
            KM_SLEEP);
}

/*
 * Increment the number of processes associated with a given uid and zoneid.
 */
void
upcount_inc(uid_t uid, zoneid_t zoneid)
{
        struct upcount  **upc, **hupc;
        struct upcount  *new;

        ASSERT(MUTEX_HELD(&pidlock));
        new = NULL;
        hupc = &upc_hash[UPC_HASH(uid, zoneid)];
top:
        upc = hupc;
        while ((*upc) != NULL) {
                if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
                        (*upc)->up_count++;
                        if (new) {
                                /*
                                 * did not need `new' after all.
                                 */
                                kmem_free(new, sizeof (*new));
                        }
                        return;
                }
                upc = &(*upc)->up_next;
        }

        /*
         * There is no entry for this <uid,zoneid> pair.
         * Allocate one.  If we have to drop pidlock, check
         * again.
         */
        if (new == NULL) {
                new = (struct upcount *)kmem_alloc(sizeof (*new), KM_NOSLEEP);
                if (new == NULL) {
                        mutex_exit(&pidlock);
                        new = (struct upcount *)kmem_alloc(sizeof (*new),
                            KM_SLEEP);
                        mutex_enter(&pidlock);
                        goto top;
                }
        }

        /*
         * On the assumption that a new user is going to do some
         * more forks, put the new upcount structure on the front.
         */
        upc = hupc;

        new->up_uid = uid;
        new->up_zoneid = zoneid;
        new->up_count = 1;
        new->up_next = *upc;

        *upc = new;
}

/*
 * Decrement the number of processes a given uid and zoneid has.
 */
void
upcount_dec(uid_t uid, zoneid_t zoneid)
{
        struct upcount  **upc;
        struct upcount  *done;

        ASSERT(MUTEX_HELD(&pidlock));

        upc = &upc_hash[UPC_HASH(uid, zoneid)];
        while ((*upc) != NULL) {
                if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
                        (*upc)->up_count--;
                        if ((*upc)->up_count == 0) {
                                done = *upc;
                                *upc = (*upc)->up_next;
                                kmem_free(done, sizeof (*done));
                        }
                        return;
                }
                upc = &(*upc)->up_next;
        }
        cmn_err(CE_PANIC, "decr_upcount-off the end");
}
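
/*
 * Illustrative sketch (hypothetical caller, not from this file): the
 * per-(uid, zoneid) counts maintained by upcount_inc()/upcount_dec()
 * above are read with upcount_get() below, always under pidlock:
 *
 *      mutex_enter(&pidlock);
 *      count = upcount_get(uid, zoneid);
 *      mutex_exit(&pidlock);
 */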

/*
 * Returns the number of processes for a given uid and zoneid.
 * Non-existent (uid, zoneid) pairs are assumed to have no processes.
 */
int
upcount_get(uid_t uid, zoneid_t zoneid)
{
        struct upcount  *upc;

        ASSERT(MUTEX_HELD(&pidlock));

        upc = upc_hash[UPC_HASH(uid, zoneid)];
        while (upc != NULL) {
                if (upc->up_uid == uid && upc->up_zoneid == zoneid) {
                        return (upc->up_count);
                }
                upc = upc->up_next;
        }
        return (0);
}