/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */


#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/proc.h>
#include <sys/kmem.h>
#include <sys/tuneable.h>
#include <sys/var.h>
#include <sys/cred.h>
#include <sys/systm.h>
#include <sys/prsystm.h>
#include <sys/vnode.h>
#include <sys/session.h>
#include <sys/cpuvar.h>
#include <sys/cmn_err.h>
#include <sys/bitmap.h>
#include <sys/debug.h>
#include <c2/audit.h>
#include <sys/zone.h>

/*
 * directory entries for /proc
 *
 * Each procdir[] slot is in one of two states.  When pid_allocate() hands
 * a slot to a process it stores that process in pe_proc.  Unused slots are
 * chained together through pe_next by pid_init() and proc_entry_free();
 * procentfree is the head of that free list.  Because both members share
 * storage, pid_entry() tells the two states apart by checking whether the
 * stored pointer lies inside the procdir[] array.
 */
union procent {
	proc_t *pe_proc;
	union procent *pe_next;
};

/*
 * Statically initialized pid structure for pid 0.  pid_init() installs it
 * in the pid hash table and pid_rele() asserts that it is never released.
 */
struct pid pid0 = {
	0,		/* pid_prinactive */
	1,		/* pid_pgorphaned */
	0,		/* pid_padding */
	0,		/* pid_prslot */
	0,		/* pid_id */
	NULL,		/* pid_pglink */
	NULL,		/* pid_pgtail */
	NULL,		/* pid_link */
	3		/* pid_ref */
};

static int pid_hashlen = 4;	/* desired average hash
chain length */ 71 static int pid_hashsz; /* number of buckets in the hash table */ 72 73 #define HASHPID(pid) (pidhash[((pid)&(pid_hashsz-1))]) 74 75 extern uint_t nproc; 76 extern struct kmem_cache *process_cache; 77 static void upcount_init(void); 78 79 kmutex_t pidlock; /* global process lock */ 80 kmutex_t pr_pidlock; /* /proc global process lock */ 81 kcondvar_t *pr_pid_cv; /* for /proc, one per process slot */ 82 struct plock *proc_lock; /* persistent array of p_lock's */ 83 84 /* 85 * See the comment above pid_getlockslot() for a detailed explanation of this 86 * constant. Note that a PLOCK_SHIFT of 3 implies 64-byte coherence 87 * granularity; if the coherence granularity is ever changed, this constant 88 * should be modified to reflect the change to minimize proc_lock false 89 * sharing (correctness, however, is guaranteed regardless of the coherence 90 * granularity). 91 */ 92 #define PLOCK_SHIFT 3 93 94 static kmutex_t pidlinklock; 95 static struct pid **pidhash; 96 static pid_t minpid; 97 static pid_t mpid; 98 static union procent *procdir; 99 static union procent *procentfree; 100 101 static struct pid * 102 pid_lookup(pid_t pid) 103 { 104 struct pid *pidp; 105 106 ASSERT(MUTEX_HELD(&pidlinklock)); 107 108 for (pidp = HASHPID(pid); pidp; pidp = pidp->pid_link) { 109 if (pidp->pid_id == pid) { 110 ASSERT(pidp->pid_ref > 0); 111 break; 112 } 113 } 114 return (pidp); 115 } 116 117 struct pid * 118 pid_find(pid_t pid) 119 { 120 struct pid *pidp; 121 122 mutex_enter(&pidlinklock); 123 pidp = pid_lookup(pid); 124 mutex_exit(&pidlinklock); 125 126 return (pidp); 127 } 128 129 void 130 pid_setmin(void) 131 { 132 if (jump_pid && jump_pid > mpid) 133 minpid = mpid = jump_pid; 134 else 135 minpid = mpid + 1; 136 } 137 138 /* 139 * When prslots are simply used as an index to determine a process' p_lock, 140 * adjacent prslots share adjacent p_locks. 
On machines where the size
 * of a mutex is smaller than that of a cache line (which, as of this writing,
 * is true for all machines on which Solaris runs), this can potentially
 * induce false sharing.  The standard solution for false sharing is to pad
 * out one's data structures (in this case, struct plock).  However,
 * given the size and (generally) sparse use of the proc_lock array, this
 * is suboptimal.  We therefore stride through the proc_lock array with
 * a stride of PLOCK_SHIFT.  PLOCK_SHIFT should be defined as:
 *
 *   log_2 (coherence_granularity / sizeof (kmutex_t))
 *
 * Under this scheme, false sharing is still possible -- but only when
 * the number of active processes is very large.  Note that the one-to-one
 * mapping between prslots and lockslots is maintained.
 *
 * (Concretely, consecutive prslots end up (1 << PLOCK_SHIFT) lockslots
 * apart; see the arithmetic below.)
 */
static int
pid_getlockslot(int prslot)
{
	/* v.v_proc rounded down to a multiple of (1 << PLOCK_SHIFT) */
	int even = (v.v_proc >> PLOCK_SHIFT) << PLOCK_SHIFT;
	/* even / (1 << PLOCK_SHIFT): how many prslots fit in one pass */
	int perlap = even >> PLOCK_SHIFT;

	/*
	 * Slots beyond the last full multiple map to themselves.  When
	 * v.v_proc is smaller than (1 << PLOCK_SHIFT), even and perlap are
	 * both 0 and every non-negative prslot leaves here, so the
	 * modulo/divide below never sees perlap == 0.
	 */
	if (prslot >= even)
		return (prslot);

	/*
	 * (prslot % perlap) picks the group of (1 << PLOCK_SHIFT) locks,
	 * (prslot / perlap) picks the position inside that group.
	 */
	return (((prslot % perlap) << PLOCK_SHIFT) + (prslot / perlap));
}

/*
 * This function allocates a pid structure, a free pid, and optionally a
 * slot in the proc table for it.
 *
 * prp is only used when PID_ALLOC_PROC is set in flags; in that case a
 * procdir[] entry is taken from the free list and prp's p_pidp and p_lockp
 * are filled in.  Without the flag the new pid gets pid_prslot 0 and prp is
 * not touched.
 *
 * The search for a free id starts just after mpid and wraps from maxpid
 * back to minpid (maxpid itself is never returned).
 *
 * pid_allocate() returns the new pid on success, -1 on failure.  Failure
 * means either the procdir[] free list is empty (PID_ALLOC_PROC only) or
 * every candidate id is already hashed.
 */
pid_t
pid_allocate(proc_t *prp, int flags)
{
	struct pid *pidp;
	union procent *pep;
	pid_t newpid, startpid;

	/* allocate before taking pidlinklock: KM_SLEEP may block */
	pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP);

	mutex_enter(&pidlinklock);
	/* pep is only assigned (and only used later) with PID_ALLOC_PROC */
	if ((flags & PID_ALLOC_PROC) && (pep = procentfree) == NULL) {
		/*
		 * ran out of /proc directory entries
		 */
		goto failed;
	}

	/*
	 * Allocate a pid
	 */
	startpid = mpid;
	do {
		newpid = (++mpid == maxpid ? mpid = minpid : mpid);
	} while (pid_lookup(newpid) && newpid != startpid);

	/* the loop also stops after one full lap; re-check the last id */
	if (newpid == startpid && pid_lookup(newpid)) {
		/* couldn't find a free pid */
		goto failed;
	}

	/*
	 * Put pid into the pid hash table.
	 */
	pidp->pid_link = HASHPID(newpid);
	HASHPID(newpid) = pidp;
	pidp->pid_ref = 1;
	pidp->pid_id = newpid;

	if (flags & PID_ALLOC_PROC) {
		/* pop the free-list head and bind it to prp */
		procentfree = pep->pe_next;
		pidp->pid_prslot = pep - procdir;
		pep->pe_proc = prp;
		prp->p_pidp = pidp;
		prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)];
	} else {
		pidp->pid_prslot = 0;
	}

	mutex_exit(&pidlinklock);

	return (newpid);

failed:
	mutex_exit(&pidlinklock);
	kmem_free(pidp, sizeof (struct pid));
	return (-1);
}

/*
 * Remove pidp from its hash chain and free it.  Always returns 0.
 *
 * Note that this function does not itself touch pid_ref: it unlinks and
 * frees unconditionally.  NOTE(review): presumably it is reached through
 * the PID_RELE() macro once the reference count has dropped to zero; that
 * macro is defined outside this file -- confirm.
 *
 * pidp must be on the chain selected by its pid_id and must not be &pid0.
 */
int
pid_rele(struct pid *pidp)
{
	struct pid **pidpp;

	mutex_enter(&pidlinklock);
	ASSERT(pidp != &pid0);

	/* find the link that points at pidp so it can be spliced out */
	pidpp = &HASHPID(pidp->pid_id);
	for (;;) {
		ASSERT(*pidpp != NULL);
		if (*pidpp == pidp)
			break;
		pidpp = &(*pidpp)->pid_link;
	}

	*pidpp = pidp->pid_link;
	mutex_exit(&pidlinklock);

	kmem_free(pidp, sizeof (*pidp));
	return (0);
}

/*
 * Mark pidp as no longer backed by a process (pid_prinactive = 1) and
 * push its procdir[] slot onto the head of the free list.
 */
void
proc_entry_free(struct pid *pidp)
{
	mutex_enter(&pidlinklock);
	pidp->pid_prinactive = 1;
	procdir[pidp->pid_prslot].pe_next = procentfree;
	procentfree = &procdir[pidp->pid_prslot];
	mutex_exit(&pidlinklock);
}

/*
 * Tear down the pid-related state of an exiting (or failed-fork) process:
 * leave its process group, drop its session reference, give back its
 * procdir[] slot, unlink it from the practive list, release its pid
 * structure, and finally free prp itself and decrement nproc.
 *
 * The caller must hold pidlock.  prp is freed on return.
 */
void
pid_exit(proc_t *prp)
{
	struct pid *pidp;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Exit process group.  If it is NULL, it's because fork failed
	 * before calling pgjoin().
	 */
	ASSERT(prp->p_pgidp != NULL || prp->p_stat == SIDL);
	if (prp->p_pgidp != NULL)
		pgexit(prp);

	sess_rele(prp->p_sessp, B_TRUE);

	pidp = prp->p_pidp;

	proc_entry_free(pidp);

#ifdef C2_AUDIT
	if (audit_active)
		audit_pfree(prp);
#endif

	/* unlink prp from the doubly linked list headed by practive */
	if (practive == prp) {
		practive = prp->p_next;
	}

	if (prp->p_next) {
		prp->p_next->p_prev = prp->p_prev;
	}
	if (prp->p_prev) {
		prp->p_prev->p_next = prp->p_next;
	}

	PID_RELE(pidp);

	mutex_destroy(&prp->p_crlock);
	kmem_cache_free(process_cache, prp);
	nproc--;
}

/*
 * Find a process visible from the specified zone given its process ID.
 *
 * Returns NULL when the pid is not hashed, when its pid structure has
 * been marked inactive, or when the process belongs to a different zone
 * and zoneid is not ALL_ZONES.  The caller must hold pidlock.
 */
proc_t *
prfind_zone(pid_t pid, zoneid_t zoneid)
{
	struct pid *pidp;
	proc_t *p;

	ASSERT(MUTEX_HELD(&pidlock));

	mutex_enter(&pidlinklock);
	pidp = pid_lookup(pid);
	mutex_exit(&pidlinklock);
	if (pidp != NULL && pidp->pid_prinactive == 0) {
		p = procdir[pidp->pid_prslot].pe_proc;
		if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid)
			return (p);
	}
	return (NULL);
}

/*
 * Find a process given its process ID.  This obeys zone restrictions,
 * so if the caller is in a non-global zone it won't find processes
 * associated with other zones.  Use prfind_zone(pid, ALL_ZONES) to
 * bypass this restriction.
*/
proc_t *
prfind(pid_t pid)
{
	zoneid_t zoneid;

	/* callers in the global zone may see every zone's processes */
	if (INGLOBALZONE(curproc))
		zoneid = ALL_ZONES;
	else
		zoneid = getzoneid();
	return (prfind_zone(pid, zoneid));
}

/*
 * Return the head of the process list hanging off the pid structure for
 * 'pgid', as seen from 'zoneid'.  The head is returned when zoneid is
 * ALL_ZONES, when pgid is 0, when the list is empty (NULL), or when the
 * head process belongs to zoneid; otherwise NULL.  Only the head of the
 * list is checked against the zone.  The caller must hold pidlock.
 */
proc_t *
pgfind_zone(pid_t pgid, zoneid_t zoneid)
{
	struct pid *pidp;

	ASSERT(MUTEX_HELD(&pidlock));

	mutex_enter(&pidlinklock);
	pidp = pid_lookup(pgid);
	mutex_exit(&pidlinklock);
	if (pidp != NULL) {
		proc_t *p = pidp->pid_pglink;

		if (zoneid == ALL_ZONES || pgid == 0 || p == NULL ||
		    p->p_zone->zone_id == zoneid)
			return (p);
	}
	return (NULL);
}

/*
 * return the head of the list of processes whose process group ID is 'pgid',
 * or NULL, if no such process group
 *
 * The zone used for the lookup is ALL_ZONES for callers in the global zone
 * and the caller's own zone otherwise.
 */
proc_t *
pgfind(pid_t pgid)
{
	zoneid_t zoneid;

	if (INGLOBALZONE(curproc))
		zoneid = ALL_ZONES;
	else
		zoneid = getzoneid();
	return (pgfind_zone(pgid, zoneid));
}

/*
 * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK.
 * Returns the proc pointer on success, NULL on failure.  sprlock() is
 * really just a stripped-down version of pr_p_lock() to allow practive
 * walkers like dofusers() and dumpsys() to synchronize with /proc.
 *
 * On success the process's p_lock is held on return.  When panicstr is
 * set the process is returned with p_lock held but without P_PR_LOCK
 * being set.
 */
proc_t *
sprlock_zone(pid_t pid, zoneid_t zoneid)
{
	proc_t *p;
	kmutex_t *mp;

	for (;;) {
		mutex_enter(&pidlock);
		if ((p = prfind_zone(pid, zoneid)) == NULL) {
			mutex_exit(&pidlock);
			return (NULL);
		}
		/*
		 * p_lock is persistent, but p itself is not -- it could
		 * vanish during cv_wait().  Load p->p_lock now so we can
		 * drop it after cv_wait() without referencing p.
		 */
		mp = &p->p_lock;
		mutex_enter(mp);
		mutex_exit(&pidlock);
		/*
		 * If the process is in some half-baked state, fail.
		 */
		if (p->p_stat == SZOMB || p->p_stat == SIDL ||
		    (p->p_flag & (SEXITING | SEXITLWPS))) {
			mutex_exit(mp);
			return (NULL);
		}
		if (panicstr)
			return (p);
		if (!(p->p_proc_flag & P_PR_LOCK))
			break;
		/*
		 * Someone else holds P_PR_LOCK: wait for sprunlock() to
		 * signal, then drop the lock and redo the lookup from
		 * scratch since p may be gone.
		 */
		cv_wait(&pr_pid_cv[p->p_slot], mp);
		mutex_exit(mp);
	}
	p->p_proc_flag |= P_PR_LOCK;
	THREAD_KPRI_REQUEST();
	return (p);
}

/*
 * sprlock_zone() using ALL_ZONES for callers in the global zone and the
 * caller's own zone otherwise.
 */
proc_t *
sprlock(pid_t pid)
{
	zoneid_t zoneid;

	if (INGLOBALZONE(curproc))
		zoneid = ALL_ZONES;
	else
		zoneid = getzoneid();
	return (sprlock_zone(pid, zoneid));
}

/*
 * Set P_PR_LOCK on a process whose p_lock the caller already holds,
 * waiting on the process's pr_pid_cv entry for as long as another
 * holder has the flag set.  p_lock is still held on return.
 */
void
sprlock_proc(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	while (p->p_proc_flag & P_PR_LOCK) {
		cv_wait(&pr_pid_cv[p->p_slot], &p->p_lock);
	}

	p->p_proc_flag |= P_PR_LOCK;
	THREAD_KPRI_REQUEST();
}

/*
 * Undo sprlock()/sprlock_proc(): clear P_PR_LOCK, signal the process's
 * pr_pid_cv entry and drop p_lock.  When panicstr is set only p_lock is
 * dropped, mirroring sprlock_zone(), which does not set the flag then.
 */
void
sprunlock(proc_t *p)
{
	if (panicstr) {
		mutex_exit(&p->p_lock);
		return;
	}

	ASSERT(p->p_proc_flag & P_PR_LOCK);
	ASSERT(MUTEX_HELD(&p->p_lock));

	cv_signal(&pr_pid_cv[p->p_slot]);
	p->p_proc_flag &= ~P_PR_LOCK;
	mutex_exit(&p->p_lock);
	THREAD_KPRI_RELEASE();
}

/*
 * Set up the pid hash table, the procdir[] array with its free list, the
 * per-slot condition variables and the proc_lock array, all sized from
 * v.v_proc.  Slot 0 is given to proc_sched, pid0 is entered in the hash
 * table, and upcount_init() is called last.
 */
void
pid_init(void)
{
	int i;

	/* always a power of two, as the HASHPID() mask requires */
	pid_hashsz = 1 << highbit(v.v_proc / pid_hashlen);

	pidhash = kmem_zalloc(sizeof (struct pid *) * pid_hashsz, KM_SLEEP);
	procdir = kmem_alloc(sizeof (union procent) * v.v_proc, KM_SLEEP);
	pr_pid_cv = kmem_zalloc(sizeof (kcondvar_t) * v.v_proc, KM_SLEEP);
	proc_lock = kmem_zalloc(sizeof (struct plock) * v.v_proc, KM_SLEEP);

	nproc = 1;
	practive = proc_sched;
	proc_sched->p_next = NULL;
	procdir[0].pe_proc = proc_sched;

	/* chain slots 1 .. v.v_proc - 1 into the free list */
	procentfree = &procdir[1];
	for (i = 1; i < v.v_proc - 1; i++)
		procdir[i].pe_next = &procdir[i+1];
	/* i is now the last slot; it terminates the list */
	procdir[i].pe_next = NULL;

	HASHPID(0) = &pid0;

	upcount_init();
}

/*
 * Return the process occupying procdir[slot], or NULL when the slot is
 * free or the process is still in state SIDL.  The caller must hold
 * pidlock; slot must be in [0, v.v_proc).
 */
proc_t *
pid_entry(int slot)
{
	union procent *pep;
	proc_t *prp;

	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(slot >= 0 && slot < v.v_proc);

	/* a pointer back into procdir[] means the slot is on the free list */
	pep = procdir[slot].pe_next;
	if (pep >= procdir && pep < &procdir[v.v_proc])
		return (NULL);
	/*
	 * prp can still be NULL here: the tail of the free list stores a
	 * NULL pe_next, which the range test above does not catch.
	 */
	prp = procdir[slot].pe_proc;
	if (prp != 0 && prp->p_stat == SIDL)
		return (NULL);
	return (prp);
}

/*
 * Send the specified signal to all processes whose process group ID is
 * equal to 'pgid'
 *
 * Nothing is sent when pgid is 0 or is not in the pid hash table.  Each
 * member's p_lock is held around the sigtoproc() call, and pidlock is
 * held for the whole walk.
 */

void
signal(pid_t pgid, int sig)
{
	struct pid *pidp;
	proc_t *prp;

	mutex_enter(&pidlock);
	mutex_enter(&pidlinklock);
	if (pgid == 0 || (pidp = pid_lookup(pgid)) == NULL) {
		mutex_exit(&pidlinklock);
		mutex_exit(&pidlock);
		return;
	}
	mutex_exit(&pidlinklock);
	for (prp = pidp->pid_pglink; prp; prp = prp->p_pglink) {
		mutex_enter(&prp->p_lock);
		sigtoproc(prp, NULL, sig);
		mutex_exit(&prp->p_lock);
	}
	mutex_exit(&pidlock);
}

/*
 * Send the specified signal to the specified process
 *
 * Does nothing when the pid structure has been marked inactive.
 */

void
prsignal(struct pid *pidp, int sig)
{
	if (!(pidp->pid_prinactive))
		psignal(procdir[pidp->pid_prslot].pe_proc, sig);
}

#include <sys/sunddi.h>

/*
 * DDI/DKI interfaces for drivers to send signals to processes
 */

/*
 * obtain an opaque reference to a process for signaling
 *
 * The reference is the current process's pid structure with a hold
 * (PID_HOLD) placed on it under pidlock.
 */
void *
proc_ref(void)
{
	struct pid *pidp;

	mutex_enter(&pidlock);
	pidp = curproc->p_pidp;
	PID_HOLD(pidp);
	mutex_exit(&pidlock);

	return (pidp);
}

/*
 * release a reference to a process
 * - a process can exit even if a driver has a reference to it
 * - one proc_unref for every proc_ref
 */
void
proc_unref(void *pref)
{
	mutex_enter(&pidlock);
	PID_RELE((struct pid *)pref);
	mutex_exit(&pidlock);
}

/*
 * send a signal to a process
 *
 * - send the process the signal
 * - if the process went away, return a -1
 * - if the process is still there
return 0 599 */ 600 int 601 proc_signal(void *pref, int sig) 602 { 603 struct pid *pidp = pref; 604 605 prsignal(pidp, sig); 606 return (pidp->pid_prinactive ? -1 : 0); 607 } 608 609 610 static struct upcount **upc_hash; /* a boot time allocated array */ 611 static ulong_t upc_hashmask; 612 #define UPC_HASH(x, y) ((ulong_t)(x ^ y) & upc_hashmask) 613 614 /* 615 * Get us off the ground. Called once at boot. 616 */ 617 void 618 upcount_init(void) 619 { 620 ulong_t upc_hashsize; 621 622 /* 623 * An entry per MB of memory is our current guess 624 */ 625 /* 626 * 2^20 is a meg, so shifting right by 20 - PAGESHIFT 627 * converts pages to megs (without overflowing a u_int 628 * if you have more than 4G of memory, like ptob(physmem)/1M 629 * would). 630 */ 631 upc_hashsize = (1 << highbit(physmem >> (20 - PAGESHIFT))); 632 upc_hashmask = upc_hashsize - 1; 633 upc_hash = kmem_zalloc(upc_hashsize * sizeof (struct upcount *), 634 KM_SLEEP); 635 } 636 637 /* 638 * Increment the number of processes associated with a given uid and zoneid. 639 */ 640 void 641 upcount_inc(uid_t uid, zoneid_t zoneid) 642 { 643 struct upcount **upc, **hupc; 644 struct upcount *new; 645 646 ASSERT(MUTEX_HELD(&pidlock)); 647 new = NULL; 648 hupc = &upc_hash[UPC_HASH(uid, zoneid)]; 649 top: 650 upc = hupc; 651 while ((*upc) != NULL) { 652 if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) { 653 (*upc)->up_count++; 654 if (new) { 655 /* 656 * did not need `new' afterall. 657 */ 658 kmem_free(new, sizeof (*new)); 659 } 660 return; 661 } 662 upc = &(*upc)->up_next; 663 } 664 665 /* 666 * There is no entry for this <uid,zoneid> pair. 667 * Allocate one. If we have to drop pidlock, check 668 * again. 
669 */ 670 if (new == NULL) { 671 new = (struct upcount *)kmem_alloc(sizeof (*new), KM_NOSLEEP); 672 if (new == NULL) { 673 mutex_exit(&pidlock); 674 new = (struct upcount *)kmem_alloc(sizeof (*new), 675 KM_SLEEP); 676 mutex_enter(&pidlock); 677 goto top; 678 } 679 } 680 681 682 /* 683 * On the assumption that a new user is going to do some 684 * more forks, put the new upcount structure on the front. 685 */ 686 upc = hupc; 687 688 new->up_uid = uid; 689 new->up_zoneid = zoneid; 690 new->up_count = 1; 691 new->up_next = *upc; 692 693 *upc = new; 694 } 695 696 /* 697 * Decrement the number of processes a given uid and zoneid has. 698 */ 699 void 700 upcount_dec(uid_t uid, zoneid_t zoneid) 701 { 702 struct upcount **upc; 703 struct upcount *done; 704 705 ASSERT(MUTEX_HELD(&pidlock)); 706 707 upc = &upc_hash[UPC_HASH(uid, zoneid)]; 708 while ((*upc) != NULL) { 709 if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) { 710 (*upc)->up_count--; 711 if ((*upc)->up_count == 0) { 712 done = *upc; 713 *upc = (*upc)->up_next; 714 kmem_free(done, sizeof (*done)); 715 } 716 return; 717 } 718 upc = &(*upc)->up_next; 719 } 720 cmn_err(CE_PANIC, "decr_upcount-off the end"); 721 } 722 723 /* 724 * Returns the number of processes a uid has. 725 * Non-existent uid's are assumed to have no processes. 726 */ 727 int 728 upcount_get(uid_t uid, zoneid_t zoneid) 729 { 730 struct upcount *upc; 731 732 ASSERT(MUTEX_HELD(&pidlock)); 733 734 upc = upc_hash[UPC_HASH(uid, zoneid)]; 735 while (upc != NULL) { 736 if (upc->up_uid == uid && upc->up_zoneid == zoneid) { 737 return (upc->up_count); 738 } 739 upc = upc->up_next; 740 } 741 return (0); 742 } 743