1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23 /* 24 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 25 * Use is subject to license terms. 26 */ 27 28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 29 /* All Rights Reserved */ 30 31 32 #pragma ident "%Z%%M% %I% %E% SMI" 33 34 #include <sys/types.h> 35 #include <sys/param.h> 36 #include <sys/sysmacros.h> 37 #include <sys/proc.h> 38 #include <sys/kmem.h> 39 #include <sys/tuneable.h> 40 #include <sys/var.h> 41 #include <sys/cred.h> 42 #include <sys/systm.h> 43 #include <sys/prsystm.h> 44 #include <sys/vnode.h> 45 #include <sys/session.h> 46 #include <sys/cpuvar.h> 47 #include <sys/cmn_err.h> 48 #include <sys/bitmap.h> 49 #include <sys/debug.h> 50 #include <c2/audit.h> 51 #include <sys/zone.h> 52 53 /* directory entries for /proc */ 54 union procent { 55 proc_t *pe_proc; 56 union procent *pe_next; 57 }; 58 59 struct pid pid0 = { 60 0, /* pid_prinactive */ 61 1, /* pid_pgorphaned */ 62 0, /* pid_padding */ 63 0, /* pid_prslot */ 64 0, /* pid_id */ 65 NULL, /* pid_pglink */ 66 NULL, /* pid_pgtail */ 67 NULL, /* pid_link */ 68 3 /* pid_ref */ 69 }; 70 71 static int pid_hashlen = 4; /* desired average hash chain length */ 72 static int pid_hashsz; /* number of buckets in the hash table */ 73 74 #define HASHPID(pid) (pidhash[((pid)&(pid_hashsz-1))]) 75 76 extern uint_t nproc; 77 extern struct kmem_cache *process_cache; 78 static void upcount_init(void); 79 80 kmutex_t pidlock; /* global process lock */ 81 kmutex_t pr_pidlock; /* /proc global process lock */ 82 kcondvar_t *pr_pid_cv; /* for /proc, one per process slot */ 83 struct plock *proc_lock; /* persistent array of p_lock's */ 84 85 /* 86 * See the comment above pid_getlockslot() for a detailed explanation of this 87 * constant. Note that a PLOCK_SHIFT of 3 implies 64-byte coherence 88 * granularity; if the coherence granularity is ever changed, this constant 89 * should be modified to reflect the change to minimize proc_lock false 90 * sharing (correctness, however, is guaranteed regardless of the coherence 91 * granularity). 92 */ 93 #define PLOCK_SHIFT 3 94 95 static kmutex_t pidlinklock; 96 static struct pid **pidhash; 97 static pid_t minpid; 98 static pid_t mpid; 99 static union procent *procdir; 100 static union procent *procentfree; 101 102 static struct pid * 103 pid_lookup(pid_t pid) 104 { 105 struct pid *pidp; 106 107 ASSERT(MUTEX_HELD(&pidlinklock)); 108 109 for (pidp = HASHPID(pid); pidp; pidp = pidp->pid_link) { 110 if (pidp->pid_id == pid) { 111 ASSERT(pidp->pid_ref > 0); 112 break; 113 } 114 } 115 return (pidp); 116 } 117 118 void 119 pid_setmin(void) 120 { 121 if (jump_pid && jump_pid > mpid) 122 minpid = mpid = jump_pid; 123 else 124 minpid = mpid + 1; 125 } 126 127 /* 128 * When prslots are simply used as an index to determine a process' p_lock, 129 * adjacent prslots share adjacent p_locks. On machines where the size 130 * of a mutex is smaller than that of a cache line (which, as of this writing, 131 * is true for all machines on which Solaris runs), this can potentially 132 * induce false sharing. The standard solution for false sharing is to pad 133 * out one's data structures (in this case, struct plock). However, 134 * given the size and (generally) sparse use of the proc_lock array, this 135 * is suboptimal. We therefore stride through the proc_lock array with 136 * a stride of PLOCK_SHIFT. PLOCK_SHIFT should be defined as: 137 * 138 * log_2 (coherence_granularity / sizeof (kmutex_t)) 139 * 140 * Under this scheme, false sharing is still possible -- but only when 141 * the number of active processes is very large. Note that the one-to-one 142 * mapping between prslots and lockslots is maintained. 143 */ 144 static int 145 pid_getlockslot(int prslot) 146 { 147 int even = (v.v_proc >> PLOCK_SHIFT) << PLOCK_SHIFT; 148 int perlap = even >> PLOCK_SHIFT; 149 150 if (prslot >= even) 151 return (prslot); 152 153 return (((prslot % perlap) << PLOCK_SHIFT) + (prslot / perlap)); 154 } 155 156 /* 157 * This function assigns a pid for use in a fork request. It allocates 158 * a pid structure, tries to find an empty slot in the proc table, 159 * and selects the process id. 160 * 161 * pid_assign() returns the new pid on success, -1 on failure. 162 */ 163 pid_t 164 pid_assign(proc_t *prp) 165 { 166 struct pid *pidp; 167 union procent *pep; 168 pid_t newpid, startpid; 169 170 pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP); 171 172 mutex_enter(&pidlinklock); 173 if ((pep = procentfree) == NULL) { 174 /* 175 * ran out of /proc directory entries 176 */ 177 goto failed; 178 } 179 180 /* 181 * Allocate a pid 182 */ 183 startpid = mpid; 184 do { 185 newpid = (++mpid == maxpid ? mpid = minpid : mpid); 186 } while (pid_lookup(newpid) && newpid != startpid); 187 188 if (newpid == startpid && pid_lookup(newpid)) { 189 /* couldn't find a free pid */ 190 goto failed; 191 } 192 193 procentfree = pep->pe_next; 194 pep->pe_proc = prp; 195 prp->p_pidp = pidp; 196 197 /* 198 * Put pid into the pid hash table. 199 */ 200 pidp->pid_link = HASHPID(newpid); 201 HASHPID(newpid) = pidp; 202 pidp->pid_ref = 1; 203 pidp->pid_id = newpid; 204 pidp->pid_prslot = pep - procdir; 205 prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)]; 206 mutex_exit(&pidlinklock); 207 208 return (newpid); 209 210 failed: 211 mutex_exit(&pidlinklock); 212 kmem_free(pidp, sizeof (struct pid)); 213 return (-1); 214 } 215 216 /* 217 * decrement the reference count for pid 218 */ 219 int 220 pid_rele(struct pid *pidp) 221 { 222 struct pid **pidpp; 223 224 mutex_enter(&pidlinklock); 225 ASSERT(pidp != &pid0); 226 227 pidpp = &HASHPID(pidp->pid_id); 228 for (;;) { 229 ASSERT(*pidpp != NULL); 230 if (*pidpp == pidp) 231 break; 232 pidpp = &(*pidpp)->pid_link; 233 } 234 235 *pidpp = pidp->pid_link; 236 mutex_exit(&pidlinklock); 237 238 kmem_free(pidp, sizeof (*pidp)); 239 return (0); 240 } 241 242 void 243 proc_entry_free(struct pid *pidp) 244 { 245 mutex_enter(&pidlinklock); 246 pidp->pid_prinactive = 1; 247 procdir[pidp->pid_prslot].pe_next = procentfree; 248 procentfree = &procdir[pidp->pid_prslot]; 249 mutex_exit(&pidlinklock); 250 } 251 252 void 253 pid_exit(proc_t *prp) 254 { 255 struct pid *pidp; 256 257 ASSERT(MUTEX_HELD(&pidlock)); 258 259 /* 260 * Exit process group. If it is NULL, it's because fork failed 261 * before calling pgjoin(). 262 */ 263 ASSERT(prp->p_pgidp != NULL || prp->p_stat == SIDL); 264 if (prp->p_pgidp != NULL) 265 pgexit(prp); 266 267 SESS_RELE(prp->p_sessp); 268 269 pidp = prp->p_pidp; 270 271 proc_entry_free(pidp); 272 273 #ifdef C2_AUDIT 274 if (audit_active) 275 audit_pfree(prp); 276 #endif 277 278 if (practive == prp) { 279 practive = prp->p_next; 280 } 281 282 if (prp->p_next) { 283 prp->p_next->p_prev = prp->p_prev; 284 } 285 if (prp->p_prev) { 286 prp->p_prev->p_next = prp->p_next; 287 } 288 289 PID_RELE(pidp); 290 291 mutex_destroy(&prp->p_crlock); 292 kmem_cache_free(process_cache, prp); 293 nproc--; 294 } 295 296 /* 297 * Find a process visible from the specified zone given its process ID. 298 */ 299 proc_t * 300 prfind_zone(pid_t pid, zoneid_t zoneid) 301 { 302 struct pid *pidp; 303 proc_t *p; 304 305 ASSERT(MUTEX_HELD(&pidlock)); 306 307 mutex_enter(&pidlinklock); 308 pidp = pid_lookup(pid); 309 mutex_exit(&pidlinklock); 310 if (pidp != NULL && pidp->pid_prinactive == 0) { 311 p = procdir[pidp->pid_prslot].pe_proc; 312 if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) 313 return (p); 314 } 315 return (NULL); 316 } 317 318 /* 319 * Find a process given its process ID. This obeys zone restrictions, 320 * so if the caller is in a non-global zone it won't find processes 321 * associated with other zones. Use prfind_zone(pid, ALL_ZONES) to 322 * bypass this restriction. 323 */ 324 proc_t * 325 prfind(pid_t pid) 326 { 327 zoneid_t zoneid; 328 329 if (INGLOBALZONE(curproc)) 330 zoneid = ALL_ZONES; 331 else 332 zoneid = getzoneid(); 333 return (prfind_zone(pid, zoneid)); 334 } 335 336 proc_t * 337 pgfind_zone(pid_t pgid, zoneid_t zoneid) 338 { 339 struct pid *pidp; 340 341 ASSERT(MUTEX_HELD(&pidlock)); 342 343 mutex_enter(&pidlinklock); 344 pidp = pid_lookup(pgid); 345 mutex_exit(&pidlinklock); 346 if (pidp != NULL) { 347 proc_t *p = pidp->pid_pglink; 348 349 if (zoneid == ALL_ZONES || pgid == 0 || p == NULL || 350 p->p_zone->zone_id == zoneid) 351 return (p); 352 } 353 return (NULL); 354 } 355 356 /* 357 * return the head of the list of processes whose process group ID is 'pgid', 358 * or NULL, if no such process group 359 */ 360 proc_t * 361 pgfind(pid_t pgid) 362 { 363 zoneid_t zoneid; 364 365 if (INGLOBALZONE(curproc)) 366 zoneid = ALL_ZONES; 367 else 368 zoneid = getzoneid(); 369 return (pgfind_zone(pgid, zoneid)); 370 } 371 372 /* 373 * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK. 374 * Returns the proc pointer on success, NULL on failure. sprlock() is 375 * really just a stripped-down version of pr_p_lock() to allow practive 376 * walkers like dofusers() and dumpsys() to synchronize with /proc. 377 */ 378 proc_t * 379 sprlock_zone(pid_t pid, zoneid_t zoneid) 380 { 381 proc_t *p; 382 kmutex_t *mp; 383 384 for (;;) { 385 mutex_enter(&pidlock); 386 if ((p = prfind_zone(pid, zoneid)) == NULL) { 387 mutex_exit(&pidlock); 388 return (NULL); 389 } 390 /* 391 * p_lock is persistent, but p itself is not -- it could 392 * vanish during cv_wait(). Load p->p_lock now so we can 393 * drop it after cv_wait() without referencing p. 394 */ 395 mp = &p->p_lock; 396 mutex_enter(mp); 397 mutex_exit(&pidlock); 398 /* 399 * If the process is in some half-baked state, fail. 400 */ 401 if (p->p_stat == SZOMB || p->p_stat == SIDL || 402 (p->p_flag & (SEXITING | SEXITLWPS))) { 403 mutex_exit(mp); 404 return (NULL); 405 } 406 if (panicstr) 407 return (p); 408 if (!(p->p_proc_flag & P_PR_LOCK)) 409 break; 410 cv_wait(&pr_pid_cv[p->p_slot], mp); 411 mutex_exit(mp); 412 } 413 p->p_proc_flag |= P_PR_LOCK; 414 THREAD_KPRI_REQUEST(); 415 return (p); 416 } 417 418 proc_t * 419 sprlock(pid_t pid) 420 { 421 zoneid_t zoneid; 422 423 if (INGLOBALZONE(curproc)) 424 zoneid = ALL_ZONES; 425 else 426 zoneid = getzoneid(); 427 return (sprlock_zone(pid, zoneid)); 428 } 429 430 void 431 sprlock_proc(proc_t *p) 432 { 433 ASSERT(MUTEX_HELD(&p->p_lock)); 434 435 while (p->p_proc_flag & P_PR_LOCK) { 436 cv_wait(&pr_pid_cv[p->p_slot], &p->p_lock); 437 } 438 439 p->p_proc_flag |= P_PR_LOCK; 440 THREAD_KPRI_REQUEST(); 441 } 442 443 void 444 sprunlock(proc_t *p) 445 { 446 if (panicstr) { 447 mutex_exit(&p->p_lock); 448 return; 449 } 450 451 ASSERT(p->p_proc_flag & P_PR_LOCK); 452 ASSERT(MUTEX_HELD(&p->p_lock)); 453 454 cv_signal(&pr_pid_cv[p->p_slot]); 455 p->p_proc_flag &= ~P_PR_LOCK; 456 mutex_exit(&p->p_lock); 457 THREAD_KPRI_RELEASE(); 458 } 459 460 void 461 pid_init(void) 462 { 463 int i; 464 465 pid_hashsz = 1 << highbit(v.v_proc / pid_hashlen); 466 467 pidhash = kmem_zalloc(sizeof (struct pid *) * pid_hashsz, KM_SLEEP); 468 procdir = kmem_alloc(sizeof (union procent) * v.v_proc, KM_SLEEP); 469 pr_pid_cv = kmem_zalloc(sizeof (kcondvar_t) * v.v_proc, KM_SLEEP); 470 proc_lock = kmem_zalloc(sizeof (struct plock) * v.v_proc, KM_SLEEP); 471 472 nproc = 1; 473 practive = proc_sched; 474 proc_sched->p_next = NULL; 475 procdir[0].pe_proc = proc_sched; 476 477 procentfree = &procdir[1]; 478 for (i = 1; i < v.v_proc - 1; i++) 479 procdir[i].pe_next = &procdir[i+1]; 480 procdir[i].pe_next = NULL; 481 482 HASHPID(0) = &pid0; 483 484 upcount_init(); 485 } 486 487 proc_t * 488 pid_entry(int slot) 489 { 490 union procent *pep; 491 proc_t *prp; 492 493 ASSERT(MUTEX_HELD(&pidlock)); 494 ASSERT(slot >= 0 && slot < v.v_proc); 495 496 pep = procdir[slot].pe_next; 497 if (pep >= procdir && pep < &procdir[v.v_proc]) 498 return (NULL); 499 prp = procdir[slot].pe_proc; 500 if (prp != 0 && prp->p_stat == SIDL) 501 return (NULL); 502 return (prp); 503 } 504 505 /* 506 * Send the specified signal to all processes whose process group ID is 507 * equal to 'pgid' 508 */ 509 510 void 511 signal(pid_t pgid, int sig) 512 { 513 struct pid *pidp; 514 proc_t *prp; 515 516 mutex_enter(&pidlock); 517 mutex_enter(&pidlinklock); 518 if (pgid == 0 || (pidp = pid_lookup(pgid)) == NULL) { 519 mutex_exit(&pidlinklock); 520 mutex_exit(&pidlock); 521 return; 522 } 523 mutex_exit(&pidlinklock); 524 for (prp = pidp->pid_pglink; prp; prp = prp->p_pglink) { 525 mutex_enter(&prp->p_lock); 526 sigtoproc(prp, NULL, sig); 527 mutex_exit(&prp->p_lock); 528 } 529 mutex_exit(&pidlock); 530 } 531 532 /* 533 * Send the specified signal to the specified process 534 */ 535 536 void 537 prsignal(struct pid *pidp, int sig) 538 { 539 if (!(pidp->pid_prinactive)) 540 psignal(procdir[pidp->pid_prslot].pe_proc, sig); 541 } 542 543 #include <sys/sunddi.h> 544 545 /* 546 * DDI/DKI interfaces for drivers to send signals to processes 547 */ 548 549 /* 550 * obtain an opaque reference to a process for signaling 551 */ 552 void * 553 proc_ref(void) 554 { 555 struct pid *pidp; 556 557 mutex_enter(&pidlock); 558 pidp = curproc->p_pidp; 559 PID_HOLD(pidp); 560 mutex_exit(&pidlock); 561 562 return (pidp); 563 } 564 565 /* 566 * release a reference to a process 567 * - a process can exit even if a driver has a reference to it 568 * - one proc_unref for every proc_ref 569 */ 570 void 571 proc_unref(void *pref) 572 { 573 mutex_enter(&pidlock); 574 PID_RELE((struct pid *)pref); 575 mutex_exit(&pidlock); 576 } 577 578 /* 579 * send a signal to a process 580 * 581 * - send the process the signal 582 * - if the process went away, return a -1 583 * - if the process is still there return 0 584 */ 585 int 586 proc_signal(void *pref, int sig) 587 { 588 struct pid *pidp = pref; 589 590 prsignal(pidp, sig); 591 return (pidp->pid_prinactive ? -1 : 0); 592 } 593 594 595 static struct upcount **upc_hash; /* a boot time allocated array */ 596 static ulong_t upc_hashmask; 597 #define UPC_HASH(x, y) ((ulong_t)(x ^ y) & upc_hashmask) 598 599 /* 600 * Get us off the ground. Called once at boot. 601 */ 602 void 603 upcount_init(void) 604 { 605 ulong_t upc_hashsize; 606 607 /* 608 * An entry per MB of memory is our current guess 609 */ 610 /* 611 * 2^20 is a meg, so shifting right by 20 - PAGESHIFT 612 * converts pages to megs (without overflowing a u_int 613 * if you have more than 4G of memory, like ptob(physmem)/1M 614 * would). 615 */ 616 upc_hashsize = (1 << highbit(physmem >> (20 - PAGESHIFT))); 617 upc_hashmask = upc_hashsize - 1; 618 upc_hash = kmem_zalloc(upc_hashsize * sizeof (struct upcount *), 619 KM_SLEEP); 620 } 621 622 /* 623 * Increment the number of processes associated with a given uid and zoneid. 624 */ 625 void 626 upcount_inc(uid_t uid, zoneid_t zoneid) 627 { 628 struct upcount **upc, **hupc; 629 struct upcount *new; 630 631 ASSERT(MUTEX_HELD(&pidlock)); 632 new = NULL; 633 hupc = &upc_hash[UPC_HASH(uid, zoneid)]; 634 top: 635 upc = hupc; 636 while ((*upc) != NULL) { 637 if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) { 638 (*upc)->up_count++; 639 if (new) { 640 /* 641 * did not need `new' afterall. 642 */ 643 kmem_free(new, sizeof (*new)); 644 } 645 return; 646 } 647 upc = &(*upc)->up_next; 648 } 649 650 /* 651 * There is no entry for this <uid,zoneid> pair. 652 * Allocate one. If we have to drop pidlock, check 653 * again. 654 */ 655 if (new == NULL) { 656 new = (struct upcount *)kmem_alloc(sizeof (*new), KM_NOSLEEP); 657 if (new == NULL) { 658 mutex_exit(&pidlock); 659 new = (struct upcount *)kmem_alloc(sizeof (*new), 660 KM_SLEEP); 661 mutex_enter(&pidlock); 662 goto top; 663 } 664 } 665 666 667 /* 668 * On the assumption that a new user is going to do some 669 * more forks, put the new upcount structure on the front. 670 */ 671 upc = hupc; 672 673 new->up_uid = uid; 674 new->up_zoneid = zoneid; 675 new->up_count = 1; 676 new->up_next = *upc; 677 678 *upc = new; 679 } 680 681 /* 682 * Decrement the number of processes a given uid and zoneid has. 683 */ 684 void 685 upcount_dec(uid_t uid, zoneid_t zoneid) 686 { 687 struct upcount **upc; 688 struct upcount *done; 689 690 ASSERT(MUTEX_HELD(&pidlock)); 691 692 upc = &upc_hash[UPC_HASH(uid, zoneid)]; 693 while ((*upc) != NULL) { 694 if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) { 695 (*upc)->up_count--; 696 if ((*upc)->up_count == 0) { 697 done = *upc; 698 *upc = (*upc)->up_next; 699 kmem_free(done, sizeof (*done)); 700 } 701 return; 702 } 703 upc = &(*upc)->up_next; 704 } 705 cmn_err(CE_PANIC, "decr_upcount-off the end"); 706 } 707 708 /* 709 * Returns the number of processes a uid has. 710 * Non-existent uid's are assumed to have no processes. 711 */ 712 int 713 upcount_get(uid_t uid, zoneid_t zoneid) 714 { 715 struct upcount *upc; 716 717 ASSERT(MUTEX_HELD(&pidlock)); 718 719 upc = upc_hash[UPC_HASH(uid, zoneid)]; 720 while (upc != NULL) { 721 if (upc->up_uid == uid && upc->up_zoneid == zoneid) { 722 return (upc->up_count); 723 } 724 upc = upc->up_next; 725 } 726 return (0); 727 } 728