/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/proc.h>
#include <sys/kmem.h>
#include <sys/tuneable.h>
#include <sys/var.h>
#include <sys/cred.h>
#include <sys/systm.h>
#include <sys/prsystm.h>
#include <sys/vnode.h>
#include <sys/session.h>
#include <sys/cpuvar.h>
#include <sys/cmn_err.h>
#include <sys/bitmap.h>
#include <sys/debug.h>
#include <c2/audit.h>
#include <sys/zone.h>

/*
 * Directory entries for /proc.  Each slot in procdir is either in use
 * (pe_proc points at the process occupying that slot) or on the free
 * list threaded through pe_next -- the union is never both at once.
 */
union procent {
    proc_t *pe_proc;
    union procent *pe_next;
};

/* Statically allocated pid structure for PID 0 (the scheduler). */
struct pid pid0 = {
    0,      /* pid_prinactive */
    1,      /* pid_pgorphaned */
    0,      /* pid_padding */
    0,      /* pid_prslot */
    0,      /* pid_id */
    NULL,   /* pid_pglink */
    NULL,   /* pid_link */
    3       /* pid_ref */
};

static int pid_hashlen = 4;     /* desired average hash
   chain length */
static int pid_hashsz;      /* number of buckets in the hash table */

/* pid_hashsz is a power of two, so masking selects the hash bucket. */
#define HASHPID(pid)    (pidhash[((pid)&(pid_hashsz-1))])

extern uint_t nproc;
extern struct kmem_cache *process_cache;
static void upcount_init(void);

kmutex_t    pidlock;        /* global process lock */
kmutex_t    pr_pidlock;     /* /proc global process lock */
kcondvar_t  *pr_pid_cv;     /* for /proc, one per process slot */
struct plock    *proc_lock;     /* persistent array of p_lock's */

/*
 * See the comment above pid_getlockslot() for a detailed explanation of this
 * constant.  Note that a PLOCK_SHIFT of 3 implies 64-byte coherence
 * granularity; if the coherence granularity is ever changed, this constant
 * should be modified to reflect the change to minimize proc_lock false
 * sharing (correctness, however, is guaranteed regardless of the coherence
 * granularity).
 */
#define PLOCK_SHIFT 3

static kmutex_t pidlinklock;    /* protects pidhash and the procdir free list */
static struct pid **pidhash;
static pid_t minpid;        /* smallest pid pid_assign() will hand out */
static pid_t mpid;      /* most recently assigned pid */
static union procent *procdir;
static union procent *procentfree;

/*
 * Look up the pid structure for 'pid' in the hash table.
 * Returns NULL if no such pid exists.  Caller must hold pidlinklock.
 */
static struct pid *
pid_lookup(pid_t pid)
{
    struct pid *pidp;

    ASSERT(MUTEX_HELD(&pidlinklock));

    for (pidp = HASHPID(pid); pidp; pidp = pidp->pid_link) {
        if (pidp->pid_id == pid) {
            ASSERT(pidp->pid_ref > 0);
            break;
        }
    }
    return (pidp);
}

/*
 * Set the floor for pid assignment.  jump_pid is a tunable (presumably
 * set at boot -- confirm against sys/tuneable.h users); when it exceeds
 * the last assigned pid, assignment jumps forward to it.
 */
void
pid_setmin(void)
{
    if (jump_pid && jump_pid > mpid)
        minpid = mpid = jump_pid;
    else
        minpid = mpid + 1;
}

/*
 * When prslots are simply used as an index to determine a process' p_lock,
 * adjacent prslots share adjacent p_locks.  On machines where the size
 * of a mutex is smaller than that of a cache line (which, as of this writing,
 * is true for all machines on which Solaris runs), this can potentially
 * induce false sharing.
 * The standard solution for false sharing is to pad
 * out one's data structures (in this case, struct plock).  However,
 * given the size and (generally) sparse use of the proc_lock array, this
 * is suboptimal.  We therefore stride through the proc_lock array with
 * a stride of PLOCK_SHIFT.  PLOCK_SHIFT should be defined as:
 *
 *   log_2 (coherence_granularity / sizeof (kmutex_t))
 *
 * Under this scheme, false sharing is still possible -- but only when
 * the number of active processes is very large.  Note that the one-to-one
 * mapping between prslots and lockslots is maintained.
 */
static int
pid_getlockslot(int prslot)
{
    /* 'even' is v.v_proc rounded down to a multiple of 2^PLOCK_SHIFT. */
    int even = (v.v_proc >> PLOCK_SHIFT) << PLOCK_SHIFT;
    int perlap = even >> PLOCK_SHIFT;

    /* Slots past the evenly divisible region map to themselves. */
    if (prslot >= even)
        return (prslot);

    return (((prslot % perlap) << PLOCK_SHIFT) + (prslot / perlap));
}

/*
 * This function assigns a pid for use in a fork request.  It allocates
 * a pid structure, tries to find an empty slot in the proc table,
 * and selects the process id.
 *
 * pid_assign() returns the new pid on success, -1 on failure.
 */
pid_t
pid_assign(proc_t *prp)
{
    struct pid *pidp;
    union procent *pep;
    pid_t newpid, startpid;

    pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP);

    mutex_enter(&pidlinklock);
    if ((pep = procentfree) == NULL) {
        /*
         * ran out of /proc directory entries
         */
        goto failed;
    }

    /*
     * Allocate a pid: scan forward (wrapping at maxpid back to
     * minpid) until an unused id is found or we return to where
     * we started.
     */
    startpid = mpid;
    do {
        newpid = (++mpid == maxpid ? mpid = minpid : mpid);
    } while (pid_lookup(newpid) && newpid != startpid);

    if (newpid == startpid && pid_lookup(newpid)) {
        /* couldn't find a free pid */
        goto failed;
    }

    /* Claim the /proc directory slot and bind it to the process. */
    procentfree = pep->pe_next;
    pep->pe_proc = prp;
    prp->p_pidp = pidp;

    /*
     * Put pid into the pid hash table.
     */
    pidp->pid_link = HASHPID(newpid);
    HASHPID(newpid) = pidp;
    pidp->pid_ref = 1;
    pidp->pid_id = newpid;
    pidp->pid_prslot = pep - procdir;
    /* Stride through proc_lock to reduce false sharing; see above. */
    prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)];
    mutex_exit(&pidlinklock);

    return (newpid);

failed:
    mutex_exit(&pidlinklock);
    kmem_free(pidp, sizeof (struct pid));
    return (-1);
}

/*
 * decrement the reference count for pid
 *
 * NOTE(review): this unconditionally unhashes and frees pidp -- it is
 * presumably only reached (via PID_RELE) once the count hits zero;
 * confirm against the PID_RELE macro definition.
 */
int
pid_rele(struct pid *pidp)
{
    struct pid **pidpp;

    mutex_enter(&pidlinklock);
    ASSERT(pidp != &pid0);

    /* Walk the hash chain to find the link pointing at pidp. */
    pidpp = &HASHPID(pidp->pid_id);
    for (;;) {
        ASSERT(*pidpp != NULL);
        if (*pidpp == pidp)
            break;
        pidpp = &(*pidpp)->pid_link;
    }

    /* Unhash and free. */
    *pidpp = pidp->pid_link;
    mutex_exit(&pidlinklock);

    kmem_free(pidp, sizeof (*pidp));
    return (0);
}

/*
 * Mark the pid's /proc slot inactive and return its procdir entry
 * to the free list.
 */
void
proc_entry_free(struct pid *pidp)
{
    mutex_enter(&pidlinklock);
    pidp->pid_prinactive = 1;
    procdir[pidp->pid_prslot].pe_next = procentfree;
    procentfree = &procdir[pidp->pid_prslot];
    mutex_exit(&pidlinklock);
}

/*
 * Tear down the pid state of an exiting (or failed-fork) process:
 * leave the process group and session, free the /proc slot, unlink
 * from practive, release the pid, and free the proc structure.
 * Caller must hold pidlock.
 */
void
pid_exit(proc_t *prp)
{
    struct pid *pidp;

    ASSERT(MUTEX_HELD(&pidlock));

    /*
     * Exit process group.  If it is NULL, it's because fork failed
     * before calling pgjoin().
     */
    ASSERT(prp->p_pgidp != NULL || prp->p_stat == SIDL);
    if (prp->p_pgidp != NULL)
        pgexit(prp);

    SESS_RELE(prp->p_sessp);

    pidp = prp->p_pidp;

    proc_entry_free(pidp);

#ifdef C2_AUDIT
    if (audit_active)
        audit_pfree(prp);
#endif

    /* Unlink prp from the doubly linked practive list. */
    if (practive == prp) {
        practive = prp->p_next;
    }

    if (prp->p_next) {
        prp->p_next->p_prev = prp->p_prev;
    }
    if (prp->p_prev) {
        prp->p_prev->p_next = prp->p_next;
    }

    PID_RELE(pidp);

    mutex_destroy(&prp->p_crlock);
    kmem_cache_free(process_cache, prp);
    nproc--;
}

/*
 * Find a process visible from the specified zone given its process ID.
 * Returns NULL if the pid does not exist, its /proc slot is inactive,
 * or the process belongs to another zone (unless zoneid == ALL_ZONES).
 * Caller must hold pidlock.
 */
proc_t *
prfind_zone(pid_t pid, zoneid_t zoneid)
{
    struct pid *pidp;
    proc_t *p;

    ASSERT(MUTEX_HELD(&pidlock));

    mutex_enter(&pidlinklock);
    pidp = pid_lookup(pid);
    mutex_exit(&pidlinklock);
    if (pidp != NULL && pidp->pid_prinactive == 0) {
        p = procdir[pidp->pid_prslot].pe_proc;
        if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid)
            return (p);
    }
    return (NULL);
}

/*
 * Find a process given its process ID.  This obeys zone restrictions,
 * so if the caller is in a non-global zone it won't find processes
 * associated with other zones.  Use prfind_zone(pid, ALL_ZONES) to
 * bypass this restriction.
 */
proc_t *
prfind(pid_t pid)
{
    zoneid_t zoneid;

    if (INGLOBALZONE(curproc))
        zoneid = ALL_ZONES;
    else
        zoneid = getzoneid();
    return (prfind_zone(pid, zoneid));
}

/*
 * Find the head of the process-group list for 'pgid', restricted to the
 * given zone (unless zoneid == ALL_ZONES or pgid == 0).  Returns NULL if
 * no such group is visible.  Caller must hold pidlock.
 */
proc_t *
pgfind_zone(pid_t pgid, zoneid_t zoneid)
{
    struct pid *pidp;

    ASSERT(MUTEX_HELD(&pidlock));

    mutex_enter(&pidlinklock);
    pidp = pid_lookup(pgid);
    mutex_exit(&pidlinklock);
    if (pidp != NULL) {
        proc_t *p = pidp->pid_pglink;

        if (zoneid == ALL_ZONES || pgid == 0 || p == NULL ||
            p->p_zone->zone_id == zoneid)
            return (p);
    }
    return (NULL);
}

/*
 * return the head of the list of processes whose process group ID is 'pgid',
 * or NULL, if no such process group
 */
proc_t *
pgfind(pid_t pgid)
{
    zoneid_t zoneid;

    if (INGLOBALZONE(curproc))
        zoneid = ALL_ZONES;
    else
        zoneid = getzoneid();
    return (pgfind_zone(pgid, zoneid));
}

/*
 * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK.
 * Returns the proc pointer on success, NULL on failure.  sprlock() is
 * really just a stripped-down version of pr_p_lock() to allow practive
 * walkers like dofusers() and dumpsys() to synchronize with /proc.
 */
proc_t *
sprlock_zone(pid_t pid, zoneid_t zoneid)
{
    proc_t *p;
    kmutex_t *mp;

    for (;;) {
        mutex_enter(&pidlock);
        if ((p = prfind_zone(pid, zoneid)) == NULL) {
            mutex_exit(&pidlock);
            return (NULL);
        }
        /*
         * p_lock is persistent, but p itself is not -- it could
         * vanish during cv_wait().  Load p->p_lock now so we can
         * drop it after cv_wait() without referencing p.
         */
        mp = &p->p_lock;
        mutex_enter(mp);
        mutex_exit(&pidlock);
        /*
         * If the process is in some half-baked state, fail.
         */
        if (p->p_stat == SZOMB || p->p_stat == SIDL ||
            (p->p_flag & (SEXITING | SEXITLWPS))) {
            mutex_exit(mp);
            return (NULL);
        }
        /* During panic, skip the P_PR_LOCK protocol entirely. */
        if (panicstr)
            return (p);
        if (!(p->p_proc_flag & P_PR_LOCK))
            break;
        /* Someone else holds P_PR_LOCK; wait and retry the lookup. */
        cv_wait(&pr_pid_cv[p->p_slot], mp);
        mutex_exit(mp);
    }
    p->p_proc_flag |= P_PR_LOCK;
    THREAD_KPRI_REQUEST();
    return (p);
}

/*
 * Zone-aware wrapper for sprlock_zone(): restrict to the caller's zone
 * unless the caller is in the global zone.
 */
proc_t *
sprlock(pid_t pid)
{
    zoneid_t zoneid;

    if (INGLOBALZONE(curproc))
        zoneid = ALL_ZONES;
    else
        zoneid = getzoneid();
    return (sprlock_zone(pid, zoneid));
}

/*
 * Acquire P_PR_LOCK on a proc whose p_lock the caller already holds,
 * waiting on the per-slot condition variable if necessary.
 */
void
sprlock_proc(proc_t *p)
{
    ASSERT(MUTEX_HELD(&p->p_lock));

    while (p->p_proc_flag & P_PR_LOCK) {
        cv_wait(&pr_pid_cv[p->p_slot], &p->p_lock);
    }

    p->p_proc_flag |= P_PR_LOCK;
    THREAD_KPRI_REQUEST();
}

/*
 * Release P_PR_LOCK and p_lock, waking one waiter on the slot's cv.
 */
void
sprunlock(proc_t *p)
{
    if (panicstr) {
        mutex_exit(&p->p_lock);
        return;
    }

    ASSERT(p->p_proc_flag & P_PR_LOCK);
    ASSERT(MUTEX_HELD(&p->p_lock));

    cv_signal(&pr_pid_cv[p->p_slot]);
    p->p_proc_flag &= ~P_PR_LOCK;
    mutex_exit(&p->p_lock);
    THREAD_KPRI_RELEASE();
}

/*
 * Boot-time initialization of the pid hash table, the /proc directory
 * (procdir) and its free list, the per-slot condition variables, and
 * the proc_lock array.  Installs proc_sched (PID 0) in slot 0.
 */
void
pid_init(void)
{
    int i;

    pid_hashsz = 1 << highbit(v.v_proc / pid_hashlen);

    pidhash = kmem_zalloc(sizeof (struct pid *) * pid_hashsz, KM_SLEEP);
    procdir = kmem_alloc(sizeof (union procent) * v.v_proc, KM_SLEEP);
    pr_pid_cv = kmem_zalloc(sizeof (kcondvar_t) * v.v_proc, KM_SLEEP);
    proc_lock = kmem_zalloc(sizeof (struct plock) * v.v_proc, KM_SLEEP);

    nproc = 1;
    practive = proc_sched;
    proc_sched->p_next = NULL;
    procdir[0].pe_proc = proc_sched;

    /* Thread slots 1 .. v_proc-1 onto the free list. */
    procentfree = &procdir[1];
    for (i = 1; i < v.v_proc - 1; i++)
        procdir[i].pe_next = &procdir[i+1];
    procdir[i].pe_next = NULL;

    HASHPID(0) = &pid0;

    upcount_init();
}

/*
 * Return the proc_t occupying /proc directory slot 'slot', or NULL if
 * the slot is on the free list or the process is still being created.
 * Caller must hold pidlock.
 */
proc_t *
pid_entry(int slot)
{
    union procent *pep;
    proc_t *prp;

    ASSERT(MUTEX_HELD(&pidlock));
    ASSERT(slot >= 0 && slot < v.v_proc);

    /*
     * A free procdir entry's pe_next points at another procdir entry;
     * an in-use entry's pe_proc points outside the array.  Use that to
     * detect a free slot.
     */
    pep = procdir[slot].pe_next;
    if (pep >= procdir && pep < &procdir[v.v_proc])
        return (NULL);
    prp = procdir[slot].pe_proc;
    if (prp != 0 && prp->p_stat == SIDL)
        return (NULL);
    return (prp);
}

/*
 * Send the specified signal to all processes whose process group ID is
 * equal to 'pgid'
 */

void
signal(pid_t pgid, int sig)
{
    struct pid *pidp;
    proc_t *prp;

    mutex_enter(&pidlock);
    mutex_enter(&pidlinklock);
    if (pgid == 0 || (pidp = pid_lookup(pgid)) == NULL) {
        mutex_exit(&pidlinklock);
        mutex_exit(&pidlock);
        return;
    }
    mutex_exit(&pidlinklock);
    /* pidlock is still held, keeping the pg list stable while we walk. */
    for (prp = pidp->pid_pglink; prp; prp = prp->p_pglink) {
        mutex_enter(&prp->p_lock);
        sigtoproc(prp, NULL, sig);
        mutex_exit(&prp->p_lock);
    }
    mutex_exit(&pidlock);
}

/*
 * Send the specified signal to the specified process
 */

void
prsignal(struct pid *pidp, int sig)
{
    if (!(pidp->pid_prinactive))
        psignal(procdir[pidp->pid_prslot].pe_proc, sig);
}

#include <sys/sunddi.h>

/*
 * DDI/DKI interfaces for drivers to send signals to processes
 */

/*
 * obtain an opaque reference to a process for signaling
 */
void *
proc_ref(void)
{
    struct pid *pidp;

    mutex_enter(&pidlock);
    pidp = curproc->p_pidp;
    PID_HOLD(pidp);
    mutex_exit(&pidlock);

    return (pidp);
}

/*
 * release a reference to a process
 * - a process can exit even if a driver has a reference to it
 * - one proc_unref for every proc_ref
 */
void
proc_unref(void *pref)
{
    mutex_enter(&pidlock);
    PID_RELE((struct pid *)pref);
    mutex_exit(&pidlock);
}

/*
 * send a signal to a process
 *
 * - send the process the signal
 * - if the process went away, return a -1
 * - if the process is still there
return 0 583 */ 584 int 585 proc_signal(void *pref, int sig) 586 { 587 struct pid *pidp = pref; 588 589 prsignal(pidp, sig); 590 return (pidp->pid_prinactive ? -1 : 0); 591 } 592 593 594 static struct upcount **upc_hash; /* a boot time allocated array */ 595 static ulong_t upc_hashmask; 596 #define UPC_HASH(x, y) ((ulong_t)(x ^ y) & upc_hashmask) 597 598 /* 599 * Get us off the ground. Called once at boot. 600 */ 601 void 602 upcount_init(void) 603 { 604 ulong_t upc_hashsize; 605 606 /* 607 * An entry per MB of memory is our current guess 608 */ 609 /* 610 * 2^20 is a meg, so shifting right by 20 - PAGESHIFT 611 * converts pages to megs (without overflowing a u_int 612 * if you have more than 4G of memory, like ptob(physmem)/1M 613 * would). 614 */ 615 upc_hashsize = (1 << highbit(physmem >> (20 - PAGESHIFT))); 616 upc_hashmask = upc_hashsize - 1; 617 upc_hash = kmem_zalloc(upc_hashsize * sizeof (struct upcount *), 618 KM_SLEEP); 619 } 620 621 /* 622 * Increment the number of processes associated with a given uid and zoneid. 623 */ 624 void 625 upcount_inc(uid_t uid, zoneid_t zoneid) 626 { 627 struct upcount **upc, **hupc; 628 struct upcount *new; 629 630 ASSERT(MUTEX_HELD(&pidlock)); 631 new = NULL; 632 hupc = &upc_hash[UPC_HASH(uid, zoneid)]; 633 top: 634 upc = hupc; 635 while ((*upc) != NULL) { 636 if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) { 637 (*upc)->up_count++; 638 if (new) { 639 /* 640 * did not need `new' afterall. 641 */ 642 kmem_free(new, sizeof (*new)); 643 } 644 return; 645 } 646 upc = &(*upc)->up_next; 647 } 648 649 /* 650 * There is no entry for this <uid,zoneid> pair. 651 * Allocate one. If we have to drop pidlock, check 652 * again. 
     */
    if (new == NULL) {
        /* Try without sleeping first, so pidlock can stay held. */
        new = (struct upcount *)kmem_alloc(sizeof (*new), KM_NOSLEEP);
        if (new == NULL) {
            mutex_exit(&pidlock);
            new = (struct upcount *)kmem_alloc(sizeof (*new),
                KM_SLEEP);
            mutex_enter(&pidlock);
            /* pidlock was dropped; the entry may exist now. */
            goto top;
        }
    }


    /*
     * On the assumption that a new user is going to do some
     * more forks, put the new upcount structure on the front.
     */
    upc = hupc;

    new->up_uid = uid;
    new->up_zoneid = zoneid;
    new->up_count = 1;
    new->up_next = *upc;

    *upc = new;
}

/*
 * Decrement the number of processes a given uid and zoneid has.
 * Frees the entry when its count reaches zero; panics if no entry
 * exists (an inc/dec imbalance).  Caller must hold pidlock.
 */
void
upcount_dec(uid_t uid, zoneid_t zoneid)
{
    struct upcount **upc;
    struct upcount *done;

    ASSERT(MUTEX_HELD(&pidlock));

    upc = &upc_hash[UPC_HASH(uid, zoneid)];
    while ((*upc) != NULL) {
        if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
            (*upc)->up_count--;
            if ((*upc)->up_count == 0) {
                done = *upc;
                *upc = (*upc)->up_next;
                kmem_free(done, sizeof (*done));
            }
            return;
        }
        upc = &(*upc)->up_next;
    }
    cmn_err(CE_PANIC, "decr_upcount-off the end");
}

/*
 * Returns the number of processes a uid has.
 * Non-existent uid's are assumed to have no processes.
 * Caller must hold pidlock.
 */
int
upcount_get(uid_t uid, zoneid_t zoneid)
{
    struct upcount *upc;

    ASSERT(MUTEX_HELD(&pidlock));

    upc = upc_hash[UPC_HASH(uid, zoneid)];
    while (upc != NULL) {
        if (upc->up_uid == uid && upc->up_zoneid == zoneid) {
            return (upc->up_count);
        }
        upc = upc->up_next;
    }
    return (0);
}