/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/proc.h>
#include <sys/kmem.h>
#include <sys/tuneable.h>
#include <sys/var.h>
#include <sys/cred.h>
#include <sys/systm.h>
#include <sys/prsystm.h>
#include <sys/vnode.h>
#include <sys/session.h>
#include <sys/cpuvar.h>
#include <sys/cmn_err.h>
#include <sys/bitmap.h>
#include <sys/debug.h>
#include <c2/audit.h>
#include <sys/zone.h>

/*
 * Directory entries for /proc.  Each procdir[] slot is either in use
 * (pe_proc points at the process occupying the slot) or on the free
 * list (pe_next links to the next free slot).  pid_entry() relies on
 * being able to tell the two apart by pointer range -- see below.
 */
union procent {
	proc_t *pe_proc;
	union procent *pe_next;
};

/*
 * Statically allocated pid structure for process 0 (sched), installed
 * into the hash table by pid_init().  pid_rele() asserts it is never
 * asked to free this entry.
 */
struct pid pid0 = {
	0,		/* pid_prinactive */
	1,		/* pid_pgorphaned */
	0,		/* pid_padding */
	0,		/* pid_prslot */
	0,		/* pid_id */
	NULL,		/* pid_pglink */
	NULL,		/* pid_link */
	3		/* pid_ref */
};

static int pid_hashlen = 4;	/* desired average hash chain length */
static int pid_hashsz;		/* number of buckets in the hash table */

/* pid_hashsz is a power of two, so this mask selects a bucket */
#define	HASHPID(pid)	(pidhash[((pid)&(pid_hashsz-1))])

extern uint_t nproc;
extern struct kmem_cache *process_cache;
static void	upcount_init(void);

kmutex_t	pidlock;	/* global process lock */
kmutex_t	pr_pidlock;	/* /proc global process lock */
kcondvar_t	*pr_pid_cv;	/* for /proc, one per process slot */
struct plock	*proc_lock;	/* persistent array of p_lock's */

/*
 * See the comment above pid_getlockslot() for a detailed explanation of this
 * constant.  Note that a PLOCK_SHIFT of 3 implies 64-byte coherence
 * granularity; if the coherence granularity is ever changed, this constant
 * should be modified to reflect the change to minimize proc_lock false
 * sharing (correctness, however, is guaranteed regardless of the coherence
 * granularity).
 */
#define	PLOCK_SHIFT	3

static kmutex_t	pidlinklock;	/* protects pidhash chains and procentfree */
static struct pid **pidhash;	/* pid -> struct pid hash table */
static pid_t minpid;		/* lowest pid pid_assign() will hand out */
static pid_t mpid;		/* last pid assigned (allocation cursor) */
static union procent *procdir;	/* the /proc slot directory */
static union procent *procentfree;	/* head of free-slot list */

/*
 * Look up the pid structure for 'pid' in the hash table.
 * Returns NULL if no process currently holds that id.
 * Caller must hold pidlinklock.
 */
static struct pid *
pid_lookup(pid_t pid)
{
	struct pid *pidp;

	ASSERT(MUTEX_HELD(&pidlinklock));

	for (pidp = HASHPID(pid); pidp; pidp = pidp->pid_link) {
		if (pidp->pid_id == pid) {
			ASSERT(pidp->pid_ref > 0);
			break;
		}
	}
	return (pidp);
}

/*
 * Establish the lowest pid that pid_assign() may hand out.  If the
 * jump_pid tuneable is set and exceeds the current cursor, jump the
 * allocation cursor up to it; otherwise start just past the cursor.
 * (jump_pid is declared elsewhere -- presumably a boot-time tuneable;
 * NOTE(review): confirm against sys/tuneable.h.)
 */
void
pid_setmin(void)
{
	if (jump_pid && jump_pid > mpid)
		minpid = mpid = jump_pid;
	else
		minpid = mpid + 1;
}

/*
 * When prslots are simply used as an index to determine a process' p_lock,
 * adjacent prslots share adjacent p_locks.  On machines where the size
 * of a mutex is smaller than that of a cache line (which, as of this writing,
 * is true for all machines on which Solaris runs), this can potentially
 * induce false sharing.  The standard solution for false sharing is to pad
 * out one's data structures (in this case, struct plock).  However,
 * given the size and (generally) sparse use of the proc_lock array, this
 * is suboptimal.  We therefore stride through the proc_lock array with
 * a stride of PLOCK_SHIFT.  PLOCK_SHIFT should be defined as:
 *
 *   log_2 (coherence_granularity / sizeof (kmutex_t))
 *
 * Under this scheme, false sharing is still possible -- but only when
 * the number of active processes is very large.  Note that the one-to-one
 * mapping between prslots and lockslots is maintained.
 */
static int
pid_getlockslot(int prslot)
{
	/* 'even' is v_proc rounded down to a multiple of 2^PLOCK_SHIFT */
	int even = (v.v_proc >> PLOCK_SHIFT) << PLOCK_SHIFT;
	int perlap = even >> PLOCK_SHIFT;

	/* slots beyond the strided region map to themselves */
	if (prslot >= even)
		return (prslot);

	return (((prslot % perlap) << PLOCK_SHIFT) + (prslot / perlap));
}

/*
 * This function assigns a pid for use in a fork request.  It allocates
 * a pid structure, tries to find an empty slot in the proc table,
 * and selects the process id.
 *
 * pid_assign() returns the new pid on success, -1 on failure.
 * (Failure means either no free /proc directory entry or the entire
 * [minpid, maxpid) id space is in use; the KM_SLEEP allocation itself
 * cannot fail.)
 */
pid_t
pid_assign(proc_t *prp)
{
	struct pid *pidp;
	union procent *pep;
	pid_t newpid, startpid;

	pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP);

	mutex_enter(&pidlinklock);
	if ((pep = procentfree) == NULL) {
		/*
		 * ran out of /proc directory entries
		 */
		goto failed;
	}

	/*
	 * Allocate a pid: advance the cursor, wrapping from maxpid back
	 * to minpid, until we find an id not in the hash -- or until we
	 * come all the way around to where we started.
	 */
	startpid = mpid;
	do {
		newpid = (++mpid == maxpid ? mpid = minpid : mpid);
	} while (pid_lookup(newpid) && newpid != startpid);

	if (newpid == startpid && pid_lookup(newpid)) {
		/* couldn't find a free pid */
		goto failed;
	}

	/* claim the /proc directory slot */
	procentfree = pep->pe_next;
	pep->pe_proc = prp;
	prp->p_pidp = pidp;

	/*
	 * Put pid into the pid hash table.
	 */
	pidp->pid_link = HASHPID(newpid);
	HASHPID(newpid) = pidp;
	pidp->pid_ref = 1;
	pidp->pid_id = newpid;
	pidp->pid_prslot = pep - procdir;
	/* stride the prslot to pick a false-sharing-resistant p_lock */
	prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)];
	mutex_exit(&pidlinklock);

	return (newpid);

failed:
	mutex_exit(&pidlinklock);
	kmem_free(pidp, sizeof (struct pid));
	return (-1);
}

/*
 * decrement the reference count for pid
 *
 * NOTE(review): this routine unconditionally unhashes and frees pidp;
 * presumably the PID_RELE() macro (defined elsewhere) performs the
 * actual decrement and only calls here when the count reaches zero --
 * confirm against the struct pid header.
 */
int
pid_rele(struct pid *pidp)
{
	struct pid **pidpp;

	mutex_enter(&pidlinklock);
	ASSERT(pidp != &pid0);

	/* unlink pidp from its hash chain; it must be present */
	pidpp = &HASHPID(pidp->pid_id);
	for (;;) {
		ASSERT(*pidpp != NULL);
		if (*pidpp == pidp)
			break;
		pidpp = &(*pidpp)->pid_link;
	}

	*pidpp = pidp->pid_link;
	mutex_exit(&pidlinklock);

	kmem_free(pidp, sizeof (*pidp));
	return (0);
}

/*
 * Mark the process's /proc entry inactive and return its procdir
 * slot to the free list.
 */
void
proc_entry_free(struct pid *pidp)
{
	mutex_enter(&pidlinklock);
	pidp->pid_prinactive = 1;
	procdir[pidp->pid_prslot].pe_next = procentfree;
	procentfree = &procdir[pidp->pid_prslot];
	mutex_exit(&pidlinklock);
}

/*
 * Final teardown of an exiting process: leave the process group and
 * session, release the /proc slot, unlink from the practive list,
 * drop the pid reference, and free the proc structure itself.
 * Caller must hold pidlock.
 */
void
pid_exit(proc_t *prp)
{
	struct pid *pidp;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Exit process group.  If it is NULL, it's because fork failed
	 * before calling pgjoin().
	 */
	ASSERT(prp->p_pgidp != NULL || prp->p_stat == SIDL);
	if (prp->p_pgidp != NULL)
		pgexit(prp);

	SESS_RELE(prp->p_sessp);

	pidp = prp->p_pidp;

	proc_entry_free(pidp);

#ifdef C2_AUDIT
	if (audit_active)
		audit_pfree(prp);
#endif

	/* unlink prp from the doubly-linked practive list */
	if (practive == prp) {
		practive = prp->p_next;
	}

	if (prp->p_next) {
		prp->p_next->p_prev = prp->p_prev;
	}
	if (prp->p_prev) {
		prp->p_prev->p_next = prp->p_next;
	}

	PID_RELE(pidp);

	mutex_destroy(&prp->p_crlock);
	kmem_cache_free(process_cache, prp);
	nproc--;
}

/*
 * Find a process visible from the specified zone given its process ID.
 * Returns NULL if the pid does not exist, its /proc entry is inactive,
 * or the process belongs to another zone.  Caller must hold pidlock.
 */
proc_t *
prfind_zone(pid_t pid, zoneid_t zoneid)
{
	struct pid *pidp;
	proc_t *p;

	ASSERT(MUTEX_HELD(&pidlock));

	mutex_enter(&pidlinklock);
	pidp = pid_lookup(pid);
	mutex_exit(&pidlinklock);
	if (pidp != NULL && pidp->pid_prinactive == 0) {
		p = procdir[pidp->pid_prslot].pe_proc;
		if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid)
			return (p);
	}
	return (NULL);
}

/*
 * Find a process given its process ID.  This obeys zone restrictions,
 * so if the caller is in a non-global zone it won't find processes
 * associated with other zones.  Use prfind_zone(pid, ALL_ZONES) to
 * bypass this restriction.
 */
proc_t *
prfind(pid_t pid)
{
	zoneid_t zoneid;

	if (INGLOBALZONE(curproc))
		zoneid = ALL_ZONES;
	else
		zoneid = getzoneid();
	return (prfind_zone(pid, zoneid));
}

/*
 * Zone-aware variant of pgfind(): return the head of the process group
 * 'pgid' only if it is visible from the given zone (or zoneid is
 * ALL_ZONES, or pgid is 0, or the group list is empty).  Caller must
 * hold pidlock.
 */
proc_t *
pgfind_zone(pid_t pgid, zoneid_t zoneid)
{
	struct pid *pidp;

	ASSERT(MUTEX_HELD(&pidlock));

	mutex_enter(&pidlinklock);
	pidp = pid_lookup(pgid);
	mutex_exit(&pidlinklock);
	if (pidp != NULL) {
		proc_t *p = pidp->pid_pglink;

		if (zoneid == ALL_ZONES || pgid == 0 || p == NULL ||
		    p->p_zone->zone_id == zoneid)
			return (p);
	}
	return (NULL);
}

/*
 * return the head of the list of processes whose process group ID is 'pgid',
 * or NULL, if no such process group
 */
proc_t *
pgfind(pid_t pgid)
{
	zoneid_t zoneid;

	if (INGLOBALZONE(curproc))
		zoneid = ALL_ZONES;
	else
		zoneid = getzoneid();
	return (pgfind_zone(pgid, zoneid));
}

/*
 * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK.
 * Returns the proc pointer on success, NULL on failure.  sprlock() is
 * really just a stripped-down version of pr_p_lock() to allow practive
 * walkers like dofusers() and dumpsys() to synchronize with /proc.
 *
 * On success the process's p_lock is held by the caller.  Note the
 * special panic path: when panicstr is set we return with p_lock held
 * but without setting P_PR_LOCK or waiting for it to clear.
 */
proc_t *
sprlock_zone(pid_t pid, zoneid_t zoneid)
{
	proc_t *p;
	kmutex_t *mp;

	/* retry loop: each cv_wait() forces a full re-lookup of the pid */
	for (;;) {
		mutex_enter(&pidlock);
		if ((p = prfind_zone(pid, zoneid)) == NULL) {
			mutex_exit(&pidlock);
			return (NULL);
		}
		/*
		 * p_lock is persistent, but p itself is not -- it could
		 * vanish during cv_wait().  Load p->p_lock now so we can
		 * drop it after cv_wait() without referencing p.
		 */
		mp = &p->p_lock;
		mutex_enter(mp);
		mutex_exit(&pidlock);
		/*
		 * If the process is in some half-baked state, fail.
		 */
		if (p->p_stat == SZOMB || p->p_stat == SIDL ||
		    p->p_tlist == NULL || (p->p_flag & SEXITLWPS)) {
			mutex_exit(mp);
			return (NULL);
		}
		if (panicstr)
			return (p);
		if (!(p->p_proc_flag & P_PR_LOCK))
			break;
		cv_wait(&pr_pid_cv[p->p_slot], mp);
		mutex_exit(mp);
	}
	p->p_proc_flag |= P_PR_LOCK;
	THREAD_KPRI_REQUEST();
	return (p);
}

/*
 * Zone-restricted wrapper for sprlock_zone(), analogous to prfind().
 */
proc_t *
sprlock(pid_t pid)
{
	zoneid_t zoneid;

	if (INGLOBALZONE(curproc))
		zoneid = ALL_ZONES;
	else
		zoneid = getzoneid();
	return (sprlock_zone(pid, zoneid));
}

/*
 * Acquire P_PR_LOCK on a process whose p_lock the caller already
 * holds, waiting (and dropping/reacquiring p_lock via cv_wait) while
 * another thread holds it.
 */
void
sprlock_proc(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	while (p->p_proc_flag & P_PR_LOCK) {
		cv_wait(&pr_pid_cv[p->p_slot], &p->p_lock);
	}

	p->p_proc_flag |= P_PR_LOCK;
	THREAD_KPRI_REQUEST();
}

/*
 * Release P_PR_LOCK and p_lock, waking one waiter.  During panic we
 * just drop p_lock, matching sprlock_zone()'s panic path, which never
 * set P_PR_LOCK.
 */
void
sprunlock(proc_t *p)
{
	if (panicstr) {
		mutex_exit(&p->p_lock);
		return;
	}

	ASSERT(p->p_proc_flag & P_PR_LOCK);
	ASSERT(MUTEX_HELD(&p->p_lock));

	cv_signal(&pr_pid_cv[p->p_slot]);
	p->p_proc_flag &= ~P_PR_LOCK;
	mutex_exit(&p->p_lock);
	THREAD_KPRI_RELEASE();
}

/*
 * One-time boot initialization: size and allocate the pid hash table,
 * the /proc slot directory, the per-slot condvars and the p_lock
 * array; install proc_sched (pid 0) in slot 0 and thread the remaining
 * slots onto the free list.
 */
void
pid_init(void)
{
	int i;

	/* power-of-two bucket count targeting pid_hashlen entries/chain */
	pid_hashsz = 1 << highbit(v.v_proc / pid_hashlen);

	pidhash = kmem_zalloc(sizeof (struct pid *) * pid_hashsz, KM_SLEEP);
	procdir = kmem_alloc(sizeof (union procent) * v.v_proc, KM_SLEEP);
	pr_pid_cv = kmem_zalloc(sizeof (kcondvar_t) * v.v_proc, KM_SLEEP);
	proc_lock = kmem_zalloc(sizeof (struct plock) * v.v_proc, KM_SLEEP);

	nproc = 1;
	practive = proc_sched;
	proc_sched->p_next = NULL;
	procdir[0].pe_proc = proc_sched;

	/* slots 1 .. v_proc-1 start out free */
	procentfree = &procdir[1];
	for (i = 1; i < v.v_proc - 1; i++)
		procdir[i].pe_next = &procdir[i+1];
	procdir[i].pe_next = NULL;

	HASHPID(0) = &pid0;

	upcount_init();
}

/*
 * Return the process occupying procdir slot 'slot', or NULL if the
 * slot is free or the process is still being born (SIDL).
 * Caller must hold pidlock.
 */
proc_t *
pid_entry(int slot)
{
	union procent *pep;
	proc_t *prp;

	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(slot >= 0 && slot < v.v_proc);

	/*
	 * A free slot's pe_next points at another procdir[] entry (or
	 * NULL); an occupied slot's pe_proc points outside the array.
	 * So a pointer landing inside procdir[] means the slot is free.
	 */
	pep = procdir[slot].pe_next;
	if (pep >= procdir && pep < &procdir[v.v_proc])
		return (NULL);
	prp = procdir[slot].pe_proc;
	if (prp != 0 && prp->p_stat == SIDL)
		return (NULL);
	return (prp);
}

/*
 * Send the specified signal to all processes whose process group ID is
 * equal to 'pgid'.  Lock order: pidlock, then pidlinklock for the
 * lookup only; the group list walk is protected by pidlock alone.
 */

void
signal(pid_t pgid, int sig)
{
	struct pid *pidp;
	proc_t *prp;

	mutex_enter(&pidlock);
	mutex_enter(&pidlinklock);
	if (pgid == 0 || (pidp = pid_lookup(pgid)) == NULL) {
		mutex_exit(&pidlinklock);
		mutex_exit(&pidlock);
		return;
	}
	mutex_exit(&pidlinklock);
	for (prp = pidp->pid_pglink; prp; prp = prp->p_pglink) {
		mutex_enter(&prp->p_lock);
		sigtoproc(prp, NULL, sig);
		mutex_exit(&prp->p_lock);
	}
	mutex_exit(&pidlock);
}

/*
 * Send the specified signal to the specified process, if its /proc
 * entry is still active.
 */

void
prsignal(struct pid *pidp, int sig)
{
	if (!(pidp->pid_prinactive))
		psignal(procdir[pidp->pid_prslot].pe_proc, sig);
}

#include <sys/sunddi.h>

/*
 * DDI/DKI interfaces for drivers to send signals to processes
 */

/*
 * obtain an opaque reference to a process for signaling
 */
void *
proc_ref(void)
{
	struct pid *pidp;

	mutex_enter(&pidlock);
	pidp = curproc->p_pidp;
	PID_HOLD(pidp);
	mutex_exit(&pidlock);

	return (pidp);
}

/*
 * release a reference to a process
 * - a process can exit even if a driver has a reference to it
 * - one proc_unref for every proc_ref
 */
void
proc_unref(void *pref)
{
	mutex_enter(&pidlock);
	PID_RELE((struct pid *)pref);
	mutex_exit(&pidlock);
}

/*
 * send a signal to a process
 *
 * - send the process the signal
 * - if the process went away, return a -1
 * - if the process is still there return 0
 *
 * NOTE(review): pid_prinactive is sampled after the (best-effort)
 * send without holding any lock, so the return value is advisory.
 */
int
proc_signal(void *pref, int sig)
{
	struct pid *pidp = pref;

	prsignal(pidp, sig);
	return (pidp->pid_prinactive ? -1 : 0);
}


static struct upcount **upc_hash;	/* a boot time allocated array */
static ulong_t upc_hashmask;
#define	UPC_HASH(x, y)	((ulong_t)(x ^ y) & upc_hashmask)

/*
 * Get us off the ground.  Called once at boot.
 */
void
upcount_init(void)
{
	ulong_t upc_hashsize;

	/*
	 * An entry per MB of memory is our current guess
	 */
	/*
	 * 2^20 is a meg, so shifting right by 20 - PAGESHIFT
	 * converts pages to megs (without overflowing a u_int
	 * if you have more than 4G of memory, like ptob(physmem)/1M
	 * would).
	 */
	upc_hashsize = (1 << highbit(physmem >> (20 - PAGESHIFT)));
	upc_hashmask = upc_hashsize - 1;
	upc_hash = kmem_zalloc(upc_hashsize * sizeof (struct upcount *),
	    KM_SLEEP);
}

/*
 * Increment the number of processes associated with a given uid and zoneid.
 * Caller must hold pidlock; the lock may be transiently dropped (and the
 * lookup retried) if a KM_SLEEP allocation is needed.
 */
void
upcount_inc(uid_t uid, zoneid_t zoneid)
{
	struct upcount **upc, **hupc;
	struct upcount *new;

	ASSERT(MUTEX_HELD(&pidlock));
	new = NULL;
	hupc = &upc_hash[UPC_HASH(uid, zoneid)];
top:
	upc = hupc;
	while ((*upc) != NULL) {
		if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
			(*upc)->up_count++;
			if (new) {
				/*
				 * did not need `new' afterall.
				 */
				kmem_free(new, sizeof (*new));
			}
			return;
		}
		upc = &(*upc)->up_next;
	}

	/*
	 * There is no entry for this <uid,zoneid> pair.
	 * Allocate one.  If we have to drop pidlock, check
	 * again.
	 */
	if (new == NULL) {
		new = (struct upcount *)kmem_alloc(sizeof (*new), KM_NOSLEEP);
		if (new == NULL) {
			mutex_exit(&pidlock);
			new = (struct upcount *)kmem_alloc(sizeof (*new),
			    KM_SLEEP);
			mutex_enter(&pidlock);
			goto top;
		}
	}


	/*
	 * On the assumption that a new user is going to do some
	 * more forks, put the new upcount structure on the front.
	 */
	upc = hupc;

	new->up_uid = uid;
	new->up_zoneid = zoneid;
	new->up_count = 1;
	new->up_next = *upc;

	*upc = new;
}

/*
 * Decrement the number of processes a given uid and zoneid has.
 * The entry is freed when its count drops to zero; a decrement for
 * a nonexistent entry is a fatal bookkeeping error.
 */
void
upcount_dec(uid_t uid, zoneid_t zoneid)
{
	struct upcount **upc;
	struct upcount *done;

	ASSERT(MUTEX_HELD(&pidlock));

	upc = &upc_hash[UPC_HASH(uid, zoneid)];
	while ((*upc) != NULL) {
		if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
			(*upc)->up_count--;
			if ((*upc)->up_count == 0) {
				done = *upc;
				*upc = (*upc)->up_next;
				kmem_free(done, sizeof (*done));
			}
			return;
		}
		upc = &(*upc)->up_next;
	}
	cmn_err(CE_PANIC, "decr_upcount-off the end");
}

/*
 * Returns the number of processes a uid has.
 * Non-existent uid's are assumed to have no processes.
 */
int
upcount_get(uid_t uid, zoneid_t zoneid)
{
	struct upcount *upc;

	ASSERT(MUTEX_HELD(&pidlock));

	upc = upc_hash[UPC_HASH(uid, zoneid)];
	while (upc != NULL) {
		if (upc->up_uid == uid && upc->up_zoneid == zoneid) {
			return (upc->up_count);
		}
		upc = upc->up_next;
	}
	return (0);
}