1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 /* 33 * Inter-Process Communication Semaphore Facility. 34 * 35 * See os/ipc.c for a description of common IPC functionality. 36 * 37 * Resource controls 38 * ----------------- 39 * 40 * Control: zone.max-sem-ids (rc_zone_semmni) 41 * Description: Maximum number of semaphore ids allowed a zone. 42 * 43 * When semget() is used to allocate a semaphore set, one id is 44 * allocated. If the id allocation doesn't succeed, semget() fails 45 * and errno is set to ENOSPC. Upon successful semctl(, IPC_RMID) 46 * the id is deallocated. 47 * 48 * Control: project.max-sem-ids (rc_project_semmni) 49 * Description: Maximum number of semaphore ids allowed a project. 50 * 51 * When semget() is used to allocate a semaphore set, one id is 52 * allocated. If the id allocation doesn't succeed, semget() fails 53 * and errno is set to ENOSPC. 
Upon successful semctl(, IPC_RMID) 54 * the id is deallocated. 55 * 56 * Control: process.max-sem-nsems (rc_process_semmsl) 57 * Description: Maximum number of semaphores allowed per semaphore set. 58 * 59 * When semget() is used to allocate a semaphore set, the size of the 60 * set is compared with this limit. If the number of semaphores 61 * exceeds the limit, semget() fails and errno is set to EINVAL. 62 * 63 * Control: process.max-sem-ops (rc_process_semopm) 64 * Description: Maximum number of semaphore operations allowed per 65 * semop call. 66 * 67 * When semget() successfully allocates a semaphore set, the minimum 68 * enforced value of this limit is used to initialize the 69 * "system-imposed maximum" number of operations a semop() call for 70 * this set can perform. 71 * 72 * Undo structures 73 * --------------- 74 * 75 * Removing the undo structure tunables involved a serious redesign of 76 * how they were implemented. There is now one undo structure for 77 * every process/semaphore array combination (lazily allocated, of 78 * course), and each is equal in size to the semaphore it corresponds 79 * to. To avoid scalability and performance problems, the undo 80 * structures are stored in two places: a per-process AVL tree sorted 81 * by ksemid pointer (p_semacct, protected by p_lock) and an unsorted 82 * per-semaphore linked list (sem_undos, protected by the semaphore's 83 * ID lock). The former is used by semop, where a lookup is performed 84 * once and cached if SEM_UNDO is specified for any of the operations, 85 * and at process exit where the undoable operations are rolled back. 86 * The latter is used when removing the semaphore, so the undo 87 * structures can be removed from the appropriate processes' trees. 88 * 89 * The undo structure itself contains pointers to the ksemid and proc 90 * to which it corresponds, a list node, an AVL node, and an array of 91 * adjust-on-exit (AOE) values. 
When an undo structure is allocated it 92 * is immediately added to both the process's tree and the semaphore's 93 * list. Lastly, the reference count on the semaphore is increased. 94 * 95 * Avoiding a lock ordering violation between p_lock and the ID lock, 96 * wont to occur when there is a race between a process exiting and the 97 * removal of a semaphore, mandates the delicate dance that exists 98 * between semexit and sem_rmid. 99 * 100 * sem_rmid, holding the ID lock, iterates through all undo structures 101 * and for each takes the appropriate process's p_lock and checks to 102 * see if p_semacct is NULL. If it is, it skips that undo structure 103 * and continues to the next. Otherwise, it removes the undo structure 104 * from both the AVL tree and the semaphore's list, and releases the 105 * hold that the undo structure had on the semaphore. 106 * 107 * The important other half of this is semexit, which will immediately 108 * take p_lock, obtain the AVL pointer, clear p_semacct, and drop 109 * p_lock. From this point on it is semexit's responsibility to clean 110 * up all undo structures found in the tree -- a coexecuting sem_rmid 111 * will see the NULL p_semacct and skip that undo structure. It walks 112 * the AVL tree (using avl_destroy_nodes) and for each undo structure 113 * takes the appropriate semaphore's ID lock (always legal since the 114 * undo structure has a hold on the semaphore), updates all semaphores 115 * with non-zero AOE values, and removes the structure from the 116 * semaphore's list. It then drops the structure's reference on the 117 * semaphore, drops the ID lock, and frees the undo structure. 
118 */ 119 120 #include <sys/types.h> 121 #include <sys/t_lock.h> 122 #include <sys/param.h> 123 #include <sys/systm.h> 124 #include <sys/sysmacros.h> 125 #include <sys/cred.h> 126 #include <sys/vmem.h> 127 #include <sys/kmem.h> 128 #include <sys/errno.h> 129 #include <sys/time.h> 130 #include <sys/ipc.h> 131 #include <sys/ipc_impl.h> 132 #include <sys/sem.h> 133 #include <sys/sem_impl.h> 134 #include <sys/user.h> 135 #include <sys/proc.h> 136 #include <sys/cpuvar.h> 137 #include <sys/debug.h> 138 #include <sys/var.h> 139 #include <sys/cmn_err.h> 140 #include <sys/modctl.h> 141 #include <sys/syscall.h> 142 #include <sys/avl.h> 143 #include <sys/list.h> 144 #include <sys/zone.h> 145 146 #include <c2/audit.h> 147 148 extern rctl_hndl_t rc_zone_semmni; 149 extern rctl_hndl_t rc_project_semmni; 150 extern rctl_hndl_t rc_process_semmsl; 151 extern rctl_hndl_t rc_process_semopm; 152 static ipc_service_t *sem_svc; 153 static zone_key_t sem_zone_key; 154 155 /* 156 * The following tunables are obsolete. Though for compatibility we 157 * still read and interpret seminfo_semmsl, seminfo_semopm and 158 * seminfo_semmni (see os/project.c and os/rctl_proc.c), the preferred 159 * mechanism for administrating the IPC Semaphore facility is through 160 * the resource controls described at the top of this file. 
161 */ 162 int seminfo_semaem = 16384; /* (obsolete) */ 163 int seminfo_semmap = 10; /* (obsolete) */ 164 int seminfo_semmni = 10; /* (obsolete) */ 165 int seminfo_semmns = 60; /* (obsolete) */ 166 int seminfo_semmnu = 30; /* (obsolete) */ 167 int seminfo_semmsl = 25; /* (obsolete) */ 168 int seminfo_semopm = 10; /* (obsolete) */ 169 int seminfo_semume = 10; /* (obsolete) */ 170 int seminfo_semusz = 96; /* (obsolete) */ 171 int seminfo_semvmx = 32767; /* (obsolete) */ 172 173 #define SEM_MAXUCOPS 4096 /* max # of unchecked ops per semop call */ 174 #define SEM_UNDOSZ(n) (sizeof (struct sem_undo) + (n - 1) * sizeof (int)) 175 176 static int semsys(int opcode, uintptr_t a0, uintptr_t a1, 177 uintptr_t a2, uintptr_t a3); 178 static void sem_dtor(kipc_perm_t *); 179 static void sem_rmid(kipc_perm_t *); 180 static void sem_remove_zone(zoneid_t, void *); 181 182 static struct sysent ipcsem_sysent = { 183 5, 184 SE_NOUNLOAD | SE_ARGC | SE_32RVAL1, 185 semsys 186 }; 187 188 /* 189 * Module linkage information for the kernel. 
190 */ 191 static struct modlsys modlsys = { 192 &mod_syscallops, "System V semaphore facility", &ipcsem_sysent 193 }; 194 195 #ifdef _SYSCALL32_IMPL 196 static struct modlsys modlsys32 = { 197 &mod_syscallops32, "32-bit System V semaphore facility", &ipcsem_sysent 198 }; 199 #endif 200 201 static struct modlinkage modlinkage = { 202 MODREV_1, 203 &modlsys, 204 #ifdef _SYSCALL32_IMPL 205 &modlsys32, 206 #endif 207 NULL 208 }; 209 210 211 int 212 _init(void) 213 { 214 int result; 215 216 sem_svc = ipcs_create("semids", rc_project_semmni, rc_zone_semmni, 217 sizeof (ksemid_t), sem_dtor, sem_rmid, AT_IPC_SEM, 218 offsetof(ipc_rqty_t, ipcq_semmni)); 219 zone_key_create(&sem_zone_key, NULL, sem_remove_zone, NULL); 220 221 if ((result = mod_install(&modlinkage)) == 0) 222 return (0); 223 224 (void) zone_key_delete(sem_zone_key); 225 ipcs_destroy(sem_svc); 226 227 return (result); 228 } 229 230 int 231 _fini(void) 232 { 233 return (EBUSY); 234 } 235 236 int 237 _info(struct modinfo *modinfop) 238 { 239 return (mod_info(&modlinkage, modinfop)); 240 } 241 242 static void 243 sem_dtor(kipc_perm_t *perm) 244 { 245 ksemid_t *sp = (ksemid_t *)perm; 246 247 kmem_free(sp->sem_base, 248 P2ROUNDUP(sp->sem_nsems * sizeof (struct sem), 64)); 249 list_destroy(&sp->sem_undos); 250 } 251 252 /* 253 * sem_undo_add - Create or update adjust on exit entry. 254 */ 255 static int 256 sem_undo_add(short val, ushort_t num, struct sem_undo *undo) 257 { 258 int newval = undo->un_aoe[num] - val; 259 260 if (newval > USHRT_MAX || newval < -USHRT_MAX) 261 return (ERANGE); 262 undo->un_aoe[num] = newval; 263 264 return (0); 265 } 266 267 /* 268 * sem_undo_clear - clears all undo entries for specified semaphores 269 * 270 * Used when semaphores are reset by SETVAL or SETALL. 
271 */ 272 static void 273 sem_undo_clear(ksemid_t *sp, ushort_t low, ushort_t high) 274 { 275 struct sem_undo *undo; 276 int i; 277 278 ASSERT(low <= high); 279 ASSERT(high < sp->sem_nsems); 280 281 for (undo = list_head(&sp->sem_undos); undo; 282 undo = list_next(&sp->sem_undos, undo)) 283 for (i = low; i <= high; i++) 284 undo->un_aoe[i] = 0; 285 } 286 287 /* 288 * sem_rollback - roll back work done so far if unable to complete operation 289 */ 290 static void 291 sem_rollback(ksemid_t *sp, struct sembuf *op, int n, struct sem_undo *undo) 292 { 293 struct sem *semp; /* semaphore ptr */ 294 295 for (op += n - 1; n--; op--) { 296 if (op->sem_op == 0) 297 continue; 298 semp = &sp->sem_base[op->sem_num]; 299 semp->semval -= op->sem_op; 300 if (op->sem_flg & SEM_UNDO) { 301 ASSERT(undo != NULL); 302 (void) sem_undo_add(-op->sem_op, op->sem_num, undo); 303 } 304 } 305 } 306 307 static void 308 sem_rmid(kipc_perm_t *perm) 309 { 310 ksemid_t *sp = (ksemid_t *)perm; 311 struct sem *semp; 312 struct sem_undo *undo; 313 size_t size = SEM_UNDOSZ(sp->sem_nsems); 314 int i; 315 316 /*LINTED*/ 317 while (undo = list_head(&sp->sem_undos)) { 318 list_remove(&sp->sem_undos, undo); 319 mutex_enter(&undo->un_proc->p_lock); 320 if (undo->un_proc->p_semacct == NULL) { 321 mutex_exit(&undo->un_proc->p_lock); 322 continue; 323 } 324 avl_remove(undo->un_proc->p_semacct, undo); 325 mutex_exit(&undo->un_proc->p_lock); 326 kmem_free(undo, size); 327 ipc_rele_locked(sem_svc, (kipc_perm_t *)sp); 328 } 329 330 for (i = 0; i < sp->sem_nsems; i++) { 331 semp = &sp->sem_base[i]; 332 semp->semval = semp->sempid = 0; 333 if (semp->semncnt) { 334 cv_broadcast(&semp->semncnt_cv); 335 semp->semncnt = 0; 336 } 337 if (semp->semzcnt) { 338 cv_broadcast(&semp->semzcnt_cv); 339 semp->semzcnt = 0; 340 } 341 } 342 } 343 344 /* 345 * semctl - Semctl system call. 
346 */ 347 static int 348 semctl(int semid, uint_t semnum, int cmd, uintptr_t arg) 349 { 350 ksemid_t *sp; /* ptr to semaphore header */ 351 struct sem *p; /* ptr to semaphore */ 352 unsigned int i; /* loop control */ 353 ushort_t *vals, *vp; 354 size_t vsize = 0; 355 int error = 0; 356 int retval = 0; 357 struct cred *cr; 358 kmutex_t *lock; 359 model_t mdl = get_udatamodel(); 360 STRUCT_DECL(semid_ds, sid); 361 struct semid_ds64 ds64; 362 363 STRUCT_INIT(sid, mdl); 364 cr = CRED(); 365 366 /* 367 * Perform pre- or non-lookup actions (e.g. copyins, RMID). 368 */ 369 switch (cmd) { 370 case IPC_SET: 371 if (copyin((void *)arg, STRUCT_BUF(sid), STRUCT_SIZE(sid))) 372 return (set_errno(EFAULT)); 373 break; 374 375 case IPC_SET64: 376 if (copyin((void *)arg, &ds64, sizeof (struct semid_ds64))) 377 return (set_errno(EFAULT)); 378 break; 379 380 case SETALL: 381 if ((lock = ipc_lookup(sem_svc, semid, 382 (kipc_perm_t **)&sp)) == NULL) 383 return (set_errno(EINVAL)); 384 vsize = sp->sem_nsems * sizeof (*vals); 385 mutex_exit(lock); 386 387 /* allocate space to hold all semaphore values */ 388 vals = kmem_alloc(vsize, KM_SLEEP); 389 390 if (copyin((void *)arg, vals, vsize)) { 391 kmem_free(vals, vsize); 392 return (set_errno(EFAULT)); 393 } 394 break; 395 396 case IPC_RMID: 397 if (error = ipc_rmid(sem_svc, semid, cr)) 398 return (set_errno(error)); 399 return (0); 400 } 401 402 if ((lock = ipc_lookup(sem_svc, semid, (kipc_perm_t **)&sp)) == NULL) { 403 if (vsize != 0) 404 kmem_free(vals, vsize); 405 return (set_errno(EINVAL)); 406 } 407 switch (cmd) { 408 /* Set ownership and permissions. */ 409 case IPC_SET: 410 411 if (error = ipcperm_set(sem_svc, cr, &sp->sem_perm, 412 &STRUCT_BUF(sid)->sem_perm, mdl)) { 413 mutex_exit(lock); 414 return (set_errno(error)); 415 } 416 sp->sem_ctime = gethrestime_sec(); 417 mutex_exit(lock); 418 return (0); 419 420 /* Get semaphore data structure. 
*/ 421 case IPC_STAT: 422 423 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 424 mutex_exit(lock); 425 return (set_errno(error)); 426 } 427 428 ipcperm_stat(&STRUCT_BUF(sid)->sem_perm, &sp->sem_perm, mdl); 429 STRUCT_FSETP(sid, sem_base, NULL); /* kernel addr */ 430 STRUCT_FSET(sid, sem_nsems, sp->sem_nsems); 431 STRUCT_FSET(sid, sem_otime, sp->sem_otime); 432 STRUCT_FSET(sid, sem_ctime, sp->sem_ctime); 433 STRUCT_FSET(sid, sem_binary, sp->sem_binary); 434 mutex_exit(lock); 435 436 if (copyout(STRUCT_BUF(sid), (void *)arg, STRUCT_SIZE(sid))) 437 return (set_errno(EFAULT)); 438 return (0); 439 440 case IPC_SET64: 441 442 if (error = ipcperm_set64(sem_svc, cr, &sp->sem_perm, 443 &ds64.semx_perm)) { 444 mutex_exit(lock); 445 return (set_errno(error)); 446 } 447 sp->sem_ctime = gethrestime_sec(); 448 mutex_exit(lock); 449 return (0); 450 451 case IPC_STAT64: 452 453 ipcperm_stat64(&ds64.semx_perm, &sp->sem_perm); 454 ds64.semx_nsems = sp->sem_nsems; 455 ds64.semx_otime = sp->sem_otime; 456 ds64.semx_ctime = sp->sem_ctime; 457 458 mutex_exit(lock); 459 if (copyout(&ds64, (void *)arg, sizeof (struct semid_ds64))) 460 return (set_errno(EFAULT)); 461 462 return (0); 463 464 /* Get # of processes sleeping for greater semval. */ 465 case GETNCNT: 466 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 467 mutex_exit(lock); 468 return (set_errno(error)); 469 } 470 if (semnum >= sp->sem_nsems) { 471 mutex_exit(lock); 472 return (set_errno(EINVAL)); 473 } 474 retval = sp->sem_base[semnum].semncnt; 475 mutex_exit(lock); 476 return (retval); 477 478 /* Get pid of last process to operate on semaphore. */ 479 case GETPID: 480 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 481 mutex_exit(lock); 482 return (set_errno(error)); 483 } 484 if (semnum >= sp->sem_nsems) { 485 mutex_exit(lock); 486 return (set_errno(EINVAL)); 487 } 488 retval = sp->sem_base[semnum].sempid; 489 mutex_exit(lock); 490 return (retval); 491 492 /* Get semval of one semaphore. 
*/ 493 case GETVAL: 494 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 495 mutex_exit(lock); 496 return (set_errno(error)); 497 } 498 if (semnum >= sp->sem_nsems) { 499 mutex_exit(lock); 500 return (set_errno(EINVAL)); 501 } 502 retval = sp->sem_base[semnum].semval; 503 mutex_exit(lock); 504 return (retval); 505 506 /* Get all semvals in set. */ 507 case GETALL: 508 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 509 mutex_exit(lock); 510 return (set_errno(error)); 511 } 512 513 /* allocate space to hold all semaphore values */ 514 vsize = sp->sem_nsems * sizeof (*vals); 515 vals = vp = kmem_alloc(vsize, KM_SLEEP); 516 517 for (i = sp->sem_nsems, p = sp->sem_base; i--; p++, vp++) 518 bcopy(&p->semval, vp, sizeof (p->semval)); 519 520 mutex_exit(lock); 521 522 if (copyout((void *)vals, (void *)arg, vsize)) { 523 kmem_free(vals, vsize); 524 return (set_errno(EFAULT)); 525 } 526 527 kmem_free(vals, vsize); 528 return (0); 529 530 /* Get # of processes sleeping for semval to become zero. */ 531 case GETZCNT: 532 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 533 mutex_exit(lock); 534 return (set_errno(error)); 535 } 536 if (semnum >= sp->sem_nsems) { 537 mutex_exit(lock); 538 return (set_errno(EINVAL)); 539 } 540 retval = sp->sem_base[semnum].semzcnt; 541 mutex_exit(lock); 542 return (retval); 543 544 /* Set semval of one semaphore. 
*/ 545 case SETVAL: 546 if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) { 547 mutex_exit(lock); 548 return (set_errno(error)); 549 } 550 if (semnum >= sp->sem_nsems) { 551 mutex_exit(lock); 552 return (set_errno(EINVAL)); 553 } 554 if ((uint_t)arg > USHRT_MAX) { 555 mutex_exit(lock); 556 return (set_errno(ERANGE)); 557 } 558 p = &sp->sem_base[semnum]; 559 if ((p->semval = (ushort_t)arg) != 0) { 560 if (p->semncnt) { 561 cv_broadcast(&p->semncnt_cv); 562 } 563 } else if (p->semzcnt) { 564 cv_broadcast(&p->semzcnt_cv); 565 } 566 p->sempid = curproc->p_pid; 567 sem_undo_clear(sp, (ushort_t)semnum, (ushort_t)semnum); 568 mutex_exit(lock); 569 return (0); 570 571 /* Set semvals of all semaphores in set. */ 572 case SETALL: 573 /* Check if semaphore set has been deleted and reallocated. */ 574 if (sp->sem_nsems * sizeof (*vals) != vsize) { 575 error = set_errno(EINVAL); 576 goto seterr; 577 } 578 if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) { 579 error = set_errno(error); 580 goto seterr; 581 } 582 sem_undo_clear(sp, 0, sp->sem_nsems - 1); 583 for (i = 0, p = sp->sem_base; i < sp->sem_nsems; 584 (p++)->sempid = curproc->p_pid) { 585 if ((p->semval = vals[i++]) != 0) { 586 if (p->semncnt) { 587 cv_broadcast(&p->semncnt_cv); 588 } 589 } else if (p->semzcnt) { 590 cv_broadcast(&p->semzcnt_cv); 591 } 592 } 593 seterr: 594 mutex_exit(lock); 595 kmem_free(vals, vsize); 596 return (error); 597 598 default: 599 mutex_exit(lock); 600 return (set_errno(EINVAL)); 601 } 602 603 /* NOTREACHED */ 604 } 605 606 /* 607 * semexit - Called by exit() to clean up on process exit. 
608 */ 609 void 610 semexit(proc_t *pp) 611 { 612 avl_tree_t *tree; 613 struct sem_undo *undo; 614 void *cookie = NULL; 615 616 mutex_enter(&pp->p_lock); 617 tree = pp->p_semacct; 618 pp->p_semacct = NULL; 619 mutex_exit(&pp->p_lock); 620 621 while (undo = avl_destroy_nodes(tree, &cookie)) { 622 ksemid_t *sp = undo->un_sp; 623 size_t size = SEM_UNDOSZ(sp->sem_nsems); 624 int i; 625 626 (void) ipc_lock(sem_svc, sp->sem_perm.ipc_id); 627 if (!IPC_FREE(&sp->sem_perm)) { 628 for (i = 0; i < sp->sem_nsems; i++) { 629 int adj = undo->un_aoe[i]; 630 if (adj) { 631 struct sem *semp = &sp->sem_base[i]; 632 int v = (int)semp->semval + adj; 633 634 if (v < 0 || v > USHRT_MAX) 635 continue; 636 semp->semval = (ushort_t)v; 637 if (v == 0 && semp->semzcnt) 638 cv_broadcast(&semp->semzcnt_cv); 639 if (adj > 0 && semp->semncnt) 640 cv_broadcast(&semp->semncnt_cv); 641 } 642 } 643 list_remove(&sp->sem_undos, undo); 644 } 645 ipc_rele(sem_svc, (kipc_perm_t *)sp); 646 kmem_free(undo, size); 647 } 648 649 avl_destroy(tree); 650 kmem_free(tree, sizeof (avl_tree_t)); 651 } 652 653 /* 654 * Remove all semaphores associated with a given zone. Called by 655 * zone_shutdown when the zone is halted. 656 */ 657 /*ARGSUSED1*/ 658 static void 659 sem_remove_zone(zoneid_t zoneid, void *arg) 660 { 661 ipc_remove_zone(sem_svc, zoneid); 662 } 663 664 /* 665 * semget - Semget system call. 666 */ 667 static int 668 semget(key_t key, int nsems, int semflg) 669 { 670 ksemid_t *sp; 671 kmutex_t *lock; 672 int id, error; 673 proc_t *pp = curproc; 674 675 top: 676 if (error = ipc_get(sem_svc, key, semflg, (kipc_perm_t **)&sp, &lock)) 677 return (set_errno(error)); 678 679 if (!IPC_FREE(&sp->sem_perm)) { 680 /* 681 * A semaphore with the requested key exists. 682 */ 683 if (!((nsems >= 0) && (nsems <= sp->sem_nsems))) { 684 mutex_exit(lock); 685 return (set_errno(EINVAL)); 686 } 687 } else { 688 /* 689 * This is a new semaphore set. Finish initialization. 
690 */ 691 if (nsems <= 0 || (rctl_test(rc_process_semmsl, pp->p_rctls, pp, 692 nsems, RCA_SAFE) & RCT_DENY)) { 693 mutex_exit(lock); 694 mutex_exit(&pp->p_lock); 695 ipc_cleanup(sem_svc, (kipc_perm_t *)sp); 696 return (set_errno(EINVAL)); 697 } 698 mutex_exit(lock); 699 mutex_exit(&pp->p_lock); 700 701 /* 702 * We round the allocation up to coherency granularity 703 * so that multiple semaphore allocations won't result 704 * in the false sharing of their sem structures. 705 */ 706 sp->sem_base = 707 kmem_zalloc(P2ROUNDUP(nsems * sizeof (struct sem), 64), 708 KM_SLEEP); 709 sp->sem_binary = (nsems == 1); 710 sp->sem_nsems = (ushort_t)nsems; 711 sp->sem_ctime = gethrestime_sec(); 712 sp->sem_otime = 0; 713 list_create(&sp->sem_undos, sizeof (struct sem_undo), 714 offsetof(struct sem_undo, un_list)); 715 716 if (error = ipc_commit_begin(sem_svc, key, semflg, 717 (kipc_perm_t *)sp)) { 718 if (error == EAGAIN) 719 goto top; 720 return (set_errno(error)); 721 } 722 sp->sem_maxops = 723 rctl_enforced_value(rc_process_semopm, pp->p_rctls, pp); 724 if (rctl_test(rc_process_semmsl, pp->p_rctls, pp, nsems, 725 RCA_SAFE) & RCT_DENY) { 726 ipc_cleanup(sem_svc, (kipc_perm_t *)sp); 727 return (set_errno(EINVAL)); 728 } 729 lock = ipc_commit_end(sem_svc, &sp->sem_perm); 730 } 731 #ifdef C2_AUDIT 732 if (audit_active) 733 audit_ipcget(AT_IPC_SEM, (void *)sp); 734 #endif 735 id = sp->sem_perm.ipc_id; 736 mutex_exit(lock); 737 return (id); 738 } 739 740 /* 741 * semids system call. 742 */ 743 static int 744 semids(int *buf, uint_t nids, uint_t *pnids) 745 { 746 int error; 747 748 if (error = ipc_ids(sem_svc, buf, nids, pnids)) 749 return (set_errno(error)); 750 751 return (0); 752 } 753 754 755 /* 756 * Helper function for semop - copies in the provided timespec and 757 * computes the absolute future time after which we must return. 
758 */ 759 static int 760 compute_timeout(timespec_t **tsp, timespec_t *ts, timespec_t *now, 761 timespec_t *timeout) 762 { 763 model_t datamodel = get_udatamodel(); 764 765 if (datamodel == DATAMODEL_NATIVE) { 766 if (copyin(timeout, ts, sizeof (timespec_t))) 767 return (EFAULT); 768 } else { 769 timespec32_t ts32; 770 771 if (copyin(timeout, &ts32, sizeof (timespec32_t))) 772 return (EFAULT); 773 TIMESPEC32_TO_TIMESPEC(ts, &ts32) 774 } 775 776 if (itimerspecfix(ts)) 777 return (EINVAL); 778 779 /* 780 * Convert the timespec value into absolute time. 781 */ 782 timespecadd(ts, now); 783 *tsp = ts; 784 785 return (0); 786 } 787 788 /* 789 * Undo structure comparator. We sort based on ksemid_t pointer. 790 */ 791 static int 792 sem_undo_compar(const void *x, const void *y) 793 { 794 struct sem_undo *undo1 = (struct sem_undo *)x; 795 struct sem_undo *undo2 = (struct sem_undo *)y; 796 797 if (undo1->un_sp < undo2->un_sp) 798 return (-1); 799 if (undo1->un_sp > undo2->un_sp) 800 return (1); 801 return (0); 802 } 803 804 /* 805 * Helper function for semop - creates an undo structure and adds it to 806 * the process's avl tree and the semaphore's list. 
 */
/*
 * Arguments:
 *	pp	 - process that will own the undo structure
 *	sp	 - semaphore set the undo structure is for
 *	lock	 - in: the set's ID lock, held by the caller; out: the lock
 *		   returned by ipc_lock(), held on return (including the
 *		   EIDRM return)
 *	template - search key for the process's tree (un_sp set to sp)
 *	un	 - out: the undo structure to use
 *
 * Returns 0 on success or EIDRM if the set was removed while the ID
 * lock was dropped.
 *
 * The ID lock is dropped on entry so that the KM_SLEEP allocations
 * happen without it, then re-taken before the set is examined again.
 *
 * NOTE(review): sp is dereferenced after the ID lock has been dropped,
 * so the caller must ensure sp cannot be freed in that window (e.g. by
 * holding a reference on the set) -- confirm against the caller.
 */
static int
sem_undo_alloc(proc_t *pp, ksemid_t *sp, kmutex_t **lock,
	struct sem_undo *template, struct sem_undo **un)
{
	size_t size;
	struct sem_undo *undo;
	avl_tree_t *tree = NULL;
	avl_index_t where;

	mutex_exit(*lock);

	size = SEM_UNDOSZ(sp->sem_nsems);
	undo = kmem_zalloc(size, KM_SLEEP);
	undo->un_proc = pp;
	undo->un_sp = sp;

	/*
	 * Unlocked peek at p_semacct: pre-allocate a tree if the process
	 * appears to have none.  The decision is re-made under p_lock
	 * below.
	 */
	if (pp->p_semacct == NULL)
		tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);

	*lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id);
	if (IPC_FREE(&sp->sem_perm)) {
		/* The set was removed while we were unlocked. */
		kmem_free(undo, size);
		if (tree)
			kmem_free(tree, sizeof (avl_tree_t));
		return (EIDRM);
	}

	mutex_enter(&pp->p_lock);
	if (tree) {
		if (pp->p_semacct == NULL) {
			avl_create(tree, sem_undo_compar,
			    sizeof (struct sem_undo),
			    offsetof(struct sem_undo, un_avl));
			pp->p_semacct = tree;
		} else {
			/* A tree was installed meanwhile; ours is surplus. */
			kmem_free(tree, sizeof (avl_tree_t));
		}
	}

	if (*un = avl_find(pp->p_semacct, template, &where)) {
		/* An undo structure for this set already exists; use it. */
		mutex_exit(&pp->p_lock);
		kmem_free(undo, size);
	} else {
		/*
		 * Publish the new structure in the process's tree and on
		 * the set's list, and take the reference on the set that
		 * the structure accounts for.
		 */
		*un = undo;
		avl_insert(pp->p_semacct, undo, where);
		mutex_exit(&pp->p_lock);
		list_insert_head(&sp->sem_undos, undo);
		ipc_hold(sem_svc, (kipc_perm_t *)sp);
	}


	return (0);
}

/*
 * semop - Semop system call.
864 */ 865 static int 866 semop(int semid, struct sembuf *sops, size_t nsops, timespec_t *timeout) 867 { 868 ksemid_t *sp = NULL; 869 kmutex_t *lock; 870 struct sembuf *op; /* ptr to operation */ 871 int i; /* loop control */ 872 struct sem *semp; /* ptr to semaphore */ 873 int error = 0; 874 struct sembuf *uops; /* ptr to copy of user ops */ 875 struct sembuf x_sem; /* avoid kmem_alloc's */ 876 timespec_t now, ts, *tsp = NULL; 877 int timecheck = 0; 878 int cvres, needundo, mode; 879 struct sem_undo *undo; 880 proc_t *pp = curproc; 881 int held = 0; 882 883 CPU_STATS_ADDQ(CPU, sys, sema, 1); /* bump semaphore op count */ 884 885 /* 886 * To avoid the cost of copying in 'timeout' in the common 887 * case, we could only grab the time here and defer the copyin 888 * and associated computations until we are about to block. 889 * 890 * The down side to this is that we would then have to spin 891 * some goto top nonsense to avoid the copyin behind the semid 892 * lock. As a common use of timed semaphores is as an explicit 893 * blocking mechanism, this could incur a greater penalty. 894 * 895 * If we eventually decide that this would be a wise route to 896 * take, the deferrable functionality is completely contained 897 * in 'compute_timeout', and the interface is defined such that 898 * we can legally not validate 'timeout' if it is unused. 899 */ 900 if (timeout != NULL) { 901 timecheck = timechanged; 902 gethrestime(&now); 903 if (error = compute_timeout(&tsp, &ts, &now, timeout)) 904 return (set_errno(error)); 905 } 906 907 /* 908 * Allocate space to hold the vector of semaphore ops. If 909 * there is only 1 operation we use a preallocated buffer on 910 * the stack for speed. 911 * 912 * Since we don't want to allow the user to allocate an 913 * arbitrary amount of kernel memory, we need to check against 914 * the number of operations allowed by the semaphore. We only 915 * bother doing this if the number of operations is larger than 916 * SEM_MAXUCOPS. 
917 */ 918 if (nsops == 1) 919 uops = &x_sem; 920 else if (nsops == 0) 921 return (0); 922 else if (nsops <= SEM_MAXUCOPS) 923 uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP); 924 925 if (nsops > SEM_MAXUCOPS) { 926 if ((lock = ipc_lookup(sem_svc, semid, 927 (kipc_perm_t **)&sp)) == NULL) 928 return (set_errno(EFAULT)); 929 930 if (nsops > sp->sem_maxops) { 931 mutex_exit(lock); 932 return (set_errno(E2BIG)); 933 } 934 held = 1; 935 ipc_hold(sem_svc, (kipc_perm_t *)sp); 936 mutex_exit(lock); 937 938 uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP); 939 if (copyin(sops, uops, nsops * sizeof (*op))) { 940 error = EFAULT; 941 (void) ipc_lock(sem_svc, sp->sem_perm.ipc_id); 942 goto semoperr; 943 } 944 945 lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id); 946 if (IPC_FREE(&sp->sem_perm)) { 947 error = EIDRM; 948 goto semoperr; 949 } 950 } else { 951 /* 952 * This could be interleaved with the above code, but 953 * keeping them separate improves readability. 954 */ 955 if (copyin(sops, uops, nsops * sizeof (*op))) { 956 error = EFAULT; 957 goto semoperr_unlocked; 958 } 959 960 if ((lock = ipc_lookup(sem_svc, semid, 961 (kipc_perm_t **)&sp)) == NULL) { 962 error = EINVAL; 963 goto semoperr_unlocked; 964 } 965 966 if (nsops > sp->sem_maxops) { 967 error = E2BIG; 968 goto semoperr; 969 } 970 } 971 972 /* 973 * Scan all operations. Verify that sem #s are in range and 974 * this process is allowed the requested operations. If any 975 * operations are marked SEM_UNDO, find (or allocate) the undo 976 * structure for this process and semaphore. 977 */ 978 needundo = 0; 979 mode = 0; 980 for (i = 0, op = uops; i++ < nsops; op++) { 981 mode |= op->sem_op ? 
SEM_A : SEM_R; 982 if (op->sem_num >= sp->sem_nsems) { 983 error = EFBIG; 984 goto semoperr; 985 } 986 if ((op->sem_flg & SEM_UNDO) && op->sem_op) 987 needundo = 1; 988 } 989 if (error = ipcperm_access(&sp->sem_perm, mode, CRED())) 990 goto semoperr; 991 992 if (needundo) { 993 struct sem_undo template; 994 995 template.un_sp = sp; 996 mutex_enter(&pp->p_lock); 997 if (pp->p_semacct) 998 undo = avl_find(pp->p_semacct, &template, NULL); 999 else 1000 undo = NULL; 1001 mutex_exit(&pp->p_lock); 1002 if (undo == NULL) { 1003 if (error = sem_undo_alloc(pp, sp, &lock, &template, 1004 &undo)) 1005 goto semoperr; 1006 1007 /* sem_undo_alloc unlocks the semaphore */ 1008 if (error = ipcperm_access(&sp->sem_perm, mode, CRED())) 1009 goto semoperr; 1010 } 1011 } 1012 1013 check: 1014 /* 1015 * Loop waiting for the operations to be satisfied atomically. 1016 * Actually, do the operations and undo them if a wait is needed 1017 * or an error is detected. 1018 */ 1019 for (i = 0; i < nsops; i++) { 1020 op = &uops[i]; 1021 semp = &sp->sem_base[op->sem_num]; 1022 1023 /* 1024 * Raise the semaphore (i.e. sema_v) 1025 */ 1026 if (op->sem_op > 0) { 1027 if (op->sem_op + (int)semp->semval > USHRT_MAX || 1028 ((op->sem_flg & SEM_UNDO) && 1029 (error = sem_undo_add(op->sem_op, op->sem_num, 1030 undo)))) { 1031 if (i) 1032 sem_rollback(sp, uops, i, undo); 1033 if (error == 0) 1034 error = ERANGE; 1035 goto semoperr; 1036 } 1037 semp->semval += op->sem_op; 1038 /* 1039 * If we are only incrementing the semaphore value 1040 * by one on a binary semaphore, we can cv_signal. 1041 */ 1042 if (semp->semncnt) { 1043 if (op->sem_op == 1 && sp->sem_binary) 1044 cv_signal(&semp->semncnt_cv); 1045 else 1046 cv_broadcast(&semp->semncnt_cv); 1047 } 1048 if (semp->semzcnt && !semp->semval) 1049 cv_broadcast(&semp->semzcnt_cv); 1050 continue; 1051 } 1052 1053 /* 1054 * Lower the semaphore (i.e. 
sema_p) 1055 */ 1056 if (op->sem_op < 0) { 1057 if (semp->semval >= (unsigned)(-op->sem_op)) { 1058 if ((op->sem_flg & SEM_UNDO) && 1059 (error = sem_undo_add(op->sem_op, 1060 op->sem_num, undo))) { 1061 if (i) 1062 sem_rollback(sp, uops, i, undo); 1063 goto semoperr; 1064 } 1065 semp->semval += op->sem_op; 1066 if (semp->semzcnt && !semp->semval) 1067 cv_broadcast(&semp->semzcnt_cv); 1068 continue; 1069 } 1070 if (i) 1071 sem_rollback(sp, uops, i, undo); 1072 if (op->sem_flg & IPC_NOWAIT) { 1073 error = EAGAIN; 1074 goto semoperr; 1075 } 1076 1077 /* 1078 * Mark the semaphore set as not a binary type 1079 * if we are decrementing the value by more than 1. 1080 * 1081 * V operations will resort to cv_broadcast 1082 * for this set because there are too many weird 1083 * cases that have to be caught. 1084 */ 1085 if (op->sem_op < -1) 1086 sp->sem_binary = 0; 1087 if (!held) { 1088 held = 1; 1089 ipc_hold(sem_svc, (kipc_perm_t *)sp); 1090 } 1091 semp->semncnt++; 1092 cvres = cv_waituntil_sig(&semp->semncnt_cv, lock, 1093 tsp, timecheck); 1094 lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock); 1095 1096 if (!IPC_FREE(&sp->sem_perm)) { 1097 ASSERT(semp->semncnt != 0); 1098 semp->semncnt--; 1099 if (cvres > 0) /* normal wakeup */ 1100 goto check; 1101 } 1102 1103 /* EINTR or EAGAIN overrides EIDRM */ 1104 if (cvres == 0) 1105 error = EINTR; 1106 else if (cvres < 0) 1107 error = EAGAIN; 1108 else 1109 error = EIDRM; 1110 goto semoperr; 1111 } 1112 1113 /* 1114 * Wait for zero value 1115 */ 1116 if (semp->semval) { 1117 if (i) 1118 sem_rollback(sp, uops, i, undo); 1119 if (op->sem_flg & IPC_NOWAIT) { 1120 error = EAGAIN; 1121 goto semoperr; 1122 } 1123 1124 if (!held) { 1125 held = 1; 1126 ipc_hold(sem_svc, (kipc_perm_t *)sp); 1127 } 1128 semp->semzcnt++; 1129 cvres = cv_waituntil_sig(&semp->semzcnt_cv, lock, 1130 tsp, timecheck); 1131 lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock); 1132 1133 /* 1134 * Don't touch semp if the semaphores have been removed. 
1135 */ 1136 if (!IPC_FREE(&sp->sem_perm)) { 1137 ASSERT(semp->semzcnt != 0); 1138 semp->semzcnt--; 1139 if (cvres > 0) /* normal wakeup */ 1140 goto check; 1141 } 1142 1143 /* EINTR or EAGAIN overrides EIDRM */ 1144 if (cvres == 0) 1145 error = EINTR; 1146 else if (cvres < 0) 1147 error = EAGAIN; 1148 else 1149 error = EIDRM; 1150 goto semoperr; 1151 } 1152 } 1153 1154 /* All operations succeeded. Update sempid for accessed semaphores. */ 1155 for (i = 0, op = uops; i++ < nsops; 1156 sp->sem_base[(op++)->sem_num].sempid = pp->p_pid) 1157 ; 1158 sp->sem_otime = gethrestime_sec(); 1159 if (held) 1160 ipc_rele(sem_svc, (kipc_perm_t *)sp); 1161 else 1162 mutex_exit(lock); 1163 1164 /* Before leaving, deallocate the buffer that held the user semops */ 1165 if (nsops != 1) 1166 kmem_free(uops, sizeof (*uops) * nsops); 1167 return (0); 1168 1169 /* 1170 * Error return labels 1171 */ 1172 semoperr: 1173 if (held) 1174 ipc_rele(sem_svc, (kipc_perm_t *)sp); 1175 else 1176 mutex_exit(lock); 1177 1178 semoperr_unlocked: 1179 1180 /* Before leaving, deallocate the buffer that held the user semops */ 1181 if (nsops != 1) 1182 kmem_free(uops, sizeof (*uops) * nsops); 1183 return (set_errno(error)); 1184 } 1185 1186 /* 1187 * semsys - System entry point for semctl, semget, and semop system calls. 
1188 */ 1189 static int 1190 semsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4) 1191 { 1192 int error; 1193 1194 switch (opcode) { 1195 case SEMCTL: 1196 error = semctl((int)a1, (uint_t)a2, (int)a3, a4); 1197 break; 1198 case SEMGET: 1199 error = semget((key_t)a1, (int)a2, (int)a3); 1200 break; 1201 case SEMOP: 1202 error = semop((int)a1, (struct sembuf *)a2, (size_t)a3, 0); 1203 break; 1204 case SEMIDS: 1205 error = semids((int *)a1, (uint_t)a2, (uint_t *)a3); 1206 break; 1207 case SEMTIMEDOP: 1208 error = semop((int)a1, (struct sembuf *)a2, (size_t)a3, 1209 (timespec_t *)a4); 1210 break; 1211 default: 1212 error = set_errno(EINVAL); 1213 break; 1214 } 1215 return (error); 1216 } 1217