1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 /* 33 * Inter-Process Communication Semaphore Facility. 34 * 35 * See os/ipc.c for a description of common IPC functionality. 36 * 37 * Resource controls 38 * ----------------- 39 * 40 * Control: zone.max-sem-ids (rc_zone_semmni) 41 * Description: Maximum number of semaphore ids allowed a zone. 42 * 43 * When semget() is used to allocate a semaphore set, one id is 44 * allocated. If the id allocation doesn't succeed, semget() fails 45 * and errno is set to ENOSPC. Upon successful semctl(, IPC_RMID) 46 * the id is deallocated. 47 * 48 * Control: project.max-sem-ids (rc_project_semmni) 49 * Description: Maximum number of semaphore ids allowed a project. 50 * 51 * When semget() is used to allocate a semaphore set, one id is 52 * allocated. If the id allocation doesn't succeed, semget() fails 53 * and errno is set to ENOSPC. Upon successful semctl(, IPC_RMID) 54 * the id is deallocated. 55 * 56 * Control: process.max-sem-nsems (rc_process_semmsl) 57 * Description: Maximum number of semaphores allowed per semaphore set. 58 * 59 * When semget() is used to allocate a semaphore set, the size of the 60 * set is compared with this limit. If the number of semaphores 61 * exceeds the limit, semget() fails and errno is set to EINVAL. 62 * 63 * Control: process.max-sem-ops (rc_process_semopm) 64 * Description: Maximum number of semaphore operations allowed per 65 * semop call. 66 * 67 * When semget() successfully allocates a semaphore set, the minimum 68 * enforced value of this limit is used to initialize the 69 * "system-imposed maximum" number of operations a semop() call for 70 * this set can perform. 71 * 72 * Undo structures 73 * --------------- 74 * 75 * Removing the undo structure tunables involved a serious redesign of 76 * how they were implemented. There is now one undo structure for 77 * every process/semaphore array combination (lazily allocated, of 78 * course), and each is equal in size to the semaphore it corresponds 79 * to. To avoid scalability and performance problems, the undo 80 * structures are stored in two places: a per-process AVL tree sorted 81 * by ksemid pointer (p_semacct, protected by p_lock) and an unsorted 82 * per-semaphore linked list (sem_undos, protected by the semaphore's 83 * ID lock). The former is used by semop, where a lookup is performed 84 * once and cached if SEM_UNDO is specified for any of the operations, 85 * and at process exit where the undoable operations are rolled back. 86 * The latter is used when removing the semaphore, so the undo 87 * structures can be removed from the appropriate processes' trees. 88 * 89 * The undo structure itself contains pointers to the ksemid and proc 90 * to which it corresponds, a list node, an AVL node, and an array of 91 * adjust-on-exit (AOE) values. When an undo structure is allocated it 92 * is immediately added to both the process's tree and the semaphore's 93 * list. Lastly, the reference count on the semaphore is increased. 94 * 95 * Avoiding a lock ordering violation between p_lock and the ID lock, 96 * wont to occur when there is a race between a process exiting and the 97 * removal of a semaphore, mandates the delicate dance that exists 98 * between semexit and sem_rmid. 99 * 100 * sem_rmid, holding the ID lock, iterates through all undo structures 101 * and for each takes the appropriate process's p_lock and checks to 102 * see if p_semacct is NULL. If it is, it skips that undo structure 103 * and continues to the next. Otherwise, it removes the undo structure 104 * from both the AVL tree and the semaphore's list, and releases the 105 * hold that the undo structure had on the semaphore. 106 * 107 * The important other half of this is semexit, which will immediately 108 * take p_lock, obtain the AVL pointer, clear p_semacct, and drop 109 * p_lock. From this point on it is semexit's responsibility to clean 110 * up all undo structures found in the tree -- a coexecuting sem_rmid 111 * will see the NULL p_semacct and skip that undo structure. It walks 112 * the AVL tree (using avl_destroy_nodes) and for each undo structure 113 * takes the appropriate semaphore's ID lock (always legal since the 114 * undo structure has a hold on the semaphore), updates all semaphores 115 * with non-zero AOE values, and removes the structure from the 116 * semaphore's list. It then drops the structure's reference on the 117 * semaphore, drops the ID lock, and frees the undo structure. 118 */ 119 120 #include <sys/types.h> 121 #include <sys/t_lock.h> 122 #include <sys/param.h> 123 #include <sys/systm.h> 124 #include <sys/sysmacros.h> 125 #include <sys/cred.h> 126 #include <sys/vmem.h> 127 #include <sys/kmem.h> 128 #include <sys/errno.h> 129 #include <sys/time.h> 130 #include <sys/ipc.h> 131 #include <sys/ipc_impl.h> 132 #include <sys/sem.h> 133 #include <sys/sem_impl.h> 134 #include <sys/user.h> 135 #include <sys/proc.h> 136 #include <sys/cpuvar.h> 137 #include <sys/debug.h> 138 #include <sys/var.h> 139 #include <sys/cmn_err.h> 140 #include <sys/modctl.h> 141 #include <sys/syscall.h> 142 #include <sys/avl.h> 143 #include <sys/list.h> 144 #include <sys/zone.h> 145 146 #include <c2/audit.h> 147 148 extern rctl_hndl_t rc_zone_semmni; 149 extern rctl_hndl_t rc_project_semmni; 150 extern rctl_hndl_t rc_process_semmsl; 151 extern rctl_hndl_t rc_process_semopm; 152 static ipc_service_t *sem_svc; 153 static zone_key_t sem_zone_key; 154 155 /* 156 * The following tunables are obsolete. Though for compatibility we 157 * still read and interpret seminfo_semmsl, seminfo_semopm and 158 * seminfo_semmni (see os/project.c and os/rctl_proc.c), the preferred 159 * mechanism for administrating the IPC Semaphore facility is through 160 * the resource controls described at the top of this file. 161 */ 162 int seminfo_semaem = 16384; /* (obsolete) */ 163 int seminfo_semmap = 10; /* (obsolete) */ 164 int seminfo_semmni = 10; /* (obsolete) */ 165 int seminfo_semmns = 60; /* (obsolete) */ 166 int seminfo_semmnu = 30; /* (obsolete) */ 167 int seminfo_semmsl = 25; /* (obsolete) */ 168 int seminfo_semopm = 10; /* (obsolete) */ 169 int seminfo_semume = 10; /* (obsolete) */ 170 int seminfo_semusz = 96; /* (obsolete) */ 171 int seminfo_semvmx = 32767; /* (obsolete) */ 172 173 #define SEM_MAXUCOPS 4096 /* max # of unchecked ops per semop call */ 174 #define SEM_UNDOSZ(n) (sizeof (struct sem_undo) + (n - 1) * sizeof (int)) 175 176 static int semsys(int opcode, uintptr_t a0, uintptr_t a1, 177 uintptr_t a2, uintptr_t a3); 178 static void sem_dtor(kipc_perm_t *); 179 static void sem_rmid(kipc_perm_t *); 180 static void sem_remove_zone(zoneid_t, void *); 181 182 static struct sysent ipcsem_sysent = { 183 5, 184 SE_NOUNLOAD | SE_ARGC | SE_32RVAL1, 185 semsys 186 }; 187 188 /* 189 * Module linkage information for the kernel. 190 */ 191 static struct modlsys modlsys = { 192 &mod_syscallops, "System V semaphore facility", &ipcsem_sysent 193 }; 194 195 #ifdef _SYSCALL32_IMPL 196 static struct modlsys modlsys32 = { 197 &mod_syscallops32, "32-bit System V semaphore facility", &ipcsem_sysent 198 }; 199 #endif 200 201 static struct modlinkage modlinkage = { 202 MODREV_1, 203 &modlsys, 204 #ifdef _SYSCALL32_IMPL 205 &modlsys32, 206 #endif 207 NULL 208 }; 209 210 211 int 212 _init(void) 213 { 214 int result; 215 216 sem_svc = ipcs_create("semids", rc_project_semmni, rc_zone_semmni, 217 sizeof (ksemid_t), sem_dtor, sem_rmid, AT_IPC_SEM, 218 offsetof(ipc_rqty_t, ipcq_semmni)); 219 zone_key_create(&sem_zone_key, NULL, sem_remove_zone, NULL); 220 221 if ((result = mod_install(&modlinkage)) == 0) 222 return (0); 223 224 (void) zone_key_delete(sem_zone_key); 225 ipcs_destroy(sem_svc); 226 227 return (result); 228 } 229 230 int 231 _fini(void) 232 { 233 return (EBUSY); 234 } 235 236 int 237 _info(struct modinfo *modinfop) 238 { 239 return (mod_info(&modlinkage, modinfop)); 240 } 241 242 static void 243 sem_dtor(kipc_perm_t *perm) 244 { 245 ksemid_t *sp = (ksemid_t *)perm; 246 247 kmem_free(sp->sem_base, 248 P2ROUNDUP(sp->sem_nsems * sizeof (struct sem), 64)); 249 list_destroy(&sp->sem_undos); 250 } 251 252 /* 253 * sem_undo_add - Create or update adjust on exit entry. 254 */ 255 static int 256 sem_undo_add(short val, ushort_t num, struct sem_undo *undo) 257 { 258 int newval = undo->un_aoe[num] - val; 259 260 if (newval > USHRT_MAX || newval < -USHRT_MAX) 261 return (ERANGE); 262 undo->un_aoe[num] = newval; 263 264 return (0); 265 } 266 267 /* 268 * sem_undo_clear - clears all undo entries for specified semaphores 269 * 270 * Used when semaphores are reset by SETVAL or SETALL. 271 */ 272 static void 273 sem_undo_clear(ksemid_t *sp, ushort_t low, ushort_t high) 274 { 275 struct sem_undo *undo; 276 int i; 277 278 ASSERT(low <= high); 279 ASSERT(high < sp->sem_nsems); 280 281 for (undo = list_head(&sp->sem_undos); undo; 282 undo = list_next(&sp->sem_undos, undo)) 283 for (i = low; i <= high; i++) 284 undo->un_aoe[i] = 0; 285 } 286 287 /* 288 * sem_rollback - roll back work done so far if unable to complete operation 289 */ 290 static void 291 sem_rollback(ksemid_t *sp, struct sembuf *op, int n, struct sem_undo *undo) 292 { 293 struct sem *semp; /* semaphore ptr */ 294 295 for (op += n - 1; n--; op--) { 296 if (op->sem_op == 0) 297 continue; 298 semp = &sp->sem_base[op->sem_num]; 299 semp->semval -= op->sem_op; 300 if (op->sem_flg & SEM_UNDO) { 301 ASSERT(undo != NULL); 302 (void) sem_undo_add(-op->sem_op, op->sem_num, undo); 303 } 304 } 305 } 306 307 static void 308 sem_rmid(kipc_perm_t *perm) 309 { 310 ksemid_t *sp = (ksemid_t *)perm; 311 struct sem *semp; 312 struct sem_undo *undo; 313 size_t size = SEM_UNDOSZ(sp->sem_nsems); 314 int i; 315 316 /*LINTED*/ 317 while (undo = list_head(&sp->sem_undos)) { 318 list_remove(&sp->sem_undos, undo); 319 mutex_enter(&undo->un_proc->p_lock); 320 if (undo->un_proc->p_semacct == NULL) { 321 mutex_exit(&undo->un_proc->p_lock); 322 continue; 323 } 324 avl_remove(undo->un_proc->p_semacct, undo); 325 mutex_exit(&undo->un_proc->p_lock); 326 kmem_free(undo, size); 327 ipc_rele_locked(sem_svc, (kipc_perm_t *)sp); 328 } 329 330 for (i = 0; i < sp->sem_nsems; i++) { 331 semp = &sp->sem_base[i]; 332 semp->semval = semp->sempid = 0; 333 if (semp->semncnt) { 334 cv_broadcast(&semp->semncnt_cv); 335 semp->semncnt = 0; 336 } 337 if (semp->semzcnt) { 338 cv_broadcast(&semp->semzcnt_cv); 339 semp->semzcnt = 0; 340 } 341 } 342 } 343 344 /* 345 * semctl - Semctl system call. 346 */ 347 static int 348 semctl(int semid, uint_t semnum, int cmd, uintptr_t arg) 349 { 350 ksemid_t *sp; /* ptr to semaphore header */ 351 struct sem *p; /* ptr to semaphore */ 352 unsigned int i; /* loop control */ 353 ushort_t *vals, *vp; 354 size_t vsize = 0; 355 int error = 0; 356 int retval = 0; 357 struct cred *cr; 358 kmutex_t *lock; 359 model_t mdl = get_udatamodel(); 360 STRUCT_DECL(semid_ds, sid); 361 struct semid_ds64 ds64; 362 363 STRUCT_INIT(sid, mdl); 364 cr = CRED(); 365 366 /* 367 * Perform pre- or non-lookup actions (e.g. copyins, RMID). 368 */ 369 switch (cmd) { 370 case IPC_SET: 371 if (copyin((void *)arg, STRUCT_BUF(sid), STRUCT_SIZE(sid))) 372 return (set_errno(EFAULT)); 373 break; 374 375 case IPC_SET64: 376 if (copyin((void *)arg, &ds64, sizeof (struct semid_ds64))) 377 return (set_errno(EFAULT)); 378 break; 379 380 case SETALL: 381 if ((lock = ipc_lookup(sem_svc, semid, 382 (kipc_perm_t **)&sp)) == NULL) 383 return (set_errno(EINVAL)); 384 vsize = sp->sem_nsems * sizeof (*vals); 385 mutex_exit(lock); 386 387 /* allocate space to hold all semaphore values */ 388 vals = kmem_alloc(vsize, KM_SLEEP); 389 390 if (copyin((void *)arg, vals, vsize)) { 391 kmem_free(vals, vsize); 392 return (set_errno(EFAULT)); 393 } 394 break; 395 396 case IPC_RMID: 397 if (error = ipc_rmid(sem_svc, semid, cr)) 398 return (set_errno(error)); 399 return (0); 400 } 401 402 if ((lock = ipc_lookup(sem_svc, semid, (kipc_perm_t **)&sp)) == NULL) { 403 if (vsize != 0) 404 kmem_free(vals, vsize); 405 return (set_errno(EINVAL)); 406 } 407 switch (cmd) { 408 /* Set ownership and permissions. */ 409 case IPC_SET: 410 411 if (error = ipcperm_set(sem_svc, cr, &sp->sem_perm, 412 &STRUCT_BUF(sid)->sem_perm, mdl)) { 413 mutex_exit(lock); 414 return (set_errno(error)); 415 } 416 sp->sem_ctime = gethrestime_sec(); 417 mutex_exit(lock); 418 return (0); 419 420 /* Get semaphore data structure. */ 421 case IPC_STAT: 422 423 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 424 mutex_exit(lock); 425 return (set_errno(error)); 426 } 427 428 ipcperm_stat(&STRUCT_BUF(sid)->sem_perm, &sp->sem_perm, mdl); 429 STRUCT_FSETP(sid, sem_base, NULL); /* kernel addr */ 430 STRUCT_FSET(sid, sem_nsems, sp->sem_nsems); 431 STRUCT_FSET(sid, sem_otime, sp->sem_otime); 432 STRUCT_FSET(sid, sem_ctime, sp->sem_ctime); 433 STRUCT_FSET(sid, sem_binary, sp->sem_binary); 434 mutex_exit(lock); 435 436 if (copyout(STRUCT_BUF(sid), (void *)arg, STRUCT_SIZE(sid))) 437 return (set_errno(EFAULT)); 438 return (0); 439 440 case IPC_SET64: 441 442 if (error = ipcperm_set64(sem_svc, cr, &sp->sem_perm, 443 &ds64.semx_perm)) { 444 mutex_exit(lock); 445 return (set_errno(error)); 446 } 447 sp->sem_ctime = gethrestime_sec(); 448 mutex_exit(lock); 449 return (0); 450 451 case IPC_STAT64: 452 453 ipcperm_stat64(&ds64.semx_perm, &sp->sem_perm); 454 ds64.semx_nsems = sp->sem_nsems; 455 ds64.semx_otime = sp->sem_otime; 456 ds64.semx_ctime = sp->sem_ctime; 457 458 mutex_exit(lock); 459 if (copyout(&ds64, (void *)arg, sizeof (struct semid_ds64))) 460 return (set_errno(EFAULT)); 461 462 return (0); 463 464 /* Get # of processes sleeping for greater semval. */ 465 case GETNCNT: 466 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 467 mutex_exit(lock); 468 return (set_errno(error)); 469 } 470 if (semnum >= sp->sem_nsems) { 471 mutex_exit(lock); 472 return (set_errno(EINVAL)); 473 } 474 retval = sp->sem_base[semnum].semncnt; 475 mutex_exit(lock); 476 return (retval); 477 478 /* Get pid of last process to operate on semaphore. */ 479 case GETPID: 480 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 481 mutex_exit(lock); 482 return (set_errno(error)); 483 } 484 if (semnum >= sp->sem_nsems) { 485 mutex_exit(lock); 486 return (set_errno(EINVAL)); 487 } 488 retval = sp->sem_base[semnum].sempid; 489 mutex_exit(lock); 490 return (retval); 491 492 /* Get semval of one semaphore. */ 493 case GETVAL: 494 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 495 mutex_exit(lock); 496 return (set_errno(error)); 497 } 498 if (semnum >= sp->sem_nsems) { 499 mutex_exit(lock); 500 return (set_errno(EINVAL)); 501 } 502 retval = sp->sem_base[semnum].semval; 503 mutex_exit(lock); 504 return (retval); 505 506 /* Get all semvals in set. */ 507 case GETALL: 508 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 509 mutex_exit(lock); 510 return (set_errno(error)); 511 } 512 513 /* allocate space to hold all semaphore values */ 514 vsize = sp->sem_nsems * sizeof (*vals); 515 vals = vp = kmem_alloc(vsize, KM_SLEEP); 516 517 for (i = sp->sem_nsems, p = sp->sem_base; i--; p++, vp++) 518 bcopy(&p->semval, vp, sizeof (p->semval)); 519 520 mutex_exit(lock); 521 522 if (copyout((void *)vals, (void *)arg, vsize)) { 523 kmem_free(vals, vsize); 524 return (set_errno(EFAULT)); 525 } 526 527 kmem_free(vals, vsize); 528 return (0); 529 530 /* Get # of processes sleeping for semval to become zero. */ 531 case GETZCNT: 532 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 533 mutex_exit(lock); 534 return (set_errno(error)); 535 } 536 if (semnum >= sp->sem_nsems) { 537 mutex_exit(lock); 538 return (set_errno(EINVAL)); 539 } 540 retval = sp->sem_base[semnum].semzcnt; 541 mutex_exit(lock); 542 return (retval); 543 544 /* Set semval of one semaphore. */ 545 case SETVAL: 546 if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) { 547 mutex_exit(lock); 548 return (set_errno(error)); 549 } 550 if (semnum >= sp->sem_nsems) { 551 mutex_exit(lock); 552 return (set_errno(EINVAL)); 553 } 554 if ((uint_t)arg > USHRT_MAX) { 555 mutex_exit(lock); 556 return (set_errno(ERANGE)); 557 } 558 p = &sp->sem_base[semnum]; 559 if ((p->semval = (ushort_t)arg) != 0) { 560 if (p->semncnt) { 561 cv_broadcast(&p->semncnt_cv); 562 } 563 } else if (p->semzcnt) { 564 cv_broadcast(&p->semzcnt_cv); 565 } 566 p->sempid = curproc->p_pid; 567 sem_undo_clear(sp, (ushort_t)semnum, (ushort_t)semnum); 568 mutex_exit(lock); 569 return (0); 570 571 /* Set semvals of all semaphores in set. */ 572 case SETALL: 573 /* Check if semaphore set has been deleted and reallocated. */ 574 if (sp->sem_nsems * sizeof (*vals) != vsize) { 575 error = set_errno(EINVAL); 576 goto seterr; 577 } 578 if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) { 579 error = set_errno(error); 580 goto seterr; 581 } 582 sem_undo_clear(sp, 0, sp->sem_nsems - 1); 583 for (i = 0, p = sp->sem_base; i < sp->sem_nsems; 584 (p++)->sempid = curproc->p_pid) { 585 if ((p->semval = vals[i++]) != 0) { 586 if (p->semncnt) { 587 cv_broadcast(&p->semncnt_cv); 588 } 589 } else if (p->semzcnt) { 590 cv_broadcast(&p->semzcnt_cv); 591 } 592 } 593 seterr: 594 mutex_exit(lock); 595 kmem_free(vals, vsize); 596 return (error); 597 598 default: 599 mutex_exit(lock); 600 return (set_errno(EINVAL)); 601 } 602 603 /* NOTREACHED */ 604 } 605 606 /* 607 * semexit - Called by exit() to clean up on process exit. 608 */ 609 void 610 semexit(proc_t *pp) 611 { 612 avl_tree_t *tree; 613 struct sem_undo *undo; 614 void *cookie = NULL; 615 616 mutex_enter(&pp->p_lock); 617 tree = pp->p_semacct; 618 pp->p_semacct = NULL; 619 mutex_exit(&pp->p_lock); 620 621 while (undo = avl_destroy_nodes(tree, &cookie)) { 622 ksemid_t *sp = undo->un_sp; 623 size_t size = SEM_UNDOSZ(sp->sem_nsems); 624 int i; 625 626 (void) ipc_lock(sem_svc, sp->sem_perm.ipc_id); 627 if (!IPC_FREE(&sp->sem_perm)) { 628 for (i = 0; i < sp->sem_nsems; i++) { 629 int adj = undo->un_aoe[i]; 630 if (adj) { 631 struct sem *semp = &sp->sem_base[i]; 632 int v = (int)semp->semval + adj; 633 634 if (v < 0 || v > USHRT_MAX) 635 continue; 636 semp->semval = (ushort_t)v; 637 if (v == 0 && semp->semzcnt) 638 cv_broadcast(&semp->semzcnt_cv); 639 if (adj > 0 && semp->semncnt) 640 cv_broadcast(&semp->semncnt_cv); 641 } 642 } 643 list_remove(&sp->sem_undos, undo); 644 } 645 ipc_rele(sem_svc, (kipc_perm_t *)sp); 646 kmem_free(undo, size); 647 } 648 649 avl_destroy(tree); 650 kmem_free(tree, sizeof (avl_tree_t)); 651 } 652 653 /* 654 * Remove all semaphores associated with a given zone. Called by 655 * zone_shutdown when the zone is halted. 656 */ 657 /*ARGSUSED1*/ 658 static void 659 sem_remove_zone(zoneid_t zoneid, void *arg) 660 { 661 ipc_remove_zone(sem_svc, zoneid); 662 } 663 664 /* 665 * semget - Semget system call. 666 */ 667 static int 668 semget(key_t key, int nsems, int semflg) 669 { 670 ksemid_t *sp; 671 kmutex_t *lock; 672 int id, error; 673 proc_t *pp = curproc; 674 675 top: 676 if (error = ipc_get(sem_svc, key, semflg, (kipc_perm_t **)&sp, &lock)) 677 return (set_errno(error)); 678 679 if (!IPC_FREE(&sp->sem_perm)) { 680 /* 681 * A semaphore with the requested key exists. 682 */ 683 if (!((nsems >= 0) && (nsems <= sp->sem_nsems))) { 684 mutex_exit(lock); 685 return (set_errno(EINVAL)); 686 } 687 } else { 688 /* 689 * This is a new semaphore set. Finish initialization. 690 */ 691 if (nsems <= 0 || (rctl_test(rc_process_semmsl, pp->p_rctls, pp, 692 nsems, RCA_SAFE) & RCT_DENY)) { 693 mutex_exit(lock); 694 mutex_exit(&pp->p_lock); 695 ipc_cleanup(sem_svc, (kipc_perm_t *)sp); 696 return (set_errno(EINVAL)); 697 } 698 mutex_exit(lock); 699 mutex_exit(&pp->p_lock); 700 701 /* 702 * We round the allocation up to coherency granularity 703 * so that multiple semaphore allocations won't result 704 * in the false sharing of their sem structures. 705 */ 706 sp->sem_base = 707 kmem_zalloc(P2ROUNDUP(nsems * sizeof (struct sem), 64), 708 KM_SLEEP); 709 sp->sem_binary = (nsems == 1); 710 sp->sem_nsems = (ushort_t)nsems; 711 sp->sem_ctime = gethrestime_sec(); 712 sp->sem_otime = 0; 713 list_create(&sp->sem_undos, sizeof (struct sem_undo), 714 offsetof(struct sem_undo, un_list)); 715 716 if (error = ipc_commit_begin(sem_svc, key, semflg, 717 (kipc_perm_t *)sp)) { 718 if (error == EAGAIN) 719 goto top; 720 return (set_errno(error)); 721 } 722 sp->sem_maxops = 723 rctl_enforced_value(rc_process_semopm, pp->p_rctls, pp); 724 if (rctl_test(rc_process_semmsl, pp->p_rctls, pp, nsems, 725 RCA_SAFE) & RCT_DENY) { 726 ipc_cleanup(sem_svc, (kipc_perm_t *)sp); 727 return (set_errno(EINVAL)); 728 } 729 lock = ipc_commit_end(sem_svc, &sp->sem_perm); 730 } 731 if (audit_active) 732 audit_ipcget(AT_IPC_SEM, (void *)sp); 733 id = sp->sem_perm.ipc_id; 734 mutex_exit(lock); 735 return (id); 736 } 737 738 /* 739 * semids system call. 740 */ 741 static int 742 semids(int *buf, uint_t nids, uint_t *pnids) 743 { 744 int error; 745 746 if (error = ipc_ids(sem_svc, buf, nids, pnids)) 747 return (set_errno(error)); 748 749 return (0); 750 } 751 752 753 /* 754 * Helper function for semop - copies in the provided timespec and 755 * computes the absolute future time after which we must return. 756 */ 757 static int 758 compute_timeout(timespec_t **tsp, timespec_t *ts, timespec_t *now, 759 timespec_t *timeout) 760 { 761 model_t datamodel = get_udatamodel(); 762 763 if (datamodel == DATAMODEL_NATIVE) { 764 if (copyin(timeout, ts, sizeof (timespec_t))) 765 return (EFAULT); 766 } else { 767 timespec32_t ts32; 768 769 if (copyin(timeout, &ts32, sizeof (timespec32_t))) 770 return (EFAULT); 771 TIMESPEC32_TO_TIMESPEC(ts, &ts32) 772 } 773 774 if (itimerspecfix(ts)) 775 return (EINVAL); 776 777 /* 778 * Convert the timespec value into absolute time. 779 */ 780 timespecadd(ts, now); 781 *tsp = ts; 782 783 return (0); 784 } 785 786 /* 787 * Undo structure comparator. We sort based on ksemid_t pointer. 788 */ 789 static int 790 sem_undo_compar(const void *x, const void *y) 791 { 792 struct sem_undo *undo1 = (struct sem_undo *)x; 793 struct sem_undo *undo2 = (struct sem_undo *)y; 794 795 if (undo1->un_sp < undo2->un_sp) 796 return (-1); 797 if (undo1->un_sp > undo2->un_sp) 798 return (1); 799 return (0); 800 } 801 802 /* 803 * Helper function for semop - creates an undo structure and adds it to 804 * the process's avl tree and the semaphore's list. 805 */ 806 static int 807 sem_undo_alloc(proc_t *pp, ksemid_t *sp, kmutex_t **lock, 808 struct sem_undo *template, struct sem_undo **un) 809 { 810 size_t size; 811 struct sem_undo *undo; 812 avl_tree_t *tree = NULL; 813 avl_index_t where; 814 815 mutex_exit(*lock); 816 817 size = SEM_UNDOSZ(sp->sem_nsems); 818 undo = kmem_zalloc(size, KM_SLEEP); 819 undo->un_proc = pp; 820 undo->un_sp = sp; 821 822 if (pp->p_semacct == NULL) 823 tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); 824 825 *lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id); 826 if (IPC_FREE(&sp->sem_perm)) { 827 kmem_free(undo, size); 828 if (tree) 829 kmem_free(tree, sizeof (avl_tree_t)); 830 return (EIDRM); 831 } 832 833 mutex_enter(&pp->p_lock); 834 if (tree) { 835 if (pp->p_semacct == NULL) { 836 avl_create(tree, sem_undo_compar, 837 sizeof (struct sem_undo), 838 offsetof(struct sem_undo, un_avl)); 839 pp->p_semacct = tree; 840 } else { 841 kmem_free(tree, sizeof (avl_tree_t)); 842 } 843 } 844 845 if (*un = avl_find(pp->p_semacct, template, &where)) { 846 mutex_exit(&pp->p_lock); 847 kmem_free(undo, size); 848 } else { 849 *un = undo; 850 avl_insert(pp->p_semacct, undo, where); 851 mutex_exit(&pp->p_lock); 852 list_insert_head(&sp->sem_undos, undo); 853 ipc_hold(sem_svc, (kipc_perm_t *)sp); 854 } 855 856 857 return (0); 858 } 859 860 /* 861 * semop - Semop system call. 862 */ 863 static int 864 semop(int semid, struct sembuf *sops, size_t nsops, timespec_t *timeout) 865 { 866 ksemid_t *sp = NULL; 867 kmutex_t *lock; 868 struct sembuf *op; /* ptr to operation */ 869 int i; /* loop control */ 870 struct sem *semp; /* ptr to semaphore */ 871 int error = 0; 872 struct sembuf *uops; /* ptr to copy of user ops */ 873 struct sembuf x_sem; /* avoid kmem_alloc's */ 874 timespec_t now, ts, *tsp = NULL; 875 int timecheck = 0; 876 int cvres, needundo, mode; 877 struct sem_undo *undo; 878 proc_t *pp = curproc; 879 int held = 0; 880 881 CPU_STATS_ADDQ(CPU, sys, sema, 1); /* bump semaphore op count */ 882 883 /* 884 * To avoid the cost of copying in 'timeout' in the common 885 * case, we could only grab the time here and defer the copyin 886 * and associated computations until we are about to block. 887 * 888 * The down side to this is that we would then have to spin 889 * some goto top nonsense to avoid the copyin behind the semid 890 * lock. As a common use of timed semaphores is as an explicit 891 * blocking mechanism, this could incur a greater penalty. 892 * 893 * If we eventually decide that this would be a wise route to 894 * take, the deferrable functionality is completely contained 895 * in 'compute_timeout', and the interface is defined such that 896 * we can legally not validate 'timeout' if it is unused. 897 */ 898 if (timeout != NULL) { 899 timecheck = timechanged; 900 gethrestime(&now); 901 if (error = compute_timeout(&tsp, &ts, &now, timeout)) 902 return (set_errno(error)); 903 } 904 905 /* 906 * Allocate space to hold the vector of semaphore ops. If 907 * there is only 1 operation we use a preallocated buffer on 908 * the stack for speed. 909 * 910 * Since we don't want to allow the user to allocate an 911 * arbitrary amount of kernel memory, we need to check against 912 * the number of operations allowed by the semaphore. We only 913 * bother doing this if the number of operations is larger than 914 * SEM_MAXUCOPS. 915 */ 916 if (nsops == 1) 917 uops = &x_sem; 918 else if (nsops == 0) 919 return (0); 920 else if (nsops <= SEM_MAXUCOPS) 921 uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP); 922 923 if (nsops > SEM_MAXUCOPS) { 924 if ((lock = ipc_lookup(sem_svc, semid, 925 (kipc_perm_t **)&sp)) == NULL) 926 return (set_errno(EFAULT)); 927 928 if (nsops > sp->sem_maxops) { 929 mutex_exit(lock); 930 return (set_errno(E2BIG)); 931 } 932 held = 1; 933 ipc_hold(sem_svc, (kipc_perm_t *)sp); 934 mutex_exit(lock); 935 936 uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP); 937 if (copyin(sops, uops, nsops * sizeof (*op))) { 938 error = EFAULT; 939 (void) ipc_lock(sem_svc, sp->sem_perm.ipc_id); 940 goto semoperr; 941 } 942 943 lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id); 944 if (IPC_FREE(&sp->sem_perm)) { 945 error = EIDRM; 946 goto semoperr; 947 } 948 } else { 949 /* 950 * This could be interleaved with the above code, but 951 * keeping them separate improves readability. 952 */ 953 if (copyin(sops, uops, nsops * sizeof (*op))) { 954 error = EFAULT; 955 goto semoperr_unlocked; 956 } 957 958 if ((lock = ipc_lookup(sem_svc, semid, 959 (kipc_perm_t **)&sp)) == NULL) { 960 error = EINVAL; 961 goto semoperr_unlocked; 962 } 963 964 if (nsops > sp->sem_maxops) { 965 error = E2BIG; 966 goto semoperr; 967 } 968 } 969 970 /* 971 * Scan all operations. Verify that sem #s are in range and 972 * this process is allowed the requested operations. If any 973 * operations are marked SEM_UNDO, find (or allocate) the undo 974 * structure for this process and semaphore. 975 */ 976 needundo = 0; 977 mode = 0; 978 for (i = 0, op = uops; i++ < nsops; op++) { 979 mode |= op->sem_op ? SEM_A : SEM_R; 980 if (op->sem_num >= sp->sem_nsems) { 981 error = EFBIG; 982 goto semoperr; 983 } 984 if ((op->sem_flg & SEM_UNDO) && op->sem_op) 985 needundo = 1; 986 } 987 if (error = ipcperm_access(&sp->sem_perm, mode, CRED())) 988 goto semoperr; 989 990 if (needundo) { 991 struct sem_undo template; 992 993 template.un_sp = sp; 994 mutex_enter(&pp->p_lock); 995 if (pp->p_semacct) 996 undo = avl_find(pp->p_semacct, &template, NULL); 997 else 998 undo = NULL; 999 mutex_exit(&pp->p_lock); 1000 if (undo == NULL) { 1001 if (error = sem_undo_alloc(pp, sp, &lock, &template, 1002 &undo)) 1003 goto semoperr; 1004 1005 /* sem_undo_alloc unlocks the semaphore */ 1006 if (error = ipcperm_access(&sp->sem_perm, mode, CRED())) 1007 goto semoperr; 1008 } 1009 } 1010 1011 check: 1012 /* 1013 * Loop waiting for the operations to be satisfied atomically. 1014 * Actually, do the operations and undo them if a wait is needed 1015 * or an error is detected. 1016 */ 1017 for (i = 0; i < nsops; i++) { 1018 op = &uops[i]; 1019 semp = &sp->sem_base[op->sem_num]; 1020 1021 /* 1022 * Raise the semaphore (i.e. sema_v) 1023 */ 1024 if (op->sem_op > 0) { 1025 if (op->sem_op + (int)semp->semval > USHRT_MAX || 1026 ((op->sem_flg & SEM_UNDO) && 1027 (error = sem_undo_add(op->sem_op, op->sem_num, 1028 undo)))) { 1029 if (i) 1030 sem_rollback(sp, uops, i, undo); 1031 if (error == 0) 1032 error = ERANGE; 1033 goto semoperr; 1034 } 1035 semp->semval += op->sem_op; 1036 /* 1037 * If we are only incrementing the semaphore value 1038 * by one on a binary semaphore, we can cv_signal. 1039 */ 1040 if (semp->semncnt) { 1041 if (op->sem_op == 1 && sp->sem_binary) 1042 cv_signal(&semp->semncnt_cv); 1043 else 1044 cv_broadcast(&semp->semncnt_cv); 1045 } 1046 if (semp->semzcnt && !semp->semval) 1047 cv_broadcast(&semp->semzcnt_cv); 1048 continue; 1049 } 1050 1051 /* 1052 * Lower the semaphore (i.e. sema_p) 1053 */ 1054 if (op->sem_op < 0) { 1055 if (semp->semval >= (unsigned)(-op->sem_op)) { 1056 if ((op->sem_flg & SEM_UNDO) && 1057 (error = sem_undo_add(op->sem_op, 1058 op->sem_num, undo))) { 1059 if (i) 1060 sem_rollback(sp, uops, i, undo); 1061 goto semoperr; 1062 } 1063 semp->semval += op->sem_op; 1064 if (semp->semzcnt && !semp->semval) 1065 cv_broadcast(&semp->semzcnt_cv); 1066 continue; 1067 } 1068 if (i) 1069 sem_rollback(sp, uops, i, undo); 1070 if (op->sem_flg & IPC_NOWAIT) { 1071 error = EAGAIN; 1072 goto semoperr; 1073 } 1074 1075 /* 1076 * Mark the semaphore set as not a binary type 1077 * if we are decrementing the value by more than 1. 1078 * 1079 * V operations will resort to cv_broadcast 1080 * for this set because there are too many weird 1081 * cases that have to be caught. 1082 */ 1083 if (op->sem_op < -1) 1084 sp->sem_binary = 0; 1085 if (!held) { 1086 held = 1; 1087 ipc_hold(sem_svc, (kipc_perm_t *)sp); 1088 } 1089 semp->semncnt++; 1090 cvres = cv_waituntil_sig(&semp->semncnt_cv, lock, 1091 tsp, timecheck); 1092 lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock); 1093 1094 if (!IPC_FREE(&sp->sem_perm)) { 1095 ASSERT(semp->semncnt != 0); 1096 semp->semncnt--; 1097 if (cvres > 0) /* normal wakeup */ 1098 goto check; 1099 } 1100 1101 /* EINTR or EAGAIN overrides EIDRM */ 1102 if (cvres == 0) 1103 error = EINTR; 1104 else if (cvres < 0) 1105 error = EAGAIN; 1106 else 1107 error = EIDRM; 1108 goto semoperr; 1109 } 1110 1111 /* 1112 * Wait for zero value 1113 */ 1114 if (semp->semval) { 1115 if (i) 1116 sem_rollback(sp, uops, i, undo); 1117 if (op->sem_flg & IPC_NOWAIT) { 1118 error = EAGAIN; 1119 goto semoperr; 1120 } 1121 1122 if (!held) { 1123 held = 1; 1124 ipc_hold(sem_svc, (kipc_perm_t *)sp); 1125 } 1126 semp->semzcnt++; 1127 cvres = cv_waituntil_sig(&semp->semzcnt_cv, lock, 1128 tsp, timecheck); 1129 lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock); 1130 1131 /* 1132 * Don't touch semp if the semaphores have been removed. 1133 */ 1134 if (!IPC_FREE(&sp->sem_perm)) { 1135 ASSERT(semp->semzcnt != 0); 1136 semp->semzcnt--; 1137 if (cvres > 0) /* normal wakeup */ 1138 goto check; 1139 } 1140 1141 /* EINTR or EAGAIN overrides EIDRM */ 1142 if (cvres == 0) 1143 error = EINTR; 1144 else if (cvres < 0) 1145 error = EAGAIN; 1146 else 1147 error = EIDRM; 1148 goto semoperr; 1149 } 1150 } 1151 1152 /* All operations succeeded. Update sempid for accessed semaphores. */ 1153 for (i = 0, op = uops; i++ < nsops; 1154 sp->sem_base[(op++)->sem_num].sempid = pp->p_pid) 1155 ; 1156 sp->sem_otime = gethrestime_sec(); 1157 if (held) 1158 ipc_rele(sem_svc, (kipc_perm_t *)sp); 1159 else 1160 mutex_exit(lock); 1161 1162 /* Before leaving, deallocate the buffer that held the user semops */ 1163 if (nsops != 1) 1164 kmem_free(uops, sizeof (*uops) * nsops); 1165 return (0); 1166 1167 /* 1168 * Error return labels 1169 */ 1170 semoperr: 1171 if (held) 1172 ipc_rele(sem_svc, (kipc_perm_t *)sp); 1173 else 1174 mutex_exit(lock); 1175 1176 semoperr_unlocked: 1177 1178 /* Before leaving, deallocate the buffer that held the user semops */ 1179 if (nsops != 1) 1180 kmem_free(uops, sizeof (*uops) * nsops); 1181 return (set_errno(error)); 1182 } 1183 1184 /* 1185 * semsys - System entry point for semctl, semget, and semop system calls. 1186 */ 1187 static int 1188 semsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4) 1189 { 1190 int error; 1191 1192 switch (opcode) { 1193 case SEMCTL: 1194 error = semctl((int)a1, (uint_t)a2, (int)a3, a4); 1195 break; 1196 case SEMGET: 1197 error = semget((key_t)a1, (int)a2, (int)a3); 1198 break; 1199 case SEMOP: 1200 error = semop((int)a1, (struct sembuf *)a2, (size_t)a3, 0); 1201 break; 1202 case SEMIDS: 1203 error = semids((int *)a1, (uint_t)a2, (uint_t *)a3); 1204 break; 1205 case SEMTIMEDOP: 1206 error = semop((int)a1, (struct sembuf *)a2, (size_t)a3, 1207 (timespec_t *)a4); 1208 break; 1209 default: 1210 error = set_errno(EINVAL); 1211 break; 1212 } 1213 return (error); 1214 } 1215