1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * Inter-Process Communication Semaphore Facility. 31 * 32 * See os/ipc.c for a description of common IPC functionality. 33 * 34 * Resource controls 35 * ----------------- 36 * 37 * Control: zone.max-sem-ids (rc_zone_semmni) 38 * Description: Maximum number of semaphore ids allowed a zone. 39 * 40 * When semget() is used to allocate a semaphore set, one id is 41 * allocated. If the id allocation doesn't succeed, semget() fails 42 * and errno is set to ENOSPC. Upon successful semctl(, IPC_RMID) 43 * the id is deallocated. 44 * 45 * Control: project.max-sem-ids (rc_project_semmni) 46 * Description: Maximum number of semaphore ids allowed a project. 47 * 48 * When semget() is used to allocate a semaphore set, one id is 49 * allocated. If the id allocation doesn't succeed, semget() fails 50 * and errno is set to ENOSPC. Upon successful semctl(, IPC_RMID) 51 * the id is deallocated. 52 * 53 * Control: process.max-sem-nsems (rc_process_semmsl) 54 * Description: Maximum number of semaphores allowed per semaphore set. 55 * 56 * When semget() is used to allocate a semaphore set, the size of the 57 * set is compared with this limit. If the number of semaphores 58 * exceeds the limit, semget() fails and errno is set to EINVAL. 59 * 60 * Control: process.max-sem-ops (rc_process_semopm) 61 * Description: Maximum number of semaphore operations allowed per 62 * semop call. 63 * 64 * When semget() successfully allocates a semaphore set, the minimum 65 * enforced value of this limit is used to initialize the 66 * "system-imposed maximum" number of operations a semop() call for 67 * this set can perform. 68 * 69 * Undo structures 70 * --------------- 71 * 72 * Removing the undo structure tunables involved a serious redesign of 73 * how they were implemented. There is now one undo structure for 74 * every process/semaphore array combination (lazily allocated, of 75 * course), and each is equal in size to the semaphore it corresponds 76 * to. To avoid scalability and performance problems, the undo 77 * structures are stored in two places: a per-process AVL tree sorted 78 * by ksemid pointer (p_semacct, protected by p_lock) and an unsorted 79 * per-semaphore linked list (sem_undos, protected by the semaphore's 80 * ID lock). The former is used by semop, where a lookup is performed 81 * once and cached if SEM_UNDO is specified for any of the operations, 82 * and at process exit where the undoable operations are rolled back. 83 * The latter is used when removing the semaphore, so the undo 84 * structures can be removed from the appropriate processes' trees. 85 * 86 * The undo structure itself contains pointers to the ksemid and proc 87 * to which it corresponds, a list node, an AVL node, and an array of 88 * adjust-on-exit (AOE) values. When an undo structure is allocated it 89 * is immediately added to both the process's tree and the semaphore's 90 * list. Lastly, the reference count on the semaphore is increased. 91 * 92 * Avoiding a lock ordering violation between p_lock and the ID lock, 93 * wont to occur when there is a race between a process exiting and the 94 * removal of a semaphore, mandates the delicate dance that exists 95 * between semexit and sem_rmid. 96 * 97 * sem_rmid, holding the ID lock, iterates through all undo structures 98 * and for each takes the appropriate process's p_lock and checks to 99 * see if p_semacct is NULL. If it is, it skips that undo structure 100 * and continues to the next. Otherwise, it removes the undo structure 101 * from both the AVL tree and the semaphore's list, and releases the 102 * hold that the undo structure had on the semaphore. 103 * 104 * The important other half of this is semexit, which will immediately 105 * take p_lock, obtain the AVL pointer, clear p_semacct, and drop 106 * p_lock. From this point on it is semexit's responsibility to clean 107 * up all undo structures found in the tree -- a coexecuting sem_rmid 108 * will see the NULL p_semacct and skip that undo structure. It walks 109 * the AVL tree (using avl_destroy_nodes) and for each undo structure 110 * takes the appropriate semaphore's ID lock (always legal since the 111 * undo structure has a hold on the semaphore), updates all semaphores 112 * with non-zero AOE values, and removes the structure from the 113 * semaphore's list. It then drops the structure's reference on the 114 * semaphore, drops the ID lock, and frees the undo structure. 115 */ 116 117 #include <sys/types.h> 118 #include <sys/t_lock.h> 119 #include <sys/param.h> 120 #include <sys/systm.h> 121 #include <sys/sysmacros.h> 122 #include <sys/cred.h> 123 #include <sys/vmem.h> 124 #include <sys/kmem.h> 125 #include <sys/errno.h> 126 #include <sys/time.h> 127 #include <sys/ipc.h> 128 #include <sys/ipc_impl.h> 129 #include <sys/sem.h> 130 #include <sys/sem_impl.h> 131 #include <sys/user.h> 132 #include <sys/proc.h> 133 #include <sys/cpuvar.h> 134 #include <sys/debug.h> 135 #include <sys/var.h> 136 #include <sys/cmn_err.h> 137 #include <sys/modctl.h> 138 #include <sys/syscall.h> 139 #include <sys/avl.h> 140 #include <sys/list.h> 141 #include <sys/zone.h> 142 143 #include <c2/audit.h> 144 145 extern rctl_hndl_t rc_zone_semmni; 146 extern rctl_hndl_t rc_project_semmni; 147 extern rctl_hndl_t rc_process_semmsl; 148 extern rctl_hndl_t rc_process_semopm; 149 static ipc_service_t *sem_svc; 150 static zone_key_t sem_zone_key; 151 152 /* 153 * The following tunables are obsolete. Though for compatibility we 154 * still read and interpret seminfo_semmsl, seminfo_semopm and 155 * seminfo_semmni (see os/project.c and os/rctl_proc.c), the preferred 156 * mechanism for administrating the IPC Semaphore facility is through 157 * the resource controls described at the top of this file. 158 */ 159 int seminfo_semaem = 16384; /* (obsolete) */ 160 int seminfo_semmap = 10; /* (obsolete) */ 161 int seminfo_semmni = 10; /* (obsolete) */ 162 int seminfo_semmns = 60; /* (obsolete) */ 163 int seminfo_semmnu = 30; /* (obsolete) */ 164 int seminfo_semmsl = 25; /* (obsolete) */ 165 int seminfo_semopm = 10; /* (obsolete) */ 166 int seminfo_semume = 10; /* (obsolete) */ 167 int seminfo_semusz = 96; /* (obsolete) */ 168 int seminfo_semvmx = 32767; /* (obsolete) */ 169 170 #define SEM_MAXUCOPS 4096 /* max # of unchecked ops per semop call */ 171 #define SEM_UNDOSZ(n) (sizeof (struct sem_undo) + (n - 1) * sizeof (int)) 172 173 static int semsys(int opcode, uintptr_t a0, uintptr_t a1, 174 uintptr_t a2, uintptr_t a3); 175 static void sem_dtor(kipc_perm_t *); 176 static void sem_rmid(kipc_perm_t *); 177 static void sem_remove_zone(zoneid_t, void *); 178 179 static struct sysent ipcsem_sysent = { 180 5, 181 SE_NOUNLOAD | SE_ARGC | SE_32RVAL1, 182 semsys 183 }; 184 185 /* 186 * Module linkage information for the kernel. 187 */ 188 static struct modlsys modlsys = { 189 &mod_syscallops, "System V semaphore facility", &ipcsem_sysent 190 }; 191 192 #ifdef _SYSCALL32_IMPL 193 static struct modlsys modlsys32 = { 194 &mod_syscallops32, "32-bit System V semaphore facility", &ipcsem_sysent 195 }; 196 #endif 197 198 static struct modlinkage modlinkage = { 199 MODREV_1, 200 &modlsys, 201 #ifdef _SYSCALL32_IMPL 202 &modlsys32, 203 #endif 204 NULL 205 }; 206 207 208 int 209 _init(void) 210 { 211 int result; 212 213 sem_svc = ipcs_create("semids", rc_project_semmni, rc_zone_semmni, 214 sizeof (ksemid_t), sem_dtor, sem_rmid, AT_IPC_SEM, 215 offsetof(ipc_rqty_t, ipcq_semmni)); 216 zone_key_create(&sem_zone_key, NULL, sem_remove_zone, NULL); 217 218 if ((result = mod_install(&modlinkage)) == 0) 219 return (0); 220 221 (void) zone_key_delete(sem_zone_key); 222 ipcs_destroy(sem_svc); 223 224 return (result); 225 } 226 227 int 228 _fini(void) 229 { 230 return (EBUSY); 231 } 232 233 int 234 _info(struct modinfo *modinfop) 235 { 236 return (mod_info(&modlinkage, modinfop)); 237 } 238 239 static void 240 sem_dtor(kipc_perm_t *perm) 241 { 242 ksemid_t *sp = (ksemid_t *)perm; 243 244 kmem_free(sp->sem_base, 245 P2ROUNDUP(sp->sem_nsems * sizeof (struct sem), 64)); 246 list_destroy(&sp->sem_undos); 247 } 248 249 /* 250 * sem_undo_add - Create or update adjust on exit entry. 251 */ 252 static int 253 sem_undo_add(short val, ushort_t num, struct sem_undo *undo) 254 { 255 int newval = undo->un_aoe[num] - val; 256 257 if (newval > USHRT_MAX || newval < -USHRT_MAX) 258 return (ERANGE); 259 undo->un_aoe[num] = newval; 260 261 return (0); 262 } 263 264 /* 265 * sem_undo_clear - clears all undo entries for specified semaphores 266 * 267 * Used when semaphores are reset by SETVAL or SETALL. 268 */ 269 static void 270 sem_undo_clear(ksemid_t *sp, ushort_t low, ushort_t high) 271 { 272 struct sem_undo *undo; 273 int i; 274 275 ASSERT(low <= high); 276 ASSERT(high < sp->sem_nsems); 277 278 for (undo = list_head(&sp->sem_undos); undo; 279 undo = list_next(&sp->sem_undos, undo)) 280 for (i = low; i <= high; i++) 281 undo->un_aoe[i] = 0; 282 } 283 284 /* 285 * sem_rollback - roll back work done so far if unable to complete operation 286 */ 287 static void 288 sem_rollback(ksemid_t *sp, struct sembuf *op, int n, struct sem_undo *undo) 289 { 290 struct sem *semp; /* semaphore ptr */ 291 292 for (op += n - 1; n--; op--) { 293 if (op->sem_op == 0) 294 continue; 295 semp = &sp->sem_base[op->sem_num]; 296 semp->semval -= op->sem_op; 297 if (op->sem_flg & SEM_UNDO) { 298 ASSERT(undo != NULL); 299 (void) sem_undo_add(-op->sem_op, op->sem_num, undo); 300 } 301 } 302 } 303 304 static void 305 sem_rmid(kipc_perm_t *perm) 306 { 307 ksemid_t *sp = (ksemid_t *)perm; 308 struct sem *semp; 309 struct sem_undo *undo; 310 size_t size = SEM_UNDOSZ(sp->sem_nsems); 311 int i; 312 313 /*LINTED*/ 314 while (undo = list_head(&sp->sem_undos)) { 315 list_remove(&sp->sem_undos, undo); 316 mutex_enter(&undo->un_proc->p_lock); 317 if (undo->un_proc->p_semacct == NULL) { 318 mutex_exit(&undo->un_proc->p_lock); 319 continue; 320 } 321 avl_remove(undo->un_proc->p_semacct, undo); 322 mutex_exit(&undo->un_proc->p_lock); 323 kmem_free(undo, size); 324 ipc_rele_locked(sem_svc, (kipc_perm_t *)sp); 325 } 326 327 for (i = 0; i < sp->sem_nsems; i++) { 328 semp = &sp->sem_base[i]; 329 semp->semval = semp->sempid = 0; 330 if (semp->semncnt) { 331 cv_broadcast(&semp->semncnt_cv); 332 semp->semncnt = 0; 333 } 334 if (semp->semzcnt) { 335 cv_broadcast(&semp->semzcnt_cv); 336 semp->semzcnt = 0; 337 } 338 } 339 } 340 341 /* 342 * semctl - Semctl system call. 343 */ 344 static int 345 semctl(int semid, uint_t semnum, int cmd, uintptr_t arg) 346 { 347 ksemid_t *sp; /* ptr to semaphore header */ 348 struct sem *p; /* ptr to semaphore */ 349 unsigned int i; /* loop control */ 350 ushort_t *vals, *vp; 351 size_t vsize = 0; 352 int error = 0; 353 int retval = 0; 354 struct cred *cr; 355 kmutex_t *lock; 356 model_t mdl = get_udatamodel(); 357 STRUCT_DECL(semid_ds, sid); 358 struct semid_ds64 ds64; 359 360 STRUCT_INIT(sid, mdl); 361 cr = CRED(); 362 363 /* 364 * Perform pre- or non-lookup actions (e.g. copyins, RMID). 365 */ 366 switch (cmd) { 367 case IPC_SET: 368 if (copyin((void *)arg, STRUCT_BUF(sid), STRUCT_SIZE(sid))) 369 return (set_errno(EFAULT)); 370 break; 371 372 case IPC_SET64: 373 if (copyin((void *)arg, &ds64, sizeof (struct semid_ds64))) 374 return (set_errno(EFAULT)); 375 break; 376 377 case SETALL: 378 if ((lock = ipc_lookup(sem_svc, semid, 379 (kipc_perm_t **)&sp)) == NULL) 380 return (set_errno(EINVAL)); 381 vsize = sp->sem_nsems * sizeof (*vals); 382 mutex_exit(lock); 383 384 /* allocate space to hold all semaphore values */ 385 vals = kmem_alloc(vsize, KM_SLEEP); 386 387 if (copyin((void *)arg, vals, vsize)) { 388 kmem_free(vals, vsize); 389 return (set_errno(EFAULT)); 390 } 391 break; 392 393 case IPC_RMID: 394 if (error = ipc_rmid(sem_svc, semid, cr)) 395 return (set_errno(error)); 396 return (0); 397 } 398 399 if ((lock = ipc_lookup(sem_svc, semid, (kipc_perm_t **)&sp)) == NULL) { 400 if (vsize != 0) 401 kmem_free(vals, vsize); 402 return (set_errno(EINVAL)); 403 } 404 switch (cmd) { 405 /* Set ownership and permissions. */ 406 case IPC_SET: 407 408 if (error = ipcperm_set(sem_svc, cr, &sp->sem_perm, 409 &STRUCT_BUF(sid)->sem_perm, mdl)) { 410 mutex_exit(lock); 411 return (set_errno(error)); 412 } 413 sp->sem_ctime = gethrestime_sec(); 414 mutex_exit(lock); 415 return (0); 416 417 /* Get semaphore data structure. */ 418 case IPC_STAT: 419 420 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 421 mutex_exit(lock); 422 return (set_errno(error)); 423 } 424 425 ipcperm_stat(&STRUCT_BUF(sid)->sem_perm, &sp->sem_perm, mdl); 426 STRUCT_FSETP(sid, sem_base, NULL); /* kernel addr */ 427 STRUCT_FSET(sid, sem_nsems, sp->sem_nsems); 428 STRUCT_FSET(sid, sem_otime, sp->sem_otime); 429 STRUCT_FSET(sid, sem_ctime, sp->sem_ctime); 430 STRUCT_FSET(sid, sem_binary, sp->sem_binary); 431 mutex_exit(lock); 432 433 if (copyout(STRUCT_BUF(sid), (void *)arg, STRUCT_SIZE(sid))) 434 return (set_errno(EFAULT)); 435 return (0); 436 437 case IPC_SET64: 438 439 if (error = ipcperm_set64(sem_svc, cr, &sp->sem_perm, 440 &ds64.semx_perm)) { 441 mutex_exit(lock); 442 return (set_errno(error)); 443 } 444 sp->sem_ctime = gethrestime_sec(); 445 mutex_exit(lock); 446 return (0); 447 448 case IPC_STAT64: 449 450 ipcperm_stat64(&ds64.semx_perm, &sp->sem_perm); 451 ds64.semx_nsems = sp->sem_nsems; 452 ds64.semx_otime = sp->sem_otime; 453 ds64.semx_ctime = sp->sem_ctime; 454 455 mutex_exit(lock); 456 if (copyout(&ds64, (void *)arg, sizeof (struct semid_ds64))) 457 return (set_errno(EFAULT)); 458 459 return (0); 460 461 /* Get # of processes sleeping for greater semval. */ 462 case GETNCNT: 463 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 464 mutex_exit(lock); 465 return (set_errno(error)); 466 } 467 if (semnum >= sp->sem_nsems) { 468 mutex_exit(lock); 469 return (set_errno(EINVAL)); 470 } 471 retval = sp->sem_base[semnum].semncnt; 472 mutex_exit(lock); 473 return (retval); 474 475 /* Get pid of last process to operate on semaphore. */ 476 case GETPID: 477 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 478 mutex_exit(lock); 479 return (set_errno(error)); 480 } 481 if (semnum >= sp->sem_nsems) { 482 mutex_exit(lock); 483 return (set_errno(EINVAL)); 484 } 485 retval = sp->sem_base[semnum].sempid; 486 mutex_exit(lock); 487 return (retval); 488 489 /* Get semval of one semaphore. */ 490 case GETVAL: 491 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 492 mutex_exit(lock); 493 return (set_errno(error)); 494 } 495 if (semnum >= sp->sem_nsems) { 496 mutex_exit(lock); 497 return (set_errno(EINVAL)); 498 } 499 retval = sp->sem_base[semnum].semval; 500 mutex_exit(lock); 501 return (retval); 502 503 /* Get all semvals in set. */ 504 case GETALL: 505 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 506 mutex_exit(lock); 507 return (set_errno(error)); 508 } 509 510 /* allocate space to hold all semaphore values */ 511 vsize = sp->sem_nsems * sizeof (*vals); 512 vals = vp = kmem_alloc(vsize, KM_SLEEP); 513 514 for (i = sp->sem_nsems, p = sp->sem_base; i--; p++, vp++) 515 bcopy(&p->semval, vp, sizeof (p->semval)); 516 517 mutex_exit(lock); 518 519 if (copyout((void *)vals, (void *)arg, vsize)) { 520 kmem_free(vals, vsize); 521 return (set_errno(EFAULT)); 522 } 523 524 kmem_free(vals, vsize); 525 return (0); 526 527 /* Get # of processes sleeping for semval to become zero. */ 528 case GETZCNT: 529 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 530 mutex_exit(lock); 531 return (set_errno(error)); 532 } 533 if (semnum >= sp->sem_nsems) { 534 mutex_exit(lock); 535 return (set_errno(EINVAL)); 536 } 537 retval = sp->sem_base[semnum].semzcnt; 538 mutex_exit(lock); 539 return (retval); 540 541 /* Set semval of one semaphore. */ 542 case SETVAL: 543 if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) { 544 mutex_exit(lock); 545 return (set_errno(error)); 546 } 547 if (semnum >= sp->sem_nsems) { 548 mutex_exit(lock); 549 return (set_errno(EINVAL)); 550 } 551 if ((uint_t)arg > USHRT_MAX) { 552 mutex_exit(lock); 553 return (set_errno(ERANGE)); 554 } 555 p = &sp->sem_base[semnum]; 556 if ((p->semval = (ushort_t)arg) != 0) { 557 if (p->semncnt) { 558 cv_broadcast(&p->semncnt_cv); 559 } 560 } else if (p->semzcnt) { 561 cv_broadcast(&p->semzcnt_cv); 562 } 563 p->sempid = curproc->p_pid; 564 sem_undo_clear(sp, (ushort_t)semnum, (ushort_t)semnum); 565 mutex_exit(lock); 566 return (0); 567 568 /* Set semvals of all semaphores in set. */ 569 case SETALL: 570 /* Check if semaphore set has been deleted and reallocated. */ 571 if (sp->sem_nsems * sizeof (*vals) != vsize) { 572 error = set_errno(EINVAL); 573 goto seterr; 574 } 575 if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) { 576 error = set_errno(error); 577 goto seterr; 578 } 579 sem_undo_clear(sp, 0, sp->sem_nsems - 1); 580 for (i = 0, p = sp->sem_base; i < sp->sem_nsems; 581 (p++)->sempid = curproc->p_pid) { 582 if ((p->semval = vals[i++]) != 0) { 583 if (p->semncnt) { 584 cv_broadcast(&p->semncnt_cv); 585 } 586 } else if (p->semzcnt) { 587 cv_broadcast(&p->semzcnt_cv); 588 } 589 } 590 seterr: 591 mutex_exit(lock); 592 kmem_free(vals, vsize); 593 return (error); 594 595 default: 596 mutex_exit(lock); 597 return (set_errno(EINVAL)); 598 } 599 600 /* NOTREACHED */ 601 } 602 603 /* 604 * semexit - Called by exit() to clean up on process exit. 605 */ 606 void 607 semexit(proc_t *pp) 608 { 609 avl_tree_t *tree; 610 struct sem_undo *undo; 611 void *cookie = NULL; 612 613 mutex_enter(&pp->p_lock); 614 tree = pp->p_semacct; 615 pp->p_semacct = NULL; 616 mutex_exit(&pp->p_lock); 617 618 while (undo = avl_destroy_nodes(tree, &cookie)) { 619 ksemid_t *sp = undo->un_sp; 620 size_t size = SEM_UNDOSZ(sp->sem_nsems); 621 int i; 622 623 (void) ipc_lock(sem_svc, sp->sem_perm.ipc_id); 624 if (!IPC_FREE(&sp->sem_perm)) { 625 for (i = 0; i < sp->sem_nsems; i++) { 626 int adj = undo->un_aoe[i]; 627 if (adj) { 628 struct sem *semp = &sp->sem_base[i]; 629 int v = (int)semp->semval + adj; 630 631 if (v < 0 || v > USHRT_MAX) 632 continue; 633 semp->semval = (ushort_t)v; 634 if (v == 0 && semp->semzcnt) 635 cv_broadcast(&semp->semzcnt_cv); 636 if (adj > 0 && semp->semncnt) 637 cv_broadcast(&semp->semncnt_cv); 638 } 639 } 640 list_remove(&sp->sem_undos, undo); 641 } 642 ipc_rele(sem_svc, (kipc_perm_t *)sp); 643 kmem_free(undo, size); 644 } 645 646 avl_destroy(tree); 647 kmem_free(tree, sizeof (avl_tree_t)); 648 } 649 650 /* 651 * Remove all semaphores associated with a given zone. Called by 652 * zone_shutdown when the zone is halted. 653 */ 654 /*ARGSUSED1*/ 655 static void 656 sem_remove_zone(zoneid_t zoneid, void *arg) 657 { 658 ipc_remove_zone(sem_svc, zoneid); 659 } 660 661 /* 662 * semget - Semget system call. 663 */ 664 static int 665 semget(key_t key, int nsems, int semflg) 666 { 667 ksemid_t *sp; 668 kmutex_t *lock; 669 int id, error; 670 proc_t *pp = curproc; 671 672 top: 673 if (error = ipc_get(sem_svc, key, semflg, (kipc_perm_t **)&sp, &lock)) 674 return (set_errno(error)); 675 676 if (!IPC_FREE(&sp->sem_perm)) { 677 /* 678 * A semaphore with the requested key exists. 679 */ 680 if (!((nsems >= 0) && (nsems <= sp->sem_nsems))) { 681 mutex_exit(lock); 682 return (set_errno(EINVAL)); 683 } 684 } else { 685 /* 686 * This is a new semaphore set. Finish initialization. 687 */ 688 if (nsems <= 0 || (rctl_test(rc_process_semmsl, pp->p_rctls, pp, 689 nsems, RCA_SAFE) & RCT_DENY)) { 690 mutex_exit(lock); 691 mutex_exit(&pp->p_lock); 692 ipc_cleanup(sem_svc, (kipc_perm_t *)sp); 693 return (set_errno(EINVAL)); 694 } 695 mutex_exit(lock); 696 mutex_exit(&pp->p_lock); 697 698 /* 699 * We round the allocation up to coherency granularity 700 * so that multiple semaphore allocations won't result 701 * in the false sharing of their sem structures. 702 */ 703 sp->sem_base = 704 kmem_zalloc(P2ROUNDUP(nsems * sizeof (struct sem), 64), 705 KM_SLEEP); 706 sp->sem_binary = (nsems == 1); 707 sp->sem_nsems = (ushort_t)nsems; 708 sp->sem_ctime = gethrestime_sec(); 709 sp->sem_otime = 0; 710 list_create(&sp->sem_undos, sizeof (struct sem_undo), 711 offsetof(struct sem_undo, un_list)); 712 713 if (error = ipc_commit_begin(sem_svc, key, semflg, 714 (kipc_perm_t *)sp)) { 715 if (error == EAGAIN) 716 goto top; 717 return (set_errno(error)); 718 } 719 sp->sem_maxops = 720 rctl_enforced_value(rc_process_semopm, pp->p_rctls, pp); 721 if (rctl_test(rc_process_semmsl, pp->p_rctls, pp, nsems, 722 RCA_SAFE) & RCT_DENY) { 723 ipc_cleanup(sem_svc, (kipc_perm_t *)sp); 724 return (set_errno(EINVAL)); 725 } 726 lock = ipc_commit_end(sem_svc, &sp->sem_perm); 727 } 728 if (audit_active) 729 audit_ipcget(AT_IPC_SEM, (void *)sp); 730 id = sp->sem_perm.ipc_id; 731 mutex_exit(lock); 732 return (id); 733 } 734 735 /* 736 * semids system call. 737 */ 738 static int 739 semids(int *buf, uint_t nids, uint_t *pnids) 740 { 741 int error; 742 743 if (error = ipc_ids(sem_svc, buf, nids, pnids)) 744 return (set_errno(error)); 745 746 return (0); 747 } 748 749 750 /* 751 * Helper function for semop - copies in the provided timespec and 752 * computes the absolute future time after which we must return. 753 */ 754 static int 755 compute_timeout(timespec_t **tsp, timespec_t *ts, timespec_t *now, 756 timespec_t *timeout) 757 { 758 model_t datamodel = get_udatamodel(); 759 760 if (datamodel == DATAMODEL_NATIVE) { 761 if (copyin(timeout, ts, sizeof (timespec_t))) 762 return (EFAULT); 763 } else { 764 timespec32_t ts32; 765 766 if (copyin(timeout, &ts32, sizeof (timespec32_t))) 767 return (EFAULT); 768 TIMESPEC32_TO_TIMESPEC(ts, &ts32) 769 } 770 771 if (itimerspecfix(ts)) 772 return (EINVAL); 773 774 /* 775 * Convert the timespec value into absolute time. 776 */ 777 timespecadd(ts, now); 778 *tsp = ts; 779 780 return (0); 781 } 782 783 /* 784 * Undo structure comparator. We sort based on ksemid_t pointer. 785 */ 786 static int 787 sem_undo_compar(const void *x, const void *y) 788 { 789 struct sem_undo *undo1 = (struct sem_undo *)x; 790 struct sem_undo *undo2 = (struct sem_undo *)y; 791 792 if (undo1->un_sp < undo2->un_sp) 793 return (-1); 794 if (undo1->un_sp > undo2->un_sp) 795 return (1); 796 return (0); 797 } 798 799 /* 800 * Helper function for semop - creates an undo structure and adds it to 801 * the process's avl tree and the semaphore's list. 802 */ 803 static int 804 sem_undo_alloc(proc_t *pp, ksemid_t *sp, kmutex_t **lock, 805 struct sem_undo *template, struct sem_undo **un) 806 { 807 size_t size; 808 struct sem_undo *undo; 809 avl_tree_t *tree = NULL; 810 avl_index_t where; 811 812 mutex_exit(*lock); 813 814 size = SEM_UNDOSZ(sp->sem_nsems); 815 undo = kmem_zalloc(size, KM_SLEEP); 816 undo->un_proc = pp; 817 undo->un_sp = sp; 818 819 if (pp->p_semacct == NULL) 820 tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); 821 822 *lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id); 823 if (IPC_FREE(&sp->sem_perm)) { 824 kmem_free(undo, size); 825 if (tree) 826 kmem_free(tree, sizeof (avl_tree_t)); 827 return (EIDRM); 828 } 829 830 mutex_enter(&pp->p_lock); 831 if (tree) { 832 if (pp->p_semacct == NULL) { 833 avl_create(tree, sem_undo_compar, 834 sizeof (struct sem_undo), 835 offsetof(struct sem_undo, un_avl)); 836 pp->p_semacct = tree; 837 } else { 838 kmem_free(tree, sizeof (avl_tree_t)); 839 } 840 } 841 842 if (*un = avl_find(pp->p_semacct, template, &where)) { 843 mutex_exit(&pp->p_lock); 844 kmem_free(undo, size); 845 } else { 846 *un = undo; 847 avl_insert(pp->p_semacct, undo, where); 848 mutex_exit(&pp->p_lock); 849 list_insert_head(&sp->sem_undos, undo); 850 ipc_hold(sem_svc, (kipc_perm_t *)sp); 851 } 852 853 854 return (0); 855 } 856 857 /* 858 * semop - Semop system call. 859 */ 860 static int 861 semop(int semid, struct sembuf *sops, size_t nsops, timespec_t *timeout) 862 { 863 ksemid_t *sp = NULL; 864 kmutex_t *lock; 865 struct sembuf *op; /* ptr to operation */ 866 int i; /* loop control */ 867 struct sem *semp; /* ptr to semaphore */ 868 int error = 0; 869 struct sembuf *uops; /* ptr to copy of user ops */ 870 struct sembuf x_sem; /* avoid kmem_alloc's */ 871 timespec_t now, ts, *tsp = NULL; 872 int timecheck = 0; 873 int cvres, needundo, mode; 874 struct sem_undo *undo; 875 proc_t *pp = curproc; 876 int held = 0; 877 878 CPU_STATS_ADDQ(CPU, sys, sema, 1); /* bump semaphore op count */ 879 880 /* 881 * To avoid the cost of copying in 'timeout' in the common 882 * case, we could only grab the time here and defer the copyin 883 * and associated computations until we are about to block. 884 * 885 * The down side to this is that we would then have to spin 886 * some goto top nonsense to avoid the copyin behind the semid 887 * lock. As a common use of timed semaphores is as an explicit 888 * blocking mechanism, this could incur a greater penalty. 889 * 890 * If we eventually decide that this would be a wise route to 891 * take, the deferrable functionality is completely contained 892 * in 'compute_timeout', and the interface is defined such that 893 * we can legally not validate 'timeout' if it is unused. 894 */ 895 if (timeout != NULL) { 896 timecheck = timechanged; 897 gethrestime(&now); 898 if (error = compute_timeout(&tsp, &ts, &now, timeout)) 899 return (set_errno(error)); 900 } 901 902 /* 903 * Allocate space to hold the vector of semaphore ops. If 904 * there is only 1 operation we use a preallocated buffer on 905 * the stack for speed. 906 * 907 * Since we don't want to allow the user to allocate an 908 * arbitrary amount of kernel memory, we need to check against 909 * the number of operations allowed by the semaphore. We only 910 * bother doing this if the number of operations is larger than 911 * SEM_MAXUCOPS. 912 */ 913 if (nsops == 1) 914 uops = &x_sem; 915 else if (nsops == 0) 916 return (0); 917 else if (nsops <= SEM_MAXUCOPS) 918 uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP); 919 920 if (nsops > SEM_MAXUCOPS) { 921 if ((lock = ipc_lookup(sem_svc, semid, 922 (kipc_perm_t **)&sp)) == NULL) 923 return (set_errno(EFAULT)); 924 925 if (nsops > sp->sem_maxops) { 926 mutex_exit(lock); 927 return (set_errno(E2BIG)); 928 } 929 held = 1; 930 ipc_hold(sem_svc, (kipc_perm_t *)sp); 931 mutex_exit(lock); 932 933 uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP); 934 if (copyin(sops, uops, nsops * sizeof (*op))) { 935 error = EFAULT; 936 (void) ipc_lock(sem_svc, sp->sem_perm.ipc_id); 937 goto semoperr; 938 } 939 940 lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id); 941 if (IPC_FREE(&sp->sem_perm)) { 942 error = EIDRM; 943 goto semoperr; 944 } 945 } else { 946 /* 947 * This could be interleaved with the above code, but 948 * keeping them separate improves readability. 949 */ 950 if (copyin(sops, uops, nsops * sizeof (*op))) { 951 error = EFAULT; 952 goto semoperr_unlocked; 953 } 954 955 if ((lock = ipc_lookup(sem_svc, semid, 956 (kipc_perm_t **)&sp)) == NULL) { 957 error = EINVAL; 958 goto semoperr_unlocked; 959 } 960 961 if (nsops > sp->sem_maxops) { 962 error = E2BIG; 963 goto semoperr; 964 } 965 } 966 967 /* 968 * Scan all operations. Verify that sem #s are in range and 969 * this process is allowed the requested operations. If any 970 * operations are marked SEM_UNDO, find (or allocate) the undo 971 * structure for this process and semaphore. 972 */ 973 needundo = 0; 974 mode = 0; 975 for (i = 0, op = uops; i++ < nsops; op++) { 976 mode |= op->sem_op ? SEM_A : SEM_R; 977 if (op->sem_num >= sp->sem_nsems) { 978 error = EFBIG; 979 goto semoperr; 980 } 981 if ((op->sem_flg & SEM_UNDO) && op->sem_op) 982 needundo = 1; 983 } 984 if (error = ipcperm_access(&sp->sem_perm, mode, CRED())) 985 goto semoperr; 986 987 if (needundo) { 988 struct sem_undo template; 989 990 template.un_sp = sp; 991 mutex_enter(&pp->p_lock); 992 if (pp->p_semacct) 993 undo = avl_find(pp->p_semacct, &template, NULL); 994 else 995 undo = NULL; 996 mutex_exit(&pp->p_lock); 997 if (undo == NULL) { 998 if (!held) { 999 held = 1; 1000 ipc_hold(sem_svc, (kipc_perm_t *)sp); 1001 } 1002 if (error = sem_undo_alloc(pp, sp, &lock, &template, 1003 &undo)) 1004 goto semoperr; 1005 1006 /* sem_undo_alloc unlocks the semaphore */ 1007 if (error = ipcperm_access(&sp->sem_perm, mode, CRED())) 1008 goto semoperr; 1009 } 1010 } 1011 1012 check: 1013 /* 1014 * Loop waiting for the operations to be satisfied atomically. 1015 * Actually, do the operations and undo them if a wait is needed 1016 * or an error is detected. 1017 */ 1018 for (i = 0; i < nsops; i++) { 1019 op = &uops[i]; 1020 semp = &sp->sem_base[op->sem_num]; 1021 1022 /* 1023 * Raise the semaphore (i.e. sema_v) 1024 */ 1025 if (op->sem_op > 0) { 1026 if (op->sem_op + (int)semp->semval > USHRT_MAX || 1027 ((op->sem_flg & SEM_UNDO) && 1028 (error = sem_undo_add(op->sem_op, op->sem_num, 1029 undo)))) { 1030 if (i) 1031 sem_rollback(sp, uops, i, undo); 1032 if (error == 0) 1033 error = ERANGE; 1034 goto semoperr; 1035 } 1036 semp->semval += op->sem_op; 1037 /* 1038 * If we are only incrementing the semaphore value 1039 * by one on a binary semaphore, we can cv_signal. 1040 */ 1041 if (semp->semncnt) { 1042 if (op->sem_op == 1 && sp->sem_binary) 1043 cv_signal(&semp->semncnt_cv); 1044 else 1045 cv_broadcast(&semp->semncnt_cv); 1046 } 1047 if (semp->semzcnt && !semp->semval) 1048 cv_broadcast(&semp->semzcnt_cv); 1049 continue; 1050 } 1051 1052 /* 1053 * Lower the semaphore (i.e. sema_p) 1054 */ 1055 if (op->sem_op < 0) { 1056 if (semp->semval >= (unsigned)(-op->sem_op)) { 1057 if ((op->sem_flg & SEM_UNDO) && 1058 (error = sem_undo_add(op->sem_op, 1059 op->sem_num, undo))) { 1060 if (i) 1061 sem_rollback(sp, uops, i, undo); 1062 goto semoperr; 1063 } 1064 semp->semval += op->sem_op; 1065 if (semp->semzcnt && !semp->semval) 1066 cv_broadcast(&semp->semzcnt_cv); 1067 continue; 1068 } 1069 if (i) 1070 sem_rollback(sp, uops, i, undo); 1071 if (op->sem_flg & IPC_NOWAIT) { 1072 error = EAGAIN; 1073 goto semoperr; 1074 } 1075 1076 /* 1077 * Mark the semaphore set as not a binary type 1078 * if we are decrementing the value by more than 1. 1079 * 1080 * V operations will resort to cv_broadcast 1081 * for this set because there are too many weird 1082 * cases that have to be caught. 1083 */ 1084 if (op->sem_op < -1) 1085 sp->sem_binary = 0; 1086 if (!held) { 1087 held = 1; 1088 ipc_hold(sem_svc, (kipc_perm_t *)sp); 1089 } 1090 semp->semncnt++; 1091 cvres = cv_waituntil_sig(&semp->semncnt_cv, lock, 1092 tsp, timecheck); 1093 lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock); 1094 1095 if (!IPC_FREE(&sp->sem_perm)) { 1096 ASSERT(semp->semncnt != 0); 1097 semp->semncnt--; 1098 if (cvres > 0) /* normal wakeup */ 1099 goto check; 1100 } 1101 1102 /* EINTR or EAGAIN overrides EIDRM */ 1103 if (cvres == 0) 1104 error = EINTR; 1105 else if (cvres < 0) 1106 error = EAGAIN; 1107 else 1108 error = EIDRM; 1109 goto semoperr; 1110 } 1111 1112 /* 1113 * Wait for zero value 1114 */ 1115 if (semp->semval) { 1116 if (i) 1117 sem_rollback(sp, uops, i, undo); 1118 if (op->sem_flg & IPC_NOWAIT) { 1119 error = EAGAIN; 1120 goto semoperr; 1121 } 1122 1123 if (!held) { 1124 held = 1; 1125 ipc_hold(sem_svc, (kipc_perm_t *)sp); 1126 } 1127 semp->semzcnt++; 1128 cvres = cv_waituntil_sig(&semp->semzcnt_cv, lock, 1129 tsp, timecheck); 1130 lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock); 1131 1132 /* 1133 * Don't touch semp if the semaphores have been removed. 1134 */ 1135 if (!IPC_FREE(&sp->sem_perm)) { 1136 ASSERT(semp->semzcnt != 0); 1137 semp->semzcnt--; 1138 if (cvres > 0) /* normal wakeup */ 1139 goto check; 1140 } 1141 1142 /* EINTR or EAGAIN overrides EIDRM */ 1143 if (cvres == 0) 1144 error = EINTR; 1145 else if (cvres < 0) 1146 error = EAGAIN; 1147 else 1148 error = EIDRM; 1149 goto semoperr; 1150 } 1151 } 1152 1153 /* All operations succeeded. Update sempid for accessed semaphores. */ 1154 for (i = 0, op = uops; i++ < nsops; 1155 sp->sem_base[(op++)->sem_num].sempid = pp->p_pid) 1156 ; 1157 sp->sem_otime = gethrestime_sec(); 1158 if (held) 1159 ipc_rele(sem_svc, (kipc_perm_t *)sp); 1160 else 1161 mutex_exit(lock); 1162 1163 /* Before leaving, deallocate the buffer that held the user semops */ 1164 if (nsops != 1) 1165 kmem_free(uops, sizeof (*uops) * nsops); 1166 return (0); 1167 1168 /* 1169 * Error return labels 1170 */ 1171 semoperr: 1172 if (held) 1173 ipc_rele(sem_svc, (kipc_perm_t *)sp); 1174 else 1175 mutex_exit(lock); 1176 1177 semoperr_unlocked: 1178 1179 /* Before leaving, deallocate the buffer that held the user semops */ 1180 if (nsops != 1) 1181 kmem_free(uops, sizeof (*uops) * nsops); 1182 return (set_errno(error)); 1183 } 1184 1185 /* 1186 * semsys - System entry point for semctl, semget, and semop system calls. 1187 */ 1188 static int 1189 semsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4) 1190 { 1191 int error; 1192 1193 switch (opcode) { 1194 case SEMCTL: 1195 error = semctl((int)a1, (uint_t)a2, (int)a3, a4); 1196 break; 1197 case SEMGET: 1198 error = semget((key_t)a1, (int)a2, (int)a3); 1199 break; 1200 case SEMOP: 1201 error = semop((int)a1, (struct sembuf *)a2, (size_t)a3, 0); 1202 break; 1203 case SEMIDS: 1204 error = semids((int *)a1, (uint_t)a2, (uint_t *)a3); 1205 break; 1206 case SEMTIMEDOP: 1207 error = semop((int)a1, (struct sembuf *)a2, (size_t)a3, 1208 (timespec_t *)a4); 1209 break; 1210 default: 1211 error = set_errno(EINVAL); 1212 break; 1213 } 1214 return (error); 1215 } 1216