1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * Inter-Process Communication Semaphore Facility. 31 * 32 * See os/ipc.c for a description of common IPC functionality. 33 * 34 * Resource controls 35 * ----------------- 36 * 37 * Control: zone.max-sem-ids (rc_zone_semmni) 38 * Description: Maximum number of semaphore ids allowed a zone. 39 * 40 * When semget() is used to allocate a semaphore set, one id is 41 * allocated. If the id allocation doesn't succeed, semget() fails 42 * and errno is set to ENOSPC. Upon successful semctl(, IPC_RMID) 43 * the id is deallocated. 44 * 45 * Control: project.max-sem-ids (rc_project_semmni) 46 * Description: Maximum number of semaphore ids allowed a project. 47 * 48 * When semget() is used to allocate a semaphore set, one id is 49 * allocated. If the id allocation doesn't succeed, semget() fails 50 * and errno is set to ENOSPC. Upon successful semctl(, IPC_RMID) 51 * the id is deallocated. 52 * 53 * Control: process.max-sem-nsems (rc_process_semmsl) 54 * Description: Maximum number of semaphores allowed per semaphore set. 55 * 56 * When semget() is used to allocate a semaphore set, the size of the 57 * set is compared with this limit. If the number of semaphores 58 * exceeds the limit, semget() fails and errno is set to EINVAL. 59 * 60 * Control: process.max-sem-ops (rc_process_semopm) 61 * Description: Maximum number of semaphore operations allowed per 62 * semop call. 63 * 64 * When semget() successfully allocates a semaphore set, the minimum 65 * enforced value of this limit is used to initialize the 66 * "system-imposed maximum" number of operations a semop() call for 67 * this set can perform. 68 * 69 * Undo structures 70 * --------------- 71 * 72 * Removing the undo structure tunables involved a serious redesign of 73 * how they were implemented. There is now one undo structure for 74 * every process/semaphore array combination (lazily allocated, of 75 * course), and each is equal in size to the semaphore it corresponds 76 * to. To avoid scalability and performance problems, the undo 77 * structures are stored in two places: a per-process AVL tree sorted 78 * by ksemid pointer (p_semacct, protected by p_lock) and an unsorted 79 * per-semaphore linked list (sem_undos, protected by the semaphore's 80 * ID lock). The former is used by semop, where a lookup is performed 81 * once and cached if SEM_UNDO is specified for any of the operations, 82 * and at process exit where the undoable operations are rolled back. 83 * The latter is used when removing the semaphore, so the undo 84 * structures can be removed from the appropriate processes' trees. 85 * 86 * The undo structure itself contains pointers to the ksemid and proc 87 * to which it corresponds, a list node, an AVL node, and an array of 88 * adjust-on-exit (AOE) values. When an undo structure is allocated it 89 * is immediately added to both the process's tree and the semaphore's 90 * list. Lastly, the reference count on the semaphore is increased. 91 * 92 * Avoiding a lock ordering violation between p_lock and the ID lock, 93 * wont to occur when there is a race between a process exiting and the 94 * removal of a semaphore, mandates the delicate dance that exists 95 * between semexit and sem_rmid. 96 * 97 * sem_rmid, holding the ID lock, iterates through all undo structures 98 * and for each takes the appropriate process's p_lock and checks to 99 * see if p_semacct is NULL. If it is, it skips that undo structure 100 * and continues to the next. Otherwise, it removes the undo structure 101 * from both the AVL tree and the semaphore's list, and releases the 102 * hold that the undo structure had on the semaphore. 103 * 104 * The important other half of this is semexit, which will immediately 105 * take p_lock, obtain the AVL pointer, clear p_semacct, and drop 106 * p_lock. From this point on it is semexit's responsibility to clean 107 * up all undo structures found in the tree -- a coexecuting sem_rmid 108 * will see the NULL p_semacct and skip that undo structure. It walks 109 * the AVL tree (using avl_destroy_nodes) and for each undo structure 110 * takes the appropriate semaphore's ID lock (always legal since the 111 * undo structure has a hold on the semaphore), updates all semaphores 112 * with non-zero AOE values, and removes the structure from the 113 * semaphore's list. It then drops the structure's reference on the 114 * semaphore, drops the ID lock, and frees the undo structure. 115 */ 116 117 #include <sys/types.h> 118 #include <sys/t_lock.h> 119 #include <sys/param.h> 120 #include <sys/systm.h> 121 #include <sys/sysmacros.h> 122 #include <sys/cred.h> 123 #include <sys/vmem.h> 124 #include <sys/kmem.h> 125 #include <sys/errno.h> 126 #include <sys/time.h> 127 #include <sys/ipc.h> 128 #include <sys/ipc_impl.h> 129 #include <sys/sem.h> 130 #include <sys/sem_impl.h> 131 #include <sys/user.h> 132 #include <sys/proc.h> 133 #include <sys/cpuvar.h> 134 #include <sys/debug.h> 135 #include <sys/var.h> 136 #include <sys/cmn_err.h> 137 #include <sys/modctl.h> 138 #include <sys/syscall.h> 139 #include <sys/avl.h> 140 #include <sys/list.h> 141 #include <sys/zone.h> 142 143 #include <c2/audit.h> 144 145 extern rctl_hndl_t rc_zone_semmni; 146 extern rctl_hndl_t rc_project_semmni; 147 extern rctl_hndl_t rc_process_semmsl; 148 extern rctl_hndl_t rc_process_semopm; 149 static ipc_service_t *sem_svc; 150 static zone_key_t sem_zone_key; 151 152 /* 153 * The following tunables are obsolete. Though for compatibility we 154 * still read and interpret seminfo_semmsl, seminfo_semopm and 155 * seminfo_semmni (see os/project.c and os/rctl_proc.c), the preferred 156 * mechanism for administrating the IPC Semaphore facility is through 157 * the resource controls described at the top of this file. 158 */ 159 int seminfo_semaem = 16384; /* (obsolete) */ 160 int seminfo_semmap = 10; /* (obsolete) */ 161 int seminfo_semmni = 10; /* (obsolete) */ 162 int seminfo_semmns = 60; /* (obsolete) */ 163 int seminfo_semmnu = 30; /* (obsolete) */ 164 int seminfo_semmsl = 25; /* (obsolete) */ 165 int seminfo_semopm = 10; /* (obsolete) */ 166 int seminfo_semume = 10; /* (obsolete) */ 167 int seminfo_semusz = 96; /* (obsolete) */ 168 int seminfo_semvmx = 32767; /* (obsolete) */ 169 170 #define SEM_MAXUCOPS 4096 /* max # of unchecked ops per semop call */ 171 #define SEM_UNDOSZ(n) (sizeof (struct sem_undo) + (n - 1) * sizeof (int)) 172 173 static int semsys(int opcode, uintptr_t a0, uintptr_t a1, 174 uintptr_t a2, uintptr_t a3); 175 static void sem_dtor(kipc_perm_t *); 176 static void sem_rmid(kipc_perm_t *); 177 static void sem_remove_zone(zoneid_t, void *); 178 179 static struct sysent ipcsem_sysent = { 180 5, 181 SE_NOUNLOAD | SE_ARGC | SE_32RVAL1, 182 semsys 183 }; 184 185 /* 186 * Module linkage information for the kernel. 187 */ 188 static struct modlsys modlsys = { 189 &mod_syscallops, "System V semaphore facility", &ipcsem_sysent 190 }; 191 192 #ifdef _SYSCALL32_IMPL 193 static struct modlsys modlsys32 = { 194 &mod_syscallops32, "32-bit System V semaphore facility", &ipcsem_sysent 195 }; 196 #endif 197 198 static struct modlinkage modlinkage = { 199 MODREV_1, 200 &modlsys, 201 #ifdef _SYSCALL32_IMPL 202 &modlsys32, 203 #endif 204 NULL 205 }; 206 207 208 int 209 _init(void) 210 { 211 int result; 212 213 sem_svc = ipcs_create("semids", rc_project_semmni, rc_zone_semmni, 214 sizeof (ksemid_t), sem_dtor, sem_rmid, AT_IPC_SEM, 215 offsetof(ipc_rqty_t, ipcq_semmni)); 216 zone_key_create(&sem_zone_key, NULL, sem_remove_zone, NULL); 217 218 if ((result = mod_install(&modlinkage)) == 0) 219 return (0); 220 221 (void) zone_key_delete(sem_zone_key); 222 ipcs_destroy(sem_svc); 223 224 return (result); 225 } 226 227 int 228 _fini(void) 229 { 230 return (EBUSY); 231 } 232 233 int 234 _info(struct modinfo *modinfop) 235 { 236 return (mod_info(&modlinkage, modinfop)); 237 } 238 239 static void 240 sem_dtor(kipc_perm_t *perm) 241 { 242 ksemid_t *sp = (ksemid_t *)perm; 243 244 kmem_free(sp->sem_base, 245 P2ROUNDUP(sp->sem_nsems * sizeof (struct sem), 64)); 246 list_destroy(&sp->sem_undos); 247 } 248 249 /* 250 * sem_undo_add - Create or update adjust on exit entry. 251 */ 252 static int 253 sem_undo_add(short val, ushort_t num, struct sem_undo *undo) 254 { 255 int newval = undo->un_aoe[num] - val; 256 257 if (newval > USHRT_MAX || newval < -USHRT_MAX) 258 return (ERANGE); 259 undo->un_aoe[num] = newval; 260 261 return (0); 262 } 263 264 /* 265 * sem_undo_clear - clears all undo entries for specified semaphores 266 * 267 * Used when semaphores are reset by SETVAL or SETALL. 268 */ 269 static void 270 sem_undo_clear(ksemid_t *sp, ushort_t low, ushort_t high) 271 { 272 struct sem_undo *undo; 273 int i; 274 275 ASSERT(low <= high); 276 ASSERT(high < sp->sem_nsems); 277 278 for (undo = list_head(&sp->sem_undos); undo; 279 undo = list_next(&sp->sem_undos, undo)) 280 for (i = low; i <= high; i++) 281 undo->un_aoe[i] = 0; 282 } 283 284 /* 285 * sem_rollback - roll back work done so far if unable to complete operation 286 */ 287 static void 288 sem_rollback(ksemid_t *sp, struct sembuf *op, int n, struct sem_undo *undo) 289 { 290 struct sem *semp; /* semaphore ptr */ 291 292 for (op += n - 1; n--; op--) { 293 if (op->sem_op == 0) 294 continue; 295 semp = &sp->sem_base[op->sem_num]; 296 semp->semval -= op->sem_op; 297 if (op->sem_flg & SEM_UNDO) { 298 ASSERT(undo != NULL); 299 (void) sem_undo_add(-op->sem_op, op->sem_num, undo); 300 } 301 } 302 } 303 304 static void 305 sem_rmid(kipc_perm_t *perm) 306 { 307 ksemid_t *sp = (ksemid_t *)perm; 308 struct sem *semp; 309 struct sem_undo *undo; 310 size_t size = SEM_UNDOSZ(sp->sem_nsems); 311 int i; 312 313 /*LINTED*/ 314 while (undo = list_head(&sp->sem_undos)) { 315 list_remove(&sp->sem_undos, undo); 316 mutex_enter(&undo->un_proc->p_lock); 317 if (undo->un_proc->p_semacct == NULL) { 318 mutex_exit(&undo->un_proc->p_lock); 319 continue; 320 } 321 avl_remove(undo->un_proc->p_semacct, undo); 322 mutex_exit(&undo->un_proc->p_lock); 323 kmem_free(undo, size); 324 ipc_rele_locked(sem_svc, (kipc_perm_t *)sp); 325 } 326 327 for (i = 0; i < sp->sem_nsems; i++) { 328 semp = &sp->sem_base[i]; 329 semp->semval = semp->sempid = 0; 330 if (semp->semncnt) { 331 cv_broadcast(&semp->semncnt_cv); 332 semp->semncnt = 0; 333 } 334 if (semp->semzcnt) { 335 cv_broadcast(&semp->semzcnt_cv); 336 semp->semzcnt = 0; 337 } 338 } 339 } 340 341 /* 342 * semctl - Semctl system call. 343 */ 344 static int 345 semctl(int semid, uint_t semnum, int cmd, uintptr_t arg) 346 { 347 ksemid_t *sp; /* ptr to semaphore header */ 348 struct sem *p; /* ptr to semaphore */ 349 unsigned int i; /* loop control */ 350 ushort_t *vals, *vp; 351 size_t vsize = 0; 352 int error = 0; 353 int retval = 0; 354 struct cred *cr; 355 kmutex_t *lock; 356 model_t mdl = get_udatamodel(); 357 STRUCT_DECL(semid_ds, sid); 358 struct semid_ds64 ds64; 359 360 STRUCT_INIT(sid, mdl); 361 cr = CRED(); 362 363 /* 364 * Perform pre- or non-lookup actions (e.g. copyins, RMID). 365 */ 366 switch (cmd) { 367 case IPC_SET: 368 if (copyin((void *)arg, STRUCT_BUF(sid), STRUCT_SIZE(sid))) 369 return (set_errno(EFAULT)); 370 break; 371 372 case IPC_SET64: 373 if (copyin((void *)arg, &ds64, sizeof (struct semid_ds64))) 374 return (set_errno(EFAULT)); 375 break; 376 377 case SETALL: 378 if ((lock = ipc_lookup(sem_svc, semid, 379 (kipc_perm_t **)&sp)) == NULL) 380 return (set_errno(EINVAL)); 381 vsize = sp->sem_nsems * sizeof (*vals); 382 mutex_exit(lock); 383 384 /* allocate space to hold all semaphore values */ 385 vals = kmem_alloc(vsize, KM_SLEEP); 386 387 if (copyin((void *)arg, vals, vsize)) { 388 kmem_free(vals, vsize); 389 return (set_errno(EFAULT)); 390 } 391 break; 392 393 case IPC_RMID: 394 if (error = ipc_rmid(sem_svc, semid, cr)) 395 return (set_errno(error)); 396 return (0); 397 } 398 399 if ((lock = ipc_lookup(sem_svc, semid, (kipc_perm_t **)&sp)) == NULL) { 400 if (vsize != 0) 401 kmem_free(vals, vsize); 402 return (set_errno(EINVAL)); 403 } 404 switch (cmd) { 405 /* Set ownership and permissions. */ 406 case IPC_SET: 407 408 if (error = ipcperm_set(sem_svc, cr, &sp->sem_perm, 409 &STRUCT_BUF(sid)->sem_perm, mdl)) { 410 mutex_exit(lock); 411 return (set_errno(error)); 412 } 413 sp->sem_ctime = gethrestime_sec(); 414 mutex_exit(lock); 415 return (0); 416 417 /* Get semaphore data structure. */ 418 case IPC_STAT: 419 420 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 421 mutex_exit(lock); 422 return (set_errno(error)); 423 } 424 425 ipcperm_stat(&STRUCT_BUF(sid)->sem_perm, &sp->sem_perm, mdl); 426 STRUCT_FSETP(sid, sem_base, NULL); /* kernel addr */ 427 STRUCT_FSET(sid, sem_nsems, sp->sem_nsems); 428 STRUCT_FSET(sid, sem_otime, sp->sem_otime); 429 STRUCT_FSET(sid, sem_ctime, sp->sem_ctime); 430 STRUCT_FSET(sid, sem_binary, sp->sem_binary); 431 mutex_exit(lock); 432 433 if (copyout(STRUCT_BUF(sid), (void *)arg, STRUCT_SIZE(sid))) 434 return (set_errno(EFAULT)); 435 return (0); 436 437 case IPC_SET64: 438 439 if (error = ipcperm_set64(sem_svc, cr, &sp->sem_perm, 440 &ds64.semx_perm)) { 441 mutex_exit(lock); 442 return (set_errno(error)); 443 } 444 sp->sem_ctime = gethrestime_sec(); 445 mutex_exit(lock); 446 return (0); 447 448 case IPC_STAT64: 449 450 ipcperm_stat64(&ds64.semx_perm, &sp->sem_perm); 451 ds64.semx_nsems = sp->sem_nsems; 452 ds64.semx_otime = sp->sem_otime; 453 ds64.semx_ctime = sp->sem_ctime; 454 455 mutex_exit(lock); 456 if (copyout(&ds64, (void *)arg, sizeof (struct semid_ds64))) 457 return (set_errno(EFAULT)); 458 459 return (0); 460 461 /* Get # of processes sleeping for greater semval. */ 462 case GETNCNT: 463 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 464 mutex_exit(lock); 465 return (set_errno(error)); 466 } 467 if (semnum >= sp->sem_nsems) { 468 mutex_exit(lock); 469 return (set_errno(EINVAL)); 470 } 471 retval = sp->sem_base[semnum].semncnt; 472 mutex_exit(lock); 473 return (retval); 474 475 /* Get pid of last process to operate on semaphore. */ 476 case GETPID: 477 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 478 mutex_exit(lock); 479 return (set_errno(error)); 480 } 481 if (semnum >= sp->sem_nsems) { 482 mutex_exit(lock); 483 return (set_errno(EINVAL)); 484 } 485 retval = sp->sem_base[semnum].sempid; 486 mutex_exit(lock); 487 return (retval); 488 489 /* Get semval of one semaphore. */ 490 case GETVAL: 491 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 492 mutex_exit(lock); 493 return (set_errno(error)); 494 } 495 if (semnum >= sp->sem_nsems) { 496 mutex_exit(lock); 497 return (set_errno(EINVAL)); 498 } 499 retval = sp->sem_base[semnum].semval; 500 mutex_exit(lock); 501 return (retval); 502 503 /* Get all semvals in set. */ 504 case GETALL: 505 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 506 mutex_exit(lock); 507 return (set_errno(error)); 508 } 509 510 /* allocate space to hold all semaphore values */ 511 vsize = sp->sem_nsems * sizeof (*vals); 512 vals = vp = kmem_alloc(vsize, KM_SLEEP); 513 514 for (i = sp->sem_nsems, p = sp->sem_base; i--; p++, vp++) 515 bcopy(&p->semval, vp, sizeof (p->semval)); 516 517 mutex_exit(lock); 518 519 if (copyout((void *)vals, (void *)arg, vsize)) { 520 kmem_free(vals, vsize); 521 return (set_errno(EFAULT)); 522 } 523 524 kmem_free(vals, vsize); 525 return (0); 526 527 /* Get # of processes sleeping for semval to become zero. */ 528 case GETZCNT: 529 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 530 mutex_exit(lock); 531 return (set_errno(error)); 532 } 533 if (semnum >= sp->sem_nsems) { 534 mutex_exit(lock); 535 return (set_errno(EINVAL)); 536 } 537 retval = sp->sem_base[semnum].semzcnt; 538 mutex_exit(lock); 539 return (retval); 540 541 /* Set semval of one semaphore. */ 542 case SETVAL: 543 if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) { 544 mutex_exit(lock); 545 return (set_errno(error)); 546 } 547 if (semnum >= sp->sem_nsems) { 548 mutex_exit(lock); 549 return (set_errno(EINVAL)); 550 } 551 if ((uint_t)arg > USHRT_MAX) { 552 mutex_exit(lock); 553 return (set_errno(ERANGE)); 554 } 555 p = &sp->sem_base[semnum]; 556 if ((p->semval = (ushort_t)arg) != 0) { 557 if (p->semncnt) { 558 cv_broadcast(&p->semncnt_cv); 559 } 560 } else if (p->semzcnt) { 561 cv_broadcast(&p->semzcnt_cv); 562 } 563 p->sempid = curproc->p_pid; 564 sem_undo_clear(sp, (ushort_t)semnum, (ushort_t)semnum); 565 mutex_exit(lock); 566 return (0); 567 568 /* Set semvals of all semaphores in set. */ 569 case SETALL: 570 /* Check if semaphore set has been deleted and reallocated. */ 571 if (sp->sem_nsems * sizeof (*vals) != vsize) { 572 error = set_errno(EINVAL); 573 goto seterr; 574 } 575 if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) { 576 error = set_errno(error); 577 goto seterr; 578 } 579 sem_undo_clear(sp, 0, sp->sem_nsems - 1); 580 for (i = 0, p = sp->sem_base; i < sp->sem_nsems; 581 (p++)->sempid = curproc->p_pid) { 582 if ((p->semval = vals[i++]) != 0) { 583 if (p->semncnt) { 584 cv_broadcast(&p->semncnt_cv); 585 } 586 } else if (p->semzcnt) { 587 cv_broadcast(&p->semzcnt_cv); 588 } 589 } 590 seterr: 591 mutex_exit(lock); 592 kmem_free(vals, vsize); 593 return (error); 594 595 default: 596 mutex_exit(lock); 597 return (set_errno(EINVAL)); 598 } 599 600 /* NOTREACHED */ 601 } 602 603 /* 604 * semexit - Called by exit() to clean up on process exit. 605 */ 606 void 607 semexit(proc_t *pp) 608 { 609 avl_tree_t *tree; 610 struct sem_undo *undo; 611 void *cookie = NULL; 612 613 mutex_enter(&pp->p_lock); 614 tree = pp->p_semacct; 615 pp->p_semacct = NULL; 616 mutex_exit(&pp->p_lock); 617 618 while (undo = avl_destroy_nodes(tree, &cookie)) { 619 ksemid_t *sp = undo->un_sp; 620 size_t size = SEM_UNDOSZ(sp->sem_nsems); 621 int i; 622 623 (void) ipc_lock(sem_svc, sp->sem_perm.ipc_id); 624 if (!IPC_FREE(&sp->sem_perm)) { 625 for (i = 0; i < sp->sem_nsems; i++) { 626 int adj = undo->un_aoe[i]; 627 if (adj) { 628 struct sem *semp = &sp->sem_base[i]; 629 int v = (int)semp->semval + adj; 630 631 if (v < 0 || v > USHRT_MAX) 632 continue; 633 semp->semval = (ushort_t)v; 634 if (v == 0 && semp->semzcnt) 635 cv_broadcast(&semp->semzcnt_cv); 636 if (adj > 0 && semp->semncnt) 637 cv_broadcast(&semp->semncnt_cv); 638 } 639 } 640 list_remove(&sp->sem_undos, undo); 641 } 642 ipc_rele(sem_svc, (kipc_perm_t *)sp); 643 kmem_free(undo, size); 644 } 645 646 avl_destroy(tree); 647 kmem_free(tree, sizeof (avl_tree_t)); 648 } 649 650 /* 651 * Remove all semaphores associated with a given zone. Called by 652 * zone_shutdown when the zone is halted. 653 */ 654 /*ARGSUSED1*/ 655 static void 656 sem_remove_zone(zoneid_t zoneid, void *arg) 657 { 658 ipc_remove_zone(sem_svc, zoneid); 659 } 660 661 /* 662 * semget - Semget system call. 663 */ 664 static int 665 semget(key_t key, int nsems, int semflg) 666 { 667 ksemid_t *sp; 668 kmutex_t *lock; 669 int id, error; 670 proc_t *pp = curproc; 671 672 top: 673 if (error = ipc_get(sem_svc, key, semflg, (kipc_perm_t **)&sp, &lock)) 674 return (set_errno(error)); 675 676 if (!IPC_FREE(&sp->sem_perm)) { 677 /* 678 * A semaphore with the requested key exists. 679 */ 680 if (!((nsems >= 0) && (nsems <= sp->sem_nsems))) { 681 mutex_exit(lock); 682 return (set_errno(EINVAL)); 683 } 684 } else { 685 /* 686 * This is a new semaphore set. Finish initialization. 687 */ 688 if (nsems <= 0 || (rctl_test(rc_process_semmsl, pp->p_rctls, pp, 689 nsems, RCA_SAFE) & RCT_DENY)) { 690 mutex_exit(lock); 691 mutex_exit(&pp->p_lock); 692 ipc_cleanup(sem_svc, (kipc_perm_t *)sp); 693 return (set_errno(EINVAL)); 694 } 695 mutex_exit(lock); 696 mutex_exit(&pp->p_lock); 697 698 /* 699 * We round the allocation up to coherency granularity 700 * so that multiple semaphore allocations won't result 701 * in the false sharing of their sem structures. 702 */ 703 sp->sem_base = 704 kmem_zalloc(P2ROUNDUP(nsems * sizeof (struct sem), 64), 705 KM_SLEEP); 706 sp->sem_binary = (nsems == 1); 707 sp->sem_nsems = (ushort_t)nsems; 708 sp->sem_ctime = gethrestime_sec(); 709 sp->sem_otime = 0; 710 list_create(&sp->sem_undos, sizeof (struct sem_undo), 711 offsetof(struct sem_undo, un_list)); 712 713 if (error = ipc_commit_begin(sem_svc, key, semflg, 714 (kipc_perm_t *)sp)) { 715 if (error == EAGAIN) 716 goto top; 717 return (set_errno(error)); 718 } 719 sp->sem_maxops = 720 rctl_enforced_value(rc_process_semopm, pp->p_rctls, pp); 721 if (rctl_test(rc_process_semmsl, pp->p_rctls, pp, nsems, 722 RCA_SAFE) & RCT_DENY) { 723 ipc_cleanup(sem_svc, (kipc_perm_t *)sp); 724 return (set_errno(EINVAL)); 725 } 726 lock = ipc_commit_end(sem_svc, &sp->sem_perm); 727 } 728 729 if (AU_AUDITING()) 730 audit_ipcget(AT_IPC_SEM, (void *)sp); 731 732 id = sp->sem_perm.ipc_id; 733 mutex_exit(lock); 734 return (id); 735 } 736 737 /* 738 * semids system call. 739 */ 740 static int 741 semids(int *buf, uint_t nids, uint_t *pnids) 742 { 743 int error; 744 745 if (error = ipc_ids(sem_svc, buf, nids, pnids)) 746 return (set_errno(error)); 747 748 return (0); 749 } 750 751 752 /* 753 * Helper function for semop - copies in the provided timespec and 754 * computes the absolute future time after which we must return. 755 */ 756 static int 757 compute_timeout(timespec_t **tsp, timespec_t *ts, timespec_t *now, 758 timespec_t *timeout) 759 { 760 model_t datamodel = get_udatamodel(); 761 762 if (datamodel == DATAMODEL_NATIVE) { 763 if (copyin(timeout, ts, sizeof (timespec_t))) 764 return (EFAULT); 765 } else { 766 timespec32_t ts32; 767 768 if (copyin(timeout, &ts32, sizeof (timespec32_t))) 769 return (EFAULT); 770 TIMESPEC32_TO_TIMESPEC(ts, &ts32) 771 } 772 773 if (itimerspecfix(ts)) 774 return (EINVAL); 775 776 /* 777 * Convert the timespec value into absolute time. 778 */ 779 timespecadd(ts, now); 780 *tsp = ts; 781 782 return (0); 783 } 784 785 /* 786 * Undo structure comparator. We sort based on ksemid_t pointer. 787 */ 788 static int 789 sem_undo_compar(const void *x, const void *y) 790 { 791 struct sem_undo *undo1 = (struct sem_undo *)x; 792 struct sem_undo *undo2 = (struct sem_undo *)y; 793 794 if (undo1->un_sp < undo2->un_sp) 795 return (-1); 796 if (undo1->un_sp > undo2->un_sp) 797 return (1); 798 return (0); 799 } 800 801 /* 802 * Helper function for semop - creates an undo structure and adds it to 803 * the process's avl tree and the semaphore's list. 804 */ 805 static int 806 sem_undo_alloc(proc_t *pp, ksemid_t *sp, kmutex_t **lock, 807 struct sem_undo *template, struct sem_undo **un) 808 { 809 size_t size; 810 struct sem_undo *undo; 811 avl_tree_t *tree = NULL; 812 avl_index_t where; 813 814 mutex_exit(*lock); 815 816 size = SEM_UNDOSZ(sp->sem_nsems); 817 undo = kmem_zalloc(size, KM_SLEEP); 818 undo->un_proc = pp; 819 undo->un_sp = sp; 820 821 if (pp->p_semacct == NULL) 822 tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); 823 824 *lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id); 825 if (IPC_FREE(&sp->sem_perm)) { 826 kmem_free(undo, size); 827 if (tree) 828 kmem_free(tree, sizeof (avl_tree_t)); 829 return (EIDRM); 830 } 831 832 mutex_enter(&pp->p_lock); 833 if (tree) { 834 if (pp->p_semacct == NULL) { 835 avl_create(tree, sem_undo_compar, 836 sizeof (struct sem_undo), 837 offsetof(struct sem_undo, un_avl)); 838 pp->p_semacct = tree; 839 } else { 840 kmem_free(tree, sizeof (avl_tree_t)); 841 } 842 } 843 844 if (*un = avl_find(pp->p_semacct, template, &where)) { 845 mutex_exit(&pp->p_lock); 846 kmem_free(undo, size); 847 } else { 848 *un = undo; 849 avl_insert(pp->p_semacct, undo, where); 850 mutex_exit(&pp->p_lock); 851 list_insert_head(&sp->sem_undos, undo); 852 ipc_hold(sem_svc, (kipc_perm_t *)sp); 853 } 854 855 856 return (0); 857 } 858 859 /* 860 * semop - Semop system call. 861 */ 862 static int 863 semop(int semid, struct sembuf *sops, size_t nsops, timespec_t *timeout) 864 { 865 ksemid_t *sp = NULL; 866 kmutex_t *lock; 867 struct sembuf *op; /* ptr to operation */ 868 int i; /* loop control */ 869 struct sem *semp; /* ptr to semaphore */ 870 int error = 0; 871 struct sembuf *uops; /* ptr to copy of user ops */ 872 struct sembuf x_sem; /* avoid kmem_alloc's */ 873 timespec_t now, ts, *tsp = NULL; 874 int timecheck = 0; 875 int cvres, needundo, mode; 876 struct sem_undo *undo; 877 proc_t *pp = curproc; 878 int held = 0; 879 880 CPU_STATS_ADDQ(CPU, sys, sema, 1); /* bump semaphore op count */ 881 882 /* 883 * To avoid the cost of copying in 'timeout' in the common 884 * case, we could only grab the time here and defer the copyin 885 * and associated computations until we are about to block. 886 * 887 * The down side to this is that we would then have to spin 888 * some goto top nonsense to avoid the copyin behind the semid 889 * lock. As a common use of timed semaphores is as an explicit 890 * blocking mechanism, this could incur a greater penalty. 891 * 892 * If we eventually decide that this would be a wise route to 893 * take, the deferrable functionality is completely contained 894 * in 'compute_timeout', and the interface is defined such that 895 * we can legally not validate 'timeout' if it is unused. 896 */ 897 if (timeout != NULL) { 898 timecheck = timechanged; 899 gethrestime(&now); 900 if (error = compute_timeout(&tsp, &ts, &now, timeout)) 901 return (set_errno(error)); 902 } 903 904 /* 905 * Allocate space to hold the vector of semaphore ops. If 906 * there is only 1 operation we use a preallocated buffer on 907 * the stack for speed. 908 * 909 * Since we don't want to allow the user to allocate an 910 * arbitrary amount of kernel memory, we need to check against 911 * the number of operations allowed by the semaphore. We only 912 * bother doing this if the number of operations is larger than 913 * SEM_MAXUCOPS. 914 */ 915 if (nsops == 1) 916 uops = &x_sem; 917 else if (nsops == 0) 918 return (0); 919 else if (nsops <= SEM_MAXUCOPS) 920 uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP); 921 922 if (nsops > SEM_MAXUCOPS) { 923 if ((lock = ipc_lookup(sem_svc, semid, 924 (kipc_perm_t **)&sp)) == NULL) 925 return (set_errno(EFAULT)); 926 927 if (nsops > sp->sem_maxops) { 928 mutex_exit(lock); 929 return (set_errno(E2BIG)); 930 } 931 held = 1; 932 ipc_hold(sem_svc, (kipc_perm_t *)sp); 933 mutex_exit(lock); 934 935 uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP); 936 if (copyin(sops, uops, nsops * sizeof (*op))) { 937 error = EFAULT; 938 (void) ipc_lock(sem_svc, sp->sem_perm.ipc_id); 939 goto semoperr; 940 } 941 942 lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id); 943 if (IPC_FREE(&sp->sem_perm)) { 944 error = EIDRM; 945 goto semoperr; 946 } 947 } else { 948 /* 949 * This could be interleaved with the above code, but 950 * keeping them separate improves readability. 951 */ 952 if (copyin(sops, uops, nsops * sizeof (*op))) { 953 error = EFAULT; 954 goto semoperr_unlocked; 955 } 956 957 if ((lock = ipc_lookup(sem_svc, semid, 958 (kipc_perm_t **)&sp)) == NULL) { 959 error = EINVAL; 960 goto semoperr_unlocked; 961 } 962 963 if (nsops > sp->sem_maxops) { 964 error = E2BIG; 965 goto semoperr; 966 } 967 } 968 969 /* 970 * Scan all operations. Verify that sem #s are in range and 971 * this process is allowed the requested operations. If any 972 * operations are marked SEM_UNDO, find (or allocate) the undo 973 * structure for this process and semaphore. 974 */ 975 needundo = 0; 976 mode = 0; 977 for (i = 0, op = uops; i++ < nsops; op++) { 978 mode |= op->sem_op ? SEM_A : SEM_R; 979 if (op->sem_num >= sp->sem_nsems) { 980 error = EFBIG; 981 goto semoperr; 982 } 983 if ((op->sem_flg & SEM_UNDO) && op->sem_op) 984 needundo = 1; 985 } 986 if (error = ipcperm_access(&sp->sem_perm, mode, CRED())) 987 goto semoperr; 988 989 if (needundo) { 990 struct sem_undo template; 991 992 template.un_sp = sp; 993 mutex_enter(&pp->p_lock); 994 if (pp->p_semacct) 995 undo = avl_find(pp->p_semacct, &template, NULL); 996 else 997 undo = NULL; 998 mutex_exit(&pp->p_lock); 999 if (undo == NULL) { 1000 if (!held) { 1001 held = 1; 1002 ipc_hold(sem_svc, (kipc_perm_t *)sp); 1003 } 1004 if (error = sem_undo_alloc(pp, sp, &lock, &template, 1005 &undo)) 1006 goto semoperr; 1007 1008 /* sem_undo_alloc unlocks the semaphore */ 1009 if (error = ipcperm_access(&sp->sem_perm, mode, CRED())) 1010 goto semoperr; 1011 } 1012 } 1013 1014 check: 1015 /* 1016 * Loop waiting for the operations to be satisfied atomically. 1017 * Actually, do the operations and undo them if a wait is needed 1018 * or an error is detected. 1019 */ 1020 for (i = 0; i < nsops; i++) { 1021 op = &uops[i]; 1022 semp = &sp->sem_base[op->sem_num]; 1023 1024 /* 1025 * Raise the semaphore (i.e. sema_v) 1026 */ 1027 if (op->sem_op > 0) { 1028 if (op->sem_op + (int)semp->semval > USHRT_MAX || 1029 ((op->sem_flg & SEM_UNDO) && 1030 (error = sem_undo_add(op->sem_op, op->sem_num, 1031 undo)))) { 1032 if (i) 1033 sem_rollback(sp, uops, i, undo); 1034 if (error == 0) 1035 error = ERANGE; 1036 goto semoperr; 1037 } 1038 semp->semval += op->sem_op; 1039 /* 1040 * If we are only incrementing the semaphore value 1041 * by one on a binary semaphore, we can cv_signal. 1042 */ 1043 if (semp->semncnt) { 1044 if (op->sem_op == 1 && sp->sem_binary) 1045 cv_signal(&semp->semncnt_cv); 1046 else 1047 cv_broadcast(&semp->semncnt_cv); 1048 } 1049 if (semp->semzcnt && !semp->semval) 1050 cv_broadcast(&semp->semzcnt_cv); 1051 continue; 1052 } 1053 1054 /* 1055 * Lower the semaphore (i.e. sema_p) 1056 */ 1057 if (op->sem_op < 0) { 1058 if (semp->semval >= (unsigned)(-op->sem_op)) { 1059 if ((op->sem_flg & SEM_UNDO) && 1060 (error = sem_undo_add(op->sem_op, 1061 op->sem_num, undo))) { 1062 if (i) 1063 sem_rollback(sp, uops, i, undo); 1064 goto semoperr; 1065 } 1066 semp->semval += op->sem_op; 1067 if (semp->semzcnt && !semp->semval) 1068 cv_broadcast(&semp->semzcnt_cv); 1069 continue; 1070 } 1071 if (i) 1072 sem_rollback(sp, uops, i, undo); 1073 if (op->sem_flg & IPC_NOWAIT) { 1074 error = EAGAIN; 1075 goto semoperr; 1076 } 1077 1078 /* 1079 * Mark the semaphore set as not a binary type 1080 * if we are decrementing the value by more than 1. 1081 * 1082 * V operations will resort to cv_broadcast 1083 * for this set because there are too many weird 1084 * cases that have to be caught. 1085 */ 1086 if (op->sem_op < -1) 1087 sp->sem_binary = 0; 1088 if (!held) { 1089 held = 1; 1090 ipc_hold(sem_svc, (kipc_perm_t *)sp); 1091 } 1092 semp->semncnt++; 1093 cvres = cv_waituntil_sig(&semp->semncnt_cv, lock, 1094 tsp, timecheck); 1095 lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock); 1096 1097 if (!IPC_FREE(&sp->sem_perm)) { 1098 ASSERT(semp->semncnt != 0); 1099 semp->semncnt--; 1100 if (cvres > 0) /* normal wakeup */ 1101 goto check; 1102 } 1103 1104 /* EINTR or EAGAIN overrides EIDRM */ 1105 if (cvres == 0) 1106 error = EINTR; 1107 else if (cvres < 0) 1108 error = EAGAIN; 1109 else 1110 error = EIDRM; 1111 goto semoperr; 1112 } 1113 1114 /* 1115 * Wait for zero value 1116 */ 1117 if (semp->semval) { 1118 if (i) 1119 sem_rollback(sp, uops, i, undo); 1120 if (op->sem_flg & IPC_NOWAIT) { 1121 error = EAGAIN; 1122 goto semoperr; 1123 } 1124 1125 if (!held) { 1126 held = 1; 1127 ipc_hold(sem_svc, (kipc_perm_t *)sp); 1128 } 1129 semp->semzcnt++; 1130 cvres = cv_waituntil_sig(&semp->semzcnt_cv, lock, 1131 tsp, timecheck); 1132 lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock); 1133 1134 /* 1135 * Don't touch semp if the semaphores have been removed. 1136 */ 1137 if (!IPC_FREE(&sp->sem_perm)) { 1138 ASSERT(semp->semzcnt != 0); 1139 semp->semzcnt--; 1140 if (cvres > 0) /* normal wakeup */ 1141 goto check; 1142 } 1143 1144 /* EINTR or EAGAIN overrides EIDRM */ 1145 if (cvres == 0) 1146 error = EINTR; 1147 else if (cvres < 0) 1148 error = EAGAIN; 1149 else 1150 error = EIDRM; 1151 goto semoperr; 1152 } 1153 } 1154 1155 /* All operations succeeded. Update sempid for accessed semaphores. */ 1156 for (i = 0, op = uops; i++ < nsops; 1157 sp->sem_base[(op++)->sem_num].sempid = pp->p_pid) 1158 ; 1159 sp->sem_otime = gethrestime_sec(); 1160 if (held) 1161 ipc_rele(sem_svc, (kipc_perm_t *)sp); 1162 else 1163 mutex_exit(lock); 1164 1165 /* Before leaving, deallocate the buffer that held the user semops */ 1166 if (nsops != 1) 1167 kmem_free(uops, sizeof (*uops) * nsops); 1168 return (0); 1169 1170 /* 1171 * Error return labels 1172 */ 1173 semoperr: 1174 if (held) 1175 ipc_rele(sem_svc, (kipc_perm_t *)sp); 1176 else 1177 mutex_exit(lock); 1178 1179 semoperr_unlocked: 1180 1181 /* Before leaving, deallocate the buffer that held the user semops */ 1182 if (nsops != 1) 1183 kmem_free(uops, sizeof (*uops) * nsops); 1184 return (set_errno(error)); 1185 } 1186 1187 /* 1188 * semsys - System entry point for semctl, semget, and semop system calls. 1189 */ 1190 static int 1191 semsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4) 1192 { 1193 int error; 1194 1195 switch (opcode) { 1196 case SEMCTL: 1197 error = semctl((int)a1, (uint_t)a2, (int)a3, a4); 1198 break; 1199 case SEMGET: 1200 error = semget((key_t)a1, (int)a2, (int)a3); 1201 break; 1202 case SEMOP: 1203 error = semop((int)a1, (struct sembuf *)a2, (size_t)a3, 0); 1204 break; 1205 case SEMIDS: 1206 error = semids((int *)a1, (uint_t)a2, (uint_t *)a3); 1207 break; 1208 case SEMTIMEDOP: 1209 error = semop((int)a1, (struct sembuf *)a2, (size_t)a3, 1210 (timespec_t *)a4); 1211 break; 1212 default: 1213 error = set_errno(EINVAL); 1214 break; 1215 } 1216 return (error); 1217 } 1218