1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 /* 34 * Inter-Process Communication Semaphore Facility. 35 * 36 * See os/ipc.c for a description of common IPC functionality. 37 * 38 * Resource controls 39 * ----------------- 40 * 41 * Control: project.max-sem-ids (rc_project_semmni) 42 * Description: Maximum number of semaphore ids allowed a project. 43 * 44 * When semget() is used to allocate a semaphore set, one id is 45 * allocated. If the id allocation doesn't succeed, semget() fails 46 * and errno is set to ENOSPC. Upon successful semctl(, IPC_RMID) 47 * the id is deallocated. 48 * 49 * Control: process.max-sem-nsems (rc_process_semmsl) 50 * Description: Maximum number of semaphores allowed per semaphore set. 51 * 52 * When semget() is used to allocate a semaphore set, the size of the 53 * set is compared with this limit. If the number of semaphores 54 * exceeds the limit, semget() fails and errno is set to EINVAL. 55 * 56 * Control: process.max-sem-ops (rc_process_semopm) 57 * Description: Maximum number of semaphore operations allowed per 58 * semop call. 59 * 60 * When semget() successfully allocates a semaphore set, the minimum 61 * enforced value of this limit is used to initialize the 62 * "system-imposed maximum" number of operations a semop() call for 63 * this set can perform. 64 * 65 * Undo structures 66 * --------------- 67 * 68 * Removing the undo structure tunables involved a serious redesign of 69 * how they were implemented. There is now one undo structure for 70 * every process/semaphore array combination (lazily allocated, of 71 * course), and each is equal in size to the semaphore it corresponds 72 * to. To avoid scalability and performance problems, the undo 73 * structures are stored in two places: a per-process AVL tree sorted 74 * by ksemid pointer (p_semacct, protected by p_lock) and an unsorted 75 * per-semaphore linked list (sem_undos, protected by the semaphore's 76 * ID lock). The former is used by semop, where a lookup is performed 77 * once and cached if SEM_UNDO is specified for any of the operations, 78 * and at process exit where the undoable operations are rolled back. 79 * The latter is used when removing the semaphore, so the undo 80 * structures can be removed from the appropriate processes' trees. 81 * 82 * The undo structure itself contains pointers to the ksemid and proc 83 * to which it corresponds, a list node, an AVL node, and an array of 84 * adjust-on-exit (AOE) values. When an undo structure is allocated it 85 * is immediately added to both the process's tree and the semaphore's 86 * list. Lastly, the reference count on the semaphore is increased. 87 * 88 * Avoiding a lock ordering violation between p_lock and the ID lock, 89 * wont to occur when there is a race between a process exiting and the 90 * removal of a semaphore, mandates the delicate dance that exists 91 * between semexit and sem_rmid. 92 * 93 * sem_rmid, holding the ID lock, iterates through all undo structures 94 * and for each takes the appropriate process's p_lock and checks to 95 * see if p_semacct is NULL. If it is, it skips that undo structure 96 * and continues to the next. Otherwise, it removes the undo structure 97 * from both the AVL tree and the semaphore's list, and releases the 98 * hold that the undo structure had on the semaphore. 99 * 100 * The important other half of this is semexit, which will immediately 101 * take p_lock, obtain the AVL pointer, clear p_semacct, and drop 102 * p_lock. From this point on it is semexit's responsibility to clean 103 * up all undo structures found in the tree -- a coexecuting sem_rmid 104 * will see the NULL p_semacct and skip that undo structure. It walks 105 * the AVL tree (using avl_destroy_nodes) and for each undo structure 106 * takes the appropriate semaphore's ID lock (always legal since the 107 * undo structure has a hold on the semaphore), updates all semaphores 108 * with non-zero AOE values, and removes the structure from the 109 * semaphore's list. It then drops the structure's reference on the 110 * semaphore, drops the ID lock, and frees the undo structure. 111 */ 112 113 #include <sys/types.h> 114 #include <sys/t_lock.h> 115 #include <sys/param.h> 116 #include <sys/systm.h> 117 #include <sys/sysmacros.h> 118 #include <sys/cred.h> 119 #include <sys/vmem.h> 120 #include <sys/kmem.h> 121 #include <sys/errno.h> 122 #include <sys/time.h> 123 #include <sys/ipc.h> 124 #include <sys/ipc_impl.h> 125 #include <sys/sem.h> 126 #include <sys/sem_impl.h> 127 #include <sys/user.h> 128 #include <sys/proc.h> 129 #include <sys/cpuvar.h> 130 #include <sys/debug.h> 131 #include <sys/var.h> 132 #include <sys/cmn_err.h> 133 #include <sys/modctl.h> 134 #include <sys/syscall.h> 135 #include <sys/avl.h> 136 #include <sys/list.h> 137 #include <sys/zone.h> 138 139 #include <c2/audit.h> 140 141 extern rctl_hndl_t rc_project_semmni; 142 extern rctl_hndl_t rc_process_semmsl; 143 extern rctl_hndl_t rc_process_semopm; 144 static ipc_service_t *sem_svc; 145 static zone_key_t sem_zone_key; 146 147 /* 148 * The following tunables are obsolete. Though for compatibility we 149 * still read and interpret seminfo_semmsl, seminfo_semopm and 150 * seminfo_semmni (see os/project.c and os/rctl_proc.c), the preferred 151 * mechanism for administrating the IPC Semaphore facility is through 152 * the resource controls described at the top of this file. 153 */ 154 int seminfo_semaem = 16384; /* (obsolete) */ 155 int seminfo_semmap = 10; /* (obsolete) */ 156 int seminfo_semmni = 10; /* (obsolete) */ 157 int seminfo_semmns = 60; /* (obsolete) */ 158 int seminfo_semmnu = 30; /* (obsolete) */ 159 int seminfo_semmsl = 25; /* (obsolete) */ 160 int seminfo_semopm = 10; /* (obsolete) */ 161 int seminfo_semume = 10; /* (obsolete) */ 162 int seminfo_semusz = 96; /* (obsolete) */ 163 int seminfo_semvmx = 32767; /* (obsolete) */ 164 165 #define SEM_MAXUCOPS 4096 /* max # of unchecked ops per semop call */ 166 #define SEM_UNDOSZ(n) (sizeof (struct sem_undo) + (n - 1) * sizeof (int)) 167 168 static int semsys(int opcode, uintptr_t a0, uintptr_t a1, 169 uintptr_t a2, uintptr_t a3); 170 static void sem_dtor(kipc_perm_t *); 171 static void sem_rmid(kipc_perm_t *); 172 static void sem_remove_zone(zoneid_t, void *); 173 174 static struct sysent ipcsem_sysent = { 175 5, 176 SE_NOUNLOAD | SE_ARGC | SE_32RVAL1, 177 semsys 178 }; 179 180 /* 181 * Module linkage information for the kernel. 182 */ 183 static struct modlsys modlsys = { 184 &mod_syscallops, "System V semaphore facility", &ipcsem_sysent 185 }; 186 187 #ifdef _SYSCALL32_IMPL 188 static struct modlsys modlsys32 = { 189 &mod_syscallops32, "32-bit System V semaphore facility", &ipcsem_sysent 190 }; 191 #endif 192 193 static struct modlinkage modlinkage = { 194 MODREV_1, 195 &modlsys, 196 #ifdef _SYSCALL32_IMPL 197 &modlsys32, 198 #endif 199 NULL 200 }; 201 202 203 int 204 _init(void) 205 { 206 int result; 207 208 sem_svc = ipcs_create("semids", rc_project_semmni, sizeof (ksemid_t), 209 sem_dtor, sem_rmid, AT_IPC_SEM, 210 offsetof(kproject_data_t, kpd_semmni)); 211 zone_key_create(&sem_zone_key, NULL, sem_remove_zone, NULL); 212 213 if ((result = mod_install(&modlinkage)) == 0) 214 return (0); 215 216 (void) zone_key_delete(sem_zone_key); 217 ipcs_destroy(sem_svc); 218 219 return (result); 220 } 221 222 int 223 _fini(void) 224 { 225 return (EBUSY); 226 } 227 228 int 229 _info(struct modinfo *modinfop) 230 { 231 return (mod_info(&modlinkage, modinfop)); 232 } 233 234 static void 235 sem_dtor(kipc_perm_t *perm) 236 { 237 ksemid_t *sp = (ksemid_t *)perm; 238 239 kmem_free(sp->sem_base, 240 P2ROUNDUP(sp->sem_nsems * sizeof (struct sem), 64)); 241 list_destroy(&sp->sem_undos); 242 } 243 244 /* 245 * sem_undo_add - Create or update adjust on exit entry. 246 */ 247 static int 248 sem_undo_add(short val, ushort_t num, struct sem_undo *undo) 249 { 250 int newval = undo->un_aoe[num] - val; 251 252 if (newval > USHRT_MAX || newval < -USHRT_MAX) 253 return (ERANGE); 254 undo->un_aoe[num] = newval; 255 256 return (0); 257 } 258 259 /* 260 * sem_undo_clear - clears all undo entries for specified semaphores 261 * 262 * Used when semaphores are reset by SETVAL or SETALL. 263 */ 264 static void 265 sem_undo_clear(ksemid_t *sp, ushort_t low, ushort_t high) 266 { 267 struct sem_undo *undo; 268 int i; 269 270 ASSERT(low <= high); 271 ASSERT(high < sp->sem_nsems); 272 273 for (undo = list_head(&sp->sem_undos); undo; 274 undo = list_next(&sp->sem_undos, undo)) 275 for (i = low; i <= high; i++) 276 undo->un_aoe[i] = 0; 277 } 278 279 /* 280 * sem_rollback - roll back work done so far if unable to complete operation 281 */ 282 static void 283 sem_rollback(ksemid_t *sp, struct sembuf *op, int n, struct sem_undo *undo) 284 { 285 struct sem *semp; /* semaphore ptr */ 286 287 for (op += n - 1; n--; op--) { 288 if (op->sem_op == 0) 289 continue; 290 semp = &sp->sem_base[op->sem_num]; 291 semp->semval -= op->sem_op; 292 if (op->sem_flg & SEM_UNDO) { 293 ASSERT(undo != NULL); 294 (void) sem_undo_add(-op->sem_op, op->sem_num, undo); 295 } 296 } 297 } 298 299 static void 300 sem_rmid(kipc_perm_t *perm) 301 { 302 ksemid_t *sp = (ksemid_t *)perm; 303 struct sem *semp; 304 struct sem_undo *undo; 305 size_t size = SEM_UNDOSZ(sp->sem_nsems); 306 int i; 307 308 /*LINTED*/ 309 while (undo = list_head(&sp->sem_undos)) { 310 list_remove(&sp->sem_undos, undo); 311 mutex_enter(&undo->un_proc->p_lock); 312 if (undo->un_proc->p_semacct == NULL) { 313 mutex_exit(&undo->un_proc->p_lock); 314 continue; 315 } 316 avl_remove(undo->un_proc->p_semacct, undo); 317 mutex_exit(&undo->un_proc->p_lock); 318 kmem_free(undo, size); 319 ipc_rele_locked(sem_svc, (kipc_perm_t *)sp); 320 } 321 322 for (i = 0; i < sp->sem_nsems; i++) { 323 semp = &sp->sem_base[i]; 324 semp->semval = semp->sempid = 0; 325 if (semp->semncnt) { 326 cv_broadcast(&semp->semncnt_cv); 327 semp->semncnt = 0; 328 } 329 if (semp->semzcnt) { 330 cv_broadcast(&semp->semzcnt_cv); 331 semp->semzcnt = 0; 332 } 333 } 334 } 335 336 /* 337 * semctl - Semctl system call. 338 */ 339 static int 340 semctl(int semid, uint_t semnum, int cmd, uintptr_t arg) 341 { 342 ksemid_t *sp; /* ptr to semaphore header */ 343 struct sem *p; /* ptr to semaphore */ 344 unsigned int i; /* loop control */ 345 ushort_t *vals, *vp; 346 size_t vsize = 0; 347 int error = 0; 348 int retval = 0; 349 struct cred *cr; 350 kmutex_t *lock; 351 model_t mdl = get_udatamodel(); 352 STRUCT_DECL(semid_ds, sid); 353 struct semid_ds64 ds64; 354 355 STRUCT_INIT(sid, mdl); 356 cr = CRED(); 357 358 /* 359 * Perform pre- or non-lookup actions (e.g. copyins, RMID). 360 */ 361 switch (cmd) { 362 case IPC_SET: 363 if (copyin((void *)arg, STRUCT_BUF(sid), STRUCT_SIZE(sid))) 364 return (set_errno(EFAULT)); 365 break; 366 367 case IPC_SET64: 368 if (copyin((void *)arg, &ds64, sizeof (struct semid_ds64))) 369 return (set_errno(EFAULT)); 370 break; 371 372 case SETALL: 373 if ((lock = ipc_lookup(sem_svc, semid, 374 (kipc_perm_t **)&sp)) == NULL) 375 return (set_errno(EINVAL)); 376 vsize = sp->sem_nsems * sizeof (*vals); 377 mutex_exit(lock); 378 379 /* allocate space to hold all semaphore values */ 380 vals = kmem_alloc(vsize, KM_SLEEP); 381 382 if (copyin((void *)arg, vals, vsize)) { 383 kmem_free(vals, vsize); 384 return (set_errno(EFAULT)); 385 } 386 break; 387 388 case IPC_RMID: 389 if (error = ipc_rmid(sem_svc, semid, cr)) 390 return (set_errno(error)); 391 return (0); 392 } 393 394 if ((lock = ipc_lookup(sem_svc, semid, (kipc_perm_t **)&sp)) == NULL) { 395 if (vsize != 0) 396 kmem_free(vals, vsize); 397 return (set_errno(EINVAL)); 398 } 399 switch (cmd) { 400 /* Set ownership and permissions. */ 401 case IPC_SET: 402 403 if (error = ipcperm_set(sem_svc, cr, &sp->sem_perm, 404 &STRUCT_BUF(sid)->sem_perm, mdl)) { 405 mutex_exit(lock); 406 return (set_errno(error)); 407 } 408 sp->sem_ctime = gethrestime_sec(); 409 mutex_exit(lock); 410 return (0); 411 412 /* Get semaphore data structure. */ 413 case IPC_STAT: 414 415 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 416 mutex_exit(lock); 417 return (set_errno(error)); 418 } 419 420 ipcperm_stat(&STRUCT_BUF(sid)->sem_perm, &sp->sem_perm, mdl); 421 STRUCT_FSETP(sid, sem_base, NULL); /* kernel addr */ 422 STRUCT_FSET(sid, sem_nsems, sp->sem_nsems); 423 STRUCT_FSET(sid, sem_otime, sp->sem_otime); 424 STRUCT_FSET(sid, sem_ctime, sp->sem_ctime); 425 STRUCT_FSET(sid, sem_binary, sp->sem_binary); 426 mutex_exit(lock); 427 428 if (copyout(STRUCT_BUF(sid), (void *)arg, STRUCT_SIZE(sid))) 429 return (set_errno(EFAULT)); 430 return (0); 431 432 case IPC_SET64: 433 434 if (error = ipcperm_set64(sem_svc, cr, &sp->sem_perm, 435 &ds64.semx_perm)) { 436 mutex_exit(lock); 437 return (set_errno(error)); 438 } 439 sp->sem_ctime = gethrestime_sec(); 440 mutex_exit(lock); 441 return (0); 442 443 case IPC_STAT64: 444 445 ipcperm_stat64(&ds64.semx_perm, &sp->sem_perm); 446 ds64.semx_nsems = sp->sem_nsems; 447 ds64.semx_otime = sp->sem_otime; 448 ds64.semx_ctime = sp->sem_ctime; 449 450 mutex_exit(lock); 451 if (copyout(&ds64, (void *)arg, sizeof (struct semid_ds64))) 452 return (set_errno(EFAULT)); 453 454 return (0); 455 456 /* Get # of processes sleeping for greater semval. */ 457 case GETNCNT: 458 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 459 mutex_exit(lock); 460 return (set_errno(error)); 461 } 462 if (semnum >= sp->sem_nsems) { 463 mutex_exit(lock); 464 return (set_errno(EINVAL)); 465 } 466 retval = sp->sem_base[semnum].semncnt; 467 mutex_exit(lock); 468 return (retval); 469 470 /* Get pid of last process to operate on semaphore. */ 471 case GETPID: 472 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 473 mutex_exit(lock); 474 return (set_errno(error)); 475 } 476 if (semnum >= sp->sem_nsems) { 477 mutex_exit(lock); 478 return (set_errno(EINVAL)); 479 } 480 retval = sp->sem_base[semnum].sempid; 481 mutex_exit(lock); 482 return (retval); 483 484 /* Get semval of one semaphore. */ 485 case GETVAL: 486 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 487 mutex_exit(lock); 488 return (set_errno(error)); 489 } 490 if (semnum >= sp->sem_nsems) { 491 mutex_exit(lock); 492 return (set_errno(EINVAL)); 493 } 494 retval = sp->sem_base[semnum].semval; 495 mutex_exit(lock); 496 return (retval); 497 498 /* Get all semvals in set. */ 499 case GETALL: 500 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 501 mutex_exit(lock); 502 return (set_errno(error)); 503 } 504 505 /* allocate space to hold all semaphore values */ 506 vsize = sp->sem_nsems * sizeof (*vals); 507 vals = vp = kmem_alloc(vsize, KM_SLEEP); 508 509 for (i = sp->sem_nsems, p = sp->sem_base; i--; p++, vp++) 510 bcopy(&p->semval, vp, sizeof (p->semval)); 511 512 mutex_exit(lock); 513 514 if (copyout((void *)vals, (void *)arg, vsize)) { 515 kmem_free(vals, vsize); 516 return (set_errno(EFAULT)); 517 } 518 519 kmem_free(vals, vsize); 520 return (0); 521 522 /* Get # of processes sleeping for semval to become zero. */ 523 case GETZCNT: 524 if (error = ipcperm_access(&sp->sem_perm, SEM_R, cr)) { 525 mutex_exit(lock); 526 return (set_errno(error)); 527 } 528 if (semnum >= sp->sem_nsems) { 529 mutex_exit(lock); 530 return (set_errno(EINVAL)); 531 } 532 retval = sp->sem_base[semnum].semzcnt; 533 mutex_exit(lock); 534 return (retval); 535 536 /* Set semval of one semaphore. */ 537 case SETVAL: 538 if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) { 539 mutex_exit(lock); 540 return (set_errno(error)); 541 } 542 if (semnum >= sp->sem_nsems) { 543 mutex_exit(lock); 544 return (set_errno(EINVAL)); 545 } 546 if ((uint_t)arg > USHRT_MAX) { 547 mutex_exit(lock); 548 return (set_errno(ERANGE)); 549 } 550 p = &sp->sem_base[semnum]; 551 if ((p->semval = (ushort_t)arg) != 0) { 552 if (p->semncnt) { 553 cv_broadcast(&p->semncnt_cv); 554 } 555 } else if (p->semzcnt) { 556 cv_broadcast(&p->semzcnt_cv); 557 } 558 p->sempid = curproc->p_pid; 559 sem_undo_clear(sp, (ushort_t)semnum, (ushort_t)semnum); 560 mutex_exit(lock); 561 return (0); 562 563 /* Set semvals of all semaphores in set. */ 564 case SETALL: 565 /* Check if semaphore set has been deleted and reallocated. */ 566 if (sp->sem_nsems * sizeof (*vals) != vsize) { 567 error = set_errno(EINVAL); 568 goto seterr; 569 } 570 if (error = ipcperm_access(&sp->sem_perm, SEM_A, cr)) { 571 error = set_errno(error); 572 goto seterr; 573 } 574 sem_undo_clear(sp, 0, sp->sem_nsems - 1); 575 for (i = 0, p = sp->sem_base; i < sp->sem_nsems; 576 (p++)->sempid = curproc->p_pid) { 577 if ((p->semval = vals[i++]) != 0) { 578 if (p->semncnt) { 579 cv_broadcast(&p->semncnt_cv); 580 } 581 } else if (p->semzcnt) { 582 cv_broadcast(&p->semzcnt_cv); 583 } 584 } 585 seterr: 586 mutex_exit(lock); 587 kmem_free(vals, vsize); 588 return (error); 589 590 default: 591 mutex_exit(lock); 592 return (set_errno(EINVAL)); 593 } 594 595 /* NOTREACHED */ 596 } 597 598 /* 599 * semexit - Called by exit() to clean up on process exit. 600 */ 601 void 602 semexit(proc_t *pp) 603 { 604 avl_tree_t *tree; 605 struct sem_undo *undo; 606 void *cookie = NULL; 607 608 mutex_enter(&pp->p_lock); 609 tree = pp->p_semacct; 610 pp->p_semacct = NULL; 611 mutex_exit(&pp->p_lock); 612 613 while (undo = avl_destroy_nodes(tree, &cookie)) { 614 ksemid_t *sp = undo->un_sp; 615 size_t size = SEM_UNDOSZ(sp->sem_nsems); 616 int i; 617 618 (void) ipc_lock(sem_svc, sp->sem_perm.ipc_id); 619 if (!IPC_FREE(&sp->sem_perm)) { 620 for (i = 0; i < sp->sem_nsems; i++) { 621 int adj = undo->un_aoe[i]; 622 if (adj) { 623 struct sem *semp = &sp->sem_base[i]; 624 int v = (int)semp->semval + adj; 625 626 if (v < 0 || v > USHRT_MAX) 627 continue; 628 semp->semval = (ushort_t)v; 629 if (v == 0 && semp->semzcnt) 630 cv_broadcast(&semp->semzcnt_cv); 631 if (adj > 0 && semp->semncnt) 632 cv_broadcast(&semp->semncnt_cv); 633 } 634 } 635 list_remove(&sp->sem_undos, undo); 636 } 637 ipc_rele(sem_svc, (kipc_perm_t *)sp); 638 kmem_free(undo, size); 639 } 640 641 avl_destroy(tree); 642 kmem_free(tree, sizeof (avl_tree_t)); 643 } 644 645 /* 646 * Remove all semaphores associated with a given zone. Called by 647 * zone_shutdown when the zone is halted. 648 */ 649 /*ARGSUSED1*/ 650 static void 651 sem_remove_zone(zoneid_t zoneid, void *arg) 652 { 653 ipc_remove_zone(sem_svc, zoneid); 654 } 655 656 /* 657 * semget - Semget system call. 658 */ 659 static int 660 semget(key_t key, int nsems, int semflg) 661 { 662 ksemid_t *sp; 663 kmutex_t *lock; 664 int id, error; 665 proc_t *pp = curproc; 666 667 top: 668 if (error = ipc_get(sem_svc, key, semflg, (kipc_perm_t **)&sp, &lock)) 669 return (set_errno(error)); 670 671 if (!IPC_FREE(&sp->sem_perm)) { 672 /* 673 * A semaphore with the requested key exists. 674 */ 675 if (!((nsems >= 0) && (nsems <= sp->sem_nsems))) { 676 mutex_exit(lock); 677 return (set_errno(EINVAL)); 678 } 679 } else { 680 /* 681 * This is a new semaphore set. Finish initialization. 682 */ 683 if (nsems <= 0 || (rctl_test(rc_process_semmsl, pp->p_rctls, pp, 684 nsems, RCA_SAFE) & RCT_DENY)) { 685 mutex_exit(lock); 686 mutex_exit(&pp->p_lock); 687 ipc_cleanup(sem_svc, (kipc_perm_t *)sp); 688 return (set_errno(EINVAL)); 689 } 690 mutex_exit(lock); 691 mutex_exit(&pp->p_lock); 692 693 /* 694 * We round the allocation up to coherency granularity 695 * so that multiple semaphore allocations won't result 696 * in the false sharing of their sem structures. 697 */ 698 sp->sem_base = 699 kmem_zalloc(P2ROUNDUP(nsems * sizeof (struct sem), 64), 700 KM_SLEEP); 701 sp->sem_binary = (nsems == 1); 702 sp->sem_nsems = (ushort_t)nsems; 703 sp->sem_ctime = gethrestime_sec(); 704 sp->sem_otime = 0; 705 list_create(&sp->sem_undos, sizeof (struct sem_undo), 706 offsetof(struct sem_undo, un_list)); 707 708 if (error = ipc_commit_begin(sem_svc, key, semflg, 709 (kipc_perm_t *)sp)) { 710 if (error == EAGAIN) 711 goto top; 712 return (set_errno(error)); 713 } 714 sp->sem_maxops = 715 rctl_enforced_value(rc_process_semopm, pp->p_rctls, pp); 716 if (rctl_test(rc_process_semmsl, pp->p_rctls, pp, nsems, 717 RCA_SAFE) & RCT_DENY) { 718 ipc_cleanup(sem_svc, (kipc_perm_t *)sp); 719 return (set_errno(EINVAL)); 720 } 721 lock = ipc_commit_end(sem_svc, &sp->sem_perm); 722 } 723 #ifdef C2_AUDIT 724 if (audit_active) 725 audit_ipcget(AT_IPC_SEM, (void *)sp); 726 #endif 727 id = sp->sem_perm.ipc_id; 728 mutex_exit(lock); 729 return (id); 730 } 731 732 /* 733 * semids system call. 734 */ 735 static int 736 semids(int *buf, uint_t nids, uint_t *pnids) 737 { 738 int error; 739 740 if (error = ipc_ids(sem_svc, buf, nids, pnids)) 741 return (set_errno(error)); 742 743 return (0); 744 } 745 746 747 /* 748 * Helper function for semop - copies in the provided timespec and 749 * computes the absolute future time after which we must return. 750 */ 751 static int 752 compute_timeout(timespec_t **tsp, timespec_t *ts, timespec_t *now, 753 timespec_t *timeout) 754 { 755 model_t datamodel = get_udatamodel(); 756 757 if (datamodel == DATAMODEL_NATIVE) { 758 if (copyin(timeout, ts, sizeof (timespec_t))) 759 return (EFAULT); 760 } else { 761 timespec32_t ts32; 762 763 if (copyin(timeout, &ts32, sizeof (timespec32_t))) 764 return (EFAULT); 765 TIMESPEC32_TO_TIMESPEC(ts, &ts32) 766 } 767 768 if (itimerspecfix(ts)) 769 return (EINVAL); 770 771 /* 772 * Convert the timespec value into absolute time. 773 */ 774 timespecadd(ts, now); 775 *tsp = ts; 776 777 return (0); 778 } 779 780 /* 781 * Undo structure comparator. We sort based on ksemid_t pointer. 782 */ 783 static int 784 sem_undo_compar(const void *x, const void *y) 785 { 786 struct sem_undo *undo1 = (struct sem_undo *)x; 787 struct sem_undo *undo2 = (struct sem_undo *)y; 788 789 if (undo1->un_sp < undo2->un_sp) 790 return (-1); 791 if (undo1->un_sp > undo2->un_sp) 792 return (1); 793 return (0); 794 } 795 796 /* 797 * Helper function for semop - creates an undo structure and adds it to 798 * the process's avl tree and the semaphore's list. 799 */ 800 static int 801 sem_undo_alloc(proc_t *pp, ksemid_t *sp, kmutex_t **lock, 802 struct sem_undo *template, struct sem_undo **un) 803 { 804 size_t size; 805 struct sem_undo *undo; 806 avl_tree_t *tree = NULL; 807 avl_index_t where; 808 809 mutex_exit(*lock); 810 811 size = SEM_UNDOSZ(sp->sem_nsems); 812 undo = kmem_zalloc(size, KM_SLEEP); 813 undo->un_proc = pp; 814 undo->un_sp = sp; 815 816 if (pp->p_semacct == NULL) 817 tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); 818 819 *lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id); 820 if (IPC_FREE(&sp->sem_perm)) { 821 kmem_free(undo, size); 822 if (tree) 823 kmem_free(tree, sizeof (avl_tree_t)); 824 return (EIDRM); 825 } 826 827 mutex_enter(&pp->p_lock); 828 if (tree) { 829 if (pp->p_semacct == NULL) { 830 avl_create(tree, sem_undo_compar, 831 sizeof (struct sem_undo), 832 offsetof(struct sem_undo, un_avl)); 833 pp->p_semacct = tree; 834 } else { 835 kmem_free(tree, sizeof (avl_tree_t)); 836 } 837 } 838 839 if (*un = avl_find(pp->p_semacct, template, &where)) { 840 mutex_exit(&pp->p_lock); 841 kmem_free(undo, size); 842 } else { 843 *un = undo; 844 avl_insert(pp->p_semacct, undo, where); 845 mutex_exit(&pp->p_lock); 846 list_insert_head(&sp->sem_undos, undo); 847 ipc_hold(sem_svc, (kipc_perm_t *)sp); 848 } 849 850 851 return (0); 852 } 853 854 /* 855 * semop - Semop system call. 856 */ 857 static int 858 semop(int semid, struct sembuf *sops, size_t nsops, timespec_t *timeout) 859 { 860 ksemid_t *sp = NULL; 861 kmutex_t *lock; 862 struct sembuf *op; /* ptr to operation */ 863 int i; /* loop control */ 864 struct sem *semp; /* ptr to semaphore */ 865 int error = 0; 866 struct sembuf *uops; /* ptr to copy of user ops */ 867 struct sembuf x_sem; /* avoid kmem_alloc's */ 868 timespec_t now, ts, *tsp = NULL; 869 int timecheck = 0; 870 int cvres, needundo, mode; 871 struct sem_undo *undo; 872 proc_t *pp = curproc; 873 int held = 0; 874 875 CPU_STATS_ADDQ(CPU, sys, sema, 1); /* bump semaphore op count */ 876 877 /* 878 * To avoid the cost of copying in 'timeout' in the common 879 * case, we could only grab the time here and defer the copyin 880 * and associated computations until we are about to block. 881 * 882 * The down side to this is that we would then have to spin 883 * some goto top nonsense to avoid the copyin behind the semid 884 * lock. As a common use of timed semaphores is as an explicit 885 * blocking mechanism, this could incur a greater penalty. 886 * 887 * If we eventually decide that this would be a wise route to 888 * take, the deferrable functionality is completely contained 889 * in 'compute_timeout', and the interface is defined such that 890 * we can legally not validate 'timeout' if it is unused. 891 */ 892 if (timeout != NULL) { 893 timecheck = timechanged; 894 gethrestime(&now); 895 if (error = compute_timeout(&tsp, &ts, &now, timeout)) 896 return (set_errno(error)); 897 } 898 899 /* 900 * Allocate space to hold the vector of semaphore ops. If 901 * there is only 1 operation we use a preallocated buffer on 902 * the stack for speed. 903 * 904 * Since we don't want to allow the user to allocate an 905 * arbitrary amount of kernel memory, we need to check against 906 * the number of operations allowed by the semaphore. We only 907 * bother doing this if the number of operations is larger than 908 * SEM_MAXUCOPS. 909 */ 910 if (nsops == 1) 911 uops = &x_sem; 912 else if (nsops == 0) 913 return (0); 914 else if (nsops <= SEM_MAXUCOPS) 915 uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP); 916 917 if (nsops > SEM_MAXUCOPS) { 918 if ((lock = ipc_lookup(sem_svc, semid, 919 (kipc_perm_t **)&sp)) == NULL) 920 return (set_errno(EFAULT)); 921 922 if (nsops > sp->sem_maxops) { 923 mutex_exit(lock); 924 return (set_errno(E2BIG)); 925 } 926 held = 1; 927 ipc_hold(sem_svc, (kipc_perm_t *)sp); 928 mutex_exit(lock); 929 930 uops = kmem_alloc(nsops * sizeof (*uops), KM_SLEEP); 931 if (copyin(sops, uops, nsops * sizeof (*op))) { 932 error = EFAULT; 933 (void) ipc_lock(sem_svc, sp->sem_perm.ipc_id); 934 goto semoperr; 935 } 936 937 lock = ipc_lock(sem_svc, sp->sem_perm.ipc_id); 938 if (IPC_FREE(&sp->sem_perm)) { 939 error = EIDRM; 940 goto semoperr; 941 } 942 } else { 943 /* 944 * This could be interleaved with the above code, but 945 * keeping them separate improves readability. 946 */ 947 if (copyin(sops, uops, nsops * sizeof (*op))) { 948 error = EFAULT; 949 goto semoperr_unlocked; 950 } 951 952 if ((lock = ipc_lookup(sem_svc, semid, 953 (kipc_perm_t **)&sp)) == NULL) { 954 error = EINVAL; 955 goto semoperr_unlocked; 956 } 957 958 if (nsops > sp->sem_maxops) { 959 error = E2BIG; 960 goto semoperr; 961 } 962 } 963 964 /* 965 * Scan all operations. Verify that sem #s are in range and 966 * this process is allowed the requested operations. If any 967 * operations are marked SEM_UNDO, find (or allocate) the undo 968 * structure for this process and semaphore. 969 */ 970 needundo = 0; 971 mode = 0; 972 for (i = 0, op = uops; i++ < nsops; op++) { 973 mode |= op->sem_op ? SEM_A : SEM_R; 974 if (op->sem_num >= sp->sem_nsems) { 975 error = EFBIG; 976 goto semoperr; 977 } 978 if ((op->sem_flg & SEM_UNDO) && op->sem_op) 979 needundo = 1; 980 } 981 if (error = ipcperm_access(&sp->sem_perm, mode, CRED())) 982 goto semoperr; 983 984 if (needundo) { 985 struct sem_undo template; 986 987 template.un_sp = sp; 988 mutex_enter(&pp->p_lock); 989 if (pp->p_semacct) 990 undo = avl_find(pp->p_semacct, &template, NULL); 991 else 992 undo = NULL; 993 mutex_exit(&pp->p_lock); 994 if (undo == NULL) { 995 if (error = sem_undo_alloc(pp, sp, &lock, &template, 996 &undo)) 997 goto semoperr; 998 999 /* sem_undo_alloc unlocks the semaphore */ 1000 if (error = ipcperm_access(&sp->sem_perm, mode, CRED())) 1001 goto semoperr; 1002 } 1003 } 1004 1005 check: 1006 /* 1007 * Loop waiting for the operations to be satisfied atomically. 1008 * Actually, do the operations and undo them if a wait is needed 1009 * or an error is detected. 1010 */ 1011 for (i = 0; i < nsops; i++) { 1012 op = &uops[i]; 1013 semp = &sp->sem_base[op->sem_num]; 1014 1015 /* 1016 * Raise the semaphore (i.e. sema_v) 1017 */ 1018 if (op->sem_op > 0) { 1019 if (op->sem_op + (int)semp->semval > USHRT_MAX || 1020 ((op->sem_flg & SEM_UNDO) && 1021 (error = sem_undo_add(op->sem_op, op->sem_num, 1022 undo)))) { 1023 if (i) 1024 sem_rollback(sp, uops, i, undo); 1025 if (error == 0) 1026 error = ERANGE; 1027 goto semoperr; 1028 } 1029 semp->semval += op->sem_op; 1030 /* 1031 * If we are only incrementing the semaphore value 1032 * by one on a binary semaphore, we can cv_signal. 1033 */ 1034 if (semp->semncnt) { 1035 if (op->sem_op == 1 && sp->sem_binary) 1036 cv_signal(&semp->semncnt_cv); 1037 else 1038 cv_broadcast(&semp->semncnt_cv); 1039 } 1040 if (semp->semzcnt && !semp->semval) 1041 cv_broadcast(&semp->semzcnt_cv); 1042 continue; 1043 } 1044 1045 /* 1046 * Lower the semaphore (i.e. sema_p) 1047 */ 1048 if (op->sem_op < 0) { 1049 if (semp->semval >= (unsigned)(-op->sem_op)) { 1050 if ((op->sem_flg & SEM_UNDO) && 1051 (error = sem_undo_add(op->sem_op, 1052 op->sem_num, undo))) { 1053 if (i) 1054 sem_rollback(sp, uops, i, undo); 1055 goto semoperr; 1056 } 1057 semp->semval += op->sem_op; 1058 if (semp->semzcnt && !semp->semval) 1059 cv_broadcast(&semp->semzcnt_cv); 1060 continue; 1061 } 1062 if (i) 1063 sem_rollback(sp, uops, i, undo); 1064 if (op->sem_flg & IPC_NOWAIT) { 1065 error = EAGAIN; 1066 goto semoperr; 1067 } 1068 1069 /* 1070 * Mark the semaphore set as not a binary type 1071 * if we are decrementing the value by more than 1. 1072 * 1073 * V operations will resort to cv_broadcast 1074 * for this set because there are too many weird 1075 * cases that have to be caught. 1076 */ 1077 if (op->sem_op < -1) 1078 sp->sem_binary = 0; 1079 if (!held) { 1080 held = 1; 1081 ipc_hold(sem_svc, (kipc_perm_t *)sp); 1082 } 1083 semp->semncnt++; 1084 cvres = cv_waituntil_sig(&semp->semncnt_cv, lock, 1085 tsp, timecheck); 1086 lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock); 1087 1088 if (!IPC_FREE(&sp->sem_perm)) { 1089 ASSERT(semp->semncnt != 0); 1090 semp->semncnt--; 1091 if (cvres > 0) /* normal wakeup */ 1092 goto check; 1093 } 1094 1095 /* EINTR or EAGAIN overrides EIDRM */ 1096 if (cvres == 0) 1097 error = EINTR; 1098 else if (cvres < 0) 1099 error = EAGAIN; 1100 else 1101 error = EIDRM; 1102 goto semoperr; 1103 } 1104 1105 /* 1106 * Wait for zero value 1107 */ 1108 if (semp->semval) { 1109 if (i) 1110 sem_rollback(sp, uops, i, undo); 1111 if (op->sem_flg & IPC_NOWAIT) { 1112 error = EAGAIN; 1113 goto semoperr; 1114 } 1115 1116 if (!held) { 1117 held = 1; 1118 ipc_hold(sem_svc, (kipc_perm_t *)sp); 1119 } 1120 semp->semzcnt++; 1121 cvres = cv_waituntil_sig(&semp->semzcnt_cv, lock, 1122 tsp, timecheck); 1123 lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock); 1124 1125 /* 1126 * Don't touch semp if the semaphores have been removed. 1127 */ 1128 if (!IPC_FREE(&sp->sem_perm)) { 1129 ASSERT(semp->semzcnt != 0); 1130 semp->semzcnt--; 1131 if (cvres > 0) /* normal wakeup */ 1132 goto check; 1133 } 1134 1135 /* EINTR or EAGAIN overrides EIDRM */ 1136 if (cvres == 0) 1137 error = EINTR; 1138 else if (cvres < 0) 1139 error = EAGAIN; 1140 else 1141 error = EIDRM; 1142 goto semoperr; 1143 } 1144 } 1145 1146 /* All operations succeeded. Update sempid for accessed semaphores. */ 1147 for (i = 0, op = uops; i++ < nsops; 1148 sp->sem_base[(op++)->sem_num].sempid = pp->p_pid) 1149 ; 1150 sp->sem_otime = gethrestime_sec(); 1151 if (held) 1152 ipc_rele(sem_svc, (kipc_perm_t *)sp); 1153 else 1154 mutex_exit(lock); 1155 1156 /* Before leaving, deallocate the buffer that held the user semops */ 1157 if (nsops != 1) 1158 kmem_free(uops, sizeof (*uops) * nsops); 1159 return (0); 1160 1161 /* 1162 * Error return labels 1163 */ 1164 semoperr: 1165 if (held) 1166 ipc_rele(sem_svc, (kipc_perm_t *)sp); 1167 else 1168 mutex_exit(lock); 1169 1170 semoperr_unlocked: 1171 1172 /* Before leaving, deallocate the buffer that held the user semops */ 1173 if (nsops != 1) 1174 kmem_free(uops, sizeof (*uops) * nsops); 1175 return (set_errno(error)); 1176 } 1177 1178 /* 1179 * semsys - System entry point for semctl, semget, and semop system calls. 1180 */ 1181 static int 1182 semsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4) 1183 { 1184 int error; 1185 1186 switch (opcode) { 1187 case SEMCTL: 1188 error = semctl((int)a1, (uint_t)a2, (int)a3, a4); 1189 break; 1190 case SEMGET: 1191 error = semget((key_t)a1, (int)a2, (int)a3); 1192 break; 1193 case SEMOP: 1194 error = semop((int)a1, (struct sembuf *)a2, (size_t)a3, 0); 1195 break; 1196 case SEMIDS: 1197 error = semids((int *)a1, (uint_t)a2, (uint_t *)a3); 1198 break; 1199 case SEMTIMEDOP: 1200 error = semop((int)a1, (struct sembuf *)a2, (size_t)a3, 1201 (timespec_t *)a4); 1202 break; 1203 default: 1204 error = set_errno(EINVAL); 1205 break; 1206 } 1207 return (error); 1208 } 1209