1 /*- 2 * Implementation of SVID semaphores 3 * 4 * Author: Daniel Boulet 5 * 6 * This software is provided ``AS IS'' without any warranties of any kind. 7 */ 8 /*- 9 * Copyright (c) 2003-2005 McAfee, Inc. 10 * All rights reserved. 11 * 12 * This software was developed for the FreeBSD Project in part by McAfee 13 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR 14 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research 15 * program. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_compat.h" 43 #include "opt_sysvipc.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/sysproto.h> 48 #include <sys/eventhandler.h> 49 #include <sys/kernel.h> 50 #include <sys/proc.h> 51 #include <sys/lock.h> 52 #include <sys/module.h> 53 #include <sys/mutex.h> 54 #include <sys/racct.h> 55 #include <sys/sem.h> 56 #include <sys/sx.h> 57 #include <sys/syscall.h> 58 #include <sys/syscallsubr.h> 59 #include <sys/sysent.h> 60 #include <sys/sysctl.h> 61 #include <sys/uio.h> 62 #include <sys/malloc.h> 63 #include <sys/jail.h> 64 65 #include <security/mac/mac_framework.h> 66 67 FEATURE(sysv_sem, "System V semaphores support"); 68 69 static MALLOC_DEFINE(M_SEM, "sem", "SVID compatible semaphores"); 70 71 #ifdef SEM_DEBUG 72 #define DPRINTF(a) printf a 73 #else 74 #define DPRINTF(a) 75 #endif 76 77 static int seminit(void); 78 static int sysvsem_modload(struct module *, int, void *); 79 static int semunload(void); 80 static void semexit_myhook(void *arg, struct proc *p); 81 static int sysctl_sema(SYSCTL_HANDLER_ARGS); 82 static int semvalid(int semid, struct prison *rpr, 83 struct semid_kernel *semakptr); 84 static void sem_remove(int semidx, struct ucred *cred); 85 static struct prison *sem_find_prison(struct ucred *); 86 static int sem_prison_cansee(struct prison *, struct semid_kernel *); 87 static int sem_prison_check(void *, void *); 88 static int sem_prison_set(void *, void *); 89 static int sem_prison_get(void *, void *); 90 static int sem_prison_remove(void *, void *); 91 static void sem_prison_cleanup(struct prison *); 92 93 #ifndef _SYS_SYSPROTO_H_ 94 struct __semctl_args; 95 int __semctl(struct thread *td, struct __semctl_args *uap); 96 struct semget_args; 97 int semget(struct thread *td, struct semget_args *uap); 98 struct semop_args; 99 int semop(struct thread *td, struct semop_args *uap); 100 #endif 101 102 static struct sem_undo *semu_alloc(struct thread *td); 103 static int semundo_adjust(struct thread *td, struct sem_undo **supptr, 104 int semid, int semseq, int semnum, int adjval); 105 static void semundo_clear(int semid, int semnum); 106 107 static struct mtx sem_mtx; /* semaphore global lock */ 108 static struct mtx sem_undo_mtx; 109 static int semtot = 0; 110 static struct semid_kernel *sema; /* semaphore id pool */ 111 static struct mtx *sema_mtx; /* semaphore id pool mutexes*/ 112 static struct sem *sem; /* semaphore pool */ 113 LIST_HEAD(, sem_undo) semu_list; /* list of active undo structures */ 114 LIST_HEAD(, sem_undo) semu_free_list; /* list of free undo structures */ 115 static int *semu; /* undo structure pool */ 116 static eventhandler_tag semexit_tag; 117 static unsigned sem_prison_slot; /* prison OSD slot */ 118 119 #define SEMUNDO_MTX sem_undo_mtx 120 #define SEMUNDO_LOCK() mtx_lock(&SEMUNDO_MTX); 121 #define SEMUNDO_UNLOCK() mtx_unlock(&SEMUNDO_MTX); 122 #define SEMUNDO_LOCKASSERT(how) mtx_assert(&SEMUNDO_MTX, (how)); 123 124 struct sem { 125 u_short semval; /* semaphore value */ 126 pid_t sempid; /* pid of last operation */ 127 u_short semncnt; /* # awaiting semval > cval */ 128 u_short semzcnt; /* # awaiting semval = 0 */ 129 }; 130 131 /* 132 * Undo structure (one per process) 133 */ 134 struct sem_undo { 135 LIST_ENTRY(sem_undo) un_next; /* ptr to next active undo structure */ 136 struct proc *un_proc; /* owner of this structure */ 137 short un_cnt; /* # of active entries */ 138 struct undo { 139 short un_adjval; /* adjust on exit values */ 140 short un_num; /* semaphore # */ 141 int un_id; /* semid */ 142 unsigned short un_seq; 143 } un_ent[1]; /* undo entries */ 144 }; 145 146 /* 147 * Configuration parameters 148 */ 149 #ifndef SEMMNI 150 #define SEMMNI 50 /* # of semaphore identifiers */ 151 #endif 152 #ifndef SEMMNS 153 #define SEMMNS 340 /* # of semaphores in system */ 154 #endif 155 #ifndef SEMUME 156 #define SEMUME 50 /* max # of undo entries per process */ 157 #endif 158 #ifndef SEMMNU 159 #define SEMMNU 150 /* # of undo structures in system */ 160 #endif 161 162 /* shouldn't need tuning */ 163 #ifndef SEMMSL 164 #define SEMMSL SEMMNS /* max # of semaphores per id */ 165 #endif 166 #ifndef SEMOPM 167 #define SEMOPM 100 /* max # of operations per semop call */ 168 #endif 169 170 #define SEMVMX 32767 /* semaphore maximum value */ 171 #define SEMAEM 16384 /* adjust on exit max value */ 172 173 /* 174 * Due to the way semaphore memory is allocated, we have to ensure that 175 * SEMUSZ is properly aligned. 176 */ 177 178 #define SEM_ALIGN(bytes) roundup2(bytes, sizeof(long)) 179 180 /* actual size of an undo structure */ 181 #define SEMUSZ SEM_ALIGN(offsetof(struct sem_undo, un_ent[SEMUME])) 182 183 /* 184 * Macro to find a particular sem_undo vector 185 */ 186 #define SEMU(ix) \ 187 ((struct sem_undo *)(((intptr_t)semu)+ix * seminfo.semusz)) 188 189 /* 190 * semaphore info struct 191 */ 192 struct seminfo seminfo = { 193 SEMMNI, /* # of semaphore identifiers */ 194 SEMMNS, /* # of semaphores in system */ 195 SEMMNU, /* # of undo structures in system */ 196 SEMMSL, /* max # of semaphores per id */ 197 SEMOPM, /* max # of operations per semop call */ 198 SEMUME, /* max # of undo entries per process */ 199 SEMUSZ, /* size in bytes of undo structure */ 200 SEMVMX, /* semaphore maximum value */ 201 SEMAEM /* adjust on exit max value */ 202 }; 203 204 SYSCTL_INT(_kern_ipc, OID_AUTO, semmni, CTLFLAG_RDTUN, &seminfo.semmni, 0, 205 "Number of semaphore identifiers"); 206 SYSCTL_INT(_kern_ipc, OID_AUTO, semmns, CTLFLAG_RDTUN, &seminfo.semmns, 0, 207 "Maximum number of semaphores in the system"); 208 SYSCTL_INT(_kern_ipc, OID_AUTO, semmnu, CTLFLAG_RDTUN, &seminfo.semmnu, 0, 209 "Maximum number of undo structures in the system"); 210 SYSCTL_INT(_kern_ipc, OID_AUTO, semmsl, CTLFLAG_RWTUN, &seminfo.semmsl, 0, 211 "Max semaphores per id"); 212 SYSCTL_INT(_kern_ipc, OID_AUTO, semopm, CTLFLAG_RDTUN, &seminfo.semopm, 0, 213 "Max operations per semop call"); 214 SYSCTL_INT(_kern_ipc, OID_AUTO, semume, CTLFLAG_RDTUN, &seminfo.semume, 0, 215 "Max undo entries per process"); 216 SYSCTL_INT(_kern_ipc, OID_AUTO, semusz, CTLFLAG_RDTUN, &seminfo.semusz, 0, 217 "Size in bytes of undo structure"); 218 SYSCTL_INT(_kern_ipc, OID_AUTO, semvmx, CTLFLAG_RWTUN, &seminfo.semvmx, 0, 219 "Semaphore maximum value"); 220 SYSCTL_INT(_kern_ipc, OID_AUTO, semaem, CTLFLAG_RWTUN, &seminfo.semaem, 0, 221 "Adjust on exit max value"); 222 SYSCTL_PROC(_kern_ipc, OID_AUTO, sema, 223 CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, 224 NULL, 0, sysctl_sema, "", "Semaphore id pool"); 225 226 static struct syscall_helper_data sem_syscalls[] = { 227 SYSCALL_INIT_HELPER(__semctl), 228 SYSCALL_INIT_HELPER(semget), 229 SYSCALL_INIT_HELPER(semop), 230 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 231 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 232 SYSCALL_INIT_HELPER(semsys), 233 SYSCALL_INIT_HELPER_COMPAT(freebsd7___semctl), 234 #endif 235 SYSCALL_INIT_LAST 236 }; 237 238 #ifdef COMPAT_FREEBSD32 239 #include <compat/freebsd32/freebsd32.h> 240 #include <compat/freebsd32/freebsd32_ipc.h> 241 #include <compat/freebsd32/freebsd32_proto.h> 242 #include <compat/freebsd32/freebsd32_signal.h> 243 #include <compat/freebsd32/freebsd32_syscall.h> 244 #include <compat/freebsd32/freebsd32_util.h> 245 246 static struct syscall_helper_data sem32_syscalls[] = { 247 SYSCALL32_INIT_HELPER(freebsd32_semctl), 248 SYSCALL32_INIT_HELPER_COMPAT(semget), 249 SYSCALL32_INIT_HELPER_COMPAT(semop), 250 SYSCALL32_INIT_HELPER(freebsd32_semsys), 251 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 252 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 253 SYSCALL32_INIT_HELPER(freebsd7_freebsd32_semctl), 254 #endif 255 SYSCALL_INIT_LAST 256 }; 257 #endif 258 259 static int 260 seminit(void) 261 { 262 struct prison *pr; 263 void **rsv; 264 int i, error; 265 osd_method_t methods[PR_MAXMETHOD] = { 266 [PR_METHOD_CHECK] = sem_prison_check, 267 [PR_METHOD_SET] = sem_prison_set, 268 [PR_METHOD_GET] = sem_prison_get, 269 [PR_METHOD_REMOVE] = sem_prison_remove, 270 }; 271 272 sem = malloc(sizeof(struct sem) * seminfo.semmns, M_SEM, M_WAITOK); 273 sema = malloc(sizeof(struct semid_kernel) * seminfo.semmni, M_SEM, 274 M_WAITOK); 275 sema_mtx = malloc(sizeof(struct mtx) * seminfo.semmni, M_SEM, 276 M_WAITOK | M_ZERO); 277 semu = malloc(seminfo.semmnu * seminfo.semusz, M_SEM, M_WAITOK); 278 279 for (i = 0; i < seminfo.semmni; i++) { 280 sema[i].u.sem_base = 0; 281 sema[i].u.sem_perm.mode = 0; 282 sema[i].u.sem_perm.seq = 0; 283 #ifdef MAC 284 mac_sysvsem_init(&sema[i]); 285 #endif 286 } 287 for (i = 0; i < seminfo.semmni; i++) 288 mtx_init(&sema_mtx[i], "semid", NULL, MTX_DEF); 289 LIST_INIT(&semu_free_list); 290 for (i = 0; i < seminfo.semmnu; i++) { 291 struct sem_undo *suptr = SEMU(i); 292 suptr->un_proc = NULL; 293 LIST_INSERT_HEAD(&semu_free_list, suptr, un_next); 294 } 295 LIST_INIT(&semu_list); 296 mtx_init(&sem_mtx, "sem", NULL, MTX_DEF); 297 mtx_init(&sem_undo_mtx, "semu", NULL, MTX_DEF); 298 semexit_tag = EVENTHANDLER_REGISTER(process_exit, semexit_myhook, NULL, 299 EVENTHANDLER_PRI_ANY); 300 301 /* Set current prisons according to their allow.sysvipc. */ 302 sem_prison_slot = osd_jail_register(NULL, methods); 303 rsv = osd_reserve(sem_prison_slot); 304 prison_lock(&prison0); 305 (void)osd_jail_set_reserved(&prison0, sem_prison_slot, rsv, &prison0); 306 prison_unlock(&prison0); 307 rsv = NULL; 308 sx_slock(&allprison_lock); 309 TAILQ_FOREACH(pr, &allprison, pr_list) { 310 if (rsv == NULL) 311 rsv = osd_reserve(sem_prison_slot); 312 prison_lock(pr); 313 if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) { 314 (void)osd_jail_set_reserved(pr, sem_prison_slot, rsv, 315 &prison0); 316 rsv = NULL; 317 } 318 prison_unlock(pr); 319 } 320 if (rsv != NULL) 321 osd_free_reserved(rsv); 322 sx_sunlock(&allprison_lock); 323 324 error = syscall_helper_register(sem_syscalls, SY_THR_STATIC_KLD); 325 if (error != 0) 326 return (error); 327 #ifdef COMPAT_FREEBSD32 328 error = syscall32_helper_register(sem32_syscalls, SY_THR_STATIC_KLD); 329 if (error != 0) 330 return (error); 331 #endif 332 return (0); 333 } 334 335 static int 336 semunload(void) 337 { 338 int i; 339 340 /* XXXKIB */ 341 if (semtot != 0) 342 return (EBUSY); 343 344 #ifdef COMPAT_FREEBSD32 345 syscall32_helper_unregister(sem32_syscalls); 346 #endif 347 syscall_helper_unregister(sem_syscalls); 348 EVENTHANDLER_DEREGISTER(process_exit, semexit_tag); 349 if (sem_prison_slot != 0) 350 osd_jail_deregister(sem_prison_slot); 351 #ifdef MAC 352 for (i = 0; i < seminfo.semmni; i++) 353 mac_sysvsem_destroy(&sema[i]); 354 #endif 355 free(sem, M_SEM); 356 free(sema, M_SEM); 357 free(semu, M_SEM); 358 for (i = 0; i < seminfo.semmni; i++) 359 mtx_destroy(&sema_mtx[i]); 360 free(sema_mtx, M_SEM); 361 mtx_destroy(&sem_mtx); 362 mtx_destroy(&sem_undo_mtx); 363 return (0); 364 } 365 366 static int 367 sysvsem_modload(struct module *module, int cmd, void *arg) 368 { 369 int error = 0; 370 371 switch (cmd) { 372 case MOD_LOAD: 373 error = seminit(); 374 if (error != 0) 375 semunload(); 376 break; 377 case MOD_UNLOAD: 378 error = semunload(); 379 break; 380 case MOD_SHUTDOWN: 381 break; 382 default: 383 error = EINVAL; 384 break; 385 } 386 return (error); 387 } 388 389 static moduledata_t sysvsem_mod = { 390 "sysvsem", 391 &sysvsem_modload, 392 NULL 393 }; 394 395 DECLARE_MODULE(sysvsem, sysvsem_mod, SI_SUB_SYSV_SEM, SI_ORDER_FIRST); 396 MODULE_VERSION(sysvsem, 1); 397 398 /* 399 * Allocate a new sem_undo structure for a process 400 * (returns ptr to structure or NULL if no more room) 401 */ 402 403 static struct sem_undo * 404 semu_alloc(struct thread *td) 405 { 406 struct sem_undo *suptr; 407 408 SEMUNDO_LOCKASSERT(MA_OWNED); 409 if ((suptr = LIST_FIRST(&semu_free_list)) == NULL) 410 return (NULL); 411 LIST_REMOVE(suptr, un_next); 412 LIST_INSERT_HEAD(&semu_list, suptr, un_next); 413 suptr->un_cnt = 0; 414 suptr->un_proc = td->td_proc; 415 return (suptr); 416 } 417 418 static int 419 semu_try_free(struct sem_undo *suptr) 420 { 421 422 SEMUNDO_LOCKASSERT(MA_OWNED); 423 424 if (suptr->un_cnt != 0) 425 return (0); 426 LIST_REMOVE(suptr, un_next); 427 LIST_INSERT_HEAD(&semu_free_list, suptr, un_next); 428 return (1); 429 } 430 431 /* 432 * Adjust a particular entry for a particular proc 433 */ 434 435 static int 436 semundo_adjust(struct thread *td, struct sem_undo **supptr, int semid, 437 int semseq, int semnum, int adjval) 438 { 439 struct proc *p = td->td_proc; 440 struct sem_undo *suptr; 441 struct undo *sunptr; 442 int i; 443 444 SEMUNDO_LOCKASSERT(MA_OWNED); 445 /* Look for and remember the sem_undo if the caller doesn't provide 446 it */ 447 448 suptr = *supptr; 449 if (suptr == NULL) { 450 LIST_FOREACH(suptr, &semu_list, un_next) { 451 if (suptr->un_proc == p) { 452 *supptr = suptr; 453 break; 454 } 455 } 456 if (suptr == NULL) { 457 if (adjval == 0) 458 return(0); 459 suptr = semu_alloc(td); 460 if (suptr == NULL) 461 return (ENOSPC); 462 *supptr = suptr; 463 } 464 } 465 466 /* 467 * Look for the requested entry and adjust it (delete if adjval becomes 468 * 0). 469 */ 470 sunptr = &suptr->un_ent[0]; 471 for (i = 0; i < suptr->un_cnt; i++, sunptr++) { 472 if (sunptr->un_id != semid || sunptr->un_num != semnum) 473 continue; 474 if (adjval != 0) { 475 adjval += sunptr->un_adjval; 476 if (adjval > seminfo.semaem || adjval < -seminfo.semaem) 477 return (ERANGE); 478 } 479 sunptr->un_adjval = adjval; 480 if (sunptr->un_adjval == 0) { 481 suptr->un_cnt--; 482 if (i < suptr->un_cnt) 483 suptr->un_ent[i] = 484 suptr->un_ent[suptr->un_cnt]; 485 if (suptr->un_cnt == 0) 486 semu_try_free(suptr); 487 } 488 return (0); 489 } 490 491 /* Didn't find the right entry - create it */ 492 if (adjval == 0) 493 return (0); 494 if (adjval > seminfo.semaem || adjval < -seminfo.semaem) 495 return (ERANGE); 496 if (suptr->un_cnt != seminfo.semume) { 497 sunptr = &suptr->un_ent[suptr->un_cnt]; 498 suptr->un_cnt++; 499 sunptr->un_adjval = adjval; 500 sunptr->un_id = semid; 501 sunptr->un_num = semnum; 502 sunptr->un_seq = semseq; 503 } else 504 return (EINVAL); 505 return (0); 506 } 507 508 static void 509 semundo_clear(int semid, int semnum) 510 { 511 struct sem_undo *suptr, *suptr1; 512 struct undo *sunptr; 513 int i; 514 515 SEMUNDO_LOCKASSERT(MA_OWNED); 516 LIST_FOREACH_SAFE(suptr, &semu_list, un_next, suptr1) { 517 sunptr = &suptr->un_ent[0]; 518 for (i = 0; i < suptr->un_cnt; i++, sunptr++) { 519 if (sunptr->un_id != semid) 520 continue; 521 if (semnum == -1 || sunptr->un_num == semnum) { 522 suptr->un_cnt--; 523 if (i < suptr->un_cnt) { 524 suptr->un_ent[i] = 525 suptr->un_ent[suptr->un_cnt]; 526 continue; 527 } 528 semu_try_free(suptr); 529 } 530 if (semnum != -1) 531 break; 532 } 533 } 534 } 535 536 static int 537 semvalid(int semid, struct prison *rpr, struct semid_kernel *semakptr) 538 { 539 540 return ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 || 541 semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) || 542 sem_prison_cansee(rpr, semakptr) ? EINVAL : 0); 543 } 544 545 static void 546 sem_remove(int semidx, struct ucred *cred) 547 { 548 struct semid_kernel *semakptr; 549 int i; 550 551 KASSERT(semidx >= 0 && semidx < seminfo.semmni, 552 ("semidx out of bounds")); 553 semakptr = &sema[semidx]; 554 semakptr->u.sem_perm.cuid = cred ? cred->cr_uid : 0; 555 semakptr->u.sem_perm.uid = cred ? cred->cr_uid : 0; 556 semakptr->u.sem_perm.mode = 0; 557 racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems); 558 crfree(semakptr->cred); 559 semakptr->cred = NULL; 560 SEMUNDO_LOCK(); 561 semundo_clear(semidx, -1); 562 SEMUNDO_UNLOCK(); 563 #ifdef MAC 564 mac_sysvsem_cleanup(semakptr); 565 #endif 566 wakeup(semakptr); 567 for (i = 0; i < seminfo.semmni; i++) { 568 if ((sema[i].u.sem_perm.mode & SEM_ALLOC) && 569 sema[i].u.sem_base > semakptr->u.sem_base) 570 mtx_lock_flags(&sema_mtx[i], LOP_DUPOK); 571 } 572 for (i = semakptr->u.sem_base - sem; i < semtot; i++) 573 sem[i] = sem[i + semakptr->u.sem_nsems]; 574 for (i = 0; i < seminfo.semmni; i++) { 575 if ((sema[i].u.sem_perm.mode & SEM_ALLOC) && 576 sema[i].u.sem_base > semakptr->u.sem_base) { 577 sema[i].u.sem_base -= semakptr->u.sem_nsems; 578 mtx_unlock(&sema_mtx[i]); 579 } 580 } 581 semtot -= semakptr->u.sem_nsems; 582 } 583 584 static struct prison * 585 sem_find_prison(struct ucred *cred) 586 { 587 struct prison *pr, *rpr; 588 589 pr = cred->cr_prison; 590 prison_lock(pr); 591 rpr = osd_jail_get(pr, sem_prison_slot); 592 prison_unlock(pr); 593 return rpr; 594 } 595 596 static int 597 sem_prison_cansee(struct prison *rpr, struct semid_kernel *semakptr) 598 { 599 600 if (semakptr->cred == NULL || 601 !(rpr == semakptr->cred->cr_prison || 602 prison_ischild(rpr, semakptr->cred->cr_prison))) 603 return (EINVAL); 604 return (0); 605 } 606 607 /* 608 * Note that the user-mode half of this passes a union, not a pointer. 609 */ 610 #ifndef _SYS_SYSPROTO_H_ 611 struct __semctl_args { 612 int semid; 613 int semnum; 614 int cmd; 615 union semun *arg; 616 }; 617 #endif 618 int 619 sys___semctl(struct thread *td, struct __semctl_args *uap) 620 { 621 struct semid_ds dsbuf; 622 union semun arg, semun; 623 register_t rval; 624 int error; 625 626 switch (uap->cmd) { 627 case SEM_STAT: 628 case IPC_SET: 629 case IPC_STAT: 630 case GETALL: 631 case SETVAL: 632 case SETALL: 633 error = copyin(uap->arg, &arg, sizeof(arg)); 634 if (error) 635 return (error); 636 break; 637 } 638 639 switch (uap->cmd) { 640 case SEM_STAT: 641 case IPC_STAT: 642 semun.buf = &dsbuf; 643 break; 644 case IPC_SET: 645 error = copyin(arg.buf, &dsbuf, sizeof(dsbuf)); 646 if (error) 647 return (error); 648 semun.buf = &dsbuf; 649 break; 650 case GETALL: 651 case SETALL: 652 semun.array = arg.array; 653 break; 654 case SETVAL: 655 semun.val = arg.val; 656 break; 657 } 658 659 error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun, 660 &rval); 661 if (error) 662 return (error); 663 664 switch (uap->cmd) { 665 case SEM_STAT: 666 case IPC_STAT: 667 error = copyout(&dsbuf, arg.buf, sizeof(dsbuf)); 668 break; 669 } 670 671 if (error == 0) 672 td->td_retval[0] = rval; 673 return (error); 674 } 675 676 int 677 kern_semctl(struct thread *td, int semid, int semnum, int cmd, 678 union semun *arg, register_t *rval) 679 { 680 u_short *array; 681 struct ucred *cred = td->td_ucred; 682 int i, error; 683 struct prison *rpr; 684 struct semid_ds *sbuf; 685 struct semid_kernel *semakptr; 686 struct mtx *sema_mtxp; 687 u_short usval, count; 688 int semidx; 689 690 DPRINTF(("call to semctl(%d, %d, %d, 0x%p)\n", 691 semid, semnum, cmd, arg)); 692 693 rpr = sem_find_prison(td->td_ucred); 694 if (sem == NULL) 695 return (ENOSYS); 696 697 array = NULL; 698 699 switch(cmd) { 700 case SEM_STAT: 701 /* 702 * For this command we assume semid is an array index 703 * rather than an IPC id. 704 */ 705 if (semid < 0 || semid >= seminfo.semmni) 706 return (EINVAL); 707 semakptr = &sema[semid]; 708 sema_mtxp = &sema_mtx[semid]; 709 mtx_lock(sema_mtxp); 710 if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0) { 711 error = EINVAL; 712 goto done2; 713 } 714 if ((error = sem_prison_cansee(rpr, semakptr))) 715 goto done2; 716 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) 717 goto done2; 718 #ifdef MAC 719 error = mac_sysvsem_check_semctl(cred, semakptr, cmd); 720 if (error != 0) 721 goto done2; 722 #endif 723 bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds)); 724 if (cred->cr_prison != semakptr->cred->cr_prison) 725 arg->buf->sem_perm.key = IPC_PRIVATE; 726 *rval = IXSEQ_TO_IPCID(semid, semakptr->u.sem_perm); 727 mtx_unlock(sema_mtxp); 728 return (0); 729 } 730 731 semidx = IPCID_TO_IX(semid); 732 if (semidx < 0 || semidx >= seminfo.semmni) 733 return (EINVAL); 734 735 semakptr = &sema[semidx]; 736 sema_mtxp = &sema_mtx[semidx]; 737 if (cmd == IPC_RMID) 738 mtx_lock(&sem_mtx); 739 mtx_lock(sema_mtxp); 740 741 #ifdef MAC 742 error = mac_sysvsem_check_semctl(cred, semakptr, cmd); 743 if (error != 0) 744 goto done2; 745 #endif 746 747 error = 0; 748 *rval = 0; 749 750 switch (cmd) { 751 case IPC_RMID: 752 if ((error = semvalid(semid, rpr, semakptr)) != 0) 753 goto done2; 754 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M))) 755 goto done2; 756 sem_remove(semidx, cred); 757 break; 758 759 case IPC_SET: 760 if ((error = semvalid(semid, rpr, semakptr)) != 0) 761 goto done2; 762 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M))) 763 goto done2; 764 sbuf = arg->buf; 765 semakptr->u.sem_perm.uid = sbuf->sem_perm.uid; 766 semakptr->u.sem_perm.gid = sbuf->sem_perm.gid; 767 semakptr->u.sem_perm.mode = (semakptr->u.sem_perm.mode & 768 ~0777) | (sbuf->sem_perm.mode & 0777); 769 semakptr->u.sem_ctime = time_second; 770 break; 771 772 case IPC_STAT: 773 if ((error = semvalid(semid, rpr, semakptr)) != 0) 774 goto done2; 775 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) 776 goto done2; 777 bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds)); 778 if (cred->cr_prison != semakptr->cred->cr_prison) 779 arg->buf->sem_perm.key = IPC_PRIVATE; 780 break; 781 782 case GETNCNT: 783 if ((error = semvalid(semid, rpr, semakptr)) != 0) 784 goto done2; 785 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) 786 goto done2; 787 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) { 788 error = EINVAL; 789 goto done2; 790 } 791 *rval = semakptr->u.sem_base[semnum].semncnt; 792 break; 793 794 case GETPID: 795 if ((error = semvalid(semid, rpr, semakptr)) != 0) 796 goto done2; 797 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) 798 goto done2; 799 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) { 800 error = EINVAL; 801 goto done2; 802 } 803 *rval = semakptr->u.sem_base[semnum].sempid; 804 break; 805 806 case GETVAL: 807 if ((error = semvalid(semid, rpr, semakptr)) != 0) 808 goto done2; 809 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) 810 goto done2; 811 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) { 812 error = EINVAL; 813 goto done2; 814 } 815 *rval = semakptr->u.sem_base[semnum].semval; 816 break; 817 818 case GETALL: 819 /* 820 * Unfortunately, callers of this function don't know 821 * in advance how many semaphores are in this set. 822 * While we could just allocate the maximum size array 823 * and pass the actual size back to the caller, that 824 * won't work for SETALL since we can't copyin() more 825 * data than the user specified as we may return a 826 * spurious EFAULT. 827 * 828 * Note that the number of semaphores in a set is 829 * fixed for the life of that set. The only way that 830 * the 'count' could change while are blocked in 831 * malloc() is if this semaphore set were destroyed 832 * and a new one created with the same index. 833 * However, semvalid() will catch that due to the 834 * sequence number unless exactly 0x8000 (or a 835 * multiple thereof) semaphore sets for the same index 836 * are created and destroyed while we are in malloc! 837 * 838 */ 839 count = semakptr->u.sem_nsems; 840 mtx_unlock(sema_mtxp); 841 array = malloc(sizeof(*array) * count, M_TEMP, M_WAITOK); 842 mtx_lock(sema_mtxp); 843 if ((error = semvalid(semid, rpr, semakptr)) != 0) 844 goto done2; 845 KASSERT(count == semakptr->u.sem_nsems, ("nsems changed")); 846 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) 847 goto done2; 848 for (i = 0; i < semakptr->u.sem_nsems; i++) 849 array[i] = semakptr->u.sem_base[i].semval; 850 mtx_unlock(sema_mtxp); 851 error = copyout(array, arg->array, count * sizeof(*array)); 852 mtx_lock(sema_mtxp); 853 break; 854 855 case GETZCNT: 856 if ((error = semvalid(semid, rpr, semakptr)) != 0) 857 goto done2; 858 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) 859 goto done2; 860 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) { 861 error = EINVAL; 862 goto done2; 863 } 864 *rval = semakptr->u.sem_base[semnum].semzcnt; 865 break; 866 867 case SETVAL: 868 if ((error = semvalid(semid, rpr, semakptr)) != 0) 869 goto done2; 870 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W))) 871 goto done2; 872 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) { 873 error = EINVAL; 874 goto done2; 875 } 876 if (arg->val < 0 || arg->val > seminfo.semvmx) { 877 error = ERANGE; 878 goto done2; 879 } 880 semakptr->u.sem_base[semnum].semval = arg->val; 881 SEMUNDO_LOCK(); 882 semundo_clear(semidx, semnum); 883 SEMUNDO_UNLOCK(); 884 wakeup(semakptr); 885 break; 886 887 case SETALL: 888 /* 889 * See comment on GETALL for why 'count' shouldn't change 890 * and why we require a userland buffer. 891 */ 892 count = semakptr->u.sem_nsems; 893 mtx_unlock(sema_mtxp); 894 array = malloc(sizeof(*array) * count, M_TEMP, M_WAITOK); 895 error = copyin(arg->array, array, count * sizeof(*array)); 896 mtx_lock(sema_mtxp); 897 if (error) 898 break; 899 if ((error = semvalid(semid, rpr, semakptr)) != 0) 900 goto done2; 901 KASSERT(count == semakptr->u.sem_nsems, ("nsems changed")); 902 if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W))) 903 goto done2; 904 for (i = 0; i < semakptr->u.sem_nsems; i++) { 905 usval = array[i]; 906 if (usval > seminfo.semvmx) { 907 error = ERANGE; 908 break; 909 } 910 semakptr->u.sem_base[i].semval = usval; 911 } 912 SEMUNDO_LOCK(); 913 semundo_clear(semidx, -1); 914 SEMUNDO_UNLOCK(); 915 wakeup(semakptr); 916 break; 917 918 default: 919 error = EINVAL; 920 break; 921 } 922 923 done2: 924 mtx_unlock(sema_mtxp); 925 if (cmd == IPC_RMID) 926 mtx_unlock(&sem_mtx); 927 if (array != NULL) 928 free(array, M_TEMP); 929 return(error); 930 } 931 932 #ifndef _SYS_SYSPROTO_H_ 933 struct semget_args { 934 key_t key; 935 int nsems; 936 int semflg; 937 }; 938 #endif 939 int 940 sys_semget(struct thread *td, struct semget_args *uap) 941 { 942 int semid, error = 0; 943 int key = uap->key; 944 int nsems = uap->nsems; 945 int semflg = uap->semflg; 946 struct ucred *cred = td->td_ucred; 947 948 DPRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg)); 949 950 if (sem_find_prison(cred) == NULL) 951 return (ENOSYS); 952 953 mtx_lock(&sem_mtx); 954 if (key != IPC_PRIVATE) { 955 for (semid = 0; semid < seminfo.semmni; semid++) { 956 if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) && 957 sema[semid].cred != NULL && 958 sema[semid].cred->cr_prison == cred->cr_prison && 959 sema[semid].u.sem_perm.key == key) 960 break; 961 } 962 if (semid < seminfo.semmni) { 963 DPRINTF(("found public key\n")); 964 if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) { 965 DPRINTF(("not exclusive\n")); 966 error = EEXIST; 967 goto done2; 968 } 969 if ((error = ipcperm(td, &sema[semid].u.sem_perm, 970 semflg & 0700))) { 971 goto done2; 972 } 973 if (nsems > 0 && sema[semid].u.sem_nsems < nsems) { 974 DPRINTF(("too small\n")); 975 error = EINVAL; 976 goto done2; 977 } 978 #ifdef MAC 979 error = mac_sysvsem_check_semget(cred, &sema[semid]); 980 if (error != 0) 981 goto done2; 982 #endif 983 goto found; 984 } 985 } 986 987 DPRINTF(("need to allocate the semid_kernel\n")); 988 if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) { 989 if (nsems <= 0 || nsems > seminfo.semmsl) { 990 DPRINTF(("nsems out of range (0<%d<=%d)\n", nsems, 991 seminfo.semmsl)); 992 error = EINVAL; 993 goto done2; 994 } 995 if (nsems > seminfo.semmns - semtot) { 996 DPRINTF(( 997 "not enough semaphores left (need %d, got %d)\n", 998 nsems, seminfo.semmns - semtot)); 999 error = ENOSPC; 1000 goto done2; 1001 } 1002 for (semid = 0; semid < seminfo.semmni; semid++) { 1003 if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) == 0) 1004 break; 1005 } 1006 if (semid == seminfo.semmni) { 1007 DPRINTF(("no more semid_kernel's available\n")); 1008 error = ENOSPC; 1009 goto done2; 1010 } 1011 #ifdef RACCT 1012 if (racct_enable) { 1013 PROC_LOCK(td->td_proc); 1014 error = racct_add(td->td_proc, RACCT_NSEM, nsems); 1015 PROC_UNLOCK(td->td_proc); 1016 if (error != 0) { 1017 error = ENOSPC; 1018 goto done2; 1019 } 1020 } 1021 #endif 1022 DPRINTF(("semid %d is available\n", semid)); 1023 mtx_lock(&sema_mtx[semid]); 1024 KASSERT((sema[semid].u.sem_perm.mode & SEM_ALLOC) == 0, 1025 ("Lost semaphore %d", semid)); 1026 sema[semid].u.sem_perm.key = key; 1027 sema[semid].u.sem_perm.cuid = cred->cr_uid; 1028 sema[semid].u.sem_perm.uid = cred->cr_uid; 1029 sema[semid].u.sem_perm.cgid = cred->cr_gid; 1030 sema[semid].u.sem_perm.gid = cred->cr_gid; 1031 sema[semid].u.sem_perm.mode = (semflg & 0777) | SEM_ALLOC; 1032 sema[semid].cred = crhold(cred); 1033 sema[semid].u.sem_perm.seq = 1034 (sema[semid].u.sem_perm.seq + 1) & 0x7fff; 1035 sema[semid].u.sem_nsems = nsems; 1036 sema[semid].u.sem_otime = 0; 1037 sema[semid].u.sem_ctime = time_second; 1038 sema[semid].u.sem_base = &sem[semtot]; 1039 semtot += nsems; 1040 bzero(sema[semid].u.sem_base, 1041 sizeof(sema[semid].u.sem_base[0])*nsems); 1042 #ifdef MAC 1043 mac_sysvsem_create(cred, &sema[semid]); 1044 #endif 1045 mtx_unlock(&sema_mtx[semid]); 1046 DPRINTF(("sembase = %p, next = %p\n", 1047 sema[semid].u.sem_base, &sem[semtot])); 1048 } else { 1049 DPRINTF(("didn't find it and wasn't asked to create it\n")); 1050 error = ENOENT; 1051 goto done2; 1052 } 1053 1054 found: 1055 td->td_retval[0] = IXSEQ_TO_IPCID(semid, sema[semid].u.sem_perm); 1056 done2: 1057 mtx_unlock(&sem_mtx); 1058 return (error); 1059 } 1060 1061 #ifndef _SYS_SYSPROTO_H_ 1062 struct semop_args { 1063 int semid; 1064 struct sembuf *sops; 1065 size_t nsops; 1066 }; 1067 #endif 1068 int 1069 sys_semop(struct thread *td, struct semop_args *uap) 1070 { 1071 #define SMALL_SOPS 8 1072 struct sembuf small_sops[SMALL_SOPS]; 1073 int semid = uap->semid; 1074 size_t nsops = uap->nsops; 1075 struct prison *rpr; 1076 struct sembuf *sops; 1077 struct semid_kernel *semakptr; 1078 struct sembuf *sopptr = NULL; 1079 struct sem *semptr = NULL; 1080 struct sem_undo *suptr; 1081 struct mtx *sema_mtxp; 1082 size_t i, j, k; 1083 int error; 1084 int do_wakeup, do_undos; 1085 unsigned short seq; 1086 1087 #ifdef SEM_DEBUG 1088 sops = NULL; 1089 #endif 1090 DPRINTF(("call to semop(%d, %p, %u)\n", semid, sops, nsops)); 1091 1092 rpr = sem_find_prison(td->td_ucred); 1093 if (sem == NULL) 1094 return (ENOSYS); 1095 1096 semid = IPCID_TO_IX(semid); /* Convert back to zero origin */ 1097 1098 if (semid < 0 || semid >= seminfo.semmni) 1099 return (EINVAL); 1100 1101 /* Allocate memory for sem_ops */ 1102 if (nsops <= SMALL_SOPS) 1103 sops = small_sops; 1104 else if (nsops > seminfo.semopm) { 1105 DPRINTF(("too many sops (max=%d, nsops=%d)\n", seminfo.semopm, 1106 nsops)); 1107 return (E2BIG); 1108 } else { 1109 #ifdef RACCT 1110 if (racct_enable) { 1111 PROC_LOCK(td->td_proc); 1112 if (nsops > 1113 racct_get_available(td->td_proc, RACCT_NSEMOP)) { 1114 PROC_UNLOCK(td->td_proc); 1115 return (E2BIG); 1116 } 1117 PROC_UNLOCK(td->td_proc); 1118 } 1119 #endif 1120 1121 sops = malloc(nsops * sizeof(*sops), M_TEMP, M_WAITOK); 1122 } 1123 if ((error = copyin(uap->sops, sops, nsops * sizeof(sops[0]))) != 0) { 1124 DPRINTF(("error = %d from copyin(%p, %p, %d)\n", error, 1125 uap->sops, sops, nsops * sizeof(sops[0]))); 1126 if (sops != small_sops) 1127 free(sops, M_SEM); 1128 return (error); 1129 } 1130 1131 semakptr = &sema[semid]; 1132 sema_mtxp = &sema_mtx[semid]; 1133 mtx_lock(sema_mtxp); 1134 if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0) { 1135 error = EINVAL; 1136 goto done2; 1137 } 1138 seq = semakptr->u.sem_perm.seq; 1139 if (seq != IPCID_TO_SEQ(uap->semid)) { 1140 error = EINVAL; 1141 goto done2; 1142 } 1143 if ((error = sem_prison_cansee(rpr, semakptr)) != 0) 1144 goto done2; 1145 /* 1146 * Initial pass through sops to see what permissions are needed. 1147 * Also perform any checks that don't need repeating on each 1148 * attempt to satisfy the request vector. 1149 */ 1150 j = 0; /* permission needed */ 1151 do_undos = 0; 1152 for (i = 0; i < nsops; i++) { 1153 sopptr = &sops[i]; 1154 if (sopptr->sem_num >= semakptr->u.sem_nsems) { 1155 error = EFBIG; 1156 goto done2; 1157 } 1158 if (sopptr->sem_flg & SEM_UNDO && sopptr->sem_op != 0) 1159 do_undos = 1; 1160 j |= (sopptr->sem_op == 0) ? SEM_R : SEM_A; 1161 } 1162 1163 if ((error = ipcperm(td, &semakptr->u.sem_perm, j))) { 1164 DPRINTF(("error = %d from ipaccess\n", error)); 1165 goto done2; 1166 } 1167 #ifdef MAC 1168 error = mac_sysvsem_check_semop(td->td_ucred, semakptr, j); 1169 if (error != 0) 1170 goto done2; 1171 #endif 1172 1173 /* 1174 * Loop trying to satisfy the vector of requests. 1175 * If we reach a point where we must wait, any requests already 1176 * performed are rolled back and we go to sleep until some other 1177 * process wakes us up. At this point, we start all over again. 1178 * 1179 * This ensures that from the perspective of other tasks, a set 1180 * of requests is atomic (never partially satisfied). 1181 */ 1182 for (;;) { 1183 do_wakeup = 0; 1184 error = 0; /* error return if necessary */ 1185 1186 for (i = 0; i < nsops; i++) { 1187 sopptr = &sops[i]; 1188 semptr = &semakptr->u.sem_base[sopptr->sem_num]; 1189 1190 DPRINTF(( 1191 "semop: semakptr=%p, sem_base=%p, " 1192 "semptr=%p, sem[%d]=%d : op=%d, flag=%s\n", 1193 semakptr, semakptr->u.sem_base, semptr, 1194 sopptr->sem_num, semptr->semval, sopptr->sem_op, 1195 (sopptr->sem_flg & IPC_NOWAIT) ? 1196 "nowait" : "wait")); 1197 1198 if (sopptr->sem_op < 0) { 1199 if (semptr->semval + sopptr->sem_op < 0) { 1200 DPRINTF(("semop: can't do it now\n")); 1201 break; 1202 } else { 1203 semptr->semval += sopptr->sem_op; 1204 if (semptr->semval == 0 && 1205 semptr->semzcnt > 0) 1206 do_wakeup = 1; 1207 } 1208 } else if (sopptr->sem_op == 0) { 1209 if (semptr->semval != 0) { 1210 DPRINTF(("semop: not zero now\n")); 1211 break; 1212 } 1213 } else if (semptr->semval + sopptr->sem_op > 1214 seminfo.semvmx) { 1215 error = ERANGE; 1216 break; 1217 } else { 1218 if (semptr->semncnt > 0) 1219 do_wakeup = 1; 1220 semptr->semval += sopptr->sem_op; 1221 } 1222 } 1223 1224 /* 1225 * Did we get through the entire vector? 1226 */ 1227 if (i >= nsops) 1228 goto done; 1229 1230 /* 1231 * No ... rollback anything that we've already done 1232 */ 1233 DPRINTF(("semop: rollback 0 through %d\n", i-1)); 1234 for (j = 0; j < i; j++) 1235 semakptr->u.sem_base[sops[j].sem_num].semval -= 1236 sops[j].sem_op; 1237 1238 /* If we detected an error, return it */ 1239 if (error != 0) 1240 goto done2; 1241 1242 /* 1243 * If the request that we couldn't satisfy has the 1244 * NOWAIT flag set then return with EAGAIN. 1245 */ 1246 if (sopptr->sem_flg & IPC_NOWAIT) { 1247 error = EAGAIN; 1248 goto done2; 1249 } 1250 1251 if (sopptr->sem_op == 0) 1252 semptr->semzcnt++; 1253 else 1254 semptr->semncnt++; 1255 1256 DPRINTF(("semop: good night!\n")); 1257 error = msleep(semakptr, sema_mtxp, (PZERO - 4) | PCATCH, 1258 "semwait", 0); 1259 DPRINTF(("semop: good morning (error=%d)!\n", error)); 1260 /* return code is checked below, after sem[nz]cnt-- */ 1261 1262 /* 1263 * Make sure that the semaphore still exists 1264 */ 1265 seq = semakptr->u.sem_perm.seq; 1266 if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 || 1267 seq != IPCID_TO_SEQ(uap->semid)) { 1268 error = EIDRM; 1269 goto done2; 1270 } 1271 1272 /* 1273 * Renew the semaphore's pointer after wakeup since 1274 * during msleep sem_base may have been modified and semptr 1275 * is not valid any more 1276 */ 1277 semptr = &semakptr->u.sem_base[sopptr->sem_num]; 1278 1279 /* 1280 * The semaphore is still alive. Readjust the count of 1281 * waiting processes. 1282 */ 1283 if (sopptr->sem_op == 0) 1284 semptr->semzcnt--; 1285 else 1286 semptr->semncnt--; 1287 1288 /* 1289 * Is it really morning, or was our sleep interrupted? 1290 * (Delayed check of msleep() return code because we 1291 * need to decrement sem[nz]cnt either way.) 1292 */ 1293 if (error != 0) { 1294 error = EINTR; 1295 goto done2; 1296 } 1297 DPRINTF(("semop: good morning!\n")); 1298 } 1299 1300 done: 1301 /* 1302 * Process any SEM_UNDO requests. 1303 */ 1304 if (do_undos) { 1305 SEMUNDO_LOCK(); 1306 suptr = NULL; 1307 for (i = 0; i < nsops; i++) { 1308 /* 1309 * We only need to deal with SEM_UNDO's for non-zero 1310 * op's. 1311 */ 1312 int adjval; 1313 1314 if ((sops[i].sem_flg & SEM_UNDO) == 0) 1315 continue; 1316 adjval = sops[i].sem_op; 1317 if (adjval == 0) 1318 continue; 1319 error = semundo_adjust(td, &suptr, semid, seq, 1320 sops[i].sem_num, -adjval); 1321 if (error == 0) 1322 continue; 1323 1324 /* 1325 * Oh-Oh! We ran out of either sem_undo's or undo's. 1326 * Rollback the adjustments to this point and then 1327 * rollback the semaphore ups and down so we can return 1328 * with an error with all structures restored. We 1329 * rollback the undo's in the exact reverse order that 1330 * we applied them. This guarantees that we won't run 1331 * out of space as we roll things back out. 1332 */ 1333 for (j = 0; j < i; j++) { 1334 k = i - j - 1; 1335 if ((sops[k].sem_flg & SEM_UNDO) == 0) 1336 continue; 1337 adjval = sops[k].sem_op; 1338 if (adjval == 0) 1339 continue; 1340 if (semundo_adjust(td, &suptr, semid, seq, 1341 sops[k].sem_num, adjval) != 0) 1342 panic("semop - can't undo undos"); 1343 } 1344 1345 for (j = 0; j < nsops; j++) 1346 semakptr->u.sem_base[sops[j].sem_num].semval -= 1347 sops[j].sem_op; 1348 1349 DPRINTF(("error = %d from semundo_adjust\n", error)); 1350 SEMUNDO_UNLOCK(); 1351 goto done2; 1352 } /* loop through the sops */ 1353 SEMUNDO_UNLOCK(); 1354 } /* if (do_undos) */ 1355 1356 /* We're definitely done - set the sempid's and time */ 1357 for (i = 0; i < nsops; i++) { 1358 sopptr = &sops[i]; 1359 semptr = &semakptr->u.sem_base[sopptr->sem_num]; 1360 semptr->sempid = td->td_proc->p_pid; 1361 } 1362 semakptr->u.sem_otime = time_second; 1363 1364 /* 1365 * Do a wakeup if any semaphore was up'd whilst something was 1366 * sleeping on it. 1367 */ 1368 if (do_wakeup) { 1369 DPRINTF(("semop: doing wakeup\n")); 1370 wakeup(semakptr); 1371 DPRINTF(("semop: back from wakeup\n")); 1372 } 1373 DPRINTF(("semop: done\n")); 1374 td->td_retval[0] = 0; 1375 done2: 1376 mtx_unlock(sema_mtxp); 1377 if (sops != small_sops) 1378 free(sops, M_SEM); 1379 return (error); 1380 } 1381 1382 /* 1383 * Go through the undo structures for this process and apply the adjustments to 1384 * semaphores. 1385 */ 1386 static void 1387 semexit_myhook(void *arg, struct proc *p) 1388 { 1389 struct sem_undo *suptr; 1390 struct semid_kernel *semakptr; 1391 struct mtx *sema_mtxp; 1392 int semid, semnum, adjval, ix; 1393 unsigned short seq; 1394 1395 /* 1396 * Go through the chain of undo vectors looking for one 1397 * associated with this process. 1398 */ 1399 SEMUNDO_LOCK(); 1400 LIST_FOREACH(suptr, &semu_list, un_next) { 1401 if (suptr->un_proc == p) 1402 break; 1403 } 1404 if (suptr == NULL) { 1405 SEMUNDO_UNLOCK(); 1406 return; 1407 } 1408 LIST_REMOVE(suptr, un_next); 1409 1410 DPRINTF(("proc @%p has undo structure with %d entries\n", p, 1411 suptr->un_cnt)); 1412 1413 /* 1414 * If there are any active undo elements then process them. 1415 */ 1416 if (suptr->un_cnt > 0) { 1417 SEMUNDO_UNLOCK(); 1418 for (ix = 0; ix < suptr->un_cnt; ix++) { 1419 semid = suptr->un_ent[ix].un_id; 1420 semnum = suptr->un_ent[ix].un_num; 1421 adjval = suptr->un_ent[ix].un_adjval; 1422 seq = suptr->un_ent[ix].un_seq; 1423 semakptr = &sema[semid]; 1424 sema_mtxp = &sema_mtx[semid]; 1425 1426 mtx_lock(sema_mtxp); 1427 if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 || 1428 (semakptr->u.sem_perm.seq != seq)) { 1429 mtx_unlock(sema_mtxp); 1430 continue; 1431 } 1432 if (semnum >= semakptr->u.sem_nsems) 1433 panic("semexit - semnum out of range"); 1434 1435 DPRINTF(( 1436 "semexit: %p id=%d num=%d(adj=%d) ; sem=%d\n", 1437 suptr->un_proc, suptr->un_ent[ix].un_id, 1438 suptr->un_ent[ix].un_num, 1439 suptr->un_ent[ix].un_adjval, 1440 semakptr->u.sem_base[semnum].semval)); 1441 1442 if (adjval < 0 && semakptr->u.sem_base[semnum].semval < 1443 -adjval) 1444 semakptr->u.sem_base[semnum].semval = 0; 1445 else 1446 semakptr->u.sem_base[semnum].semval += adjval; 1447 1448 wakeup(semakptr); 1449 DPRINTF(("semexit: back from wakeup\n")); 1450 mtx_unlock(sema_mtxp); 1451 } 1452 SEMUNDO_LOCK(); 1453 } 1454 1455 /* 1456 * Deallocate the undo vector. 1457 */ 1458 DPRINTF(("removing vector\n")); 1459 suptr->un_proc = NULL; 1460 suptr->un_cnt = 0; 1461 LIST_INSERT_HEAD(&semu_free_list, suptr, un_next); 1462 SEMUNDO_UNLOCK(); 1463 } 1464 1465 static int 1466 sysctl_sema(SYSCTL_HANDLER_ARGS) 1467 { 1468 struct prison *pr, *rpr; 1469 struct semid_kernel tsemak; 1470 int error, i; 1471 1472 pr = req->td->td_ucred->cr_prison; 1473 rpr = sem_find_prison(req->td->td_ucred); 1474 error = 0; 1475 for (i = 0; i < seminfo.semmni; i++) { 1476 mtx_lock(&sema_mtx[i]); 1477 if ((sema[i].u.sem_perm.mode & SEM_ALLOC) == 0 || 1478 rpr == NULL || sem_prison_cansee(rpr, &sema[i]) != 0) 1479 bzero(&tsemak, sizeof(tsemak)); 1480 else { 1481 tsemak = sema[i]; 1482 if (tsemak.cred->cr_prison != pr) 1483 tsemak.u.sem_perm.key = IPC_PRIVATE; 1484 } 1485 mtx_unlock(&sema_mtx[i]); 1486 error = SYSCTL_OUT(req, &tsemak, sizeof(tsemak)); 1487 if (error != 0) 1488 break; 1489 } 1490 return (error); 1491 } 1492 1493 static int 1494 sem_prison_check(void *obj, void *data) 1495 { 1496 struct prison *pr = obj; 1497 struct prison *prpr; 1498 struct vfsoptlist *opts = data; 1499 int error, jsys; 1500 1501 /* 1502 * sysvsem is a jailsys integer. 1503 * It must be "disable" if the parent jail is disabled. 1504 */ 1505 error = vfs_copyopt(opts, "sysvsem", &jsys, sizeof(jsys)); 1506 if (error != ENOENT) { 1507 if (error != 0) 1508 return (error); 1509 switch (jsys) { 1510 case JAIL_SYS_DISABLE: 1511 break; 1512 case JAIL_SYS_NEW: 1513 case JAIL_SYS_INHERIT: 1514 prison_lock(pr->pr_parent); 1515 prpr = osd_jail_get(pr->pr_parent, sem_prison_slot); 1516 prison_unlock(pr->pr_parent); 1517 if (prpr == NULL) 1518 return (EPERM); 1519 break; 1520 default: 1521 return (EINVAL); 1522 } 1523 } 1524 1525 return (0); 1526 } 1527 1528 static int 1529 sem_prison_set(void *obj, void *data) 1530 { 1531 struct prison *pr = obj; 1532 struct prison *tpr, *orpr, *nrpr, *trpr; 1533 struct vfsoptlist *opts = data; 1534 void *rsv; 1535 int jsys, descend; 1536 1537 /* 1538 * sysvsem controls which jail is the root of the associated sems (this 1539 * jail or same as the parent), or if the feature is available at all. 1540 */ 1541 if (vfs_copyopt(opts, "sysvsem", &jsys, sizeof(jsys)) == ENOENT) 1542 jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0) 1543 ? JAIL_SYS_INHERIT 1544 : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0) 1545 ? JAIL_SYS_DISABLE 1546 : -1; 1547 if (jsys == JAIL_SYS_DISABLE) { 1548 prison_lock(pr); 1549 orpr = osd_jail_get(pr, sem_prison_slot); 1550 if (orpr != NULL) 1551 osd_jail_del(pr, sem_prison_slot); 1552 prison_unlock(pr); 1553 if (orpr != NULL) { 1554 if (orpr == pr) 1555 sem_prison_cleanup(pr); 1556 /* Disable all child jails as well. */ 1557 FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { 1558 prison_lock(tpr); 1559 trpr = osd_jail_get(tpr, sem_prison_slot); 1560 if (trpr != NULL) { 1561 osd_jail_del(tpr, sem_prison_slot); 1562 prison_unlock(tpr); 1563 if (trpr == tpr) 1564 sem_prison_cleanup(tpr); 1565 } else { 1566 prison_unlock(tpr); 1567 descend = 0; 1568 } 1569 } 1570 } 1571 } else if (jsys != -1) { 1572 if (jsys == JAIL_SYS_NEW) 1573 nrpr = pr; 1574 else { 1575 prison_lock(pr->pr_parent); 1576 nrpr = osd_jail_get(pr->pr_parent, sem_prison_slot); 1577 prison_unlock(pr->pr_parent); 1578 } 1579 rsv = osd_reserve(sem_prison_slot); 1580 prison_lock(pr); 1581 orpr = osd_jail_get(pr, sem_prison_slot); 1582 if (orpr != nrpr) 1583 (void)osd_jail_set_reserved(pr, sem_prison_slot, rsv, 1584 nrpr); 1585 else 1586 osd_free_reserved(rsv); 1587 prison_unlock(pr); 1588 if (orpr != nrpr) { 1589 if (orpr == pr) 1590 sem_prison_cleanup(pr); 1591 if (orpr != NULL) { 1592 /* Change child jails matching the old root, */ 1593 FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { 1594 prison_lock(tpr); 1595 trpr = osd_jail_get(tpr, 1596 sem_prison_slot); 1597 if (trpr == orpr) { 1598 (void)osd_jail_set(tpr, 1599 sem_prison_slot, nrpr); 1600 prison_unlock(tpr); 1601 if (trpr == tpr) 1602 sem_prison_cleanup(tpr); 1603 } else { 1604 prison_unlock(tpr); 1605 descend = 0; 1606 } 1607 } 1608 } 1609 } 1610 } 1611 1612 return (0); 1613 } 1614 1615 static int 1616 sem_prison_get(void *obj, void *data) 1617 { 1618 struct prison *pr = obj; 1619 struct prison *rpr; 1620 struct vfsoptlist *opts = data; 1621 int error, jsys; 1622 1623 /* Set sysvsem based on the jail's root prison. */ 1624 prison_lock(pr); 1625 rpr = osd_jail_get(pr, sem_prison_slot); 1626 prison_unlock(pr); 1627 jsys = rpr == NULL ? JAIL_SYS_DISABLE 1628 : rpr == pr ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; 1629 error = vfs_setopt(opts, "sysvsem", &jsys, sizeof(jsys)); 1630 if (error == ENOENT) 1631 error = 0; 1632 return (error); 1633 } 1634 1635 static int 1636 sem_prison_remove(void *obj, void *data __unused) 1637 { 1638 struct prison *pr = obj; 1639 struct prison *rpr; 1640 1641 prison_lock(pr); 1642 rpr = osd_jail_get(pr, sem_prison_slot); 1643 prison_unlock(pr); 1644 if (rpr == pr) 1645 sem_prison_cleanup(pr); 1646 return (0); 1647 } 1648 1649 static void 1650 sem_prison_cleanup(struct prison *pr) 1651 { 1652 int i; 1653 1654 /* Remove any sems that belong to this jail. */ 1655 mtx_lock(&sem_mtx); 1656 for (i = 0; i < seminfo.semmni; i++) { 1657 if ((sema[i].u.sem_perm.mode & SEM_ALLOC) && 1658 sema[i].cred != NULL && sema[i].cred->cr_prison == pr) { 1659 mtx_lock(&sema_mtx[i]); 1660 sem_remove(i, NULL); 1661 mtx_unlock(&sema_mtx[i]); 1662 } 1663 } 1664 mtx_unlock(&sem_mtx); 1665 } 1666 1667 SYSCTL_JAIL_PARAM_SYS_NODE(sysvsem, CTLFLAG_RW, "SYSV semaphores"); 1668 1669 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 1670 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 1671 1672 /* XXX casting to (sy_call_t *) is bogus, as usual. */ 1673 static sy_call_t *semcalls[] = { 1674 (sy_call_t *)freebsd7___semctl, (sy_call_t *)sys_semget, 1675 (sy_call_t *)sys_semop 1676 }; 1677 1678 /* 1679 * Entry point for all SEM calls. 1680 */ 1681 int 1682 sys_semsys(td, uap) 1683 struct thread *td; 1684 /* XXX actually varargs. */ 1685 struct semsys_args /* { 1686 int which; 1687 int a2; 1688 int a3; 1689 int a4; 1690 int a5; 1691 } */ *uap; 1692 { 1693 int error; 1694 1695 if (uap->which < 0 || uap->which >= nitems(semcalls)) 1696 return (EINVAL); 1697 error = (*semcalls[uap->which])(td, &uap->a2); 1698 return (error); 1699 } 1700 1701 #ifndef CP 1702 #define CP(src, dst, fld) do { (dst).fld = (src).fld; } while (0) 1703 #endif 1704 1705 #ifndef _SYS_SYSPROTO_H_ 1706 struct freebsd7___semctl_args { 1707 int semid; 1708 int semnum; 1709 int cmd; 1710 union semun_old *arg; 1711 }; 1712 #endif 1713 int 1714 freebsd7___semctl(struct thread *td, struct freebsd7___semctl_args *uap) 1715 { 1716 struct semid_ds_old dsold; 1717 struct semid_ds dsbuf; 1718 union semun_old arg; 1719 union semun semun; 1720 register_t rval; 1721 int error; 1722 1723 switch (uap->cmd) { 1724 case SEM_STAT: 1725 case IPC_SET: 1726 case IPC_STAT: 1727 case GETALL: 1728 case SETVAL: 1729 case SETALL: 1730 error = copyin(uap->arg, &arg, sizeof(arg)); 1731 if (error) 1732 return (error); 1733 break; 1734 } 1735 1736 switch (uap->cmd) { 1737 case SEM_STAT: 1738 case IPC_STAT: 1739 semun.buf = &dsbuf; 1740 break; 1741 case IPC_SET: 1742 error = copyin(arg.buf, &dsold, sizeof(dsold)); 1743 if (error) 1744 return (error); 1745 ipcperm_old2new(&dsold.sem_perm, &dsbuf.sem_perm); 1746 CP(dsold, dsbuf, sem_base); 1747 CP(dsold, dsbuf, sem_nsems); 1748 CP(dsold, dsbuf, sem_otime); 1749 CP(dsold, dsbuf, sem_ctime); 1750 semun.buf = &dsbuf; 1751 break; 1752 case GETALL: 1753 case SETALL: 1754 semun.array = arg.array; 1755 break; 1756 case SETVAL: 1757 semun.val = arg.val; 1758 break; 1759 } 1760 1761 error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun, 1762 &rval); 1763 if (error) 1764 return (error); 1765 1766 switch (uap->cmd) { 1767 case SEM_STAT: 1768 case IPC_STAT: 1769 bzero(&dsold, sizeof(dsold)); 1770 ipcperm_new2old(&dsbuf.sem_perm, &dsold.sem_perm); 1771 CP(dsbuf, dsold, sem_base); 1772 CP(dsbuf, dsold, sem_nsems); 1773 CP(dsbuf, dsold, sem_otime); 1774 CP(dsbuf, dsold, sem_ctime); 1775 error = copyout(&dsold, arg.buf, sizeof(dsold)); 1776 break; 1777 } 1778 1779 if (error == 0) 1780 td->td_retval[0] = rval; 1781 return (error); 1782 } 1783 1784 #endif /* COMPAT_FREEBSD{4,5,6,7} */ 1785 1786 #ifdef COMPAT_FREEBSD32 1787 1788 int 1789 freebsd32_semsys(struct thread *td, struct freebsd32_semsys_args *uap) 1790 { 1791 1792 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 1793 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 1794 switch (uap->which) { 1795 case 0: 1796 return (freebsd7_freebsd32_semctl(td, 1797 (struct freebsd7_freebsd32_semctl_args *)&uap->a2)); 1798 default: 1799 return (sys_semsys(td, (struct semsys_args *)uap)); 1800 } 1801 #else 1802 return (nosys(td, NULL)); 1803 #endif 1804 } 1805 1806 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 1807 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 1808 int 1809 freebsd7_freebsd32_semctl(struct thread *td, 1810 struct freebsd7_freebsd32_semctl_args *uap) 1811 { 1812 struct semid_ds32_old dsbuf32; 1813 struct semid_ds dsbuf; 1814 union semun semun; 1815 union semun32 arg; 1816 register_t rval; 1817 int error; 1818 1819 switch (uap->cmd) { 1820 case SEM_STAT: 1821 case IPC_SET: 1822 case IPC_STAT: 1823 case GETALL: 1824 case SETVAL: 1825 case SETALL: 1826 error = copyin(uap->arg, &arg, sizeof(arg)); 1827 if (error) 1828 return (error); 1829 break; 1830 } 1831 1832 switch (uap->cmd) { 1833 case SEM_STAT: 1834 case IPC_STAT: 1835 semun.buf = &dsbuf; 1836 break; 1837 case IPC_SET: 1838 error = copyin(PTRIN(arg.buf), &dsbuf32, sizeof(dsbuf32)); 1839 if (error) 1840 return (error); 1841 freebsd32_ipcperm_old_in(&dsbuf32.sem_perm, &dsbuf.sem_perm); 1842 PTRIN_CP(dsbuf32, dsbuf, sem_base); 1843 CP(dsbuf32, dsbuf, sem_nsems); 1844 CP(dsbuf32, dsbuf, sem_otime); 1845 CP(dsbuf32, dsbuf, sem_ctime); 1846 semun.buf = &dsbuf; 1847 break; 1848 case GETALL: 1849 case SETALL: 1850 semun.array = PTRIN(arg.array); 1851 break; 1852 case SETVAL: 1853 semun.val = arg.val; 1854 break; 1855 } 1856 1857 error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun, 1858 &rval); 1859 if (error) 1860 return (error); 1861 1862 switch (uap->cmd) { 1863 case SEM_STAT: 1864 case IPC_STAT: 1865 bzero(&dsbuf32, sizeof(dsbuf32)); 1866 freebsd32_ipcperm_old_out(&dsbuf.sem_perm, &dsbuf32.sem_perm); 1867 PTROUT_CP(dsbuf, dsbuf32, sem_base); 1868 CP(dsbuf, dsbuf32, sem_nsems); 1869 CP(dsbuf, dsbuf32, sem_otime); 1870 CP(dsbuf, dsbuf32, sem_ctime); 1871 error = copyout(&dsbuf32, PTRIN(arg.buf), sizeof(dsbuf32)); 1872 break; 1873 } 1874 1875 if (error == 0) 1876 td->td_retval[0] = rval; 1877 return (error); 1878 } 1879 #endif 1880 1881 int 1882 freebsd32_semctl(struct thread *td, struct freebsd32_semctl_args *uap) 1883 { 1884 struct semid_ds32 dsbuf32; 1885 struct semid_ds dsbuf; 1886 union semun semun; 1887 union semun32 arg; 1888 register_t rval; 1889 int error; 1890 1891 switch (uap->cmd) { 1892 case SEM_STAT: 1893 case IPC_SET: 1894 case IPC_STAT: 1895 case GETALL: 1896 case SETVAL: 1897 case SETALL: 1898 error = copyin(uap->arg, &arg, sizeof(arg)); 1899 if (error) 1900 return (error); 1901 break; 1902 } 1903 1904 switch (uap->cmd) { 1905 case SEM_STAT: 1906 case IPC_STAT: 1907 semun.buf = &dsbuf; 1908 break; 1909 case IPC_SET: 1910 error = copyin(PTRIN(arg.buf), &dsbuf32, sizeof(dsbuf32)); 1911 if (error) 1912 return (error); 1913 freebsd32_ipcperm_in(&dsbuf32.sem_perm, &dsbuf.sem_perm); 1914 PTRIN_CP(dsbuf32, dsbuf, sem_base); 1915 CP(dsbuf32, dsbuf, sem_nsems); 1916 CP(dsbuf32, dsbuf, sem_otime); 1917 CP(dsbuf32, dsbuf, sem_ctime); 1918 semun.buf = &dsbuf; 1919 break; 1920 case GETALL: 1921 case SETALL: 1922 semun.array = PTRIN(arg.array); 1923 break; 1924 case SETVAL: 1925 semun.val = arg.val; 1926 break; 1927 } 1928 1929 error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun, 1930 &rval); 1931 if (error) 1932 return (error); 1933 1934 switch (uap->cmd) { 1935 case SEM_STAT: 1936 case IPC_STAT: 1937 bzero(&dsbuf32, sizeof(dsbuf32)); 1938 freebsd32_ipcperm_out(&dsbuf.sem_perm, &dsbuf32.sem_perm); 1939 PTROUT_CP(dsbuf, dsbuf32, sem_base); 1940 CP(dsbuf, dsbuf32, sem_nsems); 1941 CP(dsbuf, dsbuf32, sem_otime); 1942 CP(dsbuf, dsbuf32, sem_ctime); 1943 error = copyout(&dsbuf32, PTRIN(arg.buf), sizeof(dsbuf32)); 1944 break; 1945 } 1946 1947 if (error == 0) 1948 td->td_retval[0] = rval; 1949 return (error); 1950 } 1951 1952 #endif /* COMPAT_FREEBSD32 */ 1953