xref: /freebsd/sys/kern/sysv_sem.c (revision aa0a1e58f0189b0fde359a8bda032887e72057fa)
1 /*-
2  * Implementation of SVID semaphores
3  *
4  * Author:  Daniel Boulet
5  *
6  * This software is provided ``AS IS'' without any warranties of any kind.
7  */
8 /*-
9  * Copyright (c) 2003-2005 McAfee, Inc.
10  * All rights reserved.
11  *
12  * This software was developed for the FreeBSD Project in part by McAfee
13  * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
14  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
15  * program.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions
19  * are met:
20  * 1. Redistributions of source code must retain the above copyright
21  *    notice, this list of conditions and the following disclaimer.
22  * 2. Redistributions in binary form must reproduce the above copyright
23  *    notice, this list of conditions and the following disclaimer in the
24  *    documentation and/or other materials provided with the distribution.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41 
42 #include "opt_compat.h"
43 #include "opt_sysvipc.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/sysproto.h>
48 #include <sys/eventhandler.h>
49 #include <sys/kernel.h>
50 #include <sys/proc.h>
51 #include <sys/lock.h>
52 #include <sys/module.h>
53 #include <sys/mutex.h>
54 #include <sys/sem.h>
55 #include <sys/syscall.h>
56 #include <sys/syscallsubr.h>
57 #include <sys/sysent.h>
58 #include <sys/sysctl.h>
59 #include <sys/uio.h>
60 #include <sys/malloc.h>
61 #include <sys/jail.h>
62 
63 #include <security/mac/mac_framework.h>
64 
/* Declare the "sysv_sem" feature together with its description string. */
65 FEATURE(sysv_sem, "System V semaphores support");
66 
/* malloc(9) type used for the pools allocated in seminit(). */
67 static MALLOC_DEFINE(M_SEM, "sem", "SVID compatible semaphores");
68 
/*
 * Debug tracing.  With SEM_DEBUG defined DPRINTF(a) expands to "printf a",
 * so the argument must be a complete, parenthesized printf argument list,
 * e.g. DPRINTF(("value %d\n", v)).  Without SEM_DEBUG it expands to nothing.
 */
69 #ifdef SEM_DEBUG
70 #define DPRINTF(a)	printf a
71 #else
72 #define DPRINTF(a)
73 #endif
74 
/* Module setup/teardown, the process-exit hook and small local helpers. */
75 static int seminit(void);
76 static int sysvsem_modload(struct module *, int, void *);
77 static int semunload(void);
78 static void semexit_myhook(void *arg, struct proc *p);
79 static int sysctl_sema(SYSCTL_HANDLER_ARGS);
80 static int semvalid(int semid, struct semid_kernel *semakptr);
81 
/*
 * System call entry points; only declared here when <sys/sysproto.h> has
 * not already provided them (it defines _SYS_SYSPROTO_H_).
 */
82 #ifndef _SYS_SYSPROTO_H_
83 struct __semctl_args;
84 int __semctl(struct thread *td, struct __semctl_args *uap);
85 struct semget_args;
86 int semget(struct thread *td, struct semget_args *uap);
87 struct semop_args;
88 int semop(struct thread *td, struct semop_args *uap);
89 #endif
90 
/* Undo bookkeeping; all three assert that the undo mutex is held. */
91 static struct sem_undo *semu_alloc(struct thread *td);
92 static int semundo_adjust(struct thread *td, struct sem_undo **supptr,
93     int semid, int semseq, int semnum, int adjval);
94 static void semundo_clear(int semid, int semnum);
95 
/*
 * File-scope state.  The pools (sem, sema, sema_mtx, semu) are allocated in
 * seminit() and released in semunload().
 */
96 static struct mtx	sem_mtx;	/* semaphore global lock */
97 static struct mtx sem_undo_mtx;	/* undo lists lock, see SEMUNDO_LOCK() */
98 static int	semtot = 0;	/* # of sem[] entries handed out to sets */
99 static struct semid_kernel *sema;	/* semaphore id pool */
100 static struct mtx *sema_mtx;	/* semaphore id pool mutexes, one per sema[] slot */
101 static struct sem *sem;		/* semaphore pool */
102 LIST_HEAD(, sem_undo) semu_list;	/* list of active undo structures */
103 LIST_HEAD(, sem_undo) semu_free_list;	/* list of free undo structures */
104 static int	*semu;		/* undo structure pool, addressed through SEMU() */
105 static eventhandler_tag semexit_tag;	/* process_exit hook registered in seminit() */
106 
/*
 * Locking helpers for the undo lists (semu_list, semu_free_list and the
 * sem_undo structures linked on them), all backed by sem_undo_mtx.
 *
 * The expansions deliberately carry no trailing semicolon; the call site
 * supplies it.  That way a use such as "SEMUNDO_LOCK();" is exactly one
 * statement and stays valid as the body of an if that has an else branch.
 */
#define SEMUNDO_MTX		sem_undo_mtx
#define SEMUNDO_LOCK()		mtx_lock(&SEMUNDO_MTX)
#define SEMUNDO_UNLOCK()	mtx_unlock(&SEMUNDO_MTX)
#define SEMUNDO_LOCKASSERT(how)	mtx_assert(&SEMUNDO_MTX, (how))
111 
/*
 * A single semaphore.  A semaphore set is a contiguous run of these inside
 * the sem[] pool, starting at the set's sem_base (see semget()).
 */
112 struct sem {
113 	u_short	semval;		/* semaphore value */
114 	pid_t	sempid;		/* pid of last operation */
115 	u_short	semncnt;	/* # awaiting semval > cval */
116 	u_short	semzcnt;	/* # awaiting semval = 0 */
117 };
118 
119 /*
 * Undo structure (one per process)
 *
 * Lives either on semu_list (in use, un_proc set by semu_alloc()) or on
 * semu_free_list.  Each entry remembers the adjustment recorded through
 * semundo_adjust() for one (semid, semaphore number) pair.
 */
122 struct sem_undo {
123 	LIST_ENTRY(sem_undo) un_next;	/* ptr to next active undo structure */
124 	struct	proc *un_proc;		/* owner of this structure */
125 	short	un_cnt;			/* # of active entries */
126 	struct undo {
127 		short	un_adjval;	/* adjust on exit values */
128 		short	un_num;		/* semaphore # */
129 		int	un_id;		/* semid */
130 		unsigned short un_seq;	/* semseq passed to semundo_adjust() */
	/*
	 * Declared with one element; every pool slot is seminfo.semusz bytes
	 * (see SEMUSZ and SEMU), which leaves room for further entries.
	 */
131 	} un_ent[1];			/* undo entries */
132 };
133 
134 /*
 * Configuration parameters
 *
 * Compile-time defaults used to initialize seminfo below.  Those guarded by
 * #ifndef may be overridden when the kernel is built; seminit() additionally
 * fetches the corresponding kern.ipc.* tunables into seminfo.
 */
137 #ifndef SEMMNI
138 #define SEMMNI	50		/* # of semaphore identifiers */
139 #endif
140 #ifndef SEMMNS
141 #define SEMMNS	340		/* # of semaphores in system */
142 #endif
143 #ifndef SEMUME
144 #define SEMUME	50		/* max # of undo entries per process */
145 #endif
146 #ifndef SEMMNU
147 #define SEMMNU	150		/* # of undo structures in system */
148 #endif
149 
150 /* shouldn't need tuning */
151 #ifndef SEMMAP
152 #define SEMMAP	30		/* # of entries in semaphore map */
153 #endif
154 #ifndef SEMMSL
155 #define SEMMSL	SEMMNS		/* max # of semaphores per id */
156 #endif
157 #ifndef SEMOPM
158 #define SEMOPM	100		/* max # of operations per semop call */
159 #endif
160 
/* These two are not #ifndef-guarded, so they cannot be overridden at build. */
161 #define SEMVMX	32767		/* semaphore maximum value */
162 #define SEMAEM	16384		/* adjust on exit max value */
163 
164 /*
 * Due to the way semaphore memory is allocated, we have to ensure that
 * SEMUSZ is properly aligned.
 *
 * The undo pool is one allocation that SEMU() slices into slots, so the
 * slot size is rounded up to a multiple of sizeof(long).
 */
167 
/* Round bytes up to the next multiple of sizeof(long). */
168 #define SEM_ALIGN(bytes) (((bytes) + (sizeof(long) - 1)) & ~(sizeof(long) - 1))
169 
170 /* actual size of an undo structure */
/* i.e. the header plus SEMUME entries of un_ent[], aligned as above. */
171 #define SEMUSZ	SEM_ALIGN(offsetof(struct sem_undo, un_ent[SEMUME]))
173 
/*
 * Macro to find a particular sem_undo vector
 *
 * Yields a pointer to slot number ix of the undo pool: the pool base (semu)
 * plus ix strides of seminfo.semusz bytes.  The argument is parenthesized so
 * that an expression such as SEMU(a + b) indexes slot (a + b) rather than
 * adding a to the address and scaling only b.
 */
#define SEMU(ix) \
	((struct sem_undo *)(((intptr_t)semu) + (ix) * seminfo.semusz))
179 
180 /*
181  * semaphore info struct
182  */
183 struct seminfo seminfo = {
184                 SEMMAP,         /* # of entries in semaphore map */
185                 SEMMNI,         /* # of semaphore identifiers */
186                 SEMMNS,         /* # of semaphores in system */
187                 SEMMNU,         /* # of undo structures in system */
188                 SEMMSL,         /* max # of semaphores per id */
189                 SEMOPM,         /* max # of operations per semop call */
190                 SEMUME,         /* max # of undo entries per process */
191                 SEMUSZ,         /* size in bytes of undo structure */
192                 SEMVMX,         /* semaphore maximum value */
193                 SEMAEM          /* adjust on exit max value */
194 };
195 
/*
 * kern.ipc.sem* sysctl nodes exposing the members of seminfo.  semmap,
 * semmsl, semvmx and semaem are declared CTLFLAG_RW; the remaining limits
 * are CTLFLAG_RDTUN.  kern.ipc.sema is an opaque read-only node served by
 * sysctl_sema().
 */
196 SYSCTL_INT(_kern_ipc, OID_AUTO, semmap, CTLFLAG_RW, &seminfo.semmap, 0,
197     "Number of entries in the semaphore map");
198 SYSCTL_INT(_kern_ipc, OID_AUTO, semmni, CTLFLAG_RDTUN, &seminfo.semmni, 0,
199     "Number of semaphore identifiers");
200 SYSCTL_INT(_kern_ipc, OID_AUTO, semmns, CTLFLAG_RDTUN, &seminfo.semmns, 0,
201     "Maximum number of semaphores in the system");
202 SYSCTL_INT(_kern_ipc, OID_AUTO, semmnu, CTLFLAG_RDTUN, &seminfo.semmnu, 0,
203     "Maximum number of undo structures in the system");
204 SYSCTL_INT(_kern_ipc, OID_AUTO, semmsl, CTLFLAG_RW, &seminfo.semmsl, 0,
205     "Max semaphores per id");
206 SYSCTL_INT(_kern_ipc, OID_AUTO, semopm, CTLFLAG_RDTUN, &seminfo.semopm, 0,
207     "Max operations per semop call");
208 SYSCTL_INT(_kern_ipc, OID_AUTO, semume, CTLFLAG_RDTUN, &seminfo.semume, 0,
209     "Max undo entries per process");
210 SYSCTL_INT(_kern_ipc, OID_AUTO, semusz, CTLFLAG_RDTUN, &seminfo.semusz, 0,
211     "Size in bytes of undo structure");
212 SYSCTL_INT(_kern_ipc, OID_AUTO, semvmx, CTLFLAG_RW, &seminfo.semvmx, 0,
213     "Semaphore maximum value");
214 SYSCTL_INT(_kern_ipc, OID_AUTO, semaem, CTLFLAG_RW, &seminfo.semaem, 0,
215     "Adjust on exit max value");
216 SYSCTL_PROC(_kern_ipc, OID_AUTO, sema, CTLTYPE_OPAQUE | CTLFLAG_RD,
217     NULL, 0, sysctl_sema, "", "Semaphore id pool");
218 
/*
 * System calls provided by this module.  seminit() hands the table to
 * syscall_helper_register() and semunload() to syscall_helper_unregister().
 * The semsys and freebsd7___semctl entries are only present when one of the
 * COMPAT_FREEBSD4..7 options is configured.
 */
219 static struct syscall_helper_data sem_syscalls[] = {
220 	SYSCALL_INIT_HELPER(__semctl),
221 	SYSCALL_INIT_HELPER(semget),
222 	SYSCALL_INIT_HELPER(semop),
223 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
224     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
225 	SYSCALL_INIT_HELPER(semsys),
226 	SYSCALL_INIT_HELPER(freebsd7___semctl),
227 #endif
228 	SYSCALL_INIT_LAST
229 };
230 
/*
 * 32-bit compatibility system calls, built only with COMPAT_FREEBSD32.
 * The table is registered with syscall32_helper_register() in seminit() and
 * removed with syscall32_helper_unregister() in semunload().
 */
231 #ifdef COMPAT_FREEBSD32
232 #include <compat/freebsd32/freebsd32.h>
233 #include <compat/freebsd32/freebsd32_ipc.h>
234 #include <compat/freebsd32/freebsd32_proto.h>
235 #include <compat/freebsd32/freebsd32_signal.h>
236 #include <compat/freebsd32/freebsd32_syscall.h>
237 #include <compat/freebsd32/freebsd32_util.h>
238 
239 static struct syscall_helper_data sem32_syscalls[] = {
240 	SYSCALL32_INIT_HELPER(freebsd32_semctl),
241 	SYSCALL32_INIT_HELPER(semget),
242 	SYSCALL32_INIT_HELPER(semop),
243 	SYSCALL32_INIT_HELPER(freebsd32_semsys),
244 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
245     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
246 	SYSCALL32_INIT_HELPER(freebsd7_freebsd32_semctl),
247 #endif
248 	SYSCALL_INIT_LAST
249 };
250 #endif
251 
252 static int
253 seminit(void)
254 {
255 	int i, error;
256 
257 	TUNABLE_INT_FETCH("kern.ipc.semmap", &seminfo.semmap);
258 	TUNABLE_INT_FETCH("kern.ipc.semmni", &seminfo.semmni);
259 	TUNABLE_INT_FETCH("kern.ipc.semmns", &seminfo.semmns);
260 	TUNABLE_INT_FETCH("kern.ipc.semmnu", &seminfo.semmnu);
261 	TUNABLE_INT_FETCH("kern.ipc.semmsl", &seminfo.semmsl);
262 	TUNABLE_INT_FETCH("kern.ipc.semopm", &seminfo.semopm);
263 	TUNABLE_INT_FETCH("kern.ipc.semume", &seminfo.semume);
264 	TUNABLE_INT_FETCH("kern.ipc.semusz", &seminfo.semusz);
265 	TUNABLE_INT_FETCH("kern.ipc.semvmx", &seminfo.semvmx);
266 	TUNABLE_INT_FETCH("kern.ipc.semaem", &seminfo.semaem);
267 
268 	sem = malloc(sizeof(struct sem) * seminfo.semmns, M_SEM, M_WAITOK);
269 	sema = malloc(sizeof(struct semid_kernel) * seminfo.semmni, M_SEM,
270 	    M_WAITOK);
271 	sema_mtx = malloc(sizeof(struct mtx) * seminfo.semmni, M_SEM,
272 	    M_WAITOK | M_ZERO);
273 	semu = malloc(seminfo.semmnu * seminfo.semusz, M_SEM, M_WAITOK);
274 
275 	for (i = 0; i < seminfo.semmni; i++) {
276 		sema[i].u.sem_base = 0;
277 		sema[i].u.sem_perm.mode = 0;
278 		sema[i].u.sem_perm.seq = 0;
279 #ifdef MAC
280 		mac_sysvsem_init(&sema[i]);
281 #endif
282 	}
283 	for (i = 0; i < seminfo.semmni; i++)
284 		mtx_init(&sema_mtx[i], "semid", NULL, MTX_DEF);
285 	LIST_INIT(&semu_free_list);
286 	for (i = 0; i < seminfo.semmnu; i++) {
287 		struct sem_undo *suptr = SEMU(i);
288 		suptr->un_proc = NULL;
289 		LIST_INSERT_HEAD(&semu_free_list, suptr, un_next);
290 	}
291 	LIST_INIT(&semu_list);
292 	mtx_init(&sem_mtx, "sem", NULL, MTX_DEF);
293 	mtx_init(&sem_undo_mtx, "semu", NULL, MTX_DEF);
294 	semexit_tag = EVENTHANDLER_REGISTER(process_exit, semexit_myhook, NULL,
295 	    EVENTHANDLER_PRI_ANY);
296 
297 	error = syscall_helper_register(sem_syscalls);
298 	if (error != 0)
299 		return (error);
300 #ifdef COMPAT_FREEBSD32
301 	error = syscall32_helper_register(sem32_syscalls);
302 	if (error != 0)
303 		return (error);
304 #endif
305 	return (0);
306 }
307 
308 static int
309 semunload(void)
310 {
311 	int i;
312 
313 	/* XXXKIB */
314 	if (semtot != 0)
315 		return (EBUSY);
316 
317 #ifdef COMPAT_FREEBSD32
318 	syscall32_helper_unregister(sem32_syscalls);
319 #endif
320 	syscall_helper_unregister(sem_syscalls);
321 	EVENTHANDLER_DEREGISTER(process_exit, semexit_tag);
322 #ifdef MAC
323 	for (i = 0; i < seminfo.semmni; i++)
324 		mac_sysvsem_destroy(&sema[i]);
325 #endif
326 	free(sem, M_SEM);
327 	free(sema, M_SEM);
328 	free(semu, M_SEM);
329 	for (i = 0; i < seminfo.semmni; i++)
330 		mtx_destroy(&sema_mtx[i]);
331 	free(sema_mtx, M_SEM);
332 	mtx_destroy(&sem_mtx);
333 	mtx_destroy(&sem_undo_mtx);
334 	return (0);
335 }
336 
337 static int
338 sysvsem_modload(struct module *module, int cmd, void *arg)
339 {
340 	int error = 0;
341 
342 	switch (cmd) {
343 	case MOD_LOAD:
344 		error = seminit();
345 		if (error != 0)
346 			semunload();
347 		break;
348 	case MOD_UNLOAD:
349 		error = semunload();
350 		break;
351 	case MOD_SHUTDOWN:
352 		break;
353 	default:
354 		error = EINVAL;
355 		break;
356 	}
357 	return (error);
358 }
359 
/*
 * Module glue: names the module "sysvsem", routes its events to
 * sysvsem_modload() and passes no extra argument.
 */
360 static moduledata_t sysvsem_mod = {
361 	"sysvsem",
362 	&sysvsem_modload,
363 	NULL
364 };
365 
/* Declared at SI_SUB_SYSV_SEM / SI_ORDER_FIRST, module version 1. */
366 DECLARE_MODULE(sysvsem, sysvsem_mod, SI_SUB_SYSV_SEM, SI_ORDER_FIRST);
367 MODULE_VERSION(sysvsem, 1);
368 
369 /*
370  * Allocate a new sem_undo structure for a process
371  * (returns ptr to structure or NULL if no more room)
372  */
373 
374 static struct sem_undo *
375 semu_alloc(struct thread *td)
376 {
377 	struct sem_undo *suptr;
378 
379 	SEMUNDO_LOCKASSERT(MA_OWNED);
380 	if ((suptr = LIST_FIRST(&semu_free_list)) == NULL)
381 		return (NULL);
382 	LIST_REMOVE(suptr, un_next);
383 	LIST_INSERT_HEAD(&semu_list, suptr, un_next);
384 	suptr->un_cnt = 0;
385 	suptr->un_proc = td->td_proc;
386 	return (suptr);
387 }
388 
389 static int
390 semu_try_free(struct sem_undo *suptr)
391 {
392 
393 	SEMUNDO_LOCKASSERT(MA_OWNED);
394 
395 	if (suptr->un_cnt != 0)
396 		return (0);
397 	LIST_REMOVE(suptr, un_next);
398 	LIST_INSERT_HEAD(&semu_free_list, suptr, un_next);
399 	return (1);
400 }
401 
402 /*
403  * Adjust a particular entry for a particular proc
404  */
405 
406 static int
407 semundo_adjust(struct thread *td, struct sem_undo **supptr, int semid,
408     int semseq, int semnum, int adjval)
409 {
410 	struct proc *p = td->td_proc;
411 	struct sem_undo *suptr;
412 	struct undo *sunptr;
413 	int i;
414 
415 	SEMUNDO_LOCKASSERT(MA_OWNED);
416 	/* Look for and remember the sem_undo if the caller doesn't provide
417 	   it */
418 
419 	suptr = *supptr;
420 	if (suptr == NULL) {
421 		LIST_FOREACH(suptr, &semu_list, un_next) {
422 			if (suptr->un_proc == p) {
423 				*supptr = suptr;
424 				break;
425 			}
426 		}
427 		if (suptr == NULL) {
428 			if (adjval == 0)
429 				return(0);
430 			suptr = semu_alloc(td);
431 			if (suptr == NULL)
432 				return (ENOSPC);
433 			*supptr = suptr;
434 		}
435 	}
436 
437 	/*
438 	 * Look for the requested entry and adjust it (delete if adjval becomes
439 	 * 0).
440 	 */
441 	sunptr = &suptr->un_ent[0];
442 	for (i = 0; i < suptr->un_cnt; i++, sunptr++) {
443 		if (sunptr->un_id != semid || sunptr->un_num != semnum)
444 			continue;
445 		if (adjval != 0) {
446 			adjval += sunptr->un_adjval;
447 			if (adjval > seminfo.semaem || adjval < -seminfo.semaem)
448 				return (ERANGE);
449 		}
450 		sunptr->un_adjval = adjval;
451 		if (sunptr->un_adjval == 0) {
452 			suptr->un_cnt--;
453 			if (i < suptr->un_cnt)
454 				suptr->un_ent[i] =
455 				    suptr->un_ent[suptr->un_cnt];
456 			if (suptr->un_cnt == 0)
457 				semu_try_free(suptr);
458 		}
459 		return (0);
460 	}
461 
462 	/* Didn't find the right entry - create it */
463 	if (adjval == 0)
464 		return (0);
465 	if (adjval > seminfo.semaem || adjval < -seminfo.semaem)
466 		return (ERANGE);
467 	if (suptr->un_cnt != seminfo.semume) {
468 		sunptr = &suptr->un_ent[suptr->un_cnt];
469 		suptr->un_cnt++;
470 		sunptr->un_adjval = adjval;
471 		sunptr->un_id = semid;
472 		sunptr->un_num = semnum;
473 		sunptr->un_seq = semseq;
474 	} else
475 		return (EINVAL);
476 	return (0);
477 }
478 
479 static void
480 semundo_clear(int semid, int semnum)
481 {
482 	struct sem_undo *suptr, *suptr1;
483 	struct undo *sunptr;
484 	int i;
485 
486 	SEMUNDO_LOCKASSERT(MA_OWNED);
487 	LIST_FOREACH_SAFE(suptr, &semu_list, un_next, suptr1) {
488 		sunptr = &suptr->un_ent[0];
489 		for (i = 0; i < suptr->un_cnt; i++, sunptr++) {
490 			if (sunptr->un_id != semid)
491 				continue;
492 			if (semnum == -1 || sunptr->un_num == semnum) {
493 				suptr->un_cnt--;
494 				if (i < suptr->un_cnt) {
495 					suptr->un_ent[i] =
496 					    suptr->un_ent[suptr->un_cnt];
497 					continue;
498 				}
499 				semu_try_free(suptr);
500 			}
501 			if (semnum != -1)
502 				break;
503 		}
504 	}
505 }
506 
507 static int
508 semvalid(int semid, struct semid_kernel *semakptr)
509 {
510 
511 	return ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
512 	    semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) ? EINVAL : 0);
513 }
514 
515 /*
516  * Note that the user-mode half of this passes a union, not a pointer.
517  */
518 #ifndef _SYS_SYSPROTO_H_
519 struct __semctl_args {
520 	int	semid;
521 	int	semnum;
522 	int	cmd;
523 	union	semun *arg;
524 };
525 #endif
526 int
527 __semctl(struct thread *td, struct __semctl_args *uap)
528 {
529 	struct semid_ds dsbuf;
530 	union semun arg, semun;
531 	register_t rval;
532 	int error;
533 
534 	switch (uap->cmd) {
535 	case SEM_STAT:
536 	case IPC_SET:
537 	case IPC_STAT:
538 	case GETALL:
539 	case SETVAL:
540 	case SETALL:
541 		error = copyin(uap->arg, &arg, sizeof(arg));
542 		if (error)
543 			return (error);
544 		break;
545 	}
546 
547 	switch (uap->cmd) {
548 	case SEM_STAT:
549 	case IPC_STAT:
550 		semun.buf = &dsbuf;
551 		break;
552 	case IPC_SET:
553 		error = copyin(arg.buf, &dsbuf, sizeof(dsbuf));
554 		if (error)
555 			return (error);
556 		semun.buf = &dsbuf;
557 		break;
558 	case GETALL:
559 	case SETALL:
560 		semun.array = arg.array;
561 		break;
562 	case SETVAL:
563 		semun.val = arg.val;
564 		break;
565 	}
566 
567 	error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun,
568 	    &rval);
569 	if (error)
570 		return (error);
571 
572 	switch (uap->cmd) {
573 	case SEM_STAT:
574 	case IPC_STAT:
575 		error = copyout(&dsbuf, arg.buf, sizeof(dsbuf));
576 		break;
577 	}
578 
579 	if (error == 0)
580 		td->td_retval[0] = rval;
581 	return (error);
582 }
583 
584 int
585 kern_semctl(struct thread *td, int semid, int semnum, int cmd,
586     union semun *arg, register_t *rval)
587 {
588 	u_short *array;
589 	struct ucred *cred = td->td_ucred;
590 	int i, error;
591 	struct semid_ds *sbuf;
592 	struct semid_kernel *semakptr;
593 	struct mtx *sema_mtxp;
594 	u_short usval, count;
595 	int semidx;
596 
597 	DPRINTF(("call to semctl(%d, %d, %d, 0x%p)\n",
598 	    semid, semnum, cmd, arg));
599 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
600 		return (ENOSYS);
601 
602 	array = NULL;
603 
604 	switch(cmd) {
605 	case SEM_STAT:
606 		/*
607 		 * For this command we assume semid is an array index
608 		 * rather than an IPC id.
609 		 */
610 		if (semid < 0 || semid >= seminfo.semmni)
611 			return (EINVAL);
612 		semakptr = &sema[semid];
613 		sema_mtxp = &sema_mtx[semid];
614 		mtx_lock(sema_mtxp);
615 		if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0) {
616 			error = EINVAL;
617 			goto done2;
618 		}
619 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
620 			goto done2;
621 #ifdef MAC
622 		error = mac_sysvsem_check_semctl(cred, semakptr, cmd);
623 		if (error != 0)
624 			goto done2;
625 #endif
626 		bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds));
627 		*rval = IXSEQ_TO_IPCID(semid, semakptr->u.sem_perm);
628 		mtx_unlock(sema_mtxp);
629 		return (0);
630 	}
631 
632 	semidx = IPCID_TO_IX(semid);
633 	if (semidx < 0 || semidx >= seminfo.semmni)
634 		return (EINVAL);
635 
636 	semakptr = &sema[semidx];
637 	sema_mtxp = &sema_mtx[semidx];
638 	if (cmd == IPC_RMID)
639 		mtx_lock(&sem_mtx);
640 	mtx_lock(sema_mtxp);
641 #ifdef MAC
642 	error = mac_sysvsem_check_semctl(cred, semakptr, cmd);
643 	if (error != 0)
644 		goto done2;
645 #endif
646 
647 	error = 0;
648 	*rval = 0;
649 
650 	switch (cmd) {
651 	case IPC_RMID:
652 		if ((error = semvalid(semid, semakptr)) != 0)
653 			goto done2;
654 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M)))
655 			goto done2;
656 		semakptr->u.sem_perm.cuid = cred->cr_uid;
657 		semakptr->u.sem_perm.uid = cred->cr_uid;
658 		semakptr->u.sem_perm.mode = 0;
659 		SEMUNDO_LOCK();
660 		semundo_clear(semidx, -1);
661 		SEMUNDO_UNLOCK();
662 #ifdef MAC
663 		mac_sysvsem_cleanup(semakptr);
664 #endif
665 		wakeup(semakptr);
666 		for (i = 0; i < seminfo.semmni; i++) {
667 			if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
668 			    sema[i].u.sem_base > semakptr->u.sem_base)
669 				mtx_lock_flags(&sema_mtx[i], LOP_DUPOK);
670 		}
671 		for (i = semakptr->u.sem_base - sem; i < semtot; i++)
672 			sem[i] = sem[i + semakptr->u.sem_nsems];
673 		for (i = 0; i < seminfo.semmni; i++) {
674 			if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
675 			    sema[i].u.sem_base > semakptr->u.sem_base) {
676 				sema[i].u.sem_base -= semakptr->u.sem_nsems;
677 				mtx_unlock(&sema_mtx[i]);
678 			}
679 		}
680 		semtot -= semakptr->u.sem_nsems;
681 		break;
682 
683 	case IPC_SET:
684 		if ((error = semvalid(semid, semakptr)) != 0)
685 			goto done2;
686 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M)))
687 			goto done2;
688 		sbuf = arg->buf;
689 		semakptr->u.sem_perm.uid = sbuf->sem_perm.uid;
690 		semakptr->u.sem_perm.gid = sbuf->sem_perm.gid;
691 		semakptr->u.sem_perm.mode = (semakptr->u.sem_perm.mode &
692 		    ~0777) | (sbuf->sem_perm.mode & 0777);
693 		semakptr->u.sem_ctime = time_second;
694 		break;
695 
696 	case IPC_STAT:
697 		if ((error = semvalid(semid, semakptr)) != 0)
698 			goto done2;
699 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
700 			goto done2;
701 		bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds));
702 		break;
703 
704 	case GETNCNT:
705 		if ((error = semvalid(semid, semakptr)) != 0)
706 			goto done2;
707 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
708 			goto done2;
709 		if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
710 			error = EINVAL;
711 			goto done2;
712 		}
713 		*rval = semakptr->u.sem_base[semnum].semncnt;
714 		break;
715 
716 	case GETPID:
717 		if ((error = semvalid(semid, semakptr)) != 0)
718 			goto done2;
719 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
720 			goto done2;
721 		if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
722 			error = EINVAL;
723 			goto done2;
724 		}
725 		*rval = semakptr->u.sem_base[semnum].sempid;
726 		break;
727 
728 	case GETVAL:
729 		if ((error = semvalid(semid, semakptr)) != 0)
730 			goto done2;
731 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
732 			goto done2;
733 		if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
734 			error = EINVAL;
735 			goto done2;
736 		}
737 		*rval = semakptr->u.sem_base[semnum].semval;
738 		break;
739 
740 	case GETALL:
741 		/*
742 		 * Unfortunately, callers of this function don't know
743 		 * in advance how many semaphores are in this set.
744 		 * While we could just allocate the maximum size array
745 		 * and pass the actual size back to the caller, that
746 		 * won't work for SETALL since we can't copyin() more
747 		 * data than the user specified as we may return a
748 		 * spurious EFAULT.
749 		 *
750 		 * Note that the number of semaphores in a set is
751 		 * fixed for the life of that set.  The only way that
752 		 * the 'count' could change while are blocked in
753 		 * malloc() is if this semaphore set were destroyed
754 		 * and a new one created with the same index.
755 		 * However, semvalid() will catch that due to the
756 		 * sequence number unless exactly 0x8000 (or a
757 		 * multiple thereof) semaphore sets for the same index
758 		 * are created and destroyed while we are in malloc!
759 		 *
760 		 */
761 		count = semakptr->u.sem_nsems;
762 		mtx_unlock(sema_mtxp);
763 		array = malloc(sizeof(*array) * count, M_TEMP, M_WAITOK);
764 		mtx_lock(sema_mtxp);
765 		if ((error = semvalid(semid, semakptr)) != 0)
766 			goto done2;
767 		KASSERT(count == semakptr->u.sem_nsems, ("nsems changed"));
768 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
769 			goto done2;
770 		for (i = 0; i < semakptr->u.sem_nsems; i++)
771 			array[i] = semakptr->u.sem_base[i].semval;
772 		mtx_unlock(sema_mtxp);
773 		error = copyout(array, arg->array, count * sizeof(*array));
774 		mtx_lock(sema_mtxp);
775 		break;
776 
777 	case GETZCNT:
778 		if ((error = semvalid(semid, semakptr)) != 0)
779 			goto done2;
780 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
781 			goto done2;
782 		if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
783 			error = EINVAL;
784 			goto done2;
785 		}
786 		*rval = semakptr->u.sem_base[semnum].semzcnt;
787 		break;
788 
789 	case SETVAL:
790 		if ((error = semvalid(semid, semakptr)) != 0)
791 			goto done2;
792 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W)))
793 			goto done2;
794 		if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
795 			error = EINVAL;
796 			goto done2;
797 		}
798 		if (arg->val < 0 || arg->val > seminfo.semvmx) {
799 			error = ERANGE;
800 			goto done2;
801 		}
802 		semakptr->u.sem_base[semnum].semval = arg->val;
803 		SEMUNDO_LOCK();
804 		semundo_clear(semidx, semnum);
805 		SEMUNDO_UNLOCK();
806 		wakeup(semakptr);
807 		break;
808 
809 	case SETALL:
810 		/*
811 		 * See comment on GETALL for why 'count' shouldn't change
812 		 * and why we require a userland buffer.
813 		 */
814 		count = semakptr->u.sem_nsems;
815 		mtx_unlock(sema_mtxp);
816 		array = malloc(sizeof(*array) * count, M_TEMP, M_WAITOK);
817 		error = copyin(arg->array, array, count * sizeof(*array));
818 		mtx_lock(sema_mtxp);
819 		if (error)
820 			break;
821 		if ((error = semvalid(semid, semakptr)) != 0)
822 			goto done2;
823 		KASSERT(count == semakptr->u.sem_nsems, ("nsems changed"));
824 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W)))
825 			goto done2;
826 		for (i = 0; i < semakptr->u.sem_nsems; i++) {
827 			usval = array[i];
828 			if (usval > seminfo.semvmx) {
829 				error = ERANGE;
830 				break;
831 			}
832 			semakptr->u.sem_base[i].semval = usval;
833 		}
834 		SEMUNDO_LOCK();
835 		semundo_clear(semidx, -1);
836 		SEMUNDO_UNLOCK();
837 		wakeup(semakptr);
838 		break;
839 
840 	default:
841 		error = EINVAL;
842 		break;
843 	}
844 
845 done2:
846 	mtx_unlock(sema_mtxp);
847 	if (cmd == IPC_RMID)
848 		mtx_unlock(&sem_mtx);
849 	if (array != NULL)
850 		free(array, M_TEMP);
851 	return(error);
852 }
853 
854 #ifndef _SYS_SYSPROTO_H_
855 struct semget_args {
856 	key_t	key;
857 	int	nsems;
858 	int	semflg;
859 };
860 #endif
861 int
862 semget(struct thread *td, struct semget_args *uap)
863 {
864 	int semid, error = 0;
865 	int key = uap->key;
866 	int nsems = uap->nsems;
867 	int semflg = uap->semflg;
868 	struct ucred *cred = td->td_ucred;
869 
870 	DPRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg));
871 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
872 		return (ENOSYS);
873 
874 	mtx_lock(&sem_mtx);
875 	if (key != IPC_PRIVATE) {
876 		for (semid = 0; semid < seminfo.semmni; semid++) {
877 			if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) &&
878 			    sema[semid].u.sem_perm.key == key)
879 				break;
880 		}
881 		if (semid < seminfo.semmni) {
882 			DPRINTF(("found public key\n"));
883 			if ((error = ipcperm(td, &sema[semid].u.sem_perm,
884 			    semflg & 0700))) {
885 				goto done2;
886 			}
887 			if (nsems > 0 && sema[semid].u.sem_nsems < nsems) {
888 				DPRINTF(("too small\n"));
889 				error = EINVAL;
890 				goto done2;
891 			}
892 			if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) {
893 				DPRINTF(("not exclusive\n"));
894 				error = EEXIST;
895 				goto done2;
896 			}
897 #ifdef MAC
898 			error = mac_sysvsem_check_semget(cred, &sema[semid]);
899 			if (error != 0)
900 				goto done2;
901 #endif
902 			goto found;
903 		}
904 	}
905 
906 	DPRINTF(("need to allocate the semid_kernel\n"));
907 	if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) {
908 		if (nsems <= 0 || nsems > seminfo.semmsl) {
909 			DPRINTF(("nsems out of range (0<%d<=%d)\n", nsems,
910 			    seminfo.semmsl));
911 			error = EINVAL;
912 			goto done2;
913 		}
914 		if (nsems > seminfo.semmns - semtot) {
915 			DPRINTF((
916 			    "not enough semaphores left (need %d, got %d)\n",
917 			    nsems, seminfo.semmns - semtot));
918 			error = ENOSPC;
919 			goto done2;
920 		}
921 		for (semid = 0; semid < seminfo.semmni; semid++) {
922 			if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) == 0)
923 				break;
924 		}
925 		if (semid == seminfo.semmni) {
926 			DPRINTF(("no more semid_kernel's available\n"));
927 			error = ENOSPC;
928 			goto done2;
929 		}
930 		DPRINTF(("semid %d is available\n", semid));
931 		mtx_lock(&sema_mtx[semid]);
932 		KASSERT((sema[semid].u.sem_perm.mode & SEM_ALLOC) == 0,
933 		    ("Lost semaphore %d", semid));
934 		sema[semid].u.sem_perm.key = key;
935 		sema[semid].u.sem_perm.cuid = cred->cr_uid;
936 		sema[semid].u.sem_perm.uid = cred->cr_uid;
937 		sema[semid].u.sem_perm.cgid = cred->cr_gid;
938 		sema[semid].u.sem_perm.gid = cred->cr_gid;
939 		sema[semid].u.sem_perm.mode = (semflg & 0777) | SEM_ALLOC;
940 		sema[semid].u.sem_perm.seq =
941 		    (sema[semid].u.sem_perm.seq + 1) & 0x7fff;
942 		sema[semid].u.sem_nsems = nsems;
943 		sema[semid].u.sem_otime = 0;
944 		sema[semid].u.sem_ctime = time_second;
945 		sema[semid].u.sem_base = &sem[semtot];
946 		semtot += nsems;
947 		bzero(sema[semid].u.sem_base,
948 		    sizeof(sema[semid].u.sem_base[0])*nsems);
949 #ifdef MAC
950 		mac_sysvsem_create(cred, &sema[semid]);
951 #endif
952 		mtx_unlock(&sema_mtx[semid]);
953 		DPRINTF(("sembase = %p, next = %p\n",
954 		    sema[semid].u.sem_base, &sem[semtot]));
955 	} else {
956 		DPRINTF(("didn't find it and wasn't asked to create it\n"));
957 		error = ENOENT;
958 		goto done2;
959 	}
960 
961 found:
962 	td->td_retval[0] = IXSEQ_TO_IPCID(semid, sema[semid].u.sem_perm);
963 done2:
964 	mtx_unlock(&sem_mtx);
965 	return (error);
966 }
967 
/*
 * Argument structure of semop(); only defined here when <sys/sysproto.h>
 * has not already supplied it.
 */
968 #ifndef _SYS_SYSPROTO_H_
969 struct semop_args {
970 	int	semid;		/* IPC identifier of the semaphore set */
971 	struct	sembuf *sops;	/* user-space array of operations */
972 	size_t	nsops;		/* number of elements in sops */
973 };
974 #endif
975 int
976 semop(struct thread *td, struct semop_args *uap)
977 {
978 #define SMALL_SOPS	8
979 	struct sembuf small_sops[SMALL_SOPS];
980 	int semid = uap->semid;
981 	size_t nsops = uap->nsops;
982 	struct sembuf *sops;
983 	struct semid_kernel *semakptr;
984 	struct sembuf *sopptr = 0;
985 	struct sem *semptr = 0;
986 	struct sem_undo *suptr;
987 	struct mtx *sema_mtxp;
988 	size_t i, j, k;
989 	int error;
990 	int do_wakeup, do_undos;
991 	unsigned short seq;
992 
993 #ifdef SEM_DEBUG
994 	sops = NULL;
995 #endif
996 	DPRINTF(("call to semop(%d, %p, %u)\n", semid, sops, nsops));
997 
998 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
999 		return (ENOSYS);
1000 
1001 	semid = IPCID_TO_IX(semid);	/* Convert back to zero origin */
1002 
1003 	if (semid < 0 || semid >= seminfo.semmni)
1004 		return (EINVAL);
1005 
1006 	/* Allocate memory for sem_ops */
1007 	if (nsops <= SMALL_SOPS)
1008 		sops = small_sops;
1009 	else if (nsops <= seminfo.semopm)
1010 		sops = malloc(nsops * sizeof(*sops), M_TEMP, M_WAITOK);
1011 	else {
1012 		DPRINTF(("too many sops (max=%d, nsops=%d)\n", seminfo.semopm,
1013 		    nsops));
1014 		return (E2BIG);
1015 	}
1016 	if ((error = copyin(uap->sops, sops, nsops * sizeof(sops[0]))) != 0) {
1017 		DPRINTF(("error = %d from copyin(%p, %p, %d)\n", error,
1018 		    uap->sops, sops, nsops * sizeof(sops[0])));
1019 		if (sops != small_sops)
1020 			free(sops, M_SEM);
1021 		return (error);
1022 	}
1023 
1024 	semakptr = &sema[semid];
1025 	sema_mtxp = &sema_mtx[semid];
1026 	mtx_lock(sema_mtxp);
1027 	if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0) {
1028 		error = EINVAL;
1029 		goto done2;
1030 	}
1031 	seq = semakptr->u.sem_perm.seq;
1032 	if (seq != IPCID_TO_SEQ(uap->semid)) {
1033 		error = EINVAL;
1034 		goto done2;
1035 	}
1036 	/*
1037 	 * Initial pass thru sops to see what permissions are needed.
1038 	 * Also perform any checks that don't need repeating on each
1039 	 * attempt to satisfy the request vector.
1040 	 */
1041 	j = 0;		/* permission needed */
1042 	do_undos = 0;
1043 	for (i = 0; i < nsops; i++) {
1044 		sopptr = &sops[i];
1045 		if (sopptr->sem_num >= semakptr->u.sem_nsems) {
1046 			error = EFBIG;
1047 			goto done2;
1048 		}
1049 		if (sopptr->sem_flg & SEM_UNDO && sopptr->sem_op != 0)
1050 			do_undos = 1;
1051 		j |= (sopptr->sem_op == 0) ? SEM_R : SEM_A;
1052 	}
1053 
1054 	if ((error = ipcperm(td, &semakptr->u.sem_perm, j))) {
1055 		DPRINTF(("error = %d from ipaccess\n", error));
1056 		goto done2;
1057 	}
1058 #ifdef MAC
1059 	error = mac_sysvsem_check_semop(td->td_ucred, semakptr, j);
1060 	if (error != 0)
1061 		goto done2;
1062 #endif
1063 
1064 	/*
1065 	 * Loop trying to satisfy the vector of requests.
1066 	 * If we reach a point where we must wait, any requests already
1067 	 * performed are rolled back and we go to sleep until some other
1068 	 * process wakes us up.  At this point, we start all over again.
1069 	 *
1070 	 * This ensures that from the perspective of other tasks, a set
1071 	 * of requests is atomic (never partially satisfied).
1072 	 */
1073 	for (;;) {
1074 		do_wakeup = 0;
1075 		error = 0;	/* error return if necessary */
1076 
1077 		for (i = 0; i < nsops; i++) {
1078 			sopptr = &sops[i];
1079 			semptr = &semakptr->u.sem_base[sopptr->sem_num];
1080 
1081 			DPRINTF((
1082 			    "semop:  semakptr=%p, sem_base=%p, "
1083 			    "semptr=%p, sem[%d]=%d : op=%d, flag=%s\n",
1084 			    semakptr, semakptr->u.sem_base, semptr,
1085 			    sopptr->sem_num, semptr->semval, sopptr->sem_op,
1086 			    (sopptr->sem_flg & IPC_NOWAIT) ?
1087 			    "nowait" : "wait"));
1088 
1089 			if (sopptr->sem_op < 0) {
1090 				if (semptr->semval + sopptr->sem_op < 0) {
1091 					DPRINTF(("semop:  can't do it now\n"));
1092 					break;
1093 				} else {
1094 					semptr->semval += sopptr->sem_op;
1095 					if (semptr->semval == 0 &&
1096 					    semptr->semzcnt > 0)
1097 						do_wakeup = 1;
1098 				}
1099 			} else if (sopptr->sem_op == 0) {
1100 				if (semptr->semval != 0) {
1101 					DPRINTF(("semop:  not zero now\n"));
1102 					break;
1103 				}
1104 			} else if (semptr->semval + sopptr->sem_op >
1105 			    seminfo.semvmx) {
1106 				error = ERANGE;
1107 				break;
1108 			} else {
1109 				if (semptr->semncnt > 0)
1110 					do_wakeup = 1;
1111 				semptr->semval += sopptr->sem_op;
1112 			}
1113 		}
1114 
1115 		/*
1116 		 * Did we get through the entire vector?
1117 		 */
1118 		if (i >= nsops)
1119 			goto done;
1120 
1121 		/*
1122 		 * No ... rollback anything that we've already done
1123 		 */
1124 		DPRINTF(("semop:  rollback 0 through %d\n", i-1));
1125 		for (j = 0; j < i; j++)
1126 			semakptr->u.sem_base[sops[j].sem_num].semval -=
1127 			    sops[j].sem_op;
1128 
1129 		/* If we detected an error, return it */
1130 		if (error != 0)
1131 			goto done2;
1132 
1133 		/*
1134 		 * If the request that we couldn't satisfy has the
1135 		 * NOWAIT flag set then return with EAGAIN.
1136 		 */
1137 		if (sopptr->sem_flg & IPC_NOWAIT) {
1138 			error = EAGAIN;
1139 			goto done2;
1140 		}
1141 
1142 		if (sopptr->sem_op == 0)
1143 			semptr->semzcnt++;
1144 		else
1145 			semptr->semncnt++;
1146 
1147 		DPRINTF(("semop:  good night!\n"));
1148 		error = msleep(semakptr, sema_mtxp, (PZERO - 4) | PCATCH,
1149 		    "semwait", 0);
1150 		DPRINTF(("semop:  good morning (error=%d)!\n", error));
1151 		/* return code is checked below, after sem[nz]cnt-- */
1152 
1153 		/*
1154 		 * Make sure that the semaphore still exists
1155 		 */
1156 		seq = semakptr->u.sem_perm.seq;
1157 		if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
1158 		    seq != IPCID_TO_SEQ(uap->semid)) {
1159 			error = EIDRM;
1160 			goto done2;
1161 		}
1162 
1163 		/*
1164 		 * Renew the semaphore's pointer after wakeup since
1165 		 * during msleep sem_base may have been modified and semptr
1166 		 * is not valid any more
1167 		 */
1168 		semptr = &semakptr->u.sem_base[sopptr->sem_num];
1169 
1170 		/*
1171 		 * The semaphore is still alive.  Readjust the count of
1172 		 * waiting processes.
1173 		 */
1174 		if (sopptr->sem_op == 0)
1175 			semptr->semzcnt--;
1176 		else
1177 			semptr->semncnt--;
1178 
1179 		/*
1180 		 * Is it really morning, or was our sleep interrupted?
1181 		 * (Delayed check of msleep() return code because we
1182 		 * need to decrement sem[nz]cnt either way.)
1183 		 */
1184 		if (error != 0) {
1185 			error = EINTR;
1186 			goto done2;
1187 		}
1188 		DPRINTF(("semop:  good morning!\n"));
1189 	}
1190 
1191 done:
1192 	/*
1193 	 * Process any SEM_UNDO requests.
1194 	 */
1195 	if (do_undos) {
1196 		SEMUNDO_LOCK();
1197 		suptr = NULL;
1198 		for (i = 0; i < nsops; i++) {
1199 			/*
1200 			 * We only need to deal with SEM_UNDO's for non-zero
1201 			 * op's.
1202 			 */
1203 			int adjval;
1204 
1205 			if ((sops[i].sem_flg & SEM_UNDO) == 0)
1206 				continue;
1207 			adjval = sops[i].sem_op;
1208 			if (adjval == 0)
1209 				continue;
1210 			error = semundo_adjust(td, &suptr, semid, seq,
1211 			    sops[i].sem_num, -adjval);
1212 			if (error == 0)
1213 				continue;
1214 
1215 			/*
1216 			 * Oh-Oh!  We ran out of either sem_undo's or undo's.
1217 			 * Rollback the adjustments to this point and then
1218 			 * rollback the semaphore ups and down so we can return
1219 			 * with an error with all structures restored.  We
1220 			 * rollback the undo's in the exact reverse order that
1221 			 * we applied them.  This guarantees that we won't run
1222 			 * out of space as we roll things back out.
1223 			 */
1224 			for (j = 0; j < i; j++) {
1225 				k = i - j - 1;
1226 				if ((sops[k].sem_flg & SEM_UNDO) == 0)
1227 					continue;
1228 				adjval = sops[k].sem_op;
1229 				if (adjval == 0)
1230 					continue;
1231 				if (semundo_adjust(td, &suptr, semid, seq,
1232 				    sops[k].sem_num, adjval) != 0)
1233 					panic("semop - can't undo undos");
1234 			}
1235 
1236 			for (j = 0; j < nsops; j++)
1237 				semakptr->u.sem_base[sops[j].sem_num].semval -=
1238 				    sops[j].sem_op;
1239 
1240 			DPRINTF(("error = %d from semundo_adjust\n", error));
1241 			SEMUNDO_UNLOCK();
1242 			goto done2;
1243 		} /* loop through the sops */
1244 		SEMUNDO_UNLOCK();
1245 	} /* if (do_undos) */
1246 
1247 	/* We're definitely done - set the sempid's and time */
1248 	for (i = 0; i < nsops; i++) {
1249 		sopptr = &sops[i];
1250 		semptr = &semakptr->u.sem_base[sopptr->sem_num];
1251 		semptr->sempid = td->td_proc->p_pid;
1252 	}
1253 	semakptr->u.sem_otime = time_second;
1254 
1255 	/*
1256 	 * Do a wakeup if any semaphore was up'd whilst something was
1257 	 * sleeping on it.
1258 	 */
1259 	if (do_wakeup) {
1260 		DPRINTF(("semop:  doing wakeup\n"));
1261 		wakeup(semakptr);
1262 		DPRINTF(("semop:  back from wakeup\n"));
1263 	}
1264 	DPRINTF(("semop:  done\n"));
1265 	td->td_retval[0] = 0;
1266 done2:
1267 	mtx_unlock(sema_mtxp);
1268 	if (sops != small_sops)
1269 		free(sops, M_SEM);
1270 	return (error);
1271 }
1272 
1273 /*
1274  * Go through the undo structures for this process and apply the adjustments to
1275  * semaphores.
1276  */
1277 static void
1278 semexit_myhook(void *arg, struct proc *p)
1279 {
1280 	struct sem_undo *suptr;
1281 	struct semid_kernel *semakptr;
1282 	struct mtx *sema_mtxp;
1283 	int semid, semnum, adjval, ix;
1284 	unsigned short seq;
1285 
1286 	/*
1287 	 * Go through the chain of undo vectors looking for one
1288 	 * associated with this process.
1289 	 */
1290 	SEMUNDO_LOCK();
1291 	LIST_FOREACH(suptr, &semu_list, un_next) {
1292 		if (suptr->un_proc == p)
1293 			break;
1294 	}
1295 	if (suptr == NULL) {
1296 		SEMUNDO_UNLOCK();
1297 		return;
1298 	}
1299 	LIST_REMOVE(suptr, un_next);
1300 
1301 	DPRINTF(("proc @%p has undo structure with %d entries\n", p,
1302 	    suptr->un_cnt));
1303 
1304 	/*
1305 	 * If there are any active undo elements then process them.
1306 	 */
1307 	if (suptr->un_cnt > 0) {
1308 		SEMUNDO_UNLOCK();
1309 		for (ix = 0; ix < suptr->un_cnt; ix++) {
1310 			semid = suptr->un_ent[ix].un_id;
1311 			semnum = suptr->un_ent[ix].un_num;
1312 			adjval = suptr->un_ent[ix].un_adjval;
1313 			seq = suptr->un_ent[ix].un_seq;
1314 			semakptr = &sema[semid];
1315 			sema_mtxp = &sema_mtx[semid];
1316 
1317 			mtx_lock(sema_mtxp);
1318 			if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
1319 			    (semakptr->u.sem_perm.seq != seq)) {
1320 				mtx_unlock(sema_mtxp);
1321 				continue;
1322 			}
1323 			if (semnum >= semakptr->u.sem_nsems)
1324 				panic("semexit - semnum out of range");
1325 
1326 			DPRINTF((
1327 			    "semexit:  %p id=%d num=%d(adj=%d) ; sem=%d\n",
1328 			    suptr->un_proc, suptr->un_ent[ix].un_id,
1329 			    suptr->un_ent[ix].un_num,
1330 			    suptr->un_ent[ix].un_adjval,
1331 			    semakptr->u.sem_base[semnum].semval));
1332 
1333 			if (adjval < 0 && semakptr->u.sem_base[semnum].semval <
1334 			    -adjval)
1335 				semakptr->u.sem_base[semnum].semval = 0;
1336 			else
1337 				semakptr->u.sem_base[semnum].semval += adjval;
1338 
1339 			wakeup(semakptr);
1340 			DPRINTF(("semexit:  back from wakeup\n"));
1341 			mtx_unlock(sema_mtxp);
1342 		}
1343 		SEMUNDO_LOCK();
1344 	}
1345 
1346 	/*
1347 	 * Deallocate the undo vector.
1348 	 */
1349 	DPRINTF(("removing vector\n"));
1350 	suptr->un_proc = NULL;
1351 	suptr->un_cnt = 0;
1352 	LIST_INSERT_HEAD(&semu_free_list, suptr, un_next);
1353 	SEMUNDO_UNLOCK();
1354 }
1355 
1356 static int
1357 sysctl_sema(SYSCTL_HANDLER_ARGS)
1358 {
1359 
1360 	return (SYSCTL_OUT(req, sema,
1361 	    sizeof(struct semid_kernel) * seminfo.semmni));
1362 }
1363 
1364 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1365     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1366 
1367 /* XXX casting to (sy_call_t *) is bogus, as usual. */
1368 static sy_call_t *semcalls[] = {
1369 	(sy_call_t *)freebsd7___semctl, (sy_call_t *)semget,
1370 	(sy_call_t *)semop
1371 };
1372 
1373 /*
1374  * Entry point for all SEM calls.
1375  */
1376 int
1377 semsys(td, uap)
1378 	struct thread *td;
1379 	/* XXX actually varargs. */
1380 	struct semsys_args /* {
1381 		int	which;
1382 		int	a2;
1383 		int	a3;
1384 		int	a4;
1385 		int	a5;
1386 	} */ *uap;
1387 {
1388 	int error;
1389 
1390 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
1391 		return (ENOSYS);
1392 	if (uap->which < 0 ||
1393 	    uap->which >= sizeof(semcalls)/sizeof(semcalls[0]))
1394 		return (EINVAL);
1395 	error = (*semcalls[uap->which])(td, &uap->a2);
1396 	return (error);
1397 }
1398 
1399 #ifndef CP
1400 #define CP(src, dst, fld)	do { (dst).fld = (src).fld; } while (0)
1401 #endif
1402 
1403 #ifndef _SYS_SYSPROTO_H_
1404 struct freebsd7___semctl_args {
1405 	int	semid;
1406 	int	semnum;
1407 	int	cmd;
1408 	union	semun_old *arg;
1409 };
1410 #endif
1411 int
1412 freebsd7___semctl(struct thread *td, struct freebsd7___semctl_args *uap)
1413 {
1414 	struct semid_ds_old dsold;
1415 	struct semid_ds dsbuf;
1416 	union semun_old arg;
1417 	union semun semun;
1418 	register_t rval;
1419 	int error;
1420 
1421 	switch (uap->cmd) {
1422 	case SEM_STAT:
1423 	case IPC_SET:
1424 	case IPC_STAT:
1425 	case GETALL:
1426 	case SETVAL:
1427 	case SETALL:
1428 		error = copyin(uap->arg, &arg, sizeof(arg));
1429 		if (error)
1430 			return (error);
1431 		break;
1432 	}
1433 
1434 	switch (uap->cmd) {
1435 	case SEM_STAT:
1436 	case IPC_STAT:
1437 		semun.buf = &dsbuf;
1438 		break;
1439 	case IPC_SET:
1440 		error = copyin(arg.buf, &dsold, sizeof(dsold));
1441 		if (error)
1442 			return (error);
1443 		ipcperm_old2new(&dsold.sem_perm, &dsbuf.sem_perm);
1444 		CP(dsold, dsbuf, sem_base);
1445 		CP(dsold, dsbuf, sem_nsems);
1446 		CP(dsold, dsbuf, sem_otime);
1447 		CP(dsold, dsbuf, sem_ctime);
1448 		semun.buf = &dsbuf;
1449 		break;
1450 	case GETALL:
1451 	case SETALL:
1452 		semun.array = arg.array;
1453 		break;
1454 	case SETVAL:
1455 		semun.val = arg.val;
1456 		break;
1457 	}
1458 
1459 	error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun,
1460 	    &rval);
1461 	if (error)
1462 		return (error);
1463 
1464 	switch (uap->cmd) {
1465 	case SEM_STAT:
1466 	case IPC_STAT:
1467 		bzero(&dsold, sizeof(dsold));
1468 		ipcperm_new2old(&dsbuf.sem_perm, &dsold.sem_perm);
1469 		CP(dsbuf, dsold, sem_base);
1470 		CP(dsbuf, dsold, sem_nsems);
1471 		CP(dsbuf, dsold, sem_otime);
1472 		CP(dsbuf, dsold, sem_ctime);
1473 		error = copyout(&dsold, arg.buf, sizeof(dsold));
1474 		break;
1475 	}
1476 
1477 	if (error == 0)
1478 		td->td_retval[0] = rval;
1479 	return (error);
1480 }
1481 
1482 #endif /* COMPAT_FREEBSD{4,5,6,7} */
1483 
1484 #ifdef COMPAT_FREEBSD32
1485 
1486 int
1487 freebsd32_semsys(struct thread *td, struct freebsd32_semsys_args *uap)
1488 {
1489 
1490 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1491     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1492 	switch (uap->which) {
1493 	case 0:
1494 		return (freebsd7_freebsd32_semctl(td,
1495 		    (struct freebsd7_freebsd32_semctl_args *)&uap->a2));
1496 	default:
1497 		return (semsys(td, (struct semsys_args *)uap));
1498 	}
1499 #else
1500 	return (nosys(td, NULL));
1501 #endif
1502 }
1503 
1504 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1505     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1506 int
1507 freebsd7_freebsd32_semctl(struct thread *td,
1508     struct freebsd7_freebsd32_semctl_args *uap)
1509 {
1510 	struct semid_ds32_old dsbuf32;
1511 	struct semid_ds dsbuf;
1512 	union semun semun;
1513 	union semun32 arg;
1514 	register_t rval;
1515 	int error;
1516 
1517 	switch (uap->cmd) {
1518 	case SEM_STAT:
1519 	case IPC_SET:
1520 	case IPC_STAT:
1521 	case GETALL:
1522 	case SETVAL:
1523 	case SETALL:
1524 		error = copyin(uap->arg, &arg, sizeof(arg));
1525 		if (error)
1526 			return (error);
1527 		break;
1528 	}
1529 
1530 	switch (uap->cmd) {
1531 	case SEM_STAT:
1532 	case IPC_STAT:
1533 		semun.buf = &dsbuf;
1534 		break;
1535 	case IPC_SET:
1536 		error = copyin(PTRIN(arg.buf), &dsbuf32, sizeof(dsbuf32));
1537 		if (error)
1538 			return (error);
1539 		freebsd32_ipcperm_old_in(&dsbuf32.sem_perm, &dsbuf.sem_perm);
1540 		PTRIN_CP(dsbuf32, dsbuf, sem_base);
1541 		CP(dsbuf32, dsbuf, sem_nsems);
1542 		CP(dsbuf32, dsbuf, sem_otime);
1543 		CP(dsbuf32, dsbuf, sem_ctime);
1544 		semun.buf = &dsbuf;
1545 		break;
1546 	case GETALL:
1547 	case SETALL:
1548 		semun.array = PTRIN(arg.array);
1549 		break;
1550 	case SETVAL:
1551 		semun.val = arg.val;
1552 		break;
1553 	}
1554 
1555 	error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun,
1556 	    &rval);
1557 	if (error)
1558 		return (error);
1559 
1560 	switch (uap->cmd) {
1561 	case SEM_STAT:
1562 	case IPC_STAT:
1563 		bzero(&dsbuf32, sizeof(dsbuf32));
1564 		freebsd32_ipcperm_old_out(&dsbuf.sem_perm, &dsbuf32.sem_perm);
1565 		PTROUT_CP(dsbuf, dsbuf32, sem_base);
1566 		CP(dsbuf, dsbuf32, sem_nsems);
1567 		CP(dsbuf, dsbuf32, sem_otime);
1568 		CP(dsbuf, dsbuf32, sem_ctime);
1569 		error = copyout(&dsbuf32, PTRIN(arg.buf), sizeof(dsbuf32));
1570 		break;
1571 	}
1572 
1573 	if (error == 0)
1574 		td->td_retval[0] = rval;
1575 	return (error);
1576 }
1577 #endif
1578 
1579 int
1580 freebsd32_semctl(struct thread *td, struct freebsd32_semctl_args *uap)
1581 {
1582 	struct semid_ds32 dsbuf32;
1583 	struct semid_ds dsbuf;
1584 	union semun semun;
1585 	union semun32 arg;
1586 	register_t rval;
1587 	int error;
1588 
1589 	switch (uap->cmd) {
1590 	case SEM_STAT:
1591 	case IPC_SET:
1592 	case IPC_STAT:
1593 	case GETALL:
1594 	case SETVAL:
1595 	case SETALL:
1596 		error = copyin(uap->arg, &arg, sizeof(arg));
1597 		if (error)
1598 			return (error);
1599 		break;
1600 	}
1601 
1602 	switch (uap->cmd) {
1603 	case SEM_STAT:
1604 	case IPC_STAT:
1605 		semun.buf = &dsbuf;
1606 		break;
1607 	case IPC_SET:
1608 		error = copyin(PTRIN(arg.buf), &dsbuf32, sizeof(dsbuf32));
1609 		if (error)
1610 			return (error);
1611 		freebsd32_ipcperm_in(&dsbuf32.sem_perm, &dsbuf.sem_perm);
1612 		PTRIN_CP(dsbuf32, dsbuf, sem_base);
1613 		CP(dsbuf32, dsbuf, sem_nsems);
1614 		CP(dsbuf32, dsbuf, sem_otime);
1615 		CP(dsbuf32, dsbuf, sem_ctime);
1616 		semun.buf = &dsbuf;
1617 		break;
1618 	case GETALL:
1619 	case SETALL:
1620 		semun.array = PTRIN(arg.array);
1621 		break;
1622 	case SETVAL:
1623 		semun.val = arg.val;
1624 		break;
1625 	}
1626 
1627 	error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun,
1628 	    &rval);
1629 	if (error)
1630 		return (error);
1631 
1632 	switch (uap->cmd) {
1633 	case SEM_STAT:
1634 	case IPC_STAT:
1635 		bzero(&dsbuf32, sizeof(dsbuf32));
1636 		freebsd32_ipcperm_out(&dsbuf.sem_perm, &dsbuf32.sem_perm);
1637 		PTROUT_CP(dsbuf, dsbuf32, sem_base);
1638 		CP(dsbuf, dsbuf32, sem_nsems);
1639 		CP(dsbuf, dsbuf32, sem_otime);
1640 		CP(dsbuf, dsbuf32, sem_ctime);
1641 		error = copyout(&dsbuf32, PTRIN(arg.buf), sizeof(dsbuf32));
1642 		break;
1643 	}
1644 
1645 	if (error == 0)
1646 		td->td_retval[0] = rval;
1647 	return (error);
1648 }
1649 
1650 #endif /* COMPAT_FREEBSD32 */
1651