xref: /freebsd/sys/kern/uipc_sem.c (revision d056fa046c6a91b90cd98165face0e42a33a5173)
1 /*-
2  * Copyright (c) 2002 Alfred Perlstein <alfred@FreeBSD.org>
3  * Copyright (c) 2003-2005 SPARTA, Inc.
4  * Copyright (c) 2005 Robert N. M. Watson
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project in part by Network
8  * Associates Laboratories, the Security Research Division of Network
9  * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"),
10  * as part of the DARPA CHATS research program.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include "opt_mac.h"
38 #include "opt_posix.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/sysproto.h>
43 #include <sys/eventhandler.h>
44 #include <sys/kernel.h>
45 #include <sys/proc.h>
46 #include <sys/lock.h>
47 #include <sys/mutex.h>
48 #include <sys/module.h>
49 #include <sys/condvar.h>
50 #include <sys/sem.h>
51 #include <sys/uio.h>
52 #include <sys/syscall.h>
53 #include <sys/stat.h>
54 #include <sys/sysent.h>
55 #include <sys/sysctl.h>
56 #include <sys/time.h>
57 #include <sys/mac.h>
58 #include <sys/malloc.h>
59 #include <sys/fcntl.h>
60 
61 #include <posix4/ksem.h>
62 #include <posix4/posix4.h>
63 #include <posix4/semaphore.h>
64 #include <posix4/_semaphore.h>
65 
66 static int sem_count_proc(struct proc *p);
67 static struct ksem *sem_lookup_byname(const char *name);
68 static int sem_create(struct thread *td, const char *name,
69     struct ksem **ksret, mode_t mode, unsigned int value);
70 static void sem_free(struct ksem *ksnew);
71 static int sem_perm(struct thread *td, struct ksem *ks);
72 static void sem_enter(struct proc *p, struct ksem *ks);
73 static int sem_leave(struct proc *p, struct ksem *ks);
74 static void sem_exithook(void *arg, struct proc *p);
75 static void sem_forkhook(void *arg, struct proc *p1, struct proc *p2,
76     int flags);
77 static int sem_hasopen(struct thread *td, struct ksem *ks);
78 
79 static int kern_sem_close(struct thread *td, semid_t id);
80 static int kern_sem_post(struct thread *td, semid_t id);
81 static int kern_sem_wait(struct thread *td, semid_t id, int tryflag,
82     struct timespec *abstime);
83 static int kern_sem_init(struct thread *td, int dir, unsigned int value,
84     semid_t *idp);
85 static int kern_sem_open(struct thread *td, int dir, const char *name,
86     int oflag, mode_t mode, unsigned int value, semid_t *idp);
87 static int kern_sem_unlink(struct thread *td, const char *name);
88 
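/*
 * These ksem_*() system calls form the kernel side of POSIX semaphores;
 * userland reaches them through the sem_*() functions in libc.  A sketch
 * of the intended use of a named semaphore (the exact libc-to-syscall
 * mapping is assumed here for illustration only):
 *
 *	sem_t *s = sem_open("/mysem", O_CREAT, 0600, 1);	-> ksem_open()
 *	sem_wait(s);						-> ksem_wait()
 *	sem_post(s);						-> ksem_post()
 *	sem_close(s);						-> ksem_close()
 *	sem_unlink("/mysem");					-> ksem_unlink()
 */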
89 #ifndef SEM_MAX
90 #define SEM_MAX	30
91 #endif
92 
93 #define SEM_MAX_NAMELEN	14
94 
95 #define SEM_TO_ID(x)	((intptr_t)(x))
96 #define ID_TO_SEM(x)	id_to_sem(x)
97 
98 /*
99  * Available semaphores go here; this includes semaphores created by
100  * sem_init() and any created via sem_open() that have not yet been unlinked.
101  */
102 LIST_HEAD(, ksem) ksem_head = LIST_HEAD_INITIALIZER(&ksem_head);
103 /*
104  * Semaphores that are still in use but have been sem_unlink()'d go here.
105  */
106 LIST_HEAD(, ksem) ksem_deadhead = LIST_HEAD_INITIALIZER(&ksem_deadhead);
107 
108 static struct mtx sem_lock;
109 static MALLOC_DEFINE(M_SEM, "sems", "semaphore data");
110 
111 static int nsems = 0;
112 SYSCTL_DECL(_p1003_1b);
113 SYSCTL_INT(_p1003_1b, OID_AUTO, nsems, CTLFLAG_RD, &nsems, 0, "");
114 
115 static eventhandler_tag sem_exit_tag, sem_exec_tag, sem_fork_tag;
116 
117 #ifdef SEM_DEBUG
118 #define DP(x)	printf x
119 #else
120 #define DP(x)
121 #endif
122 
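/*
 * Reference count management.  sem_ref()/sem_rel() must be called with
 * sem_lock held; dropping the last reference frees the semaphore via
 * sem_free().
 */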
123 static __inline
124 void
125 sem_ref(struct ksem *ks)
126 {
127 
128 	mtx_assert(&sem_lock, MA_OWNED);
129 	ks->ks_ref++;
130 	DP(("sem_ref: ks = %p, ref = %d\n", ks, ks->ks_ref));
131 }
132 
133 static __inline
134 void
135 sem_rel(struct ksem *ks)
136 {
137 
138 	mtx_assert(&sem_lock, MA_OWNED);
139 	DP(("sem_rel: ks = %p, ref = %d\n", ks, ks->ks_ref - 1));
140 	if (--ks->ks_ref == 0)
141 		sem_free(ks);
142 }
143 
144 static __inline struct ksem *id_to_sem(semid_t id);
145 
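/*
 * A semid_t handed to userland is simply the kernel address of the ksem
 * (see SEM_TO_ID()).  id_to_sem() validates an id supplied by userland by
 * checking that it matches a semaphore currently on ksem_head.
 */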
146 static __inline
147 struct ksem *
148 id_to_sem(semid_t id)
149 {
150 	struct ksem *ks;
151 
152 	mtx_assert(&sem_lock, MA_OWNED);
153 	DP(("id_to_sem: id = %0x,%p\n", id, (struct ksem *)id));
154 	LIST_FOREACH(ks, &ksem_head, ks_entry) {
155 		DP(("id_to_sem: ks = %p\n", ks));
156 		if (ks == (struct ksem *)id)
157 			return (ks);
158 	}
159 	return (NULL);
160 }
161 
162 static struct ksem *
163 sem_lookup_byname(const char *name)
164 {
165 	struct ksem *ks;
166 
167 	mtx_assert(&sem_lock, MA_OWNED);
168 	LIST_FOREACH(ks, &ksem_head, ks_entry)
169 		if (ks->ks_name != NULL && strcmp(ks->ks_name, name) == 0)
170 			return (ks);
171 	return (NULL);
172 }
173 
174 static int
175 sem_create(struct thread *td, const char *name, struct ksem **ksret,
176     mode_t mode, unsigned int value)
177 {
178 	struct ksem *ret;
179 	struct proc *p;
180 	struct ucred *uc;
181 	size_t len;
182 	int error;
183 
184 	DP(("sem_create\n"));
185 	p = td->td_proc;
186 	uc = td->td_ucred;
187 	if (value > SEM_VALUE_MAX)
188 		return (EINVAL);
189 	ret = malloc(sizeof(*ret), M_SEM, M_WAITOK | M_ZERO);
190 	if (name != NULL) {
191 		len = strlen(name);
192 		if (len > SEM_MAX_NAMELEN) {
193 			free(ret, M_SEM);
194 			return (ENAMETOOLONG);
195 		}
196 		/* Name must start with a '/' and contain no other '/' characters. */
197 		if (*name != '/' || len < 2 || index(name + 1, '/') != NULL) {
198 			free(ret, M_SEM);
199 			return (EINVAL);
200 		}
201 		ret->ks_name = malloc(len + 1, M_SEM, M_WAITOK);
202 		strcpy(ret->ks_name, name);
203 	} else {
204 		ret->ks_name = NULL;
205 	}
206 	ret->ks_mode = mode;
207 	ret->ks_value = value;
208 	ret->ks_ref = 1;
209 	ret->ks_waiters = 0;
210 	ret->ks_uid = uc->cr_uid;
211 	ret->ks_gid = uc->cr_gid;
212 	ret->ks_onlist = 0;
213 	cv_init(&ret->ks_cv, "sem");
214 	LIST_INIT(&ret->ks_users);
215 #ifdef MAC
216 	mac_init_posix_sem(ret);
217 	mac_create_posix_sem(uc, ret);
218 #endif
219 	if (name != NULL)
220 		sem_enter(td->td_proc, ret);
221 	*ksret = ret;
222 	mtx_lock(&sem_lock);
223 	if (nsems >= p31b_getcfg(CTL_P1003_1B_SEM_NSEMS_MAX)) {
224 		sem_leave(td->td_proc, ret);
225 		sem_free(ret);
226 		error = ENFILE;
227 	} else {
228 		nsems++;
229 		error = 0;
230 	}
231 	mtx_unlock(&sem_lock);
232 	return (error);
233 }
234 
235 #ifndef _SYS_SYSPROTO_H_
236 struct ksem_init_args {
237 	unsigned int value;
238 	semid_t *idp;
239 };
240 int ksem_init(struct thread *td, struct ksem_init_args *uap);
241 #endif
242 int
243 ksem_init(struct thread *td, struct ksem_init_args *uap)
244 {
245 	int error;
246 
247 	error = kern_sem_init(td, UIO_USERSPACE, uap->value, uap->idp);
248 	return (error);
249 }
250 
251 static int
252 kern_sem_init(struct thread *td, int dir, unsigned int value, semid_t *idp)
253 {
254 	struct ksem *ks;
255 	semid_t id;
256 	int error;
257 
258 	error = sem_create(td, NULL, &ks, S_IRWXU | S_IRWXG, value);
259 	if (error)
260 		return (error);
261 	id = SEM_TO_ID(ks);
262 	if (dir == UIO_USERSPACE) {
263 		error = copyout(&id, idp, sizeof(id));
264 		if (error) {
265 			mtx_lock(&sem_lock);
266 			sem_rel(ks);
267 			mtx_unlock(&sem_lock);
268 			return (error);
269 		}
270 	} else {
271 		*idp = id;
272 	}
273 	mtx_lock(&sem_lock);
274 	LIST_INSERT_HEAD(&ksem_head, ks, ks_entry);
275 	ks->ks_onlist = 1;
276 	mtx_unlock(&sem_lock);
277 	return (error);
278 }
279 
280 #ifndef _SYS_SYSPROTO_H_
281 struct ksem_open_args {
282 	char *name;
283 	int oflag;
284 	mode_t mode;
285 	unsigned int value;
286 	semid_t *idp;
287 };
288 int ksem_open(struct thread *td, struct ksem_open_args *uap);
289 #endif
290 int
291 ksem_open(struct thread *td, struct ksem_open_args *uap)
292 {
293 	char name[SEM_MAX_NAMELEN + 1];
294 	size_t done;
295 	int error;
296 
297 	error = copyinstr(uap->name, name, SEM_MAX_NAMELEN + 1, &done);
298 	if (error)
299 		return (error);
300 	DP((">>> sem_open start\n"));
301 	error = kern_sem_open(td, UIO_USERSPACE,
302 	    name, uap->oflag, uap->mode, uap->value, uap->idp);
303 	DP(("<<< sem_open end\n"));
304 	return (error);
305 }
306 
307 static int
308 kern_sem_open(struct thread *td, int dir, const char *name, int oflag,
309     mode_t mode, unsigned int value, semid_t *idp)
310 {
311 	struct ksem *ksnew, *ks;
312 	int error;
313 	semid_t id;
314 
315 	ksnew = NULL;
316 	mtx_lock(&sem_lock);
317 	ks = sem_lookup_byname(name);
318 	/*
319 	 * If we found it but O_EXCL is set, error.
320 	 */
321 	if (ks != NULL && (oflag & O_EXCL) != 0) {
322 		mtx_unlock(&sem_lock);
323 		return (EEXIST);
324 	}
325 	/*
326 	 * If we didn't find it...
327 	 */
328 	if (ks == NULL) {
329 		/*
330 		 * didn't ask for creation? error.
331 		 */
332 		if ((oflag & O_CREAT) == 0) {
333 			mtx_unlock(&sem_lock);
334 			return (ENOENT);
335 		}
336 		/*
337 		 * We may block during creation, so drop the lock.
338 		 */
339 		mtx_unlock(&sem_lock);
340 		error = sem_create(td, name, &ksnew, mode, value);
341 		if (error != 0)
342 			return (error);
343 		id = SEM_TO_ID(ksnew);
344 		if (dir == UIO_USERSPACE) {
345 			DP(("about to copyout! %d to %p\n", id, idp));
346 			error = copyout(&id, idp, sizeof(id));
347 			if (error) {
348 				mtx_lock(&sem_lock);
349 				sem_leave(td->td_proc, ksnew);
350 				sem_rel(ksnew);
351 				mtx_unlock(&sem_lock);
352 				return (error);
353 			}
354 		} else {
355 			DP(("about to set! %d to %p\n", id, idp));
356 			*idp = id;
357 		}
358 		/*
359 		 * We need to make sure we haven't lost a race while
360 		 * allocating during creation.
361 		 */
362 		mtx_lock(&sem_lock);
363 		ks = sem_lookup_byname(name);
364 		if (ks != NULL) {
365 			/* we lost... */
366 			sem_leave(td->td_proc, ksnew);
367 			sem_rel(ksnew);
368 			/* We lost the race, and O_EXCL requires that we fail. */
369 			if ((oflag & O_EXCL) != 0) {
370 				mtx_unlock(&sem_lock);
371 				return (EEXIST);
372 			}
373 		} else {
374 			DP(("sem_create: about to add to list...\n"));
375 			LIST_INSERT_HEAD(&ksem_head, ksnew, ks_entry);
376 			DP(("sem_create: setting list bit...\n"));
377 			ksnew->ks_onlist = 1;
378 			DP(("sem_create: done, about to unlock...\n"));
379 		}
380 	} else {
381 #ifdef MAC
382 		error = mac_check_posix_sem_open(td->td_ucred, ks);
383 		if (error)
384 			goto err_open;
385 #endif
386 		/*
387 		 * if we aren't the creator, then enforce permissions.
388 		 */
389 		error = sem_perm(td, ks);
390 		if (error)
391 			goto err_open;
392 		sem_ref(ks);
393 		mtx_unlock(&sem_lock);
394 		id = SEM_TO_ID(ks);
395 		if (dir == UIO_USERSPACE) {
396 			error = copyout(&id, idp, sizeof(id));
397 			if (error) {
398 				mtx_lock(&sem_lock);
399 				sem_rel(ks);
400 				mtx_unlock(&sem_lock);
401 				return (error);
402 			}
403 		} else {
404 			*idp = id;
405 		}
406 		sem_enter(td->td_proc, ks);
407 		mtx_lock(&sem_lock);
408 		sem_rel(ks);
409 	}
410 err_open:
411 	mtx_unlock(&sem_lock);
412 	return (error);
413 }
414 
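/*
 * Check whether the calling thread may access the semaphore: the owner
 * needs S_IWUSR, a group member needs S_IWGRP, anyone else needs S_IWOTH,
 * and the superuser always succeeds.
 */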
415 static int
416 sem_perm(struct thread *td, struct ksem *ks)
417 {
418 	struct ucred *uc;
419 
420 	uc = td->td_ucred;
421 	DP(("sem_perm: uc(%d,%d) ks(%d,%d,%o)\n",
422 	    uc->cr_uid, uc->cr_gid,
423 	     ks->ks_uid, ks->ks_gid, ks->ks_mode));
424 	if ((uc->cr_uid == ks->ks_uid && (ks->ks_mode & S_IWUSR) != 0) ||
425 	    (uc->cr_gid == ks->ks_gid && (ks->ks_mode & S_IWGRP) != 0) ||
426 	    (ks->ks_mode & S_IWOTH) != 0 || suser(td) == 0)
427 		return (0);
428 	return (EPERM);
429 }
430 
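/*
 * Tear down a semaphore: drop the global count, unlink it from whichever
 * list it is on, and free its name and storage.  Called with sem_lock held.
 */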
431 static void
432 sem_free(struct ksem *ks)
433 {
434 
435 	nsems--;
436 	if (ks->ks_onlist)
437 		LIST_REMOVE(ks, ks_entry);
438 	if (ks->ks_name != NULL)
439 		free(ks->ks_name, M_SEM);
440 	cv_destroy(&ks->ks_cv);
441 	free(ks, M_SEM);
442 }
443 
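/*
 * Each process that has a semaphore open is tracked by a struct kuser
 * hanging off ks_users; sem_getuser() finds the entry for a given process,
 * if any.
 */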
444 static __inline struct kuser *sem_getuser(struct proc *p, struct ksem *ks);
445 
446 static __inline struct kuser *
447 sem_getuser(struct proc *p, struct ksem *ks)
448 {
449 	struct kuser *k;
450 
451 	LIST_FOREACH(k, &ks->ks_users, ku_next)
452 		if (k->ku_pid == p->p_pid)
453 			return (k);
454 	return (NULL);
455 }
456 
457 static int
458 sem_hasopen(struct thread *td, struct ksem *ks)
459 {
460 
461 	return ((ks->ks_name == NULL && sem_perm(td, ks) == 0)
462 	    || sem_getuser(td->td_proc, ks) != NULL);
463 }
464 
465 static int
466 sem_leave(struct proc *p, struct ksem *ks)
467 {
468 	struct kuser *k;
469 
470 	DP(("sem_leave: ks = %p\n", ks));
471 	k = sem_getuser(p, ks);
472 	DP(("sem_leave: ks = %p, k = %p\n", ks, k));
473 	if (k != NULL) {
474 		LIST_REMOVE(k, ku_next);
475 		sem_rel(ks);
476 		DP(("sem_leave: about to free k\n"));
477 		free(k, M_SEM);
478 		DP(("sem_leave: returning\n"));
479 		return (0);
480 	}
481 	return (EINVAL);
482 }
483 
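/*
 * Note that process p has the semaphore open: allocate a kuser record and
 * take a reference.  If p already has the semaphore open, the call is a
 * no-op.
 */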
484 static void
485 sem_enter(struct proc *p, struct ksem *ks)
488 {
489 	struct kuser *ku, *k;
490 
491 	ku = malloc(sizeof(*ku), M_SEM, M_WAITOK);
492 	ku->ku_pid = p->p_pid;
493 	mtx_lock(&sem_lock);
494 	k = sem_getuser(p, ks);
495 	if (k != NULL) {
496 		mtx_unlock(&sem_lock);
497 		free(ku, M_SEM);
498 		return;
499 	}
500 	LIST_INSERT_HEAD(&ks->ks_users, ku, ku_next);
501 	sem_ref(ks);
502 	mtx_unlock(&sem_lock);
503 }
504 
505 #ifndef _SYS_SYSPROTO_H_
506 struct ksem_unlink_args {
507 	char *name;
508 };
509 int ksem_unlink(struct thread *td, struct ksem_unlink_args *uap);
510 #endif
511 
512 int
513 ksem_unlink(struct thread *td, struct ksem_unlink_args *uap)
514 {
515 	char name[SEM_MAX_NAMELEN + 1];
516 	size_t done;
517 	int error;
518 
519 	error = copyinstr(uap->name, name, SEM_MAX_NAMELEN + 1, &done);
520 	return (error ? error :
521 	    kern_sem_unlink(td, name));
522 }
523 
524 static int
525 kern_sem_unlink(struct thread *td, const char *name)
526 {
527 	struct ksem *ks;
528 	int error;
529 
530 	mtx_lock(&sem_lock);
531 	ks = sem_lookup_byname(name);
532 	if (ks != NULL) {
533 #ifdef MAC
534 		error = mac_check_posix_sem_unlink(td->td_ucred, ks);
535 		if (error) {
536 			mtx_unlock(&sem_lock);
537 			return (error);
538 		}
539 #endif
540 		error = sem_perm(td, ks);
541 	} else
542 		error = ENOENT;
543 	DP(("sem_unlink: '%s' ks = %p, error = %d\n", name, ks, error));
544 	if (error == 0) {
545 		LIST_REMOVE(ks, ks_entry);
546 		LIST_INSERT_HEAD(&ksem_deadhead, ks, ks_entry);
547 		sem_rel(ks);
548 	}
549 	mtx_unlock(&sem_lock);
550 	return (error);
551 }
552 
553 #ifndef _SYS_SYSPROTO_H_
554 struct ksem_close_args {
555 	semid_t id;
556 };
557 int ksem_close(struct thread *td, struct ksem_close_args *uap);
558 #endif
559 
560 int
561 ksem_close(struct thread *td, struct ksem_close_args *uap)
562 {
563 
564 	return (kern_sem_close(td, uap->id));
565 }
566 
567 static int
568 kern_sem_close(struct thread *td, semid_t id)
569 {
570 	struct ksem *ks;
571 	int error;
572 
573 	error = EINVAL;
574 	mtx_lock(&sem_lock);
575 	ks = ID_TO_SEM(id);
576 	/* this is not a valid operation for unnamed sems */
577 	if (ks != NULL && ks->ks_name != NULL)
578 		error = sem_leave(td->td_proc, ks);
579 	mtx_unlock(&sem_lock);
580 	return (error);
581 }
582 
583 #ifndef _SYS_SYSPROTO_H_
584 struct ksem_post_args {
585 	semid_t id;
586 };
587 int ksem_post(struct thread *td, struct ksem_post_args *uap);
588 #endif
589 int
590 ksem_post(struct thread *td, struct ksem_post_args *uap)
591 {
592 
593 	return (kern_sem_post(td, uap->id));
594 }
595 
596 static int
597 kern_sem_post(struct thread *td, semid_t id)
598 {
599 	struct ksem *ks;
600 	int error;
601 
602 	mtx_lock(&sem_lock);
603 	ks = ID_TO_SEM(id);
604 	if (ks == NULL || !sem_hasopen(td, ks)) {
605 		error = EINVAL;
606 		goto err;
607 	}
608 #ifdef MAC
609 	error = mac_check_posix_sem_post(td->td_ucred, ks);
610 	if (error)
611 		goto err;
612 #endif
613 	if (ks->ks_value == SEM_VALUE_MAX) {
614 		error = EOVERFLOW;
615 		goto err;
616 	}
617 	++ks->ks_value;
618 	if (ks->ks_waiters > 0)
619 		cv_signal(&ks->ks_cv);
620 	error = 0;
621 err:
622 	mtx_unlock(&sem_lock);
623 	return (error);
624 }
625 
626 #ifndef _SYS_SYSPROTO_H_
627 struct ksem_wait_args {
628 	semid_t id;
629 };
630 int ksem_wait(struct thread *td, struct ksem_wait_args *uap);
631 #endif
632 
633 int
634 ksem_wait(struct thread *td, struct ksem_wait_args *uap)
635 {
636 
637 	return (kern_sem_wait(td, uap->id, 0, NULL));
638 }
639 
640 #ifndef _SYS_SYSPROTO_H_
641 struct ksem_timedwait_args {
642 	semid_t id;
643 	const struct timespec *abstime;
644 };
645 int ksem_timedwait(struct thread *td, struct ksem_timedwait_args *uap);
646 #endif
647 int
648 ksem_timedwait(struct thread *td, struct ksem_timedwait_args *uap)
649 {
650 	struct timespec abstime;
651 	struct timespec *ts;
652 	int error;
653 
654 	/* We allow a null timespec (wait forever). */
655 	if (uap->abstime == NULL)
656 		ts = NULL;
657 	else {
658 		error = copyin(uap->abstime, &abstime, sizeof(abstime));
659 		if (error != 0)
660 			return (error);
661 		if (abstime.tv_nsec >= 1000000000 || abstime.tv_nsec < 0)
662 			return (EINVAL);
663 		ts = &abstime;
664 	}
665 	return (kern_sem_wait(td, uap->id, 0, ts));
666 }
667 
668 #ifndef _SYS_SYSPROTO_H_
669 struct ksem_trywait_args {
670 	semid_t id;
671 };
672 int ksem_trywait(struct thread *td, struct ksem_trywait_args *uap);
673 #endif
674 int
675 ksem_trywait(struct thread *td, struct ksem_trywait_args *uap)
676 {
677 
678 	return (kern_sem_wait(td, uap->id, 1, NULL));
679 }
680 
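/*
 * Common code for ksem_wait(), ksem_trywait() and ksem_timedwait():
 * decrement the semaphore, sleeping on ks_cv while the value is zero.
 * With tryflag set, a zero value fails immediately with EAGAIN; with an
 * absolute timeout, the remaining time is recomputed around each
 * cv_timedwait_sig() call and ETIMEDOUT is returned once it has passed.
 */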
681 static int
682 kern_sem_wait(struct thread *td, semid_t id, int tryflag,
683     struct timespec *abstime)
684 {
685 	struct timespec ts1, ts2;
686 	struct timeval tv;
687 	struct ksem *ks;
688 	int error;
689 
690 	DP((">>> kern_sem_wait entered!\n"));
691 	mtx_lock(&sem_lock);
692 	ks = ID_TO_SEM(id);
693 	if (ks == NULL) {
694 		DP(("kern_sem_wait ks == NULL\n"));
695 		error = EINVAL;
696 		goto err;
697 	}
698 	sem_ref(ks);
699 	if (!sem_hasopen(td, ks)) {
700 		DP(("kern_sem_wait hasopen failed\n"));
701 		error = EINVAL;
702 		goto err;
703 	}
704 #ifdef MAC
705 	error = mac_check_posix_sem_wait(td->td_ucred, ks);
706 	if (error) {
707 		DP(("kern_sem_wait mac failed\n"));
708 		goto err;
709 	}
710 #endif
711 	DP(("kern_sem_wait value = %d, tryflag %d\n", ks->ks_value, tryflag));
712 	if (ks->ks_value == 0) {
713 		ks->ks_waiters++;
714 		if (tryflag != 0)
715 			error = EAGAIN;
716 		else if (abstime == NULL)
717 			error = cv_wait_sig(&ks->ks_cv, &sem_lock);
718 		else {
719 			for (;;) {
720 				ts1 = *abstime;
721 				getnanotime(&ts2);
722 				timespecsub(&ts1, &ts2);
723 				TIMESPEC_TO_TIMEVAL(&tv, &ts1);
724 				if (tv.tv_sec < 0) {
725 					error = ETIMEDOUT;
726 					break;
727 				}
728 				error = cv_timedwait_sig(&ks->ks_cv,
729 				    &sem_lock, tvtohz(&tv));
730 				if (error != EWOULDBLOCK)
731 					break;
732 			}
733 		}
734 		ks->ks_waiters--;
735 		if (error)
736 			goto err;
737 	}
738 	ks->ks_value--;
739 	error = 0;
740 err:
741 	if (ks != NULL)
742 		sem_rel(ks);
743 	mtx_unlock(&sem_lock);
744 	DP(("<<< kern_sem_wait leaving, error = %d\n", error));
745 	return (error);
746 }
747 
748 #ifndef _SYS_SYSPROTO_H_
749 struct ksem_getvalue_args {
750 	semid_t id;
751 	int *val;
752 };
753 int ksem_getvalue(struct thread *td, struct ksem_getvalue_args *uap);
754 #endif
755 int
756 ksem_getvalue(struct thread *td, struct ksem_getvalue_args *uap)
757 {
758 	struct ksem *ks;
759 	int error, val;
760 
761 	mtx_lock(&sem_lock);
762 	ks = ID_TO_SEM(uap->id);
763 	if (ks == NULL || !sem_hasopen(td, ks)) {
764 		mtx_unlock(&sem_lock);
765 		return (EINVAL);
766 	}
767 #ifdef MAC
768 	error = mac_check_posix_sem_getvalue(td->td_ucred, ks);
769 	if (error) {
770 		mtx_unlock(&sem_lock);
771 		return (error);
772 	}
773 #endif
774 	val = ks->ks_value;
775 	mtx_unlock(&sem_lock);
776 	error = copyout(&val, uap->val, sizeof(val));
777 	return (error);
778 }
779 
780 #ifndef _SYS_SYSPROTO_H_
781 struct ksem_destroy_args {
782 	semid_t id;
783 };
784 int ksem_destroy(struct thread *td, struct ksem_destroy_args *uap);
785 #endif
786 int
787 ksem_destroy(struct thread *td, struct ksem_destroy_args *uap)
788 {
789 	struct ksem *ks;
790 	int error;
791 
792 	mtx_lock(&sem_lock);
793 	ks = ID_TO_SEM(uap->id);
794 	if (ks == NULL || !sem_hasopen(td, ks) ||
795 	    ks->ks_name != NULL) {
796 		error = EINVAL;
797 		goto err;
798 	}
799 #ifdef MAC
800 	error = mac_check_posix_sem_destroy(td->td_ucred, ks);
801 	if (error)
802 		goto err;
803 #endif
804 	if (ks->ks_waiters != 0) {
805 		error = EBUSY;
806 		goto err;
807 	}
808 	sem_rel(ks);
809 	error = 0;
810 err:
811 	mtx_unlock(&sem_lock);
812 	return (error);
813 }
814 
815 /*
816  * Count the number of kusers associated with a proc, so as to guess at how
817  * many to allocate when forking.
818  */
819 static int
820 sem_count_proc(struct proc *p)
821 {
822 	struct ksem *ks;
823 	struct kuser *ku;
824 	int count;
825 
826 	mtx_assert(&sem_lock, MA_OWNED);
827 
828 	count = 0;
829 	LIST_FOREACH(ks, &ksem_head, ks_entry) {
830 		LIST_FOREACH(ku, &ks->ks_users, ku_next) {
831 			if (ku->ku_pid == p->p_pid)
832 				count++;
833 		}
834 	}
835 	LIST_FOREACH(ks, &ksem_deadhead, ks_entry) {
836 		LIST_FOREACH(ku, &ks->ks_users, ku_next) {
837 			if (ku->ku_pid == p->p_pid)
838 				count++;
839 		}
840 	}
841 	return (count);
842 }
843 
844 /*
845  * When a process forks, the child process must gain a reference to each open
846  * semaphore in the parent process, whether it is unlinked or not.  This
847  * requires allocating a kuser structure for each semaphore reference in the
848  * new process.  Because the set of semaphores in the parent can change while
849  * the fork is in progress, we have to handle races -- first we attempt to
850  * allocate enough storage to acquire references to each of the semaphores,
851  * then we enter the semaphores and release the temporary references.
852  */
853 static void
854 sem_forkhook(void *arg, struct proc *p1, struct proc *p2, int flags)
855 {
856 	struct ksem *ks, **sem_array;
857 	int count, i, new_count;
858 	struct kuser *ku;
859 
860 	mtx_lock(&sem_lock);
861 	count = sem_count_proc(p1);
862 	if (count == 0) {
863 		mtx_unlock(&sem_lock);
864 		return;
865 	}
866 race_lost:
867 	mtx_assert(&sem_lock, MA_OWNED);
868 	mtx_unlock(&sem_lock);
869 	sem_array = malloc(sizeof(struct ksem *) * count, M_TEMP, M_WAITOK);
870 	mtx_lock(&sem_lock);
871 	new_count = sem_count_proc(p1);
872 	if (count < new_count) {
873 		/* Lost race, repeat and allocate more storage. */
874 		free(sem_array, M_TEMP);
875 		count = new_count;
876 		goto race_lost;
877 	}
878 	/*
879 	 * Given an array capable of storing an adequate number of semaphore
880 	 * references, now walk the list of semaphores and acquire a new
881 	 * reference for any semaphore opened by p1.
882 	 */
883 	count = new_count;
884 	i = 0;
885 	LIST_FOREACH(ks, &ksem_head, ks_entry) {
886 		LIST_FOREACH(ku, &ks->ks_users, ku_next) {
887 			if (ku->ku_pid == p1->p_pid) {
888 				sem_ref(ks);
889 				sem_array[i] = ks;
890 				i++;
891 				break;
892 			}
893 		}
894 	}
895 	LIST_FOREACH(ks, &ksem_deadhead, ks_entry) {
896 		LIST_FOREACH(ku, &ks->ks_users, ku_next) {
897 			if (ku->ku_pid == p1->p_pid) {
898 				sem_ref(ks);
899 				sem_array[i] = ks;
900 				i++;
901 				break;
902 			}
903 		}
904 	}
905 	mtx_unlock(&sem_lock);
906 	KASSERT(i == count, ("sem_forkhook: i != count (%d, %d)", i, count));
907 	/*
908 	 * Now cause p2 to enter each of the referenced semaphores, then
909 	 * release our temporary reference.  This is pretty inefficient.
910 	 * Finally, free our temporary array.
911 	 */
912 	for (i = 0; i < count; i++) {
913 		sem_enter(p2, sem_array[i]);
914 		mtx_lock(&sem_lock);
915 		sem_rel(sem_array[i]);
916 		mtx_unlock(&sem_lock);
917 	}
918 	free(sem_array, M_TEMP);
919 }
920 
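/*
 * On process exit or exec, drop whatever references the process still
 * holds on live and unlinked semaphores.
 */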
921 static void
922 sem_exithook(void *arg, struct proc *p)
923 {
924 	struct ksem *ks, *ksnext;
925 
926 	mtx_lock(&sem_lock);
927 	ks = LIST_FIRST(&ksem_head);
928 	while (ks != NULL) {
929 		ksnext = LIST_NEXT(ks, ks_entry);
930 		sem_leave(p, ks);
931 		ks = ksnext;
932 	}
933 	ks = LIST_FIRST(&ksem_deadhead);
934 	while (ks != NULL) {
935 		ksnext = LIST_NEXT(ks, ks_entry);
936 		sem_leave(p, ks);
937 		ks = ksnext;
938 	}
939 	mtx_unlock(&sem_lock);
940 }
941 
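/*
 * Module glue: set up the lock, the POSIX configuration values and the
 * exit/exec/fork event handlers on load, and tear them down again on
 * unload.  Unloading is refused while any semaphores exist.
 */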
942 static int
943 sem_modload(struct module *module, int cmd, void *arg)
944 {
945 	int error = 0;
946 
947 	switch (cmd) {
948 	case MOD_LOAD:
949 		mtx_init(&sem_lock, "sem", "semaphore", MTX_DEF);
950 		p31b_setcfg(CTL_P1003_1B_SEM_NSEMS_MAX, SEM_MAX);
951 		p31b_setcfg(CTL_P1003_1B_SEM_VALUE_MAX, SEM_VALUE_MAX);
952 		sem_exit_tag = EVENTHANDLER_REGISTER(process_exit, sem_exithook,
953 		    NULL, EVENTHANDLER_PRI_ANY);
954 		sem_exec_tag = EVENTHANDLER_REGISTER(process_exec, sem_exithook,
955 		    NULL, EVENTHANDLER_PRI_ANY);
956 		sem_fork_tag = EVENTHANDLER_REGISTER(process_fork, sem_forkhook, NULL, EVENTHANDLER_PRI_ANY);
957 		break;
958 	case MOD_UNLOAD:
959 		if (nsems != 0) {
960 			error = EOPNOTSUPP;
961 			break;
962 		}
963 		EVENTHANDLER_DEREGISTER(process_exit, sem_exit_tag);
964 		EVENTHANDLER_DEREGISTER(process_exec, sem_exec_tag);
965 		EVENTHANDLER_DEREGISTER(process_fork, sem_fork_tag);
966 		mtx_destroy(&sem_lock);
967 		break;
968 	case MOD_SHUTDOWN:
969 		break;
970 	default:
971 		error = EINVAL;
972 		break;
973 	}
974 	return (error);
975 }
976 
977 static moduledata_t sem_mod = {
978 	"sem",
979 	&sem_modload,
980 	NULL
981 };
982 
983 SYSCALL_MODULE_HELPER(ksem_init);
984 SYSCALL_MODULE_HELPER(ksem_open);
985 SYSCALL_MODULE_HELPER(ksem_unlink);
986 SYSCALL_MODULE_HELPER(ksem_close);
987 SYSCALL_MODULE_HELPER(ksem_post);
988 SYSCALL_MODULE_HELPER(ksem_wait);
989 SYSCALL_MODULE_HELPER(ksem_timedwait);
990 SYSCALL_MODULE_HELPER(ksem_trywait);
991 SYSCALL_MODULE_HELPER(ksem_getvalue);
992 SYSCALL_MODULE_HELPER(ksem_destroy);
993 
994 DECLARE_MODULE(sem, sem_mod, SI_SUB_SYSV_SEM, SI_ORDER_FIRST);
995 MODULE_VERSION(sem, 1);
996