xref: /freebsd/sys/kern/uipc_sem.c (revision 84ee9401a3fc8d3c22424266f421a928989cd692)
1 /*-
2  * Copyright (c) 2002 Alfred Perlstein <alfred@FreeBSD.org>
3  * Copyright (c) 2003-2005 SPARTA, Inc.
4  * Copyright (c) 2005 Robert N. M. Watson
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project in part by Network
8  * Associates Laboratories, the Security Research Division of Network
9  * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"),
10  * as part of the DARPA CHATS research program.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include "opt_mac.h"
38 #include "opt_posix.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/sysproto.h>
43 #include <sys/eventhandler.h>
44 #include <sys/kernel.h>
45 #include <sys/proc.h>
46 #include <sys/lock.h>
47 #include <sys/mutex.h>
48 #include <sys/module.h>
49 #include <sys/condvar.h>
50 #include <sys/sem.h>
51 #include <sys/uio.h>
52 #include <sys/syscall.h>
53 #include <sys/stat.h>
54 #include <sys/sysent.h>
55 #include <sys/sysctl.h>
56 #include <sys/time.h>
57 #include <sys/mac.h>
58 #include <sys/malloc.h>
59 #include <sys/fcntl.h>
60 
61 #include <posix4/ksem.h>
62 #include <posix4/posix4.h>
63 #include <posix4/semaphore.h>
64 #include <posix4/_semaphore.h>
65 
66 static int sem_count_proc(struct proc *p);
67 static struct ksem *sem_lookup_byname(const char *name);
68 static int sem_create(struct thread *td, const char *name,
69     struct ksem **ksret, mode_t mode, unsigned int value);
70 static void sem_free(struct ksem *ksnew);
71 static int sem_perm(struct thread *td, struct ksem *ks);
72 static void sem_enter(struct proc *p, struct ksem *ks);
73 static int sem_leave(struct proc *p, struct ksem *ks);
74 static void sem_exechook(void *arg, struct proc *p, struct image_params *imgp);
75 static void sem_exithook(void *arg, struct proc *p);
76 static void sem_forkhook(void *arg, struct proc *p1, struct proc *p2,
77     int flags);
78 static int sem_hasopen(struct thread *td, struct ksem *ks);
79 
80 static int kern_sem_close(struct thread *td, semid_t id);
81 static int kern_sem_post(struct thread *td, semid_t id);
82 static int kern_sem_wait(struct thread *td, semid_t id, int tryflag,
83     struct timespec *abstime);
84 static int kern_sem_init(struct thread *td, int dir, unsigned int value,
85     semid_t *idp);
86 static int kern_sem_open(struct thread *td, int dir, const char *name,
87     int oflag, mode_t mode, unsigned int value, semid_t *idp);
88 static int kern_sem_unlink(struct thread *td, const char *name);
89 
90 #ifndef SEM_MAX
91 #define SEM_MAX	30
92 #endif
93 
94 #define SEM_MAX_NAMELEN	14
95 
96 #define SEM_TO_ID(x)	((intptr_t)(x))
97 #define ID_TO_SEM(x)	id_to_sem(x)
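/*
 * A semid_t handed back to the caller is simply the kernel address of the
 * ksem, and ID_TO_SEM() validates it by walking ksem_head, so a stale or
 * forged id resolves to NULL rather than a dangling pointer.  A sketch of
 * the pattern the syscalls below follow (not a new interface):
 *
 *	id = SEM_TO_ID(ks);		// handle returned to the caller
 *	...
 *	mtx_lock(&sem_lock);
 *	ks = ID_TO_SEM(id);		// NULL if no longer on ksem_head
 *	mtx_unlock(&sem_lock);
 */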
98 
99 /*
100  * Available semaphores go here: semaphores created with sem_init() and
101  * any created via sem_open() that have not yet been unlinked.
102  */
103 LIST_HEAD(, ksem) ksem_head = LIST_HEAD_INITIALIZER(&ksem_head);
104 /*
105  * Semaphores that are still in use but have been sem_unlink()'d go here.
106  */
107 LIST_HEAD(, ksem) ksem_deadhead = LIST_HEAD_INITIALIZER(&ksem_deadhead);
108 
109 static struct mtx sem_lock;
110 static MALLOC_DEFINE(M_SEM, "sems", "semaphore data");
111 
112 static int nsems = 0;
113 SYSCTL_DECL(_p1003_1b);
114 SYSCTL_INT(_p1003_1b, OID_AUTO, nsems, CTLFLAG_RD, &nsems, 0, "");
115 
116 static eventhandler_tag sem_exit_tag, sem_exec_tag, sem_fork_tag;
117 
118 #ifdef SEM_DEBUG
119 #define DP(x)	printf x
120 #else
121 #define DP(x)
122 #endif
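/*
 * DP() compiles away unless SEM_DEBUG is defined at build time; defining it
 * (for example by adding -DSEM_DEBUG to the kernel compile flags, an
 * illustrative choice rather than an existing kernel option) turns each
 * DP((...)) below into a printf.
 */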
123 
124 static __inline
125 void
126 sem_ref(struct ksem *ks)
127 {
128 
129 	mtx_assert(&sem_lock, MA_OWNED);
130 	ks->ks_ref++;
131 	DP(("sem_ref: ks = %p, ref = %d\n", ks, ks->ks_ref));
132 }
133 
134 static __inline
135 void
136 sem_rel(struct ksem *ks)
137 {
138 
139 	mtx_assert(&sem_lock, MA_OWNED);
140 	DP(("sem_rel: ks = %p, ref = %d\n", ks, ks->ks_ref - 1));
141 	if (--ks->ks_ref == 0)
142 		sem_free(ks);
143 }
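/*
 * sem_ref() and sem_rel() must be called with sem_lock held, and sem_rel()
 * frees the semaphore once the last reference is dropped.  A minimal sketch
 * of the pattern used throughout this file (see e.g. sem_forkhook()):
 *
 *	mtx_lock(&sem_lock);
 *	sem_ref(ks);			// pin ks before dropping the lock
 *	mtx_unlock(&sem_lock);
 *	...				// work that may sleep
 *	mtx_lock(&sem_lock);
 *	sem_rel(ks);			// may free ks if this was the last ref
 *	mtx_unlock(&sem_lock);
 */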
144 
145 static __inline struct ksem *id_to_sem(semid_t id);
146 
147 static __inline
148 struct ksem *
149 id_to_sem(semid_t id)
150 {
151 	struct ksem *ks;
152 
153 	mtx_assert(&sem_lock, MA_OWNED);
154 	DP(("id_to_sem: id = %0x,%p\n", id, (struct ksem *)id));
155 	LIST_FOREACH(ks, &ksem_head, ks_entry) {
156 		DP(("id_to_sem: ks = %p\n", ks));
157 		if (ks == (struct ksem *)id)
158 			return (ks);
159 	}
160 	return (NULL);
161 }
162 
163 static struct ksem *
164 sem_lookup_byname(const char *name)
165 {
166 	struct ksem *ks;
167 
168 	mtx_assert(&sem_lock, MA_OWNED);
169 	LIST_FOREACH(ks, &ksem_head, ks_entry)
170 		if (ks->ks_name != NULL && strcmp(ks->ks_name, name) == 0)
171 			return (ks);
172 	return (NULL);
173 }
174 
175 static int
176 sem_create(struct thread *td, const char *name, struct ksem **ksret,
177     mode_t mode, unsigned int value)
178 {
179 	struct ksem *ret;
180 	struct proc *p;
181 	struct ucred *uc;
182 	size_t len;
183 	int error;
184 
185 	DP(("sem_create\n"));
186 	p = td->td_proc;
187 	uc = td->td_ucred;
188 	if (value > SEM_VALUE_MAX)
189 		return (EINVAL);
190 	ret = malloc(sizeof(*ret), M_SEM, M_WAITOK | M_ZERO);
191 	if (name != NULL) {
192 		len = strlen(name);
193 		if (len > SEM_MAX_NAMELEN) {
194 			free(ret, M_SEM);
195 			return (ENAMETOOLONG);
196 		}
197 		/* The name must start with a '/' and contain no other '/'. */
198 		if (*name != '/' || len < 2 || index(name + 1, '/') != NULL) {
199 			free(ret, M_SEM);
200 			return (EINVAL);
201 		}
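		/*
		 * For example, "/mysem" is accepted here, while "mysem"
		 * (no leading '/'), "/" (empty name) and "/a/b" (embedded
		 * '/') are all rejected with EINVAL.
		 */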
202 		ret->ks_name = malloc(len + 1, M_SEM, M_WAITOK);
203 		strcpy(ret->ks_name, name);
204 	} else {
205 		ret->ks_name = NULL;
206 	}
207 	ret->ks_mode = mode;
208 	ret->ks_value = value;
209 	ret->ks_ref = 1;
210 	ret->ks_waiters = 0;
211 	ret->ks_uid = uc->cr_uid;
212 	ret->ks_gid = uc->cr_gid;
213 	ret->ks_onlist = 0;
214 	cv_init(&ret->ks_cv, "sem");
215 	LIST_INIT(&ret->ks_users);
216 #ifdef MAC
217 	mac_init_posix_sem(ret);
218 	mac_create_posix_sem(uc, ret);
219 #endif
220 	if (name != NULL)
221 		sem_enter(td->td_proc, ret);
222 	*ksret = ret;
223 	mtx_lock(&sem_lock);
224 	if (nsems >= p31b_getcfg(CTL_P1003_1B_SEM_NSEMS_MAX)) {
225 		sem_leave(td->td_proc, ret);
226 		sem_free(ret);
227 		error = ENFILE;
228 	} else {
229 		nsems++;
230 		error = 0;
231 	}
232 	mtx_unlock(&sem_lock);
233 	return (error);
234 }
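/*
 * Both system call paths funnel through sem_create(): kern_sem_init()
 * passes a NULL name for an unnamed semaphore, while kern_sem_open()
 * passes the user-supplied name.  The two calls as they appear below:
 *
 *	error = sem_create(td, NULL, &ks, S_IRWXU | S_IRWXG, value);
 *	error = sem_create(td, name, &ksnew, mode, value);
 */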
235 
236 #ifndef _SYS_SYSPROTO_H_
237 struct ksem_init_args {
238 	unsigned int value;
239 	semid_t *idp;
240 };
241 int ksem_init(struct thread *td, struct ksem_init_args *uap);
242 #endif
243 int
244 ksem_init(struct thread *td, struct ksem_init_args *uap)
245 {
246 	int error;
247 
248 	error = kern_sem_init(td, UIO_USERSPACE, uap->value, uap->idp);
249 	return (error);
250 }
251 
252 static int
253 kern_sem_init(struct thread *td, int dir, unsigned int value, semid_t *idp)
254 {
255 	struct ksem *ks;
256 	semid_t id;
257 	int error;
258 
259 	error = sem_create(td, NULL, &ks, S_IRWXU | S_IRWXG, value);
260 	if (error)
261 		return (error);
262 	id = SEM_TO_ID(ks);
263 	if (dir == UIO_USERSPACE) {
264 		error = copyout(&id, idp, sizeof(id));
265 		if (error) {
266 			mtx_lock(&sem_lock);
267 			sem_rel(ks);
268 			mtx_unlock(&sem_lock);
269 			return (error);
270 		}
271 	} else {
272 		*idp = id;
273 	}
274 	mtx_lock(&sem_lock);
275 	LIST_INSERT_HEAD(&ksem_head, ks, ks_entry);
276 	ks->ks_onlist = 1;
277 	mtx_unlock(&sem_lock);
278 	return (error);
279 }
280 
281 #ifndef _SYS_SYSPROTO_H_
282 struct ksem_open_args {
283 	char *name;
284 	int oflag;
285 	mode_t mode;
286 	unsigned int value;
287 	semid_t *idp;
288 };
289 int ksem_open(struct thread *td, struct ksem_open_args *uap);
290 #endif
291 int
292 ksem_open(struct thread *td, struct ksem_open_args *uap)
293 {
294 	char name[SEM_MAX_NAMELEN + 1];
295 	size_t done;
296 	int error;
297 
298 	error = copyinstr(uap->name, name, SEM_MAX_NAMELEN + 1, &done);
299 	if (error)
300 		return (error);
301 	DP((">>> sem_open start\n"));
302 	error = kern_sem_open(td, UIO_USERSPACE,
303 	    name, uap->oflag, uap->mode, uap->value, uap->idp);
304 	DP(("<<< sem_open end\n"));
305 	return (error);
306 }
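/*
 * From userland this backs the POSIX sem_open() interface.  A hedged
 * illustration, assuming the ksem_*() syscalls are invoked directly rather
 * than through the libc wrappers:
 *
 *	semid_t id;
 *
 *	if (ksem_open("/mysem", O_CREAT, 0600, 1, &id) == 0) {
 *		ksem_wait(id);		// P()
 *		ksem_post(id);		// V()
 *		ksem_close(id);
 *	}
 */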
307 
308 static int
309 kern_sem_open(struct thread *td, int dir, const char *name, int oflag,
310     mode_t mode, unsigned int value, semid_t *idp)
311 {
312 	struct ksem *ksnew, *ks;
313 	int error;
314 	semid_t id;
315 
316 	ksnew = NULL;
317 	mtx_lock(&sem_lock);
318 	ks = sem_lookup_byname(name);
319 	/*
320 	 * If we found it but O_EXCL is set, error.
321 	 */
322 	if (ks != NULL && (oflag & O_EXCL) != 0) {
323 		mtx_unlock(&sem_lock);
324 		return (EEXIST);
325 	}
326 	/*
327 	 * If we didn't find it...
328 	 */
329 	if (ks == NULL) {
330 		/*
331 		 * The caller didn't ask for creation, so fail with ENOENT.
332 		 */
333 		if ((oflag & O_CREAT) == 0) {
334 			mtx_unlock(&sem_lock);
335 			return (ENOENT);
336 		}
337 		/*
338 		 * We may block during creation, so drop the lock.
339 		 */
340 		mtx_unlock(&sem_lock);
341 		error = sem_create(td, name, &ksnew, mode, value);
342 		if (error != 0)
343 			return (error);
344 		id = SEM_TO_ID(ksnew);
345 		if (dir == UIO_USERSPACE) {
346 			DP(("about to copyout! %d to %p\n", id, idp));
347 			error = copyout(&id, idp, sizeof(id));
348 			if (error) {
349 				mtx_lock(&sem_lock);
350 				sem_leave(td->td_proc, ksnew);
351 				sem_rel(ksnew);
352 				mtx_unlock(&sem_lock);
353 				return (error);
354 			}
355 		} else {
356 			DP(("about to set! %d to %p\n", id, idp));
357 			*idp = id;
358 		}
359 		/*
360 		 * Make sure we haven't lost a race with another ksem_open()
361 		 * while we were blocked allocating the new semaphore.
362 		 */
363 		mtx_lock(&sem_lock);
364 		ks = sem_lookup_byname(name);
365 		if (ks != NULL) {
366 			/* We lost the race; release our new semaphore. */
367 			sem_leave(td->td_proc, ksnew);
368 			sem_rel(ksnew);
369 			/* If O_EXCL was set, the existing semaphore is an error. */
370 			if ((oflag & O_EXCL) != 0) {
371 				mtx_unlock(&sem_lock);
372 				return (EEXIST);
373 			}
374 		} else {
375 			DP(("sem_create: about to add to list...\n"));
376 			LIST_INSERT_HEAD(&ksem_head, ksnew, ks_entry);
377 			DP(("sem_create: setting list bit...\n"));
378 			ksnew->ks_onlist = 1;
379 			DP(("sem_create: done, about to unlock...\n"));
380 		}
381 	} else {
382 #ifdef MAC
383 		error = mac_check_posix_sem_open(td->td_ucred, ks);
384 		if (error)
385 			goto err_open;
386 #endif
387 		/*
388 		 * The semaphore already exists, so enforce its permissions.
389 		 */
390 		error = sem_perm(td, ks);
391 		if (error)
392 			goto err_open;
393 		sem_ref(ks);
394 		mtx_unlock(&sem_lock);
395 		id = SEM_TO_ID(ks);
396 		if (dir == UIO_USERSPACE) {
397 			error = copyout(&id, idp, sizeof(id));
398 			if (error) {
399 				mtx_lock(&sem_lock);
400 				sem_rel(ks);
401 				mtx_unlock(&sem_lock);
402 				return (error);
403 			}
404 		} else {
405 			*idp = id;
406 		}
407 		sem_enter(td->td_proc, ks);
408 		mtx_lock(&sem_lock);
409 		sem_rel(ks);
410 	}
411 err_open:
412 	mtx_unlock(&sem_lock);
413 	return (error);
414 }
415 
416 static int
417 sem_perm(struct thread *td, struct ksem *ks)
418 {
419 	struct ucred *uc;
420 
421 	uc = td->td_ucred;
422 	DP(("sem_perm: uc(%d,%d) ks(%d,%d,%o)\n",
423 	    uc->cr_uid, uc->cr_gid,
424 	     ks->ks_uid, ks->ks_gid, ks->ks_mode));
425 	if ((uc->cr_uid == ks->ks_uid && (ks->ks_mode & S_IWUSR) != 0) ||
426 	    (uc->cr_gid == ks->ks_gid && (ks->ks_mode & S_IWGRP) != 0) ||
427 	    (ks->ks_mode & S_IWOTH) != 0 || suser(td) == 0)
428 		return (0);
429 	return (EPERM);
430 }
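/*
 * Only the write bits are consulted.  For instance (illustrative numbers),
 * a semaphore created with mode 0620 by uid 100/gid 100 may be opened by
 * uid 100 (S_IWUSR), by a process whose effective gid is 100 (S_IWGRP), or
 * by the superuser; everyone else gets EPERM.
 */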
431 
432 static void
433 sem_free(struct ksem *ks)
434 {
435 
436 	nsems--;
437 	if (ks->ks_onlist)
438 		LIST_REMOVE(ks, ks_entry);
439 	if (ks->ks_name != NULL)
440 		free(ks->ks_name, M_SEM);
441 	cv_destroy(&ks->ks_cv);
442 	free(ks, M_SEM);
443 }
444 
445 static __inline struct kuser *sem_getuser(struct proc *p, struct ksem *ks);
446 
447 static __inline struct kuser *
448 sem_getuser(struct proc *p, struct ksem *ks)
449 {
450 	struct kuser *k;
451 
452 	LIST_FOREACH(k, &ks->ks_users, ku_next)
453 		if (k->ku_pid == p->p_pid)
454 			return (k);
455 	return (NULL);
456 }
457 
458 static int
459 sem_hasopen(struct thread *td, struct ksem *ks)
460 {
461 
462 	return ((ks->ks_name == NULL && sem_perm(td, ks) == 0)
463 	    || sem_getuser(td->td_proc, ks) != NULL);
464 }
465 
466 static int
467 sem_leave(struct proc *p, struct ksem *ks)
468 {
469 	struct kuser *k;
470 
471 	DP(("sem_leave: ks = %p\n", ks));
472 	k = sem_getuser(p, ks);
473 	DP(("sem_leave: ks = %p, k = %p\n", ks, k));
474 	if (k != NULL) {
475 		LIST_REMOVE(k, ku_next);
476 		sem_rel(ks);
477 		DP(("sem_leave: about to free k\n"));
478 		free(k, M_SEM);
479 		DP(("sem_leave: returning\n"));
480 		return (0);
481 	}
482 	return (EINVAL);
483 }
484 
485 static void
486 sem_enter(struct proc *p, struct ksem *ks)
487 {
490 	struct kuser *ku, *k;
491 
492 	ku = malloc(sizeof(*ku), M_SEM, M_WAITOK);
493 	ku->ku_pid = p->p_pid;
494 	mtx_lock(&sem_lock);
495 	k = sem_getuser(p, ks);
496 	if (k != NULL) {
497 		mtx_unlock(&sem_lock);
498 		free(ku, M_SEM);
499 		return;
500 	}
501 	LIST_INSERT_HEAD(&ks->ks_users, ku, ku_next);
502 	sem_ref(ks);
503 	mtx_unlock(&sem_lock);
504 }
505 
506 #ifndef _SYS_SYSPROTO_H_
507 struct ksem_unlink_args {
508 	char *name;
509 };
510 int ksem_unlink(struct thread *td, struct ksem_unlink_args *uap);
511 #endif
512 
513 int
514 ksem_unlink(struct thread *td, struct ksem_unlink_args *uap)
515 {
516 	char name[SEM_MAX_NAMELEN + 1];
517 	size_t done;
518 	int error;
519 
520 	error = copyinstr(uap->name, name, SEM_MAX_NAMELEN + 1, &done);
521 	return (error ? error :
522 	    kern_sem_unlink(td, name));
523 }
524 
525 static int
526 kern_sem_unlink(struct thread *td, const char *name)
527 {
528 	struct ksem *ks;
529 	int error;
530 
531 	mtx_lock(&sem_lock);
532 	ks = sem_lookup_byname(name);
533 	if (ks != NULL) {
534 #ifdef MAC
535 		error = mac_check_posix_sem_unlink(td->td_ucred, ks);
536 		if (error) {
537 			mtx_unlock(&sem_lock);
538 			return (error);
539 		}
540 #endif
541 		error = sem_perm(td, ks);
542 	} else
543 		error = ENOENT;
544 	DP(("sem_unlink: '%s' ks = %p, error = %d\n", name, ks, error));
545 	if (error == 0) {
546 		LIST_REMOVE(ks, ks_entry);
547 		LIST_INSERT_HEAD(&ksem_deadhead, ks, ks_entry);
548 		sem_rel(ks);
549 	}
550 	mtx_unlock(&sem_lock);
551 	return (error);
552 }
553 
554 #ifndef _SYS_SYSPROTO_H_
555 struct ksem_close_args {
556 	semid_t id;
557 };
558 int ksem_close(struct thread *td, struct ksem_close_args *uap);
559 #endif
560 
561 int
562 ksem_close(struct thread *td, struct ksem_close_args *uap)
563 {
564 
565 	return (kern_sem_close(td, uap->id));
566 }
567 
568 static int
569 kern_sem_close(struct thread *td, semid_t id)
570 {
571 	struct ksem *ks;
572 	int error;
573 
574 	error = EINVAL;
575 	mtx_lock(&sem_lock);
576 	ks = ID_TO_SEM(id);
577 	/* Closing is not a valid operation for unnamed semaphores. */
578 	if (ks != NULL && ks->ks_name != NULL)
579 		error = sem_leave(td->td_proc, ks);
580 	mtx_unlock(&sem_lock);
581 	return (error);
582 }
583 
584 #ifndef _SYS_SYSPROTO_H_
585 struct ksem_post_args {
586 	semid_t id;
587 };
588 int ksem_post(struct thread *td, struct ksem_post_args *uap);
589 #endif
590 int
591 ksem_post(struct thread *td, struct ksem_post_args *uap)
592 {
593 
594 	return (kern_sem_post(td, uap->id));
595 }
596 
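/*
 * Post (V) on a semaphore.  Fails with EINVAL when the id is not open in
 * the calling process and with EOVERFLOW when the count is already at
 * SEM_VALUE_MAX; otherwise the count is bumped and one waiter, if any,
 * is signalled.
 */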
597 static int
598 kern_sem_post(struct thread *td, semid_t id)
599 {
600 	struct ksem *ks;
601 	int error;
602 
603 	mtx_lock(&sem_lock);
604 	ks = ID_TO_SEM(id);
605 	if (ks == NULL || !sem_hasopen(td, ks)) {
606 		error = EINVAL;
607 		goto err;
608 	}
609 #ifdef MAC
610 	error = mac_check_posix_sem_post(td->td_ucred, ks);
611 	if (error)
612 		goto err;
613 #endif
614 	if (ks->ks_value == SEM_VALUE_MAX) {
615 		error = EOVERFLOW;
616 		goto err;
617 	}
618 	++ks->ks_value;
619 	if (ks->ks_waiters > 0)
620 		cv_signal(&ks->ks_cv);
621 	error = 0;
622 err:
623 	mtx_unlock(&sem_lock);
624 	return (error);
625 }
626 
627 #ifndef _SYS_SYSPROTO_H_
628 struct ksem_wait_args {
629 	semid_t id;
630 };
631 int ksem_wait(struct thread *td, struct ksem_wait_args *uap);
632 #endif
633 
634 int
635 ksem_wait(struct thread *td, struct ksem_wait_args *uap)
636 {
637 
638 	return (kern_sem_wait(td, uap->id, 0, NULL));
639 }
640 
641 #ifndef _SYS_SYSPROTO_H_
642 struct ksem_timedwait_args {
643 	semid_t id;
644 	const struct timespec *abstime;
645 };
646 int ksem_timedwait(struct thread *td, struct ksem_timedwait_args *uap);
647 #endif
648 int
649 ksem_timedwait(struct thread *td, struct ksem_timedwait_args *uap)
650 {
651 	struct timespec abstime;
652 	struct timespec *ts;
653 	int error;
654 
655 	/* We allow a null timespec (wait forever). */
656 	if (uap->abstime == NULL)
657 		ts = NULL;
658 	else {
659 		error = copyin(uap->abstime, &abstime, sizeof(abstime));
660 		if (error != 0)
661 			return (error);
662 		if (abstime.tv_nsec >= 1000000000 || abstime.tv_nsec < 0)
663 			return (EINVAL);
664 		ts = &abstime;
665 	}
666 	return (kern_sem_wait(td, uap->id, 0, ts));
667 }
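/*
 * The timeout is an absolute time on the realtime clock; kern_sem_wait()
 * converts it to a relative interval with getnanotime().  A sketch of how
 * a caller might build one, assuming the usual clock_gettime(2) userland
 * interface:
 *
 *	struct timespec abs;
 *
 *	clock_gettime(CLOCK_REALTIME, &abs);
 *	abs.tv_sec += 5;			// give up after ~5 seconds
 *	error = ksem_timedwait(id, &abs);
 */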
668 
669 #ifndef _SYS_SYSPROTO_H_
670 struct ksem_trywait_args {
671 	semid_t id;
672 };
673 int ksem_trywait(struct thread *td, struct ksem_trywait_args *uap);
674 #endif
675 int
676 ksem_trywait(struct thread *td, struct ksem_trywait_args *uap)
677 {
678 
679 	return (kern_sem_wait(td, uap->id, 1, NULL));
680 }
681 
682 static int
683 kern_sem_wait(struct thread *td, semid_t id, int tryflag,
684     struct timespec *abstime)
685 {
686 	struct timespec ts1, ts2;
687 	struct timeval tv;
688 	struct ksem *ks;
689 	int error;
690 
691 	DP((">>> kern_sem_wait entered!\n"));
692 	mtx_lock(&sem_lock);
693 	ks = ID_TO_SEM(id);
694 	if (ks == NULL) {
695 		DP(("kern_sem_wait ks == NULL\n"));
696 		error = EINVAL;
697 		goto err;
698 	}
699 	sem_ref(ks);
700 	if (!sem_hasopen(td, ks)) {
701 		DP(("kern_sem_wait hasopen failed\n"));
702 		error = EINVAL;
703 		goto err;
704 	}
705 #ifdef MAC
706 	error = mac_check_posix_sem_wait(td->td_ucred, ks);
707 	if (error) {
708 		DP(("kern_sem_wait mac failed\n"));
709 		goto err;
710 	}
711 #endif
712 	DP(("kern_sem_wait value = %d, tryflag %d\n", ks->ks_value, tryflag));
713 	if (ks->ks_value == 0) {
714 		ks->ks_waiters++;
715 		if (tryflag != 0)
716 			error = EAGAIN;
717 		else if (abstime == NULL)
718 			error = cv_wait_sig(&ks->ks_cv, &sem_lock);
719 		else {
720 			for (;;) {
721 				ts1 = *abstime;
722 				getnanotime(&ts2);
723 				timespecsub(&ts1, &ts2);
724 				TIMESPEC_TO_TIMEVAL(&tv, &ts1);
725 				if (tv.tv_sec < 0) {
726 					error = ETIMEDOUT;
727 					break;
728 				}
729 				error = cv_timedwait_sig(&ks->ks_cv,
730 				    &sem_lock, tvtohz(&tv));
731 				if (error != EWOULDBLOCK)
732 					break;
733 			}
734 		}
735 		ks->ks_waiters--;
736 		if (error)
737 			goto err;
738 	}
739 	ks->ks_value--;
740 	error = 0;
741 err:
742 	if (ks != NULL)
743 		sem_rel(ks);
744 	mtx_unlock(&sem_lock);
745 	DP(("<<< kern_sem_wait leaving, error = %d\n", error));
746 	return (error);
747 }
748 
749 #ifndef _SYS_SYSPROTO_H_
750 struct ksem_getvalue_args {
751 	semid_t id;
752 	int *val;
753 };
754 int ksem_getvalue(struct thread *td, struct ksem_getvalue_args *uap);
755 #endif
756 int
757 ksem_getvalue(struct thread *td, struct ksem_getvalue_args *uap)
758 {
759 	struct ksem *ks;
760 	int error, val;
761 
762 	mtx_lock(&sem_lock);
763 	ks = ID_TO_SEM(uap->id);
764 	if (ks == NULL || !sem_hasopen(td, ks)) {
765 		mtx_unlock(&sem_lock);
766 		return (EINVAL);
767 	}
768 #ifdef MAC
769 	error = mac_check_posix_sem_getvalue(td->td_ucred, ks);
770 	if (error) {
771 		mtx_unlock(&sem_lock);
772 		return (error);
773 	}
774 #endif
775 	val = ks->ks_value;
776 	mtx_unlock(&sem_lock);
777 	error = copyout(&val, uap->val, sizeof(val));
778 	return (error);
779 }
780 
781 #ifndef _SYS_SYSPROTO_H_
782 struct ksem_destroy_args {
783 	semid_t id;
784 };
785 int ksem_destroy(struct thread *td, struct ksem_destroy_args *uap);
786 #endif
787 int
788 ksem_destroy(struct thread *td, struct ksem_destroy_args *uap)
789 {
790 	struct ksem *ks;
791 	int error;
792 
793 	mtx_lock(&sem_lock);
794 	ks = ID_TO_SEM(uap->id);
795 	if (ks == NULL || !sem_hasopen(td, ks) ||
796 	    ks->ks_name != NULL) {
797 		error = EINVAL;
798 		goto err;
799 	}
800 #ifdef MAC
801 	error = mac_check_posix_sem_destroy(td->td_ucred, ks);
802 	if (error)
803 		goto err;
804 #endif
805 	if (ks->ks_waiters != 0) {
806 		error = EBUSY;
807 		goto err;
808 	}
809 	sem_rel(ks);
810 	error = 0;
811 err:
812 	mtx_unlock(&sem_lock);
813 	return (error);
814 }
815 
816 /*
817  * Count the number of kusers associated with a proc, so as to guess at how
818  * many to allocate when forking.
819  */
820 static int
821 sem_count_proc(struct proc *p)
822 {
823 	struct ksem *ks;
824 	struct kuser *ku;
825 	int count;
826 
827 	mtx_assert(&sem_lock, MA_OWNED);
828 
829 	count = 0;
830 	LIST_FOREACH(ks, &ksem_head, ks_entry) {
831 		LIST_FOREACH(ku, &ks->ks_users, ku_next) {
832 			if (ku->ku_pid == p->p_pid)
833 				count++;
834 		}
835 	}
836 	LIST_FOREACH(ks, &ksem_deadhead, ks_entry) {
837 		LIST_FOREACH(ku, &ks->ks_users, ku_next) {
838 			if (ku->ku_pid == p->p_pid)
839 				count++;
840 		}
841 	}
842 	return (count);
843 }
844 
845 /*
846  * When a process forks, the child process must gain a reference to each open
847  * semaphore in the parent process, whether it is unlinked or not.  This
848  * requires allocating a kuser structure for each semaphore reference in the
849  * new process.  Because the set of semaphores in the parent can change while
850  * the fork is in progress, we have to handle races -- first we attempt to
851  * allocate enough storage to acquire references to each of the semaphores,
852  * then we enter the semaphores and release the temporary references.
853  */
854 static void
855 sem_forkhook(void *arg, struct proc *p1, struct proc *p2, int flags)
856 {
857 	struct ksem *ks, **sem_array;
858 	int count, i, new_count;
859 	struct kuser *ku;
860 
861 	mtx_lock(&sem_lock);
862 	count = sem_count_proc(p1);
863 	if (count == 0) {
864 		mtx_unlock(&sem_lock);
865 		return;
866 	}
867 race_lost:
868 	mtx_assert(&sem_lock, MA_OWNED);
869 	mtx_unlock(&sem_lock);
870 	sem_array = malloc(sizeof(struct ksem *) * count, M_TEMP, M_WAITOK);
871 	mtx_lock(&sem_lock);
872 	new_count = sem_count_proc(p1);
873 	if (count < new_count) {
874 		/* Lost race, repeat and allocate more storage. */
875 		free(sem_array, M_TEMP);
876 		count = new_count;
877 		goto race_lost;
878 	}
879 	/*
880 	 * Given an array capable of storing an adequate number of semaphore
881 	 * references, now walk the list of semaphores and acquire a new
882 	 * reference for any semaphore opened by p1.
883 	 */
884 	count = new_count;
885 	i = 0;
886 	LIST_FOREACH(ks, &ksem_head, ks_entry) {
887 		LIST_FOREACH(ku, &ks->ks_users, ku_next) {
888 			if (ku->ku_pid == p1->p_pid) {
889 				sem_ref(ks);
890 				sem_array[i] = ks;
891 				i++;
892 				break;
893 			}
894 		}
895 	}
896 	LIST_FOREACH(ks, &ksem_deadhead, ks_entry) {
897 		LIST_FOREACH(ku, &ks->ks_users, ku_next) {
898 			if (ku->ku_pid == p1->p_pid) {
899 				sem_ref(ks);
900 				sem_array[i] = ks;
901 				i++;
902 				break;
903 			}
904 		}
905 	}
906 	mtx_unlock(&sem_lock);
907 	KASSERT(i == count, ("sem_forkhook: i != count (%d, %d)", i, count));
908 	/*
909 	 * Now cause p2 to enter each of the referenced semaphores, then
910 	 * release our temporary reference.  This is pretty inefficient.
911 	 * Finally, free our temporary array.
912 	 */
913 	for (i = 0; i < count; i++) {
914 		sem_enter(p2, sem_array[i]);
915 		mtx_lock(&sem_lock);
916 		sem_rel(sem_array[i]);
917 		mtx_unlock(&sem_lock);
918 	}
919 	free(sem_array, M_TEMP);
920 }
921 
922 static void
923 sem_exechook(void *arg, struct proc *p, struct image_params *imgp __unused)
924 {
925 	sem_exithook(arg, p);
926 }
927 
928 static void
929 sem_exithook(void *arg, struct proc *p)
930 {
931 	struct ksem *ks, *ksnext;
932 
933 	mtx_lock(&sem_lock);
934 	ks = LIST_FIRST(&ksem_head);
935 	while (ks != NULL) {
936 		ksnext = LIST_NEXT(ks, ks_entry);
937 		sem_leave(p, ks);
938 		ks = ksnext;
939 	}
940 	ks = LIST_FIRST(&ksem_deadhead);
941 	while (ks != NULL) {
942 		ksnext = LIST_NEXT(ks, ks_entry);
943 		sem_leave(p, ks);
944 		ks = ksnext;
945 	}
946 	mtx_unlock(&sem_lock);
947 }
948 
949 static int
950 sem_modload(struct module *module, int cmd, void *arg)
951 {
952 	int error = 0;
953 
954 	switch (cmd) {
955 	case MOD_LOAD:
956 		mtx_init(&sem_lock, "sem", "semaphore", MTX_DEF);
957 		p31b_setcfg(CTL_P1003_1B_SEM_NSEMS_MAX, SEM_MAX);
958 		p31b_setcfg(CTL_P1003_1B_SEM_VALUE_MAX, SEM_VALUE_MAX);
959 		sem_exit_tag = EVENTHANDLER_REGISTER(process_exit, sem_exithook,
960 		    NULL, EVENTHANDLER_PRI_ANY);
961 		sem_exec_tag = EVENTHANDLER_REGISTER(process_exec, sem_exechook,
962 		    NULL, EVENTHANDLER_PRI_ANY);
963 		sem_fork_tag = EVENTHANDLER_REGISTER(process_fork, sem_forkhook, NULL, EVENTHANDLER_PRI_ANY);
964 		break;
965 	case MOD_UNLOAD:
966 		if (nsems != 0) {
967 			error = EOPNOTSUPP;
968 			break;
969 		}
970 		EVENTHANDLER_DEREGISTER(process_exit, sem_exit_tag);
971 		EVENTHANDLER_DEREGISTER(process_exec, sem_exec_tag);
972 		EVENTHANDLER_DEREGISTER(process_fork, sem_fork_tag);
973 		mtx_destroy(&sem_lock);
974 		break;
975 	case MOD_SHUTDOWN:
976 		break;
977 	default:
978 		error = EINVAL;
979 		break;
980 	}
981 	return (error);
982 }
983 
984 static moduledata_t sem_mod = {
985 	"sem",
986 	&sem_modload,
987 	NULL
988 };
989 
990 SYSCALL_MODULE_HELPER(ksem_init);
991 SYSCALL_MODULE_HELPER(ksem_open);
992 SYSCALL_MODULE_HELPER(ksem_unlink);
993 SYSCALL_MODULE_HELPER(ksem_close);
994 SYSCALL_MODULE_HELPER(ksem_post);
995 SYSCALL_MODULE_HELPER(ksem_wait);
996 SYSCALL_MODULE_HELPER(ksem_timedwait);
997 SYSCALL_MODULE_HELPER(ksem_trywait);
998 SYSCALL_MODULE_HELPER(ksem_getvalue);
999 SYSCALL_MODULE_HELPER(ksem_destroy);
1000 
1001 DECLARE_MODULE(sem, sem_mod, SI_SUB_SYSV_SEM, SI_ORDER_FIRST);
1002 MODULE_VERSION(sem, 1);
1003