xref: /freebsd/sys/kern/uipc_sem.c (revision 39ee7a7a6bdd1557b1c3532abf60d139798ac88b)
1 /*-
2  * Copyright (c) 2002 Alfred Perlstein <alfred@FreeBSD.org>
3  * Copyright (c) 2003-2005 SPARTA, Inc.
4  * Copyright (c) 2005 Robert N. M. Watson
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project in part by Network
8  * Associates Laboratories, the Security Research Division of Network
9  * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"),
10  * as part of the DARPA CHATS research program.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include "opt_compat.h"
38 #include "opt_posix.h"
39 
40 #include <sys/param.h>
41 #include <sys/capsicum.h>
42 #include <sys/condvar.h>
43 #include <sys/fcntl.h>
44 #include <sys/file.h>
45 #include <sys/filedesc.h>
46 #include <sys/fnv_hash.h>
47 #include <sys/kernel.h>
48 #include <sys/ksem.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/module.h>
52 #include <sys/mutex.h>
53 #include <sys/priv.h>
54 #include <sys/proc.h>
55 #include <sys/posix4.h>
56 #include <sys/_semaphore.h>
57 #include <sys/stat.h>
58 #include <sys/syscall.h>
59 #include <sys/syscallsubr.h>
60 #include <sys/sysctl.h>
61 #include <sys/sysent.h>
62 #include <sys/sysproto.h>
63 #include <sys/systm.h>
64 #include <sys/sx.h>
65 #include <sys/user.h>
66 #include <sys/vnode.h>
67 
68 #include <security/mac/mac_framework.h>
69 
70 FEATURE(p1003_1b_semaphores, "POSIX P1003.1B semaphores support");
71 /*
72  * TODO
73  *
74  * - Resource limits?
75  * - Replace global sem_lock with mtx_pool locks?
76  * - Add a MAC check_create() hook for creating new named semaphores.
77  */
78 
79 #ifndef SEM_MAX
80 #define	SEM_MAX	30
81 #endif
82 
83 #ifdef SEM_DEBUG
84 #define	DP(x)	printf x
85 #else
86 #define	DP(x)
87 #endif
88 
89 struct ksem_mapping {
90 	char		*km_path;
91 	Fnv32_t		km_fnv;
92 	struct ksem	*km_ksem;
93 	LIST_ENTRY(ksem_mapping) km_link;
94 };
95 
96 static MALLOC_DEFINE(M_KSEM, "ksem", "semaphore file descriptor");
97 static LIST_HEAD(, ksem_mapping) *ksem_dictionary;
98 static struct sx ksem_dict_lock;
99 static struct mtx ksem_count_lock;
100 static struct mtx sem_lock;
101 static u_long ksem_hash;
102 static int ksem_dead;
103 
104 #define	KSEM_HASH(fnv)	(&ksem_dictionary[(fnv) & ksem_hash])
105 
106 static int nsems = 0;
107 SYSCTL_DECL(_p1003_1b);
108 SYSCTL_INT(_p1003_1b, OID_AUTO, nsems, CTLFLAG_RD, &nsems, 0,
109     "Number of active kernel POSIX semaphores");
110 
111 static int	kern_sem_wait(struct thread *td, semid_t id, int tryflag,
112 		    struct timespec *abstime);
113 static int	ksem_access(struct ksem *ks, struct ucred *ucred);
114 static struct ksem *ksem_alloc(struct ucred *ucred, mode_t mode,
115 		    unsigned int value);
116 static int	ksem_create(struct thread *td, const char *path,
117 		    semid_t *semidp, mode_t mode, unsigned int value,
118 		    int flags, int compat32);
119 static void	ksem_drop(struct ksem *ks);
120 static int	ksem_get(struct thread *td, semid_t id, cap_rights_t *rightsp,
121     struct file **fpp);
122 static struct ksem *ksem_hold(struct ksem *ks);
123 static void	ksem_insert(char *path, Fnv32_t fnv, struct ksem *ks);
124 static struct ksem *ksem_lookup(char *path, Fnv32_t fnv);
125 static void	ksem_module_destroy(void);
126 static int	ksem_module_init(void);
127 static int	ksem_remove(char *path, Fnv32_t fnv, struct ucred *ucred);
128 static int	sem_modload(struct module *module, int cmd, void *arg);
129 
130 static fo_stat_t	ksem_stat;
131 static fo_close_t	ksem_closef;
132 static fo_chmod_t	ksem_chmod;
133 static fo_chown_t	ksem_chown;
134 static fo_fill_kinfo_t	ksem_fill_kinfo;
135 
136 /* File descriptor operations. */
137 static struct fileops ksem_ops = {
138 	.fo_read = invfo_rdwr,
139 	.fo_write = invfo_rdwr,
140 	.fo_truncate = invfo_truncate,
141 	.fo_ioctl = invfo_ioctl,
142 	.fo_poll = invfo_poll,
143 	.fo_kqfilter = invfo_kqfilter,
144 	.fo_stat = ksem_stat,
145 	.fo_close = ksem_closef,
146 	.fo_chmod = ksem_chmod,
147 	.fo_chown = ksem_chown,
148 	.fo_sendfile = invfo_sendfile,
149 	.fo_fill_kinfo = ksem_fill_kinfo,
150 	.fo_flags = DFLAG_PASSABLE
151 };
152 
153 FEATURE(posix_sem, "POSIX semaphores");
154 
155 static int
156 ksem_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
157     struct thread *td)
158 {
159 	struct ksem *ks;
160 #ifdef MAC
161 	int error;
162 #endif
163 
164 	ks = fp->f_data;
165 
166 #ifdef MAC
167 	error = mac_posixsem_check_stat(active_cred, fp->f_cred, ks);
168 	if (error)
169 		return (error);
170 #endif
171 
172 	/*
173 	 * Attempt to return sanish values for fstat() on a semaphore
174 	 * file descriptor.
175 	 */
176 	bzero(sb, sizeof(*sb));
177 
178 	mtx_lock(&sem_lock);
179 	sb->st_atim = ks->ks_atime;
180 	sb->st_ctim = ks->ks_ctime;
181 	sb->st_mtim = ks->ks_mtime;
182 	sb->st_birthtim = ks->ks_birthtime;
183 	sb->st_uid = ks->ks_uid;
184 	sb->st_gid = ks->ks_gid;
185 	sb->st_mode = S_IFREG | ks->ks_mode;		/* XXX */
186 	mtx_unlock(&sem_lock);
187 
188 	return (0);
189 }
190 
191 static int
192 ksem_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
193     struct thread *td)
194 {
195 	struct ksem *ks;
196 	int error;
197 
198 	error = 0;
199 	ks = fp->f_data;
200 	mtx_lock(&sem_lock);
201 #ifdef MAC
202 	error = mac_posixsem_check_setmode(active_cred, ks, mode);
203 	if (error != 0)
204 		goto out;
205 #endif
206 	error = vaccess(VREG, ks->ks_mode, ks->ks_uid, ks->ks_gid, VADMIN,
207 	    active_cred, NULL);
208 	if (error != 0)
209 		goto out;
210 	ks->ks_mode = mode & ACCESSPERMS;
211 out:
212 	mtx_unlock(&sem_lock);
213 	return (error);
214 }
215 
216 static int
217 ksem_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
218     struct thread *td)
219 {
220 	struct ksem *ks;
221 	int error;
222 
223 	error = 0;
224 	ks = fp->f_data;
225 	mtx_lock(&sem_lock);
226 #ifdef MAC
227 	error = mac_posixsem_check_setowner(active_cred, ks, uid, gid);
228 	if (error != 0)
229 		goto out;
230 #endif
231 	if (uid == (uid_t)-1)
232 		uid = ks->ks_uid;
233 	if (gid == (gid_t)-1)
234                  gid = ks->ks_gid;
235 	if (((uid != ks->ks_uid && uid != active_cred->cr_uid) ||
236 	    (gid != ks->ks_gid && !groupmember(gid, active_cred))) &&
237 	    (error = priv_check_cred(active_cred, PRIV_VFS_CHOWN, 0)))
238 		goto out;
239 	ks->ks_uid = uid;
240 	ks->ks_gid = gid;
241 out:
242 	mtx_unlock(&sem_lock);
243 	return (error);
244 }
245 
246 static int
247 ksem_closef(struct file *fp, struct thread *td)
248 {
249 	struct ksem *ks;
250 
251 	ks = fp->f_data;
252 	fp->f_data = NULL;
253 	ksem_drop(ks);
254 
255 	return (0);
256 }
257 
258 static int
259 ksem_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
260 {
261 	struct ksem *ks;
262 
263 	kif->kf_type = KF_TYPE_SEM;
264 	ks = fp->f_data;
265 	mtx_lock(&sem_lock);
266 	kif->kf_un.kf_sem.kf_sem_value = ks->ks_value;
267 	kif->kf_un.kf_sem.kf_sem_mode = S_IFREG | ks->ks_mode;	/* XXX */
268 	mtx_unlock(&sem_lock);
269 	if (ks->ks_path != NULL) {
270 		sx_slock(&ksem_dict_lock);
271 		if (ks->ks_path != NULL)
272 			strlcpy(kif->kf_path, ks->ks_path, sizeof(kif->kf_path));
273 		sx_sunlock(&ksem_dict_lock);
274 	}
275 	return (0);
276 }
277 
278 /*
279  * ksem object management including creation and reference counting
280  * routines.
281  */
282 static struct ksem *
283 ksem_alloc(struct ucred *ucred, mode_t mode, unsigned int value)
284 {
285 	struct ksem *ks;
286 
287 	mtx_lock(&ksem_count_lock);
288 	if (nsems == p31b_getcfg(CTL_P1003_1B_SEM_NSEMS_MAX) || ksem_dead) {
289 		mtx_unlock(&ksem_count_lock);
290 		return (NULL);
291 	}
292 	nsems++;
293 	mtx_unlock(&ksem_count_lock);
294 	ks = malloc(sizeof(*ks), M_KSEM, M_WAITOK | M_ZERO);
295 	ks->ks_uid = ucred->cr_uid;
296 	ks->ks_gid = ucred->cr_gid;
297 	ks->ks_mode = mode;
298 	ks->ks_value = value;
299 	cv_init(&ks->ks_cv, "ksem");
300 	vfs_timestamp(&ks->ks_birthtime);
301 	ks->ks_atime = ks->ks_mtime = ks->ks_ctime = ks->ks_birthtime;
302 	refcount_init(&ks->ks_ref, 1);
303 #ifdef MAC
304 	mac_posixsem_init(ks);
305 	mac_posixsem_create(ucred, ks);
306 #endif
307 
308 	return (ks);
309 }
310 
311 static struct ksem *
312 ksem_hold(struct ksem *ks)
313 {
314 
315 	refcount_acquire(&ks->ks_ref);
316 	return (ks);
317 }
318 
319 static void
320 ksem_drop(struct ksem *ks)
321 {
322 
323 	if (refcount_release(&ks->ks_ref)) {
324 #ifdef MAC
325 		mac_posixsem_destroy(ks);
326 #endif
327 		cv_destroy(&ks->ks_cv);
328 		free(ks, M_KSEM);
329 		mtx_lock(&ksem_count_lock);
330 		nsems--;
331 		mtx_unlock(&ksem_count_lock);
332 	}
333 }
334 
335 /*
336  * Determine if the credentials have sufficient permissions for read
337  * and write access.
338  */
339 static int
340 ksem_access(struct ksem *ks, struct ucred *ucred)
341 {
342 	int error;
343 
344 	error = vaccess(VREG, ks->ks_mode, ks->ks_uid, ks->ks_gid,
345 	    VREAD | VWRITE, ucred, NULL);
346 	if (error)
347 		error = priv_check_cred(ucred, PRIV_SEM_WRITE, 0);
348 	return (error);
349 }
350 
351 /*
352  * Dictionary management.  We maintain an in-kernel dictionary to map
353  * paths to semaphore objects.  We use the FNV hash on the path to
354  * store the mappings in a hash table.
355  */
356 static struct ksem *
357 ksem_lookup(char *path, Fnv32_t fnv)
358 {
359 	struct ksem_mapping *map;
360 
361 	LIST_FOREACH(map, KSEM_HASH(fnv), km_link) {
362 		if (map->km_fnv != fnv)
363 			continue;
364 		if (strcmp(map->km_path, path) == 0)
365 			return (map->km_ksem);
366 	}
367 
368 	return (NULL);
369 }
370 
371 static void
372 ksem_insert(char *path, Fnv32_t fnv, struct ksem *ks)
373 {
374 	struct ksem_mapping *map;
375 
376 	map = malloc(sizeof(struct ksem_mapping), M_KSEM, M_WAITOK);
377 	map->km_path = path;
378 	map->km_fnv = fnv;
379 	map->km_ksem = ksem_hold(ks);
380 	ks->ks_path = path;
381 	LIST_INSERT_HEAD(KSEM_HASH(fnv), map, km_link);
382 }
383 
384 static int
385 ksem_remove(char *path, Fnv32_t fnv, struct ucred *ucred)
386 {
387 	struct ksem_mapping *map;
388 	int error;
389 
390 	LIST_FOREACH(map, KSEM_HASH(fnv), km_link) {
391 		if (map->km_fnv != fnv)
392 			continue;
393 		if (strcmp(map->km_path, path) == 0) {
394 #ifdef MAC
395 			error = mac_posixsem_check_unlink(ucred, map->km_ksem);
396 			if (error)
397 				return (error);
398 #endif
399 			error = ksem_access(map->km_ksem, ucred);
400 			if (error)
401 				return (error);
402 			map->km_ksem->ks_path = NULL;
403 			LIST_REMOVE(map, km_link);
404 			ksem_drop(map->km_ksem);
405 			free(map->km_path, M_KSEM);
406 			free(map, M_KSEM);
407 			return (0);
408 		}
409 	}
410 
411 	return (ENOENT);
412 }
413 
414 static int
415 ksem_create_copyout_semid(struct thread *td, semid_t *semidp, int fd,
416     int compat32)
417 {
418 	semid_t semid;
419 #ifdef COMPAT_FREEBSD32
420 	int32_t semid32;
421 #endif
422 	void *ptr;
423 	size_t ptrs;
424 
425 #ifdef COMPAT_FREEBSD32
426 	if (compat32) {
427 		semid32 = fd;
428 		ptr = &semid32;
429 		ptrs = sizeof(semid32);
430 	} else {
431 #endif
432 		semid = fd;
433 		ptr = &semid;
434 		ptrs = sizeof(semid);
435 		compat32 = 0; /* silence gcc */
436 #ifdef COMPAT_FREEBSD32
437 	}
438 #endif
439 
440 	return (copyout(ptr, semidp, ptrs));
441 }
442 
443 /* Other helper routines. */
444 static int
445 ksem_create(struct thread *td, const char *name, semid_t *semidp, mode_t mode,
446     unsigned int value, int flags, int compat32)
447 {
448 	struct filedesc *fdp;
449 	struct ksem *ks;
450 	struct file *fp;
451 	char *path;
452 	Fnv32_t fnv;
453 	int error, fd;
454 
455 	if (value > SEM_VALUE_MAX)
456 		return (EINVAL);
457 
458 	fdp = td->td_proc->p_fd;
459 	mode = (mode & ~fdp->fd_cmask) & ACCESSPERMS;
460 	error = falloc(td, &fp, &fd, O_CLOEXEC);
461 	if (error) {
462 		if (name == NULL)
463 			error = ENOSPC;
464 		return (error);
465 	}
466 
467 	/*
468 	 * Go ahead and copyout the file descriptor now.  This is a bit
469 	 * premature, but it is a lot easier to handle errors as opposed
470 	 * to later when we've possibly created a new semaphore, etc.
471 	 */
472 	error = ksem_create_copyout_semid(td, semidp, fd, compat32);
473 	if (error) {
474 		fdclose(td, fp, fd);
475 		fdrop(fp, td);
476 		return (error);
477 	}
478 
479 	if (name == NULL) {
480 		/* Create an anonymous semaphore. */
481 		ks = ksem_alloc(td->td_ucred, mode, value);
482 		if (ks == NULL)
483 			error = ENOSPC;
484 		else
485 			ks->ks_flags |= KS_ANONYMOUS;
486 	} else {
487 		path = malloc(MAXPATHLEN, M_KSEM, M_WAITOK);
488 		error = copyinstr(name, path, MAXPATHLEN, NULL);
489 
490 		/* Require paths to start with a '/' character. */
491 		if (error == 0 && path[0] != '/')
492 			error = EINVAL;
493 		if (error) {
494 			fdclose(td, fp, fd);
495 			fdrop(fp, td);
496 			free(path, M_KSEM);
497 			return (error);
498 		}
499 
500 		fnv = fnv_32_str(path, FNV1_32_INIT);
501 		sx_xlock(&ksem_dict_lock);
502 		ks = ksem_lookup(path, fnv);
503 		if (ks == NULL) {
504 			/* Object does not exist, create it if requested. */
505 			if (flags & O_CREAT) {
506 				ks = ksem_alloc(td->td_ucred, mode, value);
507 				if (ks == NULL)
508 					error = ENFILE;
509 				else {
510 					ksem_insert(path, fnv, ks);
511 					path = NULL;
512 				}
513 			} else
514 				error = ENOENT;
515 		} else {
516 			/*
517 			 * Object already exists, obtain a new
518 			 * reference if requested and permitted.
519 			 */
520 			if ((flags & (O_CREAT | O_EXCL)) ==
521 			    (O_CREAT | O_EXCL))
522 				error = EEXIST;
523 			else {
524 #ifdef MAC
525 				error = mac_posixsem_check_open(td->td_ucred,
526 				    ks);
527 				if (error == 0)
528 #endif
529 				error = ksem_access(ks, td->td_ucred);
530 			}
531 			if (error == 0)
532 				ksem_hold(ks);
533 #ifdef INVARIANTS
534 			else
535 				ks = NULL;
536 #endif
537 		}
538 		sx_xunlock(&ksem_dict_lock);
539 		if (path)
540 			free(path, M_KSEM);
541 	}
542 
543 	if (error) {
544 		KASSERT(ks == NULL, ("ksem_create error with a ksem"));
545 		fdclose(td, fp, fd);
546 		fdrop(fp, td);
547 		return (error);
548 	}
549 	KASSERT(ks != NULL, ("ksem_create w/o a ksem"));
550 
551 	finit(fp, FREAD | FWRITE, DTYPE_SEM, ks, &ksem_ops);
552 
553 	fdrop(fp, td);
554 
555 	return (0);
556 }
557 
558 static int
559 ksem_get(struct thread *td, semid_t id, cap_rights_t *rightsp,
560     struct file **fpp)
561 {
562 	struct ksem *ks;
563 	struct file *fp;
564 	int error;
565 
566 	error = fget(td, id, rightsp, &fp);
567 	if (error)
568 		return (EINVAL);
569 	if (fp->f_type != DTYPE_SEM) {
570 		fdrop(fp, td);
571 		return (EINVAL);
572 	}
573 	ks = fp->f_data;
574 	if (ks->ks_flags & KS_DEAD) {
575 		fdrop(fp, td);
576 		return (EINVAL);
577 	}
578 	*fpp = fp;
579 	return (0);
580 }
581 
582 /* System calls. */
583 #ifndef _SYS_SYSPROTO_H_
584 struct ksem_init_args {
585 	unsigned int	value;
586 	semid_t		*idp;
587 };
588 #endif
589 int
590 sys_ksem_init(struct thread *td, struct ksem_init_args *uap)
591 {
592 
593 	return (ksem_create(td, NULL, uap->idp, S_IRWXU | S_IRWXG, uap->value,
594 	    0, 0));
595 }
596 
597 #ifndef _SYS_SYSPROTO_H_
598 struct ksem_open_args {
599 	char		*name;
600 	int		oflag;
601 	mode_t		mode;
602 	unsigned int	value;
603 	semid_t		*idp;
604 };
605 #endif
606 int
607 sys_ksem_open(struct thread *td, struct ksem_open_args *uap)
608 {
609 
610 	DP((">>> ksem_open start, pid=%d\n", (int)td->td_proc->p_pid));
611 
612 	if ((uap->oflag & ~(O_CREAT | O_EXCL)) != 0)
613 		return (EINVAL);
614 	return (ksem_create(td, uap->name, uap->idp, uap->mode, uap->value,
615 	    uap->oflag, 0));
616 }
617 
618 #ifndef _SYS_SYSPROTO_H_
619 struct ksem_unlink_args {
620 	char		*name;
621 };
622 #endif
623 int
624 sys_ksem_unlink(struct thread *td, struct ksem_unlink_args *uap)
625 {
626 	char *path;
627 	Fnv32_t fnv;
628 	int error;
629 
630 	path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
631 	error = copyinstr(uap->name, path, MAXPATHLEN, NULL);
632 	if (error) {
633 		free(path, M_TEMP);
634 		return (error);
635 	}
636 
637 	fnv = fnv_32_str(path, FNV1_32_INIT);
638 	sx_xlock(&ksem_dict_lock);
639 	error = ksem_remove(path, fnv, td->td_ucred);
640 	sx_xunlock(&ksem_dict_lock);
641 	free(path, M_TEMP);
642 
643 	return (error);
644 }
645 
646 #ifndef _SYS_SYSPROTO_H_
647 struct ksem_close_args {
648 	semid_t		id;
649 };
650 #endif
651 int
652 sys_ksem_close(struct thread *td, struct ksem_close_args *uap)
653 {
654 	cap_rights_t rights;
655 	struct ksem *ks;
656 	struct file *fp;
657 	int error;
658 
659 	/* No capability rights required to close a semaphore. */
660 	error = ksem_get(td, uap->id, cap_rights_init(&rights), &fp);
661 	if (error)
662 		return (error);
663 	ks = fp->f_data;
664 	if (ks->ks_flags & KS_ANONYMOUS) {
665 		fdrop(fp, td);
666 		return (EINVAL);
667 	}
668 	error = kern_close(td, uap->id);
669 	fdrop(fp, td);
670 	return (error);
671 }
672 
673 #ifndef _SYS_SYSPROTO_H_
674 struct ksem_post_args {
675 	semid_t	id;
676 };
677 #endif
678 int
679 sys_ksem_post(struct thread *td, struct ksem_post_args *uap)
680 {
681 	cap_rights_t rights;
682 	struct file *fp;
683 	struct ksem *ks;
684 	int error;
685 
686 	error = ksem_get(td, uap->id,
687 	    cap_rights_init(&rights, CAP_SEM_POST), &fp);
688 	if (error)
689 		return (error);
690 	ks = fp->f_data;
691 
692 	mtx_lock(&sem_lock);
693 #ifdef MAC
694 	error = mac_posixsem_check_post(td->td_ucred, fp->f_cred, ks);
695 	if (error)
696 		goto err;
697 #endif
698 	if (ks->ks_value == SEM_VALUE_MAX) {
699 		error = EOVERFLOW;
700 		goto err;
701 	}
702 	++ks->ks_value;
703 	if (ks->ks_waiters > 0)
704 		cv_signal(&ks->ks_cv);
705 	error = 0;
706 	vfs_timestamp(&ks->ks_ctime);
707 err:
708 	mtx_unlock(&sem_lock);
709 	fdrop(fp, td);
710 	return (error);
711 }
712 
713 #ifndef _SYS_SYSPROTO_H_
714 struct ksem_wait_args {
715 	semid_t		id;
716 };
717 #endif
718 int
719 sys_ksem_wait(struct thread *td, struct ksem_wait_args *uap)
720 {
721 
722 	return (kern_sem_wait(td, uap->id, 0, NULL));
723 }
724 
725 #ifndef _SYS_SYSPROTO_H_
726 struct ksem_timedwait_args {
727 	semid_t		id;
728 	const struct timespec *abstime;
729 };
730 #endif
731 int
732 sys_ksem_timedwait(struct thread *td, struct ksem_timedwait_args *uap)
733 {
734 	struct timespec abstime;
735 	struct timespec *ts;
736 	int error;
737 
738 	/*
739 	 * We allow a null timespec (wait forever).
740 	 */
741 	if (uap->abstime == NULL)
742 		ts = NULL;
743 	else {
744 		error = copyin(uap->abstime, &abstime, sizeof(abstime));
745 		if (error != 0)
746 			return (error);
747 		if (abstime.tv_nsec >= 1000000000 || abstime.tv_nsec < 0)
748 			return (EINVAL);
749 		ts = &abstime;
750 	}
751 	return (kern_sem_wait(td, uap->id, 0, ts));
752 }
753 
754 #ifndef _SYS_SYSPROTO_H_
755 struct ksem_trywait_args {
756 	semid_t		id;
757 };
758 #endif
759 int
760 sys_ksem_trywait(struct thread *td, struct ksem_trywait_args *uap)
761 {
762 
763 	return (kern_sem_wait(td, uap->id, 1, NULL));
764 }
765 
766 static int
767 kern_sem_wait(struct thread *td, semid_t id, int tryflag,
768     struct timespec *abstime)
769 {
770 	struct timespec ts1, ts2;
771 	struct timeval tv;
772 	cap_rights_t rights;
773 	struct file *fp;
774 	struct ksem *ks;
775 	int error;
776 
777 	DP((">>> kern_sem_wait entered! pid=%d\n", (int)td->td_proc->p_pid));
778 	error = ksem_get(td, id, cap_rights_init(&rights, CAP_SEM_WAIT), &fp);
779 	if (error)
780 		return (error);
781 	ks = fp->f_data;
782 	mtx_lock(&sem_lock);
783 	DP((">>> kern_sem_wait critical section entered! pid=%d\n",
784 	    (int)td->td_proc->p_pid));
785 #ifdef MAC
786 	error = mac_posixsem_check_wait(td->td_ucred, fp->f_cred, ks);
787 	if (error) {
788 		DP(("kern_sem_wait mac failed\n"));
789 		goto err;
790 	}
791 #endif
792 	DP(("kern_sem_wait value = %d, tryflag %d\n", ks->ks_value, tryflag));
793 	vfs_timestamp(&ks->ks_atime);
794 	while (ks->ks_value == 0) {
795 		ks->ks_waiters++;
796 		if (tryflag != 0)
797 			error = EAGAIN;
798 		else if (abstime == NULL)
799 			error = cv_wait_sig(&ks->ks_cv, &sem_lock);
800 		else {
801 			for (;;) {
802 				ts1 = *abstime;
803 				getnanotime(&ts2);
804 				timespecsub(&ts1, &ts2);
805 				TIMESPEC_TO_TIMEVAL(&tv, &ts1);
806 				if (tv.tv_sec < 0) {
807 					error = ETIMEDOUT;
808 					break;
809 				}
810 				error = cv_timedwait_sig(&ks->ks_cv,
811 				    &sem_lock, tvtohz(&tv));
812 				if (error != EWOULDBLOCK)
813 					break;
814 			}
815 		}
816 		ks->ks_waiters--;
817 		if (error)
818 			goto err;
819 	}
820 	ks->ks_value--;
821 	DP(("kern_sem_wait value post-decrement = %d\n", ks->ks_value));
822 	error = 0;
823 err:
824 	mtx_unlock(&sem_lock);
825 	fdrop(fp, td);
826 	DP(("<<< kern_sem_wait leaving, pid=%d, error = %d\n",
827 	    (int)td->td_proc->p_pid, error));
828 	return (error);
829 }
830 
831 #ifndef _SYS_SYSPROTO_H_
832 struct ksem_getvalue_args {
833 	semid_t		id;
834 	int		*val;
835 };
836 #endif
837 int
838 sys_ksem_getvalue(struct thread *td, struct ksem_getvalue_args *uap)
839 {
840 	cap_rights_t rights;
841 	struct file *fp;
842 	struct ksem *ks;
843 	int error, val;
844 
845 	error = ksem_get(td, uap->id,
846 	    cap_rights_init(&rights, CAP_SEM_GETVALUE), &fp);
847 	if (error)
848 		return (error);
849 	ks = fp->f_data;
850 
851 	mtx_lock(&sem_lock);
852 #ifdef MAC
853 	error = mac_posixsem_check_getvalue(td->td_ucred, fp->f_cred, ks);
854 	if (error) {
855 		mtx_unlock(&sem_lock);
856 		fdrop(fp, td);
857 		return (error);
858 	}
859 #endif
860 	val = ks->ks_value;
861 	vfs_timestamp(&ks->ks_atime);
862 	mtx_unlock(&sem_lock);
863 	fdrop(fp, td);
864 	error = copyout(&val, uap->val, sizeof(val));
865 	return (error);
866 }
867 
868 #ifndef _SYS_SYSPROTO_H_
869 struct ksem_destroy_args {
870 	semid_t		id;
871 };
872 #endif
873 int
874 sys_ksem_destroy(struct thread *td, struct ksem_destroy_args *uap)
875 {
876 	cap_rights_t rights;
877 	struct file *fp;
878 	struct ksem *ks;
879 	int error;
880 
881 	/* No capability rights required to close a semaphore. */
882 	error = ksem_get(td, uap->id, cap_rights_init(&rights), &fp);
883 	if (error)
884 		return (error);
885 	ks = fp->f_data;
886 	if (!(ks->ks_flags & KS_ANONYMOUS)) {
887 		fdrop(fp, td);
888 		return (EINVAL);
889 	}
890 	mtx_lock(&sem_lock);
891 	if (ks->ks_waiters != 0) {
892 		mtx_unlock(&sem_lock);
893 		error = EBUSY;
894 		goto err;
895 	}
896 	ks->ks_flags |= KS_DEAD;
897 	mtx_unlock(&sem_lock);
898 
899 	error = kern_close(td, uap->id);
900 err:
901 	fdrop(fp, td);
902 	return (error);
903 }
904 
905 static struct syscall_helper_data ksem_syscalls[] = {
906 	SYSCALL_INIT_HELPER(ksem_init),
907 	SYSCALL_INIT_HELPER(ksem_open),
908 	SYSCALL_INIT_HELPER(ksem_unlink),
909 	SYSCALL_INIT_HELPER(ksem_close),
910 	SYSCALL_INIT_HELPER(ksem_post),
911 	SYSCALL_INIT_HELPER(ksem_wait),
912 	SYSCALL_INIT_HELPER(ksem_timedwait),
913 	SYSCALL_INIT_HELPER(ksem_trywait),
914 	SYSCALL_INIT_HELPER(ksem_getvalue),
915 	SYSCALL_INIT_HELPER(ksem_destroy),
916 	SYSCALL_INIT_LAST
917 };
918 
919 #ifdef COMPAT_FREEBSD32
920 #include <compat/freebsd32/freebsd32.h>
921 #include <compat/freebsd32/freebsd32_proto.h>
922 #include <compat/freebsd32/freebsd32_signal.h>
923 #include <compat/freebsd32/freebsd32_syscall.h>
924 #include <compat/freebsd32/freebsd32_util.h>
925 
926 int
927 freebsd32_ksem_init(struct thread *td, struct freebsd32_ksem_init_args *uap)
928 {
929 
930 	return (ksem_create(td, NULL, uap->idp, S_IRWXU | S_IRWXG, uap->value,
931 	    0, 1));
932 }
933 
934 int
935 freebsd32_ksem_open(struct thread *td, struct freebsd32_ksem_open_args *uap)
936 {
937 
938 	if ((uap->oflag & ~(O_CREAT | O_EXCL)) != 0)
939 		return (EINVAL);
940 	return (ksem_create(td, uap->name, uap->idp, uap->mode, uap->value,
941 	    uap->oflag, 1));
942 }
943 
944 int
945 freebsd32_ksem_timedwait(struct thread *td,
946     struct freebsd32_ksem_timedwait_args *uap)
947 {
948 	struct timespec32 abstime32;
949 	struct timespec *ts, abstime;
950 	int error;
951 
952 	/*
953 	 * We allow a null timespec (wait forever).
954 	 */
955 	if (uap->abstime == NULL)
956 		ts = NULL;
957 	else {
958 		error = copyin(uap->abstime, &abstime32, sizeof(abstime32));
959 		if (error != 0)
960 			return (error);
961 		CP(abstime32, abstime, tv_sec);
962 		CP(abstime32, abstime, tv_nsec);
963 		if (abstime.tv_nsec >= 1000000000 || abstime.tv_nsec < 0)
964 			return (EINVAL);
965 		ts = &abstime;
966 	}
967 	return (kern_sem_wait(td, uap->id, 0, ts));
968 }
969 
970 static struct syscall_helper_data ksem32_syscalls[] = {
971 	SYSCALL32_INIT_HELPER(freebsd32_ksem_init),
972 	SYSCALL32_INIT_HELPER(freebsd32_ksem_open),
973 	SYSCALL32_INIT_HELPER_COMPAT(ksem_unlink),
974 	SYSCALL32_INIT_HELPER_COMPAT(ksem_close),
975 	SYSCALL32_INIT_HELPER_COMPAT(ksem_post),
976 	SYSCALL32_INIT_HELPER_COMPAT(ksem_wait),
977 	SYSCALL32_INIT_HELPER(freebsd32_ksem_timedwait),
978 	SYSCALL32_INIT_HELPER_COMPAT(ksem_trywait),
979 	SYSCALL32_INIT_HELPER_COMPAT(ksem_getvalue),
980 	SYSCALL32_INIT_HELPER_COMPAT(ksem_destroy),
981 	SYSCALL_INIT_LAST
982 };
983 #endif
984 
985 static int
986 ksem_module_init(void)
987 {
988 	int error;
989 
990 	mtx_init(&sem_lock, "sem", NULL, MTX_DEF);
991 	mtx_init(&ksem_count_lock, "ksem count", NULL, MTX_DEF);
992 	sx_init(&ksem_dict_lock, "ksem dictionary");
993 	ksem_dictionary = hashinit(1024, M_KSEM, &ksem_hash);
994 	p31b_setcfg(CTL_P1003_1B_SEMAPHORES, 200112L);
995 	p31b_setcfg(CTL_P1003_1B_SEM_NSEMS_MAX, SEM_MAX);
996 	p31b_setcfg(CTL_P1003_1B_SEM_VALUE_MAX, SEM_VALUE_MAX);
997 
998 	error = syscall_helper_register(ksem_syscalls, SY_THR_STATIC_KLD);
999 	if (error)
1000 		return (error);
1001 #ifdef COMPAT_FREEBSD32
1002 	error = syscall32_helper_register(ksem32_syscalls, SY_THR_STATIC_KLD);
1003 	if (error)
1004 		return (error);
1005 #endif
1006 	return (0);
1007 }
1008 
1009 static void
1010 ksem_module_destroy(void)
1011 {
1012 
1013 #ifdef COMPAT_FREEBSD32
1014 	syscall32_helper_unregister(ksem32_syscalls);
1015 #endif
1016 	syscall_helper_unregister(ksem_syscalls);
1017 
1018 	p31b_setcfg(CTL_P1003_1B_SEMAPHORES, 0);
1019 	hashdestroy(ksem_dictionary, M_KSEM, ksem_hash);
1020 	sx_destroy(&ksem_dict_lock);
1021 	mtx_destroy(&ksem_count_lock);
1022 	mtx_destroy(&sem_lock);
1023 	p31b_unsetcfg(CTL_P1003_1B_SEM_VALUE_MAX);
1024 	p31b_unsetcfg(CTL_P1003_1B_SEM_NSEMS_MAX);
1025 }
1026 
1027 static int
1028 sem_modload(struct module *module, int cmd, void *arg)
1029 {
1030         int error = 0;
1031 
1032         switch (cmd) {
1033         case MOD_LOAD:
1034 		error = ksem_module_init();
1035 		if (error)
1036 			ksem_module_destroy();
1037                 break;
1038 
1039         case MOD_UNLOAD:
1040 		mtx_lock(&ksem_count_lock);
1041 		if (nsems != 0) {
1042 			error = EOPNOTSUPP;
1043 			mtx_unlock(&ksem_count_lock);
1044 			break;
1045 		}
1046 		ksem_dead = 1;
1047 		mtx_unlock(&ksem_count_lock);
1048 		ksem_module_destroy();
1049                 break;
1050 
1051         case MOD_SHUTDOWN:
1052                 break;
1053         default:
1054                 error = EINVAL;
1055                 break;
1056         }
1057         return (error);
1058 }
1059 
1060 static moduledata_t sem_mod = {
1061         "sem",
1062         &sem_modload,
1063         NULL
1064 };
1065 
1066 DECLARE_MODULE(sem, sem_mod, SI_SUB_SYSV_SEM, SI_ORDER_FIRST);
1067 MODULE_VERSION(sem, 1);
1068