xref: /freebsd/sys/kern/uipc_sem.c (revision 788ca347b816afd83b2885e0c79aeeb88649b2ab)
1 /*-
2  * Copyright (c) 2002 Alfred Perlstein <alfred@FreeBSD.org>
3  * Copyright (c) 2003-2005 SPARTA, Inc.
4  * Copyright (c) 2005 Robert N. M. Watson
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project in part by Network
8  * Associates Laboratories, the Security Research Division of Network
9  * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"),
10  * as part of the DARPA CHATS research program.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include "opt_compat.h"
38 #include "opt_posix.h"
39 
40 #include <sys/param.h>
41 #include <sys/capsicum.h>
42 #include <sys/condvar.h>
43 #include <sys/fcntl.h>
44 #include <sys/file.h>
45 #include <sys/filedesc.h>
46 #include <sys/fnv_hash.h>
47 #include <sys/kernel.h>
48 #include <sys/ksem.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/module.h>
52 #include <sys/mutex.h>
53 #include <sys/priv.h>
54 #include <sys/proc.h>
55 #include <sys/posix4.h>
56 #include <sys/_semaphore.h>
57 #include <sys/stat.h>
58 #include <sys/syscall.h>
59 #include <sys/syscallsubr.h>
60 #include <sys/sysctl.h>
61 #include <sys/sysent.h>
62 #include <sys/sysproto.h>
63 #include <sys/systm.h>
64 #include <sys/sx.h>
65 #include <sys/user.h>
66 #include <sys/vnode.h>
67 
68 #include <security/mac/mac_framework.h>
69 
/* Register the feature string describing POSIX.1b semaphore support. */
FEATURE(p1003_1b_semaphores, "POSIX P1003.1B semaphores support");
/*
 * TODO
 *
 * - Resource limits?
 * - Replace global sem_lock with mtx_pool locks?
 * - Add a MAC check_create() hook for creating new named semaphores.
 */

/*
 * Value published as CTL_P1003_1B_SEM_NSEMS_MAX by ksem_module_init().
 * A build may predefine SEM_MAX to override the default.
 */
#ifndef SEM_MAX
#define	SEM_MAX	30
#endif

/*
 * Debug tracing.  DP((fmt, ...)) expands to a printf call when SEM_DEBUG
 * is defined and to nothing otherwise; the doubled parentheses pass the
 * whole argument list through the single macro parameter.
 */
#ifdef SEM_DEBUG
#define	DP(x)	printf x
#else
#define	DP(x)
#endif
88 
/*
 * One entry of the name dictionary, binding a path to a semaphore.
 * Entries are chained on the bucket selected by KSEM_HASH(km_fnv).
 */
struct ksem_mapping {
	/* Path string; released with M_KSEM by ksem_remove(). */
	char		*km_path;
	/* Hash of km_path, compared first to skip most strcmp() calls. */
	Fnv32_t		km_fnv;
	/* Semaphore named by km_path; ksem_insert() takes a reference. */
	struct ksem	*km_ksem;
	/* Hash bucket linkage. */
	LIST_ENTRY(ksem_mapping) km_link;
};
95 
/* Malloc type used for ksem objects, dictionary entries and paths. */
static MALLOC_DEFINE(M_KSEM, "ksem", "semaphore file descriptor");
/* Hash table of path -> semaphore mappings, created by hashinit(). */
static LIST_HEAD(, ksem_mapping) *ksem_dictionary;
/* Held by callers around dictionary lookup, insertion and removal. */
static struct sx ksem_dict_lock;
/* Guards nsems and ksem_dead. */
static struct mtx ksem_count_lock;
/* Single global mutex guarding the mutable state of every semaphore. */
static struct mtx sem_lock;
/* Bucket mask filled in by hashinit(). */
static u_long ksem_hash;
/* Set during MOD_UNLOAD; makes ksem_alloc() refuse new semaphores. */
static int ksem_dead;

/* Select the dictionary bucket for an FNV hash value. */
#define	KSEM_HASH(fnv)	(&ksem_dictionary[(fnv) & ksem_hash])

/*
 * Number of ksem objects currently allocated: incremented in ksem_alloc()
 * and decremented in ksem_drop().  Exported read-only through sysctl.
 */
static int nsems = 0;
SYSCTL_DECL(_p1003_1b);
SYSCTL_INT(_p1003_1b, OID_AUTO, nsems, CTLFLAG_RD, &nsems, 0,
    "Number of active kernel POSIX semaphores");
110 
/* Forward declarations of the file-local helpers defined below. */
static int	kern_sem_wait(struct thread *td, semid_t id, int tryflag,
		    struct timespec *abstime);
static int	ksem_access(struct ksem *ks, struct ucred *ucred);
static struct ksem *ksem_alloc(struct ucred *ucred, mode_t mode,
		    unsigned int value);
static int	ksem_create(struct thread *td, const char *path,
		    semid_t *semidp, mode_t mode, unsigned int value,
		    int flags, int compat32);
static void	ksem_drop(struct ksem *ks);
static int	ksem_get(struct thread *td, semid_t id, cap_rights_t *rightsp,
    struct file **fpp);
static struct ksem *ksem_hold(struct ksem *ks);
static void	ksem_insert(char *path, Fnv32_t fnv, struct ksem *ks);
static struct ksem *ksem_lookup(char *path, Fnv32_t fnv);
static void	ksem_module_destroy(void);
static int	ksem_module_init(void);
static int	ksem_remove(char *path, Fnv32_t fnv, struct ucred *ucred);
static int	sem_modload(struct module *module, int cmd, void *arg);

/*
 * File operation handlers installed in ksem_ops; each is declared through
 * the matching fo_*_t function typedef.
 */
static fo_stat_t	ksem_stat;
static fo_close_t	ksem_closef;
static fo_chmod_t	ksem_chmod;
static fo_chown_t	ksem_chown;
static fo_fill_kinfo_t	ksem_fill_kinfo;
135 
/*
 * File descriptor operations.  Only stat, close, chmod, chown and
 * fill_kinfo have semaphore-specific handlers; every other operation is
 * routed to the generic invfo_* handlers.
 */
static struct fileops ksem_ops = {
	.fo_read = invfo_rdwr,
	.fo_write = invfo_rdwr,
	.fo_truncate = invfo_truncate,
	.fo_ioctl = invfo_ioctl,
	.fo_poll = invfo_poll,
	.fo_kqfilter = invfo_kqfilter,
	.fo_stat = ksem_stat,
	.fo_close = ksem_closef,
	.fo_chmod = ksem_chmod,
	.fo_chown = ksem_chown,
	.fo_sendfile = invfo_sendfile,
	.fo_fill_kinfo = ksem_fill_kinfo,
	.fo_flags = DFLAG_PASSABLE
};

/* Register the "posix_sem" feature string. */
FEATURE(posix_sem, "POSIX semaphores");
154 
155 static int
156 ksem_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
157     struct thread *td)
158 {
159 	struct ksem *ks;
160 #ifdef MAC
161 	int error;
162 #endif
163 
164 	ks = fp->f_data;
165 
166 #ifdef MAC
167 	error = mac_posixsem_check_stat(active_cred, fp->f_cred, ks);
168 	if (error)
169 		return (error);
170 #endif
171 
172 	/*
173 	 * Attempt to return sanish values for fstat() on a semaphore
174 	 * file descriptor.
175 	 */
176 	bzero(sb, sizeof(*sb));
177 
178 	mtx_lock(&sem_lock);
179 	sb->st_atim = ks->ks_atime;
180 	sb->st_ctim = ks->ks_ctime;
181 	sb->st_mtim = ks->ks_mtime;
182 	sb->st_birthtim = ks->ks_birthtime;
183 	sb->st_uid = ks->ks_uid;
184 	sb->st_gid = ks->ks_gid;
185 	sb->st_mode = S_IFREG | ks->ks_mode;		/* XXX */
186 	mtx_unlock(&sem_lock);
187 
188 	return (0);
189 }
190 
191 static int
192 ksem_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
193     struct thread *td)
194 {
195 	struct ksem *ks;
196 	int error;
197 
198 	error = 0;
199 	ks = fp->f_data;
200 	mtx_lock(&sem_lock);
201 #ifdef MAC
202 	error = mac_posixsem_check_setmode(active_cred, ks, mode);
203 	if (error != 0)
204 		goto out;
205 #endif
206 	error = vaccess(VREG, ks->ks_mode, ks->ks_uid, ks->ks_gid, VADMIN,
207 	    active_cred, NULL);
208 	if (error != 0)
209 		goto out;
210 	ks->ks_mode = mode & ACCESSPERMS;
211 out:
212 	mtx_unlock(&sem_lock);
213 	return (error);
214 }
215 
216 static int
217 ksem_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
218     struct thread *td)
219 {
220 	struct ksem *ks;
221 	int error;
222 
223 	error = 0;
224 	ks = fp->f_data;
225 	mtx_lock(&sem_lock);
226 #ifdef MAC
227 	error = mac_posixsem_check_setowner(active_cred, ks, uid, gid);
228 	if (error != 0)
229 		goto out;
230 #endif
231 	if (uid == (uid_t)-1)
232 		uid = ks->ks_uid;
233 	if (gid == (gid_t)-1)
234                  gid = ks->ks_gid;
235 	if (((uid != ks->ks_uid && uid != active_cred->cr_uid) ||
236 	    (gid != ks->ks_gid && !groupmember(gid, active_cred))) &&
237 	    (error = priv_check_cred(active_cred, PRIV_VFS_CHOWN, 0)))
238 		goto out;
239 	ks->ks_uid = uid;
240 	ks->ks_gid = gid;
241 out:
242 	mtx_unlock(&sem_lock);
243 	return (error);
244 }
245 
246 static int
247 ksem_closef(struct file *fp, struct thread *td)
248 {
249 	struct ksem *ks;
250 
251 	ks = fp->f_data;
252 	fp->f_data = NULL;
253 	ksem_drop(ks);
254 
255 	return (0);
256 }
257 
258 static int
259 ksem_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
260 {
261 	struct ksem *ks;
262 
263 	kif->kf_type = KF_TYPE_SEM;
264 	ks = fp->f_data;
265 	mtx_lock(&sem_lock);
266 	kif->kf_un.kf_sem.kf_sem_value = ks->ks_value;
267 	kif->kf_un.kf_sem.kf_sem_mode = S_IFREG | ks->ks_mode;	/* XXX */
268 	mtx_unlock(&sem_lock);
269 	if (ks->ks_path != NULL) {
270 		sx_slock(&ksem_dict_lock);
271 		if (ks->ks_path != NULL)
272 			strlcpy(kif->kf_path, ks->ks_path, sizeof(kif->kf_path));
273 		sx_sunlock(&ksem_dict_lock);
274 	}
275 	return (0);
276 }
277 
278 /*
279  * ksem object management including creation and reference counting
280  * routines.
281  */
282 static struct ksem *
283 ksem_alloc(struct ucred *ucred, mode_t mode, unsigned int value)
284 {
285 	struct ksem *ks;
286 
287 	mtx_lock(&ksem_count_lock);
288 	if (nsems == p31b_getcfg(CTL_P1003_1B_SEM_NSEMS_MAX) || ksem_dead) {
289 		mtx_unlock(&ksem_count_lock);
290 		return (NULL);
291 	}
292 	nsems++;
293 	mtx_unlock(&ksem_count_lock);
294 	ks = malloc(sizeof(*ks), M_KSEM, M_WAITOK | M_ZERO);
295 	ks->ks_uid = ucred->cr_uid;
296 	ks->ks_gid = ucred->cr_gid;
297 	ks->ks_mode = mode;
298 	ks->ks_value = value;
299 	cv_init(&ks->ks_cv, "ksem");
300 	vfs_timestamp(&ks->ks_birthtime);
301 	ks->ks_atime = ks->ks_mtime = ks->ks_ctime = ks->ks_birthtime;
302 	refcount_init(&ks->ks_ref, 1);
303 #ifdef MAC
304 	mac_posixsem_init(ks);
305 	mac_posixsem_create(ucred, ks);
306 #endif
307 
308 	return (ks);
309 }
310 
311 static struct ksem *
312 ksem_hold(struct ksem *ks)
313 {
314 
315 	refcount_acquire(&ks->ks_ref);
316 	return (ks);
317 }
318 
319 static void
320 ksem_drop(struct ksem *ks)
321 {
322 
323 	if (refcount_release(&ks->ks_ref)) {
324 #ifdef MAC
325 		mac_posixsem_destroy(ks);
326 #endif
327 		cv_destroy(&ks->ks_cv);
328 		free(ks, M_KSEM);
329 		mtx_lock(&ksem_count_lock);
330 		nsems--;
331 		mtx_unlock(&ksem_count_lock);
332 	}
333 }
334 
335 /*
336  * Determine if the credentials have sufficient permissions for read
337  * and write access.
338  */
339 static int
340 ksem_access(struct ksem *ks, struct ucred *ucred)
341 {
342 	int error;
343 
344 	error = vaccess(VREG, ks->ks_mode, ks->ks_uid, ks->ks_gid,
345 	    VREAD | VWRITE, ucred, NULL);
346 	if (error)
347 		error = priv_check_cred(ucred, PRIV_SEM_WRITE, 0);
348 	return (error);
349 }
350 
351 /*
352  * Dictionary management.  We maintain an in-kernel dictionary to map
353  * paths to semaphore objects.  We use the FNV hash on the path to
354  * store the mappings in a hash table.
355  */
356 static struct ksem *
357 ksem_lookup(char *path, Fnv32_t fnv)
358 {
359 	struct ksem_mapping *map;
360 
361 	LIST_FOREACH(map, KSEM_HASH(fnv), km_link) {
362 		if (map->km_fnv != fnv)
363 			continue;
364 		if (strcmp(map->km_path, path) == 0)
365 			return (map->km_ksem);
366 	}
367 
368 	return (NULL);
369 }
370 
371 static void
372 ksem_insert(char *path, Fnv32_t fnv, struct ksem *ks)
373 {
374 	struct ksem_mapping *map;
375 
376 	map = malloc(sizeof(struct ksem_mapping), M_KSEM, M_WAITOK);
377 	map->km_path = path;
378 	map->km_fnv = fnv;
379 	map->km_ksem = ksem_hold(ks);
380 	ks->ks_path = path;
381 	LIST_INSERT_HEAD(KSEM_HASH(fnv), map, km_link);
382 }
383 
384 static int
385 ksem_remove(char *path, Fnv32_t fnv, struct ucred *ucred)
386 {
387 	struct ksem_mapping *map;
388 	int error;
389 
390 	LIST_FOREACH(map, KSEM_HASH(fnv), km_link) {
391 		if (map->km_fnv != fnv)
392 			continue;
393 		if (strcmp(map->km_path, path) == 0) {
394 #ifdef MAC
395 			error = mac_posixsem_check_unlink(ucred, map->km_ksem);
396 			if (error)
397 				return (error);
398 #endif
399 			error = ksem_access(map->km_ksem, ucred);
400 			if (error)
401 				return (error);
402 			map->km_ksem->ks_path = NULL;
403 			LIST_REMOVE(map, km_link);
404 			ksem_drop(map->km_ksem);
405 			free(map->km_path, M_KSEM);
406 			free(map, M_KSEM);
407 			return (0);
408 		}
409 	}
410 
411 	return (ENOENT);
412 }
413 
414 static int
415 ksem_create_copyout_semid(struct thread *td, semid_t *semidp, int fd,
416     int compat32)
417 {
418 	semid_t semid;
419 #ifdef COMPAT_FREEBSD32
420 	int32_t semid32;
421 #endif
422 	void *ptr;
423 	size_t ptrs;
424 
425 #ifdef COMPAT_FREEBSD32
426 	if (compat32) {
427 		semid32 = fd;
428 		ptr = &semid32;
429 		ptrs = sizeof(semid32);
430 	} else {
431 #endif
432 		semid = fd;
433 		ptr = &semid;
434 		ptrs = sizeof(semid);
435 		compat32 = 0; /* silence gcc */
436 #ifdef COMPAT_FREEBSD32
437 	}
438 #endif
439 
440 	return (copyout(ptr, semidp, ptrs));
441 }
442 
443 /* Other helper routines. */
444 static int
445 ksem_create(struct thread *td, const char *name, semid_t *semidp, mode_t mode,
446     unsigned int value, int flags, int compat32)
447 {
448 	struct filedesc *fdp;
449 	struct ksem *ks;
450 	struct file *fp;
451 	char *path;
452 	Fnv32_t fnv;
453 	int error, fd;
454 
455 	if (value > SEM_VALUE_MAX)
456 		return (EINVAL);
457 
458 	fdp = td->td_proc->p_fd;
459 	mode = (mode & ~fdp->fd_cmask) & ACCESSPERMS;
460 	error = falloc(td, &fp, &fd, O_CLOEXEC);
461 	if (error) {
462 		if (name == NULL)
463 			error = ENOSPC;
464 		return (error);
465 	}
466 
467 	/*
468 	 * Go ahead and copyout the file descriptor now.  This is a bit
469 	 * premature, but it is a lot easier to handle errors as opposed
470 	 * to later when we've possibly created a new semaphore, etc.
471 	 */
472 	error = ksem_create_copyout_semid(td, semidp, fd, compat32);
473 	if (error) {
474 		fdclose(td, fp, fd);
475 		fdrop(fp, td);
476 		return (error);
477 	}
478 
479 	if (name == NULL) {
480 		/* Create an anonymous semaphore. */
481 		ks = ksem_alloc(td->td_ucred, mode, value);
482 		if (ks == NULL)
483 			error = ENOSPC;
484 		else
485 			ks->ks_flags |= KS_ANONYMOUS;
486 	} else {
487 		path = malloc(MAXPATHLEN, M_KSEM, M_WAITOK);
488 		error = copyinstr(name, path, MAXPATHLEN, NULL);
489 
490 		/* Require paths to start with a '/' character. */
491 		if (error == 0 && path[0] != '/')
492 			error = EINVAL;
493 		if (error) {
494 			fdclose(td, fp, fd);
495 			fdrop(fp, td);
496 			free(path, M_KSEM);
497 			return (error);
498 		}
499 
500 		fnv = fnv_32_str(path, FNV1_32_INIT);
501 		sx_xlock(&ksem_dict_lock);
502 		ks = ksem_lookup(path, fnv);
503 		if (ks == NULL) {
504 			/* Object does not exist, create it if requested. */
505 			if (flags & O_CREAT) {
506 				ks = ksem_alloc(td->td_ucred, mode, value);
507 				if (ks == NULL)
508 					error = ENFILE;
509 				else {
510 					ksem_insert(path, fnv, ks);
511 					path = NULL;
512 				}
513 			} else
514 				error = ENOENT;
515 		} else {
516 			/*
517 			 * Object already exists, obtain a new
518 			 * reference if requested and permitted.
519 			 */
520 			if ((flags & (O_CREAT | O_EXCL)) ==
521 			    (O_CREAT | O_EXCL))
522 				error = EEXIST;
523 			else {
524 #ifdef MAC
525 				error = mac_posixsem_check_open(td->td_ucred,
526 				    ks);
527 				if (error == 0)
528 #endif
529 				error = ksem_access(ks, td->td_ucred);
530 			}
531 			if (error == 0)
532 				ksem_hold(ks);
533 #ifdef INVARIANTS
534 			else
535 				ks = NULL;
536 #endif
537 		}
538 		sx_xunlock(&ksem_dict_lock);
539 		if (path)
540 			free(path, M_KSEM);
541 	}
542 
543 	if (error) {
544 		KASSERT(ks == NULL, ("ksem_create error with a ksem"));
545 		fdclose(td, fp, fd);
546 		fdrop(fp, td);
547 		return (error);
548 	}
549 	KASSERT(ks != NULL, ("ksem_create w/o a ksem"));
550 
551 	finit(fp, FREAD | FWRITE, DTYPE_SEM, ks, &ksem_ops);
552 
553 	fdrop(fp, td);
554 
555 	return (0);
556 }
557 
558 static int
559 ksem_get(struct thread *td, semid_t id, cap_rights_t *rightsp,
560     struct file **fpp)
561 {
562 	struct ksem *ks;
563 	struct file *fp;
564 	int error;
565 
566 	error = fget(td, id, rightsp, &fp);
567 	if (error)
568 		return (EINVAL);
569 	if (fp->f_type != DTYPE_SEM) {
570 		fdrop(fp, td);
571 		return (EINVAL);
572 	}
573 	ks = fp->f_data;
574 	if (ks->ks_flags & KS_DEAD) {
575 		fdrop(fp, td);
576 		return (EINVAL);
577 	}
578 	*fpp = fp;
579 	return (0);
580 }
581 
582 /* System calls. */
583 #ifndef _SYS_SYSPROTO_H_
584 struct ksem_init_args {
585 	unsigned int	value;
586 	semid_t		*idp;
587 };
588 #endif
589 int
590 sys_ksem_init(struct thread *td, struct ksem_init_args *uap)
591 {
592 
593 	return (ksem_create(td, NULL, uap->idp, S_IRWXU | S_IRWXG, uap->value,
594 	    0, 0));
595 }
596 
597 #ifndef _SYS_SYSPROTO_H_
598 struct ksem_open_args {
599 	char		*name;
600 	int		oflag;
601 	mode_t		mode;
602 	unsigned int	value;
603 	semid_t		*idp;
604 };
605 #endif
606 int
607 sys_ksem_open(struct thread *td, struct ksem_open_args *uap)
608 {
609 
610 	DP((">>> ksem_open start, pid=%d\n", (int)td->td_proc->p_pid));
611 
612 	if ((uap->oflag & ~(O_CREAT | O_EXCL)) != 0)
613 		return (EINVAL);
614 	return (ksem_create(td, uap->name, uap->idp, uap->mode, uap->value,
615 	    uap->oflag, 0));
616 }
617 
618 #ifndef _SYS_SYSPROTO_H_
619 struct ksem_unlink_args {
620 	char		*name;
621 };
622 #endif
623 int
624 sys_ksem_unlink(struct thread *td, struct ksem_unlink_args *uap)
625 {
626 	char *path;
627 	Fnv32_t fnv;
628 	int error;
629 
630 	path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
631 	error = copyinstr(uap->name, path, MAXPATHLEN, NULL);
632 	if (error) {
633 		free(path, M_TEMP);
634 		return (error);
635 	}
636 
637 	fnv = fnv_32_str(path, FNV1_32_INIT);
638 	sx_xlock(&ksem_dict_lock);
639 	error = ksem_remove(path, fnv, td->td_ucred);
640 	sx_xunlock(&ksem_dict_lock);
641 	free(path, M_TEMP);
642 
643 	return (error);
644 }
645 
646 #ifndef _SYS_SYSPROTO_H_
647 struct ksem_close_args {
648 	semid_t		id;
649 };
650 #endif
651 int
652 sys_ksem_close(struct thread *td, struct ksem_close_args *uap)
653 {
654 	struct ksem *ks;
655 	struct file *fp;
656 	int error;
657 
658 	/* No capability rights required to close a semaphore. */
659 	error = ksem_get(td, uap->id, 0, &fp);
660 	if (error)
661 		return (error);
662 	ks = fp->f_data;
663 	if (ks->ks_flags & KS_ANONYMOUS) {
664 		fdrop(fp, td);
665 		return (EINVAL);
666 	}
667 	error = kern_close(td, uap->id);
668 	fdrop(fp, td);
669 	return (error);
670 }
671 
672 #ifndef _SYS_SYSPROTO_H_
673 struct ksem_post_args {
674 	semid_t	id;
675 };
676 #endif
677 int
678 sys_ksem_post(struct thread *td, struct ksem_post_args *uap)
679 {
680 	cap_rights_t rights;
681 	struct file *fp;
682 	struct ksem *ks;
683 	int error;
684 
685 	error = ksem_get(td, uap->id,
686 	    cap_rights_init(&rights, CAP_SEM_POST), &fp);
687 	if (error)
688 		return (error);
689 	ks = fp->f_data;
690 
691 	mtx_lock(&sem_lock);
692 #ifdef MAC
693 	error = mac_posixsem_check_post(td->td_ucred, fp->f_cred, ks);
694 	if (error)
695 		goto err;
696 #endif
697 	if (ks->ks_value == SEM_VALUE_MAX) {
698 		error = EOVERFLOW;
699 		goto err;
700 	}
701 	++ks->ks_value;
702 	if (ks->ks_waiters > 0)
703 		cv_signal(&ks->ks_cv);
704 	error = 0;
705 	vfs_timestamp(&ks->ks_ctime);
706 err:
707 	mtx_unlock(&sem_lock);
708 	fdrop(fp, td);
709 	return (error);
710 }
711 
712 #ifndef _SYS_SYSPROTO_H_
713 struct ksem_wait_args {
714 	semid_t		id;
715 };
716 #endif
717 int
718 sys_ksem_wait(struct thread *td, struct ksem_wait_args *uap)
719 {
720 
721 	return (kern_sem_wait(td, uap->id, 0, NULL));
722 }
723 
724 #ifndef _SYS_SYSPROTO_H_
725 struct ksem_timedwait_args {
726 	semid_t		id;
727 	const struct timespec *abstime;
728 };
729 #endif
730 int
731 sys_ksem_timedwait(struct thread *td, struct ksem_timedwait_args *uap)
732 {
733 	struct timespec abstime;
734 	struct timespec *ts;
735 	int error;
736 
737 	/*
738 	 * We allow a null timespec (wait forever).
739 	 */
740 	if (uap->abstime == NULL)
741 		ts = NULL;
742 	else {
743 		error = copyin(uap->abstime, &abstime, sizeof(abstime));
744 		if (error != 0)
745 			return (error);
746 		if (abstime.tv_nsec >= 1000000000 || abstime.tv_nsec < 0)
747 			return (EINVAL);
748 		ts = &abstime;
749 	}
750 	return (kern_sem_wait(td, uap->id, 0, ts));
751 }
752 
753 #ifndef _SYS_SYSPROTO_H_
754 struct ksem_trywait_args {
755 	semid_t		id;
756 };
757 #endif
758 int
759 sys_ksem_trywait(struct thread *td, struct ksem_trywait_args *uap)
760 {
761 
762 	return (kern_sem_wait(td, uap->id, 1, NULL));
763 }
764 
765 static int
766 kern_sem_wait(struct thread *td, semid_t id, int tryflag,
767     struct timespec *abstime)
768 {
769 	struct timespec ts1, ts2;
770 	struct timeval tv;
771 	cap_rights_t rights;
772 	struct file *fp;
773 	struct ksem *ks;
774 	int error;
775 
776 	DP((">>> kern_sem_wait entered! pid=%d\n", (int)td->td_proc->p_pid));
777 	error = ksem_get(td, id, cap_rights_init(&rights, CAP_SEM_WAIT), &fp);
778 	if (error)
779 		return (error);
780 	ks = fp->f_data;
781 	mtx_lock(&sem_lock);
782 	DP((">>> kern_sem_wait critical section entered! pid=%d\n",
783 	    (int)td->td_proc->p_pid));
784 #ifdef MAC
785 	error = mac_posixsem_check_wait(td->td_ucred, fp->f_cred, ks);
786 	if (error) {
787 		DP(("kern_sem_wait mac failed\n"));
788 		goto err;
789 	}
790 #endif
791 	DP(("kern_sem_wait value = %d, tryflag %d\n", ks->ks_value, tryflag));
792 	vfs_timestamp(&ks->ks_atime);
793 	while (ks->ks_value == 0) {
794 		ks->ks_waiters++;
795 		if (tryflag != 0)
796 			error = EAGAIN;
797 		else if (abstime == NULL)
798 			error = cv_wait_sig(&ks->ks_cv, &sem_lock);
799 		else {
800 			for (;;) {
801 				ts1 = *abstime;
802 				getnanotime(&ts2);
803 				timespecsub(&ts1, &ts2);
804 				TIMESPEC_TO_TIMEVAL(&tv, &ts1);
805 				if (tv.tv_sec < 0) {
806 					error = ETIMEDOUT;
807 					break;
808 				}
809 				error = cv_timedwait_sig(&ks->ks_cv,
810 				    &sem_lock, tvtohz(&tv));
811 				if (error != EWOULDBLOCK)
812 					break;
813 			}
814 		}
815 		ks->ks_waiters--;
816 		if (error)
817 			goto err;
818 	}
819 	ks->ks_value--;
820 	DP(("kern_sem_wait value post-decrement = %d\n", ks->ks_value));
821 	error = 0;
822 err:
823 	mtx_unlock(&sem_lock);
824 	fdrop(fp, td);
825 	DP(("<<< kern_sem_wait leaving, pid=%d, error = %d\n",
826 	    (int)td->td_proc->p_pid, error));
827 	return (error);
828 }
829 
830 #ifndef _SYS_SYSPROTO_H_
831 struct ksem_getvalue_args {
832 	semid_t		id;
833 	int		*val;
834 };
835 #endif
836 int
837 sys_ksem_getvalue(struct thread *td, struct ksem_getvalue_args *uap)
838 {
839 	cap_rights_t rights;
840 	struct file *fp;
841 	struct ksem *ks;
842 	int error, val;
843 
844 	error = ksem_get(td, uap->id,
845 	    cap_rights_init(&rights, CAP_SEM_GETVALUE), &fp);
846 	if (error)
847 		return (error);
848 	ks = fp->f_data;
849 
850 	mtx_lock(&sem_lock);
851 #ifdef MAC
852 	error = mac_posixsem_check_getvalue(td->td_ucred, fp->f_cred, ks);
853 	if (error) {
854 		mtx_unlock(&sem_lock);
855 		fdrop(fp, td);
856 		return (error);
857 	}
858 #endif
859 	val = ks->ks_value;
860 	vfs_timestamp(&ks->ks_atime);
861 	mtx_unlock(&sem_lock);
862 	fdrop(fp, td);
863 	error = copyout(&val, uap->val, sizeof(val));
864 	return (error);
865 }
866 
867 #ifndef _SYS_SYSPROTO_H_
868 struct ksem_destroy_args {
869 	semid_t		id;
870 };
871 #endif
872 int
873 sys_ksem_destroy(struct thread *td, struct ksem_destroy_args *uap)
874 {
875 	struct file *fp;
876 	struct ksem *ks;
877 	int error;
878 
879 	/* No capability rights required to close a semaphore. */
880 	error = ksem_get(td, uap->id, 0, &fp);
881 	if (error)
882 		return (error);
883 	ks = fp->f_data;
884 	if (!(ks->ks_flags & KS_ANONYMOUS)) {
885 		fdrop(fp, td);
886 		return (EINVAL);
887 	}
888 	mtx_lock(&sem_lock);
889 	if (ks->ks_waiters != 0) {
890 		mtx_unlock(&sem_lock);
891 		error = EBUSY;
892 		goto err;
893 	}
894 	ks->ks_flags |= KS_DEAD;
895 	mtx_unlock(&sem_lock);
896 
897 	error = kern_close(td, uap->id);
898 err:
899 	fdrop(fp, td);
900 	return (error);
901 }
902 
/*
 * Native system calls provided by this module.  The table is registered
 * in ksem_module_init() and unregistered in ksem_module_destroy().
 */
static struct syscall_helper_data ksem_syscalls[] = {
	SYSCALL_INIT_HELPER(ksem_init),
	SYSCALL_INIT_HELPER(ksem_open),
	SYSCALL_INIT_HELPER(ksem_unlink),
	SYSCALL_INIT_HELPER(ksem_close),
	SYSCALL_INIT_HELPER(ksem_post),
	SYSCALL_INIT_HELPER(ksem_wait),
	SYSCALL_INIT_HELPER(ksem_timedwait),
	SYSCALL_INIT_HELPER(ksem_trywait),
	SYSCALL_INIT_HELPER(ksem_getvalue),
	SYSCALL_INIT_HELPER(ksem_destroy),
	SYSCALL_INIT_LAST
};
916 
917 #ifdef COMPAT_FREEBSD32
918 #include <compat/freebsd32/freebsd32.h>
919 #include <compat/freebsd32/freebsd32_proto.h>
920 #include <compat/freebsd32/freebsd32_signal.h>
921 #include <compat/freebsd32/freebsd32_syscall.h>
922 #include <compat/freebsd32/freebsd32_util.h>
923 
924 int
925 freebsd32_ksem_init(struct thread *td, struct freebsd32_ksem_init_args *uap)
926 {
927 
928 	return (ksem_create(td, NULL, uap->idp, S_IRWXU | S_IRWXG, uap->value,
929 	    0, 1));
930 }
931 
932 int
933 freebsd32_ksem_open(struct thread *td, struct freebsd32_ksem_open_args *uap)
934 {
935 
936 	if ((uap->oflag & ~(O_CREAT | O_EXCL)) != 0)
937 		return (EINVAL);
938 	return (ksem_create(td, uap->name, uap->idp, uap->mode, uap->value,
939 	    uap->oflag, 1));
940 }
941 
942 int
943 freebsd32_ksem_timedwait(struct thread *td,
944     struct freebsd32_ksem_timedwait_args *uap)
945 {
946 	struct timespec32 abstime32;
947 	struct timespec *ts, abstime;
948 	int error;
949 
950 	/*
951 	 * We allow a null timespec (wait forever).
952 	 */
953 	if (uap->abstime == NULL)
954 		ts = NULL;
955 	else {
956 		error = copyin(uap->abstime, &abstime32, sizeof(abstime32));
957 		if (error != 0)
958 			return (error);
959 		CP(abstime32, abstime, tv_sec);
960 		CP(abstime32, abstime, tv_nsec);
961 		if (abstime.tv_nsec >= 1000000000 || abstime.tv_nsec < 0)
962 			return (EINVAL);
963 		ts = &abstime;
964 	}
965 	return (kern_sem_wait(td, uap->id, 0, ts));
966 }
967 
/*
 * 32-bit compat system calls.  init, open and timedwait have dedicated
 * freebsd32_* wrappers defined above; the remaining entries are declared
 * with SYSCALL32_INIT_HELPER_COMPAT and name the native calls.
 */
static struct syscall_helper_data ksem32_syscalls[] = {
	SYSCALL32_INIT_HELPER(freebsd32_ksem_init),
	SYSCALL32_INIT_HELPER(freebsd32_ksem_open),
	SYSCALL32_INIT_HELPER_COMPAT(ksem_unlink),
	SYSCALL32_INIT_HELPER_COMPAT(ksem_close),
	SYSCALL32_INIT_HELPER_COMPAT(ksem_post),
	SYSCALL32_INIT_HELPER_COMPAT(ksem_wait),
	SYSCALL32_INIT_HELPER(freebsd32_ksem_timedwait),
	SYSCALL32_INIT_HELPER_COMPAT(ksem_trywait),
	SYSCALL32_INIT_HELPER_COMPAT(ksem_getvalue),
	SYSCALL32_INIT_HELPER_COMPAT(ksem_destroy),
	SYSCALL_INIT_LAST
};
981 #endif
982 
983 static int
984 ksem_module_init(void)
985 {
986 	int error;
987 
988 	mtx_init(&sem_lock, "sem", NULL, MTX_DEF);
989 	mtx_init(&ksem_count_lock, "ksem count", NULL, MTX_DEF);
990 	sx_init(&ksem_dict_lock, "ksem dictionary");
991 	ksem_dictionary = hashinit(1024, M_KSEM, &ksem_hash);
992 	p31b_setcfg(CTL_P1003_1B_SEMAPHORES, 200112L);
993 	p31b_setcfg(CTL_P1003_1B_SEM_NSEMS_MAX, SEM_MAX);
994 	p31b_setcfg(CTL_P1003_1B_SEM_VALUE_MAX, SEM_VALUE_MAX);
995 
996 	error = syscall_helper_register(ksem_syscalls, SY_THR_STATIC_KLD);
997 	if (error)
998 		return (error);
999 #ifdef COMPAT_FREEBSD32
1000 	error = syscall32_helper_register(ksem32_syscalls, SY_THR_STATIC_KLD);
1001 	if (error)
1002 		return (error);
1003 #endif
1004 	return (0);
1005 }
1006 
1007 static void
1008 ksem_module_destroy(void)
1009 {
1010 
1011 #ifdef COMPAT_FREEBSD32
1012 	syscall32_helper_unregister(ksem32_syscalls);
1013 #endif
1014 	syscall_helper_unregister(ksem_syscalls);
1015 
1016 	p31b_setcfg(CTL_P1003_1B_SEMAPHORES, 0);
1017 	hashdestroy(ksem_dictionary, M_KSEM, ksem_hash);
1018 	sx_destroy(&ksem_dict_lock);
1019 	mtx_destroy(&ksem_count_lock);
1020 	mtx_destroy(&sem_lock);
1021 	p31b_unsetcfg(CTL_P1003_1B_SEM_VALUE_MAX);
1022 	p31b_unsetcfg(CTL_P1003_1B_SEM_NSEMS_MAX);
1023 }
1024 
1025 static int
1026 sem_modload(struct module *module, int cmd, void *arg)
1027 {
1028         int error = 0;
1029 
1030         switch (cmd) {
1031         case MOD_LOAD:
1032 		error = ksem_module_init();
1033 		if (error)
1034 			ksem_module_destroy();
1035                 break;
1036 
1037         case MOD_UNLOAD:
1038 		mtx_lock(&ksem_count_lock);
1039 		if (nsems != 0) {
1040 			error = EOPNOTSUPP;
1041 			mtx_unlock(&ksem_count_lock);
1042 			break;
1043 		}
1044 		ksem_dead = 1;
1045 		mtx_unlock(&ksem_count_lock);
1046 		ksem_module_destroy();
1047                 break;
1048 
1049         case MOD_SHUTDOWN:
1050                 break;
1051         default:
1052                 error = EINVAL;
1053                 break;
1054         }
1055         return (error);
1056 }
1057 
1058 static moduledata_t sem_mod = {
1059         "sem",
1060         &sem_modload,
1061         NULL
1062 };
1063 
1064 DECLARE_MODULE(sem, sem_mod, SI_SUB_SYSV_SEM, SI_ORDER_FIRST);
1065 MODULE_VERSION(sem, 1);
1066