xref: /freebsd/sys/kern/uipc_sem.c (revision 7aa383846770374466b1dcb2cefd71bde9acf463)
1 /*-
2  * Copyright (c) 2002 Alfred Perlstein <alfred@FreeBSD.org>
3  * Copyright (c) 2003-2005 SPARTA, Inc.
4  * Copyright (c) 2005 Robert N. M. Watson
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project in part by Network
8  * Associates Laboratories, the Security Research Division of Network
9  * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"),
10  * as part of the DARPA CHATS research program.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include "opt_compat.h"
38 #include "opt_posix.h"
39 
40 #include <sys/param.h>
41 #include <sys/condvar.h>
42 #include <sys/fcntl.h>
43 #include <sys/file.h>
44 #include <sys/filedesc.h>
45 #include <sys/fnv_hash.h>
46 #include <sys/kernel.h>
47 #include <sys/ksem.h>
48 #include <sys/lock.h>
49 #include <sys/malloc.h>
50 #include <sys/module.h>
51 #include <sys/mutex.h>
52 #include <sys/priv.h>
53 #include <sys/proc.h>
54 #include <sys/posix4.h>
55 #include <sys/_semaphore.h>
56 #include <sys/stat.h>
57 #include <sys/syscall.h>
58 #include <sys/syscallsubr.h>
59 #include <sys/sysctl.h>
60 #include <sys/sysent.h>
61 #include <sys/sysproto.h>
62 #include <sys/systm.h>
63 #include <sys/sx.h>
64 #include <sys/vnode.h>
65 
66 #include <security/mac/mac_framework.h>
67 
68 /*
69  * TODO
70  *
71  * - Resource limits?
72  * - Update fstat(1)
73  * - Replace global sem_lock with mtx_pool locks?
74  * - Add a MAC check_create() hook for creating new named semaphores.
75  */
76 
/*
 * Default ceiling on the number of semaphores.  ksem_module_init()
 * installs this value as the CTL_P1003_1B_SEM_NSEMS_MAX setting.  It can
 * be overridden by defining SEM_MAX before this point.
 */
#ifndef SEM_MAX
#define	SEM_MAX	30
#endif

/*
 * Debug trace macro.  The argument is a complete, parenthesized printf()
 * argument list, e.g. DP(("value = %d\n", v)).  It expands to nothing
 * unless SEM_DEBUG is defined.
 */
#ifdef SEM_DEBUG
#define	DP(x)	printf x
#else
#define	DP(x)
#endif
86 
/*
 * One entry of the name dictionary: ties a semaphore path to its ksem
 * object.  Entries are chained into the buckets of ksem_dictionary.
 */
struct ksem_mapping {
	char		*km_path;	/* path string, freed with M_KSEM on removal */
	Fnv32_t		km_fnv;		/* hash of km_path, selects the bucket */
	struct ksem	*km_ksem;	/* semaphore; the mapping holds a reference */
	LIST_ENTRY(ksem_mapping) km_link;	/* bucket list linkage */
};
93 
/* Malloc type used for ksem objects, dictionary entries and stored paths. */
static MALLOC_DEFINE(M_KSEM, "ksem", "semaphore file descriptor");
/* Hash table of ksem_mapping lists, created in ksem_module_init(). */
static LIST_HEAD(, ksem_mapping) *ksem_dictionary;
/* Held exclusively around dictionary lookup, insert and remove. */
static struct sx ksem_dict_lock;
/* Held while reading or changing nsems and ksem_dead. */
static struct mtx ksem_count_lock;
/* Global mutex held while a semaphore's value or waiter count is used. */
static struct mtx sem_lock;
/* Bucket mask filled in by hashinit(). */
static u_long ksem_hash;
/* Set on module unload; ksem_alloc() refuses to create semaphores once set. */
static int ksem_dead;

/* Select the dictionary bucket for a given path hash. */
#define	KSEM_HASH(fnv)	(&ksem_dictionary[(fnv) & ksem_hash])

/* Count of live ksem objects, exported read-only as p1003_1b.nsems. */
static int nsems = 0;
SYSCTL_DECL(_p1003_1b);
SYSCTL_INT(_p1003_1b, OID_AUTO, nsems, CTLFLAG_RD, &nsems, 0,
    "Number of active kernel POSIX semaphores");
108 
/*
 * Forward declarations of the file-local helpers.  Note that the
 * ksem_create() prototype names its second parameter 'path' while the
 * definition below calls it 'name'; both refer to the user-supplied
 * semaphore name (NULL for an anonymous semaphore).
 */
static int	kern_sem_wait(struct thread *td, semid_t id, int tryflag,
		    struct timespec *abstime);
static int	ksem_access(struct ksem *ks, struct ucred *ucred);
static struct ksem *ksem_alloc(struct ucred *ucred, mode_t mode,
		    unsigned int value);
static int	ksem_create(struct thread *td, const char *path,
		    semid_t *semidp, mode_t mode, unsigned int value,
		    int flags, int compat32);
static void	ksem_drop(struct ksem *ks);
static int	ksem_get(struct thread *td, semid_t id, struct file **fpp);
static struct ksem *ksem_hold(struct ksem *ks);
static void	ksem_insert(char *path, Fnv32_t fnv, struct ksem *ks);
static struct ksem *ksem_lookup(char *path, Fnv32_t fnv);
static void	ksem_module_destroy(void);
static int	ksem_module_init(void);
static int	ksem_remove(char *path, Fnv32_t fnv, struct ucred *ucred);
static int	sem_modload(struct module *module, int cmd, void *arg);

/* Handlers installed in the ksem_ops file operations vector. */
static fo_rdwr_t	ksem_read;
static fo_rdwr_t	ksem_write;
static fo_truncate_t	ksem_truncate;
static fo_ioctl_t	ksem_ioctl;
static fo_poll_t	ksem_poll;
static fo_kqfilter_t	ksem_kqfilter;
static fo_stat_t	ksem_stat;
static fo_close_t	ksem_closef;
135 
/*
 * File descriptor operations for semaphore descriptors.  Only stat and
 * close do real work; read, write, ioctl, poll and kqfilter return
 * EOPNOTSUPP and truncate returns EINVAL.
 */
static struct fileops ksem_ops = {
	.fo_read = ksem_read,
	.fo_write = ksem_write,
	.fo_truncate = ksem_truncate,
	.fo_ioctl = ksem_ioctl,
	.fo_poll = ksem_poll,
	.fo_kqfilter = ksem_kqfilter,
	.fo_stat = ksem_stat,
	.fo_close = ksem_closef,
	.fo_flags = DFLAG_PASSABLE
};

/* Advertise the "posix_sem" kernel feature. */
FEATURE(posix_sem, "POSIX semaphores");
150 
/*
 * fo_read handler.  Semaphore descriptors cannot be read; every call
 * fails with EOPNOTSUPP and none of the arguments are examined.
 */
static int
ksem_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	return (EOPNOTSUPP);
}
158 
/*
 * fo_write handler.  Semaphore descriptors cannot be written; every call
 * fails with EOPNOTSUPP and none of the arguments are examined.
 */
static int
ksem_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	return (EOPNOTSUPP);
}
166 
/*
 * fo_truncate handler.  A semaphore has no length to change, so every
 * call fails with EINVAL regardless of the requested length.
 */
static int
ksem_truncate(struct file *fp, off_t length, struct ucred *active_cred,
    struct thread *td)
{
	return (EINVAL);
}
174 
175 static int
176 ksem_ioctl(struct file *fp, u_long com, void *data,
177     struct ucred *active_cred, struct thread *td)
178 {
179 
180 	return (EOPNOTSUPP);
181 }
182 
/*
 * fo_poll handler.  Polling a semaphore descriptor is not implemented;
 * the function returns EOPNOTSUPP without looking at 'events'.
 */
static int
ksem_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{
	return (EOPNOTSUPP);
}
190 
/*
 * fo_kqfilter handler.  kqueue filters cannot be attached to a semaphore
 * descriptor; every call fails with EOPNOTSUPP.
 */
static int
ksem_kqfilter(struct file *fp, struct knote *kn)
{
	return (EOPNOTSUPP);
}
197 
198 static int
199 ksem_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
200     struct thread *td)
201 {
202 	struct ksem *ks;
203 #ifdef MAC
204 	int error;
205 #endif
206 
207 	ks = fp->f_data;
208 
209 #ifdef MAC
210 	error = mac_posixsem_check_stat(active_cred, fp->f_cred, ks);
211 	if (error)
212 		return (error);
213 #endif
214 
215 	/*
216 	 * Attempt to return sanish values for fstat() on a semaphore
217 	 * file descriptor.
218 	 */
219 	bzero(sb, sizeof(*sb));
220 	sb->st_mode = S_IFREG | ks->ks_mode;		/* XXX */
221 
222 	sb->st_atim = ks->ks_atime;
223 	sb->st_ctim = ks->ks_ctime;
224 	sb->st_mtim = ks->ks_mtime;
225 	sb->st_birthtim = ks->ks_birthtime;
226 	sb->st_uid = ks->ks_uid;
227 	sb->st_gid = ks->ks_gid;
228 
229 	return (0);
230 }
231 
232 static int
233 ksem_closef(struct file *fp, struct thread *td)
234 {
235 	struct ksem *ks;
236 
237 	ks = fp->f_data;
238 	fp->f_data = NULL;
239 	ksem_drop(ks);
240 
241 	return (0);
242 }
243 
244 /*
245  * ksem object management including creation and reference counting
246  * routines.
247  */
248 static struct ksem *
249 ksem_alloc(struct ucred *ucred, mode_t mode, unsigned int value)
250 {
251 	struct ksem *ks;
252 
253 	mtx_lock(&ksem_count_lock);
254 	if (nsems == p31b_getcfg(CTL_P1003_1B_SEM_NSEMS_MAX) || ksem_dead) {
255 		mtx_unlock(&ksem_count_lock);
256 		return (NULL);
257 	}
258 	nsems++;
259 	mtx_unlock(&ksem_count_lock);
260 	ks = malloc(sizeof(*ks), M_KSEM, M_WAITOK | M_ZERO);
261 	ks->ks_uid = ucred->cr_uid;
262 	ks->ks_gid = ucred->cr_gid;
263 	ks->ks_mode = mode;
264 	ks->ks_value = value;
265 	cv_init(&ks->ks_cv, "ksem");
266 	vfs_timestamp(&ks->ks_birthtime);
267 	ks->ks_atime = ks->ks_mtime = ks->ks_ctime = ks->ks_birthtime;
268 	refcount_init(&ks->ks_ref, 1);
269 #ifdef MAC
270 	mac_posixsem_init(ks);
271 	mac_posixsem_create(ucred, ks);
272 #endif
273 
274 	return (ks);
275 }
276 
277 static struct ksem *
278 ksem_hold(struct ksem *ks)
279 {
280 
281 	refcount_acquire(&ks->ks_ref);
282 	return (ks);
283 }
284 
285 static void
286 ksem_drop(struct ksem *ks)
287 {
288 
289 	if (refcount_release(&ks->ks_ref)) {
290 #ifdef MAC
291 		mac_posixsem_destroy(ks);
292 #endif
293 		cv_destroy(&ks->ks_cv);
294 		free(ks, M_KSEM);
295 		mtx_lock(&ksem_count_lock);
296 		nsems--;
297 		mtx_unlock(&ksem_count_lock);
298 	}
299 }
300 
301 /*
302  * Determine if the credentials have sufficient permissions for read
303  * and write access.
304  */
305 static int
306 ksem_access(struct ksem *ks, struct ucred *ucred)
307 {
308 	int error;
309 
310 	error = vaccess(VREG, ks->ks_mode, ks->ks_uid, ks->ks_gid,
311 	    VREAD | VWRITE, ucred, NULL);
312 	if (error)
313 		error = priv_check_cred(ucred, PRIV_SEM_WRITE, 0);
314 	return (error);
315 }
316 
317 /*
318  * Dictionary management.  We maintain an in-kernel dictionary to map
319  * paths to semaphore objects.  We use the FNV hash on the path to
320  * store the mappings in a hash table.
321  */
322 static struct ksem *
323 ksem_lookup(char *path, Fnv32_t fnv)
324 {
325 	struct ksem_mapping *map;
326 
327 	LIST_FOREACH(map, KSEM_HASH(fnv), km_link) {
328 		if (map->km_fnv != fnv)
329 			continue;
330 		if (strcmp(map->km_path, path) == 0)
331 			return (map->km_ksem);
332 	}
333 
334 	return (NULL);
335 }
336 
337 static void
338 ksem_insert(char *path, Fnv32_t fnv, struct ksem *ks)
339 {
340 	struct ksem_mapping *map;
341 
342 	map = malloc(sizeof(struct ksem_mapping), M_KSEM, M_WAITOK);
343 	map->km_path = path;
344 	map->km_fnv = fnv;
345 	map->km_ksem = ksem_hold(ks);
346 	LIST_INSERT_HEAD(KSEM_HASH(fnv), map, km_link);
347 }
348 
349 static int
350 ksem_remove(char *path, Fnv32_t fnv, struct ucred *ucred)
351 {
352 	struct ksem_mapping *map;
353 	int error;
354 
355 	LIST_FOREACH(map, KSEM_HASH(fnv), km_link) {
356 		if (map->km_fnv != fnv)
357 			continue;
358 		if (strcmp(map->km_path, path) == 0) {
359 #ifdef MAC
360 			error = mac_posixsem_check_unlink(ucred, map->km_ksem);
361 			if (error)
362 				return (error);
363 #endif
364 			error = ksem_access(map->km_ksem, ucred);
365 			if (error)
366 				return (error);
367 			LIST_REMOVE(map, km_link);
368 			ksem_drop(map->km_ksem);
369 			free(map->km_path, M_KSEM);
370 			free(map, M_KSEM);
371 			return (0);
372 		}
373 	}
374 
375 	return (ENOENT);
376 }
377 
378 static int
379 ksem_create_copyout_semid(struct thread *td, semid_t *semidp, int fd,
380     int compat32)
381 {
382 	semid_t semid;
383 #ifdef COMPAT_FREEBSD32
384 	int32_t semid32;
385 #endif
386 	void *ptr;
387 	size_t ptrs;
388 
389 #ifdef COMPAT_FREEBSD32
390 	if (compat32) {
391 		semid32 = fd;
392 		ptr = &semid32;
393 		ptrs = sizeof(semid32);
394 	} else {
395 #endif
396 		semid = fd;
397 		ptr = &semid;
398 		ptrs = sizeof(semid);
399 		compat32 = 0; /* silence gcc */
400 #ifdef COMPAT_FREEBSD32
401 	}
402 #endif
403 
404 	return (copyout(ptr, semidp, ptrs));
405 }
406 
407 /* Other helper routines. */
/*
 * Common back end for ksem_init() and ksem_open() (and their 32-bit
 * variants): allocate a file descriptor, attach a semaphore to it and
 * report the descriptor number to userland through 'semidp'.
 *
 * name     - user-space pointer to the semaphore name, or NULL to create
 *            an anonymous semaphore.
 * mode     - requested permission bits; masked by the process umask and
 *            ACCESSPERMS.
 * value    - initial value; must not exceed SEM_VALUE_MAX.
 * flags    - O_CREAT / O_EXCL, only consulted for named semaphores.
 * compat32 - passed through to ksem_create_copyout_semid() to select the
 *            size of the value written to 'semidp'.
 *
 * Returns 0 on success or an errno value.  On every failure after the
 * descriptor has been allocated, the descriptor is closed again.
 */
static int
ksem_create(struct thread *td, const char *name, semid_t *semidp, mode_t mode,
    unsigned int value, int flags, int compat32)
{
	struct filedesc *fdp;
	struct ksem *ks;
	struct file *fp;
	char *path;
	Fnv32_t fnv;
	int error, fd;

	if (value > SEM_VALUE_MAX)
		return (EINVAL);

	fdp = td->td_proc->p_fd;
	mode = (mode & ~fdp->fd_cmask) & ACCESSPERMS;
	error = falloc(td, &fp, &fd);
	if (error) {
		/* Anonymous creation reports any falloc() failure as ENOSPC. */
		if (name == NULL)
			error = ENOSPC;
		return (error);
	}

	/*
	 * Go ahead and copyout the file descriptor now.  This is a bit
	 * premature, but it is a lot easier to handle errors as opposed
	 * to later when we've possibly created a new semaphore, etc.
	 */
	error = ksem_create_copyout_semid(td, semidp, fd, compat32);
	if (error) {
		fdclose(fdp, fp, fd, td);
		fdrop(fp, td);
		return (error);
	}

	if (name == NULL) {
		/* Create an anonymous semaphore. */
		ks = ksem_alloc(td->td_ucred, mode, value);
		if (ks == NULL)
			error = ENOSPC;
		else
			ks->ks_flags |= KS_ANONYMOUS;
	} else {
		path = malloc(MAXPATHLEN, M_KSEM, M_WAITOK);
		error = copyinstr(name, path, MAXPATHLEN, NULL);

		/* Require paths to start with a '/' character. */
		if (error == 0 && path[0] != '/')
			error = EINVAL;
		if (error) {
			fdclose(fdp, fp, fd, td);
			fdrop(fp, td);
			free(path, M_KSEM);
			return (error);
		}

		fnv = fnv_32_str(path, FNV1_32_INIT);
		/* The dictionary lock covers lookup and insert as one step. */
		sx_xlock(&ksem_dict_lock);
		ks = ksem_lookup(path, fnv);
		if (ks == NULL) {
			/* Object does not exist, create it if requested. */
			if (flags & O_CREAT) {
				ks = ksem_alloc(td->td_ucred, mode, value);
				if (ks == NULL)
					error = ENFILE;
				else {
					/*
					 * The dictionary entry now owns the
					 * path buffer; clear our pointer so
					 * it is not freed below.
					 */
					ksem_insert(path, fnv, ks);
					path = NULL;
				}
			} else
				error = ENOENT;
		} else {
			/*
			 * Object already exists, obtain a new
			 * reference if requested and permitted.
			 */
			if ((flags & (O_CREAT | O_EXCL)) ==
			    (O_CREAT | O_EXCL))
				error = EEXIST;
			else {
				/*
				 * With MAC, ksem_access() below is only
				 * reached when the MAC check succeeded.
				 */
#ifdef MAC
				error = mac_posixsem_check_open(td->td_ucred,
				    ks);
				if (error == 0)
#endif
				error = ksem_access(ks, td->td_ucred);
			}
			if (error == 0)
				ksem_hold(ks);
#ifdef INVARIANTS
			/* Keep the KASSERT in the error path below honest. */
			else
				ks = NULL;
#endif
		}
		sx_xunlock(&ksem_dict_lock);
		if (path)
			free(path, M_KSEM);
	}

	if (error) {
		KASSERT(ks == NULL, ("ksem_create error with a ksem"));
		fdclose(fdp, fp, fd, td);
		fdrop(fp, td);
		return (error);
	}
	KASSERT(ks != NULL, ("ksem_create w/o a ksem"));

	/* Attach the semaphore to the file; the file now owns a reference. */
	finit(fp, FREAD | FWRITE, DTYPE_SEM, ks, &ksem_ops);

	/*
	 * Mark the descriptor UF_EXCLOSE, but only if the slot still refers
	 * to our file.
	 */
	FILEDESC_XLOCK(fdp);
	if (fdp->fd_ofiles[fd] == fp)
		fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
	FILEDESC_XUNLOCK(fdp);
	fdrop(fp, td);

	return (0);
}
525 
526 static int
527 ksem_get(struct thread *td, semid_t id, struct file **fpp)
528 {
529 	struct ksem *ks;
530 	struct file *fp;
531 	int error;
532 
533 	error = fget(td, id, &fp);
534 	if (error)
535 		return (EINVAL);
536 	if (fp->f_type != DTYPE_SEM) {
537 		fdrop(fp, td);
538 		return (EINVAL);
539 	}
540 	ks = fp->f_data;
541 	if (ks->ks_flags & KS_DEAD) {
542 		fdrop(fp, td);
543 		return (EINVAL);
544 	}
545 	*fpp = fp;
546 	return (0);
547 }
548 
549 /* System calls. */
550 #ifndef _SYS_SYSPROTO_H_
551 struct ksem_init_args {
552 	unsigned int	value;
553 	semid_t		*idp;
554 };
555 #endif
556 int
557 ksem_init(struct thread *td, struct ksem_init_args *uap)
558 {
559 
560 	return (ksem_create(td, NULL, uap->idp, S_IRWXU | S_IRWXG, uap->value,
561 	    0, 0));
562 }
563 
564 #ifndef _SYS_SYSPROTO_H_
565 struct ksem_open_args {
566 	char		*name;
567 	int		oflag;
568 	mode_t		mode;
569 	unsigned int	value;
570 	semid_t		*idp;
571 };
572 #endif
573 int
574 ksem_open(struct thread *td, struct ksem_open_args *uap)
575 {
576 
577 	DP((">>> ksem_open start, pid=%d\n", (int)td->td_proc->p_pid));
578 
579 	if ((uap->oflag & ~(O_CREAT | O_EXCL)) != 0)
580 		return (EINVAL);
581 	return (ksem_create(td, uap->name, uap->idp, uap->mode, uap->value,
582 	    uap->oflag, 0));
583 }
584 
585 #ifndef _SYS_SYSPROTO_H_
586 struct ksem_unlink_args {
587 	char		*name;
588 };
589 #endif
590 int
591 ksem_unlink(struct thread *td, struct ksem_unlink_args *uap)
592 {
593 	char *path;
594 	Fnv32_t fnv;
595 	int error;
596 
597 	path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
598 	error = copyinstr(uap->name, path, MAXPATHLEN, NULL);
599 	if (error) {
600 		free(path, M_TEMP);
601 		return (error);
602 	}
603 
604 	fnv = fnv_32_str(path, FNV1_32_INIT);
605 	sx_xlock(&ksem_dict_lock);
606 	error = ksem_remove(path, fnv, td->td_ucred);
607 	sx_xunlock(&ksem_dict_lock);
608 	free(path, M_TEMP);
609 
610 	return (error);
611 }
612 
613 #ifndef _SYS_SYSPROTO_H_
614 struct ksem_close_args {
615 	semid_t		id;
616 };
617 #endif
618 int
619 ksem_close(struct thread *td, struct ksem_close_args *uap)
620 {
621 	struct ksem *ks;
622 	struct file *fp;
623 	int error;
624 
625 	error = ksem_get(td, uap->id, &fp);
626 	if (error)
627 		return (error);
628 	ks = fp->f_data;
629 	if (ks->ks_flags & KS_ANONYMOUS) {
630 		fdrop(fp, td);
631 		return (EINVAL);
632 	}
633 	error = kern_close(td, uap->id);
634 	fdrop(fp, td);
635 	return (error);
636 }
637 
638 #ifndef _SYS_SYSPROTO_H_
639 struct ksem_post_args {
640 	semid_t	id;
641 };
642 #endif
643 int
644 ksem_post(struct thread *td, struct ksem_post_args *uap)
645 {
646 	struct file *fp;
647 	struct ksem *ks;
648 	int error;
649 
650 	error = ksem_get(td, uap->id, &fp);
651 	if (error)
652 		return (error);
653 	ks = fp->f_data;
654 
655 	mtx_lock(&sem_lock);
656 #ifdef MAC
657 	error = mac_posixsem_check_post(td->td_ucred, fp->f_cred, ks);
658 	if (error)
659 		goto err;
660 #endif
661 	if (ks->ks_value == SEM_VALUE_MAX) {
662 		error = EOVERFLOW;
663 		goto err;
664 	}
665 	++ks->ks_value;
666 	if (ks->ks_waiters > 0)
667 		cv_signal(&ks->ks_cv);
668 	error = 0;
669 	vfs_timestamp(&ks->ks_ctime);
670 err:
671 	mtx_unlock(&sem_lock);
672 	fdrop(fp, td);
673 	return (error);
674 }
675 
676 #ifndef _SYS_SYSPROTO_H_
677 struct ksem_wait_args {
678 	semid_t		id;
679 };
680 #endif
681 int
682 ksem_wait(struct thread *td, struct ksem_wait_args *uap)
683 {
684 
685 	return (kern_sem_wait(td, uap->id, 0, NULL));
686 }
687 
688 #ifndef _SYS_SYSPROTO_H_
689 struct ksem_timedwait_args {
690 	semid_t		id;
691 	const struct timespec *abstime;
692 };
693 #endif
694 int
695 ksem_timedwait(struct thread *td, struct ksem_timedwait_args *uap)
696 {
697 	struct timespec abstime;
698 	struct timespec *ts;
699 	int error;
700 
701 	/*
702 	 * We allow a null timespec (wait forever).
703 	 */
704 	if (uap->abstime == NULL)
705 		ts = NULL;
706 	else {
707 		error = copyin(uap->abstime, &abstime, sizeof(abstime));
708 		if (error != 0)
709 			return (error);
710 		if (abstime.tv_nsec >= 1000000000 || abstime.tv_nsec < 0)
711 			return (EINVAL);
712 		ts = &abstime;
713 	}
714 	return (kern_sem_wait(td, uap->id, 0, ts));
715 }
716 
717 #ifndef _SYS_SYSPROTO_H_
718 struct ksem_trywait_args {
719 	semid_t		id;
720 };
721 #endif
722 int
723 ksem_trywait(struct thread *td, struct ksem_trywait_args *uap)
724 {
725 
726 	return (kern_sem_wait(td, uap->id, 1, NULL));
727 }
728 
/*
 * Common implementation of ksem_wait, ksem_trywait and ksem_timedwait:
 * decrement the semaphore identified by 'id', waiting for it to become
 * non-zero if necessary.
 *
 * tryflag - when non-zero, do not sleep; fail with EAGAIN if the value
 *           is zero.
 * abstime - absolute deadline for the wait, or NULL for no deadline.
 *           Ignored when tryflag is set.
 *
 * Returns 0 once the value has been decremented.  Otherwise returns the
 * ksem_get() error, the MAC wait check error (when MAC is compiled in),
 * EAGAIN, ETIMEDOUT, or the error returned by the condition variable
 * sleep.
 */
static int
kern_sem_wait(struct thread *td, semid_t id, int tryflag,
    struct timespec *abstime)
{
	struct timespec ts1, ts2;
	struct timeval tv;
	struct file *fp;
	struct ksem *ks;
	int error;

	DP((">>> kern_sem_wait entered! pid=%d\n", (int)td->td_proc->p_pid));
	error = ksem_get(td, id, &fp);
	if (error)
		return (error);
	ks = fp->f_data;
	mtx_lock(&sem_lock);
	DP((">>> kern_sem_wait critical section entered! pid=%d\n",
	    (int)td->td_proc->p_pid));
#ifdef MAC
	error = mac_posixsem_check_wait(td->td_ucred, fp->f_cred, ks);
	if (error) {
		DP(("kern_sem_wait mac failed\n"));
		goto err;
	}
#endif
	DP(("kern_sem_wait value = %d, tryflag %d\n", ks->ks_value, tryflag));
	vfs_timestamp(&ks->ks_atime);
	/* Re-test the value after every wakeup; sem_lock is held here. */
	while (ks->ks_value == 0) {
		/* Counted as a waiter for the duration of this iteration. */
		ks->ks_waiters++;
		if (tryflag != 0)
			error = EAGAIN;
		else if (abstime == NULL)
			error = cv_wait_sig(&ks->ks_cv, &sem_lock);
		else {
			for (;;) {
				/*
				 * Compute the time remaining until the
				 * deadline; a negative result means it has
				 * already passed.
				 */
				ts1 = *abstime;
				getnanotime(&ts2);
				timespecsub(&ts1, &ts2);
				TIMESPEC_TO_TIMEVAL(&tv, &ts1);
				if (tv.tv_sec < 0) {
					error = ETIMEDOUT;
					break;
				}
				error = cv_timedwait_sig(&ks->ks_cv,
				    &sem_lock, tvtohz(&tv));
				/*
				 * On EWOULDBLOCK go around again so the
				 * deadline is re-evaluated against the
				 * current time.
				 */
				if (error != EWOULDBLOCK)
					break;
			}
		}
		ks->ks_waiters--;
		if (error)
			goto err;
	}
	ks->ks_value--;
	DP(("kern_sem_wait value post-decrement = %d\n", ks->ks_value));
	error = 0;
err:
	mtx_unlock(&sem_lock);
	fdrop(fp, td);
	DP(("<<< kern_sem_wait leaving, pid=%d, error = %d\n",
	    (int)td->td_proc->p_pid, error));
	return (error);
}
792 
793 #ifndef _SYS_SYSPROTO_H_
794 struct ksem_getvalue_args {
795 	semid_t		id;
796 	int		*val;
797 };
798 #endif
799 int
800 ksem_getvalue(struct thread *td, struct ksem_getvalue_args *uap)
801 {
802 	struct file *fp;
803 	struct ksem *ks;
804 	int error, val;
805 
806 	error = ksem_get(td, uap->id, &fp);
807 	if (error)
808 		return (error);
809 	ks = fp->f_data;
810 
811 	mtx_lock(&sem_lock);
812 #ifdef MAC
813 	error = mac_posixsem_check_getvalue(td->td_ucred, fp->f_cred, ks);
814 	if (error) {
815 		mtx_unlock(&sem_lock);
816 		fdrop(fp, td);
817 		return (error);
818 	}
819 #endif
820 	val = ks->ks_value;
821 	vfs_timestamp(&ks->ks_atime);
822 	mtx_unlock(&sem_lock);
823 	fdrop(fp, td);
824 	error = copyout(&val, uap->val, sizeof(val));
825 	return (error);
826 }
827 
828 #ifndef _SYS_SYSPROTO_H_
829 struct ksem_destroy_args {
830 	semid_t		id;
831 };
832 #endif
833 int
834 ksem_destroy(struct thread *td, struct ksem_destroy_args *uap)
835 {
836 	struct file *fp;
837 	struct ksem *ks;
838 	int error;
839 
840 	error = ksem_get(td, uap->id, &fp);
841 	if (error)
842 		return (error);
843 	ks = fp->f_data;
844 	if (!(ks->ks_flags & KS_ANONYMOUS)) {
845 		fdrop(fp, td);
846 		return (EINVAL);
847 	}
848 	mtx_lock(&sem_lock);
849 	if (ks->ks_waiters != 0) {
850 		mtx_unlock(&sem_lock);
851 		error = EBUSY;
852 		goto err;
853 	}
854 	ks->ks_flags |= KS_DEAD;
855 	mtx_unlock(&sem_lock);
856 
857 	error = kern_close(td, uap->id);
858 err:
859 	fdrop(fp, td);
860 	return (error);
861 }
862 
/*
 * Native system calls provided by this module.  The table is registered
 * in ksem_module_init() and unregistered in ksem_module_destroy().
 */
static struct syscall_helper_data ksem_syscalls[] = {
	SYSCALL_INIT_HELPER(ksem_init),
	SYSCALL_INIT_HELPER(ksem_open),
	SYSCALL_INIT_HELPER(ksem_unlink),
	SYSCALL_INIT_HELPER(ksem_close),
	SYSCALL_INIT_HELPER(ksem_post),
	SYSCALL_INIT_HELPER(ksem_wait),
	SYSCALL_INIT_HELPER(ksem_timedwait),
	SYSCALL_INIT_HELPER(ksem_trywait),
	SYSCALL_INIT_HELPER(ksem_getvalue),
	SYSCALL_INIT_HELPER(ksem_destroy),
	SYSCALL_INIT_LAST
};
876 
877 #ifdef COMPAT_FREEBSD32
878 #include <compat/freebsd32/freebsd32.h>
879 #include <compat/freebsd32/freebsd32_proto.h>
880 #include <compat/freebsd32/freebsd32_signal.h>
881 #include <compat/freebsd32/freebsd32_syscall.h>
882 #include <compat/freebsd32/freebsd32_util.h>
883 
884 int
885 freebsd32_ksem_init(struct thread *td, struct freebsd32_ksem_init_args *uap)
886 {
887 
888 	return (ksem_create(td, NULL, uap->idp, S_IRWXU | S_IRWXG, uap->value,
889 	    0, 1));
890 }
891 
892 int
893 freebsd32_ksem_open(struct thread *td, struct freebsd32_ksem_open_args *uap)
894 {
895 
896 	if ((uap->oflag & ~(O_CREAT | O_EXCL)) != 0)
897 		return (EINVAL);
898 	return (ksem_create(td, uap->name, uap->idp, uap->mode, uap->value,
899 	    uap->oflag, 1));
900 }
901 
902 int
903 freebsd32_ksem_timedwait(struct thread *td,
904     struct freebsd32_ksem_timedwait_args *uap)
905 {
906 	struct timespec32 abstime32;
907 	struct timespec *ts, abstime;
908 	int error;
909 
910 	/*
911 	 * We allow a null timespec (wait forever).
912 	 */
913 	if (uap->abstime == NULL)
914 		ts = NULL;
915 	else {
916 		error = copyin(uap->abstime, &abstime32, sizeof(abstime32));
917 		if (error != 0)
918 			return (error);
919 		CP(abstime32, abstime, tv_sec);
920 		CP(abstime32, abstime, tv_nsec);
921 		if (abstime.tv_nsec >= 1000000000 || abstime.tv_nsec < 0)
922 			return (EINVAL);
923 		ts = &abstime;
924 	}
925 	return (kern_sem_wait(td, uap->id, 0, ts));
926 }
927 
/*
 * System calls registered for 32-bit processes.  init, open and
 * timedwait use the freebsd32_ wrappers defined above; the remaining
 * entries reuse the native implementations.
 */
static struct syscall_helper_data ksem32_syscalls[] = {
	SYSCALL32_INIT_HELPER(freebsd32_ksem_init),
	SYSCALL32_INIT_HELPER(freebsd32_ksem_open),
	SYSCALL32_INIT_HELPER(ksem_unlink),
	SYSCALL32_INIT_HELPER(ksem_close),
	SYSCALL32_INIT_HELPER(ksem_post),
	SYSCALL32_INIT_HELPER(ksem_wait),
	SYSCALL32_INIT_HELPER(freebsd32_ksem_timedwait),
	SYSCALL32_INIT_HELPER(ksem_trywait),
	SYSCALL32_INIT_HELPER(ksem_getvalue),
	SYSCALL32_INIT_HELPER(ksem_destroy),
	SYSCALL_INIT_LAST
};
941 #endif
942 
943 static int
944 ksem_module_init(void)
945 {
946 	int error;
947 
948 	mtx_init(&sem_lock, "sem", NULL, MTX_DEF);
949 	mtx_init(&ksem_count_lock, "ksem count", NULL, MTX_DEF);
950 	sx_init(&ksem_dict_lock, "ksem dictionary");
951 	ksem_dictionary = hashinit(1024, M_KSEM, &ksem_hash);
952 	p31b_setcfg(CTL_P1003_1B_SEM_NSEMS_MAX, SEM_MAX);
953 	p31b_setcfg(CTL_P1003_1B_SEM_VALUE_MAX, SEM_VALUE_MAX);
954 
955 	error = syscall_helper_register(ksem_syscalls);
956 	if (error)
957 		return (error);
958 #ifdef COMPAT_FREEBSD32
959 	error = syscall32_helper_register(ksem32_syscalls);
960 	if (error)
961 		return (error);
962 #endif
963 	return (0);
964 }
965 
/*
 * Undo ksem_module_init(): unregister the system calls, then release the
 * dictionary and locks and withdraw the published limits.  Called from
 * sem_modload() on unload and after a failed load.
 */
static void
ksem_module_destroy(void)
{

	/* Unregister the system calls before tearing down the state below. */
#ifdef COMPAT_FREEBSD32
	syscall32_helper_unregister(ksem32_syscalls);
#endif
	syscall_helper_unregister(ksem_syscalls);

	hashdestroy(ksem_dictionary, M_KSEM, ksem_hash);
	sx_destroy(&ksem_dict_lock);
	mtx_destroy(&ksem_count_lock);
	mtx_destroy(&sem_lock);
	p31b_unsetcfg(CTL_P1003_1B_SEM_VALUE_MAX);
	p31b_unsetcfg(CTL_P1003_1B_SEM_NSEMS_MAX);
}
982 
983 static int
984 sem_modload(struct module *module, int cmd, void *arg)
985 {
986         int error = 0;
987 
988         switch (cmd) {
989         case MOD_LOAD:
990 		error = ksem_module_init();
991 		if (error)
992 			ksem_module_destroy();
993                 break;
994 
995         case MOD_UNLOAD:
996 		mtx_lock(&ksem_count_lock);
997 		if (nsems != 0) {
998 			error = EOPNOTSUPP;
999 			mtx_unlock(&ksem_count_lock);
1000 			break;
1001 		}
1002 		ksem_dead = 1;
1003 		mtx_unlock(&ksem_count_lock);
1004 		ksem_module_destroy();
1005                 break;
1006 
1007         case MOD_SHUTDOWN:
1008                 break;
1009         default:
1010                 error = EINVAL;
1011                 break;
1012         }
1013         return (error);
1014 }
1015 
/*
 * Module description: name "sem", event handler sem_modload(), and no
 * extra argument passed to the handler.
 */
static moduledata_t sem_mod = {
        "sem",
        &sem_modload,
        NULL
};

/* Register the module and declare its version as 1. */
DECLARE_MODULE(sem, sem_mod, SI_SUB_SYSV_SEM, SI_ORDER_FIRST);
MODULE_VERSION(sem, 1);
1024