xref: /freebsd/sys/kern/uipc_mqueue.c (revision ca987d4641cdcd7f27e153db17c5bf064934faf5)
1 /*-
2  * Copyright (c) 2005 David Xu <davidxu@freebsd.org>
3  * Copyright (c) 2016-2017 Robert N. M. Watson
4  * All rights reserved.
5  *
6  * Portions of this software were developed by BAE Systems, the University of
7  * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL
8  * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent
9  * Computing (TC) research program.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  */
33 
34 /*
35  * POSIX message queue implementation.
36  *
 * 1) A mqueue filesystem can be mounted; each message queue appears as a
 *    file in the mounted directory, where the user can change the queue's
 *    permissions and ownership, or remove the queue. Manually creating a
 *    file in the directory creates a message queue in the kernel with the
 *    same name and the default message queue attributes. This method is
 *    not recommended, since the mq_open syscall lets the user specify
 *    different attributes. The file system can also be mounted multiple
 *    times at different mount points, each showing the same contents.
 *
 * 2) Standard POSIX message queue API. The syscalls do not go through the
 *    vfs layer but operate directly on the internal data structures, which
 *    lets users use the IPC facility without mounting the mqueue file
 *    system.
49  */
50 
51 #include <sys/cdefs.h>
52 __FBSDID("$FreeBSD$");
53 
54 #include "opt_capsicum.h"
55 #include "opt_compat.h"
56 
57 #include <sys/param.h>
58 #include <sys/kernel.h>
59 #include <sys/systm.h>
60 #include <sys/limits.h>
61 #include <sys/malloc.h>
62 #include <sys/buf.h>
63 #include <sys/capsicum.h>
64 #include <sys/dirent.h>
65 #include <sys/event.h>
66 #include <sys/eventhandler.h>
67 #include <sys/fcntl.h>
68 #include <sys/file.h>
69 #include <sys/filedesc.h>
70 #include <sys/jail.h>
71 #include <sys/lock.h>
72 #include <sys/module.h>
73 #include <sys/mount.h>
74 #include <sys/mqueue.h>
75 #include <sys/mutex.h>
76 #include <sys/namei.h>
77 #include <sys/posix4.h>
78 #include <sys/poll.h>
79 #include <sys/priv.h>
80 #include <sys/proc.h>
81 #include <sys/queue.h>
82 #include <sys/sysproto.h>
83 #include <sys/stat.h>
84 #include <sys/syscall.h>
85 #include <sys/syscallsubr.h>
86 #include <sys/sysent.h>
87 #include <sys/sx.h>
88 #include <sys/sysctl.h>
89 #include <sys/taskqueue.h>
90 #include <sys/unistd.h>
91 #include <sys/user.h>
92 #include <sys/vnode.h>
93 #include <machine/atomic.h>
94 
95 #include <security/audit/audit.h>
96 
97 FEATURE(p1003_1b_mqueue, "POSIX P1003.1B message queues support");
98 
99 /*
100  * Limits and constants
101  */
102 #define	MQFS_NAMELEN		NAME_MAX
103 #define MQFS_DELEN		(8 + MQFS_NAMELEN)
104 
/*
 * Node types.  mqfstype_none is 0, so a node that has only been zero-filled
 * has no type until one is assigned.
 */
typedef enum {
	mqfstype_none = 0,	/* no type assigned */
	mqfstype_root,		/* root directory of the filesystem */
	mqfstype_dir,		/* directory */
	mqfstype_this,		/* "." entry of a directory */
	mqfstype_parent,	/* ".." entry of a directory */
	mqfstype_file,		/* file; presented as a VREG vnode */
	mqfstype_symlink,	/* symbolic link; presented as a VLNK vnode */
} mqfs_type_t;
115 
116 struct mqfs_node;
117 
118 /*
119  * mqfs_info: describes a mqfs instance
120  */
struct mqfs_info {
	/* sx lock held around updates to the node tree and per-node vnode lists */
	struct sx		mi_lock;
	/* root directory node of this instance */
	struct mqfs_node	*mi_root;
	/* unit-number allocator that hands out file numbers */
	struct unrhdr		*mi_unrhdr;
};
126 
/*
 * mqfs_vdata: private data hung off a vnode's v_data, tying the vnode to
 * the mqfs node it represents
 */
struct mqfs_vdata {
	LIST_ENTRY(mqfs_vdata)	mv_link;	/* entry on the node's mn_vnodes list */
	struct mqfs_node	*mv_node;	/* node this vnode represents */
	struct vnode		*mv_vnode;	/* back pointer to the vnode */
	struct task		mv_task;	/* task that runs do_recycle() on mv_vnode */
};
133 
134 /*
135  * mqfs_node: describes a node (file or directory) within a mqfs
136  */
struct mqfs_node {
	char			mn_name[MQFS_NAMELEN+1];	/* NUL-terminated entry name */
	struct mqfs_info	*mn_info;	/* mqfs instance the node belongs to */
	struct mqfs_node	*mn_parent;	/* containing directory; cleared on unlink */
	LIST_HEAD(,mqfs_node)	mn_children;	/* entries of a directory node */
	LIST_ENTRY(mqfs_node)	mn_sibling;	/* entry on the parent's mn_children */
	LIST_HEAD(,mqfs_vdata)	mn_vnodes;	/* vnodes currently attached to the node */
	/*
	 * Prison root of the creating credential.  Lookups only match nodes
	 * whose value is NULL or equal to the caller's prison root.
	 */
	const void		*mn_pr_root;
	int			mn_refcount;	/* reference count, updated atomically */
	mqfs_type_t		mn_type;	/* kind of node */
	int			mn_deleted;	/* set once the node has been unlinked */
	uint32_t		mn_fileno;	/* file number; 0 until one is assigned */
	void			*mn_data;	/* struct mqueue of a file node, see VTOMQ() */
	struct timespec		mn_birth;	/* creation time */
	struct timespec		mn_ctime;	/* reported as va_ctime */
	struct timespec		mn_atime;	/* reported as va_atime */
	struct timespec		mn_mtime;	/* reported as va_mtime */
	uid_t			mn_uid;		/* owner */
	gid_t			mn_gid;		/* group */
	int			mn_mode;	/* permission bits */
};
158 
/* vnode -> mqfs node, through the mqfs_vdata stored in v_data */
#define	VTON(vp)	(((struct mqfs_vdata *)((vp)->v_data))->mv_node)
/* vnode -> message queue stored in the node's mn_data */
#define VTOMQ(vp) 	((struct mqueue *)(VTON(vp)->mn_data))
/* mount -> mqfs instance stored in mnt_data */
#define	VFSTOMQFS(m)	((struct mqfs_info *)((m)->mnt_data))
/* file -> message queue; treats f_data as a pointer to the mqfs node */
#define	FPTOMQ(fp)	((struct mqueue *)(((struct mqfs_node *) \
				(fp)->f_data)->mn_data))
164 
165 TAILQ_HEAD(msgq, mqueue_msg);
166 
167 struct mqueue;
168 
/*
 * mqueue_notifier: one notification registration.
 *
 * NOTE(review): the code that fills in and consumes these fields is outside
 * this part of the file; the notes below follow from the field types and
 * names only -- confirm against the notification code.
 */
struct mqueue_notifier {
	LIST_ENTRY(mqueue_notifier)	nt_link;	/* list linkage */
	struct sigevent			nt_sigev;	/* presumably the requested sigevent */
	ksiginfo_t			nt_ksi;		/* signal information record */
	struct proc			*nt_proc;	/* presumably the registering process */
};
175 
/*
 * mqueue: in-kernel state of one message queue; a file node points at it
 * through mn_data.
 *
 * NOTE(review): the send/receive routines that use these fields are outside
 * this part of the file, so the per-field notes are inferred from names and
 * types -- confirm before relying on them.
 */
struct mqueue {
	struct mtx	mq_mutex;	/* presumably protects the fields below */
	int		mq_flags;	/* presumably MQ_RSEL / MQ_WSEL bits */
	long		mq_maxmsg;	/* presumably the message-count limit */
	long		mq_msgsize;	/* presumably the per-message size limit */
	long		mq_curmsgs;	/* presumably messages currently queued */
	long		mq_totalbytes;	/* presumably bytes currently queued */
	struct msgq	mq_msgq;	/* tail queue of struct mqueue_msg */
	int		mq_receivers;	/* presumably a count of waiting receivers */
	int		mq_senders;	/* presumably a count of waiting senders */
	struct selinfo	mq_rsel;	/* select/poll state, read side */
	struct selinfo	mq_wsel;	/* select/poll state, write side */
	struct mqueue_notifier	*mq_notifier;	/* notification registration, if any */
};
190 
191 #define	MQ_RSEL		0x01
192 #define	MQ_WSEL		0x02
193 
/*
 * mqueue_msg: header of one queued message.  As the trailing note says, the
 * message payload follows this header.
 */
struct mqueue_msg {
	TAILQ_ENTRY(mqueue_msg)	msg_link;	/* linkage on a struct msgq */
	unsigned int	msg_prio;		/* message priority */
	unsigned int	msg_size;		/* presumably the payload length in bytes */
	/* following real data... */
};
200 
/* Parent node for the kern.mqueue sysctl variables below. */
static SYSCTL_NODE(_kern, OID_AUTO, mqueue, CTLFLAG_RW, 0,
	"POSIX real time message queue");

/*
 * Not exported through sysctl.  Presumably the attributes used when a queue
 * is created without explicit ones -- the users are outside this part of
 * the file.
 */
static int	default_maxmsg  = 10;
static int	default_msgsize = 1024;

/* Integers exported read-write under kern.mqueue. */
static int	maxmsg = 100;
SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsg, CTLFLAG_RW,
    &maxmsg, 0, "Default maximum messages in queue");
static int	maxmsgsize = 16384;
SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsgsize, CTLFLAG_RW,
    &maxmsgsize, 0, "Default maximum message size");
static int	maxmq = 100;
SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmq, CTLFLAG_RW,
    &maxmq, 0, "maximum message queues");
static int	curmq = 0;
SYSCTL_INT(_kern_mqueue, OID_AUTO, curmq, CTLFLAG_RW,
    &curmq, 0, "current message queue number");
/* mqfs_uninit() refuses to tear the filesystem down while this is zero. */
static int	unloadable = 0;
static MALLOC_DEFINE(M_MQUEUEDATA, "mqdata", "mqueue data");

/* Tag of the process_exit handler registered by mqfs_init(). */
static eventhandler_tag exit_tag;

/* Only one instance per-system */
static struct mqfs_info		mqfs_data;
/* UMA zones, created in mqfs_init() and destroyed in mqfs_uninit(). */
static uma_zone_t		mqnode_zone;	/* struct mqfs_node */
static uma_zone_t		mqueue_zone;	/* struct mqueue */
static uma_zone_t		mvdata_zone;	/* struct mqfs_vdata */
static uma_zone_t		mqnoti_zone;	/* struct mqueue_notifier */
static struct vop_vector	mqfs_vnodeops;
static struct fileops		mqueueops;
/* Jail OSD slot obtained from osd_jail_register() in mqfs_init(). */
static unsigned			mqfs_osd_jail_slot;
233 
234 /*
235  * Directory structure construction and manipulation
236  */
237 #ifdef notyet
238 static struct mqfs_node	*mqfs_create_dir(struct mqfs_node *parent,
239 	const char *name, int namelen, struct ucred *cred, int mode);
240 static struct mqfs_node	*mqfs_create_link(struct mqfs_node *parent,
241 	const char *name, int namelen, struct ucred *cred, int mode);
242 #endif
243 
244 static struct mqfs_node	*mqfs_create_file(struct mqfs_node *parent,
245 	const char *name, int namelen, struct ucred *cred, int mode);
246 static int	mqfs_destroy(struct mqfs_node *mn);
247 static void	mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn);
248 static void	mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn);
249 static int	mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn);
250 static int	mqfs_prison_remove(void *obj, void *data);
251 
252 /*
 * Message queue construction and manipulation
254  */
255 static struct mqueue	*mqueue_alloc(const struct mq_attr *attr);
256 static void	mqueue_free(struct mqueue *mq);
257 static int	mqueue_send(struct mqueue *mq, const char *msg_ptr,
258 			size_t msg_len, unsigned msg_prio, int waitok,
259 			const struct timespec *abs_timeout);
260 static int	mqueue_receive(struct mqueue *mq, char *msg_ptr,
261 			size_t msg_len, unsigned *msg_prio, int waitok,
262 			const struct timespec *abs_timeout);
263 static int	_mqueue_send(struct mqueue *mq, struct mqueue_msg *msg,
264 			int timo);
265 static int	_mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg,
266 			int timo);
267 static void	mqueue_send_notification(struct mqueue *mq);
268 static void	mqueue_fdclose(struct thread *td, int fd, struct file *fp);
269 static void	mq_proc_exit(void *arg, struct proc *p);
270 
271 /*
272  * kqueue filters
273  */
274 static void	filt_mqdetach(struct knote *kn);
275 static int	filt_mqread(struct knote *kn, long hint);
276 static int	filt_mqwrite(struct knote *kn, long hint);
277 
/*
 * Read-side filter: attached to a file descriptor (f_isfd), with events
 * evaluated by filt_mqread().
 */
struct filterops mq_rfiltops = {
	.f_isfd = 1,
	.f_detach = filt_mqdetach,
	.f_event = filt_mqread,
};
/*
 * Write-side filter: same as above but events are evaluated by
 * filt_mqwrite().
 */
struct filterops mq_wfiltops = {
	.f_isfd = 1,
	.f_detach = filt_mqdetach,
	.f_event = filt_mqwrite,
};
288 
289 /*
290  * Initialize fileno bitmap
291  */
292 static void
293 mqfs_fileno_init(struct mqfs_info *mi)
294 {
295 	struct unrhdr *up;
296 
297 	up = new_unrhdr(1, INT_MAX, NULL);
298 	mi->mi_unrhdr = up;
299 }
300 
301 /*
302  * Tear down fileno bitmap
303  */
304 static void
305 mqfs_fileno_uninit(struct mqfs_info *mi)
306 {
307 	struct unrhdr *up;
308 
309 	up = mi->mi_unrhdr;
310 	mi->mi_unrhdr = NULL;
311 	delete_unrhdr(up);
312 }
313 
314 /*
315  * Allocate a file number
316  */
317 static void
318 mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn)
319 {
320 	/* make sure our parent has a file number */
321 	if (mn->mn_parent && !mn->mn_parent->mn_fileno)
322 		mqfs_fileno_alloc(mi, mn->mn_parent);
323 
324 	switch (mn->mn_type) {
325 	case mqfstype_root:
326 	case mqfstype_dir:
327 	case mqfstype_file:
328 	case mqfstype_symlink:
329 		mn->mn_fileno = alloc_unr(mi->mi_unrhdr);
330 		break;
331 	case mqfstype_this:
332 		KASSERT(mn->mn_parent != NULL,
333 		    ("mqfstype_this node has no parent"));
334 		mn->mn_fileno = mn->mn_parent->mn_fileno;
335 		break;
336 	case mqfstype_parent:
337 		KASSERT(mn->mn_parent != NULL,
338 		    ("mqfstype_parent node has no parent"));
339 		if (mn->mn_parent == mi->mi_root) {
340 			mn->mn_fileno = mn->mn_parent->mn_fileno;
341 			break;
342 		}
343 		KASSERT(mn->mn_parent->mn_parent != NULL,
344 		    ("mqfstype_parent node has no grandparent"));
345 		mn->mn_fileno = mn->mn_parent->mn_parent->mn_fileno;
346 		break;
347 	default:
348 		KASSERT(0,
349 		    ("mqfs_fileno_alloc() called for unknown type node: %d",
350 			mn->mn_type));
351 		break;
352 	}
353 }
354 
355 /*
356  * Release a file number
357  */
358 static void
359 mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn)
360 {
361 	switch (mn->mn_type) {
362 	case mqfstype_root:
363 	case mqfstype_dir:
364 	case mqfstype_file:
365 	case mqfstype_symlink:
366 		free_unr(mi->mi_unrhdr, mn->mn_fileno);
367 		break;
368 	case mqfstype_this:
369 	case mqfstype_parent:
370 		/* ignore these, as they don't "own" their file number */
371 		break;
372 	default:
373 		KASSERT(0,
374 		    ("mqfs_fileno_free() called for unknown type node: %d",
375 			mn->mn_type));
376 		break;
377 	}
378 }
379 
380 static __inline struct mqfs_node *
381 mqnode_alloc(void)
382 {
383 	return uma_zalloc(mqnode_zone, M_WAITOK | M_ZERO);
384 }
385 
386 static __inline void
387 mqnode_free(struct mqfs_node *node)
388 {
389 	uma_zfree(mqnode_zone, node);
390 }
391 
392 static __inline void
393 mqnode_addref(struct mqfs_node *node)
394 {
395 	atomic_fetchadd_int(&node->mn_refcount, 1);
396 }
397 
398 static __inline void
399 mqnode_release(struct mqfs_node *node)
400 {
401 	struct mqfs_info *mqfs;
402 	int old, exp;
403 
404 	mqfs = node->mn_info;
405 	old = atomic_fetchadd_int(&node->mn_refcount, -1);
406 	if (node->mn_type == mqfstype_dir ||
407 	    node->mn_type == mqfstype_root)
408 		exp = 3; /* include . and .. */
409 	else
410 		exp = 1;
411 	if (old == exp) {
412 		int locked = sx_xlocked(&mqfs->mi_lock);
413 		if (!locked)
414 			sx_xlock(&mqfs->mi_lock);
415 		mqfs_destroy(node);
416 		if (!locked)
417 			sx_xunlock(&mqfs->mi_lock);
418 	}
419 }
420 
421 /*
422  * Add a node to a directory
423  */
424 static int
425 mqfs_add_node(struct mqfs_node *parent, struct mqfs_node *node)
426 {
427 	KASSERT(parent != NULL, ("%s(): parent is NULL", __func__));
428 	KASSERT(parent->mn_info != NULL,
429 	    ("%s(): parent has no mn_info", __func__));
430 	KASSERT(parent->mn_type == mqfstype_dir ||
431 	    parent->mn_type == mqfstype_root,
432 	    ("%s(): parent is not a directory", __func__));
433 
434 	node->mn_info = parent->mn_info;
435 	node->mn_parent = parent;
436 	LIST_INIT(&node->mn_children);
437 	LIST_INIT(&node->mn_vnodes);
438 	LIST_INSERT_HEAD(&parent->mn_children, node, mn_sibling);
439 	mqnode_addref(parent);
440 	return (0);
441 }
442 
443 static struct mqfs_node *
444 mqfs_create_node(const char *name, int namelen, struct ucred *cred, int mode,
445 	int nodetype)
446 {
447 	struct mqfs_node *node;
448 
449 	node = mqnode_alloc();
450 	strncpy(node->mn_name, name, namelen);
451 	node->mn_pr_root = cred->cr_prison->pr_root;
452 	node->mn_type = nodetype;
453 	node->mn_refcount = 1;
454 	vfs_timestamp(&node->mn_birth);
455 	node->mn_ctime = node->mn_atime = node->mn_mtime
456 		= node->mn_birth;
457 	node->mn_uid = cred->cr_uid;
458 	node->mn_gid = cred->cr_gid;
459 	node->mn_mode = mode;
460 	return (node);
461 }
462 
463 /*
464  * Create a file
465  */
466 static struct mqfs_node *
467 mqfs_create_file(struct mqfs_node *parent, const char *name, int namelen,
468 	struct ucred *cred, int mode)
469 {
470 	struct mqfs_node *node;
471 
472 	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_file);
473 	if (mqfs_add_node(parent, node) != 0) {
474 		mqnode_free(node);
475 		return (NULL);
476 	}
477 	return (node);
478 }
479 
480 /*
481  * Add . and .. to a directory
482  */
483 static int
484 mqfs_fixup_dir(struct mqfs_node *parent)
485 {
486 	struct mqfs_node *dir;
487 
488 	dir = mqnode_alloc();
489 	dir->mn_name[0] = '.';
490 	dir->mn_type = mqfstype_this;
491 	dir->mn_refcount = 1;
492 	if (mqfs_add_node(parent, dir) != 0) {
493 		mqnode_free(dir);
494 		return (-1);
495 	}
496 
497 	dir = mqnode_alloc();
498 	dir->mn_name[0] = dir->mn_name[1] = '.';
499 	dir->mn_type = mqfstype_parent;
500 	dir->mn_refcount = 1;
501 
502 	if (mqfs_add_node(parent, dir) != 0) {
503 		mqnode_free(dir);
504 		return (-1);
505 	}
506 
507 	return (0);
508 }
509 
510 #ifdef notyet
511 
512 /*
513  * Create a directory
514  */
515 static struct mqfs_node *
516 mqfs_create_dir(struct mqfs_node *parent, const char *name, int namelen,
517 	struct ucred *cred, int mode)
518 {
519 	struct mqfs_node *node;
520 
521 	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_dir);
522 	if (mqfs_add_node(parent, node) != 0) {
523 		mqnode_free(node);
524 		return (NULL);
525 	}
526 
527 	if (mqfs_fixup_dir(node) != 0) {
528 		mqfs_destroy(node);
529 		return (NULL);
530 	}
531 	return (node);
532 }
533 
534 /*
535  * Create a symlink
536  */
537 static struct mqfs_node *
538 mqfs_create_link(struct mqfs_node *parent, const char *name, int namelen,
539 	struct ucred *cred, int mode)
540 {
541 	struct mqfs_node *node;
542 
543 	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_symlink);
544 	if (mqfs_add_node(parent, node) != 0) {
545 		mqnode_free(node);
546 		return (NULL);
547 	}
548 	return (node);
549 }
550 
551 #endif
552 
553 /*
554  * Destroy a node or a tree of nodes
555  */
556 static int
557 mqfs_destroy(struct mqfs_node *node)
558 {
559 	struct mqfs_node *parent;
560 
561 	KASSERT(node != NULL,
562 	    ("%s(): node is NULL", __func__));
563 	KASSERT(node->mn_info != NULL,
564 	    ("%s(): node has no mn_info", __func__));
565 
566 	/* destroy children */
567 	if (node->mn_type == mqfstype_dir || node->mn_type == mqfstype_root)
568 		while (! LIST_EMPTY(&node->mn_children))
569 			mqfs_destroy(LIST_FIRST(&node->mn_children));
570 
571 	/* unlink from parent */
572 	if ((parent = node->mn_parent) != NULL) {
573 		KASSERT(parent->mn_info == node->mn_info,
574 		    ("%s(): parent has different mn_info", __func__));
575 		LIST_REMOVE(node, mn_sibling);
576 	}
577 
578 	if (node->mn_fileno != 0)
579 		mqfs_fileno_free(node->mn_info, node);
580 	if (node->mn_data != NULL)
581 		mqueue_free(node->mn_data);
582 	mqnode_free(node);
583 	return (0);
584 }
585 
586 /*
587  * Mount a mqfs instance
588  */
589 static int
590 mqfs_mount(struct mount *mp)
591 {
592 	struct statfs *sbp;
593 
594 	if (mp->mnt_flag & MNT_UPDATE)
595 		return (EOPNOTSUPP);
596 
597 	mp->mnt_data = &mqfs_data;
598 	MNT_ILOCK(mp);
599 	mp->mnt_flag |= MNT_LOCAL;
600 	MNT_IUNLOCK(mp);
601 	vfs_getnewfsid(mp);
602 
603 	sbp = &mp->mnt_stat;
604 	vfs_mountedfrom(mp, "mqueue");
605 	sbp->f_bsize = PAGE_SIZE;
606 	sbp->f_iosize = PAGE_SIZE;
607 	sbp->f_blocks = 1;
608 	sbp->f_bfree = 0;
609 	sbp->f_bavail = 0;
610 	sbp->f_files = 1;
611 	sbp->f_ffree = 0;
612 	return (0);
613 }
614 
615 /*
616  * Unmount a mqfs instance
617  */
618 static int
619 mqfs_unmount(struct mount *mp, int mntflags)
620 {
621 	int error;
622 
623 	error = vflush(mp, 0, (mntflags & MNT_FORCE) ?  FORCECLOSE : 0,
624 	    curthread);
625 	return (error);
626 }
627 
628 /*
629  * Return a root vnode
630  */
631 static int
632 mqfs_root(struct mount *mp, int flags, struct vnode **vpp)
633 {
634 	struct mqfs_info *mqfs;
635 	int ret;
636 
637 	mqfs = VFSTOMQFS(mp);
638 	ret = mqfs_allocv(mp, vpp, mqfs->mi_root);
639 	return (ret);
640 }
641 
/*
 * Return filesystem stats.
 *
 * Currently a no-op that reports success: sbp is left untouched, so no
 * statistics are refreshed here.
 */
static int
mqfs_statfs(struct mount *mp, struct statfs *sbp)
{
	/* XXX update statistics */
	return (0);
}
651 
/*
 * Initialize the (single, global) mqfs instance.
 *
 * Creates the UMA zones, the instance lock and the root directory, then
 * registers the process-exit handler, the fd-close hook and the jail OSD
 * slot.  Always returns 0.
 */
static int
mqfs_init(struct vfsconf *vfc)
{
	struct mqfs_node *root;
	struct mqfs_info *mi;
	/* jail OSD methods: only the prison-removal callback is provided */
	osd_method_t methods[PR_MAXMETHOD] = {
	    [PR_METHOD_REMOVE] = mqfs_prison_remove,
	};

	/* one zone per dynamically allocated structure */
	mqnode_zone = uma_zcreate("mqnode", sizeof(struct mqfs_node),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	mqueue_zone = uma_zcreate("mqueue", sizeof(struct mqueue),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	mvdata_zone = uma_zcreate("mvdata",
		sizeof(struct mqfs_vdata), NULL, NULL, NULL,
		NULL, UMA_ALIGN_PTR, 0);
	mqnoti_zone = uma_zcreate("mqnotifier", sizeof(struct mqueue_notifier),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	mi = &mqfs_data;
	sx_init(&mi->mi_lock, "mqfs lock");
	/* set up the root directory */
	root = mqfs_create_node("/", 1, curthread->td_ucred, 01777,
		mqfstype_root);
	/* the root has no parent, so do by hand what mqfs_add_node() would */
	root->mn_info = mi;
	LIST_INIT(&root->mn_children);
	LIST_INIT(&root->mn_vnodes);
	mi->mi_root = root;
	mqfs_fileno_init(mi);
	mqfs_fileno_alloc(mi, root);
	/* the return value of mqfs_fixup_dir() is not checked here */
	mqfs_fixup_dir(root);
	exit_tag = EVENTHANDLER_REGISTER(process_exit, mq_proc_exit, NULL,
	    EVENTHANDLER_PRI_ANY);
	mq_fdclose = mqueue_fdclose;
	p31b_setcfg(CTL_P1003_1B_MESSAGE_PASSING, _POSIX_MESSAGE_PASSING);
	mqfs_osd_jail_slot = osd_jail_register(NULL, methods);
	return (0);
}
692 
693 /*
694  * Destroy a mqfs instance
695  */
696 static int
697 mqfs_uninit(struct vfsconf *vfc)
698 {
699 	struct mqfs_info *mi;
700 
701 	if (!unloadable)
702 		return (EOPNOTSUPP);
703 	osd_jail_deregister(mqfs_osd_jail_slot);
704 	EVENTHANDLER_DEREGISTER(process_exit, exit_tag);
705 	mi = &mqfs_data;
706 	mqfs_destroy(mi->mi_root);
707 	mi->mi_root = NULL;
708 	mqfs_fileno_uninit(mi);
709 	sx_destroy(&mi->mi_lock);
710 	uma_zdestroy(mqnode_zone);
711 	uma_zdestroy(mqueue_zone);
712 	uma_zdestroy(mvdata_zone);
713 	uma_zdestroy(mqnoti_zone);
714 	return (0);
715 }
716 
717 /*
718  * task routine
719  */
720 static void
721 do_recycle(void *context, int pending __unused)
722 {
723 	struct vnode *vp = (struct vnode *)context;
724 
725 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
726 	vrecycle(vp);
727 	VOP_UNLOCK(vp, 0);
728 	vdrop(vp);
729 }
730 
731 /*
732  * Allocate a vnode
733  */
734 static int
735 mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn)
736 {
737 	struct mqfs_vdata *vd;
738 	struct mqfs_info  *mqfs;
739 	struct vnode *newvpp;
740 	int error;
741 
742 	mqfs = pn->mn_info;
743 	*vpp = NULL;
744 	sx_xlock(&mqfs->mi_lock);
745 	LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
746 		if (vd->mv_vnode->v_mount == mp) {
747 			vhold(vd->mv_vnode);
748 			break;
749 		}
750 	}
751 
752 	if (vd != NULL) {
753 found:
754 		*vpp = vd->mv_vnode;
755 		sx_xunlock(&mqfs->mi_lock);
756 		error = vget(*vpp, LK_RETRY | LK_EXCLUSIVE, curthread);
757 		vdrop(*vpp);
758 		return (error);
759 	}
760 	sx_xunlock(&mqfs->mi_lock);
761 
762 	error = getnewvnode("mqueue", mp, &mqfs_vnodeops, &newvpp);
763 	if (error)
764 		return (error);
765 	vn_lock(newvpp, LK_EXCLUSIVE | LK_RETRY);
766 	error = insmntque(newvpp, mp);
767 	if (error != 0)
768 		return (error);
769 
770 	sx_xlock(&mqfs->mi_lock);
771 	/*
772 	 * Check if it has already been allocated
773 	 * while we were blocked.
774 	 */
775 	LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
776 		if (vd->mv_vnode->v_mount == mp) {
777 			vhold(vd->mv_vnode);
778 			sx_xunlock(&mqfs->mi_lock);
779 
780 			vgone(newvpp);
781 			vput(newvpp);
782 			goto found;
783 		}
784 	}
785 
786 	*vpp = newvpp;
787 
788 	vd = uma_zalloc(mvdata_zone, M_WAITOK);
789 	(*vpp)->v_data = vd;
790 	vd->mv_vnode = *vpp;
791 	vd->mv_node = pn;
792 	TASK_INIT(&vd->mv_task, 0, do_recycle, *vpp);
793 	LIST_INSERT_HEAD(&pn->mn_vnodes, vd, mv_link);
794 	mqnode_addref(pn);
795 	switch (pn->mn_type) {
796 	case mqfstype_root:
797 		(*vpp)->v_vflag = VV_ROOT;
798 		/* fall through */
799 	case mqfstype_dir:
800 	case mqfstype_this:
801 	case mqfstype_parent:
802 		(*vpp)->v_type = VDIR;
803 		break;
804 	case mqfstype_file:
805 		(*vpp)->v_type = VREG;
806 		break;
807 	case mqfstype_symlink:
808 		(*vpp)->v_type = VLNK;
809 		break;
810 	case mqfstype_none:
811 		KASSERT(0, ("mqfs_allocf called for null node\n"));
812 	default:
813 		panic("%s has unexpected type: %d", pn->mn_name, pn->mn_type);
814 	}
815 	sx_xunlock(&mqfs->mi_lock);
816 	return (0);
817 }
818 
819 /*
820  * Search a directory entry
821  */
822 static struct mqfs_node *
823 mqfs_search(struct mqfs_node *pd, const char *name, int len, struct ucred *cred)
824 {
825 	struct mqfs_node *pn;
826 	const void *pr_root;
827 
828 	sx_assert(&pd->mn_info->mi_lock, SX_LOCKED);
829 	pr_root = cred->cr_prison->pr_root;
830 	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
831 		/* Only match names within the same prison root directory */
832 		if ((pn->mn_pr_root == NULL || pn->mn_pr_root == pr_root) &&
833 		    strncmp(pn->mn_name, name, len) == 0 &&
834 		    pn->mn_name[len] == '\0')
835 			return (pn);
836 	}
837 	return (NULL);
838 }
839 
840 /*
841  * Look up a file or directory.
842  */
843 static int
844 mqfs_lookupx(struct vop_cachedlookup_args *ap)
845 {
846 	struct componentname *cnp;
847 	struct vnode *dvp, **vpp;
848 	struct mqfs_node *pd;
849 	struct mqfs_node *pn;
850 	struct mqfs_info *mqfs;
851 	int nameiop, flags, error, namelen;
852 	char *pname;
853 	struct thread *td;
854 
855 	cnp = ap->a_cnp;
856 	vpp = ap->a_vpp;
857 	dvp = ap->a_dvp;
858 	pname = cnp->cn_nameptr;
859 	namelen = cnp->cn_namelen;
860 	td = cnp->cn_thread;
861 	flags = cnp->cn_flags;
862 	nameiop = cnp->cn_nameiop;
863 	pd = VTON(dvp);
864 	pn = NULL;
865 	mqfs = pd->mn_info;
866 	*vpp = NULLVP;
867 
868 	if (dvp->v_type != VDIR)
869 		return (ENOTDIR);
870 
871 	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, cnp->cn_thread);
872 	if (error)
873 		return (error);
874 
875 	/* shortcut: check if the name is too long */
876 	if (cnp->cn_namelen >= MQFS_NAMELEN)
877 		return (ENOENT);
878 
879 	/* self */
880 	if (namelen == 1 && pname[0] == '.') {
881 		if ((flags & ISLASTCN) && nameiop != LOOKUP)
882 			return (EINVAL);
883 		pn = pd;
884 		*vpp = dvp;
885 		VREF(dvp);
886 		return (0);
887 	}
888 
889 	/* parent */
890 	if (cnp->cn_flags & ISDOTDOT) {
891 		if (dvp->v_vflag & VV_ROOT)
892 			return (EIO);
893 		if ((flags & ISLASTCN) && nameiop != LOOKUP)
894 			return (EINVAL);
895 		VOP_UNLOCK(dvp, 0);
896 		KASSERT(pd->mn_parent, ("non-root directory has no parent"));
897 		pn = pd->mn_parent;
898 		error = mqfs_allocv(dvp->v_mount, vpp, pn);
899 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
900 		return (error);
901 	}
902 
903 	/* named node */
904 	sx_xlock(&mqfs->mi_lock);
905 	pn = mqfs_search(pd, pname, namelen, cnp->cn_cred);
906 	if (pn != NULL)
907 		mqnode_addref(pn);
908 	sx_xunlock(&mqfs->mi_lock);
909 
910 	/* found */
911 	if (pn != NULL) {
912 		/* DELETE */
913 		if (nameiop == DELETE && (flags & ISLASTCN)) {
914 			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
915 			if (error) {
916 				mqnode_release(pn);
917 				return (error);
918 			}
919 			if (*vpp == dvp) {
920 				VREF(dvp);
921 				*vpp = dvp;
922 				mqnode_release(pn);
923 				return (0);
924 			}
925 		}
926 
927 		/* allocate vnode */
928 		error = mqfs_allocv(dvp->v_mount, vpp, pn);
929 		mqnode_release(pn);
930 		if (error == 0 && cnp->cn_flags & MAKEENTRY)
931 			cache_enter(dvp, *vpp, cnp);
932 		return (error);
933 	}
934 
935 	/* not found */
936 
937 	/* will create a new entry in the directory ? */
938 	if ((nameiop == CREATE || nameiop == RENAME) && (flags & LOCKPARENT)
939 	    && (flags & ISLASTCN)) {
940 		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
941 		if (error)
942 			return (error);
943 		cnp->cn_flags |= SAVENAME;
944 		return (EJUSTRETURN);
945 	}
946 	return (ENOENT);
947 }
948 
949 #if 0
950 struct vop_lookup_args {
951 	struct vop_generic_args a_gen;
952 	struct vnode *a_dvp;
953 	struct vnode **a_vpp;
954 	struct componentname *a_cnp;
955 };
956 #endif
957 
/*
 * vnode lookup operation; a thin wrapper around mqfs_lookupx().
 */
static int
mqfs_lookup(struct vop_cachedlookup_args *ap)
{

	return (mqfs_lookupx(ap));
}
969 
970 #if 0
971 struct vop_create_args {
972 	struct vnode *a_dvp;
973 	struct vnode **a_vpp;
974 	struct componentname *a_cnp;
975 	struct vattr *a_vap;
976 };
977 #endif
978 
979 /*
980  * vnode creation operation
981  */
982 static int
983 mqfs_create(struct vop_create_args *ap)
984 {
985 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
986 	struct componentname *cnp = ap->a_cnp;
987 	struct mqfs_node *pd;
988 	struct mqfs_node *pn;
989 	struct mqueue *mq;
990 	int error;
991 
992 	pd = VTON(ap->a_dvp);
993 	if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir)
994 		return (ENOTDIR);
995 	mq = mqueue_alloc(NULL);
996 	if (mq == NULL)
997 		return (EAGAIN);
998 	sx_xlock(&mqfs->mi_lock);
999 	if ((cnp->cn_flags & HASBUF) == 0)
1000 		panic("%s: no name", __func__);
1001 	pn = mqfs_create_file(pd, cnp->cn_nameptr, cnp->cn_namelen,
1002 		cnp->cn_cred, ap->a_vap->va_mode);
1003 	if (pn == NULL) {
1004 		sx_xunlock(&mqfs->mi_lock);
1005 		error = ENOSPC;
1006 	} else {
1007 		mqnode_addref(pn);
1008 		sx_xunlock(&mqfs->mi_lock);
1009 		error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn);
1010 		mqnode_release(pn);
1011 		if (error)
1012 			mqfs_destroy(pn);
1013 		else
1014 			pn->mn_data = mq;
1015 	}
1016 	if (error)
1017 		mqueue_free(mq);
1018 	return (error);
1019 }
1020 
1021 /*
1022  * Remove an entry
1023  */
1024 static
1025 int do_unlink(struct mqfs_node *pn, struct ucred *ucred)
1026 {
1027 	struct mqfs_node *parent;
1028 	struct mqfs_vdata *vd;
1029 	int error = 0;
1030 
1031 	sx_assert(&pn->mn_info->mi_lock, SX_LOCKED);
1032 
1033 	if (ucred->cr_uid != pn->mn_uid &&
1034 	    (error = priv_check_cred(ucred, PRIV_MQ_ADMIN, 0)) != 0)
1035 		error = EACCES;
1036 	else if (!pn->mn_deleted) {
1037 		parent = pn->mn_parent;
1038 		pn->mn_parent = NULL;
1039 		pn->mn_deleted = 1;
1040 		LIST_REMOVE(pn, mn_sibling);
1041 		LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
1042 			cache_purge(vd->mv_vnode);
1043 			vhold(vd->mv_vnode);
1044 			taskqueue_enqueue(taskqueue_thread, &vd->mv_task);
1045 		}
1046 		mqnode_release(pn);
1047 		mqnode_release(parent);
1048 	} else
1049 		error = ENOENT;
1050 	return (error);
1051 }
1052 
1053 #if 0
1054 struct vop_remove_args {
1055 	struct vnode *a_dvp;
1056 	struct vnode *a_vp;
1057 	struct componentname *a_cnp;
1058 };
1059 #endif
1060 
1061 /*
1062  * vnode removal operation
1063  */
1064 static int
1065 mqfs_remove(struct vop_remove_args *ap)
1066 {
1067 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1068 	struct mqfs_node *pn;
1069 	int error;
1070 
1071 	if (ap->a_vp->v_type == VDIR)
1072                 return (EPERM);
1073 	pn = VTON(ap->a_vp);
1074 	sx_xlock(&mqfs->mi_lock);
1075 	error = do_unlink(pn, ap->a_cnp->cn_cred);
1076 	sx_xunlock(&mqfs->mi_lock);
1077 	return (error);
1078 }
1079 
1080 #if 0
1081 struct vop_inactive_args {
1082 	struct vnode *a_vp;
1083 	struct thread *a_td;
1084 };
1085 #endif
1086 
1087 static int
1088 mqfs_inactive(struct vop_inactive_args *ap)
1089 {
1090 	struct mqfs_node *pn = VTON(ap->a_vp);
1091 
1092 	if (pn->mn_deleted)
1093 		vrecycle(ap->a_vp);
1094 	return (0);
1095 }
1096 
1097 #if 0
1098 struct vop_reclaim_args {
1099 	struct vop_generic_args a_gen;
1100 	struct vnode *a_vp;
1101 	struct thread *a_td;
1102 };
1103 #endif
1104 
1105 static int
1106 mqfs_reclaim(struct vop_reclaim_args *ap)
1107 {
1108 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_vp->v_mount);
1109 	struct vnode *vp = ap->a_vp;
1110 	struct mqfs_node *pn;
1111 	struct mqfs_vdata *vd;
1112 
1113 	vd = vp->v_data;
1114 	pn = vd->mv_node;
1115 	sx_xlock(&mqfs->mi_lock);
1116 	vp->v_data = NULL;
1117 	LIST_REMOVE(vd, mv_link);
1118 	uma_zfree(mvdata_zone, vd);
1119 	mqnode_release(pn);
1120 	sx_xunlock(&mqfs->mi_lock);
1121 	return (0);
1122 }
1123 
1124 #if 0
1125 struct vop_open_args {
1126 	struct vop_generic_args a_gen;
1127 	struct vnode *a_vp;
1128 	int a_mode;
1129 	struct ucred *a_cred;
1130 	struct thread *a_td;
1131 	struct file *a_fp;
1132 };
1133 #endif
1134 
1135 static int
1136 mqfs_open(struct vop_open_args *ap)
1137 {
1138 	return (0);
1139 }
1140 
1141 #if 0
1142 struct vop_close_args {
1143 	struct vop_generic_args a_gen;
1144 	struct vnode *a_vp;
1145 	int a_fflag;
1146 	struct ucred *a_cred;
1147 	struct thread *a_td;
1148 };
1149 #endif
1150 
/*
 * Close operation: nothing to do, always succeeds.
 */
static int
mqfs_close(struct vop_close_args *ap)
{
	return (0);
}
1156 
1157 #if 0
1158 struct vop_access_args {
1159 	struct vop_generic_args a_gen;
1160 	struct vnode *a_vp;
1161 	accmode_t a_accmode;
1162 	struct ucred *a_cred;
1163 	struct thread *a_td;
1164 };
1165 #endif
1166 
1167 /*
1168  * Verify permissions
1169  */
1170 static int
1171 mqfs_access(struct vop_access_args *ap)
1172 {
1173 	struct vnode *vp = ap->a_vp;
1174 	struct vattr vattr;
1175 	int error;
1176 
1177 	error = VOP_GETATTR(vp, &vattr, ap->a_cred);
1178 	if (error)
1179 		return (error);
1180 	error = vaccess(vp->v_type, vattr.va_mode, vattr.va_uid,
1181 	    vattr.va_gid, ap->a_accmode, ap->a_cred, NULL);
1182 	return (error);
1183 }
1184 
1185 #if 0
1186 struct vop_getattr_args {
1187 	struct vop_generic_args a_gen;
1188 	struct vnode *a_vp;
1189 	struct vattr *a_vap;
1190 	struct ucred *a_cred;
1191 };
1192 #endif
1193 
1194 /*
1195  * Get file attributes
1196  */
1197 static int
1198 mqfs_getattr(struct vop_getattr_args *ap)
1199 {
1200 	struct vnode *vp = ap->a_vp;
1201 	struct mqfs_node *pn = VTON(vp);
1202 	struct vattr *vap = ap->a_vap;
1203 	int error = 0;
1204 
1205 	vap->va_type = vp->v_type;
1206 	vap->va_mode = pn->mn_mode;
1207 	vap->va_nlink = 1;
1208 	vap->va_uid = pn->mn_uid;
1209 	vap->va_gid = pn->mn_gid;
1210 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
1211 	vap->va_fileid = pn->mn_fileno;
1212 	vap->va_size = 0;
1213 	vap->va_blocksize = PAGE_SIZE;
1214 	vap->va_bytes = vap->va_size = 0;
1215 	vap->va_atime = pn->mn_atime;
1216 	vap->va_mtime = pn->mn_mtime;
1217 	vap->va_ctime = pn->mn_ctime;
1218 	vap->va_birthtime = pn->mn_birth;
1219 	vap->va_gen = 0;
1220 	vap->va_flags = 0;
1221 	vap->va_rdev = NODEV;
1222 	vap->va_bytes = 0;
1223 	vap->va_filerev = 0;
1224 	return (error);
1225 }
1226 
1227 #if 0
1228 struct vop_setattr_args {
1229 	struct vop_generic_args a_gen;
1230 	struct vnode *a_vp;
1231 	struct vattr *a_vap;
1232 	struct ucred *a_cred;
1233 };
1234 #endif
1235 /*
1236  * Set attributes
1237  */
1238 static int
1239 mqfs_setattr(struct vop_setattr_args *ap)
1240 {
1241 	struct mqfs_node *pn;
1242 	struct vattr *vap;
1243 	struct vnode *vp;
1244 	struct thread *td;
1245 	int c, error;
1246 	uid_t uid;
1247 	gid_t gid;
1248 
1249 	td = curthread;
1250 	vap = ap->a_vap;
1251 	vp = ap->a_vp;
1252 	if ((vap->va_type != VNON) ||
1253 	    (vap->va_nlink != VNOVAL) ||
1254 	    (vap->va_fsid != VNOVAL) ||
1255 	    (vap->va_fileid != VNOVAL) ||
1256 	    (vap->va_blocksize != VNOVAL) ||
1257 	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
1258 	    (vap->va_rdev != VNOVAL) ||
1259 	    ((int)vap->va_bytes != VNOVAL) ||
1260 	    (vap->va_gen != VNOVAL)) {
1261 		return (EINVAL);
1262 	}
1263 
1264 	pn = VTON(vp);
1265 
1266 	error = c = 0;
1267 	if (vap->va_uid == (uid_t)VNOVAL)
1268 		uid = pn->mn_uid;
1269 	else
1270 		uid = vap->va_uid;
1271 	if (vap->va_gid == (gid_t)VNOVAL)
1272 		gid = pn->mn_gid;
1273 	else
1274 		gid = vap->va_gid;
1275 
1276 	if (uid != pn->mn_uid || gid != pn->mn_gid) {
1277 		/*
1278 		 * To modify the ownership of a file, must possess VADMIN
1279 		 * for that file.
1280 		 */
1281 		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td)))
1282 			return (error);
1283 
1284 		/*
1285 		 * XXXRW: Why is there a privilege check here: shouldn't the
1286 		 * check in VOP_ACCESS() be enough?  Also, are the group bits
1287 		 * below definitely right?
1288 		 */
1289 		if (((ap->a_cred->cr_uid != pn->mn_uid) || uid != pn->mn_uid ||
1290 		    (gid != pn->mn_gid && !groupmember(gid, ap->a_cred))) &&
1291 		    (error = priv_check(td, PRIV_MQ_ADMIN)) != 0)
1292 			return (error);
1293 		pn->mn_uid = uid;
1294 		pn->mn_gid = gid;
1295 		c = 1;
1296 	}
1297 
1298 	if (vap->va_mode != (mode_t)VNOVAL) {
1299 		if ((ap->a_cred->cr_uid != pn->mn_uid) &&
1300 		    (error = priv_check(td, PRIV_MQ_ADMIN)))
1301 			return (error);
1302 		pn->mn_mode = vap->va_mode;
1303 		c = 1;
1304 	}
1305 
1306 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
1307 		/* See the comment in ufs_vnops::ufs_setattr(). */
1308 		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td)) &&
1309 		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
1310 		    (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, td))))
1311 			return (error);
1312 		if (vap->va_atime.tv_sec != VNOVAL) {
1313 			pn->mn_atime = vap->va_atime;
1314 		}
1315 		if (vap->va_mtime.tv_sec != VNOVAL) {
1316 			pn->mn_mtime = vap->va_mtime;
1317 		}
1318 		c = 1;
1319 	}
1320 	if (c) {
1321 		vfs_timestamp(&pn->mn_ctime);
1322 	}
1323 	return (0);
1324 }
1325 
1326 #if 0
1327 struct vop_read_args {
1328 	struct vop_generic_args a_gen;
1329 	struct vnode *a_vp;
1330 	struct uio *a_uio;
1331 	int a_ioflag;
1332 	struct ucred *a_cred;
1333 };
1334 #endif
1335 
1336 /*
1337  * Read from a file
1338  */
1339 static int
1340 mqfs_read(struct vop_read_args *ap)
1341 {
1342 	char buf[80];
1343 	struct vnode *vp = ap->a_vp;
1344 	struct uio *uio = ap->a_uio;
1345 	struct mqfs_node *pn;
1346 	struct mqueue *mq;
1347 	int len, error;
1348 
1349 	if (vp->v_type != VREG)
1350 		return (EINVAL);
1351 
1352 	pn = VTON(vp);
1353 	mq = VTOMQ(vp);
1354 	snprintf(buf, sizeof(buf),
1355 		"QSIZE:%-10ld MAXMSG:%-10ld CURMSG:%-10ld MSGSIZE:%-10ld\n",
1356 		mq->mq_totalbytes,
1357 		mq->mq_maxmsg,
1358 		mq->mq_curmsgs,
1359 		mq->mq_msgsize);
1360 	buf[sizeof(buf)-1] = '\0';
1361 	len = strlen(buf);
1362 	error = uiomove_frombuf(buf, len, uio);
1363 	return (error);
1364 }
1365 
1366 #if 0
1367 struct vop_readdir_args {
1368 	struct vop_generic_args a_gen;
1369 	struct vnode *a_vp;
1370 	struct uio *a_uio;
1371 	struct ucred *a_cred;
1372 	int *a_eofflag;
1373 	int *a_ncookies;
1374 	u_long **a_cookies;
1375 };
1376 #endif
1377 
1378 /*
1379  * Return directory entries.
1380  */
1381 static int
1382 mqfs_readdir(struct vop_readdir_args *ap)
1383 {
1384 	struct vnode *vp;
1385 	struct mqfs_info *mi;
1386 	struct mqfs_node *pd;
1387 	struct mqfs_node *pn;
1388 	struct dirent entry;
1389 	struct uio *uio;
1390 	const void *pr_root;
1391 	int *tmp_ncookies = NULL;
1392 	off_t offset;
1393 	int error, i;
1394 
1395 	vp = ap->a_vp;
1396 	mi = VFSTOMQFS(vp->v_mount);
1397 	pd = VTON(vp);
1398 	uio = ap->a_uio;
1399 
1400 	if (vp->v_type != VDIR)
1401 		return (ENOTDIR);
1402 
1403 	if (uio->uio_offset < 0)
1404 		return (EINVAL);
1405 
1406 	if (ap->a_ncookies != NULL) {
1407 		tmp_ncookies = ap->a_ncookies;
1408 		*ap->a_ncookies = 0;
1409 		ap->a_ncookies = NULL;
1410         }
1411 
1412 	error = 0;
1413 	offset = 0;
1414 
1415 	pr_root = ap->a_cred->cr_prison->pr_root;
1416 	sx_xlock(&mi->mi_lock);
1417 
1418 	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
1419 		entry.d_reclen = sizeof(entry);
1420 
1421 		/*
1422 		 * Only show names within the same prison root directory
1423 		 * (or not associated with a prison, e.g. "." and "..").
1424 		 */
1425 		if (pn->mn_pr_root != NULL && pn->mn_pr_root != pr_root)
1426 			continue;
1427 		if (!pn->mn_fileno)
1428 			mqfs_fileno_alloc(mi, pn);
1429 		entry.d_fileno = pn->mn_fileno;
1430 		for (i = 0; i < MQFS_NAMELEN - 1 && pn->mn_name[i] != '\0'; ++i)
1431 			entry.d_name[i] = pn->mn_name[i];
1432 		entry.d_name[i] = 0;
1433 		entry.d_namlen = i;
1434 		switch (pn->mn_type) {
1435 		case mqfstype_root:
1436 		case mqfstype_dir:
1437 		case mqfstype_this:
1438 		case mqfstype_parent:
1439 			entry.d_type = DT_DIR;
1440 			break;
1441 		case mqfstype_file:
1442 			entry.d_type = DT_REG;
1443 			break;
1444 		case mqfstype_symlink:
1445 			entry.d_type = DT_LNK;
1446 			break;
1447 		default:
1448 			panic("%s has unexpected node type: %d", pn->mn_name,
1449 				pn->mn_type);
1450 		}
1451 		if (entry.d_reclen > uio->uio_resid)
1452                         break;
1453 		if (offset >= uio->uio_offset) {
1454 			error = vfs_read_dirent(ap, &entry, offset);
1455                         if (error)
1456                                 break;
1457                 }
1458                 offset += entry.d_reclen;
1459 	}
1460 	sx_xunlock(&mi->mi_lock);
1461 
1462 	uio->uio_offset = offset;
1463 
1464 	if (tmp_ncookies != NULL)
1465 		ap->a_ncookies = tmp_ncookies;
1466 
1467 	return (error);
1468 }
1469 
1470 #ifdef notyet
1471 
1472 #if 0
1473 struct vop_mkdir_args {
1474 	struct vnode *a_dvp;
1475 	struct vnode **a_vpp;
1476 	struct componentname *a_cnp;
1477 	struct vattr *a_vap;
1478 };
1479 #endif
1480 
1481 /*
1482  * Create a directory.
1483  */
1484 static int
1485 mqfs_mkdir(struct vop_mkdir_args *ap)
1486 {
1487 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1488 	struct componentname *cnp = ap->a_cnp;
1489 	struct mqfs_node *pd = VTON(ap->a_dvp);
1490 	struct mqfs_node *pn;
1491 	int error;
1492 
1493 	if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir)
1494 		return (ENOTDIR);
1495 	sx_xlock(&mqfs->mi_lock);
1496 	if ((cnp->cn_flags & HASBUF) == 0)
1497 		panic("%s: no name", __func__);
1498 	pn = mqfs_create_dir(pd, cnp->cn_nameptr, cnp->cn_namelen,
1499 		ap->a_vap->cn_cred, ap->a_vap->va_mode);
1500 	if (pn != NULL)
1501 		mqnode_addref(pn);
1502 	sx_xunlock(&mqfs->mi_lock);
1503 	if (pn == NULL) {
1504 		error = ENOSPC;
1505 	} else {
1506 		error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn);
1507 		mqnode_release(pn);
1508 	}
1509 	return (error);
1510 }
1511 
1512 #if 0
1513 struct vop_rmdir_args {
1514 	struct vnode *a_dvp;
1515 	struct vnode *a_vp;
1516 	struct componentname *a_cnp;
1517 };
1518 #endif
1519 
1520 /*
1521  * Remove a directory.
1522  */
1523 static int
1524 mqfs_rmdir(struct vop_rmdir_args *ap)
1525 {
1526 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1527 	struct mqfs_node *pn = VTON(ap->a_vp);
1528 	struct mqfs_node *pt;
1529 
1530 	if (pn->mn_type != mqfstype_dir)
1531 		return (ENOTDIR);
1532 
1533 	sx_xlock(&mqfs->mi_lock);
1534 	if (pn->mn_deleted) {
1535 		sx_xunlock(&mqfs->mi_lock);
1536 		return (ENOENT);
1537 	}
1538 
1539 	pt = LIST_FIRST(&pn->mn_children);
1540 	pt = LIST_NEXT(pt, mn_sibling);
1541 	pt = LIST_NEXT(pt, mn_sibling);
1542 	if (pt != NULL) {
1543 		sx_xunlock(&mqfs->mi_lock);
1544 		return (ENOTEMPTY);
1545 	}
1546 	pt = pn->mn_parent;
1547 	pn->mn_parent = NULL;
1548 	pn->mn_deleted = 1;
1549 	LIST_REMOVE(pn, mn_sibling);
1550 	mqnode_release(pn);
1551 	mqnode_release(pt);
1552 	sx_xunlock(&mqfs->mi_lock);
1553 	cache_purge(ap->a_vp);
1554 	return (0);
1555 }
1556 
1557 #endif /* notyet */
1558 
1559 /*
1560  * See if this prison root is obsolete, and clean up associated queues if it is.
1561  */
1562 static int
1563 mqfs_prison_remove(void *obj, void *data __unused)
1564 {
1565 	const struct prison *pr = obj;
1566 	const struct prison *tpr;
1567 	struct mqfs_node *pn, *tpn;
1568 	int found;
1569 
1570 	found = 0;
1571 	TAILQ_FOREACH(tpr, &allprison, pr_list) {
1572 		if (tpr->pr_root == pr->pr_root && tpr != pr && tpr->pr_ref > 0)
1573 			found = 1;
1574 	}
1575 	if (!found) {
1576 		/*
1577 		 * No jails are rooted in this directory anymore,
1578 		 * so no queues should be either.
1579 		 */
1580 		sx_xlock(&mqfs_data.mi_lock);
1581 		LIST_FOREACH_SAFE(pn, &mqfs_data.mi_root->mn_children,
1582 		    mn_sibling, tpn) {
1583 			if (pn->mn_pr_root == pr->pr_root)
1584 				(void)do_unlink(pn, curthread->td_ucred);
1585 		}
1586 		sx_xunlock(&mqfs_data.mi_lock);
1587 	}
1588 	return (0);
1589 }
1590 
1591 /*
1592  * Allocate a message queue
1593  */
1594 static struct mqueue *
1595 mqueue_alloc(const struct mq_attr *attr)
1596 {
1597 	struct mqueue *mq;
1598 
1599 	if (curmq >= maxmq)
1600 		return (NULL);
1601 	mq = uma_zalloc(mqueue_zone, M_WAITOK | M_ZERO);
1602 	TAILQ_INIT(&mq->mq_msgq);
1603 	if (attr != NULL) {
1604 		mq->mq_maxmsg = attr->mq_maxmsg;
1605 		mq->mq_msgsize = attr->mq_msgsize;
1606 	} else {
1607 		mq->mq_maxmsg = default_maxmsg;
1608 		mq->mq_msgsize = default_msgsize;
1609 	}
1610 	mtx_init(&mq->mq_mutex, "mqueue lock", NULL, MTX_DEF);
1611 	knlist_init_mtx(&mq->mq_rsel.si_note, &mq->mq_mutex);
1612 	knlist_init_mtx(&mq->mq_wsel.si_note, &mq->mq_mutex);
1613 	atomic_add_int(&curmq, 1);
1614 	return (mq);
1615 }
1616 
1617 /*
1618  * Destroy a message queue
1619  */
1620 static void
1621 mqueue_free(struct mqueue *mq)
1622 {
1623 	struct mqueue_msg *msg;
1624 
1625 	while ((msg = TAILQ_FIRST(&mq->mq_msgq)) != NULL) {
1626 		TAILQ_REMOVE(&mq->mq_msgq, msg, msg_link);
1627 		free(msg, M_MQUEUEDATA);
1628 	}
1629 
1630 	mtx_destroy(&mq->mq_mutex);
1631 	seldrain(&mq->mq_rsel);
1632 	seldrain(&mq->mq_wsel);
1633 	knlist_destroy(&mq->mq_rsel.si_note);
1634 	knlist_destroy(&mq->mq_wsel.si_note);
1635 	uma_zfree(mqueue_zone, mq);
1636 	atomic_add_int(&curmq, -1);
1637 }
1638 
1639 /*
1640  * Load a message from user space
1641  */
1642 static struct mqueue_msg *
1643 mqueue_loadmsg(const char *msg_ptr, size_t msg_size, int msg_prio)
1644 {
1645 	struct mqueue_msg *msg;
1646 	size_t len;
1647 	int error;
1648 
1649 	len = sizeof(struct mqueue_msg) + msg_size;
1650 	msg = malloc(len, M_MQUEUEDATA, M_WAITOK);
1651 	error = copyin(msg_ptr, ((char *)msg) + sizeof(struct mqueue_msg),
1652 	    msg_size);
1653 	if (error) {
1654 		free(msg, M_MQUEUEDATA);
1655 		msg = NULL;
1656 	} else {
1657 		msg->msg_size = msg_size;
1658 		msg->msg_prio = msg_prio;
1659 	}
1660 	return (msg);
1661 }
1662 
1663 /*
1664  * Save a message to user space
1665  */
1666 static int
1667 mqueue_savemsg(struct mqueue_msg *msg, char *msg_ptr, int *msg_prio)
1668 {
1669 	int error;
1670 
1671 	error = copyout(((char *)msg) + sizeof(*msg), msg_ptr,
1672 		msg->msg_size);
1673 	if (error == 0 && msg_prio != NULL)
1674 		error = copyout(&msg->msg_prio, msg_prio, sizeof(int));
1675 	return (error);
1676 }
1677 
1678 /*
1679  * Free a message's memory
1680  */
1681 static __inline void
1682 mqueue_freemsg(struct mqueue_msg *msg)
1683 {
1684 	free(msg, M_MQUEUEDATA);
1685 }
1686 
1687 /*
1688  * Send a message. if waitok is false, thread will not be
1689  * blocked if there is no data in queue, otherwise, absolute
1690  * time will be checked.
1691  */
1692 int
1693 mqueue_send(struct mqueue *mq, const char *msg_ptr,
1694 	size_t msg_len, unsigned msg_prio, int waitok,
1695 	const struct timespec *abs_timeout)
1696 {
1697 	struct mqueue_msg *msg;
1698 	struct timespec ts, ts2;
1699 	struct timeval tv;
1700 	int error;
1701 
1702 	if (msg_prio >= MQ_PRIO_MAX)
1703 		return (EINVAL);
1704 	if (msg_len > mq->mq_msgsize)
1705 		return (EMSGSIZE);
1706 	msg = mqueue_loadmsg(msg_ptr, msg_len, msg_prio);
1707 	if (msg == NULL)
1708 		return (EFAULT);
1709 
1710 	/* O_NONBLOCK case */
1711 	if (!waitok) {
1712 		error = _mqueue_send(mq, msg, -1);
1713 		if (error)
1714 			goto bad;
1715 		return (0);
1716 	}
1717 
1718 	/* we allow a null timeout (wait forever) */
1719 	if (abs_timeout == NULL) {
1720 		error = _mqueue_send(mq, msg, 0);
1721 		if (error)
1722 			goto bad;
1723 		return (0);
1724 	}
1725 
1726 	/* send it before checking time */
1727 	error = _mqueue_send(mq, msg, -1);
1728 	if (error == 0)
1729 		return (0);
1730 
1731 	if (error != EAGAIN)
1732 		goto bad;
1733 
1734 	if (abs_timeout->tv_nsec >= 1000000000 || abs_timeout->tv_nsec < 0) {
1735 		error = EINVAL;
1736 		goto bad;
1737 	}
1738 	for (;;) {
1739 		ts2 = *abs_timeout;
1740 		getnanotime(&ts);
1741 		timespecsub(&ts2, &ts);
1742 		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
1743 			error = ETIMEDOUT;
1744 			break;
1745 		}
1746 		TIMESPEC_TO_TIMEVAL(&tv, &ts2);
1747 		error = _mqueue_send(mq, msg, tvtohz(&tv));
1748 		if (error != ETIMEDOUT)
1749 			break;
1750 	}
1751 	if (error == 0)
1752 		return (0);
1753 bad:
1754 	mqueue_freemsg(msg);
1755 	return (error);
1756 }
1757 
1758 /*
1759  * Common routine to send a message
1760  */
1761 static int
1762 _mqueue_send(struct mqueue *mq, struct mqueue_msg *msg, int timo)
1763 {
1764 	struct mqueue_msg *msg2;
1765 	int error = 0;
1766 
1767 	mtx_lock(&mq->mq_mutex);
1768 	while (mq->mq_curmsgs >= mq->mq_maxmsg && error == 0) {
1769 		if (timo < 0) {
1770 			mtx_unlock(&mq->mq_mutex);
1771 			return (EAGAIN);
1772 		}
1773 		mq->mq_senders++;
1774 		error = msleep(&mq->mq_senders, &mq->mq_mutex,
1775 			    PCATCH, "mqsend", timo);
1776 		mq->mq_senders--;
1777 		if (error == EAGAIN)
1778 			error = ETIMEDOUT;
1779 	}
1780 	if (mq->mq_curmsgs >= mq->mq_maxmsg) {
1781 		mtx_unlock(&mq->mq_mutex);
1782 		return (error);
1783 	}
1784 	error = 0;
1785 	if (TAILQ_EMPTY(&mq->mq_msgq)) {
1786 		TAILQ_INSERT_HEAD(&mq->mq_msgq, msg, msg_link);
1787 	} else {
1788 		if (msg->msg_prio <= TAILQ_LAST(&mq->mq_msgq, msgq)->msg_prio) {
1789 			TAILQ_INSERT_TAIL(&mq->mq_msgq, msg, msg_link);
1790 		} else {
1791 			TAILQ_FOREACH(msg2, &mq->mq_msgq, msg_link) {
1792 				if (msg2->msg_prio < msg->msg_prio)
1793 					break;
1794 			}
1795 			TAILQ_INSERT_BEFORE(msg2, msg, msg_link);
1796 		}
1797 	}
1798 	mq->mq_curmsgs++;
1799 	mq->mq_totalbytes += msg->msg_size;
1800 	if (mq->mq_receivers)
1801 		wakeup_one(&mq->mq_receivers);
1802 	else if (mq->mq_notifier != NULL)
1803 		mqueue_send_notification(mq);
1804 	if (mq->mq_flags & MQ_RSEL) {
1805 		mq->mq_flags &= ~MQ_RSEL;
1806 		selwakeup(&mq->mq_rsel);
1807 	}
1808 	KNOTE_LOCKED(&mq->mq_rsel.si_note, 0);
1809 	mtx_unlock(&mq->mq_mutex);
1810 	return (0);
1811 }
1812 
1813 /*
1814  * Send realtime a signal to process which registered itself
1815  * successfully by mq_notify.
1816  */
1817 static void
1818 mqueue_send_notification(struct mqueue *mq)
1819 {
1820 	struct mqueue_notifier *nt;
1821 	struct thread *td;
1822 	struct proc *p;
1823 	int error;
1824 
1825 	mtx_assert(&mq->mq_mutex, MA_OWNED);
1826 	nt = mq->mq_notifier;
1827 	if (nt->nt_sigev.sigev_notify != SIGEV_NONE) {
1828 		p = nt->nt_proc;
1829 		error = sigev_findtd(p, &nt->nt_sigev, &td);
1830 		if (error) {
1831 			mq->mq_notifier = NULL;
1832 			return;
1833 		}
1834 		if (!KSI_ONQ(&nt->nt_ksi)) {
1835 			ksiginfo_set_sigev(&nt->nt_ksi, &nt->nt_sigev);
1836 			tdsendsignal(p, td, nt->nt_ksi.ksi_signo, &nt->nt_ksi);
1837 		}
1838 		PROC_UNLOCK(p);
1839 	}
1840 	mq->mq_notifier = NULL;
1841 }
1842 
1843 /*
1844  * Get a message. if waitok is false, thread will not be
1845  * blocked if there is no data in queue, otherwise, absolute
1846  * time will be checked.
1847  */
1848 int
1849 mqueue_receive(struct mqueue *mq, char *msg_ptr,
1850 	size_t msg_len, unsigned *msg_prio, int waitok,
1851 	const struct timespec *abs_timeout)
1852 {
1853 	struct mqueue_msg *msg;
1854 	struct timespec ts, ts2;
1855 	struct timeval tv;
1856 	int error;
1857 
1858 	if (msg_len < mq->mq_msgsize)
1859 		return (EMSGSIZE);
1860 
1861 	/* O_NONBLOCK case */
1862 	if (!waitok) {
1863 		error = _mqueue_recv(mq, &msg, -1);
1864 		if (error)
1865 			return (error);
1866 		goto received;
1867 	}
1868 
1869 	/* we allow a null timeout (wait forever). */
1870 	if (abs_timeout == NULL) {
1871 		error = _mqueue_recv(mq, &msg, 0);
1872 		if (error)
1873 			return (error);
1874 		goto received;
1875 	}
1876 
1877 	/* try to get a message before checking time */
1878 	error = _mqueue_recv(mq, &msg, -1);
1879 	if (error == 0)
1880 		goto received;
1881 
1882 	if (error != EAGAIN)
1883 		return (error);
1884 
1885 	if (abs_timeout->tv_nsec >= 1000000000 || abs_timeout->tv_nsec < 0) {
1886 		error = EINVAL;
1887 		return (error);
1888 	}
1889 
1890 	for (;;) {
1891 		ts2 = *abs_timeout;
1892 		getnanotime(&ts);
1893 		timespecsub(&ts2, &ts);
1894 		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
1895 			error = ETIMEDOUT;
1896 			return (error);
1897 		}
1898 		TIMESPEC_TO_TIMEVAL(&tv, &ts2);
1899 		error = _mqueue_recv(mq, &msg, tvtohz(&tv));
1900 		if (error == 0)
1901 			break;
1902 		if (error != ETIMEDOUT)
1903 			return (error);
1904 	}
1905 
1906 received:
1907 	error = mqueue_savemsg(msg, msg_ptr, msg_prio);
1908 	if (error == 0) {
1909 		curthread->td_retval[0] = msg->msg_size;
1910 		curthread->td_retval[1] = 0;
1911 	}
1912 	mqueue_freemsg(msg);
1913 	return (error);
1914 }
1915 
1916 /*
1917  * Common routine to receive a message
1918  */
1919 static int
1920 _mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg, int timo)
1921 {
1922 	int error = 0;
1923 
1924 	mtx_lock(&mq->mq_mutex);
1925 	while ((*msg = TAILQ_FIRST(&mq->mq_msgq)) == NULL && error == 0) {
1926 		if (timo < 0) {
1927 			mtx_unlock(&mq->mq_mutex);
1928 			return (EAGAIN);
1929 		}
1930 		mq->mq_receivers++;
1931 		error = msleep(&mq->mq_receivers, &mq->mq_mutex,
1932 			    PCATCH, "mqrecv", timo);
1933 		mq->mq_receivers--;
1934 		if (error == EAGAIN)
1935 			error = ETIMEDOUT;
1936 	}
1937 	if (*msg != NULL) {
1938 		error = 0;
1939 		TAILQ_REMOVE(&mq->mq_msgq, *msg, msg_link);
1940 		mq->mq_curmsgs--;
1941 		mq->mq_totalbytes -= (*msg)->msg_size;
1942 		if (mq->mq_senders)
1943 			wakeup_one(&mq->mq_senders);
1944 		if (mq->mq_flags & MQ_WSEL) {
1945 			mq->mq_flags &= ~MQ_WSEL;
1946 			selwakeup(&mq->mq_wsel);
1947 		}
1948 		KNOTE_LOCKED(&mq->mq_wsel.si_note, 0);
1949 	}
1950 	if (mq->mq_notifier != NULL && mq->mq_receivers == 0 &&
1951 	    !TAILQ_EMPTY(&mq->mq_msgq)) {
1952 		mqueue_send_notification(mq);
1953 	}
1954 	mtx_unlock(&mq->mq_mutex);
1955 	return (error);
1956 }
1957 
1958 static __inline struct mqueue_notifier *
1959 notifier_alloc(void)
1960 {
1961 	return (uma_zalloc(mqnoti_zone, M_WAITOK | M_ZERO));
1962 }
1963 
1964 static __inline void
1965 notifier_free(struct mqueue_notifier *p)
1966 {
1967 	uma_zfree(mqnoti_zone, p);
1968 }
1969 
1970 static struct mqueue_notifier *
1971 notifier_search(struct proc *p, int fd)
1972 {
1973 	struct mqueue_notifier *nt;
1974 
1975 	LIST_FOREACH(nt, &p->p_mqnotifier, nt_link) {
1976 		if (nt->nt_ksi.ksi_mqd == fd)
1977 			break;
1978 	}
1979 	return (nt);
1980 }
1981 
1982 static __inline void
1983 notifier_insert(struct proc *p, struct mqueue_notifier *nt)
1984 {
1985 	LIST_INSERT_HEAD(&p->p_mqnotifier, nt, nt_link);
1986 }
1987 
1988 static __inline void
1989 notifier_delete(struct proc *p, struct mqueue_notifier *nt)
1990 {
1991 	LIST_REMOVE(nt, nt_link);
1992 	notifier_free(nt);
1993 }
1994 
1995 static void
1996 notifier_remove(struct proc *p, struct mqueue *mq, int fd)
1997 {
1998 	struct mqueue_notifier *nt;
1999 
2000 	mtx_assert(&mq->mq_mutex, MA_OWNED);
2001 	PROC_LOCK(p);
2002 	nt = notifier_search(p, fd);
2003 	if (nt != NULL) {
2004 		if (mq->mq_notifier == nt)
2005 			mq->mq_notifier = NULL;
2006 		sigqueue_take(&nt->nt_ksi);
2007 		notifier_delete(p, nt);
2008 	}
2009 	PROC_UNLOCK(p);
2010 }
2011 
2012 static int
2013 kern_kmq_open(struct thread *td, const char *upath, int flags, mode_t mode,
2014     const struct mq_attr *attr)
2015 {
2016 	char path[MQFS_NAMELEN + 1];
2017 	struct mqfs_node *pn;
2018 	struct filedesc *fdp;
2019 	struct file *fp;
2020 	struct mqueue *mq;
2021 	int fd, error, len, cmode;
2022 
2023 	AUDIT_ARG_FFLAGS(flags);
2024 	AUDIT_ARG_MODE(mode);
2025 
2026 	fdp = td->td_proc->p_fd;
2027 	cmode = (((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT);
2028 	mq = NULL;
2029 	if ((flags & O_CREAT) != 0 && attr != NULL) {
2030 		if (attr->mq_maxmsg <= 0 || attr->mq_maxmsg > maxmsg)
2031 			return (EINVAL);
2032 		if (attr->mq_msgsize <= 0 || attr->mq_msgsize > maxmsgsize)
2033 			return (EINVAL);
2034 	}
2035 
2036 	error = copyinstr(upath, path, MQFS_NAMELEN + 1, NULL);
2037         if (error)
2038 		return (error);
2039 
2040 	/*
2041 	 * The first character of name must be a slash  (/) character
2042 	 * and the remaining characters of name cannot include any slash
2043 	 * characters.
2044 	 */
2045 	len = strlen(path);
2046 	if (len < 2 || path[0] != '/' || strchr(path + 1, '/') != NULL)
2047 		return (EINVAL);
2048 	AUDIT_ARG_UPATH1_CANON(path);
2049 
2050 	error = falloc(td, &fp, &fd, O_CLOEXEC);
2051 	if (error)
2052 		return (error);
2053 
2054 	sx_xlock(&mqfs_data.mi_lock);
2055 	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred);
2056 	if (pn == NULL) {
2057 		if (!(flags & O_CREAT)) {
2058 			error = ENOENT;
2059 		} else {
2060 			mq = mqueue_alloc(attr);
2061 			if (mq == NULL) {
2062 				error = ENFILE;
2063 			} else {
2064 				pn = mqfs_create_file(mqfs_data.mi_root,
2065 				         path + 1, len - 1, td->td_ucred,
2066 					 cmode);
2067 				if (pn == NULL) {
2068 					error = ENOSPC;
2069 					mqueue_free(mq);
2070 				}
2071 			}
2072 		}
2073 
2074 		if (error == 0) {
2075 			pn->mn_data = mq;
2076 		}
2077 	} else {
2078 		if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) {
2079 			error = EEXIST;
2080 		} else {
2081 			accmode_t accmode = 0;
2082 
2083 			if (flags & FREAD)
2084 				accmode |= VREAD;
2085 			if (flags & FWRITE)
2086 				accmode |= VWRITE;
2087 			error = vaccess(VREG, pn->mn_mode, pn->mn_uid,
2088 				    pn->mn_gid, accmode, td->td_ucred, NULL);
2089 		}
2090 	}
2091 
2092 	if (error) {
2093 		sx_xunlock(&mqfs_data.mi_lock);
2094 		fdclose(td, fp, fd);
2095 		fdrop(fp, td);
2096 		return (error);
2097 	}
2098 
2099 	mqnode_addref(pn);
2100 	sx_xunlock(&mqfs_data.mi_lock);
2101 
2102 	finit(fp, flags & (FREAD | FWRITE | O_NONBLOCK), DTYPE_MQUEUE, pn,
2103 	    &mqueueops);
2104 
2105 	td->td_retval[0] = fd;
2106 	fdrop(fp, td);
2107 	return (0);
2108 }
2109 
2110 /*
2111  * Syscall to open a message queue.
2112  */
2113 int
2114 sys_kmq_open(struct thread *td, struct kmq_open_args *uap)
2115 {
2116 	struct mq_attr attr;
2117 	int flags, error;
2118 
2119 	if ((uap->flags & O_ACCMODE) == O_ACCMODE || uap->flags & O_EXEC)
2120 		return (EINVAL);
2121 	flags = FFLAGS(uap->flags);
2122 	if ((flags & O_CREAT) != 0 && uap->attr != NULL) {
2123 		error = copyin(uap->attr, &attr, sizeof(attr));
2124 		if (error)
2125 			return (error);
2126 	}
2127 	return (kern_kmq_open(td, uap->path, flags, uap->mode,
2128 	    uap->attr != NULL ? &attr : NULL));
2129 }
2130 
2131 /*
2132  * Syscall to unlink a message queue.
2133  */
2134 int
2135 sys_kmq_unlink(struct thread *td, struct kmq_unlink_args *uap)
2136 {
2137 	char path[MQFS_NAMELEN+1];
2138 	struct mqfs_node *pn;
2139 	int error, len;
2140 
2141 	error = copyinstr(uap->path, path, MQFS_NAMELEN + 1, NULL);
2142         if (error)
2143 		return (error);
2144 
2145 	len = strlen(path);
2146 	if (len < 2 || path[0] != '/' || strchr(path + 1, '/') != NULL)
2147 		return (EINVAL);
2148 	AUDIT_ARG_UPATH1_CANON(path);
2149 
2150 	sx_xlock(&mqfs_data.mi_lock);
2151 	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred);
2152 	if (pn != NULL)
2153 		error = do_unlink(pn, td->td_ucred);
2154 	else
2155 		error = ENOENT;
2156 	sx_xunlock(&mqfs_data.mi_lock);
2157 	return (error);
2158 }
2159 
2160 typedef int (*_fgetf)(struct thread *, int, cap_rights_t *, struct file **);
2161 
2162 /*
2163  * Get message queue by giving file slot
2164  */
2165 static int
2166 _getmq(struct thread *td, int fd, cap_rights_t *rightsp, _fgetf func,
2167        struct file **fpp, struct mqfs_node **ppn, struct mqueue **pmq)
2168 {
2169 	struct mqfs_node *pn;
2170 	int error;
2171 
2172 	error = func(td, fd, rightsp, fpp);
2173 	if (error)
2174 		return (error);
2175 	if (&mqueueops != (*fpp)->f_ops) {
2176 		fdrop(*fpp, td);
2177 		return (EBADF);
2178 	}
2179 	pn = (*fpp)->f_data;
2180 	if (ppn)
2181 		*ppn = pn;
2182 	if (pmq)
2183 		*pmq = pn->mn_data;
2184 	return (0);
2185 }
2186 
2187 static __inline int
2188 getmq(struct thread *td, int fd, struct file **fpp, struct mqfs_node **ppn,
2189 	struct mqueue **pmq)
2190 {
2191 	cap_rights_t rights;
2192 
2193 	return _getmq(td, fd, cap_rights_init(&rights, CAP_EVENT), fget,
2194 	    fpp, ppn, pmq);
2195 }
2196 
2197 static __inline int
2198 getmq_read(struct thread *td, int fd, struct file **fpp,
2199 	 struct mqfs_node **ppn, struct mqueue **pmq)
2200 {
2201 	cap_rights_t rights;
2202 
2203 	return _getmq(td, fd, cap_rights_init(&rights, CAP_READ), fget_read,
2204 	    fpp, ppn, pmq);
2205 }
2206 
2207 static __inline int
2208 getmq_write(struct thread *td, int fd, struct file **fpp,
2209 	struct mqfs_node **ppn, struct mqueue **pmq)
2210 {
2211 	cap_rights_t rights;
2212 
2213 	return _getmq(td, fd, cap_rights_init(&rights, CAP_WRITE), fget_write,
2214 	    fpp, ppn, pmq);
2215 }
2216 
2217 static int
2218 kern_kmq_setattr(struct thread *td, int mqd, const struct mq_attr *attr,
2219     struct mq_attr *oattr)
2220 {
2221 	struct mqueue *mq;
2222 	struct file *fp;
2223 	u_int oflag, flag;
2224 	int error;
2225 
2226 	AUDIT_ARG_FD(mqd);
2227 	if (attr != NULL && (attr->mq_flags & ~O_NONBLOCK) != 0)
2228 		return (EINVAL);
2229 	error = getmq(td, mqd, &fp, NULL, &mq);
2230 	if (error)
2231 		return (error);
2232 	oattr->mq_maxmsg  = mq->mq_maxmsg;
2233 	oattr->mq_msgsize = mq->mq_msgsize;
2234 	oattr->mq_curmsgs = mq->mq_curmsgs;
2235 	if (attr != NULL) {
2236 		do {
2237 			oflag = flag = fp->f_flag;
2238 			flag &= ~O_NONBLOCK;
2239 			flag |= (attr->mq_flags & O_NONBLOCK);
2240 		} while (atomic_cmpset_int(&fp->f_flag, oflag, flag) == 0);
2241 	} else
2242 		oflag = fp->f_flag;
2243 	oattr->mq_flags = (O_NONBLOCK & oflag);
2244 	fdrop(fp, td);
2245 	return (error);
2246 }
2247 
2248 int
2249 sys_kmq_setattr(struct thread *td, struct kmq_setattr_args *uap)
2250 {
2251 	struct mq_attr attr, oattr;
2252 	int error;
2253 
2254 	if (uap->attr != NULL) {
2255 		error = copyin(uap->attr, &attr, sizeof(attr));
2256 		if (error != 0)
2257 			return (error);
2258 	}
2259 	error = kern_kmq_setattr(td, uap->mqd, uap->attr != NULL ? &attr : NULL,
2260 	    &oattr);
2261 	if (error == 0 && uap->oattr != NULL) {
2262 		bzero(oattr.__reserved, sizeof(oattr.__reserved));
2263 		error = copyout(&oattr, uap->oattr, sizeof(oattr));
2264 	}
2265 	return (error);
2266 }
2267 
2268 int
2269 sys_kmq_timedreceive(struct thread *td, struct kmq_timedreceive_args *uap)
2270 {
2271 	struct mqueue *mq;
2272 	struct file *fp;
2273 	struct timespec *abs_timeout, ets;
2274 	int error;
2275 	int waitok;
2276 
2277 	AUDIT_ARG_FD(uap->mqd);
2278 	error = getmq_read(td, uap->mqd, &fp, NULL, &mq);
2279 	if (error)
2280 		return (error);
2281 	if (uap->abs_timeout != NULL) {
2282 		error = copyin(uap->abs_timeout, &ets, sizeof(ets));
2283 		if (error != 0)
2284 			return (error);
2285 		abs_timeout = &ets;
2286 	} else
2287 		abs_timeout = NULL;
2288 	waitok = !(fp->f_flag & O_NONBLOCK);
2289 	error = mqueue_receive(mq, uap->msg_ptr, uap->msg_len,
2290 		uap->msg_prio, waitok, abs_timeout);
2291 	fdrop(fp, td);
2292 	return (error);
2293 }
2294 
2295 int
2296 sys_kmq_timedsend(struct thread *td, struct kmq_timedsend_args *uap)
2297 {
2298 	struct mqueue *mq;
2299 	struct file *fp;
2300 	struct timespec *abs_timeout, ets;
2301 	int error, waitok;
2302 
2303 	AUDIT_ARG_FD(uap->mqd);
2304 	error = getmq_write(td, uap->mqd, &fp, NULL, &mq);
2305 	if (error)
2306 		return (error);
2307 	if (uap->abs_timeout != NULL) {
2308 		error = copyin(uap->abs_timeout, &ets, sizeof(ets));
2309 		if (error != 0)
2310 			return (error);
2311 		abs_timeout = &ets;
2312 	} else
2313 		abs_timeout = NULL;
2314 	waitok = !(fp->f_flag & O_NONBLOCK);
2315 	error = mqueue_send(mq, uap->msg_ptr, uap->msg_len,
2316 		uap->msg_prio, waitok, abs_timeout);
2317 	fdrop(fp, td);
2318 	return (error);
2319 }
2320 
2321 static int
2322 kern_kmq_notify(struct thread *td, int mqd, struct sigevent *sigev)
2323 {
2324 #ifdef CAPABILITIES
2325 	cap_rights_t rights;
2326 #endif
2327 	struct filedesc *fdp;
2328 	struct proc *p;
2329 	struct mqueue *mq;
2330 	struct file *fp, *fp2;
2331 	struct mqueue_notifier *nt, *newnt = NULL;
2332 	int error;
2333 
2334 	AUDIT_ARG_FD(mqd);
2335 	if (sigev != NULL) {
2336 		if (sigev->sigev_notify != SIGEV_SIGNAL &&
2337 		    sigev->sigev_notify != SIGEV_THREAD_ID &&
2338 		    sigev->sigev_notify != SIGEV_NONE)
2339 			return (EINVAL);
2340 		if ((sigev->sigev_notify == SIGEV_SIGNAL ||
2341 		    sigev->sigev_notify == SIGEV_THREAD_ID) &&
2342 		    !_SIG_VALID(sigev->sigev_signo))
2343 			return (EINVAL);
2344 	}
2345 	p = td->td_proc;
2346 	fdp = td->td_proc->p_fd;
2347 	error = getmq(td, mqd, &fp, NULL, &mq);
2348 	if (error)
2349 		return (error);
2350 again:
2351 	FILEDESC_SLOCK(fdp);
2352 	fp2 = fget_locked(fdp, mqd);
2353 	if (fp2 == NULL) {
2354 		FILEDESC_SUNLOCK(fdp);
2355 		error = EBADF;
2356 		goto out;
2357 	}
2358 #ifdef CAPABILITIES
2359 	error = cap_check(cap_rights(fdp, mqd),
2360 	    cap_rights_init(&rights, CAP_EVENT));
2361 	if (error) {
2362 		FILEDESC_SUNLOCK(fdp);
2363 		goto out;
2364 	}
2365 #endif
2366 	if (fp2 != fp) {
2367 		FILEDESC_SUNLOCK(fdp);
2368 		error = EBADF;
2369 		goto out;
2370 	}
2371 	mtx_lock(&mq->mq_mutex);
2372 	FILEDESC_SUNLOCK(fdp);
2373 	if (sigev != NULL) {
2374 		if (mq->mq_notifier != NULL) {
2375 			error = EBUSY;
2376 		} else {
2377 			PROC_LOCK(p);
2378 			nt = notifier_search(p, mqd);
2379 			if (nt == NULL) {
2380 				if (newnt == NULL) {
2381 					PROC_UNLOCK(p);
2382 					mtx_unlock(&mq->mq_mutex);
2383 					newnt = notifier_alloc();
2384 					goto again;
2385 				}
2386 			}
2387 
2388 			if (nt != NULL) {
2389 				sigqueue_take(&nt->nt_ksi);
2390 				if (newnt != NULL) {
2391 					notifier_free(newnt);
2392 					newnt = NULL;
2393 				}
2394 			} else {
2395 				nt = newnt;
2396 				newnt = NULL;
2397 				ksiginfo_init(&nt->nt_ksi);
2398 				nt->nt_ksi.ksi_flags |= KSI_INS | KSI_EXT;
2399 				nt->nt_ksi.ksi_code = SI_MESGQ;
2400 				nt->nt_proc = p;
2401 				nt->nt_ksi.ksi_mqd = mqd;
2402 				notifier_insert(p, nt);
2403 			}
2404 			nt->nt_sigev = *sigev;
2405 			mq->mq_notifier = nt;
2406 			PROC_UNLOCK(p);
2407 			/*
2408 			 * if there is no receivers and message queue
2409 			 * is not empty, we should send notification
2410 			 * as soon as possible.
2411 			 */
2412 			if (mq->mq_receivers == 0 &&
2413 			    !TAILQ_EMPTY(&mq->mq_msgq))
2414 				mqueue_send_notification(mq);
2415 		}
2416 	} else {
2417 		notifier_remove(p, mq, mqd);
2418 	}
2419 	mtx_unlock(&mq->mq_mutex);
2420 
2421 out:
2422 	fdrop(fp, td);
2423 	if (newnt != NULL)
2424 		notifier_free(newnt);
2425 	return (error);
2426 }
2427 
2428 int
2429 sys_kmq_notify(struct thread *td, struct kmq_notify_args *uap)
2430 {
2431 	struct sigevent ev, *evp;
2432 	int error;
2433 
2434 	if (uap->sigev == NULL) {
2435 		evp = NULL;
2436 	} else {
2437 		error = copyin(uap->sigev, &ev, sizeof(ev));
2438 		if (error != 0)
2439 			return (error);
2440 		evp = &ev;
2441 	}
2442 	return (kern_kmq_notify(td, uap->mqd, evp));
2443 }
2444 
2445 static void
2446 mqueue_fdclose(struct thread *td, int fd, struct file *fp)
2447 {
2448 	struct filedesc *fdp;
2449 	struct mqueue *mq;
2450 
2451 	fdp = td->td_proc->p_fd;
2452 	FILEDESC_LOCK_ASSERT(fdp);
2453 
2454 	if (fp->f_ops == &mqueueops) {
2455 		mq = FPTOMQ(fp);
2456 		mtx_lock(&mq->mq_mutex);
2457 		notifier_remove(td->td_proc, mq, fd);
2458 
2459 		/* have to wakeup thread in same process */
2460 		if (mq->mq_flags & MQ_RSEL) {
2461 			mq->mq_flags &= ~MQ_RSEL;
2462 			selwakeup(&mq->mq_rsel);
2463 		}
2464 		if (mq->mq_flags & MQ_WSEL) {
2465 			mq->mq_flags &= ~MQ_WSEL;
2466 			selwakeup(&mq->mq_wsel);
2467 		}
2468 		mtx_unlock(&mq->mq_mutex);
2469 	}
2470 }
2471 
2472 static void
2473 mq_proc_exit(void *arg __unused, struct proc *p)
2474 {
2475 	struct filedesc *fdp;
2476 	struct file *fp;
2477 	struct mqueue *mq;
2478 	int i;
2479 
2480 	fdp = p->p_fd;
2481 	FILEDESC_SLOCK(fdp);
2482 	for (i = 0; i < fdp->fd_nfiles; ++i) {
2483 		fp = fget_locked(fdp, i);
2484 		if (fp != NULL && fp->f_ops == &mqueueops) {
2485 			mq = FPTOMQ(fp);
2486 			mtx_lock(&mq->mq_mutex);
2487 			notifier_remove(p, FPTOMQ(fp), i);
2488 			mtx_unlock(&mq->mq_mutex);
2489 		}
2490 	}
2491 	FILEDESC_SUNLOCK(fdp);
2492 	KASSERT(LIST_EMPTY(&p->p_mqnotifier), ("mq notifiers left"));
2493 }
2494 
2495 static int
2496 mqf_poll(struct file *fp, int events, struct ucred *active_cred,
2497 	struct thread *td)
2498 {
2499 	struct mqueue *mq = FPTOMQ(fp);
2500 	int revents = 0;
2501 
2502 	mtx_lock(&mq->mq_mutex);
2503 	if (events & (POLLIN | POLLRDNORM)) {
2504 		if (mq->mq_curmsgs) {
2505 			revents |= events & (POLLIN | POLLRDNORM);
2506 		} else {
2507 			mq->mq_flags |= MQ_RSEL;
2508 			selrecord(td, &mq->mq_rsel);
2509  		}
2510 	}
2511 	if (events & POLLOUT) {
2512 		if (mq->mq_curmsgs < mq->mq_maxmsg)
2513 			revents |= POLLOUT;
2514 		else {
2515 			mq->mq_flags |= MQ_WSEL;
2516 			selrecord(td, &mq->mq_wsel);
2517 		}
2518 	}
2519 	mtx_unlock(&mq->mq_mutex);
2520 	return (revents);
2521 }
2522 
2523 static int
2524 mqf_close(struct file *fp, struct thread *td)
2525 {
2526 	struct mqfs_node *pn;
2527 
2528 	fp->f_ops = &badfileops;
2529 	pn = fp->f_data;
2530 	fp->f_data = NULL;
2531 	sx_xlock(&mqfs_data.mi_lock);
2532 	mqnode_release(pn);
2533 	sx_xunlock(&mqfs_data.mi_lock);
2534 	return (0);
2535 }
2536 
2537 static int
2538 mqf_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
2539 	struct thread *td)
2540 {
2541 	struct mqfs_node *pn = fp->f_data;
2542 
2543 	bzero(st, sizeof *st);
2544 	sx_xlock(&mqfs_data.mi_lock);
2545 	st->st_atim = pn->mn_atime;
2546 	st->st_mtim = pn->mn_mtime;
2547 	st->st_ctim = pn->mn_ctime;
2548 	st->st_birthtim = pn->mn_birth;
2549 	st->st_uid = pn->mn_uid;
2550 	st->st_gid = pn->mn_gid;
2551 	st->st_mode = S_IFIFO | pn->mn_mode;
2552 	sx_xunlock(&mqfs_data.mi_lock);
2553 	return (0);
2554 }
2555 
2556 static int
2557 mqf_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
2558     struct thread *td)
2559 {
2560 	struct mqfs_node *pn;
2561 	int error;
2562 
2563 	error = 0;
2564 	pn = fp->f_data;
2565 	sx_xlock(&mqfs_data.mi_lock);
2566 	error = vaccess(VREG, pn->mn_mode, pn->mn_uid, pn->mn_gid, VADMIN,
2567 	    active_cred, NULL);
2568 	if (error != 0)
2569 		goto out;
2570 	pn->mn_mode = mode & ACCESSPERMS;
2571 out:
2572 	sx_xunlock(&mqfs_data.mi_lock);
2573 	return (error);
2574 }
2575 
2576 static int
2577 mqf_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
2578     struct thread *td)
2579 {
2580 	struct mqfs_node *pn;
2581 	int error;
2582 
2583 	error = 0;
2584 	pn = fp->f_data;
2585 	sx_xlock(&mqfs_data.mi_lock);
2586 	if (uid == (uid_t)-1)
2587 		uid = pn->mn_uid;
2588 	if (gid == (gid_t)-1)
2589 		gid = pn->mn_gid;
2590 	if (((uid != pn->mn_uid && uid != active_cred->cr_uid) ||
2591 	    (gid != pn->mn_gid && !groupmember(gid, active_cred))) &&
2592 	    (error = priv_check_cred(active_cred, PRIV_VFS_CHOWN, 0)))
2593 		goto out;
2594 	pn->mn_uid = uid;
2595 	pn->mn_gid = gid;
2596 out:
2597 	sx_xunlock(&mqfs_data.mi_lock);
2598 	return (error);
2599 }
2600 
2601 static int
2602 mqf_kqfilter(struct file *fp, struct knote *kn)
2603 {
2604 	struct mqueue *mq = FPTOMQ(fp);
2605 	int error = 0;
2606 
2607 	if (kn->kn_filter == EVFILT_READ) {
2608 		kn->kn_fop = &mq_rfiltops;
2609 		knlist_add(&mq->mq_rsel.si_note, kn, 0);
2610 	} else if (kn->kn_filter == EVFILT_WRITE) {
2611 		kn->kn_fop = &mq_wfiltops;
2612 		knlist_add(&mq->mq_wsel.si_note, kn, 0);
2613 	} else
2614 		error = EINVAL;
2615 	return (error);
2616 }
2617 
2618 static void
2619 filt_mqdetach(struct knote *kn)
2620 {
2621 	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2622 
2623 	if (kn->kn_filter == EVFILT_READ)
2624 		knlist_remove(&mq->mq_rsel.si_note, kn, 0);
2625 	else if (kn->kn_filter == EVFILT_WRITE)
2626 		knlist_remove(&mq->mq_wsel.si_note, kn, 0);
2627 	else
2628 		panic("filt_mqdetach");
2629 }
2630 
2631 static int
2632 filt_mqread(struct knote *kn, long hint)
2633 {
2634 	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2635 
2636 	mtx_assert(&mq->mq_mutex, MA_OWNED);
2637 	return (mq->mq_curmsgs != 0);
2638 }
2639 
2640 static int
2641 filt_mqwrite(struct knote *kn, long hint)
2642 {
2643 	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2644 
2645 	mtx_assert(&mq->mq_mutex, MA_OWNED);
2646 	return (mq->mq_curmsgs < mq->mq_maxmsg);
2647 }
2648 
2649 static int
2650 mqf_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
2651 {
2652 
2653 	kif->kf_type = KF_TYPE_MQUEUE;
2654 	return (0);
2655 }
2656 
2657 static struct fileops mqueueops = {
2658 	.fo_read		= invfo_rdwr,
2659 	.fo_write		= invfo_rdwr,
2660 	.fo_truncate		= invfo_truncate,
2661 	.fo_ioctl		= invfo_ioctl,
2662 	.fo_poll		= mqf_poll,
2663 	.fo_kqfilter		= mqf_kqfilter,
2664 	.fo_stat		= mqf_stat,
2665 	.fo_close		= mqf_close,
2666 	.fo_chmod		= mqf_chmod,
2667 	.fo_chown		= mqf_chown,
2668 	.fo_sendfile		= invfo_sendfile,
2669 	.fo_fill_kinfo		= mqf_fill_kinfo,
2670 };
2671 
2672 static struct vop_vector mqfs_vnodeops = {
2673 	.vop_default 		= &default_vnodeops,
2674 	.vop_access		= mqfs_access,
2675 	.vop_cachedlookup	= mqfs_lookup,
2676 	.vop_lookup		= vfs_cache_lookup,
2677 	.vop_reclaim		= mqfs_reclaim,
2678 	.vop_create		= mqfs_create,
2679 	.vop_remove		= mqfs_remove,
2680 	.vop_inactive		= mqfs_inactive,
2681 	.vop_open		= mqfs_open,
2682 	.vop_close		= mqfs_close,
2683 	.vop_getattr		= mqfs_getattr,
2684 	.vop_setattr		= mqfs_setattr,
2685 	.vop_read		= mqfs_read,
2686 	.vop_write		= VOP_EOPNOTSUPP,
2687 	.vop_readdir		= mqfs_readdir,
2688 	.vop_mkdir		= VOP_EOPNOTSUPP,
2689 	.vop_rmdir		= VOP_EOPNOTSUPP
2690 };
2691 
2692 static struct vfsops mqfs_vfsops = {
2693 	.vfs_init 		= mqfs_init,
2694 	.vfs_uninit		= mqfs_uninit,
2695 	.vfs_mount		= mqfs_mount,
2696 	.vfs_unmount		= mqfs_unmount,
2697 	.vfs_root		= mqfs_root,
2698 	.vfs_statfs		= mqfs_statfs,
2699 };
2700 
2701 static struct vfsconf mqueuefs_vfsconf = {
2702 	.vfc_version = VFS_VERSION,
2703 	.vfc_name = "mqueuefs",
2704 	.vfc_vfsops = &mqfs_vfsops,
2705 	.vfc_typenum = -1,
2706 	.vfc_flags = VFCF_SYNTHETIC
2707 };
2708 
2709 static struct syscall_helper_data mq_syscalls[] = {
2710 	SYSCALL_INIT_HELPER(kmq_open),
2711 	SYSCALL_INIT_HELPER_F(kmq_setattr, SYF_CAPENABLED),
2712 	SYSCALL_INIT_HELPER_F(kmq_timedsend, SYF_CAPENABLED),
2713 	SYSCALL_INIT_HELPER_F(kmq_timedreceive, SYF_CAPENABLED),
2714 	SYSCALL_INIT_HELPER_F(kmq_notify, SYF_CAPENABLED),
2715 	SYSCALL_INIT_HELPER(kmq_unlink),
2716 	SYSCALL_INIT_LAST
2717 };
2718 
2719 #ifdef COMPAT_FREEBSD32
2720 #include <compat/freebsd32/freebsd32.h>
2721 #include <compat/freebsd32/freebsd32_proto.h>
2722 #include <compat/freebsd32/freebsd32_signal.h>
2723 #include <compat/freebsd32/freebsd32_syscall.h>
2724 #include <compat/freebsd32/freebsd32_util.h>
2725 
2726 static void
2727 mq_attr_from32(const struct mq_attr32 *from, struct mq_attr *to)
2728 {
2729 
2730 	to->mq_flags = from->mq_flags;
2731 	to->mq_maxmsg = from->mq_maxmsg;
2732 	to->mq_msgsize = from->mq_msgsize;
2733 	to->mq_curmsgs = from->mq_curmsgs;
2734 }
2735 
2736 static void
2737 mq_attr_to32(const struct mq_attr *from, struct mq_attr32 *to)
2738 {
2739 
2740 	to->mq_flags = from->mq_flags;
2741 	to->mq_maxmsg = from->mq_maxmsg;
2742 	to->mq_msgsize = from->mq_msgsize;
2743 	to->mq_curmsgs = from->mq_curmsgs;
2744 }
2745 
2746 int
2747 freebsd32_kmq_open(struct thread *td, struct freebsd32_kmq_open_args *uap)
2748 {
2749 	struct mq_attr attr;
2750 	struct mq_attr32 attr32;
2751 	int flags, error;
2752 
2753 	if ((uap->flags & O_ACCMODE) == O_ACCMODE || uap->flags & O_EXEC)
2754 		return (EINVAL);
2755 	flags = FFLAGS(uap->flags);
2756 	if ((flags & O_CREAT) != 0 && uap->attr != NULL) {
2757 		error = copyin(uap->attr, &attr32, sizeof(attr32));
2758 		if (error)
2759 			return (error);
2760 		mq_attr_from32(&attr32, &attr);
2761 	}
2762 	return (kern_kmq_open(td, uap->path, flags, uap->mode,
2763 	    uap->attr != NULL ? &attr : NULL));
2764 }
2765 
2766 int
2767 freebsd32_kmq_setattr(struct thread *td, struct freebsd32_kmq_setattr_args *uap)
2768 {
2769 	struct mq_attr attr, oattr;
2770 	struct mq_attr32 attr32, oattr32;
2771 	int error;
2772 
2773 	if (uap->attr != NULL) {
2774 		error = copyin(uap->attr, &attr32, sizeof(attr32));
2775 		if (error != 0)
2776 			return (error);
2777 		mq_attr_from32(&attr32, &attr);
2778 	}
2779 	error = kern_kmq_setattr(td, uap->mqd, uap->attr != NULL ? &attr : NULL,
2780 	    &oattr);
2781 	if (error == 0 && uap->oattr != NULL) {
2782 		mq_attr_to32(&oattr, &oattr32);
2783 		bzero(oattr32.__reserved, sizeof(oattr32.__reserved));
2784 		error = copyout(&oattr32, uap->oattr, sizeof(oattr32));
2785 	}
2786 	return (error);
2787 }
2788 
2789 int
2790 freebsd32_kmq_timedsend(struct thread *td,
2791     struct freebsd32_kmq_timedsend_args *uap)
2792 {
2793 	struct mqueue *mq;
2794 	struct file *fp;
2795 	struct timespec32 ets32;
2796 	struct timespec *abs_timeout, ets;
2797 	int error;
2798 	int waitok;
2799 
2800 	AUDIT_ARG_FD(uap->mqd);
2801 	error = getmq_write(td, uap->mqd, &fp, NULL, &mq);
2802 	if (error)
2803 		return (error);
2804 	if (uap->abs_timeout != NULL) {
2805 		error = copyin(uap->abs_timeout, &ets32, sizeof(ets32));
2806 		if (error != 0)
2807 			return (error);
2808 		CP(ets32, ets, tv_sec);
2809 		CP(ets32, ets, tv_nsec);
2810 		abs_timeout = &ets;
2811 	} else
2812 		abs_timeout = NULL;
2813 	waitok = !(fp->f_flag & O_NONBLOCK);
2814 	error = mqueue_send(mq, uap->msg_ptr, uap->msg_len,
2815 		uap->msg_prio, waitok, abs_timeout);
2816 	fdrop(fp, td);
2817 	return (error);
2818 }
2819 
2820 int
2821 freebsd32_kmq_timedreceive(struct thread *td,
2822     struct freebsd32_kmq_timedreceive_args *uap)
2823 {
2824 	struct mqueue *mq;
2825 	struct file *fp;
2826 	struct timespec32 ets32;
2827 	struct timespec *abs_timeout, ets;
2828 	int error, waitok;
2829 
2830 	AUDIT_ARG_FD(uap->mqd);
2831 	error = getmq_read(td, uap->mqd, &fp, NULL, &mq);
2832 	if (error)
2833 		return (error);
2834 	if (uap->abs_timeout != NULL) {
2835 		error = copyin(uap->abs_timeout, &ets32, sizeof(ets32));
2836 		if (error != 0)
2837 			return (error);
2838 		CP(ets32, ets, tv_sec);
2839 		CP(ets32, ets, tv_nsec);
2840 		abs_timeout = &ets;
2841 	} else
2842 		abs_timeout = NULL;
2843 	waitok = !(fp->f_flag & O_NONBLOCK);
2844 	error = mqueue_receive(mq, uap->msg_ptr, uap->msg_len,
2845 		uap->msg_prio, waitok, abs_timeout);
2846 	fdrop(fp, td);
2847 	return (error);
2848 }
2849 
2850 int
2851 freebsd32_kmq_notify(struct thread *td, struct freebsd32_kmq_notify_args *uap)
2852 {
2853 	struct sigevent ev, *evp;
2854 	struct sigevent32 ev32;
2855 	int error;
2856 
2857 	if (uap->sigev == NULL) {
2858 		evp = NULL;
2859 	} else {
2860 		error = copyin(uap->sigev, &ev32, sizeof(ev32));
2861 		if (error != 0)
2862 			return (error);
2863 		error = convert_sigevent32(&ev32, &ev);
2864 		if (error != 0)
2865 			return (error);
2866 		evp = &ev;
2867 	}
2868 	return (kern_kmq_notify(td, uap->mqd, evp));
2869 }
2870 
2871 static struct syscall_helper_data mq32_syscalls[] = {
2872 	SYSCALL32_INIT_HELPER(freebsd32_kmq_open),
2873 	SYSCALL32_INIT_HELPER_F(freebsd32_kmq_setattr, SYF_CAPENABLED),
2874 	SYSCALL32_INIT_HELPER_F(freebsd32_kmq_timedsend, SYF_CAPENABLED),
2875 	SYSCALL32_INIT_HELPER_F(freebsd32_kmq_timedreceive, SYF_CAPENABLED),
2876 	SYSCALL32_INIT_HELPER_F(freebsd32_kmq_notify, SYF_CAPENABLED),
2877 	SYSCALL32_INIT_HELPER_COMPAT(kmq_unlink),
2878 	SYSCALL_INIT_LAST
2879 };
2880 #endif
2881 
2882 static int
2883 mqinit(void)
2884 {
2885 	int error;
2886 
2887 	error = syscall_helper_register(mq_syscalls, SY_THR_STATIC_KLD);
2888 	if (error != 0)
2889 		return (error);
2890 #ifdef COMPAT_FREEBSD32
2891 	error = syscall32_helper_register(mq32_syscalls, SY_THR_STATIC_KLD);
2892 	if (error != 0)
2893 		return (error);
2894 #endif
2895 	return (0);
2896 }
2897 
2898 static int
2899 mqunload(void)
2900 {
2901 
2902 #ifdef COMPAT_FREEBSD32
2903 	syscall32_helper_unregister(mq32_syscalls);
2904 #endif
2905 	syscall_helper_unregister(mq_syscalls);
2906 	return (0);
2907 }
2908 
2909 static int
2910 mq_modload(struct module *module, int cmd, void *arg)
2911 {
2912 	int error = 0;
2913 
2914 	error = vfs_modevent(module, cmd, arg);
2915 	if (error != 0)
2916 		return (error);
2917 
2918 	switch (cmd) {
2919 	case MOD_LOAD:
2920 		error = mqinit();
2921 		if (error != 0)
2922 			mqunload();
2923 		break;
2924 	case MOD_UNLOAD:
2925 		error = mqunload();
2926 		break;
2927 	default:
2928 		break;
2929 	}
2930 	return (error);
2931 }
2932 
2933 static moduledata_t mqueuefs_mod = {
2934 	"mqueuefs",
2935 	mq_modload,
2936 	&mqueuefs_vfsconf
2937 };
2938 DECLARE_MODULE(mqueuefs, mqueuefs_mod, SI_SUB_VFS, SI_ORDER_MIDDLE);
2939 MODULE_VERSION(mqueuefs, 1);
2940