xref: /freebsd/sys/kern/uipc_mqueue.c (revision 076ad2f836d5f49dc1375f1677335a48fe0d4b82)
1 /*-
2  * Copyright (c) 2005 David Xu <davidxu@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 
28 /*
29  * POSIX message queue implementation.
30  *
31  * 1) A mqueue filesystem can be mounted, each message queue appears
32  *    in mounted directory, user can change queue's permission and
33  *    ownership, or remove a queue. Manually creating a file in the
34  *    directory causes a message queue to be created in the kernel with
35  *    default message queue attributes applied and same name used, this
36  *    method is not advocated since mq_open syscall allows user to specify
37  *    different attributes. Also the file system can be mounted multiple
38  *    times at different mount points but shows same contents.
39  *
40  * 2) Standard POSIX message queue API. The syscalls do not use vfs layer,
41  *    but directly operate on internal data structure, this allows user to
42  *    use the IPC facility without having to mount mqueue file system.
43  */
44 
45 #include <sys/cdefs.h>
46 __FBSDID("$FreeBSD$");
47 
48 #include "opt_capsicum.h"
49 #include "opt_compat.h"
50 
51 #include <sys/param.h>
52 #include <sys/kernel.h>
53 #include <sys/systm.h>
54 #include <sys/limits.h>
55 #include <sys/malloc.h>
56 #include <sys/buf.h>
57 #include <sys/capsicum.h>
58 #include <sys/dirent.h>
59 #include <sys/event.h>
60 #include <sys/eventhandler.h>
61 #include <sys/fcntl.h>
62 #include <sys/file.h>
63 #include <sys/filedesc.h>
64 #include <sys/jail.h>
65 #include <sys/lock.h>
66 #include <sys/module.h>
67 #include <sys/mount.h>
68 #include <sys/mqueue.h>
69 #include <sys/mutex.h>
70 #include <sys/namei.h>
71 #include <sys/posix4.h>
72 #include <sys/poll.h>
73 #include <sys/priv.h>
74 #include <sys/proc.h>
75 #include <sys/queue.h>
76 #include <sys/sysproto.h>
77 #include <sys/stat.h>
78 #include <sys/syscall.h>
79 #include <sys/syscallsubr.h>
80 #include <sys/sysent.h>
81 #include <sys/sx.h>
82 #include <sys/sysctl.h>
83 #include <sys/taskqueue.h>
84 #include <sys/unistd.h>
85 #include <sys/user.h>
86 #include <sys/vnode.h>
87 #include <machine/atomic.h>
88 
89 FEATURE(p1003_1b_mqueue, "POSIX P1003.1B message queues support");
90 
91 /*
92  * Limits and constants
93  */
94 #define	MQFS_NAMELEN		NAME_MAX
95 #define MQFS_DELEN		(8 + MQFS_NAMELEN)
96 
/*
 * Kinds of node that can appear in the mqfs tree.  The zero value,
 * mqfstype_none, is what a freshly zeroed node carries until a real
 * type is assigned.
 */
typedef enum {
	mqfstype_none = 0,	/* no type assigned */
	mqfstype_root,		/* root directory of the file system */
	mqfstype_dir,		/* directory */
	mqfstype_this,		/* "." entry of a directory */
	mqfstype_parent,	/* ".." entry of a directory */
	mqfstype_file,		/* regular file */
	mqfstype_symlink,	/* symbolic link */
} mqfs_type_t;
107 
108 struct mqfs_node;
109 
110 /*
111  * mqfs_info: describes a mqfs instance
112  */
113 struct mqfs_info {
114 	struct sx		mi_lock;
115 	struct mqfs_node	*mi_root;
116 	struct unrhdr		*mi_unrhdr;
117 };
118 
119 struct mqfs_vdata {
120 	LIST_ENTRY(mqfs_vdata)	mv_link;
121 	struct mqfs_node	*mv_node;
122 	struct vnode		*mv_vnode;
123 	struct task		mv_task;
124 };
125 
126 /*
127  * mqfs_node: describes a node (file or directory) within a mqfs
128  */
129 struct mqfs_node {
130 	char			mn_name[MQFS_NAMELEN+1];
131 	struct mqfs_info	*mn_info;
132 	struct mqfs_node	*mn_parent;
133 	LIST_HEAD(,mqfs_node)	mn_children;
134 	LIST_ENTRY(mqfs_node)	mn_sibling;
135 	LIST_HEAD(,mqfs_vdata)	mn_vnodes;
136 	const void		*mn_pr_root;
137 	int			mn_refcount;
138 	mqfs_type_t		mn_type;
139 	int			mn_deleted;
140 	uint32_t		mn_fileno;
141 	void			*mn_data;
142 	struct timespec		mn_birth;
143 	struct timespec		mn_ctime;
144 	struct timespec		mn_atime;
145 	struct timespec		mn_mtime;
146 	uid_t			mn_uid;
147 	gid_t			mn_gid;
148 	int			mn_mode;
149 };
150 
/* vnode -> mqfs node, through the mqfs_vdata hung off v_data */
#define	VTON(vp)	(((struct mqfs_vdata *)((vp)->v_data))->mv_node)
/* vnode -> message queue attached to its node */
#define VTOMQ(vp) 	((struct mqueue *)(VTON(vp)->mn_data))
/* mount -> mqfs instance stored in mnt_data */
#define	VFSTOMQFS(m)	((struct mqfs_info *)((m)->mnt_data))
/* file -> message queue; f_data is treated as a struct mqfs_node pointer */
#define	FPTOMQ(fp)	((struct mqueue *)(((struct mqfs_node *) \
				(fp)->f_data)->mn_data))
156 
157 TAILQ_HEAD(msgq, mqueue_msg);
158 
159 struct mqueue;
160 
161 struct mqueue_notifier {
162 	LIST_ENTRY(mqueue_notifier)	nt_link;
163 	struct sigevent			nt_sigev;
164 	ksiginfo_t			nt_ksi;
165 	struct proc			*nt_proc;
166 };
167 
168 struct mqueue {
169 	struct mtx	mq_mutex;
170 	int		mq_flags;
171 	long		mq_maxmsg;
172 	long		mq_msgsize;
173 	long		mq_curmsgs;
174 	long		mq_totalbytes;
175 	struct msgq	mq_msgq;
176 	int		mq_receivers;
177 	int		mq_senders;
178 	struct selinfo	mq_rsel;
179 	struct selinfo	mq_wsel;
180 	struct mqueue_notifier	*mq_notifier;
181 };
182 
183 #define	MQ_RSEL		0x01
184 #define	MQ_WSEL		0x02
185 
186 struct mqueue_msg {
187 	TAILQ_ENTRY(mqueue_msg)	msg_link;
188 	unsigned int	msg_prio;
189 	unsigned int	msg_size;
190 	/* following real data... */
191 };
192 
193 static SYSCTL_NODE(_kern, OID_AUTO, mqueue, CTLFLAG_RW, 0,
194 	"POSIX real time message queue");
195 
196 static int	default_maxmsg  = 10;
197 static int	default_msgsize = 1024;
198 
199 static int	maxmsg = 100;
200 SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsg, CTLFLAG_RW,
201     &maxmsg, 0, "Default maximum messages in queue");
202 static int	maxmsgsize = 16384;
203 SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsgsize, CTLFLAG_RW,
204     &maxmsgsize, 0, "Default maximum message size");
205 static int	maxmq = 100;
206 SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmq, CTLFLAG_RW,
207     &maxmq, 0, "maximum message queues");
208 static int	curmq = 0;
209 SYSCTL_INT(_kern_mqueue, OID_AUTO, curmq, CTLFLAG_RW,
210     &curmq, 0, "current message queue number");
211 static int	unloadable = 0;
212 static MALLOC_DEFINE(M_MQUEUEDATA, "mqdata", "mqueue data");
213 
214 static eventhandler_tag exit_tag;
215 
216 /* Only one instance per-system */
217 static struct mqfs_info		mqfs_data;
218 static uma_zone_t		mqnode_zone;
219 static uma_zone_t		mqueue_zone;
220 static uma_zone_t		mvdata_zone;
221 static uma_zone_t		mqnoti_zone;
222 static struct vop_vector	mqfs_vnodeops;
223 static struct fileops		mqueueops;
224 static unsigned			mqfs_osd_jail_slot;
225 
226 /*
227  * Directory structure construction and manipulation
228  */
229 #ifdef notyet
230 static struct mqfs_node	*mqfs_create_dir(struct mqfs_node *parent,
231 	const char *name, int namelen, struct ucred *cred, int mode);
232 static struct mqfs_node	*mqfs_create_link(struct mqfs_node *parent,
233 	const char *name, int namelen, struct ucred *cred, int mode);
234 #endif
235 
236 static struct mqfs_node	*mqfs_create_file(struct mqfs_node *parent,
237 	const char *name, int namelen, struct ucred *cred, int mode);
238 static int	mqfs_destroy(struct mqfs_node *mn);
239 static void	mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn);
240 static void	mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn);
241 static int	mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn);
242 static int	mqfs_prison_remove(void *obj, void *data);
243 
244 /*
245  * Message queue construction and manipulation
246  */
247 static struct mqueue	*mqueue_alloc(const struct mq_attr *attr);
248 static void	mqueue_free(struct mqueue *mq);
249 static int	mqueue_send(struct mqueue *mq, const char *msg_ptr,
250 			size_t msg_len, unsigned msg_prio, int waitok,
251 			const struct timespec *abs_timeout);
252 static int	mqueue_receive(struct mqueue *mq, char *msg_ptr,
253 			size_t msg_len, unsigned *msg_prio, int waitok,
254 			const struct timespec *abs_timeout);
255 static int	_mqueue_send(struct mqueue *mq, struct mqueue_msg *msg,
256 			int timo);
257 static int	_mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg,
258 			int timo);
259 static void	mqueue_send_notification(struct mqueue *mq);
260 static void	mqueue_fdclose(struct thread *td, int fd, struct file *fp);
261 static void	mq_proc_exit(void *arg, struct proc *p);
262 
263 /*
264  * kqueue filters
265  */
266 static void	filt_mqdetach(struct knote *kn);
267 static int	filt_mqread(struct knote *kn, long hint);
268 static int	filt_mqwrite(struct knote *kn, long hint);
269 
270 struct filterops mq_rfiltops = {
271 	.f_isfd = 1,
272 	.f_detach = filt_mqdetach,
273 	.f_event = filt_mqread,
274 };
275 struct filterops mq_wfiltops = {
276 	.f_isfd = 1,
277 	.f_detach = filt_mqdetach,
278 	.f_event = filt_mqwrite,
279 };
280 
281 /*
282  * Initialize fileno bitmap
283  */
284 static void
285 mqfs_fileno_init(struct mqfs_info *mi)
286 {
287 	struct unrhdr *up;
288 
289 	up = new_unrhdr(1, INT_MAX, NULL);
290 	mi->mi_unrhdr = up;
291 }
292 
293 /*
294  * Tear down fileno bitmap
295  */
296 static void
297 mqfs_fileno_uninit(struct mqfs_info *mi)
298 {
299 	struct unrhdr *up;
300 
301 	up = mi->mi_unrhdr;
302 	mi->mi_unrhdr = NULL;
303 	delete_unrhdr(up);
304 }
305 
306 /*
307  * Allocate a file number
308  */
309 static void
310 mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn)
311 {
312 	/* make sure our parent has a file number */
313 	if (mn->mn_parent && !mn->mn_parent->mn_fileno)
314 		mqfs_fileno_alloc(mi, mn->mn_parent);
315 
316 	switch (mn->mn_type) {
317 	case mqfstype_root:
318 	case mqfstype_dir:
319 	case mqfstype_file:
320 	case mqfstype_symlink:
321 		mn->mn_fileno = alloc_unr(mi->mi_unrhdr);
322 		break;
323 	case mqfstype_this:
324 		KASSERT(mn->mn_parent != NULL,
325 		    ("mqfstype_this node has no parent"));
326 		mn->mn_fileno = mn->mn_parent->mn_fileno;
327 		break;
328 	case mqfstype_parent:
329 		KASSERT(mn->mn_parent != NULL,
330 		    ("mqfstype_parent node has no parent"));
331 		if (mn->mn_parent == mi->mi_root) {
332 			mn->mn_fileno = mn->mn_parent->mn_fileno;
333 			break;
334 		}
335 		KASSERT(mn->mn_parent->mn_parent != NULL,
336 		    ("mqfstype_parent node has no grandparent"));
337 		mn->mn_fileno = mn->mn_parent->mn_parent->mn_fileno;
338 		break;
339 	default:
340 		KASSERT(0,
341 		    ("mqfs_fileno_alloc() called for unknown type node: %d",
342 			mn->mn_type));
343 		break;
344 	}
345 }
346 
347 /*
348  * Release a file number
349  */
350 static void
351 mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn)
352 {
353 	switch (mn->mn_type) {
354 	case mqfstype_root:
355 	case mqfstype_dir:
356 	case mqfstype_file:
357 	case mqfstype_symlink:
358 		free_unr(mi->mi_unrhdr, mn->mn_fileno);
359 		break;
360 	case mqfstype_this:
361 	case mqfstype_parent:
362 		/* ignore these, as they don't "own" their file number */
363 		break;
364 	default:
365 		KASSERT(0,
366 		    ("mqfs_fileno_free() called for unknown type node: %d",
367 			mn->mn_type));
368 		break;
369 	}
370 }
371 
372 static __inline struct mqfs_node *
373 mqnode_alloc(void)
374 {
375 	return uma_zalloc(mqnode_zone, M_WAITOK | M_ZERO);
376 }
377 
378 static __inline void
379 mqnode_free(struct mqfs_node *node)
380 {
381 	uma_zfree(mqnode_zone, node);
382 }
383 
384 static __inline void
385 mqnode_addref(struct mqfs_node *node)
386 {
387 	atomic_fetchadd_int(&node->mn_refcount, 1);
388 }
389 
390 static __inline void
391 mqnode_release(struct mqfs_node *node)
392 {
393 	struct mqfs_info *mqfs;
394 	int old, exp;
395 
396 	mqfs = node->mn_info;
397 	old = atomic_fetchadd_int(&node->mn_refcount, -1);
398 	if (node->mn_type == mqfstype_dir ||
399 	    node->mn_type == mqfstype_root)
400 		exp = 3; /* include . and .. */
401 	else
402 		exp = 1;
403 	if (old == exp) {
404 		int locked = sx_xlocked(&mqfs->mi_lock);
405 		if (!locked)
406 			sx_xlock(&mqfs->mi_lock);
407 		mqfs_destroy(node);
408 		if (!locked)
409 			sx_xunlock(&mqfs->mi_lock);
410 	}
411 }
412 
413 /*
414  * Add a node to a directory
415  */
416 static int
417 mqfs_add_node(struct mqfs_node *parent, struct mqfs_node *node)
418 {
419 	KASSERT(parent != NULL, ("%s(): parent is NULL", __func__));
420 	KASSERT(parent->mn_info != NULL,
421 	    ("%s(): parent has no mn_info", __func__));
422 	KASSERT(parent->mn_type == mqfstype_dir ||
423 	    parent->mn_type == mqfstype_root,
424 	    ("%s(): parent is not a directory", __func__));
425 
426 	node->mn_info = parent->mn_info;
427 	node->mn_parent = parent;
428 	LIST_INIT(&node->mn_children);
429 	LIST_INIT(&node->mn_vnodes);
430 	LIST_INSERT_HEAD(&parent->mn_children, node, mn_sibling);
431 	mqnode_addref(parent);
432 	return (0);
433 }
434 
435 static struct mqfs_node *
436 mqfs_create_node(const char *name, int namelen, struct ucred *cred, int mode,
437 	int nodetype)
438 {
439 	struct mqfs_node *node;
440 
441 	node = mqnode_alloc();
442 	strncpy(node->mn_name, name, namelen);
443 	node->mn_pr_root = cred->cr_prison->pr_root;
444 	node->mn_type = nodetype;
445 	node->mn_refcount = 1;
446 	vfs_timestamp(&node->mn_birth);
447 	node->mn_ctime = node->mn_atime = node->mn_mtime
448 		= node->mn_birth;
449 	node->mn_uid = cred->cr_uid;
450 	node->mn_gid = cred->cr_gid;
451 	node->mn_mode = mode;
452 	return (node);
453 }
454 
455 /*
456  * Create a file
457  */
458 static struct mqfs_node *
459 mqfs_create_file(struct mqfs_node *parent, const char *name, int namelen,
460 	struct ucred *cred, int mode)
461 {
462 	struct mqfs_node *node;
463 
464 	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_file);
465 	if (mqfs_add_node(parent, node) != 0) {
466 		mqnode_free(node);
467 		return (NULL);
468 	}
469 	return (node);
470 }
471 
472 /*
473  * Add . and .. to a directory
474  */
475 static int
476 mqfs_fixup_dir(struct mqfs_node *parent)
477 {
478 	struct mqfs_node *dir;
479 
480 	dir = mqnode_alloc();
481 	dir->mn_name[0] = '.';
482 	dir->mn_type = mqfstype_this;
483 	dir->mn_refcount = 1;
484 	if (mqfs_add_node(parent, dir) != 0) {
485 		mqnode_free(dir);
486 		return (-1);
487 	}
488 
489 	dir = mqnode_alloc();
490 	dir->mn_name[0] = dir->mn_name[1] = '.';
491 	dir->mn_type = mqfstype_parent;
492 	dir->mn_refcount = 1;
493 
494 	if (mqfs_add_node(parent, dir) != 0) {
495 		mqnode_free(dir);
496 		return (-1);
497 	}
498 
499 	return (0);
500 }
501 
502 #ifdef notyet
503 
504 /*
505  * Create a directory
506  */
507 static struct mqfs_node *
508 mqfs_create_dir(struct mqfs_node *parent, const char *name, int namelen,
509 	struct ucred *cred, int mode)
510 {
511 	struct mqfs_node *node;
512 
513 	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_dir);
514 	if (mqfs_add_node(parent, node) != 0) {
515 		mqnode_free(node);
516 		return (NULL);
517 	}
518 
519 	if (mqfs_fixup_dir(node) != 0) {
520 		mqfs_destroy(node);
521 		return (NULL);
522 	}
523 	return (node);
524 }
525 
526 /*
527  * Create a symlink
528  */
529 static struct mqfs_node *
530 mqfs_create_link(struct mqfs_node *parent, const char *name, int namelen,
531 	struct ucred *cred, int mode)
532 {
533 	struct mqfs_node *node;
534 
535 	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_symlink);
536 	if (mqfs_add_node(parent, node) != 0) {
537 		mqnode_free(node);
538 		return (NULL);
539 	}
540 	return (node);
541 }
542 
543 #endif
544 
545 /*
546  * Destroy a node or a tree of nodes
547  */
548 static int
549 mqfs_destroy(struct mqfs_node *node)
550 {
551 	struct mqfs_node *parent;
552 
553 	KASSERT(node != NULL,
554 	    ("%s(): node is NULL", __func__));
555 	KASSERT(node->mn_info != NULL,
556 	    ("%s(): node has no mn_info", __func__));
557 
558 	/* destroy children */
559 	if (node->mn_type == mqfstype_dir || node->mn_type == mqfstype_root)
560 		while (! LIST_EMPTY(&node->mn_children))
561 			mqfs_destroy(LIST_FIRST(&node->mn_children));
562 
563 	/* unlink from parent */
564 	if ((parent = node->mn_parent) != NULL) {
565 		KASSERT(parent->mn_info == node->mn_info,
566 		    ("%s(): parent has different mn_info", __func__));
567 		LIST_REMOVE(node, mn_sibling);
568 	}
569 
570 	if (node->mn_fileno != 0)
571 		mqfs_fileno_free(node->mn_info, node);
572 	if (node->mn_data != NULL)
573 		mqueue_free(node->mn_data);
574 	mqnode_free(node);
575 	return (0);
576 }
577 
578 /*
579  * Mount a mqfs instance
580  */
581 static int
582 mqfs_mount(struct mount *mp)
583 {
584 	struct statfs *sbp;
585 
586 	if (mp->mnt_flag & MNT_UPDATE)
587 		return (EOPNOTSUPP);
588 
589 	mp->mnt_data = &mqfs_data;
590 	MNT_ILOCK(mp);
591 	mp->mnt_flag |= MNT_LOCAL;
592 	MNT_IUNLOCK(mp);
593 	vfs_getnewfsid(mp);
594 
595 	sbp = &mp->mnt_stat;
596 	vfs_mountedfrom(mp, "mqueue");
597 	sbp->f_bsize = PAGE_SIZE;
598 	sbp->f_iosize = PAGE_SIZE;
599 	sbp->f_blocks = 1;
600 	sbp->f_bfree = 0;
601 	sbp->f_bavail = 0;
602 	sbp->f_files = 1;
603 	sbp->f_ffree = 0;
604 	return (0);
605 }
606 
607 /*
608  * Unmount a mqfs instance
609  */
610 static int
611 mqfs_unmount(struct mount *mp, int mntflags)
612 {
613 	int error;
614 
615 	error = vflush(mp, 0, (mntflags & MNT_FORCE) ?  FORCECLOSE : 0,
616 	    curthread);
617 	return (error);
618 }
619 
620 /*
621  * Return a root vnode
622  */
623 static int
624 mqfs_root(struct mount *mp, int flags, struct vnode **vpp)
625 {
626 	struct mqfs_info *mqfs;
627 	int ret;
628 
629 	mqfs = VFSTOMQFS(mp);
630 	ret = mqfs_allocv(mp, vpp, mqfs->mi_root);
631 	return (ret);
632 }
633 
/*
 * VFS statfs entry point.  Nothing is refreshed here; sbp is left
 * untouched and success is reported.
 */
static int
mqfs_statfs(struct mount *mp, struct statfs *sbp)
{

	/* XXX update statistics */
	return (0);
}
643 
644 /*
645  * Initialize a mqfs instance
646  */
647 static int
648 mqfs_init(struct vfsconf *vfc)
649 {
650 	struct mqfs_node *root;
651 	struct mqfs_info *mi;
652 	osd_method_t methods[PR_MAXMETHOD] = {
653 	    [PR_METHOD_REMOVE] = mqfs_prison_remove,
654 	};
655 
656 	mqnode_zone = uma_zcreate("mqnode", sizeof(struct mqfs_node),
657 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
658 	mqueue_zone = uma_zcreate("mqueue", sizeof(struct mqueue),
659 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
660 	mvdata_zone = uma_zcreate("mvdata",
661 		sizeof(struct mqfs_vdata), NULL, NULL, NULL,
662 		NULL, UMA_ALIGN_PTR, 0);
663 	mqnoti_zone = uma_zcreate("mqnotifier", sizeof(struct mqueue_notifier),
664 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
665 	mi = &mqfs_data;
666 	sx_init(&mi->mi_lock, "mqfs lock");
667 	/* set up the root diretory */
668 	root = mqfs_create_node("/", 1, curthread->td_ucred, 01777,
669 		mqfstype_root);
670 	root->mn_info = mi;
671 	LIST_INIT(&root->mn_children);
672 	LIST_INIT(&root->mn_vnodes);
673 	mi->mi_root = root;
674 	mqfs_fileno_init(mi);
675 	mqfs_fileno_alloc(mi, root);
676 	mqfs_fixup_dir(root);
677 	exit_tag = EVENTHANDLER_REGISTER(process_exit, mq_proc_exit, NULL,
678 	    EVENTHANDLER_PRI_ANY);
679 	mq_fdclose = mqueue_fdclose;
680 	p31b_setcfg(CTL_P1003_1B_MESSAGE_PASSING, _POSIX_MESSAGE_PASSING);
681 	mqfs_osd_jail_slot = osd_jail_register(NULL, methods);
682 	return (0);
683 }
684 
685 /*
686  * Destroy a mqfs instance
687  */
688 static int
689 mqfs_uninit(struct vfsconf *vfc)
690 {
691 	struct mqfs_info *mi;
692 
693 	if (!unloadable)
694 		return (EOPNOTSUPP);
695 	osd_jail_deregister(mqfs_osd_jail_slot);
696 	EVENTHANDLER_DEREGISTER(process_exit, exit_tag);
697 	mi = &mqfs_data;
698 	mqfs_destroy(mi->mi_root);
699 	mi->mi_root = NULL;
700 	mqfs_fileno_uninit(mi);
701 	sx_destroy(&mi->mi_lock);
702 	uma_zdestroy(mqnode_zone);
703 	uma_zdestroy(mqueue_zone);
704 	uma_zdestroy(mvdata_zone);
705 	uma_zdestroy(mqnoti_zone);
706 	return (0);
707 }
708 
709 /*
710  * task routine
711  */
712 static void
713 do_recycle(void *context, int pending __unused)
714 {
715 	struct vnode *vp = (struct vnode *)context;
716 
717 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
718 	vrecycle(vp);
719 	VOP_UNLOCK(vp, 0);
720 	vdrop(vp);
721 }
722 
723 /*
724  * Allocate a vnode
725  */
726 static int
727 mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn)
728 {
729 	struct mqfs_vdata *vd;
730 	struct mqfs_info  *mqfs;
731 	struct vnode *newvpp;
732 	int error;
733 
734 	mqfs = pn->mn_info;
735 	*vpp = NULL;
736 	sx_xlock(&mqfs->mi_lock);
737 	LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
738 		if (vd->mv_vnode->v_mount == mp) {
739 			vhold(vd->mv_vnode);
740 			break;
741 		}
742 	}
743 
744 	if (vd != NULL) {
745 found:
746 		*vpp = vd->mv_vnode;
747 		sx_xunlock(&mqfs->mi_lock);
748 		error = vget(*vpp, LK_RETRY | LK_EXCLUSIVE, curthread);
749 		vdrop(*vpp);
750 		return (error);
751 	}
752 	sx_xunlock(&mqfs->mi_lock);
753 
754 	error = getnewvnode("mqueue", mp, &mqfs_vnodeops, &newvpp);
755 	if (error)
756 		return (error);
757 	vn_lock(newvpp, LK_EXCLUSIVE | LK_RETRY);
758 	error = insmntque(newvpp, mp);
759 	if (error != 0)
760 		return (error);
761 
762 	sx_xlock(&mqfs->mi_lock);
763 	/*
764 	 * Check if it has already been allocated
765 	 * while we were blocked.
766 	 */
767 	LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
768 		if (vd->mv_vnode->v_mount == mp) {
769 			vhold(vd->mv_vnode);
770 			sx_xunlock(&mqfs->mi_lock);
771 
772 			vgone(newvpp);
773 			vput(newvpp);
774 			goto found;
775 		}
776 	}
777 
778 	*vpp = newvpp;
779 
780 	vd = uma_zalloc(mvdata_zone, M_WAITOK);
781 	(*vpp)->v_data = vd;
782 	vd->mv_vnode = *vpp;
783 	vd->mv_node = pn;
784 	TASK_INIT(&vd->mv_task, 0, do_recycle, *vpp);
785 	LIST_INSERT_HEAD(&pn->mn_vnodes, vd, mv_link);
786 	mqnode_addref(pn);
787 	switch (pn->mn_type) {
788 	case mqfstype_root:
789 		(*vpp)->v_vflag = VV_ROOT;
790 		/* fall through */
791 	case mqfstype_dir:
792 	case mqfstype_this:
793 	case mqfstype_parent:
794 		(*vpp)->v_type = VDIR;
795 		break;
796 	case mqfstype_file:
797 		(*vpp)->v_type = VREG;
798 		break;
799 	case mqfstype_symlink:
800 		(*vpp)->v_type = VLNK;
801 		break;
802 	case mqfstype_none:
803 		KASSERT(0, ("mqfs_allocf called for null node\n"));
804 	default:
805 		panic("%s has unexpected type: %d", pn->mn_name, pn->mn_type);
806 	}
807 	sx_xunlock(&mqfs->mi_lock);
808 	return (0);
809 }
810 
811 /*
812  * Search a directory entry
813  */
814 static struct mqfs_node *
815 mqfs_search(struct mqfs_node *pd, const char *name, int len, struct ucred *cred)
816 {
817 	struct mqfs_node *pn;
818 	const void *pr_root;
819 
820 	sx_assert(&pd->mn_info->mi_lock, SX_LOCKED);
821 	pr_root = cred->cr_prison->pr_root;
822 	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
823 		/* Only match names within the same prison root directory */
824 		if ((pn->mn_pr_root == NULL || pn->mn_pr_root == pr_root) &&
825 		    strncmp(pn->mn_name, name, len) == 0 &&
826 		    pn->mn_name[len] == '\0')
827 			return (pn);
828 	}
829 	return (NULL);
830 }
831 
832 /*
833  * Look up a file or directory.
834  */
835 static int
836 mqfs_lookupx(struct vop_cachedlookup_args *ap)
837 {
838 	struct componentname *cnp;
839 	struct vnode *dvp, **vpp;
840 	struct mqfs_node *pd;
841 	struct mqfs_node *pn;
842 	struct mqfs_info *mqfs;
843 	int nameiop, flags, error, namelen;
844 	char *pname;
845 	struct thread *td;
846 
847 	cnp = ap->a_cnp;
848 	vpp = ap->a_vpp;
849 	dvp = ap->a_dvp;
850 	pname = cnp->cn_nameptr;
851 	namelen = cnp->cn_namelen;
852 	td = cnp->cn_thread;
853 	flags = cnp->cn_flags;
854 	nameiop = cnp->cn_nameiop;
855 	pd = VTON(dvp);
856 	pn = NULL;
857 	mqfs = pd->mn_info;
858 	*vpp = NULLVP;
859 
860 	if (dvp->v_type != VDIR)
861 		return (ENOTDIR);
862 
863 	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, cnp->cn_thread);
864 	if (error)
865 		return (error);
866 
867 	/* shortcut: check if the name is too long */
868 	if (cnp->cn_namelen >= MQFS_NAMELEN)
869 		return (ENOENT);
870 
871 	/* self */
872 	if (namelen == 1 && pname[0] == '.') {
873 		if ((flags & ISLASTCN) && nameiop != LOOKUP)
874 			return (EINVAL);
875 		pn = pd;
876 		*vpp = dvp;
877 		VREF(dvp);
878 		return (0);
879 	}
880 
881 	/* parent */
882 	if (cnp->cn_flags & ISDOTDOT) {
883 		if (dvp->v_vflag & VV_ROOT)
884 			return (EIO);
885 		if ((flags & ISLASTCN) && nameiop != LOOKUP)
886 			return (EINVAL);
887 		VOP_UNLOCK(dvp, 0);
888 		KASSERT(pd->mn_parent, ("non-root directory has no parent"));
889 		pn = pd->mn_parent;
890 		error = mqfs_allocv(dvp->v_mount, vpp, pn);
891 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
892 		return (error);
893 	}
894 
895 	/* named node */
896 	sx_xlock(&mqfs->mi_lock);
897 	pn = mqfs_search(pd, pname, namelen, cnp->cn_cred);
898 	if (pn != NULL)
899 		mqnode_addref(pn);
900 	sx_xunlock(&mqfs->mi_lock);
901 
902 	/* found */
903 	if (pn != NULL) {
904 		/* DELETE */
905 		if (nameiop == DELETE && (flags & ISLASTCN)) {
906 			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
907 			if (error) {
908 				mqnode_release(pn);
909 				return (error);
910 			}
911 			if (*vpp == dvp) {
912 				VREF(dvp);
913 				*vpp = dvp;
914 				mqnode_release(pn);
915 				return (0);
916 			}
917 		}
918 
919 		/* allocate vnode */
920 		error = mqfs_allocv(dvp->v_mount, vpp, pn);
921 		mqnode_release(pn);
922 		if (error == 0 && cnp->cn_flags & MAKEENTRY)
923 			cache_enter(dvp, *vpp, cnp);
924 		return (error);
925 	}
926 
927 	/* not found */
928 
929 	/* will create a new entry in the directory ? */
930 	if ((nameiop == CREATE || nameiop == RENAME) && (flags & LOCKPARENT)
931 	    && (flags & ISLASTCN)) {
932 		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
933 		if (error)
934 			return (error);
935 		cnp->cn_flags |= SAVENAME;
936 		return (EJUSTRETURN);
937 	}
938 	return (ENOENT);
939 }
940 
941 #if 0
942 struct vop_lookup_args {
943 	struct vop_generic_args a_gen;
944 	struct vnode *a_dvp;
945 	struct vnode **a_vpp;
946 	struct componentname *a_cnp;
947 };
948 #endif
949 
/*
 * vnode lookup operation: thin wrapper that forwards to mqfs_lookupx().
 */
static int
mqfs_lookup(struct vop_cachedlookup_args *ap)
{

	return (mqfs_lookupx(ap));
}
961 
962 #if 0
963 struct vop_create_args {
964 	struct vnode *a_dvp;
965 	struct vnode **a_vpp;
966 	struct componentname *a_cnp;
967 	struct vattr *a_vap;
968 };
969 #endif
970 
971 /*
972  * vnode creation operation
973  */
974 static int
975 mqfs_create(struct vop_create_args *ap)
976 {
977 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
978 	struct componentname *cnp = ap->a_cnp;
979 	struct mqfs_node *pd;
980 	struct mqfs_node *pn;
981 	struct mqueue *mq;
982 	int error;
983 
984 	pd = VTON(ap->a_dvp);
985 	if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir)
986 		return (ENOTDIR);
987 	mq = mqueue_alloc(NULL);
988 	if (mq == NULL)
989 		return (EAGAIN);
990 	sx_xlock(&mqfs->mi_lock);
991 	if ((cnp->cn_flags & HASBUF) == 0)
992 		panic("%s: no name", __func__);
993 	pn = mqfs_create_file(pd, cnp->cn_nameptr, cnp->cn_namelen,
994 		cnp->cn_cred, ap->a_vap->va_mode);
995 	if (pn == NULL) {
996 		sx_xunlock(&mqfs->mi_lock);
997 		error = ENOSPC;
998 	} else {
999 		mqnode_addref(pn);
1000 		sx_xunlock(&mqfs->mi_lock);
1001 		error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn);
1002 		mqnode_release(pn);
1003 		if (error)
1004 			mqfs_destroy(pn);
1005 		else
1006 			pn->mn_data = mq;
1007 	}
1008 	if (error)
1009 		mqueue_free(mq);
1010 	return (error);
1011 }
1012 
1013 /*
1014  * Remove an entry
1015  */
1016 static
1017 int do_unlink(struct mqfs_node *pn, struct ucred *ucred)
1018 {
1019 	struct mqfs_node *parent;
1020 	struct mqfs_vdata *vd;
1021 	int error = 0;
1022 
1023 	sx_assert(&pn->mn_info->mi_lock, SX_LOCKED);
1024 
1025 	if (ucred->cr_uid != pn->mn_uid &&
1026 	    (error = priv_check_cred(ucred, PRIV_MQ_ADMIN, 0)) != 0)
1027 		error = EACCES;
1028 	else if (!pn->mn_deleted) {
1029 		parent = pn->mn_parent;
1030 		pn->mn_parent = NULL;
1031 		pn->mn_deleted = 1;
1032 		LIST_REMOVE(pn, mn_sibling);
1033 		LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
1034 			cache_purge(vd->mv_vnode);
1035 			vhold(vd->mv_vnode);
1036 			taskqueue_enqueue(taskqueue_thread, &vd->mv_task);
1037 		}
1038 		mqnode_release(pn);
1039 		mqnode_release(parent);
1040 	} else
1041 		error = ENOENT;
1042 	return (error);
1043 }
1044 
1045 #if 0
1046 struct vop_remove_args {
1047 	struct vnode *a_dvp;
1048 	struct vnode *a_vp;
1049 	struct componentname *a_cnp;
1050 };
1051 #endif
1052 
1053 /*
1054  * vnode removal operation
1055  */
1056 static int
1057 mqfs_remove(struct vop_remove_args *ap)
1058 {
1059 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1060 	struct mqfs_node *pn;
1061 	int error;
1062 
1063 	if (ap->a_vp->v_type == VDIR)
1064                 return (EPERM);
1065 	pn = VTON(ap->a_vp);
1066 	sx_xlock(&mqfs->mi_lock);
1067 	error = do_unlink(pn, ap->a_cnp->cn_cred);
1068 	sx_xunlock(&mqfs->mi_lock);
1069 	return (error);
1070 }
1071 
1072 #if 0
1073 struct vop_inactive_args {
1074 	struct vnode *a_vp;
1075 	struct thread *a_td;
1076 };
1077 #endif
1078 
1079 static int
1080 mqfs_inactive(struct vop_inactive_args *ap)
1081 {
1082 	struct mqfs_node *pn = VTON(ap->a_vp);
1083 
1084 	if (pn->mn_deleted)
1085 		vrecycle(ap->a_vp);
1086 	return (0);
1087 }
1088 
1089 #if 0
1090 struct vop_reclaim_args {
1091 	struct vop_generic_args a_gen;
1092 	struct vnode *a_vp;
1093 	struct thread *a_td;
1094 };
1095 #endif
1096 
1097 static int
1098 mqfs_reclaim(struct vop_reclaim_args *ap)
1099 {
1100 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_vp->v_mount);
1101 	struct vnode *vp = ap->a_vp;
1102 	struct mqfs_node *pn;
1103 	struct mqfs_vdata *vd;
1104 
1105 	vd = vp->v_data;
1106 	pn = vd->mv_node;
1107 	sx_xlock(&mqfs->mi_lock);
1108 	vp->v_data = NULL;
1109 	LIST_REMOVE(vd, mv_link);
1110 	uma_zfree(mvdata_zone, vd);
1111 	mqnode_release(pn);
1112 	sx_xunlock(&mqfs->mi_lock);
1113 	return (0);
1114 }
1115 
1116 #if 0
1117 struct vop_open_args {
1118 	struct vop_generic_args a_gen;
1119 	struct vnode *a_vp;
1120 	int a_mode;
1121 	struct ucred *a_cred;
1122 	struct thread *a_td;
1123 	struct file *a_fp;
1124 };
1125 #endif
1126 
/* vnode open operation: nothing to do, always succeeds. */
static int
mqfs_open(struct vop_open_args *ap)
{

	return (0);
}
1132 
1133 #if 0
1134 struct vop_close_args {
1135 	struct vop_generic_args a_gen;
1136 	struct vnode *a_vp;
1137 	int a_fflag;
1138 	struct ucred *a_cred;
1139 	struct thread *a_td;
1140 };
1141 #endif
1142 
/* vnode close operation: nothing to do, always succeeds. */
static int
mqfs_close(struct vop_close_args *ap)
{

	return (0);
}
1148 
1149 #if 0
1150 struct vop_access_args {
1151 	struct vop_generic_args a_gen;
1152 	struct vnode *a_vp;
1153 	accmode_t a_accmode;
1154 	struct ucred *a_cred;
1155 	struct thread *a_td;
1156 };
1157 #endif
1158 
1159 /*
1160  * Verify permissions
1161  */
1162 static int
1163 mqfs_access(struct vop_access_args *ap)
1164 {
1165 	struct vnode *vp = ap->a_vp;
1166 	struct vattr vattr;
1167 	int error;
1168 
1169 	error = VOP_GETATTR(vp, &vattr, ap->a_cred);
1170 	if (error)
1171 		return (error);
1172 	error = vaccess(vp->v_type, vattr.va_mode, vattr.va_uid,
1173 	    vattr.va_gid, ap->a_accmode, ap->a_cred, NULL);
1174 	return (error);
1175 }
1176 
1177 #if 0
1178 struct vop_getattr_args {
1179 	struct vop_generic_args a_gen;
1180 	struct vnode *a_vp;
1181 	struct vattr *a_vap;
1182 	struct ucred *a_cred;
1183 };
1184 #endif
1185 
1186 /*
1187  * Get file attributes
1188  */
1189 static int
1190 mqfs_getattr(struct vop_getattr_args *ap)
1191 {
1192 	struct vnode *vp = ap->a_vp;
1193 	struct mqfs_node *pn = VTON(vp);
1194 	struct vattr *vap = ap->a_vap;
1195 	int error = 0;
1196 
1197 	vap->va_type = vp->v_type;
1198 	vap->va_mode = pn->mn_mode;
1199 	vap->va_nlink = 1;
1200 	vap->va_uid = pn->mn_uid;
1201 	vap->va_gid = pn->mn_gid;
1202 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
1203 	vap->va_fileid = pn->mn_fileno;
1204 	vap->va_size = 0;
1205 	vap->va_blocksize = PAGE_SIZE;
1206 	vap->va_bytes = vap->va_size = 0;
1207 	vap->va_atime = pn->mn_atime;
1208 	vap->va_mtime = pn->mn_mtime;
1209 	vap->va_ctime = pn->mn_ctime;
1210 	vap->va_birthtime = pn->mn_birth;
1211 	vap->va_gen = 0;
1212 	vap->va_flags = 0;
1213 	vap->va_rdev = NODEV;
1214 	vap->va_bytes = 0;
1215 	vap->va_filerev = 0;
1216 	return (error);
1217 }
1218 
1219 #if 0
1220 struct vop_setattr_args {
1221 	struct vop_generic_args a_gen;
1222 	struct vnode *a_vp;
1223 	struct vattr *a_vap;
1224 	struct ucred *a_cred;
1225 };
1226 #endif
/*
 * Set attributes
 *
 * Only owner, group, mode, access time and modification time can be
 * changed; a request that sets any other attribute fails with EINVAL.
 * The three groups (ownership, mode, times) are checked and stored one
 * after the other, so an error from a later group is returned after an
 * earlier group has already been written to the node.  When at least one
 * group was stored, the node's change time is refreshed.
 *
 * Returns 0 on success, EINVAL for an unsupported attribute, or the
 * error from VOP_ACCESS()/priv_check().
 */
static int
mqfs_setattr(struct vop_setattr_args *ap)
{
	struct mqfs_node *pn;
	struct vattr *vap;
	struct vnode *vp;
	struct thread *td;
	int c, error;
	uid_t uid;
	gid_t gid;

	td = curthread;
	vap = ap->a_vap;
	vp = ap->a_vp;
	/* Reject any attempt to set an attribute this filesystem cannot change. */
	if ((vap->va_type != VNON) ||
	    (vap->va_nlink != VNOVAL) ||
	    (vap->va_fsid != VNOVAL) ||
	    (vap->va_fileid != VNOVAL) ||
	    (vap->va_blocksize != VNOVAL) ||
	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
	    (vap->va_rdev != VNOVAL) ||
	    ((int)vap->va_bytes != VNOVAL) ||
	    (vap->va_gen != VNOVAL)) {
		return (EINVAL);
	}

	pn = VTON(vp);

	/* c records whether anything was changed, so ctime can be updated. */
	error = c = 0;
	/* A uid/gid of VNOVAL keeps the node's current value. */
	if (vap->va_uid == (uid_t)VNOVAL)
		uid = pn->mn_uid;
	else
		uid = vap->va_uid;
	if (vap->va_gid == (gid_t)VNOVAL)
		gid = pn->mn_gid;
	else
		gid = vap->va_gid;

	if (uid != pn->mn_uid || gid != pn->mn_gid) {
		/*
		 * To modify the ownership of a file, must possess VADMIN
		 * for that file.
		 */
		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td)))
			return (error);

		/*
		 * XXXRW: Why is there a privilege check here: shouldn't the
		 * check in VOP_ACCESS() be enough?  Also, are the group bits
		 * below definitely right?
		 */
		/*
		 * PRIV_MQ_ADMIN is required unless the caller owns the node,
		 * keeps the owner unchanged, and either keeps the group or
		 * is a member of the new group.
		 */
		if (((ap->a_cred->cr_uid != pn->mn_uid) || uid != pn->mn_uid ||
		    (gid != pn->mn_gid && !groupmember(gid, ap->a_cred))) &&
		    (error = priv_check(td, PRIV_MQ_ADMIN)) != 0)
			return (error);
		pn->mn_uid = uid;
		pn->mn_gid = gid;
		c = 1;
	}

	if (vap->va_mode != (mode_t)VNOVAL) {
		/*
		 * Only the owner may change the mode without PRIV_MQ_ADMIN.
		 * Note that mn_uid may already hold a new owner stored by
		 * the ownership step above.
		 */
		if ((ap->a_cred->cr_uid != pn->mn_uid) &&
		    (error = priv_check(td, PRIV_MQ_ADMIN)))
			return (error);
		pn->mn_mode = vap->va_mode;
		c = 1;
	}

	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
		/* See the comment in ufs_vnops::ufs_setattr(). */
		/*
		 * VADMIN is sufficient; failing that, VWRITE is accepted
		 * only when VA_UTIMES_NULL is set in va_vaflags.
		 */
		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td)) &&
		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
		    (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, td))))
			return (error);
		if (vap->va_atime.tv_sec != VNOVAL) {
			pn->mn_atime = vap->va_atime;
		}
		if (vap->va_mtime.tv_sec != VNOVAL) {
			pn->mn_mtime = vap->va_mtime;
		}
		c = 1;
	}
	if (c) {
		vfs_timestamp(&pn->mn_ctime);
	}
	return (0);
}
1317 
1318 #if 0
1319 struct vop_read_args {
1320 	struct vop_generic_args a_gen;
1321 	struct vnode *a_vp;
1322 	struct uio *a_uio;
1323 	int a_ioflag;
1324 	struct ucred *a_cred;
1325 };
1326 #endif
1327 
1328 /*
1329  * Read from a file
1330  */
1331 static int
1332 mqfs_read(struct vop_read_args *ap)
1333 {
1334 	char buf[80];
1335 	struct vnode *vp = ap->a_vp;
1336 	struct uio *uio = ap->a_uio;
1337 	struct mqfs_node *pn;
1338 	struct mqueue *mq;
1339 	int len, error;
1340 
1341 	if (vp->v_type != VREG)
1342 		return (EINVAL);
1343 
1344 	pn = VTON(vp);
1345 	mq = VTOMQ(vp);
1346 	snprintf(buf, sizeof(buf),
1347 		"QSIZE:%-10ld MAXMSG:%-10ld CURMSG:%-10ld MSGSIZE:%-10ld\n",
1348 		mq->mq_totalbytes,
1349 		mq->mq_maxmsg,
1350 		mq->mq_curmsgs,
1351 		mq->mq_msgsize);
1352 	buf[sizeof(buf)-1] = '\0';
1353 	len = strlen(buf);
1354 	error = uiomove_frombuf(buf, len, uio);
1355 	return (error);
1356 }
1357 
1358 #if 0
1359 struct vop_readdir_args {
1360 	struct vop_generic_args a_gen;
1361 	struct vnode *a_vp;
1362 	struct uio *a_uio;
1363 	struct ucred *a_cred;
1364 	int *a_eofflag;
1365 	int *a_ncookies;
1366 	u_long **a_cookies;
1367 };
1368 #endif
1369 
1370 /*
1371  * Return directory entries.
1372  */
1373 static int
1374 mqfs_readdir(struct vop_readdir_args *ap)
1375 {
1376 	struct vnode *vp;
1377 	struct mqfs_info *mi;
1378 	struct mqfs_node *pd;
1379 	struct mqfs_node *pn;
1380 	struct dirent entry;
1381 	struct uio *uio;
1382 	const void *pr_root;
1383 	int *tmp_ncookies = NULL;
1384 	off_t offset;
1385 	int error, i;
1386 
1387 	vp = ap->a_vp;
1388 	mi = VFSTOMQFS(vp->v_mount);
1389 	pd = VTON(vp);
1390 	uio = ap->a_uio;
1391 
1392 	if (vp->v_type != VDIR)
1393 		return (ENOTDIR);
1394 
1395 	if (uio->uio_offset < 0)
1396 		return (EINVAL);
1397 
1398 	if (ap->a_ncookies != NULL) {
1399 		tmp_ncookies = ap->a_ncookies;
1400 		*ap->a_ncookies = 0;
1401 		ap->a_ncookies = NULL;
1402         }
1403 
1404 	error = 0;
1405 	offset = 0;
1406 
1407 	pr_root = ap->a_cred->cr_prison->pr_root;
1408 	sx_xlock(&mi->mi_lock);
1409 
1410 	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
1411 		entry.d_reclen = sizeof(entry);
1412 
1413 		/*
1414 		 * Only show names within the same prison root directory
1415 		 * (or not associated with a prison, e.g. "." and "..").
1416 		 */
1417 		if (pn->mn_pr_root != NULL && pn->mn_pr_root != pr_root)
1418 			continue;
1419 		if (!pn->mn_fileno)
1420 			mqfs_fileno_alloc(mi, pn);
1421 		entry.d_fileno = pn->mn_fileno;
1422 		for (i = 0; i < MQFS_NAMELEN - 1 && pn->mn_name[i] != '\0'; ++i)
1423 			entry.d_name[i] = pn->mn_name[i];
1424 		entry.d_name[i] = 0;
1425 		entry.d_namlen = i;
1426 		switch (pn->mn_type) {
1427 		case mqfstype_root:
1428 		case mqfstype_dir:
1429 		case mqfstype_this:
1430 		case mqfstype_parent:
1431 			entry.d_type = DT_DIR;
1432 			break;
1433 		case mqfstype_file:
1434 			entry.d_type = DT_REG;
1435 			break;
1436 		case mqfstype_symlink:
1437 			entry.d_type = DT_LNK;
1438 			break;
1439 		default:
1440 			panic("%s has unexpected node type: %d", pn->mn_name,
1441 				pn->mn_type);
1442 		}
1443 		if (entry.d_reclen > uio->uio_resid)
1444                         break;
1445 		if (offset >= uio->uio_offset) {
1446 			error = vfs_read_dirent(ap, &entry, offset);
1447                         if (error)
1448                                 break;
1449                 }
1450                 offset += entry.d_reclen;
1451 	}
1452 	sx_xunlock(&mi->mi_lock);
1453 
1454 	uio->uio_offset = offset;
1455 
1456 	if (tmp_ncookies != NULL)
1457 		ap->a_ncookies = tmp_ncookies;
1458 
1459 	return (error);
1460 }
1461 
1462 #ifdef notyet
1463 
1464 #if 0
1465 struct vop_mkdir_args {
1466 	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
1469 	struct vattr *a_vap;
1470 };
1471 #endif
1472 
1473 /*
1474  * Create a directory.
1475  */
1476 static int
1477 mqfs_mkdir(struct vop_mkdir_args *ap)
1478 {
1479 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1480 	struct componentname *cnp = ap->a_cnp;
1481 	struct mqfs_node *pd = VTON(ap->a_dvp);
1482 	struct mqfs_node *pn;
1483 	int error;
1484 
1485 	if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir)
1486 		return (ENOTDIR);
1487 	sx_xlock(&mqfs->mi_lock);
1488 	if ((cnp->cn_flags & HASBUF) == 0)
1489 		panic("%s: no name", __func__);
1490 	pn = mqfs_create_dir(pd, cnp->cn_nameptr, cnp->cn_namelen,
1491 		ap->a_vap->cn_cred, ap->a_vap->va_mode);
1492 	if (pn != NULL)
1493 		mqnode_addref(pn);
1494 	sx_xunlock(&mqfs->mi_lock);
1495 	if (pn == NULL) {
1496 		error = ENOSPC;
1497 	} else {
1498 		error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn);
1499 		mqnode_release(pn);
1500 	}
1501 	return (error);
1502 }
1503 
1504 #if 0
1505 struct vop_rmdir_args {
1506 	struct vnode *a_dvp;
1507 	struct vnode *a_vp;
1508 	struct componentname *a_cnp;
1509 };
1510 #endif
1511 
1512 /*
1513  * Remove a directory.
1514  */
1515 static int
1516 mqfs_rmdir(struct vop_rmdir_args *ap)
1517 {
1518 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1519 	struct mqfs_node *pn = VTON(ap->a_vp);
1520 	struct mqfs_node *pt;
1521 
1522 	if (pn->mn_type != mqfstype_dir)
1523 		return (ENOTDIR);
1524 
1525 	sx_xlock(&mqfs->mi_lock);
1526 	if (pn->mn_deleted) {
1527 		sx_xunlock(&mqfs->mi_lock);
1528 		return (ENOENT);
1529 	}
1530 
1531 	pt = LIST_FIRST(&pn->mn_children);
1532 	pt = LIST_NEXT(pt, mn_sibling);
1533 	pt = LIST_NEXT(pt, mn_sibling);
1534 	if (pt != NULL) {
1535 		sx_xunlock(&mqfs->mi_lock);
1536 		return (ENOTEMPTY);
1537 	}
1538 	pt = pn->mn_parent;
1539 	pn->mn_parent = NULL;
1540 	pn->mn_deleted = 1;
1541 	LIST_REMOVE(pn, mn_sibling);
1542 	mqnode_release(pn);
1543 	mqnode_release(pt);
1544 	sx_xunlock(&mqfs->mi_lock);
1545 	cache_purge(ap->a_vp);
1546 	return (0);
1547 }
1548 
1549 #endif /* notyet */
1550 
1551 /*
1552  * See if this prison root is obsolete, and clean up associated queues if it is.
1553  */
1554 static int
1555 mqfs_prison_remove(void *obj, void *data __unused)
1556 {
1557 	const struct prison *pr = obj;
1558 	const struct prison *tpr;
1559 	struct mqfs_node *pn, *tpn;
1560 	int found;
1561 
1562 	found = 0;
1563 	TAILQ_FOREACH(tpr, &allprison, pr_list) {
1564 		if (tpr->pr_root == pr->pr_root && tpr != pr && tpr->pr_ref > 0)
1565 			found = 1;
1566 	}
1567 	if (!found) {
1568 		/*
1569 		 * No jails are rooted in this directory anymore,
1570 		 * so no queues should be either.
1571 		 */
1572 		sx_xlock(&mqfs_data.mi_lock);
1573 		LIST_FOREACH_SAFE(pn, &mqfs_data.mi_root->mn_children,
1574 		    mn_sibling, tpn) {
1575 			if (pn->mn_pr_root == pr->pr_root)
1576 				(void)do_unlink(pn, curthread->td_ucred);
1577 		}
1578 		sx_xunlock(&mqfs_data.mi_lock);
1579 	}
1580 	return (0);
1581 }
1582 
1583 /*
1584  * Allocate a message queue
1585  */
1586 static struct mqueue *
1587 mqueue_alloc(const struct mq_attr *attr)
1588 {
1589 	struct mqueue *mq;
1590 
1591 	if (curmq >= maxmq)
1592 		return (NULL);
1593 	mq = uma_zalloc(mqueue_zone, M_WAITOK | M_ZERO);
1594 	TAILQ_INIT(&mq->mq_msgq);
1595 	if (attr != NULL) {
1596 		mq->mq_maxmsg = attr->mq_maxmsg;
1597 		mq->mq_msgsize = attr->mq_msgsize;
1598 	} else {
1599 		mq->mq_maxmsg = default_maxmsg;
1600 		mq->mq_msgsize = default_msgsize;
1601 	}
1602 	mtx_init(&mq->mq_mutex, "mqueue lock", NULL, MTX_DEF);
1603 	knlist_init_mtx(&mq->mq_rsel.si_note, &mq->mq_mutex);
1604 	knlist_init_mtx(&mq->mq_wsel.si_note, &mq->mq_mutex);
1605 	atomic_add_int(&curmq, 1);
1606 	return (mq);
1607 }
1608 
1609 /*
1610  * Destroy a message queue
1611  */
1612 static void
1613 mqueue_free(struct mqueue *mq)
1614 {
1615 	struct mqueue_msg *msg;
1616 
1617 	while ((msg = TAILQ_FIRST(&mq->mq_msgq)) != NULL) {
1618 		TAILQ_REMOVE(&mq->mq_msgq, msg, msg_link);
1619 		free(msg, M_MQUEUEDATA);
1620 	}
1621 
1622 	mtx_destroy(&mq->mq_mutex);
1623 	seldrain(&mq->mq_rsel);
1624 	seldrain(&mq->mq_wsel);
1625 	knlist_destroy(&mq->mq_rsel.si_note);
1626 	knlist_destroy(&mq->mq_wsel.si_note);
1627 	uma_zfree(mqueue_zone, mq);
1628 	atomic_add_int(&curmq, -1);
1629 }
1630 
1631 /*
1632  * Load a message from user space
1633  */
1634 static struct mqueue_msg *
1635 mqueue_loadmsg(const char *msg_ptr, size_t msg_size, int msg_prio)
1636 {
1637 	struct mqueue_msg *msg;
1638 	size_t len;
1639 	int error;
1640 
1641 	len = sizeof(struct mqueue_msg) + msg_size;
1642 	msg = malloc(len, M_MQUEUEDATA, M_WAITOK);
1643 	error = copyin(msg_ptr, ((char *)msg) + sizeof(struct mqueue_msg),
1644 	    msg_size);
1645 	if (error) {
1646 		free(msg, M_MQUEUEDATA);
1647 		msg = NULL;
1648 	} else {
1649 		msg->msg_size = msg_size;
1650 		msg->msg_prio = msg_prio;
1651 	}
1652 	return (msg);
1653 }
1654 
1655 /*
1656  * Save a message to user space
1657  */
1658 static int
1659 mqueue_savemsg(struct mqueue_msg *msg, char *msg_ptr, int *msg_prio)
1660 {
1661 	int error;
1662 
1663 	error = copyout(((char *)msg) + sizeof(*msg), msg_ptr,
1664 		msg->msg_size);
1665 	if (error == 0 && msg_prio != NULL)
1666 		error = copyout(&msg->msg_prio, msg_prio, sizeof(int));
1667 	return (error);
1668 }
1669 
/*
 * Free a message's memory
 *
 * Header and payload were obtained as one M_MQUEUEDATA allocation
 * (see mqueue_loadmsg()), so a single free() releases both.
 */
static __inline void
mqueue_freemsg(struct mqueue_msg *msg)
{
	free(msg, M_MQUEUEDATA);
}
1678 
/*
 * Send a message.  If waitok is false the thread is not blocked when
 * the queue is full; otherwise it may sleep, and abs_timeout (an
 * absolute time, or NULL to wait forever) bounds the wait.
 *
 * Returns 0 on success.  Errors: EINVAL for a priority >= MQ_PRIO_MAX or
 * an abs_timeout whose tv_nsec is out of range, EMSGSIZE when msg_len
 * exceeds the queue's message size, EFAULT when the message cannot be
 * loaded from user space, ETIMEDOUT when the deadline passes, or whatever
 * _mqueue_send() reports.  On any failure after loading, the message is
 * freed here; on success it is left on the queue.
 */
int
mqueue_send(struct mqueue *mq, const char *msg_ptr,
	size_t msg_len, unsigned msg_prio, int waitok,
	const struct timespec *abs_timeout)
{
	struct mqueue_msg *msg;
	struct timespec ts, ts2;
	struct timeval tv;
	int error;

	if (msg_prio >= MQ_PRIO_MAX)
		return (EINVAL);
	if (msg_len > mq->mq_msgsize)
		return (EMSGSIZE);
	msg = mqueue_loadmsg(msg_ptr, msg_len, msg_prio);
	if (msg == NULL)
		return (EFAULT);

	/* O_NONBLOCK case */
	if (!waitok) {
		/* A negative timo makes _mqueue_send() return EAGAIN when full. */
		error = _mqueue_send(mq, msg, -1);
		if (error)
			goto bad;
		return (0);
	}

	/* we allow a null timeout (wait forever) */
	if (abs_timeout == NULL) {
		error = _mqueue_send(mq, msg, 0);
		if (error)
			goto bad;
		return (0);
	}

	/* send it before checking time */
	error = _mqueue_send(mq, msg, -1);
	if (error == 0)
		return (0);

	if (error != EAGAIN)
		goto bad;

	/* The timeout is only validated once we actually need to wait. */
	if (abs_timeout->tv_nsec >= 1000000000 || abs_timeout->tv_nsec < 0) {
		error = EINVAL;
		goto bad;
	}
	for (;;) {
		/* Recompute the time remaining until the absolute deadline. */
		ts2 = *abs_timeout;
		getnanotime(&ts);
		timespecsub(&ts2, &ts);
		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
			error = ETIMEDOUT;
			break;
		}
		TIMESPEC_TO_TIMEVAL(&tv, &ts2);
		error = _mqueue_send(mq, msg, tvtohz(&tv));
		/* On ETIMEDOUT, loop to re-check against the real deadline. */
		if (error != ETIMEDOUT)
			break;
	}
	if (error == 0)
		return (0);
bad:
	/* The message never reached the queue, so it is still ours to free. */
	mqueue_freemsg(msg);
	return (error);
}
1749 
/*
 * Common routine to send a message
 *
 * timo < 0 means do not sleep: EAGAIN is returned if the queue is full.
 * Otherwise timo is passed to msleep() as the sleep timeout (callers pass
 * 0 for the wait-forever case).  An EAGAIN from msleep() is reported as
 * ETIMEDOUT.
 *
 * The queue is kept in descending priority order; a new message goes
 * after all queued messages of the same or higher priority.  After
 * queueing, one sleeping receiver is woken if there is one, otherwise a
 * registered notifier is fired; select and kqueue readers are notified
 * as well.
 */
static int
_mqueue_send(struct mqueue *mq, struct mqueue_msg *msg, int timo)
{
	struct mqueue_msg *msg2;
	int error = 0;

	mtx_lock(&mq->mq_mutex);
	/* Wait for room, unless told not to block or the sleep fails. */
	while (mq->mq_curmsgs >= mq->mq_maxmsg && error == 0) {
		if (timo < 0) {
			mtx_unlock(&mq->mq_mutex);
			return (EAGAIN);
		}
		mq->mq_senders++;
		error = msleep(&mq->mq_senders, &mq->mq_mutex,
			    PCATCH, "mqsend", timo);
		mq->mq_senders--;
		if (error == EAGAIN)
			error = ETIMEDOUT;
	}
	/* Still full: the sleep ended with an error, report it. */
	if (mq->mq_curmsgs >= mq->mq_maxmsg) {
		mtx_unlock(&mq->mq_mutex);
		return (error);
	}
	/* There is room now, so any sleep error is discarded. */
	error = 0;
	if (TAILQ_EMPTY(&mq->mq_msgq)) {
		TAILQ_INSERT_HEAD(&mq->mq_msgq, msg, msg_link);
	} else {
		if (msg->msg_prio <= TAILQ_LAST(&mq->mq_msgq, msgq)->msg_prio) {
			TAILQ_INSERT_TAIL(&mq->mq_msgq, msg, msg_link);
		} else {
			/*
			 * The tail has lower priority than msg, so this
			 * search always stops on some entry.
			 */
			TAILQ_FOREACH(msg2, &mq->mq_msgq, msg_link) {
				if (msg2->msg_prio < msg->msg_prio)
					break;
			}
			TAILQ_INSERT_BEFORE(msg2, msg, msg_link);
		}
	}
	mq->mq_curmsgs++;
	mq->mq_totalbytes += msg->msg_size;
	if (mq->mq_receivers)
		wakeup_one(&mq->mq_receivers);
	else if (mq->mq_notifier != NULL)
		mqueue_send_notification(mq);
	if (mq->mq_flags & MQ_RSEL) {
		mq->mq_flags &= ~MQ_RSEL;
		selwakeup(&mq->mq_rsel);
	}
	KNOTE_LOCKED(&mq->mq_rsel.si_note, 0);
	mtx_unlock(&mq->mq_mutex);
	return (0);
}
1804 
1805 /*
1806  * Send realtime a signal to process which registered itself
1807  * successfully by mq_notify.
1808  */
1809 static void
1810 mqueue_send_notification(struct mqueue *mq)
1811 {
1812 	struct mqueue_notifier *nt;
1813 	struct thread *td;
1814 	struct proc *p;
1815 	int error;
1816 
1817 	mtx_assert(&mq->mq_mutex, MA_OWNED);
1818 	nt = mq->mq_notifier;
1819 	if (nt->nt_sigev.sigev_notify != SIGEV_NONE) {
1820 		p = nt->nt_proc;
1821 		error = sigev_findtd(p, &nt->nt_sigev, &td);
1822 		if (error) {
1823 			mq->mq_notifier = NULL;
1824 			return;
1825 		}
1826 		if (!KSI_ONQ(&nt->nt_ksi)) {
1827 			ksiginfo_set_sigev(&nt->nt_ksi, &nt->nt_sigev);
1828 			tdsendsignal(p, td, nt->nt_ksi.ksi_signo, &nt->nt_ksi);
1829 		}
1830 		PROC_UNLOCK(p);
1831 	}
1832 	mq->mq_notifier = NULL;
1833 }
1834 
/*
 * Get a message. if waitok is false, thread will not be
 * blocked if there is no data in queue, otherwise, absolute
 * time will be checked.
 *
 * msg_len must be at least the queue's message size, else EMSGSIZE.
 * abs_timeout may be NULL to wait forever.  On success the message is
 * copied to msg_ptr (and its priority to msg_prio when that is not NULL)
 * and its size is stored in td_retval[0] of the current thread.
 *
 * The message is removed from the queue before it is copied out and is
 * freed whether or not the copy succeeds.
 */
int
mqueue_receive(struct mqueue *mq, char *msg_ptr,
	size_t msg_len, unsigned *msg_prio, int waitok,
	const struct timespec *abs_timeout)
{
	struct mqueue_msg *msg;
	struct timespec ts, ts2;
	struct timeval tv;
	int error;

	if (msg_len < mq->mq_msgsize)
		return (EMSGSIZE);

	/* O_NONBLOCK case */
	if (!waitok) {
		/* A negative timo makes _mqueue_recv() return EAGAIN when empty. */
		error = _mqueue_recv(mq, &msg, -1);
		if (error)
			return (error);
		goto received;
	}

	/* we allow a null timeout (wait forever). */
	if (abs_timeout == NULL) {
		error = _mqueue_recv(mq, &msg, 0);
		if (error)
			return (error);
		goto received;
	}

	/* try to get a message before checking time */
	error = _mqueue_recv(mq, &msg, -1);
	if (error == 0)
		goto received;

	if (error != EAGAIN)
		return (error);

	/* The timeout is only validated once we actually need to wait. */
	if (abs_timeout->tv_nsec >= 1000000000 || abs_timeout->tv_nsec < 0) {
		error = EINVAL;
		return (error);
	}

	for (;;) {
		/* Recompute the time remaining until the absolute deadline. */
		ts2 = *abs_timeout;
		getnanotime(&ts);
		timespecsub(&ts2, &ts);
		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
			error = ETIMEDOUT;
			return (error);
		}
		TIMESPEC_TO_TIMEVAL(&tv, &ts2);
		error = _mqueue_recv(mq, &msg, tvtohz(&tv));
		if (error == 0)
			break;
		/* On ETIMEDOUT, loop to re-check against the real deadline. */
		if (error != ETIMEDOUT)
			return (error);
	}

received:
	error = mqueue_savemsg(msg, msg_ptr, msg_prio);
	if (error == 0) {
		curthread->td_retval[0] = msg->msg_size;
		curthread->td_retval[1] = 0;
	}
	mqueue_freemsg(msg);
	return (error);
}
1907 
/*
 * Common routine to receive a message
 *
 * timo < 0 means do not sleep: EAGAIN is returned if the queue is empty.
 * Otherwise timo is passed to msleep() as the sleep timeout (callers pass
 * 0 for the wait-forever case).  An EAGAIN from msleep() is reported as
 * ETIMEDOUT.
 *
 * On success *msg is the message taken from the head of the queue and
 * ownership passes to the caller.  One sleeping sender is woken, and
 * select and kqueue writers are notified.
 */
static int
_mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg, int timo)
{
	int error = 0;

	mtx_lock(&mq->mq_mutex);
	/* Wait for a message, unless told not to block or the sleep fails. */
	while ((*msg = TAILQ_FIRST(&mq->mq_msgq)) == NULL && error == 0) {
		if (timo < 0) {
			mtx_unlock(&mq->mq_mutex);
			return (EAGAIN);
		}
		mq->mq_receivers++;
		error = msleep(&mq->mq_receivers, &mq->mq_mutex,
			    PCATCH, "mqrecv", timo);
		mq->mq_receivers--;
		if (error == EAGAIN)
			error = ETIMEDOUT;
	}
	if (*msg != NULL) {
		/* A message is available, so any sleep error is discarded. */
		error = 0;
		TAILQ_REMOVE(&mq->mq_msgq, *msg, msg_link);
		mq->mq_curmsgs--;
		mq->mq_totalbytes -= (*msg)->msg_size;
		if (mq->mq_senders)
			wakeup_one(&mq->mq_senders);
		if (mq->mq_flags & MQ_WSEL) {
			mq->mq_flags &= ~MQ_WSEL;
			selwakeup(&mq->mq_wsel);
		}
		KNOTE_LOCKED(&mq->mq_wsel.si_note, 0);
	}
	/*
	 * If a notifier is registered, no receiver is sleeping and
	 * messages remain queued, fire the notification now.
	 */
	if (mq->mq_notifier != NULL && mq->mq_receivers == 0 &&
	    !TAILQ_EMPTY(&mq->mq_msgq)) {
		mqueue_send_notification(mq);
	}
	mtx_unlock(&mq->mq_mutex);
	return (error);
}
1949 
/*
 * Allocate a zero-filled notifier from mqnoti_zone.  The allocation uses
 * M_WAITOK.
 */
static __inline struct mqueue_notifier *
notifier_alloc(void)
{
	return (uma_zalloc(mqnoti_zone, M_WAITOK | M_ZERO));
}
1955 
/*
 * Return a notifier to mqnoti_zone.
 */
static __inline void
notifier_free(struct mqueue_notifier *p)
{
	uma_zfree(mqnoti_zone, p);
}
1961 
1962 static struct mqueue_notifier *
1963 notifier_search(struct proc *p, int fd)
1964 {
1965 	struct mqueue_notifier *nt;
1966 
1967 	LIST_FOREACH(nt, &p->p_mqnotifier, nt_link) {
1968 		if (nt->nt_ksi.ksi_mqd == fd)
1969 			break;
1970 	}
1971 	return (nt);
1972 }
1973 
/*
 * Add a notifier to the head of the process's notifier list.
 */
static __inline void
notifier_insert(struct proc *p, struct mqueue_notifier *nt)
{
	LIST_INSERT_HEAD(&p->p_mqnotifier, nt, nt_link);
}
1979 
/*
 * Unlink a notifier from the list it is on and free it.  The p argument
 * is not used.
 */
static __inline void
notifier_delete(struct proc *p, struct mqueue_notifier *nt)
{
	LIST_REMOVE(nt, nt_link);
	notifier_free(nt);
}
1986 
1987 static void
1988 notifier_remove(struct proc *p, struct mqueue *mq, int fd)
1989 {
1990 	struct mqueue_notifier *nt;
1991 
1992 	mtx_assert(&mq->mq_mutex, MA_OWNED);
1993 	PROC_LOCK(p);
1994 	nt = notifier_search(p, fd);
1995 	if (nt != NULL) {
1996 		if (mq->mq_notifier == nt)
1997 			mq->mq_notifier = NULL;
1998 		sigqueue_take(&nt->nt_ksi);
1999 		notifier_delete(p, nt);
2000 	}
2001 	PROC_UNLOCK(p);
2002 }
2003 
/*
 * Open (and optionally create) a message queue and return a descriptor
 * for it in td_retval[0].
 *
 * upath is a user-space name of the form "/name" with no further slash.
 * With O_CREAT and a non-NULL attr, mq_maxmsg and mq_msgsize must be
 * positive and within maxmsg/maxmsgsize.  An existing queue is subject
 * to an access check for the requested read/write mode; O_CREAT|O_EXCL
 * on an existing queue yields EEXIST.
 *
 * Errors seen here: EINVAL (bad attributes or name), ENOENT (no such
 * queue and no O_CREAT), ENFILE (mqueue_alloc() failed), ENOSPC (node
 * creation failed), plus errors from copyinstr(), falloc() and vaccess().
 */
static int
kern_kmq_open(struct thread *td, const char *upath, int flags, mode_t mode,
    const struct mq_attr *attr)
{
	char path[MQFS_NAMELEN + 1];
	struct mqfs_node *pn;
	struct filedesc *fdp;
	struct file *fp;
	struct mqueue *mq;
	int fd, error, len, cmode;

	fdp = td->td_proc->p_fd;
	/* Creation mode: requested bits minus the umask, never S_ISTXT. */
	cmode = (((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT);
	mq = NULL;
	if ((flags & O_CREAT) != 0 && attr != NULL) {
		if (attr->mq_maxmsg <= 0 || attr->mq_maxmsg > maxmsg)
			return (EINVAL);
		if (attr->mq_msgsize <= 0 || attr->mq_msgsize > maxmsgsize)
			return (EINVAL);
	}

	error = copyinstr(upath, path, MQFS_NAMELEN + 1, NULL);
        if (error)
		return (error);

	/*
	 * The first character of name must be a slash  (/) character
	 * and the remaining characters of name cannot include any slash
	 * characters.
	 */
	len = strlen(path);
	if (len < 2 || path[0] != '/' || strchr(path + 1, '/') != NULL)
		return (EINVAL);

	/* Reserve the descriptor first; it is closed again on any failure. */
	error = falloc(td, &fp, &fd, O_CLOEXEC);
	if (error)
		return (error);

	sx_xlock(&mqfs_data.mi_lock);
	/* Look the name up without its leading slash. */
	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred);
	if (pn == NULL) {
		if (!(flags & O_CREAT)) {
			error = ENOENT;
		} else {
			mq = mqueue_alloc(attr);
			if (mq == NULL) {
				error = ENFILE;
			} else {
				pn = mqfs_create_file(mqfs_data.mi_root,
				         path + 1, len - 1, td->td_ucred,
					 cmode);
				if (pn == NULL) {
					error = ENOSPC;
					mqueue_free(mq);
				}
			}
		}

		/* Attach the new queue to its freshly created node. */
		if (error == 0) {
			pn->mn_data = mq;
		}
	} else {
		if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) {
			error = EEXIST;
		} else {
			accmode_t accmode = 0;

			if (flags & FREAD)
				accmode |= VREAD;
			if (flags & FWRITE)
				accmode |= VWRITE;
			error = vaccess(VREG, pn->mn_mode, pn->mn_uid,
				    pn->mn_gid, accmode, td->td_ucred, NULL);
		}
	}

	if (error) {
		sx_xunlock(&mqfs_data.mi_lock);
		fdclose(td, fp, fd);
		fdrop(fp, td);
		return (error);
	}

	/* Take the node reference for the file while the mqfs lock is held. */
	mqnode_addref(pn);
	sx_xunlock(&mqfs_data.mi_lock);

	finit(fp, flags & (FREAD | FWRITE | O_NONBLOCK), DTYPE_MQUEUE, pn,
	    &mqueueops);

	td->td_retval[0] = fd;
	fdrop(fp, td);
	return (0);
}
2097 
2098 /*
2099  * Syscall to open a message queue.
2100  */
2101 int
2102 sys_kmq_open(struct thread *td, struct kmq_open_args *uap)
2103 {
2104 	struct mq_attr attr;
2105 	int flags, error;
2106 
2107 	if ((uap->flags & O_ACCMODE) == O_ACCMODE || uap->flags & O_EXEC)
2108 		return (EINVAL);
2109 	flags = FFLAGS(uap->flags);
2110 	if ((flags & O_CREAT) != 0 && uap->attr != NULL) {
2111 		error = copyin(uap->attr, &attr, sizeof(attr));
2112 		if (error)
2113 			return (error);
2114 	}
2115 	return (kern_kmq_open(td, uap->path, flags, uap->mode,
2116 	    uap->attr != NULL ? &attr : NULL));
2117 }
2118 
2119 /*
2120  * Syscall to unlink a message queue.
2121  */
2122 int
2123 sys_kmq_unlink(struct thread *td, struct kmq_unlink_args *uap)
2124 {
2125 	char path[MQFS_NAMELEN+1];
2126 	struct mqfs_node *pn;
2127 	int error, len;
2128 
2129 	error = copyinstr(uap->path, path, MQFS_NAMELEN + 1, NULL);
2130         if (error)
2131 		return (error);
2132 
2133 	len = strlen(path);
2134 	if (len < 2 || path[0] != '/' || strchr(path + 1, '/') != NULL)
2135 		return (EINVAL);
2136 
2137 	sx_xlock(&mqfs_data.mi_lock);
2138 	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred);
2139 	if (pn != NULL)
2140 		error = do_unlink(pn, td->td_ucred);
2141 	else
2142 		error = ENOENT;
2143 	sx_xunlock(&mqfs_data.mi_lock);
2144 	return (error);
2145 }
2146 
/* Descriptor lookup routine handed to _getmq(): fget, fget_read or fget_write. */
typedef int (*_fgetf)(struct thread *, int, cap_rights_t *, struct file **);
2148 
2149 /*
2150  * Get message queue by giving file slot
2151  */
2152 static int
2153 _getmq(struct thread *td, int fd, cap_rights_t *rightsp, _fgetf func,
2154        struct file **fpp, struct mqfs_node **ppn, struct mqueue **pmq)
2155 {
2156 	struct mqfs_node *pn;
2157 	int error;
2158 
2159 	error = func(td, fd, rightsp, fpp);
2160 	if (error)
2161 		return (error);
2162 	if (&mqueueops != (*fpp)->f_ops) {
2163 		fdrop(*fpp, td);
2164 		return (EBADF);
2165 	}
2166 	pn = (*fpp)->f_data;
2167 	if (ppn)
2168 		*ppn = pn;
2169 	if (pmq)
2170 		*pmq = pn->mn_data;
2171 	return (0);
2172 }
2173 
2174 static __inline int
2175 getmq(struct thread *td, int fd, struct file **fpp, struct mqfs_node **ppn,
2176 	struct mqueue **pmq)
2177 {
2178 	cap_rights_t rights;
2179 
2180 	return _getmq(td, fd, cap_rights_init(&rights, CAP_EVENT), fget,
2181 	    fpp, ppn, pmq);
2182 }
2183 
2184 static __inline int
2185 getmq_read(struct thread *td, int fd, struct file **fpp,
2186 	 struct mqfs_node **ppn, struct mqueue **pmq)
2187 {
2188 	cap_rights_t rights;
2189 
2190 	return _getmq(td, fd, cap_rights_init(&rights, CAP_READ), fget_read,
2191 	    fpp, ppn, pmq);
2192 }
2193 
2194 static __inline int
2195 getmq_write(struct thread *td, int fd, struct file **fpp,
2196 	struct mqfs_node **ppn, struct mqueue **pmq)
2197 {
2198 	cap_rights_t rights;
2199 
2200 	return _getmq(td, fd, cap_rights_init(&rights, CAP_WRITE), fget_write,
2201 	    fpp, ppn, pmq);
2202 }
2203 
2204 static int
2205 kern_kmq_setattr(struct thread *td, int mqd, const struct mq_attr *attr,
2206     struct mq_attr *oattr)
2207 {
2208 	struct mqueue *mq;
2209 	struct file *fp;
2210 	u_int oflag, flag;
2211 	int error;
2212 
2213 	if (attr != NULL && (attr->mq_flags & ~O_NONBLOCK) != 0)
2214 		return (EINVAL);
2215 	error = getmq(td, mqd, &fp, NULL, &mq);
2216 	if (error)
2217 		return (error);
2218 	oattr->mq_maxmsg  = mq->mq_maxmsg;
2219 	oattr->mq_msgsize = mq->mq_msgsize;
2220 	oattr->mq_curmsgs = mq->mq_curmsgs;
2221 	if (attr != NULL) {
2222 		do {
2223 			oflag = flag = fp->f_flag;
2224 			flag &= ~O_NONBLOCK;
2225 			flag |= (attr->mq_flags & O_NONBLOCK);
2226 		} while (atomic_cmpset_int(&fp->f_flag, oflag, flag) == 0);
2227 	} else
2228 		oflag = fp->f_flag;
2229 	oattr->mq_flags = (O_NONBLOCK & oflag);
2230 	fdrop(fp, td);
2231 	return (error);
2232 }
2233 
2234 int
2235 sys_kmq_setattr(struct thread *td, struct kmq_setattr_args *uap)
2236 {
2237 	struct mq_attr attr, oattr;
2238 	int error;
2239 
2240 	if (uap->attr != NULL) {
2241 		error = copyin(uap->attr, &attr, sizeof(attr));
2242 		if (error != 0)
2243 			return (error);
2244 	}
2245 	error = kern_kmq_setattr(td, uap->mqd, uap->attr != NULL ? &attr : NULL,
2246 	    &oattr);
2247 	if (error == 0 && uap->oattr != NULL) {
2248 		bzero(oattr.__reserved, sizeof(oattr.__reserved));
2249 		error = copyout(&oattr, uap->oattr, sizeof(oattr));
2250 	}
2251 	return (error);
2252 }
2253 
2254 int
2255 sys_kmq_timedreceive(struct thread *td, struct kmq_timedreceive_args *uap)
2256 {
2257 	struct mqueue *mq;
2258 	struct file *fp;
2259 	struct timespec *abs_timeout, ets;
2260 	int error;
2261 	int waitok;
2262 
2263 	error = getmq_read(td, uap->mqd, &fp, NULL, &mq);
2264 	if (error)
2265 		return (error);
2266 	if (uap->abs_timeout != NULL) {
2267 		error = copyin(uap->abs_timeout, &ets, sizeof(ets));
2268 		if (error != 0)
2269 			return (error);
2270 		abs_timeout = &ets;
2271 	} else
2272 		abs_timeout = NULL;
2273 	waitok = !(fp->f_flag & O_NONBLOCK);
2274 	error = mqueue_receive(mq, uap->msg_ptr, uap->msg_len,
2275 		uap->msg_prio, waitok, abs_timeout);
2276 	fdrop(fp, td);
2277 	return (error);
2278 }
2279 
2280 int
2281 sys_kmq_timedsend(struct thread *td, struct kmq_timedsend_args *uap)
2282 {
2283 	struct mqueue *mq;
2284 	struct file *fp;
2285 	struct timespec *abs_timeout, ets;
2286 	int error, waitok;
2287 
2288 	error = getmq_write(td, uap->mqd, &fp, NULL, &mq);
2289 	if (error)
2290 		return (error);
2291 	if (uap->abs_timeout != NULL) {
2292 		error = copyin(uap->abs_timeout, &ets, sizeof(ets));
2293 		if (error != 0)
2294 			return (error);
2295 		abs_timeout = &ets;
2296 	} else
2297 		abs_timeout = NULL;
2298 	waitok = !(fp->f_flag & O_NONBLOCK);
2299 	error = mqueue_send(mq, uap->msg_ptr, uap->msg_len,
2300 		uap->msg_prio, waitok, abs_timeout);
2301 	fdrop(fp, td);
2302 	return (error);
2303 }
2304 
/*
 * Register (sigev != NULL) or remove (sigev == NULL) the calling
 * process's notification request for the queue behind descriptor mqd.
 *
 * Only SIGEV_SIGNAL, SIGEV_THREAD_ID and SIGEV_NONE are accepted, and the
 * first two need a valid signal number (EINVAL otherwise).  EBUSY is
 * returned when the queue already has a notifier.  EBADF is returned
 * when mqd no longer refers to the file that was looked up at entry.
 */
static int
kern_kmq_notify(struct thread *td, int mqd, struct sigevent *sigev)
{
#ifdef CAPABILITIES
	cap_rights_t rights;
#endif
	struct filedesc *fdp;
	struct proc *p;
	struct mqueue *mq;
	struct file *fp, *fp2;
	struct mqueue_notifier *nt, *newnt = NULL;
	int error;

	if (sigev != NULL) {
		if (sigev->sigev_notify != SIGEV_SIGNAL &&
		    sigev->sigev_notify != SIGEV_THREAD_ID &&
		    sigev->sigev_notify != SIGEV_NONE)
			return (EINVAL);
		if ((sigev->sigev_notify == SIGEV_SIGNAL ||
		    sigev->sigev_notify == SIGEV_THREAD_ID) &&
		    !_SIG_VALID(sigev->sigev_signo))
			return (EINVAL);
	}
	p = td->td_proc;
	fdp = td->td_proc->p_fd;
	error = getmq(td, mqd, &fp, NULL, &mq);
	if (error)
		return (error);
again:
	/*
	 * Re-validate the descriptor under the filedesc lock: it must
	 * still map to the same file we obtained above.
	 */
	FILEDESC_SLOCK(fdp);
	fp2 = fget_locked(fdp, mqd);
	if (fp2 == NULL) {
		FILEDESC_SUNLOCK(fdp);
		error = EBADF;
		goto out;
	}
#ifdef CAPABILITIES
	error = cap_check(cap_rights(fdp, mqd),
	    cap_rights_init(&rights, CAP_EVENT));
	if (error) {
		FILEDESC_SUNLOCK(fdp);
		goto out;
	}
#endif
	if (fp2 != fp) {
		FILEDESC_SUNLOCK(fdp);
		error = EBADF;
		goto out;
	}
	/* Take the queue mutex before letting go of the filedesc lock. */
	mtx_lock(&mq->mq_mutex);
	FILEDESC_SUNLOCK(fdp);
	if (sigev != NULL) {
		if (mq->mq_notifier != NULL) {
			error = EBUSY;
		} else {
			PROC_LOCK(p);
			nt = notifier_search(p, mqd);
			if (nt == NULL) {
				/*
				 * A new notifier is needed.  notifier_alloc()
				 * uses M_WAITOK, so drop both locks, allocate,
				 * and redo the checks from the top.
				 */
				if (newnt == NULL) {
					PROC_UNLOCK(p);
					mtx_unlock(&mq->mq_mutex);
					newnt = notifier_alloc();
					goto again;
				}
			}

			if (nt != NULL) {
				/*
				 * Reuse the process's existing notifier for
				 * this descriptor; a preallocated one is no
				 * longer needed.
				 */
				sigqueue_take(&nt->nt_ksi);
				if (newnt != NULL) {
					notifier_free(newnt);
					newnt = NULL;
				}
			} else {
				/* Install the preallocated notifier. */
				nt = newnt;
				newnt = NULL;
				ksiginfo_init(&nt->nt_ksi);
				nt->nt_ksi.ksi_flags |= KSI_INS | KSI_EXT;
				nt->nt_ksi.ksi_code = SI_MESGQ;
				nt->nt_proc = p;
				nt->nt_ksi.ksi_mqd = mqd;
				notifier_insert(p, nt);
			}
			nt->nt_sigev = *sigev;
			mq->mq_notifier = nt;
			PROC_UNLOCK(p);
			/*
			 * if there is no receivers and message queue
			 * is not empty, we should send notification
			 * as soon as possible.
			 */
			if (mq->mq_receivers == 0 &&
			    !TAILQ_EMPTY(&mq->mq_msgq))
				mqueue_send_notification(mq);
		}
	} else {
		notifier_remove(p, mq, mqd);
	}
	mtx_unlock(&mq->mq_mutex);

out:
	fdrop(fp, td);
	/* Free a preallocated notifier that ended up unused. */
	if (newnt != NULL)
		notifier_free(newnt);
	return (error);
}
2410 
2411 int
2412 sys_kmq_notify(struct thread *td, struct kmq_notify_args *uap)
2413 {
2414 	struct sigevent ev, *evp;
2415 	int error;
2416 
2417 	if (uap->sigev == NULL) {
2418 		evp = NULL;
2419 	} else {
2420 		error = copyin(uap->sigev, &ev, sizeof(ev));
2421 		if (error != 0)
2422 			return (error);
2423 		evp = &ev;
2424 	}
2425 	return (kern_kmq_notify(td, uap->mqd, evp));
2426 }
2427 
2428 static void
2429 mqueue_fdclose(struct thread *td, int fd, struct file *fp)
2430 {
2431 	struct filedesc *fdp;
2432 	struct mqueue *mq;
2433 
2434 	fdp = td->td_proc->p_fd;
2435 	FILEDESC_LOCK_ASSERT(fdp);
2436 
2437 	if (fp->f_ops == &mqueueops) {
2438 		mq = FPTOMQ(fp);
2439 		mtx_lock(&mq->mq_mutex);
2440 		notifier_remove(td->td_proc, mq, fd);
2441 
2442 		/* have to wakeup thread in same process */
2443 		if (mq->mq_flags & MQ_RSEL) {
2444 			mq->mq_flags &= ~MQ_RSEL;
2445 			selwakeup(&mq->mq_rsel);
2446 		}
2447 		if (mq->mq_flags & MQ_WSEL) {
2448 			mq->mq_flags &= ~MQ_WSEL;
2449 			selwakeup(&mq->mq_wsel);
2450 		}
2451 		mtx_unlock(&mq->mq_mutex);
2452 	}
2453 }
2454 
2455 static void
2456 mq_proc_exit(void *arg __unused, struct proc *p)
2457 {
2458 	struct filedesc *fdp;
2459 	struct file *fp;
2460 	struct mqueue *mq;
2461 	int i;
2462 
2463 	fdp = p->p_fd;
2464 	FILEDESC_SLOCK(fdp);
2465 	for (i = 0; i < fdp->fd_nfiles; ++i) {
2466 		fp = fget_locked(fdp, i);
2467 		if (fp != NULL && fp->f_ops == &mqueueops) {
2468 			mq = FPTOMQ(fp);
2469 			mtx_lock(&mq->mq_mutex);
2470 			notifier_remove(p, FPTOMQ(fp), i);
2471 			mtx_unlock(&mq->mq_mutex);
2472 		}
2473 	}
2474 	FILEDESC_SUNLOCK(fdp);
2475 	KASSERT(LIST_EMPTY(&p->p_mqnotifier), ("mq notifiers left"));
2476 }
2477 
2478 static int
2479 mqf_poll(struct file *fp, int events, struct ucred *active_cred,
2480 	struct thread *td)
2481 {
2482 	struct mqueue *mq = FPTOMQ(fp);
2483 	int revents = 0;
2484 
2485 	mtx_lock(&mq->mq_mutex);
2486 	if (events & (POLLIN | POLLRDNORM)) {
2487 		if (mq->mq_curmsgs) {
2488 			revents |= events & (POLLIN | POLLRDNORM);
2489 		} else {
2490 			mq->mq_flags |= MQ_RSEL;
2491 			selrecord(td, &mq->mq_rsel);
2492  		}
2493 	}
2494 	if (events & POLLOUT) {
2495 		if (mq->mq_curmsgs < mq->mq_maxmsg)
2496 			revents |= POLLOUT;
2497 		else {
2498 			mq->mq_flags |= MQ_WSEL;
2499 			selrecord(td, &mq->mq_wsel);
2500 		}
2501 	}
2502 	mtx_unlock(&mq->mq_mutex);
2503 	return (revents);
2504 }
2505 
2506 static int
2507 mqf_close(struct file *fp, struct thread *td)
2508 {
2509 	struct mqfs_node *pn;
2510 
2511 	fp->f_ops = &badfileops;
2512 	pn = fp->f_data;
2513 	fp->f_data = NULL;
2514 	sx_xlock(&mqfs_data.mi_lock);
2515 	mqnode_release(pn);
2516 	sx_xunlock(&mqfs_data.mi_lock);
2517 	return (0);
2518 }
2519 
2520 static int
2521 mqf_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
2522 	struct thread *td)
2523 {
2524 	struct mqfs_node *pn = fp->f_data;
2525 
2526 	bzero(st, sizeof *st);
2527 	sx_xlock(&mqfs_data.mi_lock);
2528 	st->st_atim = pn->mn_atime;
2529 	st->st_mtim = pn->mn_mtime;
2530 	st->st_ctim = pn->mn_ctime;
2531 	st->st_birthtim = pn->mn_birth;
2532 	st->st_uid = pn->mn_uid;
2533 	st->st_gid = pn->mn_gid;
2534 	st->st_mode = S_IFIFO | pn->mn_mode;
2535 	sx_xunlock(&mqfs_data.mi_lock);
2536 	return (0);
2537 }
2538 
2539 static int
2540 mqf_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
2541     struct thread *td)
2542 {
2543 	struct mqfs_node *pn;
2544 	int error;
2545 
2546 	error = 0;
2547 	pn = fp->f_data;
2548 	sx_xlock(&mqfs_data.mi_lock);
2549 	error = vaccess(VREG, pn->mn_mode, pn->mn_uid, pn->mn_gid, VADMIN,
2550 	    active_cred, NULL);
2551 	if (error != 0)
2552 		goto out;
2553 	pn->mn_mode = mode & ACCESSPERMS;
2554 out:
2555 	sx_xunlock(&mqfs_data.mi_lock);
2556 	return (error);
2557 }
2558 
2559 static int
2560 mqf_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
2561     struct thread *td)
2562 {
2563 	struct mqfs_node *pn;
2564 	int error;
2565 
2566 	error = 0;
2567 	pn = fp->f_data;
2568 	sx_xlock(&mqfs_data.mi_lock);
2569 	if (uid == (uid_t)-1)
2570 		uid = pn->mn_uid;
2571 	if (gid == (gid_t)-1)
2572 		gid = pn->mn_gid;
2573 	if (((uid != pn->mn_uid && uid != active_cred->cr_uid) ||
2574 	    (gid != pn->mn_gid && !groupmember(gid, active_cred))) &&
2575 	    (error = priv_check_cred(active_cred, PRIV_VFS_CHOWN, 0)))
2576 		goto out;
2577 	pn->mn_uid = uid;
2578 	pn->mn_gid = gid;
2579 out:
2580 	sx_xunlock(&mqfs_data.mi_lock);
2581 	return (error);
2582 }
2583 
2584 static int
2585 mqf_kqfilter(struct file *fp, struct knote *kn)
2586 {
2587 	struct mqueue *mq = FPTOMQ(fp);
2588 	int error = 0;
2589 
2590 	if (kn->kn_filter == EVFILT_READ) {
2591 		kn->kn_fop = &mq_rfiltops;
2592 		knlist_add(&mq->mq_rsel.si_note, kn, 0);
2593 	} else if (kn->kn_filter == EVFILT_WRITE) {
2594 		kn->kn_fop = &mq_wfiltops;
2595 		knlist_add(&mq->mq_wsel.si_note, kn, 0);
2596 	} else
2597 		error = EINVAL;
2598 	return (error);
2599 }
2600 
2601 static void
2602 filt_mqdetach(struct knote *kn)
2603 {
2604 	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2605 
2606 	if (kn->kn_filter == EVFILT_READ)
2607 		knlist_remove(&mq->mq_rsel.si_note, kn, 0);
2608 	else if (kn->kn_filter == EVFILT_WRITE)
2609 		knlist_remove(&mq->mq_wsel.si_note, kn, 0);
2610 	else
2611 		panic("filt_mqdetach");
2612 }
2613 
2614 static int
2615 filt_mqread(struct knote *kn, long hint)
2616 {
2617 	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2618 
2619 	mtx_assert(&mq->mq_mutex, MA_OWNED);
2620 	return (mq->mq_curmsgs != 0);
2621 }
2622 
2623 static int
2624 filt_mqwrite(struct knote *kn, long hint)
2625 {
2626 	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2627 
2628 	mtx_assert(&mq->mq_mutex, MA_OWNED);
2629 	return (mq->mq_curmsgs < mq->mq_maxmsg);
2630 }
2631 
2632 static int
2633 mqf_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
2634 {
2635 
2636 	kif->kf_type = KF_TYPE_MQUEUE;
2637 	return (0);
2638 }
2639 
2640 static struct fileops mqueueops = {
2641 	.fo_read		= invfo_rdwr,
2642 	.fo_write		= invfo_rdwr,
2643 	.fo_truncate		= invfo_truncate,
2644 	.fo_ioctl		= invfo_ioctl,
2645 	.fo_poll		= mqf_poll,
2646 	.fo_kqfilter		= mqf_kqfilter,
2647 	.fo_stat		= mqf_stat,
2648 	.fo_close		= mqf_close,
2649 	.fo_chmod		= mqf_chmod,
2650 	.fo_chown		= mqf_chown,
2651 	.fo_sendfile		= invfo_sendfile,
2652 	.fo_fill_kinfo		= mqf_fill_kinfo,
2653 };
2654 
2655 static struct vop_vector mqfs_vnodeops = {
2656 	.vop_default 		= &default_vnodeops,
2657 	.vop_access		= mqfs_access,
2658 	.vop_cachedlookup	= mqfs_lookup,
2659 	.vop_lookup		= vfs_cache_lookup,
2660 	.vop_reclaim		= mqfs_reclaim,
2661 	.vop_create		= mqfs_create,
2662 	.vop_remove		= mqfs_remove,
2663 	.vop_inactive		= mqfs_inactive,
2664 	.vop_open		= mqfs_open,
2665 	.vop_close		= mqfs_close,
2666 	.vop_getattr		= mqfs_getattr,
2667 	.vop_setattr		= mqfs_setattr,
2668 	.vop_read		= mqfs_read,
2669 	.vop_write		= VOP_EOPNOTSUPP,
2670 	.vop_readdir		= mqfs_readdir,
2671 	.vop_mkdir		= VOP_EOPNOTSUPP,
2672 	.vop_rmdir		= VOP_EOPNOTSUPP
2673 };
2674 
2675 static struct vfsops mqfs_vfsops = {
2676 	.vfs_init 		= mqfs_init,
2677 	.vfs_uninit		= mqfs_uninit,
2678 	.vfs_mount		= mqfs_mount,
2679 	.vfs_unmount		= mqfs_unmount,
2680 	.vfs_root		= mqfs_root,
2681 	.vfs_statfs		= mqfs_statfs,
2682 };
2683 
2684 static struct vfsconf mqueuefs_vfsconf = {
2685 	.vfc_version = VFS_VERSION,
2686 	.vfc_name = "mqueuefs",
2687 	.vfc_vfsops = &mqfs_vfsops,
2688 	.vfc_typenum = -1,
2689 	.vfc_flags = VFCF_SYNTHETIC
2690 };
2691 
2692 static struct syscall_helper_data mq_syscalls[] = {
2693 	SYSCALL_INIT_HELPER(kmq_open),
2694 	SYSCALL_INIT_HELPER(kmq_setattr),
2695 	SYSCALL_INIT_HELPER(kmq_timedsend),
2696 	SYSCALL_INIT_HELPER(kmq_timedreceive),
2697 	SYSCALL_INIT_HELPER(kmq_notify),
2698 	SYSCALL_INIT_HELPER(kmq_unlink),
2699 	SYSCALL_INIT_LAST
2700 };
2701 
2702 #ifdef COMPAT_FREEBSD32
2703 #include <compat/freebsd32/freebsd32.h>
2704 #include <compat/freebsd32/freebsd32_proto.h>
2705 #include <compat/freebsd32/freebsd32_signal.h>
2706 #include <compat/freebsd32/freebsd32_syscall.h>
2707 #include <compat/freebsd32/freebsd32_util.h>
2708 
2709 static void
2710 mq_attr_from32(const struct mq_attr32 *from, struct mq_attr *to)
2711 {
2712 
2713 	to->mq_flags = from->mq_flags;
2714 	to->mq_maxmsg = from->mq_maxmsg;
2715 	to->mq_msgsize = from->mq_msgsize;
2716 	to->mq_curmsgs = from->mq_curmsgs;
2717 }
2718 
2719 static void
2720 mq_attr_to32(const struct mq_attr *from, struct mq_attr32 *to)
2721 {
2722 
2723 	to->mq_flags = from->mq_flags;
2724 	to->mq_maxmsg = from->mq_maxmsg;
2725 	to->mq_msgsize = from->mq_msgsize;
2726 	to->mq_curmsgs = from->mq_curmsgs;
2727 }
2728 
2729 int
2730 freebsd32_kmq_open(struct thread *td, struct freebsd32_kmq_open_args *uap)
2731 {
2732 	struct mq_attr attr;
2733 	struct mq_attr32 attr32;
2734 	int flags, error;
2735 
2736 	if ((uap->flags & O_ACCMODE) == O_ACCMODE || uap->flags & O_EXEC)
2737 		return (EINVAL);
2738 	flags = FFLAGS(uap->flags);
2739 	if ((flags & O_CREAT) != 0 && uap->attr != NULL) {
2740 		error = copyin(uap->attr, &attr32, sizeof(attr32));
2741 		if (error)
2742 			return (error);
2743 		mq_attr_from32(&attr32, &attr);
2744 	}
2745 	return (kern_kmq_open(td, uap->path, flags, uap->mode,
2746 	    uap->attr != NULL ? &attr : NULL));
2747 }
2748 
2749 int
2750 freebsd32_kmq_setattr(struct thread *td, struct freebsd32_kmq_setattr_args *uap)
2751 {
2752 	struct mq_attr attr, oattr;
2753 	struct mq_attr32 attr32, oattr32;
2754 	int error;
2755 
2756 	if (uap->attr != NULL) {
2757 		error = copyin(uap->attr, &attr32, sizeof(attr32));
2758 		if (error != 0)
2759 			return (error);
2760 		mq_attr_from32(&attr32, &attr);
2761 	}
2762 	error = kern_kmq_setattr(td, uap->mqd, uap->attr != NULL ? &attr : NULL,
2763 	    &oattr);
2764 	if (error == 0 && uap->oattr != NULL) {
2765 		mq_attr_to32(&oattr, &oattr32);
2766 		bzero(oattr32.__reserved, sizeof(oattr32.__reserved));
2767 		error = copyout(&oattr32, uap->oattr, sizeof(oattr32));
2768 	}
2769 	return (error);
2770 }
2771 
2772 int
2773 freebsd32_kmq_timedsend(struct thread *td,
2774     struct freebsd32_kmq_timedsend_args *uap)
2775 {
2776 	struct mqueue *mq;
2777 	struct file *fp;
2778 	struct timespec32 ets32;
2779 	struct timespec *abs_timeout, ets;
2780 	int error;
2781 	int waitok;
2782 
2783 	error = getmq_write(td, uap->mqd, &fp, NULL, &mq);
2784 	if (error)
2785 		return (error);
2786 	if (uap->abs_timeout != NULL) {
2787 		error = copyin(uap->abs_timeout, &ets32, sizeof(ets32));
2788 		if (error != 0)
2789 			return (error);
2790 		CP(ets32, ets, tv_sec);
2791 		CP(ets32, ets, tv_nsec);
2792 		abs_timeout = &ets;
2793 	} else
2794 		abs_timeout = NULL;
2795 	waitok = !(fp->f_flag & O_NONBLOCK);
2796 	error = mqueue_send(mq, uap->msg_ptr, uap->msg_len,
2797 		uap->msg_prio, waitok, abs_timeout);
2798 	fdrop(fp, td);
2799 	return (error);
2800 }
2801 
2802 int
2803 freebsd32_kmq_timedreceive(struct thread *td,
2804     struct freebsd32_kmq_timedreceive_args *uap)
2805 {
2806 	struct mqueue *mq;
2807 	struct file *fp;
2808 	struct timespec32 ets32;
2809 	struct timespec *abs_timeout, ets;
2810 	int error, waitok;
2811 
2812 	error = getmq_read(td, uap->mqd, &fp, NULL, &mq);
2813 	if (error)
2814 		return (error);
2815 	if (uap->abs_timeout != NULL) {
2816 		error = copyin(uap->abs_timeout, &ets32, sizeof(ets32));
2817 		if (error != 0)
2818 			return (error);
2819 		CP(ets32, ets, tv_sec);
2820 		CP(ets32, ets, tv_nsec);
2821 		abs_timeout = &ets;
2822 	} else
2823 		abs_timeout = NULL;
2824 	waitok = !(fp->f_flag & O_NONBLOCK);
2825 	error = mqueue_receive(mq, uap->msg_ptr, uap->msg_len,
2826 		uap->msg_prio, waitok, abs_timeout);
2827 	fdrop(fp, td);
2828 	return (error);
2829 }
2830 
2831 int
2832 freebsd32_kmq_notify(struct thread *td, struct freebsd32_kmq_notify_args *uap)
2833 {
2834 	struct sigevent ev, *evp;
2835 	struct sigevent32 ev32;
2836 	int error;
2837 
2838 	if (uap->sigev == NULL) {
2839 		evp = NULL;
2840 	} else {
2841 		error = copyin(uap->sigev, &ev32, sizeof(ev32));
2842 		if (error != 0)
2843 			return (error);
2844 		error = convert_sigevent32(&ev32, &ev);
2845 		if (error != 0)
2846 			return (error);
2847 		evp = &ev;
2848 	}
2849 	return (kern_kmq_notify(td, uap->mqd, evp));
2850 }
2851 
2852 static struct syscall_helper_data mq32_syscalls[] = {
2853 	SYSCALL32_INIT_HELPER(freebsd32_kmq_open),
2854 	SYSCALL32_INIT_HELPER(freebsd32_kmq_setattr),
2855 	SYSCALL32_INIT_HELPER(freebsd32_kmq_timedsend),
2856 	SYSCALL32_INIT_HELPER(freebsd32_kmq_timedreceive),
2857 	SYSCALL32_INIT_HELPER(freebsd32_kmq_notify),
2858 	SYSCALL32_INIT_HELPER_COMPAT(kmq_unlink),
2859 	SYSCALL_INIT_LAST
2860 };
2861 #endif
2862 
2863 static int
2864 mqinit(void)
2865 {
2866 	int error;
2867 
2868 	error = syscall_helper_register(mq_syscalls, SY_THR_STATIC_KLD);
2869 	if (error != 0)
2870 		return (error);
2871 #ifdef COMPAT_FREEBSD32
2872 	error = syscall32_helper_register(mq32_syscalls, SY_THR_STATIC_KLD);
2873 	if (error != 0)
2874 		return (error);
2875 #endif
2876 	return (0);
2877 }
2878 
2879 static int
2880 mqunload(void)
2881 {
2882 
2883 #ifdef COMPAT_FREEBSD32
2884 	syscall32_helper_unregister(mq32_syscalls);
2885 #endif
2886 	syscall_helper_unregister(mq_syscalls);
2887 	return (0);
2888 }
2889 
2890 static int
2891 mq_modload(struct module *module, int cmd, void *arg)
2892 {
2893 	int error = 0;
2894 
2895 	error = vfs_modevent(module, cmd, arg);
2896 	if (error != 0)
2897 		return (error);
2898 
2899 	switch (cmd) {
2900 	case MOD_LOAD:
2901 		error = mqinit();
2902 		if (error != 0)
2903 			mqunload();
2904 		break;
2905 	case MOD_UNLOAD:
2906 		error = mqunload();
2907 		break;
2908 	default:
2909 		break;
2910 	}
2911 	return (error);
2912 }
2913 
2914 static moduledata_t mqueuefs_mod = {
2915 	"mqueuefs",
2916 	mq_modload,
2917 	&mqueuefs_vfsconf
2918 };
2919 DECLARE_MODULE(mqueuefs, mqueuefs_mod, SI_SUB_VFS, SI_ORDER_MIDDLE);
2920 MODULE_VERSION(mqueuefs, 1);
2921