xref: /freebsd/sys/kern/uipc_mqueue.c (revision 47bf2cf9fe86c5882b8d805f593e2def1a722f31)
1 /*-
2  * Copyright (c) 2005 David Xu <davidxu@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 
28 /*
29  * POSIX message queue implementation.
30  *
 * 1) A mqueue filesystem can be mounted.  Each message queue appears as a
 *    file in the mounted directory, and a user can change a queue's
 *    permissions and ownership, or remove the queue.  Manually creating a
 *    file in the directory creates a message queue in the kernel with the
 *    same name and the default message queue attributes; this method is
 *    discouraged because the mq_open syscall lets the user specify
 *    different attributes.  The file system can also be mounted multiple
 *    times at different mount points, all of which show the same contents.
 *
 * 2) Standard POSIX message queue API.  The syscalls do not go through the
 *    vfs layer but operate directly on the internal data structures, so
 *    the IPC facility can be used without mounting the mqueue file system.
43  */
44 
45 #include <sys/cdefs.h>
46 __FBSDID("$FreeBSD$");
47 
48 #include <sys/param.h>
49 #include <sys/kernel.h>
50 #include <sys/systm.h>
51 #include <sys/limits.h>
52 #include <sys/buf.h>
53 #include <sys/dirent.h>
54 #include <sys/event.h>
55 #include <sys/fcntl.h>
56 #include <sys/filedesc.h>
57 #include <sys/file.h>
58 #include <sys/limits.h>
59 #include <sys/lock.h>
60 #include <sys/malloc.h>
61 #include <sys/module.h>
62 #include <sys/mount.h>
63 #include <sys/mqueue.h>
64 #include <sys/mutex.h>
65 #include <sys/namei.h>
66 #include <sys/poll.h>
67 #include <sys/proc.h>
68 #include <sys/queue.h>
69 #include <sys/sysproto.h>
70 #include <sys/stat.h>
71 #include <sys/sysent.h>
72 #include <sys/syscall.h>
73 #include <sys/syscallsubr.h>
74 #include <sys/sx.h>
75 #include <sys/sysctl.h>
76 #include <sys/sysctl.h>
77 #include <sys/vnode.h>
78 #include <sys/sysctl.h>
79 #include <sys/taskqueue.h>
80 #include <sys/unistd.h>
81 #include <sys/vnode.h>
82 #include <machine/atomic.h>
83 
84 /*
85  * Limits and constants
86  */
87 #define	MQFS_NAMELEN		NAME_MAX
88 #define MQFS_DELEN		(8 + MQFS_NAMELEN)
89 
/*
 * node types
 *
 * mqfs_allocv() maps root, dir, this and parent to VDIR vnodes, file to
 * VREG and symlink to VLNK.  mqfstype_none is the zero value and is
 * rejected there.
 */
typedef enum {
	mqfstype_none = 0,
	mqfstype_root,
	mqfstype_dir,
	mqfstype_this,
	mqfstype_parent,
	mqfstype_file,
	mqfstype_symlink,
} mqfs_type_t;
100 
101 struct mqfs_node;
102 
/*
 * mqfs_info: describes a mqfs instance
 *
 * mi_lock is taken exclusively around lookup, create, remove, root and
 * reclaim; mi_root is the root directory node built by mqfs_init();
 * mi_unrhdr is the unit-number handle file numbers are allocated from.
 */
struct mqfs_info {
	struct sx		mi_lock;
	struct mqfs_node	*mi_root;
	struct unrhdr		*mi_unrhdr;
};
111 
/*
 * mqfs_vdata: per-vnode private data, stored in the vnode's v_data.
 *
 * Links one vnode (mv_vnode) to the node it represents (mv_node) and sits
 * on that node's mn_vnodes list via mv_link.  mv_task is initialized in
 * mqfs_allocv() to run do_recycle() on the vnode.
 */
struct mqfs_vdata {
	LIST_ENTRY(mqfs_vdata)	mv_link;
	struct mqfs_node	*mv_node;
	struct vnode		*mv_vnode;
	struct task		mv_task;
};
118 
/*
 * mqfs_node: describes a node (file or directory) within a mqfs
 *
 * mn_name      entry name, at most MQFS_NAMELEN characters plus a NUL
 * mn_info      owning mqfs instance
 * mn_parent    containing directory; cleared by do_unlink()
 * mn_children  entries of a directory node
 * mn_sibling   linkage on the parent's mn_children list
 * mn_vnodes    mqfs_vdata records of vnodes that refer to this node
 * mn_refcount  reference count, see mqnode_addref()/mqnode_release()
 * mn_type      node type
 * mn_deleted   set to 1 by do_unlink()
 * mn_fileno    file number; 0 means none has been allocated
 * mn_data      for message queue files, the struct mqueue (see VTOMQ)
 * mn_birth, mn_ctime, mn_atime, mn_mtime
 *              timestamps reported by mqfs_getattr()
 * mn_uid, mn_gid, mn_mode
 *              ownership and permission bits
 */
struct mqfs_node {
	char			mn_name[MQFS_NAMELEN+1];
	struct mqfs_info	*mn_info;
	struct mqfs_node	*mn_parent;
	LIST_HEAD(,mqfs_node)	mn_children;
	LIST_ENTRY(mqfs_node)	mn_sibling;
	LIST_HEAD(,mqfs_vdata)	mn_vnodes;
	int			mn_refcount;
	mqfs_type_t		mn_type;
	int			mn_deleted;
	u_int32_t		mn_fileno;
	void			*mn_data;
	struct timespec		mn_birth;
	struct timespec		mn_ctime;
	struct timespec		mn_atime;
	struct timespec		mn_mtime;
	uid_t			mn_uid;
	gid_t			mn_gid;
	int			mn_mode;
};
142 
/* vnode -> mqfs_node, through the mqfs_vdata stored in v_data */
#define	VTON(vp)	(((struct mqfs_vdata *)((vp)->v_data))->mv_node)
/* vnode -> message queue stored in the node's mn_data */
#define VTOMQ(vp) 	((struct mqueue *)(VTON(vp)->mn_data))
/* mount -> mqfs instance stored in mnt_data */
#define	VFSTOMQFS(m)	((struct mqfs_info *)((m)->mnt_data))
/* file -> mn_data of the node named by the file's mqueue_user (f_data) */
#define	FPTOMQ(fp)	(((struct mqueue_user *) 		\
				(fp)->f_data)->mu_node->mn_data)
/* Tail queue head type for the messages of one queue. */
TAILQ_HEAD(msgq, mqueue_msg);
149 
struct mqueue;
/*
 * mqueue_user: per-open-file state, reached through a file's f_data
 * (see FPTOMQ).
 *
 * mu_node is the node whose mn_data is the queue.
 * NOTE(review): mu_sigev, mu_ksi and mu_proc presumably hold the
 * mq_notify() registration and its target process -- confirm against the
 * notification code, which is outside this part of the file.
 */
struct mqueue_user
{
	struct sigevent	mu_sigev;
	struct ksiginfo	mu_ksi;
	struct mqfs_node	*mu_node;
	struct proc	*mu_proc;
};
158 
/*
 * mqueue: one message queue.
 *
 * mq_maxmsg, mq_msgsize, mq_curmsgs and mq_totalbytes are the values
 * mqfs_read() reports as MAXMSG, MSGSIZE, CURMSG and QSIZE.  mq_msgq is
 * the list of queued messages.
 * NOTE(review): mq_flags presumably holds MQ_RSEL/MQ_WSEL, and
 * mq_receivers/mq_senders presumably count blocked threads -- confirm
 * against the send/receive code, which is outside this part of the file.
 */
struct mqueue {
	struct mtx	mq_mutex;
	int		mq_flags;
	long		mq_maxmsg;
	long		mq_msgsize;
	long		mq_curmsgs;
	long		mq_totalbytes;
	struct msgq	mq_msgq;
	int		mq_receivers;
	int		mq_senders;
	struct selinfo	mq_rsel;
	struct selinfo	mq_wsel;
	struct mqueue_user	*mq_notifier;
};
173 
174 #define	MQ_RSEL		0x01
175 #define	MQ_WSEL		0x02
176 
/*
 * mqueue_msg: header of one queued message.  The message body is stored
 * immediately after this header.
 */
struct mqueue_msg {
	TAILQ_ENTRY(mqueue_msg)	msg_link;
	unsigned int	msg_prio;
	unsigned int	msg_size;
	/* following real data... */
};
183 
/* kern.mqueue sysctl subtree. */
SYSCTL_NODE(_kern, OID_AUTO, mqueue, CTLFLAG_RW, 0,
	"POSIX real time message queue");

/* Not exported via sysctl in this part of the file. */
static int	default_maxmsg  = 10;
static int	default_msgsize = 1024;

/*
 * NOTE(review): the descriptions of maxmsg and maxmsgsize say "Default",
 * while separate default_* variables exist above -- these look like upper
 * limits; confirm against mqueue_alloc()/mq_open before rewording.
 */
static int	maxmsg = 20;
SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsg, CTLFLAG_RW,
    &maxmsg, 0, "Default maximum messages in queue");
static int	maxmsgsize = 16384;
SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsgsize, CTLFLAG_RW,
    &maxmsgsize, 0, "Default maximum message size");
static int	maxmq = 100;
SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmq, CTLFLAG_RW,
    &maxmq, 0, "maximum message queues");
/*
 * NOTE(review): curmq is described as the current queue count but is
 * exported CTLFLAG_RW, so userland can overwrite it -- confirm whether
 * read-only was intended.
 */
static int	curmq = 0;
SYSCTL_INT(_kern_mqueue, OID_AUTO, curmq, CTLFLAG_RW,
    &curmq, 0, "current message queue number");
/* mqfs_uninit() returns EOPNOTSUPP while this is zero. */
static int	unloadable = 0;
static MALLOC_DEFINE(M_MQUEUEDATA, "mqdata", "mqueue data");

/* Only one instance per-system */
static struct mqfs_info		mqfs_data;
/* UMA zones, created in mqfs_init() and destroyed in mqfs_fileno_uninit(). */
static uma_zone_t		mqnode_zone;
static uma_zone_t		mqueue_zone;
static uma_zone_t		mvdata_zone;
static uma_zone_t		mquser_zone;
static struct vop_vector	mqfs_vnodeops;
static struct fileops		mqueueops;
213 
214 /*
215  * Directory structure construction and manipulation
216  */
217 #ifdef notyet
218 static struct mqfs_node	*mqfs_create_dir(struct mqfs_node *parent,
219 	const char *name, int namelen);
220 #endif
221 
222 static struct mqfs_node	*mqfs_create_file(struct mqfs_node *parent,
223 	const char *name, int namelen);
224 struct mqfs_node	*mqfs_create_link(struct mqfs_node *parent,
225 	const char *name, int namelen);
226 static int	mqfs_destroy(struct mqfs_node *mn);
227 static void	mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn);
228 static void	mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn);
229 static int	mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn);
230 
231 /*
 * Message queue construction and manipulation
233  */
234 static struct mqueue	*mqueue_alloc(const struct mq_attr *attr);
235 static void	mqueue_free(struct mqueue *mq);
236 static int	mqueue_send(struct mqueue *mq, const char *msg_ptr,
237 			size_t msg_len, unsigned msg_prio, int waitok,
238 			const struct timespec *abs_timeout);
239 static int	mqueue_receive(struct mqueue *mq, char *msg_ptr,
240 			size_t msg_len, unsigned *msg_prio, int waitok,
241 			const struct timespec *abs_timeout);
242 static int	_mqueue_send(struct mqueue *mq, struct mqueue_msg *msg,
243 			int timo);
244 static int	_mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg,
245 			int timo);
246 static void	mqueue_send_notification(struct mqueue *mq);
247 
/* kqueue filters */
static void	filt_mqdetach(struct knote *kn);
static int	filt_mqread(struct knote *kn, long hint);
static int	filt_mqwrite(struct knote *kn, long hint);

/*
 * Read and write filter tables; both share filt_mqdetach.
 * NOTE(review): the initializers are positional -- presumably f_isfd,
 * f_attach, f_detach, f_event; confirm against struct filterops.
 */
struct filterops mq_rfiltops =
	{ 1, NULL, filt_mqdetach, filt_mqread };
struct filterops mq_wfiltops =
	{ 1, NULL, filt_mqdetach, filt_mqwrite };
257 
258 /*
259  * Initialize fileno bitmap
260  */
261 static void
262 mqfs_fileno_init(struct mqfs_info *mi)
263 {
264 	struct unrhdr *up;
265 
266 	up = new_unrhdr(1, INT_MAX, NULL);
267 	mi->mi_unrhdr = up;
268 }
269 
270 /*
271  * Tear down fileno bitmap
272  */
273 static void
274 mqfs_fileno_uninit(struct mqfs_info *mi)
275 {
276 	struct unrhdr *up;
277 
278 	up = mi->mi_unrhdr;
279 	mi->mi_unrhdr = NULL;
280 	delete_unrhdr(up);
281 
282 	uma_zdestroy(mqnode_zone);
283 	uma_zdestroy(mqueue_zone);
284 	uma_zdestroy(mvdata_zone);
285 	uma_zdestroy(mquser_zone);
286 }
287 
288 /*
289  * Allocate a file number
290  */
291 void
292 mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn)
293 {
294 	/* make sure our parent has a file number */
295 	if (mn->mn_parent && !mn->mn_parent->mn_fileno)
296 		mqfs_fileno_alloc(mi, mn->mn_parent);
297 
298 	switch (mn->mn_type) {
299 	case mqfstype_root:
300 	case mqfstype_dir:
301 	case mqfstype_file:
302 	case mqfstype_symlink:
303 		mn->mn_fileno = alloc_unr(mi->mi_unrhdr);
304 		break;
305 	case mqfstype_this:
306 		KASSERT(mn->mn_parent != NULL,
307 		    ("mqfstype_this node has no parent"));
308 		mn->mn_fileno = mn->mn_parent->mn_fileno;
309 		break;
310 	case mqfstype_parent:
311 		KASSERT(mn->mn_parent != NULL,
312 		    ("mqfstype_parent node has no parent"));
313 		if (mn->mn_parent == mi->mi_root) {
314 			mn->mn_fileno = mn->mn_parent->mn_fileno;
315 			break;
316 		}
317 		KASSERT(mn->mn_parent->mn_parent != NULL,
318 		    ("mqfstype_parent node has no grandparent"));
319 		mn->mn_fileno = mn->mn_parent->mn_parent->mn_fileno;
320 		break;
321 	default:
322 		KASSERT(0,
323 		    ("mqfs_fileno_alloc() called for unknown type node: %d",
324 			mn->mn_type));
325 		break;
326 	}
327 }
328 
329 /*
330  * Release a file number
331  */
332 void
333 mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn)
334 {
335 	switch (mn->mn_type) {
336 	case mqfstype_root:
337 	case mqfstype_dir:
338 	case mqfstype_file:
339 	case mqfstype_symlink:
340 		free_unr(mi->mi_unrhdr, mn->mn_fileno);
341 		break;
342 	case mqfstype_this:
343 	case mqfstype_parent:
344 		/* ignore these, as they don't "own" their file number */
345 		break;
346 	default:
347 		KASSERT(0,
348 		    ("mqfs_fileno_free() called for unknown type node: %d",
349 			mn->mn_type));
350 		break;
351 	}
352 }
353 
/*
 * Allocate a zero-filled node from mqnode_zone.  The allocation uses
 * M_WAITOK.
 */
static __inline struct mqfs_node *
mqnode_alloc(void)
{
	return uma_zalloc(mqnode_zone, M_WAITOK | M_ZERO);
}
359 
/*
 * Return a node to mqnode_zone.  Nothing the node points to is released
 * here.
 */
static __inline void
mqnode_free(struct mqfs_node *node)
{
	uma_zfree(mqnode_zone, node);
}
365 
/*
 * Take one reference on a node (atomic increment of mn_refcount).
 */
static __inline void
mqnode_addref(struct mqfs_node *node)
{
	atomic_fetchadd_int(&node->mn_refcount, 1);
}
371 
372 static __inline void
373 mqnode_release(struct mqfs_node *node)
374 {
375 	int old, exp;
376 
377 	old = atomic_fetchadd_int(&node->mn_refcount, -1);
378 	if (node->mn_type == mqfstype_dir ||
379 	    node->mn_type == mqfstype_root)
380 		exp = 3; /* include . and .. */
381 	else
382 		exp = 1;
383 	if (old == exp)
384 		mqfs_destroy(node);
385 }
386 
387 /*
388  * Add a node to a directory
389  */
390 static int
391 mqfs_add_node(struct mqfs_node *parent, struct mqfs_node *node)
392 {
393 	KASSERT(parent != NULL, ("%s(): parent is NULL", __func__));
394 	KASSERT(parent->mn_info != NULL,
395 	    ("%s(): parent has no mn_info", __func__));
396 	KASSERT(parent->mn_type == mqfstype_dir ||
397 	    parent->mn_type == mqfstype_root,
398 	    ("%s(): parent is not a directory", __func__));
399 
400 	node->mn_info = parent->mn_info;
401 	node->mn_parent = parent;
402 	LIST_INIT(&node->mn_children);
403 	LIST_INIT(&node->mn_vnodes);
404 	LIST_INSERT_HEAD(&parent->mn_children, node, mn_sibling);
405 	mqnode_addref(parent);
406 	return (0);
407 }
408 
409 /*
410  * Add . and .. to a directory
411  */
412 static int
413 mqfs_fixup_dir(struct mqfs_node *parent)
414 {
415 	struct mqfs_node *dir;
416 
417 	dir = mqnode_alloc();
418 	dir->mn_name[0] = '.';
419 	dir->mn_type = mqfstype_this;
420 	dir->mn_refcount = 1;
421 	if (mqfs_add_node(parent, dir) != 0) {
422 		mqnode_free(dir);
423 		return (-1);
424 	}
425 
426 	dir = mqnode_alloc();
427 	dir->mn_name[0] = dir->mn_name[1] = '.';
428 	dir->mn_type = mqfstype_parent;
429 	dir->mn_refcount = 1;
430 
431 	if (mqfs_add_node(parent, dir) != 0) {
432 		mqnode_free(dir);
433 		return (-1);
434 	}
435 
436 	return (0);
437 }
438 
#ifdef notyet

/*
 * Create a directory node named name[0..namelen) under parent and give it
 * "." and ".." entries.
 *
 * Returns the new node, or NULL if it could not be linked or populated.
 * NOTE(review): namelen is not checked against MQFS_NAMELEN here.
 */
struct mqfs_node *
mqfs_create_dir(struct mqfs_node *parent, const char *name, int namelen)
{
	struct mqfs_node *newdir;

	newdir = mqnode_alloc();
	newdir->mn_refcount = 1;
	newdir->mn_type = mqfstype_dir;
	strncpy(newdir->mn_name, name, namelen);

	if (mqfs_add_node(parent, newdir) != 0) {
		mqnode_free(newdir);
		return (NULL);
	}
	if (mqfs_fixup_dir(newdir) == 0)
		return (newdir);

	mqfs_destroy(newdir);
	return (NULL);
}
#endif
466 
467 /*
468  * Create a file
469  */
470 struct mqfs_node *
471 mqfs_create_file(struct mqfs_node *parent, const char *name, int namelen)
472 {
473 	struct mqfs_node *node;
474 
475 	node = mqnode_alloc();
476 	strncpy(node->mn_name, name, namelen);
477 	node->mn_type = mqfstype_file;
478 	node->mn_refcount = 1;
479 
480 	if (mqfs_add_node(parent, node) != 0) {
481 		mqnode_free(node);
482 		return (NULL);
483 	}
484 	return (node);
485 }
486 
487 /*
488  * Create a symlink
489  */
490 struct mqfs_node *
491 mqfs_create_link(struct mqfs_node *parent, const char *name, int namelen)
492 {
493 	struct mqfs_node *node;
494 
495 	node = mqfs_create_file(parent, name, namelen);
496 	if (node == NULL)
497 		return (NULL);
498 	node->mn_type = mqfstype_symlink;
499 	return (node);
500 }
501 
502 /*
503  * Destroy a node or a tree of nodes
504  */
505 int
506 mqfs_destroy(struct mqfs_node *node)
507 {
508 	struct mqfs_node *parent;
509 
510 	KASSERT(node != NULL,
511 	    ("%s(): node is NULL", __func__));
512 	KASSERT(node->mn_info != NULL,
513 	    ("%s(): node has no mn_info", __func__));
514 
515 	/* destroy children */
516 	if (node->mn_type == mqfstype_dir || node->mn_type == mqfstype_root)
517 		while (! LIST_EMPTY(&node->mn_children))
518 			mqfs_destroy(LIST_FIRST(&node->mn_children));
519 
520 	/* unlink from parent */
521 	if ((parent = node->mn_parent) != NULL) {
522 		KASSERT(parent->mn_info == node->mn_info,
523 		    ("%s(): parent has different mn_info", __func__));
524 		LIST_REMOVE(node, mn_sibling);
525 	}
526 
527 	if (node->mn_fileno != 0)
528 		mqfs_fileno_free(node->mn_info, node);
529 	if (node->mn_data != NULL)
530 		mqueue_free(node->mn_data);
531 	mqnode_free(node);
532 	return (0);
533 }
534 
535 /*
536  * Mount a mqfs instance
537  */
538 static int
539 mqfs_mount(struct mount *mp, struct thread *td)
540 {
541 	struct statfs *sbp;
542 
543 	if (mp->mnt_flag & MNT_UPDATE)
544 		return (EOPNOTSUPP);
545 
546 	mp->mnt_data = &mqfs_data;
547 	mp->mnt_flag |= MNT_LOCAL;
548 	/* mp->mnt_kern_flag |= MNTK_MPSAFE; */
549 	vfs_getnewfsid(mp);
550 
551 	sbp = &mp->mnt_stat;
552 	vfs_mountedfrom(mp, "mqueue");
553 	sbp->f_bsize = PAGE_SIZE;
554 	sbp->f_iosize = PAGE_SIZE;
555 	sbp->f_blocks = 1;
556 	sbp->f_bfree = 0;
557 	sbp->f_bavail = 0;
558 	sbp->f_files = 1;
559 	sbp->f_ffree = 0;
560 	return (0);
561 }
562 
563 /*
564  * Unmount a mqfs instance
565  */
566 static int
567 mqfs_unmount(struct mount *mp, int mntflags, struct thread *td)
568 {
569 	int error;
570 
571 	error = vflush(mp, 0, (mntflags & MNT_FORCE) ?  FORCECLOSE : 0, td);
572 	return (error);
573 }
574 
575 /*
576  * Return a root vnode
577  */
578 static int
579 mqfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
580 {
581 	struct mqfs_info *mqfs;
582 	int ret;
583 
584 	mqfs = VFSTOMQFS(mp);
585 	sx_xlock(&mqfs->mi_lock);
586 	ret = mqfs_allocv(mp, vpp, mqfs->mi_root);
587 	sx_xunlock(&mqfs->mi_lock);
588 	return (ret);
589 }
590 
/*
 * Return filesystem stats
 *
 * Currently a stub: sbp is not modified and 0 is always returned.
 */
static int
mqfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
{
	/* XXX update statistics */
	return (0);
}
600 
601 /*
602  * Initialize a mqfs instance
603  */
604 static int
605 mqfs_init(struct vfsconf *vfc)
606 {
607 	struct mqfs_node *root;
608 	struct mqfs_info *mi;
609 
610 	mqnode_zone = uma_zcreate("mqnode", sizeof(struct mqfs_node),
611 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
612 	mqueue_zone = uma_zcreate("mqueue", sizeof(struct mqueue),
613 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
614 	mvdata_zone = uma_zcreate("mvdata",
615 		sizeof(struct mqfs_vdata), NULL, NULL, NULL,
616 		NULL, UMA_ALIGN_PTR, 0);
617 	mquser_zone = uma_zcreate("mquser", sizeof(struct mqueue_user),
618 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
619 	mi = &mqfs_data;
620 	sx_init(&mi->mi_lock, "mqfs lock");
621 	/* set up the root diretory */
622 	root = mqnode_alloc();
623 	root->mn_type = mqfstype_root;
624 	root->mn_refcount = 1;
625 	root->mn_name[0] = '/';
626 	root->mn_info = mi;
627 	LIST_INIT(&root->mn_children);
628 	LIST_INIT(&root->mn_vnodes);
629 	root->mn_mode = 01777;
630 	mi->mi_root = root;
631 	mqfs_fileno_init(mi);
632 	mqfs_fileno_alloc(mi, root);
633 	mqfs_fixup_dir(root);
634 	return (0);
635 }
636 
637 /*
638  * Destroy a mqfs instance
639  */
640 static int
641 mqfs_uninit(struct vfsconf *vfc)
642 {
643 	struct mqfs_info *mi;
644 
645 	if (!unloadable)
646 		return (EOPNOTSUPP);
647 	mi = &mqfs_data;
648 	mqfs_destroy(mi->mi_root);
649 	mi->mi_root = NULL;
650 	mqfs_fileno_uninit(mi);
651 	sx_destroy(&mi->mi_lock);
652 	return (0);
653 }
654 
655 /*
656  * task routine
657  */
658 static void
659 do_recycle(void *context, int pending __unused)
660 {
661 	struct vnode *vp = (struct vnode *)context;
662 
663 	vrecycle(vp, curthread);
664 	vdrop(vp);
665 }
666 
/*
 * Allocate a vnode
 *
 * Returns in *vpp an exclusively locked vnode for node pn on mount mp.
 * A vnode already attached to pn for this mount is reused when vget()
 * succeeds; otherwise a new vnode is created, tied to pn through a
 * mqfs_vdata record, and pn gains one reference.  The callers in this
 * file hold the instance lock exclusively.
 *
 * Returns 0 on success or the error from getnewvnode().
 */
int
mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn)
{
	struct mqfs_vdata *vd;
	int error;

	/* Look for a vnode of this node that belongs to the same mount. */
	LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
		if (vd->mv_vnode->v_mount == mp)
			break;
	}

	if (vd != NULL) {
		if (vget(vd->mv_vnode, 0, curthread) == 0) {
			*vpp = vd->mv_vnode;
			vn_lock(*vpp, LK_RETRY | LK_EXCLUSIVE,
			    curthread);
			return (0);
		}
		/* XXX if this can happen, we're in trouble */
		/* Falls through and creates a second vnode for the node. */
	}

	error = getnewvnode("mqueue", mp, &mqfs_vnodeops, vpp);
	if (error)
		return (error);
	/* Not zeroed: every field is set below or by LIST_INSERT_HEAD. */
	vd = uma_zalloc(mvdata_zone, M_WAITOK);
	(*vpp)->v_data = vd;
	vd->mv_vnode = *vpp;
	vd->mv_node = pn;
	TASK_INIT(&vd->mv_task, 0, do_recycle, *vpp);
	LIST_INSERT_HEAD(&pn->mn_vnodes, vd, mv_link);
	/* The vnode holds a reference; mqfs_reclaim() releases it. */
	mqnode_addref(pn);
	switch (pn->mn_type) {
	case mqfstype_root:
		(*vpp)->v_vflag = VV_ROOT;
		/* fall through */
	case mqfstype_dir:
	case mqfstype_this:
	case mqfstype_parent:
		(*vpp)->v_type = VDIR;
		break;
	case mqfstype_file:
		(*vpp)->v_type = VREG;
		break;
	case mqfstype_symlink:
		(*vpp)->v_type = VLNK;
		break;
	case mqfstype_none:
		KASSERT(0, ("mqfs_allocf called for null node\n"));
		/* no break: continues into the panic below */
	default:
		panic("%s has unexpected type: %d", pn->mn_name, pn->mn_type);
	}
	vn_lock(*vpp, LK_RETRY | LK_EXCLUSIVE, curthread);
	return (0);
}
724 
725 /*
726  * Search a directory entry
727  */
728 static struct mqfs_node *
729 mqfs_search(struct mqfs_node *pd, const char *name, int len)
730 {
731 	struct mqfs_node *pn;
732 
733 	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
734 		if (strncmp(pn->mn_name, name, len) == 0)
735 			return (pn);
736 	}
737 	return (NULL);
738 }
739 
/*
 * Look up a file or directory
 *
 * Worker for mqfs_lookup(), which calls it with the instance lock held
 * exclusively.  Resolves the component described by ap->a_cnp inside the
 * directory vnode ap->a_dvp and stores the result in *ap->a_vpp.
 *
 * Returns 0 on success, ENOTDIR if a_dvp is not a directory, ENOENT if
 * the name is too long or not found, EINVAL for a non-LOOKUP operation
 * on "." or ".." as the last component, EIO for ".." on the root,
 * EJUSTRETURN when the caller may create the missing entry, or an error
 * from VOP_ACCESS()/mqfs_allocv().
 */
static int
mqfs_lookupx(struct vop_cachedlookup_args *ap)
{
	struct componentname *cnp;
	struct vnode *dvp, **vpp;
	struct mqfs_node *pd;
	struct mqfs_node *pn;
	int nameiop, flags, error, namelen;
	char *pname;
	struct thread *td;

	cnp = ap->a_cnp;
	vpp = ap->a_vpp;
	dvp = ap->a_dvp;
	pname = cnp->cn_nameptr;
	namelen = cnp->cn_namelen;
	td = cnp->cn_thread;
	flags = cnp->cn_flags;
	nameiop = cnp->cn_nameiop;
	pd = VTON(dvp);
	pn = NULL;
	*vpp = NULLVP;

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/* Search permission on the directory is required. */
	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, cnp->cn_thread);
	if (error)
		return (error);

	/* shortcut: check if the name is too long */
	if (cnp->cn_namelen >= MQFS_NAMELEN)
		return (ENOENT);

	/* self */
	if (namelen == 1 && pname[0] == '.') {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		/* NOTE(review): pn is not read again before the return. */
		pn = pd;
		*vpp = dvp;
		VREF(dvp);
		return (0);
	}

	/* parent */
	if (cnp->cn_flags & ISDOTDOT) {
		if (dvp->v_vflag & VV_ROOT)
			return (EIO);
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		/* dvp is unlocked while the parent vnode is obtained. */
		VOP_UNLOCK(dvp, 0, cnp->cn_thread);
		KASSERT(pd->mn_parent, ("non-root directory has no parent"));
		pn = pd->mn_parent;
		error = mqfs_allocv(dvp->v_mount, vpp, pn);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
		return (error);
	}

	/* named node */
	pn = mqfs_search(pd, pname, namelen);

	/* found */
	if (pn != NULL) {
		/* DELETE */
		if (nameiop == DELETE && (flags & ISLASTCN)) {
			/* Removal needs write permission on the directory. */
			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
			if (error)
				return (error);
			/*
			 * NOTE(review): *vpp was set to NULLVP above and has
			 * not been assigned since, so this branch looks
			 * unreachable -- confirm.
			 */
			if (*vpp == dvp) {
				VREF(dvp);
				*vpp = dvp;
				return (0);
			}
		}

		/* allocate vnode */
		error = mqfs_allocv(dvp->v_mount, vpp, pn);
		if (error == 0 && cnp->cn_flags & MAKEENTRY)
			cache_enter(dvp, *vpp, cnp);
		return (error);
	}

	/* not found */

	/* will create a new entry in the directory ? */
	if ((nameiop == CREATE || nameiop == RENAME) && (flags & LOCKPARENT)
	    && (flags & ISLASTCN)) {
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
		if (error)
			return (error);
		cnp->cn_flags |= SAVENAME;
		return (EJUSTRETURN);
	}
	return (ENOENT);
}
838 
839 #if 0
840 struct vop_lookup_args {
841 	struct vop_generic_args a_gen;
842 	struct vnode *a_dvp;
843 	struct vnode **a_vpp;
844 	struct componentname *a_cnp;
845 };
846 #endif
847 
848 /*
849  * vnode lookup operation
850  */
851 static int
852 mqfs_lookup(struct vop_cachedlookup_args *ap)
853 {
854 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
855 	int rc;
856 
857 	sx_xlock(&mqfs->mi_lock);
858 	rc = mqfs_lookupx(ap);
859 	sx_xunlock(&mqfs->mi_lock);
860 	return (rc);
861 }
862 
863 #if 0
864 struct vop_create_args {
865 	struct vnode *a_dvp;
866 	struct vnode **a_vpp;
867 	struct componentname *a_cnp;
868 	struct vattr *a_vap;
869 };
870 #endif
871 
872 /*
873  * vnode creation operation
874  */
875 static int
876 mqfs_create(struct vop_create_args *ap)
877 {
878 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
879 	struct componentname *cnp = ap->a_cnp;
880 	struct mqfs_node *pd;
881 	struct mqfs_node *pn;
882 	struct mqueue *mq;
883 	int error;
884 
885 	pd = VTON(ap->a_dvp);
886 	if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir)
887 		return (ENOTDIR);
888 	mq = mqueue_alloc(NULL);
889 	if (mq == NULL)
890 		return (EAGAIN);
891 	sx_xlock(&mqfs->mi_lock);
892 #if 0
893 	/* named node */
894 	pn = mqfs_search(pd, cnp->cn_nameptr, cnp->cn_namelen);
895 	if (pn != NULL) {
896 		mqueue_free(mq);
897 		sx_xunlock(&mqfs->mi_lock);
898 		return (EEXIST);
899 	}
900 #else
901 	if ((cnp->cn_flags & HASBUF) == 0)
902 		panic("%s: no name", __func__);
903 #endif
904 	pn = mqfs_create_file(pd, cnp->cn_nameptr, cnp->cn_namelen);
905 	pn->mn_mode = ap->a_vap->va_mode;
906 	pn->mn_uid = cnp->cn_cred->cr_uid;
907 	pn->mn_gid = cnp->cn_cred->cr_gid;
908 	pn->mn_data = mq;
909 	getnanotime(&pn->mn_birth);
910 	pn->mn_ctime = pn->mn_atime = pn->mn_mtime = pn->mn_birth;
911 	/* node attribute */
912 	error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn);
913 	sx_xunlock(&mqfs->mi_lock);
914 	return (error);
915 }
916 
917 /*
918  * Remove an entry
919  */
920 static
921 int do_unlink(struct mqfs_node *pn, struct ucred *ucred)
922 {
923 	struct mqfs_node *parent;
924 	struct mqfs_vdata *vd;
925 	int error = 0;
926 
927 	sx_assert(&pn->mn_info->mi_lock, SX_LOCKED);
928 
929 	if (ucred->cr_uid != pn->mn_uid &&
930 	    (error = suser_cred(ucred, 0)) != 0)
931 		error = EACCES;
932 	else if (!pn->mn_deleted) {
933 		parent = pn->mn_parent;
934 		pn->mn_parent = NULL;
935 		pn->mn_deleted = 1;
936 		LIST_REMOVE(pn, mn_sibling);
937 		LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
938 			cache_purge(vd->mv_vnode);
939 			vhold(vd->mv_vnode);
940 			taskqueue_enqueue(taskqueue_thread, &vd->mv_task);
941 		}
942 		mqnode_release(pn);
943 		mqnode_release(parent);
944 	} else
945 		error = ENOENT;
946 	return (error);
947 }
948 
949 #if 0
950 struct vop_remove_args {
951 	struct vnode *a_dvp;
952 	struct vnode *a_vp;
953 	struct componentname *a_cnp;
954 };
955 #endif
956 
957 /*
958  * vnode removal operation
959  */
960 static int
961 mqfs_remove(struct vop_remove_args *ap)
962 {
963 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
964 	struct mqfs_node *pn;
965 	int error;
966 
967 	if (ap->a_vp->v_type == VDIR)
968                 return (EPERM);
969 	pn = VTON(ap->a_vp);
970 	sx_xlock(&mqfs->mi_lock);
971 	error = do_unlink(pn, ap->a_cnp->cn_cred);
972 	sx_xunlock(&mqfs->mi_lock);
973 	return (error);
974 }
975 
976 #if 0
977 struct vop_inactive_args {
978 	struct vnode *a_vp;
979 	struct thread *a_td;
980 };
981 #endif
982 
983 static int
984 mqfs_inactive(struct vop_inactive_args *ap)
985 {
986 	struct mqfs_node *pn = VTON(ap->a_vp);
987 
988 	if (pn->mn_deleted)
989 		vrecycle(ap->a_vp, ap->a_td);
990 	return (0);
991 }
992 
993 #if 0
994 struct vop_reclaim_args {
995 	struct vop_generic_args a_gen;
996 	struct vnode *a_vp;
997 	struct thread *a_td;
998 };
999 #endif
1000 
1001 static int
1002 mqfs_reclaim(struct vop_reclaim_args *ap)
1003 {
1004 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_vp->v_mount);
1005 	struct vnode *vp = ap->a_vp;
1006 	struct mqfs_node *pn;
1007 	struct mqfs_vdata *vd;
1008 
1009 	vd = vp->v_data;
1010 	pn = vd->mv_node;
1011 	sx_xlock(&mqfs->mi_lock);
1012 	vp->v_data = NULL;
1013 	LIST_REMOVE(vd, mv_link);
1014 	uma_zfree(mvdata_zone, vd);
1015 	mqnode_release(pn);
1016 	sx_xunlock(&mqfs->mi_lock);
1017 	return (0);
1018 }
1019 
1020 #if 0
1021 struct vop_open_args {
1022 	struct vop_generic_args a_gen;
1023 	struct vnode *a_vp;
1024 	int a_mode;
1025 	struct ucred *a_cred;
1026 	struct thread *a_td;
1027 	int a_fdidx;
1028 };
1029 #endif
1030 
/*
 * vnode open operation: nothing to do, always returns 0.
 */
static int
mqfs_open(struct vop_open_args *ap)
{
	return (0);
}
1036 
1037 #if 0
1038 struct vop_close_args {
1039 	struct vop_generic_args a_gen;
1040 	struct vnode *a_vp;
1041 	int a_fflag;
1042 	struct ucred *a_cred;
1043 	struct thread *a_td;
1044 };
1045 #endif
1046 
/*
 * vnode close operation: nothing to do, always returns 0.
 */
static int
mqfs_close(struct vop_close_args *ap)
{
	return (0);
}
1052 
1053 #if 0
1054 struct vop_access_args {
1055 	struct vop_generic_args a_gen;
1056 	struct vnode *a_vp;
1057 	int a_mode;
1058 	struct ucred *a_cred;
1059 	struct thread *a_td;
1060 };
1061 #endif
1062 
1063 /*
1064  * Verify permissions
1065  */
1066 static int
1067 mqfs_access(struct vop_access_args *ap)
1068 {
1069 	struct vnode *vp = ap->a_vp;
1070 	struct vattr vattr;
1071 	int error;
1072 
1073 	error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td);
1074 	if (error)
1075 		return (error);
1076 	error = vaccess(vp->v_type, vattr.va_mode, vattr.va_uid,
1077 	    vattr.va_gid, ap->a_mode, ap->a_cred, NULL);
1078 	return (error);
1079 }
1080 
1081 #if 0
1082 struct vop_getattr_args {
1083 	struct vop_generic_args a_gen;
1084 	struct vnode *a_vp;
1085 	struct vattr *a_vap;
1086 	struct ucred *a_cred;
1087 	struct thread *a_td;
1088 };
1089 #endif
1090 
1091 /*
1092  * Get file attributes
1093  */
1094 static int
1095 mqfs_getattr(struct vop_getattr_args *ap)
1096 {
1097 	struct vnode *vp = ap->a_vp;
1098 	struct mqfs_node *pn = VTON(vp);
1099 	struct vattr *vap = ap->a_vap;
1100 	int error = 0;
1101 
1102 	VATTR_NULL(vap);
1103 	vap->va_type = vp->v_type;
1104 	vap->va_mode = pn->mn_mode;
1105 	vap->va_nlink = 1;
1106 	vap->va_uid = pn->mn_uid;
1107 	vap->va_gid = pn->mn_gid;
1108 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
1109 	vap->va_fileid = pn->mn_fileno;
1110 	vap->va_size = 0;
1111 	vap->va_blocksize = PAGE_SIZE;
1112 	vap->va_bytes = vap->va_size = 0;
1113 	vap->va_atime = pn->mn_atime;
1114 	vap->va_mtime = pn->mn_mtime;
1115 	vap->va_ctime = pn->mn_ctime;
1116 	vap->va_birthtime = pn->mn_birth;
1117 	vap->va_gen = 0;
1118 	vap->va_flags = 0;
1119 	vap->va_rdev = 0;
1120 	vap->va_bytes = 0;
1121 	vap->va_filerev = 0;
1122 	vap->va_vaflags = 0;
1123 	return (error);
1124 }
1125 
1126 #if 0
1127 struct vop_setattr_args {
1128 	struct vop_generic_args a_gen;
1129 	struct vnode *a_vp;
1130 	struct vattr *a_vap;
1131 	struct ucred *a_cred;
1132 	struct thread *a_td;
1133 };
1134 #endif
/*
 * Set attributes
 *
 * Supports changing owner, group, mode and access/modification times of
 * a node.  A request that tries to set any other attribute is rejected
 * with EINVAL.  When anything was changed, the node's ctime is updated.
 *
 * Returns 0 on success, EINVAL as above, or the error from the failing
 * VOP_ACCESS()/suser_cred() check.  Changes applied before a later check
 * fails are not rolled back.
 */
static int
mqfs_setattr(struct vop_setattr_args *ap)
{
	struct mqfs_node *pn;
	struct vattr *vap;
	struct vnode *vp;
	int c, error;
	uid_t uid;
	gid_t gid;

	vap = ap->a_vap;
	vp = ap->a_vp;
	/* Reject attributes this file system cannot change. */
	if ((vap->va_type != VNON) ||
	    (vap->va_nlink != VNOVAL) ||
	    (vap->va_fsid != VNOVAL) ||
	    (vap->va_fileid != VNOVAL) ||
	    (vap->va_blocksize != VNOVAL) ||
	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
	    (vap->va_rdev != VNOVAL) ||
	    ((int)vap->va_bytes != VNOVAL) ||
	    (vap->va_gen != VNOVAL)) {
		return (EINVAL);
	}

	pn = VTON(vp);

	/* c records whether anything was modified. */
	error = c = 0;
	/* VNOVAL means "leave unchanged": fall back to the current value. */
	if (vap->va_uid == (uid_t)VNOVAL)
		uid = pn->mn_uid;
	else
		uid = vap->va_uid;
	if (vap->va_gid == (gid_t)VNOVAL)
		gid = pn->mn_gid;
	else
		gid = vap->va_gid;

	if (uid != pn->mn_uid || gid != pn->mn_gid) {
		/*
		 * To modify the ownership of a file, must possess VADMIN
		 * for that file.
		 */
		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, ap->a_td)))
			return (error);
		/*
		 * The superuser check is skipped only when the caller owns
		 * the node, the owner stays the same, and any new group is
		 * one the caller belongs to.
		 */
		if (((ap->a_cred->cr_uid != pn->mn_uid) || uid != pn->mn_uid ||
		    (gid != pn->mn_gid && !groupmember(gid, ap->a_cred))) &&
		    (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL))
                       != 0)
			return (error);
		pn->mn_uid = uid;
		pn->mn_gid = gid;
		c = 1;
	}

	if (vap->va_mode != (mode_t)VNOVAL) {
		/* Only the owner or the superuser may change the mode. */
		if ((ap->a_cred->cr_uid != pn->mn_uid) &&
		    (error = suser_cred(ap->a_td->td_ucred, SUSER_ALLOWJAIL)))
			return (error);
		pn->mn_mode = vap->va_mode;
		c = 1;
	}

	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
		/* See the comment in ufs_vnops::ufs_setattr(). */
		/* Without VADMIN, VA_UTIMES_NULL plus VWRITE also suffices. */
		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, ap->a_td)) &&
		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
		    (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, ap->a_td))))
			return (error);
		if (vap->va_atime.tv_sec != VNOVAL) {
			pn->mn_atime = vap->va_atime;
		}
		if (vap->va_mtime.tv_sec != VNOVAL) {
			pn->mn_mtime = vap->va_mtime;
		}
		c = 1;
	}
	if (c) {
		vfs_timestamp(&pn->mn_ctime);
	}
	return (0);
}
1218 
1219 #if 0
1220 struct vop_read_args {
1221 	struct vop_generic_args a_gen;
1222 	struct vnode *a_vp;
1223 	struct uio *a_uio;
1224 	int a_ioflag;
1225 	struct ucred *a_cred;
1226 };
1227 #endif
1228 
1229 /*
1230  * Read from a file
1231  */
1232 static int
1233 mqfs_read(struct vop_read_args *ap)
1234 {
1235 	char buf[80];
1236 	struct vnode *vp = ap->a_vp;
1237 	struct uio *uio = ap->a_uio;
1238 	struct mqfs_node *pn;
1239 	struct mqueue *mq;
1240 	int len, error;
1241 
1242 	if (vp->v_type != VREG)
1243 		return (EINVAL);
1244 
1245 	pn = VTON(vp);
1246 	mq = VTOMQ(vp);
1247 	snprintf(buf, sizeof(buf),
1248 		"QSIZE:%-10ld MAXMSG:%-10ld CURMSG:%-10ld MSGSIZE:%-10ld\n",
1249 		mq->mq_totalbytes,
1250 		mq->mq_maxmsg,
1251 		mq->mq_curmsgs,
1252 		mq->mq_msgsize);
1253 	buf[sizeof(buf)-1] = '\0';
1254 	len = strlen(buf);
1255 	error = uiomove_frombuf(buf, len, uio);
1256 	return (error);
1257 }
1258 
1259 #if 0
1260 struct vop_readdir_args {
1261 	struct vop_generic_args a_gen;
1262 	struct vnode *a_vp;
1263 	struct uio *a_uio;
1264 	struct ucred *a_cred;
1265 	int *a_eofflag;
1266 	int *a_ncookies;
1267 	u_long **a_cookies;
1268 };
1269 #endif
1270 
1271 /*
1272  * Return directory entries.
1273  */
1274 static int
1275 mqfs_readdir(struct vop_readdir_args *ap)
1276 {
1277 	struct vnode *vp;
1278 	struct mqfs_info *mi;
1279 	struct mqfs_node *pd;
1280 	struct mqfs_node *pn;
1281 	struct dirent entry;
1282 	struct uio *uio;
1283 	int *tmp_ncookies = NULL;
1284 	off_t offset;
1285 	int error, i;
1286 
1287 	vp = ap->a_vp;
1288 	mi = VFSTOMQFS(vp->v_mount);
1289 	pd = VTON(vp);
1290 	uio = ap->a_uio;
1291 
1292 	if (vp->v_type != VDIR)
1293 		return (ENOTDIR);
1294 
1295 	if (uio->uio_offset < 0)
1296 		return (EINVAL);
1297 
1298 	if (ap->a_ncookies != NULL) {
1299 		tmp_ncookies = ap->a_ncookies;
1300 		*ap->a_ncookies = 0;
1301 		ap->a_ncookies = NULL;
1302         }
1303 
1304 	error = 0;
1305 	offset = 0;
1306 
1307 	sx_xlock(&mi->mi_lock);
1308 
1309 	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
1310 		entry.d_reclen = sizeof(entry);
1311 		if (!pn->mn_fileno)
1312 			mqfs_fileno_alloc(mi, pn);
1313 		entry.d_fileno = pn->mn_fileno;
1314 		for (i = 0; i < MQFS_NAMELEN - 1 && pn->mn_name[i] != '\0'; ++i)
1315 			entry.d_name[i] = pn->mn_name[i];
1316 		entry.d_name[i] = 0;
1317 		entry.d_namlen = i;
1318 		switch (pn->mn_type) {
1319 		case mqfstype_root:
1320 		case mqfstype_dir:
1321 		case mqfstype_this:
1322 		case mqfstype_parent:
1323 			entry.d_type = DT_DIR;
1324 			break;
1325 		case mqfstype_file:
1326 			entry.d_type = DT_REG;
1327 			break;
1328 		case mqfstype_symlink:
1329 			entry.d_type = DT_LNK;
1330 			break;
1331 		default:
1332 			panic("%s has unexpected node type: %d", pn->mn_name,
1333 				pn->mn_type);
1334 		}
1335 		if (entry.d_reclen > uio->uio_resid)
1336                         break;
1337 		if (offset >= uio->uio_offset) {
1338 			error = vfs_read_dirent(ap, &entry, offset);
1339                         if (error)
1340                                 break;
1341                 }
1342                 offset += entry.d_reclen;
1343 	}
1344 	sx_xunlock(&mi->mi_lock);
1345 
1346 	uio->uio_offset = offset;
1347 
1348 	if (tmp_ncookies != NULL)
1349 		ap->a_ncookies = tmp_ncookies;
1350 
1351 	return (error);
1352 }
1353 
1354 #ifdef notyet
1355 
1356 #if 0
1357 struct vop_mkdir_args {
1358 	struct vnode *a_dvp;
1359 	struvt vnode **a_vpp;
1360 	struvt componentname *a_cnp;
1361 	struct vattr *a_vap;
1362 };
1363 #endif
1364 
1365 /*
1366  * Create a directory.
1367  */
1368 static int
1369 mqfs_mkdir(struct vop_mkdir_args *ap)
1370 {
1371 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1372 	struct componentname *cnp = ap->a_cnp;
1373 	struct mqfs_node *pd = VTON(ap->a_dvp);
1374 	struct mqfs_node *pn;
1375 	int error;
1376 
1377 	if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir)
1378 		return (ENOTDIR);
1379 	sx_xlock(&mqfs->mi_lock);
1380 #if 0
1381 	/* named node */
1382 	pn = mqfs_search(pd, cnp->cn_nameptr, cnp->cn_namelen);
1383 	if (pn != NULL) {
1384 		sx_xunlock(&mqfs->mi_lock);
1385 		return (EEXIST);
1386 	}
1387 #else
1388 	if ((cnp->cn_flags & HASBUF) == 0)
1389 		panic("%s: no name", __func__);
1390 #endif
1391 	pn = mqfs_create_dir(pd, cnp->cn_nameptr, cnp->cn_namelen);
1392 	pn->mn_mode = ap->a_vap->va_mode;
1393 	pn->mn_uid = cnp->cn_cred->cr_uid;
1394 	pn->mn_gid = cnp->cn_cred->cr_gid;
1395 	getnanotime(&pn->mn_birth);
1396 	pn->mn_ctime = pn->mn_atime = pn->mn_mtime = pn->mn_birth;
1397 	/* node attribute */
1398 	error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn);
1399 	sx_xunlock(&mqfs->mi_lock);
1400 	return (error);
1401 }
1402 
1403 #if 0
1404 struct vop_rmdir_args {
1405 	struct vnode *a_dvp;
1406 	struct vnode *a_vp;
1407 	struct componentname *a_cnp;
1408 };
1409 #endif
1410 
1411 /*
1412  * Remove a directory.
1413  */
1414 static int
1415 mqfs_rmdir(struct vop_rmdir_args *ap)
1416 {
1417 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1418 	struct mqfs_node *pn = VTON(ap->a_vp);
1419 	struct mqfs_node *pt;
1420 
1421 	if (pn->mn_type != mqfstype_dir)
1422 		return (ENOTDIR);
1423 
1424 	sx_xlock(&mqfs->mi_lock);
1425 	if (pn->mn_deleted) {
1426 		sx_xunlock(&mqfs->mi_lock);
1427 		return (ENOENT);
1428 	}
1429 
1430 	pt = LIST_FIRST(&pn->mn_children);
1431 	pt = LIST_NEXT(pt, mn_sibling);
1432 	pt = LIST_NEXT(pt, mn_sibling);
1433 	if (pt != NULL) {
1434 		sx_xunlock(&mqfs->mi_lock);
1435 		return (ENOTEMPTY);
1436 	}
1437 	pt = pn->mn_parent;
1438 	pn->mn_parent = NULL;
1439 	pn->mn_deleted = 1;
1440 	LIST_REMOVE(pn, mn_sibling);
1441 	mqnode_release(pn);
1442 	mqnode_release(pt);
1443 	sx_xunlock(&mqfs->mi_lock);
1444 	cache_purge(ap->a_vp);
1445 	return (0);
1446 }
1447 
1448 #endif /* notyet */
1449 
1450 /*
1451  * Allocate a message queue
1452  */
1453 static struct mqueue *
1454 mqueue_alloc(const struct mq_attr *attr)
1455 {
1456 	struct mqueue *mq;
1457 
1458 	if (curmq >= maxmq)
1459 		return (NULL);
1460 	mq = uma_zalloc(mqueue_zone, M_WAITOK | M_ZERO);
1461 	TAILQ_INIT(&mq->mq_msgq);
1462 	if (attr != NULL) {
1463 		mq->mq_maxmsg = attr->mq_maxmsg;
1464 		mq->mq_msgsize = attr->mq_msgsize;
1465 	} else {
1466 		mq->mq_maxmsg = default_maxmsg;
1467 		mq->mq_msgsize = default_msgsize;
1468 	}
1469 	mtx_init(&mq->mq_mutex, "mqueue", NULL, MTX_DEF);
1470 	knlist_init(&mq->mq_rsel.si_note, &mq->mq_mutex, NULL, NULL, NULL);
1471 	knlist_init(&mq->mq_wsel.si_note, &mq->mq_mutex, NULL, NULL, NULL);
1472 	atomic_add_int(&curmq, 1);
1473 	return (mq);
1474 }
1475 
1476 /*
1477  * Destroy a message queue
1478  */
1479 static void
1480 mqueue_free(struct mqueue *mq)
1481 {
1482 	struct mqueue_msg *msg;
1483 
1484 	while ((msg = TAILQ_FIRST(&mq->mq_msgq)) != NULL) {
1485 		TAILQ_REMOVE(&mq->mq_msgq, msg, msg_link);
1486 		FREE(msg, M_MQUEUEDATA);
1487 	}
1488 
1489 	mtx_destroy(&mq->mq_mutex);
1490 	knlist_destroy(&mq->mq_rsel.si_note);
1491 	knlist_destroy(&mq->mq_wsel.si_note);
1492 	uma_zfree(mqueue_zone, mq);
1493 	atomic_add_int(&curmq, -1);
1494 }
1495 
1496 /*
1497  * Load a message from user space
1498  */
1499 static struct mqueue_msg *
1500 mqueue_loadmsg(const char *msg_ptr, size_t msg_size, int msg_prio)
1501 {
1502 	struct mqueue_msg *msg;
1503 	size_t len;
1504 	int error;
1505 
1506 	len = sizeof(struct mqueue_msg) + msg_size;
1507 	MALLOC(msg, struct mqueue_msg *, len, M_MQUEUEDATA, M_WAITOK);
1508 	error = copyin(msg_ptr, ((char *)msg) + sizeof(struct mqueue_msg),
1509 	    msg_size);
1510 	if (error) {
1511 		FREE(msg, M_MQUEUEDATA);
1512 		msg = NULL;
1513 	} else {
1514 		msg->msg_size = msg_size;
1515 		msg->msg_prio = msg_prio;
1516 	}
1517 	return (msg);
1518 }
1519 
1520 /*
1521  * Save a message to user space
1522  */
1523 static int
1524 mqueue_savemsg(struct mqueue_msg *msg, char *msg_ptr, int *msg_prio)
1525 {
1526 	int error;
1527 
1528 	error = copyout(((char *)msg) + sizeof(*msg), msg_ptr,
1529 		msg->msg_size);
1530 	if (error == 0)
1531 		error = copyout(&msg->msg_prio, msg_prio, sizeof(int));
1532 	return (error);
1533 }
1534 
1535 /*
1536  * Free a message's memory
1537  */
1538 static __inline void
1539 mqueue_freemsg(struct mqueue_msg *msg)
1540 {
1541 	FREE(msg, M_MQUEUEDATA);
1542 }
1543 
1544 /*
1545  * Send a message. if waitok is false, thread will not be
1546  * blocked if there is no data in queue, otherwise, absolute
1547  * time will be checked.
1548  */
1549 int
1550 mqueue_send(struct mqueue *mq, const char *msg_ptr,
1551 	size_t msg_len, unsigned msg_prio, int waitok,
1552 	const struct timespec *abs_timeout)
1553 {
1554 	struct mqueue_msg *msg;
1555 	struct timespec ets, ts, ts2;
1556 	struct timeval tv;
1557 	int error;
1558 
1559 	if (msg_len > mq->mq_msgsize)
1560 		return (EMSGSIZE);
1561 	msg = mqueue_loadmsg(msg_ptr, msg_len, msg_prio);
1562 	if (msg == NULL)
1563 		return (EFAULT);
1564 
1565 	/* O_NONBLOCK case */
1566 	if (!waitok) {
1567 		error = _mqueue_send(mq, msg, -1);
1568 		if (error)
1569 			goto bad;
1570 		return (0);
1571 	}
1572 
1573 	/* we allow a null timeout (wait forever) */
1574 	if (abs_timeout == NULL) {
1575 		error = _mqueue_send(mq, msg, 0);
1576 		if (error)
1577 			goto bad;
1578 		return (0);
1579 	}
1580 
1581 	/* send it before checking time */
1582 	error = _mqueue_send(mq, msg, -1);
1583 	if (error == 0)
1584 		return (0);
1585 
1586 	if (error != EAGAIN)
1587 		goto bad;
1588 
1589 	error = copyin(abs_timeout, &ets, sizeof(ets));
1590 	if (error != 0)
1591 		goto bad;
1592 	if (ets.tv_nsec >= 1000000000 || ets.tv_nsec < 0) {
1593 		error = EINVAL;
1594 		goto bad;
1595 	}
1596 	for (;;) {
1597 		ts2 = ets;
1598 		getnanouptime(&ts);
1599 		timespecsub(&ts2, &ts);
1600 		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
1601 			error = ETIMEDOUT;
1602 			break;
1603 		}
1604 		TIMESPEC_TO_TIMEVAL(&tv, &ts2);
1605 		error = _mqueue_send(mq, msg, tvtohz(&tv));
1606 		if (error != ETIMEDOUT)
1607 			break;
1608 	}
1609 	if (error == 0)
1610 		return (0);
1611 bad:
1612 	mqueue_freemsg(msg);
1613 	return (error);
1614 }
1615 
1616 /*
1617  * Common routine to send a message
1618  */
1619 static int
1620 _mqueue_send(struct mqueue *mq, struct mqueue_msg *msg, int timo)
1621 {
1622 	struct mqueue_msg *msg2;
1623 	int error = 0;
1624 
1625 	mtx_lock(&mq->mq_mutex);
1626 	while (mq->mq_curmsgs >= mq->mq_maxmsg && error == 0) {
1627 		if (timo < 0) {
1628 			mtx_unlock(&mq->mq_mutex);
1629 			mqueue_freemsg(msg);
1630 			return (EAGAIN);
1631 		}
1632 		mq->mq_senders++;
1633 		error = msleep(&mq->mq_senders, &mq->mq_mutex,
1634 			    PSOCK | PCATCH, "mqsend", timo);
1635 		mq->mq_senders--;
1636 		if (error == EAGAIN)
1637 			error = ETIMEDOUT;
1638 	}
1639 	if (mq->mq_curmsgs >= mq->mq_maxmsg) {
1640 		mtx_unlock(&mq->mq_mutex);
1641 		return (error);
1642 	}
1643 	error = 0;
1644 	if (TAILQ_EMPTY(&mq->mq_msgq)) {
1645 		TAILQ_INSERT_HEAD(&mq->mq_msgq, msg, msg_link);
1646 	} else {
1647 		if (msg->msg_prio <= TAILQ_LAST(&mq->mq_msgq, msgq)->msg_prio) {
1648 			TAILQ_INSERT_TAIL(&mq->mq_msgq, msg, msg_link);
1649 		} else {
1650 			TAILQ_FOREACH(msg2, &mq->mq_msgq, msg_link) {
1651 				if (msg2->msg_prio < msg->msg_prio)
1652 					break;
1653 			}
1654 			TAILQ_INSERT_BEFORE(msg2, msg, msg_link);
1655 		}
1656 	}
1657 	mq->mq_curmsgs++;
1658 	mq->mq_totalbytes += msg->msg_size;
1659 	if (mq->mq_receivers)
1660 		wakeup_one(&mq->mq_receivers);
1661 	else if (mq->mq_notifier != NULL)
1662 		mqueue_send_notification(mq);
1663 	if (mq->mq_flags & MQ_RSEL) {
1664 		mq->mq_flags &= ~MQ_RSEL;
1665 		selwakeuppri(&mq->mq_rsel, PSOCK);
1666 	}
1667 	KNOTE_LOCKED(&mq->mq_rsel.si_note, 0);
1668 	mtx_unlock(&mq->mq_mutex);
1669 	return (0);
1670 }
1671 
1672 /*
1673  * Send realtime a signal to process which registered itself
1674  * successfully by mq_notify.
1675  */
1676 static void
1677 mqueue_send_notification(struct mqueue *mq)
1678 {
1679 	struct mqueue_user *mu;
1680 
1681 	mtx_assert(&mq->mq_mutex, MA_OWNED);
1682 	mu = mq->mq_notifier;
1683 	PROC_LOCK(mu->mu_proc);
1684 	if (!KSI_ONQ(&mu->mu_ksi))
1685 		psignal_event(mu->mu_proc, &mu->mu_sigev, &mu->mu_ksi);
1686 	PROC_UNLOCK(mu->mu_proc);
1687 	mq->mq_notifier = NULL;
1688 }
1689 
1690 /*
1691  * Get a message. if waitok is false, thread will not be
1692  * blocked if there is no data in queue, otherwise, absolute
1693  * time will be checked.
1694  */
1695 int
1696 mqueue_receive(struct mqueue *mq, char *msg_ptr,
1697 	size_t msg_len, unsigned *msg_prio, int waitok,
1698 	const struct timespec *abs_timeout)
1699 {
1700 	struct mqueue_msg *msg;
1701 	struct timespec ets, ts, ts2;
1702 	struct timeval tv;
1703 	int error;
1704 
1705 	if (msg_len < mq->mq_msgsize)
1706 		return (EMSGSIZE);
1707 
1708 	/* O_NONBLOCK case */
1709 	if (!waitok) {
1710 		error = _mqueue_recv(mq, &msg, -1);
1711 		if (error)
1712 			return (error);
1713 		goto received;
1714 	}
1715 
1716 	/* we allow a null timeout (wait forever). */
1717 	if (abs_timeout == NULL) {
1718 		error = _mqueue_recv(mq, &msg, 0);
1719 		if (error)
1720 			return (error);
1721 		goto received;
1722 	}
1723 
1724 	/* try to get a message before checking time */
1725 	error = _mqueue_recv(mq, &msg, -1);
1726 	if (error == 0)
1727 		goto received;
1728 
1729 	if (error != EAGAIN)
1730 		return (error);
1731 
1732 	error = copyin(abs_timeout, &ets, sizeof(ets));
1733 	if (error != 0)
1734 		return (error);
1735 	if (ets.tv_nsec >= 1000000000 || ets.tv_nsec < 0) {
1736 		error = EINVAL;
1737 		return (error);
1738 	}
1739 
1740 	for (;;) {
1741 		ts2 = ets;
1742 		getnanouptime(&ts);
1743 		timespecsub(&ts2, &ts);
1744 		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
1745 			error = ETIMEDOUT;
1746 			return (error);
1747 		}
1748 		TIMESPEC_TO_TIMEVAL(&tv, &ts2);
1749 		error = _mqueue_recv(mq, &msg, tvtohz(&tv));
1750 		if (error == 0)
1751 			break;
1752 		if (error != ETIMEDOUT)
1753 			return (error);
1754 	}
1755 
1756 received:
1757 	error = mqueue_savemsg(msg, msg_ptr, msg_prio);
1758 	if (error == 0) {
1759 		curthread->td_retval[0] = msg->msg_size;
1760 		curthread->td_retval[1] = 0;
1761 	}
1762 	mqueue_freemsg(msg);
1763 	return (error);
1764 }
1765 
1766 /*
1767  * Common routine to receive a message
1768  */
1769 static int
1770 _mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg, int timo)
1771 {
1772 	int error = 0;
1773 
1774 	mtx_lock(&mq->mq_mutex);
1775 	while ((*msg = TAILQ_FIRST(&mq->mq_msgq)) == NULL && error == 0) {
1776 		if (timo < 0) {
1777 			mtx_unlock(&mq->mq_mutex);
1778 			return (EAGAIN);
1779 		}
1780 		mq->mq_receivers++;
1781 		error = msleep(&mq->mq_receivers, &mq->mq_mutex,
1782 			    PSOCK | PCATCH, "mqrecv", timo);
1783 		mq->mq_receivers--;
1784 		if (error == EAGAIN)
1785 			error = ETIMEDOUT;
1786 	}
1787 	if (*msg != NULL) {
1788 		error = 0;
1789 		TAILQ_REMOVE(&mq->mq_msgq, *msg, msg_link);
1790 		mq->mq_curmsgs--;
1791 		mq->mq_totalbytes -= (*msg)->msg_size;
1792 		if (mq->mq_senders)
1793 			wakeup_one(&mq->mq_senders);
1794 		if (mq->mq_flags & MQ_WSEL) {
1795 			mq->mq_flags &= ~MQ_WSEL;
1796 			selwakeuppri(&mq->mq_wsel, PSOCK);
1797 		}
1798 		KNOTE_LOCKED(&mq->mq_wsel.si_note, 0);
1799 	}
1800 	if (mq->mq_notifier != NULL && mq->mq_receivers == 0 &&
1801 	    !TAILQ_EMPTY(&mq->mq_msgq)) {
1802 		mqueue_send_notification(mq);
1803 	}
1804 	mtx_unlock(&mq->mq_mutex);
1805 	return (error);
1806 }
1807 
1808 static __inline struct mqueue_user *
1809 mquser_alloc(void)
1810 {
1811 	return (uma_zalloc(mquser_zone, M_WAITOK | M_ZERO));
1812 }
1813 
1814 static __inline void
1815 mquser_free(struct mqueue_user *p)
1816 {
1817 	uma_zfree(mquser_zone, p);
1818 }
1819 
1820 /*
1821  * Syscall to open a message queue
1822  */
1823 int
1824 mq_open(struct thread *td, struct mq_open_args *uap)
1825 {
1826 	char path[MQFS_NAMELEN+1];
1827 	struct mq_attr attr, *pattr;
1828 	struct mqfs_node *pn;
1829 	struct filedesc *fdp;
1830 	struct file *fp;
1831 	struct mqueue_user *mu;
1832 	struct mqueue *mq;
1833 	int fd, error, len, flags, cmode;
1834 
1835 	if ((uap->flags & O_ACCMODE) == O_ACCMODE)
1836 		return (EINVAL);
1837 
1838 	fdp = td->td_proc->p_fd;
1839 	flags = FFLAGS(uap->flags);
1840 	cmode = (((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT);
1841 	if ((flags & O_CREAT) && (uap->attr != NULL)) {
1842 		error = copyin(uap->attr, &attr, sizeof(attr));
1843 		if (error)
1844 			return (error);
1845 		if (attr.mq_maxmsg <= 0 || attr.mq_maxmsg > maxmsg)
1846 			return (EINVAL);
1847 		if (attr.mq_msgsize <= 0 || attr.mq_msgsize > maxmsgsize)
1848 			return (EINVAL);
1849 		pattr = &attr;
1850 	} else
1851 		pattr = NULL;
1852 
1853 	error = copyinstr(uap->path, path, MQFS_NAMELEN + 1, NULL);
1854         if (error)
1855                 return (error);
1856 
1857 	/*
1858 	 * The first character of name must be a slash  (/) character
1859 	 * and the remaining characters of name cannot include any slash
1860 	 * characters.
1861 	 */
1862 	len = strlen(path);
1863 	if (len < 2  || path[0] != '/' || index(path + 1, '/') != NULL)
1864 		return (EINVAL);
1865 
1866 	fdp = td->td_proc->p_fd;
1867 	error = falloc(td, &fp, &fd);
1868 	if (error)
1869 		return (error);
1870 
1871 	sx_xlock(&mqfs_data.mi_lock);
1872 	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1);
1873 	if (pn == NULL) {
1874 		if (!(flags & O_CREAT)) {
1875 			error = ENOENT;
1876 		} else {
1877 			mq = mqueue_alloc(pattr);
1878 			if (mq != NULL)
1879 				pn = mqfs_create_file(mqfs_data.mi_root,
1880 				         path + 1, len - 1);
1881 			if (pn == NULL) {
1882 				if (mq != NULL)
1883 					mqueue_free(mq);
1884 				error = ENOSPC;
1885 			}
1886 		}
1887 
1888 		if (error == 0) {
1889 			pn->mn_data = mq;
1890 			getnanotime(&pn->mn_birth);
1891 			pn->mn_ctime = pn->mn_atime = pn->mn_mtime
1892 			  = pn->mn_birth;
1893 			pn->mn_uid = td->td_ucred->cr_uid;
1894 			pn->mn_gid = td->td_ucred->cr_gid;
1895 			pn->mn_mode = cmode;
1896 		}
1897 	} else {
1898 		if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) {
1899 			error = EEXIST;
1900 		} else {
1901 			int acc_mode = 0;
1902 
1903 			if (flags & FREAD)
1904 				acc_mode |= VREAD;
1905 			if (flags & FWRITE)
1906 				acc_mode |= VWRITE;
1907 			error = vaccess(VREG, pn->mn_mode, pn->mn_uid,
1908 				    pn->mn_gid, acc_mode, td->td_ucred, NULL);
1909 		}
1910 	}
1911 
1912 	if (error) {
1913 		sx_xunlock(&mqfs_data.mi_lock);
1914 		fdclose(fdp, fp, fd, td);
1915 		fdrop(fp, td);
1916 		return (error);
1917 	}
1918 
1919 	mqnode_addref(pn);
1920 	sx_xunlock(&mqfs_data.mi_lock);
1921 
1922 	mu = mquser_alloc();
1923 	mu->mu_node = pn;
1924 	ksiginfo_init(&mu->mu_ksi);
1925 	mu->mu_ksi.ksi_flags |= KSI_INS | KSI_EXT;
1926 	mu->mu_ksi.ksi_code = SI_MESGQ;
1927 	mu->mu_proc = td->td_proc;
1928 	FILE_LOCK(fp);
1929 	fp->f_flag = (flags & (FREAD | FWRITE | O_NONBLOCK));
1930 	fp->f_type = DTYPE_MQUEUE;
1931 	fp->f_ops = &mqueueops;
1932 	fp->f_data = mu;
1933 	FILE_UNLOCK(fp);
1934 
1935 	FILEDESC_LOCK_FAST(fdp);
1936 	if (fdp->fd_ofiles[fd] == fp)
1937 		fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
1938 	FILEDESC_UNLOCK_FAST(fdp);
1939 	td->td_retval[0] = fd;
1940 	fdrop(fp, td);
1941 	return (0);
1942 }
1943 
1944 /*
1945  * Syscall to unlink a message queue
1946  */
1947 int
1948 mq_unlink(struct thread *td, struct mq_unlink_args *uap)
1949 {
1950 	char path[MQFS_NAMELEN+1];
1951 	struct mqfs_node *pn;
1952 	int error, len;
1953 
1954 	error = copyinstr(uap->path, path, MQFS_NAMELEN + 1, NULL);
1955         if (error)
1956 		return (error);
1957 
1958 	len = strlen(path);
1959 	if (len < 2  || path[0] != '/' || index(path + 1, '/') != NULL)
1960 		return (EINVAL);
1961 
1962 	sx_xlock(&mqfs_data.mi_lock);
1963 	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1);
1964 	if (pn != NULL)
1965 		error = do_unlink(pn, td->td_ucred);
1966 	else
1967 		error = ENOENT;
1968 	sx_xunlock(&mqfs_data.mi_lock);
1969 	return (error);
1970 }
1971 
1972 typedef int (*_fgetf)(struct thread *, int, struct file **);
1973 
1974 /*
1975  * Get message queue by giving file slot
1976  */
1977 static int
1978 _getmq(struct thread *td, int fd, _fgetf func,
1979        struct file **fpp, struct mqfs_node **ppn, struct mqueue **pmq)
1980 {
1981 	struct mqueue_user *mu;
1982 	struct mqfs_node *pn;
1983 	int error;
1984 
1985 	error = func(td, fd, fpp);
1986 	if (error)
1987 		return (error);
1988 	if (&mqueueops != (*fpp)->f_ops) {
1989 		fdrop(*fpp, td);
1990 		return (EBADF);
1991 	}
1992 	mu = (*fpp)->f_data;
1993 	pn = mu->mu_node;
1994 	if (ppn)
1995 		*ppn = pn;
1996 	if (pmq)
1997 		*pmq = pn->mn_data;
1998 	return (0);
1999 }
2000 
2001 static __inline int
2002 getmq(struct thread *td, int fd, struct file **fpp, struct mqfs_node **ppn,
2003 	struct mqueue **pmq)
2004 {
2005 	return _getmq(td, fd, fget, fpp, ppn, pmq);
2006 }
2007 
2008 static __inline int
2009 getmq_read(struct thread *td, int fd, struct file **fpp,
2010 	 struct mqfs_node **ppn, struct mqueue **pmq)
2011 {
2012 	return _getmq(td, fd, fget_read, fpp, ppn, pmq);
2013 }
2014 
2015 static __inline int
2016 getmq_write(struct thread *td, int fd, struct file **fpp,
2017 	struct mqfs_node **ppn, struct mqueue **pmq)
2018 {
2019 	return _getmq(td, fd, fget_write, fpp, ppn, pmq);
2020 }
2021 
2022 /*
2023  * Syscall
2024  */
2025 int
2026 mq_setattr(struct thread *td, struct mq_setattr_args *uap)
2027 {
2028 	struct mqueue *mq;
2029 	struct file *fp;
2030 	struct mq_attr attr, oattr;
2031 	int error;
2032 
2033 	if (uap->attr) {
2034 		error = copyin(uap->attr, &attr, sizeof(attr));
2035 		if (error)
2036 			return (error);
2037 		if (attr.mq_flags & ~O_NONBLOCK)
2038 			return (EINVAL);
2039 	}
2040 	error = getmq(td, uap->mqd, &fp, NULL, &mq);
2041 	if (error)
2042 		return (error);
2043 	oattr.mq_maxmsg  = mq->mq_maxmsg;
2044 	oattr.mq_msgsize = mq->mq_msgsize;
2045 	oattr.mq_curmsgs = mq->mq_curmsgs;
2046 	FILE_LOCK(fp);
2047 	oattr.mq_flags = (O_NONBLOCK & fp->f_flag);
2048 	if (uap->attr) {
2049 		fp->f_flag &= ~O_NONBLOCK;
2050 		fp->f_flag |= (attr.mq_flags & O_NONBLOCK);
2051 	}
2052 	FILE_UNLOCK(fp);
2053 	fdrop(fp, td);
2054 	if (uap->oattr)
2055 		error = copyout(&oattr, uap->oattr, sizeof(oattr));
2056 	return (error);
2057 }
2058 
2059 /*
2060  * Syscall
2061  */
2062 int
2063 mq_timedreceive(struct thread *td, struct mq_timedreceive_args *uap)
2064 {
2065 	struct mqueue *mq;
2066 	struct file *fp;
2067 	int error;
2068 	int waitok;
2069 
2070 	error = getmq_read(td, uap->mqd, &fp, NULL, &mq);
2071 	if (error)
2072 		return (error);
2073 	waitok = !(fp->f_flag & O_NONBLOCK);
2074 	error = mqueue_receive(mq, uap->msg_ptr, uap->msg_len,
2075 		uap->msg_prio, waitok, uap->abs_timeout);
2076 	fdrop(fp, td);
2077 	return (error);
2078 }
2079 
2080 /*
2081  * Syscall
2082  */
2083 int
2084 mq_timedsend(struct thread *td, struct mq_timedsend_args *uap)
2085 {
2086 	struct mqueue *mq;
2087 	struct file *fp;
2088 	int error, waitok;
2089 
2090 	error = getmq_write(td, uap->mqd, &fp, NULL, &mq);
2091 	if (error)
2092 		return (error);
2093 	waitok = !(fp->f_flag & O_NONBLOCK);
2094 	error = mqueue_send(mq, uap->msg_ptr, uap->msg_len,
2095 		uap->msg_prio, waitok, uap->abs_timeout);
2096 	fdrop(fp, td);
2097 	return (error);
2098 }
2099 
2100 /*
2101  * Syscall
2102  */
2103 int
2104 mq_notify(struct thread *td, struct mq_notify_args *uap)
2105 {
2106 	struct sigevent ev;
2107 	struct mqueue_user *mu;
2108 	struct mqueue *mq;
2109 	struct file *fp;
2110 	int error;
2111 
2112 	if (uap->sigev) {
2113 		error = copyin(uap->sigev, &ev, sizeof(ev));
2114 		if (error)
2115 			return (error);
2116 		if (ev.sigev_notify != SIGEV_SIGNAL &&
2117 		    ev.sigev_notify != SIGEV_THREAD_ID)
2118 			return (EINVAL);
2119 		if ((ev.sigev_notify == SIGEV_SIGNAL ||
2120 		     ev.sigev_notify == SIGEV_THREAD_ID) &&
2121 			!_SIG_VALID(ev.sigev_signo))
2122 			return (EINVAL);
2123 	}
2124 	error = getmq(td, uap->mqd, &fp, NULL, &mq);
2125 	if (error)
2126 		return (error);
2127 	mu = fp->f_data;
2128 	mtx_lock(&mq->mq_mutex);
2129 	if (uap->sigev != NULL) {
2130 		if (mq->mq_notifier != NULL) {
2131 			error = EBUSY;
2132 		} else {
2133 			PROC_LOCK(td->td_proc);
2134 			sigqueue_take(&mu->mu_ksi);
2135 			PROC_UNLOCK(td->td_proc);
2136 			mq->mq_notifier = mu;
2137 			mu->mu_sigev = ev;
2138 			/*
2139 			 * if there is no receivers and message queue is not
2140 			 * empty, we should send notification as soon as
2141 			 * possible.
2142 			 */
2143 			if (mq->mq_receivers == 0 &&
2144 			    !TAILQ_EMPTY(&mq->mq_msgq))
2145 				mqueue_send_notification(mq);
2146 		}
2147 	} else {
2148 		if (mq->mq_notifier == mu)
2149 			mq->mq_notifier = NULL;
2150 		else
2151 			error = EPERM;
2152 	}
2153 	mtx_unlock(&mq->mq_mutex);
2154 	fdrop(fp, td);
2155 	return (error);
2156 }
2157 
2158 static int
2159 mqf_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
2160 	int flags, struct thread *td)
2161 {
2162 	return (EOPNOTSUPP);
2163 }
2164 
2165 static int
2166 mqf_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
2167 	int flags, struct thread *td)
2168 {
2169 	return (EOPNOTSUPP);
2170 }
2171 
2172 static int
2173 mqf_ioctl(struct file *fp, u_long cmd, void *data,
2174 	struct ucred *active_cred, struct thread *td)
2175 {
2176 	return (ENOTTY);
2177 }
2178 
2179 static int
2180 mqf_poll(struct file *fp, int events, struct ucred *active_cred,
2181 	struct thread *td)
2182 {
2183 	struct mqueue *mq = FPTOMQ(fp);
2184 	int revents = 0;
2185 
2186 	mtx_lock(&mq->mq_mutex);
2187 	if (events & (POLLIN | POLLRDNORM)) {
2188 		if (mq->mq_curmsgs) {
2189 			revents |= events & (POLLIN | POLLRDNORM);
2190 		} else {
2191 			mq->mq_flags |= MQ_RSEL;
2192 			selrecord(td, &mq->mq_rsel);
2193  		}
2194 	}
2195 	if (events & POLLOUT) {
2196 		if (mq->mq_curmsgs < mq->mq_maxmsg)
2197 			revents |= POLLOUT;
2198 		else {
2199 			mq->mq_flags |= MQ_WSEL;
2200 			selrecord(td, &mq->mq_wsel);
2201 		}
2202 	}
2203 	mtx_unlock(&mq->mq_mutex);
2204 	return (revents);
2205 }
2206 
2207 static int
2208 mqf_close(struct file *fp, struct thread *td)
2209 {
2210 	struct mqueue_user *mu;
2211 	struct mqfs_node *pn;
2212 	struct mqueue *mq;
2213 
2214 	FILE_LOCK(fp);
2215 	fp->f_ops = &badfileops;
2216 	FILE_UNLOCK(fp);
2217 	mu = fp->f_data;
2218 	fp->f_data = NULL;
2219 	pn = mu->mu_node;
2220 	mq = pn->mn_data;
2221 	mtx_lock(&mq->mq_mutex);
2222 	if (mq->mq_notifier == mu) {
2223 		PROC_LOCK(td->td_proc);
2224 		sigqueue_take(&mu->mu_ksi);
2225 		PROC_UNLOCK(td->td_proc);
2226 		mq->mq_notifier = NULL;
2227 	}
2228 	/* have to wakeup thread in same process */
2229 	if (mq->mq_flags & MQ_RSEL) {
2230 		mq->mq_flags &= ~MQ_RSEL;
2231 		selwakeuppri(&mq->mq_rsel, PSOCK);
2232 	}
2233 	if (mq->mq_flags & MQ_WSEL) {
2234 		mq->mq_flags &= ~MQ_WSEL;
2235 		selwakeuppri(&mq->mq_wsel, PSOCK);
2236 	}
2237 	mtx_unlock(&mq->mq_mutex);
2238 	sx_xlock(&mqfs_data.mi_lock);
2239 	mqnode_release(pn);
2240 	sx_xunlock(&mqfs_data.mi_lock);
2241 	mquser_free(mu);
2242 	return (0);
2243 }
2244 
2245 static int
2246 mqf_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
2247 	struct thread *td)
2248 {
2249 	struct mqueue_user *mu = fp->f_data;
2250 	struct mqfs_node *pn = mu->mu_node;
2251 
2252 	bzero(st, sizeof *st);
2253 	st->st_atimespec = pn->mn_atime;
2254 	st->st_mtimespec = pn->mn_mtime;
2255 	st->st_ctimespec = pn->mn_ctime;
2256 	st->st_birthtimespec = pn->mn_birth;
2257 	st->st_uid = pn->mn_uid;
2258 	st->st_gid = pn->mn_gid;
2259 	st->st_mode = S_IFIFO | pn->mn_mode;
2260 	return (0);
2261 }
2262 
2263 static int
2264 mqf_kqfilter(struct file *fp, struct knote *kn)
2265 {
2266 	struct mqueue *mq = FPTOMQ(fp);
2267 	int error = 0;
2268 
2269 	if (kn->kn_filter == EVFILT_READ) {
2270 		kn->kn_fop = &mq_rfiltops;
2271 		knlist_add(&mq->mq_rsel.si_note, kn, 0);
2272 	} else if (kn->kn_filter == EVFILT_WRITE) {
2273 		kn->kn_fop = &mq_wfiltops;
2274 		knlist_add(&mq->mq_wsel.si_note, kn, 0);
2275 	} else
2276 		error = EINVAL;
2277 	return (error);
2278 }
2279 
2280 static void
2281 filt_mqdetach(struct knote *kn)
2282 {
2283 	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2284 
2285 	if (kn->kn_filter == EVFILT_READ)
2286 		knlist_remove(&mq->mq_rsel.si_note, kn, 0);
2287 	else if (kn->kn_filter == EVFILT_WRITE)
2288 		knlist_remove(&mq->mq_wsel.si_note, kn, 0);
2289 	else
2290 		panic("filt_mqdetach");
2291 }
2292 
2293 static int
2294 filt_mqread(struct knote *kn, long hint)
2295 {
2296 	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2297 
2298 	mtx_assert(&mq->mq_mutex, MA_OWNED);
2299 	return (mq->mq_curmsgs != 0);
2300 }
2301 
2302 static int
2303 filt_mqwrite(struct knote *kn, long hint)
2304 {
2305 	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2306 
2307 	mtx_assert(&mq->mq_mutex, MA_OWNED);
2308 	return (mq->mq_curmsgs < mq->mq_maxmsg);
2309 }
2310 
2311 static struct fileops mqueueops = {
2312 	.fo_read		= mqf_read,
2313 	.fo_write		= mqf_write,
2314 	.fo_ioctl		= mqf_ioctl,
2315 	.fo_poll		= mqf_poll,
2316 	.fo_kqfilter		= mqf_kqfilter,
2317 	.fo_stat		= mqf_stat,
2318 	.fo_close		= mqf_close
2319 };
2320 
2321 static struct vop_vector mqfs_vnodeops = {
2322 	.vop_default 		= &default_vnodeops,
2323 	.vop_access		= mqfs_access,
2324 	.vop_cachedlookup	= mqfs_lookup,
2325 	.vop_lookup		= vfs_cache_lookup,
2326 	.vop_reclaim		= mqfs_reclaim,
2327 	.vop_create		= mqfs_create,
2328 	.vop_remove		= mqfs_remove,
2329 	.vop_inactive		= mqfs_inactive,
2330 	.vop_open		= mqfs_open,
2331 	.vop_close		= mqfs_close,
2332 	.vop_getattr		= mqfs_getattr,
2333 	.vop_setattr		= mqfs_setattr,
2334 	.vop_read		= mqfs_read,
2335 	.vop_write		= VOP_EOPNOTSUPP,
2336 	.vop_readdir		= mqfs_readdir,
2337 	.vop_mkdir		= VOP_EOPNOTSUPP,
2338 	.vop_rmdir		= VOP_EOPNOTSUPP
2339 };
2340 
2341 static struct vfsops mqfs_vfsops = {
2342 	.vfs_init 		= mqfs_init,
2343 	.vfs_uninit		= mqfs_uninit,
2344 	.vfs_mount		= mqfs_mount,
2345 	.vfs_unmount		= mqfs_unmount,
2346 	.vfs_root		= mqfs_root,
2347 	.vfs_statfs		= mqfs_statfs,
2348 };
2349 
2350 SYSCALL_MODULE_HELPER(mq_open);
2351 SYSCALL_MODULE_HELPER(mq_setattr);
2352 SYSCALL_MODULE_HELPER(mq_timedsend);
2353 SYSCALL_MODULE_HELPER(mq_timedreceive);
2354 SYSCALL_MODULE_HELPER(mq_notify);
2355 SYSCALL_MODULE_HELPER(mq_unlink);
2356 
2357 VFS_SET(mqfs_vfsops, mqueuefs, VFCF_SYNTHETIC);
2358 MODULE_VERSION(mqueuefs, 1);
2359