xref: /freebsd/sys/fs/cuse/cuse.c (revision 3ac125068a211377f0b3817c37cf1db95a87e8fb)
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2010-2013 Hans Petter Selasky. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include "opt_compat.h"
28 
29 #include <sys/stdint.h>
30 #include <sys/stddef.h>
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/conf.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/linker_set.h>
38 #include <sys/module.h>
39 #include <sys/lock.h>
40 #include <sys/mutex.h>
41 #include <sys/condvar.h>
42 #include <sys/sysctl.h>
43 #include <sys/unistd.h>
44 #include <sys/malloc.h>
45 #include <sys/priv.h>
46 #include <sys/uio.h>
47 #include <sys/poll.h>
48 #include <sys/sx.h>
49 #include <sys/queue.h>
50 #include <sys/fcntl.h>
51 #include <sys/proc.h>
52 #include <sys/vnode.h>
53 #include <sys/selinfo.h>
54 #include <sys/ptrace.h>
55 
56 #include <machine/bus.h>
57 
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60 
61 #include <fs/cuse/cuse_defs.h>
62 #include <fs/cuse/cuse_ioctl.h>
63 
64 MODULE_VERSION(cuse, 1);
65 
66 /*
67  * Prevent cuse4bsd.ko and cuse.ko from loading at the same time by
68  * declaring support for the cuse4bsd interface in cuse.ko:
69  */
70 MODULE_VERSION(cuse4bsd, 1);
71 
72 #define	NBUSY	((uint8_t *)1)
73 
74 #ifdef FEATURE
75 FEATURE(cuse, "Userspace character devices");
76 #endif
77 
78 struct cuse_command;
79 struct cuse_server;
80 struct cuse_client;
81 
/*
 * One in-flight command slot.  Each client owns CUSE_CMD_MAX of
 * these, one per command type.
 */
struct cuse_client_command {
	TAILQ_ENTRY(cuse_client_command) entry;	/* linkage on server's command queue */
	struct cuse_command sub;	/* command data handed to the userland server */
	struct sx sx;			/* serializes use of this command slot */
	struct cv cv;			/* wakeup for completion, signals and refs */
	struct thread *entered;		/* server thread currently processing this */
	struct cuse_client *client;	/* backpointer to the owning client */
	struct proc *proc_curr;		/* peer process while a command is waiting */
	int	proc_refs;		/* outstanding copy references on proc_curr */
	int	got_signal;		/* set when the client caught a signal */
	int	error;			/* completion code set by the server */
	int	command;		/* CUSE_CMD_NONE or the pending command */
};
95 
/*
 * One globally shared memory segment, mappable via cuse_server_mmap().
 */
struct cuse_memory {
	struct cuse_server *owner;	/* server currently owning this segment */
	uint8_t *virtaddr;		/* kernel VA; NULL = unbacked, NBUSY = allocating */
	uint32_t page_count;		/* backing size in pages */
	uint32_t is_allocated;		/* non-zero while bound to a server */
};
102 
/*
 * One character device created by a userland CUSE server.
 */
struct cuse_server_dev {
	TAILQ_ENTRY(cuse_server_dev) entry;	/* linkage on server's device list */
	struct cuse_server *server;	/* backpointer to the owning server */
	struct cdev *kern_dev;		/* kernel character device node */
	struct cuse_dev *user_dev;	/* opaque userland device identifier */
};
109 
/*
 * State for one open /dev/cuse file handle (one userland server).
 */
struct cuse_server {
	TAILQ_ENTRY(cuse_server) entry;		/* linkage on global server list */
	TAILQ_HEAD(, cuse_client_command) head;	/* commands queued for the server */
	TAILQ_HEAD(, cuse_server_dev) hdev;	/* devices created by this server */
	TAILQ_HEAD(, cuse_client) hcli;		/* clients attached to this server */
	struct cv cv;			/* wakeup when a command is queued */
	struct selinfo selinfo;		/* poll/kqueue bookkeeping */
	pid_t	pid;			/* PID of the server process */
	int	is_closing;		/* set once shutdown has started */
	int	refs;			/* references; last drop frees the server */
};
121 
/*
 * State for one open CUSE client device file handle.
 */
struct cuse_client {
	TAILQ_ENTRY(cuse_client) entry;		/* linkage on server's client list */
	TAILQ_ENTRY(cuse_client) entry_ref;
	struct cuse_client_command cmds[CUSE_CMD_MAX];	/* one slot per command type */
	struct cuse_server *server;		/* backpointer to the owning server */
	struct cuse_server_dev *server_dev;	/* device opened; NULL while closing */

	/* staging area for ioctl data exchanged with the server */
	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);

	int	fflags;		/* file flags */
	int	cflags;		/* client flags */
#define	CUSE_CLI_IS_CLOSING 0x01
#define	CUSE_CLI_KNOTE_NEED_READ 0x02
#define	CUSE_CLI_KNOTE_NEED_WRITE 0x04
#define	CUSE_CLI_KNOTE_HAS_READ 0x08
#define	CUSE_CLI_KNOTE_HAS_WRITE 0x10
};
139 
140 #define	CUSE_CLIENT_CLOSING(pcc) \
141     ((pcc)->cflags & CUSE_CLI_IS_CLOSING)
142 
143 static MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");
144 
145 static TAILQ_HEAD(, cuse_server) cuse_server_head;
146 static struct mtx cuse_mtx;
147 static struct cdev *cuse_dev;
148 static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
149 static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
150 static struct cuse_memory cuse_mem[CUSE_ALLOC_UNIT_MAX];
151 
152 static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);
153 static void cuse_client_kqfilter_read_detach(struct knote *kn);
154 static void cuse_client_kqfilter_write_detach(struct knote *kn);
155 static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
156 static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);
157 
158 static struct filterops cuse_client_kqfilter_read_ops = {
159 	.f_isfd = 1,
160 	.f_detach = cuse_client_kqfilter_read_detach,
161 	.f_event = cuse_client_kqfilter_read_event,
162 };
163 
164 static struct filterops cuse_client_kqfilter_write_ops = {
165 	.f_isfd = 1,
166 	.f_detach = cuse_client_kqfilter_write_detach,
167 	.f_event = cuse_client_kqfilter_write_event,
168 };
169 
170 static d_open_t cuse_client_open;
171 static d_close_t cuse_client_close;
172 static d_ioctl_t cuse_client_ioctl;
173 static d_read_t cuse_client_read;
174 static d_write_t cuse_client_write;
175 static d_poll_t cuse_client_poll;
176 static d_mmap_t cuse_client_mmap;
177 static d_kqfilter_t cuse_client_kqfilter;
178 
179 static struct cdevsw cuse_client_devsw = {
180 	.d_version = D_VERSION,
181 	.d_open = cuse_client_open,
182 	.d_close = cuse_client_close,
183 	.d_ioctl = cuse_client_ioctl,
184 	.d_name = "cuse_client",
185 	.d_flags = D_TRACKCLOSE,
186 	.d_read = cuse_client_read,
187 	.d_write = cuse_client_write,
188 	.d_poll = cuse_client_poll,
189 	.d_mmap = cuse_client_mmap,
190 	.d_kqfilter = cuse_client_kqfilter,
191 };
192 
193 static d_open_t cuse_server_open;
194 static d_close_t cuse_server_close;
195 static d_ioctl_t cuse_server_ioctl;
196 static d_read_t cuse_server_read;
197 static d_write_t cuse_server_write;
198 static d_poll_t cuse_server_poll;
199 static d_mmap_t cuse_server_mmap;
200 
201 static struct cdevsw cuse_server_devsw = {
202 	.d_version = D_VERSION,
203 	.d_open = cuse_server_open,
204 	.d_close = cuse_server_close,
205 	.d_ioctl = cuse_server_ioctl,
206 	.d_name = "cuse_server",
207 	.d_flags = D_TRACKCLOSE,
208 	.d_read = cuse_server_read,
209 	.d_write = cuse_server_write,
210 	.d_poll = cuse_server_poll,
211 	.d_mmap = cuse_server_mmap,
212 };
213 
214 static void cuse_client_is_closing(struct cuse_client *);
215 static int cuse_free_unit_by_id_locked(struct cuse_server *, int);
216 
/* Acquire the single global CUSE mutex. */
static void
cuse_lock(void)
{
	mtx_lock(&cuse_mtx);
}
222 
/* Release the single global CUSE mutex. */
static void
cuse_unlock(void)
{
	mtx_unlock(&cuse_mtx);
}
228 
/* Exclusively lock one client command slot (sleepable sx lock). */
static void
cuse_cmd_lock(struct cuse_client_command *pccmd)
{
	sx_xlock(&pccmd->sx);
}
234 
/* Release the exclusive lock on a client command slot. */
static void
cuse_cmd_unlock(struct cuse_client_command *pccmd)
{
	sx_xunlock(&pccmd->sx);
}
240 
/*
 * Module load-time initialization: set up the global server list and
 * mutex and create the /dev/cuse server control node.
 */
static void
cuse_kern_init(void *arg)
{
	TAILQ_INIT(&cuse_server_head);

	mtx_init(&cuse_mtx, "cuse-mtx", NULL, MTX_DEF);

	/* control node is restricted to root/operator */
	cuse_dev = make_dev(&cuse_server_devsw, 0,
	    UID_ROOT, GID_OPERATOR, 0600, "cuse");

	printf("Cuse v%d.%d.%d @ /dev/cuse\n",
	    (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
	    (CUSE_VERSION >> 0) & 0xFF);
}
255 
256 SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, 0);
257 
/*
 * Module unload: poll until the last server instance has gone away,
 * then destroy the control node and the global mutex.
 */
static void
cuse_kern_uninit(void *arg)
{
	void *ptr;

	while (1) {

		printf("Cuse: Please exit all /dev/cuse instances "
		    "and processes which have used this device.\n");

		/* re-check every two seconds */
		pause("DRAIN", 2 * hz);

		cuse_lock();
		ptr = TAILQ_FIRST(&cuse_server_head);
		cuse_unlock();

		if (ptr == NULL)
			break;
	}

	if (cuse_dev != NULL)
		destroy_dev(cuse_dev);

	mtx_destroy(&cuse_mtx);
}
283 
284 SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
285 
286 static int
287 cuse_server_get(struct cuse_server **ppcs)
288 {
289 	struct cuse_server *pcs;
290 	int error;
291 
292 	error = devfs_get_cdevpriv((void **)&pcs);
293 	if (error != 0) {
294 		*ppcs = NULL;
295 		return (error);
296 	}
297 	/* check if closing */
298 	cuse_lock();
299 	if (pcs->is_closing) {
300 		cuse_unlock();
301 		*ppcs = NULL;
302 		return (EINVAL);
303 	}
304 	cuse_unlock();
305 	*ppcs = pcs;
306 	return (0);
307 }
308 
/*
 * Mark a server as closing and propagate the shutdown to all of its
 * clients.  Idempotent.  Called with the global CUSE lock held.
 */
static void
cuse_server_is_closing(struct cuse_server *pcs)
{
	struct cuse_client *pcc;

	if (pcs->is_closing)
		return;

	pcs->is_closing = 1;

	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
		cuse_client_is_closing(pcc);
	}
}
323 
324 static struct cuse_client_command *
325 cuse_server_find_command(struct cuse_server *pcs, struct thread *td)
326 {
327 	struct cuse_client *pcc;
328 	int n;
329 
330 	if (pcs->is_closing)
331 		goto done;
332 
333 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
334 		if (CUSE_CLIENT_CLOSING(pcc))
335 			continue;
336 		for (n = 0; n != CUSE_CMD_MAX; n++) {
337 			if (pcc->cmds[n].entered == td)
338 				return (&pcc->cmds[n]);
339 		}
340 	}
341 done:
342 	return (NULL);
343 }
344 
/*
 * Sanitize a device name in place: keep letters, digits, '.', '_'
 * and '/', and overwrite every other character with an underscore.
 */
static void
cuse_str_filter(char *ptr)
{
	char *p;
	int ch;

	for (p = ptr; (ch = *p) != 0; p++) {
		if (ch >= 'a' && ch <= 'z')
			continue;
		if (ch >= 'A' && ch <= 'Z')
			continue;
		if (ch >= '0' && ch <= '9')
			continue;
		if (ch == '.' || ch == '_' || ch == '/')
			continue;
		/* not an allowed character */
		*p = '_';
	}
}
373 
374 static int
375 cuse_convert_error(int error)
376 {
377 	;				/* indent fix */
378 	switch (error) {
379 	case CUSE_ERR_NONE:
380 		return (0);
381 	case CUSE_ERR_BUSY:
382 		return (EBUSY);
383 	case CUSE_ERR_WOULDBLOCK:
384 		return (EWOULDBLOCK);
385 	case CUSE_ERR_INVALID:
386 		return (EINVAL);
387 	case CUSE_ERR_NO_MEMORY:
388 		return (ENOMEM);
389 	case CUSE_ERR_FAULT:
390 		return (EFAULT);
391 	case CUSE_ERR_SIGNAL:
392 		return (EINTR);
393 	default:
394 		return (ENXIO);
395 	}
396 }
397 
398 static void
399 cuse_server_free_memory(struct cuse_server *pcs)
400 {
401 	struct cuse_memory *mem;
402 	uint32_t n;
403 
404 	for (n = 0; n != CUSE_ALLOC_UNIT_MAX; n++) {
405 		mem = &cuse_mem[n];
406 
407 		/* this memory is never freed */
408 		if (mem->owner == pcs) {
409 			mem->owner = NULL;
410 			mem->is_allocated = 0;
411 		}
412 	}
413 }
414 
/*
 * Bind one of the global memory segments to the given server,
 * allocating the backing kernel memory on first use.  The NBUSY
 * marker in "virtaddr" keeps concurrent callers away while the
 * unlocked, sleeping malloc() is in progress.  Returns zero on
 * success, else EBUSY or ENOMEM.
 */
static int
cuse_server_alloc_memory(struct cuse_server *pcs,
    struct cuse_memory *mem, uint32_t page_count)
{
	void *ptr;
	int error;

	cuse_lock();

	/* another thread is currently allocating this segment */
	if (mem->virtaddr == NBUSY) {
		cuse_unlock();
		return (EBUSY);
	}
	if (mem->virtaddr != NULL) {
		/* already backed; reuse only when free and size matches */
		if (mem->is_allocated != 0) {
			cuse_unlock();
			return (EBUSY);
		}
		if (mem->page_count == page_count) {
			mem->is_allocated = 1;
			mem->owner = pcs;
			cuse_unlock();
			return (0);
		}
		cuse_unlock();
		return (EBUSY);
	}
	memset(mem, 0, sizeof(*mem));

	/* mark segment busy while sleeping in malloc() below */
	mem->virtaddr = NBUSY;

	cuse_unlock();

	ptr = malloc(page_count * PAGE_SIZE, M_CUSE, M_WAITOK | M_ZERO);
	if (ptr == NULL)
		error = ENOMEM;
	else
		error = 0;

	cuse_lock();

	if (error) {
		/* clear the busy marker so others may retry */
		mem->virtaddr = NULL;
		cuse_unlock();
		return (error);
	}
	mem->virtaddr = ptr;
	mem->page_count = page_count;
	mem->is_allocated = 1;
	mem->owner = pcs;
	cuse_unlock();

	return (0);
}
469 
470 static int
471 cuse_client_get(struct cuse_client **ppcc)
472 {
473 	struct cuse_client *pcc;
474 	int error;
475 
476 	/* try to get private data */
477 	error = devfs_get_cdevpriv((void **)&pcc);
478 	if (error != 0) {
479 		*ppcc = NULL;
480 		return (error);
481 	}
482 	/* check if closing */
483 	cuse_lock();
484 	if (CUSE_CLIENT_CLOSING(pcc) || pcc->server->is_closing) {
485 		cuse_unlock();
486 		*ppcc = NULL;
487 		return (EINVAL);
488 	}
489 	cuse_unlock();
490 	*ppcc = pcc;
491 	return (0);
492 }
493 
/*
 * Mark a client as closing, dequeue any of its commands still queued
 * for the server and wake up all waiters.  Idempotent.  Called with
 * the global CUSE lock held.
 */
static void
cuse_client_is_closing(struct cuse_client *pcc)
{
	struct cuse_client_command *pccmd;
	uint32_t n;

	if (CUSE_CLIENT_CLOSING(pcc))
		return;

	pcc->cflags |= CUSE_CLI_IS_CLOSING;
	pcc->server_dev = NULL;

	for (n = 0; n != CUSE_CMD_MAX; n++) {

		pccmd = &pcc->cmds[n];

		/* a non-NULL tqe_prev means the command is still queued */
		if (pccmd->entry.tqe_prev != NULL) {
			TAILQ_REMOVE(&pcc->server->head, pccmd, entry);
			pccmd->entry.tqe_prev = NULL;
		}
		cv_broadcast(&pccmd->cv);
	}
}
517 
/*
 * Fill in a command slot and queue it for the userland server,
 * unless it is already queued or the client/server is closing.
 * Called with the global CUSE lock held.
 */
static void
cuse_client_send_command_locked(struct cuse_client_command *pccmd,
    uintptr_t data_ptr, unsigned long arg, int fflags, int ioflag)
{
	unsigned long cuse_fflags = 0;
	struct cuse_server *pcs;

	/* translate kernel file/IO flags into CUSE flags */
	if (fflags & FREAD)
		cuse_fflags |= CUSE_FFLAG_READ;

	if (fflags & FWRITE)
		cuse_fflags |= CUSE_FFLAG_WRITE;

	if (ioflag & IO_NDELAY)
		cuse_fflags |= CUSE_FFLAG_NONBLOCK;

	pccmd->sub.fflags = cuse_fflags;
	pccmd->sub.data_pointer = data_ptr;
	pccmd->sub.argument = arg;

	pcs = pccmd->client->server;

	/* queue only when not already queued and nobody is closing */
	if ((pccmd->entry.tqe_prev == NULL) &&
	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
	    (pcs->is_closing == 0)) {
		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
		cv_signal(&pcs->cv);
	}
}
547 
/*
 * Record that the calling thread caught a signal and queue the
 * client's dedicated CUSE_CMD_SIGNAL command so the userland server
 * can react.  Called with the global CUSE lock held.
 */
static void
cuse_client_got_signal(struct cuse_client_command *pccmd)
{
	struct cuse_server *pcs;

	pccmd->got_signal = 1;

	/* switch to the client's signal command slot */
	pccmd = &pccmd->client->cmds[CUSE_CMD_SIGNAL];

	pcs = pccmd->client->server;

	/* queue only when not already queued and nobody is closing */
	if ((pccmd->entry.tqe_prev == NULL) &&
	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
	    (pcs->is_closing == 0)) {
		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
		cv_signal(&pcs->cv);
	}
}
566 
/*
 * Sleep until the userland server completes the given command, then
 * return the server's CUSE error code (CUSE_ERR_OTHER when the
 * client or server is closing).  The first signal is forwarded to
 * the server via cuse_client_got_signal(); after that the wait is
 * uninterruptible.  Called with the global CUSE lock held.  The
 * "arg_ptr" and "arg_len" parameters are unused here.
 */
static int
cuse_client_receive_command_locked(struct cuse_client_command *pccmd,
    uint8_t *arg_ptr, uint32_t arg_len)
{
	int error;

	error = 0;

	/* expose our process so the server can copy data to/from it */
	pccmd->proc_curr = curthread->td_proc;

	if (CUSE_CLIENT_CLOSING(pccmd->client) ||
	    pccmd->client->server->is_closing) {
		error = CUSE_ERR_OTHER;
		goto done;
	}
	while (pccmd->command == CUSE_CMD_NONE) {
		if (error != 0) {
			/* signal already delivered; wait uninterruptibly */
			cv_wait(&pccmd->cv, &cuse_mtx);
		} else {
			error = cv_wait_sig(&pccmd->cv, &cuse_mtx);

			if (error != 0)
				cuse_client_got_signal(pccmd);
		}
		if (CUSE_CLIENT_CLOSING(pccmd->client) ||
		    pccmd->client->server->is_closing) {
			error = CUSE_ERR_OTHER;
			goto done;
		}
	}

	error = pccmd->error;
	pccmd->command = CUSE_CMD_NONE;
	cv_signal(&pccmd->cv);

done:

	/* wait until all process references are gone */

	pccmd->proc_curr = NULL;

	while (pccmd->proc_refs != 0)
		cv_wait(&pccmd->cv, &cuse_mtx);

	return (error);
}
613 
614 /*------------------------------------------------------------------------*
615  *	CUSE SERVER PART
616  *------------------------------------------------------------------------*/
617 
/*
 * Detach and destroy one server-created character device, closing
 * every client still referencing it, and free its descriptor.
 * Called without the global CUSE lock held.
 */
static void
cuse_server_free_dev(struct cuse_server_dev *pcsd)
{
	struct cuse_server *pcs;
	struct cuse_client *pcc;

	/* get server pointer */
	pcs = pcsd->server;

	/* prevent creation of more devices */
	cuse_lock();
	if (pcsd->kern_dev != NULL)
		pcsd->kern_dev->si_drv1 = NULL;

	/* shut down all clients attached to this device */
	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
		if (pcc->server_dev == pcsd)
			cuse_client_is_closing(pcc);
	}
	cuse_unlock();

	/* destroy device, if any */
	if (pcsd->kern_dev != NULL) {
		/* destroy device synchronously */
		destroy_dev(pcsd->kern_dev);
	}
	free(pcsd, M_CUSE);
}
645 
/*
 * Drop one reference on a server; the final reference tears down all
 * of its devices, unit numbers, memory segments and the server
 * structure itself.  Registered as the cdevpriv destructor for
 * /dev/cuse and also called when a client releases its server
 * reference.
 */
static void
cuse_server_free(void *arg)
{
	struct cuse_server *pcs = arg;
	struct cuse_server_dev *pcsd;

	cuse_lock();
	pcs->refs--;
	if (pcs->refs != 0) {
		/* other holders remain; defer teardown */
		cuse_unlock();
		return;
	}
	cuse_server_is_closing(pcs);
	/* final client wakeup, if any */
	cuse_server_wakeup_all_client_locked(pcs);

	TAILQ_REMOVE(&cuse_server_head, pcs, entry);

	/* release every unit number held by this server */
	cuse_free_unit_by_id_locked(pcs, -1);

	/* destroy_dev() may sleep, so drop the lock around each device */
	while ((pcsd = TAILQ_FIRST(&pcs->hdev)) != NULL) {
		TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
		cuse_unlock();
		cuse_server_free_dev(pcsd);
		cuse_lock();
	}

	cuse_server_free_memory(pcs);

	knlist_clear(&pcs->selinfo.si_note, 1);
	knlist_destroy(&pcs->selinfo.si_note);

	cuse_unlock();

	seldrain(&pcs->selinfo);

	cv_destroy(&pcs->cv);

	free(pcs, M_CUSE);
}
686 
/*
 * Open of /dev/cuse: allocate a new server instance, attach it to
 * the file handle via cdevpriv and link it on the global list.
 */
static int
cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
{
	struct cuse_server *pcs;

	pcs = malloc(sizeof(*pcs), M_CUSE, M_WAITOK | M_ZERO);
	if (pcs == NULL)
		return (ENOMEM);

	/* cuse_server_free() runs when the file handle goes away */
	if (devfs_set_cdevpriv(pcs, &cuse_server_free)) {
		printf("Cuse: Cannot set cdevpriv.\n");
		free(pcs, M_CUSE);
		return (ENOMEM);
	}

	/* store current process ID */
	pcs->pid = curproc->p_pid;

	TAILQ_INIT(&pcs->head);
	TAILQ_INIT(&pcs->hdev);
	TAILQ_INIT(&pcs->hcli);

	cv_init(&pcs->cv, "cuse-server-cv");

	knlist_init_mtx(&pcs->selinfo.si_note, &cuse_mtx);

	cuse_lock();
	pcs->refs++;
	TAILQ_INSERT_TAIL(&cuse_server_head, pcs, entry);
	cuse_unlock();

	return (0);
}
720 
/*
 * Close of /dev/cuse: begin server shutdown and wake up all clients.
 * The actual teardown happens in cuse_server_free() once the last
 * reference is dropped.  Always returns zero.
 */
static int
cuse_server_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct cuse_server *pcs;
	int error;

	error = cuse_server_get(&pcs);
	if (error != 0)
		goto done;

	cuse_lock();
	cuse_server_is_closing(pcs);
	/* final client wakeup, if any */
	cuse_server_wakeup_all_client_locked(pcs);

	knlist_clear(&pcs->selinfo.si_note, 1);
	cuse_unlock();

done:
	return (0);
}
742 
/* read() is not supported on the server control node. */
static int
cuse_server_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	return (ENXIO);
}
748 
/* write() is not supported on the server control node. */
static int
cuse_server_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	return (ENXIO);
}
754 
/*
 * Copy data between the server process and a client's kernel-side
 * ioctl buffer.  "peer_ptr" is a pseudo pointer inside the CUSE
 * buffer range which selects an offset into the client's
 * ioctl_buffer.  The global CUSE lock is dropped around the user
 * copy; "proc_refs" keeps the command slot's peer state alive
 * meanwhile.  Called with the global CUSE lock held.
 */
static int
cuse_server_ioctl_copy_locked(struct cuse_client_command *pccmd,
    struct cuse_data_chunk *pchk, int isread)
{
	struct proc *p_proc;
	uint32_t offset;
	int error;

	offset = pchk->peer_ptr - CUSE_BUF_MIN_PTR;

	/* range-check the requested chunk against the ioctl buffer */
	if (pchk->length > CUSE_BUFFER_MAX)
		return (EFAULT);

	if (offset >= CUSE_BUFFER_MAX)
		return (EFAULT);

	if ((offset + pchk->length) > CUSE_BUFFER_MAX)
		return (EFAULT);

	/* the peer must still be waiting for command completion */
	p_proc = pccmd->proc_curr;
	if (p_proc == NULL)
		return (ENXIO);

	/* guard against reference count overflow */
	if (pccmd->proc_refs < 0)
		return (ENOMEM);

	pccmd->proc_refs++;

	cuse_unlock();

	if (isread == 0) {
		error = copyin(
		    (void *)pchk->local_ptr,
		    pccmd->client->ioctl_buffer + offset,
		    pchk->length);
	} else {
		error = copyout(
		    pccmd->client->ioctl_buffer + offset,
		    (void *)pchk->local_ptr,
		    pchk->length);
	}

	cuse_lock();

	pccmd->proc_refs--;

	/* wake the peer if it is draining references before returning */
	if (pccmd->proc_curr == NULL)
		cv_signal(&pccmd->cv);

	return (error);
}
806 
/*
 * Copy "len" bytes from address "data_s" in process "proc_s" to
 * address "data_d" in process "proc_d" using proc_rwmem().  One of
 * the two processes must be the current one; otherwise EINVAL is
 * returned.
 */
static int
cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
    struct proc *proc_d, vm_offset_t data_d, size_t len)
{
	struct thread *td;
	struct proc *proc_cur;
	int error;

	td = curthread;
	proc_cur = td->td_proc;

	if (proc_cur == proc_d) {
		/* read from the foreign source into our destination */
		struct iovec iov = {
			.iov_base = (caddr_t)data_d,
			.iov_len = len,
		};
		struct uio uio = {
			.uio_iov = &iov,
			.uio_iovcnt = 1,
			.uio_offset = (off_t)data_s,
			.uio_resid = len,
			.uio_segflg = UIO_USERSPACE,
			.uio_rw = UIO_READ,
			.uio_td = td,
		};

		/* hold the foreign process while accessing its memory */
		PHOLD(proc_s);
		error = proc_rwmem(proc_s, &uio);
		PRELE(proc_s);

	} else if (proc_cur == proc_s) {
		/* write from our source into the foreign destination */
		struct iovec iov = {
			.iov_base = (caddr_t)data_s,
			.iov_len = len,
		};
		struct uio uio = {
			.uio_iov = &iov,
			.uio_iovcnt = 1,
			.uio_offset = (off_t)data_d,
			.uio_resid = len,
			.uio_segflg = UIO_USERSPACE,
			.uio_rw = UIO_WRITE,
			.uio_td = td,
		};

		/* hold the foreign process while accessing its memory */
		PHOLD(proc_d);
		error = proc_rwmem(proc_d, &uio);
		PRELE(proc_d);
	} else {
		error = EINVAL;
	}
	return (error);
}
860 
/*
 * Copy a data chunk directly between the server process (current)
 * and the client's peer process.  The global CUSE lock is dropped
 * around the copy; "proc_refs" keeps the peer process pointer alive
 * meanwhile.  Called with the global CUSE lock held.
 */
static int
cuse_server_data_copy_locked(struct cuse_client_command *pccmd,
    struct cuse_data_chunk *pchk, int isread)
{
	struct proc *p_proc;
	int error;

	/* the peer must still be waiting for command completion */
	p_proc = pccmd->proc_curr;
	if (p_proc == NULL)
		return (ENXIO);

	/* guard against reference count overflow */
	if (pccmd->proc_refs < 0)
		return (ENOMEM);

	pccmd->proc_refs++;

	cuse_unlock();

	if (isread == 0) {
		error = cuse_proc2proc_copy(
		    curthread->td_proc, pchk->local_ptr,
		    p_proc, pchk->peer_ptr,
		    pchk->length);
	} else {
		error = cuse_proc2proc_copy(
		    p_proc, pchk->peer_ptr,
		    curthread->td_proc, pchk->local_ptr,
		    pchk->length);
	}

	cuse_lock();

	pccmd->proc_refs--;

	/* wake the peer if it is draining references before returning */
	if (pccmd->proc_curr == NULL)
		cv_signal(&pccmd->cv);

	return (error);
}
900 
/*
 * Allocate the lowest free unit number for the given device ID.
 * Returns the unit number on success and -1 when no unit number or
 * table slot is available.  Called with the global CUSE lock held.
 */
static int
cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
{
	int n;
	int x = 0;
	int match;

	/* rescan until "x" is not taken by any unit with the same ID bits */
	do {
		for (match = n = 0; n != CUSE_DEVICES_MAX; n++) {
			if (cuse_alloc_unit[n] != NULL) {
				/* skip entries with different ID bits */
				if ((cuse_alloc_unit_id[n] ^ id) & CUSE_ID_MASK)
					continue;
				if ((cuse_alloc_unit_id[n] & ~CUSE_ID_MASK) == x) {
					x++;
					match = 1;
				}
			}
		}
	} while (match);

	/* unit numbers above 255 are not handed out */
	if (x < 256) {
		/* claim the first free table slot */
		for (n = 0; n != CUSE_DEVICES_MAX; n++) {
			if (cuse_alloc_unit[n] == NULL) {
				cuse_alloc_unit[n] = pcs;
				cuse_alloc_unit_id[n] = id | x;
				return (x);
			}
		}
	}
	return (-1);
}
932 
/*
 * Notify poll/select and kqueue consumers attached to this server.
 * Called with the global CUSE lock held.
 */
static void
cuse_server_wakeup_locked(struct cuse_server *pcs)
{
	selwakeup(&pcs->selinfo);
	KNOTE_LOCKED(&pcs->selinfo.si_note, 0);
}
939 
/*
 * Flag every client for re-polling in both directions and wake up
 * all poll/kqueue waiters.  Called with the global CUSE lock held.
 */
static void
cuse_server_wakeup_all_client_locked(struct cuse_server *pcs)
{
	struct cuse_client *pcc;

	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
		pcc->cflags |= (CUSE_CLI_KNOTE_NEED_READ |
		    CUSE_CLI_KNOTE_NEED_WRITE);
	}
	cuse_server_wakeup_locked(pcs);
}
951 
952 static int
953 cuse_free_unit_by_id_locked(struct cuse_server *pcs, int id)
954 {
955 	int n;
956 	int found = 0;
957 
958 	for (n = 0; n != CUSE_DEVICES_MAX; n++) {
959 		if (cuse_alloc_unit[n] == pcs) {
960 			if (cuse_alloc_unit_id[n] == id || id == -1) {
961 				cuse_alloc_unit[n] = NULL;
962 				cuse_alloc_unit_id[n] = 0;
963 				found = 1;
964 			}
965 		}
966 	}
967 
968 	return (found ? 0 : EINVAL);
969 }
970 
/*
 * Control interface for the userland CUSE server process: fetching
 * and completing client commands, unit number and shared memory
 * management, and creation/destruction of character devices.
 */
static int
cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
    caddr_t data, int fflag, struct thread *td)
{
	struct cuse_server *pcs;
	int error;

	error = cuse_server_get(&pcs);
	if (error != 0)
		return (error);

	switch (cmd) {
		struct cuse_client_command *pccmd;
		struct cuse_client *pcc;
		struct cuse_command *pcmd;
		struct cuse_alloc_info *pai;
		struct cuse_create_dev *pcd;
		struct cuse_server_dev *pcsd;
		struct cuse_data_chunk *pchk;
		int n;

	case CUSE_IOCTL_GET_COMMAND:
		pcmd = (void *)data;

		cuse_lock();

		/* sleep until a client command is queued */
		while ((pccmd = TAILQ_FIRST(&pcs->head)) == NULL) {
			error = cv_wait_sig(&pcs->cv, &cuse_mtx);

			if (pcs->is_closing)
				error = ENXIO;

			if (error) {
				cuse_unlock();
				return (error);
			}
		}

		TAILQ_REMOVE(&pcs->head, pccmd, entry);
		pccmd->entry.tqe_prev = NULL;

		/* remember which server thread is processing this command */
		pccmd->entered = curthread;

		*pcmd = pccmd->sub;

		cuse_unlock();

		break;

	case CUSE_IOCTL_SYNC_COMMAND:

		cuse_lock();
		/* complete every command entered by this thread */
		while ((pccmd = cuse_server_find_command(pcs, curthread)) != NULL) {

			/* send sync command */
			pccmd->entered = NULL;
			pccmd->error = *(int *)data;
			pccmd->command = CUSE_CMD_SYNC;

			/* signal peer, if any */
			cv_signal(&pccmd->cv);
		}
		cuse_unlock();

		break;

	case CUSE_IOCTL_ALLOC_UNIT:

		cuse_lock();
		n = cuse_alloc_unit_by_id_locked(pcs,
		    CUSE_ID_DEFAULT(0));
		cuse_unlock();

		if (n < 0)
			error = ENOMEM;
		else
			*(int *)data = n;
		break;

	case CUSE_IOCTL_ALLOC_UNIT_BY_ID:

		n = *(int *)data;

		/* only the ID bits may be supplied by the caller */
		n = (n & CUSE_ID_MASK);

		cuse_lock();
		n = cuse_alloc_unit_by_id_locked(pcs, n);
		cuse_unlock();

		if (n < 0)
			error = ENOMEM;
		else
			*(int *)data = n;
		break;

	case CUSE_IOCTL_FREE_UNIT:

		n = *(int *)data;

		n = CUSE_ID_DEFAULT(n);

		cuse_lock();
		error = cuse_free_unit_by_id_locked(pcs, n);
		cuse_unlock();
		break;

	case CUSE_IOCTL_FREE_UNIT_BY_ID:

		n = *(int *)data;

		cuse_lock();
		error = cuse_free_unit_by_id_locked(pcs, n);
		cuse_unlock();
		break;

	case CUSE_IOCTL_ALLOC_MEMORY:

		pai = (void *)data;

		/* validate segment index and size before allocating */
		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
			error = ENOMEM;
			break;
		}
		if (pai->page_count > CUSE_ALLOC_PAGES_MAX) {
			error = ENOMEM;
			break;
		}
		error = cuse_server_alloc_memory(pcs,
		    &cuse_mem[pai->alloc_nr], pai->page_count);
		break;

	case CUSE_IOCTL_FREE_MEMORY:
		pai = (void *)data;

		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
			error = ENOMEM;
			break;
		}
		/* we trust the character device driver in this case */

		cuse_lock();
		if (cuse_mem[pai->alloc_nr].owner == pcs) {
			cuse_mem[pai->alloc_nr].is_allocated = 0;
			cuse_mem[pai->alloc_nr].owner = NULL;
		} else {
			error = EINVAL;
		}
		cuse_unlock();
		break;

	case CUSE_IOCTL_GET_SIG:

		cuse_lock();
		pccmd = cuse_server_find_command(pcs, curthread);

		/* fetch and clear the pending signal indication */
		if (pccmd != NULL) {
			n = pccmd->got_signal;
			pccmd->got_signal = 0;
		} else {
			n = 0;
		}
		cuse_unlock();

		*(int *)data = n;

		break;

	case CUSE_IOCTL_SET_PFH:

		cuse_lock();
		pccmd = cuse_server_find_command(pcs, curthread);

		if (pccmd != NULL) {
			/* update the per-file handle for all command slots */
			pcc = pccmd->client;
			for (n = 0; n != CUSE_CMD_MAX; n++) {
				pcc->cmds[n].sub.per_file_handle = *(uintptr_t *)data;
			}
		} else {
			error = ENXIO;
		}
		cuse_unlock();
		break;

	case CUSE_IOCTL_CREATE_DEV:

		/* device creation requires driver privileges */
		error = priv_check(curthread, PRIV_DRIVER);
		if (error)
			break;

		pcd = (void *)data;

		/* filter input */

		pcd->devname[sizeof(pcd->devname) - 1] = 0;

		if (pcd->devname[0] == 0) {
			error = EINVAL;
			break;
		}
		cuse_str_filter(pcd->devname);

		pcd->permissions &= 0777;

		/* try to allocate a character device */

		pcsd = malloc(sizeof(*pcsd), M_CUSE, M_WAITOK | M_ZERO);

		if (pcsd == NULL) {
			error = ENOMEM;
			break;
		}
		pcsd->server = pcs;

		pcsd->user_dev = pcd->dev;

		pcsd->kern_dev = make_dev_credf(MAKEDEV_CHECKNAME,
		    &cuse_client_devsw, 0, NULL, pcd->user_id, pcd->group_id,
		    pcd->permissions, "%s", pcd->devname);

		if (pcsd->kern_dev == NULL) {
			free(pcsd, M_CUSE);
			error = ENOMEM;
			break;
		}
		pcsd->kern_dev->si_drv1 = pcsd;

		cuse_lock();
		TAILQ_INSERT_TAIL(&pcs->hdev, pcsd, entry);
		cuse_unlock();

		break;

	case CUSE_IOCTL_DESTROY_DEV:

		/* device destruction requires driver privileges */
		error = priv_check(curthread, PRIV_DRIVER);
		if (error)
			break;

		cuse_lock();

		error = EINVAL;

		pcsd = TAILQ_FIRST(&pcs->hdev);
		while (pcsd != NULL) {
			if (pcsd->user_dev == *(struct cuse_dev **)data) {
				TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
				/* the lock is dropped; restart the scan after */
				cuse_unlock();
				cuse_server_free_dev(pcsd);
				cuse_lock();
				error = 0;
				pcsd = TAILQ_FIRST(&pcs->hdev);
			} else {
				pcsd = TAILQ_NEXT(pcsd, entry);
			}
		}

		cuse_unlock();
		break;

	case CUSE_IOCTL_WRITE_DATA:
	case CUSE_IOCTL_READ_DATA:

		cuse_lock();
		pchk = (struct cuse_data_chunk *)data;

		pccmd = cuse_server_find_command(pcs, curthread);

		if (pccmd == NULL) {
			error = ENXIO;	/* invalid request */
		} else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
			error = EFAULT;	/* NULL pointer */
		} else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
			/* pseudo pointer into the client's ioctl buffer */
			error = cuse_server_ioctl_copy_locked(pccmd,
			    pchk, cmd == CUSE_IOCTL_READ_DATA);
		} else {
			/* real pointer into the peer process */
			error = cuse_server_data_copy_locked(pccmd,
			    pchk, cmd == CUSE_IOCTL_READ_DATA);
		}
		cuse_unlock();
		break;

	case CUSE_IOCTL_SELWAKEUP:
		cuse_lock();
		/*
		 * We don't know which direction caused the event.
		 * Wakeup both!
		 */
		cuse_server_wakeup_all_client_locked(pcs);
		cuse_unlock();
		break;

	default:
		error = ENXIO;
		break;
	}
	return (error);
}
1268 
1269 static int
1270 cuse_server_poll(struct cdev *dev, int events, struct thread *td)
1271 {
1272 	return (events & (POLLHUP | POLLPRI | POLLIN |
1273 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1274 }
1275 
/*
 * Translate an mmap() offset on /dev/cuse into the physical address
 * of a page inside one of the shared memory segments.  The offset
 * space is divided into CUSE_ALLOC_PAGES_MAX-page windows, one per
 * segment.
 */
static int
cuse_server_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr)
{
	uint32_t page_nr = offset / PAGE_SIZE;
	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
	struct cuse_memory *mem;
	struct cuse_server *pcs;
	uint8_t *ptr;
	int error;

	if (alloc_nr >= CUSE_ALLOC_UNIT_MAX)
		return (ENOMEM);

	error = cuse_server_get(&pcs);
	if (error != 0)
		pcs = NULL;

	cuse_lock();
	mem = &cuse_mem[alloc_nr];

	/* try to enforce slight ownership */
	if ((pcs != NULL) && (mem->owner != pcs)) {
		cuse_unlock();
		return (EINVAL);
	}
	if (mem->virtaddr == NULL) {
		cuse_unlock();
		return (ENOMEM);
	}
	/* segment is still being allocated by another thread */
	if (mem->virtaddr == NBUSY) {
		cuse_unlock();
		return (ENOMEM);
	}
	page_nr %= CUSE_ALLOC_PAGES_MAX;

	if (page_nr >= mem->page_count) {
		cuse_unlock();
		return (ENXIO);
	}
	ptr = mem->virtaddr + (page_nr * PAGE_SIZE);
	cuse_unlock();

	*paddr = vtophys(ptr);

	return (0);
}
1322 
1323 /*------------------------------------------------------------------------*
1324  *	CUSE CLIENT PART
1325  *------------------------------------------------------------------------*/
/*
 * cdevpriv destructor for a CUSE client.
 *
 * Marks the client as closing, unlinks it from the server's client
 * list, tears down the per-command locks and condition variables,
 * frees the client structure and finally drops the client's reference
 * on the server.
 */
static void
cuse_client_free(void *arg)
{
	struct cuse_client *pcc = arg;
	struct cuse_client_command *pccmd;
	struct cuse_server *pcs;
	int n;

	cuse_lock();
	cuse_client_is_closing(pcc);
	TAILQ_REMOVE(&pcc->server->hcli, pcc, entry);
	cuse_unlock();

	/* destroy per-command synchronisation primitives */
	for (n = 0; n != CUSE_CMD_MAX; n++) {

		pccmd = &pcc->cmds[n];

		sx_destroy(&pccmd->sx);
		cv_destroy(&pccmd->cv);
	}

	/* remember the server pointer before freeing the client */
	pcs = pcc->server;

	free(pcc, M_CUSE);

	/* drop reference on server */
	cuse_server_free(pcs);
}
1354 
1355 static int
1356 cuse_client_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
1357 {
1358 	struct cuse_client_command *pccmd;
1359 	struct cuse_server_dev *pcsd;
1360 	struct cuse_client *pcc;
1361 	struct cuse_server *pcs;
1362 	struct cuse_dev *pcd;
1363 	int error;
1364 	int n;
1365 
1366 	cuse_lock();
1367 	pcsd = dev->si_drv1;
1368 	if (pcsd != NULL) {
1369 		pcs = pcsd->server;
1370 		pcd = pcsd->user_dev;
1371 		/*
1372 		 * Check that the refcount didn't wrap and that the
1373 		 * same process is not both client and server. This
1374 		 * can easily lead to deadlocks when destroying the
1375 		 * CUSE character device nodes:
1376 		 */
1377 		pcs->refs++;
1378 		if (pcs->refs < 0 || pcs->pid == curproc->p_pid) {
1379 			/* overflow or wrong PID */
1380 			pcs->refs--;
1381 			pcsd = NULL;
1382 		}
1383 	} else {
1384 		pcs = NULL;
1385 		pcd = NULL;
1386 	}
1387 	cuse_unlock();
1388 
1389 	if (pcsd == NULL)
1390 		return (EINVAL);
1391 
1392 	pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
1393 	if (pcc == NULL) {
1394 		/* drop reference on server */
1395 		cuse_server_free(pcs);
1396 		return (ENOMEM);
1397 	}
1398 	if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
1399 		printf("Cuse: Cannot set cdevpriv.\n");
1400 		/* drop reference on server */
1401 		cuse_server_free(pcs);
1402 		free(pcc, M_CUSE);
1403 		return (ENOMEM);
1404 	}
1405 	pcc->fflags = fflags;
1406 	pcc->server_dev = pcsd;
1407 	pcc->server = pcs;
1408 
1409 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1410 
1411 		pccmd = &pcc->cmds[n];
1412 
1413 		pccmd->sub.dev = pcd;
1414 		pccmd->sub.command = n;
1415 		pccmd->client = pcc;
1416 
1417 		sx_init(&pccmd->sx, "cuse-client-sx");
1418 		cv_init(&pccmd->cv, "cuse-client-cv");
1419 	}
1420 
1421 	cuse_lock();
1422 
1423 	/* cuse_client_free() assumes that the client is listed somewhere! */
1424 	/* always enqueue */
1425 
1426 	TAILQ_INSERT_TAIL(&pcs->hcli, pcc, entry);
1427 
1428 	/* check if server is closing */
1429 	if ((pcs->is_closing != 0) || (dev->si_drv1 == NULL)) {
1430 		error = EINVAL;
1431 	} else {
1432 		error = 0;
1433 	}
1434 	cuse_unlock();
1435 
1436 	if (error) {
1437 		devfs_clear_cdevpriv();	/* XXX bugfix */
1438 		return (error);
1439 	}
1440 	pccmd = &pcc->cmds[CUSE_CMD_OPEN];
1441 
1442 	cuse_cmd_lock(pccmd);
1443 
1444 	cuse_lock();
1445 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1446 
1447 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1448 	cuse_unlock();
1449 
1450 	if (error < 0) {
1451 		error = cuse_convert_error(error);
1452 	} else {
1453 		error = 0;
1454 	}
1455 
1456 	cuse_cmd_unlock(pccmd);
1457 
1458 	if (error)
1459 		devfs_clear_cdevpriv();	/* XXX bugfix */
1460 
1461 	return (error);
1462 }
1463 
/*
 * Close handler for a CUSE managed character device.
 *
 * Forwards a CUSE_CMD_CLOSE command to the user-space server and then
 * marks the client as closing. Always returns zero; errors from the
 * server are intentionally ignored because the close cannot be undone.
 */
static int
cuse_client_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct cuse_client_command *pccmd;
	struct cuse_client *pcc;
	int error;

	error = cuse_client_get(&pcc);
	if (error != 0)
		return (0);

	pccmd = &pcc->cmds[CUSE_CMD_CLOSE];

	cuse_cmd_lock(pccmd);

	/* notify the server; the result is discarded */
	cuse_lock();
	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);

	error = cuse_client_receive_command_locked(pccmd, 0, 0);
	cuse_unlock();

	cuse_cmd_unlock(pccmd);

	cuse_lock();
	cuse_client_is_closing(pcc);
	cuse_unlock();

	return (0);
}
1493 
/*
 * Refresh the kqueue event state for a client.
 *
 * If the client has read or write knotes attached, query the server's
 * current polling state and latch the NEED_READ/NEED_WRITE flags
 * accordingly, waking up any "knote" waiters when an event is pending.
 */
static void
cuse_client_kqfilter_poll(struct cdev *dev, struct cuse_client *pcc)
{
	int temp;

	/* atomically sample the HAS flags and clear the NEED flags */
	cuse_lock();
	temp = (pcc->cflags & (CUSE_CLI_KNOTE_HAS_READ |
	    CUSE_CLI_KNOTE_HAS_WRITE));
	pcc->cflags &= ~(CUSE_CLI_KNOTE_NEED_READ |
	    CUSE_CLI_KNOTE_NEED_WRITE);
	cuse_unlock();

	if (temp != 0) {
		/* get the latest polling state from the server */
		temp = cuse_client_poll(dev, POLLIN | POLLOUT, NULL);

		cuse_lock();
		if (temp & (POLLIN | POLLOUT)) {
			if (temp & POLLIN)
				pcc->cflags |= CUSE_CLI_KNOTE_NEED_READ;
			if (temp & POLLOUT)
				pcc->cflags |= CUSE_CLI_KNOTE_NEED_WRITE;

			/* make sure the "knote" gets woken up */
			cuse_server_wakeup_locked(pcc->server);
		}
		cuse_unlock();
	}
}
1523 
/*
 * Read handler for a CUSE managed character device.
 *
 * Each iovec is forwarded to the user-space server as a CUSE_CMD_READ
 * command. The uio is switched to UIO_NOCOPY because the actual data
 * transfer is performed by the server process through the
 * CUSE_IOCTL_READ_DATA/CUSE_IOCTL_WRITE_DATA server ioctls; uiomove()
 * is then only used to advance the uio past the transferred bytes.
 *
 * Returns zero on success or a kernel error code.
 */
static int
cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct cuse_client_command *pccmd;
	struct cuse_client *pcc;
	int error;
	int len;

	error = cuse_client_get(&pcc);
	if (error != 0)
		return (error);

	pccmd = &pcc->cmds[CUSE_CMD_READ];

	/* only plain user-space buffers are supported */
	if (uio->uio_segflg != UIO_USERSPACE) {
		return (EINVAL);
	}
	uio->uio_segflg = UIO_NOCOPY;

	cuse_cmd_lock(pccmd);

	while (uio->uio_resid != 0) {

		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
			error = ENOMEM;
			break;
		}

		len = uio->uio_iov->iov_len;

		/* pass the user buffer pointer and length to the server */
		cuse_lock();
		cuse_client_send_command_locked(pccmd,
		    (uintptr_t)uio->uio_iov->iov_base,
		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);

		/* a negative return value is a CUSE error code */
		error = cuse_client_receive_command_locked(pccmd, 0, 0);
		cuse_unlock();

		if (error < 0) {
			error = cuse_convert_error(error);
			break;
		} else if (error == len) {
			/* whole iovec transferred - advance and continue */
			error = uiomove(NULL, error, uio);
			if (error)
				break;
		} else {
			/* partial transfer - advance and stop */
			error = uiomove(NULL, error, uio);
			break;
		}
	}
	cuse_cmd_unlock(pccmd);

	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */

	/* let attached knotes re-evaluate the polling state */
	if (error == EWOULDBLOCK)
		cuse_client_kqfilter_poll(dev, pcc);

	return (error);
}
1583 
/*
 * Write handler for a CUSE managed character device.
 *
 * Mirror image of cuse_client_read(): each iovec is forwarded to the
 * user-space server as a CUSE_CMD_WRITE command, with UIO_NOCOPY set
 * because the server performs the actual data transfer through its
 * data-copy ioctls; uiomove() only advances the uio.
 *
 * Returns zero on success or a kernel error code.
 */
static int
cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct cuse_client_command *pccmd;
	struct cuse_client *pcc;
	int error;
	int len;

	error = cuse_client_get(&pcc);
	if (error != 0)
		return (error);

	pccmd = &pcc->cmds[CUSE_CMD_WRITE];

	/* only plain user-space buffers are supported */
	if (uio->uio_segflg != UIO_USERSPACE) {
		return (EINVAL);
	}
	uio->uio_segflg = UIO_NOCOPY;

	cuse_cmd_lock(pccmd);

	while (uio->uio_resid != 0) {

		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
			error = ENOMEM;
			break;
		}

		len = uio->uio_iov->iov_len;

		/* pass the user buffer pointer and length to the server */
		cuse_lock();
		cuse_client_send_command_locked(pccmd,
		    (uintptr_t)uio->uio_iov->iov_base,
		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);

		/* a negative return value is a CUSE error code */
		error = cuse_client_receive_command_locked(pccmd, 0, 0);
		cuse_unlock();

		if (error < 0) {
			error = cuse_convert_error(error);
			break;
		} else if (error == len) {
			/* whole iovec transferred - advance and continue */
			error = uiomove(NULL, error, uio);
			if (error)
				break;
		} else {
			/* partial transfer - advance and stop */
			error = uiomove(NULL, error, uio);
			break;
		}
	}
	cuse_cmd_unlock(pccmd);

	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */

	/* let attached knotes re-evaluate the polling state */
	if (error == EWOULDBLOCK)
		cuse_client_kqfilter_poll(dev, pcc);

	return (error);
}
1643 
/*
 * Ioctl handler for a CUSE managed character device.
 *
 * The ioctl argument data is staged in the per-client ioctl_buffer:
 * copied in before the command is sent (IOC_IN/IOC_VOID) and copied
 * back out after completion (IOC_OUT). The request itself is forwarded
 * to the user-space server as a CUSE_CMD_IOCTL command.
 *
 * Returns zero on success or a kernel error code.
 */
int
cuse_client_ioctl(struct cdev *dev, unsigned long cmd,
    caddr_t data, int fflag, struct thread *td)
{
	struct cuse_client_command *pccmd;
	struct cuse_client *pcc;
	int error;
	int len;

	error = cuse_client_get(&pcc);
	if (error != 0)
		return (error);

	/* the argument must fit into the staging buffer */
	len = IOCPARM_LEN(cmd);
	if (len > CUSE_BUFFER_MAX)
		return (ENOMEM);

	pccmd = &pcc->cmds[CUSE_CMD_IOCTL];

	cuse_cmd_lock(pccmd);

	if (cmd & (IOC_IN | IOC_VOID))
		memcpy(pcc->ioctl_buffer, data, len);

	/*
	 * When the ioctl-length is zero drivers can pass information
	 * through the data pointer of the ioctl. Make sure this information
	 * is forwarded to the driver.
	 */

	cuse_lock();
	cuse_client_send_command_locked(pccmd,
	    (len == 0) ? *(long *)data : CUSE_BUF_MIN_PTR,
	    (unsigned long)cmd, pcc->fflags,
	    (fflag & O_NONBLOCK) ? IO_NDELAY : 0);

	error = cuse_client_receive_command_locked(pccmd, data, len);
	cuse_unlock();

	/* a negative return value is a CUSE error code */
	if (error < 0) {
		error = cuse_convert_error(error);
	} else {
		error = 0;
	}

	/* copy out the result, even on error, like the kernel does */
	if (cmd & IOC_OUT)
		memcpy(data, pcc->ioctl_buffer, len);

	cuse_cmd_unlock(pccmd);

	/* let attached knotes re-evaluate the polling state */
	if (error == EWOULDBLOCK)
		cuse_client_kqfilter_poll(dev, pcc);

	return (error);
}
1699 
/*
 * Poll handler for a CUSE managed character device.
 *
 * Translates the requested poll events into CUSE_POLL_* bits, forwards
 * them to the user-space server as a CUSE_CMD_POLL command and maps
 * the answer back into revents. On any failure all supported events
 * are reported as ready, because many clients do not handle POLLNVAL.
 */
static int
cuse_client_poll(struct cdev *dev, int events, struct thread *td)
{
	struct cuse_client_command *pccmd;
	struct cuse_client *pcc;
	unsigned long temp;
	int error;
	int revents;

	error = cuse_client_get(&pcc);
	if (error != 0)
		goto pollnval;

	/* map poll(2) events onto the CUSE poll bits */
	temp = 0;

	if (events & (POLLPRI | POLLIN | POLLRDNORM))
		temp |= CUSE_POLL_READ;

	if (events & (POLLOUT | POLLWRNORM))
		temp |= CUSE_POLL_WRITE;

	if (events & POLLHUP)
		temp |= CUSE_POLL_ERROR;

	pccmd = &pcc->cmds[CUSE_CMD_POLL];

	cuse_cmd_lock(pccmd);

	/* Need to selrecord() first to not lose any events. */
	if (temp != 0 && td != NULL)
		selrecord(td, &pcc->server->selinfo);

	cuse_lock();
	cuse_client_send_command_locked(pccmd,
	    0, temp, pcc->fflags, IO_NDELAY);

	error = cuse_client_receive_command_locked(pccmd, 0, 0);
	cuse_unlock();

	cuse_cmd_unlock(pccmd);

	if (error < 0) {
		goto pollnval;
	} else {
		/* map the server's CUSE poll bits back onto revents */
		revents = 0;
		if (error & CUSE_POLL_READ)
			revents |= (events & (POLLPRI | POLLIN | POLLRDNORM));
		if (error & CUSE_POLL_WRITE)
			revents |= (events & (POLLOUT | POLLWRNORM));
		if (error & CUSE_POLL_ERROR)
			revents |= (events & POLLHUP);
	}
	return (revents);

 pollnval:
	/* XXX many clients don't understand POLLNVAL */
	return (events & (POLLHUP | POLLPRI | POLLIN |
	    POLLRDNORM | POLLOUT | POLLWRNORM));
}
1759 
/*
 * Memory-map handler for a CUSE managed character device.
 *
 * Mirror image of cuse_server_mmap(): the mmap offset selects an
 * allocation unit in "cuse_mem" and a page inside it, the ownership of
 * the unit is checked against the client's server, and the physical
 * address of the page is returned in *paddr.
 *
 * Returns zero on success or a kernel error code.
 */
static int
cuse_client_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr)
{
	uint32_t page_nr = offset / PAGE_SIZE;
	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
	struct cuse_memory *mem;
	struct cuse_server *pcs;
	struct cuse_client *pcc;
	uint8_t *ptr;
	int error;

	if (alloc_nr >= CUSE_ALLOC_UNIT_MAX)
		return (ENOMEM);

	error = cuse_client_get(&pcc);
	if (error != 0)
		pcs = NULL;	/* no client - ownership check below is skipped */
	else
		pcs = pcc->server;

	cuse_lock();
	mem = &cuse_mem[alloc_nr];

	/* try to enforce slight ownership */
	if ((pcs != NULL) && (mem->owner != pcs)) {
		cuse_unlock();
		return (EINVAL);
	}
	/* unit must be allocated */
	if (mem->virtaddr == NULL) {
		cuse_unlock();
		return (ENOMEM);
	}
	/* NBUSY presumably marks an allocation in progress - see allocator */
	if (mem->virtaddr == NBUSY) {
		cuse_unlock();
		return (ENOMEM);
	}
	page_nr %= CUSE_ALLOC_PAGES_MAX;

	/* the page must exist within the unit's allocated page count */
	if (page_nr >= mem->page_count) {
		cuse_unlock();
		return (ENXIO);
	}
	ptr = mem->virtaddr + (page_nr * PAGE_SIZE);
	cuse_unlock();

	*paddr = vtophys(ptr);

	return (0);
}
1809 
/*
 * Detach a read knote from the server's selinfo note list.
 */
static void
cuse_client_kqfilter_read_detach(struct knote *kn)
{
	struct cuse_client *pcc;

	cuse_lock();
	pcc = kn->kn_hook;
	knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
	cuse_unlock();
}
1820 
/*
 * Detach a write knote from the server's selinfo note list.
 */
static void
cuse_client_kqfilter_write_detach(struct knote *kn)
{
	struct cuse_client *pcc;

	cuse_lock();
	pcc = kn->kn_hook;
	knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
	cuse_unlock();
}
1831 
1832 static int
1833 cuse_client_kqfilter_read_event(struct knote *kn, long hint)
1834 {
1835 	struct cuse_client *pcc;
1836 
1837 	mtx_assert(&cuse_mtx, MA_OWNED);
1838 
1839 	pcc = kn->kn_hook;
1840 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_READ) ? 1 : 0);
1841 }
1842 
1843 static int
1844 cuse_client_kqfilter_write_event(struct knote *kn, long hint)
1845 {
1846 	struct cuse_client *pcc;
1847 
1848 	mtx_assert(&cuse_mtx, MA_OWNED);
1849 
1850 	pcc = kn->kn_hook;
1851 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_WRITE) ? 1 : 0);
1852 }
1853 
/*
 * kqueue attach handler for a CUSE managed character device.
 *
 * Registers the knote on the server's selinfo note list and marks the
 * client as having a read or write knote so cuse_client_kqfilter_poll()
 * keeps the NEED_READ/NEED_WRITE flags up to date.
 *
 * Returns zero on success or a kernel error code.
 */
static int
cuse_client_kqfilter(struct cdev *dev, struct knote *kn)
{
	struct cuse_client *pcc;
	struct cuse_server *pcs;
	int error;

	error = cuse_client_get(&pcc);
	if (error != 0)
		return (error);

	cuse_lock();
	pcs = pcc->server;
	switch (kn->kn_filter) {
	case EVFILT_READ:
		pcc->cflags |= CUSE_CLI_KNOTE_HAS_READ;
		kn->kn_hook = pcc;
		kn->kn_fop = &cuse_client_kqfilter_read_ops;
		knlist_add(&pcs->selinfo.si_note, kn, 1);
		break;
	case EVFILT_WRITE:
		pcc->cflags |= CUSE_CLI_KNOTE_HAS_WRITE;
		kn->kn_hook = pcc;
		kn->kn_fop = &cuse_client_kqfilter_write_ops;
		knlist_add(&pcs->selinfo.si_note, kn, 1);
		break;
	default:
		/* only read and write filters are supported */
		error = EINVAL;
		break;
	}
	cuse_unlock();

	/* prime the initial event state for the new knote */
	if (error == 0)
		cuse_client_kqfilter_poll(dev, pcc);
	return (error);
}
1890