xref: /freebsd/sys/fs/cuse/cuse.c (revision 907b59d76938e654f0d040a888e8dfca3de1e222)
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2010-2013 Hans Petter Selasky. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include "opt_compat.h"
28 
29 #include <sys/stdint.h>
30 #include <sys/stddef.h>
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/conf.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/linker_set.h>
38 #include <sys/module.h>
39 #include <sys/lock.h>
40 #include <sys/mutex.h>
41 #include <sys/condvar.h>
42 #include <sys/sysctl.h>
43 #include <sys/unistd.h>
44 #include <sys/malloc.h>
45 #include <sys/priv.h>
46 #include <sys/uio.h>
47 #include <sys/poll.h>
48 #include <sys/sx.h>
49 #include <sys/queue.h>
50 #include <sys/fcntl.h>
51 #include <sys/proc.h>
52 #include <sys/vnode.h>
53 #include <sys/selinfo.h>
54 #include <sys/ptrace.h>
55 
56 #include <machine/bus.h>
57 
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60 
61 #include <fs/cuse/cuse_defs.h>
62 #include <fs/cuse/cuse_ioctl.h>
63 
64 MODULE_VERSION(cuse, 1);
65 
66 #define	NBUSY	((uint8_t *)1)
67 
68 #ifdef FEATURE
69 FEATURE(cuse, "Userspace character devices");
70 #endif
71 
72 struct cuse_command;
73 struct cuse_server;
74 struct cuse_client;
75 
76 struct cuse_client_command {
77 	TAILQ_ENTRY(cuse_client_command) entry;
78 	struct cuse_command sub;
79 	struct sx sx;
80 	struct cv cv;
81 	struct thread *entered;
82 	struct cuse_client *client;
83 	struct proc *proc_curr;
84 	int	proc_refs;
85 	int	got_signal;
86 	int	error;
87 	int	command;
88 };
89 
90 struct cuse_memory {
91 	struct cuse_server *owner;
92 	uint8_t *virtaddr;
93 	uint32_t page_count;
94 	uint32_t is_allocated;
95 };
96 
97 struct cuse_server_dev {
98 	TAILQ_ENTRY(cuse_server_dev) entry;
99 	struct cuse_server *server;
100 	struct cdev *kern_dev;
101 	struct cuse_dev *user_dev;
102 };
103 
104 struct cuse_server {
105 	TAILQ_ENTRY(cuse_server) entry;
106 	TAILQ_HEAD(, cuse_client_command) head;
107 	TAILQ_HEAD(, cuse_server_dev) hdev;
108 	TAILQ_HEAD(, cuse_client) hcli;
109 	struct cv cv;
110 	struct selinfo selinfo;
111 	pid_t	pid;
112 	int	is_closing;
113 	int	refs;
114 };
115 
116 struct cuse_client {
117 	TAILQ_ENTRY(cuse_client) entry;
118 	TAILQ_ENTRY(cuse_client) entry_ref;
119 	struct cuse_client_command cmds[CUSE_CMD_MAX];
120 	struct cuse_server *server;
121 	struct cuse_server_dev *server_dev;
122 
123 	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);
124 
125 	int	fflags;		/* file flags */
126 	int	cflags;		/* client flags */
127 #define	CUSE_CLI_IS_CLOSING 0x01
128 #define	CUSE_CLI_KNOTE_NEED_READ 0x02
129 #define	CUSE_CLI_KNOTE_NEED_WRITE 0x04
130 #define	CUSE_CLI_KNOTE_HAS_READ 0x08
131 #define	CUSE_CLI_KNOTE_HAS_WRITE 0x10
132 };
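
/*
 * Object model: each open() of /dev/cuse creates one cuse_server,
 * representing the userspace daemon.  The server owns the character
 * devices it has created (cuse_server_dev, "hdev") and the per-open
 * client states on those devices (cuse_client, "hcli").  Every client
 * carries a fixed array of command slots, one per CUSE_CMD_* type;
 * slots with pending work are linked onto the server's "head" queue
 * and are picked up by the daemon via CUSE_IOCTL_GET_COMMAND.
 */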
133 
134 #define	CUSE_CLIENT_CLOSING(pcc) \
135     ((pcc)->cflags & CUSE_CLI_IS_CLOSING)
136 
137 static MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");
138 
139 static TAILQ_HEAD(, cuse_server) cuse_server_head;
140 static struct mtx cuse_mtx;
141 static struct cdev *cuse_dev;
142 static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
143 static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
144 static struct cuse_memory cuse_mem[CUSE_ALLOC_UNIT_MAX];
145 
146 static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);
147 static void cuse_client_kqfilter_read_detach(struct knote *kn);
148 static void cuse_client_kqfilter_write_detach(struct knote *kn);
149 static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
150 static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);
151 
152 static struct filterops cuse_client_kqfilter_read_ops = {
153 	.f_isfd = 1,
154 	.f_detach = cuse_client_kqfilter_read_detach,
155 	.f_event = cuse_client_kqfilter_read_event,
156 };
157 
158 static struct filterops cuse_client_kqfilter_write_ops = {
159 	.f_isfd = 1,
160 	.f_detach = cuse_client_kqfilter_write_detach,
161 	.f_event = cuse_client_kqfilter_write_event,
162 };
163 
164 static d_open_t cuse_client_open;
165 static d_close_t cuse_client_close;
166 static d_ioctl_t cuse_client_ioctl;
167 static d_read_t cuse_client_read;
168 static d_write_t cuse_client_write;
169 static d_poll_t cuse_client_poll;
170 static d_mmap_t cuse_client_mmap;
171 static d_kqfilter_t cuse_client_kqfilter;
172 
173 static struct cdevsw cuse_client_devsw = {
174 	.d_version = D_VERSION,
175 	.d_open = cuse_client_open,
176 	.d_close = cuse_client_close,
177 	.d_ioctl = cuse_client_ioctl,
178 	.d_name = "cuse_client",
179 	.d_flags = D_TRACKCLOSE,
180 	.d_read = cuse_client_read,
181 	.d_write = cuse_client_write,
182 	.d_poll = cuse_client_poll,
183 	.d_mmap = cuse_client_mmap,
184 	.d_kqfilter = cuse_client_kqfilter,
185 };
186 
187 static d_open_t cuse_server_open;
188 static d_close_t cuse_server_close;
189 static d_ioctl_t cuse_server_ioctl;
190 static d_read_t cuse_server_read;
191 static d_write_t cuse_server_write;
192 static d_poll_t cuse_server_poll;
193 static d_mmap_t cuse_server_mmap;
194 
195 static struct cdevsw cuse_server_devsw = {
196 	.d_version = D_VERSION,
197 	.d_open = cuse_server_open,
198 	.d_close = cuse_server_close,
199 	.d_ioctl = cuse_server_ioctl,
200 	.d_name = "cuse_server",
201 	.d_flags = D_TRACKCLOSE,
202 	.d_read = cuse_server_read,
203 	.d_write = cuse_server_write,
204 	.d_poll = cuse_server_poll,
205 	.d_mmap = cuse_server_mmap,
206 };
207 
208 static void cuse_client_is_closing(struct cuse_client *);
209 static int cuse_free_unit_by_id_locked(struct cuse_server *, int);
210 
211 static void
212 cuse_lock(void)
213 {
214 	mtx_lock(&cuse_mtx);
215 }
216 
217 static void
218 cuse_unlock(void)
219 {
220 	mtx_unlock(&cuse_mtx);
221 }
222 
223 static void
224 cuse_cmd_lock(struct cuse_client_command *pccmd)
225 {
226 	sx_xlock(&pccmd->sx);
227 }
228 
229 static void
230 cuse_cmd_unlock(struct cuse_client_command *pccmd)
231 {
232 	sx_xunlock(&pccmd->sx);
233 }
234 
235 static void
236 cuse_kern_init(void *arg)
237 {
238 	TAILQ_INIT(&cuse_server_head);
239 
240 	mtx_init(&cuse_mtx, "cuse-mtx", NULL, MTX_DEF);
241 
242 	cuse_dev = make_dev(&cuse_server_devsw, 0,
243 	    UID_ROOT, GID_OPERATOR, 0600, "cuse");
244 
245 	printf("Cuse v%d.%d.%d @ /dev/cuse\n",
246 	    (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
247 	    (CUSE_VERSION >> 0) & 0xFF);
248 }
249 
250 SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, 0);
251 
252 static void
253 cuse_kern_uninit(void *arg)
254 {
255 	void *ptr;
256 
257 	while (1) {
258 
259 		printf("Cuse: Please exit all /dev/cuse instances "
260 		    "and processes which have used this device.\n");
261 
262 		pause("DRAIN", 2 * hz);
263 
264 		cuse_lock();
265 		ptr = TAILQ_FIRST(&cuse_server_head);
266 		cuse_unlock();
267 
268 		if (ptr == NULL)
269 			break;
270 	}
271 
272 	if (cuse_dev != NULL)
273 		destroy_dev(cuse_dev);
274 
275 	mtx_destroy(&cuse_mtx);
276 }
277 
278 SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
279 
280 static int
281 cuse_server_get(struct cuse_server **ppcs)
282 {
283 	struct cuse_server *pcs;
284 	int error;
285 
286 	error = devfs_get_cdevpriv((void **)&pcs);
287 	if (error != 0) {
288 		*ppcs = NULL;
289 		return (error);
290 	}
291 	/* check if closing */
292 	cuse_lock();
293 	if (pcs->is_closing) {
294 		cuse_unlock();
295 		*ppcs = NULL;
296 		return (EINVAL);
297 	}
298 	cuse_unlock();
299 	*ppcs = pcs;
300 	return (0);
301 }
302 
303 static void
304 cuse_server_is_closing(struct cuse_server *pcs)
305 {
306 	struct cuse_client *pcc;
307 
308 	if (pcs->is_closing)
309 		return;
310 
311 	pcs->is_closing = 1;
312 
313 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
314 		cuse_client_is_closing(pcc);
315 	}
316 }
317 
318 static struct cuse_client_command *
319 cuse_server_find_command(struct cuse_server *pcs, struct thread *td)
320 {
321 	struct cuse_client *pcc;
322 	int n;
323 
324 	if (pcs->is_closing)
325 		goto done;
326 
327 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
328 		if (CUSE_CLIENT_CLOSING(pcc))
329 			continue;
330 		for (n = 0; n != CUSE_CMD_MAX; n++) {
331 			if (pcc->cmds[n].entered == td)
332 				return (&pcc->cmds[n]);
333 		}
334 	}
335 done:
336 	return (NULL);
337 }
338 
339 static void
340 cuse_str_filter(char *ptr)
341 {
342 	int c;
343 
344 	while (((c = *ptr) != 0)) {
345 
346 		if ((c >= 'a') && (c <= 'z')) {
347 			ptr++;
348 			continue;
349 		}
350 		if ((c >= 'A') && (c <= 'Z')) {
351 			ptr++;
352 			continue;
353 		}
354 		if ((c >= '0') && (c <= '9')) {
355 			ptr++;
356 			continue;
357 		}
358 		if ((c == '.') || (c == '_') || (c == '/')) {
359 			ptr++;
360 			continue;
361 		}
362 		*ptr = '_';
363 
364 		ptr++;
365 	}
366 }
367 
368 static int
369 cuse_convert_error(int error)
370 {
371 	;				/* indent fix */
372 	switch (error) {
373 	case CUSE_ERR_NONE:
374 		return (0);
375 	case CUSE_ERR_BUSY:
376 		return (EBUSY);
377 	case CUSE_ERR_WOULDBLOCK:
378 		return (EWOULDBLOCK);
379 	case CUSE_ERR_INVALID:
380 		return (EINVAL);
381 	case CUSE_ERR_NO_MEMORY:
382 		return (ENOMEM);
383 	case CUSE_ERR_FAULT:
384 		return (EFAULT);
385 	case CUSE_ERR_SIGNAL:
386 		return (EINTR);
387 	default:
388 		return (ENXIO);
389 	}
390 }
391 
392 static void
393 cuse_server_free_memory(struct cuse_server *pcs)
394 {
395 	struct cuse_memory *mem;
396 	uint32_t n;
397 
398 	for (n = 0; n != CUSE_ALLOC_UNIT_MAX; n++) {
399 		mem = &cuse_mem[n];
400 
401 		/* this memory is never freed */
402 		if (mem->owner == pcs) {
403 			mem->owner = NULL;
404 			mem->is_allocated = 0;
405 		}
406 	}
407 }
408 
409 static int
410 cuse_server_alloc_memory(struct cuse_server *pcs,
411     struct cuse_memory *mem, uint32_t page_count)
412 {
413 	void *ptr;
414 	int error;
415 
416 	cuse_lock();
417 
418 	if (mem->virtaddr == NBUSY) {
419 		cuse_unlock();
420 		return (EBUSY);
421 	}
422 	if (mem->virtaddr != NULL) {
423 		if (mem->is_allocated != 0) {
424 			cuse_unlock();
425 			return (EBUSY);
426 		}
427 		if (mem->page_count == page_count) {
428 			mem->is_allocated = 1;
429 			mem->owner = pcs;
430 			cuse_unlock();
431 			return (0);
432 		}
433 		cuse_unlock();
434 		return (EBUSY);
435 	}
436 	memset(mem, 0, sizeof(*mem));
437 
438 	mem->virtaddr = NBUSY;
439 
440 	cuse_unlock();
441 
442 	ptr = malloc(page_count * PAGE_SIZE, M_CUSE, M_WAITOK | M_ZERO);
443 	if (ptr == NULL)
444 		error = ENOMEM;
445 	else
446 		error = 0;
447 
448 	cuse_lock();
449 
450 	if (error) {
451 		mem->virtaddr = NULL;
452 		cuse_unlock();
453 		return (error);
454 	}
455 	mem->virtaddr = ptr;
456 	mem->page_count = page_count;
457 	mem->is_allocated = 1;
458 	mem->owner = pcs;
459 	cuse_unlock();
460 
461 	return (0);
462 }
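
/*
 * Memory slot states used above: "virtaddr == NULL" means the slot is
 * unused, "virtaddr == NBUSY" marks an allocation in progress (so the
 * cuse lock can be dropped around the sleeping malloc), and any other
 * value means backing pages already exist and are re-used when the
 * requested page count matches.  The pages themselves are never freed;
 * cuse_server_free_memory() merely clears "owner" and "is_allocated"
 * so that another server may claim the slot.  Note that malloc() with
 * M_WAITOK sleeps rather than returning NULL, so the ENOMEM branch
 * above is effectively unreachable.
 */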
463 
464 static int
465 cuse_client_get(struct cuse_client **ppcc)
466 {
467 	struct cuse_client *pcc;
468 	int error;
469 
470 	/* try to get private data */
471 	error = devfs_get_cdevpriv((void **)&pcc);
472 	if (error != 0) {
473 		*ppcc = NULL;
474 		return (error);
475 	}
476 	/* check if closing */
477 	cuse_lock();
478 	if (CUSE_CLIENT_CLOSING(pcc) || pcc->server->is_closing) {
479 		cuse_unlock();
480 		*ppcc = NULL;
481 		return (EINVAL);
482 	}
483 	cuse_unlock();
484 	*ppcc = pcc;
485 	return (0);
486 }
487 
488 static void
489 cuse_client_is_closing(struct cuse_client *pcc)
490 {
491 	struct cuse_client_command *pccmd;
492 	uint32_t n;
493 
494 	if (CUSE_CLIENT_CLOSING(pcc))
495 		return;
496 
497 	pcc->cflags |= CUSE_CLI_IS_CLOSING;
498 	pcc->server_dev = NULL;
499 
500 	for (n = 0; n != CUSE_CMD_MAX; n++) {
501 
502 		pccmd = &pcc->cmds[n];
503 
504 		if (pccmd->entry.tqe_prev != NULL) {
505 			TAILQ_REMOVE(&pcc->server->head, pccmd, entry);
506 			pccmd->entry.tqe_prev = NULL;
507 		}
508 		cv_broadcast(&pccmd->cv);
509 	}
510 }
511 
512 static void
513 cuse_client_send_command_locked(struct cuse_client_command *pccmd,
514     uintptr_t data_ptr, unsigned long arg, int fflags, int ioflag)
515 {
516 	unsigned long cuse_fflags = 0;
517 	struct cuse_server *pcs;
518 
519 	if (fflags & FREAD)
520 		cuse_fflags |= CUSE_FFLAG_READ;
521 
522 	if (fflags & FWRITE)
523 		cuse_fflags |= CUSE_FFLAG_WRITE;
524 
525 	if (ioflag & IO_NDELAY)
526 		cuse_fflags |= CUSE_FFLAG_NONBLOCK;
527 
528 	pccmd->sub.fflags = cuse_fflags;
529 	pccmd->sub.data_pointer = data_ptr;
530 	pccmd->sub.argument = arg;
531 
532 	pcs = pccmd->client->server;
533 
534 	if ((pccmd->entry.tqe_prev == NULL) &&
535 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
536 	    (pcs->is_closing == 0)) {
537 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
538 		cv_signal(&pcs->cv);
539 	}
540 }
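
/*
 * A command slot uses "entry.tqe_prev == NULL" as its "not currently
 * queued" marker: the function above only links the slot onto the
 * server's "head" queue (and signals the server condition variable)
 * when the slot is not already queued and neither the client nor the
 * server is closing.  The slot is unlinked again either by the server
 * in CUSE_IOCTL_GET_COMMAND or by cuse_client_is_closing().
 */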
541 
542 static void
543 cuse_client_got_signal(struct cuse_client_command *pccmd)
544 {
545 	struct cuse_server *pcs;
546 
547 	pccmd->got_signal = 1;
548 
549 	pccmd = &pccmd->client->cmds[CUSE_CMD_SIGNAL];
550 
551 	pcs = pccmd->client->server;
552 
553 	if ((pccmd->entry.tqe_prev == NULL) &&
554 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
555 	    (pcs->is_closing == 0)) {
556 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
557 		cv_signal(&pcs->cv);
558 	}
559 }
560 
561 static int
562 cuse_client_receive_command_locked(struct cuse_client_command *pccmd,
563     uint8_t *arg_ptr, uint32_t arg_len)
564 {
565 	int error;
566 
567 	error = 0;
568 
569 	pccmd->proc_curr = curthread->td_proc;
570 
571 	if (CUSE_CLIENT_CLOSING(pccmd->client) ||
572 	    pccmd->client->server->is_closing) {
573 		error = CUSE_ERR_OTHER;
574 		goto done;
575 	}
576 	while (pccmd->command == CUSE_CMD_NONE) {
577 		if (error != 0) {
578 			cv_wait(&pccmd->cv, &cuse_mtx);
579 		} else {
580 			error = cv_wait_sig(&pccmd->cv, &cuse_mtx);
581 
582 			if (error != 0)
583 				cuse_client_got_signal(pccmd);
584 		}
585 		if (CUSE_CLIENT_CLOSING(pccmd->client) ||
586 		    pccmd->client->server->is_closing) {
587 			error = CUSE_ERR_OTHER;
588 			goto done;
589 		}
590 	}
591 
592 	error = pccmd->error;
593 	pccmd->command = CUSE_CMD_NONE;
594 	cv_signal(&pccmd->cv);
595 
596 done:
597 
598 	/* wait until all process references are gone */
599 
600 	pccmd->proc_curr = NULL;
601 
602 	while (pccmd->proc_refs != 0)
603 		cv_wait(&pccmd->cv, &cuse_mtx);
604 
605 	return (error);
606 }
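
/*
 * While a command is outstanding, "proc_curr" points at the client
 * process so that the server-side copy helpers can address its memory.
 * Those helpers hold "proc_refs" across the unlocked copyin/copyout or
 * proc_rwmem() calls; cuse_client_receive_command_locked() clears
 * proc_curr and then waits for proc_refs to drain before the command
 * slot may be reused.  A negative completion value from the server is
 * mapped to an errno by cuse_convert_error() at the call sites.
 */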
607 
608 /*------------------------------------------------------------------------*
609  *	CUSE SERVER PART
610  *------------------------------------------------------------------------*/
611 
612 static void
613 cuse_server_free_dev(struct cuse_server_dev *pcsd)
614 {
615 	struct cuse_server *pcs;
616 	struct cuse_client *pcc;
617 
618 	/* get server pointer */
619 	pcs = pcsd->server;
620 
621 	/* prevent creation of more devices */
622 	cuse_lock();
623 	if (pcsd->kern_dev != NULL)
624 		pcsd->kern_dev->si_drv1 = NULL;
625 
626 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
627 		if (pcc->server_dev == pcsd)
628 			cuse_client_is_closing(pcc);
629 	}
630 	cuse_unlock();
631 
632 	/* destroy device, if any */
633 	if (pcsd->kern_dev != NULL) {
634 		/* destroy device synchronously */
635 		destroy_dev(pcsd->kern_dev);
636 	}
637 	free(pcsd, M_CUSE);
638 }
639 
640 static void
641 cuse_server_free(void *arg)
642 {
643 	struct cuse_server *pcs = arg;
644 	struct cuse_server_dev *pcsd;
645 
646 	cuse_lock();
647 	pcs->refs--;
648 	if (pcs->refs != 0) {
649 		cuse_unlock();
650 		return;
651 	}
652 	cuse_server_is_closing(pcs);
653 	/* final client wakeup, if any */
654 	cuse_server_wakeup_all_client_locked(pcs);
655 
656 	TAILQ_REMOVE(&cuse_server_head, pcs, entry);
657 
658 	cuse_free_unit_by_id_locked(pcs, -1);
659 
660 	while ((pcsd = TAILQ_FIRST(&pcs->hdev)) != NULL) {
661 		TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
662 		cuse_unlock();
663 		cuse_server_free_dev(pcsd);
664 		cuse_lock();
665 	}
666 
667 	cuse_server_free_memory(pcs);
668 
669 	knlist_clear(&pcs->selinfo.si_note, 1);
670 	knlist_destroy(&pcs->selinfo.si_note);
671 
672 	cuse_unlock();
673 
674 	seldrain(&pcs->selinfo);
675 
676 	cv_destroy(&pcs->cv);
677 
678 	free(pcs, M_CUSE);
679 }
680 
681 static int
682 cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
683 {
684 	struct cuse_server *pcs;
685 
686 	pcs = malloc(sizeof(*pcs), M_CUSE, M_WAITOK | M_ZERO);
687 	if (pcs == NULL)
688 		return (ENOMEM);
689 
690 	if (devfs_set_cdevpriv(pcs, &cuse_server_free)) {
691 		printf("Cuse: Cannot set cdevpriv.\n");
692 		free(pcs, M_CUSE);
693 		return (ENOMEM);
694 	}
695 
696 	/* store current process ID */
697 	pcs->pid = curproc->p_pid;
698 
699 	TAILQ_INIT(&pcs->head);
700 	TAILQ_INIT(&pcs->hdev);
701 	TAILQ_INIT(&pcs->hcli);
702 
703 	cv_init(&pcs->cv, "cuse-server-cv");
704 
705 	knlist_init_mtx(&pcs->selinfo.si_note, &cuse_mtx);
706 
707 	cuse_lock();
708 	pcs->refs++;
709 	TAILQ_INSERT_TAIL(&cuse_server_head, pcs, entry);
710 	cuse_unlock();
711 
712 	return (0);
713 }
714 
715 static int
716 cuse_server_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
717 {
718 	struct cuse_server *pcs;
719 	int error;
720 
721 	error = cuse_server_get(&pcs);
722 	if (error != 0)
723 		goto done;
724 
725 	cuse_lock();
726 	cuse_server_is_closing(pcs);
727 	/* final client wakeup, if any */
728 	cuse_server_wakeup_all_client_locked(pcs);
729 
730 	knlist_clear(&pcs->selinfo.si_note, 1);
731 	cuse_unlock();
732 
733 done:
734 	return (0);
735 }
736 
737 static int
738 cuse_server_read(struct cdev *dev, struct uio *uio, int ioflag)
739 {
740 	return (ENXIO);
741 }
742 
743 static int
744 cuse_server_write(struct cdev *dev, struct uio *uio, int ioflag)
745 {
746 	return (ENXIO);
747 }
748 
749 static int
750 cuse_server_ioctl_copy_locked(struct cuse_client_command *pccmd,
751     struct cuse_data_chunk *pchk, int isread)
752 {
753 	struct proc *p_proc;
754 	uint32_t offset;
755 	int error;
756 
757 	offset = pchk->peer_ptr - CUSE_BUF_MIN_PTR;
758 
759 	if (pchk->length > CUSE_BUFFER_MAX)
760 		return (EFAULT);
761 
762 	if (offset >= CUSE_BUFFER_MAX)
763 		return (EFAULT);
764 
765 	if ((offset + pchk->length) > CUSE_BUFFER_MAX)
766 		return (EFAULT);
767 
768 	p_proc = pccmd->proc_curr;
769 	if (p_proc == NULL)
770 		return (ENXIO);
771 
772 	if (pccmd->proc_refs < 0)
773 		return (ENOMEM);
774 
775 	pccmd->proc_refs++;
776 
777 	cuse_unlock();
778 
779 	if (isread == 0) {
780 		error = copyin(
781 		    (void *)pchk->local_ptr,
782 		    pccmd->client->ioctl_buffer + offset,
783 		    pchk->length);
784 	} else {
785 		error = copyout(
786 		    pccmd->client->ioctl_buffer + offset,
787 		    (void *)pchk->local_ptr,
788 		    pchk->length);
789 	}
790 
791 	cuse_lock();
792 
793 	pccmd->proc_refs--;
794 
795 	if (pccmd->proc_curr == NULL)
796 		cv_signal(&pccmd->cv);
797 
798 	return (error);
799 }
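
/*
 * Ioctl payloads are staged in the client's fixed "ioctl_buffer".  The
 * server sees such buffers as peer pointers inside the window
 * [CUSE_BUF_MIN_PTR, CUSE_BUF_MAX_PTR); the offset into ioctl_buffer
 * is "peer_ptr - CUSE_BUF_MIN_PTR", which is what the range checks
 * above validate before copying to or from the server's user address.
 */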
800 
801 static int
802 cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
803     struct proc *proc_d, vm_offset_t data_d, size_t len)
804 {
805 	struct thread *td;
806 	struct proc *proc_cur;
807 	int error;
808 
809 	td = curthread;
810 	proc_cur = td->td_proc;
811 
812 	if (proc_cur == proc_d) {
813 		struct iovec iov = {
814 			.iov_base = (caddr_t)data_d,
815 			.iov_len = len,
816 		};
817 		struct uio uio = {
818 			.uio_iov = &iov,
819 			.uio_iovcnt = 1,
820 			.uio_offset = (off_t)data_s,
821 			.uio_resid = len,
822 			.uio_segflg = UIO_USERSPACE,
823 			.uio_rw = UIO_READ,
824 			.uio_td = td,
825 		};
826 
827 		PHOLD(proc_s);
828 		error = proc_rwmem(proc_s, &uio);
829 		PRELE(proc_s);
830 
831 	} else if (proc_cur == proc_s) {
832 		struct iovec iov = {
833 			.iov_base = (caddr_t)data_s,
834 			.iov_len = len,
835 		};
836 		struct uio uio = {
837 			.uio_iov = &iov,
838 			.uio_iovcnt = 1,
839 			.uio_offset = (off_t)data_d,
840 			.uio_resid = len,
841 			.uio_segflg = UIO_USERSPACE,
842 			.uio_rw = UIO_WRITE,
843 			.uio_td = td,
844 		};
845 
846 		PHOLD(proc_d);
847 		error = proc_rwmem(proc_d, &uio);
848 		PRELE(proc_d);
849 	} else {
850 		error = EINVAL;
851 	}
852 	return (error);
853 }
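
/*
 * cuse_proc2proc_copy() moves data directly between the client's and
 * the server's user address spaces: whichever process is not curproc
 * is accessed through proc_rwmem() (UIO_READ to fetch from the source
 * process, UIO_WRITE to store into the destination process), while
 * the current process's buffer is described by a plain userspace
 * iovec in the uio.
 */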
854 
855 static int
856 cuse_server_data_copy_locked(struct cuse_client_command *pccmd,
857     struct cuse_data_chunk *pchk, int isread)
858 {
859 	struct proc *p_proc;
860 	int error;
861 
862 	p_proc = pccmd->proc_curr;
863 	if (p_proc == NULL)
864 		return (ENXIO);
865 
866 	if (pccmd->proc_refs < 0)
867 		return (ENOMEM);
868 
869 	pccmd->proc_refs++;
870 
871 	cuse_unlock();
872 
873 	if (isread == 0) {
874 		error = cuse_proc2proc_copy(
875 		    curthread->td_proc, pchk->local_ptr,
876 		    p_proc, pchk->peer_ptr,
877 		    pchk->length);
878 	} else {
879 		error = cuse_proc2proc_copy(
880 		    p_proc, pchk->peer_ptr,
881 		    curthread->td_proc, pchk->local_ptr,
882 		    pchk->length);
883 	}
884 
885 	cuse_lock();
886 
887 	pccmd->proc_refs--;
888 
889 	if (pccmd->proc_curr == NULL)
890 		cv_signal(&pccmd->cv);
891 
892 	return (error);
893 }
894 
895 static int
896 cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
897 {
898 	int n;
899 	int x = 0;
900 	int match;
901 
902 	do {
903 		for (match = n = 0; n != CUSE_DEVICES_MAX; n++) {
904 			if (cuse_alloc_unit[n] != NULL) {
905 				if ((cuse_alloc_unit_id[n] ^ id) & CUSE_ID_MASK)
906 					continue;
907 				if ((cuse_alloc_unit_id[n] & ~CUSE_ID_MASK) == x) {
908 					x++;
909 					match = 1;
910 				}
911 			}
912 		}
913 	} while (match);
914 
915 	if (x < 256) {
916 		for (n = 0; n != CUSE_DEVICES_MAX; n++) {
917 			if (cuse_alloc_unit[n] == NULL) {
918 				cuse_alloc_unit[n] = pcs;
919 				cuse_alloc_unit_id[n] = id | x;
920 				return (x);
921 			}
922 		}
923 	}
924 	return (-1);
925 }
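
/*
 * Unit numbers are handed out per "id class": the first loop above
 * finds the lowest sub-unit x (0..255) not yet used by an entry whose
 * id matches under CUSE_ID_MASK, and the second loop claims a free
 * table slot storing "id | x".  For example, the first two
 * CUSE_IOCTL_ALLOC_UNIT requests for the default id class return unit
 * numbers 0 and 1.
 */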
926 
927 static void
928 cuse_server_wakeup_locked(struct cuse_server *pcs)
929 {
930 	selwakeup(&pcs->selinfo);
931 	KNOTE_LOCKED(&pcs->selinfo.si_note, 0);
932 }
933 
934 static void
935 cuse_server_wakeup_all_client_locked(struct cuse_server *pcs)
936 {
937 	struct cuse_client *pcc;
938 
939 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
940 		pcc->cflags |= (CUSE_CLI_KNOTE_NEED_READ |
941 		    CUSE_CLI_KNOTE_NEED_WRITE);
942 	}
943 	cuse_server_wakeup_locked(pcs);
944 }
945 
946 static int
947 cuse_free_unit_by_id_locked(struct cuse_server *pcs, int id)
948 {
949 	int n;
950 	int found = 0;
951 
952 	for (n = 0; n != CUSE_DEVICES_MAX; n++) {
953 		if (cuse_alloc_unit[n] == pcs) {
954 			if (cuse_alloc_unit_id[n] == id || id == -1) {
955 				cuse_alloc_unit[n] = NULL;
956 				cuse_alloc_unit_id[n] = 0;
957 				found = 1;
958 			}
959 		}
960 	}
961 
962 	return (found ? 0 : EINVAL);
963 }
964 
965 static int
966 cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
967     caddr_t data, int fflag, struct thread *td)
968 {
969 	struct cuse_server *pcs;
970 	int error;
971 
972 	error = cuse_server_get(&pcs);
973 	if (error != 0)
974 		return (error);
975 
976 	switch (cmd) {
977 		struct cuse_client_command *pccmd;
978 		struct cuse_client *pcc;
979 		struct cuse_command *pcmd;
980 		struct cuse_alloc_info *pai;
981 		struct cuse_create_dev *pcd;
982 		struct cuse_server_dev *pcsd;
983 		struct cuse_data_chunk *pchk;
984 		int n;
985 
986 	case CUSE_IOCTL_GET_COMMAND:
987 		pcmd = (void *)data;
988 
989 		cuse_lock();
990 
991 		while ((pccmd = TAILQ_FIRST(&pcs->head)) == NULL) {
992 			error = cv_wait_sig(&pcs->cv, &cuse_mtx);
993 
994 			if (pcs->is_closing)
995 				error = ENXIO;
996 
997 			if (error) {
998 				cuse_unlock();
999 				return (error);
1000 			}
1001 		}
1002 
1003 		TAILQ_REMOVE(&pcs->head, pccmd, entry);
1004 		pccmd->entry.tqe_prev = NULL;
1005 
1006 		pccmd->entered = curthread;
1007 
1008 		*pcmd = pccmd->sub;
1009 
1010 		cuse_unlock();
1011 
1012 		break;
1013 
1014 	case CUSE_IOCTL_SYNC_COMMAND:
1015 
1016 		cuse_lock();
1017 		while ((pccmd = cuse_server_find_command(pcs, curthread)) != NULL) {
1018 
1019 			/* send sync command */
1020 			pccmd->entered = NULL;
1021 			pccmd->error = *(int *)data;
1022 			pccmd->command = CUSE_CMD_SYNC;
1023 
1024 			/* signal peer, if any */
1025 			cv_signal(&pccmd->cv);
1026 		}
1027 		cuse_unlock();
1028 
1029 		break;
1030 
1031 	case CUSE_IOCTL_ALLOC_UNIT:
1032 
1033 		cuse_lock();
1034 		n = cuse_alloc_unit_by_id_locked(pcs,
1035 		    CUSE_ID_DEFAULT(0));
1036 		cuse_unlock();
1037 
1038 		if (n < 0)
1039 			error = ENOMEM;
1040 		else
1041 			*(int *)data = n;
1042 		break;
1043 
1044 	case CUSE_IOCTL_ALLOC_UNIT_BY_ID:
1045 
1046 		n = *(int *)data;
1047 
1048 		n = (n & CUSE_ID_MASK);
1049 
1050 		cuse_lock();
1051 		n = cuse_alloc_unit_by_id_locked(pcs, n);
1052 		cuse_unlock();
1053 
1054 		if (n < 0)
1055 			error = ENOMEM;
1056 		else
1057 			*(int *)data = n;
1058 		break;
1059 
1060 	case CUSE_IOCTL_FREE_UNIT:
1061 
1062 		n = *(int *)data;
1063 
1064 		n = CUSE_ID_DEFAULT(n);
1065 
1066 		cuse_lock();
1067 		error = cuse_free_unit_by_id_locked(pcs, n);
1068 		cuse_unlock();
1069 		break;
1070 
1071 	case CUSE_IOCTL_FREE_UNIT_BY_ID:
1072 
1073 		n = *(int *)data;
1074 
1075 		cuse_lock();
1076 		error = cuse_free_unit_by_id_locked(pcs, n);
1077 		cuse_unlock();
1078 		break;
1079 
1080 	case CUSE_IOCTL_ALLOC_MEMORY:
1081 
1082 		pai = (void *)data;
1083 
1084 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1085 			error = ENOMEM;
1086 			break;
1087 		}
1088 		if (pai->page_count > CUSE_ALLOC_PAGES_MAX) {
1089 			error = ENOMEM;
1090 			break;
1091 		}
1092 		error = cuse_server_alloc_memory(pcs,
1093 		    &cuse_mem[pai->alloc_nr], pai->page_count);
1094 		break;
1095 
1096 	case CUSE_IOCTL_FREE_MEMORY:
1097 		pai = (void *)data;
1098 
1099 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1100 			error = ENOMEM;
1101 			break;
1102 		}
1103 		/* we trust the character device driver in this case */
1104 
1105 		cuse_lock();
1106 		if (cuse_mem[pai->alloc_nr].owner == pcs) {
1107 			cuse_mem[pai->alloc_nr].is_allocated = 0;
1108 			cuse_mem[pai->alloc_nr].owner = NULL;
1109 		} else {
1110 			error = EINVAL;
1111 		}
1112 		cuse_unlock();
1113 		break;
1114 
1115 	case CUSE_IOCTL_GET_SIG:
1116 
1117 		cuse_lock();
1118 		pccmd = cuse_server_find_command(pcs, curthread);
1119 
1120 		if (pccmd != NULL) {
1121 			n = pccmd->got_signal;
1122 			pccmd->got_signal = 0;
1123 		} else {
1124 			n = 0;
1125 		}
1126 		cuse_unlock();
1127 
1128 		*(int *)data = n;
1129 
1130 		break;
1131 
1132 	case CUSE_IOCTL_SET_PFH:
1133 
1134 		cuse_lock();
1135 		pccmd = cuse_server_find_command(pcs, curthread);
1136 
1137 		if (pccmd != NULL) {
1138 			pcc = pccmd->client;
1139 			for (n = 0; n != CUSE_CMD_MAX; n++) {
1140 				pcc->cmds[n].sub.per_file_handle = *(uintptr_t *)data;
1141 			}
1142 		} else {
1143 			error = ENXIO;
1144 		}
1145 		cuse_unlock();
1146 		break;
1147 
1148 	case CUSE_IOCTL_CREATE_DEV:
1149 
1150 		error = priv_check(curthread, PRIV_DRIVER);
1151 		if (error)
1152 			break;
1153 
1154 		pcd = (void *)data;
1155 
1156 		/* filter input */
1157 
1158 		pcd->devname[sizeof(pcd->devname) - 1] = 0;
1159 
1160 		if (pcd->devname[0] == 0) {
1161 			error = EINVAL;
1162 			break;
1163 		}
1164 		cuse_str_filter(pcd->devname);
1165 
1166 		pcd->permissions &= 0777;
1167 
1168 		/* try to allocate a character device */
1169 
1170 		pcsd = malloc(sizeof(*pcsd), M_CUSE, M_WAITOK | M_ZERO);
1171 
1172 		if (pcsd == NULL) {
1173 			error = ENOMEM;
1174 			break;
1175 		}
1176 		pcsd->server = pcs;
1177 
1178 		pcsd->user_dev = pcd->dev;
1179 
1180 		pcsd->kern_dev = make_dev_credf(MAKEDEV_CHECKNAME,
1181 		    &cuse_client_devsw, 0, NULL, pcd->user_id, pcd->group_id,
1182 		    pcd->permissions, "%s", pcd->devname);
1183 
1184 		if (pcsd->kern_dev == NULL) {
1185 			free(pcsd, M_CUSE);
1186 			error = ENOMEM;
1187 			break;
1188 		}
1189 		pcsd->kern_dev->si_drv1 = pcsd;
1190 
1191 		cuse_lock();
1192 		TAILQ_INSERT_TAIL(&pcs->hdev, pcsd, entry);
1193 		cuse_unlock();
1194 
1195 		break;
1196 
1197 	case CUSE_IOCTL_DESTROY_DEV:
1198 
1199 		error = priv_check(curthread, PRIV_DRIVER);
1200 		if (error)
1201 			break;
1202 
1203 		cuse_lock();
1204 
1205 		error = EINVAL;
1206 
1207 		pcsd = TAILQ_FIRST(&pcs->hdev);
1208 		while (pcsd != NULL) {
1209 			if (pcsd->user_dev == *(struct cuse_dev **)data) {
1210 				TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
1211 				cuse_unlock();
1212 				cuse_server_free_dev(pcsd);
1213 				cuse_lock();
1214 				error = 0;
1215 				pcsd = TAILQ_FIRST(&pcs->hdev);
1216 			} else {
1217 				pcsd = TAILQ_NEXT(pcsd, entry);
1218 			}
1219 		}
1220 
1221 		cuse_unlock();
1222 		break;
1223 
1224 	case CUSE_IOCTL_WRITE_DATA:
1225 	case CUSE_IOCTL_READ_DATA:
1226 
1227 		cuse_lock();
1228 		pchk = (struct cuse_data_chunk *)data;
1229 
1230 		pccmd = cuse_server_find_command(pcs, curthread);
1231 
1232 		if (pccmd == NULL) {
1233 			error = ENXIO;	/* invalid request */
1234 		} else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
1235 			error = EFAULT;	/* NULL pointer */
1236 		} else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
1237 			error = cuse_server_ioctl_copy_locked(pccmd,
1238 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1239 		} else {
1240 			error = cuse_server_data_copy_locked(pccmd,
1241 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1242 		}
1243 		cuse_unlock();
1244 		break;
1245 
1246 	case CUSE_IOCTL_SELWAKEUP:
1247 		cuse_lock();
1248 		/*
1249 		 * We don't know which direction caused the event.
1250 		 * Wake up both!
1251 		 */
1252 		cuse_server_wakeup_all_client_locked(pcs);
1253 		cuse_unlock();
1254 		break;
1255 
1256 	default:
1257 		error = ENXIO;
1258 		break;
1259 	}
1260 	return (error);
1261 }
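
/*
 * Illustrative sketch, not part of this driver: the command loop of a
 * userland server driving the ioctl protocol implemented above.  Real
 * servers normally use the cuse(3) library instead of raw ioctls; the
 * userland include paths, the hypothetical example_cuse_server_loop()
 * helper and the omitted error handling are simplifying assumptions.
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <fcntl.h>

#include <fs/cuse/cuse_defs.h>
#include <fs/cuse/cuse_ioctl.h>

static void
example_cuse_server_loop(int f)		/* f = open("/dev/cuse", O_RDWR) */
{
	struct cuse_command cmd;
	int result;

	for (;;) {
		/* sleeps in CUSE_IOCTL_GET_COMMAND until a client queues work */
		if (ioctl(f, CUSE_IOCTL_GET_COMMAND, &cmd) < 0)
			break;

		switch (cmd.command) {
		case CUSE_CMD_OPEN:
		case CUSE_CMD_CLOSE:
			result = CUSE_ERR_NONE;
			break;
		default:
			/*
			 * CUSE_CMD_READ/WRITE/IOCTL handlers would move their
			 * payload with CUSE_IOCTL_READ_DATA/WRITE_DATA and a
			 * struct cuse_data_chunk before completing.
			 */
			result = CUSE_ERR_INVALID;
			break;
		}
		/* completes the command and wakes the blocked client thread */
		ioctl(f, CUSE_IOCTL_SYNC_COMMAND, &result);
	}
}
#endif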
1262 
1263 static int
1264 cuse_server_poll(struct cdev *dev, int events, struct thread *td)
1265 {
1266 	return (events & (POLLHUP | POLLPRI | POLLIN |
1267 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1268 }
1269 
1270 static int
1271 cuse_server_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr)
1272 {
1273 	uint32_t page_nr = offset / PAGE_SIZE;
1274 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1275 	struct cuse_memory *mem;
1276 	struct cuse_server *pcs;
1277 	uint8_t *ptr;
1278 	int error;
1279 
1280 	if (alloc_nr >= CUSE_ALLOC_UNIT_MAX)
1281 		return (ENOMEM);
1282 
1283 	error = cuse_server_get(&pcs);
1284 	if (error != 0)
1285 		pcs = NULL;
1286 
1287 	cuse_lock();
1288 	mem = &cuse_mem[alloc_nr];
1289 
1290 	/* try to enforce slight ownership */
1291 	if ((pcs != NULL) && (mem->owner != pcs)) {
1292 		cuse_unlock();
1293 		return (EINVAL);
1294 	}
1295 	if (mem->virtaddr == NULL) {
1296 		cuse_unlock();
1297 		return (ENOMEM);
1298 	}
1299 	if (mem->virtaddr == NBUSY) {
1300 		cuse_unlock();
1301 		return (ENOMEM);
1302 	}
1303 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1304 
1305 	if (page_nr >= mem->page_count) {
1306 		cuse_unlock();
1307 		return (ENXIO);
1308 	}
1309 	ptr = mem->virtaddr + (page_nr * PAGE_SIZE);
1310 	cuse_unlock();
1311 
1312 	*paddr = vtophys(ptr);
1313 
1314 	return (0);
1315 }
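
/*
 * Both the server and its clients mmap() the same global cuse_mem[]
 * slots; the byte offset selects the slot and the page within it:
 *
 *	offset = (alloc_nr * CUSE_ALLOC_PAGES_MAX + page_nr) * PAGE_SIZE
 *
 * which is why this function and cuse_client_mmap() below decode the
 * offset identically and return the physical address of the selected
 * page.
 */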
1316 
1317 /*------------------------------------------------------------------------*
1318  *	CUSE CLIENT PART
1319  *------------------------------------------------------------------------*/
1320 static void
1321 cuse_client_free(void *arg)
1322 {
1323 	struct cuse_client *pcc = arg;
1324 	struct cuse_client_command *pccmd;
1325 	struct cuse_server *pcs;
1326 	int n;
1327 
1328 	cuse_lock();
1329 	cuse_client_is_closing(pcc);
1330 	TAILQ_REMOVE(&pcc->server->hcli, pcc, entry);
1331 	cuse_unlock();
1332 
1333 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1334 
1335 		pccmd = &pcc->cmds[n];
1336 
1337 		sx_destroy(&pccmd->sx);
1338 		cv_destroy(&pccmd->cv);
1339 	}
1340 
1341 	pcs = pcc->server;
1342 
1343 	free(pcc, M_CUSE);
1344 
1345 	/* drop reference on server */
1346 	cuse_server_free(pcs);
1347 }
1348 
1349 static int
1350 cuse_client_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
1351 {
1352 	struct cuse_client_command *pccmd;
1353 	struct cuse_server_dev *pcsd;
1354 	struct cuse_client *pcc;
1355 	struct cuse_server *pcs;
1356 	struct cuse_dev *pcd;
1357 	int error;
1358 	int n;
1359 
1360 	cuse_lock();
1361 	pcsd = dev->si_drv1;
1362 	if (pcsd != NULL) {
1363 		pcs = pcsd->server;
1364 		pcd = pcsd->user_dev;
1365 		/*
1366 		 * Check that the refcount didn't wrap and that the
1367 		 * same process is not both client and server. This
1368 		 * can easily lead to deadlocks when destroying the
1369 		 * CUSE character device nodes:
1370 		 */
1371 		pcs->refs++;
1372 		if (pcs->refs < 0 || pcs->pid == curproc->p_pid) {
1373 			/* overflow or wrong PID */
1374 			pcs->refs--;
1375 			pcsd = NULL;
1376 		}
1377 	} else {
1378 		pcs = NULL;
1379 		pcd = NULL;
1380 	}
1381 	cuse_unlock();
1382 
1383 	if (pcsd == NULL)
1384 		return (EINVAL);
1385 
1386 	pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
1387 	if (pcc == NULL) {
1388 		/* drop reference on server */
1389 		cuse_server_free(pcs);
1390 		return (ENOMEM);
1391 	}
1392 	if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
1393 		printf("Cuse: Cannot set cdevpriv.\n");
1394 		/* drop reference on server */
1395 		cuse_server_free(pcs);
1396 		free(pcc, M_CUSE);
1397 		return (ENOMEM);
1398 	}
1399 	pcc->fflags = fflags;
1400 	pcc->server_dev = pcsd;
1401 	pcc->server = pcs;
1402 
1403 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1404 
1405 		pccmd = &pcc->cmds[n];
1406 
1407 		pccmd->sub.dev = pcd;
1408 		pccmd->sub.command = n;
1409 		pccmd->client = pcc;
1410 
1411 		sx_init(&pccmd->sx, "cuse-client-sx");
1412 		cv_init(&pccmd->cv, "cuse-client-cv");
1413 	}
1414 
1415 	cuse_lock();
1416 
1417 	/* cuse_client_free() assumes that the client is listed somewhere! */
1418 	/* always enqueue */
1419 
1420 	TAILQ_INSERT_TAIL(&pcs->hcli, pcc, entry);
1421 
1422 	/* check if server is closing */
1423 	if ((pcs->is_closing != 0) || (dev->si_drv1 == NULL)) {
1424 		error = EINVAL;
1425 	} else {
1426 		error = 0;
1427 	}
1428 	cuse_unlock();
1429 
1430 	if (error) {
1431 		devfs_clear_cdevpriv();	/* XXX bugfix */
1432 		return (error);
1433 	}
1434 	pccmd = &pcc->cmds[CUSE_CMD_OPEN];
1435 
1436 	cuse_cmd_lock(pccmd);
1437 
1438 	cuse_lock();
1439 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1440 
1441 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1442 	cuse_unlock();
1443 
1444 	if (error < 0) {
1445 		error = cuse_convert_error(error);
1446 	} else {
1447 		error = 0;
1448 	}
1449 
1450 	cuse_cmd_unlock(pccmd);
1451 
1452 	if (error)
1453 		devfs_clear_cdevpriv();	/* XXX bugfix */
1454 
1455 	return (error);
1456 }
1457 
1458 static int
1459 cuse_client_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
1460 {
1461 	struct cuse_client_command *pccmd;
1462 	struct cuse_client *pcc;
1463 	int error;
1464 
1465 	error = cuse_client_get(&pcc);
1466 	if (error != 0)
1467 		return (0);
1468 
1469 	pccmd = &pcc->cmds[CUSE_CMD_CLOSE];
1470 
1471 	cuse_cmd_lock(pccmd);
1472 
1473 	cuse_lock();
1474 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1475 
1476 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1477 	cuse_unlock();
1478 
1479 	cuse_cmd_unlock(pccmd);
1480 
1481 	cuse_lock();
1482 	cuse_client_is_closing(pcc);
1483 	cuse_unlock();
1484 
1485 	return (0);
1486 }
1487 
1488 static void
1489 cuse_client_kqfilter_poll(struct cdev *dev, struct cuse_client *pcc)
1490 {
1491 	int temp;
1492 
1493 	cuse_lock();
1494 	temp = (pcc->cflags & (CUSE_CLI_KNOTE_HAS_READ |
1495 	    CUSE_CLI_KNOTE_HAS_WRITE));
1496 	pcc->cflags &= ~(CUSE_CLI_KNOTE_NEED_READ |
1497 	    CUSE_CLI_KNOTE_NEED_WRITE);
1498 	cuse_unlock();
1499 
1500 	if (temp != 0) {
1501 		/* get the latest polling state from the server */
1502 		temp = cuse_client_poll(dev, POLLIN | POLLOUT, NULL);
1503 
1504 		cuse_lock();
1505 		if (temp & (POLLIN | POLLOUT)) {
1506 			if (temp & POLLIN)
1507 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_READ;
1508 			if (temp & POLLOUT)
1509 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_WRITE;
1510 
1511 			/* make sure the "knote" gets woken up */
1512 			cuse_server_wakeup_locked(pcc->server);
1513 		}
1514 		cuse_unlock();
1515 	}
1516 }
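
/*
 * cuse_client_kqfilter_poll() refreshes the kqueue state: once a read
 * or write filter has been attached, it asks the server for the
 * current poll status (a CUSE_CMD_POLL round trip via
 * cuse_client_poll()) and translates POLLIN/POLLOUT into the
 * CUSE_CLI_KNOTE_NEED_* flags reported by the filter event callbacks
 * below, waking any kqueue waiters via cuse_server_wakeup_locked().
 */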
1517 
1518 static int
1519 cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
1520 {
1521 	struct cuse_client_command *pccmd;
1522 	struct cuse_client *pcc;
1523 	int error;
1524 	int len;
1525 
1526 	error = cuse_client_get(&pcc);
1527 	if (error != 0)
1528 		return (error);
1529 
1530 	pccmd = &pcc->cmds[CUSE_CMD_READ];
1531 
1532 	if (uio->uio_segflg != UIO_USERSPACE) {
1533 		return (EINVAL);
1534 	}
1535 	uio->uio_segflg = UIO_NOCOPY;
1536 
1537 	cuse_cmd_lock(pccmd);
1538 
1539 	while (uio->uio_resid != 0) {
1540 
1541 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1542 			error = ENOMEM;
1543 			break;
1544 		}
1545 
1546 		len = uio->uio_iov->iov_len;
1547 
1548 		cuse_lock();
1549 		cuse_client_send_command_locked(pccmd,
1550 		    (uintptr_t)uio->uio_iov->iov_base,
1551 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1552 
1553 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1554 		cuse_unlock();
1555 
1556 		if (error < 0) {
1557 			error = cuse_convert_error(error);
1558 			break;
1559 		} else if (error == len) {
1560 			error = uiomove(NULL, error, uio);
1561 			if (error)
1562 				break;
1563 		} else {
1564 			error = uiomove(NULL, error, uio);
1565 			break;
1566 		}
1567 	}
1568 	cuse_cmd_unlock(pccmd);
1569 
1570 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1571 
1572 	if (error == EWOULDBLOCK)
1573 		cuse_client_kqfilter_poll(dev, pcc);
1574 
1575 	return (error);
1576 }
1577 
1578 static int
1579 cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
1580 {
1581 	struct cuse_client_command *pccmd;
1582 	struct cuse_client *pcc;
1583 	int error;
1584 	int len;
1585 
1586 	error = cuse_client_get(&pcc);
1587 	if (error != 0)
1588 		return (error);
1589 
1590 	pccmd = &pcc->cmds[CUSE_CMD_WRITE];
1591 
1592 	if (uio->uio_segflg != UIO_USERSPACE) {
1593 		return (EINVAL);
1594 	}
1595 	uio->uio_segflg = UIO_NOCOPY;
1596 
1597 	cuse_cmd_lock(pccmd);
1598 
1599 	while (uio->uio_resid != 0) {
1600 
1601 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1602 			error = ENOMEM;
1603 			break;
1604 		}
1605 
1606 		len = uio->uio_iov->iov_len;
1607 
1608 		cuse_lock();
1609 		cuse_client_send_command_locked(pccmd,
1610 		    (uintptr_t)uio->uio_iov->iov_base,
1611 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1612 
1613 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1614 		cuse_unlock();
1615 
1616 		if (error < 0) {
1617 			error = cuse_convert_error(error);
1618 			break;
1619 		} else if (error == len) {
1620 			error = uiomove(NULL, error, uio);
1621 			if (error)
1622 				break;
1623 		} else {
1624 			error = uiomove(NULL, error, uio);
1625 			break;
1626 		}
1627 	}
1628 	cuse_cmd_unlock(pccmd);
1629 
1630 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1631 
1632 	if (error == EWOULDBLOCK)
1633 		cuse_client_kqfilter_poll(dev, pcc);
1634 
1635 	return (error);
1636 }
1637 
1638 static int
1639 cuse_client_ioctl(struct cdev *dev, unsigned long cmd,
1640     caddr_t data, int fflag, struct thread *td)
1641 {
1642 	struct cuse_client_command *pccmd;
1643 	struct cuse_client *pcc;
1644 	int error;
1645 	int len;
1646 
1647 	error = cuse_client_get(&pcc);
1648 	if (error != 0)
1649 		return (error);
1650 
1651 	len = IOCPARM_LEN(cmd);
1652 	if (len > CUSE_BUFFER_MAX)
1653 		return (ENOMEM);
1654 
1655 	pccmd = &pcc->cmds[CUSE_CMD_IOCTL];
1656 
1657 	cuse_cmd_lock(pccmd);
1658 
1659 	if (cmd & (IOC_IN | IOC_VOID))
1660 		memcpy(pcc->ioctl_buffer, data, len);
1661 
1662 	/*
1663 	 * When the ioctl-length is zero drivers can pass information
1664 	 * through the data pointer of the ioctl. Make sure this information
1665 	 * is forwarded to the driver.
1666 	 */
1667 
1668 	cuse_lock();
1669 	cuse_client_send_command_locked(pccmd,
1670 	    (len == 0) ? *(long *)data : CUSE_BUF_MIN_PTR,
1671 	    (unsigned long)cmd, pcc->fflags,
1672 	    (fflag & O_NONBLOCK) ? IO_NDELAY : 0);
1673 
1674 	error = cuse_client_receive_command_locked(pccmd, data, len);
1675 	cuse_unlock();
1676 
1677 	if (error < 0) {
1678 		error = cuse_convert_error(error);
1679 	} else {
1680 		error = 0;
1681 	}
1682 
1683 	if (cmd & IOC_OUT)
1684 		memcpy(data, pcc->ioctl_buffer, len);
1685 
1686 	cuse_cmd_unlock(pccmd);
1687 
1688 	if (error == EWOULDBLOCK)
1689 		cuse_client_kqfilter_poll(dev, pcc);
1690 
1691 	return (error);
1692 }
1693 
1694 static int
1695 cuse_client_poll(struct cdev *dev, int events, struct thread *td)
1696 {
1697 	struct cuse_client_command *pccmd;
1698 	struct cuse_client *pcc;
1699 	unsigned long temp;
1700 	int error;
1701 	int revents;
1702 
1703 	error = cuse_client_get(&pcc);
1704 	if (error != 0)
1705 		goto pollnval;
1706 
1707 	temp = 0;
1708 
1709 	if (events & (POLLPRI | POLLIN | POLLRDNORM))
1710 		temp |= CUSE_POLL_READ;
1711 
1712 	if (events & (POLLOUT | POLLWRNORM))
1713 		temp |= CUSE_POLL_WRITE;
1714 
1715 	if (events & POLLHUP)
1716 		temp |= CUSE_POLL_ERROR;
1717 
1718 	pccmd = &pcc->cmds[CUSE_CMD_POLL];
1719 
1720 	cuse_cmd_lock(pccmd);
1721 
1722 	/* Need to selrecord() first to not lose any events. */
1723 	if (temp != 0 && td != NULL)
1724 		selrecord(td, &pcc->server->selinfo);
1725 
1726 	cuse_lock();
1727 	cuse_client_send_command_locked(pccmd,
1728 	    0, temp, pcc->fflags, IO_NDELAY);
1729 
1730 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1731 	cuse_unlock();
1732 
1733 	cuse_cmd_unlock(pccmd);
1734 
1735 	if (error < 0) {
1736 		goto pollnval;
1737 	} else {
1738 		revents = 0;
1739 		if (error & CUSE_POLL_READ)
1740 			revents |= (events & (POLLPRI | POLLIN | POLLRDNORM));
1741 		if (error & CUSE_POLL_WRITE)
1742 			revents |= (events & (POLLOUT | POLLWRNORM));
1743 		if (error & CUSE_POLL_ERROR)
1744 			revents |= (events & POLLHUP);
1745 	}
1746 	return (revents);
1747 
1748  pollnval:
1749 	/* XXX many clients don't understand POLLNVAL */
1750 	return (events & (POLLHUP | POLLPRI | POLLIN |
1751 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1752 }
1753 
1754 static int
1755 cuse_client_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr)
1756 {
1757 	uint32_t page_nr = offset / PAGE_SIZE;
1758 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1759 	struct cuse_memory *mem;
1760 	struct cuse_server *pcs;
1761 	struct cuse_client *pcc;
1762 	uint8_t *ptr;
1763 	int error;
1764 
1765 	if (alloc_nr >= CUSE_ALLOC_UNIT_MAX)
1766 		return (ENOMEM);
1767 
1768 	error = cuse_client_get(&pcc);
1769 	if (error != 0)
1770 		pcs = NULL;
1771 	else
1772 		pcs = pcc->server;
1773 
1774 	cuse_lock();
1775 	mem = &cuse_mem[alloc_nr];
1776 
1777 	/* try to enforce slight ownership */
1778 	if ((pcs != NULL) && (mem->owner != pcs)) {
1779 		cuse_unlock();
1780 		return (EINVAL);
1781 	}
1782 	if (mem->virtaddr == NULL) {
1783 		cuse_unlock();
1784 		return (ENOMEM);
1785 	}
1786 	if (mem->virtaddr == NBUSY) {
1787 		cuse_unlock();
1788 		return (ENOMEM);
1789 	}
1790 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1791 
1792 	if (page_nr >= mem->page_count) {
1793 		cuse_unlock();
1794 		return (ENXIO);
1795 	}
1796 	ptr = mem->virtaddr + (page_nr * PAGE_SIZE);
1797 	cuse_unlock();
1798 
1799 	*paddr = vtophys(ptr);
1800 
1801 	return (0);
1802 }
1803 
1804 static void
1805 cuse_client_kqfilter_read_detach(struct knote *kn)
1806 {
1807 	struct cuse_client *pcc;
1808 
1809 	cuse_lock();
1810 	pcc = kn->kn_hook;
1811 	knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1812 	cuse_unlock();
1813 }
1814 
1815 static void
1816 cuse_client_kqfilter_write_detach(struct knote *kn)
1817 {
1818 	struct cuse_client *pcc;
1819 
1820 	cuse_lock();
1821 	pcc = kn->kn_hook;
1822 	knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1823 	cuse_unlock();
1824 }
1825 
1826 static int
1827 cuse_client_kqfilter_read_event(struct knote *kn, long hint)
1828 {
1829 	struct cuse_client *pcc;
1830 
1831 	mtx_assert(&cuse_mtx, MA_OWNED);
1832 
1833 	pcc = kn->kn_hook;
1834 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_READ) ? 1 : 0);
1835 }
1836 
1837 static int
1838 cuse_client_kqfilter_write_event(struct knote *kn, long hint)
1839 {
1840 	struct cuse_client *pcc;
1841 
1842 	mtx_assert(&cuse_mtx, MA_OWNED);
1843 
1844 	pcc = kn->kn_hook;
1845 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_WRITE) ? 1 : 0);
1846 }
1847 
1848 static int
1849 cuse_client_kqfilter(struct cdev *dev, struct knote *kn)
1850 {
1851 	struct cuse_client *pcc;
1852 	struct cuse_server *pcs;
1853 	int error;
1854 
1855 	error = cuse_client_get(&pcc);
1856 	if (error != 0)
1857 		return (error);
1858 
1859 	cuse_lock();
1860 	pcs = pcc->server;
1861 	switch (kn->kn_filter) {
1862 	case EVFILT_READ:
1863 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_READ;
1864 		kn->kn_hook = pcc;
1865 		kn->kn_fop = &cuse_client_kqfilter_read_ops;
1866 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1867 		break;
1868 	case EVFILT_WRITE:
1869 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_WRITE;
1870 		kn->kn_hook = pcc;
1871 		kn->kn_fop = &cuse_client_kqfilter_write_ops;
1872 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1873 		break;
1874 	default:
1875 		error = EINVAL;
1876 		break;
1877 	}
1878 	cuse_unlock();
1879 
1880 	if (error == 0)
1881 		cuse_client_kqfilter_poll(dev, pcc);
1882 	return (error);
1883 }
1884