xref: /freebsd/sys/fs/cuse/cuse.c (revision 545ddfbe7d4fe8adfb862903b24eac1d5896c1ef)
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2010-2013 Hans Petter Selasky. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include "opt_compat.h"
28 
29 #include <sys/stdint.h>
30 #include <sys/stddef.h>
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/conf.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/linker_set.h>
38 #include <sys/module.h>
39 #include <sys/lock.h>
40 #include <sys/mutex.h>
41 #include <sys/condvar.h>
42 #include <sys/sysctl.h>
43 #include <sys/unistd.h>
44 #include <sys/malloc.h>
45 #include <sys/priv.h>
46 #include <sys/uio.h>
47 #include <sys/poll.h>
48 #include <sys/sx.h>
49 #include <sys/queue.h>
50 #include <sys/fcntl.h>
51 #include <sys/proc.h>
52 #include <sys/vnode.h>
53 #include <sys/selinfo.h>
54 #include <sys/ptrace.h>
55 
56 #include <machine/bus.h>
57 
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60 
61 #include <fs/cuse/cuse_defs.h>
62 #include <fs/cuse/cuse_ioctl.h>
63 
64 MODULE_VERSION(cuse, 1);
65 
66 #define	NBUSY	((uint8_t *)1)
67 
68 #ifdef FEATURE
69 FEATURE(cuse, "Userspace character devices");
70 #endif
71 
72 struct cuse_command;
73 struct cuse_server;
74 struct cuse_client;
75 
76 struct cuse_client_command {
77 	TAILQ_ENTRY(cuse_client_command) entry;
78 	struct cuse_command sub;
79 	struct sx sx;
80 	struct cv cv;
81 	struct thread *entered;
82 	struct cuse_client *client;
83 	struct proc *proc_curr;
84 	int	proc_refs;
85 	int	got_signal;
86 	int	error;
87 	int	command;
88 };
89 
/* One slot of the global cuse_mem[] table. */
struct cuse_memory {
	/* server currently holding the slot, NULL when unowned */
	struct cuse_server *owner;
	/* NULL (no buffer), NBUSY (being allocated) or the buffer */
	uint8_t *virtaddr;
	/* size of the buffer in pages */
	uint32_t page_count;
	/* non-zero while a server has claimed the slot */
	uint32_t is_allocated;
};
96 
97 struct cuse_server_dev {
98 	TAILQ_ENTRY(cuse_server_dev) entry;
99 	struct cuse_server *server;
100 	struct cdev *kern_dev;
101 	struct cuse_dev *user_dev;
102 };
103 
104 struct cuse_server {
105 	TAILQ_ENTRY(cuse_server) entry;
106 	TAILQ_HEAD(, cuse_client_command) head;
107 	TAILQ_HEAD(, cuse_server_dev) hdev;
108 	TAILQ_HEAD(, cuse_client) hcli;
109 	struct cv cv;
110 	struct selinfo selinfo;
111 	int	is_closing;
112 	int	refs;
113 };
114 
115 struct cuse_client {
116 	TAILQ_ENTRY(cuse_client) entry;
117 	TAILQ_ENTRY(cuse_client) entry_ref;
118 	struct cuse_client_command cmds[CUSE_CMD_MAX];
119 	struct cuse_server *server;
120 	struct cuse_server_dev *server_dev;
121 
122 	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);
123 
124 	int	fflags;		/* file flags */
125 	int	cflags;		/* client flags */
126 #define	CUSE_CLI_IS_CLOSING 0x01
127 #define	CUSE_CLI_KNOTE_NEED_READ 0x02
128 #define	CUSE_CLI_KNOTE_NEED_WRITE 0x04
129 #define	CUSE_CLI_KNOTE_HAS_READ 0x08
130 #define	CUSE_CLI_KNOTE_HAS_WRITE 0x10
131 };
132 
133 #define	CUSE_CLIENT_CLOSING(pcc) \
134     ((pcc)->cflags & CUSE_CLI_IS_CLOSING)
135 
136 static MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");
137 
138 static TAILQ_HEAD(, cuse_server) cuse_server_head;
139 static struct mtx cuse_mtx;
140 static struct cdev *cuse_dev;
141 static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
142 static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
143 static struct cuse_memory cuse_mem[CUSE_ALLOC_UNIT_MAX];
144 
145 static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);
146 static void cuse_client_kqfilter_read_detach(struct knote *kn);
147 static void cuse_client_kqfilter_write_detach(struct knote *kn);
148 static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
149 static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);
150 
151 static struct filterops cuse_client_kqfilter_read_ops = {
152 	.f_isfd = 1,
153 	.f_detach = cuse_client_kqfilter_read_detach,
154 	.f_event = cuse_client_kqfilter_read_event,
155 };
156 
157 static struct filterops cuse_client_kqfilter_write_ops = {
158 	.f_isfd = 1,
159 	.f_detach = cuse_client_kqfilter_write_detach,
160 	.f_event = cuse_client_kqfilter_write_event,
161 };
162 
163 static d_open_t cuse_client_open;
164 static d_close_t cuse_client_close;
165 static d_ioctl_t cuse_client_ioctl;
166 static d_read_t cuse_client_read;
167 static d_write_t cuse_client_write;
168 static d_poll_t cuse_client_poll;
169 static d_mmap_t cuse_client_mmap;
170 static d_kqfilter_t cuse_client_kqfilter;
171 
172 static struct cdevsw cuse_client_devsw = {
173 	.d_version = D_VERSION,
174 	.d_open = cuse_client_open,
175 	.d_close = cuse_client_close,
176 	.d_ioctl = cuse_client_ioctl,
177 	.d_name = "cuse_client",
178 	.d_flags = D_TRACKCLOSE,
179 	.d_read = cuse_client_read,
180 	.d_write = cuse_client_write,
181 	.d_poll = cuse_client_poll,
182 	.d_mmap = cuse_client_mmap,
183 	.d_kqfilter = cuse_client_kqfilter,
184 };
185 
186 static d_open_t cuse_server_open;
187 static d_close_t cuse_server_close;
188 static d_ioctl_t cuse_server_ioctl;
189 static d_read_t cuse_server_read;
190 static d_write_t cuse_server_write;
191 static d_poll_t cuse_server_poll;
192 static d_mmap_t cuse_server_mmap;
193 
194 static struct cdevsw cuse_server_devsw = {
195 	.d_version = D_VERSION,
196 	.d_open = cuse_server_open,
197 	.d_close = cuse_server_close,
198 	.d_ioctl = cuse_server_ioctl,
199 	.d_name = "cuse_server",
200 	.d_flags = D_TRACKCLOSE,
201 	.d_read = cuse_server_read,
202 	.d_write = cuse_server_write,
203 	.d_poll = cuse_server_poll,
204 	.d_mmap = cuse_server_mmap,
205 };
206 
/* Helpers used before their definitions. */
static int cuse_free_unit_by_id_locked(struct cuse_server *, int);
static void cuse_client_is_closing(struct cuse_client *);
209 
210 static void
211 cuse_lock(void)
212 {
213 	mtx_lock(&cuse_mtx);
214 }
215 
216 static void
217 cuse_unlock(void)
218 {
219 	mtx_unlock(&cuse_mtx);
220 }
221 
222 static void
223 cuse_cmd_lock(struct cuse_client_command *pccmd)
224 {
225 	sx_xlock(&pccmd->sx);
226 }
227 
228 static void
229 cuse_cmd_unlock(struct cuse_client_command *pccmd)
230 {
231 	sx_xunlock(&pccmd->sx);
232 }
233 
234 static void
235 cuse_kern_init(void *arg)
236 {
237 	TAILQ_INIT(&cuse_server_head);
238 
239 	mtx_init(&cuse_mtx, "cuse-mtx", NULL, MTX_DEF);
240 
241 	cuse_dev = make_dev(&cuse_server_devsw, 0,
242 	    UID_ROOT, GID_OPERATOR, 0600, "cuse");
243 
244 	printf("Cuse v%d.%d.%d @ /dev/cuse\n",
245 	    (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
246 	    (CUSE_VERSION >> 0) & 0xFF);
247 }
248 
249 SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, 0);
250 
251 static void
252 cuse_kern_uninit(void *arg)
253 {
254 	void *ptr;
255 
256 	while (1) {
257 
258 		printf("Cuse: Please exit all /dev/cuse instances "
259 		    "and processes which have used this device.\n");
260 
261 		pause("DRAIN", 2 * hz);
262 
263 		cuse_lock();
264 		ptr = TAILQ_FIRST(&cuse_server_head);
265 		cuse_unlock();
266 
267 		if (ptr == NULL)
268 			break;
269 	}
270 
271 	if (cuse_dev != NULL)
272 		destroy_dev(cuse_dev);
273 
274 	mtx_destroy(&cuse_mtx);
275 }
276 
277 SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
278 
279 static int
280 cuse_server_get(struct cuse_server **ppcs)
281 {
282 	struct cuse_server *pcs;
283 	int error;
284 
285 	error = devfs_get_cdevpriv((void **)&pcs);
286 	if (error != 0) {
287 		*ppcs = NULL;
288 		return (error);
289 	}
290 	/* check if closing */
291 	cuse_lock();
292 	if (pcs->is_closing) {
293 		cuse_unlock();
294 		*ppcs = NULL;
295 		return (EINVAL);
296 	}
297 	cuse_unlock();
298 	*ppcs = pcs;
299 	return (0);
300 }
301 
302 static void
303 cuse_server_is_closing(struct cuse_server *pcs)
304 {
305 	struct cuse_client *pcc;
306 
307 	if (pcs->is_closing)
308 		return;
309 
310 	pcs->is_closing = 1;
311 
312 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
313 		cuse_client_is_closing(pcc);
314 	}
315 }
316 
317 static struct cuse_client_command *
318 cuse_server_find_command(struct cuse_server *pcs, struct thread *td)
319 {
320 	struct cuse_client *pcc;
321 	int n;
322 
323 	if (pcs->is_closing)
324 		goto done;
325 
326 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
327 		if (CUSE_CLIENT_CLOSING(pcc))
328 			continue;
329 		for (n = 0; n != CUSE_CMD_MAX; n++) {
330 			if (pcc->cmds[n].entered == td)
331 				return (&pcc->cmds[n]);
332 		}
333 	}
334 done:
335 	return (NULL);
336 }
337 
/*
 * Sanitise a NUL terminated string in place: every character that is
 * not an ASCII letter, an ASCII digit, '.', '_' or '/' is replaced
 * by '_'.
 */
static void
cuse_str_filter(char *ptr)
{
	int allowed;
	int c;

	for (; (c = *ptr) != 0; ptr++) {
		allowed =
		    (c >= 'a' && c <= 'z') ||
		    (c >= 'A' && c <= 'Z') ||
		    (c >= '0' && c <= '9') ||
		    c == '.' || c == '_' || c == '/';

		if (!allowed)
			*ptr = '_';
	}
}
366 
367 static int
368 cuse_convert_error(int error)
369 {
370 	;				/* indent fix */
371 	switch (error) {
372 	case CUSE_ERR_NONE:
373 		return (0);
374 	case CUSE_ERR_BUSY:
375 		return (EBUSY);
376 	case CUSE_ERR_WOULDBLOCK:
377 		return (EWOULDBLOCK);
378 	case CUSE_ERR_INVALID:
379 		return (EINVAL);
380 	case CUSE_ERR_NO_MEMORY:
381 		return (ENOMEM);
382 	case CUSE_ERR_FAULT:
383 		return (EFAULT);
384 	case CUSE_ERR_SIGNAL:
385 		return (EINTR);
386 	default:
387 		return (ENXIO);
388 	}
389 }
390 
391 static void
392 cuse_server_free_memory(struct cuse_server *pcs)
393 {
394 	struct cuse_memory *mem;
395 	uint32_t n;
396 
397 	for (n = 0; n != CUSE_ALLOC_UNIT_MAX; n++) {
398 		mem = &cuse_mem[n];
399 
400 		/* this memory is never freed */
401 		if (mem->owner == pcs) {
402 			mem->owner = NULL;
403 			mem->is_allocated = 0;
404 		}
405 	}
406 }
407 
408 static int
409 cuse_server_alloc_memory(struct cuse_server *pcs,
410     struct cuse_memory *mem, uint32_t page_count)
411 {
412 	void *ptr;
413 	int error;
414 
415 	cuse_lock();
416 
417 	if (mem->virtaddr == NBUSY) {
418 		cuse_unlock();
419 		return (EBUSY);
420 	}
421 	if (mem->virtaddr != NULL) {
422 		if (mem->is_allocated != 0) {
423 			cuse_unlock();
424 			return (EBUSY);
425 		}
426 		if (mem->page_count == page_count) {
427 			mem->is_allocated = 1;
428 			mem->owner = pcs;
429 			cuse_unlock();
430 			return (0);
431 		}
432 		cuse_unlock();
433 		return (EBUSY);
434 	}
435 	memset(mem, 0, sizeof(*mem));
436 
437 	mem->virtaddr = NBUSY;
438 
439 	cuse_unlock();
440 
441 	ptr = malloc(page_count * PAGE_SIZE, M_CUSE, M_WAITOK | M_ZERO);
442 	if (ptr == NULL)
443 		error = ENOMEM;
444 	else
445 		error = 0;
446 
447 	cuse_lock();
448 
449 	if (error) {
450 		mem->virtaddr = NULL;
451 		cuse_unlock();
452 		return (error);
453 	}
454 	mem->virtaddr = ptr;
455 	mem->page_count = page_count;
456 	mem->is_allocated = 1;
457 	mem->owner = pcs;
458 	cuse_unlock();
459 
460 	return (0);
461 }
462 
463 static int
464 cuse_client_get(struct cuse_client **ppcc)
465 {
466 	struct cuse_client *pcc;
467 	int error;
468 
469 	/* try to get private data */
470 	error = devfs_get_cdevpriv((void **)&pcc);
471 	if (error != 0) {
472 		*ppcc = NULL;
473 		return (error);
474 	}
475 	/* check if closing */
476 	cuse_lock();
477 	if (CUSE_CLIENT_CLOSING(pcc) || pcc->server->is_closing) {
478 		cuse_unlock();
479 		*ppcc = NULL;
480 		return (EINVAL);
481 	}
482 	cuse_unlock();
483 	*ppcc = pcc;
484 	return (0);
485 }
486 
487 static void
488 cuse_client_is_closing(struct cuse_client *pcc)
489 {
490 	struct cuse_client_command *pccmd;
491 	uint32_t n;
492 
493 	if (CUSE_CLIENT_CLOSING(pcc))
494 		return;
495 
496 	pcc->cflags |= CUSE_CLI_IS_CLOSING;
497 	pcc->server_dev = NULL;
498 
499 	for (n = 0; n != CUSE_CMD_MAX; n++) {
500 
501 		pccmd = &pcc->cmds[n];
502 
503 		if (pccmd->entry.tqe_prev != NULL) {
504 			TAILQ_REMOVE(&pcc->server->head, pccmd, entry);
505 			pccmd->entry.tqe_prev = NULL;
506 		}
507 		cv_broadcast(&pccmd->cv);
508 	}
509 }
510 
511 static void
512 cuse_client_send_command_locked(struct cuse_client_command *pccmd,
513     unsigned long data_ptr, unsigned long arg, int fflags, int ioflag)
514 {
515 	unsigned long cuse_fflags = 0;
516 	struct cuse_server *pcs;
517 
518 	if (fflags & FREAD)
519 		cuse_fflags |= CUSE_FFLAG_READ;
520 
521 	if (fflags & FWRITE)
522 		cuse_fflags |= CUSE_FFLAG_WRITE;
523 
524 	if (ioflag & IO_NDELAY)
525 		cuse_fflags |= CUSE_FFLAG_NONBLOCK;
526 
527 	pccmd->sub.fflags = cuse_fflags;
528 	pccmd->sub.data_pointer = data_ptr;
529 	pccmd->sub.argument = arg;
530 
531 	pcs = pccmd->client->server;
532 
533 	if ((pccmd->entry.tqe_prev == NULL) &&
534 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
535 	    (pcs->is_closing == 0)) {
536 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
537 		cv_signal(&pcs->cv);
538 	}
539 }
540 
541 static void
542 cuse_client_got_signal(struct cuse_client_command *pccmd)
543 {
544 	struct cuse_server *pcs;
545 
546 	pccmd->got_signal = 1;
547 
548 	pccmd = &pccmd->client->cmds[CUSE_CMD_SIGNAL];
549 
550 	pcs = pccmd->client->server;
551 
552 	if ((pccmd->entry.tqe_prev == NULL) &&
553 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
554 	    (pcs->is_closing == 0)) {
555 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
556 		cv_signal(&pcs->cv);
557 	}
558 }
559 
560 static int
561 cuse_client_receive_command_locked(struct cuse_client_command *pccmd,
562     uint8_t *arg_ptr, uint32_t arg_len)
563 {
564 	int error;
565 
566 	error = 0;
567 
568 	pccmd->proc_curr = curthread->td_proc;
569 
570 	if (CUSE_CLIENT_CLOSING(pccmd->client) ||
571 	    pccmd->client->server->is_closing) {
572 		error = CUSE_ERR_OTHER;
573 		goto done;
574 	}
575 	while (pccmd->command == CUSE_CMD_NONE) {
576 		if (error != 0) {
577 			cv_wait(&pccmd->cv, &cuse_mtx);
578 		} else {
579 			error = cv_wait_sig(&pccmd->cv, &cuse_mtx);
580 
581 			if (error != 0)
582 				cuse_client_got_signal(pccmd);
583 		}
584 		if (CUSE_CLIENT_CLOSING(pccmd->client) ||
585 		    pccmd->client->server->is_closing) {
586 			error = CUSE_ERR_OTHER;
587 			goto done;
588 		}
589 	}
590 
591 	error = pccmd->error;
592 	pccmd->command = CUSE_CMD_NONE;
593 	cv_signal(&pccmd->cv);
594 
595 done:
596 
597 	/* wait until all process references are gone */
598 
599 	pccmd->proc_curr = NULL;
600 
601 	while (pccmd->proc_refs != 0)
602 		cv_wait(&pccmd->cv, &cuse_mtx);
603 
604 	return (error);
605 }
606 
607 /*------------------------------------------------------------------------*
608  *	CUSE SERVER PART
609  *------------------------------------------------------------------------*/
610 
611 static void
612 cuse_server_free_dev(struct cuse_server_dev *pcsd)
613 {
614 	struct cuse_server *pcs;
615 	struct cuse_client *pcc;
616 
617 	/* get server pointer */
618 	pcs = pcsd->server;
619 
620 	/* prevent creation of more devices */
621 	cuse_lock();
622 	if (pcsd->kern_dev != NULL)
623 		pcsd->kern_dev->si_drv1 = NULL;
624 
625 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
626 		if (pcc->server_dev == pcsd)
627 			cuse_client_is_closing(pcc);
628 	}
629 	cuse_unlock();
630 
631 	/* destroy device, if any */
632 	if (pcsd->kern_dev != NULL) {
633 		/* destroy device synchronously */
634 		destroy_dev(pcsd->kern_dev);
635 	}
636 	free(pcsd, M_CUSE);
637 }
638 
639 static void
640 cuse_server_free(void *arg)
641 {
642 	struct cuse_server *pcs = arg;
643 	struct cuse_server_dev *pcsd;
644 
645 	cuse_lock();
646 	pcs->refs--;
647 	if (pcs->refs != 0) {
648 		cuse_unlock();
649 		return;
650 	}
651 	cuse_server_is_closing(pcs);
652 	/* final client wakeup, if any */
653 	cuse_server_wakeup_all_client_locked(pcs);
654 
655 	TAILQ_REMOVE(&cuse_server_head, pcs, entry);
656 
657 	cuse_free_unit_by_id_locked(pcs, -1);
658 
659 	while ((pcsd = TAILQ_FIRST(&pcs->hdev)) != NULL) {
660 		TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
661 		cuse_unlock();
662 		cuse_server_free_dev(pcsd);
663 		cuse_lock();
664 	}
665 
666 	cuse_server_free_memory(pcs);
667 
668 	knlist_clear(&pcs->selinfo.si_note, 1);
669 	knlist_destroy(&pcs->selinfo.si_note);
670 
671 	cuse_unlock();
672 
673 	seldrain(&pcs->selinfo);
674 
675 	cv_destroy(&pcs->cv);
676 
677 	free(pcs, M_CUSE);
678 }
679 
680 static int
681 cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
682 {
683 	struct cuse_server *pcs;
684 
685 	pcs = malloc(sizeof(*pcs), M_CUSE, M_WAITOK | M_ZERO);
686 	if (pcs == NULL)
687 		return (ENOMEM);
688 
689 	if (devfs_set_cdevpriv(pcs, &cuse_server_free)) {
690 		printf("Cuse: Cannot set cdevpriv.\n");
691 		free(pcs, M_CUSE);
692 		return (ENOMEM);
693 	}
694 	TAILQ_INIT(&pcs->head);
695 	TAILQ_INIT(&pcs->hdev);
696 	TAILQ_INIT(&pcs->hcli);
697 
698 	cv_init(&pcs->cv, "cuse-server-cv");
699 
700 	knlist_init_mtx(&pcs->selinfo.si_note, &cuse_mtx);
701 
702 	cuse_lock();
703 	pcs->refs++;
704 	TAILQ_INSERT_TAIL(&cuse_server_head, pcs, entry);
705 	cuse_unlock();
706 
707 	return (0);
708 }
709 
710 static int
711 cuse_server_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
712 {
713 	struct cuse_server *pcs;
714 	int error;
715 
716 	error = cuse_server_get(&pcs);
717 	if (error != 0)
718 		goto done;
719 
720 	cuse_lock();
721 	cuse_server_is_closing(pcs);
722 	/* final client wakeup, if any */
723 	cuse_server_wakeup_all_client_locked(pcs);
724 
725 	knlist_clear(&pcs->selinfo.si_note, 1);
726 	cuse_unlock();
727 
728 done:
729 	return (0);
730 }
731 
/* Reading from the control device is not supported; returns ENXIO. */
static int
cuse_server_read(struct cdev *dev, struct uio *uio, int ioflag)
{

	return (ENXIO);
}
737 
/* Writing to the control device is not supported; returns ENXIO. */
static int
cuse_server_write(struct cdev *dev, struct uio *uio, int ioflag)
{

	return (ENXIO);
}
743 
744 static int
745 cuse_server_ioctl_copy_locked(struct cuse_client_command *pccmd,
746     struct cuse_data_chunk *pchk, int isread)
747 {
748 	struct proc *p_proc;
749 	uint32_t offset;
750 	int error;
751 
752 	offset = pchk->peer_ptr - CUSE_BUF_MIN_PTR;
753 
754 	if (pchk->length > CUSE_BUFFER_MAX)
755 		return (EFAULT);
756 
757 	if (offset >= CUSE_BUFFER_MAX)
758 		return (EFAULT);
759 
760 	if ((offset + pchk->length) > CUSE_BUFFER_MAX)
761 		return (EFAULT);
762 
763 	p_proc = pccmd->proc_curr;
764 	if (p_proc == NULL)
765 		return (ENXIO);
766 
767 	if (pccmd->proc_refs < 0)
768 		return (ENOMEM);
769 
770 	pccmd->proc_refs++;
771 
772 	cuse_unlock();
773 
774 	if (isread == 0) {
775 		error = copyin(
776 		    (void *)pchk->local_ptr,
777 		    pccmd->client->ioctl_buffer + offset,
778 		    pchk->length);
779 	} else {
780 		error = copyout(
781 		    pccmd->client->ioctl_buffer + offset,
782 		    (void *)pchk->local_ptr,
783 		    pchk->length);
784 	}
785 
786 	cuse_lock();
787 
788 	pccmd->proc_refs--;
789 
790 	if (pccmd->proc_curr == NULL)
791 		cv_signal(&pccmd->cv);
792 
793 	return (error);
794 }
795 
796 static int
797 cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
798     struct proc *proc_d, vm_offset_t data_d, size_t len)
799 {
800 	struct thread *td;
801 	struct proc *proc_cur;
802 	int error;
803 
804 	td = curthread;
805 	proc_cur = td->td_proc;
806 
807 	if (proc_cur == proc_d) {
808 		struct iovec iov = {
809 			.iov_base = (caddr_t)data_d,
810 			.iov_len = len,
811 		};
812 		struct uio uio = {
813 			.uio_iov = &iov,
814 			.uio_iovcnt = 1,
815 			.uio_offset = (off_t)data_s,
816 			.uio_resid = len,
817 			.uio_segflg = UIO_USERSPACE,
818 			.uio_rw = UIO_READ,
819 			.uio_td = td,
820 		};
821 
822 		PHOLD(proc_s);
823 		error = proc_rwmem(proc_s, &uio);
824 		PRELE(proc_s);
825 
826 	} else if (proc_cur == proc_s) {
827 		struct iovec iov = {
828 			.iov_base = (caddr_t)data_s,
829 			.iov_len = len,
830 		};
831 		struct uio uio = {
832 			.uio_iov = &iov,
833 			.uio_iovcnt = 1,
834 			.uio_offset = (off_t)data_d,
835 			.uio_resid = len,
836 			.uio_segflg = UIO_USERSPACE,
837 			.uio_rw = UIO_WRITE,
838 			.uio_td = td,
839 		};
840 
841 		PHOLD(proc_d);
842 		error = proc_rwmem(proc_d, &uio);
843 		PRELE(proc_d);
844 	} else {
845 		error = EINVAL;
846 	}
847 	return (error);
848 }
849 
850 static int
851 cuse_server_data_copy_locked(struct cuse_client_command *pccmd,
852     struct cuse_data_chunk *pchk, int isread)
853 {
854 	struct proc *p_proc;
855 	int error;
856 
857 	p_proc = pccmd->proc_curr;
858 	if (p_proc == NULL)
859 		return (ENXIO);
860 
861 	if (pccmd->proc_refs < 0)
862 		return (ENOMEM);
863 
864 	pccmd->proc_refs++;
865 
866 	cuse_unlock();
867 
868 	if (isread == 0) {
869 		error = cuse_proc2proc_copy(
870 		    curthread->td_proc, pchk->local_ptr,
871 		    p_proc, pchk->peer_ptr,
872 		    pchk->length);
873 	} else {
874 		error = cuse_proc2proc_copy(
875 		    p_proc, pchk->peer_ptr,
876 		    curthread->td_proc, pchk->local_ptr,
877 		    pchk->length);
878 	}
879 
880 	cuse_lock();
881 
882 	pccmd->proc_refs--;
883 
884 	if (pccmd->proc_curr == NULL)
885 		cv_signal(&pccmd->cv);
886 
887 	return (error);
888 }
889 
890 static int
891 cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
892 {
893 	int n;
894 	int x = 0;
895 	int match;
896 
897 	do {
898 		for (match = n = 0; n != CUSE_DEVICES_MAX; n++) {
899 			if (cuse_alloc_unit[n] != NULL) {
900 				if ((cuse_alloc_unit_id[n] ^ id) & CUSE_ID_MASK)
901 					continue;
902 				if ((cuse_alloc_unit_id[n] & ~CUSE_ID_MASK) == x) {
903 					x++;
904 					match = 1;
905 				}
906 			}
907 		}
908 	} while (match);
909 
910 	if (x < 256) {
911 		for (n = 0; n != CUSE_DEVICES_MAX; n++) {
912 			if (cuse_alloc_unit[n] == NULL) {
913 				cuse_alloc_unit[n] = pcs;
914 				cuse_alloc_unit_id[n] = id | x;
915 				return (x);
916 			}
917 		}
918 	}
919 	return (-1);
920 }
921 
922 static void
923 cuse_server_wakeup_locked(struct cuse_server *pcs)
924 {
925 	selwakeup(&pcs->selinfo);
926 	KNOTE_LOCKED(&pcs->selinfo.si_note, 0);
927 }
928 
929 static void
930 cuse_server_wakeup_all_client_locked(struct cuse_server *pcs)
931 {
932 	struct cuse_client *pcc;
933 
934 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
935 		pcc->cflags |= (CUSE_CLI_KNOTE_NEED_READ |
936 		    CUSE_CLI_KNOTE_NEED_WRITE);
937 	}
938 	cuse_server_wakeup_locked(pcs);
939 }
940 
941 static int
942 cuse_free_unit_by_id_locked(struct cuse_server *pcs, int id)
943 {
944 	int n;
945 	int found = 0;
946 
947 	for (n = 0; n != CUSE_DEVICES_MAX; n++) {
948 		if (cuse_alloc_unit[n] == pcs) {
949 			if (cuse_alloc_unit_id[n] == id || id == -1) {
950 				cuse_alloc_unit[n] = NULL;
951 				cuse_alloc_unit_id[n] = 0;
952 				found = 1;
953 			}
954 		}
955 	}
956 
957 	return (found ? 0 : EINVAL);
958 }
959 
960 static int
961 cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
962     caddr_t data, int fflag, struct thread *td)
963 {
964 	struct cuse_server *pcs;
965 	int error;
966 
967 	error = cuse_server_get(&pcs);
968 	if (error != 0)
969 		return (error);
970 
971 	switch (cmd) {
972 		struct cuse_client_command *pccmd;
973 		struct cuse_client *pcc;
974 		struct cuse_command *pcmd;
975 		struct cuse_alloc_info *pai;
976 		struct cuse_create_dev *pcd;
977 		struct cuse_server_dev *pcsd;
978 		struct cuse_data_chunk *pchk;
979 		int n;
980 
981 	case CUSE_IOCTL_GET_COMMAND:
982 		pcmd = (void *)data;
983 
984 		cuse_lock();
985 
986 		while ((pccmd = TAILQ_FIRST(&pcs->head)) == NULL) {
987 			error = cv_wait_sig(&pcs->cv, &cuse_mtx);
988 
989 			if (pcs->is_closing)
990 				error = ENXIO;
991 
992 			if (error) {
993 				cuse_unlock();
994 				return (error);
995 			}
996 		}
997 
998 		TAILQ_REMOVE(&pcs->head, pccmd, entry);
999 		pccmd->entry.tqe_prev = NULL;
1000 
1001 		pccmd->entered = curthread;
1002 
1003 		*pcmd = pccmd->sub;
1004 
1005 		cuse_unlock();
1006 
1007 		break;
1008 
1009 	case CUSE_IOCTL_SYNC_COMMAND:
1010 
1011 		cuse_lock();
1012 		while ((pccmd = cuse_server_find_command(pcs, curthread)) != NULL) {
1013 
1014 			/* send sync command */
1015 			pccmd->entered = NULL;
1016 			pccmd->error = *(int *)data;
1017 			pccmd->command = CUSE_CMD_SYNC;
1018 
1019 			/* signal peer, if any */
1020 			cv_signal(&pccmd->cv);
1021 		}
1022 		cuse_unlock();
1023 
1024 		break;
1025 
1026 	case CUSE_IOCTL_ALLOC_UNIT:
1027 
1028 		cuse_lock();
1029 		n = cuse_alloc_unit_by_id_locked(pcs,
1030 		    CUSE_ID_DEFAULT(0));
1031 		cuse_unlock();
1032 
1033 		if (n < 0)
1034 			error = ENOMEM;
1035 		else
1036 			*(int *)data = n;
1037 		break;
1038 
1039 	case CUSE_IOCTL_ALLOC_UNIT_BY_ID:
1040 
1041 		n = *(int *)data;
1042 
1043 		n = (n & CUSE_ID_MASK);
1044 
1045 		cuse_lock();
1046 		n = cuse_alloc_unit_by_id_locked(pcs, n);
1047 		cuse_unlock();
1048 
1049 		if (n < 0)
1050 			error = ENOMEM;
1051 		else
1052 			*(int *)data = n;
1053 		break;
1054 
1055 	case CUSE_IOCTL_FREE_UNIT:
1056 
1057 		n = *(int *)data;
1058 
1059 		n = CUSE_ID_DEFAULT(n);
1060 
1061 		cuse_lock();
1062 		error = cuse_free_unit_by_id_locked(pcs, n);
1063 		cuse_unlock();
1064 		break;
1065 
1066 	case CUSE_IOCTL_FREE_UNIT_BY_ID:
1067 
1068 		n = *(int *)data;
1069 
1070 		cuse_lock();
1071 		error = cuse_free_unit_by_id_locked(pcs, n);
1072 		cuse_unlock();
1073 		break;
1074 
1075 	case CUSE_IOCTL_ALLOC_MEMORY:
1076 
1077 		pai = (void *)data;
1078 
1079 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1080 			error = ENOMEM;
1081 			break;
1082 		}
1083 		if (pai->page_count > CUSE_ALLOC_PAGES_MAX) {
1084 			error = ENOMEM;
1085 			break;
1086 		}
1087 		error = cuse_server_alloc_memory(pcs,
1088 		    &cuse_mem[pai->alloc_nr], pai->page_count);
1089 		break;
1090 
1091 	case CUSE_IOCTL_FREE_MEMORY:
1092 		pai = (void *)data;
1093 
1094 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1095 			error = ENOMEM;
1096 			break;
1097 		}
1098 		/* we trust the character device driver in this case */
1099 
1100 		cuse_lock();
1101 		if (cuse_mem[pai->alloc_nr].owner == pcs) {
1102 			cuse_mem[pai->alloc_nr].is_allocated = 0;
1103 			cuse_mem[pai->alloc_nr].owner = NULL;
1104 		} else {
1105 			error = EINVAL;
1106 		}
1107 		cuse_unlock();
1108 		break;
1109 
1110 	case CUSE_IOCTL_GET_SIG:
1111 
1112 		cuse_lock();
1113 		pccmd = cuse_server_find_command(pcs, curthread);
1114 
1115 		if (pccmd != NULL) {
1116 			n = pccmd->got_signal;
1117 			pccmd->got_signal = 0;
1118 		} else {
1119 			n = 0;
1120 		}
1121 		cuse_unlock();
1122 
1123 		*(int *)data = n;
1124 
1125 		break;
1126 
1127 	case CUSE_IOCTL_SET_PFH:
1128 
1129 		cuse_lock();
1130 		pccmd = cuse_server_find_command(pcs, curthread);
1131 
1132 		if (pccmd != NULL) {
1133 			pcc = pccmd->client;
1134 			for (n = 0; n != CUSE_CMD_MAX; n++) {
1135 				pcc->cmds[n].sub.per_file_handle = *(unsigned long *)data;
1136 			}
1137 		} else {
1138 			error = ENXIO;
1139 		}
1140 		cuse_unlock();
1141 		break;
1142 
1143 	case CUSE_IOCTL_CREATE_DEV:
1144 
1145 		error = priv_check(curthread, PRIV_DRIVER);
1146 		if (error)
1147 			break;
1148 
1149 		pcd = (void *)data;
1150 
1151 		/* filter input */
1152 
1153 		pcd->devname[sizeof(pcd->devname) - 1] = 0;
1154 
1155 		if (pcd->devname[0] == 0) {
1156 			error = EINVAL;
1157 			break;
1158 		}
1159 		cuse_str_filter(pcd->devname);
1160 
1161 		pcd->permissions &= 0777;
1162 
1163 		/* try to allocate a character device */
1164 
1165 		pcsd = malloc(sizeof(*pcsd), M_CUSE, M_WAITOK | M_ZERO);
1166 
1167 		if (pcsd == NULL) {
1168 			error = ENOMEM;
1169 			break;
1170 		}
1171 		pcsd->server = pcs;
1172 
1173 		pcsd->user_dev = pcd->dev;
1174 
1175 		pcsd->kern_dev = make_dev_credf(MAKEDEV_CHECKNAME,
1176 		    &cuse_client_devsw, 0, NULL, pcd->user_id, pcd->group_id,
1177 		    pcd->permissions, "%s", pcd->devname);
1178 
1179 		if (pcsd->kern_dev == NULL) {
1180 			free(pcsd, M_CUSE);
1181 			error = ENOMEM;
1182 			break;
1183 		}
1184 		pcsd->kern_dev->si_drv1 = pcsd;
1185 
1186 		cuse_lock();
1187 		TAILQ_INSERT_TAIL(&pcs->hdev, pcsd, entry);
1188 		cuse_unlock();
1189 
1190 		break;
1191 
1192 	case CUSE_IOCTL_DESTROY_DEV:
1193 
1194 		error = priv_check(curthread, PRIV_DRIVER);
1195 		if (error)
1196 			break;
1197 
1198 		cuse_lock();
1199 
1200 		error = EINVAL;
1201 
1202 		pcsd = TAILQ_FIRST(&pcs->hdev);
1203 		while (pcsd != NULL) {
1204 			if (pcsd->user_dev == *(struct cuse_dev **)data) {
1205 				TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
1206 				cuse_unlock();
1207 				cuse_server_free_dev(pcsd);
1208 				cuse_lock();
1209 				error = 0;
1210 				pcsd = TAILQ_FIRST(&pcs->hdev);
1211 			} else {
1212 				pcsd = TAILQ_NEXT(pcsd, entry);
1213 			}
1214 		}
1215 
1216 		cuse_unlock();
1217 		break;
1218 
1219 	case CUSE_IOCTL_WRITE_DATA:
1220 	case CUSE_IOCTL_READ_DATA:
1221 
1222 		cuse_lock();
1223 		pchk = (struct cuse_data_chunk *)data;
1224 
1225 		pccmd = cuse_server_find_command(pcs, curthread);
1226 
1227 		if (pccmd == NULL) {
1228 			error = ENXIO;	/* invalid request */
1229 		} else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
1230 			error = EFAULT;	/* NULL pointer */
1231 		} else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
1232 			error = cuse_server_ioctl_copy_locked(pccmd,
1233 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1234 		} else {
1235 			error = cuse_server_data_copy_locked(pccmd,
1236 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1237 		}
1238 		cuse_unlock();
1239 		break;
1240 
1241 	case CUSE_IOCTL_SELWAKEUP:
1242 		cuse_lock();
1243 		/*
1244 		 * We don't know which direction caused the event.
1245 		 * Wakeup both!
1246 		 */
1247 		cuse_server_wakeup_all_client_locked(pcs);
1248 		cuse_unlock();
1249 		break;
1250 
1251 	default:
1252 		error = ENXIO;
1253 		break;
1254 	}
1255 	return (error);
1256 }
1257 
1258 static int
1259 cuse_server_poll(struct cdev *dev, int events, struct thread *td)
1260 {
1261 	return (events & (POLLHUP | POLLPRI | POLLIN |
1262 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1263 }
1264 
1265 static int
1266 cuse_server_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr)
1267 {
1268 	uint32_t page_nr = offset / PAGE_SIZE;
1269 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1270 	struct cuse_memory *mem;
1271 	struct cuse_server *pcs;
1272 	uint8_t *ptr;
1273 	int error;
1274 
1275 	if (alloc_nr >= CUSE_ALLOC_UNIT_MAX)
1276 		return (ENOMEM);
1277 
1278 	error = cuse_server_get(&pcs);
1279 	if (error != 0)
1280 		pcs = NULL;
1281 
1282 	cuse_lock();
1283 	mem = &cuse_mem[alloc_nr];
1284 
1285 	/* try to enforce slight ownership */
1286 	if ((pcs != NULL) && (mem->owner != pcs)) {
1287 		cuse_unlock();
1288 		return (EINVAL);
1289 	}
1290 	if (mem->virtaddr == NULL) {
1291 		cuse_unlock();
1292 		return (ENOMEM);
1293 	}
1294 	if (mem->virtaddr == NBUSY) {
1295 		cuse_unlock();
1296 		return (ENOMEM);
1297 	}
1298 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1299 
1300 	if (page_nr >= mem->page_count) {
1301 		cuse_unlock();
1302 		return (ENXIO);
1303 	}
1304 	ptr = mem->virtaddr + (page_nr * PAGE_SIZE);
1305 	cuse_unlock();
1306 
1307 	*paddr = vtophys(ptr);
1308 
1309 	return (0);
1310 }
1311 
1312 /*------------------------------------------------------------------------*
1313  *	CUSE CLIENT PART
1314  *------------------------------------------------------------------------*/
1315 static void
1316 cuse_client_free(void *arg)
1317 {
1318 	struct cuse_client *pcc = arg;
1319 	struct cuse_client_command *pccmd;
1320 	struct cuse_server *pcs;
1321 	int n;
1322 
1323 	cuse_lock();
1324 	cuse_client_is_closing(pcc);
1325 	TAILQ_REMOVE(&pcc->server->hcli, pcc, entry);
1326 	cuse_unlock();
1327 
1328 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1329 
1330 		pccmd = &pcc->cmds[n];
1331 
1332 		sx_destroy(&pccmd->sx);
1333 		cv_destroy(&pccmd->cv);
1334 	}
1335 
1336 	pcs = pcc->server;
1337 
1338 	free(pcc, M_CUSE);
1339 
1340 	/* drop reference on server */
1341 	cuse_server_free(pcs);
1342 }
1343 
1344 static int
1345 cuse_client_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
1346 {
1347 	struct cuse_client_command *pccmd;
1348 	struct cuse_server_dev *pcsd;
1349 	struct cuse_client *pcc;
1350 	struct cuse_server *pcs;
1351 	struct cuse_dev *pcd;
1352 	int error;
1353 	int n;
1354 
1355 	cuse_lock();
1356 	pcsd = dev->si_drv1;
1357 	if (pcsd != NULL) {
1358 		pcs = pcsd->server;
1359 		pcd = pcsd->user_dev;
1360 		pcs->refs++;
1361 		if (pcs->refs < 0) {
1362 			/* overflow */
1363 			pcs->refs--;
1364 			pcsd = NULL;
1365 		}
1366 	} else {
1367 		pcs = NULL;
1368 		pcd = NULL;
1369 	}
1370 	cuse_unlock();
1371 
1372 	if (pcsd == NULL)
1373 		return (EINVAL);
1374 
1375 	pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
1376 	if (pcc == NULL) {
1377 		/* drop reference on server */
1378 		cuse_server_free(pcs);
1379 		return (ENOMEM);
1380 	}
1381 	if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
1382 		printf("Cuse: Cannot set cdevpriv.\n");
1383 		/* drop reference on server */
1384 		cuse_server_free(pcs);
1385 		free(pcc, M_CUSE);
1386 		return (ENOMEM);
1387 	}
1388 	pcc->fflags = fflags;
1389 	pcc->server_dev = pcsd;
1390 	pcc->server = pcs;
1391 
1392 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1393 
1394 		pccmd = &pcc->cmds[n];
1395 
1396 		pccmd->sub.dev = pcd;
1397 		pccmd->sub.command = n;
1398 		pccmd->client = pcc;
1399 
1400 		sx_init(&pccmd->sx, "cuse-client-sx");
1401 		cv_init(&pccmd->cv, "cuse-client-cv");
1402 	}
1403 
1404 	cuse_lock();
1405 
1406 	/* cuse_client_free() assumes that the client is listed somewhere! */
1407 	/* always enqueue */
1408 
1409 	TAILQ_INSERT_TAIL(&pcs->hcli, pcc, entry);
1410 
1411 	/* check if server is closing */
1412 	if ((pcs->is_closing != 0) || (dev->si_drv1 == NULL)) {
1413 		error = EINVAL;
1414 	} else {
1415 		error = 0;
1416 	}
1417 	cuse_unlock();
1418 
1419 	if (error) {
1420 		devfs_clear_cdevpriv();	/* XXX bugfix */
1421 		return (error);
1422 	}
1423 	pccmd = &pcc->cmds[CUSE_CMD_OPEN];
1424 
1425 	cuse_cmd_lock(pccmd);
1426 
1427 	cuse_lock();
1428 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1429 
1430 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1431 	cuse_unlock();
1432 
1433 	if (error < 0) {
1434 		error = cuse_convert_error(error);
1435 	} else {
1436 		error = 0;
1437 	}
1438 
1439 	cuse_cmd_unlock(pccmd);
1440 
1441 	if (error)
1442 		devfs_clear_cdevpriv();	/* XXX bugfix */
1443 
1444 	return (error);
1445 }
1446 
1447 static int
1448 cuse_client_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
1449 {
1450 	struct cuse_client_command *pccmd;
1451 	struct cuse_client *pcc;
1452 	int error;
1453 
1454 	error = cuse_client_get(&pcc);
1455 	if (error != 0)
1456 		return (0);
1457 
1458 	pccmd = &pcc->cmds[CUSE_CMD_CLOSE];
1459 
1460 	cuse_cmd_lock(pccmd);
1461 
1462 	cuse_lock();
1463 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1464 
1465 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1466 	cuse_unlock();
1467 
1468 	cuse_cmd_unlock(pccmd);
1469 
1470 	cuse_lock();
1471 	cuse_client_is_closing(pcc);
1472 	cuse_unlock();
1473 
1474 	return (0);
1475 }
1476 
1477 static void
1478 cuse_client_kqfilter_poll(struct cdev *dev, struct cuse_client *pcc)
1479 {
1480 	int temp;
1481 
1482 	cuse_lock();
1483 	temp = (pcc->cflags & (CUSE_CLI_KNOTE_HAS_READ |
1484 	    CUSE_CLI_KNOTE_HAS_WRITE));
1485 	pcc->cflags &= ~(CUSE_CLI_KNOTE_NEED_READ |
1486 	    CUSE_CLI_KNOTE_NEED_WRITE);
1487 	cuse_unlock();
1488 
1489 	if (temp != 0) {
1490 		/* get the latest polling state from the server */
1491 		temp = cuse_client_poll(dev, POLLIN | POLLOUT, NULL);
1492 
1493 		cuse_lock();
1494 		if (temp & (POLLIN | POLLOUT)) {
1495 			if (temp & POLLIN)
1496 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_READ;
1497 			if (temp & POLLOUT)
1498 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_WRITE;
1499 
1500 			/* make sure the "knote" gets woken up */
1501 			cuse_server_wakeup_locked(pcc->server);
1502 		}
1503 		cuse_unlock();
1504 	}
1505 }
1506 
1507 static int
1508 cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
1509 {
1510 	struct cuse_client_command *pccmd;
1511 	struct cuse_client *pcc;
1512 	int error;
1513 	int len;
1514 
1515 	error = cuse_client_get(&pcc);
1516 	if (error != 0)
1517 		return (error);
1518 
1519 	pccmd = &pcc->cmds[CUSE_CMD_READ];
1520 
1521 	if (uio->uio_segflg != UIO_USERSPACE) {
1522 		return (EINVAL);
1523 	}
1524 	uio->uio_segflg = UIO_NOCOPY;
1525 
1526 	cuse_cmd_lock(pccmd);
1527 
1528 	while (uio->uio_resid != 0) {
1529 
1530 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1531 			error = ENOMEM;
1532 			break;
1533 		}
1534 
1535 		len = uio->uio_iov->iov_len;
1536 
1537 		cuse_lock();
1538 		cuse_client_send_command_locked(pccmd,
1539 		    (unsigned long)uio->uio_iov->iov_base,
1540 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1541 
1542 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1543 		cuse_unlock();
1544 
1545 		if (error < 0) {
1546 			error = cuse_convert_error(error);
1547 			break;
1548 		} else if (error == len) {
1549 			error = uiomove(NULL, error, uio);
1550 			if (error)
1551 				break;
1552 		} else {
1553 			error = uiomove(NULL, error, uio);
1554 			break;
1555 		}
1556 	}
1557 	cuse_cmd_unlock(pccmd);
1558 
1559 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1560 
1561 	if (error == EWOULDBLOCK)
1562 		cuse_client_kqfilter_poll(dev, pcc);
1563 
1564 	return (error);
1565 }
1566 
1567 static int
1568 cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
1569 {
1570 	struct cuse_client_command *pccmd;
1571 	struct cuse_client *pcc;
1572 	int error;
1573 	int len;
1574 
1575 	error = cuse_client_get(&pcc);
1576 	if (error != 0)
1577 		return (error);
1578 
1579 	pccmd = &pcc->cmds[CUSE_CMD_WRITE];
1580 
1581 	if (uio->uio_segflg != UIO_USERSPACE) {
1582 		return (EINVAL);
1583 	}
1584 	uio->uio_segflg = UIO_NOCOPY;
1585 
1586 	cuse_cmd_lock(pccmd);
1587 
1588 	while (uio->uio_resid != 0) {
1589 
1590 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1591 			error = ENOMEM;
1592 			break;
1593 		}
1594 
1595 		len = uio->uio_iov->iov_len;
1596 
1597 		cuse_lock();
1598 		cuse_client_send_command_locked(pccmd,
1599 		    (unsigned long)uio->uio_iov->iov_base,
1600 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1601 
1602 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1603 		cuse_unlock();
1604 
1605 		if (error < 0) {
1606 			error = cuse_convert_error(error);
1607 			break;
1608 		} else if (error == len) {
1609 			error = uiomove(NULL, error, uio);
1610 			if (error)
1611 				break;
1612 		} else {
1613 			error = uiomove(NULL, error, uio);
1614 			break;
1615 		}
1616 	}
1617 	cuse_cmd_unlock(pccmd);
1618 
1619 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1620 
1621 	if (error == EWOULDBLOCK)
1622 		cuse_client_kqfilter_poll(dev, pcc);
1623 
1624 	return (error);
1625 }
1626 
1627 int
1628 cuse_client_ioctl(struct cdev *dev, unsigned long cmd,
1629     caddr_t data, int fflag, struct thread *td)
1630 {
1631 	struct cuse_client_command *pccmd;
1632 	struct cuse_client *pcc;
1633 	int error;
1634 	int len;
1635 
1636 	error = cuse_client_get(&pcc);
1637 	if (error != 0)
1638 		return (error);
1639 
1640 	len = IOCPARM_LEN(cmd);
1641 	if (len > CUSE_BUFFER_MAX)
1642 		return (ENOMEM);
1643 
1644 	pccmd = &pcc->cmds[CUSE_CMD_IOCTL];
1645 
1646 	cuse_cmd_lock(pccmd);
1647 
1648 	if (cmd & IOC_IN)
1649 		memcpy(pcc->ioctl_buffer, data, len);
1650 
1651 	/*
1652 	 * When the ioctl-length is zero drivers can pass information
1653 	 * through the data pointer of the ioctl. Make sure this information
1654 	 * is forwarded to the driver.
1655 	 */
1656 
1657 	cuse_lock();
1658 	cuse_client_send_command_locked(pccmd,
1659 	    (len == 0) ? *(long *)data : CUSE_BUF_MIN_PTR,
1660 	    (unsigned long)cmd, pcc->fflags,
1661 	    (fflag & O_NONBLOCK) ? IO_NDELAY : 0);
1662 
1663 	error = cuse_client_receive_command_locked(pccmd, data, len);
1664 	cuse_unlock();
1665 
1666 	if (error < 0) {
1667 		error = cuse_convert_error(error);
1668 	} else {
1669 		error = 0;
1670 	}
1671 
1672 	if (cmd & IOC_OUT)
1673 		memcpy(data, pcc->ioctl_buffer, len);
1674 
1675 	cuse_cmd_unlock(pccmd);
1676 
1677 	if (error == EWOULDBLOCK)
1678 		cuse_client_kqfilter_poll(dev, pcc);
1679 
1680 	return (error);
1681 }
1682 
1683 static int
1684 cuse_client_poll(struct cdev *dev, int events, struct thread *td)
1685 {
1686 	struct cuse_client_command *pccmd;
1687 	struct cuse_client *pcc;
1688 	unsigned long temp;
1689 	int error;
1690 	int revents;
1691 
1692 	error = cuse_client_get(&pcc);
1693 	if (error != 0)
1694 		goto pollnval;
1695 
1696 	temp = 0;
1697 
1698 	if (events & (POLLPRI | POLLIN | POLLRDNORM))
1699 		temp |= CUSE_POLL_READ;
1700 
1701 	if (events & (POLLOUT | POLLWRNORM))
1702 		temp |= CUSE_POLL_WRITE;
1703 
1704 	if (events & POLLHUP)
1705 		temp |= CUSE_POLL_ERROR;
1706 
1707 	pccmd = &pcc->cmds[CUSE_CMD_POLL];
1708 
1709 	cuse_cmd_lock(pccmd);
1710 
1711 	/* Need to selrecord() first to not loose any events. */
1712 	if (temp != 0 && td != NULL)
1713 		selrecord(td, &pcc->server->selinfo);
1714 
1715 	cuse_lock();
1716 	cuse_client_send_command_locked(pccmd,
1717 	    0, temp, pcc->fflags, IO_NDELAY);
1718 
1719 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1720 	cuse_unlock();
1721 
1722 	cuse_cmd_unlock(pccmd);
1723 
1724 	if (error < 0) {
1725 		goto pollnval;
1726 	} else {
1727 		revents = 0;
1728 		if (error & CUSE_POLL_READ)
1729 			revents |= (events & (POLLPRI | POLLIN | POLLRDNORM));
1730 		if (error & CUSE_POLL_WRITE)
1731 			revents |= (events & (POLLOUT | POLLWRNORM));
1732 		if (error & CUSE_POLL_ERROR)
1733 			revents |= (events & POLLHUP);
1734 	}
1735 	return (revents);
1736 
1737  pollnval:
1738 	/* XXX many clients don't understand POLLNVAL */
1739 	return (events & (POLLHUP | POLLPRI | POLLIN |
1740 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1741 }
1742 
1743 static int
1744 cuse_client_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr)
1745 {
1746 	uint32_t page_nr = offset / PAGE_SIZE;
1747 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1748 	struct cuse_memory *mem;
1749 	struct cuse_server *pcs;
1750 	struct cuse_client *pcc;
1751 	uint8_t *ptr;
1752 	int error;
1753 
1754 	if (alloc_nr >= CUSE_ALLOC_UNIT_MAX)
1755 		return (ENOMEM);
1756 
1757 	error = cuse_client_get(&pcc);
1758 	if (error != 0)
1759 		pcs = NULL;
1760 	else
1761 		pcs = pcc->server;
1762 
1763 	cuse_lock();
1764 	mem = &cuse_mem[alloc_nr];
1765 
1766 	/* try to enforce slight ownership */
1767 	if ((pcs != NULL) && (mem->owner != pcs)) {
1768 		cuse_unlock();
1769 		return (EINVAL);
1770 	}
1771 	if (mem->virtaddr == NULL) {
1772 		cuse_unlock();
1773 		return (ENOMEM);
1774 	}
1775 	if (mem->virtaddr == NBUSY) {
1776 		cuse_unlock();
1777 		return (ENOMEM);
1778 	}
1779 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1780 
1781 	if (page_nr >= mem->page_count) {
1782 		cuse_unlock();
1783 		return (ENXIO);
1784 	}
1785 	ptr = mem->virtaddr + (page_nr * PAGE_SIZE);
1786 	cuse_unlock();
1787 
1788 	*paddr = vtophys(ptr);
1789 
1790 	return (0);
1791 }
1792 
1793 static void
1794 cuse_client_kqfilter_read_detach(struct knote *kn)
1795 {
1796 	struct cuse_client *pcc;
1797 
1798 	cuse_lock();
1799 	pcc = kn->kn_hook;
1800 	knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1801 	cuse_unlock();
1802 }
1803 
1804 static void
1805 cuse_client_kqfilter_write_detach(struct knote *kn)
1806 {
1807 	struct cuse_client *pcc;
1808 
1809 	cuse_lock();
1810 	pcc = kn->kn_hook;
1811 	knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1812 	cuse_unlock();
1813 }
1814 
1815 static int
1816 cuse_client_kqfilter_read_event(struct knote *kn, long hint)
1817 {
1818 	struct cuse_client *pcc;
1819 
1820 	mtx_assert(&cuse_mtx, MA_OWNED);
1821 
1822 	pcc = kn->kn_hook;
1823 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_READ) ? 1 : 0);
1824 }
1825 
1826 static int
1827 cuse_client_kqfilter_write_event(struct knote *kn, long hint)
1828 {
1829 	struct cuse_client *pcc;
1830 
1831 	mtx_assert(&cuse_mtx, MA_OWNED);
1832 
1833 	pcc = kn->kn_hook;
1834 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_WRITE) ? 1 : 0);
1835 }
1836 
1837 static int
1838 cuse_client_kqfilter(struct cdev *dev, struct knote *kn)
1839 {
1840 	struct cuse_client *pcc;
1841 	struct cuse_server *pcs;
1842 	int error;
1843 
1844 	error = cuse_client_get(&pcc);
1845 	if (error != 0)
1846 		return (error);
1847 
1848 	cuse_lock();
1849 	pcs = pcc->server;
1850 	switch (kn->kn_filter) {
1851 	case EVFILT_READ:
1852 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_READ;
1853 		kn->kn_hook = pcc;
1854 		kn->kn_fop = &cuse_client_kqfilter_read_ops;
1855 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1856 		break;
1857 	case EVFILT_WRITE:
1858 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_WRITE;
1859 		kn->kn_hook = pcc;
1860 		kn->kn_fop = &cuse_client_kqfilter_write_ops;
1861 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1862 		break;
1863 	default:
1864 		error = EINVAL;
1865 		break;
1866 	}
1867 	cuse_unlock();
1868 
1869 	if (error == 0)
1870 		cuse_client_kqfilter_poll(dev, pcc);
1871 	return (error);
1872 }
1873