xref: /freebsd/sys/fs/cuse/cuse.c (revision 094fc1ed0f2627525c7b0342efcbad5be7a8546a)
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2010-2013 Hans Petter Selasky. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include "opt_compat.h"
28 
29 #include <sys/stdint.h>
30 #include <sys/stddef.h>
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/conf.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/linker_set.h>
38 #include <sys/module.h>
39 #include <sys/lock.h>
40 #include <sys/mutex.h>
41 #include <sys/condvar.h>
42 #include <sys/sysctl.h>
43 #include <sys/unistd.h>
44 #include <sys/malloc.h>
45 #include <sys/priv.h>
46 #include <sys/uio.h>
47 #include <sys/poll.h>
48 #include <sys/sx.h>
49 #include <sys/queue.h>
50 #include <sys/fcntl.h>
51 #include <sys/proc.h>
52 #include <sys/vnode.h>
53 #include <sys/selinfo.h>
54 #include <sys/ptrace.h>
55 
56 #include <machine/bus.h>
57 
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60 
61 #include <fs/cuse/cuse_defs.h>
62 #include <fs/cuse/cuse_ioctl.h>
63 
64 MODULE_VERSION(cuse, 1);
65 
66 /*
67  * Prevent cuse4bsd.ko and cuse.ko from loading at the same time by
68  * declaring support for the cuse4bsd interface in cuse.ko:
69  */
70 MODULE_VERSION(cuse4bsd, 1);
71 
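/* Marker stored in cuse_memory.virtaddr while an allocation is in progress: */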
72 #define	NBUSY	((uint8_t *)1)
73 
74 #ifdef FEATURE
75 FEATURE(cuse, "Userspace character devices");
76 #endif
77 
78 struct cuse_command;
79 struct cuse_server;
80 struct cuse_client;
81 
82 struct cuse_client_command {
83 	TAILQ_ENTRY(cuse_client_command) entry;
84 	struct cuse_command sub;
85 	struct sx sx;
86 	struct cv cv;
87 	struct thread *entered;
88 	struct cuse_client *client;
89 	struct proc *proc_curr;
90 	int	proc_refs;
91 	int	got_signal;
92 	int	error;
93 	int	command;
94 };
95 
96 struct cuse_memory {
97 	struct cuse_server *owner;
98 	uint8_t *virtaddr;
99 	uint32_t page_count;
100 	uint32_t is_allocated;
101 };
102 
103 struct cuse_server_dev {
104 	TAILQ_ENTRY(cuse_server_dev) entry;
105 	struct cuse_server *server;
106 	struct cdev *kern_dev;
107 	struct cuse_dev *user_dev;
108 };
109 
110 struct cuse_server {
111 	TAILQ_ENTRY(cuse_server) entry;
112 	TAILQ_HEAD(, cuse_client_command) head;
113 	TAILQ_HEAD(, cuse_server_dev) hdev;
114 	TAILQ_HEAD(, cuse_client) hcli;
115 	struct cv cv;
116 	struct selinfo selinfo;
117 	pid_t	pid;
118 	int	is_closing;
119 	int	refs;
120 };
121 
122 struct cuse_client {
123 	TAILQ_ENTRY(cuse_client) entry;
124 	TAILQ_ENTRY(cuse_client) entry_ref;
125 	struct cuse_client_command cmds[CUSE_CMD_MAX];
126 	struct cuse_server *server;
127 	struct cuse_server_dev *server_dev;
128 
129 	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);
130 
131 	int	fflags;		/* file flags */
132 	int	cflags;		/* client flags */
133 #define	CUSE_CLI_IS_CLOSING 0x01
134 #define	CUSE_CLI_KNOTE_NEED_READ 0x02
135 #define	CUSE_CLI_KNOTE_NEED_WRITE 0x04
136 #define	CUSE_CLI_KNOTE_HAS_READ 0x08
137 #define	CUSE_CLI_KNOTE_HAS_WRITE 0x10
138 };
139 
140 #define	CUSE_CLIENT_CLOSING(pcc) \
141     ((pcc)->cflags & CUSE_CLI_IS_CLOSING)
142 
143 static MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");
144 
145 static TAILQ_HEAD(, cuse_server) cuse_server_head;
146 static struct mtx cuse_mtx;
147 static struct cdev *cuse_dev;
148 static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
149 static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
150 static struct cuse_memory cuse_mem[CUSE_ALLOC_UNIT_MAX];
151 
152 static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);
153 static void cuse_client_kqfilter_read_detach(struct knote *kn);
154 static void cuse_client_kqfilter_write_detach(struct knote *kn);
155 static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
156 static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);
157 
158 static struct filterops cuse_client_kqfilter_read_ops = {
159 	.f_isfd = 1,
160 	.f_detach = cuse_client_kqfilter_read_detach,
161 	.f_event = cuse_client_kqfilter_read_event,
162 };
163 
164 static struct filterops cuse_client_kqfilter_write_ops = {
165 	.f_isfd = 1,
166 	.f_detach = cuse_client_kqfilter_write_detach,
167 	.f_event = cuse_client_kqfilter_write_event,
168 };
169 
170 static d_open_t cuse_client_open;
171 static d_close_t cuse_client_close;
172 static d_ioctl_t cuse_client_ioctl;
173 static d_read_t cuse_client_read;
174 static d_write_t cuse_client_write;
175 static d_poll_t cuse_client_poll;
176 static d_mmap_t cuse_client_mmap;
177 static d_kqfilter_t cuse_client_kqfilter;
178 
179 static struct cdevsw cuse_client_devsw = {
180 	.d_version = D_VERSION,
181 	.d_open = cuse_client_open,
182 	.d_close = cuse_client_close,
183 	.d_ioctl = cuse_client_ioctl,
184 	.d_name = "cuse_client",
185 	.d_flags = D_TRACKCLOSE,
186 	.d_read = cuse_client_read,
187 	.d_write = cuse_client_write,
188 	.d_poll = cuse_client_poll,
189 	.d_mmap = cuse_client_mmap,
190 	.d_kqfilter = cuse_client_kqfilter,
191 };
192 
193 static d_open_t cuse_server_open;
194 static d_close_t cuse_server_close;
195 static d_ioctl_t cuse_server_ioctl;
196 static d_read_t cuse_server_read;
197 static d_write_t cuse_server_write;
198 static d_poll_t cuse_server_poll;
199 static d_mmap_t cuse_server_mmap;
200 
201 static struct cdevsw cuse_server_devsw = {
202 	.d_version = D_VERSION,
203 	.d_open = cuse_server_open,
204 	.d_close = cuse_server_close,
205 	.d_ioctl = cuse_server_ioctl,
206 	.d_name = "cuse_server",
207 	.d_flags = D_TRACKCLOSE,
208 	.d_read = cuse_server_read,
209 	.d_write = cuse_server_write,
210 	.d_poll = cuse_server_poll,
211 	.d_mmap = cuse_server_mmap,
212 };
213 
214 static void cuse_client_is_closing(struct cuse_client *);
215 static int cuse_free_unit_by_id_locked(struct cuse_server *, int);
216 
217 static void
218 cuse_lock(void)
219 {
220 	mtx_lock(&cuse_mtx);
221 }
222 
223 static void
224 cuse_unlock(void)
225 {
226 	mtx_unlock(&cuse_mtx);
227 }
228 
229 static void
230 cuse_cmd_lock(struct cuse_client_command *pccmd)
231 {
232 	sx_xlock(&pccmd->sx);
233 }
234 
235 static void
236 cuse_cmd_unlock(struct cuse_client_command *pccmd)
237 {
238 	sx_xunlock(&pccmd->sx);
239 }
240 
241 static void
242 cuse_kern_init(void *arg)
243 {
244 	TAILQ_INIT(&cuse_server_head);
245 
246 	mtx_init(&cuse_mtx, "cuse-mtx", NULL, MTX_DEF);
247 
248 	cuse_dev = make_dev(&cuse_server_devsw, 0,
249 	    UID_ROOT, GID_OPERATOR, 0600, "cuse");
250 
251 	printf("Cuse v%d.%d.%d @ /dev/cuse\n",
252 	    (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
253 	    (CUSE_VERSION >> 0) & 0xFF);
254 }
255 
256 SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, 0);
257 
258 static void
259 cuse_kern_uninit(void *arg)
260 {
261 	void *ptr;
262 
263 	while (1) {
264 
265 		printf("Cuse: Please exit all /dev/cuse instances "
266 		    "and processes which have used this device.\n");
267 
268 		pause("DRAIN", 2 * hz);
269 
270 		cuse_lock();
271 		ptr = TAILQ_FIRST(&cuse_server_head);
272 		cuse_unlock();
273 
274 		if (ptr == NULL)
275 			break;
276 	}
277 
278 	if (cuse_dev != NULL)
279 		destroy_dev(cuse_dev);
280 
281 	mtx_destroy(&cuse_mtx);
282 }
283 
284 SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
285 
286 static int
287 cuse_server_get(struct cuse_server **ppcs)
288 {
289 	struct cuse_server *pcs;
290 	int error;
291 
292 	error = devfs_get_cdevpriv((void **)&pcs);
293 	if (error != 0) {
294 		*ppcs = NULL;
295 		return (error);
296 	}
297 	/* check if closing */
298 	cuse_lock();
299 	if (pcs->is_closing) {
300 		cuse_unlock();
301 		*ppcs = NULL;
302 		return (EINVAL);
303 	}
304 	cuse_unlock();
305 	*ppcs = pcs;
306 	return (0);
307 }
308 
309 static void
310 cuse_server_is_closing(struct cuse_server *pcs)
311 {
312 	struct cuse_client *pcc;
313 
314 	if (pcs->is_closing)
315 		return;
316 
317 	pcs->is_closing = 1;
318 
319 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
320 		cuse_client_is_closing(pcc);
321 	}
322 }
323 
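/*
 * Return the client command currently entered by the given server
 * thread, or NULL if there is none or the server is closing.
 */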
324 static struct cuse_client_command *
325 cuse_server_find_command(struct cuse_server *pcs, struct thread *td)
326 {
327 	struct cuse_client *pcc;
328 	int n;
329 
330 	if (pcs->is_closing)
331 		goto done;
332 
333 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
334 		if (CUSE_CLIENT_CLOSING(pcc))
335 			continue;
336 		for (n = 0; n != CUSE_CMD_MAX; n++) {
337 			if (pcc->cmds[n].entered == td)
338 				return (&pcc->cmds[n]);
339 		}
340 	}
341 done:
342 	return (NULL);
343 }
344 
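/* Replace characters not valid in a device name with underscores. */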
345 static void
346 cuse_str_filter(char *ptr)
347 {
348 	int c;
349 
350 	while (((c = *ptr) != 0)) {
351 
352 		if ((c >= 'a') && (c <= 'z')) {
353 			ptr++;
354 			continue;
355 		}
356 		if ((c >= 'A') && (c <= 'Z')) {
357 			ptr++;
358 			continue;
359 		}
360 		if ((c >= '0') && (c <= '9')) {
361 			ptr++;
362 			continue;
363 		}
364 		if ((c == '.') || (c == '_') || (c == '/')) {
365 			ptr++;
366 			continue;
367 		}
368 		*ptr = '_';
369 
370 		ptr++;
371 	}
372 }
373 
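/* Convert a CUSE_ERR_XXX code from userspace into a kernel errno value. */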
374 static int
375 cuse_convert_error(int error)
376 {
377 	;				/* indent fix */
378 	switch (error) {
379 	case CUSE_ERR_NONE:
380 		return (0);
381 	case CUSE_ERR_BUSY:
382 		return (EBUSY);
383 	case CUSE_ERR_WOULDBLOCK:
384 		return (EWOULDBLOCK);
385 	case CUSE_ERR_INVALID:
386 		return (EINVAL);
387 	case CUSE_ERR_NO_MEMORY:
388 		return (ENOMEM);
389 	case CUSE_ERR_FAULT:
390 		return (EFAULT);
391 	case CUSE_ERR_SIGNAL:
392 		return (EINTR);
393 	case CUSE_ERR_NO_DEVICE:
394 		return (ENODEV);
395 	default:
396 		return (ENXIO);
397 	}
398 }
399 
400 static void
401 cuse_server_free_memory(struct cuse_server *pcs)
402 {
403 	struct cuse_memory *mem;
404 	uint32_t n;
405 
406 	for (n = 0; n != CUSE_ALLOC_UNIT_MAX; n++) {
407 		mem = &cuse_mem[n];
408 
409 		/* the backing memory itself is never freed; just drop ownership */
410 		if (mem->owner == pcs) {
411 			mem->owner = NULL;
412 			mem->is_allocated = 0;
413 		}
414 	}
415 }
416 
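/*
 * Allocate, or take over an already allocated, global memory slot on
 * behalf of the given server. Returns zero on success, else an errno.
 */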
417 static int
418 cuse_server_alloc_memory(struct cuse_server *pcs,
419     struct cuse_memory *mem, uint32_t page_count)
420 {
421 	void *ptr;
422 	int error;
423 
424 	cuse_lock();
425 
426 	if (mem->virtaddr == NBUSY) {
427 		cuse_unlock();
428 		return (EBUSY);
429 	}
430 	if (mem->virtaddr != NULL) {
431 		if (mem->is_allocated != 0) {
432 			cuse_unlock();
433 			return (EBUSY);
434 		}
435 		if (mem->page_count == page_count) {
436 			mem->is_allocated = 1;
437 			mem->owner = pcs;
438 			cuse_unlock();
439 			return (0);
440 		}
441 		cuse_unlock();
442 		return (EBUSY);
443 	}
444 	memset(mem, 0, sizeof(*mem));
445 
446 	mem->virtaddr = NBUSY;
447 
448 	cuse_unlock();
449 
450 	ptr = malloc(page_count * PAGE_SIZE, M_CUSE, M_WAITOK | M_ZERO);
451 	if (ptr == NULL)
452 		error = ENOMEM;
453 	else
454 		error = 0;
455 
456 	cuse_lock();
457 
458 	if (error) {
459 		mem->virtaddr = NULL;
460 		cuse_unlock();
461 		return (error);
462 	}
463 	mem->virtaddr = ptr;
464 	mem->page_count = page_count;
465 	mem->is_allocated = 1;
466 	mem->owner = pcs;
467 	cuse_unlock();
468 
469 	return (0);
470 }
471 
472 static int
473 cuse_client_get(struct cuse_client **ppcc)
474 {
475 	struct cuse_client *pcc;
476 	int error;
477 
478 	/* try to get private data */
479 	error = devfs_get_cdevpriv((void **)&pcc);
480 	if (error != 0) {
481 		*ppcc = NULL;
482 		return (error);
483 	}
484 	/* check if closing */
485 	cuse_lock();
486 	if (CUSE_CLIENT_CLOSING(pcc) || pcc->server->is_closing) {
487 		cuse_unlock();
488 		*ppcc = NULL;
489 		return (EINVAL);
490 	}
491 	cuse_unlock();
492 	*ppcc = pcc;
493 	return (0);
494 }
495 
496 static void
497 cuse_client_is_closing(struct cuse_client *pcc)
498 {
499 	struct cuse_client_command *pccmd;
500 	uint32_t n;
501 
502 	if (CUSE_CLIENT_CLOSING(pcc))
503 		return;
504 
505 	pcc->cflags |= CUSE_CLI_IS_CLOSING;
506 	pcc->server_dev = NULL;
507 
508 	for (n = 0; n != CUSE_CMD_MAX; n++) {
509 
510 		pccmd = &pcc->cmds[n];
511 
512 		if (pccmd->entry.tqe_prev != NULL) {
513 			TAILQ_REMOVE(&pcc->server->head, pccmd, entry);
514 			pccmd->entry.tqe_prev = NULL;
515 		}
516 		cv_broadcast(&pccmd->cv);
517 	}
518 }
519 
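/*
 * Fill in a command and enqueue it towards the userspace server,
 * waking the server up, unless the client or server is closing.
 */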
520 static void
521 cuse_client_send_command_locked(struct cuse_client_command *pccmd,
522     uintptr_t data_ptr, unsigned long arg, int fflags, int ioflag)
523 {
524 	unsigned long cuse_fflags = 0;
525 	struct cuse_server *pcs;
526 
527 	if (fflags & FREAD)
528 		cuse_fflags |= CUSE_FFLAG_READ;
529 
530 	if (fflags & FWRITE)
531 		cuse_fflags |= CUSE_FFLAG_WRITE;
532 
533 	if (ioflag & IO_NDELAY)
534 		cuse_fflags |= CUSE_FFLAG_NONBLOCK;
535 
536 	pccmd->sub.fflags = cuse_fflags;
537 	pccmd->sub.data_pointer = data_ptr;
538 	pccmd->sub.argument = arg;
539 
540 	pcs = pccmd->client->server;
541 
542 	if ((pccmd->entry.tqe_prev == NULL) &&
543 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
544 	    (pcs->is_closing == 0)) {
545 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
546 		cv_signal(&pcs->cv);
547 	}
548 }
549 
550 static void
551 cuse_client_got_signal(struct cuse_client_command *pccmd)
552 {
553 	struct cuse_server *pcs;
554 
555 	pccmd->got_signal = 1;
556 
557 	pccmd = &pccmd->client->cmds[CUSE_CMD_SIGNAL];
558 
559 	pcs = pccmd->client->server;
560 
561 	if ((pccmd->entry.tqe_prev == NULL) &&
562 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
563 	    (pcs->is_closing == 0)) {
564 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
565 		cv_signal(&pcs->cv);
566 	}
567 }
568 
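/*
 * Wait for the userspace server to complete the queued command. The
 * return value is either a negative CUSE error code or a non-negative,
 * command specific result.
 */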
569 static int
570 cuse_client_receive_command_locked(struct cuse_client_command *pccmd,
571     uint8_t *arg_ptr, uint32_t arg_len)
572 {
573 	int error;
574 
575 	error = 0;
576 
577 	pccmd->proc_curr = curthread->td_proc;
578 
579 	if (CUSE_CLIENT_CLOSING(pccmd->client) ||
580 	    pccmd->client->server->is_closing) {
581 		error = CUSE_ERR_OTHER;
582 		goto done;
583 	}
584 	while (pccmd->command == CUSE_CMD_NONE) {
585 		if (error != 0) {
586 			cv_wait(&pccmd->cv, &cuse_mtx);
587 		} else {
588 			error = cv_wait_sig(&pccmd->cv, &cuse_mtx);
589 
590 			if (error != 0)
591 				cuse_client_got_signal(pccmd);
592 		}
593 		if (CUSE_CLIENT_CLOSING(pccmd->client) ||
594 		    pccmd->client->server->is_closing) {
595 			error = CUSE_ERR_OTHER;
596 			goto done;
597 		}
598 	}
599 
600 	error = pccmd->error;
601 	pccmd->command = CUSE_CMD_NONE;
602 	cv_signal(&pccmd->cv);
603 
604 done:
605 
606 	/* wait until all process references are gone */
607 
608 	pccmd->proc_curr = NULL;
609 
610 	while (pccmd->proc_refs != 0)
611 		cv_wait(&pccmd->cv, &cuse_mtx);
612 
613 	return (error);
614 }
615 
616 /*------------------------------------------------------------------------*
617  *	CUSE SERVER PART
618  *------------------------------------------------------------------------*/
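/*
 * Illustrative sketch of how a userspace server drives this interface
 * (simplified; see <fs/cuse/cuse_ioctl.h> for the actual request
 * definitions and error handling that is omitted here):
 *
 *	int f = open("/dev/cuse", O_RDWR);
 *	struct cuse_command cmd;
 *	while (ioctl(f, CUSE_IOCTL_GET_COMMAND, &cmd) == 0) {
 *		int result = CUSE_ERR_INVALID;
 *		... dispatch on cmd.command and compute "result" ...
 *		ioctl(f, CUSE_IOCTL_SYNC_COMMAND, &result);
 *	}
 */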
619 
620 static void
621 cuse_server_free_dev(struct cuse_server_dev *pcsd)
622 {
623 	struct cuse_server *pcs;
624 	struct cuse_client *pcc;
625 
626 	/* get server pointer */
627 	pcs = pcsd->server;
628 
629 	/* prevent creation of more devices */
630 	cuse_lock();
631 	if (pcsd->kern_dev != NULL)
632 		pcsd->kern_dev->si_drv1 = NULL;
633 
634 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
635 		if (pcc->server_dev == pcsd)
636 			cuse_client_is_closing(pcc);
637 	}
638 	cuse_unlock();
639 
640 	/* destroy device, if any */
641 	if (pcsd->kern_dev != NULL) {
642 		/* destroy device synchronously */
643 		destroy_dev(pcsd->kern_dev);
644 	}
645 	free(pcsd, M_CUSE);
646 }
647 
648 static void
649 cuse_server_free(void *arg)
650 {
651 	struct cuse_server *pcs = arg;
652 	struct cuse_server_dev *pcsd;
653 
654 	cuse_lock();
655 	pcs->refs--;
656 	if (pcs->refs != 0) {
657 		cuse_unlock();
658 		return;
659 	}
660 	cuse_server_is_closing(pcs);
661 	/* final client wakeup, if any */
662 	cuse_server_wakeup_all_client_locked(pcs);
663 
664 	TAILQ_REMOVE(&cuse_server_head, pcs, entry);
665 
666 	cuse_free_unit_by_id_locked(pcs, -1);
667 
668 	while ((pcsd = TAILQ_FIRST(&pcs->hdev)) != NULL) {
669 		TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
670 		cuse_unlock();
671 		cuse_server_free_dev(pcsd);
672 		cuse_lock();
673 	}
674 
675 	cuse_server_free_memory(pcs);
676 
677 	knlist_clear(&pcs->selinfo.si_note, 1);
678 	knlist_destroy(&pcs->selinfo.si_note);
679 
680 	cuse_unlock();
681 
682 	seldrain(&pcs->selinfo);
683 
684 	cv_destroy(&pcs->cv);
685 
686 	free(pcs, M_CUSE);
687 }
688 
689 static int
690 cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
691 {
692 	struct cuse_server *pcs;
693 
694 	pcs = malloc(sizeof(*pcs), M_CUSE, M_WAITOK | M_ZERO);
695 	if (pcs == NULL)
696 		return (ENOMEM);
697 
698 	if (devfs_set_cdevpriv(pcs, &cuse_server_free)) {
699 		printf("Cuse: Cannot set cdevpriv.\n");
700 		free(pcs, M_CUSE);
701 		return (ENOMEM);
702 	}
703 
704 	/* store current process ID */
705 	pcs->pid = curproc->p_pid;
706 
707 	TAILQ_INIT(&pcs->head);
708 	TAILQ_INIT(&pcs->hdev);
709 	TAILQ_INIT(&pcs->hcli);
710 
711 	cv_init(&pcs->cv, "cuse-server-cv");
712 
713 	knlist_init_mtx(&pcs->selinfo.si_note, &cuse_mtx);
714 
715 	cuse_lock();
716 	pcs->refs++;
717 	TAILQ_INSERT_TAIL(&cuse_server_head, pcs, entry);
718 	cuse_unlock();
719 
720 	return (0);
721 }
722 
723 static int
724 cuse_server_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
725 {
726 	struct cuse_server *pcs;
727 	int error;
728 
729 	error = cuse_server_get(&pcs);
730 	if (error != 0)
731 		goto done;
732 
733 	cuse_lock();
734 	cuse_server_is_closing(pcs);
735 	/* final client wakeup, if any */
736 	cuse_server_wakeup_all_client_locked(pcs);
737 
738 	knlist_clear(&pcs->selinfo.si_note, 1);
739 	cuse_unlock();
740 
741 done:
742 	return (0);
743 }
744 
745 static int
746 cuse_server_read(struct cdev *dev, struct uio *uio, int ioflag)
747 {
748 	return (ENXIO);
749 }
750 
751 static int
752 cuse_server_write(struct cdev *dev, struct uio *uio, int ioflag)
753 {
754 	return (ENXIO);
755 }
756 
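/*
 * Copy ioctl data between the server process and the client's
 * statically sized "ioctl_buffer".
 */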
757 static int
758 cuse_server_ioctl_copy_locked(struct cuse_client_command *pccmd,
759     struct cuse_data_chunk *pchk, int isread)
760 {
761 	struct proc *p_proc;
762 	uint32_t offset;
763 	int error;
764 
765 	offset = pchk->peer_ptr - CUSE_BUF_MIN_PTR;
766 
767 	if (pchk->length > CUSE_BUFFER_MAX)
768 		return (EFAULT);
769 
770 	if (offset >= CUSE_BUFFER_MAX)
771 		return (EFAULT);
772 
773 	if ((offset + pchk->length) > CUSE_BUFFER_MAX)
774 		return (EFAULT);
775 
776 	p_proc = pccmd->proc_curr;
777 	if (p_proc == NULL)
778 		return (ENXIO);
779 
780 	if (pccmd->proc_refs < 0)
781 		return (ENOMEM);
782 
783 	pccmd->proc_refs++;
784 
785 	cuse_unlock();
786 
787 	if (isread == 0) {
788 		error = copyin(
789 		    (void *)pchk->local_ptr,
790 		    pccmd->client->ioctl_buffer + offset,
791 		    pchk->length);
792 	} else {
793 		error = copyout(
794 		    pccmd->client->ioctl_buffer + offset,
795 		    (void *)pchk->local_ptr,
796 		    pchk->length);
797 	}
798 
799 	cuse_lock();
800 
801 	pccmd->proc_refs--;
802 
803 	if (pccmd->proc_curr == NULL)
804 		cv_signal(&pccmd->cv);
805 
806 	return (error);
807 }
808 
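/*
 * Copy "len" bytes of memory between two processes. The current
 * process must be either the source or the destination.
 */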
809 static int
810 cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
811     struct proc *proc_d, vm_offset_t data_d, size_t len)
812 {
813 	struct thread *td;
814 	struct proc *proc_cur;
815 	int error;
816 
817 	td = curthread;
818 	proc_cur = td->td_proc;
819 
820 	if (proc_cur == proc_d) {
821 		struct iovec iov = {
822 			.iov_base = (caddr_t)data_d,
823 			.iov_len = len,
824 		};
825 		struct uio uio = {
826 			.uio_iov = &iov,
827 			.uio_iovcnt = 1,
828 			.uio_offset = (off_t)data_s,
829 			.uio_resid = len,
830 			.uio_segflg = UIO_USERSPACE,
831 			.uio_rw = UIO_READ,
832 			.uio_td = td,
833 		};
834 
835 		PHOLD(proc_s);
836 		error = proc_rwmem(proc_s, &uio);
837 		PRELE(proc_s);
838 
839 	} else if (proc_cur == proc_s) {
840 		struct iovec iov = {
841 			.iov_base = (caddr_t)data_s,
842 			.iov_len = len,
843 		};
844 		struct uio uio = {
845 			.uio_iov = &iov,
846 			.uio_iovcnt = 1,
847 			.uio_offset = (off_t)data_d,
848 			.uio_resid = len,
849 			.uio_segflg = UIO_USERSPACE,
850 			.uio_rw = UIO_WRITE,
851 			.uio_td = td,
852 		};
853 
854 		PHOLD(proc_d);
855 		error = proc_rwmem(proc_d, &uio);
856 		PRELE(proc_d);
857 	} else {
858 		error = EINVAL;
859 	}
860 	return (error);
861 }
862 
863 static int
864 cuse_server_data_copy_locked(struct cuse_client_command *pccmd,
865     struct cuse_data_chunk *pchk, int isread)
866 {
867 	struct proc *p_proc;
868 	int error;
869 
870 	p_proc = pccmd->proc_curr;
871 	if (p_proc == NULL)
872 		return (ENXIO);
873 
874 	if (pccmd->proc_refs < 0)
875 		return (ENOMEM);
876 
877 	pccmd->proc_refs++;
878 
879 	cuse_unlock();
880 
881 	if (isread == 0) {
882 		error = cuse_proc2proc_copy(
883 		    curthread->td_proc, pchk->local_ptr,
884 		    p_proc, pchk->peer_ptr,
885 		    pchk->length);
886 	} else {
887 		error = cuse_proc2proc_copy(
888 		    p_proc, pchk->peer_ptr,
889 		    curthread->td_proc, pchk->local_ptr,
890 		    pchk->length);
891 	}
892 
893 	cuse_lock();
894 
895 	pccmd->proc_refs--;
896 
897 	if (pccmd->proc_curr == NULL)
898 		cv_signal(&pccmd->cv);
899 
900 	return (error);
901 }
902 
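/*
 * Allocate the lowest free unit number within the given ID class.
 * Returns the unit number, or -1 if none is available.
 */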
903 static int
904 cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
905 {
906 	int n;
907 	int x = 0;
908 	int match;
909 
910 	do {
911 		for (match = n = 0; n != CUSE_DEVICES_MAX; n++) {
912 			if (cuse_alloc_unit[n] != NULL) {
913 				if ((cuse_alloc_unit_id[n] ^ id) & CUSE_ID_MASK)
914 					continue;
915 				if ((cuse_alloc_unit_id[n] & ~CUSE_ID_MASK) == x) {
916 					x++;
917 					match = 1;
918 				}
919 			}
920 		}
921 	} while (match);
922 
923 	if (x < 256) {
924 		for (n = 0; n != CUSE_DEVICES_MAX; n++) {
925 			if (cuse_alloc_unit[n] == NULL) {
926 				cuse_alloc_unit[n] = pcs;
927 				cuse_alloc_unit_id[n] = id | x;
928 				return (x);
929 			}
930 		}
931 	}
932 	return (-1);
933 }
934 
935 static void
936 cuse_server_wakeup_locked(struct cuse_server *pcs)
937 {
938 	selwakeup(&pcs->selinfo);
939 	KNOTE_LOCKED(&pcs->selinfo.si_note, 0);
940 }
941 
942 static void
943 cuse_server_wakeup_all_client_locked(struct cuse_server *pcs)
944 {
945 	struct cuse_client *pcc;
946 
947 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
948 		pcc->cflags |= (CUSE_CLI_KNOTE_NEED_READ |
949 		    CUSE_CLI_KNOTE_NEED_WRITE);
950 	}
951 	cuse_server_wakeup_locked(pcs);
952 }
953 
954 static int
955 cuse_free_unit_by_id_locked(struct cuse_server *pcs, int id)
956 {
957 	int n;
958 	int found = 0;
959 
960 	for (n = 0; n != CUSE_DEVICES_MAX; n++) {
961 		if (cuse_alloc_unit[n] == pcs) {
962 			if (cuse_alloc_unit_id[n] == id || id == -1) {
963 				cuse_alloc_unit[n] = NULL;
964 				cuse_alloc_unit_id[n] = 0;
965 				found = 1;
966 			}
967 		}
968 	}
969 
970 	return (found ? 0 : EINVAL);
971 }
972 
973 static int
974 cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
975     caddr_t data, int fflag, struct thread *td)
976 {
977 	struct cuse_server *pcs;
978 	int error;
979 
980 	error = cuse_server_get(&pcs);
981 	if (error != 0)
982 		return (error);
983 
984 	switch (cmd) {
985 		struct cuse_client_command *pccmd;
986 		struct cuse_client *pcc;
987 		struct cuse_command *pcmd;
988 		struct cuse_alloc_info *pai;
989 		struct cuse_create_dev *pcd;
990 		struct cuse_server_dev *pcsd;
991 		struct cuse_data_chunk *pchk;
992 		int n;
993 
994 	case CUSE_IOCTL_GET_COMMAND:
995 		pcmd = (void *)data;
996 
997 		cuse_lock();
998 
999 		while ((pccmd = TAILQ_FIRST(&pcs->head)) == NULL) {
1000 			error = cv_wait_sig(&pcs->cv, &cuse_mtx);
1001 
1002 			if (pcs->is_closing)
1003 				error = ENXIO;
1004 
1005 			if (error) {
1006 				cuse_unlock();
1007 				return (error);
1008 			}
1009 		}
1010 
1011 		TAILQ_REMOVE(&pcs->head, pccmd, entry);
1012 		pccmd->entry.tqe_prev = NULL;
1013 
1014 		pccmd->entered = curthread;
1015 
1016 		*pcmd = pccmd->sub;
1017 
1018 		cuse_unlock();
1019 
1020 		break;
1021 
1022 	case CUSE_IOCTL_SYNC_COMMAND:
1023 
1024 		cuse_lock();
1025 		while ((pccmd = cuse_server_find_command(pcs, curthread)) != NULL) {
1026 
1027 			/* send sync command */
1028 			pccmd->entered = NULL;
1029 			pccmd->error = *(int *)data;
1030 			pccmd->command = CUSE_CMD_SYNC;
1031 
1032 			/* signal peer, if any */
1033 			cv_signal(&pccmd->cv);
1034 		}
1035 		cuse_unlock();
1036 
1037 		break;
1038 
1039 	case CUSE_IOCTL_ALLOC_UNIT:
1040 
1041 		cuse_lock();
1042 		n = cuse_alloc_unit_by_id_locked(pcs,
1043 		    CUSE_ID_DEFAULT(0));
1044 		cuse_unlock();
1045 
1046 		if (n < 0)
1047 			error = ENOMEM;
1048 		else
1049 			*(int *)data = n;
1050 		break;
1051 
1052 	case CUSE_IOCTL_ALLOC_UNIT_BY_ID:
1053 
1054 		n = *(int *)data;
1055 
1056 		n = (n & CUSE_ID_MASK);
1057 
1058 		cuse_lock();
1059 		n = cuse_alloc_unit_by_id_locked(pcs, n);
1060 		cuse_unlock();
1061 
1062 		if (n < 0)
1063 			error = ENOMEM;
1064 		else
1065 			*(int *)data = n;
1066 		break;
1067 
1068 	case CUSE_IOCTL_FREE_UNIT:
1069 
1070 		n = *(int *)data;
1071 
1072 		n = CUSE_ID_DEFAULT(n);
1073 
1074 		cuse_lock();
1075 		error = cuse_free_unit_by_id_locked(pcs, n);
1076 		cuse_unlock();
1077 		break;
1078 
1079 	case CUSE_IOCTL_FREE_UNIT_BY_ID:
1080 
1081 		n = *(int *)data;
1082 
1083 		cuse_lock();
1084 		error = cuse_free_unit_by_id_locked(pcs, n);
1085 		cuse_unlock();
1086 		break;
1087 
1088 	case CUSE_IOCTL_ALLOC_MEMORY:
1089 
1090 		pai = (void *)data;
1091 
1092 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1093 			error = ENOMEM;
1094 			break;
1095 		}
1096 		if (pai->page_count > CUSE_ALLOC_PAGES_MAX) {
1097 			error = ENOMEM;
1098 			break;
1099 		}
1100 		error = cuse_server_alloc_memory(pcs,
1101 		    &cuse_mem[pai->alloc_nr], pai->page_count);
1102 		break;
1103 
1104 	case CUSE_IOCTL_FREE_MEMORY:
1105 		pai = (void *)data;
1106 
1107 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1108 			error = ENOMEM;
1109 			break;
1110 		}
1111 		/* we trust the character device driver in this case */
1112 
1113 		cuse_lock();
1114 		if (cuse_mem[pai->alloc_nr].owner == pcs) {
1115 			cuse_mem[pai->alloc_nr].is_allocated = 0;
1116 			cuse_mem[pai->alloc_nr].owner = NULL;
1117 		} else {
1118 			error = EINVAL;
1119 		}
1120 		cuse_unlock();
1121 		break;
1122 
1123 	case CUSE_IOCTL_GET_SIG:
1124 
1125 		cuse_lock();
1126 		pccmd = cuse_server_find_command(pcs, curthread);
1127 
1128 		if (pccmd != NULL) {
1129 			n = pccmd->got_signal;
1130 			pccmd->got_signal = 0;
1131 		} else {
1132 			n = 0;
1133 		}
1134 		cuse_unlock();
1135 
1136 		*(int *)data = n;
1137 
1138 		break;
1139 
1140 	case CUSE_IOCTL_SET_PFH:
1141 
1142 		cuse_lock();
1143 		pccmd = cuse_server_find_command(pcs, curthread);
1144 
1145 		if (pccmd != NULL) {
1146 			pcc = pccmd->client;
1147 			for (n = 0; n != CUSE_CMD_MAX; n++) {
1148 				pcc->cmds[n].sub.per_file_handle = *(uintptr_t *)data;
1149 			}
1150 		} else {
1151 			error = ENXIO;
1152 		}
1153 		cuse_unlock();
1154 		break;
1155 
1156 	case CUSE_IOCTL_CREATE_DEV:
1157 
1158 		error = priv_check(curthread, PRIV_DRIVER);
1159 		if (error)
1160 			break;
1161 
1162 		pcd = (void *)data;
1163 
1164 		/* filter input */
1165 
1166 		pcd->devname[sizeof(pcd->devname) - 1] = 0;
1167 
1168 		if (pcd->devname[0] == 0) {
1169 			error = EINVAL;
1170 			break;
1171 		}
1172 		cuse_str_filter(pcd->devname);
1173 
1174 		pcd->permissions &= 0777;
1175 
1176 		/* try to allocate a character device */
1177 
1178 		pcsd = malloc(sizeof(*pcsd), M_CUSE, M_WAITOK | M_ZERO);
1179 
1180 		if (pcsd == NULL) {
1181 			error = ENOMEM;
1182 			break;
1183 		}
1184 		pcsd->server = pcs;
1185 
1186 		pcsd->user_dev = pcd->dev;
1187 
1188 		pcsd->kern_dev = make_dev_credf(MAKEDEV_CHECKNAME,
1189 		    &cuse_client_devsw, 0, NULL, pcd->user_id, pcd->group_id,
1190 		    pcd->permissions, "%s", pcd->devname);
1191 
1192 		if (pcsd->kern_dev == NULL) {
1193 			free(pcsd, M_CUSE);
1194 			error = ENOMEM;
1195 			break;
1196 		}
1197 		pcsd->kern_dev->si_drv1 = pcsd;
1198 
1199 		cuse_lock();
1200 		TAILQ_INSERT_TAIL(&pcs->hdev, pcsd, entry);
1201 		cuse_unlock();
1202 
1203 		break;
1204 
1205 	case CUSE_IOCTL_DESTROY_DEV:
1206 
1207 		error = priv_check(curthread, PRIV_DRIVER);
1208 		if (error)
1209 			break;
1210 
1211 		cuse_lock();
1212 
1213 		error = EINVAL;
1214 
1215 		pcsd = TAILQ_FIRST(&pcs->hdev);
1216 		while (pcsd != NULL) {
1217 			if (pcsd->user_dev == *(struct cuse_dev **)data) {
1218 				TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
1219 				cuse_unlock();
1220 				cuse_server_free_dev(pcsd);
1221 				cuse_lock();
1222 				error = 0;
1223 				pcsd = TAILQ_FIRST(&pcs->hdev);
1224 			} else {
1225 				pcsd = TAILQ_NEXT(pcsd, entry);
1226 			}
1227 		}
1228 
1229 		cuse_unlock();
1230 		break;
1231 
1232 	case CUSE_IOCTL_WRITE_DATA:
1233 	case CUSE_IOCTL_READ_DATA:
1234 
1235 		cuse_lock();
1236 		pchk = (struct cuse_data_chunk *)data;
1237 
1238 		pccmd = cuse_server_find_command(pcs, curthread);
1239 
1240 		if (pccmd == NULL) {
1241 			error = ENXIO;	/* invalid request */
1242 		} else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
1243 			error = EFAULT;	/* NULL pointer */
1244 		} else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
1245 			error = cuse_server_ioctl_copy_locked(pccmd,
1246 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1247 		} else {
1248 			error = cuse_server_data_copy_locked(pccmd,
1249 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1250 		}
1251 		cuse_unlock();
1252 		break;
1253 
1254 	case CUSE_IOCTL_SELWAKEUP:
1255 		cuse_lock();
1256 		/*
1257 		 * We don't know which direction caused the event.
1258 		 * Wake up both!
1259 		 */
1260 		cuse_server_wakeup_all_client_locked(pcs);
1261 		cuse_unlock();
1262 		break;
1263 
1264 	default:
1265 		error = ENXIO;
1266 		break;
1267 	}
1268 	return (error);
1269 }
1270 
1271 static int
1272 cuse_server_poll(struct cdev *dev, int events, struct thread *td)
1273 {
1274 	return (events & (POLLHUP | POLLPRI | POLLIN |
1275 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1276 }
1277 
1278 static int
1279 cuse_server_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr)
1280 {
1281 	uint32_t page_nr = offset / PAGE_SIZE;
1282 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1283 	struct cuse_memory *mem;
1284 	struct cuse_server *pcs;
1285 	uint8_t *ptr;
1286 	int error;
1287 
1288 	if (alloc_nr >= CUSE_ALLOC_UNIT_MAX)
1289 		return (ENOMEM);
1290 
1291 	error = cuse_server_get(&pcs);
1292 	if (error != 0)
1293 		pcs = NULL;
1294 
1295 	cuse_lock();
1296 	mem = &cuse_mem[alloc_nr];
1297 
1298 	/* enforce a loose ownership check */
1299 	if ((pcs != NULL) && (mem->owner != pcs)) {
1300 		cuse_unlock();
1301 		return (EINVAL);
1302 	}
1303 	if (mem->virtaddr == NULL) {
1304 		cuse_unlock();
1305 		return (ENOMEM);
1306 	}
1307 	if (mem->virtaddr == NBUSY) {
1308 		cuse_unlock();
1309 		return (ENOMEM);
1310 	}
1311 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1312 
1313 	if (page_nr >= mem->page_count) {
1314 		cuse_unlock();
1315 		return (ENXIO);
1316 	}
1317 	ptr = mem->virtaddr + (page_nr * PAGE_SIZE);
1318 	cuse_unlock();
1319 
1320 	*paddr = vtophys(ptr);
1321 
1322 	return (0);
1323 }
1324 
1325 /*------------------------------------------------------------------------*
1326  *	CUSE CLIENT PART
1327  *------------------------------------------------------------------------*/
1328 static void
1329 cuse_client_free(void *arg)
1330 {
1331 	struct cuse_client *pcc = arg;
1332 	struct cuse_client_command *pccmd;
1333 	struct cuse_server *pcs;
1334 	int n;
1335 
1336 	cuse_lock();
1337 	cuse_client_is_closing(pcc);
1338 	TAILQ_REMOVE(&pcc->server->hcli, pcc, entry);
1339 	cuse_unlock();
1340 
1341 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1342 
1343 		pccmd = &pcc->cmds[n];
1344 
1345 		sx_destroy(&pccmd->sx);
1346 		cv_destroy(&pccmd->cv);
1347 	}
1348 
1349 	pcs = pcc->server;
1350 
1351 	free(pcc, M_CUSE);
1352 
1353 	/* drop reference on server */
1354 	cuse_server_free(pcs);
1355 }
1356 
1357 static int
1358 cuse_client_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
1359 {
1360 	struct cuse_client_command *pccmd;
1361 	struct cuse_server_dev *pcsd;
1362 	struct cuse_client *pcc;
1363 	struct cuse_server *pcs;
1364 	struct cuse_dev *pcd;
1365 	int error;
1366 	int n;
1367 
1368 	cuse_lock();
1369 	pcsd = dev->si_drv1;
1370 	if (pcsd != NULL) {
1371 		pcs = pcsd->server;
1372 		pcd = pcsd->user_dev;
1373 		/*
1374 		 * Check that the refcount didn't wrap and that the
1375 		 * same process is not both client and server. This
1376 		 * can easily lead to deadlocks when destroying the
1377 		 * CUSE character device nodes:
1378 		 */
1379 		pcs->refs++;
1380 		if (pcs->refs < 0 || pcs->pid == curproc->p_pid) {
1381 			/* overflow or wrong PID */
1382 			pcs->refs--;
1383 			pcsd = NULL;
1384 		}
1385 	} else {
1386 		pcs = NULL;
1387 		pcd = NULL;
1388 	}
1389 	cuse_unlock();
1390 
1391 	if (pcsd == NULL)
1392 		return (EINVAL);
1393 
1394 	pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
1395 	if (pcc == NULL) {
1396 		/* drop reference on server */
1397 		cuse_server_free(pcs);
1398 		return (ENOMEM);
1399 	}
1400 	if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
1401 		printf("Cuse: Cannot set cdevpriv.\n");
1402 		/* drop reference on server */
1403 		cuse_server_free(pcs);
1404 		free(pcc, M_CUSE);
1405 		return (ENOMEM);
1406 	}
1407 	pcc->fflags = fflags;
1408 	pcc->server_dev = pcsd;
1409 	pcc->server = pcs;
1410 
1411 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1412 
1413 		pccmd = &pcc->cmds[n];
1414 
1415 		pccmd->sub.dev = pcd;
1416 		pccmd->sub.command = n;
1417 		pccmd->client = pcc;
1418 
1419 		sx_init(&pccmd->sx, "cuse-client-sx");
1420 		cv_init(&pccmd->cv, "cuse-client-cv");
1421 	}
1422 
1423 	cuse_lock();
1424 
1425 	/* cuse_client_free() assumes that the client is listed somewhere! */
1426 	/* always enqueue */
1427 
1428 	TAILQ_INSERT_TAIL(&pcs->hcli, pcc, entry);
1429 
1430 	/* check if server is closing */
1431 	if ((pcs->is_closing != 0) || (dev->si_drv1 == NULL)) {
1432 		error = EINVAL;
1433 	} else {
1434 		error = 0;
1435 	}
1436 	cuse_unlock();
1437 
1438 	if (error) {
1439 		devfs_clear_cdevpriv();	/* XXX bugfix */
1440 		return (error);
1441 	}
1442 	pccmd = &pcc->cmds[CUSE_CMD_OPEN];
1443 
1444 	cuse_cmd_lock(pccmd);
1445 
1446 	cuse_lock();
1447 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1448 
1449 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1450 	cuse_unlock();
1451 
1452 	if (error < 0) {
1453 		error = cuse_convert_error(error);
1454 	} else {
1455 		error = 0;
1456 	}
1457 
1458 	cuse_cmd_unlock(pccmd);
1459 
1460 	if (error)
1461 		devfs_clear_cdevpriv();	/* XXX bugfix */
1462 
1463 	return (error);
1464 }
1465 
1466 static int
1467 cuse_client_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
1468 {
1469 	struct cuse_client_command *pccmd;
1470 	struct cuse_client *pcc;
1471 	int error;
1472 
1473 	error = cuse_client_get(&pcc);
1474 	if (error != 0)
1475 		return (0);
1476 
1477 	pccmd = &pcc->cmds[CUSE_CMD_CLOSE];
1478 
1479 	cuse_cmd_lock(pccmd);
1480 
1481 	cuse_lock();
1482 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1483 
1484 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1485 	cuse_unlock();
1486 
1487 	cuse_cmd_unlock(pccmd);
1488 
1489 	cuse_lock();
1490 	cuse_client_is_closing(pcc);
1491 	cuse_unlock();
1492 
1493 	return (0);
1494 }
1495 
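/*
 * Re-poll the server and update the kqueue "need read/write" flags,
 * if any knotes are attached to this client.
 */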
1496 static void
1497 cuse_client_kqfilter_poll(struct cdev *dev, struct cuse_client *pcc)
1498 {
1499 	int temp;
1500 
1501 	cuse_lock();
1502 	temp = (pcc->cflags & (CUSE_CLI_KNOTE_HAS_READ |
1503 	    CUSE_CLI_KNOTE_HAS_WRITE));
1504 	pcc->cflags &= ~(CUSE_CLI_KNOTE_NEED_READ |
1505 	    CUSE_CLI_KNOTE_NEED_WRITE);
1506 	cuse_unlock();
1507 
1508 	if (temp != 0) {
1509 		/* get the latest polling state from the server */
1510 		temp = cuse_client_poll(dev, POLLIN | POLLOUT, NULL);
1511 
1512 		if (temp & (POLLIN | POLLOUT)) {
1513 			cuse_lock();
1514 			if (temp & POLLIN)
1515 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_READ;
1516 			if (temp & POLLOUT)
1517 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_WRITE;
1518 
1519 			/* make sure the "knote" gets woken up */
1520 			cuse_server_wakeup_locked(pcc->server);
1521 			cuse_unlock();
1522 		}
1523 	}
1524 }
1525 
1526 static int
1527 cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
1528 {
1529 	struct cuse_client_command *pccmd;
1530 	struct cuse_client *pcc;
1531 	int error;
1532 	int len;
1533 
1534 	error = cuse_client_get(&pcc);
1535 	if (error != 0)
1536 		return (error);
1537 
1538 	pccmd = &pcc->cmds[CUSE_CMD_READ];
1539 
1540 	if (uio->uio_segflg != UIO_USERSPACE) {
1541 		return (EINVAL);
1542 	}
1543 	uio->uio_segflg = UIO_NOCOPY;
1544 
1545 	cuse_cmd_lock(pccmd);
1546 
1547 	while (uio->uio_resid != 0) {
1548 
1549 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1550 			error = ENOMEM;
1551 			break;
1552 		}
1553 
1554 		len = uio->uio_iov->iov_len;
1555 
1556 		cuse_lock();
1557 		cuse_client_send_command_locked(pccmd,
1558 		    (uintptr_t)uio->uio_iov->iov_base,
1559 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1560 
1561 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1562 		cuse_unlock();
1563 
1564 		if (error < 0) {
1565 			error = cuse_convert_error(error);
1566 			break;
1567 		} else if (error == len) {
1568 			error = uiomove(NULL, error, uio);
1569 			if (error)
1570 				break;
1571 		} else {
1572 			error = uiomove(NULL, error, uio);
1573 			break;
1574 		}
1575 	}
1576 	cuse_cmd_unlock(pccmd);
1577 
1578 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1579 
1580 	if (error == EWOULDBLOCK)
1581 		cuse_client_kqfilter_poll(dev, pcc);
1582 
1583 	return (error);
1584 }
1585 
1586 static int
1587 cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
1588 {
1589 	struct cuse_client_command *pccmd;
1590 	struct cuse_client *pcc;
1591 	int error;
1592 	int len;
1593 
1594 	error = cuse_client_get(&pcc);
1595 	if (error != 0)
1596 		return (error);
1597 
1598 	pccmd = &pcc->cmds[CUSE_CMD_WRITE];
1599 
1600 	if (uio->uio_segflg != UIO_USERSPACE) {
1601 		return (EINVAL);
1602 	}
1603 	uio->uio_segflg = UIO_NOCOPY;
1604 
1605 	cuse_cmd_lock(pccmd);
1606 
1607 	while (uio->uio_resid != 0) {
1608 
1609 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1610 			error = ENOMEM;
1611 			break;
1612 		}
1613 
1614 		len = uio->uio_iov->iov_len;
1615 
1616 		cuse_lock();
1617 		cuse_client_send_command_locked(pccmd,
1618 		    (uintptr_t)uio->uio_iov->iov_base,
1619 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1620 
1621 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1622 		cuse_unlock();
1623 
1624 		if (error < 0) {
1625 			error = cuse_convert_error(error);
1626 			break;
1627 		} else if (error == len) {
1628 			error = uiomove(NULL, error, uio);
1629 			if (error)
1630 				break;
1631 		} else {
1632 			error = uiomove(NULL, error, uio);
1633 			break;
1634 		}
1635 	}
1636 	cuse_cmd_unlock(pccmd);
1637 
1638 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1639 
1640 	if (error == EWOULDBLOCK)
1641 		cuse_client_kqfilter_poll(dev, pcc);
1642 
1643 	return (error);
1644 }
1645 
1646 static int
1647 cuse_client_ioctl(struct cdev *dev, unsigned long cmd,
1648     caddr_t data, int fflag, struct thread *td)
1649 {
1650 	struct cuse_client_command *pccmd;
1651 	struct cuse_client *pcc;
1652 	int error;
1653 	int len;
1654 
1655 	error = cuse_client_get(&pcc);
1656 	if (error != 0)
1657 		return (error);
1658 
1659 	len = IOCPARM_LEN(cmd);
1660 	if (len > CUSE_BUFFER_MAX)
1661 		return (ENOMEM);
1662 
1663 	pccmd = &pcc->cmds[CUSE_CMD_IOCTL];
1664 
1665 	cuse_cmd_lock(pccmd);
1666 
1667 	if (cmd & (IOC_IN | IOC_VOID))
1668 		memcpy(pcc->ioctl_buffer, data, len);
1669 
1670 	/*
1671 	 * When the ioctl length is zero, drivers can pass information
1672 	 * through the data pointer of the ioctl. Make sure this information
1673 	 * is forwarded to the driver.
1674 	 */
1675 
1676 	cuse_lock();
1677 	cuse_client_send_command_locked(pccmd,
1678 	    (len == 0) ? *(long *)data : CUSE_BUF_MIN_PTR,
1679 	    (unsigned long)cmd, pcc->fflags,
1680 	    (fflag & O_NONBLOCK) ? IO_NDELAY : 0);
1681 
1682 	error = cuse_client_receive_command_locked(pccmd, data, len);
1683 	cuse_unlock();
1684 
1685 	if (error < 0) {
1686 		error = cuse_convert_error(error);
1687 	} else {
1688 		error = 0;
1689 	}
1690 
1691 	if (cmd & IOC_OUT)
1692 		memcpy(data, pcc->ioctl_buffer, len);
1693 
1694 	cuse_cmd_unlock(pccmd);
1695 
1696 	if (error == EWOULDBLOCK)
1697 		cuse_client_kqfilter_poll(dev, pcc);
1698 
1699 	return (error);
1700 }
1701 
1702 static int
1703 cuse_client_poll(struct cdev *dev, int events, struct thread *td)
1704 {
1705 	struct cuse_client_command *pccmd;
1706 	struct cuse_client *pcc;
1707 	unsigned long temp;
1708 	int error;
1709 	int revents;
1710 
1711 	error = cuse_client_get(&pcc);
1712 	if (error != 0)
1713 		goto pollnval;
1714 
1715 	temp = 0;
1716 
1717 	if (events & (POLLPRI | POLLIN | POLLRDNORM))
1718 		temp |= CUSE_POLL_READ;
1719 
1720 	if (events & (POLLOUT | POLLWRNORM))
1721 		temp |= CUSE_POLL_WRITE;
1722 
1723 	if (events & POLLHUP)
1724 		temp |= CUSE_POLL_ERROR;
1725 
1726 	pccmd = &pcc->cmds[CUSE_CMD_POLL];
1727 
1728 	cuse_cmd_lock(pccmd);
1729 
1730 	/* Need to selrecord() first to not lose any events. */
1731 	if (temp != 0 && td != NULL)
1732 		selrecord(td, &pcc->server->selinfo);
1733 
1734 	cuse_lock();
1735 	cuse_client_send_command_locked(pccmd,
1736 	    0, temp, pcc->fflags, IO_NDELAY);
1737 
1738 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1739 	cuse_unlock();
1740 
1741 	cuse_cmd_unlock(pccmd);
1742 
1743 	if (error < 0) {
1744 		goto pollnval;
1745 	} else {
1746 		revents = 0;
1747 		if (error & CUSE_POLL_READ)
1748 			revents |= (events & (POLLPRI | POLLIN | POLLRDNORM));
1749 		if (error & CUSE_POLL_WRITE)
1750 			revents |= (events & (POLLOUT | POLLWRNORM));
1751 		if (error & CUSE_POLL_ERROR)
1752 			revents |= (events & POLLHUP);
1753 	}
1754 	return (revents);
1755 
1756  pollnval:
1757 	/* XXX many clients don't understand POLLNVAL */
1758 	return (events & (POLLHUP | POLLPRI | POLLIN |
1759 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1760 }
1761 
1762 static int
1763 cuse_client_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr)
1764 {
1765 	uint32_t page_nr = offset / PAGE_SIZE;
1766 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1767 	struct cuse_memory *mem;
1768 	struct cuse_server *pcs;
1769 	struct cuse_client *pcc;
1770 	uint8_t *ptr;
1771 	int error;
1772 
1773 	if (alloc_nr >= CUSE_ALLOC_UNIT_MAX)
1774 		return (ENOMEM);
1775 
1776 	error = cuse_client_get(&pcc);
1777 	if (error != 0)
1778 		pcs = NULL;
1779 	else
1780 		pcs = pcc->server;
1781 
1782 	cuse_lock();
1783 	mem = &cuse_mem[alloc_nr];
1784 
1785 	/* try to enforce slight ownership */
1786 	/* enforce a loose ownership check */
1787 		cuse_unlock();
1788 		return (EINVAL);
1789 	}
1790 	if (mem->virtaddr == NULL) {
1791 		cuse_unlock();
1792 		return (ENOMEM);
1793 	}
1794 	if (mem->virtaddr == NBUSY) {
1795 		cuse_unlock();
1796 		return (ENOMEM);
1797 	}
1798 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1799 
1800 	if (page_nr >= mem->page_count) {
1801 		cuse_unlock();
1802 		return (ENXIO);
1803 	}
1804 	ptr = mem->virtaddr + (page_nr * PAGE_SIZE);
1805 	cuse_unlock();
1806 
1807 	*paddr = vtophys(ptr);
1808 
1809 	return (0);
1810 }
1811 
1812 static void
1813 cuse_client_kqfilter_read_detach(struct knote *kn)
1814 {
1815 	struct cuse_client *pcc;
1816 
1817 	cuse_lock();
1818 	pcc = kn->kn_hook;
1819 	knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1820 	cuse_unlock();
1821 }
1822 
1823 static void
1824 cuse_client_kqfilter_write_detach(struct knote *kn)
1825 {
1826 	struct cuse_client *pcc;
1827 
1828 	cuse_lock();
1829 	pcc = kn->kn_hook;
1830 	knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1831 	cuse_unlock();
1832 }
1833 
1834 static int
1835 cuse_client_kqfilter_read_event(struct knote *kn, long hint)
1836 {
1837 	struct cuse_client *pcc;
1838 
1839 	mtx_assert(&cuse_mtx, MA_OWNED);
1840 
1841 	pcc = kn->kn_hook;
1842 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_READ) ? 1 : 0);
1843 }
1844 
1845 static int
1846 cuse_client_kqfilter_write_event(struct knote *kn, long hint)
1847 {
1848 	struct cuse_client *pcc;
1849 
1850 	mtx_assert(&cuse_mtx, MA_OWNED);
1851 
1852 	pcc = kn->kn_hook;
1853 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_WRITE) ? 1 : 0);
1854 }
1855 
1856 static int
1857 cuse_client_kqfilter(struct cdev *dev, struct knote *kn)
1858 {
1859 	struct cuse_client *pcc;
1860 	struct cuse_server *pcs;
1861 	int error;
1862 
1863 	error = cuse_client_get(&pcc);
1864 	if (error != 0)
1865 		return (error);
1866 
1867 	cuse_lock();
1868 	pcs = pcc->server;
1869 	switch (kn->kn_filter) {
1870 	case EVFILT_READ:
1871 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_READ;
1872 		kn->kn_hook = pcc;
1873 		kn->kn_fop = &cuse_client_kqfilter_read_ops;
1874 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1875 		break;
1876 	case EVFILT_WRITE:
1877 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_WRITE;
1878 		kn->kn_hook = pcc;
1879 		kn->kn_fop = &cuse_client_kqfilter_write_ops;
1880 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1881 		break;
1882 	default:
1883 		error = EINVAL;
1884 		break;
1885 	}
1886 	cuse_unlock();
1887 
1888 	if (error == 0)
1889 		cuse_client_kqfilter_poll(dev, pcc);
1890 	return (error);
1891 }
1892