xref: /freebsd/sys/fs/cuse/cuse.c (revision b4af4f93c682e445bf159f0d1ec90b636296c946)
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2010-2020 Hans Petter Selasky. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/stdint.h>
28 #include <sys/stddef.h>
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/conf.h>
33 #include <sys/kernel.h>
34 #include <sys/bus.h>
35 #include <sys/linker_set.h>
36 #include <sys/module.h>
37 #include <sys/lock.h>
38 #include <sys/mutex.h>
39 #include <sys/condvar.h>
40 #include <sys/sysctl.h>
41 #include <sys/unistd.h>
42 #include <sys/malloc.h>
43 #include <sys/priv.h>
44 #include <sys/uio.h>
45 #include <sys/poll.h>
46 #include <sys/sx.h>
47 #include <sys/rwlock.h>
48 #include <sys/queue.h>
49 #include <sys/fcntl.h>
50 #include <sys/proc.h>
51 #include <sys/vnode.h>
52 #include <sys/selinfo.h>
53 #include <sys/ptrace.h>
54 #include <sys/sysent.h>
55 
56 #include <machine/bus.h>
57 
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60 #include <vm/vm_object.h>
61 #include <vm/vm_page.h>
62 #include <vm/vm_pager.h>
63 
64 #include <fs/cuse/cuse_defs.h>
65 #include <fs/cuse/cuse_ioctl.h>
66 
67 MODULE_VERSION(cuse, 1);
68 
69 /*
70  * Prevent cuse4bsd.ko and cuse.ko from loading at the same time by
71  * declaring support for the cuse4bsd interface in cuse.ko:
72  */
73 MODULE_VERSION(cuse4bsd, 1);
74 
75 #ifdef FEATURE
76 FEATURE(cuse, "Userspace character devices");
77 #endif
78 
79 struct cuse_command;
80 struct cuse_server;
81 struct cuse_client;
82 
/*
 * One command slot of a client.  Every client embeds CUSE_CMD_MAX of
 * these (see struct cuse_client); a slot sits on the server's "head"
 * queue while it waits to be fetched by a server thread.
 */
struct cuse_client_command {
	/* linkage on cuse_server.head; tqe_prev is reset to NULL when dequeued */
	TAILQ_ENTRY(cuse_client_command) entry;
	/* request payload, copied out to the server by CUSE_IOCTL_GET_COMMAND */
	struct cuse_command sub;
	/* taken exclusively through cuse_cmd_lock()/cuse_cmd_unlock() */
	struct sx sx;
	/* slept on together with the server mutex */
	struct cv cv;
	/* server thread that fetched this command, NULL after it was synced */
	struct thread *entered;
	/* client owning this slot */
	struct cuse_client *client;
	/* process of the client thread waiting for the reply, otherwise NULL */
	struct proc *proc_curr;
	/* number of server-side copies currently using proc_curr */
	int	proc_refs;
	/* set when the client's wait was interrupted; cleared by CUSE_IOCTL_GET_SIG */
	int	got_signal;
	/* result value stored by CUSE_IOCTL_SYNC_COMMAND */
	int	error;
	/* CUSE_CMD_NONE until the server posts CUSE_CMD_SYNC */
	int	command;
};
96 
/*
 * A memory allocation registered by a server through
 * CUSE_IOCTL_ALLOC_MEMORY.
 */
struct cuse_memory {
	TAILQ_ENTRY(cuse_memory) entry;	/* linkage on cuse_server.hmem */
	vm_object_t object;		/* VM object obtained from vm_pager_allocate() */
	uint32_t page_count;		/* size of the allocation in pages */
	uint32_t alloc_nr;		/* allocation number, unique within one server */
};
103 
/*
 * A character device created by a server through CUSE_IOCTL_CREATE_DEV.
 */
struct cuse_server_dev {
	TAILQ_ENTRY(cuse_server_dev) entry;	/* linkage on cuse_server.hdev */
	struct cuse_server *server;		/* server that created the device */
	struct cdev *kern_dev;			/* kernel device node */
	/* pointer value supplied by the server; matched by CUSE_IOCTL_DESTROY_DEV */
	struct cuse_dev *user_dev;
};
110 
/*
 * State of one open instance of the /dev/cuse server device.
 */
struct cuse_server {
	TAILQ_ENTRY(cuse_server) entry;		/* linkage on cuse_server_head */
	TAILQ_HEAD(, cuse_client_command) head;	/* commands waiting to be fetched */
	TAILQ_HEAD(, cuse_server_dev) hdev;	/* devices created by this server */
	TAILQ_HEAD(, cuse_client) hcli;		/* clients attached to this server */
	TAILQ_HEAD(, cuse_memory) hmem;		/* registered memory allocations */
	struct mtx mtx;				/* per-server mutex, see cuse_server_lock() */
	struct cv cv;				/* signalled when a command is queued on "head" */
	struct selinfo selinfo;			/* poll/kqueue wakeup state */
	pid_t	pid;				/* PID of the process that opened the server */
	int	is_closing;			/* non-zero once shutdown has started */
	int	refs;				/* reference count; freed when it drops to zero */
};
124 
/*
 * State of one open instance of a CUSE client device.
 */
struct cuse_client {
	TAILQ_ENTRY(cuse_client) entry;		/* linkage on cuse_server.hcli */
	/* NOTE(review): not referenced in the code reviewed here - confirm its use */
	TAILQ_ENTRY(cuse_client) entry_ref;
	struct cuse_client_command cmds[CUSE_CMD_MAX];	/* one slot per command type */
	struct cuse_server *server;		/* server this client talks to */
	struct cuse_server_dev *server_dev;	/* device opened; NULL once closing */

	/* staging buffer addressed by peer pointers in the CUSE_BUF_MIN_PTR range */
	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);

	int	fflags;			/* file flags */
	int	cflags;			/* client flags */
#define	CUSE_CLI_IS_CLOSING 0x01
#define	CUSE_CLI_KNOTE_NEED_READ 0x02
#define	CUSE_CLI_KNOTE_NEED_WRITE 0x04
#define	CUSE_CLI_KNOTE_HAS_READ 0x08
#define	CUSE_CLI_KNOTE_HAS_WRITE 0x10
};
142 
/* Non-zero when the CUSE_CLI_IS_CLOSING flag is set on client "pcc". */
#define	CUSE_CLIENT_CLOSING(pcc) \
    ((pcc)->cflags & CUSE_CLI_IS_CLOSING)
145 
/* malloc(9) type used for every allocation made by this file */
static	MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");

/* all open servers; modified with cuse_global_mtx held */
static TAILQ_HEAD(, cuse_server) cuse_server_head;
static struct mtx cuse_global_mtx;
/* the /dev/cuse server node created by cuse_kern_init() */
static struct cdev *cuse_dev;
/* unit table: slot n is in use when cuse_alloc_unit[n] is non-NULL (its owner) */
static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
/* identifier stored for the matching slot of cuse_alloc_unit[] */
static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
153 
154 static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);
155 static void cuse_client_kqfilter_read_detach(struct knote *kn);
156 static void cuse_client_kqfilter_write_detach(struct knote *kn);
157 static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
158 static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);
159 
/* kqueue filter callbacks for the read side of a client device */
static struct filterops cuse_client_kqfilter_read_ops = {
	.f_isfd = 1,
	.f_detach = cuse_client_kqfilter_read_detach,
	.f_event = cuse_client_kqfilter_read_event,
};

/* kqueue filter callbacks for the write side of a client device */
static struct filterops cuse_client_kqfilter_write_ops = {
	.f_isfd = 1,
	.f_detach = cuse_client_kqfilter_write_detach,
	.f_event = cuse_client_kqfilter_write_event,
};
171 
/* entry points of the client character devices */
static d_open_t cuse_client_open;
static d_close_t cuse_client_close;
static d_ioctl_t cuse_client_ioctl;
static d_read_t cuse_client_read;
static d_write_t cuse_client_write;
static d_poll_t cuse_client_poll;
static d_mmap_single_t cuse_client_mmap_single;
static d_kqfilter_t cuse_client_kqfilter;

/* device switch handed to make_dev_credf() by CUSE_IOCTL_CREATE_DEV */
static struct cdevsw cuse_client_devsw = {
	.d_version = D_VERSION,
	.d_open = cuse_client_open,
	.d_close = cuse_client_close,
	.d_ioctl = cuse_client_ioctl,
	.d_name = "cuse_client",
	.d_flags = D_TRACKCLOSE,
	.d_read = cuse_client_read,
	.d_write = cuse_client_write,
	.d_poll = cuse_client_poll,
	.d_mmap_single = cuse_client_mmap_single,
	.d_kqfilter = cuse_client_kqfilter,
};
194 
/* entry points of the /dev/cuse server device */
static d_open_t cuse_server_open;
static d_close_t cuse_server_close;
static d_ioctl_t cuse_server_ioctl;
static d_read_t cuse_server_read;
static d_write_t cuse_server_write;
static d_poll_t cuse_server_poll;
static d_mmap_single_t cuse_server_mmap_single;

/* device switch handed to make_dev() by cuse_kern_init() */
static struct cdevsw cuse_server_devsw = {
	.d_version = D_VERSION,
	.d_open = cuse_server_open,
	.d_close = cuse_server_close,
	.d_ioctl = cuse_server_ioctl,
	.d_name = "cuse_server",
	.d_flags = D_TRACKCLOSE,
	.d_read = cuse_server_read,
	.d_write = cuse_server_write,
	.d_poll = cuse_server_poll,
	.d_mmap_single = cuse_server_mmap_single,
};
215 
216 static void cuse_client_is_closing(struct cuse_client *);
217 static int cuse_free_unit_by_id_locked(struct cuse_server *, int);
218 
219 static void
220 cuse_global_lock(void)
221 {
222 	mtx_lock(&cuse_global_mtx);
223 }
224 
225 static void
226 cuse_global_unlock(void)
227 {
228 	mtx_unlock(&cuse_global_mtx);
229 }
230 
231 static void
232 cuse_server_lock(struct cuse_server *pcs)
233 {
234 	mtx_lock(&pcs->mtx);
235 }
236 
237 static void
238 cuse_server_unlock(struct cuse_server *pcs)
239 {
240 	mtx_unlock(&pcs->mtx);
241 }
242 
243 static void
244 cuse_cmd_lock(struct cuse_client_command *pccmd)
245 {
246 	sx_xlock(&pccmd->sx);
247 }
248 
249 static void
250 cuse_cmd_unlock(struct cuse_client_command *pccmd)
251 {
252 	sx_xunlock(&pccmd->sx);
253 }
254 
255 static void
256 cuse_kern_init(void *arg)
257 {
258 	TAILQ_INIT(&cuse_server_head);
259 
260 	mtx_init(&cuse_global_mtx, "cuse-global-mtx", NULL, MTX_DEF);
261 
262 	cuse_dev = make_dev(&cuse_server_devsw, 0,
263 	    UID_ROOT, GID_OPERATOR, 0600, "cuse");
264 
265 	printf("Cuse v%d.%d.%d @ /dev/cuse\n",
266 	    (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
267 	    (CUSE_VERSION >> 0) & 0xFF);
268 }
269 SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, NULL);
270 
271 static void
272 cuse_kern_uninit(void *arg)
273 {
274 	void *ptr;
275 
276 	while (1) {
277 
278 		printf("Cuse: Please exit all /dev/cuse instances "
279 		    "and processes which have used this device.\n");
280 
281 		pause("DRAIN", 2 * hz);
282 
283 		cuse_global_lock();
284 		ptr = TAILQ_FIRST(&cuse_server_head);
285 		cuse_global_unlock();
286 
287 		if (ptr == NULL)
288 			break;
289 	}
290 
291 	if (cuse_dev != NULL)
292 		destroy_dev(cuse_dev);
293 
294 	mtx_destroy(&cuse_global_mtx);
295 }
296 SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
297 
298 static int
299 cuse_server_get(struct cuse_server **ppcs)
300 {
301 	struct cuse_server *pcs;
302 	int error;
303 
304 	error = devfs_get_cdevpriv((void **)&pcs);
305 	if (error != 0) {
306 		*ppcs = NULL;
307 		return (error);
308 	}
309 	if (pcs->is_closing) {
310 		*ppcs = NULL;
311 		return (EINVAL);
312 	}
313 	*ppcs = pcs;
314 	return (0);
315 }
316 
317 static void
318 cuse_server_is_closing(struct cuse_server *pcs)
319 {
320 	struct cuse_client *pcc;
321 
322 	if (pcs->is_closing)
323 		return;
324 
325 	pcs->is_closing = 1;
326 
327 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
328 		cuse_client_is_closing(pcc);
329 	}
330 }
331 
332 static struct cuse_client_command *
333 cuse_server_find_command(struct cuse_server *pcs, struct thread *td)
334 {
335 	struct cuse_client *pcc;
336 	int n;
337 
338 	if (pcs->is_closing)
339 		goto done;
340 
341 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
342 		if (CUSE_CLIENT_CLOSING(pcc))
343 			continue;
344 		for (n = 0; n != CUSE_CMD_MAX; n++) {
345 			if (pcc->cmds[n].entered == td)
346 				return (&pcc->cmds[n]);
347 		}
348 	}
349 done:
350 	return (NULL);
351 }
352 
/*
 * Sanitise a NUL-terminated string in place: every character that is
 * not an ASCII letter, a digit, '.', '_' or '/' is replaced by '_'.
 */
static void
cuse_str_filter(char *ptr)
{
	for (; *ptr != 0; ptr++) {
		const int c = *ptr;
		const int is_lower = (c >= 'a') && (c <= 'z');
		const int is_upper = (c >= 'A') && (c <= 'Z');
		const int is_digit = (c >= '0') && (c <= '9');
		const int is_punct = (c == '.') || (c == '_') || (c == '/');

		if (!(is_lower || is_upper || is_digit || is_punct))
			*ptr = '_';
	}
}
381 
382 static int
383 cuse_convert_error(int error)
384 {
385 	;				/* indent fix */
386 	switch (error) {
387 	case CUSE_ERR_NONE:
388 		return (0);
389 	case CUSE_ERR_BUSY:
390 		return (EBUSY);
391 	case CUSE_ERR_WOULDBLOCK:
392 		return (EWOULDBLOCK);
393 	case CUSE_ERR_INVALID:
394 		return (EINVAL);
395 	case CUSE_ERR_NO_MEMORY:
396 		return (ENOMEM);
397 	case CUSE_ERR_FAULT:
398 		return (EFAULT);
399 	case CUSE_ERR_SIGNAL:
400 		return (EINTR);
401 	case CUSE_ERR_NO_DEVICE:
402 		return (ENODEV);
403 	default:
404 		return (ENXIO);
405 	}
406 }
407 
408 static void
409 cuse_vm_memory_free(struct cuse_memory *mem)
410 {
411 	/* last user is gone - free */
412 	vm_object_deallocate(mem->object);
413 
414 	/* free CUSE memory */
415 	free(mem, M_CUSE);
416 }
417 
418 static int
419 cuse_server_alloc_memory(struct cuse_server *pcs, uint32_t alloc_nr,
420     uint32_t page_count)
421 {
422 	struct cuse_memory *temp;
423 	struct cuse_memory *mem;
424 	vm_object_t object;
425 	int error;
426 
427 	mem = malloc(sizeof(*mem), M_CUSE, M_WAITOK | M_ZERO);
428 
429 	object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * page_count,
430 	    VM_PROT_DEFAULT, 0, curthread->td_ucred);
431 	if (object == NULL) {
432 		error = ENOMEM;
433 		goto error_0;
434 	}
435 
436 	cuse_server_lock(pcs);
437 	/* check if allocation number already exists */
438 	TAILQ_FOREACH(temp, &pcs->hmem, entry) {
439 		if (temp->alloc_nr == alloc_nr)
440 			break;
441 	}
442 	if (temp != NULL) {
443 		cuse_server_unlock(pcs);
444 		error = EBUSY;
445 		goto error_1;
446 	}
447 	mem->object = object;
448 	mem->page_count = page_count;
449 	mem->alloc_nr = alloc_nr;
450 	TAILQ_INSERT_TAIL(&pcs->hmem, mem, entry);
451 	cuse_server_unlock(pcs);
452 
453 	return (0);
454 
455 error_1:
456 	vm_object_deallocate(object);
457 error_0:
458 	free(mem, M_CUSE);
459 	return (error);
460 }
461 
462 static int
463 cuse_server_free_memory(struct cuse_server *pcs, uint32_t alloc_nr)
464 {
465 	struct cuse_memory *mem;
466 
467 	cuse_server_lock(pcs);
468 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
469 		if (mem->alloc_nr == alloc_nr)
470 			break;
471 	}
472 	if (mem == NULL) {
473 		cuse_server_unlock(pcs);
474 		return (EINVAL);
475 	}
476 	TAILQ_REMOVE(&pcs->hmem, mem, entry);
477 	cuse_server_unlock(pcs);
478 
479 	cuse_vm_memory_free(mem);
480 
481 	return (0);
482 }
483 
484 static int
485 cuse_client_get(struct cuse_client **ppcc)
486 {
487 	struct cuse_client *pcc;
488 	int error;
489 
490 	/* try to get private data */
491 	error = devfs_get_cdevpriv((void **)&pcc);
492 	if (error != 0) {
493 		*ppcc = NULL;
494 		return (error);
495 	}
496 	if (CUSE_CLIENT_CLOSING(pcc) || pcc->server->is_closing) {
497 		*ppcc = NULL;
498 		return (EINVAL);
499 	}
500 	*ppcc = pcc;
501 	return (0);
502 }
503 
504 static void
505 cuse_client_is_closing(struct cuse_client *pcc)
506 {
507 	struct cuse_client_command *pccmd;
508 	uint32_t n;
509 
510 	if (CUSE_CLIENT_CLOSING(pcc))
511 		return;
512 
513 	pcc->cflags |= CUSE_CLI_IS_CLOSING;
514 	pcc->server_dev = NULL;
515 
516 	for (n = 0; n != CUSE_CMD_MAX; n++) {
517 
518 		pccmd = &pcc->cmds[n];
519 
520 		if (pccmd->entry.tqe_prev != NULL) {
521 			TAILQ_REMOVE(&pcc->server->head, pccmd, entry);
522 			pccmd->entry.tqe_prev = NULL;
523 		}
524 		cv_broadcast(&pccmd->cv);
525 	}
526 }
527 
528 static void
529 cuse_client_send_command_locked(struct cuse_client_command *pccmd,
530     uintptr_t data_ptr, unsigned long arg, int fflags, int ioflag)
531 {
532 	unsigned long cuse_fflags = 0;
533 	struct cuse_server *pcs;
534 
535 	if (fflags & FREAD)
536 		cuse_fflags |= CUSE_FFLAG_READ;
537 
538 	if (fflags & FWRITE)
539 		cuse_fflags |= CUSE_FFLAG_WRITE;
540 
541 	if (ioflag & IO_NDELAY)
542 		cuse_fflags |= CUSE_FFLAG_NONBLOCK;
543 #if defined(__LP64__)
544 	if (SV_CURPROC_FLAG(SV_ILP32))
545 		cuse_fflags |= CUSE_FFLAG_COMPAT32;
546 #endif
547 	pccmd->sub.fflags = cuse_fflags;
548 	pccmd->sub.data_pointer = data_ptr;
549 	pccmd->sub.argument = arg;
550 
551 	pcs = pccmd->client->server;
552 
553 	if ((pccmd->entry.tqe_prev == NULL) &&
554 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
555 	    (pcs->is_closing == 0)) {
556 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
557 		cv_signal(&pcs->cv);
558 	}
559 }
560 
561 static void
562 cuse_client_got_signal(struct cuse_client_command *pccmd)
563 {
564 	struct cuse_server *pcs;
565 
566 	pccmd->got_signal = 1;
567 
568 	pccmd = &pccmd->client->cmds[CUSE_CMD_SIGNAL];
569 
570 	pcs = pccmd->client->server;
571 
572 	if ((pccmd->entry.tqe_prev == NULL) &&
573 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
574 	    (pcs->is_closing == 0)) {
575 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
576 		cv_signal(&pcs->cv);
577 	}
578 }
579 
580 static int
581 cuse_client_receive_command_locked(struct cuse_client_command *pccmd,
582     uint8_t *arg_ptr, uint32_t arg_len)
583 {
584 	struct cuse_server *pcs;
585 	int error;
586 
587 	pcs = pccmd->client->server;
588 	error = 0;
589 
590 	pccmd->proc_curr = curthread->td_proc;
591 
592 	if (CUSE_CLIENT_CLOSING(pccmd->client) || pcs->is_closing) {
593 		error = CUSE_ERR_OTHER;
594 		goto done;
595 	}
596 	while (pccmd->command == CUSE_CMD_NONE) {
597 		if (error != 0) {
598 			cv_wait(&pccmd->cv, &pcs->mtx);
599 		} else {
600 			error = cv_wait_sig(&pccmd->cv, &pcs->mtx);
601 
602 			if (error != 0)
603 				cuse_client_got_signal(pccmd);
604 		}
605 		if (CUSE_CLIENT_CLOSING(pccmd->client) || pcs->is_closing) {
606 			error = CUSE_ERR_OTHER;
607 			goto done;
608 		}
609 	}
610 
611 	error = pccmd->error;
612 	pccmd->command = CUSE_CMD_NONE;
613 	cv_signal(&pccmd->cv);
614 
615 done:
616 
617 	/* wait until all process references are gone */
618 
619 	pccmd->proc_curr = NULL;
620 
621 	while (pccmd->proc_refs != 0)
622 		cv_wait(&pccmd->cv, &pcs->mtx);
623 
624 	return (error);
625 }
626 
627 /*------------------------------------------------------------------------*
628  *	CUSE SERVER PART
629  *------------------------------------------------------------------------*/
630 
631 static void
632 cuse_server_free_dev(struct cuse_server_dev *pcsd)
633 {
634 	struct cuse_server *pcs;
635 	struct cuse_client *pcc;
636 
637 	/* get server pointer */
638 	pcs = pcsd->server;
639 
640 	/* prevent creation of more devices */
641 	cuse_server_lock(pcs);
642 	if (pcsd->kern_dev != NULL)
643 		pcsd->kern_dev->si_drv1 = NULL;
644 
645 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
646 		if (pcc->server_dev == pcsd)
647 			cuse_client_is_closing(pcc);
648 	}
649 	cuse_server_unlock(pcs);
650 
651 	/* destroy device, if any */
652 	if (pcsd->kern_dev != NULL) {
653 		/* destroy device synchronously */
654 		destroy_dev(pcsd->kern_dev);
655 	}
656 	free(pcsd, M_CUSE);
657 }
658 
659 static void
660 cuse_server_unref(struct cuse_server *pcs)
661 {
662 	struct cuse_server_dev *pcsd;
663 	struct cuse_memory *mem;
664 
665 	cuse_server_lock(pcs);
666 	if (--(pcs->refs) != 0) {
667 		cuse_server_unlock(pcs);
668 		return;
669 	}
670 	cuse_server_is_closing(pcs);
671 	/* final client wakeup, if any */
672 	cuse_server_wakeup_all_client_locked(pcs);
673 
674 	cuse_global_lock();
675 	TAILQ_REMOVE(&cuse_server_head, pcs, entry);
676 	cuse_global_unlock();
677 
678 	while ((pcsd = TAILQ_FIRST(&pcs->hdev)) != NULL) {
679 		TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
680 		cuse_server_unlock(pcs);
681 		cuse_server_free_dev(pcsd);
682 		cuse_server_lock(pcs);
683 	}
684 
685 	cuse_free_unit_by_id_locked(pcs, -1);
686 
687 	while ((mem = TAILQ_FIRST(&pcs->hmem)) != NULL) {
688 		TAILQ_REMOVE(&pcs->hmem, mem, entry);
689 		cuse_server_unlock(pcs);
690 		cuse_vm_memory_free(mem);
691 		cuse_server_lock(pcs);
692 	}
693 
694 	knlist_clear(&pcs->selinfo.si_note, 1);
695 	knlist_destroy(&pcs->selinfo.si_note);
696 
697 	cuse_server_unlock(pcs);
698 
699 	seldrain(&pcs->selinfo);
700 
701 	cv_destroy(&pcs->cv);
702 
703 	mtx_destroy(&pcs->mtx);
704 
705 	free(pcs, M_CUSE);
706 }
707 
708 static int
709 cuse_server_do_close(struct cuse_server *pcs)
710 {
711 	int retval;
712 
713 	cuse_server_lock(pcs);
714 	cuse_server_is_closing(pcs);
715 	/* final client wakeup, if any */
716 	cuse_server_wakeup_all_client_locked(pcs);
717 
718 	knlist_clear(&pcs->selinfo.si_note, 1);
719 
720 	retval = pcs->refs;
721 	cuse_server_unlock(pcs);
722 
723 	return (retval);
724 }
725 
726 static void
727 cuse_server_free(void *arg)
728 {
729 	struct cuse_server *pcs = arg;
730 
731 	/*
732 	 * The final server unref should be done by the server thread
733 	 * to prevent deadlock in the client cdevpriv destructor,
734 	 * which cannot destroy itself.
735 	 */
736 	while (cuse_server_do_close(pcs) != 1)
737 		pause("W", hz);
738 
739 	/* drop final refcount */
740 	cuse_server_unref(pcs);
741 }
742 
743 static int
744 cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
745 {
746 	struct cuse_server *pcs;
747 
748 	pcs = malloc(sizeof(*pcs), M_CUSE, M_WAITOK | M_ZERO);
749 
750 	if (devfs_set_cdevpriv(pcs, &cuse_server_free)) {
751 		printf("Cuse: Cannot set cdevpriv.\n");
752 		free(pcs, M_CUSE);
753 		return (ENOMEM);
754 	}
755 	/* store current process ID */
756 	pcs->pid = curproc->p_pid;
757 
758 	TAILQ_INIT(&pcs->head);
759 	TAILQ_INIT(&pcs->hdev);
760 	TAILQ_INIT(&pcs->hcli);
761 	TAILQ_INIT(&pcs->hmem);
762 
763 	cv_init(&pcs->cv, "cuse-server-cv");
764 
765 	mtx_init(&pcs->mtx, "cuse-server-mtx", NULL, MTX_DEF);
766 
767 	knlist_init_mtx(&pcs->selinfo.si_note, &pcs->mtx);
768 
769 	cuse_global_lock();
770 	pcs->refs++;
771 	TAILQ_INSERT_TAIL(&cuse_server_head, pcs, entry);
772 	cuse_global_unlock();
773 
774 	return (0);
775 }
776 
/*
 * Close handler of /dev/cuse: start shutdown of the server if it can
 * still be looked up.  Always returns 0.
 */
static int
cuse_server_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct cuse_server *pcs;
	int error;

	error = cuse_server_get(&pcs);
	if (error == 0)
		(void)cuse_server_do_close(pcs);

	return (0);
}
787 
/* Reading from the server device is not supported; always ENXIO. */
static int
cuse_server_read(struct cdev *dev, struct uio *uio, int ioflag)
{

	return (ENXIO);
}
793 
/* Writing to the server device is not supported; always ENXIO. */
static int
cuse_server_write(struct cdev *dev, struct uio *uio, int ioflag)
{

	return (ENXIO);
}
799 
800 static int
801 cuse_server_ioctl_copy_locked(struct cuse_server *pcs,
802     struct cuse_client_command *pccmd,
803     struct cuse_data_chunk *pchk, int isread)
804 {
805 	struct proc *p_proc;
806 	uint32_t offset;
807 	int error;
808 
809 	offset = pchk->peer_ptr - CUSE_BUF_MIN_PTR;
810 
811 	if (pchk->length > CUSE_BUFFER_MAX)
812 		return (EFAULT);
813 
814 	if (offset >= CUSE_BUFFER_MAX)
815 		return (EFAULT);
816 
817 	if ((offset + pchk->length) > CUSE_BUFFER_MAX)
818 		return (EFAULT);
819 
820 	p_proc = pccmd->proc_curr;
821 	if (p_proc == NULL)
822 		return (ENXIO);
823 
824 	if (pccmd->proc_refs < 0)
825 		return (ENOMEM);
826 
827 	pccmd->proc_refs++;
828 
829 	cuse_server_unlock(pcs);
830 
831 	if (isread == 0) {
832 		error = copyin(
833 		    (void *)pchk->local_ptr,
834 		    pccmd->client->ioctl_buffer + offset,
835 		    pchk->length);
836 	} else {
837 		error = copyout(
838 		    pccmd->client->ioctl_buffer + offset,
839 		    (void *)pchk->local_ptr,
840 		    pchk->length);
841 	}
842 
843 	cuse_server_lock(pcs);
844 
845 	pccmd->proc_refs--;
846 
847 	if (pccmd->proc_curr == NULL)
848 		cv_signal(&pccmd->cv);
849 
850 	return (error);
851 }
852 
853 static int
854 cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
855     struct proc *proc_d, vm_offset_t data_d, size_t len)
856 {
857 	struct thread *td;
858 	struct proc *proc_cur;
859 	int error;
860 
861 	td = curthread;
862 	proc_cur = td->td_proc;
863 
864 	if (proc_cur == proc_d) {
865 		struct iovec iov = {
866 			.iov_base = (caddr_t)data_d,
867 			.iov_len = len,
868 		};
869 		struct uio uio = {
870 			.uio_iov = &iov,
871 			.uio_iovcnt = 1,
872 			.uio_offset = (off_t)data_s,
873 			.uio_resid = len,
874 			.uio_segflg = UIO_USERSPACE,
875 			.uio_rw = UIO_READ,
876 			.uio_td = td,
877 		};
878 
879 		PHOLD(proc_s);
880 		error = proc_rwmem(proc_s, &uio);
881 		PRELE(proc_s);
882 
883 	} else if (proc_cur == proc_s) {
884 		struct iovec iov = {
885 			.iov_base = (caddr_t)data_s,
886 			.iov_len = len,
887 		};
888 		struct uio uio = {
889 			.uio_iov = &iov,
890 			.uio_iovcnt = 1,
891 			.uio_offset = (off_t)data_d,
892 			.uio_resid = len,
893 			.uio_segflg = UIO_USERSPACE,
894 			.uio_rw = UIO_WRITE,
895 			.uio_td = td,
896 		};
897 
898 		PHOLD(proc_d);
899 		error = proc_rwmem(proc_d, &uio);
900 		PRELE(proc_d);
901 	} else {
902 		error = EINVAL;
903 	}
904 	return (error);
905 }
906 
907 static int
908 cuse_server_data_copy_locked(struct cuse_server *pcs,
909     struct cuse_client_command *pccmd,
910     struct cuse_data_chunk *pchk, int isread)
911 {
912 	struct proc *p_proc;
913 	int error;
914 
915 	p_proc = pccmd->proc_curr;
916 	if (p_proc == NULL)
917 		return (ENXIO);
918 
919 	if (pccmd->proc_refs < 0)
920 		return (ENOMEM);
921 
922 	pccmd->proc_refs++;
923 
924 	cuse_server_unlock(pcs);
925 
926 	if (isread == 0) {
927 		error = cuse_proc2proc_copy(
928 		    curthread->td_proc, pchk->local_ptr,
929 		    p_proc, pchk->peer_ptr,
930 		    pchk->length);
931 	} else {
932 		error = cuse_proc2proc_copy(
933 		    p_proc, pchk->peer_ptr,
934 		    curthread->td_proc, pchk->local_ptr,
935 		    pchk->length);
936 	}
937 
938 	cuse_server_lock(pcs);
939 
940 	pccmd->proc_refs--;
941 
942 	if (pccmd->proc_curr == NULL)
943 		cv_signal(&pccmd->cv);
944 
945 	return (error);
946 }
947 
948 static int
949 cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
950 {
951 	int n;
952 	int x = 0;
953 	int match;
954 
955 	do {
956 		for (match = n = 0; n != CUSE_DEVICES_MAX; n++) {
957 			if (cuse_alloc_unit[n] != NULL) {
958 				if ((cuse_alloc_unit_id[n] ^ id) & CUSE_ID_MASK)
959 					continue;
960 				if ((cuse_alloc_unit_id[n] & ~CUSE_ID_MASK) == x) {
961 					x++;
962 					match = 1;
963 				}
964 			}
965 		}
966 	} while (match);
967 
968 	if (x < 256) {
969 		for (n = 0; n != CUSE_DEVICES_MAX; n++) {
970 			if (cuse_alloc_unit[n] == NULL) {
971 				cuse_alloc_unit[n] = pcs;
972 				cuse_alloc_unit_id[n] = id | x;
973 				return (x);
974 			}
975 		}
976 	}
977 	return (-1);
978 }
979 
980 static void
981 cuse_server_wakeup_locked(struct cuse_server *pcs)
982 {
983 	selwakeup(&pcs->selinfo);
984 	KNOTE_LOCKED(&pcs->selinfo.si_note, 0);
985 }
986 
987 static void
988 cuse_server_wakeup_all_client_locked(struct cuse_server *pcs)
989 {
990 	struct cuse_client *pcc;
991 
992 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
993 		pcc->cflags |= (CUSE_CLI_KNOTE_NEED_READ |
994 		    CUSE_CLI_KNOTE_NEED_WRITE);
995 	}
996 	cuse_server_wakeup_locked(pcs);
997 }
998 
999 static int
1000 cuse_free_unit_by_id_locked(struct cuse_server *pcs, int id)
1001 {
1002 	int n;
1003 	int found = 0;
1004 
1005 	for (n = 0; n != CUSE_DEVICES_MAX; n++) {
1006 		if (cuse_alloc_unit[n] == pcs) {
1007 			if (cuse_alloc_unit_id[n] == id || id == -1) {
1008 				cuse_alloc_unit[n] = NULL;
1009 				cuse_alloc_unit_id[n] = 0;
1010 				found = 1;
1011 			}
1012 		}
1013 	}
1014 
1015 	return (found ? 0 : EINVAL);
1016 }
1017 
/*
 * Ioctl handler of the /dev/cuse server device.
 *
 * Looks up the server attached to the current file and dispatches on
 * "cmd".  "data" points at the ioctl argument, whose type depends on
 * the command.  Returns 0 on success or an errno value; unknown
 * commands yield ENXIO.
 */
static int
cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
    caddr_t data, int fflag, struct thread *td)
{
	struct cuse_server *pcs;
	int error;

	error = cuse_server_get(&pcs);
	if (error != 0)
		return (error);

	switch (cmd) {
		struct cuse_client_command *pccmd;
		struct cuse_client *pcc;
		struct cuse_command *pcmd;
		struct cuse_alloc_info *pai;
		struct cuse_create_dev *pcd;
		struct cuse_server_dev *pcsd;
		struct cuse_data_chunk *pchk;
		int n;

	/*
	 * Fetch the next queued client command, sleeping interruptibly
	 * until one is available.  Fails with ENXIO once the server is
	 * closing.
	 */
	case CUSE_IOCTL_GET_COMMAND:
		pcmd = (void *)data;

		cuse_server_lock(pcs);

		while ((pccmd = TAILQ_FIRST(&pcs->head)) == NULL) {
			error = cv_wait_sig(&pcs->cv, &pcs->mtx);

			if (pcs->is_closing)
				error = ENXIO;

			if (error) {
				cuse_server_unlock(pcs);
				return (error);
			}
		}

		TAILQ_REMOVE(&pcs->head, pccmd, entry);
		/* a NULL tqe_prev marks the slot as not queued */
		pccmd->entry.tqe_prev = NULL;

		/* remember which server thread handles this command */
		pccmd->entered = curthread;

		*pcmd = pccmd->sub;

		cuse_server_unlock(pcs);

		break;

	/*
	 * Complete every command entered by the current thread with the
	 * error value passed in "data" and wake up the waiting client.
	 */
	case CUSE_IOCTL_SYNC_COMMAND:

		cuse_server_lock(pcs);
		while ((pccmd = cuse_server_find_command(pcs, curthread)) != NULL) {

			/* send sync command */
			pccmd->entered = NULL;
			pccmd->error = *(int *)data;
			pccmd->command = CUSE_CMD_SYNC;

			/* signal peer, if any */
			cv_signal(&pccmd->cv);
		}
		cuse_server_unlock(pcs);

		break;

	/* Allocate a unit number for the default identifier. */
	case CUSE_IOCTL_ALLOC_UNIT:

		cuse_server_lock(pcs);
		n = cuse_alloc_unit_by_id_locked(pcs,
		    CUSE_ID_DEFAULT(0));
		cuse_server_unlock(pcs);

		if (n < 0)
			error = ENOMEM;
		else
			*(int *)data = n;
		break;

	/* Allocate a unit number for the identifier given in "data". */
	case CUSE_IOCTL_ALLOC_UNIT_BY_ID:

		n = *(int *)data;

		/* only the identifier bits are taken from the caller */
		n = (n & CUSE_ID_MASK);

		cuse_server_lock(pcs);
		n = cuse_alloc_unit_by_id_locked(pcs, n);
		cuse_server_unlock(pcs);

		if (n < 0)
			error = ENOMEM;
		else
			*(int *)data = n;
		break;

	/* Free a unit number that was allocated for the default identifier. */
	case CUSE_IOCTL_FREE_UNIT:

		n = *(int *)data;

		n = CUSE_ID_DEFAULT(n);

		cuse_server_lock(pcs);
		error = cuse_free_unit_by_id_locked(pcs, n);
		cuse_server_unlock(pcs);
		break;

	/* Free the unit whose full identifier value is given in "data". */
	case CUSE_IOCTL_FREE_UNIT_BY_ID:

		n = *(int *)data;

		cuse_server_lock(pcs);
		error = cuse_free_unit_by_id_locked(pcs, n);
		cuse_server_unlock(pcs);
		break;

	/* Register a new memory allocation after range-checking the request. */
	case CUSE_IOCTL_ALLOC_MEMORY:

		pai = (void *)data;

		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
			error = ENOMEM;
			break;
		}
		if (pai->page_count >= CUSE_ALLOC_PAGES_MAX) {
			error = ENOMEM;
			break;
		}
		error = cuse_server_alloc_memory(pcs,
		    pai->alloc_nr, pai->page_count);
		break;

	/* Release a previously registered memory allocation. */
	case CUSE_IOCTL_FREE_MEMORY:
		pai = (void *)data;

		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
			error = ENOMEM;
			break;
		}
		error = cuse_server_free_memory(pcs, pai->alloc_nr);
		break;

	/*
	 * Report and clear the "got signal" state of the command entered
	 * by the current thread; reports 0 when there is no such command.
	 */
	case CUSE_IOCTL_GET_SIG:

		cuse_server_lock(pcs);
		pccmd = cuse_server_find_command(pcs, curthread);

		if (pccmd != NULL) {
			n = pccmd->got_signal;
			pccmd->got_signal = 0;
		} else {
			n = 0;
		}
		cuse_server_unlock(pcs);

		*(int *)data = n;

		break;

	/*
	 * Store the per-file handle given in "data" in all command slots
	 * of the client whose command the current thread has entered.
	 */
	case CUSE_IOCTL_SET_PFH:

		cuse_server_lock(pcs);
		pccmd = cuse_server_find_command(pcs, curthread);

		if (pccmd != NULL) {
			pcc = pccmd->client;
			for (n = 0; n != CUSE_CMD_MAX; n++) {
				pcc->cmds[n].sub.per_file_handle = *(uintptr_t *)data;
			}
		} else {
			error = ENXIO;
		}
		cuse_server_unlock(pcs);
		break;

	/* Create a client device node; requires the PRIV_DRIVER privilege. */
	case CUSE_IOCTL_CREATE_DEV:

		error = priv_check(curthread, PRIV_DRIVER);
		if (error)
			break;

		pcd = (void *)data;

		/* filter input */

		/* force NUL termination of the user supplied name */
		pcd->devname[sizeof(pcd->devname) - 1] = 0;

		if (pcd->devname[0] == 0) {
			error = EINVAL;
			break;
		}
		cuse_str_filter(pcd->devname);

		pcd->permissions &= 0777;

		/* try to allocate a character device */

		pcsd = malloc(sizeof(*pcsd), M_CUSE, M_WAITOK | M_ZERO);

		pcsd->server = pcs;

		pcsd->user_dev = pcd->dev;

		pcsd->kern_dev = make_dev_credf(MAKEDEV_CHECKNAME,
		    &cuse_client_devsw, 0, NULL, pcd->user_id, pcd->group_id,
		    pcd->permissions, "%s", pcd->devname);

		if (pcsd->kern_dev == NULL) {
			free(pcsd, M_CUSE);
			error = ENOMEM;
			break;
		}
		/* link the device node back to its record */
		pcsd->kern_dev->si_drv1 = pcsd;

		cuse_server_lock(pcs);
		TAILQ_INSERT_TAIL(&pcs->hdev, pcsd, entry);
		cuse_server_unlock(pcs);

		break;

	/*
	 * Destroy every device of this server whose user_dev matches the
	 * pointer in "data"; requires the PRIV_DRIVER privilege.  Returns
	 * EINVAL when nothing matched.
	 */
	case CUSE_IOCTL_DESTROY_DEV:

		error = priv_check(curthread, PRIV_DRIVER);
		if (error)
			break;

		cuse_server_lock(pcs);

		error = EINVAL;

		pcsd = TAILQ_FIRST(&pcs->hdev);
		while (pcsd != NULL) {
			if (pcsd->user_dev == *(struct cuse_dev **)data) {
				TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
				cuse_server_unlock(pcs);
				cuse_server_free_dev(pcsd);
				cuse_server_lock(pcs);
				error = 0;
				/* the lock was dropped - restart from the head */
				pcsd = TAILQ_FIRST(&pcs->hdev);
			} else {
				pcsd = TAILQ_NEXT(pcsd, entry);
			}
		}

		cuse_server_unlock(pcs);
		break;

	/*
	 * Transfer data for the command entered by the current thread.
	 * Peer pointers below CUSE_BUF_MAX_PTR address the client's ioctl
	 * buffer; all others address the client process directly.
	 */
	case CUSE_IOCTL_WRITE_DATA:
	case CUSE_IOCTL_READ_DATA:

		cuse_server_lock(pcs);
		pchk = (struct cuse_data_chunk *)data;

		pccmd = cuse_server_find_command(pcs, curthread);

		if (pccmd == NULL) {
			error = ENXIO;	/* invalid request */
		} else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
			error = EFAULT;	/* NULL pointer */
		} else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
			error = cuse_server_ioctl_copy_locked(pcs, pccmd,
			    pchk, cmd == CUSE_IOCTL_READ_DATA);
		} else {
			error = cuse_server_data_copy_locked(pcs, pccmd,
			    pchk, cmd == CUSE_IOCTL_READ_DATA);
		}
		cuse_server_unlock(pcs);
		break;

	case CUSE_IOCTL_SELWAKEUP:
		cuse_server_lock(pcs);
		/*
		 * We don't know which direction caused the event.
		 * Wakeup both!
		 */
		cuse_server_wakeup_all_client_locked(pcs);
		cuse_server_unlock(pcs);
		break;

	default:
		error = ENXIO;
		break;
	}
	return (error);
}
1302 
1303 static int
1304 cuse_server_poll(struct cdev *dev, int events, struct thread *td)
1305 {
1306 	return (events & (POLLHUP | POLLPRI | POLLIN |
1307 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1308 }
1309 
1310 static int
1311 cuse_server_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1312     vm_size_t size, struct vm_object **object, int nprot)
1313 {
1314 	uint32_t page_nr = *offset / PAGE_SIZE;
1315 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1316 	struct cuse_memory *mem;
1317 	struct cuse_server *pcs;
1318 	int error;
1319 
1320 	error = cuse_server_get(&pcs);
1321 	if (error != 0)
1322 		return (error);
1323 
1324 	cuse_server_lock(pcs);
1325 	/* lookup memory structure */
1326 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
1327 		if (mem->alloc_nr == alloc_nr)
1328 			break;
1329 	}
1330 	if (mem == NULL) {
1331 		cuse_server_unlock(pcs);
1332 		return (ENOMEM);
1333 	}
1334 	/* verify page offset */
1335 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1336 	if (page_nr >= mem->page_count) {
1337 		cuse_server_unlock(pcs);
1338 		return (ENXIO);
1339 	}
1340 	/* verify mmap size */
1341 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1342 	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1343 		cuse_server_unlock(pcs);
1344 		return (EINVAL);
1345 	}
1346 	vm_object_reference(mem->object);
1347 	*object = mem->object;
1348 	cuse_server_unlock(pcs);
1349 
1350 	/* set new VM object offset to use */
1351 	*offset = page_nr * PAGE_SIZE;
1352 
1353 	/* success */
1354 	return (0);
1355 }
1356 
1357 /*------------------------------------------------------------------------*
1358  *	CUSE CLIENT PART
1359  *------------------------------------------------------------------------*/
1360 static void
1361 cuse_client_free(void *arg)
1362 {
1363 	struct cuse_client *pcc = arg;
1364 	struct cuse_client_command *pccmd;
1365 	struct cuse_server *pcs;
1366 	int n;
1367 
1368 	pcs = pcc->server;
1369 
1370 	cuse_server_lock(pcs);
1371 	cuse_client_is_closing(pcc);
1372 	TAILQ_REMOVE(&pcs->hcli, pcc, entry);
1373 	cuse_server_unlock(pcs);
1374 
1375 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1376 
1377 		pccmd = &pcc->cmds[n];
1378 
1379 		sx_destroy(&pccmd->sx);
1380 		cv_destroy(&pccmd->cv);
1381 	}
1382 
1383 	free(pcc, M_CUSE);
1384 
1385 	/* drop reference on server */
1386 	cuse_server_unref(pcs);
1387 }
1388 
1389 static int
1390 cuse_client_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
1391 {
1392 	struct cuse_client_command *pccmd;
1393 	struct cuse_server_dev *pcsd;
1394 	struct cuse_client *pcc;
1395 	struct cuse_server *pcs;
1396 	struct cuse_dev *pcd;
1397 	int error;
1398 	int n;
1399 
1400 	pcsd = dev->si_drv1;
1401 	if (pcsd != NULL) {
1402 		pcs = pcsd->server;
1403 		pcd = pcsd->user_dev;
1404 
1405 		cuse_server_lock(pcs);
1406 		/*
1407 		 * Check that the refcount didn't wrap and that the
1408 		 * same process is not both client and server. This
1409 		 * can easily lead to deadlocks when destroying the
1410 		 * CUSE character device nodes:
1411 		 */
1412 		pcs->refs++;
1413 		if (pcs->refs < 0 || pcs->pid == curproc->p_pid) {
1414 			/* overflow or wrong PID */
1415 			pcs->refs--;
1416 			cuse_server_unlock(pcs);
1417 			return (EINVAL);
1418 		}
1419 		cuse_server_unlock(pcs);
1420 	} else {
1421 		return (EINVAL);
1422 	}
1423 
1424 	pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
1425 	if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
1426 		printf("Cuse: Cannot set cdevpriv.\n");
1427 		/* drop reference on server */
1428 		cuse_server_unref(pcs);
1429 		free(pcc, M_CUSE);
1430 		return (ENOMEM);
1431 	}
1432 	pcc->fflags = fflags;
1433 	pcc->server_dev = pcsd;
1434 	pcc->server = pcs;
1435 
1436 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1437 
1438 		pccmd = &pcc->cmds[n];
1439 
1440 		pccmd->sub.dev = pcd;
1441 		pccmd->sub.command = n;
1442 		pccmd->client = pcc;
1443 
1444 		sx_init(&pccmd->sx, "cuse-client-sx");
1445 		cv_init(&pccmd->cv, "cuse-client-cv");
1446 	}
1447 
1448 	cuse_server_lock(pcs);
1449 
1450 	/* cuse_client_free() assumes that the client is listed somewhere! */
1451 	/* always enqueue */
1452 
1453 	TAILQ_INSERT_TAIL(&pcs->hcli, pcc, entry);
1454 
1455 	/* check if server is closing */
1456 	if ((pcs->is_closing != 0) || (dev->si_drv1 == NULL)) {
1457 		error = EINVAL;
1458 	} else {
1459 		error = 0;
1460 	}
1461 	cuse_server_unlock(pcs);
1462 
1463 	if (error) {
1464 		devfs_clear_cdevpriv();	/* XXX bugfix */
1465 		return (error);
1466 	}
1467 	pccmd = &pcc->cmds[CUSE_CMD_OPEN];
1468 
1469 	cuse_cmd_lock(pccmd);
1470 
1471 	cuse_server_lock(pcs);
1472 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1473 
1474 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1475 	cuse_server_unlock(pcs);
1476 
1477 	if (error < 0) {
1478 		error = cuse_convert_error(error);
1479 	} else {
1480 		error = 0;
1481 	}
1482 
1483 	cuse_cmd_unlock(pccmd);
1484 
1485 	if (error)
1486 		devfs_clear_cdevpriv();	/* XXX bugfix */
1487 
1488 	return (error);
1489 }
1490 
1491 static int
1492 cuse_client_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
1493 {
1494 	struct cuse_client_command *pccmd;
1495 	struct cuse_client *pcc;
1496 	struct cuse_server *pcs;
1497 	int error;
1498 
1499 	error = cuse_client_get(&pcc);
1500 	if (error != 0)
1501 		return (0);
1502 
1503 	pccmd = &pcc->cmds[CUSE_CMD_CLOSE];
1504 	pcs = pcc->server;
1505 
1506 	cuse_cmd_lock(pccmd);
1507 
1508 	cuse_server_lock(pcs);
1509 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1510 
1511 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1512 	cuse_cmd_unlock(pccmd);
1513 
1514 	cuse_client_is_closing(pcc);
1515 	cuse_server_unlock(pcs);
1516 
1517 	return (0);
1518 }
1519 
1520 static void
1521 cuse_client_kqfilter_poll(struct cdev *dev, struct cuse_client *pcc)
1522 {
1523 	struct cuse_server *pcs = pcc->server;
1524 	int temp;
1525 
1526 	cuse_server_lock(pcs);
1527 	temp = (pcc->cflags & (CUSE_CLI_KNOTE_HAS_READ |
1528 	    CUSE_CLI_KNOTE_HAS_WRITE));
1529 	pcc->cflags &= ~(CUSE_CLI_KNOTE_NEED_READ |
1530 	    CUSE_CLI_KNOTE_NEED_WRITE);
1531 	cuse_server_unlock(pcs);
1532 
1533 	if (temp != 0) {
1534 		/* get the latest polling state from the server */
1535 		temp = cuse_client_poll(dev, POLLIN | POLLOUT, NULL);
1536 
1537 		if (temp & (POLLIN | POLLOUT)) {
1538 			cuse_server_lock(pcs);
1539 			if (temp & POLLIN)
1540 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_READ;
1541 			if (temp & POLLOUT)
1542 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_WRITE;
1543 
1544 			/* make sure the "knote" gets woken up */
1545 			cuse_server_wakeup_locked(pcc->server);
1546 			cuse_server_unlock(pcs);
1547 		}
1548 	}
1549 }
1550 
1551 static int
1552 cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
1553 {
1554 	struct cuse_client_command *pccmd;
1555 	struct cuse_client *pcc;
1556 	struct cuse_server *pcs;
1557 	int error;
1558 	int len;
1559 
1560 	error = cuse_client_get(&pcc);
1561 	if (error != 0)
1562 		return (error);
1563 
1564 	pccmd = &pcc->cmds[CUSE_CMD_READ];
1565 	pcs = pcc->server;
1566 
1567 	if (uio->uio_segflg != UIO_USERSPACE) {
1568 		return (EINVAL);
1569 	}
1570 	uio->uio_segflg = UIO_NOCOPY;
1571 
1572 	cuse_cmd_lock(pccmd);
1573 
1574 	while (uio->uio_resid != 0) {
1575 
1576 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1577 			error = ENOMEM;
1578 			break;
1579 		}
1580 		len = uio->uio_iov->iov_len;
1581 
1582 		cuse_server_lock(pcs);
1583 		cuse_client_send_command_locked(pccmd,
1584 		    (uintptr_t)uio->uio_iov->iov_base,
1585 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1586 
1587 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1588 		cuse_server_unlock(pcs);
1589 
1590 		if (error < 0) {
1591 			error = cuse_convert_error(error);
1592 			break;
1593 		} else if (error == len) {
1594 			error = uiomove(NULL, error, uio);
1595 			if (error)
1596 				break;
1597 		} else {
1598 			error = uiomove(NULL, error, uio);
1599 			break;
1600 		}
1601 	}
1602 	cuse_cmd_unlock(pccmd);
1603 
1604 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1605 
1606 	if (error == EWOULDBLOCK)
1607 		cuse_client_kqfilter_poll(dev, pcc);
1608 
1609 	return (error);
1610 }
1611 
1612 static int
1613 cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
1614 {
1615 	struct cuse_client_command *pccmd;
1616 	struct cuse_client *pcc;
1617 	struct cuse_server *pcs;
1618 	int error;
1619 	int len;
1620 
1621 	error = cuse_client_get(&pcc);
1622 	if (error != 0)
1623 		return (error);
1624 
1625 	pccmd = &pcc->cmds[CUSE_CMD_WRITE];
1626 	pcs = pcc->server;
1627 
1628 	if (uio->uio_segflg != UIO_USERSPACE) {
1629 		return (EINVAL);
1630 	}
1631 	uio->uio_segflg = UIO_NOCOPY;
1632 
1633 	cuse_cmd_lock(pccmd);
1634 
1635 	while (uio->uio_resid != 0) {
1636 
1637 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1638 			error = ENOMEM;
1639 			break;
1640 		}
1641 		len = uio->uio_iov->iov_len;
1642 
1643 		cuse_server_lock(pcs);
1644 		cuse_client_send_command_locked(pccmd,
1645 		    (uintptr_t)uio->uio_iov->iov_base,
1646 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1647 
1648 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1649 		cuse_server_unlock(pcs);
1650 
1651 		if (error < 0) {
1652 			error = cuse_convert_error(error);
1653 			break;
1654 		} else if (error == len) {
1655 			error = uiomove(NULL, error, uio);
1656 			if (error)
1657 				break;
1658 		} else {
1659 			error = uiomove(NULL, error, uio);
1660 			break;
1661 		}
1662 	}
1663 	cuse_cmd_unlock(pccmd);
1664 
1665 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1666 
1667 	if (error == EWOULDBLOCK)
1668 		cuse_client_kqfilter_poll(dev, pcc);
1669 
1670 	return (error);
1671 }
1672 
1673 int
1674 cuse_client_ioctl(struct cdev *dev, unsigned long cmd,
1675     caddr_t data, int fflag, struct thread *td)
1676 {
1677 	struct cuse_client_command *pccmd;
1678 	struct cuse_client *pcc;
1679 	struct cuse_server *pcs;
1680 	int error;
1681 	int len;
1682 
1683 	error = cuse_client_get(&pcc);
1684 	if (error != 0)
1685 		return (error);
1686 
1687 	len = IOCPARM_LEN(cmd);
1688 	if (len > CUSE_BUFFER_MAX)
1689 		return (ENOMEM);
1690 
1691 	pccmd = &pcc->cmds[CUSE_CMD_IOCTL];
1692 	pcs = pcc->server;
1693 
1694 	cuse_cmd_lock(pccmd);
1695 
1696 	if (cmd & (IOC_IN | IOC_VOID))
1697 		memcpy(pcc->ioctl_buffer, data, len);
1698 
1699 	/*
1700 	 * When the ioctl-length is zero drivers can pass information
1701 	 * through the data pointer of the ioctl. Make sure this information
1702 	 * is forwarded to the driver.
1703 	 */
1704 
1705 	cuse_server_lock(pcs);
1706 	cuse_client_send_command_locked(pccmd,
1707 	    (len == 0) ? *(long *)data : CUSE_BUF_MIN_PTR,
1708 	    (unsigned long)cmd, pcc->fflags,
1709 	    (fflag & O_NONBLOCK) ? IO_NDELAY : 0);
1710 
1711 	error = cuse_client_receive_command_locked(pccmd, data, len);
1712 	cuse_server_unlock(pcs);
1713 
1714 	if (error < 0) {
1715 		error = cuse_convert_error(error);
1716 	} else {
1717 		error = 0;
1718 	}
1719 
1720 	if (cmd & IOC_OUT)
1721 		memcpy(data, pcc->ioctl_buffer, len);
1722 
1723 	cuse_cmd_unlock(pccmd);
1724 
1725 	if (error == EWOULDBLOCK)
1726 		cuse_client_kqfilter_poll(dev, pcc);
1727 
1728 	return (error);
1729 }
1730 
1731 static int
1732 cuse_client_poll(struct cdev *dev, int events, struct thread *td)
1733 {
1734 	struct cuse_client_command *pccmd;
1735 	struct cuse_client *pcc;
1736 	struct cuse_server *pcs;
1737 	unsigned long temp;
1738 	int error;
1739 	int revents;
1740 
1741 	error = cuse_client_get(&pcc);
1742 	if (error != 0)
1743 		goto pollnval;
1744 
1745 	temp = 0;
1746 	pcs = pcc->server;
1747 
1748 	if (events & (POLLPRI | POLLIN | POLLRDNORM))
1749 		temp |= CUSE_POLL_READ;
1750 
1751 	if (events & (POLLOUT | POLLWRNORM))
1752 		temp |= CUSE_POLL_WRITE;
1753 
1754 	if (events & POLLHUP)
1755 		temp |= CUSE_POLL_ERROR;
1756 
1757 	pccmd = &pcc->cmds[CUSE_CMD_POLL];
1758 
1759 	cuse_cmd_lock(pccmd);
1760 
1761 	/* Need to selrecord() first to not loose any events. */
1762 	if (temp != 0 && td != NULL)
1763 		selrecord(td, &pcs->selinfo);
1764 
1765 	cuse_server_lock(pcs);
1766 	cuse_client_send_command_locked(pccmd,
1767 	    0, temp, pcc->fflags, IO_NDELAY);
1768 
1769 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1770 	cuse_server_unlock(pcs);
1771 
1772 	cuse_cmd_unlock(pccmd);
1773 
1774 	if (error < 0) {
1775 		goto pollnval;
1776 	} else {
1777 		revents = 0;
1778 		if (error & CUSE_POLL_READ)
1779 			revents |= (events & (POLLPRI | POLLIN | POLLRDNORM));
1780 		if (error & CUSE_POLL_WRITE)
1781 			revents |= (events & (POLLOUT | POLLWRNORM));
1782 		if (error & CUSE_POLL_ERROR)
1783 			revents |= (events & POLLHUP);
1784 	}
1785 	return (revents);
1786 
1787 pollnval:
1788 	/* XXX many clients don't understand POLLNVAL */
1789 	return (events & (POLLHUP | POLLPRI | POLLIN |
1790 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1791 }
1792 
1793 static int
1794 cuse_client_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1795     vm_size_t size, struct vm_object **object, int nprot)
1796 {
1797 	uint32_t page_nr = *offset / PAGE_SIZE;
1798 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1799 	struct cuse_memory *mem;
1800 	struct cuse_client *pcc;
1801 	struct cuse_server *pcs;
1802 	int error;
1803 
1804 	error = cuse_client_get(&pcc);
1805 	if (error != 0)
1806 		return (error);
1807 
1808 	pcs = pcc->server;
1809 
1810 	cuse_server_lock(pcs);
1811 	/* lookup memory structure */
1812 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
1813 		if (mem->alloc_nr == alloc_nr)
1814 			break;
1815 	}
1816 	if (mem == NULL) {
1817 		cuse_server_unlock(pcs);
1818 		return (ENOMEM);
1819 	}
1820 	/* verify page offset */
1821 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1822 	if (page_nr >= mem->page_count) {
1823 		cuse_server_unlock(pcs);
1824 		return (ENXIO);
1825 	}
1826 	/* verify mmap size */
1827 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1828 	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1829 		cuse_server_unlock(pcs);
1830 		return (EINVAL);
1831 	}
1832 	vm_object_reference(mem->object);
1833 	*object = mem->object;
1834 	cuse_server_unlock(pcs);
1835 
1836 	/* set new VM object offset to use */
1837 	*offset = page_nr * PAGE_SIZE;
1838 
1839 	/* success */
1840 	return (0);
1841 }
1842 
1843 static void
1844 cuse_client_kqfilter_read_detach(struct knote *kn)
1845 {
1846 	struct cuse_client *pcc;
1847 	struct cuse_server *pcs;
1848 
1849 	pcc = kn->kn_hook;
1850 	pcs = pcc->server;
1851 
1852 	cuse_server_lock(pcs);
1853 	knlist_remove(&pcs->selinfo.si_note, kn, 1);
1854 	cuse_server_unlock(pcs);
1855 }
1856 
1857 static void
1858 cuse_client_kqfilter_write_detach(struct knote *kn)
1859 {
1860 	struct cuse_client *pcc;
1861 	struct cuse_server *pcs;
1862 
1863 	pcc = kn->kn_hook;
1864 	pcs = pcc->server;
1865 
1866 	cuse_server_lock(pcs);
1867 	knlist_remove(&pcs->selinfo.si_note, kn, 1);
1868 	cuse_server_unlock(pcs);
1869 }
1870 
1871 static int
1872 cuse_client_kqfilter_read_event(struct knote *kn, long hint)
1873 {
1874 	struct cuse_client *pcc;
1875 
1876 	pcc = kn->kn_hook;
1877 
1878 	mtx_assert(&pcc->server->mtx, MA_OWNED);
1879 
1880 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_READ) ? 1 : 0);
1881 }
1882 
1883 static int
1884 cuse_client_kqfilter_write_event(struct knote *kn, long hint)
1885 {
1886 	struct cuse_client *pcc;
1887 
1888 	pcc = kn->kn_hook;
1889 
1890 	mtx_assert(&pcc->server->mtx, MA_OWNED);
1891 
1892 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_WRITE) ? 1 : 0);
1893 }
1894 
1895 static int
1896 cuse_client_kqfilter(struct cdev *dev, struct knote *kn)
1897 {
1898 	struct cuse_client *pcc;
1899 	struct cuse_server *pcs;
1900 	int error;
1901 
1902 	error = cuse_client_get(&pcc);
1903 	if (error != 0)
1904 		return (error);
1905 
1906 	pcs = pcc->server;
1907 
1908 	cuse_server_lock(pcs);
1909 	switch (kn->kn_filter) {
1910 	case EVFILT_READ:
1911 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_READ;
1912 		kn->kn_hook = pcc;
1913 		kn->kn_fop = &cuse_client_kqfilter_read_ops;
1914 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1915 		break;
1916 	case EVFILT_WRITE:
1917 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_WRITE;
1918 		kn->kn_hook = pcc;
1919 		kn->kn_fop = &cuse_client_kqfilter_write_ops;
1920 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1921 		break;
1922 	default:
1923 		error = EINVAL;
1924 		break;
1925 	}
1926 	cuse_server_unlock(pcs);
1927 
1928 	if (error == 0)
1929 		cuse_client_kqfilter_poll(dev, pcc);
1930 	return (error);
1931 }
1932