xref: /freebsd/sys/fs/cuse/cuse.c (revision 774bb1c256fbc58a7e8d0d1f7d6427007105b334)
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2010-2020 Hans Petter Selasky. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/stdint.h>
28 #include <sys/stddef.h>
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/conf.h>
33 #include <sys/kernel.h>
34 #include <sys/bus.h>
35 #include <sys/linker_set.h>
36 #include <sys/module.h>
37 #include <sys/lock.h>
38 #include <sys/mutex.h>
39 #include <sys/condvar.h>
40 #include <sys/sysctl.h>
41 #include <sys/unistd.h>
42 #include <sys/malloc.h>
43 #include <sys/priv.h>
44 #include <sys/uio.h>
45 #include <sys/poll.h>
46 #include <sys/sx.h>
47 #include <sys/rwlock.h>
48 #include <sys/queue.h>
49 #include <sys/fcntl.h>
50 #include <sys/proc.h>
51 #include <sys/vnode.h>
52 #include <sys/selinfo.h>
53 #include <sys/ptrace.h>
54 #include <sys/sysent.h>
55 
56 #include <machine/bus.h>
57 
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60 #include <vm/vm_object.h>
61 #include <vm/vm_page.h>
62 #include <vm/vm_pager.h>
63 
64 #include <fs/cuse/cuse_defs.h>
65 #include <fs/cuse/cuse_ioctl.h>
66 
67 static int
68 cuse_modevent(module_t mod, int type, void *data)
69 {
70 	switch (type) {
71 	case MOD_LOAD:
72 	case MOD_UNLOAD:
73 		return (0);
74 	default:
75 		return (EOPNOTSUPP);
76 	}
77 }
78 
79 static moduledata_t cuse_mod = {
80 	.name = "cuse",
81 	.evhand = &cuse_modevent,
82 };
83 
84 DECLARE_MODULE(cuse, cuse_mod, SI_SUB_DEVFS, SI_ORDER_FIRST);
85 MODULE_VERSION(cuse, 1);
86 
87 /*
88  * Prevent cuse4bsd.ko and cuse.ko from loading at the same time by
89  * declaring support for the cuse4bsd interface in cuse.ko:
90  */
91 MODULE_VERSION(cuse4bsd, 1);
92 
93 #ifdef FEATURE
94 FEATURE(cuse, "Userspace character devices");
95 #endif
96 
/* Forward declarations of structure types referenced by the definitions below. */
97 struct cuse_command;
98 struct cuse_server;
99 struct cuse_client;
100 
/*
 * One command slot of a client; struct cuse_client embeds an array of
 * CUSE_CMD_MAX of these.
 *
 *  entry      - linkage on the server's "head" queue of pending commands;
 *               the code uses entry.tqe_prev == NULL as "not queued"
 *  sub        - command record copied out to the server by
 *               CUSE_IOCTL_GET_COMMAND
 *  sx         - lock taken exclusively by cuse_cmd_lock()
 *  cv         - condition variable the client side sleeps on (together
 *               with the server mutex) while waiting for completion
 *  entered    - server thread that fetched this command, or NULL; set by
 *               CUSE_IOCTL_GET_COMMAND, cleared by CUSE_IOCTL_SYNC_COMMAND
 *  client     - client this slot is used with (assigned outside the code
 *               shown here)
 *  proc_curr  - process of the thread currently inside
 *               cuse_client_receive_command_locked(), else NULL
 *  proc_refs  - number of server-side copy operations in progress that
 *               rely on proc_curr
 *  got_signal - set when the waiting client was interrupted; read and
 *               cleared by CUSE_IOCTL_GET_SIG
 *  error      - completion status stored by CUSE_IOCTL_SYNC_COMMAND
 *  command    - CUSE_CMD_NONE until CUSE_IOCTL_SYNC_COMMAND stores
 *               CUSE_CMD_SYNC to signal completion
 */
101 struct cuse_client_command {
102 	TAILQ_ENTRY(cuse_client_command) entry;
103 	struct cuse_command sub;
104 	struct sx sx;
105 	struct cv cv;
106 	struct thread *entered;
107 	struct cuse_client *client;
108 	struct proc *proc_curr;
109 	int	proc_refs;
110 	int	got_signal;
111 	int	error;
112 	int	command;
113 };
114 
/*
 * One memory allocation made by a server through CUSE_IOCTL_ALLOC_MEMORY.
 *
 *  entry      - linkage on the owning server's "hmem" list
 *  object     - VM object obtained from vm_pager_allocate(); released
 *               with vm_object_deallocate() when the allocation is freed
 *  page_count - size of the allocation in pages
 *  alloc_nr   - allocation number chosen by the server; at most one
 *               entry per number is kept on a server's list
 */
115 struct cuse_memory {
116 	TAILQ_ENTRY(cuse_memory) entry;
117 	vm_object_t object;
118 	uint32_t page_count;
119 	uint32_t alloc_nr;
120 };
121 
/*
 * One character device created by a server via CUSE_IOCTL_CREATE_DEV.
 *
 *  entry    - linkage on the owning server's "hdev" list
 *  server   - server that created the device
 *  kern_dev - device node returned by make_dev_credf(); its si_drv1
 *             points back at this structure until the device is freed
 *  user_dev - pointer value supplied by the server at creation time; it
 *             is only compared (CUSE_IOCTL_DESTROY_DEV) in the code shown
 *             here, never dereferenced
 */
122 struct cuse_server_dev {
123 	TAILQ_ENTRY(cuse_server_dev) entry;
124 	struct cuse_server *server;
125 	struct cdev *kern_dev;
126 	struct cuse_dev *user_dev;
127 };
128 
/*
 * Per-open state of /dev/cuse, allocated in cuse_server_open().
 *
 *  entry      - linkage on the global cuse_server_head list
 *  head       - queue of client commands waiting to be fetched with
 *               CUSE_IOCTL_GET_COMMAND
 *  hdev       - devices created by this server
 *  hcli       - clients attached to this server
 *  hmem       - memory allocations owned by this server
 *  mtx        - mutex taken by cuse_server_lock()
 *  cv         - signalled when a command is appended to "head"
 *  selinfo    - select/kqueue state; its knote list is initialised with
 *               "mtx" as its lock
 *  pid        - PID of the process that opened /dev/cuse
 *  is_closing - set once by cuse_server_is_closing(); never cleared in
 *               the code shown here
 *  refs       - reference count; the structure is torn down when
 *               cuse_server_unref() drops it to zero
 */
129 struct cuse_server {
130 	TAILQ_ENTRY(cuse_server) entry;
131 	TAILQ_HEAD(, cuse_client_command) head;
132 	TAILQ_HEAD(, cuse_server_dev) hdev;
133 	TAILQ_HEAD(, cuse_client) hcli;
134 	TAILQ_HEAD(, cuse_memory) hmem;
135 	struct mtx mtx;
136 	struct cv cv;
137 	struct selinfo selinfo;
138 	pid_t	pid;
139 	int	is_closing;
140 	int	refs;
141 };
142 
/*
 * Per-client state.
 *
 *  entry        - linkage on the server's "hcli" list
 *  entry_ref    - second list linkage (its users are outside the code
 *                 shown here)
 *  cmds         - one command slot per command type, CUSE_CMD_MAX total
 *  server       - server this client belongs to
 *  server_dev   - device the client was opened on; reset to NULL by
 *                 cuse_client_is_closing()
 *  ioctl_buffer - CUSE_BUFFER_MAX byte staging buffer that
 *                 cuse_server_ioctl_copy_locked() copies into / out of
 *  fflags       - file flags
 *  cflags       - client flags, a bit mask of the CUSE_CLI_* values
 *                 defined inside the structure body
 */
143 struct cuse_client {
144 	TAILQ_ENTRY(cuse_client) entry;
145 	TAILQ_ENTRY(cuse_client) entry_ref;
146 	struct cuse_client_command cmds[CUSE_CMD_MAX];
147 	struct cuse_server *server;
148 	struct cuse_server_dev *server_dev;
149 
150 	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);
151 
152 	int	fflags;			/* file flags */
153 	int	cflags;			/* client flags */
154 #define	CUSE_CLI_IS_CLOSING 0x01
155 #define	CUSE_CLI_KNOTE_NEED_READ 0x02
156 #define	CUSE_CLI_KNOTE_NEED_WRITE 0x04
157 #define	CUSE_CLI_KNOTE_HAS_READ 0x08
158 #define	CUSE_CLI_KNOTE_HAS_WRITE 0x10
159 };
160 
/*
 * Non-zero when the CUSE_CLI_IS_CLOSING bit is set in (pcc)->cflags.
 * The result is the masked flag value, not a normalised 0/1.
 */
161 #define	CUSE_CLIENT_CLOSING(pcc) \
162     ((pcc)->cflags & CUSE_CLI_IS_CLOSING)
163 
/*
 * File-scope state.
 *
 *  M_CUSE             - malloc type used for the allocations in this file
 *  cuse_server_head   - list of all servers; entries are inserted and
 *                       removed with cuse_global_mtx held
 *  cuse_global_mtx    - mutex taken by cuse_global_lock()
 *  cuse_dev           - the /dev/cuse node created by cuse_kern_init()
 *  cuse_alloc_unit    - unit table: slot n is in use when the pointer is
 *                       non-NULL and then names the owning server
 *  cuse_alloc_unit_id - parallel array holding the combined ID/unit value
 *                       for each slot in use
 */
164 static	MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");
165 
166 static TAILQ_HEAD(, cuse_server) cuse_server_head;
167 static struct mtx cuse_global_mtx;
168 static struct cdev *cuse_dev;
169 static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
170 static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
171 
172 static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);
173 static void cuse_client_kqfilter_read_detach(struct knote *kn);
174 static void cuse_client_kqfilter_write_detach(struct knote *kn);
175 static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
176 static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);
177 
178 static struct filterops cuse_client_kqfilter_read_ops = {
179 	.f_isfd = 1,
180 	.f_detach = cuse_client_kqfilter_read_detach,
181 	.f_event = cuse_client_kqfilter_read_event,
182 };
183 
184 static struct filterops cuse_client_kqfilter_write_ops = {
185 	.f_isfd = 1,
186 	.f_detach = cuse_client_kqfilter_write_detach,
187 	.f_event = cuse_client_kqfilter_write_event,
188 };
189 
190 static d_open_t cuse_client_open;
191 static d_close_t cuse_client_close;
192 static d_ioctl_t cuse_client_ioctl;
193 static d_read_t cuse_client_read;
194 static d_write_t cuse_client_write;
195 static d_poll_t cuse_client_poll;
196 static d_mmap_single_t cuse_client_mmap_single;
197 static d_kqfilter_t cuse_client_kqfilter;
198 
199 static struct cdevsw cuse_client_devsw = {
200 	.d_version = D_VERSION,
201 	.d_open = cuse_client_open,
202 	.d_close = cuse_client_close,
203 	.d_ioctl = cuse_client_ioctl,
204 	.d_name = "cuse_client",
205 	.d_flags = D_TRACKCLOSE,
206 	.d_read = cuse_client_read,
207 	.d_write = cuse_client_write,
208 	.d_poll = cuse_client_poll,
209 	.d_mmap_single = cuse_client_mmap_single,
210 	.d_kqfilter = cuse_client_kqfilter,
211 };
212 
213 static d_open_t cuse_server_open;
214 static d_close_t cuse_server_close;
215 static d_ioctl_t cuse_server_ioctl;
216 static d_read_t cuse_server_read;
217 static d_write_t cuse_server_write;
218 static d_poll_t cuse_server_poll;
219 static d_mmap_single_t cuse_server_mmap_single;
220 
221 static struct cdevsw cuse_server_devsw = {
222 	.d_version = D_VERSION,
223 	.d_open = cuse_server_open,
224 	.d_close = cuse_server_close,
225 	.d_ioctl = cuse_server_ioctl,
226 	.d_name = "cuse_server",
227 	.d_flags = D_TRACKCLOSE,
228 	.d_read = cuse_server_read,
229 	.d_write = cuse_server_write,
230 	.d_poll = cuse_server_poll,
231 	.d_mmap_single = cuse_server_mmap_single,
232 };
233 
234 static void cuse_client_is_closing(struct cuse_client *);
235 static int cuse_free_unit_by_id_locked(struct cuse_server *, int);
236 
237 static void
238 cuse_global_lock(void)
239 {
240 	mtx_lock(&cuse_global_mtx);
241 }
242 
243 static void
244 cuse_global_unlock(void)
245 {
246 	mtx_unlock(&cuse_global_mtx);
247 }
248 
249 static void
250 cuse_server_lock(struct cuse_server *pcs)
251 {
252 	mtx_lock(&pcs->mtx);
253 }
254 
255 static void
256 cuse_server_unlock(struct cuse_server *pcs)
257 {
258 	mtx_unlock(&pcs->mtx);
259 }
260 
261 static void
262 cuse_cmd_lock(struct cuse_client_command *pccmd)
263 {
264 	sx_xlock(&pccmd->sx);
265 }
266 
267 static void
268 cuse_cmd_unlock(struct cuse_client_command *pccmd)
269 {
270 	sx_xunlock(&pccmd->sx);
271 }
272 
273 static void
274 cuse_kern_init(void *arg)
275 {
276 	TAILQ_INIT(&cuse_server_head);
277 
278 	mtx_init(&cuse_global_mtx, "cuse-global-mtx", NULL, MTX_DEF);
279 
280 	cuse_dev = make_dev(&cuse_server_devsw, 0,
281 	    UID_ROOT, GID_OPERATOR, 0600, "cuse");
282 
283 	printf("Cuse v%d.%d.%d @ /dev/cuse\n",
284 	    (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
285 	    (CUSE_VERSION >> 0) & 0xFF);
286 }
287 SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, NULL);
288 
289 static void
290 cuse_kern_uninit(void *arg)
291 {
292 	void *ptr;
293 
294 	while (1) {
295 		printf("Cuse: Please exit all /dev/cuse instances "
296 		    "and processes which have used this device.\n");
297 
298 		pause("DRAIN", 2 * hz);
299 
300 		cuse_global_lock();
301 		ptr = TAILQ_FIRST(&cuse_server_head);
302 		cuse_global_unlock();
303 
304 		if (ptr == NULL)
305 			break;
306 	}
307 
308 	if (cuse_dev != NULL)
309 		destroy_dev(cuse_dev);
310 
311 	mtx_destroy(&cuse_global_mtx);
312 }
313 SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
314 
315 static int
316 cuse_server_get(struct cuse_server **ppcs)
317 {
318 	struct cuse_server *pcs;
319 	int error;
320 
321 	error = devfs_get_cdevpriv((void **)&pcs);
322 	if (error != 0) {
323 		*ppcs = NULL;
324 		return (error);
325 	}
326 	if (pcs->is_closing) {
327 		*ppcs = NULL;
328 		return (EINVAL);
329 	}
330 	*ppcs = pcs;
331 	return (0);
332 }
333 
334 static void
335 cuse_server_is_closing(struct cuse_server *pcs)
336 {
337 	struct cuse_client *pcc;
338 
339 	if (pcs->is_closing)
340 		return;
341 
342 	pcs->is_closing = 1;
343 
344 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
345 		cuse_client_is_closing(pcc);
346 	}
347 }
348 
349 static struct cuse_client_command *
350 cuse_server_find_command(struct cuse_server *pcs, struct thread *td)
351 {
352 	struct cuse_client *pcc;
353 	int n;
354 
355 	if (pcs->is_closing)
356 		goto done;
357 
358 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
359 		if (CUSE_CLIENT_CLOSING(pcc))
360 			continue;
361 		for (n = 0; n != CUSE_CMD_MAX; n++) {
362 			if (pcc->cmds[n].entered == td)
363 				return (&pcc->cmds[n]);
364 		}
365 	}
366 done:
367 	return (NULL);
368 }
369 
/*
 * Sanitise a NUL-terminated string in place.
 *
 * ASCII letters, digits, '.', '_' and '/' are kept; every other
 * character is overwritten with '_'.
 */
static void
cuse_str_filter(char *ptr)
{
	for (; *ptr != 0; ptr++) {
		const int ch = *ptr;
		int keep;

		keep = (ch >= 'a' && ch <= 'z') ||
		    (ch >= 'A' && ch <= 'Z') ||
		    (ch >= '0' && ch <= '9') ||
		    ch == '.' || ch == '_' || ch == '/';

		if (!keep)
			*ptr = '_';
	}
}
397 
398 static int
399 cuse_convert_error(int error)
400 {
401 	;				/* indent fix */
402 	switch (error) {
403 	case CUSE_ERR_NONE:
404 		return (0);
405 	case CUSE_ERR_BUSY:
406 		return (EBUSY);
407 	case CUSE_ERR_WOULDBLOCK:
408 		return (EWOULDBLOCK);
409 	case CUSE_ERR_INVALID:
410 		return (EINVAL);
411 	case CUSE_ERR_NO_MEMORY:
412 		return (ENOMEM);
413 	case CUSE_ERR_FAULT:
414 		return (EFAULT);
415 	case CUSE_ERR_SIGNAL:
416 		return (EINTR);
417 	case CUSE_ERR_NO_DEVICE:
418 		return (ENODEV);
419 	default:
420 		return (ENXIO);
421 	}
422 }
423 
424 static void
425 cuse_vm_memory_free(struct cuse_memory *mem)
426 {
427 	/* last user is gone - free */
428 	vm_object_deallocate(mem->object);
429 
430 	/* free CUSE memory */
431 	free(mem, M_CUSE);
432 }
433 
434 static int
435 cuse_server_alloc_memory(struct cuse_server *pcs, uint32_t alloc_nr,
436     uint32_t page_count)
437 {
438 	struct cuse_memory *temp;
439 	struct cuse_memory *mem;
440 	vm_object_t object;
441 	int error;
442 
443 	mem = malloc(sizeof(*mem), M_CUSE, M_WAITOK | M_ZERO);
444 
445 	object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * page_count,
446 	    VM_PROT_DEFAULT, 0, curthread->td_ucred);
447 	if (object == NULL) {
448 		error = ENOMEM;
449 		goto error_0;
450 	}
451 
452 	cuse_server_lock(pcs);
453 	/* check if allocation number already exists */
454 	TAILQ_FOREACH(temp, &pcs->hmem, entry) {
455 		if (temp->alloc_nr == alloc_nr)
456 			break;
457 	}
458 	if (temp != NULL) {
459 		cuse_server_unlock(pcs);
460 		error = EBUSY;
461 		goto error_1;
462 	}
463 	mem->object = object;
464 	mem->page_count = page_count;
465 	mem->alloc_nr = alloc_nr;
466 	TAILQ_INSERT_TAIL(&pcs->hmem, mem, entry);
467 	cuse_server_unlock(pcs);
468 
469 	return (0);
470 
471 error_1:
472 	vm_object_deallocate(object);
473 error_0:
474 	free(mem, M_CUSE);
475 	return (error);
476 }
477 
478 static int
479 cuse_server_free_memory(struct cuse_server *pcs, uint32_t alloc_nr)
480 {
481 	struct cuse_memory *mem;
482 
483 	cuse_server_lock(pcs);
484 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
485 		if (mem->alloc_nr == alloc_nr)
486 			break;
487 	}
488 	if (mem == NULL) {
489 		cuse_server_unlock(pcs);
490 		return (EINVAL);
491 	}
492 	TAILQ_REMOVE(&pcs->hmem, mem, entry);
493 	cuse_server_unlock(pcs);
494 
495 	cuse_vm_memory_free(mem);
496 
497 	return (0);
498 }
499 
500 static int
501 cuse_client_get(struct cuse_client **ppcc)
502 {
503 	struct cuse_client *pcc;
504 	int error;
505 
506 	/* try to get private data */
507 	error = devfs_get_cdevpriv((void **)&pcc);
508 	if (error != 0) {
509 		*ppcc = NULL;
510 		return (error);
511 	}
512 	if (CUSE_CLIENT_CLOSING(pcc) || pcc->server->is_closing) {
513 		*ppcc = NULL;
514 		return (EINVAL);
515 	}
516 	*ppcc = pcc;
517 	return (0);
518 }
519 
520 static void
521 cuse_client_is_closing(struct cuse_client *pcc)
522 {
523 	struct cuse_client_command *pccmd;
524 	uint32_t n;
525 
526 	if (CUSE_CLIENT_CLOSING(pcc))
527 		return;
528 
529 	pcc->cflags |= CUSE_CLI_IS_CLOSING;
530 	pcc->server_dev = NULL;
531 
532 	for (n = 0; n != CUSE_CMD_MAX; n++) {
533 		pccmd = &pcc->cmds[n];
534 
535 		if (pccmd->entry.tqe_prev != NULL) {
536 			TAILQ_REMOVE(&pcc->server->head, pccmd, entry);
537 			pccmd->entry.tqe_prev = NULL;
538 		}
539 		cv_broadcast(&pccmd->cv);
540 	}
541 }
542 
543 static void
544 cuse_client_send_command_locked(struct cuse_client_command *pccmd,
545     uintptr_t data_ptr, unsigned long arg, int fflags, int ioflag)
546 {
547 	unsigned long cuse_fflags = 0;
548 	struct cuse_server *pcs;
549 
550 	if (fflags & FREAD)
551 		cuse_fflags |= CUSE_FFLAG_READ;
552 
553 	if (fflags & FWRITE)
554 		cuse_fflags |= CUSE_FFLAG_WRITE;
555 
556 	if (ioflag & IO_NDELAY)
557 		cuse_fflags |= CUSE_FFLAG_NONBLOCK;
558 #if defined(__LP64__)
559 	if (SV_CURPROC_FLAG(SV_ILP32))
560 		cuse_fflags |= CUSE_FFLAG_COMPAT32;
561 #endif
562 	pccmd->sub.fflags = cuse_fflags;
563 	pccmd->sub.data_pointer = data_ptr;
564 	pccmd->sub.argument = arg;
565 
566 	pcs = pccmd->client->server;
567 
568 	if ((pccmd->entry.tqe_prev == NULL) &&
569 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
570 	    (pcs->is_closing == 0)) {
571 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
572 		cv_signal(&pcs->cv);
573 	}
574 }
575 
576 static void
577 cuse_client_got_signal(struct cuse_client_command *pccmd)
578 {
579 	struct cuse_server *pcs;
580 
581 	pccmd->got_signal = 1;
582 
583 	pccmd = &pccmd->client->cmds[CUSE_CMD_SIGNAL];
584 
585 	pcs = pccmd->client->server;
586 
587 	if ((pccmd->entry.tqe_prev == NULL) &&
588 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
589 	    (pcs->is_closing == 0)) {
590 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
591 		cv_signal(&pcs->cv);
592 	}
593 }
594 
595 static int
596 cuse_client_receive_command_locked(struct cuse_client_command *pccmd,
597     uint8_t *arg_ptr, uint32_t arg_len)
598 {
599 	struct cuse_server *pcs;
600 	int error;
601 
602 	pcs = pccmd->client->server;
603 	error = 0;
604 
605 	pccmd->proc_curr = curthread->td_proc;
606 
607 	if (CUSE_CLIENT_CLOSING(pccmd->client) || pcs->is_closing) {
608 		error = CUSE_ERR_OTHER;
609 		goto done;
610 	}
611 	while (pccmd->command == CUSE_CMD_NONE) {
612 		if (error != 0) {
613 			cv_wait(&pccmd->cv, &pcs->mtx);
614 		} else {
615 			error = cv_wait_sig(&pccmd->cv, &pcs->mtx);
616 
617 			if (error != 0)
618 				cuse_client_got_signal(pccmd);
619 		}
620 		if (CUSE_CLIENT_CLOSING(pccmd->client) || pcs->is_closing) {
621 			error = CUSE_ERR_OTHER;
622 			goto done;
623 		}
624 	}
625 
626 	error = pccmd->error;
627 	pccmd->command = CUSE_CMD_NONE;
628 	cv_signal(&pccmd->cv);
629 
630 done:
631 
632 	/* wait until all process references are gone */
633 
634 	pccmd->proc_curr = NULL;
635 
636 	while (pccmd->proc_refs != 0)
637 		cv_wait(&pccmd->cv, &pcs->mtx);
638 
639 	return (error);
640 }
641 
642 /*------------------------------------------------------------------------*
643  *	CUSE SERVER PART
644  *------------------------------------------------------------------------*/
645 
646 static void
647 cuse_server_free_dev(struct cuse_server_dev *pcsd)
648 {
649 	struct cuse_server *pcs;
650 	struct cuse_client *pcc;
651 
652 	/* get server pointer */
653 	pcs = pcsd->server;
654 
655 	/* prevent creation of more devices */
656 	cuse_server_lock(pcs);
657 	if (pcsd->kern_dev != NULL)
658 		pcsd->kern_dev->si_drv1 = NULL;
659 
660 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
661 		if (pcc->server_dev == pcsd)
662 			cuse_client_is_closing(pcc);
663 	}
664 	cuse_server_unlock(pcs);
665 
666 	/* destroy device, if any */
667 	if (pcsd->kern_dev != NULL) {
668 		/* destroy device synchronously */
669 		destroy_dev(pcsd->kern_dev);
670 	}
671 	free(pcsd, M_CUSE);
672 }
673 
674 static void
675 cuse_server_unref(struct cuse_server *pcs)
676 {
677 	struct cuse_server_dev *pcsd;
678 	struct cuse_memory *mem;
679 
680 	cuse_server_lock(pcs);
681 	if (--(pcs->refs) != 0) {
682 		cuse_server_unlock(pcs);
683 		return;
684 	}
685 	cuse_server_is_closing(pcs);
686 	/* final client wakeup, if any */
687 	cuse_server_wakeup_all_client_locked(pcs);
688 
689 	cuse_global_lock();
690 	TAILQ_REMOVE(&cuse_server_head, pcs, entry);
691 	cuse_global_unlock();
692 
693 	while ((pcsd = TAILQ_FIRST(&pcs->hdev)) != NULL) {
694 		TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
695 		cuse_server_unlock(pcs);
696 		cuse_server_free_dev(pcsd);
697 		cuse_server_lock(pcs);
698 	}
699 
700 	cuse_free_unit_by_id_locked(pcs, -1);
701 
702 	while ((mem = TAILQ_FIRST(&pcs->hmem)) != NULL) {
703 		TAILQ_REMOVE(&pcs->hmem, mem, entry);
704 		cuse_server_unlock(pcs);
705 		cuse_vm_memory_free(mem);
706 		cuse_server_lock(pcs);
707 	}
708 
709 	knlist_clear(&pcs->selinfo.si_note, 1);
710 	knlist_destroy(&pcs->selinfo.si_note);
711 
712 	cuse_server_unlock(pcs);
713 
714 	seldrain(&pcs->selinfo);
715 
716 	cv_destroy(&pcs->cv);
717 
718 	mtx_destroy(&pcs->mtx);
719 
720 	free(pcs, M_CUSE);
721 }
722 
723 static int
724 cuse_server_do_close(struct cuse_server *pcs)
725 {
726 	int retval;
727 
728 	cuse_server_lock(pcs);
729 	cuse_server_is_closing(pcs);
730 	/* final client wakeup, if any */
731 	cuse_server_wakeup_all_client_locked(pcs);
732 
733 	knlist_clear(&pcs->selinfo.si_note, 1);
734 
735 	retval = pcs->refs;
736 	cuse_server_unlock(pcs);
737 
738 	return (retval);
739 }
740 
741 static void
742 cuse_server_free(void *arg)
743 {
744 	struct cuse_server *pcs = arg;
745 
746 	/*
747 	 * The final server unref should be done by the server thread
748 	 * to prevent deadlock in the client cdevpriv destructor,
749 	 * which cannot destroy itself.
750 	 */
751 	while (cuse_server_do_close(pcs) != 1)
752 		pause("W", hz);
753 
754 	/* drop final refcount */
755 	cuse_server_unref(pcs);
756 }
757 
758 static int
759 cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
760 {
761 	struct cuse_server *pcs;
762 
763 	pcs = malloc(sizeof(*pcs), M_CUSE, M_WAITOK | M_ZERO);
764 
765 	if (devfs_set_cdevpriv(pcs, &cuse_server_free)) {
766 		printf("Cuse: Cannot set cdevpriv.\n");
767 		free(pcs, M_CUSE);
768 		return (ENOMEM);
769 	}
770 	/* store current process ID */
771 	pcs->pid = curproc->p_pid;
772 
773 	TAILQ_INIT(&pcs->head);
774 	TAILQ_INIT(&pcs->hdev);
775 	TAILQ_INIT(&pcs->hcli);
776 	TAILQ_INIT(&pcs->hmem);
777 
778 	cv_init(&pcs->cv, "cuse-server-cv");
779 
780 	mtx_init(&pcs->mtx, "cuse-server-mtx", NULL, MTX_DEF);
781 
782 	knlist_init_mtx(&pcs->selinfo.si_note, &pcs->mtx);
783 
784 	cuse_global_lock();
785 	pcs->refs++;
786 	TAILQ_INSERT_TAIL(&cuse_server_head, pcs, entry);
787 	cuse_global_unlock();
788 
789 	return (0);
790 }
791 
/*
 * Close handler of /dev/cuse. If the server can still be looked up and
 * is not yet closing, it is put into the closing state. Always returns
 * 0; the handler arguments are not used.
 */
static int
cuse_server_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct cuse_server *server;
	int status;

	status = cuse_server_get(&server);
	if (status == 0)
		(void)cuse_server_do_close(server);

	return (0);
}
802 
803 static int
804 cuse_server_read(struct cdev *dev, struct uio *uio, int ioflag)
805 {
806 	return (ENXIO);
807 }
808 
809 static int
810 cuse_server_write(struct cdev *dev, struct uio *uio, int ioflag)
811 {
812 	return (ENXIO);
813 }
814 
815 static int
816 cuse_server_ioctl_copy_locked(struct cuse_server *pcs,
817     struct cuse_client_command *pccmd,
818     struct cuse_data_chunk *pchk, int isread)
819 {
820 	struct proc *p_proc;
821 	uint32_t offset;
822 	int error;
823 
824 	offset = pchk->peer_ptr - CUSE_BUF_MIN_PTR;
825 
826 	if (pchk->length > CUSE_BUFFER_MAX)
827 		return (EFAULT);
828 
829 	if (offset >= CUSE_BUFFER_MAX)
830 		return (EFAULT);
831 
832 	if ((offset + pchk->length) > CUSE_BUFFER_MAX)
833 		return (EFAULT);
834 
835 	p_proc = pccmd->proc_curr;
836 	if (p_proc == NULL)
837 		return (ENXIO);
838 
839 	if (pccmd->proc_refs < 0)
840 		return (ENOMEM);
841 
842 	pccmd->proc_refs++;
843 
844 	cuse_server_unlock(pcs);
845 
846 	if (isread == 0) {
847 		error = copyin(
848 		    (void *)pchk->local_ptr,
849 		    pccmd->client->ioctl_buffer + offset,
850 		    pchk->length);
851 	} else {
852 		error = copyout(
853 		    pccmd->client->ioctl_buffer + offset,
854 		    (void *)pchk->local_ptr,
855 		    pchk->length);
856 	}
857 
858 	cuse_server_lock(pcs);
859 
860 	pccmd->proc_refs--;
861 
862 	if (pccmd->proc_curr == NULL)
863 		cv_signal(&pccmd->cv);
864 
865 	return (error);
866 }
867 
868 static int
869 cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
870     struct proc *proc_d, vm_offset_t data_d, size_t len)
871 {
872 	struct thread *td;
873 	struct proc *proc_cur;
874 	int error;
875 
876 	td = curthread;
877 	proc_cur = td->td_proc;
878 
879 	if (proc_cur == proc_d) {
880 		struct iovec iov = {
881 			.iov_base = (caddr_t)data_d,
882 			.iov_len = len,
883 		};
884 		struct uio uio = {
885 			.uio_iov = &iov,
886 			.uio_iovcnt = 1,
887 			.uio_offset = (off_t)data_s,
888 			.uio_resid = len,
889 			.uio_segflg = UIO_USERSPACE,
890 			.uio_rw = UIO_READ,
891 			.uio_td = td,
892 		};
893 
894 		PHOLD(proc_s);
895 		error = proc_rwmem(proc_s, &uio);
896 		PRELE(proc_s);
897 
898 	} else if (proc_cur == proc_s) {
899 		struct iovec iov = {
900 			.iov_base = (caddr_t)data_s,
901 			.iov_len = len,
902 		};
903 		struct uio uio = {
904 			.uio_iov = &iov,
905 			.uio_iovcnt = 1,
906 			.uio_offset = (off_t)data_d,
907 			.uio_resid = len,
908 			.uio_segflg = UIO_USERSPACE,
909 			.uio_rw = UIO_WRITE,
910 			.uio_td = td,
911 		};
912 
913 		PHOLD(proc_d);
914 		error = proc_rwmem(proc_d, &uio);
915 		PRELE(proc_d);
916 	} else {
917 		error = EINVAL;
918 	}
919 	return (error);
920 }
921 
922 static int
923 cuse_server_data_copy_locked(struct cuse_server *pcs,
924     struct cuse_client_command *pccmd,
925     struct cuse_data_chunk *pchk, int isread)
926 {
927 	struct proc *p_proc;
928 	int error;
929 
930 	p_proc = pccmd->proc_curr;
931 	if (p_proc == NULL)
932 		return (ENXIO);
933 
934 	if (pccmd->proc_refs < 0)
935 		return (ENOMEM);
936 
937 	pccmd->proc_refs++;
938 
939 	cuse_server_unlock(pcs);
940 
941 	if (isread == 0) {
942 		error = cuse_proc2proc_copy(
943 		    curthread->td_proc, pchk->local_ptr,
944 		    p_proc, pchk->peer_ptr,
945 		    pchk->length);
946 	} else {
947 		error = cuse_proc2proc_copy(
948 		    p_proc, pchk->peer_ptr,
949 		    curthread->td_proc, pchk->local_ptr,
950 		    pchk->length);
951 	}
952 
953 	cuse_server_lock(pcs);
954 
955 	pccmd->proc_refs--;
956 
957 	if (pccmd->proc_curr == NULL)
958 		cv_signal(&pccmd->cv);
959 
960 	return (error);
961 }
962 
963 static int
964 cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
965 {
966 	int n;
967 	int x = 0;
968 	int match;
969 
970 	do {
971 		for (match = n = 0; n != CUSE_DEVICES_MAX; n++) {
972 			if (cuse_alloc_unit[n] != NULL) {
973 				if ((cuse_alloc_unit_id[n] ^ id) & CUSE_ID_MASK)
974 					continue;
975 				if ((cuse_alloc_unit_id[n] & ~CUSE_ID_MASK) == x) {
976 					x++;
977 					match = 1;
978 				}
979 			}
980 		}
981 	} while (match);
982 
983 	if (x < 256) {
984 		for (n = 0; n != CUSE_DEVICES_MAX; n++) {
985 			if (cuse_alloc_unit[n] == NULL) {
986 				cuse_alloc_unit[n] = pcs;
987 				cuse_alloc_unit_id[n] = id | x;
988 				return (x);
989 			}
990 		}
991 	}
992 	return (-1);
993 }
994 
995 static void
996 cuse_server_wakeup_locked(struct cuse_server *pcs)
997 {
998 	selwakeup(&pcs->selinfo);
999 	KNOTE_LOCKED(&pcs->selinfo.si_note, 0);
1000 }
1001 
1002 static void
1003 cuse_server_wakeup_all_client_locked(struct cuse_server *pcs)
1004 {
1005 	struct cuse_client *pcc;
1006 
1007 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
1008 		pcc->cflags |= (CUSE_CLI_KNOTE_NEED_READ |
1009 		    CUSE_CLI_KNOTE_NEED_WRITE);
1010 	}
1011 	cuse_server_wakeup_locked(pcs);
1012 }
1013 
1014 static int
1015 cuse_free_unit_by_id_locked(struct cuse_server *pcs, int id)
1016 {
1017 	int n;
1018 	int found = 0;
1019 
1020 	for (n = 0; n != CUSE_DEVICES_MAX; n++) {
1021 		if (cuse_alloc_unit[n] == pcs) {
1022 			if (cuse_alloc_unit_id[n] == id || id == -1) {
1023 				cuse_alloc_unit[n] = NULL;
1024 				cuse_alloc_unit_id[n] = 0;
1025 				found = 1;
1026 			}
1027 		}
1028 	}
1029 
1030 	return (found ? 0 : EINVAL);
1031 }
1032 
1033 static int
1034 cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
1035     caddr_t data, int fflag, struct thread *td)
1036 {
1037 	struct cuse_server *pcs;
1038 	int error;
1039 
1040 	error = cuse_server_get(&pcs);
1041 	if (error != 0)
1042 		return (error);
1043 
1044 	switch (cmd) {
1045 		struct cuse_client_command *pccmd;
1046 		struct cuse_client *pcc;
1047 		struct cuse_command *pcmd;
1048 		struct cuse_alloc_info *pai;
1049 		struct cuse_create_dev *pcd;
1050 		struct cuse_server_dev *pcsd;
1051 		struct cuse_data_chunk *pchk;
1052 		int n;
1053 
1054 	case CUSE_IOCTL_GET_COMMAND:
1055 		pcmd = (void *)data;
1056 
1057 		cuse_server_lock(pcs);
1058 
1059 		while ((pccmd = TAILQ_FIRST(&pcs->head)) == NULL) {
1060 			error = cv_wait_sig(&pcs->cv, &pcs->mtx);
1061 
1062 			if (pcs->is_closing)
1063 				error = ENXIO;
1064 
1065 			if (error) {
1066 				cuse_server_unlock(pcs);
1067 				return (error);
1068 			}
1069 		}
1070 
1071 		TAILQ_REMOVE(&pcs->head, pccmd, entry);
1072 		pccmd->entry.tqe_prev = NULL;
1073 
1074 		pccmd->entered = curthread;
1075 
1076 		*pcmd = pccmd->sub;
1077 
1078 		cuse_server_unlock(pcs);
1079 
1080 		break;
1081 
1082 	case CUSE_IOCTL_SYNC_COMMAND:
1083 
1084 		cuse_server_lock(pcs);
1085 		while ((pccmd = cuse_server_find_command(pcs, curthread)) != NULL) {
1086 			/* send sync command */
1087 			pccmd->entered = NULL;
1088 			pccmd->error = *(int *)data;
1089 			pccmd->command = CUSE_CMD_SYNC;
1090 
1091 			/* signal peer, if any */
1092 			cv_signal(&pccmd->cv);
1093 		}
1094 		cuse_server_unlock(pcs);
1095 
1096 		break;
1097 
1098 	case CUSE_IOCTL_ALLOC_UNIT:
1099 
1100 		cuse_server_lock(pcs);
1101 		n = cuse_alloc_unit_by_id_locked(pcs,
1102 		    CUSE_ID_DEFAULT(0));
1103 		cuse_server_unlock(pcs);
1104 
1105 		if (n < 0)
1106 			error = ENOMEM;
1107 		else
1108 			*(int *)data = n;
1109 		break;
1110 
1111 	case CUSE_IOCTL_ALLOC_UNIT_BY_ID:
1112 
1113 		n = *(int *)data;
1114 
1115 		n = (n & CUSE_ID_MASK);
1116 
1117 		cuse_server_lock(pcs);
1118 		n = cuse_alloc_unit_by_id_locked(pcs, n);
1119 		cuse_server_unlock(pcs);
1120 
1121 		if (n < 0)
1122 			error = ENOMEM;
1123 		else
1124 			*(int *)data = n;
1125 		break;
1126 
1127 	case CUSE_IOCTL_FREE_UNIT:
1128 
1129 		n = *(int *)data;
1130 
1131 		n = CUSE_ID_DEFAULT(n);
1132 
1133 		cuse_server_lock(pcs);
1134 		error = cuse_free_unit_by_id_locked(pcs, n);
1135 		cuse_server_unlock(pcs);
1136 		break;
1137 
1138 	case CUSE_IOCTL_FREE_UNIT_BY_ID:
1139 
1140 		n = *(int *)data;
1141 
1142 		cuse_server_lock(pcs);
1143 		error = cuse_free_unit_by_id_locked(pcs, n);
1144 		cuse_server_unlock(pcs);
1145 		break;
1146 
1147 	case CUSE_IOCTL_ALLOC_MEMORY:
1148 
1149 		pai = (void *)data;
1150 
1151 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1152 			error = ENOMEM;
1153 			break;
1154 		}
1155 		if (pai->page_count >= CUSE_ALLOC_PAGES_MAX) {
1156 			error = ENOMEM;
1157 			break;
1158 		}
1159 		error = cuse_server_alloc_memory(pcs,
1160 		    pai->alloc_nr, pai->page_count);
1161 		break;
1162 
1163 	case CUSE_IOCTL_FREE_MEMORY:
1164 		pai = (void *)data;
1165 
1166 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1167 			error = ENOMEM;
1168 			break;
1169 		}
1170 		error = cuse_server_free_memory(pcs, pai->alloc_nr);
1171 		break;
1172 
1173 	case CUSE_IOCTL_GET_SIG:
1174 
1175 		cuse_server_lock(pcs);
1176 		pccmd = cuse_server_find_command(pcs, curthread);
1177 
1178 		if (pccmd != NULL) {
1179 			n = pccmd->got_signal;
1180 			pccmd->got_signal = 0;
1181 		} else {
1182 			n = 0;
1183 		}
1184 		cuse_server_unlock(pcs);
1185 
1186 		*(int *)data = n;
1187 
1188 		break;
1189 
1190 	case CUSE_IOCTL_SET_PFH:
1191 
1192 		cuse_server_lock(pcs);
1193 		pccmd = cuse_server_find_command(pcs, curthread);
1194 
1195 		if (pccmd != NULL) {
1196 			pcc = pccmd->client;
1197 			for (n = 0; n != CUSE_CMD_MAX; n++) {
1198 				pcc->cmds[n].sub.per_file_handle = *(uintptr_t *)data;
1199 			}
1200 		} else {
1201 			error = ENXIO;
1202 		}
1203 		cuse_server_unlock(pcs);
1204 		break;
1205 
1206 	case CUSE_IOCTL_CREATE_DEV:
1207 
1208 		error = priv_check(curthread, PRIV_DRIVER);
1209 		if (error)
1210 			break;
1211 
1212 		pcd = (void *)data;
1213 
1214 		/* filter input */
1215 
1216 		pcd->devname[sizeof(pcd->devname) - 1] = 0;
1217 
1218 		if (pcd->devname[0] == 0) {
1219 			error = EINVAL;
1220 			break;
1221 		}
1222 		cuse_str_filter(pcd->devname);
1223 
1224 		pcd->permissions &= 0777;
1225 
1226 		/* try to allocate a character device */
1227 
1228 		pcsd = malloc(sizeof(*pcsd), M_CUSE, M_WAITOK | M_ZERO);
1229 
1230 		pcsd->server = pcs;
1231 
1232 		pcsd->user_dev = pcd->dev;
1233 
1234 		pcsd->kern_dev = make_dev_credf(MAKEDEV_CHECKNAME,
1235 		    &cuse_client_devsw, 0, NULL, pcd->user_id, pcd->group_id,
1236 		    pcd->permissions, "%s", pcd->devname);
1237 
1238 		if (pcsd->kern_dev == NULL) {
1239 			free(pcsd, M_CUSE);
1240 			error = ENOMEM;
1241 			break;
1242 		}
1243 		pcsd->kern_dev->si_drv1 = pcsd;
1244 
1245 		cuse_server_lock(pcs);
1246 		TAILQ_INSERT_TAIL(&pcs->hdev, pcsd, entry);
1247 		cuse_server_unlock(pcs);
1248 
1249 		break;
1250 
1251 	case CUSE_IOCTL_DESTROY_DEV:
1252 
1253 		error = priv_check(curthread, PRIV_DRIVER);
1254 		if (error)
1255 			break;
1256 
1257 		cuse_server_lock(pcs);
1258 
1259 		error = EINVAL;
1260 
1261 		pcsd = TAILQ_FIRST(&pcs->hdev);
1262 		while (pcsd != NULL) {
1263 			if (pcsd->user_dev == *(struct cuse_dev **)data) {
1264 				TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
1265 				cuse_server_unlock(pcs);
1266 				cuse_server_free_dev(pcsd);
1267 				cuse_server_lock(pcs);
1268 				error = 0;
1269 				pcsd = TAILQ_FIRST(&pcs->hdev);
1270 			} else {
1271 				pcsd = TAILQ_NEXT(pcsd, entry);
1272 			}
1273 		}
1274 
1275 		cuse_server_unlock(pcs);
1276 		break;
1277 
1278 	case CUSE_IOCTL_WRITE_DATA:
1279 	case CUSE_IOCTL_READ_DATA:
1280 
1281 		cuse_server_lock(pcs);
1282 		pchk = (struct cuse_data_chunk *)data;
1283 
1284 		pccmd = cuse_server_find_command(pcs, curthread);
1285 
1286 		if (pccmd == NULL) {
1287 			error = ENXIO;	/* invalid request */
1288 		} else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
1289 			error = EFAULT;	/* NULL pointer */
1290 		} else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
1291 			error = cuse_server_ioctl_copy_locked(pcs, pccmd,
1292 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1293 		} else {
1294 			error = cuse_server_data_copy_locked(pcs, pccmd,
1295 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1296 		}
1297 		cuse_server_unlock(pcs);
1298 		break;
1299 
1300 	case CUSE_IOCTL_SELWAKEUP:
1301 		cuse_server_lock(pcs);
1302 		/*
1303 		 * We don't know which direction caused the event.
1304 		 * Wakeup both!
1305 		 */
1306 		cuse_server_wakeup_all_client_locked(pcs);
1307 		cuse_server_unlock(pcs);
1308 		break;
1309 
1310 	default:
1311 		error = ENXIO;
1312 		break;
1313 	}
1314 	return (error);
1315 }
1316 
/*
 * Poll handler for the server control device.
 *
 * Every event the caller asked about, out of the hangup, priority,
 * read and write classes, is reported back as ready.  All other
 * requested bits are masked off.  "dev" and "td" are not used.
 */
static int
cuse_server_poll(struct cdev *dev, int events, struct thread *td)
{
	const int ready_mask = POLLHUP | POLLPRI | POLLIN |
	    POLLRDNORM | POLLOUT | POLLWRNORM;

	return (events & ready_mask);
}
1323 
1324 static int
1325 cuse_server_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1326     vm_size_t size, struct vm_object **object, int nprot)
1327 {
1328 	uint32_t page_nr = *offset / PAGE_SIZE;
1329 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1330 	struct cuse_memory *mem;
1331 	struct cuse_server *pcs;
1332 	int error;
1333 
1334 	error = cuse_server_get(&pcs);
1335 	if (error != 0)
1336 		return (error);
1337 
1338 	cuse_server_lock(pcs);
1339 	/* lookup memory structure */
1340 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
1341 		if (mem->alloc_nr == alloc_nr)
1342 			break;
1343 	}
1344 	if (mem == NULL) {
1345 		cuse_server_unlock(pcs);
1346 		return (ENOMEM);
1347 	}
1348 	/* verify page offset */
1349 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1350 	if (page_nr >= mem->page_count) {
1351 		cuse_server_unlock(pcs);
1352 		return (ENXIO);
1353 	}
1354 	/* verify mmap size */
1355 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1356 	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1357 		cuse_server_unlock(pcs);
1358 		return (EINVAL);
1359 	}
1360 	vm_object_reference(mem->object);
1361 	*object = mem->object;
1362 	cuse_server_unlock(pcs);
1363 
1364 	/* set new VM object offset to use */
1365 	*offset = page_nr * PAGE_SIZE;
1366 
1367 	/* success */
1368 	return (0);
1369 }
1370 
1371 /*------------------------------------------------------------------------*
1372  *	CUSE CLIENT PART
1373  *------------------------------------------------------------------------*/
1374 static void
1375 cuse_client_free(void *arg)
1376 {
1377 	struct cuse_client *pcc = arg;
1378 	struct cuse_client_command *pccmd;
1379 	struct cuse_server *pcs;
1380 	int n;
1381 
1382 	pcs = pcc->server;
1383 
1384 	cuse_server_lock(pcs);
1385 	cuse_client_is_closing(pcc);
1386 	TAILQ_REMOVE(&pcs->hcli, pcc, entry);
1387 	cuse_server_unlock(pcs);
1388 
1389 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1390 		pccmd = &pcc->cmds[n];
1391 
1392 		sx_destroy(&pccmd->sx);
1393 		cv_destroy(&pccmd->cv);
1394 	}
1395 
1396 	free(pcc, M_CUSE);
1397 
1398 	/* drop reference on server */
1399 	cuse_server_unref(pcs);
1400 }
1401 
1402 static int
1403 cuse_client_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
1404 {
1405 	struct cuse_client_command *pccmd;
1406 	struct cuse_server_dev *pcsd;
1407 	struct cuse_client *pcc;
1408 	struct cuse_server *pcs;
1409 	struct cuse_dev *pcd;
1410 	int error;
1411 	int n;
1412 
1413 	pcsd = dev->si_drv1;
1414 	if (pcsd != NULL) {
1415 		pcs = pcsd->server;
1416 		pcd = pcsd->user_dev;
1417 
1418 		cuse_server_lock(pcs);
1419 		/*
1420 		 * Check that the refcount didn't wrap and that the
1421 		 * same process is not both client and server. This
1422 		 * can easily lead to deadlocks when destroying the
1423 		 * CUSE character device nodes:
1424 		 */
1425 		pcs->refs++;
1426 		if (pcs->refs < 0 || pcs->pid == curproc->p_pid) {
1427 			/* overflow or wrong PID */
1428 			pcs->refs--;
1429 			cuse_server_unlock(pcs);
1430 			return (EINVAL);
1431 		}
1432 		cuse_server_unlock(pcs);
1433 	} else {
1434 		return (EINVAL);
1435 	}
1436 
1437 	pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
1438 	if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
1439 		printf("Cuse: Cannot set cdevpriv.\n");
1440 		/* drop reference on server */
1441 		cuse_server_unref(pcs);
1442 		free(pcc, M_CUSE);
1443 		return (ENOMEM);
1444 	}
1445 	pcc->fflags = fflags;
1446 	pcc->server_dev = pcsd;
1447 	pcc->server = pcs;
1448 
1449 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1450 		pccmd = &pcc->cmds[n];
1451 
1452 		pccmd->sub.dev = pcd;
1453 		pccmd->sub.command = n;
1454 		pccmd->client = pcc;
1455 
1456 		sx_init(&pccmd->sx, "cuse-client-sx");
1457 		cv_init(&pccmd->cv, "cuse-client-cv");
1458 	}
1459 
1460 	cuse_server_lock(pcs);
1461 
1462 	/* cuse_client_free() assumes that the client is listed somewhere! */
1463 	/* always enqueue */
1464 
1465 	TAILQ_INSERT_TAIL(&pcs->hcli, pcc, entry);
1466 
1467 	/* check if server is closing */
1468 	if ((pcs->is_closing != 0) || (dev->si_drv1 == NULL)) {
1469 		error = EINVAL;
1470 	} else {
1471 		error = 0;
1472 	}
1473 	cuse_server_unlock(pcs);
1474 
1475 	if (error) {
1476 		devfs_clear_cdevpriv();	/* XXX bugfix */
1477 		return (error);
1478 	}
1479 	pccmd = &pcc->cmds[CUSE_CMD_OPEN];
1480 
1481 	cuse_cmd_lock(pccmd);
1482 
1483 	cuse_server_lock(pcs);
1484 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1485 
1486 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1487 	cuse_server_unlock(pcs);
1488 
1489 	if (error < 0) {
1490 		error = cuse_convert_error(error);
1491 	} else {
1492 		error = 0;
1493 	}
1494 
1495 	cuse_cmd_unlock(pccmd);
1496 
1497 	if (error)
1498 		devfs_clear_cdevpriv();	/* XXX bugfix */
1499 
1500 	return (error);
1501 }
1502 
1503 static int
1504 cuse_client_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
1505 {
1506 	struct cuse_client_command *pccmd;
1507 	struct cuse_client *pcc;
1508 	struct cuse_server *pcs;
1509 	int error;
1510 
1511 	error = cuse_client_get(&pcc);
1512 	if (error != 0)
1513 		return (0);
1514 
1515 	pccmd = &pcc->cmds[CUSE_CMD_CLOSE];
1516 	pcs = pcc->server;
1517 
1518 	cuse_cmd_lock(pccmd);
1519 
1520 	cuse_server_lock(pcs);
1521 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1522 
1523 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1524 	cuse_cmd_unlock(pccmd);
1525 
1526 	cuse_client_is_closing(pcc);
1527 	cuse_server_unlock(pcs);
1528 
1529 	return (0);
1530 }
1531 
1532 static void
1533 cuse_client_kqfilter_poll(struct cdev *dev, struct cuse_client *pcc)
1534 {
1535 	struct cuse_server *pcs = pcc->server;
1536 	int temp;
1537 
1538 	cuse_server_lock(pcs);
1539 	temp = (pcc->cflags & (CUSE_CLI_KNOTE_HAS_READ |
1540 	    CUSE_CLI_KNOTE_HAS_WRITE));
1541 	pcc->cflags &= ~(CUSE_CLI_KNOTE_NEED_READ |
1542 	    CUSE_CLI_KNOTE_NEED_WRITE);
1543 	cuse_server_unlock(pcs);
1544 
1545 	if (temp != 0) {
1546 		/* get the latest polling state from the server */
1547 		temp = cuse_client_poll(dev, POLLIN | POLLOUT, NULL);
1548 
1549 		if (temp & (POLLIN | POLLOUT)) {
1550 			cuse_server_lock(pcs);
1551 			if (temp & POLLIN)
1552 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_READ;
1553 			if (temp & POLLOUT)
1554 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_WRITE;
1555 
1556 			/* make sure the "knote" gets woken up */
1557 			cuse_server_wakeup_locked(pcc->server);
1558 			cuse_server_unlock(pcs);
1559 		}
1560 	}
1561 }
1562 
1563 static int
1564 cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
1565 {
1566 	struct cuse_client_command *pccmd;
1567 	struct cuse_client *pcc;
1568 	struct cuse_server *pcs;
1569 	int error;
1570 	int len;
1571 
1572 	error = cuse_client_get(&pcc);
1573 	if (error != 0)
1574 		return (error);
1575 
1576 	pccmd = &pcc->cmds[CUSE_CMD_READ];
1577 	pcs = pcc->server;
1578 
1579 	if (uio->uio_segflg != UIO_USERSPACE) {
1580 		return (EINVAL);
1581 	}
1582 	uio->uio_segflg = UIO_NOCOPY;
1583 
1584 	cuse_cmd_lock(pccmd);
1585 
1586 	while (uio->uio_resid != 0) {
1587 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1588 			error = ENOMEM;
1589 			break;
1590 		}
1591 		len = uio->uio_iov->iov_len;
1592 
1593 		cuse_server_lock(pcs);
1594 		cuse_client_send_command_locked(pccmd,
1595 		    (uintptr_t)uio->uio_iov->iov_base,
1596 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1597 
1598 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1599 		cuse_server_unlock(pcs);
1600 
1601 		if (error < 0) {
1602 			error = cuse_convert_error(error);
1603 			break;
1604 		} else if (error == len) {
1605 			error = uiomove(NULL, error, uio);
1606 			if (error)
1607 				break;
1608 		} else {
1609 			error = uiomove(NULL, error, uio);
1610 			break;
1611 		}
1612 	}
1613 	cuse_cmd_unlock(pccmd);
1614 
1615 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1616 
1617 	if (error == EWOULDBLOCK)
1618 		cuse_client_kqfilter_poll(dev, pcc);
1619 
1620 	return (error);
1621 }
1622 
1623 static int
1624 cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
1625 {
1626 	struct cuse_client_command *pccmd;
1627 	struct cuse_client *pcc;
1628 	struct cuse_server *pcs;
1629 	int error;
1630 	int len;
1631 
1632 	error = cuse_client_get(&pcc);
1633 	if (error != 0)
1634 		return (error);
1635 
1636 	pccmd = &pcc->cmds[CUSE_CMD_WRITE];
1637 	pcs = pcc->server;
1638 
1639 	if (uio->uio_segflg != UIO_USERSPACE) {
1640 		return (EINVAL);
1641 	}
1642 	uio->uio_segflg = UIO_NOCOPY;
1643 
1644 	cuse_cmd_lock(pccmd);
1645 
1646 	while (uio->uio_resid != 0) {
1647 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1648 			error = ENOMEM;
1649 			break;
1650 		}
1651 		len = uio->uio_iov->iov_len;
1652 
1653 		cuse_server_lock(pcs);
1654 		cuse_client_send_command_locked(pccmd,
1655 		    (uintptr_t)uio->uio_iov->iov_base,
1656 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1657 
1658 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1659 		cuse_server_unlock(pcs);
1660 
1661 		if (error < 0) {
1662 			error = cuse_convert_error(error);
1663 			break;
1664 		} else if (error == len) {
1665 			error = uiomove(NULL, error, uio);
1666 			if (error)
1667 				break;
1668 		} else {
1669 			error = uiomove(NULL, error, uio);
1670 			break;
1671 		}
1672 	}
1673 	cuse_cmd_unlock(pccmd);
1674 
1675 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1676 
1677 	if (error == EWOULDBLOCK)
1678 		cuse_client_kqfilter_poll(dev, pcc);
1679 
1680 	return (error);
1681 }
1682 
1683 int
1684 cuse_client_ioctl(struct cdev *dev, unsigned long cmd,
1685     caddr_t data, int fflag, struct thread *td)
1686 {
1687 	struct cuse_client_command *pccmd;
1688 	struct cuse_client *pcc;
1689 	struct cuse_server *pcs;
1690 	int error;
1691 	int len;
1692 
1693 	error = cuse_client_get(&pcc);
1694 	if (error != 0)
1695 		return (error);
1696 
1697 	len = IOCPARM_LEN(cmd);
1698 	if (len > CUSE_BUFFER_MAX)
1699 		return (ENOMEM);
1700 
1701 	pccmd = &pcc->cmds[CUSE_CMD_IOCTL];
1702 	pcs = pcc->server;
1703 
1704 	cuse_cmd_lock(pccmd);
1705 
1706 	if (cmd & (IOC_IN | IOC_VOID))
1707 		memcpy(pcc->ioctl_buffer, data, len);
1708 
1709 	/*
1710 	 * When the ioctl-length is zero drivers can pass information
1711 	 * through the data pointer of the ioctl. Make sure this information
1712 	 * is forwarded to the driver.
1713 	 */
1714 
1715 	cuse_server_lock(pcs);
1716 	cuse_client_send_command_locked(pccmd,
1717 	    (len == 0) ? *(long *)data : CUSE_BUF_MIN_PTR,
1718 	    (unsigned long)cmd, pcc->fflags,
1719 	    (fflag & O_NONBLOCK) ? IO_NDELAY : 0);
1720 
1721 	error = cuse_client_receive_command_locked(pccmd, data, len);
1722 	cuse_server_unlock(pcs);
1723 
1724 	if (error < 0) {
1725 		error = cuse_convert_error(error);
1726 	} else {
1727 		error = 0;
1728 	}
1729 
1730 	if (cmd & IOC_OUT)
1731 		memcpy(data, pcc->ioctl_buffer, len);
1732 
1733 	cuse_cmd_unlock(pccmd);
1734 
1735 	if (error == EWOULDBLOCK)
1736 		cuse_client_kqfilter_poll(dev, pcc);
1737 
1738 	return (error);
1739 }
1740 
1741 static int
1742 cuse_client_poll(struct cdev *dev, int events, struct thread *td)
1743 {
1744 	struct cuse_client_command *pccmd;
1745 	struct cuse_client *pcc;
1746 	struct cuse_server *pcs;
1747 	unsigned long temp;
1748 	int error;
1749 	int revents;
1750 
1751 	error = cuse_client_get(&pcc);
1752 	if (error != 0)
1753 		goto pollnval;
1754 
1755 	temp = 0;
1756 	pcs = pcc->server;
1757 
1758 	if (events & (POLLPRI | POLLIN | POLLRDNORM))
1759 		temp |= CUSE_POLL_READ;
1760 
1761 	if (events & (POLLOUT | POLLWRNORM))
1762 		temp |= CUSE_POLL_WRITE;
1763 
1764 	if (events & POLLHUP)
1765 		temp |= CUSE_POLL_ERROR;
1766 
1767 	pccmd = &pcc->cmds[CUSE_CMD_POLL];
1768 
1769 	cuse_cmd_lock(pccmd);
1770 
1771 	/* Need to selrecord() first to not loose any events. */
1772 	if (temp != 0 && td != NULL)
1773 		selrecord(td, &pcs->selinfo);
1774 
1775 	cuse_server_lock(pcs);
1776 	cuse_client_send_command_locked(pccmd,
1777 	    0, temp, pcc->fflags, IO_NDELAY);
1778 
1779 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1780 	cuse_server_unlock(pcs);
1781 
1782 	cuse_cmd_unlock(pccmd);
1783 
1784 	if (error < 0) {
1785 		goto pollnval;
1786 	} else {
1787 		revents = 0;
1788 		if (error & CUSE_POLL_READ)
1789 			revents |= (events & (POLLPRI | POLLIN | POLLRDNORM));
1790 		if (error & CUSE_POLL_WRITE)
1791 			revents |= (events & (POLLOUT | POLLWRNORM));
1792 		if (error & CUSE_POLL_ERROR)
1793 			revents |= (events & POLLHUP);
1794 	}
1795 	return (revents);
1796 
1797 pollnval:
1798 	/* XXX many clients don't understand POLLNVAL */
1799 	return (events & (POLLHUP | POLLPRI | POLLIN |
1800 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1801 }
1802 
1803 static int
1804 cuse_client_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1805     vm_size_t size, struct vm_object **object, int nprot)
1806 {
1807 	uint32_t page_nr = *offset / PAGE_SIZE;
1808 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1809 	struct cuse_memory *mem;
1810 	struct cuse_client *pcc;
1811 	struct cuse_server *pcs;
1812 	int error;
1813 
1814 	error = cuse_client_get(&pcc);
1815 	if (error != 0)
1816 		return (error);
1817 
1818 	pcs = pcc->server;
1819 
1820 	cuse_server_lock(pcs);
1821 	/* lookup memory structure */
1822 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
1823 		if (mem->alloc_nr == alloc_nr)
1824 			break;
1825 	}
1826 	if (mem == NULL) {
1827 		cuse_server_unlock(pcs);
1828 		return (ENOMEM);
1829 	}
1830 	/* verify page offset */
1831 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1832 	if (page_nr >= mem->page_count) {
1833 		cuse_server_unlock(pcs);
1834 		return (ENXIO);
1835 	}
1836 	/* verify mmap size */
1837 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1838 	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1839 		cuse_server_unlock(pcs);
1840 		return (EINVAL);
1841 	}
1842 	vm_object_reference(mem->object);
1843 	*object = mem->object;
1844 	cuse_server_unlock(pcs);
1845 
1846 	/* set new VM object offset to use */
1847 	*offset = page_nr * PAGE_SIZE;
1848 
1849 	/* success */
1850 	return (0);
1851 }
1852 
1853 static void
1854 cuse_client_kqfilter_read_detach(struct knote *kn)
1855 {
1856 	struct cuse_client *pcc;
1857 	struct cuse_server *pcs;
1858 
1859 	pcc = kn->kn_hook;
1860 	pcs = pcc->server;
1861 
1862 	cuse_server_lock(pcs);
1863 	knlist_remove(&pcs->selinfo.si_note, kn, 1);
1864 	cuse_server_unlock(pcs);
1865 }
1866 
1867 static void
1868 cuse_client_kqfilter_write_detach(struct knote *kn)
1869 {
1870 	struct cuse_client *pcc;
1871 	struct cuse_server *pcs;
1872 
1873 	pcc = kn->kn_hook;
1874 	pcs = pcc->server;
1875 
1876 	cuse_server_lock(pcs);
1877 	knlist_remove(&pcs->selinfo.si_note, kn, 1);
1878 	cuse_server_unlock(pcs);
1879 }
1880 
1881 static int
1882 cuse_client_kqfilter_read_event(struct knote *kn, long hint)
1883 {
1884 	struct cuse_client *pcc;
1885 
1886 	pcc = kn->kn_hook;
1887 
1888 	mtx_assert(&pcc->server->mtx, MA_OWNED);
1889 
1890 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_READ) ? 1 : 0);
1891 }
1892 
1893 static int
1894 cuse_client_kqfilter_write_event(struct knote *kn, long hint)
1895 {
1896 	struct cuse_client *pcc;
1897 
1898 	pcc = kn->kn_hook;
1899 
1900 	mtx_assert(&pcc->server->mtx, MA_OWNED);
1901 
1902 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_WRITE) ? 1 : 0);
1903 }
1904 
1905 static int
1906 cuse_client_kqfilter(struct cdev *dev, struct knote *kn)
1907 {
1908 	struct cuse_client *pcc;
1909 	struct cuse_server *pcs;
1910 	int error;
1911 
1912 	error = cuse_client_get(&pcc);
1913 	if (error != 0)
1914 		return (error);
1915 
1916 	pcs = pcc->server;
1917 
1918 	cuse_server_lock(pcs);
1919 	switch (kn->kn_filter) {
1920 	case EVFILT_READ:
1921 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_READ;
1922 		kn->kn_hook = pcc;
1923 		kn->kn_fop = &cuse_client_kqfilter_read_ops;
1924 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1925 		break;
1926 	case EVFILT_WRITE:
1927 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_WRITE;
1928 		kn->kn_hook = pcc;
1929 		kn->kn_fop = &cuse_client_kqfilter_write_ops;
1930 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1931 		break;
1932 	default:
1933 		error = EINVAL;
1934 		break;
1935 	}
1936 	cuse_server_unlock(pcs);
1937 
1938 	if (error == 0)
1939 		cuse_client_kqfilter_poll(dev, pcc);
1940 	return (error);
1941 }
1942