xref: /freebsd/sys/fs/cuse/cuse.c (revision 51015e6d0f570239b0c2088dc6cf2b018928375d)
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2010-2022 Hans Petter Selasky
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/stdint.h>
28 #include <sys/stddef.h>
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/conf.h>
33 #include <sys/kernel.h>
34 #include <sys/bus.h>
35 #include <sys/linker_set.h>
36 #include <sys/module.h>
37 #include <sys/lock.h>
38 #include <sys/mutex.h>
39 #include <sys/condvar.h>
40 #include <sys/sysctl.h>
41 #include <sys/unistd.h>
42 #include <sys/malloc.h>
43 #include <sys/priv.h>
44 #include <sys/uio.h>
45 #include <sys/poll.h>
46 #include <sys/sx.h>
47 #include <sys/rwlock.h>
48 #include <sys/queue.h>
49 #include <sys/fcntl.h>
50 #include <sys/proc.h>
51 #include <sys/vnode.h>
52 #include <sys/selinfo.h>
53 #include <sys/ptrace.h>
54 #include <sys/sysent.h>
55 
56 #include <machine/bus.h>
57 
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60 #include <vm/vm_object.h>
61 #include <vm/vm_page.h>
62 #include <vm/vm_pager.h>
63 
64 #include <fs/cuse/cuse_defs.h>
65 #include <fs/cuse/cuse_ioctl.h>
66 
/*
 * Size in bytes of the read_buffer and write_buffer arrays embedded in
 * every struct cuse_client.
 *
 * Set this define to zero to disable this feature.
 * NOTE(review): a value of zero would make those arrays zero-length;
 * confirm that configuration is still supported.
 */
#define	CUSE_COPY_BUFFER_MAX \
	CUSE_BUFFER_MAX

/*
 * CUSE_ALLOC_BYTES_MAX expressed in pages. CUSE_IOCTL_ALLOC_MEMORY
 * rejects requests whose page_count is larger than this value.
 */
#define	CUSE_ALLOC_PAGES_MAX \
	(CUSE_ALLOC_BYTES_MAX / PAGE_SIZE)

/* Refuse to build when not even a single page fits into the limit. */
#if (CUSE_ALLOC_PAGES_MAX == 0)
#error "PAGE_SIZE is too big!"
#endif
77 
78 static int
79 cuse_modevent(module_t mod, int type, void *data)
80 {
81 	switch (type) {
82 	case MOD_LOAD:
83 	case MOD_UNLOAD:
84 		return (0);
85 	default:
86 		return (EOPNOTSUPP);
87 	}
88 }
89 
/* Module descriptor: module events are routed to cuse_modevent(). */
static moduledata_t cuse_mod = {
	.name = "cuse",
	.evhand = &cuse_modevent,
};

/* Register the module at SI_SUB_DEVFS / SI_ORDER_FIRST, version 1. */
DECLARE_MODULE(cuse, cuse_mod, SI_SUB_DEVFS, SI_ORDER_FIRST);
MODULE_VERSION(cuse, 1);

/*
 * Prevent cuse4bsd.ko and cuse.ko from loading at the same time by
 * declaring support for the cuse4bsd interface in cuse.ko:
 */
MODULE_VERSION(cuse4bsd, 1);

/* Only advertise the feature when the FEATURE() macro is available. */
#ifdef FEATURE
FEATURE(cuse, "Userspace character devices");
#endif

/* Forward declarations of structure tags used by the types below. */
struct cuse_command;
struct cuse_server;
struct cuse_client;
111 
/*
 * One command slot of a client. Every client embeds CUSE_CMD_MAX of
 * these (see struct cuse_client); a slot is queued on the server's
 * "head" list while it waits to be picked up by the server.
 */
struct cuse_client_command {
	/* linkage on cuse_server.head; tqe_prev == NULL means "not queued" */
	TAILQ_ENTRY(cuse_client_command) entry;
	/* payload copied out to the server by CUSE_IOCTL_GET_COMMAND */
	struct cuse_command sub;
	/* exclusive lock taken by cuse_cmd_lock() / cuse_cmd_unlock() */
	struct sx sx;
	/* waited on, together with the server mutex, for completion */
	struct cv cv;
	/* server thread that fetched this command; NULL once synced */
	struct thread *entered;
	/* client this slot belongs to */
	struct cuse_client *client;
	/* process waiting in cuse_client_receive_command_locked(), else NULL */
	struct proc *proc_curr;
	/* number of server-side copy operations currently in flight */
	int	proc_refs;
	/* set by cuse_client_got_signal(), fetched by CUSE_IOCTL_GET_SIG */
	int	got_signal;
	/* result value stored by CUSE_IOCTL_SYNC_COMMAND */
	int	error;
	/* CUSE_CMD_NONE while pending, CUSE_CMD_SYNC once completed */
	int	command;
};
125 
/*
 * One memory allocation owned by a server, created by
 * cuse_server_alloc_memory() and linked on cuse_server.hmem.
 */
struct cuse_memory {
	/* linkage on cuse_server.hmem */
	TAILQ_ENTRY(cuse_memory) entry;
	/* VM object obtained from vm_pager_allocate(OBJT_SWAP, ...) */
	vm_object_t object;
	/* size of the object in pages */
	uint32_t page_count;
	/* allocation number; unique within one server's hmem list */
	uint32_t alloc_nr;
};
132 
/*
 * One character device created on behalf of a server through
 * CUSE_IOCTL_CREATE_DEV.
 */
struct cuse_server_dev {
	/* linkage on cuse_server.hdev */
	TAILQ_ENTRY(cuse_server_dev) entry;
	/* server that created the device */
	struct cuse_server *server;
	/* kernel device node returned by make_dev_credf() */
	struct cdev *kern_dev;
	/* handle supplied by the server; matched by CUSE_IOCTL_DESTROY_DEV */
	struct cuse_dev *user_dev;
};
139 
/*
 * Per-open state of /dev/cuse, allocated in cuse_server_open() and
 * released by cuse_server_unref() when the last reference is dropped.
 */
struct cuse_server {
	/* linkage on the global cuse_server_head list */
	TAILQ_ENTRY(cuse_server) entry;
	/* client commands waiting for CUSE_IOCTL_GET_COMMAND */
	TAILQ_HEAD(, cuse_client_command) head;
	/* devices created by this server */
	TAILQ_HEAD(, cuse_server_dev) hdev;
	/* clients attached to this server */
	TAILQ_HEAD(, cuse_client) hcli;
	/* memory allocations owned by this server */
	TAILQ_HEAD(, cuse_memory) hmem;
	/* per-server mutex, see cuse_server_lock() */
	struct mtx mtx;
	/* signalled whenever a command is appended to "head" */
	struct cv cv;
	/* poll/kqueue state; its knlist is protected by "mtx" */
	struct selinfo selinfo;
	/* PID of the process that opened /dev/cuse */
	pid_t	pid;
	/* non-zero once cuse_server_is_closing() has run */
	int	is_closing;
	/* reference count; the object is freed when it drops to zero */
	int	refs;
};
153 
/*
 * Per-open state of a client device node.
 */
struct cuse_client {
	/* linkage on cuse_server.hcli */
	TAILQ_ENTRY(cuse_client) entry;
	/* second list linkage; its user is not visible here - TODO confirm */
	TAILQ_ENTRY(cuse_client) entry_ref;
	/* one slot per command type */
	struct cuse_client_command cmds[CUSE_CMD_MAX];
	/* server this client talks to */
	struct cuse_server *server;
	/* device the client was opened on; NULL once closing */
	struct cuse_server_dev *server_dev;

	/*
	 * Peer addresses and byte counts of the data currently mirrored
	 * in read_buffer / write_buffer. The server-side optimized copy
	 * path uses them to translate a peer pointer into a buffer offset.
	 */
	uintptr_t read_base;
	uintptr_t write_base;
	int read_length;
	int write_length;
	uint8_t	read_buffer[CUSE_COPY_BUFFER_MAX] __aligned(4);
	uint8_t	write_buffer[CUSE_COPY_BUFFER_MAX] __aligned(4);
	/* staging area addressed by peer pointers based at CUSE_BUF_MIN_PTR */
	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);

	int	fflags;			/* file flags */
	int	cflags;			/* client flags */
/* values for cflags */
#define	CUSE_CLI_IS_CLOSING 0x01
#define	CUSE_CLI_KNOTE_NEED_READ 0x02
#define	CUSE_CLI_KNOTE_NEED_WRITE 0x04
#define	CUSE_CLI_KNOTE_HAS_READ 0x08
#define	CUSE_CLI_KNOTE_HAS_WRITE 0x10
};
177 
/* Non-zero when CUSE_CLI_IS_CLOSING is set in the client's cflags. */
#define	CUSE_CLIENT_CLOSING(pcc) \
    ((pcc)->cflags & CUSE_CLI_IS_CLOSING)
180 
/* malloc(9) type used for every allocation made by this file */
static	MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");

/* all open servers; modified with cuse_global_mtx held */
static TAILQ_HEAD(, cuse_server) cuse_server_head;
static struct mtx cuse_global_mtx;
/* the /dev/cuse node created by cuse_kern_init() */
static struct cdev *cuse_dev;
/*
 * Unit allocation table: slot n is free when cuse_alloc_unit[n] is
 * NULL, otherwise it names the owning server and cuse_alloc_unit_id[n]
 * holds the allocated ID.
 */
static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
188 
/* Prototypes needed before their definitions. */
static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);
static void cuse_client_kqfilter_read_detach(struct knote *kn);
static void cuse_client_kqfilter_write_detach(struct knote *kn);
static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);

/* kqueue filter operations for the read side of a client device */
static struct filterops cuse_client_kqfilter_read_ops = {
	.f_isfd = 1,
	.f_detach = cuse_client_kqfilter_read_detach,
	.f_event = cuse_client_kqfilter_read_event,
};

/* kqueue filter operations for the write side of a client device */
static struct filterops cuse_client_kqfilter_write_ops = {
	.f_isfd = 1,
	.f_detach = cuse_client_kqfilter_write_detach,
	.f_event = cuse_client_kqfilter_write_event,
};
206 
/* Entry points of the client character devices. */
static d_open_t cuse_client_open;
static d_close_t cuse_client_close;
static d_ioctl_t cuse_client_ioctl;
static d_read_t cuse_client_read;
static d_write_t cuse_client_write;
static d_poll_t cuse_client_poll;
static d_mmap_single_t cuse_client_mmap_single;
static d_kqfilter_t cuse_client_kqfilter;

/*
 * Device switch passed to make_dev_credf() for every device node that
 * a server creates through CUSE_IOCTL_CREATE_DEV.
 */
static struct cdevsw cuse_client_devsw = {
	.d_version = D_VERSION,
	.d_open = cuse_client_open,
	.d_close = cuse_client_close,
	.d_ioctl = cuse_client_ioctl,
	.d_name = "cuse_client",
	.d_flags = D_TRACKCLOSE,
	.d_read = cuse_client_read,
	.d_write = cuse_client_write,
	.d_poll = cuse_client_poll,
	.d_mmap_single = cuse_client_mmap_single,
	.d_kqfilter = cuse_client_kqfilter,
};
229 
/* Entry points of the /dev/cuse control device. */
static d_open_t cuse_server_open;
static d_close_t cuse_server_close;
static d_ioctl_t cuse_server_ioctl;
static d_read_t cuse_server_read;
static d_write_t cuse_server_write;
static d_poll_t cuse_server_poll;
static d_mmap_single_t cuse_server_mmap_single;

/* Device switch passed to make_dev() for /dev/cuse in cuse_kern_init(). */
static struct cdevsw cuse_server_devsw = {
	.d_version = D_VERSION,
	.d_open = cuse_server_open,
	.d_close = cuse_server_close,
	.d_ioctl = cuse_server_ioctl,
	.d_name = "cuse_server",
	.d_flags = D_TRACKCLOSE,
	.d_read = cuse_server_read,
	.d_write = cuse_server_write,
	.d_poll = cuse_server_poll,
	.d_mmap_single = cuse_server_mmap_single,
};
250 
/* Used by the server teardown code before their definitions appear. */
static void cuse_client_is_closing(struct cuse_client *);
static int cuse_free_unit_by_id_locked(struct cuse_server *, int);
253 
254 static void
255 cuse_global_lock(void)
256 {
257 	mtx_lock(&cuse_global_mtx);
258 }
259 
260 static void
261 cuse_global_unlock(void)
262 {
263 	mtx_unlock(&cuse_global_mtx);
264 }
265 
266 static void
267 cuse_server_lock(struct cuse_server *pcs)
268 {
269 	mtx_lock(&pcs->mtx);
270 }
271 
272 static void
273 cuse_server_unlock(struct cuse_server *pcs)
274 {
275 	mtx_unlock(&pcs->mtx);
276 }
277 
278 static bool
279 cuse_server_is_locked(struct cuse_server *pcs)
280 {
281 	return (mtx_owned(&pcs->mtx));
282 }
283 
284 static void
285 cuse_cmd_lock(struct cuse_client_command *pccmd)
286 {
287 	sx_xlock(&pccmd->sx);
288 }
289 
290 static void
291 cuse_cmd_unlock(struct cuse_client_command *pccmd)
292 {
293 	sx_xunlock(&pccmd->sx);
294 }
295 
296 static void
297 cuse_kern_init(void *arg)
298 {
299 	TAILQ_INIT(&cuse_server_head);
300 
301 	mtx_init(&cuse_global_mtx, "cuse-global-mtx", NULL, MTX_DEF);
302 
303 	cuse_dev = make_dev(&cuse_server_devsw, 0,
304 	    UID_ROOT, GID_OPERATOR, 0600, "cuse");
305 
306 	printf("Cuse v%d.%d.%d @ /dev/cuse\n",
307 	    (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
308 	    (CUSE_VERSION >> 0) & 0xFF);
309 }
310 SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, NULL);
311 
312 static void
313 cuse_kern_uninit(void *arg)
314 {
315 	void *ptr;
316 
317 	while (1) {
318 		printf("Cuse: Please exit all /dev/cuse instances "
319 		    "and processes which have used this device.\n");
320 
321 		pause("DRAIN", 2 * hz);
322 
323 		cuse_global_lock();
324 		ptr = TAILQ_FIRST(&cuse_server_head);
325 		cuse_global_unlock();
326 
327 		if (ptr == NULL)
328 			break;
329 	}
330 
331 	if (cuse_dev != NULL)
332 		destroy_dev(cuse_dev);
333 
334 	mtx_destroy(&cuse_global_mtx);
335 }
336 SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
337 
338 static int
339 cuse_server_get(struct cuse_server **ppcs)
340 {
341 	struct cuse_server *pcs;
342 	int error;
343 
344 	error = devfs_get_cdevpriv((void **)&pcs);
345 	if (error != 0) {
346 		*ppcs = NULL;
347 		return (error);
348 	}
349 	if (pcs->is_closing) {
350 		*ppcs = NULL;
351 		return (EINVAL);
352 	}
353 	*ppcs = pcs;
354 	return (0);
355 }
356 
357 static void
358 cuse_server_is_closing(struct cuse_server *pcs)
359 {
360 	struct cuse_client *pcc;
361 
362 	if (pcs->is_closing)
363 		return;
364 
365 	pcs->is_closing = 1;
366 
367 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
368 		cuse_client_is_closing(pcc);
369 	}
370 }
371 
372 static struct cuse_client_command *
373 cuse_server_find_command(struct cuse_server *pcs, struct thread *td)
374 {
375 	struct cuse_client *pcc;
376 	int n;
377 
378 	if (pcs->is_closing)
379 		goto done;
380 
381 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
382 		if (CUSE_CLIENT_CLOSING(pcc))
383 			continue;
384 		for (n = 0; n != CUSE_CMD_MAX; n++) {
385 			if (pcc->cmds[n].entered == td)
386 				return (&pcc->cmds[n]);
387 		}
388 	}
389 done:
390 	return (NULL);
391 }
392 
/*
 * Sanitize a NUL-terminated string in place.
 *
 * ASCII letters, digits, '.', '_' and '/' are kept; every other
 * character is overwritten with '_'. The length never changes.
 */
static void
cuse_str_filter(char *ptr)
{
	char *cur;
	int ch;
	int keep;

	for (cur = ptr; (ch = *cur) != 0; cur++) {
		keep = (ch >= 'a' && ch <= 'z') ||
		    (ch >= 'A' && ch <= 'Z') ||
		    (ch >= '0' && ch <= '9') ||
		    ch == '.' || ch == '_' || ch == '/';

		if (!keep)
			*cur = '_';
	}
}
420 
421 static int
422 cuse_convert_error(int error)
423 {
424 	;				/* indent fix */
425 	switch (error) {
426 	case CUSE_ERR_NONE:
427 		return (0);
428 	case CUSE_ERR_BUSY:
429 		return (EBUSY);
430 	case CUSE_ERR_WOULDBLOCK:
431 		return (EWOULDBLOCK);
432 	case CUSE_ERR_INVALID:
433 		return (EINVAL);
434 	case CUSE_ERR_NO_MEMORY:
435 		return (ENOMEM);
436 	case CUSE_ERR_FAULT:
437 		return (EFAULT);
438 	case CUSE_ERR_SIGNAL:
439 		return (EINTR);
440 	case CUSE_ERR_NO_DEVICE:
441 		return (ENODEV);
442 	default:
443 		return (ENXIO);
444 	}
445 }
446 
447 static void
448 cuse_vm_memory_free(struct cuse_memory *mem)
449 {
450 	/* last user is gone - free */
451 	vm_object_deallocate(mem->object);
452 
453 	/* free CUSE memory */
454 	free(mem, M_CUSE);
455 }
456 
457 static int
458 cuse_server_alloc_memory(struct cuse_server *pcs, uint32_t alloc_nr,
459     uint32_t page_count)
460 {
461 	struct cuse_memory *temp;
462 	struct cuse_memory *mem;
463 	vm_object_t object;
464 	int error;
465 
466 	mem = malloc(sizeof(*mem), M_CUSE, M_WAITOK | M_ZERO);
467 
468 	object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * page_count,
469 	    VM_PROT_DEFAULT, 0, curthread->td_ucred);
470 	if (object == NULL) {
471 		error = ENOMEM;
472 		goto error_0;
473 	}
474 
475 	cuse_server_lock(pcs);
476 	/* check if allocation number already exists */
477 	TAILQ_FOREACH(temp, &pcs->hmem, entry) {
478 		if (temp->alloc_nr == alloc_nr)
479 			break;
480 	}
481 	if (temp != NULL) {
482 		cuse_server_unlock(pcs);
483 		error = EBUSY;
484 		goto error_1;
485 	}
486 	mem->object = object;
487 	mem->page_count = page_count;
488 	mem->alloc_nr = alloc_nr;
489 	TAILQ_INSERT_TAIL(&pcs->hmem, mem, entry);
490 	cuse_server_unlock(pcs);
491 
492 	return (0);
493 
494 error_1:
495 	vm_object_deallocate(object);
496 error_0:
497 	free(mem, M_CUSE);
498 	return (error);
499 }
500 
501 static int
502 cuse_server_free_memory(struct cuse_server *pcs, uint32_t alloc_nr)
503 {
504 	struct cuse_memory *mem;
505 
506 	cuse_server_lock(pcs);
507 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
508 		if (mem->alloc_nr == alloc_nr)
509 			break;
510 	}
511 	if (mem == NULL) {
512 		cuse_server_unlock(pcs);
513 		return (EINVAL);
514 	}
515 	TAILQ_REMOVE(&pcs->hmem, mem, entry);
516 	cuse_server_unlock(pcs);
517 
518 	cuse_vm_memory_free(mem);
519 
520 	return (0);
521 }
522 
523 static int
524 cuse_client_get(struct cuse_client **ppcc)
525 {
526 	struct cuse_client *pcc;
527 	int error;
528 
529 	/* try to get private data */
530 	error = devfs_get_cdevpriv((void **)&pcc);
531 	if (error != 0) {
532 		*ppcc = NULL;
533 		return (error);
534 	}
535 	if (CUSE_CLIENT_CLOSING(pcc) || pcc->server->is_closing) {
536 		*ppcc = NULL;
537 		return (EINVAL);
538 	}
539 	*ppcc = pcc;
540 	return (0);
541 }
542 
543 static void
544 cuse_client_is_closing(struct cuse_client *pcc)
545 {
546 	struct cuse_client_command *pccmd;
547 	uint32_t n;
548 
549 	if (CUSE_CLIENT_CLOSING(pcc))
550 		return;
551 
552 	pcc->cflags |= CUSE_CLI_IS_CLOSING;
553 	pcc->server_dev = NULL;
554 
555 	for (n = 0; n != CUSE_CMD_MAX; n++) {
556 		pccmd = &pcc->cmds[n];
557 
558 		if (pccmd->entry.tqe_prev != NULL) {
559 			TAILQ_REMOVE(&pcc->server->head, pccmd, entry);
560 			pccmd->entry.tqe_prev = NULL;
561 		}
562 		cv_broadcast(&pccmd->cv);
563 	}
564 }
565 
566 static void
567 cuse_client_send_command_locked(struct cuse_client_command *pccmd,
568     uintptr_t data_ptr, unsigned long arg, int fflags, int ioflag)
569 {
570 	unsigned long cuse_fflags = 0;
571 	struct cuse_server *pcs;
572 
573 	if (fflags & FREAD)
574 		cuse_fflags |= CUSE_FFLAG_READ;
575 
576 	if (fflags & FWRITE)
577 		cuse_fflags |= CUSE_FFLAG_WRITE;
578 
579 	if (ioflag & IO_NDELAY)
580 		cuse_fflags |= CUSE_FFLAG_NONBLOCK;
581 #if defined(__LP64__)
582 	if (SV_CURPROC_FLAG(SV_ILP32))
583 		cuse_fflags |= CUSE_FFLAG_COMPAT32;
584 #endif
585 	pccmd->sub.fflags = cuse_fflags;
586 	pccmd->sub.data_pointer = data_ptr;
587 	pccmd->sub.argument = arg;
588 
589 	pcs = pccmd->client->server;
590 
591 	if ((pccmd->entry.tqe_prev == NULL) &&
592 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
593 	    (pcs->is_closing == 0)) {
594 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
595 		cv_signal(&pcs->cv);
596 	}
597 }
598 
599 static void
600 cuse_client_got_signal(struct cuse_client_command *pccmd)
601 {
602 	struct cuse_server *pcs;
603 
604 	pccmd->got_signal = 1;
605 
606 	pccmd = &pccmd->client->cmds[CUSE_CMD_SIGNAL];
607 
608 	pcs = pccmd->client->server;
609 
610 	if ((pccmd->entry.tqe_prev == NULL) &&
611 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
612 	    (pcs->is_closing == 0)) {
613 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
614 		cv_signal(&pcs->cv);
615 	}
616 }
617 
/*
 * Wait until the server has completed the command in "pccmd".
 *
 * The condition variable waits use the server mutex (pcs->mtx), so the
 * caller is expected to hold it. While waiting, proc_curr names the
 * current process so that the server-side copy routines can reach it.
 *
 * Returns the value the server stored in pccmd->error, or
 * CUSE_ERR_OTHER when the client or the server is closing.
 * NOTE(review): arg_ptr and arg_len are not used by this function.
 */
static int
cuse_client_receive_command_locked(struct cuse_client_command *pccmd,
    uint8_t *arg_ptr, uint32_t arg_len)
{
	struct cuse_server *pcs;
	int error;

	pcs = pccmd->client->server;
	error = 0;

	/* publish the waiting process for the server's copy routines */
	pccmd->proc_curr = curthread->td_proc;

	if (CUSE_CLIENT_CLOSING(pccmd->client) || pcs->is_closing) {
		error = CUSE_ERR_OTHER;
		goto done;
	}
	while (pccmd->command == CUSE_CMD_NONE) {
		if (error != 0) {
			/*
			 * A signal was already reported to the server;
			 * keep waiting, now without signal interruption.
			 */
			cv_wait(&pccmd->cv, &pcs->mtx);
		} else {
			error = cv_wait_sig(&pccmd->cv, &pcs->mtx);

			/* first interruption: tell the server about it */
			if (error != 0)
				cuse_client_got_signal(pccmd);
		}
		if (CUSE_CLIENT_CLOSING(pccmd->client) || pcs->is_closing) {
			error = CUSE_ERR_OTHER;
			goto done;
		}
	}

	/* completed: pick up the result and reset the slot */
	error = pccmd->error;
	pccmd->command = CUSE_CMD_NONE;
	cv_signal(&pccmd->cv);

done:

	/* wait until all process references are gone */

	pccmd->proc_curr = NULL;

	/* the copy routines signal the cv after dropping their reference */
	while (pccmd->proc_refs != 0)
		cv_wait(&pccmd->cv, &pcs->mtx);

	return (error);
}
664 
665 /*------------------------------------------------------------------------*
666  *	CUSE SERVER PART
667  *------------------------------------------------------------------------*/
668 
669 static void
670 cuse_server_free_dev(struct cuse_server_dev *pcsd)
671 {
672 	struct cuse_server *pcs;
673 	struct cuse_client *pcc;
674 
675 	/* get server pointer */
676 	pcs = pcsd->server;
677 
678 	/* prevent creation of more devices */
679 	cuse_server_lock(pcs);
680 	if (pcsd->kern_dev != NULL)
681 		pcsd->kern_dev->si_drv1 = NULL;
682 
683 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
684 		if (pcc->server_dev == pcsd)
685 			cuse_client_is_closing(pcc);
686 	}
687 	cuse_server_unlock(pcs);
688 
689 	/* destroy device, if any */
690 	if (pcsd->kern_dev != NULL) {
691 		/* destroy device synchronously */
692 		destroy_dev(pcsd->kern_dev);
693 	}
694 	free(pcsd, M_CUSE);
695 }
696 
697 static void
698 cuse_server_unref(struct cuse_server *pcs)
699 {
700 	struct cuse_server_dev *pcsd;
701 	struct cuse_memory *mem;
702 
703 	cuse_server_lock(pcs);
704 	if (--(pcs->refs) != 0) {
705 		cuse_server_unlock(pcs);
706 		return;
707 	}
708 	cuse_server_is_closing(pcs);
709 	/* final client wakeup, if any */
710 	cuse_server_wakeup_all_client_locked(pcs);
711 
712 	cuse_global_lock();
713 	TAILQ_REMOVE(&cuse_server_head, pcs, entry);
714 	cuse_global_unlock();
715 
716 	while ((pcsd = TAILQ_FIRST(&pcs->hdev)) != NULL) {
717 		TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
718 		cuse_server_unlock(pcs);
719 		cuse_server_free_dev(pcsd);
720 		cuse_server_lock(pcs);
721 	}
722 
723 	cuse_free_unit_by_id_locked(pcs, -1);
724 
725 	while ((mem = TAILQ_FIRST(&pcs->hmem)) != NULL) {
726 		TAILQ_REMOVE(&pcs->hmem, mem, entry);
727 		cuse_server_unlock(pcs);
728 		cuse_vm_memory_free(mem);
729 		cuse_server_lock(pcs);
730 	}
731 
732 	knlist_clear(&pcs->selinfo.si_note, 1);
733 	knlist_destroy(&pcs->selinfo.si_note);
734 
735 	cuse_server_unlock(pcs);
736 
737 	seldrain(&pcs->selinfo);
738 
739 	cv_destroy(&pcs->cv);
740 
741 	mtx_destroy(&pcs->mtx);
742 
743 	free(pcs, M_CUSE);
744 }
745 
746 static int
747 cuse_server_do_close(struct cuse_server *pcs)
748 {
749 	int retval;
750 
751 	cuse_server_lock(pcs);
752 	cuse_server_is_closing(pcs);
753 	/* final client wakeup, if any */
754 	cuse_server_wakeup_all_client_locked(pcs);
755 
756 	knlist_clear(&pcs->selinfo.si_note, 1);
757 
758 	retval = pcs->refs;
759 	cuse_server_unlock(pcs);
760 
761 	return (retval);
762 }
763 
764 static void
765 cuse_server_free(void *arg)
766 {
767 	struct cuse_server *pcs = arg;
768 
769 	/*
770 	 * The final server unref should be done by the server thread
771 	 * to prevent deadlock in the client cdevpriv destructor,
772 	 * which cannot destroy itself.
773 	 */
774 	while (cuse_server_do_close(pcs) != 1)
775 		pause("W", hz);
776 
777 	/* drop final refcount */
778 	cuse_server_unref(pcs);
779 }
780 
781 static int
782 cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
783 {
784 	struct cuse_server *pcs;
785 
786 	pcs = malloc(sizeof(*pcs), M_CUSE, M_WAITOK | M_ZERO);
787 
788 	if (devfs_set_cdevpriv(pcs, &cuse_server_free)) {
789 		printf("Cuse: Cannot set cdevpriv.\n");
790 		free(pcs, M_CUSE);
791 		return (ENOMEM);
792 	}
793 	/* store current process ID */
794 	pcs->pid = curproc->p_pid;
795 
796 	TAILQ_INIT(&pcs->head);
797 	TAILQ_INIT(&pcs->hdev);
798 	TAILQ_INIT(&pcs->hcli);
799 	TAILQ_INIT(&pcs->hmem);
800 
801 	cv_init(&pcs->cv, "cuse-server-cv");
802 
803 	mtx_init(&pcs->mtx, "cuse-server-mtx", NULL, MTX_DEF);
804 
805 	knlist_init_mtx(&pcs->selinfo.si_note, &pcs->mtx);
806 
807 	cuse_global_lock();
808 	pcs->refs++;
809 	TAILQ_INSERT_TAIL(&cuse_server_head, pcs, entry);
810 	cuse_global_unlock();
811 
812 	return (0);
813 }
814 
/*
 * Close handler of /dev/cuse. When the server can still be looked up it
 * is put into the closing state; the handler itself always returns 0.
 */
static int
cuse_server_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct cuse_server *server;
	int error;

	error = cuse_server_get(&server);
	if (error == 0)
		(void)cuse_server_do_close(server);

	return (0);
}
825 
/*
 * Read handler of the server device: always fails with ENXIO.
 */
static int
cuse_server_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	const int error = ENXIO;

	return (error);
}
831 
/*
 * Write handler of the server device: always fails with ENXIO.
 */
static int
cuse_server_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	const int error = ENXIO;

	return (error);
}
837 
838 static int
839 cuse_server_ioctl_copy_locked(struct cuse_server *pcs,
840     struct cuse_client_command *pccmd,
841     struct cuse_data_chunk *pchk, bool isread)
842 {
843 	struct proc *p_proc;
844 	uint32_t offset;
845 	int error;
846 
847 	offset = pchk->peer_ptr - CUSE_BUF_MIN_PTR;
848 
849 	if (pchk->length > CUSE_BUFFER_MAX)
850 		return (EFAULT);
851 
852 	if (offset >= CUSE_BUFFER_MAX)
853 		return (EFAULT);
854 
855 	if ((offset + pchk->length) > CUSE_BUFFER_MAX)
856 		return (EFAULT);
857 
858 	p_proc = pccmd->proc_curr;
859 	if (p_proc == NULL)
860 		return (ENXIO);
861 
862 	if (pccmd->proc_refs < 0)
863 		return (ENOMEM);
864 
865 	pccmd->proc_refs++;
866 
867 	cuse_server_unlock(pcs);
868 
869 	if (!isread) {
870 		error = copyin(
871 		    (void *)pchk->local_ptr,
872 		    pccmd->client->ioctl_buffer + offset,
873 		    pchk->length);
874 	} else {
875 		error = copyout(
876 		    pccmd->client->ioctl_buffer + offset,
877 		    (void *)pchk->local_ptr,
878 		    pchk->length);
879 	}
880 
881 	cuse_server_lock(pcs);
882 
883 	pccmd->proc_refs--;
884 
885 	if (pccmd->proc_curr == NULL)
886 		cv_signal(&pccmd->cv);
887 
888 	return (error);
889 }
890 
891 static int
892 cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
893     struct proc *proc_d, vm_offset_t data_d, size_t len)
894 {
895 	struct thread *td;
896 	struct proc *proc_cur;
897 	int error;
898 
899 	td = curthread;
900 	proc_cur = td->td_proc;
901 
902 	if (proc_cur == proc_d) {
903 		struct iovec iov = {
904 			.iov_base = (caddr_t)data_d,
905 			.iov_len = len,
906 		};
907 		struct uio uio = {
908 			.uio_iov = &iov,
909 			.uio_iovcnt = 1,
910 			.uio_offset = (off_t)data_s,
911 			.uio_resid = len,
912 			.uio_segflg = UIO_USERSPACE,
913 			.uio_rw = UIO_READ,
914 			.uio_td = td,
915 		};
916 
917 		PHOLD(proc_s);
918 		error = proc_rwmem(proc_s, &uio);
919 		PRELE(proc_s);
920 
921 	} else if (proc_cur == proc_s) {
922 		struct iovec iov = {
923 			.iov_base = (caddr_t)data_s,
924 			.iov_len = len,
925 		};
926 		struct uio uio = {
927 			.uio_iov = &iov,
928 			.uio_iovcnt = 1,
929 			.uio_offset = (off_t)data_d,
930 			.uio_resid = len,
931 			.uio_segflg = UIO_USERSPACE,
932 			.uio_rw = UIO_WRITE,
933 			.uio_td = td,
934 		};
935 
936 		PHOLD(proc_d);
937 		error = proc_rwmem(proc_d, &uio);
938 		PRELE(proc_d);
939 	} else {
940 		error = EINVAL;
941 	}
942 	return (error);
943 }
944 
945 static int
946 cuse_server_data_copy_locked(struct cuse_server *pcs,
947     struct cuse_client_command *pccmd,
948     struct cuse_data_chunk *pchk, bool isread)
949 {
950 	struct proc *p_proc;
951 	int error;
952 
953 	p_proc = pccmd->proc_curr;
954 	if (p_proc == NULL)
955 		return (ENXIO);
956 
957 	if (pccmd->proc_refs < 0)
958 		return (ENOMEM);
959 
960 	pccmd->proc_refs++;
961 
962 	cuse_server_unlock(pcs);
963 
964 	if (!isread) {
965 		error = cuse_proc2proc_copy(
966 		    curthread->td_proc, pchk->local_ptr,
967 		    p_proc, pchk->peer_ptr,
968 		    pchk->length);
969 	} else {
970 		error = cuse_proc2proc_copy(
971 		    p_proc, pchk->peer_ptr,
972 		    curthread->td_proc, pchk->local_ptr,
973 		    pchk->length);
974 	}
975 
976 	cuse_server_lock(pcs);
977 
978 	pccmd->proc_refs--;
979 
980 	if (pccmd->proc_curr == NULL)
981 		cv_signal(&pccmd->cv);
982 
983 	return (error);
984 }
985 
986 static int
987 cuse_server_data_copy_optimized_locked(struct cuse_server *pcs,
988     struct cuse_client_command *pccmd,
989     struct cuse_data_chunk *pchk, bool isread)
990 {
991 	uintptr_t offset;
992 	int error;
993 
994 	/*
995 	 * Check if data is stored locally to avoid accessing
996 	 * other process's data space:
997 	 */
998 	if (isread) {
999 		offset = pchk->peer_ptr - pccmd->client->write_base;
1000 
1001 		if (offset < (uintptr_t)pccmd->client->write_length &&
1002 		    pchk->length <= (unsigned long)pccmd->client->write_length &&
1003 		    offset + pchk->length <= (uintptr_t)pccmd->client->write_length) {
1004 			cuse_server_unlock(pcs);
1005 			error = copyout(pccmd->client->write_buffer + offset,
1006 			    (void *)pchk->local_ptr, pchk->length);
1007 			goto done;
1008 		}
1009 	} else {
1010 		offset = pchk->peer_ptr - pccmd->client->read_base;
1011 
1012 		if (offset < (uintptr_t)pccmd->client->read_length &&
1013 		    pchk->length <= (unsigned long)pccmd->client->read_length &&
1014 		    offset + pchk->length <= (uintptr_t)pccmd->client->read_length) {
1015 			cuse_server_unlock(pcs);
1016 			error = copyin((void *)pchk->local_ptr,
1017 			    pccmd->client->read_buffer + offset, pchk->length);
1018 			goto done;
1019 		}
1020 	}
1021 
1022 	/* use process to process copy function */
1023 	error = cuse_server_data_copy_locked(pcs, pccmd, pchk, isread);
1024 done:
1025 	return (error);
1026 }
1027 
1028 static int
1029 cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
1030 {
1031 	int n;
1032 	int x = 0;
1033 	int match;
1034 
1035 	do {
1036 		for (match = n = 0; n != CUSE_DEVICES_MAX; n++) {
1037 			if (cuse_alloc_unit[n] != NULL) {
1038 				if ((cuse_alloc_unit_id[n] ^ id) & CUSE_ID_MASK)
1039 					continue;
1040 				if ((cuse_alloc_unit_id[n] & ~CUSE_ID_MASK) == x) {
1041 					x++;
1042 					match = 1;
1043 				}
1044 			}
1045 		}
1046 	} while (match);
1047 
1048 	if (x < 256) {
1049 		for (n = 0; n != CUSE_DEVICES_MAX; n++) {
1050 			if (cuse_alloc_unit[n] == NULL) {
1051 				cuse_alloc_unit[n] = pcs;
1052 				cuse_alloc_unit_id[n] = id | x;
1053 				return (x);
1054 			}
1055 		}
1056 	}
1057 	return (-1);
1058 }
1059 
1060 static void
1061 cuse_server_wakeup_locked(struct cuse_server *pcs)
1062 {
1063 	selwakeup(&pcs->selinfo);
1064 	KNOTE_LOCKED(&pcs->selinfo.si_note, 0);
1065 }
1066 
1067 static void
1068 cuse_server_wakeup_all_client_locked(struct cuse_server *pcs)
1069 {
1070 	struct cuse_client *pcc;
1071 
1072 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
1073 		pcc->cflags |= (CUSE_CLI_KNOTE_NEED_READ |
1074 		    CUSE_CLI_KNOTE_NEED_WRITE);
1075 	}
1076 	cuse_server_wakeup_locked(pcs);
1077 }
1078 
1079 static int
1080 cuse_free_unit_by_id_locked(struct cuse_server *pcs, int id)
1081 {
1082 	int n;
1083 	int found = 0;
1084 
1085 	for (n = 0; n != CUSE_DEVICES_MAX; n++) {
1086 		if (cuse_alloc_unit[n] == pcs) {
1087 			if (cuse_alloc_unit_id[n] == id || id == -1) {
1088 				cuse_alloc_unit[n] = NULL;
1089 				cuse_alloc_unit_id[n] = 0;
1090 				found = 1;
1091 			}
1092 		}
1093 	}
1094 
1095 	return (found ? 0 : EINVAL);
1096 }
1097 
1098 static int
1099 cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
1100     caddr_t data, int fflag, struct thread *td)
1101 {
1102 	struct cuse_server *pcs;
1103 	int error;
1104 
1105 	error = cuse_server_get(&pcs);
1106 	if (error != 0)
1107 		return (error);
1108 
1109 	switch (cmd) {
1110 		struct cuse_client_command *pccmd;
1111 		struct cuse_client *pcc;
1112 		struct cuse_command *pcmd;
1113 		struct cuse_alloc_info *pai;
1114 		struct cuse_create_dev *pcd;
1115 		struct cuse_server_dev *pcsd;
1116 		struct cuse_data_chunk *pchk;
1117 		int n;
1118 
1119 	case CUSE_IOCTL_GET_COMMAND:
1120 		pcmd = (void *)data;
1121 
1122 		cuse_server_lock(pcs);
1123 
1124 		while ((pccmd = TAILQ_FIRST(&pcs->head)) == NULL) {
1125 			error = cv_wait_sig(&pcs->cv, &pcs->mtx);
1126 
1127 			if (pcs->is_closing)
1128 				error = ENXIO;
1129 
1130 			if (error) {
1131 				cuse_server_unlock(pcs);
1132 				return (error);
1133 			}
1134 		}
1135 
1136 		TAILQ_REMOVE(&pcs->head, pccmd, entry);
1137 		pccmd->entry.tqe_prev = NULL;
1138 
1139 		pccmd->entered = curthread;
1140 
1141 		*pcmd = pccmd->sub;
1142 
1143 		cuse_server_unlock(pcs);
1144 
1145 		break;
1146 
1147 	case CUSE_IOCTL_SYNC_COMMAND:
1148 
1149 		cuse_server_lock(pcs);
1150 		while ((pccmd = cuse_server_find_command(pcs, curthread)) != NULL) {
1151 			/* send sync command */
1152 			pccmd->entered = NULL;
1153 			pccmd->error = *(int *)data;
1154 			pccmd->command = CUSE_CMD_SYNC;
1155 
1156 			/* signal peer, if any */
1157 			cv_signal(&pccmd->cv);
1158 		}
1159 		cuse_server_unlock(pcs);
1160 
1161 		break;
1162 
1163 	case CUSE_IOCTL_ALLOC_UNIT:
1164 
1165 		cuse_server_lock(pcs);
1166 		n = cuse_alloc_unit_by_id_locked(pcs,
1167 		    CUSE_ID_DEFAULT(0));
1168 		cuse_server_unlock(pcs);
1169 
1170 		if (n < 0)
1171 			error = ENOMEM;
1172 		else
1173 			*(int *)data = n;
1174 		break;
1175 
1176 	case CUSE_IOCTL_ALLOC_UNIT_BY_ID:
1177 
1178 		n = *(int *)data;
1179 
1180 		n = (n & CUSE_ID_MASK);
1181 
1182 		cuse_server_lock(pcs);
1183 		n = cuse_alloc_unit_by_id_locked(pcs, n);
1184 		cuse_server_unlock(pcs);
1185 
1186 		if (n < 0)
1187 			error = ENOMEM;
1188 		else
1189 			*(int *)data = n;
1190 		break;
1191 
1192 	case CUSE_IOCTL_FREE_UNIT:
1193 
1194 		n = *(int *)data;
1195 
1196 		n = CUSE_ID_DEFAULT(n);
1197 
1198 		cuse_server_lock(pcs);
1199 		error = cuse_free_unit_by_id_locked(pcs, n);
1200 		cuse_server_unlock(pcs);
1201 		break;
1202 
1203 	case CUSE_IOCTL_FREE_UNIT_BY_ID:
1204 
1205 		n = *(int *)data;
1206 
1207 		cuse_server_lock(pcs);
1208 		error = cuse_free_unit_by_id_locked(pcs, n);
1209 		cuse_server_unlock(pcs);
1210 		break;
1211 
1212 	case CUSE_IOCTL_ALLOC_MEMORY:
1213 
1214 		pai = (void *)data;
1215 
1216 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1217 			error = ENOMEM;
1218 			break;
1219 		}
1220 		if (pai->page_count > CUSE_ALLOC_PAGES_MAX) {
1221 			error = ENOMEM;
1222 			break;
1223 		}
1224 		error = cuse_server_alloc_memory(pcs,
1225 		    pai->alloc_nr, pai->page_count);
1226 		break;
1227 
1228 	case CUSE_IOCTL_FREE_MEMORY:
1229 		pai = (void *)data;
1230 
1231 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1232 			error = ENOMEM;
1233 			break;
1234 		}
1235 		error = cuse_server_free_memory(pcs, pai->alloc_nr);
1236 		break;
1237 
1238 	case CUSE_IOCTL_GET_SIG:
1239 
1240 		cuse_server_lock(pcs);
1241 		pccmd = cuse_server_find_command(pcs, curthread);
1242 
1243 		if (pccmd != NULL) {
1244 			n = pccmd->got_signal;
1245 			pccmd->got_signal = 0;
1246 		} else {
1247 			n = 0;
1248 		}
1249 		cuse_server_unlock(pcs);
1250 
1251 		*(int *)data = n;
1252 
1253 		break;
1254 
1255 	case CUSE_IOCTL_SET_PFH:
1256 
1257 		cuse_server_lock(pcs);
1258 		pccmd = cuse_server_find_command(pcs, curthread);
1259 
1260 		if (pccmd != NULL) {
1261 			pcc = pccmd->client;
1262 			for (n = 0; n != CUSE_CMD_MAX; n++) {
1263 				pcc->cmds[n].sub.per_file_handle = *(uintptr_t *)data;
1264 			}
1265 		} else {
1266 			error = ENXIO;
1267 		}
1268 		cuse_server_unlock(pcs);
1269 		break;
1270 
1271 	case CUSE_IOCTL_CREATE_DEV:
1272 
1273 		error = priv_check(curthread, PRIV_DRIVER);
1274 		if (error)
1275 			break;
1276 
1277 		pcd = (void *)data;
1278 
1279 		/* filter input */
1280 
1281 		pcd->devname[sizeof(pcd->devname) - 1] = 0;
1282 
1283 		if (pcd->devname[0] == 0) {
1284 			error = EINVAL;
1285 			break;
1286 		}
1287 		cuse_str_filter(pcd->devname);
1288 
1289 		pcd->permissions &= 0777;
1290 
1291 		/* try to allocate a character device */
1292 
1293 		pcsd = malloc(sizeof(*pcsd), M_CUSE, M_WAITOK | M_ZERO);
1294 
1295 		pcsd->server = pcs;
1296 
1297 		pcsd->user_dev = pcd->dev;
1298 
1299 		pcsd->kern_dev = make_dev_credf(MAKEDEV_CHECKNAME,
1300 		    &cuse_client_devsw, 0, NULL, pcd->user_id, pcd->group_id,
1301 		    pcd->permissions, "%s", pcd->devname);
1302 
1303 		if (pcsd->kern_dev == NULL) {
1304 			free(pcsd, M_CUSE);
1305 			error = ENOMEM;
1306 			break;
1307 		}
1308 		pcsd->kern_dev->si_drv1 = pcsd;
1309 
1310 		cuse_server_lock(pcs);
1311 		TAILQ_INSERT_TAIL(&pcs->hdev, pcsd, entry);
1312 		cuse_server_unlock(pcs);
1313 
1314 		break;
1315 
1316 	case CUSE_IOCTL_DESTROY_DEV:
1317 
1318 		error = priv_check(curthread, PRIV_DRIVER);
1319 		if (error)
1320 			break;
1321 
1322 		cuse_server_lock(pcs);
1323 
1324 		error = EINVAL;
1325 
1326 		pcsd = TAILQ_FIRST(&pcs->hdev);
1327 		while (pcsd != NULL) {
1328 			if (pcsd->user_dev == *(struct cuse_dev **)data) {
1329 				TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
1330 				cuse_server_unlock(pcs);
1331 				cuse_server_free_dev(pcsd);
1332 				cuse_server_lock(pcs);
1333 				error = 0;
1334 				pcsd = TAILQ_FIRST(&pcs->hdev);
1335 			} else {
1336 				pcsd = TAILQ_NEXT(pcsd, entry);
1337 			}
1338 		}
1339 
1340 		cuse_server_unlock(pcs);
1341 		break;
1342 
1343 	case CUSE_IOCTL_WRITE_DATA:
1344 	case CUSE_IOCTL_READ_DATA:
1345 
1346 		cuse_server_lock(pcs);
1347 		pchk = (struct cuse_data_chunk *)data;
1348 
1349 		pccmd = cuse_server_find_command(pcs, curthread);
1350 
1351 		if (pccmd == NULL) {
1352 			error = ENXIO;	/* invalid request */
1353 		} else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
1354 			error = EFAULT;	/* NULL pointer */
1355 		} else if (pchk->length == 0) {
1356 			/* NOP */
1357 		} else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
1358 			error = cuse_server_ioctl_copy_locked(pcs, pccmd,
1359 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1360 		} else {
1361 			error = cuse_server_data_copy_optimized_locked(
1362 			    pcs, pccmd, pchk, cmd == CUSE_IOCTL_READ_DATA);
1363 		}
1364 
1365 		/*
1366 		 * Sometimes the functions above drop the server lock
1367 		 * early as an optimization:
1368 		 */
1369 		if (cuse_server_is_locked(pcs))
1370 			cuse_server_unlock(pcs);
1371 		break;
1372 
1373 	case CUSE_IOCTL_SELWAKEUP:
1374 		cuse_server_lock(pcs);
1375 		/*
1376 		 * We don't know which direction caused the event.
1377 		 * Wakeup both!
1378 		 */
1379 		cuse_server_wakeup_all_client_locked(pcs);
1380 		cuse_server_unlock(pcs);
1381 		break;
1382 
1383 	default:
1384 		error = ENXIO;
1385 		break;
1386 	}
1387 	return (error);
1388 }
1389 
/*
 * Poll handler for the server side device node.
 *
 * No state is consulted: every requested event that belongs to the
 * set listed below is reported back as pending. "dev" and "td" are
 * unused.
 */
static int
cuse_server_poll(struct cdev *dev, int events, struct thread *td)
{
	const int reportable = POLLHUP | POLLPRI | POLLIN |
	    POLLRDNORM | POLLOUT | POLLWRNORM;

	return (events & reportable);
}
1396 
1397 static int
1398 cuse_common_mmap_single(struct cuse_server *pcs,
1399     vm_ooffset_t *offset, vm_size_t size, struct vm_object **object)
1400 {
1401   	struct cuse_memory *mem;
1402 	int error;
1403 
1404 	/* verify size */
1405 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE))
1406 		return (EINVAL);
1407 
1408 	cuse_server_lock(pcs);
1409 	error = ENOMEM;
1410 
1411 	/* lookup memory structure, if any */
1412 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
1413 		vm_ooffset_t min_off;
1414 		vm_ooffset_t max_off;
1415 
1416 		min_off = (mem->alloc_nr << CUSE_ALLOC_UNIT_SHIFT);
1417 		max_off = min_off + (PAGE_SIZE * mem->page_count);
1418 
1419 		if (*offset >= min_off && *offset < max_off) {
1420 			/* range check size */
1421 			if (size > (max_off - *offset)) {
1422 				error = EINVAL;
1423 			} else {
1424 				/* get new VM object offset to use */
1425 				*offset -= min_off;
1426 				vm_object_reference(mem->object);
1427 				*object = mem->object;
1428 				error = 0;
1429 			}
1430 			break;
1431 		}
1432 	}
1433 	cuse_server_unlock(pcs);
1434 	return (error);
1435 }
1436 
1437 static int
1438 cuse_server_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1439     vm_size_t size, struct vm_object **object, int nprot)
1440 {
1441 	struct cuse_server *pcs;
1442 	int error;
1443 
1444 	error = cuse_server_get(&pcs);
1445 	if (error != 0)
1446 		return (error);
1447 
1448 	return (cuse_common_mmap_single(pcs, offset, size, object));
1449 }
1450 
1451 /*------------------------------------------------------------------------*
1452  *	CUSE CLIENT PART
1453  *------------------------------------------------------------------------*/
1454 static void
1455 cuse_client_free(void *arg)
1456 {
1457 	struct cuse_client *pcc = arg;
1458 	struct cuse_client_command *pccmd;
1459 	struct cuse_server *pcs;
1460 	int n;
1461 
1462 	pcs = pcc->server;
1463 
1464 	cuse_server_lock(pcs);
1465 	cuse_client_is_closing(pcc);
1466 	TAILQ_REMOVE(&pcs->hcli, pcc, entry);
1467 	cuse_server_unlock(pcs);
1468 
1469 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1470 		pccmd = &pcc->cmds[n];
1471 
1472 		sx_destroy(&pccmd->sx);
1473 		cv_destroy(&pccmd->cv);
1474 	}
1475 
1476 	free(pcc, M_CUSE);
1477 
1478 	/* drop reference on server */
1479 	cuse_server_unref(pcs);
1480 }
1481 
1482 static int
1483 cuse_client_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
1484 {
1485 	struct cuse_client_command *pccmd;
1486 	struct cuse_server_dev *pcsd;
1487 	struct cuse_client *pcc;
1488 	struct cuse_server *pcs;
1489 	struct cuse_dev *pcd;
1490 	int error;
1491 	int n;
1492 
1493 	pcsd = dev->si_drv1;
1494 	if (pcsd != NULL) {
1495 		pcs = pcsd->server;
1496 		pcd = pcsd->user_dev;
1497 
1498 		cuse_server_lock(pcs);
1499 		/*
1500 		 * Check that the refcount didn't wrap and that the
1501 		 * same process is not both client and server. This
1502 		 * can easily lead to deadlocks when destroying the
1503 		 * CUSE character device nodes:
1504 		 */
1505 		pcs->refs++;
1506 		if (pcs->refs < 0 || pcs->pid == curproc->p_pid) {
1507 			/* overflow or wrong PID */
1508 			pcs->refs--;
1509 			cuse_server_unlock(pcs);
1510 			return (EINVAL);
1511 		}
1512 		cuse_server_unlock(pcs);
1513 	} else {
1514 		return (EINVAL);
1515 	}
1516 
1517 	pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
1518 	if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
1519 		printf("Cuse: Cannot set cdevpriv.\n");
1520 		/* drop reference on server */
1521 		cuse_server_unref(pcs);
1522 		free(pcc, M_CUSE);
1523 		return (ENOMEM);
1524 	}
1525 	pcc->fflags = fflags;
1526 	pcc->server_dev = pcsd;
1527 	pcc->server = pcs;
1528 
1529 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1530 		pccmd = &pcc->cmds[n];
1531 
1532 		pccmd->sub.dev = pcd;
1533 		pccmd->sub.command = n;
1534 		pccmd->client = pcc;
1535 
1536 		sx_init(&pccmd->sx, "cuse-client-sx");
1537 		cv_init(&pccmd->cv, "cuse-client-cv");
1538 	}
1539 
1540 	cuse_server_lock(pcs);
1541 
1542 	/* cuse_client_free() assumes that the client is listed somewhere! */
1543 	/* always enqueue */
1544 
1545 	TAILQ_INSERT_TAIL(&pcs->hcli, pcc, entry);
1546 
1547 	/* check if server is closing */
1548 	if ((pcs->is_closing != 0) || (dev->si_drv1 == NULL)) {
1549 		error = EINVAL;
1550 	} else {
1551 		error = 0;
1552 	}
1553 	cuse_server_unlock(pcs);
1554 
1555 	if (error) {
1556 		devfs_clear_cdevpriv();	/* XXX bugfix */
1557 		return (error);
1558 	}
1559 	pccmd = &pcc->cmds[CUSE_CMD_OPEN];
1560 
1561 	cuse_cmd_lock(pccmd);
1562 
1563 	cuse_server_lock(pcs);
1564 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1565 
1566 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1567 	cuse_server_unlock(pcs);
1568 
1569 	if (error < 0) {
1570 		error = cuse_convert_error(error);
1571 	} else {
1572 		error = 0;
1573 	}
1574 
1575 	cuse_cmd_unlock(pccmd);
1576 
1577 	if (error)
1578 		devfs_clear_cdevpriv();	/* XXX bugfix */
1579 
1580 	return (error);
1581 }
1582 
1583 static int
1584 cuse_client_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
1585 {
1586 	struct cuse_client_command *pccmd;
1587 	struct cuse_client *pcc;
1588 	struct cuse_server *pcs;
1589 	int error;
1590 
1591 	error = cuse_client_get(&pcc);
1592 	if (error != 0)
1593 		return (0);
1594 
1595 	pccmd = &pcc->cmds[CUSE_CMD_CLOSE];
1596 	pcs = pcc->server;
1597 
1598 	cuse_cmd_lock(pccmd);
1599 
1600 	cuse_server_lock(pcs);
1601 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1602 
1603 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1604 	cuse_cmd_unlock(pccmd);
1605 
1606 	cuse_client_is_closing(pcc);
1607 	cuse_server_unlock(pcs);
1608 
1609 	return (0);
1610 }
1611 
1612 static void
1613 cuse_client_kqfilter_poll(struct cdev *dev, struct cuse_client *pcc)
1614 {
1615 	struct cuse_server *pcs = pcc->server;
1616 	int temp;
1617 
1618 	cuse_server_lock(pcs);
1619 	temp = (pcc->cflags & (CUSE_CLI_KNOTE_HAS_READ |
1620 	    CUSE_CLI_KNOTE_HAS_WRITE));
1621 	pcc->cflags &= ~(CUSE_CLI_KNOTE_NEED_READ |
1622 	    CUSE_CLI_KNOTE_NEED_WRITE);
1623 	cuse_server_unlock(pcs);
1624 
1625 	if (temp != 0) {
1626 		/* get the latest polling state from the server */
1627 		temp = cuse_client_poll(dev, POLLIN | POLLOUT, NULL);
1628 
1629 		if (temp & (POLLIN | POLLOUT)) {
1630 			cuse_server_lock(pcs);
1631 			if (temp & POLLIN)
1632 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_READ;
1633 			if (temp & POLLOUT)
1634 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_WRITE;
1635 
1636 			/* make sure the "knote" gets woken up */
1637 			cuse_server_wakeup_locked(pcc->server);
1638 			cuse_server_unlock(pcs);
1639 		}
1640 	}
1641 }
1642 
1643 static int
1644 cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
1645 {
1646 	struct cuse_client_command *pccmd;
1647 	struct cuse_client *pcc;
1648 	struct cuse_server *pcs;
1649 	int error;
1650 	int temp;
1651 	int len;
1652 
1653 	error = cuse_client_get(&pcc);
1654 	if (error != 0)
1655 		return (error);
1656 
1657 	pccmd = &pcc->cmds[CUSE_CMD_READ];
1658 	pcs = pcc->server;
1659 
1660 	if (uio->uio_segflg != UIO_USERSPACE) {
1661 		return (EINVAL);
1662 	}
1663 	uio->uio_segflg = UIO_NOCOPY;
1664 
1665 	cuse_cmd_lock(pccmd);
1666 
1667 	while (uio->uio_resid != 0) {
1668 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1669 			error = ENOMEM;
1670 			break;
1671 		}
1672 		len = uio->uio_iov->iov_len;
1673 
1674 		cuse_server_lock(pcs);
1675 		if (len <= CUSE_COPY_BUFFER_MAX) {
1676 			/* set read buffer region for small reads */
1677 			pcc->read_base = (uintptr_t)uio->uio_iov->iov_base;
1678 			pcc->read_length = len;
1679 		}
1680 		cuse_client_send_command_locked(pccmd,
1681 		    (uintptr_t)uio->uio_iov->iov_base,
1682 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1683 
1684 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1685 		/*
1686 		 * After finishing reading data, disable the read
1687 		 * region for the cuse_server_data_copy_optimized_locked()
1688 		 * function:
1689 		 */
1690 		pcc->read_base = 0;
1691 		pcc->read_length = 0;
1692 		cuse_server_unlock(pcs);
1693 
1694 		/*
1695 		 * The return value indicates the read length, when
1696 		 * not negative. Range check it just in case to avoid
1697 		 * passing invalid length values to uiomove().
1698 		 */
1699 		if (error > len) {
1700 			error = ERANGE;
1701 			break;
1702 		} else if (error > 0 && len <= CUSE_COPY_BUFFER_MAX) {
1703 			temp = copyout(pcc->read_buffer,
1704 			    uio->uio_iov->iov_base, error);
1705 			if (temp != 0) {
1706 				error = temp;
1707 				break;
1708 			}
1709 		}
1710 		if (error < 0) {
1711 			error = cuse_convert_error(error);
1712 			break;
1713 		} else if (error == len) {
1714 			error = uiomove(NULL, error, uio);
1715 			if (error)
1716 				break;
1717 		} else {
1718 			error = uiomove(NULL, error, uio);
1719 			break;
1720 		}
1721 	}
1722 	cuse_cmd_unlock(pccmd);
1723 
1724 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1725 
1726 	if (error == EWOULDBLOCK)
1727 		cuse_client_kqfilter_poll(dev, pcc);
1728 
1729 	return (error);
1730 }
1731 
1732 static int
1733 cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
1734 {
1735 	struct cuse_client_command *pccmd;
1736 	struct cuse_client *pcc;
1737 	struct cuse_server *pcs;
1738 	int error;
1739 	int len;
1740 
1741 	error = cuse_client_get(&pcc);
1742 	if (error != 0)
1743 		return (error);
1744 
1745 	pccmd = &pcc->cmds[CUSE_CMD_WRITE];
1746 	pcs = pcc->server;
1747 
1748 	if (uio->uio_segflg != UIO_USERSPACE) {
1749 		return (EINVAL);
1750 	}
1751 	uio->uio_segflg = UIO_NOCOPY;
1752 
1753 	cuse_cmd_lock(pccmd);
1754 
1755 	while (uio->uio_resid != 0) {
1756 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1757 			error = ENOMEM;
1758 			break;
1759 		}
1760 		len = uio->uio_iov->iov_len;
1761 
1762 		if (len <= CUSE_COPY_BUFFER_MAX) {
1763 			error = copyin(uio->uio_iov->iov_base,
1764 			    pcc->write_buffer, len);
1765 			if (error != 0)
1766 				break;
1767 		}
1768 
1769 		cuse_server_lock(pcs);
1770 		if (len <= CUSE_COPY_BUFFER_MAX) {
1771 			/* set write buffer region for small writes */
1772 			pcc->write_base = (uintptr_t)uio->uio_iov->iov_base;
1773 			pcc->write_length = len;
1774 		}
1775 		cuse_client_send_command_locked(pccmd,
1776 		    (uintptr_t)uio->uio_iov->iov_base,
1777 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1778 
1779 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1780 
1781 		/*
1782 		 * After finishing writing data, disable the write
1783 		 * region for the cuse_server_data_copy_optimized_locked()
1784 		 * function:
1785 		 */
1786 		pcc->write_base = 0;
1787 		pcc->write_length = 0;
1788 		cuse_server_unlock(pcs);
1789 
1790 		/*
1791 		 * The return value indicates the write length, when
1792 		 * not negative. Range check it just in case to avoid
1793 		 * passing invalid length values to uiomove().
1794 		 */
1795 		if (error > len) {
1796 			error = ERANGE;
1797 			break;
1798 		} else if (error < 0) {
1799 			error = cuse_convert_error(error);
1800 			break;
1801 		} else if (error == len) {
1802 			error = uiomove(NULL, error, uio);
1803 			if (error)
1804 				break;
1805 		} else {
1806 			error = uiomove(NULL, error, uio);
1807 			break;
1808 		}
1809 	}
1810 	cuse_cmd_unlock(pccmd);
1811 
1812 	/* restore segment flag */
1813 	uio->uio_segflg = UIO_USERSPACE;
1814 
1815 	if (error == EWOULDBLOCK)
1816 		cuse_client_kqfilter_poll(dev, pcc);
1817 
1818 	return (error);
1819 }
1820 
1821 int
1822 cuse_client_ioctl(struct cdev *dev, unsigned long cmd,
1823     caddr_t data, int fflag, struct thread *td)
1824 {
1825 	struct cuse_client_command *pccmd;
1826 	struct cuse_client *pcc;
1827 	struct cuse_server *pcs;
1828 	int error;
1829 	int len;
1830 
1831 	error = cuse_client_get(&pcc);
1832 	if (error != 0)
1833 		return (error);
1834 
1835 	len = IOCPARM_LEN(cmd);
1836 	if (len > CUSE_BUFFER_MAX)
1837 		return (ENOMEM);
1838 
1839 	pccmd = &pcc->cmds[CUSE_CMD_IOCTL];
1840 	pcs = pcc->server;
1841 
1842 	cuse_cmd_lock(pccmd);
1843 
1844 	if (cmd & (IOC_IN | IOC_VOID))
1845 		memcpy(pcc->ioctl_buffer, data, len);
1846 
1847 	/*
1848 	 * When the ioctl-length is zero drivers can pass information
1849 	 * through the data pointer of the ioctl. Make sure this information
1850 	 * is forwarded to the driver.
1851 	 */
1852 
1853 	cuse_server_lock(pcs);
1854 	cuse_client_send_command_locked(pccmd,
1855 	    (len == 0) ? *(long *)data : CUSE_BUF_MIN_PTR,
1856 	    (unsigned long)cmd, pcc->fflags,
1857 	    (fflag & O_NONBLOCK) ? IO_NDELAY : 0);
1858 
1859 	error = cuse_client_receive_command_locked(pccmd, data, len);
1860 	cuse_server_unlock(pcs);
1861 
1862 	if (error < 0) {
1863 		error = cuse_convert_error(error);
1864 	} else {
1865 		error = 0;
1866 	}
1867 
1868 	if (cmd & IOC_OUT)
1869 		memcpy(data, pcc->ioctl_buffer, len);
1870 
1871 	cuse_cmd_unlock(pccmd);
1872 
1873 	if (error == EWOULDBLOCK)
1874 		cuse_client_kqfilter_poll(dev, pcc);
1875 
1876 	return (error);
1877 }
1878 
1879 static int
1880 cuse_client_poll(struct cdev *dev, int events, struct thread *td)
1881 {
1882 	struct cuse_client_command *pccmd;
1883 	struct cuse_client *pcc;
1884 	struct cuse_server *pcs;
1885 	unsigned long temp;
1886 	int error;
1887 	int revents;
1888 
1889 	error = cuse_client_get(&pcc);
1890 	if (error != 0)
1891 		goto pollnval;
1892 
1893 	temp = 0;
1894 	pcs = pcc->server;
1895 
1896 	if (events & (POLLPRI | POLLIN | POLLRDNORM))
1897 		temp |= CUSE_POLL_READ;
1898 
1899 	if (events & (POLLOUT | POLLWRNORM))
1900 		temp |= CUSE_POLL_WRITE;
1901 
1902 	if (events & POLLHUP)
1903 		temp |= CUSE_POLL_ERROR;
1904 
1905 	pccmd = &pcc->cmds[CUSE_CMD_POLL];
1906 
1907 	cuse_cmd_lock(pccmd);
1908 
1909 	/* Need to selrecord() first to not loose any events. */
1910 	if (temp != 0 && td != NULL)
1911 		selrecord(td, &pcs->selinfo);
1912 
1913 	cuse_server_lock(pcs);
1914 	cuse_client_send_command_locked(pccmd,
1915 	    0, temp, pcc->fflags, IO_NDELAY);
1916 
1917 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1918 	cuse_server_unlock(pcs);
1919 
1920 	cuse_cmd_unlock(pccmd);
1921 
1922 	if (error < 0) {
1923 		goto pollnval;
1924 	} else {
1925 		revents = 0;
1926 		if (error & CUSE_POLL_READ)
1927 			revents |= (events & (POLLPRI | POLLIN | POLLRDNORM));
1928 		if (error & CUSE_POLL_WRITE)
1929 			revents |= (events & (POLLOUT | POLLWRNORM));
1930 		if (error & CUSE_POLL_ERROR)
1931 			revents |= (events & POLLHUP);
1932 	}
1933 	return (revents);
1934 
1935 pollnval:
1936 	/* XXX many clients don't understand POLLNVAL */
1937 	return (events & (POLLHUP | POLLPRI | POLLIN |
1938 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1939 }
1940 
1941 static int
1942 cuse_client_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1943     vm_size_t size, struct vm_object **object, int nprot)
1944 {
1945 	struct cuse_client *pcc;
1946 	int error;
1947 
1948 	error = cuse_client_get(&pcc);
1949 	if (error != 0)
1950 		return (error);
1951 
1952 	return (cuse_common_mmap_single(pcc->server, offset, size, object));
1953 }
1954 
1955 static void
1956 cuse_client_kqfilter_read_detach(struct knote *kn)
1957 {
1958 	struct cuse_client *pcc;
1959 	struct cuse_server *pcs;
1960 
1961 	pcc = kn->kn_hook;
1962 	pcs = pcc->server;
1963 
1964 	cuse_server_lock(pcs);
1965 	knlist_remove(&pcs->selinfo.si_note, kn, 1);
1966 	cuse_server_unlock(pcs);
1967 }
1968 
1969 static void
1970 cuse_client_kqfilter_write_detach(struct knote *kn)
1971 {
1972 	struct cuse_client *pcc;
1973 	struct cuse_server *pcs;
1974 
1975 	pcc = kn->kn_hook;
1976 	pcs = pcc->server;
1977 
1978 	cuse_server_lock(pcs);
1979 	knlist_remove(&pcs->selinfo.si_note, kn, 1);
1980 	cuse_server_unlock(pcs);
1981 }
1982 
1983 static int
1984 cuse_client_kqfilter_read_event(struct knote *kn, long hint)
1985 {
1986 	struct cuse_client *pcc;
1987 
1988 	pcc = kn->kn_hook;
1989 
1990 	mtx_assert(&pcc->server->mtx, MA_OWNED);
1991 
1992 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_READ) ? 1 : 0);
1993 }
1994 
1995 static int
1996 cuse_client_kqfilter_write_event(struct knote *kn, long hint)
1997 {
1998 	struct cuse_client *pcc;
1999 
2000 	pcc = kn->kn_hook;
2001 
2002 	mtx_assert(&pcc->server->mtx, MA_OWNED);
2003 
2004 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_WRITE) ? 1 : 0);
2005 }
2006 
2007 static int
2008 cuse_client_kqfilter(struct cdev *dev, struct knote *kn)
2009 {
2010 	struct cuse_client *pcc;
2011 	struct cuse_server *pcs;
2012 	int error;
2013 
2014 	error = cuse_client_get(&pcc);
2015 	if (error != 0)
2016 		return (error);
2017 
2018 	pcs = pcc->server;
2019 
2020 	cuse_server_lock(pcs);
2021 	switch (kn->kn_filter) {
2022 	case EVFILT_READ:
2023 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_READ;
2024 		kn->kn_hook = pcc;
2025 		kn->kn_fop = &cuse_client_kqfilter_read_ops;
2026 		knlist_add(&pcs->selinfo.si_note, kn, 1);
2027 		break;
2028 	case EVFILT_WRITE:
2029 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_WRITE;
2030 		kn->kn_hook = pcc;
2031 		kn->kn_fop = &cuse_client_kqfilter_write_ops;
2032 		knlist_add(&pcs->selinfo.si_note, kn, 1);
2033 		break;
2034 	default:
2035 		error = EINVAL;
2036 		break;
2037 	}
2038 	cuse_server_unlock(pcs);
2039 
2040 	if (error == 0)
2041 		cuse_client_kqfilter_poll(dev, pcc);
2042 	return (error);
2043 }
2044