xref: /freebsd/sys/fs/cuse/cuse.c (revision 7cc42f6d25ef2e19059d088fa7d4853fe9afefb5)
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2010-2020 Hans Petter Selasky. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/stdint.h>
28 #include <sys/stddef.h>
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/conf.h>
33 #include <sys/kernel.h>
34 #include <sys/bus.h>
35 #include <sys/linker_set.h>
36 #include <sys/module.h>
37 #include <sys/lock.h>
38 #include <sys/mutex.h>
39 #include <sys/condvar.h>
40 #include <sys/sysctl.h>
41 #include <sys/unistd.h>
42 #include <sys/malloc.h>
43 #include <sys/priv.h>
44 #include <sys/uio.h>
45 #include <sys/poll.h>
46 #include <sys/sx.h>
47 #include <sys/rwlock.h>
48 #include <sys/queue.h>
49 #include <sys/fcntl.h>
50 #include <sys/proc.h>
51 #include <sys/vnode.h>
52 #include <sys/selinfo.h>
53 #include <sys/ptrace.h>
54 #include <sys/sysent.h>
55 
56 #include <machine/bus.h>
57 
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60 #include <vm/vm_object.h>
61 #include <vm/vm_page.h>
62 #include <vm/vm_pager.h>
63 
64 #include <fs/cuse/cuse_defs.h>
65 #include <fs/cuse/cuse_ioctl.h>
66 
67 MODULE_VERSION(cuse, 1);
68 
69 /*
70  * Prevent cuse4bsd.ko and cuse.ko from loading at the same time by
71  * declaring support for the cuse4bsd interface in cuse.ko:
72  */
73 MODULE_VERSION(cuse4bsd, 1);
74 
75 #ifdef FEATURE
76 FEATURE(cuse, "Userspace character devices");
77 #endif
78 
79 struct cuse_command;
80 struct cuse_server;
81 struct cuse_client;
82 
/*
 * Per-client command slot.  Each client has one of these for every
 * CUSE_CMD_* type; it carries a request to the userland server and
 * the completion state back.
 */
struct cuse_client_command {
	TAILQ_ENTRY(cuse_client_command) entry;	/* linkage on server's "head" queue */
	struct cuse_command sub;	/* request arguments passed to userland */
	struct sx sx;			/* serializes callers issuing this command */
	struct cv cv;			/* completion / reference-drain wakeup */
	struct thread *entered;		/* server thread currently servicing this command */
	struct cuse_client *client;	/* back pointer to owning client */
	struct proc *proc_curr;		/* peer process for data copies, if any */
	int	proc_refs;		/* outstanding cross-process copy references */
	int	got_signal;		/* set when the sleeping caller got a signal */
	int	error;			/* completion status set by the server */
	int	command;		/* current command state (CUSE_CMD_*) */
};
96 
/* One shared-memory allocation made by a server via CUSE_IOCTL_ALLOC_MEMORY. */
struct cuse_memory {
	TAILQ_ENTRY(cuse_memory) entry;	/* linkage on server's "hmem" list */
	vm_object_t object;		/* backing swap-based VM object */
	uint32_t page_count;		/* allocation size in pages */
	uint32_t alloc_nr;		/* allocation slot number */
};
103 
/* One character device created by a server via CUSE_IOCTL_CREATE_DEV. */
struct cuse_server_dev {
	TAILQ_ENTRY(cuse_server_dev) entry;	/* linkage on server's "hdev" list */
	struct cuse_server *server;	/* owning server */
	struct cdev *kern_dev;		/* kernel character device node */
	struct cuse_dev *user_dev;	/* opaque userland device handle */
};
110 
/* State of one /dev/cuse open (a userland device server instance). */
struct cuse_server {
	TAILQ_ENTRY(cuse_server) entry;		/* linkage on global server list */
	TAILQ_HEAD(, cuse_client_command) head;	/* commands pending for userland */
	TAILQ_HEAD(, cuse_client) hcli;		/* attached clients */
	TAILQ_HEAD(, cuse_server_dev) hdev;	/* devices created by this server */
	TAILQ_HEAD(, cuse_memory) hmem;		/* shared memory allocations */
	struct mtx mtx;		/* protects the lists and fields above */
	struct cv cv;		/* wakeup for newly queued commands */
	struct selinfo selinfo;	/* poll/kqueue support for clients */
	pid_t	pid;		/* PID of the process that opened /dev/cuse */
	int	is_closing;	/* set once teardown has started */
	int	refs;		/* reference count; last unref frees */
};
124 
/* State of one open of a server-created character device. */
struct cuse_client {
	TAILQ_ENTRY(cuse_client) entry;		/* linkage on server's "hcli" list */
	TAILQ_ENTRY(cuse_client) entry_ref;
	struct cuse_client_command cmds[CUSE_CMD_MAX];	/* one slot per command type */
	struct cuse_server *server;		/* back pointer to owning server */
	struct cuse_server_dev *server_dev;	/* device this client opened; NULL once closing */

	/* bounce buffer for ioctl argument data exchanged with the server */
	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);

	int	fflags;			/* file flags */
	int	cflags;			/* client flags */
#define	CUSE_CLI_IS_CLOSING 0x01
#define	CUSE_CLI_KNOTE_NEED_READ 0x02
#define	CUSE_CLI_KNOTE_NEED_WRITE 0x04
#define	CUSE_CLI_KNOTE_HAS_READ 0x08
#define	CUSE_CLI_KNOTE_HAS_WRITE 0x10
};
142 
/* true when the given client has started closing down */
#define	CUSE_CLIENT_CLOSING(pcc) \
    ((pcc)->cflags & CUSE_CLI_IS_CLOSING)

static	MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");

/* list of all active servers, protected by cuse_global_mtx */
static TAILQ_HEAD(, cuse_server) cuse_server_head;
static struct mtx cuse_global_mtx;
static struct cdev *cuse_dev;	/* the /dev/cuse control device */
/* unit-number allocation tables, indexed in parallel */
static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
153 
154 static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);
155 static void cuse_client_kqfilter_read_detach(struct knote *kn);
156 static void cuse_client_kqfilter_write_detach(struct knote *kn);
157 static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
158 static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);
159 
/* kqueue EVFILT_READ filter operations for client devices */
static struct filterops cuse_client_kqfilter_read_ops = {
	.f_isfd = 1,
	.f_detach = cuse_client_kqfilter_read_detach,
	.f_event = cuse_client_kqfilter_read_event,
};
165 
/* kqueue EVFILT_WRITE filter operations for client devices */
static struct filterops cuse_client_kqfilter_write_ops = {
	.f_isfd = 1,
	.f_detach = cuse_client_kqfilter_write_detach,
	.f_event = cuse_client_kqfilter_write_event,
};
171 
172 static d_open_t cuse_client_open;
173 static d_close_t cuse_client_close;
174 static d_ioctl_t cuse_client_ioctl;
175 static d_read_t cuse_client_read;
176 static d_write_t cuse_client_write;
177 static d_poll_t cuse_client_poll;
178 static d_mmap_single_t cuse_client_mmap_single;
179 static d_kqfilter_t cuse_client_kqfilter;
180 
/* cdevsw for character devices created on behalf of userland servers */
static struct cdevsw cuse_client_devsw = {
	.d_version = D_VERSION,
	.d_open = cuse_client_open,
	.d_close = cuse_client_close,
	.d_ioctl = cuse_client_ioctl,
	.d_name = "cuse_client",
	.d_flags = D_TRACKCLOSE,
	.d_read = cuse_client_read,
	.d_write = cuse_client_write,
	.d_poll = cuse_client_poll,
	.d_mmap_single = cuse_client_mmap_single,
	.d_kqfilter = cuse_client_kqfilter,
};
194 
195 static d_open_t cuse_server_open;
196 static d_close_t cuse_server_close;
197 static d_ioctl_t cuse_server_ioctl;
198 static d_read_t cuse_server_read;
199 static d_write_t cuse_server_write;
200 static d_poll_t cuse_server_poll;
201 static d_mmap_single_t cuse_server_mmap_single;
202 
/* cdevsw for the /dev/cuse control device used by userland servers */
static struct cdevsw cuse_server_devsw = {
	.d_version = D_VERSION,
	.d_open = cuse_server_open,
	.d_close = cuse_server_close,
	.d_ioctl = cuse_server_ioctl,
	.d_name = "cuse_server",
	.d_flags = D_TRACKCLOSE,
	.d_read = cuse_server_read,
	.d_write = cuse_server_write,
	.d_poll = cuse_server_poll,
	.d_mmap_single = cuse_server_mmap_single,
};
215 
216 static void cuse_client_is_closing(struct cuse_client *);
217 static int cuse_free_unit_by_id_locked(struct cuse_server *, int);
218 
/*
 * Locking helpers: thin wrappers around the global mutex, the
 * per-server mutex and the per-command exclusive lock.
 */
static void
cuse_global_lock(void)
{
	mtx_lock(&cuse_global_mtx);
}

static void
cuse_global_unlock(void)
{
	mtx_unlock(&cuse_global_mtx);
}

static void
cuse_server_lock(struct cuse_server *pcs)
{
	mtx_lock(&pcs->mtx);
}

static void
cuse_server_unlock(struct cuse_server *pcs)
{
	mtx_unlock(&pcs->mtx);
}

/* serialize callers issuing the same command slot */
static void
cuse_cmd_lock(struct cuse_client_command *pccmd)
{
	sx_xlock(&pccmd->sx);
}

static void
cuse_cmd_unlock(struct cuse_client_command *pccmd)
{
	sx_xunlock(&pccmd->sx);
}
254 
/*
 * Module load hook: initialize the global server list and mutex and
 * create the /dev/cuse control device.
 */
static void
cuse_kern_init(void *arg)
{
	TAILQ_INIT(&cuse_server_head);

	mtx_init(&cuse_global_mtx, "cuse-global-mtx", NULL, MTX_DEF);

	cuse_dev = make_dev(&cuse_server_devsw, 0,
	    UID_ROOT, GID_OPERATOR, 0600, "cuse");

	printf("Cuse v%d.%d.%d @ /dev/cuse\n",
	    (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
	    (CUSE_VERSION >> 0) & 0xFF);
}
SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, NULL);
270 
/*
 * Module unload hook: wait until every server instance has gone away
 * (polling the global list every two seconds), then destroy the
 * /dev/cuse control device and the global mutex.
 */
static void
cuse_kern_uninit(void *arg)
{
	void *ptr;

	while (1) {
		printf("Cuse: Please exit all /dev/cuse instances "
		    "and processes which have used this device.\n");

		pause("DRAIN", 2 * hz);

		cuse_global_lock();
		ptr = TAILQ_FIRST(&cuse_server_head);
		cuse_global_unlock();

		/* done when no servers remain on the global list */
		if (ptr == NULL)
			break;
	}

	if (cuse_dev != NULL)
		destroy_dev(cuse_dev);

	mtx_destroy(&cuse_global_mtx);
}
SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
296 
297 static int
298 cuse_server_get(struct cuse_server **ppcs)
299 {
300 	struct cuse_server *pcs;
301 	int error;
302 
303 	error = devfs_get_cdevpriv((void **)&pcs);
304 	if (error != 0) {
305 		*ppcs = NULL;
306 		return (error);
307 	}
308 	if (pcs->is_closing) {
309 		*ppcs = NULL;
310 		return (EINVAL);
311 	}
312 	*ppcs = pcs;
313 	return (0);
314 }
315 
316 static void
317 cuse_server_is_closing(struct cuse_server *pcs)
318 {
319 	struct cuse_client *pcc;
320 
321 	if (pcs->is_closing)
322 		return;
323 
324 	pcs->is_closing = 1;
325 
326 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
327 		cuse_client_is_closing(pcc);
328 	}
329 }
330 
331 static struct cuse_client_command *
332 cuse_server_find_command(struct cuse_server *pcs, struct thread *td)
333 {
334 	struct cuse_client *pcc;
335 	int n;
336 
337 	if (pcs->is_closing)
338 		goto done;
339 
340 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
341 		if (CUSE_CLIENT_CLOSING(pcc))
342 			continue;
343 		for (n = 0; n != CUSE_CMD_MAX; n++) {
344 			if (pcc->cmds[n].entered == td)
345 				return (&pcc->cmds[n]);
346 		}
347 	}
348 done:
349 	return (NULL);
350 }
351 
/*
 * Sanitize a device name in place: any character outside the set
 * [a-zA-Z0-9._/] is replaced by an underscore.
 */
static void
cuse_str_filter(char *ptr)
{
	char *p;
	char ch;

	for (p = ptr; (ch = *p) != 0; p++) {
		if ((ch >= 'a' && ch <= 'z') ||
		    (ch >= 'A' && ch <= 'Z') ||
		    (ch >= '0' && ch <= '9') ||
		    ch == '.' || ch == '_' || ch == '/')
			continue;
		*p = '_';
	}
}
379 
380 static int
381 cuse_convert_error(int error)
382 {
383 	;				/* indent fix */
384 	switch (error) {
385 	case CUSE_ERR_NONE:
386 		return (0);
387 	case CUSE_ERR_BUSY:
388 		return (EBUSY);
389 	case CUSE_ERR_WOULDBLOCK:
390 		return (EWOULDBLOCK);
391 	case CUSE_ERR_INVALID:
392 		return (EINVAL);
393 	case CUSE_ERR_NO_MEMORY:
394 		return (ENOMEM);
395 	case CUSE_ERR_FAULT:
396 		return (EFAULT);
397 	case CUSE_ERR_SIGNAL:
398 		return (EINTR);
399 	case CUSE_ERR_NO_DEVICE:
400 		return (ENODEV);
401 	default:
402 		return (ENXIO);
403 	}
404 }
405 
/*
 * Release one shared-memory allocation: drop the backing VM object
 * reference and free the bookkeeping structure.  Must be called
 * without the server lock held.
 */
static void
cuse_vm_memory_free(struct cuse_memory *mem)
{
	/* last user is gone - free */
	vm_object_deallocate(mem->object);

	/* free CUSE memory */
	free(mem, M_CUSE);
}
415 
/*
 * Allocate a swap-backed VM object of "page_count" pages and register
 * it on the server under allocation number "alloc_nr".
 *
 * Returns 0 on success, ENOMEM when the VM object cannot be created
 * and EBUSY when "alloc_nr" is already in use.  The VM object is
 * created before taking the server lock since vm_pager_allocate()
 * may sleep.
 */
static int
cuse_server_alloc_memory(struct cuse_server *pcs, uint32_t alloc_nr,
    uint32_t page_count)
{
	struct cuse_memory *temp;
	struct cuse_memory *mem;
	vm_object_t object;
	int error;

	mem = malloc(sizeof(*mem), M_CUSE, M_WAITOK | M_ZERO);

	object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * page_count,
	    VM_PROT_DEFAULT, 0, curthread->td_ucred);
	if (object == NULL) {
		error = ENOMEM;
		goto error_0;
	}

	cuse_server_lock(pcs);
	/* check if allocation number already exists */
	TAILQ_FOREACH(temp, &pcs->hmem, entry) {
		if (temp->alloc_nr == alloc_nr)
			break;
	}
	if (temp != NULL) {
		cuse_server_unlock(pcs);
		error = EBUSY;
		goto error_1;
	}
	mem->object = object;
	mem->page_count = page_count;
	mem->alloc_nr = alloc_nr;
	TAILQ_INSERT_TAIL(&pcs->hmem, mem, entry);
	cuse_server_unlock(pcs);

	return (0);

error_1:
	vm_object_deallocate(object);
error_0:
	free(mem, M_CUSE);
	return (error);
}
459 
460 static int
461 cuse_server_free_memory(struct cuse_server *pcs, uint32_t alloc_nr)
462 {
463 	struct cuse_memory *mem;
464 
465 	cuse_server_lock(pcs);
466 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
467 		if (mem->alloc_nr == alloc_nr)
468 			break;
469 	}
470 	if (mem == NULL) {
471 		cuse_server_unlock(pcs);
472 		return (EINVAL);
473 	}
474 	TAILQ_REMOVE(&pcs->hmem, mem, entry);
475 	cuse_server_unlock(pcs);
476 
477 	cuse_vm_memory_free(mem);
478 
479 	return (0);
480 }
481 
482 static int
483 cuse_client_get(struct cuse_client **ppcc)
484 {
485 	struct cuse_client *pcc;
486 	int error;
487 
488 	/* try to get private data */
489 	error = devfs_get_cdevpriv((void **)&pcc);
490 	if (error != 0) {
491 		*ppcc = NULL;
492 		return (error);
493 	}
494 	if (CUSE_CLIENT_CLOSING(pcc) || pcc->server->is_closing) {
495 		*ppcc = NULL;
496 		return (EINVAL);
497 	}
498 	*ppcc = pcc;
499 	return (0);
500 }
501 
/*
 * Mark a client as closing (idempotent) and detach its pending
 * commands.  Called with the server lock held.  Any command still
 * queued on the server's "head" list is unlinked, and every command
 * condvar is broadcast so sleeping threads can notice the shutdown.
 */
static void
cuse_client_is_closing(struct cuse_client *pcc)
{
	struct cuse_client_command *pccmd;
	uint32_t n;

	if (CUSE_CLIENT_CLOSING(pcc))
		return;

	pcc->cflags |= CUSE_CLI_IS_CLOSING;
	pcc->server_dev = NULL;

	for (n = 0; n != CUSE_CMD_MAX; n++) {
		pccmd = &pcc->cmds[n];

		/* a non-NULL tqe_prev means the command is still queued */
		if (pccmd->entry.tqe_prev != NULL) {
			TAILQ_REMOVE(&pcc->server->head, pccmd, entry);
			pccmd->entry.tqe_prev = NULL;
		}
		cv_broadcast(&pccmd->cv);
	}
}
524 
/*
 * Queue a command towards the userland server.  Called with the
 * server lock held.  Kernel file flags (FREAD/FWRITE) and IO_NDELAY
 * are translated into CUSE_FFLAG_* bits, the request arguments are
 * stored in the command slot, and - unless the client or server is
 * closing or the command is already queued - the command is appended
 * to the server's queue and a server thread sleeping in
 * CUSE_IOCTL_GET_COMMAND is woken up.
 */
static void
cuse_client_send_command_locked(struct cuse_client_command *pccmd,
    uintptr_t data_ptr, unsigned long arg, int fflags, int ioflag)
{
	unsigned long cuse_fflags = 0;
	struct cuse_server *pcs;

	if (fflags & FREAD)
		cuse_fflags |= CUSE_FFLAG_READ;

	if (fflags & FWRITE)
		cuse_fflags |= CUSE_FFLAG_WRITE;

	if (ioflag & IO_NDELAY)
		cuse_fflags |= CUSE_FFLAG_NONBLOCK;
#if defined(__LP64__)
	/* tell 64-bit servers when the caller is a 32-bit process */
	if (SV_CURPROC_FLAG(SV_ILP32))
		cuse_fflags |= CUSE_FFLAG_COMPAT32;
#endif
	pccmd->sub.fflags = cuse_fflags;
	pccmd->sub.data_pointer = data_ptr;
	pccmd->sub.argument = arg;

	pcs = pccmd->client->server;

	/* tqe_prev == NULL means the command is not queued yet */
	if ((pccmd->entry.tqe_prev == NULL) &&
	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
	    (pcs->is_closing == 0)) {
		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
		cv_signal(&pcs->cv);
	}
}
557 
/*
 * Record that the caller sleeping on "pccmd" received a signal and
 * queue the client's CUSE_CMD_SIGNAL command towards the server so
 * it can pick the event up (see CUSE_IOCTL_GET_SIG).  Called with
 * the server lock held.
 */
static void
cuse_client_got_signal(struct cuse_client_command *pccmd)
{
	struct cuse_server *pcs;

	pccmd->got_signal = 1;

	/* switch to the client's dedicated signal command slot */
	pccmd = &pccmd->client->cmds[CUSE_CMD_SIGNAL];

	pcs = pccmd->client->server;

	if ((pccmd->entry.tqe_prev == NULL) &&
	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
	    (pcs->is_closing == 0)) {
		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
		cv_signal(&pcs->cv);
	}
}
576 
/*
 * Wait for the userland server to complete the given command.
 *
 * Called with the server lock held.  Sleeps until the server marks
 * the command complete (sets "command" to a non-NONE state via
 * CUSE_IOCTL_SYNC_COMMAND), then consumes the completion and resets
 * the slot to CUSE_CMD_NONE.  The first signal received while
 * sleeping is forwarded to the server via cuse_client_got_signal();
 * afterwards the wait becomes non-interruptible.  Before returning,
 * waits until all outstanding cross-process copy references on this
 * command have drained.  Returns the server's CUSE_ERR_* style
 * status, or CUSE_ERR_OTHER when the client/server is closing.
 * The "arg_ptr"/"arg_len" parameters are unused here.
 */
static int
cuse_client_receive_command_locked(struct cuse_client_command *pccmd,
    uint8_t *arg_ptr, uint32_t arg_len)
{
	struct cuse_server *pcs;
	int error;

	pcs = pccmd->client->server;
	error = 0;

	/* expose our process so the server can copy data to/from it */
	pccmd->proc_curr = curthread->td_proc;

	if (CUSE_CLIENT_CLOSING(pccmd->client) || pcs->is_closing) {
		error = CUSE_ERR_OTHER;
		goto done;
	}
	while (pccmd->command == CUSE_CMD_NONE) {
		if (error != 0) {
			/* already signalled once: sleep uninterruptibly */
			cv_wait(&pccmd->cv, &pcs->mtx);
		} else {
			error = cv_wait_sig(&pccmd->cv, &pcs->mtx);

			if (error != 0)
				cuse_client_got_signal(pccmd);
		}
		if (CUSE_CLIENT_CLOSING(pccmd->client) || pcs->is_closing) {
			error = CUSE_ERR_OTHER;
			goto done;
		}
	}

	error = pccmd->error;
	pccmd->command = CUSE_CMD_NONE;
	cv_signal(&pccmd->cv);

done:

	/* wait until all process references are gone */

	pccmd->proc_curr = NULL;

	while (pccmd->proc_refs != 0)
		cv_wait(&pccmd->cv, &pcs->mtx);

	return (error);
}
623 
624 /*------------------------------------------------------------------------*
625  *	CUSE SERVER PART
626  *------------------------------------------------------------------------*/
627 
/*
 * Destroy one character device created by a server.  Under the server
 * lock, the device's si_drv1 back-pointer is cleared (preventing new
 * opens from reaching us) and every client opened through this device
 * is marked closing.  The kernel device is then destroyed
 * synchronously - without the lock, since destroy_dev() may sleep -
 * and the bookkeeping structure is freed.
 */
static void
cuse_server_free_dev(struct cuse_server_dev *pcsd)
{
	struct cuse_server *pcs;
	struct cuse_client *pcc;

	/* get server pointer */
	pcs = pcsd->server;

	/* prevent creation of more devices */
	cuse_server_lock(pcs);
	if (pcsd->kern_dev != NULL)
		pcsd->kern_dev->si_drv1 = NULL;

	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
		if (pcc->server_dev == pcsd)
			cuse_client_is_closing(pcc);
	}
	cuse_server_unlock(pcs);

	/* destroy device, if any */
	if (pcsd->kern_dev != NULL) {
		/* destroy device synchronously */
		destroy_dev(pcsd->kern_dev);
	}
	free(pcsd, M_CUSE);
}
655 
/*
 * Drop one reference on a server; the final reference tears the
 * server down completely: mark closing, wake all clients, unlink
 * from the global list, destroy every created device, release all
 * unit numbers and shared memory, tear down the knote list, drain
 * select and free the structure.  The server lock is dropped around
 * cuse_server_free_dev() and cuse_vm_memory_free() since those must
 * be called unlocked.
 */
static void
cuse_server_unref(struct cuse_server *pcs)
{
	struct cuse_server_dev *pcsd;
	struct cuse_memory *mem;

	cuse_server_lock(pcs);
	if (--(pcs->refs) != 0) {
		cuse_server_unlock(pcs);
		return;
	}
	cuse_server_is_closing(pcs);
	/* final client wakeup, if any */
	cuse_server_wakeup_all_client_locked(pcs);

	cuse_global_lock();
	TAILQ_REMOVE(&cuse_server_head, pcs, entry);
	cuse_global_unlock();

	while ((pcsd = TAILQ_FIRST(&pcs->hdev)) != NULL) {
		TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
		cuse_server_unlock(pcs);
		cuse_server_free_dev(pcsd);
		cuse_server_lock(pcs);
	}

	/* -1 releases all unit numbers owned by this server */
	cuse_free_unit_by_id_locked(pcs, -1);

	while ((mem = TAILQ_FIRST(&pcs->hmem)) != NULL) {
		TAILQ_REMOVE(&pcs->hmem, mem, entry);
		cuse_server_unlock(pcs);
		cuse_vm_memory_free(mem);
		cuse_server_lock(pcs);
	}

	knlist_clear(&pcs->selinfo.si_note, 1);
	knlist_destroy(&pcs->selinfo.si_note);

	cuse_server_unlock(pcs);

	seldrain(&pcs->selinfo);

	cv_destroy(&pcs->cv);

	mtx_destroy(&pcs->mtx);

	free(pcs, M_CUSE);
}
704 
/*
 * Initiate server shutdown: mark the server closing, wake up all
 * clients and clear the kqueue notes.  Returns the current reference
 * count so the caller can tell when only its own reference remains.
 */
static int
cuse_server_do_close(struct cuse_server *pcs)
{
	int retval;

	cuse_server_lock(pcs);
	cuse_server_is_closing(pcs);
	/* final client wakeup, if any */
	cuse_server_wakeup_all_client_locked(pcs);

	knlist_clear(&pcs->selinfo.si_note, 1);

	retval = pcs->refs;
	cuse_server_unlock(pcs);

	return (retval);
}
722 
/*
 * cdevpriv destructor for /dev/cuse: wait until only our own
 * reference remains, then drop it, freeing the server.
 */
static void
cuse_server_free(void *arg)
{
	struct cuse_server *pcs = arg;

	/*
	 * The final server unref should be done by the server thread
	 * to prevent deadlock in the client cdevpriv destructor,
	 * which cannot destroy itself.
	 */
	while (cuse_server_do_close(pcs) != 1)
		pause("W", hz);

	/* drop final refcount */
	cuse_server_unref(pcs);
}
739 
/*
 * Open of /dev/cuse: allocate and initialize a fresh server instance,
 * attach it as cdevpriv and link it onto the global server list.
 * The initial reference taken here is dropped by cuse_server_free()
 * when the file handle goes away.
 */
static int
cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
{
	struct cuse_server *pcs;

	pcs = malloc(sizeof(*pcs), M_CUSE, M_WAITOK | M_ZERO);

	if (devfs_set_cdevpriv(pcs, &cuse_server_free)) {
		printf("Cuse: Cannot set cdevpriv.\n");
		free(pcs, M_CUSE);
		return (ENOMEM);
	}
	/* store current process ID */
	pcs->pid = curproc->p_pid;

	TAILQ_INIT(&pcs->head);
	TAILQ_INIT(&pcs->hdev);
	TAILQ_INIT(&pcs->hcli);
	TAILQ_INIT(&pcs->hmem);

	cv_init(&pcs->cv, "cuse-server-cv");

	mtx_init(&pcs->mtx, "cuse-server-mtx", NULL, MTX_DEF);

	/* knotes are protected by the server mutex */
	knlist_init_mtx(&pcs->selinfo.si_note, &pcs->mtx);

	cuse_global_lock();
	pcs->refs++;
	TAILQ_INSERT_TAIL(&cuse_server_head, pcs, entry);
	cuse_global_unlock();

	return (0);
}
773 
/*
 * Close of /dev/cuse: start shutting the server down, if it is still
 * resolvable.  The actual teardown happens in the cdevpriv destructor
 * (cuse_server_free).  Always succeeds.
 */
static int
cuse_server_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct cuse_server *pcs;
	int error;

	error = cuse_server_get(&pcs);
	if (error == 0)
		(void)cuse_server_do_close(pcs);

	return (0);
}
784 
/* The server control device does not support read(2). */
static int
cuse_server_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	return (ENXIO);
}

/* The server control device does not support write(2). */
static int
cuse_server_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	return (ENXIO);
}
796 
/*
 * Copy ioctl data between the server process and the client's bounce
 * buffer.  "peer_ptr" encodes an offset into the buffer relative to
 * CUSE_BUF_MIN_PTR; offset and length are bounds-checked against
 * CUSE_BUFFER_MAX.  Called with the server lock held; the lock is
 * dropped around copyin/copyout, with "proc_refs" keeping the
 * command slot referenced meanwhile.  The final cv_signal wakes a
 * peer draining references in cuse_client_receive_command_locked().
 * "isread" non-zero copies from the buffer to the server (copyout),
 * zero copies from the server into the buffer (copyin).
 */
static int
cuse_server_ioctl_copy_locked(struct cuse_server *pcs,
    struct cuse_client_command *pccmd,
    struct cuse_data_chunk *pchk, int isread)
{
	struct proc *p_proc;
	uint32_t offset;
	int error;

	offset = pchk->peer_ptr - CUSE_BUF_MIN_PTR;

	if (pchk->length > CUSE_BUFFER_MAX)
		return (EFAULT);

	if (offset >= CUSE_BUFFER_MAX)
		return (EFAULT);

	if ((offset + pchk->length) > CUSE_BUFFER_MAX)
		return (EFAULT);

	p_proc = pccmd->proc_curr;
	if (p_proc == NULL)
		return (ENXIO);

	/* guard against reference counter overflow */
	if (pccmd->proc_refs < 0)
		return (ENOMEM);

	pccmd->proc_refs++;

	cuse_server_unlock(pcs);

	if (isread == 0) {
		error = copyin(
		    (void *)pchk->local_ptr,
		    pccmd->client->ioctl_buffer + offset,
		    pchk->length);
	} else {
		error = copyout(
		    pccmd->client->ioctl_buffer + offset,
		    (void *)pchk->local_ptr,
		    pchk->length);
	}

	cuse_server_lock(pcs);

	pccmd->proc_refs--;

	/* wake up the peer if it is waiting for references to drain */
	if (pccmd->proc_curr == NULL)
		cv_signal(&pccmd->cv);

	return (error);
}
849 
/*
 * Copy "len" bytes from address "data_s" in process "proc_s" to
 * address "data_d" in process "proc_d".  One of the two processes
 * must be the current one; the other side is accessed via
 * proc_rwmem() while held with PHOLD().  Returns EINVAL when neither
 * process is the current one.
 */
static int
cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
    struct proc *proc_d, vm_offset_t data_d, size_t len)
{
	struct thread *td;
	struct proc *proc_cur;
	int error;

	td = curthread;
	proc_cur = td->td_proc;

	if (proc_cur == proc_d) {
		/* we are the destination: read from the source process */
		struct iovec iov = {
			.iov_base = (caddr_t)data_d,
			.iov_len = len,
		};
		struct uio uio = {
			.uio_iov = &iov,
			.uio_iovcnt = 1,
			.uio_offset = (off_t)data_s,
			.uio_resid = len,
			.uio_segflg = UIO_USERSPACE,
			.uio_rw = UIO_READ,
			.uio_td = td,
		};

		PHOLD(proc_s);
		error = proc_rwmem(proc_s, &uio);
		PRELE(proc_s);

	} else if (proc_cur == proc_s) {
		/* we are the source: write into the destination process */
		struct iovec iov = {
			.iov_base = (caddr_t)data_s,
			.iov_len = len,
		};
		struct uio uio = {
			.uio_iov = &iov,
			.uio_iovcnt = 1,
			.uio_offset = (off_t)data_d,
			.uio_resid = len,
			.uio_segflg = UIO_USERSPACE,
			.uio_rw = UIO_WRITE,
			.uio_td = td,
		};

		PHOLD(proc_d);
		error = proc_rwmem(proc_d, &uio);
		PRELE(proc_d);
	} else {
		error = EINVAL;
	}
	return (error);
}
903 
/*
 * Copy data directly between the server process and the peer process
 * that issued the I/O, using cuse_proc2proc_copy().  Called with the
 * server lock held; the lock is dropped around the copy with
 * "proc_refs" keeping the command slot referenced, mirroring
 * cuse_server_ioctl_copy_locked().  "isread" non-zero copies from
 * the peer to the server, zero copies from the server to the peer.
 */
static int
cuse_server_data_copy_locked(struct cuse_server *pcs,
    struct cuse_client_command *pccmd,
    struct cuse_data_chunk *pchk, int isread)
{
	struct proc *p_proc;
	int error;

	p_proc = pccmd->proc_curr;
	if (p_proc == NULL)
		return (ENXIO);

	/* guard against reference counter overflow */
	if (pccmd->proc_refs < 0)
		return (ENOMEM);

	pccmd->proc_refs++;

	cuse_server_unlock(pcs);

	if (isread == 0) {
		error = cuse_proc2proc_copy(
		    curthread->td_proc, pchk->local_ptr,
		    p_proc, pchk->peer_ptr,
		    pchk->length);
	} else {
		error = cuse_proc2proc_copy(
		    p_proc, pchk->peer_ptr,
		    curthread->td_proc, pchk->local_ptr,
		    pchk->length);
	}

	cuse_server_lock(pcs);

	pccmd->proc_refs--;

	/* wake up the peer if it is waiting for references to drain */
	if (pccmd->proc_curr == NULL)
		cv_signal(&pccmd->cv);

	return (error);
}
944 
/*
 * Allocate the lowest free unit number within the device-ID class
 * selected by "id" (classes are distinguished by the CUSE_ID_MASK
 * bits).  Called with the server lock held.  Returns the allocated
 * unit number (0..255) or -1 when no unit number or table slot is
 * available.
 */
static int
cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
{
	int n;
	int x = 0;
	int match;

	/* rescan until "x" collides with no existing unit in this class */
	do {
		for (match = n = 0; n != CUSE_DEVICES_MAX; n++) {
			if (cuse_alloc_unit[n] != NULL) {
				/* skip entries from a different ID class */
				if ((cuse_alloc_unit_id[n] ^ id) & CUSE_ID_MASK)
					continue;
				if ((cuse_alloc_unit_id[n] & ~CUSE_ID_MASK) == x) {
					x++;
					match = 1;
				}
			}
		}
	} while (match);

	if (x < 256) {
		/* store the allocation in the first free table slot */
		for (n = 0; n != CUSE_DEVICES_MAX; n++) {
			if (cuse_alloc_unit[n] == NULL) {
				cuse_alloc_unit[n] = pcs;
				cuse_alloc_unit_id[n] = id | x;
				return (x);
			}
		}
	}
	return (-1);
}
976 
/*
 * Notify select/poll and kqueue listeners registered on the server's
 * selinfo.  Called with the server lock held.
 */
static void
cuse_server_wakeup_locked(struct cuse_server *pcs)
{
	selwakeup(&pcs->selinfo);
	KNOTE_LOCKED(&pcs->selinfo.si_note, 0);
}
983 
/*
 * Flag every client for read and write knote re-evaluation and wake
 * up poll/kqueue listeners.  Called with the server lock held.
 */
static void
cuse_server_wakeup_all_client_locked(struct cuse_server *pcs)
{
	struct cuse_client *pcc;

	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
		pcc->cflags |= (CUSE_CLI_KNOTE_NEED_READ |
		    CUSE_CLI_KNOTE_NEED_WRITE);
	}
	cuse_server_wakeup_locked(pcs);
}
995 
996 static int
997 cuse_free_unit_by_id_locked(struct cuse_server *pcs, int id)
998 {
999 	int n;
1000 	int found = 0;
1001 
1002 	for (n = 0; n != CUSE_DEVICES_MAX; n++) {
1003 		if (cuse_alloc_unit[n] == pcs) {
1004 			if (cuse_alloc_unit_id[n] == id || id == -1) {
1005 				cuse_alloc_unit[n] = NULL;
1006 				cuse_alloc_unit_id[n] = 0;
1007 				found = 1;
1008 			}
1009 		}
1010 	}
1011 
1012 	return (found ? 0 : EINVAL);
1013 }
1014 
/*
 * Control ioctl handler for /dev/cuse (server side).
 *
 * Implements the userland server protocol: fetching pending client
 * commands, completing them, allocating/freeing unit numbers and
 * shared memory, creating/destroying character devices and moving
 * data between the server and client processes.
 */
static int
cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
    caddr_t data, int fflag, struct thread *td)
{
	struct cuse_server *pcs;
	int error;

	error = cuse_server_get(&pcs);
	if (error != 0)
		return (error);

	switch (cmd) {
		struct cuse_client_command *pccmd;
		struct cuse_client *pcc;
		struct cuse_command *pcmd;
		struct cuse_alloc_info *pai;
		struct cuse_create_dev *pcd;
		struct cuse_server_dev *pcsd;
		struct cuse_data_chunk *pchk;
		int n;

	case CUSE_IOCTL_GET_COMMAND:
		/* sleep until a client command is queued, then dequeue it */
		pcmd = (void *)data;

		cuse_server_lock(pcs);

		while ((pccmd = TAILQ_FIRST(&pcs->head)) == NULL) {
			error = cv_wait_sig(&pcs->cv, &pcs->mtx);

			if (pcs->is_closing)
				error = ENXIO;

			if (error) {
				cuse_server_unlock(pcs);
				return (error);
			}
		}

		TAILQ_REMOVE(&pcs->head, pccmd, entry);
		pccmd->entry.tqe_prev = NULL;

		/* remember which server thread services this command */
		pccmd->entered = curthread;

		*pcmd = pccmd->sub;

		cuse_server_unlock(pcs);

		break;

	case CUSE_IOCTL_SYNC_COMMAND:
		/* complete all commands this thread is servicing */
		cuse_server_lock(pcs);
		while ((pccmd = cuse_server_find_command(pcs, curthread)) != NULL) {
			/* send sync command */
			pccmd->entered = NULL;
			pccmd->error = *(int *)data;
			pccmd->command = CUSE_CMD_SYNC;

			/* signal peer, if any */
			cv_signal(&pccmd->cv);
		}
		cuse_server_unlock(pcs);

		break;

	case CUSE_IOCTL_ALLOC_UNIT:
		/* allocate a unit number in the default ID class */
		cuse_server_lock(pcs);
		n = cuse_alloc_unit_by_id_locked(pcs,
		    CUSE_ID_DEFAULT(0));
		cuse_server_unlock(pcs);

		if (n < 0)
			error = ENOMEM;
		else
			*(int *)data = n;
		break;

	case CUSE_IOCTL_ALLOC_UNIT_BY_ID:
		/* allocate a unit number in a caller-specified ID class */
		n = *(int *)data;

		n = (n & CUSE_ID_MASK);

		cuse_server_lock(pcs);
		n = cuse_alloc_unit_by_id_locked(pcs, n);
		cuse_server_unlock(pcs);

		if (n < 0)
			error = ENOMEM;
		else
			*(int *)data = n;
		break;

	case CUSE_IOCTL_FREE_UNIT:
		/* free a unit number in the default ID class */
		n = *(int *)data;

		n = CUSE_ID_DEFAULT(n);

		cuse_server_lock(pcs);
		error = cuse_free_unit_by_id_locked(pcs, n);
		cuse_server_unlock(pcs);
		break;

	case CUSE_IOCTL_FREE_UNIT_BY_ID:
		/* free a fully-specified unit number */
		n = *(int *)data;

		cuse_server_lock(pcs);
		error = cuse_free_unit_by_id_locked(pcs, n);
		cuse_server_unlock(pcs);
		break;

	case CUSE_IOCTL_ALLOC_MEMORY:
		/* allocate a shared-memory region for mmap */
		pai = (void *)data;

		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
			error = ENOMEM;
			break;
		}
		if (pai->page_count >= CUSE_ALLOC_PAGES_MAX) {
			error = ENOMEM;
			break;
		}
		error = cuse_server_alloc_memory(pcs,
		    pai->alloc_nr, pai->page_count);
		break;

	case CUSE_IOCTL_FREE_MEMORY:
		pai = (void *)data;

		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
			error = ENOMEM;
			break;
		}
		error = cuse_server_free_memory(pcs, pai->alloc_nr);
		break;

	case CUSE_IOCTL_GET_SIG:
		/* read-and-clear the pending-signal flag of our command */
		cuse_server_lock(pcs);
		pccmd = cuse_server_find_command(pcs, curthread);

		if (pccmd != NULL) {
			n = pccmd->got_signal;
			pccmd->got_signal = 0;
		} else {
			n = 0;
		}
		cuse_server_unlock(pcs);

		*(int *)data = n;

		break;

	case CUSE_IOCTL_SET_PFH:
		/* set the per-file handle for all of the client's commands */
		cuse_server_lock(pcs);
		pccmd = cuse_server_find_command(pcs, curthread);

		if (pccmd != NULL) {
			pcc = pccmd->client;
			for (n = 0; n != CUSE_CMD_MAX; n++) {
				pcc->cmds[n].sub.per_file_handle = *(uintptr_t *)data;
			}
		} else {
			error = ENXIO;
		}
		cuse_server_unlock(pcs);
		break;

	case CUSE_IOCTL_CREATE_DEV:
		/* create a new character device (requires driver privilege) */
		error = priv_check(curthread, PRIV_DRIVER);
		if (error)
			break;

		pcd = (void *)data;

		/* filter input */

		pcd->devname[sizeof(pcd->devname) - 1] = 0;

		if (pcd->devname[0] == 0) {
			error = EINVAL;
			break;
		}
		cuse_str_filter(pcd->devname);

		pcd->permissions &= 0777;

		/* try to allocate a character device */

		pcsd = malloc(sizeof(*pcsd), M_CUSE, M_WAITOK | M_ZERO);

		pcsd->server = pcs;

		pcsd->user_dev = pcd->dev;

		pcsd->kern_dev = make_dev_credf(MAKEDEV_CHECKNAME,
		    &cuse_client_devsw, 0, NULL, pcd->user_id, pcd->group_id,
		    pcd->permissions, "%s", pcd->devname);

		if (pcsd->kern_dev == NULL) {
			free(pcsd, M_CUSE);
			error = ENOMEM;
			break;
		}
		pcsd->kern_dev->si_drv1 = pcsd;

		cuse_server_lock(pcs);
		TAILQ_INSERT_TAIL(&pcs->hdev, pcsd, entry);
		cuse_server_unlock(pcs);

		break;

	case CUSE_IOCTL_DESTROY_DEV:
		/* destroy matching devices (requires driver privilege) */
		error = priv_check(curthread, PRIV_DRIVER);
		if (error)
			break;

		cuse_server_lock(pcs);

		error = EINVAL;

		pcsd = TAILQ_FIRST(&pcs->hdev);
		while (pcsd != NULL) {
			if (pcsd->user_dev == *(struct cuse_dev **)data) {
				TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
				/* drop lock: cuse_server_free_dev() may sleep */
				cuse_server_unlock(pcs);
				cuse_server_free_dev(pcsd);
				cuse_server_lock(pcs);
				error = 0;
				/* restart: the list may have changed */
				pcsd = TAILQ_FIRST(&pcs->hdev);
			} else {
				pcsd = TAILQ_NEXT(pcsd, entry);
			}
		}

		cuse_server_unlock(pcs);
		break;

	case CUSE_IOCTL_WRITE_DATA:
	case CUSE_IOCTL_READ_DATA:
		/* move data between server and client/peer process */
		cuse_server_lock(pcs);
		pchk = (struct cuse_data_chunk *)data;

		pccmd = cuse_server_find_command(pcs, curthread);

		if (pccmd == NULL) {
			error = ENXIO;	/* invalid request */
		} else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
			error = EFAULT;	/* NULL pointer */
		} else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
			/* peer_ptr encodes an ioctl bounce buffer offset */
			error = cuse_server_ioctl_copy_locked(pcs, pccmd,
			    pchk, cmd == CUSE_IOCTL_READ_DATA);
		} else {
			/* peer_ptr is a real peer-process address */
			error = cuse_server_data_copy_locked(pcs, pccmd,
			    pchk, cmd == CUSE_IOCTL_READ_DATA);
		}
		cuse_server_unlock(pcs);
		break;

	case CUSE_IOCTL_SELWAKEUP:
		cuse_server_lock(pcs);
		/*
		 * We don't know which direction caused the event.
		 * Wakeup both!
		 */
		cuse_server_wakeup_all_client_locked(pcs);
		cuse_server_unlock(pcs);
		break;

	default:
		error = ENXIO;
		break;
	}
	return (error);
}
1298 
1299 static int
1300 cuse_server_poll(struct cdev *dev, int events, struct thread *td)
1301 {
1302 	return (events & (POLLHUP | POLLPRI | POLLIN |
1303 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1304 }
1305 
/*
 * Map one of the server's shared-memory allocations into the caller.
 * The mmap offset encodes both the allocation number (in units of
 * CUSE_ALLOC_PAGES_MAX pages) and the page offset inside that
 * allocation. On success a referenced VM object is returned together
 * with the page-aligned offset to use within it.
 */
static int
cuse_server_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
    vm_size_t size, struct vm_object **object, int nprot)
{
	uint32_t page_nr = *offset / PAGE_SIZE;
	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
	struct cuse_memory *mem;
	struct cuse_server *pcs;
	int error;

	error = cuse_server_get(&pcs);
	if (error != 0)
		return (error);

	cuse_server_lock(pcs);
	/* lookup memory structure */
	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
		if (mem->alloc_nr == alloc_nr)
			break;
	}
	if (mem == NULL) {
		cuse_server_unlock(pcs);
		return (ENOMEM);
	}
	/* verify page offset */
	page_nr %= CUSE_ALLOC_PAGES_MAX;
	if (page_nr >= mem->page_count) {
		cuse_server_unlock(pcs);
		return (ENXIO);
	}
	/* verify mmap size */
	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
		cuse_server_unlock(pcs);
		return (EINVAL);
	}
	/* take a reference before dropping the lock so the object stays */
	vm_object_reference(mem->object);
	*object = mem->object;
	cuse_server_unlock(pcs);

	/* set new VM object offset to use */
	*offset = page_nr * PAGE_SIZE;

	/* success */
	return (0);
}
1352 
1353 /*------------------------------------------------------------------------*
1354  *	CUSE CLIENT PART
1355  *------------------------------------------------------------------------*/
/*
 * cdevpriv destructor for a CUSE client. Runs when the last file
 * descriptor reference to the per-open client state is dropped.
 * Unlinks the client from its server, destroys the per-command
 * synchronisation primitives and finally releases the client's
 * reference on the server.
 */
static void
cuse_client_free(void *arg)
{
	struct cuse_client *pcc = arg;
	struct cuse_client_command *pccmd;
	struct cuse_server *pcs;
	int n;

	pcs = pcc->server;

	cuse_server_lock(pcs);
	/* mark closing and unlink while holding the server lock */
	cuse_client_is_closing(pcc);
	TAILQ_REMOVE(&pcs->hcli, pcc, entry);
	cuse_server_unlock(pcs);

	/* tear down the per-command lock and condition variable pairs */
	for (n = 0; n != CUSE_CMD_MAX; n++) {
		pccmd = &pcc->cmds[n];

		sx_destroy(&pccmd->sx);
		cv_destroy(&pccmd->cv);
	}

	free(pcc, M_CUSE);

	/* drop reference on server */
	cuse_server_unref(pcs);
}
1383 
/*
 * Open handler for a CUSE client character device. Takes a reference
 * on the backing server, allocates and enqueues the per-open client
 * state, then forwards the OPEN command to the user-space server and
 * waits for its reply. On any failure after cdevpriv is set, the
 * cleanup is delegated to cuse_client_free() via
 * devfs_clear_cdevpriv().
 */
static int
cuse_client_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
{
	struct cuse_client_command *pccmd;
	struct cuse_server_dev *pcsd;
	struct cuse_client *pcc;
	struct cuse_server *pcs;
	struct cuse_dev *pcd;
	int error;
	int n;

	pcsd = dev->si_drv1;
	if (pcsd != NULL) {
		pcs = pcsd->server;
		pcd = pcsd->user_dev;

		cuse_server_lock(pcs);
		/*
		 * Check that the refcount didn't wrap and that the
		 * same process is not both client and server. This
		 * can easily lead to deadlocks when destroying the
		 * CUSE character device nodes:
		 */
		pcs->refs++;
		if (pcs->refs < 0 || pcs->pid == curproc->p_pid) {
			/* overflow or wrong PID */
			pcs->refs--;
			cuse_server_unlock(pcs);
			return (EINVAL);
		}
		cuse_server_unlock(pcs);
	} else {
		/* no server is attached to this device node */
		return (EINVAL);
	}

	pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
	if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
		printf("Cuse: Cannot set cdevpriv.\n");
		/* drop reference on server */
		cuse_server_unref(pcs);
		free(pcc, M_CUSE);
		return (ENOMEM);
	}
	pcc->fflags = fflags;
	pcc->server_dev = pcsd;
	pcc->server = pcs;

	/* initialise the per-command state for this client */
	for (n = 0; n != CUSE_CMD_MAX; n++) {
		pccmd = &pcc->cmds[n];

		pccmd->sub.dev = pcd;
		pccmd->sub.command = n;
		pccmd->client = pcc;

		sx_init(&pccmd->sx, "cuse-client-sx");
		cv_init(&pccmd->cv, "cuse-client-cv");
	}

	cuse_server_lock(pcs);

	/* cuse_client_free() assumes that the client is listed somewhere! */
	/* always enqueue */

	TAILQ_INSERT_TAIL(&pcs->hcli, pcc, entry);

	/* check if server is closing */
	if ((pcs->is_closing != 0) || (dev->si_drv1 == NULL)) {
		error = EINVAL;
	} else {
		error = 0;
	}
	cuse_server_unlock(pcs);

	if (error) {
		devfs_clear_cdevpriv();	/* XXX bugfix */
		return (error);
	}
	pccmd = &pcc->cmds[CUSE_CMD_OPEN];

	cuse_cmd_lock(pccmd);

	/* forward the open to the user-space server and await its reply */
	cuse_server_lock(pcs);
	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);

	error = cuse_client_receive_command_locked(pccmd, 0, 0);
	cuse_server_unlock(pcs);

	/* negative return values carry a CUSE error code */
	if (error < 0) {
		error = cuse_convert_error(error);
	} else {
		error = 0;
	}

	cuse_cmd_unlock(pccmd);

	if (error)
		devfs_clear_cdevpriv();	/* XXX bugfix */

	return (error);
}
1484 
/*
 * Close handler for a CUSE client device. Forwards the CLOSE command
 * to the user-space server and then marks the client as closing.
 * The server's reply is discarded and 0 is always returned - the
 * close cannot be retried by the caller anyway.
 */
static int
cuse_client_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct cuse_client_command *pccmd;
	struct cuse_client *pcc;
	struct cuse_server *pcs;
	int error;

	error = cuse_client_get(&pcc);
	if (error != 0)
		return (0);

	pccmd = &pcc->cmds[CUSE_CMD_CLOSE];
	pcs = pcc->server;

	cuse_cmd_lock(pccmd);

	cuse_server_lock(pcs);
	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);

	error = cuse_client_receive_command_locked(pccmd, 0, 0);
	cuse_cmd_unlock(pccmd);

	/* mark closing while still holding the server lock */
	cuse_client_is_closing(pcc);
	cuse_server_unlock(pcs);

	return (0);
}
1513 
1514 static void
1515 cuse_client_kqfilter_poll(struct cdev *dev, struct cuse_client *pcc)
1516 {
1517 	struct cuse_server *pcs = pcc->server;
1518 	int temp;
1519 
1520 	cuse_server_lock(pcs);
1521 	temp = (pcc->cflags & (CUSE_CLI_KNOTE_HAS_READ |
1522 	    CUSE_CLI_KNOTE_HAS_WRITE));
1523 	pcc->cflags &= ~(CUSE_CLI_KNOTE_NEED_READ |
1524 	    CUSE_CLI_KNOTE_NEED_WRITE);
1525 	cuse_server_unlock(pcs);
1526 
1527 	if (temp != 0) {
1528 		/* get the latest polling state from the server */
1529 		temp = cuse_client_poll(dev, POLLIN | POLLOUT, NULL);
1530 
1531 		if (temp & (POLLIN | POLLOUT)) {
1532 			cuse_server_lock(pcs);
1533 			if (temp & POLLIN)
1534 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_READ;
1535 			if (temp & POLLOUT)
1536 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_WRITE;
1537 
1538 			/* make sure the "knote" gets woken up */
1539 			cuse_server_wakeup_locked(pcc->server);
1540 			cuse_server_unlock(pcs);
1541 		}
1542 	}
1543 }
1544 
1545 static int
1546 cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
1547 {
1548 	struct cuse_client_command *pccmd;
1549 	struct cuse_client *pcc;
1550 	struct cuse_server *pcs;
1551 	int error;
1552 	int len;
1553 
1554 	error = cuse_client_get(&pcc);
1555 	if (error != 0)
1556 		return (error);
1557 
1558 	pccmd = &pcc->cmds[CUSE_CMD_READ];
1559 	pcs = pcc->server;
1560 
1561 	if (uio->uio_segflg != UIO_USERSPACE) {
1562 		return (EINVAL);
1563 	}
1564 	uio->uio_segflg = UIO_NOCOPY;
1565 
1566 	cuse_cmd_lock(pccmd);
1567 
1568 	while (uio->uio_resid != 0) {
1569 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1570 			error = ENOMEM;
1571 			break;
1572 		}
1573 		len = uio->uio_iov->iov_len;
1574 
1575 		cuse_server_lock(pcs);
1576 		cuse_client_send_command_locked(pccmd,
1577 		    (uintptr_t)uio->uio_iov->iov_base,
1578 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1579 
1580 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1581 		cuse_server_unlock(pcs);
1582 
1583 		if (error < 0) {
1584 			error = cuse_convert_error(error);
1585 			break;
1586 		} else if (error == len) {
1587 			error = uiomove(NULL, error, uio);
1588 			if (error)
1589 				break;
1590 		} else {
1591 			error = uiomove(NULL, error, uio);
1592 			break;
1593 		}
1594 	}
1595 	cuse_cmd_unlock(pccmd);
1596 
1597 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1598 
1599 	if (error == EWOULDBLOCK)
1600 		cuse_client_kqfilter_poll(dev, pcc);
1601 
1602 	return (error);
1603 }
1604 
1605 static int
1606 cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
1607 {
1608 	struct cuse_client_command *pccmd;
1609 	struct cuse_client *pcc;
1610 	struct cuse_server *pcs;
1611 	int error;
1612 	int len;
1613 
1614 	error = cuse_client_get(&pcc);
1615 	if (error != 0)
1616 		return (error);
1617 
1618 	pccmd = &pcc->cmds[CUSE_CMD_WRITE];
1619 	pcs = pcc->server;
1620 
1621 	if (uio->uio_segflg != UIO_USERSPACE) {
1622 		return (EINVAL);
1623 	}
1624 	uio->uio_segflg = UIO_NOCOPY;
1625 
1626 	cuse_cmd_lock(pccmd);
1627 
1628 	while (uio->uio_resid != 0) {
1629 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1630 			error = ENOMEM;
1631 			break;
1632 		}
1633 		len = uio->uio_iov->iov_len;
1634 
1635 		cuse_server_lock(pcs);
1636 		cuse_client_send_command_locked(pccmd,
1637 		    (uintptr_t)uio->uio_iov->iov_base,
1638 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1639 
1640 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1641 		cuse_server_unlock(pcs);
1642 
1643 		if (error < 0) {
1644 			error = cuse_convert_error(error);
1645 			break;
1646 		} else if (error == len) {
1647 			error = uiomove(NULL, error, uio);
1648 			if (error)
1649 				break;
1650 		} else {
1651 			error = uiomove(NULL, error, uio);
1652 			break;
1653 		}
1654 	}
1655 	cuse_cmd_unlock(pccmd);
1656 
1657 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1658 
1659 	if (error == EWOULDBLOCK)
1660 		cuse_client_kqfilter_poll(dev, pcc);
1661 
1662 	return (error);
1663 }
1664 
/*
 * Ioctl handler for a CUSE client device. Input argument data is
 * staged in the per-client "ioctl_buffer" and the command is
 * forwarded to the user-space server; output data is copied back
 * from the staging buffer on completion.
 */
int
cuse_client_ioctl(struct cdev *dev, unsigned long cmd,
    caddr_t data, int fflag, struct thread *td)
{
	struct cuse_client_command *pccmd;
	struct cuse_client *pcc;
	struct cuse_server *pcs;
	int error;
	int len;

	error = cuse_client_get(&pcc);
	if (error != 0)
		return (error);

	/* the staging buffer is limited to CUSE_BUFFER_MAX bytes */
	len = IOCPARM_LEN(cmd);
	if (len > CUSE_BUFFER_MAX)
		return (ENOMEM);

	pccmd = &pcc->cmds[CUSE_CMD_IOCTL];
	pcs = pcc->server;

	cuse_cmd_lock(pccmd);

	/* stage input argument data for the server */
	if (cmd & (IOC_IN | IOC_VOID))
		memcpy(pcc->ioctl_buffer, data, len);

	/*
	 * When the ioctl-length is zero drivers can pass information
	 * through the data pointer of the ioctl. Make sure this information
	 * is forwarded to the driver.
	 */

	cuse_server_lock(pcs);
	cuse_client_send_command_locked(pccmd,
	    (len == 0) ? *(long *)data : CUSE_BUF_MIN_PTR,
	    (unsigned long)cmd, pcc->fflags,
	    (fflag & O_NONBLOCK) ? IO_NDELAY : 0);

	error = cuse_client_receive_command_locked(pccmd, data, len);
	cuse_server_unlock(pcs);

	/* negative return values carry a CUSE error code */
	if (error < 0) {
		error = cuse_convert_error(error);
	} else {
		error = 0;
	}

	/* copy output argument data back to the caller */
	if (cmd & IOC_OUT)
		memcpy(data, pcc->ioctl_buffer, len);

	cuse_cmd_unlock(pccmd);

	if (error == EWOULDBLOCK)
		cuse_client_kqfilter_poll(dev, pcc);

	return (error);
}
1722 
/*
 * Poll handler for a CUSE client device. Translates the requested
 * poll events into CUSE_POLL_* flags, forwards a non-blocking POLL
 * command to the user-space server and converts the server's answer
 * back into poll revents. On any failure the device reports itself
 * as ready for everything instead of POLLNVAL.
 */
static int
cuse_client_poll(struct cdev *dev, int events, struct thread *td)
{
	struct cuse_client_command *pccmd;
	struct cuse_client *pcc;
	struct cuse_server *pcs;
	unsigned long temp;
	int error;
	int revents;

	error = cuse_client_get(&pcc);
	if (error != 0)
		goto pollnval;

	temp = 0;
	pcs = pcc->server;

	if (events & (POLLPRI | POLLIN | POLLRDNORM))
		temp |= CUSE_POLL_READ;

	if (events & (POLLOUT | POLLWRNORM))
		temp |= CUSE_POLL_WRITE;

	if (events & POLLHUP)
		temp |= CUSE_POLL_ERROR;

	pccmd = &pcc->cmds[CUSE_CMD_POLL];

	cuse_cmd_lock(pccmd);

	/* Need to selrecord() first to not lose any events. */
	if (temp != 0 && td != NULL)
		selrecord(td, &pcs->selinfo);

	cuse_server_lock(pcs);
	cuse_client_send_command_locked(pccmd,
	    0, temp, pcc->fflags, IO_NDELAY);

	error = cuse_client_receive_command_locked(pccmd, 0, 0);
	cuse_server_unlock(pcs);

	cuse_cmd_unlock(pccmd);

	if (error < 0) {
		goto pollnval;
	} else {
		/* map the server's CUSE_POLL_* answer back to revents */
		revents = 0;
		if (error & CUSE_POLL_READ)
			revents |= (events & (POLLPRI | POLLIN | POLLRDNORM));
		if (error & CUSE_POLL_WRITE)
			revents |= (events & (POLLOUT | POLLWRNORM));
		if (error & CUSE_POLL_ERROR)
			revents |= (events & POLLHUP);
	}
	return (revents);

pollnval:
	/* XXX many clients don't understand POLLNVAL */
	return (events & (POLLHUP | POLLPRI | POLLIN |
	    POLLRDNORM | POLLOUT | POLLWRNORM));
}
1784 
/*
 * Map one of the server's shared-memory allocations into a client.
 * Same scheme as cuse_server_mmap_single(): the mmap offset encodes
 * the allocation number (in units of CUSE_ALLOC_PAGES_MAX pages) and
 * the page offset inside that allocation. Returns a referenced VM
 * object plus the page-aligned offset to use within it.
 */
static int
cuse_client_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
    vm_size_t size, struct vm_object **object, int nprot)
{
	uint32_t page_nr = *offset / PAGE_SIZE;
	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
	struct cuse_memory *mem;
	struct cuse_client *pcc;
	struct cuse_server *pcs;
	int error;

	error = cuse_client_get(&pcc);
	if (error != 0)
		return (error);

	pcs = pcc->server;

	cuse_server_lock(pcs);
	/* lookup memory structure */
	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
		if (mem->alloc_nr == alloc_nr)
			break;
	}
	if (mem == NULL) {
		cuse_server_unlock(pcs);
		return (ENOMEM);
	}
	/* verify page offset */
	page_nr %= CUSE_ALLOC_PAGES_MAX;
	if (page_nr >= mem->page_count) {
		cuse_server_unlock(pcs);
		return (ENXIO);
	}
	/* verify mmap size */
	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
		cuse_server_unlock(pcs);
		return (EINVAL);
	}
	/* take a reference before dropping the lock so the object stays */
	vm_object_reference(mem->object);
	*object = mem->object;
	cuse_server_unlock(pcs);

	/* set new VM object offset to use */
	*offset = page_nr * PAGE_SIZE;

	/* success */
	return (0);
}
1834 
1835 static void
1836 cuse_client_kqfilter_read_detach(struct knote *kn)
1837 {
1838 	struct cuse_client *pcc;
1839 	struct cuse_server *pcs;
1840 
1841 	pcc = kn->kn_hook;
1842 	pcs = pcc->server;
1843 
1844 	cuse_server_lock(pcs);
1845 	knlist_remove(&pcs->selinfo.si_note, kn, 1);
1846 	cuse_server_unlock(pcs);
1847 }
1848 
1849 static void
1850 cuse_client_kqfilter_write_detach(struct knote *kn)
1851 {
1852 	struct cuse_client *pcc;
1853 	struct cuse_server *pcs;
1854 
1855 	pcc = kn->kn_hook;
1856 	pcs = pcc->server;
1857 
1858 	cuse_server_lock(pcs);
1859 	knlist_remove(&pcs->selinfo.si_note, kn, 1);
1860 	cuse_server_unlock(pcs);
1861 }
1862 
1863 static int
1864 cuse_client_kqfilter_read_event(struct knote *kn, long hint)
1865 {
1866 	struct cuse_client *pcc;
1867 
1868 	pcc = kn->kn_hook;
1869 
1870 	mtx_assert(&pcc->server->mtx, MA_OWNED);
1871 
1872 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_READ) ? 1 : 0);
1873 }
1874 
1875 static int
1876 cuse_client_kqfilter_write_event(struct knote *kn, long hint)
1877 {
1878 	struct cuse_client *pcc;
1879 
1880 	pcc = kn->kn_hook;
1881 
1882 	mtx_assert(&pcc->server->mtx, MA_OWNED);
1883 
1884 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_WRITE) ? 1 : 0);
1885 }
1886 
1887 static int
1888 cuse_client_kqfilter(struct cdev *dev, struct knote *kn)
1889 {
1890 	struct cuse_client *pcc;
1891 	struct cuse_server *pcs;
1892 	int error;
1893 
1894 	error = cuse_client_get(&pcc);
1895 	if (error != 0)
1896 		return (error);
1897 
1898 	pcs = pcc->server;
1899 
1900 	cuse_server_lock(pcs);
1901 	switch (kn->kn_filter) {
1902 	case EVFILT_READ:
1903 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_READ;
1904 		kn->kn_hook = pcc;
1905 		kn->kn_fop = &cuse_client_kqfilter_read_ops;
1906 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1907 		break;
1908 	case EVFILT_WRITE:
1909 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_WRITE;
1910 		kn->kn_hook = pcc;
1911 		kn->kn_fop = &cuse_client_kqfilter_write_ops;
1912 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1913 		break;
1914 	default:
1915 		error = EINVAL;
1916 		break;
1917 	}
1918 	cuse_server_unlock(pcs);
1919 
1920 	if (error == 0)
1921 		cuse_client_kqfilter_poll(dev, pcc);
1922 	return (error);
1923 }
1924