xref: /freebsd/sys/fs/cuse/cuse.c (revision 1dfcff294e44d4b45813288ef4095c36abb22f0e)
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2010-2022 Hans Petter Selasky. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/stdint.h>
28 #include <sys/stddef.h>
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/conf.h>
33 #include <sys/kernel.h>
34 #include <sys/bus.h>
35 #include <sys/linker_set.h>
36 #include <sys/module.h>
37 #include <sys/lock.h>
38 #include <sys/mutex.h>
39 #include <sys/condvar.h>
40 #include <sys/sysctl.h>
41 #include <sys/unistd.h>
42 #include <sys/malloc.h>
43 #include <sys/priv.h>
44 #include <sys/uio.h>
45 #include <sys/poll.h>
46 #include <sys/sx.h>
47 #include <sys/rwlock.h>
48 #include <sys/queue.h>
49 #include <sys/fcntl.h>
50 #include <sys/proc.h>
51 #include <sys/vnode.h>
52 #include <sys/selinfo.h>
53 #include <sys/ptrace.h>
54 #include <sys/sysent.h>
55 
56 #include <machine/bus.h>
57 
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60 #include <vm/vm_object.h>
61 #include <vm/vm_page.h>
62 #include <vm/vm_pager.h>
63 
64 #include <fs/cuse/cuse_defs.h>
65 #include <fs/cuse/cuse_ioctl.h>
66 
/*
 * CUSE_ALLOC_BYTES_MAX expressed in units of PAGE_SIZE.
 */
#define	CUSE_ALLOC_PAGES_MAX \
	(CUSE_ALLOC_BYTES_MAX / PAGE_SIZE)

/* Refuse to build when the division above evaluates to zero. */
#if (CUSE_ALLOC_PAGES_MAX == 0)
#error "PAGE_SIZE is too big!"
#endif
73 
74 static int
75 cuse_modevent(module_t mod, int type, void *data)
76 {
77 	switch (type) {
78 	case MOD_LOAD:
79 	case MOD_UNLOAD:
80 		return (0);
81 	default:
82 		return (EOPNOTSUPP);
83 	}
84 }
85 
/* Module descriptor: ties the module name "cuse" to cuse_modevent(). */
static moduledata_t cuse_mod = {
	.name = "cuse",
	.evhand = &cuse_modevent,
};

/* Register the module and publish its version number. */
DECLARE_MODULE(cuse, cuse_mod, SI_SUB_DEVFS, SI_ORDER_FIRST);
MODULE_VERSION(cuse, 1);
93 
94 /*
95  * Prevent cuse4bsd.ko and cuse.ko from loading at the same time by
96  * declaring support for the cuse4bsd interface in cuse.ko:
97  */
98 MODULE_VERSION(cuse4bsd, 1);
99 
100 #ifdef FEATURE
101 FEATURE(cuse, "Userspace character devices");
102 #endif
103 
104 struct cuse_command;
105 struct cuse_server;
106 struct cuse_client;
107 
/*
 * One command slot of a client.  Each client owns CUSE_CMD_MAX of these
 * (see struct cuse_client).
 */
struct cuse_client_command {
	/* linkage on the server's pending queue; tqe_prev is NULL when unqueued */
	TAILQ_ENTRY(cuse_client_command) entry;
	/* command descriptor copied out to the server by CUSE_IOCTL_GET_COMMAND */
	struct cuse_command sub;
	/* taken exclusively by cuse_cmd_lock() */
	struct sx sx;
	/* waited on together with the server mutex */
	struct cv cv;
	/* server thread that fetched this command; cleared on sync */
	struct thread *entered;
	/* owning client */
	struct cuse_client *client;
	/* process waiting for the result, NULL when nobody is waiting */
	struct proc *proc_curr;
	/* number of in-flight data copies that use proc_curr */
	int	proc_refs;
	/* set when the waiting thread was interrupted; read by CUSE_IOCTL_GET_SIG */
	int	got_signal;
	/* result stored by CUSE_IOCTL_SYNC_COMMAND */
	int	error;
	/* CUSE_CMD_NONE until the server completes the command */
	int	command;
};
121 
/*
 * One memory allocation of a server, linked on cuse_server.hmem.
 */
struct cuse_memory {
	TAILQ_ENTRY(cuse_memory) entry;
	/* backing VM object obtained from vm_pager_allocate() */
	vm_object_t object;
	/* size of the object in pages */
	uint32_t page_count;
	/* allocation number; unique within one server's list */
	uint32_t alloc_nr;
};
128 
/*
 * One character device created by a server, linked on cuse_server.hdev.
 */
struct cuse_server_dev {
	TAILQ_ENTRY(cuse_server_dev) entry;
	/* server that created the device */
	struct cuse_server *server;
	/* kernel device node returned by make_dev_credf() */
	struct cdev *kern_dev;
	/* device handle supplied by the server in CUSE_IOCTL_CREATE_DEV */
	struct cuse_dev *user_dev;
};
135 
/*
 * State of one open instance of the server device.
 */
struct cuse_server {
	/* linkage on the global cuse_server_head list */
	TAILQ_ENTRY(cuse_server) entry;
	/* client commands queued for CUSE_IOCTL_GET_COMMAND */
	TAILQ_HEAD(, cuse_client_command) head;
	/* devices created by this server */
	TAILQ_HEAD(, cuse_server_dev) hdev;
	/* clients attached to this server */
	TAILQ_HEAD(, cuse_client) hcli;
	/* memory allocations owned by this server */
	TAILQ_HEAD(, cuse_memory) hmem;
	/* per-server mutex, see cuse_server_lock() */
	struct mtx mtx;
	/* signalled when a command is appended to "head" */
	struct cv cv;
	/* select/kqueue state; its knlist is initialised with "mtx" */
	struct selinfo selinfo;
	/* process ID recorded when the server device was opened */
	pid_t	pid;
	/* non-zero once the server is shutting down */
	int	is_closing;
	/* reference count; the structure is freed when it drops to zero */
	int	refs;
};
149 
/*
 * State of one client attached to a server.
 */
struct cuse_client {
	/* linkage on cuse_server.hcli */
	TAILQ_ENTRY(cuse_client) entry;
	/* NOTE(review): second list linkage; its user is not visible in this part of the file */
	TAILQ_ENTRY(cuse_client) entry_ref;
	/* one slot per command type */
	struct cuse_client_command cmds[CUSE_CMD_MAX];
	/* server this client belongs to */
	struct cuse_server *server;
	/* device the client is attached to; NULL once the client is closing */
	struct cuse_server_dev *server_dev;

	/* staging buffer used by the server's ioctl data copy path */
	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);

	int	fflags;			/* file flags */
	int	cflags;			/* client flags */
#define	CUSE_CLI_IS_CLOSING 0x01
#define	CUSE_CLI_KNOTE_NEED_READ 0x02
#define	CUSE_CLI_KNOTE_NEED_WRITE 0x04
#define	CUSE_CLI_KNOTE_HAS_READ 0x08
#define	CUSE_CLI_KNOTE_HAS_WRITE 0x10
};
167 
/* Non-zero when the CUSE_CLI_IS_CLOSING flag is set on client "pcc". */
#define	CUSE_CLIENT_CLOSING(pcc) \
    ((pcc)->cflags & CUSE_CLI_IS_CLOSING)
170 
/* malloc(9) type used for all allocations made in this file */
static	MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");

/* list of all open servers */
static TAILQ_HEAD(, cuse_server) cuse_server_head;
/* global mutex, see cuse_global_lock() */
static struct mtx cuse_global_mtx;
/* the server device node created by cuse_kern_init() */
static struct cdev *cuse_dev;
/* unit table: owning server per slot, NULL when the slot is free */
static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
/* unit table: identifier stored for each slot */
static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
178 
179 static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);
180 static void cuse_client_kqfilter_read_detach(struct knote *kn);
181 static void cuse_client_kqfilter_write_detach(struct knote *kn);
182 static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
183 static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);
184 
/* kqueue filter operations for the read side of a client descriptor */
static struct filterops cuse_client_kqfilter_read_ops = {
	.f_isfd = 1,
	.f_detach = cuse_client_kqfilter_read_detach,
	.f_event = cuse_client_kqfilter_read_event,
};
190 
/* kqueue filter operations for the write side of a client descriptor */
static struct filterops cuse_client_kqfilter_write_ops = {
	.f_isfd = 1,
	.f_detach = cuse_client_kqfilter_write_detach,
	.f_event = cuse_client_kqfilter_write_event,
};
196 
197 static d_open_t cuse_client_open;
198 static d_close_t cuse_client_close;
199 static d_ioctl_t cuse_client_ioctl;
200 static d_read_t cuse_client_read;
201 static d_write_t cuse_client_write;
202 static d_poll_t cuse_client_poll;
203 static d_mmap_single_t cuse_client_mmap_single;
204 static d_kqfilter_t cuse_client_kqfilter;
205 
/*
 * Character device switch used for the device nodes created through
 * CUSE_IOCTL_CREATE_DEV.
 */
static struct cdevsw cuse_client_devsw = {
	.d_version = D_VERSION,
	.d_open = cuse_client_open,
	.d_close = cuse_client_close,
	.d_ioctl = cuse_client_ioctl,
	.d_name = "cuse_client",
	.d_flags = D_TRACKCLOSE,
	.d_read = cuse_client_read,
	.d_write = cuse_client_write,
	.d_poll = cuse_client_poll,
	.d_mmap_single = cuse_client_mmap_single,
	.d_kqfilter = cuse_client_kqfilter,
};
219 
220 static d_open_t cuse_server_open;
221 static d_close_t cuse_server_close;
222 static d_ioctl_t cuse_server_ioctl;
223 static d_read_t cuse_server_read;
224 static d_write_t cuse_server_write;
225 static d_poll_t cuse_server_poll;
226 static d_mmap_single_t cuse_server_mmap_single;
227 
/*
 * Character device switch used for the "cuse" node created in
 * cuse_kern_init().
 */
static struct cdevsw cuse_server_devsw = {
	.d_version = D_VERSION,
	.d_open = cuse_server_open,
	.d_close = cuse_server_close,
	.d_ioctl = cuse_server_ioctl,
	.d_name = "cuse_server",
	.d_flags = D_TRACKCLOSE,
	.d_read = cuse_server_read,
	.d_write = cuse_server_write,
	.d_poll = cuse_server_poll,
	.d_mmap_single = cuse_server_mmap_single,
};
240 
241 static void cuse_client_is_closing(struct cuse_client *);
242 static int cuse_free_unit_by_id_locked(struct cuse_server *, int);
243 
/* Acquire the global CUSE mutex. */
static void
cuse_global_lock(void)
{
	mtx_lock(&cuse_global_mtx);
}
249 
/* Release the global CUSE mutex. */
static void
cuse_global_unlock(void)
{
	mtx_unlock(&cuse_global_mtx);
}
255 
/* Acquire the mutex of server "pcs". */
static void
cuse_server_lock(struct cuse_server *pcs)
{
	mtx_lock(&pcs->mtx);
}
261 
/* Release the mutex of server "pcs". */
static void
cuse_server_unlock(struct cuse_server *pcs)
{
	mtx_unlock(&pcs->mtx);
}
267 
/* Take the exclusive sx lock of command slot "pccmd". */
static void
cuse_cmd_lock(struct cuse_client_command *pccmd)
{
	sx_xlock(&pccmd->sx);
}
273 
/* Drop the exclusive sx lock of command slot "pccmd". */
static void
cuse_cmd_unlock(struct cuse_client_command *pccmd)
{
	sx_xunlock(&pccmd->sx);
}
279 
280 static void
281 cuse_kern_init(void *arg)
282 {
283 	TAILQ_INIT(&cuse_server_head);
284 
285 	mtx_init(&cuse_global_mtx, "cuse-global-mtx", NULL, MTX_DEF);
286 
287 	cuse_dev = make_dev(&cuse_server_devsw, 0,
288 	    UID_ROOT, GID_OPERATOR, 0600, "cuse");
289 
290 	printf("Cuse v%d.%d.%d @ /dev/cuse\n",
291 	    (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
292 	    (CUSE_VERSION >> 0) & 0xFF);
293 }
294 SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, NULL);
295 
296 static void
297 cuse_kern_uninit(void *arg)
298 {
299 	void *ptr;
300 
301 	while (1) {
302 		printf("Cuse: Please exit all /dev/cuse instances "
303 		    "and processes which have used this device.\n");
304 
305 		pause("DRAIN", 2 * hz);
306 
307 		cuse_global_lock();
308 		ptr = TAILQ_FIRST(&cuse_server_head);
309 		cuse_global_unlock();
310 
311 		if (ptr == NULL)
312 			break;
313 	}
314 
315 	if (cuse_dev != NULL)
316 		destroy_dev(cuse_dev);
317 
318 	mtx_destroy(&cuse_global_mtx);
319 }
320 SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
321 
322 static int
323 cuse_server_get(struct cuse_server **ppcs)
324 {
325 	struct cuse_server *pcs;
326 	int error;
327 
328 	error = devfs_get_cdevpriv((void **)&pcs);
329 	if (error != 0) {
330 		*ppcs = NULL;
331 		return (error);
332 	}
333 	if (pcs->is_closing) {
334 		*ppcs = NULL;
335 		return (EINVAL);
336 	}
337 	*ppcs = pcs;
338 	return (0);
339 }
340 
341 static void
342 cuse_server_is_closing(struct cuse_server *pcs)
343 {
344 	struct cuse_client *pcc;
345 
346 	if (pcs->is_closing)
347 		return;
348 
349 	pcs->is_closing = 1;
350 
351 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
352 		cuse_client_is_closing(pcc);
353 	}
354 }
355 
356 static struct cuse_client_command *
357 cuse_server_find_command(struct cuse_server *pcs, struct thread *td)
358 {
359 	struct cuse_client *pcc;
360 	int n;
361 
362 	if (pcs->is_closing)
363 		goto done;
364 
365 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
366 		if (CUSE_CLIENT_CLOSING(pcc))
367 			continue;
368 		for (n = 0; n != CUSE_CMD_MAX; n++) {
369 			if (pcc->cmds[n].entered == td)
370 				return (&pcc->cmds[n]);
371 		}
372 	}
373 done:
374 	return (NULL);
375 }
376 
/*
 * Sanitise a NUL-terminated string in place.
 *
 * ASCII letters, digits, '.', '_' and '/' are kept; every other
 * character is overwritten with '_'.
 */
static void
cuse_str_filter(char *ptr)
{
	for (; *ptr != 0; ptr++) {
		const int ch = *ptr;
		const int allowed =
		    (ch >= 'a' && ch <= 'z') ||
		    (ch >= 'A' && ch <= 'Z') ||
		    (ch >= '0' && ch <= '9') ||
		    ch == '.' || ch == '_' || ch == '/';

		if (!allowed)
			*ptr = '_';
	}
}
404 
405 static int
406 cuse_convert_error(int error)
407 {
408 	;				/* indent fix */
409 	switch (error) {
410 	case CUSE_ERR_NONE:
411 		return (0);
412 	case CUSE_ERR_BUSY:
413 		return (EBUSY);
414 	case CUSE_ERR_WOULDBLOCK:
415 		return (EWOULDBLOCK);
416 	case CUSE_ERR_INVALID:
417 		return (EINVAL);
418 	case CUSE_ERR_NO_MEMORY:
419 		return (ENOMEM);
420 	case CUSE_ERR_FAULT:
421 		return (EFAULT);
422 	case CUSE_ERR_SIGNAL:
423 		return (EINTR);
424 	case CUSE_ERR_NO_DEVICE:
425 		return (ENODEV);
426 	default:
427 		return (ENXIO);
428 	}
429 }
430 
431 static void
432 cuse_vm_memory_free(struct cuse_memory *mem)
433 {
434 	/* last user is gone - free */
435 	vm_object_deallocate(mem->object);
436 
437 	/* free CUSE memory */
438 	free(mem, M_CUSE);
439 }
440 
441 static int
442 cuse_server_alloc_memory(struct cuse_server *pcs, uint32_t alloc_nr,
443     uint32_t page_count)
444 {
445 	struct cuse_memory *temp;
446 	struct cuse_memory *mem;
447 	vm_object_t object;
448 	int error;
449 
450 	mem = malloc(sizeof(*mem), M_CUSE, M_WAITOK | M_ZERO);
451 
452 	object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * page_count,
453 	    VM_PROT_DEFAULT, 0, curthread->td_ucred);
454 	if (object == NULL) {
455 		error = ENOMEM;
456 		goto error_0;
457 	}
458 
459 	cuse_server_lock(pcs);
460 	/* check if allocation number already exists */
461 	TAILQ_FOREACH(temp, &pcs->hmem, entry) {
462 		if (temp->alloc_nr == alloc_nr)
463 			break;
464 	}
465 	if (temp != NULL) {
466 		cuse_server_unlock(pcs);
467 		error = EBUSY;
468 		goto error_1;
469 	}
470 	mem->object = object;
471 	mem->page_count = page_count;
472 	mem->alloc_nr = alloc_nr;
473 	TAILQ_INSERT_TAIL(&pcs->hmem, mem, entry);
474 	cuse_server_unlock(pcs);
475 
476 	return (0);
477 
478 error_1:
479 	vm_object_deallocate(object);
480 error_0:
481 	free(mem, M_CUSE);
482 	return (error);
483 }
484 
485 static int
486 cuse_server_free_memory(struct cuse_server *pcs, uint32_t alloc_nr)
487 {
488 	struct cuse_memory *mem;
489 
490 	cuse_server_lock(pcs);
491 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
492 		if (mem->alloc_nr == alloc_nr)
493 			break;
494 	}
495 	if (mem == NULL) {
496 		cuse_server_unlock(pcs);
497 		return (EINVAL);
498 	}
499 	TAILQ_REMOVE(&pcs->hmem, mem, entry);
500 	cuse_server_unlock(pcs);
501 
502 	cuse_vm_memory_free(mem);
503 
504 	return (0);
505 }
506 
507 static int
508 cuse_client_get(struct cuse_client **ppcc)
509 {
510 	struct cuse_client *pcc;
511 	int error;
512 
513 	/* try to get private data */
514 	error = devfs_get_cdevpriv((void **)&pcc);
515 	if (error != 0) {
516 		*ppcc = NULL;
517 		return (error);
518 	}
519 	if (CUSE_CLIENT_CLOSING(pcc) || pcc->server->is_closing) {
520 		*ppcc = NULL;
521 		return (EINVAL);
522 	}
523 	*ppcc = pcc;
524 	return (0);
525 }
526 
527 static void
528 cuse_client_is_closing(struct cuse_client *pcc)
529 {
530 	struct cuse_client_command *pccmd;
531 	uint32_t n;
532 
533 	if (CUSE_CLIENT_CLOSING(pcc))
534 		return;
535 
536 	pcc->cflags |= CUSE_CLI_IS_CLOSING;
537 	pcc->server_dev = NULL;
538 
539 	for (n = 0; n != CUSE_CMD_MAX; n++) {
540 		pccmd = &pcc->cmds[n];
541 
542 		if (pccmd->entry.tqe_prev != NULL) {
543 			TAILQ_REMOVE(&pcc->server->head, pccmd, entry);
544 			pccmd->entry.tqe_prev = NULL;
545 		}
546 		cv_broadcast(&pccmd->cv);
547 	}
548 }
549 
550 static void
551 cuse_client_send_command_locked(struct cuse_client_command *pccmd,
552     uintptr_t data_ptr, unsigned long arg, int fflags, int ioflag)
553 {
554 	unsigned long cuse_fflags = 0;
555 	struct cuse_server *pcs;
556 
557 	if (fflags & FREAD)
558 		cuse_fflags |= CUSE_FFLAG_READ;
559 
560 	if (fflags & FWRITE)
561 		cuse_fflags |= CUSE_FFLAG_WRITE;
562 
563 	if (ioflag & IO_NDELAY)
564 		cuse_fflags |= CUSE_FFLAG_NONBLOCK;
565 #if defined(__LP64__)
566 	if (SV_CURPROC_FLAG(SV_ILP32))
567 		cuse_fflags |= CUSE_FFLAG_COMPAT32;
568 #endif
569 	pccmd->sub.fflags = cuse_fflags;
570 	pccmd->sub.data_pointer = data_ptr;
571 	pccmd->sub.argument = arg;
572 
573 	pcs = pccmd->client->server;
574 
575 	if ((pccmd->entry.tqe_prev == NULL) &&
576 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
577 	    (pcs->is_closing == 0)) {
578 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
579 		cv_signal(&pcs->cv);
580 	}
581 }
582 
583 static void
584 cuse_client_got_signal(struct cuse_client_command *pccmd)
585 {
586 	struct cuse_server *pcs;
587 
588 	pccmd->got_signal = 1;
589 
590 	pccmd = &pccmd->client->cmds[CUSE_CMD_SIGNAL];
591 
592 	pcs = pccmd->client->server;
593 
594 	if ((pccmd->entry.tqe_prev == NULL) &&
595 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
596 	    (pcs->is_closing == 0)) {
597 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
598 		cv_signal(&pcs->cv);
599 	}
600 }
601 
602 static int
603 cuse_client_receive_command_locked(struct cuse_client_command *pccmd,
604     uint8_t *arg_ptr, uint32_t arg_len)
605 {
606 	struct cuse_server *pcs;
607 	int error;
608 
609 	pcs = pccmd->client->server;
610 	error = 0;
611 
612 	pccmd->proc_curr = curthread->td_proc;
613 
614 	if (CUSE_CLIENT_CLOSING(pccmd->client) || pcs->is_closing) {
615 		error = CUSE_ERR_OTHER;
616 		goto done;
617 	}
618 	while (pccmd->command == CUSE_CMD_NONE) {
619 		if (error != 0) {
620 			cv_wait(&pccmd->cv, &pcs->mtx);
621 		} else {
622 			error = cv_wait_sig(&pccmd->cv, &pcs->mtx);
623 
624 			if (error != 0)
625 				cuse_client_got_signal(pccmd);
626 		}
627 		if (CUSE_CLIENT_CLOSING(pccmd->client) || pcs->is_closing) {
628 			error = CUSE_ERR_OTHER;
629 			goto done;
630 		}
631 	}
632 
633 	error = pccmd->error;
634 	pccmd->command = CUSE_CMD_NONE;
635 	cv_signal(&pccmd->cv);
636 
637 done:
638 
639 	/* wait until all process references are gone */
640 
641 	pccmd->proc_curr = NULL;
642 
643 	while (pccmd->proc_refs != 0)
644 		cv_wait(&pccmd->cv, &pcs->mtx);
645 
646 	return (error);
647 }
648 
649 /*------------------------------------------------------------------------*
650  *	CUSE SERVER PART
651  *------------------------------------------------------------------------*/
652 
653 static void
654 cuse_server_free_dev(struct cuse_server_dev *pcsd)
655 {
656 	struct cuse_server *pcs;
657 	struct cuse_client *pcc;
658 
659 	/* get server pointer */
660 	pcs = pcsd->server;
661 
662 	/* prevent creation of more devices */
663 	cuse_server_lock(pcs);
664 	if (pcsd->kern_dev != NULL)
665 		pcsd->kern_dev->si_drv1 = NULL;
666 
667 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
668 		if (pcc->server_dev == pcsd)
669 			cuse_client_is_closing(pcc);
670 	}
671 	cuse_server_unlock(pcs);
672 
673 	/* destroy device, if any */
674 	if (pcsd->kern_dev != NULL) {
675 		/* destroy device synchronously */
676 		destroy_dev(pcsd->kern_dev);
677 	}
678 	free(pcsd, M_CUSE);
679 }
680 
681 static void
682 cuse_server_unref(struct cuse_server *pcs)
683 {
684 	struct cuse_server_dev *pcsd;
685 	struct cuse_memory *mem;
686 
687 	cuse_server_lock(pcs);
688 	if (--(pcs->refs) != 0) {
689 		cuse_server_unlock(pcs);
690 		return;
691 	}
692 	cuse_server_is_closing(pcs);
693 	/* final client wakeup, if any */
694 	cuse_server_wakeup_all_client_locked(pcs);
695 
696 	cuse_global_lock();
697 	TAILQ_REMOVE(&cuse_server_head, pcs, entry);
698 	cuse_global_unlock();
699 
700 	while ((pcsd = TAILQ_FIRST(&pcs->hdev)) != NULL) {
701 		TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
702 		cuse_server_unlock(pcs);
703 		cuse_server_free_dev(pcsd);
704 		cuse_server_lock(pcs);
705 	}
706 
707 	cuse_free_unit_by_id_locked(pcs, -1);
708 
709 	while ((mem = TAILQ_FIRST(&pcs->hmem)) != NULL) {
710 		TAILQ_REMOVE(&pcs->hmem, mem, entry);
711 		cuse_server_unlock(pcs);
712 		cuse_vm_memory_free(mem);
713 		cuse_server_lock(pcs);
714 	}
715 
716 	knlist_clear(&pcs->selinfo.si_note, 1);
717 	knlist_destroy(&pcs->selinfo.si_note);
718 
719 	cuse_server_unlock(pcs);
720 
721 	seldrain(&pcs->selinfo);
722 
723 	cv_destroy(&pcs->cv);
724 
725 	mtx_destroy(&pcs->mtx);
726 
727 	free(pcs, M_CUSE);
728 }
729 
730 static int
731 cuse_server_do_close(struct cuse_server *pcs)
732 {
733 	int retval;
734 
735 	cuse_server_lock(pcs);
736 	cuse_server_is_closing(pcs);
737 	/* final client wakeup, if any */
738 	cuse_server_wakeup_all_client_locked(pcs);
739 
740 	knlist_clear(&pcs->selinfo.si_note, 1);
741 
742 	retval = pcs->refs;
743 	cuse_server_unlock(pcs);
744 
745 	return (retval);
746 }
747 
748 static void
749 cuse_server_free(void *arg)
750 {
751 	struct cuse_server *pcs = arg;
752 
753 	/*
754 	 * The final server unref should be done by the server thread
755 	 * to prevent deadlock in the client cdevpriv destructor,
756 	 * which cannot destroy itself.
757 	 */
758 	while (cuse_server_do_close(pcs) != 1)
759 		pause("W", hz);
760 
761 	/* drop final refcount */
762 	cuse_server_unref(pcs);
763 }
764 
765 static int
766 cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
767 {
768 	struct cuse_server *pcs;
769 
770 	pcs = malloc(sizeof(*pcs), M_CUSE, M_WAITOK | M_ZERO);
771 
772 	if (devfs_set_cdevpriv(pcs, &cuse_server_free)) {
773 		printf("Cuse: Cannot set cdevpriv.\n");
774 		free(pcs, M_CUSE);
775 		return (ENOMEM);
776 	}
777 	/* store current process ID */
778 	pcs->pid = curproc->p_pid;
779 
780 	TAILQ_INIT(&pcs->head);
781 	TAILQ_INIT(&pcs->hdev);
782 	TAILQ_INIT(&pcs->hcli);
783 	TAILQ_INIT(&pcs->hmem);
784 
785 	cv_init(&pcs->cv, "cuse-server-cv");
786 
787 	mtx_init(&pcs->mtx, "cuse-server-mtx", NULL, MTX_DEF);
788 
789 	knlist_init_mtx(&pcs->selinfo.si_note, &pcs->mtx);
790 
791 	cuse_global_lock();
792 	pcs->refs++;
793 	TAILQ_INSERT_TAIL(&cuse_server_head, pcs, entry);
794 	cuse_global_unlock();
795 
796 	return (0);
797 }
798 
/*
 * Close handler of the server device.
 *
 * Runs the close sequence if the server can still be looked up; the
 * handler itself always returns 0.
 */
static int
cuse_server_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct cuse_server *pcs;
	int lookup_error;

	lookup_error = cuse_server_get(&pcs);
	if (lookup_error == 0)
		(void)cuse_server_do_close(pcs);

	return (0);
}
809 
/* Read handler of the server device: always fails with ENXIO. */
static int
cuse_server_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	return (ENXIO);
}
815 
/* Write handler of the server device: always fails with ENXIO. */
static int
cuse_server_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	return (ENXIO);
}
821 
822 static int
823 cuse_server_ioctl_copy_locked(struct cuse_server *pcs,
824     struct cuse_client_command *pccmd,
825     struct cuse_data_chunk *pchk, int isread)
826 {
827 	struct proc *p_proc;
828 	uint32_t offset;
829 	int error;
830 
831 	offset = pchk->peer_ptr - CUSE_BUF_MIN_PTR;
832 
833 	if (pchk->length > CUSE_BUFFER_MAX)
834 		return (EFAULT);
835 
836 	if (offset >= CUSE_BUFFER_MAX)
837 		return (EFAULT);
838 
839 	if ((offset + pchk->length) > CUSE_BUFFER_MAX)
840 		return (EFAULT);
841 
842 	p_proc = pccmd->proc_curr;
843 	if (p_proc == NULL)
844 		return (ENXIO);
845 
846 	if (pccmd->proc_refs < 0)
847 		return (ENOMEM);
848 
849 	pccmd->proc_refs++;
850 
851 	cuse_server_unlock(pcs);
852 
853 	if (isread == 0) {
854 		error = copyin(
855 		    (void *)pchk->local_ptr,
856 		    pccmd->client->ioctl_buffer + offset,
857 		    pchk->length);
858 	} else {
859 		error = copyout(
860 		    pccmd->client->ioctl_buffer + offset,
861 		    (void *)pchk->local_ptr,
862 		    pchk->length);
863 	}
864 
865 	cuse_server_lock(pcs);
866 
867 	pccmd->proc_refs--;
868 
869 	if (pccmd->proc_curr == NULL)
870 		cv_signal(&pccmd->cv);
871 
872 	return (error);
873 }
874 
875 static int
876 cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
877     struct proc *proc_d, vm_offset_t data_d, size_t len)
878 {
879 	struct thread *td;
880 	struct proc *proc_cur;
881 	int error;
882 
883 	td = curthread;
884 	proc_cur = td->td_proc;
885 
886 	if (proc_cur == proc_d) {
887 		struct iovec iov = {
888 			.iov_base = (caddr_t)data_d,
889 			.iov_len = len,
890 		};
891 		struct uio uio = {
892 			.uio_iov = &iov,
893 			.uio_iovcnt = 1,
894 			.uio_offset = (off_t)data_s,
895 			.uio_resid = len,
896 			.uio_segflg = UIO_USERSPACE,
897 			.uio_rw = UIO_READ,
898 			.uio_td = td,
899 		};
900 
901 		PHOLD(proc_s);
902 		error = proc_rwmem(proc_s, &uio);
903 		PRELE(proc_s);
904 
905 	} else if (proc_cur == proc_s) {
906 		struct iovec iov = {
907 			.iov_base = (caddr_t)data_s,
908 			.iov_len = len,
909 		};
910 		struct uio uio = {
911 			.uio_iov = &iov,
912 			.uio_iovcnt = 1,
913 			.uio_offset = (off_t)data_d,
914 			.uio_resid = len,
915 			.uio_segflg = UIO_USERSPACE,
916 			.uio_rw = UIO_WRITE,
917 			.uio_td = td,
918 		};
919 
920 		PHOLD(proc_d);
921 		error = proc_rwmem(proc_d, &uio);
922 		PRELE(proc_d);
923 	} else {
924 		error = EINVAL;
925 	}
926 	return (error);
927 }
928 
929 static int
930 cuse_server_data_copy_locked(struct cuse_server *pcs,
931     struct cuse_client_command *pccmd,
932     struct cuse_data_chunk *pchk, int isread)
933 {
934 	struct proc *p_proc;
935 	int error;
936 
937 	p_proc = pccmd->proc_curr;
938 	if (p_proc == NULL)
939 		return (ENXIO);
940 
941 	if (pccmd->proc_refs < 0)
942 		return (ENOMEM);
943 
944 	pccmd->proc_refs++;
945 
946 	cuse_server_unlock(pcs);
947 
948 	if (isread == 0) {
949 		error = cuse_proc2proc_copy(
950 		    curthread->td_proc, pchk->local_ptr,
951 		    p_proc, pchk->peer_ptr,
952 		    pchk->length);
953 	} else {
954 		error = cuse_proc2proc_copy(
955 		    p_proc, pchk->peer_ptr,
956 		    curthread->td_proc, pchk->local_ptr,
957 		    pchk->length);
958 	}
959 
960 	cuse_server_lock(pcs);
961 
962 	pccmd->proc_refs--;
963 
964 	if (pccmd->proc_curr == NULL)
965 		cv_signal(&pccmd->cv);
966 
967 	return (error);
968 }
969 
970 static int
971 cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
972 {
973 	int n;
974 	int x = 0;
975 	int match;
976 
977 	do {
978 		for (match = n = 0; n != CUSE_DEVICES_MAX; n++) {
979 			if (cuse_alloc_unit[n] != NULL) {
980 				if ((cuse_alloc_unit_id[n] ^ id) & CUSE_ID_MASK)
981 					continue;
982 				if ((cuse_alloc_unit_id[n] & ~CUSE_ID_MASK) == x) {
983 					x++;
984 					match = 1;
985 				}
986 			}
987 		}
988 	} while (match);
989 
990 	if (x < 256) {
991 		for (n = 0; n != CUSE_DEVICES_MAX; n++) {
992 			if (cuse_alloc_unit[n] == NULL) {
993 				cuse_alloc_unit[n] = pcs;
994 				cuse_alloc_unit_id[n] = id | x;
995 				return (x);
996 			}
997 		}
998 	}
999 	return (-1);
1000 }
1001 
/*
 * Wake up select/poll waiters and kqueue listeners registered on the
 * server's selinfo.
 */
static void
cuse_server_wakeup_locked(struct cuse_server *pcs)
{
	selwakeup(&pcs->selinfo);
	KNOTE_LOCKED(&pcs->selinfo.si_note, 0);
}
1008 
1009 static void
1010 cuse_server_wakeup_all_client_locked(struct cuse_server *pcs)
1011 {
1012 	struct cuse_client *pcc;
1013 
1014 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
1015 		pcc->cflags |= (CUSE_CLI_KNOTE_NEED_READ |
1016 		    CUSE_CLI_KNOTE_NEED_WRITE);
1017 	}
1018 	cuse_server_wakeup_locked(pcs);
1019 }
1020 
1021 static int
1022 cuse_free_unit_by_id_locked(struct cuse_server *pcs, int id)
1023 {
1024 	int n;
1025 	int found = 0;
1026 
1027 	for (n = 0; n != CUSE_DEVICES_MAX; n++) {
1028 		if (cuse_alloc_unit[n] == pcs) {
1029 			if (cuse_alloc_unit_id[n] == id || id == -1) {
1030 				cuse_alloc_unit[n] = NULL;
1031 				cuse_alloc_unit_id[n] = 0;
1032 				found = 1;
1033 			}
1034 		}
1035 	}
1036 
1037 	return (found ? 0 : EINVAL);
1038 }
1039 
/*
 * ioctl handler of the server device.
 *
 * The server bound to the current file is looked up first; its lookup
 * error is returned unchanged.  "data" is interpreted according to
 * "cmd".  Unknown commands fail with ENXIO.  Most commands take the
 * server mutex around their work.
 */
static int
cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
    caddr_t data, int fflag, struct thread *td)
{
	struct cuse_server *pcs;
	int error;

	error = cuse_server_get(&pcs);
	if (error != 0)
		return (error);

	switch (cmd) {
		struct cuse_client_command *pccmd;
		struct cuse_client *pcc;
		struct cuse_command *pcmd;
		struct cuse_alloc_info *pai;
		struct cuse_create_dev *pcd;
		struct cuse_server_dev *pcsd;
		struct cuse_data_chunk *pchk;
		int n;

	/* fetch the next queued client command, sleeping until one arrives */
	case CUSE_IOCTL_GET_COMMAND:
		pcmd = (void *)data;

		cuse_server_lock(pcs);

		while ((pccmd = TAILQ_FIRST(&pcs->head)) == NULL) {
			error = cv_wait_sig(&pcs->cv, &pcs->mtx);

			/* a closing server overrides the wait result */
			if (pcs->is_closing)
				error = ENXIO;

			if (error) {
				cuse_server_unlock(pcs);
				return (error);
			}
		}

		/* dequeue; a NULL tqe_prev marks the command as unqueued */
		TAILQ_REMOVE(&pcs->head, pccmd, entry);
		pccmd->entry.tqe_prev = NULL;

		/* remember which server thread is handling the command */
		pccmd->entered = curthread;

		*pcmd = pccmd->sub;

		cuse_server_unlock(pcs);

		break;

	/* complete every command entered by the current thread */
	case CUSE_IOCTL_SYNC_COMMAND:

		cuse_server_lock(pcs);
		while ((pccmd = cuse_server_find_command(pcs, curthread)) != NULL) {
			/* send sync command */
			pccmd->entered = NULL;
			pccmd->error = *(int *)data;
			pccmd->command = CUSE_CMD_SYNC;

			/* signal peer, if any */
			cv_signal(&pccmd->cv);
		}
		cuse_server_unlock(pcs);

		break;

	/* allocate a unit number under the default identifier */
	case CUSE_IOCTL_ALLOC_UNIT:

		cuse_server_lock(pcs);
		n = cuse_alloc_unit_by_id_locked(pcs,
		    CUSE_ID_DEFAULT(0));
		cuse_server_unlock(pcs);

		if (n < 0)
			error = ENOMEM;
		else
			*(int *)data = n;
		break;

	/* allocate a unit number under a caller supplied identifier */
	case CUSE_IOCTL_ALLOC_UNIT_BY_ID:

		n = *(int *)data;

		/* only the CUSE_ID_MASK bits of the request are used */
		n = (n & CUSE_ID_MASK);

		cuse_server_lock(pcs);
		n = cuse_alloc_unit_by_id_locked(pcs, n);
		cuse_server_unlock(pcs);

		if (n < 0)
			error = ENOMEM;
		else
			*(int *)data = n;
		break;

	/* free a unit number allocated under the default identifier */
	case CUSE_IOCTL_FREE_UNIT:

		n = *(int *)data;

		n = CUSE_ID_DEFAULT(n);

		cuse_server_lock(pcs);
		error = cuse_free_unit_by_id_locked(pcs, n);
		cuse_server_unlock(pcs);
		break;

	/* free a unit number given its full identifier */
	case CUSE_IOCTL_FREE_UNIT_BY_ID:

		n = *(int *)data;

		cuse_server_lock(pcs);
		error = cuse_free_unit_by_id_locked(pcs, n);
		cuse_server_unlock(pcs);
		break;

	/* create a memory allocation after range checking the request */
	case CUSE_IOCTL_ALLOC_MEMORY:

		pai = (void *)data;

		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
			error = ENOMEM;
			break;
		}
		if (pai->page_count >= CUSE_ALLOC_PAGES_MAX) {
			error = ENOMEM;
			break;
		}
		error = cuse_server_alloc_memory(pcs,
		    pai->alloc_nr, pai->page_count);
		break;

	/* release a memory allocation */
	case CUSE_IOCTL_FREE_MEMORY:
		pai = (void *)data;

		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
			error = ENOMEM;
			break;
		}
		error = cuse_server_free_memory(pcs, pai->alloc_nr);
		break;

	/* read and clear the signal flag of the current thread's command */
	case CUSE_IOCTL_GET_SIG:

		cuse_server_lock(pcs);
		pccmd = cuse_server_find_command(pcs, curthread);

		if (pccmd != NULL) {
			n = pccmd->got_signal;
			pccmd->got_signal = 0;
		} else {
			n = 0;
		}
		cuse_server_unlock(pcs);

		*(int *)data = n;

		break;

	/* store a per-file handle in every command slot of the client */
	case CUSE_IOCTL_SET_PFH:

		cuse_server_lock(pcs);
		pccmd = cuse_server_find_command(pcs, curthread);

		if (pccmd != NULL) {
			pcc = pccmd->client;
			for (n = 0; n != CUSE_CMD_MAX; n++) {
				pcc->cmds[n].sub.per_file_handle = *(uintptr_t *)data;
			}
		} else {
			error = ENXIO;
		}
		cuse_server_unlock(pcs);
		break;

	/* create a client device node; requires PRIV_DRIVER */
	case CUSE_IOCTL_CREATE_DEV:

		error = priv_check(curthread, PRIV_DRIVER);
		if (error)
			break;

		pcd = (void *)data;

		/* filter input */

		/* force NUL termination of the requested name */
		pcd->devname[sizeof(pcd->devname) - 1] = 0;

		if (pcd->devname[0] == 0) {
			error = EINVAL;
			break;
		}
		cuse_str_filter(pcd->devname);

		pcd->permissions &= 0777;

		/* try to allocate a character device */

		pcsd = malloc(sizeof(*pcsd), M_CUSE, M_WAITOK | M_ZERO);

		pcsd->server = pcs;

		pcsd->user_dev = pcd->dev;

		pcsd->kern_dev = make_dev_credf(MAKEDEV_CHECKNAME,
		    &cuse_client_devsw, 0, NULL, pcd->user_id, pcd->group_id,
		    pcd->permissions, "%s", pcd->devname);

		if (pcsd->kern_dev == NULL) {
			free(pcsd, M_CUSE);
			error = ENOMEM;
			break;
		}
		/* link the kernel device back to its descriptor */
		pcsd->kern_dev->si_drv1 = pcsd;

		cuse_server_lock(pcs);
		TAILQ_INSERT_TAIL(&pcs->hdev, pcsd, entry);
		cuse_server_unlock(pcs);

		break;

	/* destroy all devices matching a user handle; requires PRIV_DRIVER */
	case CUSE_IOCTL_DESTROY_DEV:

		error = priv_check(curthread, PRIV_DRIVER);
		if (error)
			break;

		cuse_server_lock(pcs);

		/* stays EINVAL unless at least one device matches */
		error = EINVAL;

		pcsd = TAILQ_FIRST(&pcs->hdev);
		while (pcsd != NULL) {
			if (pcsd->user_dev == *(struct cuse_dev **)data) {
				TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
				cuse_server_unlock(pcs);
				cuse_server_free_dev(pcsd);
				cuse_server_lock(pcs);
				error = 0;
				/* the lock was dropped, so restart from the head */
				pcsd = TAILQ_FIRST(&pcs->hdev);
			} else {
				pcsd = TAILQ_NEXT(pcsd, entry);
			}
		}

		cuse_server_unlock(pcs);
		break;

	/* move data for the command entered by the current thread */
	case CUSE_IOCTL_WRITE_DATA:
	case CUSE_IOCTL_READ_DATA:

		cuse_server_lock(pcs);
		pchk = (struct cuse_data_chunk *)data;

		pccmd = cuse_server_find_command(pcs, curthread);

		/* peer_ptr selects between the ioctl buffer and peer memory */
		if (pccmd == NULL) {
			error = ENXIO;	/* invalid request */
		} else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
			error = EFAULT;	/* NULL pointer */
		} else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
			error = cuse_server_ioctl_copy_locked(pcs, pccmd,
			    pchk, cmd == CUSE_IOCTL_READ_DATA);
		} else {
			error = cuse_server_data_copy_locked(pcs, pccmd,
			    pchk, cmd == CUSE_IOCTL_READ_DATA);
		}
		cuse_server_unlock(pcs);
		break;

	case CUSE_IOCTL_SELWAKEUP:
		cuse_server_lock(pcs);
		/*
		 * We don't know which direction caused the event.
		 * Wakeup both!
		 */
		cuse_server_wakeup_all_client_locked(pcs);
		cuse_server_unlock(pcs);
		break;

	default:
		error = ENXIO;
		break;
	}
	return (error);
}
1323 
/*
 * Poll handler for the CUSE server device.
 *
 * Every requested event among POLLHUP, POLLPRI, POLLIN, POLLRDNORM,
 * POLLOUT and POLLWRNORM is reported back as ready; any other bit in
 * "events" is dropped. Neither "dev" nor "td" is consulted.
 */
static int
cuse_server_poll(struct cdev *dev, int events, struct thread *td)
{
	const int ready_mask = POLLHUP | POLLPRI | POLLIN |
	    POLLRDNORM | POLLOUT | POLLWRNORM;

	return (events & ready_mask);
}
1330 
1331 static int
1332 cuse_server_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1333     vm_size_t size, struct vm_object **object, int nprot)
1334 {
1335 	uint32_t page_nr = *offset / PAGE_SIZE;
1336 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1337 	struct cuse_memory *mem;
1338 	struct cuse_server *pcs;
1339 	int error;
1340 
1341 	error = cuse_server_get(&pcs);
1342 	if (error != 0)
1343 		return (error);
1344 
1345 	cuse_server_lock(pcs);
1346 	/* lookup memory structure */
1347 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
1348 		if (mem->alloc_nr == alloc_nr)
1349 			break;
1350 	}
1351 	if (mem == NULL) {
1352 		cuse_server_unlock(pcs);
1353 		return (ENOMEM);
1354 	}
1355 	/* verify page offset */
1356 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1357 	if (page_nr >= mem->page_count) {
1358 		cuse_server_unlock(pcs);
1359 		return (ENXIO);
1360 	}
1361 	/* verify mmap size */
1362 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1363 	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1364 		cuse_server_unlock(pcs);
1365 		return (EINVAL);
1366 	}
1367 	vm_object_reference(mem->object);
1368 	*object = mem->object;
1369 	cuse_server_unlock(pcs);
1370 
1371 	/* set new VM object offset to use */
1372 	*offset = page_nr * PAGE_SIZE;
1373 
1374 	/* success */
1375 	return (0);
1376 }
1377 
1378 /*------------------------------------------------------------------------*
1379  *	CUSE CLIENT PART
1380  *------------------------------------------------------------------------*/
1381 static void
1382 cuse_client_free(void *arg)
1383 {
1384 	struct cuse_client *pcc = arg;
1385 	struct cuse_client_command *pccmd;
1386 	struct cuse_server *pcs;
1387 	int n;
1388 
1389 	pcs = pcc->server;
1390 
1391 	cuse_server_lock(pcs);
1392 	cuse_client_is_closing(pcc);
1393 	TAILQ_REMOVE(&pcs->hcli, pcc, entry);
1394 	cuse_server_unlock(pcs);
1395 
1396 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1397 		pccmd = &pcc->cmds[n];
1398 
1399 		sx_destroy(&pccmd->sx);
1400 		cv_destroy(&pccmd->cv);
1401 	}
1402 
1403 	free(pcc, M_CUSE);
1404 
1405 	/* drop reference on server */
1406 	cuse_server_unref(pcs);
1407 }
1408 
1409 static int
1410 cuse_client_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
1411 {
1412 	struct cuse_client_command *pccmd;
1413 	struct cuse_server_dev *pcsd;
1414 	struct cuse_client *pcc;
1415 	struct cuse_server *pcs;
1416 	struct cuse_dev *pcd;
1417 	int error;
1418 	int n;
1419 
1420 	pcsd = dev->si_drv1;
1421 	if (pcsd != NULL) {
1422 		pcs = pcsd->server;
1423 		pcd = pcsd->user_dev;
1424 
1425 		cuse_server_lock(pcs);
1426 		/*
1427 		 * Check that the refcount didn't wrap and that the
1428 		 * same process is not both client and server. This
1429 		 * can easily lead to deadlocks when destroying the
1430 		 * CUSE character device nodes:
1431 		 */
1432 		pcs->refs++;
1433 		if (pcs->refs < 0 || pcs->pid == curproc->p_pid) {
1434 			/* overflow or wrong PID */
1435 			pcs->refs--;
1436 			cuse_server_unlock(pcs);
1437 			return (EINVAL);
1438 		}
1439 		cuse_server_unlock(pcs);
1440 	} else {
1441 		return (EINVAL);
1442 	}
1443 
1444 	pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
1445 	if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
1446 		printf("Cuse: Cannot set cdevpriv.\n");
1447 		/* drop reference on server */
1448 		cuse_server_unref(pcs);
1449 		free(pcc, M_CUSE);
1450 		return (ENOMEM);
1451 	}
1452 	pcc->fflags = fflags;
1453 	pcc->server_dev = pcsd;
1454 	pcc->server = pcs;
1455 
1456 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1457 		pccmd = &pcc->cmds[n];
1458 
1459 		pccmd->sub.dev = pcd;
1460 		pccmd->sub.command = n;
1461 		pccmd->client = pcc;
1462 
1463 		sx_init(&pccmd->sx, "cuse-client-sx");
1464 		cv_init(&pccmd->cv, "cuse-client-cv");
1465 	}
1466 
1467 	cuse_server_lock(pcs);
1468 
1469 	/* cuse_client_free() assumes that the client is listed somewhere! */
1470 	/* always enqueue */
1471 
1472 	TAILQ_INSERT_TAIL(&pcs->hcli, pcc, entry);
1473 
1474 	/* check if server is closing */
1475 	if ((pcs->is_closing != 0) || (dev->si_drv1 == NULL)) {
1476 		error = EINVAL;
1477 	} else {
1478 		error = 0;
1479 	}
1480 	cuse_server_unlock(pcs);
1481 
1482 	if (error) {
1483 		devfs_clear_cdevpriv();	/* XXX bugfix */
1484 		return (error);
1485 	}
1486 	pccmd = &pcc->cmds[CUSE_CMD_OPEN];
1487 
1488 	cuse_cmd_lock(pccmd);
1489 
1490 	cuse_server_lock(pcs);
1491 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1492 
1493 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1494 	cuse_server_unlock(pcs);
1495 
1496 	if (error < 0) {
1497 		error = cuse_convert_error(error);
1498 	} else {
1499 		error = 0;
1500 	}
1501 
1502 	cuse_cmd_unlock(pccmd);
1503 
1504 	if (error)
1505 		devfs_clear_cdevpriv();	/* XXX bugfix */
1506 
1507 	return (error);
1508 }
1509 
1510 static int
1511 cuse_client_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
1512 {
1513 	struct cuse_client_command *pccmd;
1514 	struct cuse_client *pcc;
1515 	struct cuse_server *pcs;
1516 	int error;
1517 
1518 	error = cuse_client_get(&pcc);
1519 	if (error != 0)
1520 		return (0);
1521 
1522 	pccmd = &pcc->cmds[CUSE_CMD_CLOSE];
1523 	pcs = pcc->server;
1524 
1525 	cuse_cmd_lock(pccmd);
1526 
1527 	cuse_server_lock(pcs);
1528 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1529 
1530 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1531 	cuse_cmd_unlock(pccmd);
1532 
1533 	cuse_client_is_closing(pcc);
1534 	cuse_server_unlock(pcs);
1535 
1536 	return (0);
1537 }
1538 
1539 static void
1540 cuse_client_kqfilter_poll(struct cdev *dev, struct cuse_client *pcc)
1541 {
1542 	struct cuse_server *pcs = pcc->server;
1543 	int temp;
1544 
1545 	cuse_server_lock(pcs);
1546 	temp = (pcc->cflags & (CUSE_CLI_KNOTE_HAS_READ |
1547 	    CUSE_CLI_KNOTE_HAS_WRITE));
1548 	pcc->cflags &= ~(CUSE_CLI_KNOTE_NEED_READ |
1549 	    CUSE_CLI_KNOTE_NEED_WRITE);
1550 	cuse_server_unlock(pcs);
1551 
1552 	if (temp != 0) {
1553 		/* get the latest polling state from the server */
1554 		temp = cuse_client_poll(dev, POLLIN | POLLOUT, NULL);
1555 
1556 		if (temp & (POLLIN | POLLOUT)) {
1557 			cuse_server_lock(pcs);
1558 			if (temp & POLLIN)
1559 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_READ;
1560 			if (temp & POLLOUT)
1561 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_WRITE;
1562 
1563 			/* make sure the "knote" gets woken up */
1564 			cuse_server_wakeup_locked(pcc->server);
1565 			cuse_server_unlock(pcs);
1566 		}
1567 	}
1568 }
1569 
1570 static int
1571 cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
1572 {
1573 	struct cuse_client_command *pccmd;
1574 	struct cuse_client *pcc;
1575 	struct cuse_server *pcs;
1576 	int error;
1577 	int len;
1578 
1579 	error = cuse_client_get(&pcc);
1580 	if (error != 0)
1581 		return (error);
1582 
1583 	pccmd = &pcc->cmds[CUSE_CMD_READ];
1584 	pcs = pcc->server;
1585 
1586 	if (uio->uio_segflg != UIO_USERSPACE) {
1587 		return (EINVAL);
1588 	}
1589 	uio->uio_segflg = UIO_NOCOPY;
1590 
1591 	cuse_cmd_lock(pccmd);
1592 
1593 	while (uio->uio_resid != 0) {
1594 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1595 			error = ENOMEM;
1596 			break;
1597 		}
1598 		len = uio->uio_iov->iov_len;
1599 
1600 		cuse_server_lock(pcs);
1601 		cuse_client_send_command_locked(pccmd,
1602 		    (uintptr_t)uio->uio_iov->iov_base,
1603 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1604 
1605 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1606 		cuse_server_unlock(pcs);
1607 
1608 		if (error < 0) {
1609 			error = cuse_convert_error(error);
1610 			break;
1611 		} else if (error == len) {
1612 			error = uiomove(NULL, error, uio);
1613 			if (error)
1614 				break;
1615 		} else {
1616 			error = uiomove(NULL, error, uio);
1617 			break;
1618 		}
1619 	}
1620 	cuse_cmd_unlock(pccmd);
1621 
1622 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1623 
1624 	if (error == EWOULDBLOCK)
1625 		cuse_client_kqfilter_poll(dev, pcc);
1626 
1627 	return (error);
1628 }
1629 
1630 static int
1631 cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
1632 {
1633 	struct cuse_client_command *pccmd;
1634 	struct cuse_client *pcc;
1635 	struct cuse_server *pcs;
1636 	int error;
1637 	int len;
1638 
1639 	error = cuse_client_get(&pcc);
1640 	if (error != 0)
1641 		return (error);
1642 
1643 	pccmd = &pcc->cmds[CUSE_CMD_WRITE];
1644 	pcs = pcc->server;
1645 
1646 	if (uio->uio_segflg != UIO_USERSPACE) {
1647 		return (EINVAL);
1648 	}
1649 	uio->uio_segflg = UIO_NOCOPY;
1650 
1651 	cuse_cmd_lock(pccmd);
1652 
1653 	while (uio->uio_resid != 0) {
1654 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1655 			error = ENOMEM;
1656 			break;
1657 		}
1658 		len = uio->uio_iov->iov_len;
1659 
1660 		cuse_server_lock(pcs);
1661 		cuse_client_send_command_locked(pccmd,
1662 		    (uintptr_t)uio->uio_iov->iov_base,
1663 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1664 
1665 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1666 		cuse_server_unlock(pcs);
1667 
1668 		if (error < 0) {
1669 			error = cuse_convert_error(error);
1670 			break;
1671 		} else if (error == len) {
1672 			error = uiomove(NULL, error, uio);
1673 			if (error)
1674 				break;
1675 		} else {
1676 			error = uiomove(NULL, error, uio);
1677 			break;
1678 		}
1679 	}
1680 	cuse_cmd_unlock(pccmd);
1681 
1682 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1683 
1684 	if (error == EWOULDBLOCK)
1685 		cuse_client_kqfilter_poll(dev, pcc);
1686 
1687 	return (error);
1688 }
1689 
1690 int
1691 cuse_client_ioctl(struct cdev *dev, unsigned long cmd,
1692     caddr_t data, int fflag, struct thread *td)
1693 {
1694 	struct cuse_client_command *pccmd;
1695 	struct cuse_client *pcc;
1696 	struct cuse_server *pcs;
1697 	int error;
1698 	int len;
1699 
1700 	error = cuse_client_get(&pcc);
1701 	if (error != 0)
1702 		return (error);
1703 
1704 	len = IOCPARM_LEN(cmd);
1705 	if (len > CUSE_BUFFER_MAX)
1706 		return (ENOMEM);
1707 
1708 	pccmd = &pcc->cmds[CUSE_CMD_IOCTL];
1709 	pcs = pcc->server;
1710 
1711 	cuse_cmd_lock(pccmd);
1712 
1713 	if (cmd & (IOC_IN | IOC_VOID))
1714 		memcpy(pcc->ioctl_buffer, data, len);
1715 
1716 	/*
1717 	 * When the ioctl-length is zero drivers can pass information
1718 	 * through the data pointer of the ioctl. Make sure this information
1719 	 * is forwarded to the driver.
1720 	 */
1721 
1722 	cuse_server_lock(pcs);
1723 	cuse_client_send_command_locked(pccmd,
1724 	    (len == 0) ? *(long *)data : CUSE_BUF_MIN_PTR,
1725 	    (unsigned long)cmd, pcc->fflags,
1726 	    (fflag & O_NONBLOCK) ? IO_NDELAY : 0);
1727 
1728 	error = cuse_client_receive_command_locked(pccmd, data, len);
1729 	cuse_server_unlock(pcs);
1730 
1731 	if (error < 0) {
1732 		error = cuse_convert_error(error);
1733 	} else {
1734 		error = 0;
1735 	}
1736 
1737 	if (cmd & IOC_OUT)
1738 		memcpy(data, pcc->ioctl_buffer, len);
1739 
1740 	cuse_cmd_unlock(pccmd);
1741 
1742 	if (error == EWOULDBLOCK)
1743 		cuse_client_kqfilter_poll(dev, pcc);
1744 
1745 	return (error);
1746 }
1747 
1748 static int
1749 cuse_client_poll(struct cdev *dev, int events, struct thread *td)
1750 {
1751 	struct cuse_client_command *pccmd;
1752 	struct cuse_client *pcc;
1753 	struct cuse_server *pcs;
1754 	unsigned long temp;
1755 	int error;
1756 	int revents;
1757 
1758 	error = cuse_client_get(&pcc);
1759 	if (error != 0)
1760 		goto pollnval;
1761 
1762 	temp = 0;
1763 	pcs = pcc->server;
1764 
1765 	if (events & (POLLPRI | POLLIN | POLLRDNORM))
1766 		temp |= CUSE_POLL_READ;
1767 
1768 	if (events & (POLLOUT | POLLWRNORM))
1769 		temp |= CUSE_POLL_WRITE;
1770 
1771 	if (events & POLLHUP)
1772 		temp |= CUSE_POLL_ERROR;
1773 
1774 	pccmd = &pcc->cmds[CUSE_CMD_POLL];
1775 
1776 	cuse_cmd_lock(pccmd);
1777 
1778 	/* Need to selrecord() first to not loose any events. */
1779 	if (temp != 0 && td != NULL)
1780 		selrecord(td, &pcs->selinfo);
1781 
1782 	cuse_server_lock(pcs);
1783 	cuse_client_send_command_locked(pccmd,
1784 	    0, temp, pcc->fflags, IO_NDELAY);
1785 
1786 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1787 	cuse_server_unlock(pcs);
1788 
1789 	cuse_cmd_unlock(pccmd);
1790 
1791 	if (error < 0) {
1792 		goto pollnval;
1793 	} else {
1794 		revents = 0;
1795 		if (error & CUSE_POLL_READ)
1796 			revents |= (events & (POLLPRI | POLLIN | POLLRDNORM));
1797 		if (error & CUSE_POLL_WRITE)
1798 			revents |= (events & (POLLOUT | POLLWRNORM));
1799 		if (error & CUSE_POLL_ERROR)
1800 			revents |= (events & POLLHUP);
1801 	}
1802 	return (revents);
1803 
1804 pollnval:
1805 	/* XXX many clients don't understand POLLNVAL */
1806 	return (events & (POLLHUP | POLLPRI | POLLIN |
1807 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1808 }
1809 
1810 static int
1811 cuse_client_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1812     vm_size_t size, struct vm_object **object, int nprot)
1813 {
1814 	uint32_t page_nr = *offset / PAGE_SIZE;
1815 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1816 	struct cuse_memory *mem;
1817 	struct cuse_client *pcc;
1818 	struct cuse_server *pcs;
1819 	int error;
1820 
1821 	error = cuse_client_get(&pcc);
1822 	if (error != 0)
1823 		return (error);
1824 
1825 	pcs = pcc->server;
1826 
1827 	cuse_server_lock(pcs);
1828 	/* lookup memory structure */
1829 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
1830 		if (mem->alloc_nr == alloc_nr)
1831 			break;
1832 	}
1833 	if (mem == NULL) {
1834 		cuse_server_unlock(pcs);
1835 		return (ENOMEM);
1836 	}
1837 	/* verify page offset */
1838 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1839 	if (page_nr >= mem->page_count) {
1840 		cuse_server_unlock(pcs);
1841 		return (ENXIO);
1842 	}
1843 	/* verify mmap size */
1844 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1845 	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1846 		cuse_server_unlock(pcs);
1847 		return (EINVAL);
1848 	}
1849 	vm_object_reference(mem->object);
1850 	*object = mem->object;
1851 	cuse_server_unlock(pcs);
1852 
1853 	/* set new VM object offset to use */
1854 	*offset = page_nr * PAGE_SIZE;
1855 
1856 	/* success */
1857 	return (0);
1858 }
1859 
1860 static void
1861 cuse_client_kqfilter_read_detach(struct knote *kn)
1862 {
1863 	struct cuse_client *pcc;
1864 	struct cuse_server *pcs;
1865 
1866 	pcc = kn->kn_hook;
1867 	pcs = pcc->server;
1868 
1869 	cuse_server_lock(pcs);
1870 	knlist_remove(&pcs->selinfo.si_note, kn, 1);
1871 	cuse_server_unlock(pcs);
1872 }
1873 
1874 static void
1875 cuse_client_kqfilter_write_detach(struct knote *kn)
1876 {
1877 	struct cuse_client *pcc;
1878 	struct cuse_server *pcs;
1879 
1880 	pcc = kn->kn_hook;
1881 	pcs = pcc->server;
1882 
1883 	cuse_server_lock(pcs);
1884 	knlist_remove(&pcs->selinfo.si_note, kn, 1);
1885 	cuse_server_unlock(pcs);
1886 }
1887 
1888 static int
1889 cuse_client_kqfilter_read_event(struct knote *kn, long hint)
1890 {
1891 	struct cuse_client *pcc;
1892 
1893 	pcc = kn->kn_hook;
1894 
1895 	mtx_assert(&pcc->server->mtx, MA_OWNED);
1896 
1897 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_READ) ? 1 : 0);
1898 }
1899 
1900 static int
1901 cuse_client_kqfilter_write_event(struct knote *kn, long hint)
1902 {
1903 	struct cuse_client *pcc;
1904 
1905 	pcc = kn->kn_hook;
1906 
1907 	mtx_assert(&pcc->server->mtx, MA_OWNED);
1908 
1909 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_WRITE) ? 1 : 0);
1910 }
1911 
1912 static int
1913 cuse_client_kqfilter(struct cdev *dev, struct knote *kn)
1914 {
1915 	struct cuse_client *pcc;
1916 	struct cuse_server *pcs;
1917 	int error;
1918 
1919 	error = cuse_client_get(&pcc);
1920 	if (error != 0)
1921 		return (error);
1922 
1923 	pcs = pcc->server;
1924 
1925 	cuse_server_lock(pcs);
1926 	switch (kn->kn_filter) {
1927 	case EVFILT_READ:
1928 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_READ;
1929 		kn->kn_hook = pcc;
1930 		kn->kn_fop = &cuse_client_kqfilter_read_ops;
1931 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1932 		break;
1933 	case EVFILT_WRITE:
1934 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_WRITE;
1935 		kn->kn_hook = pcc;
1936 		kn->kn_fop = &cuse_client_kqfilter_write_ops;
1937 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1938 		break;
1939 	default:
1940 		error = EINVAL;
1941 		break;
1942 	}
1943 	cuse_server_unlock(pcs);
1944 
1945 	if (error == 0)
1946 		cuse_client_kqfilter_poll(dev, pcc);
1947 	return (error);
1948 }
1949