xref: /freebsd/sys/fs/cuse/cuse.c (revision 52c2bb75163559a6e2866ad374a7de67a4ea1273)
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2010-2017 Hans Petter Selasky. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/stdint.h>
28 #include <sys/stddef.h>
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/conf.h>
33 #include <sys/kernel.h>
34 #include <sys/bus.h>
35 #include <sys/linker_set.h>
36 #include <sys/module.h>
37 #include <sys/lock.h>
38 #include <sys/mutex.h>
39 #include <sys/condvar.h>
40 #include <sys/sysctl.h>
41 #include <sys/unistd.h>
42 #include <sys/malloc.h>
43 #include <sys/priv.h>
44 #include <sys/uio.h>
45 #include <sys/poll.h>
46 #include <sys/sx.h>
47 #include <sys/rwlock.h>
48 #include <sys/queue.h>
49 #include <sys/fcntl.h>
50 #include <sys/proc.h>
51 #include <sys/vnode.h>
52 #include <sys/selinfo.h>
53 #include <sys/ptrace.h>
54 #include <sys/sysent.h>
55 
56 #include <machine/bus.h>
57 
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60 #include <vm/vm_object.h>
61 #include <vm/vm_page.h>
62 #include <vm/vm_pager.h>
63 
64 #include <fs/cuse/cuse_defs.h>
65 #include <fs/cuse/cuse_ioctl.h>
66 
67 MODULE_VERSION(cuse, 1);
68 
69 /*
70  * Prevent cuse4bsd.ko and cuse.ko from loading at the same time by
71  * declaring support for the cuse4bsd interface in cuse.ko:
72  */
73 MODULE_VERSION(cuse4bsd, 1);
74 
75 #ifdef FEATURE
76 FEATURE(cuse, "Userspace character devices");
77 #endif
78 
79 struct cuse_command;
80 struct cuse_server;
81 struct cuse_client;
82 
/*
 * One command slot of a client.  Each client owns CUSE_CMD_MAX of these
 * (see struct cuse_client).  A slot is queued on the server when the
 * client issues a request and is completed by the server through
 * CUSE_IOCTL_SYNC_COMMAND.
 */
struct cuse_client_command {
	/*
	 * Linkage on the server's pending-command queue.  The code uses
	 * "entry.tqe_prev == NULL" as the "not queued" marker.
	 */
	TAILQ_ENTRY(cuse_client_command) entry;
	/* payload copied out to the server by CUSE_IOCTL_GET_COMMAND */
	struct cuse_command sub;
	/* exclusive lock taken by cuse_cmd_lock()/cuse_cmd_unlock() */
	struct sx sx;
	/* waited on, together with cuse_mtx, for completion and ref drain */
	struct cv cv;
	/* server thread that fetched this command; NULL after sync */
	struct thread *entered;
	/* client this slot belongs to */
	struct cuse_client *client;
	/* process waiting for completion of this command, else NULL */
	struct proc *proc_curr;
	/* number of data copies currently in progress for this command */
	int	proc_refs;
	/* set when the waiting client was interrupted by a signal */
	int	got_signal;
	/* completion status stored by CUSE_IOCTL_SYNC_COMMAND */
	int	error;
	/* CUSE_CMD_NONE while pending, CUSE_CMD_SYNC once completed */
	int	command;
};
96 
/*
 * A block of mappable memory allocated on behalf of a server through
 * CUSE_IOCTL_ALLOC_MEMORY.
 */
struct cuse_memory {
	/* linkage on the owning server's "hmem" list */
	TAILQ_ENTRY(cuse_memory) entry;
	/* VM object backing the allocation */
	vm_object_t object;
	/* size of the allocation in pages */
	uint32_t page_count;
	/* allocation number chosen by the server; unique per server */
	uint32_t alloc_nr;
};
103 
/*
 * A character device created by a server through CUSE_IOCTL_CREATE_DEV.
 */
struct cuse_server_dev {
	/* linkage on the owning server's "hdev" list */
	TAILQ_ENTRY(cuse_server_dev) entry;
	/* server that created the device */
	struct cuse_server *server;
	/* kernel device node; its si_drv1 points back at this structure */
	struct cdev *kern_dev;
	/* userland handle given at creation, matched again on destroy */
	struct cuse_dev *user_dev;
};
110 
/*
 * Per-open state of /dev/cuse.  All lists are protected by cuse_mtx.
 */
struct cuse_server {
	/* linkage on the global cuse_server_head list */
	TAILQ_ENTRY(cuse_server) entry;
	/* commands queued by clients and not yet fetched by the server */
	TAILQ_HEAD(, cuse_client_command) head;
	/* devices created by this server */
	TAILQ_HEAD(, cuse_server_dev) hdev;
	/* clients attached to this server */
	TAILQ_HEAD(, cuse_client) hcli;
	/* memory blocks allocated by this server */
	TAILQ_HEAD(, cuse_memory) hmem;
	/* signalled when a command is appended to "head" */
	struct cv cv;
	/* select/kqueue state woken by cuse_server_wakeup_locked() */
	struct selinfo selinfo;
	/* PID of the process that opened /dev/cuse */
	pid_t	pid;
	/* non-zero once shutdown of this server has started */
	int	is_closing;
	/* reference count; the server is freed when it drops to zero */
	int	refs;
};
123 
/*
 * Per-open state of a device node created by a CUSE server.
 */
struct cuse_client {
	/* linkage on the owning server's "hcli" list */
	TAILQ_ENTRY(cuse_client) entry;
	/* NOTE(review): second list linkage; its user is not visible here */
	TAILQ_ENTRY(cuse_client) entry_ref;
	/* one command slot per command type */
	struct cuse_client_command cmds[CUSE_CMD_MAX];
	/* server this client is attached to */
	struct cuse_server *server;
	/* device the client opened; set to NULL once the client is closing */
	struct cuse_server_dev *server_dev;

	/* staging buffer accessed by cuse_server_ioctl_copy_locked() */
	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);

	int	fflags;			/* file flags */
	int	cflags;			/* client flags */
#define	CUSE_CLI_IS_CLOSING 0x01
#define	CUSE_CLI_KNOTE_NEED_READ 0x02
#define	CUSE_CLI_KNOTE_NEED_WRITE 0x04
#define	CUSE_CLI_KNOTE_HAS_READ 0x08
#define	CUSE_CLI_KNOTE_HAS_WRITE 0x10
};
141 
/* Evaluates to non-zero when the given client has been marked as closing. */
#define	CUSE_CLIENT_CLOSING(pcc) \
    ((pcc)->cflags & CUSE_CLI_IS_CLOSING)

static	MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");

/* list of all open servers */
static TAILQ_HEAD(, cuse_server) cuse_server_head;
/* global mutex, taken through cuse_lock()/cuse_unlock() */
static struct mtx cuse_mtx;
/* the /dev/cuse control device */
static struct cdev *cuse_dev;
/* unit allocation table: owning server per slot, NULL when free */
static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
/* unit allocation table: identifier stored in the matching slot */
static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
152 
153 static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);
154 static void cuse_client_kqfilter_read_detach(struct knote *kn);
155 static void cuse_client_kqfilter_write_detach(struct knote *kn);
156 static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
157 static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);
158 
/* kqueue filter operations for the read side of a client device */
static struct filterops cuse_client_kqfilter_read_ops = {
	.f_isfd = 1,
	.f_detach = cuse_client_kqfilter_read_detach,
	.f_event = cuse_client_kqfilter_read_event,
};

/* kqueue filter operations for the write side of a client device */
static struct filterops cuse_client_kqfilter_write_ops = {
	.f_isfd = 1,
	.f_detach = cuse_client_kqfilter_write_detach,
	.f_event = cuse_client_kqfilter_write_event,
};
170 
/* entry points of the client device nodes */
static d_open_t cuse_client_open;
static d_close_t cuse_client_close;
static d_ioctl_t cuse_client_ioctl;
static d_read_t cuse_client_read;
static d_write_t cuse_client_write;
static d_poll_t cuse_client_poll;
static d_mmap_single_t cuse_client_mmap_single;
static d_kqfilter_t cuse_client_kqfilter;

/* character device switch used for devices created by CUSE servers */
static struct cdevsw cuse_client_devsw = {
	.d_version = D_VERSION,
	.d_open = cuse_client_open,
	.d_close = cuse_client_close,
	.d_ioctl = cuse_client_ioctl,
	.d_name = "cuse_client",
	.d_flags = D_TRACKCLOSE,
	.d_read = cuse_client_read,
	.d_write = cuse_client_write,
	.d_poll = cuse_client_poll,
	.d_mmap_single = cuse_client_mmap_single,
	.d_kqfilter = cuse_client_kqfilter,
};
193 
/* entry points of the /dev/cuse control device */
static d_open_t cuse_server_open;
static d_close_t cuse_server_close;
static d_ioctl_t cuse_server_ioctl;
static d_read_t cuse_server_read;
static d_write_t cuse_server_write;
static d_poll_t cuse_server_poll;
static d_mmap_single_t cuse_server_mmap_single;

/* character device switch used for the /dev/cuse control device */
static struct cdevsw cuse_server_devsw = {
	.d_version = D_VERSION,
	.d_open = cuse_server_open,
	.d_close = cuse_server_close,
	.d_ioctl = cuse_server_ioctl,
	.d_name = "cuse_server",
	.d_flags = D_TRACKCLOSE,
	.d_read = cuse_server_read,
	.d_write = cuse_server_write,
	.d_poll = cuse_server_poll,
	.d_mmap_single = cuse_server_mmap_single,
};
214 
215 static void cuse_client_is_closing(struct cuse_client *);
216 static int cuse_free_unit_by_id_locked(struct cuse_server *, int);
217 
218 static void
219 cuse_lock(void)
220 {
221 	mtx_lock(&cuse_mtx);
222 }
223 
224 static void
225 cuse_unlock(void)
226 {
227 	mtx_unlock(&cuse_mtx);
228 }
229 
230 static void
231 cuse_cmd_lock(struct cuse_client_command *pccmd)
232 {
233 	sx_xlock(&pccmd->sx);
234 }
235 
236 static void
237 cuse_cmd_unlock(struct cuse_client_command *pccmd)
238 {
239 	sx_xunlock(&pccmd->sx);
240 }
241 
242 static void
243 cuse_kern_init(void *arg)
244 {
245 	TAILQ_INIT(&cuse_server_head);
246 
247 	mtx_init(&cuse_mtx, "cuse-mtx", NULL, MTX_DEF);
248 
249 	cuse_dev = make_dev(&cuse_server_devsw, 0,
250 	    UID_ROOT, GID_OPERATOR, 0600, "cuse");
251 
252 	printf("Cuse v%d.%d.%d @ /dev/cuse\n",
253 	    (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
254 	    (CUSE_VERSION >> 0) & 0xFF);
255 }
256 SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, NULL);
257 
258 static void
259 cuse_kern_uninit(void *arg)
260 {
261 	void *ptr;
262 
263 	while (1) {
264 
265 		printf("Cuse: Please exit all /dev/cuse instances "
266 		    "and processes which have used this device.\n");
267 
268 		pause("DRAIN", 2 * hz);
269 
270 		cuse_lock();
271 		ptr = TAILQ_FIRST(&cuse_server_head);
272 		cuse_unlock();
273 
274 		if (ptr == NULL)
275 			break;
276 	}
277 
278 	if (cuse_dev != NULL)
279 		destroy_dev(cuse_dev);
280 
281 	mtx_destroy(&cuse_mtx);
282 }
283 SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
284 
285 static int
286 cuse_server_get(struct cuse_server **ppcs)
287 {
288 	struct cuse_server *pcs;
289 	int error;
290 
291 	error = devfs_get_cdevpriv((void **)&pcs);
292 	if (error != 0) {
293 		*ppcs = NULL;
294 		return (error);
295 	}
296 	/* check if closing */
297 	cuse_lock();
298 	if (pcs->is_closing) {
299 		cuse_unlock();
300 		*ppcs = NULL;
301 		return (EINVAL);
302 	}
303 	cuse_unlock();
304 	*ppcs = pcs;
305 	return (0);
306 }
307 
308 static void
309 cuse_server_is_closing(struct cuse_server *pcs)
310 {
311 	struct cuse_client *pcc;
312 
313 	if (pcs->is_closing)
314 		return;
315 
316 	pcs->is_closing = 1;
317 
318 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
319 		cuse_client_is_closing(pcc);
320 	}
321 }
322 
323 static struct cuse_client_command *
324 cuse_server_find_command(struct cuse_server *pcs, struct thread *td)
325 {
326 	struct cuse_client *pcc;
327 	int n;
328 
329 	if (pcs->is_closing)
330 		goto done;
331 
332 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
333 		if (CUSE_CLIENT_CLOSING(pcc))
334 			continue;
335 		for (n = 0; n != CUSE_CMD_MAX; n++) {
336 			if (pcc->cmds[n].entered == td)
337 				return (&pcc->cmds[n]);
338 		}
339 	}
340 done:
341 	return (NULL);
342 }
343 
/*
 * Sanitise a NUL-terminated string in place.  Every character which is
 * not an ASCII letter, an ASCII digit, '.', '_' or '/' is replaced by
 * an underscore.
 */
static void
cuse_str_filter(char *ptr)
{
	for (; *ptr != 0; ptr++) {
		const int ch = *ptr;
		const int is_lower = (ch >= 'a') && (ch <= 'z');
		const int is_upper = (ch >= 'A') && (ch <= 'Z');
		const int is_digit = (ch >= '0') && (ch <= '9');
		const int is_extra = (ch == '.') || (ch == '_') || (ch == '/');

		if (!(is_lower || is_upper || is_digit || is_extra))
			*ptr = '_';
	}
}
372 
373 static int
374 cuse_convert_error(int error)
375 {
376 	;				/* indent fix */
377 	switch (error) {
378 	case CUSE_ERR_NONE:
379 		return (0);
380 	case CUSE_ERR_BUSY:
381 		return (EBUSY);
382 	case CUSE_ERR_WOULDBLOCK:
383 		return (EWOULDBLOCK);
384 	case CUSE_ERR_INVALID:
385 		return (EINVAL);
386 	case CUSE_ERR_NO_MEMORY:
387 		return (ENOMEM);
388 	case CUSE_ERR_FAULT:
389 		return (EFAULT);
390 	case CUSE_ERR_SIGNAL:
391 		return (EINTR);
392 	case CUSE_ERR_NO_DEVICE:
393 		return (ENODEV);
394 	default:
395 		return (ENXIO);
396 	}
397 }
398 
399 static void
400 cuse_vm_memory_free(struct cuse_memory *mem)
401 {
402 	/* last user is gone - free */
403 	vm_object_deallocate(mem->object);
404 
405 	/* free CUSE memory */
406 	free(mem, M_CUSE);
407 }
408 
409 static int
410 cuse_server_alloc_memory(struct cuse_server *pcs, uint32_t alloc_nr,
411     uint32_t page_count)
412 {
413 	struct cuse_memory *temp;
414 	struct cuse_memory *mem;
415 	vm_object_t object;
416 	int error;
417 
418 	mem = malloc(sizeof(*mem), M_CUSE, M_WAITOK | M_ZERO);
419 	if (mem == NULL)
420 		return (ENOMEM);
421 
422 	object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * page_count,
423 	    VM_PROT_DEFAULT, 0, curthread->td_ucred);
424 	if (object == NULL) {
425 		error = ENOMEM;
426 		goto error_0;
427 	}
428 
429 	cuse_lock();
430 	/* check if allocation number already exists */
431 	TAILQ_FOREACH(temp, &pcs->hmem, entry) {
432 		if (temp->alloc_nr == alloc_nr)
433 			break;
434 	}
435 	if (temp != NULL) {
436 		cuse_unlock();
437 		error = EBUSY;
438 		goto error_1;
439 	}
440 	mem->object = object;
441 	mem->page_count = page_count;
442 	mem->alloc_nr = alloc_nr;
443 	TAILQ_INSERT_TAIL(&pcs->hmem, mem, entry);
444 	cuse_unlock();
445 
446 	return (0);
447 
448 error_1:
449 	vm_object_deallocate(object);
450 error_0:
451 	free(mem, M_CUSE);
452 	return (error);
453 }
454 
455 static int
456 cuse_server_free_memory(struct cuse_server *pcs, uint32_t alloc_nr)
457 {
458 	struct cuse_memory *mem;
459 
460 	cuse_lock();
461 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
462 		if (mem->alloc_nr == alloc_nr)
463 			break;
464 	}
465 	if (mem == NULL) {
466 		cuse_unlock();
467 		return (EINVAL);
468 	}
469 	TAILQ_REMOVE(&pcs->hmem, mem, entry);
470 	cuse_unlock();
471 
472 	cuse_vm_memory_free(mem);
473 
474 	return (0);
475 }
476 
477 static int
478 cuse_client_get(struct cuse_client **ppcc)
479 {
480 	struct cuse_client *pcc;
481 	int error;
482 
483 	/* try to get private data */
484 	error = devfs_get_cdevpriv((void **)&pcc);
485 	if (error != 0) {
486 		*ppcc = NULL;
487 		return (error);
488 	}
489 	/* check if closing */
490 	cuse_lock();
491 	if (CUSE_CLIENT_CLOSING(pcc) || pcc->server->is_closing) {
492 		cuse_unlock();
493 		*ppcc = NULL;
494 		return (EINVAL);
495 	}
496 	cuse_unlock();
497 	*ppcc = pcc;
498 	return (0);
499 }
500 
501 static void
502 cuse_client_is_closing(struct cuse_client *pcc)
503 {
504 	struct cuse_client_command *pccmd;
505 	uint32_t n;
506 
507 	if (CUSE_CLIENT_CLOSING(pcc))
508 		return;
509 
510 	pcc->cflags |= CUSE_CLI_IS_CLOSING;
511 	pcc->server_dev = NULL;
512 
513 	for (n = 0; n != CUSE_CMD_MAX; n++) {
514 
515 		pccmd = &pcc->cmds[n];
516 
517 		if (pccmd->entry.tqe_prev != NULL) {
518 			TAILQ_REMOVE(&pcc->server->head, pccmd, entry);
519 			pccmd->entry.tqe_prev = NULL;
520 		}
521 		cv_broadcast(&pccmd->cv);
522 	}
523 }
524 
525 static void
526 cuse_client_send_command_locked(struct cuse_client_command *pccmd,
527     uintptr_t data_ptr, unsigned long arg, int fflags, int ioflag)
528 {
529 	unsigned long cuse_fflags = 0;
530 	struct cuse_server *pcs;
531 
532 	if (fflags & FREAD)
533 		cuse_fflags |= CUSE_FFLAG_READ;
534 
535 	if (fflags & FWRITE)
536 		cuse_fflags |= CUSE_FFLAG_WRITE;
537 
538 	if (ioflag & IO_NDELAY)
539 		cuse_fflags |= CUSE_FFLAG_NONBLOCK;
540 #if defined(__LP64__)
541 	if (SV_CURPROC_FLAG(SV_ILP32))
542 		cuse_fflags |= CUSE_FFLAG_COMPAT32;
543 #endif
544 	pccmd->sub.fflags = cuse_fflags;
545 	pccmd->sub.data_pointer = data_ptr;
546 	pccmd->sub.argument = arg;
547 
548 	pcs = pccmd->client->server;
549 
550 	if ((pccmd->entry.tqe_prev == NULL) &&
551 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
552 	    (pcs->is_closing == 0)) {
553 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
554 		cv_signal(&pcs->cv);
555 	}
556 }
557 
558 static void
559 cuse_client_got_signal(struct cuse_client_command *pccmd)
560 {
561 	struct cuse_server *pcs;
562 
563 	pccmd->got_signal = 1;
564 
565 	pccmd = &pccmd->client->cmds[CUSE_CMD_SIGNAL];
566 
567 	pcs = pccmd->client->server;
568 
569 	if ((pccmd->entry.tqe_prev == NULL) &&
570 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
571 	    (pcs->is_closing == 0)) {
572 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
573 		cv_signal(&pcs->cv);
574 	}
575 }
576 
577 static int
578 cuse_client_receive_command_locked(struct cuse_client_command *pccmd,
579     uint8_t *arg_ptr, uint32_t arg_len)
580 {
581 	int error;
582 
583 	error = 0;
584 
585 	pccmd->proc_curr = curthread->td_proc;
586 
587 	if (CUSE_CLIENT_CLOSING(pccmd->client) ||
588 	    pccmd->client->server->is_closing) {
589 		error = CUSE_ERR_OTHER;
590 		goto done;
591 	}
592 	while (pccmd->command == CUSE_CMD_NONE) {
593 		if (error != 0) {
594 			cv_wait(&pccmd->cv, &cuse_mtx);
595 		} else {
596 			error = cv_wait_sig(&pccmd->cv, &cuse_mtx);
597 
598 			if (error != 0)
599 				cuse_client_got_signal(pccmd);
600 		}
601 		if (CUSE_CLIENT_CLOSING(pccmd->client) ||
602 		    pccmd->client->server->is_closing) {
603 			error = CUSE_ERR_OTHER;
604 			goto done;
605 		}
606 	}
607 
608 	error = pccmd->error;
609 	pccmd->command = CUSE_CMD_NONE;
610 	cv_signal(&pccmd->cv);
611 
612 done:
613 
614 	/* wait until all process references are gone */
615 
616 	pccmd->proc_curr = NULL;
617 
618 	while (pccmd->proc_refs != 0)
619 		cv_wait(&pccmd->cv, &cuse_mtx);
620 
621 	return (error);
622 }
623 
624 /*------------------------------------------------------------------------*
625  *	CUSE SERVER PART
626  *------------------------------------------------------------------------*/
627 
628 static void
629 cuse_server_free_dev(struct cuse_server_dev *pcsd)
630 {
631 	struct cuse_server *pcs;
632 	struct cuse_client *pcc;
633 
634 	/* get server pointer */
635 	pcs = pcsd->server;
636 
637 	/* prevent creation of more devices */
638 	cuse_lock();
639 	if (pcsd->kern_dev != NULL)
640 		pcsd->kern_dev->si_drv1 = NULL;
641 
642 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
643 		if (pcc->server_dev == pcsd)
644 			cuse_client_is_closing(pcc);
645 	}
646 	cuse_unlock();
647 
648 	/* destroy device, if any */
649 	if (pcsd->kern_dev != NULL) {
650 		/* destroy device synchronously */
651 		destroy_dev(pcsd->kern_dev);
652 	}
653 	free(pcsd, M_CUSE);
654 }
655 
656 static void
657 cuse_server_unref(struct cuse_server *pcs)
658 {
659 	struct cuse_server_dev *pcsd;
660 	struct cuse_memory *mem;
661 
662 	cuse_lock();
663 	pcs->refs--;
664 	if (pcs->refs != 0) {
665 		cuse_unlock();
666 		return;
667 	}
668 	cuse_server_is_closing(pcs);
669 	/* final client wakeup, if any */
670 	cuse_server_wakeup_all_client_locked(pcs);
671 
672 	TAILQ_REMOVE(&cuse_server_head, pcs, entry);
673 
674 	cuse_free_unit_by_id_locked(pcs, -1);
675 
676 	while ((pcsd = TAILQ_FIRST(&pcs->hdev)) != NULL) {
677 		TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
678 		cuse_unlock();
679 		cuse_server_free_dev(pcsd);
680 		cuse_lock();
681 	}
682 
683 	while ((mem = TAILQ_FIRST(&pcs->hmem)) != NULL) {
684 		TAILQ_REMOVE(&pcs->hmem, mem, entry);
685 		cuse_unlock();
686 		cuse_vm_memory_free(mem);
687 		cuse_lock();
688 	}
689 
690 	knlist_clear(&pcs->selinfo.si_note, 1);
691 	knlist_destroy(&pcs->selinfo.si_note);
692 
693 	cuse_unlock();
694 
695 	seldrain(&pcs->selinfo);
696 
697 	cv_destroy(&pcs->cv);
698 
699 	free(pcs, M_CUSE);
700 }
701 
/*
 * Destructor registered with devfs_set_cdevpriv() for server file
 * descriptors; releases the reference taken in cuse_server_open().
 */
static void
cuse_server_free(void *arg)
{
	cuse_server_unref((struct cuse_server *)arg);
}
710 
711 static int
712 cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
713 {
714 	struct cuse_server *pcs;
715 
716 	pcs = malloc(sizeof(*pcs), M_CUSE, M_WAITOK | M_ZERO);
717 	if (pcs == NULL)
718 		return (ENOMEM);
719 
720 	if (devfs_set_cdevpriv(pcs, &cuse_server_free)) {
721 		printf("Cuse: Cannot set cdevpriv.\n");
722 		free(pcs, M_CUSE);
723 		return (ENOMEM);
724 	}
725 	/* store current process ID */
726 	pcs->pid = curproc->p_pid;
727 
728 	TAILQ_INIT(&pcs->head);
729 	TAILQ_INIT(&pcs->hdev);
730 	TAILQ_INIT(&pcs->hcli);
731 	TAILQ_INIT(&pcs->hmem);
732 
733 	cv_init(&pcs->cv, "cuse-server-cv");
734 
735 	knlist_init_mtx(&pcs->selinfo.si_note, &cuse_mtx);
736 
737 	cuse_lock();
738 	pcs->refs++;
739 	TAILQ_INSERT_TAIL(&cuse_server_head, pcs, entry);
740 	cuse_unlock();
741 
742 	return (0);
743 }
744 
745 static int
746 cuse_server_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
747 {
748 	struct cuse_server *pcs;
749 	int error;
750 
751 	error = cuse_server_get(&pcs);
752 	if (error != 0)
753 		goto done;
754 
755 	cuse_lock();
756 	cuse_server_is_closing(pcs);
757 	/* final client wakeup, if any */
758 	cuse_server_wakeup_all_client_locked(pcs);
759 
760 	knlist_clear(&pcs->selinfo.si_note, 1);
761 	cuse_unlock();
762 
763 done:
764 	return (0);
765 }
766 
767 static int
768 cuse_server_read(struct cdev *dev, struct uio *uio, int ioflag)
769 {
770 	return (ENXIO);
771 }
772 
773 static int
774 cuse_server_write(struct cdev *dev, struct uio *uio, int ioflag)
775 {
776 	return (ENXIO);
777 }
778 
779 static int
780 cuse_server_ioctl_copy_locked(struct cuse_client_command *pccmd,
781     struct cuse_data_chunk *pchk, int isread)
782 {
783 	struct proc *p_proc;
784 	uint32_t offset;
785 	int error;
786 
787 	offset = pchk->peer_ptr - CUSE_BUF_MIN_PTR;
788 
789 	if (pchk->length > CUSE_BUFFER_MAX)
790 		return (EFAULT);
791 
792 	if (offset >= CUSE_BUFFER_MAX)
793 		return (EFAULT);
794 
795 	if ((offset + pchk->length) > CUSE_BUFFER_MAX)
796 		return (EFAULT);
797 
798 	p_proc = pccmd->proc_curr;
799 	if (p_proc == NULL)
800 		return (ENXIO);
801 
802 	if (pccmd->proc_refs < 0)
803 		return (ENOMEM);
804 
805 	pccmd->proc_refs++;
806 
807 	cuse_unlock();
808 
809 	if (isread == 0) {
810 		error = copyin(
811 		    (void *)pchk->local_ptr,
812 		    pccmd->client->ioctl_buffer + offset,
813 		    pchk->length);
814 	} else {
815 		error = copyout(
816 		    pccmd->client->ioctl_buffer + offset,
817 		    (void *)pchk->local_ptr,
818 		    pchk->length);
819 	}
820 
821 	cuse_lock();
822 
823 	pccmd->proc_refs--;
824 
825 	if (pccmd->proc_curr == NULL)
826 		cv_signal(&pccmd->cv);
827 
828 	return (error);
829 }
830 
831 static int
832 cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
833     struct proc *proc_d, vm_offset_t data_d, size_t len)
834 {
835 	struct thread *td;
836 	struct proc *proc_cur;
837 	int error;
838 
839 	td = curthread;
840 	proc_cur = td->td_proc;
841 
842 	if (proc_cur == proc_d) {
843 		struct iovec iov = {
844 			.iov_base = (caddr_t)data_d,
845 			.iov_len = len,
846 		};
847 		struct uio uio = {
848 			.uio_iov = &iov,
849 			.uio_iovcnt = 1,
850 			.uio_offset = (off_t)data_s,
851 			.uio_resid = len,
852 			.uio_segflg = UIO_USERSPACE,
853 			.uio_rw = UIO_READ,
854 			.uio_td = td,
855 		};
856 
857 		PHOLD(proc_s);
858 		error = proc_rwmem(proc_s, &uio);
859 		PRELE(proc_s);
860 
861 	} else if (proc_cur == proc_s) {
862 		struct iovec iov = {
863 			.iov_base = (caddr_t)data_s,
864 			.iov_len = len,
865 		};
866 		struct uio uio = {
867 			.uio_iov = &iov,
868 			.uio_iovcnt = 1,
869 			.uio_offset = (off_t)data_d,
870 			.uio_resid = len,
871 			.uio_segflg = UIO_USERSPACE,
872 			.uio_rw = UIO_WRITE,
873 			.uio_td = td,
874 		};
875 
876 		PHOLD(proc_d);
877 		error = proc_rwmem(proc_d, &uio);
878 		PRELE(proc_d);
879 	} else {
880 		error = EINVAL;
881 	}
882 	return (error);
883 }
884 
885 static int
886 cuse_server_data_copy_locked(struct cuse_client_command *pccmd,
887     struct cuse_data_chunk *pchk, int isread)
888 {
889 	struct proc *p_proc;
890 	int error;
891 
892 	p_proc = pccmd->proc_curr;
893 	if (p_proc == NULL)
894 		return (ENXIO);
895 
896 	if (pccmd->proc_refs < 0)
897 		return (ENOMEM);
898 
899 	pccmd->proc_refs++;
900 
901 	cuse_unlock();
902 
903 	if (isread == 0) {
904 		error = cuse_proc2proc_copy(
905 		    curthread->td_proc, pchk->local_ptr,
906 		    p_proc, pchk->peer_ptr,
907 		    pchk->length);
908 	} else {
909 		error = cuse_proc2proc_copy(
910 		    p_proc, pchk->peer_ptr,
911 		    curthread->td_proc, pchk->local_ptr,
912 		    pchk->length);
913 	}
914 
915 	cuse_lock();
916 
917 	pccmd->proc_refs--;
918 
919 	if (pccmd->proc_curr == NULL)
920 		cv_signal(&pccmd->cv);
921 
922 	return (error);
923 }
924 
925 static int
926 cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
927 {
928 	int n;
929 	int x = 0;
930 	int match;
931 
932 	do {
933 		for (match = n = 0; n != CUSE_DEVICES_MAX; n++) {
934 			if (cuse_alloc_unit[n] != NULL) {
935 				if ((cuse_alloc_unit_id[n] ^ id) & CUSE_ID_MASK)
936 					continue;
937 				if ((cuse_alloc_unit_id[n] & ~CUSE_ID_MASK) == x) {
938 					x++;
939 					match = 1;
940 				}
941 			}
942 		}
943 	} while (match);
944 
945 	if (x < 256) {
946 		for (n = 0; n != CUSE_DEVICES_MAX; n++) {
947 			if (cuse_alloc_unit[n] == NULL) {
948 				cuse_alloc_unit[n] = pcs;
949 				cuse_alloc_unit_id[n] = id | x;
950 				return (x);
951 			}
952 		}
953 	}
954 	return (-1);
955 }
956 
957 static void
958 cuse_server_wakeup_locked(struct cuse_server *pcs)
959 {
960 	selwakeup(&pcs->selinfo);
961 	KNOTE_LOCKED(&pcs->selinfo.si_note, 0);
962 }
963 
964 static void
965 cuse_server_wakeup_all_client_locked(struct cuse_server *pcs)
966 {
967 	struct cuse_client *pcc;
968 
969 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
970 		pcc->cflags |= (CUSE_CLI_KNOTE_NEED_READ |
971 		    CUSE_CLI_KNOTE_NEED_WRITE);
972 	}
973 	cuse_server_wakeup_locked(pcs);
974 }
975 
976 static int
977 cuse_free_unit_by_id_locked(struct cuse_server *pcs, int id)
978 {
979 	int n;
980 	int found = 0;
981 
982 	for (n = 0; n != CUSE_DEVICES_MAX; n++) {
983 		if (cuse_alloc_unit[n] == pcs) {
984 			if (cuse_alloc_unit_id[n] == id || id == -1) {
985 				cuse_alloc_unit[n] = NULL;
986 				cuse_alloc_unit_id[n] = 0;
987 				found = 1;
988 			}
989 		}
990 	}
991 
992 	return (found ? 0 : EINVAL);
993 }
994 
995 static int
996 cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
997     caddr_t data, int fflag, struct thread *td)
998 {
999 	struct cuse_server *pcs;
1000 	int error;
1001 
1002 	error = cuse_server_get(&pcs);
1003 	if (error != 0)
1004 		return (error);
1005 
1006 	switch (cmd) {
1007 		struct cuse_client_command *pccmd;
1008 		struct cuse_client *pcc;
1009 		struct cuse_command *pcmd;
1010 		struct cuse_alloc_info *pai;
1011 		struct cuse_create_dev *pcd;
1012 		struct cuse_server_dev *pcsd;
1013 		struct cuse_data_chunk *pchk;
1014 		int n;
1015 
1016 	case CUSE_IOCTL_GET_COMMAND:
1017 		pcmd = (void *)data;
1018 
1019 		cuse_lock();
1020 
1021 		while ((pccmd = TAILQ_FIRST(&pcs->head)) == NULL) {
1022 			error = cv_wait_sig(&pcs->cv, &cuse_mtx);
1023 
1024 			if (pcs->is_closing)
1025 				error = ENXIO;
1026 
1027 			if (error) {
1028 				cuse_unlock();
1029 				return (error);
1030 			}
1031 		}
1032 
1033 		TAILQ_REMOVE(&pcs->head, pccmd, entry);
1034 		pccmd->entry.tqe_prev = NULL;
1035 
1036 		pccmd->entered = curthread;
1037 
1038 		*pcmd = pccmd->sub;
1039 
1040 		cuse_unlock();
1041 
1042 		break;
1043 
1044 	case CUSE_IOCTL_SYNC_COMMAND:
1045 
1046 		cuse_lock();
1047 		while ((pccmd = cuse_server_find_command(pcs, curthread)) != NULL) {
1048 
1049 			/* send sync command */
1050 			pccmd->entered = NULL;
1051 			pccmd->error = *(int *)data;
1052 			pccmd->command = CUSE_CMD_SYNC;
1053 
1054 			/* signal peer, if any */
1055 			cv_signal(&pccmd->cv);
1056 		}
1057 		cuse_unlock();
1058 
1059 		break;
1060 
1061 	case CUSE_IOCTL_ALLOC_UNIT:
1062 
1063 		cuse_lock();
1064 		n = cuse_alloc_unit_by_id_locked(pcs,
1065 		    CUSE_ID_DEFAULT(0));
1066 		cuse_unlock();
1067 
1068 		if (n < 0)
1069 			error = ENOMEM;
1070 		else
1071 			*(int *)data = n;
1072 		break;
1073 
1074 	case CUSE_IOCTL_ALLOC_UNIT_BY_ID:
1075 
1076 		n = *(int *)data;
1077 
1078 		n = (n & CUSE_ID_MASK);
1079 
1080 		cuse_lock();
1081 		n = cuse_alloc_unit_by_id_locked(pcs, n);
1082 		cuse_unlock();
1083 
1084 		if (n < 0)
1085 			error = ENOMEM;
1086 		else
1087 			*(int *)data = n;
1088 		break;
1089 
1090 	case CUSE_IOCTL_FREE_UNIT:
1091 
1092 		n = *(int *)data;
1093 
1094 		n = CUSE_ID_DEFAULT(n);
1095 
1096 		cuse_lock();
1097 		error = cuse_free_unit_by_id_locked(pcs, n);
1098 		cuse_unlock();
1099 		break;
1100 
1101 	case CUSE_IOCTL_FREE_UNIT_BY_ID:
1102 
1103 		n = *(int *)data;
1104 
1105 		cuse_lock();
1106 		error = cuse_free_unit_by_id_locked(pcs, n);
1107 		cuse_unlock();
1108 		break;
1109 
1110 	case CUSE_IOCTL_ALLOC_MEMORY:
1111 
1112 		pai = (void *)data;
1113 
1114 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1115 			error = ENOMEM;
1116 			break;
1117 		}
1118 		if (pai->page_count >= CUSE_ALLOC_PAGES_MAX) {
1119 			error = ENOMEM;
1120 			break;
1121 		}
1122 		error = cuse_server_alloc_memory(pcs,
1123 		    pai->alloc_nr, pai->page_count);
1124 		break;
1125 
1126 	case CUSE_IOCTL_FREE_MEMORY:
1127 		pai = (void *)data;
1128 
1129 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1130 			error = ENOMEM;
1131 			break;
1132 		}
1133 		error = cuse_server_free_memory(pcs, pai->alloc_nr);
1134 		break;
1135 
1136 	case CUSE_IOCTL_GET_SIG:
1137 
1138 		cuse_lock();
1139 		pccmd = cuse_server_find_command(pcs, curthread);
1140 
1141 		if (pccmd != NULL) {
1142 			n = pccmd->got_signal;
1143 			pccmd->got_signal = 0;
1144 		} else {
1145 			n = 0;
1146 		}
1147 		cuse_unlock();
1148 
1149 		*(int *)data = n;
1150 
1151 		break;
1152 
1153 	case CUSE_IOCTL_SET_PFH:
1154 
1155 		cuse_lock();
1156 		pccmd = cuse_server_find_command(pcs, curthread);
1157 
1158 		if (pccmd != NULL) {
1159 			pcc = pccmd->client;
1160 			for (n = 0; n != CUSE_CMD_MAX; n++) {
1161 				pcc->cmds[n].sub.per_file_handle = *(uintptr_t *)data;
1162 			}
1163 		} else {
1164 			error = ENXIO;
1165 		}
1166 		cuse_unlock();
1167 		break;
1168 
1169 	case CUSE_IOCTL_CREATE_DEV:
1170 
1171 		error = priv_check(curthread, PRIV_DRIVER);
1172 		if (error)
1173 			break;
1174 
1175 		pcd = (void *)data;
1176 
1177 		/* filter input */
1178 
1179 		pcd->devname[sizeof(pcd->devname) - 1] = 0;
1180 
1181 		if (pcd->devname[0] == 0) {
1182 			error = EINVAL;
1183 			break;
1184 		}
1185 		cuse_str_filter(pcd->devname);
1186 
1187 		pcd->permissions &= 0777;
1188 
1189 		/* try to allocate a character device */
1190 
1191 		pcsd = malloc(sizeof(*pcsd), M_CUSE, M_WAITOK | M_ZERO);
1192 
1193 		if (pcsd == NULL) {
1194 			error = ENOMEM;
1195 			break;
1196 		}
1197 		pcsd->server = pcs;
1198 
1199 		pcsd->user_dev = pcd->dev;
1200 
1201 		pcsd->kern_dev = make_dev_credf(MAKEDEV_CHECKNAME,
1202 		    &cuse_client_devsw, 0, NULL, pcd->user_id, pcd->group_id,
1203 		    pcd->permissions, "%s", pcd->devname);
1204 
1205 		if (pcsd->kern_dev == NULL) {
1206 			free(pcsd, M_CUSE);
1207 			error = ENOMEM;
1208 			break;
1209 		}
1210 		pcsd->kern_dev->si_drv1 = pcsd;
1211 
1212 		cuse_lock();
1213 		TAILQ_INSERT_TAIL(&pcs->hdev, pcsd, entry);
1214 		cuse_unlock();
1215 
1216 		break;
1217 
1218 	case CUSE_IOCTL_DESTROY_DEV:
1219 
1220 		error = priv_check(curthread, PRIV_DRIVER);
1221 		if (error)
1222 			break;
1223 
1224 		cuse_lock();
1225 
1226 		error = EINVAL;
1227 
1228 		pcsd = TAILQ_FIRST(&pcs->hdev);
1229 		while (pcsd != NULL) {
1230 			if (pcsd->user_dev == *(struct cuse_dev **)data) {
1231 				TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
1232 				cuse_unlock();
1233 				cuse_server_free_dev(pcsd);
1234 				cuse_lock();
1235 				error = 0;
1236 				pcsd = TAILQ_FIRST(&pcs->hdev);
1237 			} else {
1238 				pcsd = TAILQ_NEXT(pcsd, entry);
1239 			}
1240 		}
1241 
1242 		cuse_unlock();
1243 		break;
1244 
1245 	case CUSE_IOCTL_WRITE_DATA:
1246 	case CUSE_IOCTL_READ_DATA:
1247 
1248 		cuse_lock();
1249 		pchk = (struct cuse_data_chunk *)data;
1250 
1251 		pccmd = cuse_server_find_command(pcs, curthread);
1252 
1253 		if (pccmd == NULL) {
1254 			error = ENXIO;	/* invalid request */
1255 		} else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
1256 			error = EFAULT;	/* NULL pointer */
1257 		} else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
1258 			error = cuse_server_ioctl_copy_locked(pccmd,
1259 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1260 		} else {
1261 			error = cuse_server_data_copy_locked(pccmd,
1262 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1263 		}
1264 		cuse_unlock();
1265 		break;
1266 
1267 	case CUSE_IOCTL_SELWAKEUP:
1268 		cuse_lock();
1269 		/*
1270 		 * We don't know which direction caused the event.
1271 		 * Wakeup both!
1272 		 */
1273 		cuse_server_wakeup_all_client_locked(pcs);
1274 		cuse_unlock();
1275 		break;
1276 
1277 	default:
1278 		error = ENXIO;
1279 		break;
1280 	}
1281 	return (error);
1282 }
1283 
1284 static int
1285 cuse_server_poll(struct cdev *dev, int events, struct thread *td)
1286 {
1287 	return (events & (POLLHUP | POLLPRI | POLLIN |
1288 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1289 }
1290 
1291 static int
1292 cuse_server_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1293     vm_size_t size, struct vm_object **object, int nprot)
1294 {
1295 	uint32_t page_nr = *offset / PAGE_SIZE;
1296 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1297 	struct cuse_memory *mem;
1298 	struct cuse_server *pcs;
1299 	int error;
1300 
1301 	error = cuse_server_get(&pcs);
1302 	if (error != 0)
1303 		return (error);
1304 
1305 	cuse_lock();
1306 	/* lookup memory structure */
1307 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
1308 		if (mem->alloc_nr == alloc_nr)
1309 			break;
1310 	}
1311 	if (mem == NULL) {
1312 		cuse_unlock();
1313 		return (ENOMEM);
1314 	}
1315 	/* verify page offset */
1316 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1317 	if (page_nr >= mem->page_count) {
1318 		cuse_unlock();
1319 		return (ENXIO);
1320 	}
1321 	/* verify mmap size */
1322 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1323 	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1324 		cuse_unlock();
1325 		return (EINVAL);
1326 	}
1327 	vm_object_reference(mem->object);
1328 	*object = mem->object;
1329 	cuse_unlock();
1330 
1331 	/* set new VM object offset to use */
1332 	*offset = page_nr * PAGE_SIZE;
1333 
1334 	/* success */
1335 	return (0);
1336 }
1337 
1338 /*------------------------------------------------------------------------*
1339  *	CUSE CLIENT PART
1340  *------------------------------------------------------------------------*/
1341 static void
1342 cuse_client_free(void *arg)
1343 {
1344 	struct cuse_client *pcc = arg;
1345 	struct cuse_client_command *pccmd;
1346 	struct cuse_server *pcs;
1347 	int n;
1348 
1349 	cuse_lock();
1350 	cuse_client_is_closing(pcc);
1351 	TAILQ_REMOVE(&pcc->server->hcli, pcc, entry);
1352 	cuse_unlock();
1353 
1354 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1355 
1356 		pccmd = &pcc->cmds[n];
1357 
1358 		sx_destroy(&pccmd->sx);
1359 		cv_destroy(&pccmd->cv);
1360 	}
1361 
1362 	pcs = pcc->server;
1363 
1364 	free(pcc, M_CUSE);
1365 
1366 	/* drop reference on server */
1367 	cuse_server_unref(pcs);
1368 }
1369 
1370 static int
1371 cuse_client_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
1372 {
1373 	struct cuse_client_command *pccmd;
1374 	struct cuse_server_dev *pcsd;
1375 	struct cuse_client *pcc;
1376 	struct cuse_server *pcs;
1377 	struct cuse_dev *pcd;
1378 	int error;
1379 	int n;
1380 
1381 	cuse_lock();
1382 	pcsd = dev->si_drv1;
1383 	if (pcsd != NULL) {
1384 		pcs = pcsd->server;
1385 		pcd = pcsd->user_dev;
1386 		/*
1387 		 * Check that the refcount didn't wrap and that the
1388 		 * same process is not both client and server. This
1389 		 * can easily lead to deadlocks when destroying the
1390 		 * CUSE character device nodes:
1391 		 */
1392 		pcs->refs++;
1393 		if (pcs->refs < 0 || pcs->pid == curproc->p_pid) {
1394 			/* overflow or wrong PID */
1395 			pcs->refs--;
1396 			pcsd = NULL;
1397 		}
1398 	} else {
1399 		pcs = NULL;
1400 		pcd = NULL;
1401 	}
1402 	cuse_unlock();
1403 
1404 	if (pcsd == NULL)
1405 		return (EINVAL);
1406 
1407 	pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
1408 	if (pcc == NULL) {
1409 		/* drop reference on server */
1410 		cuse_server_unref(pcs);
1411 		return (ENOMEM);
1412 	}
1413 	if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
1414 		printf("Cuse: Cannot set cdevpriv.\n");
1415 		/* drop reference on server */
1416 		cuse_server_unref(pcs);
1417 		free(pcc, M_CUSE);
1418 		return (ENOMEM);
1419 	}
1420 	pcc->fflags = fflags;
1421 	pcc->server_dev = pcsd;
1422 	pcc->server = pcs;
1423 
1424 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1425 
1426 		pccmd = &pcc->cmds[n];
1427 
1428 		pccmd->sub.dev = pcd;
1429 		pccmd->sub.command = n;
1430 		pccmd->client = pcc;
1431 
1432 		sx_init(&pccmd->sx, "cuse-client-sx");
1433 		cv_init(&pccmd->cv, "cuse-client-cv");
1434 	}
1435 
1436 	cuse_lock();
1437 
1438 	/* cuse_client_free() assumes that the client is listed somewhere! */
1439 	/* always enqueue */
1440 
1441 	TAILQ_INSERT_TAIL(&pcs->hcli, pcc, entry);
1442 
1443 	/* check if server is closing */
1444 	if ((pcs->is_closing != 0) || (dev->si_drv1 == NULL)) {
1445 		error = EINVAL;
1446 	} else {
1447 		error = 0;
1448 	}
1449 	cuse_unlock();
1450 
1451 	if (error) {
1452 		devfs_clear_cdevpriv();	/* XXX bugfix */
1453 		return (error);
1454 	}
1455 	pccmd = &pcc->cmds[CUSE_CMD_OPEN];
1456 
1457 	cuse_cmd_lock(pccmd);
1458 
1459 	cuse_lock();
1460 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1461 
1462 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1463 	cuse_unlock();
1464 
1465 	if (error < 0) {
1466 		error = cuse_convert_error(error);
1467 	} else {
1468 		error = 0;
1469 	}
1470 
1471 	cuse_cmd_unlock(pccmd);
1472 
1473 	if (error)
1474 		devfs_clear_cdevpriv();	/* XXX bugfix */
1475 
1476 	return (error);
1477 }
1478 
1479 static int
1480 cuse_client_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
1481 {
1482 	struct cuse_client_command *pccmd;
1483 	struct cuse_client *pcc;
1484 	int error;
1485 
1486 	error = cuse_client_get(&pcc);
1487 	if (error != 0)
1488 		return (0);
1489 
1490 	pccmd = &pcc->cmds[CUSE_CMD_CLOSE];
1491 
1492 	cuse_cmd_lock(pccmd);
1493 
1494 	cuse_lock();
1495 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1496 
1497 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1498 	cuse_unlock();
1499 
1500 	cuse_cmd_unlock(pccmd);
1501 
1502 	cuse_lock();
1503 	cuse_client_is_closing(pcc);
1504 	cuse_unlock();
1505 
1506 	return (0);
1507 }
1508 
1509 static void
1510 cuse_client_kqfilter_poll(struct cdev *dev, struct cuse_client *pcc)
1511 {
1512 	int temp;
1513 
1514 	cuse_lock();
1515 	temp = (pcc->cflags & (CUSE_CLI_KNOTE_HAS_READ |
1516 	    CUSE_CLI_KNOTE_HAS_WRITE));
1517 	pcc->cflags &= ~(CUSE_CLI_KNOTE_NEED_READ |
1518 	    CUSE_CLI_KNOTE_NEED_WRITE);
1519 	cuse_unlock();
1520 
1521 	if (temp != 0) {
1522 		/* get the latest polling state from the server */
1523 		temp = cuse_client_poll(dev, POLLIN | POLLOUT, NULL);
1524 
1525 		if (temp & (POLLIN | POLLOUT)) {
1526 			cuse_lock();
1527 			if (temp & POLLIN)
1528 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_READ;
1529 			if (temp & POLLOUT)
1530 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_WRITE;
1531 
1532 			/* make sure the "knote" gets woken up */
1533 			cuse_server_wakeup_locked(pcc->server);
1534 			cuse_unlock();
1535 		}
1536 	}
1537 }
1538 
1539 static int
1540 cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
1541 {
1542 	struct cuse_client_command *pccmd;
1543 	struct cuse_client *pcc;
1544 	int error;
1545 	int len;
1546 
1547 	error = cuse_client_get(&pcc);
1548 	if (error != 0)
1549 		return (error);
1550 
1551 	pccmd = &pcc->cmds[CUSE_CMD_READ];
1552 
1553 	if (uio->uio_segflg != UIO_USERSPACE) {
1554 		return (EINVAL);
1555 	}
1556 	uio->uio_segflg = UIO_NOCOPY;
1557 
1558 	cuse_cmd_lock(pccmd);
1559 
1560 	while (uio->uio_resid != 0) {
1561 
1562 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1563 			error = ENOMEM;
1564 			break;
1565 		}
1566 		len = uio->uio_iov->iov_len;
1567 
1568 		cuse_lock();
1569 		cuse_client_send_command_locked(pccmd,
1570 		    (uintptr_t)uio->uio_iov->iov_base,
1571 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1572 
1573 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1574 		cuse_unlock();
1575 
1576 		if (error < 0) {
1577 			error = cuse_convert_error(error);
1578 			break;
1579 		} else if (error == len) {
1580 			error = uiomove(NULL, error, uio);
1581 			if (error)
1582 				break;
1583 		} else {
1584 			error = uiomove(NULL, error, uio);
1585 			break;
1586 		}
1587 	}
1588 	cuse_cmd_unlock(pccmd);
1589 
1590 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1591 
1592 	if (error == EWOULDBLOCK)
1593 		cuse_client_kqfilter_poll(dev, pcc);
1594 
1595 	return (error);
1596 }
1597 
1598 static int
1599 cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
1600 {
1601 	struct cuse_client_command *pccmd;
1602 	struct cuse_client *pcc;
1603 	int error;
1604 	int len;
1605 
1606 	error = cuse_client_get(&pcc);
1607 	if (error != 0)
1608 		return (error);
1609 
1610 	pccmd = &pcc->cmds[CUSE_CMD_WRITE];
1611 
1612 	if (uio->uio_segflg != UIO_USERSPACE) {
1613 		return (EINVAL);
1614 	}
1615 	uio->uio_segflg = UIO_NOCOPY;
1616 
1617 	cuse_cmd_lock(pccmd);
1618 
1619 	while (uio->uio_resid != 0) {
1620 
1621 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1622 			error = ENOMEM;
1623 			break;
1624 		}
1625 		len = uio->uio_iov->iov_len;
1626 
1627 		cuse_lock();
1628 		cuse_client_send_command_locked(pccmd,
1629 		    (uintptr_t)uio->uio_iov->iov_base,
1630 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1631 
1632 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1633 		cuse_unlock();
1634 
1635 		if (error < 0) {
1636 			error = cuse_convert_error(error);
1637 			break;
1638 		} else if (error == len) {
1639 			error = uiomove(NULL, error, uio);
1640 			if (error)
1641 				break;
1642 		} else {
1643 			error = uiomove(NULL, error, uio);
1644 			break;
1645 		}
1646 	}
1647 	cuse_cmd_unlock(pccmd);
1648 
1649 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1650 
1651 	if (error == EWOULDBLOCK)
1652 		cuse_client_kqfilter_poll(dev, pcc);
1653 
1654 	return (error);
1655 }
1656 
1657 int
1658 cuse_client_ioctl(struct cdev *dev, unsigned long cmd,
1659     caddr_t data, int fflag, struct thread *td)
1660 {
1661 	struct cuse_client_command *pccmd;
1662 	struct cuse_client *pcc;
1663 	int error;
1664 	int len;
1665 
1666 	error = cuse_client_get(&pcc);
1667 	if (error != 0)
1668 		return (error);
1669 
1670 	len = IOCPARM_LEN(cmd);
1671 	if (len > CUSE_BUFFER_MAX)
1672 		return (ENOMEM);
1673 
1674 	pccmd = &pcc->cmds[CUSE_CMD_IOCTL];
1675 
1676 	cuse_cmd_lock(pccmd);
1677 
1678 	if (cmd & (IOC_IN | IOC_VOID))
1679 		memcpy(pcc->ioctl_buffer, data, len);
1680 
1681 	/*
1682 	 * When the ioctl-length is zero drivers can pass information
1683 	 * through the data pointer of the ioctl. Make sure this information
1684 	 * is forwarded to the driver.
1685 	 */
1686 
1687 	cuse_lock();
1688 	cuse_client_send_command_locked(pccmd,
1689 	    (len == 0) ? *(long *)data : CUSE_BUF_MIN_PTR,
1690 	    (unsigned long)cmd, pcc->fflags,
1691 	    (fflag & O_NONBLOCK) ? IO_NDELAY : 0);
1692 
1693 	error = cuse_client_receive_command_locked(pccmd, data, len);
1694 	cuse_unlock();
1695 
1696 	if (error < 0) {
1697 		error = cuse_convert_error(error);
1698 	} else {
1699 		error = 0;
1700 	}
1701 
1702 	if (cmd & IOC_OUT)
1703 		memcpy(data, pcc->ioctl_buffer, len);
1704 
1705 	cuse_cmd_unlock(pccmd);
1706 
1707 	if (error == EWOULDBLOCK)
1708 		cuse_client_kqfilter_poll(dev, pcc);
1709 
1710 	return (error);
1711 }
1712 
1713 static int
1714 cuse_client_poll(struct cdev *dev, int events, struct thread *td)
1715 {
1716 	struct cuse_client_command *pccmd;
1717 	struct cuse_client *pcc;
1718 	unsigned long temp;
1719 	int error;
1720 	int revents;
1721 
1722 	error = cuse_client_get(&pcc);
1723 	if (error != 0)
1724 		goto pollnval;
1725 
1726 	temp = 0;
1727 
1728 	if (events & (POLLPRI | POLLIN | POLLRDNORM))
1729 		temp |= CUSE_POLL_READ;
1730 
1731 	if (events & (POLLOUT | POLLWRNORM))
1732 		temp |= CUSE_POLL_WRITE;
1733 
1734 	if (events & POLLHUP)
1735 		temp |= CUSE_POLL_ERROR;
1736 
1737 	pccmd = &pcc->cmds[CUSE_CMD_POLL];
1738 
1739 	cuse_cmd_lock(pccmd);
1740 
1741 	/* Need to selrecord() first to not loose any events. */
1742 	if (temp != 0 && td != NULL)
1743 		selrecord(td, &pcc->server->selinfo);
1744 
1745 	cuse_lock();
1746 	cuse_client_send_command_locked(pccmd,
1747 	    0, temp, pcc->fflags, IO_NDELAY);
1748 
1749 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1750 	cuse_unlock();
1751 
1752 	cuse_cmd_unlock(pccmd);
1753 
1754 	if (error < 0) {
1755 		goto pollnval;
1756 	} else {
1757 		revents = 0;
1758 		if (error & CUSE_POLL_READ)
1759 			revents |= (events & (POLLPRI | POLLIN | POLLRDNORM));
1760 		if (error & CUSE_POLL_WRITE)
1761 			revents |= (events & (POLLOUT | POLLWRNORM));
1762 		if (error & CUSE_POLL_ERROR)
1763 			revents |= (events & POLLHUP);
1764 	}
1765 	return (revents);
1766 
1767 pollnval:
1768 	/* XXX many clients don't understand POLLNVAL */
1769 	return (events & (POLLHUP | POLLPRI | POLLIN |
1770 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1771 }
1772 
1773 static int
1774 cuse_client_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1775     vm_size_t size, struct vm_object **object, int nprot)
1776 {
1777 	uint32_t page_nr = *offset / PAGE_SIZE;
1778 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1779 	struct cuse_memory *mem;
1780 	struct cuse_client *pcc;
1781 	int error;
1782 
1783 	error = cuse_client_get(&pcc);
1784 	if (error != 0)
1785 		return (error);
1786 
1787 	cuse_lock();
1788 	/* lookup memory structure */
1789 	TAILQ_FOREACH(mem, &pcc->server->hmem, entry) {
1790 		if (mem->alloc_nr == alloc_nr)
1791 			break;
1792 	}
1793 	if (mem == NULL) {
1794 		cuse_unlock();
1795 		return (ENOMEM);
1796 	}
1797 	/* verify page offset */
1798 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1799 	if (page_nr >= mem->page_count) {
1800 		cuse_unlock();
1801 		return (ENXIO);
1802 	}
1803 	/* verify mmap size */
1804 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1805 	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1806 		cuse_unlock();
1807 		return (EINVAL);
1808 	}
1809 	vm_object_reference(mem->object);
1810 	*object = mem->object;
1811 	cuse_unlock();
1812 
1813 	/* set new VM object offset to use */
1814 	*offset = page_nr * PAGE_SIZE;
1815 
1816 	/* success */
1817 	return (0);
1818 }
1819 
1820 static void
1821 cuse_client_kqfilter_read_detach(struct knote *kn)
1822 {
1823 	struct cuse_client *pcc;
1824 
1825 	cuse_lock();
1826 	pcc = kn->kn_hook;
1827 	knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1828 	cuse_unlock();
1829 }
1830 
1831 static void
1832 cuse_client_kqfilter_write_detach(struct knote *kn)
1833 {
1834 	struct cuse_client *pcc;
1835 
1836 	cuse_lock();
1837 	pcc = kn->kn_hook;
1838 	knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1839 	cuse_unlock();
1840 }
1841 
1842 static int
1843 cuse_client_kqfilter_read_event(struct knote *kn, long hint)
1844 {
1845 	struct cuse_client *pcc;
1846 
1847 	mtx_assert(&cuse_mtx, MA_OWNED);
1848 
1849 	pcc = kn->kn_hook;
1850 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_READ) ? 1 : 0);
1851 }
1852 
1853 static int
1854 cuse_client_kqfilter_write_event(struct knote *kn, long hint)
1855 {
1856 	struct cuse_client *pcc;
1857 
1858 	mtx_assert(&cuse_mtx, MA_OWNED);
1859 
1860 	pcc = kn->kn_hook;
1861 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_WRITE) ? 1 : 0);
1862 }
1863 
1864 static int
1865 cuse_client_kqfilter(struct cdev *dev, struct knote *kn)
1866 {
1867 	struct cuse_client *pcc;
1868 	struct cuse_server *pcs;
1869 	int error;
1870 
1871 	error = cuse_client_get(&pcc);
1872 	if (error != 0)
1873 		return (error);
1874 
1875 	cuse_lock();
1876 	pcs = pcc->server;
1877 	switch (kn->kn_filter) {
1878 	case EVFILT_READ:
1879 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_READ;
1880 		kn->kn_hook = pcc;
1881 		kn->kn_fop = &cuse_client_kqfilter_read_ops;
1882 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1883 		break;
1884 	case EVFILT_WRITE:
1885 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_WRITE;
1886 		kn->kn_hook = pcc;
1887 		kn->kn_fop = &cuse_client_kqfilter_write_ops;
1888 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1889 		break;
1890 	default:
1891 		error = EINVAL;
1892 		break;
1893 	}
1894 	cuse_unlock();
1895 
1896 	if (error == 0)
1897 		cuse_client_kqfilter_poll(dev, pcc);
1898 	return (error);
1899 }
1900