xref: /freebsd/sys/fs/cuse/cuse.c (revision eb69d1f144a6fcc765d1b9d44a5ae8082353e70b)
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2010-2017 Hans Petter Selasky. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include "opt_compat.h"
28 
29 #include <sys/stdint.h>
30 #include <sys/stddef.h>
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/conf.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/linker_set.h>
38 #include <sys/module.h>
39 #include <sys/lock.h>
40 #include <sys/mutex.h>
41 #include <sys/condvar.h>
42 #include <sys/sysctl.h>
43 #include <sys/unistd.h>
44 #include <sys/malloc.h>
45 #include <sys/priv.h>
46 #include <sys/uio.h>
47 #include <sys/poll.h>
48 #include <sys/sx.h>
49 #include <sys/rwlock.h>
50 #include <sys/queue.h>
51 #include <sys/fcntl.h>
52 #include <sys/proc.h>
53 #include <sys/vnode.h>
54 #include <sys/selinfo.h>
55 #include <sys/ptrace.h>
56 
57 #include <machine/bus.h>
58 
59 #include <vm/vm.h>
60 #include <vm/pmap.h>
61 #include <vm/vm_object.h>
62 #include <vm/vm_page.h>
63 #include <vm/vm_pager.h>
64 
65 #include <fs/cuse/cuse_defs.h>
66 #include <fs/cuse/cuse_ioctl.h>
67 
/* Declare version 1 of the "cuse" module. */
MODULE_VERSION(cuse, 1);

/*
 * Prevent cuse4bsd.ko and cuse.ko from loading at the same time by
 * declaring support for the cuse4bsd interface in cuse.ko:
 */
MODULE_VERSION(cuse4bsd, 1);

/* Only advertise the feature when the FEATURE() macro is available. */
#ifdef FEATURE
FEATURE(cuse, "Userspace character devices");
#endif
79 
/* Forward declarations of structures referenced by pointer below. */
struct cuse_command;
struct cuse_server;
struct cuse_client;
83 
/*
 * One command slot of a client.  Every client embeds CUSE_CMD_MAX of
 * these in its "cmds" array.
 */
struct cuse_client_command {
	TAILQ_ENTRY(cuse_client_command) entry;	/* link on the server's "head" queue */
	struct cuse_command sub;	/* request handed to the server by GET_COMMAND */
	struct sx sx;			/* taken by cuse_cmd_lock() */
	struct cv cv;			/* waited on together with cuse_mtx */
	struct thread *entered;		/* server thread that fetched the command */
	struct cuse_client *client;	/* client this slot belongs to */
	struct proc *proc_curr;		/* process waiting for completion, else NULL */
	int	proc_refs;		/* number of data copies in progress */
	int	got_signal;		/* set when the waiting thread was interrupted */
	int	error;			/* status stored by CUSE_IOCTL_SYNC_COMMAND */
	int	command;		/* CUSE_CMD_NONE until the server has synced */
};
97 
/*
 * A memory region allocated by a server through
 * CUSE_IOCTL_ALLOC_MEMORY.
 */
struct cuse_memory {
	TAILQ_ENTRY(cuse_memory) entry;	/* link on the server's "hmem" list */
	vm_object_t object;		/* VM object backing the region */
	uint32_t page_count;		/* size of the region in pages */
	uint32_t alloc_nr;		/* allocation number, unique per server */
};
104 
/*
 * A character device created on behalf of a server through
 * CUSE_IOCTL_CREATE_DEV.
 */
struct cuse_server_dev {
	TAILQ_ENTRY(cuse_server_dev) entry;	/* link on the server's "hdev" list */
	struct cuse_server *server;	/* server that created the device */
	struct cdev *kern_dev;		/* kernel device node */
	struct cuse_dev *user_dev;	/* handle supplied by the server process */
};
111 
/*
 * Per-open state of "/dev/cuse".  Allocated in cuse_server_open() and
 * released by cuse_server_unref() when "refs" drops to zero.
 */
struct cuse_server {
	TAILQ_ENTRY(cuse_server) entry;	/* link on cuse_server_head */
	TAILQ_HEAD(, cuse_client_command) head;	/* commands waiting for the server */
	TAILQ_HEAD(, cuse_server_dev) hdev;	/* devices created by this server */
	TAILQ_HEAD(, cuse_client) hcli;	/* clients attached to this server */
	TAILQ_HEAD(, cuse_memory) hmem;	/* memory regions of this server */
	struct cv cv;			/* signalled when a command is queued */
	struct selinfo selinfo;		/* select/kqueue state for wakeups */
	pid_t	pid;			/* PID of the process that opened the server */
	int	is_closing;		/* non-zero once shutdown has started */
	int	refs;			/* reference count */
};
124 
/*
 * Per-open state of a CUSE client device.
 */
struct cuse_client {
	TAILQ_ENTRY(cuse_client) entry;	/* link on the server's "hcli" list */
	TAILQ_ENTRY(cuse_client) entry_ref;	/* NOTE(review): not used in the code shown here */
	struct cuse_client_command cmds[CUSE_CMD_MAX];	/* one slot per command */
	struct cuse_server *server;	/* server handling this client */
	struct cuse_server_dev *server_dev;	/* device opened; NULL once closing */

	/* bounce buffer used by cuse_server_ioctl_copy_locked() */
	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);

	int	fflags;			/* file flags */
	int	cflags;			/* client flags */
#define	CUSE_CLI_IS_CLOSING 0x01
#define	CUSE_CLI_KNOTE_NEED_READ 0x02
#define	CUSE_CLI_KNOTE_NEED_WRITE 0x04
#define	CUSE_CLI_KNOTE_HAS_READ 0x08
#define	CUSE_CLI_KNOTE_HAS_WRITE 0x10
};
142 
/*
 * Evaluates to non-zero when the given client has the
 * CUSE_CLI_IS_CLOSING flag set in its "cflags".
 */
#define	CUSE_CLIENT_CLOSING(pcc) \
    ((pcc)->cflags & CUSE_CLI_IS_CLOSING)
145 
146 static	MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");
147 
148 static TAILQ_HEAD(, cuse_server) cuse_server_head;
149 static struct mtx cuse_mtx;
150 static struct cdev *cuse_dev;
151 static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
152 static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
153 
/* Prototypes for functions used before they are defined. */
static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);

/* kqueue filter callbacks, read side */
static void cuse_client_kqfilter_read_detach(struct knote *kn);
static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);

/* kqueue filter callbacks, write side */
static void cuse_client_kqfilter_write_detach(struct knote *kn);
static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);
159 
160 static struct filterops cuse_client_kqfilter_read_ops = {
161 	.f_isfd = 1,
162 	.f_detach = cuse_client_kqfilter_read_detach,
163 	.f_event = cuse_client_kqfilter_read_event,
164 };
165 
166 static struct filterops cuse_client_kqfilter_write_ops = {
167 	.f_isfd = 1,
168 	.f_detach = cuse_client_kqfilter_write_detach,
169 	.f_event = cuse_client_kqfilter_write_event,
170 };
171 
172 static d_open_t cuse_client_open;
173 static d_close_t cuse_client_close;
174 static d_ioctl_t cuse_client_ioctl;
175 static d_read_t cuse_client_read;
176 static d_write_t cuse_client_write;
177 static d_poll_t cuse_client_poll;
178 static d_mmap_single_t cuse_client_mmap_single;
179 static d_kqfilter_t cuse_client_kqfilter;
180 
181 static struct cdevsw cuse_client_devsw = {
182 	.d_version = D_VERSION,
183 	.d_open = cuse_client_open,
184 	.d_close = cuse_client_close,
185 	.d_ioctl = cuse_client_ioctl,
186 	.d_name = "cuse_client",
187 	.d_flags = D_TRACKCLOSE,
188 	.d_read = cuse_client_read,
189 	.d_write = cuse_client_write,
190 	.d_poll = cuse_client_poll,
191 	.d_mmap_single = cuse_client_mmap_single,
192 	.d_kqfilter = cuse_client_kqfilter,
193 };
194 
195 static d_open_t cuse_server_open;
196 static d_close_t cuse_server_close;
197 static d_ioctl_t cuse_server_ioctl;
198 static d_read_t cuse_server_read;
199 static d_write_t cuse_server_write;
200 static d_poll_t cuse_server_poll;
201 static d_mmap_single_t cuse_server_mmap_single;
202 
203 static struct cdevsw cuse_server_devsw = {
204 	.d_version = D_VERSION,
205 	.d_open = cuse_server_open,
206 	.d_close = cuse_server_close,
207 	.d_ioctl = cuse_server_ioctl,
208 	.d_name = "cuse_server",
209 	.d_flags = D_TRACKCLOSE,
210 	.d_read = cuse_server_read,
211 	.d_write = cuse_server_write,
212 	.d_poll = cuse_server_poll,
213 	.d_mmap_single = cuse_server_mmap_single,
214 };
215 
/* Helpers which are called before their definition. */
static int cuse_free_unit_by_id_locked(struct cuse_server *, int);
static void cuse_client_is_closing(struct cuse_client *);
218 
219 static void
220 cuse_lock(void)
221 {
222 	mtx_lock(&cuse_mtx);
223 }
224 
225 static void
226 cuse_unlock(void)
227 {
228 	mtx_unlock(&cuse_mtx);
229 }
230 
231 static void
232 cuse_cmd_lock(struct cuse_client_command *pccmd)
233 {
234 	sx_xlock(&pccmd->sx);
235 }
236 
237 static void
238 cuse_cmd_unlock(struct cuse_client_command *pccmd)
239 {
240 	sx_xunlock(&pccmd->sx);
241 }
242 
243 static void
244 cuse_kern_init(void *arg)
245 {
246 	TAILQ_INIT(&cuse_server_head);
247 
248 	mtx_init(&cuse_mtx, "cuse-mtx", NULL, MTX_DEF);
249 
250 	cuse_dev = make_dev(&cuse_server_devsw, 0,
251 	    UID_ROOT, GID_OPERATOR, 0600, "cuse");
252 
253 	printf("Cuse v%d.%d.%d @ /dev/cuse\n",
254 	    (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
255 	    (CUSE_VERSION >> 0) & 0xFF);
256 }
257 SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, 0);
258 
259 static void
260 cuse_kern_uninit(void *arg)
261 {
262 	void *ptr;
263 
264 	while (1) {
265 
266 		printf("Cuse: Please exit all /dev/cuse instances "
267 		    "and processes which have used this device.\n");
268 
269 		pause("DRAIN", 2 * hz);
270 
271 		cuse_lock();
272 		ptr = TAILQ_FIRST(&cuse_server_head);
273 		cuse_unlock();
274 
275 		if (ptr == NULL)
276 			break;
277 	}
278 
279 	if (cuse_dev != NULL)
280 		destroy_dev(cuse_dev);
281 
282 	mtx_destroy(&cuse_mtx);
283 }
284 SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
285 
286 static int
287 cuse_server_get(struct cuse_server **ppcs)
288 {
289 	struct cuse_server *pcs;
290 	int error;
291 
292 	error = devfs_get_cdevpriv((void **)&pcs);
293 	if (error != 0) {
294 		*ppcs = NULL;
295 		return (error);
296 	}
297 	/* check if closing */
298 	cuse_lock();
299 	if (pcs->is_closing) {
300 		cuse_unlock();
301 		*ppcs = NULL;
302 		return (EINVAL);
303 	}
304 	cuse_unlock();
305 	*ppcs = pcs;
306 	return (0);
307 }
308 
309 static void
310 cuse_server_is_closing(struct cuse_server *pcs)
311 {
312 	struct cuse_client *pcc;
313 
314 	if (pcs->is_closing)
315 		return;
316 
317 	pcs->is_closing = 1;
318 
319 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
320 		cuse_client_is_closing(pcc);
321 	}
322 }
323 
324 static struct cuse_client_command *
325 cuse_server_find_command(struct cuse_server *pcs, struct thread *td)
326 {
327 	struct cuse_client *pcc;
328 	int n;
329 
330 	if (pcs->is_closing)
331 		goto done;
332 
333 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
334 		if (CUSE_CLIENT_CLOSING(pcc))
335 			continue;
336 		for (n = 0; n != CUSE_CMD_MAX; n++) {
337 			if (pcc->cmds[n].entered == td)
338 				return (&pcc->cmds[n]);
339 		}
340 	}
341 done:
342 	return (NULL);
343 }
344 
/*
 * Sanitise a NUL-terminated string in place.
 *
 * ASCII letters, ASCII digits and the characters '.', '_' and '/' are
 * kept.  Every other character is replaced by '_'.
 */
static void
cuse_str_filter(char *ptr)
{
	int c;

	for (; (c = *ptr) != 0; ptr++) {
		const int is_lower = (c >= 'a') && (c <= 'z');
		const int is_upper = (c >= 'A') && (c <= 'Z');
		const int is_digit = (c >= '0') && (c <= '9');
		const int is_punct = (c == '.') || (c == '_') || (c == '/');

		if (!(is_lower || is_upper || is_digit || is_punct))
			*ptr = '_';
	}
}
373 
374 static int
375 cuse_convert_error(int error)
376 {
377 	;				/* indent fix */
378 	switch (error) {
379 	case CUSE_ERR_NONE:
380 		return (0);
381 	case CUSE_ERR_BUSY:
382 		return (EBUSY);
383 	case CUSE_ERR_WOULDBLOCK:
384 		return (EWOULDBLOCK);
385 	case CUSE_ERR_INVALID:
386 		return (EINVAL);
387 	case CUSE_ERR_NO_MEMORY:
388 		return (ENOMEM);
389 	case CUSE_ERR_FAULT:
390 		return (EFAULT);
391 	case CUSE_ERR_SIGNAL:
392 		return (EINTR);
393 	case CUSE_ERR_NO_DEVICE:
394 		return (ENODEV);
395 	default:
396 		return (ENXIO);
397 	}
398 }
399 
400 static void
401 cuse_vm_memory_free(struct cuse_memory *mem)
402 {
403 	/* last user is gone - free */
404 	vm_object_deallocate(mem->object);
405 
406 	/* free CUSE memory */
407 	free(mem, M_CUSE);
408 }
409 
410 static int
411 cuse_server_alloc_memory(struct cuse_server *pcs, uint32_t alloc_nr,
412     uint32_t page_count)
413 {
414 	struct cuse_memory *temp;
415 	struct cuse_memory *mem;
416 	vm_object_t object;
417 	int error;
418 
419 	mem = malloc(sizeof(*mem), M_CUSE, M_WAITOK | M_ZERO);
420 	if (mem == NULL)
421 		return (ENOMEM);
422 
423 	object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * page_count,
424 	    VM_PROT_DEFAULT, 0, curthread->td_ucred);
425 	if (object == NULL) {
426 		error = ENOMEM;
427 		goto error_0;
428 	}
429 
430 	cuse_lock();
431 	/* check if allocation number already exists */
432 	TAILQ_FOREACH(temp, &pcs->hmem, entry) {
433 		if (temp->alloc_nr == alloc_nr)
434 			break;
435 	}
436 	if (temp != NULL) {
437 		cuse_unlock();
438 		error = EBUSY;
439 		goto error_1;
440 	}
441 	mem->object = object;
442 	mem->page_count = page_count;
443 	mem->alloc_nr = alloc_nr;
444 	TAILQ_INSERT_TAIL(&pcs->hmem, mem, entry);
445 	cuse_unlock();
446 
447 	return (0);
448 
449 error_1:
450 	vm_object_deallocate(object);
451 error_0:
452 	free(mem, M_CUSE);
453 	return (error);
454 }
455 
456 static int
457 cuse_server_free_memory(struct cuse_server *pcs, uint32_t alloc_nr)
458 {
459 	struct cuse_memory *mem;
460 
461 	cuse_lock();
462 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
463 		if (mem->alloc_nr == alloc_nr)
464 			break;
465 	}
466 	if (mem == NULL) {
467 		cuse_unlock();
468 		return (EINVAL);
469 	}
470 	TAILQ_REMOVE(&pcs->hmem, mem, entry);
471 	cuse_unlock();
472 
473 	cuse_vm_memory_free(mem);
474 
475 	return (0);
476 }
477 
478 static int
479 cuse_client_get(struct cuse_client **ppcc)
480 {
481 	struct cuse_client *pcc;
482 	int error;
483 
484 	/* try to get private data */
485 	error = devfs_get_cdevpriv((void **)&pcc);
486 	if (error != 0) {
487 		*ppcc = NULL;
488 		return (error);
489 	}
490 	/* check if closing */
491 	cuse_lock();
492 	if (CUSE_CLIENT_CLOSING(pcc) || pcc->server->is_closing) {
493 		cuse_unlock();
494 		*ppcc = NULL;
495 		return (EINVAL);
496 	}
497 	cuse_unlock();
498 	*ppcc = pcc;
499 	return (0);
500 }
501 
502 static void
503 cuse_client_is_closing(struct cuse_client *pcc)
504 {
505 	struct cuse_client_command *pccmd;
506 	uint32_t n;
507 
508 	if (CUSE_CLIENT_CLOSING(pcc))
509 		return;
510 
511 	pcc->cflags |= CUSE_CLI_IS_CLOSING;
512 	pcc->server_dev = NULL;
513 
514 	for (n = 0; n != CUSE_CMD_MAX; n++) {
515 
516 		pccmd = &pcc->cmds[n];
517 
518 		if (pccmd->entry.tqe_prev != NULL) {
519 			TAILQ_REMOVE(&pcc->server->head, pccmd, entry);
520 			pccmd->entry.tqe_prev = NULL;
521 		}
522 		cv_broadcast(&pccmd->cv);
523 	}
524 }
525 
526 static void
527 cuse_client_send_command_locked(struct cuse_client_command *pccmd,
528     uintptr_t data_ptr, unsigned long arg, int fflags, int ioflag)
529 {
530 	unsigned long cuse_fflags = 0;
531 	struct cuse_server *pcs;
532 
533 	if (fflags & FREAD)
534 		cuse_fflags |= CUSE_FFLAG_READ;
535 
536 	if (fflags & FWRITE)
537 		cuse_fflags |= CUSE_FFLAG_WRITE;
538 
539 	if (ioflag & IO_NDELAY)
540 		cuse_fflags |= CUSE_FFLAG_NONBLOCK;
541 
542 	pccmd->sub.fflags = cuse_fflags;
543 	pccmd->sub.data_pointer = data_ptr;
544 	pccmd->sub.argument = arg;
545 
546 	pcs = pccmd->client->server;
547 
548 	if ((pccmd->entry.tqe_prev == NULL) &&
549 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
550 	    (pcs->is_closing == 0)) {
551 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
552 		cv_signal(&pcs->cv);
553 	}
554 }
555 
556 static void
557 cuse_client_got_signal(struct cuse_client_command *pccmd)
558 {
559 	struct cuse_server *pcs;
560 
561 	pccmd->got_signal = 1;
562 
563 	pccmd = &pccmd->client->cmds[CUSE_CMD_SIGNAL];
564 
565 	pcs = pccmd->client->server;
566 
567 	if ((pccmd->entry.tqe_prev == NULL) &&
568 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
569 	    (pcs->is_closing == 0)) {
570 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
571 		cv_signal(&pcs->cv);
572 	}
573 }
574 
575 static int
576 cuse_client_receive_command_locked(struct cuse_client_command *pccmd,
577     uint8_t *arg_ptr, uint32_t arg_len)
578 {
579 	int error;
580 
581 	error = 0;
582 
583 	pccmd->proc_curr = curthread->td_proc;
584 
585 	if (CUSE_CLIENT_CLOSING(pccmd->client) ||
586 	    pccmd->client->server->is_closing) {
587 		error = CUSE_ERR_OTHER;
588 		goto done;
589 	}
590 	while (pccmd->command == CUSE_CMD_NONE) {
591 		if (error != 0) {
592 			cv_wait(&pccmd->cv, &cuse_mtx);
593 		} else {
594 			error = cv_wait_sig(&pccmd->cv, &cuse_mtx);
595 
596 			if (error != 0)
597 				cuse_client_got_signal(pccmd);
598 		}
599 		if (CUSE_CLIENT_CLOSING(pccmd->client) ||
600 		    pccmd->client->server->is_closing) {
601 			error = CUSE_ERR_OTHER;
602 			goto done;
603 		}
604 	}
605 
606 	error = pccmd->error;
607 	pccmd->command = CUSE_CMD_NONE;
608 	cv_signal(&pccmd->cv);
609 
610 done:
611 
612 	/* wait until all process references are gone */
613 
614 	pccmd->proc_curr = NULL;
615 
616 	while (pccmd->proc_refs != 0)
617 		cv_wait(&pccmd->cv, &cuse_mtx);
618 
619 	return (error);
620 }
621 
622 /*------------------------------------------------------------------------*
623  *	CUSE SERVER PART
624  *------------------------------------------------------------------------*/
625 
626 static void
627 cuse_server_free_dev(struct cuse_server_dev *pcsd)
628 {
629 	struct cuse_server *pcs;
630 	struct cuse_client *pcc;
631 
632 	/* get server pointer */
633 	pcs = pcsd->server;
634 
635 	/* prevent creation of more devices */
636 	cuse_lock();
637 	if (pcsd->kern_dev != NULL)
638 		pcsd->kern_dev->si_drv1 = NULL;
639 
640 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
641 		if (pcc->server_dev == pcsd)
642 			cuse_client_is_closing(pcc);
643 	}
644 	cuse_unlock();
645 
646 	/* destroy device, if any */
647 	if (pcsd->kern_dev != NULL) {
648 		/* destroy device synchronously */
649 		destroy_dev(pcsd->kern_dev);
650 	}
651 	free(pcsd, M_CUSE);
652 }
653 
654 static void
655 cuse_server_unref(struct cuse_server *pcs)
656 {
657 	struct cuse_server_dev *pcsd;
658 	struct cuse_memory *mem;
659 
660 	cuse_lock();
661 	pcs->refs--;
662 	if (pcs->refs != 0) {
663 		cuse_unlock();
664 		return;
665 	}
666 	cuse_server_is_closing(pcs);
667 	/* final client wakeup, if any */
668 	cuse_server_wakeup_all_client_locked(pcs);
669 
670 	TAILQ_REMOVE(&cuse_server_head, pcs, entry);
671 
672 	cuse_free_unit_by_id_locked(pcs, -1);
673 
674 	while ((pcsd = TAILQ_FIRST(&pcs->hdev)) != NULL) {
675 		TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
676 		cuse_unlock();
677 		cuse_server_free_dev(pcsd);
678 		cuse_lock();
679 	}
680 
681 	while ((mem = TAILQ_FIRST(&pcs->hmem)) != NULL) {
682 		TAILQ_REMOVE(&pcs->hmem, mem, entry);
683 		cuse_unlock();
684 		cuse_vm_memory_free(mem);
685 		cuse_lock();
686 	}
687 
688 	knlist_clear(&pcs->selinfo.si_note, 1);
689 	knlist_destroy(&pcs->selinfo.si_note);
690 
691 	cuse_unlock();
692 
693 	seldrain(&pcs->selinfo);
694 
695 	cv_destroy(&pcs->cv);
696 
697 	free(pcs, M_CUSE);
698 }
699 
/*
 * Destructor registered with devfs_set_cdevpriv() by
 * cuse_server_open().  Drops the reference on the server passed as
 * "arg".
 */
static void
cuse_server_free(void *arg)
{
	cuse_server_unref((struct cuse_server *)arg);
}
708 
709 static int
710 cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
711 {
712 	struct cuse_server *pcs;
713 
714 	pcs = malloc(sizeof(*pcs), M_CUSE, M_WAITOK | M_ZERO);
715 	if (pcs == NULL)
716 		return (ENOMEM);
717 
718 	if (devfs_set_cdevpriv(pcs, &cuse_server_free)) {
719 		printf("Cuse: Cannot set cdevpriv.\n");
720 		free(pcs, M_CUSE);
721 		return (ENOMEM);
722 	}
723 	/* store current process ID */
724 	pcs->pid = curproc->p_pid;
725 
726 	TAILQ_INIT(&pcs->head);
727 	TAILQ_INIT(&pcs->hdev);
728 	TAILQ_INIT(&pcs->hcli);
729 	TAILQ_INIT(&pcs->hmem);
730 
731 	cv_init(&pcs->cv, "cuse-server-cv");
732 
733 	knlist_init_mtx(&pcs->selinfo.si_note, &cuse_mtx);
734 
735 	cuse_lock();
736 	pcs->refs++;
737 	TAILQ_INSERT_TAIL(&cuse_server_head, pcs, entry);
738 	cuse_unlock();
739 
740 	return (0);
741 }
742 
743 static int
744 cuse_server_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
745 {
746 	struct cuse_server *pcs;
747 	int error;
748 
749 	error = cuse_server_get(&pcs);
750 	if (error != 0)
751 		goto done;
752 
753 	cuse_lock();
754 	cuse_server_is_closing(pcs);
755 	/* final client wakeup, if any */
756 	cuse_server_wakeup_all_client_locked(pcs);
757 
758 	knlist_clear(&pcs->selinfo.si_note, 1);
759 	cuse_unlock();
760 
761 done:
762 	return (0);
763 }
764 
/*
 * Read handler of "/dev/cuse".  Reading is not supported and always
 * fails with ENXIO.
 */
static int
cuse_server_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	const int error = ENXIO;

	return (error);
}
770 
/*
 * Write handler of "/dev/cuse".  Writing is not supported and always
 * fails with ENXIO.
 */
static int
cuse_server_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	const int error = ENXIO;

	return (error);
}
776 
777 static int
778 cuse_server_ioctl_copy_locked(struct cuse_client_command *pccmd,
779     struct cuse_data_chunk *pchk, int isread)
780 {
781 	struct proc *p_proc;
782 	uint32_t offset;
783 	int error;
784 
785 	offset = pchk->peer_ptr - CUSE_BUF_MIN_PTR;
786 
787 	if (pchk->length > CUSE_BUFFER_MAX)
788 		return (EFAULT);
789 
790 	if (offset >= CUSE_BUFFER_MAX)
791 		return (EFAULT);
792 
793 	if ((offset + pchk->length) > CUSE_BUFFER_MAX)
794 		return (EFAULT);
795 
796 	p_proc = pccmd->proc_curr;
797 	if (p_proc == NULL)
798 		return (ENXIO);
799 
800 	if (pccmd->proc_refs < 0)
801 		return (ENOMEM);
802 
803 	pccmd->proc_refs++;
804 
805 	cuse_unlock();
806 
807 	if (isread == 0) {
808 		error = copyin(
809 		    (void *)pchk->local_ptr,
810 		    pccmd->client->ioctl_buffer + offset,
811 		    pchk->length);
812 	} else {
813 		error = copyout(
814 		    pccmd->client->ioctl_buffer + offset,
815 		    (void *)pchk->local_ptr,
816 		    pchk->length);
817 	}
818 
819 	cuse_lock();
820 
821 	pccmd->proc_refs--;
822 
823 	if (pccmd->proc_curr == NULL)
824 		cv_signal(&pccmd->cv);
825 
826 	return (error);
827 }
828 
829 static int
830 cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
831     struct proc *proc_d, vm_offset_t data_d, size_t len)
832 {
833 	struct thread *td;
834 	struct proc *proc_cur;
835 	int error;
836 
837 	td = curthread;
838 	proc_cur = td->td_proc;
839 
840 	if (proc_cur == proc_d) {
841 		struct iovec iov = {
842 			.iov_base = (caddr_t)data_d,
843 			.iov_len = len,
844 		};
845 		struct uio uio = {
846 			.uio_iov = &iov,
847 			.uio_iovcnt = 1,
848 			.uio_offset = (off_t)data_s,
849 			.uio_resid = len,
850 			.uio_segflg = UIO_USERSPACE,
851 			.uio_rw = UIO_READ,
852 			.uio_td = td,
853 		};
854 
855 		PHOLD(proc_s);
856 		error = proc_rwmem(proc_s, &uio);
857 		PRELE(proc_s);
858 
859 	} else if (proc_cur == proc_s) {
860 		struct iovec iov = {
861 			.iov_base = (caddr_t)data_s,
862 			.iov_len = len,
863 		};
864 		struct uio uio = {
865 			.uio_iov = &iov,
866 			.uio_iovcnt = 1,
867 			.uio_offset = (off_t)data_d,
868 			.uio_resid = len,
869 			.uio_segflg = UIO_USERSPACE,
870 			.uio_rw = UIO_WRITE,
871 			.uio_td = td,
872 		};
873 
874 		PHOLD(proc_d);
875 		error = proc_rwmem(proc_d, &uio);
876 		PRELE(proc_d);
877 	} else {
878 		error = EINVAL;
879 	}
880 	return (error);
881 }
882 
883 static int
884 cuse_server_data_copy_locked(struct cuse_client_command *pccmd,
885     struct cuse_data_chunk *pchk, int isread)
886 {
887 	struct proc *p_proc;
888 	int error;
889 
890 	p_proc = pccmd->proc_curr;
891 	if (p_proc == NULL)
892 		return (ENXIO);
893 
894 	if (pccmd->proc_refs < 0)
895 		return (ENOMEM);
896 
897 	pccmd->proc_refs++;
898 
899 	cuse_unlock();
900 
901 	if (isread == 0) {
902 		error = cuse_proc2proc_copy(
903 		    curthread->td_proc, pchk->local_ptr,
904 		    p_proc, pchk->peer_ptr,
905 		    pchk->length);
906 	} else {
907 		error = cuse_proc2proc_copy(
908 		    p_proc, pchk->peer_ptr,
909 		    curthread->td_proc, pchk->local_ptr,
910 		    pchk->length);
911 	}
912 
913 	cuse_lock();
914 
915 	pccmd->proc_refs--;
916 
917 	if (pccmd->proc_curr == NULL)
918 		cv_signal(&pccmd->cv);
919 
920 	return (error);
921 }
922 
923 static int
924 cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
925 {
926 	int n;
927 	int x = 0;
928 	int match;
929 
930 	do {
931 		for (match = n = 0; n != CUSE_DEVICES_MAX; n++) {
932 			if (cuse_alloc_unit[n] != NULL) {
933 				if ((cuse_alloc_unit_id[n] ^ id) & CUSE_ID_MASK)
934 					continue;
935 				if ((cuse_alloc_unit_id[n] & ~CUSE_ID_MASK) == x) {
936 					x++;
937 					match = 1;
938 				}
939 			}
940 		}
941 	} while (match);
942 
943 	if (x < 256) {
944 		for (n = 0; n != CUSE_DEVICES_MAX; n++) {
945 			if (cuse_alloc_unit[n] == NULL) {
946 				cuse_alloc_unit[n] = pcs;
947 				cuse_alloc_unit_id[n] = id | x;
948 				return (x);
949 			}
950 		}
951 	}
952 	return (-1);
953 }
954 
955 static void
956 cuse_server_wakeup_locked(struct cuse_server *pcs)
957 {
958 	selwakeup(&pcs->selinfo);
959 	KNOTE_LOCKED(&pcs->selinfo.si_note, 0);
960 }
961 
962 static void
963 cuse_server_wakeup_all_client_locked(struct cuse_server *pcs)
964 {
965 	struct cuse_client *pcc;
966 
967 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
968 		pcc->cflags |= (CUSE_CLI_KNOTE_NEED_READ |
969 		    CUSE_CLI_KNOTE_NEED_WRITE);
970 	}
971 	cuse_server_wakeup_locked(pcs);
972 }
973 
974 static int
975 cuse_free_unit_by_id_locked(struct cuse_server *pcs, int id)
976 {
977 	int n;
978 	int found = 0;
979 
980 	for (n = 0; n != CUSE_DEVICES_MAX; n++) {
981 		if (cuse_alloc_unit[n] == pcs) {
982 			if (cuse_alloc_unit_id[n] == id || id == -1) {
983 				cuse_alloc_unit[n] = NULL;
984 				cuse_alloc_unit_id[n] = 0;
985 				found = 1;
986 			}
987 		}
988 	}
989 
990 	return (found ? 0 : EINVAL);
991 }
992 
993 static int
994 cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
995     caddr_t data, int fflag, struct thread *td)
996 {
997 	struct cuse_server *pcs;
998 	int error;
999 
1000 	error = cuse_server_get(&pcs);
1001 	if (error != 0)
1002 		return (error);
1003 
1004 	switch (cmd) {
1005 		struct cuse_client_command *pccmd;
1006 		struct cuse_client *pcc;
1007 		struct cuse_command *pcmd;
1008 		struct cuse_alloc_info *pai;
1009 		struct cuse_create_dev *pcd;
1010 		struct cuse_server_dev *pcsd;
1011 		struct cuse_data_chunk *pchk;
1012 		int n;
1013 
1014 	case CUSE_IOCTL_GET_COMMAND:
1015 		pcmd = (void *)data;
1016 
1017 		cuse_lock();
1018 
1019 		while ((pccmd = TAILQ_FIRST(&pcs->head)) == NULL) {
1020 			error = cv_wait_sig(&pcs->cv, &cuse_mtx);
1021 
1022 			if (pcs->is_closing)
1023 				error = ENXIO;
1024 
1025 			if (error) {
1026 				cuse_unlock();
1027 				return (error);
1028 			}
1029 		}
1030 
1031 		TAILQ_REMOVE(&pcs->head, pccmd, entry);
1032 		pccmd->entry.tqe_prev = NULL;
1033 
1034 		pccmd->entered = curthread;
1035 
1036 		*pcmd = pccmd->sub;
1037 
1038 		cuse_unlock();
1039 
1040 		break;
1041 
1042 	case CUSE_IOCTL_SYNC_COMMAND:
1043 
1044 		cuse_lock();
1045 		while ((pccmd = cuse_server_find_command(pcs, curthread)) != NULL) {
1046 
1047 			/* send sync command */
1048 			pccmd->entered = NULL;
1049 			pccmd->error = *(int *)data;
1050 			pccmd->command = CUSE_CMD_SYNC;
1051 
1052 			/* signal peer, if any */
1053 			cv_signal(&pccmd->cv);
1054 		}
1055 		cuse_unlock();
1056 
1057 		break;
1058 
1059 	case CUSE_IOCTL_ALLOC_UNIT:
1060 
1061 		cuse_lock();
1062 		n = cuse_alloc_unit_by_id_locked(pcs,
1063 		    CUSE_ID_DEFAULT(0));
1064 		cuse_unlock();
1065 
1066 		if (n < 0)
1067 			error = ENOMEM;
1068 		else
1069 			*(int *)data = n;
1070 		break;
1071 
1072 	case CUSE_IOCTL_ALLOC_UNIT_BY_ID:
1073 
1074 		n = *(int *)data;
1075 
1076 		n = (n & CUSE_ID_MASK);
1077 
1078 		cuse_lock();
1079 		n = cuse_alloc_unit_by_id_locked(pcs, n);
1080 		cuse_unlock();
1081 
1082 		if (n < 0)
1083 			error = ENOMEM;
1084 		else
1085 			*(int *)data = n;
1086 		break;
1087 
1088 	case CUSE_IOCTL_FREE_UNIT:
1089 
1090 		n = *(int *)data;
1091 
1092 		n = CUSE_ID_DEFAULT(n);
1093 
1094 		cuse_lock();
1095 		error = cuse_free_unit_by_id_locked(pcs, n);
1096 		cuse_unlock();
1097 		break;
1098 
1099 	case CUSE_IOCTL_FREE_UNIT_BY_ID:
1100 
1101 		n = *(int *)data;
1102 
1103 		cuse_lock();
1104 		error = cuse_free_unit_by_id_locked(pcs, n);
1105 		cuse_unlock();
1106 		break;
1107 
1108 	case CUSE_IOCTL_ALLOC_MEMORY:
1109 
1110 		pai = (void *)data;
1111 
1112 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1113 			error = ENOMEM;
1114 			break;
1115 		}
1116 		if (pai->page_count >= CUSE_ALLOC_PAGES_MAX) {
1117 			error = ENOMEM;
1118 			break;
1119 		}
1120 		error = cuse_server_alloc_memory(pcs,
1121 		    pai->alloc_nr, pai->page_count);
1122 		break;
1123 
1124 	case CUSE_IOCTL_FREE_MEMORY:
1125 		pai = (void *)data;
1126 
1127 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1128 			error = ENOMEM;
1129 			break;
1130 		}
1131 		error = cuse_server_free_memory(pcs, pai->alloc_nr);
1132 		break;
1133 
1134 	case CUSE_IOCTL_GET_SIG:
1135 
1136 		cuse_lock();
1137 		pccmd = cuse_server_find_command(pcs, curthread);
1138 
1139 		if (pccmd != NULL) {
1140 			n = pccmd->got_signal;
1141 			pccmd->got_signal = 0;
1142 		} else {
1143 			n = 0;
1144 		}
1145 		cuse_unlock();
1146 
1147 		*(int *)data = n;
1148 
1149 		break;
1150 
1151 	case CUSE_IOCTL_SET_PFH:
1152 
1153 		cuse_lock();
1154 		pccmd = cuse_server_find_command(pcs, curthread);
1155 
1156 		if (pccmd != NULL) {
1157 			pcc = pccmd->client;
1158 			for (n = 0; n != CUSE_CMD_MAX; n++) {
1159 				pcc->cmds[n].sub.per_file_handle = *(uintptr_t *)data;
1160 			}
1161 		} else {
1162 			error = ENXIO;
1163 		}
1164 		cuse_unlock();
1165 		break;
1166 
1167 	case CUSE_IOCTL_CREATE_DEV:
1168 
1169 		error = priv_check(curthread, PRIV_DRIVER);
1170 		if (error)
1171 			break;
1172 
1173 		pcd = (void *)data;
1174 
1175 		/* filter input */
1176 
1177 		pcd->devname[sizeof(pcd->devname) - 1] = 0;
1178 
1179 		if (pcd->devname[0] == 0) {
1180 			error = EINVAL;
1181 			break;
1182 		}
1183 		cuse_str_filter(pcd->devname);
1184 
1185 		pcd->permissions &= 0777;
1186 
1187 		/* try to allocate a character device */
1188 
1189 		pcsd = malloc(sizeof(*pcsd), M_CUSE, M_WAITOK | M_ZERO);
1190 
1191 		if (pcsd == NULL) {
1192 			error = ENOMEM;
1193 			break;
1194 		}
1195 		pcsd->server = pcs;
1196 
1197 		pcsd->user_dev = pcd->dev;
1198 
1199 		pcsd->kern_dev = make_dev_credf(MAKEDEV_CHECKNAME,
1200 		    &cuse_client_devsw, 0, NULL, pcd->user_id, pcd->group_id,
1201 		    pcd->permissions, "%s", pcd->devname);
1202 
1203 		if (pcsd->kern_dev == NULL) {
1204 			free(pcsd, M_CUSE);
1205 			error = ENOMEM;
1206 			break;
1207 		}
1208 		pcsd->kern_dev->si_drv1 = pcsd;
1209 
1210 		cuse_lock();
1211 		TAILQ_INSERT_TAIL(&pcs->hdev, pcsd, entry);
1212 		cuse_unlock();
1213 
1214 		break;
1215 
1216 	case CUSE_IOCTL_DESTROY_DEV:
1217 
1218 		error = priv_check(curthread, PRIV_DRIVER);
1219 		if (error)
1220 			break;
1221 
1222 		cuse_lock();
1223 
1224 		error = EINVAL;
1225 
1226 		pcsd = TAILQ_FIRST(&pcs->hdev);
1227 		while (pcsd != NULL) {
1228 			if (pcsd->user_dev == *(struct cuse_dev **)data) {
1229 				TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
1230 				cuse_unlock();
1231 				cuse_server_free_dev(pcsd);
1232 				cuse_lock();
1233 				error = 0;
1234 				pcsd = TAILQ_FIRST(&pcs->hdev);
1235 			} else {
1236 				pcsd = TAILQ_NEXT(pcsd, entry);
1237 			}
1238 		}
1239 
1240 		cuse_unlock();
1241 		break;
1242 
1243 	case CUSE_IOCTL_WRITE_DATA:
1244 	case CUSE_IOCTL_READ_DATA:
1245 
1246 		cuse_lock();
1247 		pchk = (struct cuse_data_chunk *)data;
1248 
1249 		pccmd = cuse_server_find_command(pcs, curthread);
1250 
1251 		if (pccmd == NULL) {
1252 			error = ENXIO;	/* invalid request */
1253 		} else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
1254 			error = EFAULT;	/* NULL pointer */
1255 		} else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
1256 			error = cuse_server_ioctl_copy_locked(pccmd,
1257 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1258 		} else {
1259 			error = cuse_server_data_copy_locked(pccmd,
1260 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1261 		}
1262 		cuse_unlock();
1263 		break;
1264 
1265 	case CUSE_IOCTL_SELWAKEUP:
1266 		cuse_lock();
1267 		/*
1268 		 * We don't know which direction caused the event.
1269 		 * Wakeup both!
1270 		 */
1271 		cuse_server_wakeup_all_client_locked(pcs);
1272 		cuse_unlock();
1273 		break;
1274 
1275 	default:
1276 		error = ENXIO;
1277 		break;
1278 	}
1279 	return (error);
1280 }
1281 
1282 static int
1283 cuse_server_poll(struct cdev *dev, int events, struct thread *td)
1284 {
1285 	return (events & (POLLHUP | POLLPRI | POLLIN |
1286 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1287 }
1288 
1289 static int
1290 cuse_server_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1291     vm_size_t size, struct vm_object **object, int nprot)
1292 {
1293 	uint32_t page_nr = *offset / PAGE_SIZE;
1294 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1295 	struct cuse_memory *mem;
1296 	struct cuse_server *pcs;
1297 	int error;
1298 
1299 	error = cuse_server_get(&pcs);
1300 	if (error != 0)
1301 		return (error);
1302 
1303 	cuse_lock();
1304 	/* lookup memory structure */
1305 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
1306 		if (mem->alloc_nr == alloc_nr)
1307 			break;
1308 	}
1309 	if (mem == NULL) {
1310 		cuse_unlock();
1311 		return (ENOMEM);
1312 	}
1313 	/* verify page offset */
1314 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1315 	if (page_nr >= mem->page_count) {
1316 		cuse_unlock();
1317 		return (ENXIO);
1318 	}
1319 	/* verify mmap size */
1320 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1321 	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1322 		cuse_unlock();
1323 		return (EINVAL);
1324 	}
1325 	vm_object_reference(mem->object);
1326 	*object = mem->object;
1327 	cuse_unlock();
1328 
1329 	/* set new VM object offset to use */
1330 	*offset = page_nr * PAGE_SIZE;
1331 
1332 	/* success */
1333 	return (0);
1334 }
1335 
1336 /*------------------------------------------------------------------------*
1337  *	CUSE CLIENT PART
1338  *------------------------------------------------------------------------*/
1339 static void
1340 cuse_client_free(void *arg)
1341 {
1342 	struct cuse_client *pcc = arg;
1343 	struct cuse_client_command *pccmd;
1344 	struct cuse_server *pcs;
1345 	int n;
1346 
1347 	cuse_lock();
1348 	cuse_client_is_closing(pcc);
1349 	TAILQ_REMOVE(&pcc->server->hcli, pcc, entry);
1350 	cuse_unlock();
1351 
1352 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1353 
1354 		pccmd = &pcc->cmds[n];
1355 
1356 		sx_destroy(&pccmd->sx);
1357 		cv_destroy(&pccmd->cv);
1358 	}
1359 
1360 	pcs = pcc->server;
1361 
1362 	free(pcc, M_CUSE);
1363 
1364 	/* drop reference on server */
1365 	cuse_server_unref(pcs);
1366 }
1367 
1368 static int
1369 cuse_client_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
1370 {
1371 	struct cuse_client_command *pccmd;
1372 	struct cuse_server_dev *pcsd;
1373 	struct cuse_client *pcc;
1374 	struct cuse_server *pcs;
1375 	struct cuse_dev *pcd;
1376 	int error;
1377 	int n;
1378 
1379 	cuse_lock();
1380 	pcsd = dev->si_drv1;
1381 	if (pcsd != NULL) {
1382 		pcs = pcsd->server;
1383 		pcd = pcsd->user_dev;
1384 		/*
1385 		 * Check that the refcount didn't wrap and that the
1386 		 * same process is not both client and server. This
1387 		 * can easily lead to deadlocks when destroying the
1388 		 * CUSE character device nodes:
1389 		 */
1390 		pcs->refs++;
1391 		if (pcs->refs < 0 || pcs->pid == curproc->p_pid) {
1392 			/* overflow or wrong PID */
1393 			pcs->refs--;
1394 			pcsd = NULL;
1395 		}
1396 	} else {
1397 		pcs = NULL;
1398 		pcd = NULL;
1399 	}
1400 	cuse_unlock();
1401 
1402 	if (pcsd == NULL)
1403 		return (EINVAL);
1404 
1405 	pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
1406 	if (pcc == NULL) {
1407 		/* drop reference on server */
1408 		cuse_server_unref(pcs);
1409 		return (ENOMEM);
1410 	}
1411 	if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
1412 		printf("Cuse: Cannot set cdevpriv.\n");
1413 		/* drop reference on server */
1414 		cuse_server_unref(pcs);
1415 		free(pcc, M_CUSE);
1416 		return (ENOMEM);
1417 	}
1418 	pcc->fflags = fflags;
1419 	pcc->server_dev = pcsd;
1420 	pcc->server = pcs;
1421 
1422 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1423 
1424 		pccmd = &pcc->cmds[n];
1425 
1426 		pccmd->sub.dev = pcd;
1427 		pccmd->sub.command = n;
1428 		pccmd->client = pcc;
1429 
1430 		sx_init(&pccmd->sx, "cuse-client-sx");
1431 		cv_init(&pccmd->cv, "cuse-client-cv");
1432 	}
1433 
1434 	cuse_lock();
1435 
1436 	/* cuse_client_free() assumes that the client is listed somewhere! */
1437 	/* always enqueue */
1438 
1439 	TAILQ_INSERT_TAIL(&pcs->hcli, pcc, entry);
1440 
1441 	/* check if server is closing */
1442 	if ((pcs->is_closing != 0) || (dev->si_drv1 == NULL)) {
1443 		error = EINVAL;
1444 	} else {
1445 		error = 0;
1446 	}
1447 	cuse_unlock();
1448 
1449 	if (error) {
1450 		devfs_clear_cdevpriv();	/* XXX bugfix */
1451 		return (error);
1452 	}
1453 	pccmd = &pcc->cmds[CUSE_CMD_OPEN];
1454 
1455 	cuse_cmd_lock(pccmd);
1456 
1457 	cuse_lock();
1458 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1459 
1460 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1461 	cuse_unlock();
1462 
1463 	if (error < 0) {
1464 		error = cuse_convert_error(error);
1465 	} else {
1466 		error = 0;
1467 	}
1468 
1469 	cuse_cmd_unlock(pccmd);
1470 
1471 	if (error)
1472 		devfs_clear_cdevpriv();	/* XXX bugfix */
1473 
1474 	return (error);
1475 }
1476 
1477 static int
1478 cuse_client_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
1479 {
1480 	struct cuse_client_command *pccmd;
1481 	struct cuse_client *pcc;
1482 	int error;
1483 
1484 	error = cuse_client_get(&pcc);
1485 	if (error != 0)
1486 		return (0);
1487 
1488 	pccmd = &pcc->cmds[CUSE_CMD_CLOSE];
1489 
1490 	cuse_cmd_lock(pccmd);
1491 
1492 	cuse_lock();
1493 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1494 
1495 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1496 	cuse_unlock();
1497 
1498 	cuse_cmd_unlock(pccmd);
1499 
1500 	cuse_lock();
1501 	cuse_client_is_closing(pcc);
1502 	cuse_unlock();
1503 
1504 	return (0);
1505 }
1506 
1507 static void
1508 cuse_client_kqfilter_poll(struct cdev *dev, struct cuse_client *pcc)
1509 {
1510 	int temp;
1511 
1512 	cuse_lock();
1513 	temp = (pcc->cflags & (CUSE_CLI_KNOTE_HAS_READ |
1514 	    CUSE_CLI_KNOTE_HAS_WRITE));
1515 	pcc->cflags &= ~(CUSE_CLI_KNOTE_NEED_READ |
1516 	    CUSE_CLI_KNOTE_NEED_WRITE);
1517 	cuse_unlock();
1518 
1519 	if (temp != 0) {
1520 		/* get the latest polling state from the server */
1521 		temp = cuse_client_poll(dev, POLLIN | POLLOUT, NULL);
1522 
1523 		if (temp & (POLLIN | POLLOUT)) {
1524 			cuse_lock();
1525 			if (temp & POLLIN)
1526 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_READ;
1527 			if (temp & POLLOUT)
1528 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_WRITE;
1529 
1530 			/* make sure the "knote" gets woken up */
1531 			cuse_server_wakeup_locked(pcc->server);
1532 			cuse_unlock();
1533 		}
1534 	}
1535 }
1536 
1537 static int
1538 cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
1539 {
1540 	struct cuse_client_command *pccmd;
1541 	struct cuse_client *pcc;
1542 	int error;
1543 	int len;
1544 
1545 	error = cuse_client_get(&pcc);
1546 	if (error != 0)
1547 		return (error);
1548 
1549 	pccmd = &pcc->cmds[CUSE_CMD_READ];
1550 
1551 	if (uio->uio_segflg != UIO_USERSPACE) {
1552 		return (EINVAL);
1553 	}
1554 	uio->uio_segflg = UIO_NOCOPY;
1555 
1556 	cuse_cmd_lock(pccmd);
1557 
1558 	while (uio->uio_resid != 0) {
1559 
1560 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1561 			error = ENOMEM;
1562 			break;
1563 		}
1564 		len = uio->uio_iov->iov_len;
1565 
1566 		cuse_lock();
1567 		cuse_client_send_command_locked(pccmd,
1568 		    (uintptr_t)uio->uio_iov->iov_base,
1569 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1570 
1571 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1572 		cuse_unlock();
1573 
1574 		if (error < 0) {
1575 			error = cuse_convert_error(error);
1576 			break;
1577 		} else if (error == len) {
1578 			error = uiomove(NULL, error, uio);
1579 			if (error)
1580 				break;
1581 		} else {
1582 			error = uiomove(NULL, error, uio);
1583 			break;
1584 		}
1585 	}
1586 	cuse_cmd_unlock(pccmd);
1587 
1588 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1589 
1590 	if (error == EWOULDBLOCK)
1591 		cuse_client_kqfilter_poll(dev, pcc);
1592 
1593 	return (error);
1594 }
1595 
1596 static int
1597 cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
1598 {
1599 	struct cuse_client_command *pccmd;
1600 	struct cuse_client *pcc;
1601 	int error;
1602 	int len;
1603 
1604 	error = cuse_client_get(&pcc);
1605 	if (error != 0)
1606 		return (error);
1607 
1608 	pccmd = &pcc->cmds[CUSE_CMD_WRITE];
1609 
1610 	if (uio->uio_segflg != UIO_USERSPACE) {
1611 		return (EINVAL);
1612 	}
1613 	uio->uio_segflg = UIO_NOCOPY;
1614 
1615 	cuse_cmd_lock(pccmd);
1616 
1617 	while (uio->uio_resid != 0) {
1618 
1619 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1620 			error = ENOMEM;
1621 			break;
1622 		}
1623 		len = uio->uio_iov->iov_len;
1624 
1625 		cuse_lock();
1626 		cuse_client_send_command_locked(pccmd,
1627 		    (uintptr_t)uio->uio_iov->iov_base,
1628 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1629 
1630 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1631 		cuse_unlock();
1632 
1633 		if (error < 0) {
1634 			error = cuse_convert_error(error);
1635 			break;
1636 		} else if (error == len) {
1637 			error = uiomove(NULL, error, uio);
1638 			if (error)
1639 				break;
1640 		} else {
1641 			error = uiomove(NULL, error, uio);
1642 			break;
1643 		}
1644 	}
1645 	cuse_cmd_unlock(pccmd);
1646 
1647 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1648 
1649 	if (error == EWOULDBLOCK)
1650 		cuse_client_kqfilter_poll(dev, pcc);
1651 
1652 	return (error);
1653 }
1654 
1655 int
1656 cuse_client_ioctl(struct cdev *dev, unsigned long cmd,
1657     caddr_t data, int fflag, struct thread *td)
1658 {
1659 	struct cuse_client_command *pccmd;
1660 	struct cuse_client *pcc;
1661 	int error;
1662 	int len;
1663 
1664 	error = cuse_client_get(&pcc);
1665 	if (error != 0)
1666 		return (error);
1667 
1668 	len = IOCPARM_LEN(cmd);
1669 	if (len > CUSE_BUFFER_MAX)
1670 		return (ENOMEM);
1671 
1672 	pccmd = &pcc->cmds[CUSE_CMD_IOCTL];
1673 
1674 	cuse_cmd_lock(pccmd);
1675 
1676 	if (cmd & (IOC_IN | IOC_VOID))
1677 		memcpy(pcc->ioctl_buffer, data, len);
1678 
1679 	/*
1680 	 * When the ioctl-length is zero drivers can pass information
1681 	 * through the data pointer of the ioctl. Make sure this information
1682 	 * is forwarded to the driver.
1683 	 */
1684 
1685 	cuse_lock();
1686 	cuse_client_send_command_locked(pccmd,
1687 	    (len == 0) ? *(long *)data : CUSE_BUF_MIN_PTR,
1688 	    (unsigned long)cmd, pcc->fflags,
1689 	    (fflag & O_NONBLOCK) ? IO_NDELAY : 0);
1690 
1691 	error = cuse_client_receive_command_locked(pccmd, data, len);
1692 	cuse_unlock();
1693 
1694 	if (error < 0) {
1695 		error = cuse_convert_error(error);
1696 	} else {
1697 		error = 0;
1698 	}
1699 
1700 	if (cmd & IOC_OUT)
1701 		memcpy(data, pcc->ioctl_buffer, len);
1702 
1703 	cuse_cmd_unlock(pccmd);
1704 
1705 	if (error == EWOULDBLOCK)
1706 		cuse_client_kqfilter_poll(dev, pcc);
1707 
1708 	return (error);
1709 }
1710 
1711 static int
1712 cuse_client_poll(struct cdev *dev, int events, struct thread *td)
1713 {
1714 	struct cuse_client_command *pccmd;
1715 	struct cuse_client *pcc;
1716 	unsigned long temp;
1717 	int error;
1718 	int revents;
1719 
1720 	error = cuse_client_get(&pcc);
1721 	if (error != 0)
1722 		goto pollnval;
1723 
1724 	temp = 0;
1725 
1726 	if (events & (POLLPRI | POLLIN | POLLRDNORM))
1727 		temp |= CUSE_POLL_READ;
1728 
1729 	if (events & (POLLOUT | POLLWRNORM))
1730 		temp |= CUSE_POLL_WRITE;
1731 
1732 	if (events & POLLHUP)
1733 		temp |= CUSE_POLL_ERROR;
1734 
1735 	pccmd = &pcc->cmds[CUSE_CMD_POLL];
1736 
1737 	cuse_cmd_lock(pccmd);
1738 
1739 	/* Need to selrecord() first to not loose any events. */
1740 	if (temp != 0 && td != NULL)
1741 		selrecord(td, &pcc->server->selinfo);
1742 
1743 	cuse_lock();
1744 	cuse_client_send_command_locked(pccmd,
1745 	    0, temp, pcc->fflags, IO_NDELAY);
1746 
1747 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1748 	cuse_unlock();
1749 
1750 	cuse_cmd_unlock(pccmd);
1751 
1752 	if (error < 0) {
1753 		goto pollnval;
1754 	} else {
1755 		revents = 0;
1756 		if (error & CUSE_POLL_READ)
1757 			revents |= (events & (POLLPRI | POLLIN | POLLRDNORM));
1758 		if (error & CUSE_POLL_WRITE)
1759 			revents |= (events & (POLLOUT | POLLWRNORM));
1760 		if (error & CUSE_POLL_ERROR)
1761 			revents |= (events & POLLHUP);
1762 	}
1763 	return (revents);
1764 
1765 pollnval:
1766 	/* XXX many clients don't understand POLLNVAL */
1767 	return (events & (POLLHUP | POLLPRI | POLLIN |
1768 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1769 }
1770 
1771 static int
1772 cuse_client_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1773     vm_size_t size, struct vm_object **object, int nprot)
1774 {
1775 	uint32_t page_nr = *offset / PAGE_SIZE;
1776 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1777 	struct cuse_memory *mem;
1778 	struct cuse_client *pcc;
1779 	int error;
1780 
1781 	error = cuse_client_get(&pcc);
1782 	if (error != 0)
1783 		return (error);
1784 
1785 	cuse_lock();
1786 	/* lookup memory structure */
1787 	TAILQ_FOREACH(mem, &pcc->server->hmem, entry) {
1788 		if (mem->alloc_nr == alloc_nr)
1789 			break;
1790 	}
1791 	if (mem == NULL) {
1792 		cuse_unlock();
1793 		return (ENOMEM);
1794 	}
1795 	/* verify page offset */
1796 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1797 	if (page_nr >= mem->page_count) {
1798 		cuse_unlock();
1799 		return (ENXIO);
1800 	}
1801 	/* verify mmap size */
1802 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1803 	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1804 		cuse_unlock();
1805 		return (EINVAL);
1806 	}
1807 	vm_object_reference(mem->object);
1808 	*object = mem->object;
1809 	cuse_unlock();
1810 
1811 	/* set new VM object offset to use */
1812 	*offset = page_nr * PAGE_SIZE;
1813 
1814 	/* success */
1815 	return (0);
1816 }
1817 
1818 static void
1819 cuse_client_kqfilter_read_detach(struct knote *kn)
1820 {
1821 	struct cuse_client *pcc;
1822 
1823 	cuse_lock();
1824 	pcc = kn->kn_hook;
1825 	knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1826 	cuse_unlock();
1827 }
1828 
1829 static void
1830 cuse_client_kqfilter_write_detach(struct knote *kn)
1831 {
1832 	struct cuse_client *pcc;
1833 
1834 	cuse_lock();
1835 	pcc = kn->kn_hook;
1836 	knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1837 	cuse_unlock();
1838 }
1839 
1840 static int
1841 cuse_client_kqfilter_read_event(struct knote *kn, long hint)
1842 {
1843 	struct cuse_client *pcc;
1844 
1845 	mtx_assert(&cuse_mtx, MA_OWNED);
1846 
1847 	pcc = kn->kn_hook;
1848 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_READ) ? 1 : 0);
1849 }
1850 
1851 static int
1852 cuse_client_kqfilter_write_event(struct knote *kn, long hint)
1853 {
1854 	struct cuse_client *pcc;
1855 
1856 	mtx_assert(&cuse_mtx, MA_OWNED);
1857 
1858 	pcc = kn->kn_hook;
1859 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_WRITE) ? 1 : 0);
1860 }
1861 
1862 static int
1863 cuse_client_kqfilter(struct cdev *dev, struct knote *kn)
1864 {
1865 	struct cuse_client *pcc;
1866 	struct cuse_server *pcs;
1867 	int error;
1868 
1869 	error = cuse_client_get(&pcc);
1870 	if (error != 0)
1871 		return (error);
1872 
1873 	cuse_lock();
1874 	pcs = pcc->server;
1875 	switch (kn->kn_filter) {
1876 	case EVFILT_READ:
1877 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_READ;
1878 		kn->kn_hook = pcc;
1879 		kn->kn_fop = &cuse_client_kqfilter_read_ops;
1880 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1881 		break;
1882 	case EVFILT_WRITE:
1883 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_WRITE;
1884 		kn->kn_hook = pcc;
1885 		kn->kn_fop = &cuse_client_kqfilter_write_ops;
1886 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1887 		break;
1888 	default:
1889 		error = EINVAL;
1890 		break;
1891 	}
1892 	cuse_unlock();
1893 
1894 	if (error == 0)
1895 		cuse_client_kqfilter_poll(dev, pcc);
1896 	return (error);
1897 }
1898