xref: /freebsd/sys/fs/cuse/cuse.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*-
2  * Copyright (c) 2010-2022 Hans Petter Selasky
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 #include <sys/stdint.h>
27 #include <sys/stddef.h>
28 #include <sys/param.h>
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/conf.h>
32 #include <sys/kernel.h>
33 #include <sys/bus.h>
34 #include <sys/linker_set.h>
35 #include <sys/module.h>
36 #include <sys/lock.h>
37 #include <sys/mutex.h>
38 #include <sys/condvar.h>
39 #include <sys/sysctl.h>
40 #include <sys/unistd.h>
41 #include <sys/malloc.h>
42 #include <sys/priv.h>
43 #include <sys/uio.h>
44 #include <sys/poll.h>
45 #include <sys/sx.h>
46 #include <sys/rwlock.h>
47 #include <sys/queue.h>
48 #include <sys/fcntl.h>
49 #include <sys/proc.h>
50 #include <sys/vnode.h>
51 #include <sys/selinfo.h>
52 #include <sys/ptrace.h>
53 #include <sys/sysent.h>
54 
55 #include <machine/bus.h>
56 
57 #include <vm/vm.h>
58 #include <vm/pmap.h>
59 #include <vm/vm_object.h>
60 #include <vm/vm_page.h>
61 #include <vm/vm_pager.h>
62 
63 #include <fs/cuse/cuse_defs.h>
64 #include <fs/cuse/cuse_ioctl.h>
65 
/*
 * Size in bytes of the per-client read_buffer[] and write_buffer[]
 * arrays declared in struct cuse_client.
 *
 * set this define to zero to disable this feature
 *
 * NOTE(review): the buffers in struct cuse_client are declared
 * unconditionally with this size, so confirm that a value of zero is
 * still supported before relying on the remark above.
 */
#define	CUSE_COPY_BUFFER_MAX \
	CUSE_BUFFER_MAX

/* Largest number of pages a single CUSE_IOCTL_ALLOC_MEMORY may request. */
#define	CUSE_ALLOC_PAGES_MAX \
	(CUSE_ALLOC_BYTES_MAX / PAGE_SIZE)

/* Refuse to build when the byte limit is smaller than one page. */
#if (CUSE_ALLOC_PAGES_MAX == 0)
#error "PAGE_SIZE is too big!"
#endif
76 
77 static int
78 cuse_modevent(module_t mod, int type, void *data)
79 {
80 	switch (type) {
81 	case MOD_LOAD:
82 	case MOD_UNLOAD:
83 		return (0);
84 	default:
85 		return (EOPNOTSUPP);
86 	}
87 }
88 
/* Module descriptor: module name plus the event handler defined above. */
static moduledata_t cuse_mod = {
	.name = "cuse",
	.evhand = &cuse_modevent,
};

/* Register the module and publish its interface version. */
DECLARE_MODULE(cuse, cuse_mod, SI_SUB_DEVFS, SI_ORDER_FIRST);
MODULE_VERSION(cuse, 1);

/*
 * Prevent cuse4bsd.ko and cuse.ko from loading at the same time by
 * declaring support for the cuse4bsd interface in cuse.ko:
 */
MODULE_VERSION(cuse4bsd, 1);

/* Advertise the feature string when the FEATURE() macro is available. */
#ifdef FEATURE
FEATURE(cuse, "Userspace character devices");
#endif
106 
struct cuse_command;
struct cuse_server;
struct cuse_client;

/*
 * One command slot of a client.  Every client embeds CUSE_CMD_MAX of
 * these in its cmds[] array.
 */
struct cuse_client_command {
	/* linkage on the server's "head" queue; tqe_prev is NULL when unqueued */
	TAILQ_ENTRY(cuse_client_command) entry;
	/* request record copied out to the server by CUSE_IOCTL_GET_COMMAND */
	struct cuse_command sub;
	/* exclusive lock taken by cuse_cmd_lock()/cuse_cmd_unlock() */
	struct sx sx;
	/* the client sleeps here for completion; also signalled when copies finish */
	struct cv cv;
	/* server thread that dequeued the command; reset to NULL on sync */
	struct thread *entered;
	/* client this slot belongs to */
	struct cuse_client *client;
	/* process waiting in the receive function, NULL at all other times */
	struct proc *proc_curr;
	/* number of server-side copies currently in flight for this command */
	int	proc_refs;
	/* set when the waiting client caught a signal; read by CUSE_IOCTL_GET_SIG */
	int	got_signal;
	/* result code stored by CUSE_IOCTL_SYNC_COMMAND */
	int	error;
	/* CUSE_CMD_NONE until the server posts CUSE_CMD_SYNC */
	int	command;
};
124 
/* One memory allocation made by a server through CUSE_IOCTL_ALLOC_MEMORY. */
struct cuse_memory {
	/* linkage on the owning server's "hmem" list */
	TAILQ_ENTRY(cuse_memory) entry;
	/* VM object obtained from vm_pager_allocate(OBJT_SWAP, ...) */
	vm_object_t object;
	/* size of the object in pages */
	uint32_t page_count;
	/* allocation number chosen by the server; unique within "hmem" */
	uint32_t alloc_nr;
};
131 
/* One character device created by a server through CUSE_IOCTL_CREATE_DEV. */
struct cuse_server_dev {
	/* linkage on the owning server's "hdev" list */
	TAILQ_ENTRY(cuse_server_dev) entry;
	/* server that created the device */
	struct cuse_server *server;
	/* kernel device node; its si_drv1 points back at this structure */
	struct cdev *kern_dev;
	/* user-space handle supplied at creation, matched by CUSE_IOCTL_DESTROY_DEV */
	struct cuse_dev *user_dev;
};
138 
/* Per-open state of /dev/cuse, i.e. one user-space server instance. */
struct cuse_server {
	/* linkage on the global cuse_server_head list */
	TAILQ_ENTRY(cuse_server) entry;
	/* client commands waiting to be fetched by CUSE_IOCTL_GET_COMMAND */
	TAILQ_HEAD(, cuse_client_command) head;
	/* devices created by this server */
	TAILQ_HEAD(, cuse_server_dev) hdev;
	/* clients attached to this server */
	TAILQ_HEAD(, cuse_client) hcli;
	/* memory allocations owned by this server */
	TAILQ_HEAD(, cuse_memory) hmem;
	/* server mutex, see cuse_server_lock(); also backs selinfo's knlist */
	struct mtx mtx;
	/* server threads sleep here while the command queue is empty */
	struct cv cv;
	/* poll/kqueue bookkeeping */
	struct selinfo selinfo;
	/* PID of the process that opened /dev/cuse */
	pid_t	pid;
	/* non-zero once shutdown of this server has started */
	int	is_closing;
	/* reference count; the structure is freed when it drops to zero */
	int	refs;
};
152 
/* Per-open state of a device node created by a server. */
struct cuse_client {
	/* linkage on the server's "hcli" list */
	TAILQ_ENTRY(cuse_client) entry;
	/* second list linkage; NOTE(review): its user is outside this part of the file */
	TAILQ_ENTRY(cuse_client) entry_ref;
	/* one command slot per command type */
	struct cuse_client_command cmds[CUSE_CMD_MAX];
	/* server that owns the device this client opened */
	struct cuse_server *server;
	/* device that was opened; cleared to NULL when the client starts closing */
	struct cuse_server_dev *server_dev;

	/*
	 * Windows used by the optimized server copy path: a peer pointer
	 * inside [read_base, read_base + read_length) is served from
	 * read_buffer, and likewise for the write_* members.
	 * NOTE(review): where these are filled in is outside this part of
	 * the file.
	 */
	uintptr_t read_base;
	uintptr_t write_base;
	int read_length;
	int write_length;
	uint8_t	read_buffer[CUSE_COPY_BUFFER_MAX] __aligned(4);
	uint8_t	write_buffer[CUSE_COPY_BUFFER_MAX] __aligned(4);
	/* staging area addressed by peer pointers starting at CUSE_BUF_MIN_PTR */
	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);

	int	fflags;			/* file flags */
	int	cflags;			/* client flags */
#define	CUSE_CLI_IS_CLOSING 0x01
#define	CUSE_CLI_KNOTE_NEED_READ 0x02
#define	CUSE_CLI_KNOTE_NEED_WRITE 0x04
#define	CUSE_CLI_KNOTE_HAS_READ 0x08
#define	CUSE_CLI_KNOTE_HAS_WRITE 0x10
};
176 
/*
 * Non-zero when the client has been marked as closing.  The value is
 * the raw flag bit, not a normalized 0/1.
 */
#define	CUSE_CLIENT_CLOSING(pcc) \
    ((pcc)->cflags & CUSE_CLI_IS_CLOSING)
179 
/* malloc(9) type used for every allocation made by this driver. */
static	MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");

/* All currently existing servers; guarded by cuse_global_mtx. */
static TAILQ_HEAD(, cuse_server) cuse_server_head;
static struct mtx cuse_global_mtx;
/* The /dev/cuse control node created at initialisation. */
static struct cdev *cuse_dev;
/*
 * Unit number bookkeeping: slot n is in use when cuse_alloc_unit[n] is
 * non-NULL, in which case it names the owning server and
 * cuse_alloc_unit_id[n] holds the combined id/unit value.
 */
static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
187 
/* Prototypes needed before their definitions further down the file. */
static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);
static void cuse_client_kqfilter_read_detach(struct knote *kn);
static void cuse_client_kqfilter_write_detach(struct knote *kn);
static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);

/* kqueue filter operations for read events on a client descriptor. */
static const struct filterops cuse_client_kqfilter_read_ops = {
	.f_isfd = 1,
	.f_detach = cuse_client_kqfilter_read_detach,
	.f_event = cuse_client_kqfilter_read_event,
};

/* kqueue filter operations for write events on a client descriptor. */
static const struct filterops cuse_client_kqfilter_write_ops = {
	.f_isfd = 1,
	.f_detach = cuse_client_kqfilter_write_detach,
	.f_event = cuse_client_kqfilter_write_event,
};
205 
/* Entry points of the device nodes created on behalf of servers. */
static d_open_t cuse_client_open;
static d_close_t cuse_client_close;
static d_ioctl_t cuse_client_ioctl;
static d_read_t cuse_client_read;
static d_write_t cuse_client_write;
static d_poll_t cuse_client_poll;
static d_mmap_single_t cuse_client_mmap_single;
static d_kqfilter_t cuse_client_kqfilter;

/*
 * Character device switch used by make_dev_credf() in
 * CUSE_IOCTL_CREATE_DEV.  D_TRACKCLOSE is set in d_flags.
 */
static struct cdevsw cuse_client_devsw = {
	.d_version = D_VERSION,
	.d_open = cuse_client_open,
	.d_close = cuse_client_close,
	.d_ioctl = cuse_client_ioctl,
	.d_name = "cuse_client",
	.d_flags = D_TRACKCLOSE,
	.d_read = cuse_client_read,
	.d_write = cuse_client_write,
	.d_poll = cuse_client_poll,
	.d_mmap_single = cuse_client_mmap_single,
	.d_kqfilter = cuse_client_kqfilter,
};
228 
/* Entry points of the /dev/cuse control node used by servers. */
static d_open_t cuse_server_open;
static d_close_t cuse_server_close;
static d_ioctl_t cuse_server_ioctl;
static d_read_t cuse_server_read;
static d_write_t cuse_server_write;
static d_poll_t cuse_server_poll;
static d_mmap_single_t cuse_server_mmap_single;

/*
 * Character device switch passed to make_dev() when /dev/cuse is
 * created.  D_TRACKCLOSE is set in d_flags.
 */
static struct cdevsw cuse_server_devsw = {
	.d_version = D_VERSION,
	.d_open = cuse_server_open,
	.d_close = cuse_server_close,
	.d_ioctl = cuse_server_ioctl,
	.d_name = "cuse_server",
	.d_flags = D_TRACKCLOSE,
	.d_read = cuse_server_read,
	.d_write = cuse_server_write,
	.d_poll = cuse_server_poll,
	.d_mmap_single = cuse_server_mmap_single,
};
249 
/* Used by the server code before their definitions appear. */
static void cuse_client_is_closing(struct cuse_client *);
static int cuse_free_unit_by_id_locked(struct cuse_server *, int);
252 
/* Acquire the driver-wide mutex that guards cuse_server_head. */
static void
cuse_global_lock(void)
{
	mtx_lock(&cuse_global_mtx);
}
258 
/* Release the driver-wide mutex taken by cuse_global_lock(). */
static void
cuse_global_unlock(void)
{
	mtx_unlock(&cuse_global_mtx);
}
264 
/* Acquire the mutex of the given server. */
static void
cuse_server_lock(struct cuse_server *pcs)
{
	mtx_lock(&pcs->mtx);
}
270 
/* Release the mutex of the given server. */
static void
cuse_server_unlock(struct cuse_server *pcs)
{
	mtx_unlock(&pcs->mtx);
}
276 
/*
 * Report whether the calling thread owns the server mutex, as
 * determined by mtx_owned().
 */
static bool
cuse_server_is_locked(struct cuse_server *pcs)
{
	return (mtx_owned(&pcs->mtx));
}
282 
/* Take the exclusive sx lock of a client command slot. */
static void
cuse_cmd_lock(struct cuse_client_command *pccmd)
{
	sx_xlock(&pccmd->sx);
}
288 
/* Drop the exclusive sx lock taken by cuse_cmd_lock(). */
static void
cuse_cmd_unlock(struct cuse_client_command *pccmd)
{
	sx_xunlock(&pccmd->sx);
}
294 
295 static void
296 cuse_kern_init(void *arg)
297 {
298 	TAILQ_INIT(&cuse_server_head);
299 
300 	mtx_init(&cuse_global_mtx, "cuse-global-mtx", NULL, MTX_DEF);
301 
302 	cuse_dev = make_dev(&cuse_server_devsw, 0,
303 	    UID_ROOT, GID_OPERATOR, 0600, "cuse");
304 
305 	printf("Cuse v%d.%d.%d @ /dev/cuse\n",
306 	    (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
307 	    (CUSE_VERSION >> 0) & 0xFF);
308 }
309 SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, NULL);
310 
311 static void
312 cuse_kern_uninit(void *arg)
313 {
314 	void *ptr;
315 
316 	while (1) {
317 		printf("Cuse: Please exit all /dev/cuse instances "
318 		    "and processes which have used this device.\n");
319 
320 		pause("DRAIN", 2 * hz);
321 
322 		cuse_global_lock();
323 		ptr = TAILQ_FIRST(&cuse_server_head);
324 		cuse_global_unlock();
325 
326 		if (ptr == NULL)
327 			break;
328 	}
329 
330 	if (cuse_dev != NULL)
331 		destroy_dev(cuse_dev);
332 
333 	mtx_destroy(&cuse_global_mtx);
334 }
335 SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, NULL);
336 
337 static int
338 cuse_server_get(struct cuse_server **ppcs)
339 {
340 	struct cuse_server *pcs;
341 	int error;
342 
343 	error = devfs_get_cdevpriv((void **)&pcs);
344 	if (error != 0) {
345 		*ppcs = NULL;
346 		return (error);
347 	}
348 	if (pcs->is_closing) {
349 		*ppcs = NULL;
350 		return (EINVAL);
351 	}
352 	*ppcs = pcs;
353 	return (0);
354 }
355 
356 static void
357 cuse_server_is_closing(struct cuse_server *pcs)
358 {
359 	struct cuse_client *pcc;
360 
361 	if (pcs->is_closing)
362 		return;
363 
364 	pcs->is_closing = 1;
365 
366 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
367 		cuse_client_is_closing(pcc);
368 	}
369 }
370 
371 static struct cuse_client_command *
372 cuse_server_find_command(struct cuse_server *pcs, struct thread *td)
373 {
374 	struct cuse_client *pcc;
375 	int n;
376 
377 	if (pcs->is_closing)
378 		goto done;
379 
380 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
381 		if (CUSE_CLIENT_CLOSING(pcc))
382 			continue;
383 		for (n = 0; n != CUSE_CMD_MAX; n++) {
384 			if (pcc->cmds[n].entered == td)
385 				return (&pcc->cmds[n]);
386 		}
387 	}
388 done:
389 	return (NULL);
390 }
391 
/*
 * Sanitize a NUL-terminated string in place.
 *
 * ASCII letters, digits and the characters '.', '_' and '/' are kept;
 * every other character is overwritten with '_'.
 */
static void
cuse_str_filter(char *ptr)
{
	int ch;

	for (; (ch = *ptr) != 0; ptr++) {
		const int is_lower = (ch >= 'a' && ch <= 'z');
		const int is_upper = (ch >= 'A' && ch <= 'Z');
		const int is_digit = (ch >= '0' && ch <= '9');
		const int is_punct = (ch == '.' || ch == '_' || ch == '/');

		if (!(is_lower || is_upper || is_digit || is_punct))
			*ptr = '_';
	}
}
419 
420 static int
421 cuse_convert_error(int error)
422 {
423 	;				/* indent fix */
424 	switch (error) {
425 	case CUSE_ERR_NONE:
426 		return (0);
427 	case CUSE_ERR_BUSY:
428 		return (EBUSY);
429 	case CUSE_ERR_WOULDBLOCK:
430 		return (EWOULDBLOCK);
431 	case CUSE_ERR_INVALID:
432 		return (EINVAL);
433 	case CUSE_ERR_NO_MEMORY:
434 		return (ENOMEM);
435 	case CUSE_ERR_FAULT:
436 		return (EFAULT);
437 	case CUSE_ERR_SIGNAL:
438 		return (EINTR);
439 	case CUSE_ERR_NO_DEVICE:
440 		return (ENODEV);
441 	default:
442 		return (ENXIO);
443 	}
444 }
445 
/*
 * Release a memory allocation record: drop this driver's reference on
 * the VM object, then free the record itself.  The callers visible in
 * this file unlink "mem" from the server's list before calling.
 */
static void
cuse_vm_memory_free(struct cuse_memory *mem)
{
	/* last user is gone - free */
	vm_object_deallocate(mem->object);

	/* free CUSE memory */
	free(mem, M_CUSE);
}
455 
456 static int
457 cuse_server_alloc_memory(struct cuse_server *pcs, uint32_t alloc_nr,
458     uint32_t page_count)
459 {
460 	struct cuse_memory *temp;
461 	struct cuse_memory *mem;
462 	vm_object_t object;
463 	int error;
464 
465 	mem = malloc(sizeof(*mem), M_CUSE, M_WAITOK | M_ZERO);
466 
467 	object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * page_count,
468 	    VM_PROT_DEFAULT, 0, curthread->td_ucred);
469 	if (object == NULL) {
470 		error = ENOMEM;
471 		goto error_0;
472 	}
473 
474 	cuse_server_lock(pcs);
475 	/* check if allocation number already exists */
476 	TAILQ_FOREACH(temp, &pcs->hmem, entry) {
477 		if (temp->alloc_nr == alloc_nr)
478 			break;
479 	}
480 	if (temp != NULL) {
481 		cuse_server_unlock(pcs);
482 		error = EBUSY;
483 		goto error_1;
484 	}
485 	mem->object = object;
486 	mem->page_count = page_count;
487 	mem->alloc_nr = alloc_nr;
488 	TAILQ_INSERT_TAIL(&pcs->hmem, mem, entry);
489 	cuse_server_unlock(pcs);
490 
491 	return (0);
492 
493 error_1:
494 	vm_object_deallocate(object);
495 error_0:
496 	free(mem, M_CUSE);
497 	return (error);
498 }
499 
500 static int
501 cuse_server_free_memory(struct cuse_server *pcs, uint32_t alloc_nr)
502 {
503 	struct cuse_memory *mem;
504 
505 	cuse_server_lock(pcs);
506 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
507 		if (mem->alloc_nr == alloc_nr)
508 			break;
509 	}
510 	if (mem == NULL) {
511 		cuse_server_unlock(pcs);
512 		return (EINVAL);
513 	}
514 	TAILQ_REMOVE(&pcs->hmem, mem, entry);
515 	cuse_server_unlock(pcs);
516 
517 	cuse_vm_memory_free(mem);
518 
519 	return (0);
520 }
521 
522 static int
523 cuse_client_get(struct cuse_client **ppcc)
524 {
525 	struct cuse_client *pcc;
526 	int error;
527 
528 	/* try to get private data */
529 	error = devfs_get_cdevpriv((void **)&pcc);
530 	if (error != 0) {
531 		*ppcc = NULL;
532 		return (error);
533 	}
534 	if (CUSE_CLIENT_CLOSING(pcc) || pcc->server->is_closing) {
535 		*ppcc = NULL;
536 		return (EINVAL);
537 	}
538 	*ppcc = pcc;
539 	return (0);
540 }
541 
542 static void
543 cuse_client_is_closing(struct cuse_client *pcc)
544 {
545 	struct cuse_client_command *pccmd;
546 	uint32_t n;
547 
548 	if (CUSE_CLIENT_CLOSING(pcc))
549 		return;
550 
551 	pcc->cflags |= CUSE_CLI_IS_CLOSING;
552 	pcc->server_dev = NULL;
553 
554 	for (n = 0; n != CUSE_CMD_MAX; n++) {
555 		pccmd = &pcc->cmds[n];
556 
557 		if (pccmd->entry.tqe_prev != NULL) {
558 			TAILQ_REMOVE(&pcc->server->head, pccmd, entry);
559 			pccmd->entry.tqe_prev = NULL;
560 		}
561 		cv_broadcast(&pccmd->cv);
562 	}
563 }
564 
565 static void
566 cuse_client_send_command_locked(struct cuse_client_command *pccmd,
567     uintptr_t data_ptr, unsigned long arg, int fflags, int ioflag)
568 {
569 	unsigned long cuse_fflags = 0;
570 	struct cuse_server *pcs;
571 
572 	if (fflags & FREAD)
573 		cuse_fflags |= CUSE_FFLAG_READ;
574 
575 	if (fflags & FWRITE)
576 		cuse_fflags |= CUSE_FFLAG_WRITE;
577 
578 	if (ioflag & IO_NDELAY)
579 		cuse_fflags |= CUSE_FFLAG_NONBLOCK;
580 #if defined(__LP64__)
581 	if (SV_CURPROC_FLAG(SV_ILP32))
582 		cuse_fflags |= CUSE_FFLAG_COMPAT32;
583 #endif
584 	pccmd->sub.fflags = cuse_fflags;
585 	pccmd->sub.data_pointer = data_ptr;
586 	pccmd->sub.argument = arg;
587 
588 	pcs = pccmd->client->server;
589 
590 	if ((pccmd->entry.tqe_prev == NULL) &&
591 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
592 	    (pcs->is_closing == 0)) {
593 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
594 		cv_signal(&pcs->cv);
595 	}
596 }
597 
598 static void
599 cuse_client_got_signal(struct cuse_client_command *pccmd)
600 {
601 	struct cuse_server *pcs;
602 
603 	pccmd->got_signal = 1;
604 
605 	pccmd = &pccmd->client->cmds[CUSE_CMD_SIGNAL];
606 
607 	pcs = pccmd->client->server;
608 
609 	if ((pccmd->entry.tqe_prev == NULL) &&
610 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
611 	    (pcs->is_closing == 0)) {
612 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
613 		cv_signal(&pcs->cv);
614 	}
615 }
616 
617 static int
618 cuse_client_receive_command_locked(struct cuse_client_command *pccmd,
619     uint8_t *arg_ptr, uint32_t arg_len)
620 {
621 	struct cuse_server *pcs;
622 	int error;
623 
624 	pcs = pccmd->client->server;
625 	error = 0;
626 
627 	pccmd->proc_curr = curthread->td_proc;
628 
629 	if (CUSE_CLIENT_CLOSING(pccmd->client) || pcs->is_closing) {
630 		error = CUSE_ERR_OTHER;
631 		goto done;
632 	}
633 	while (pccmd->command == CUSE_CMD_NONE) {
634 		if (error != 0) {
635 			cv_wait(&pccmd->cv, &pcs->mtx);
636 		} else {
637 			error = cv_wait_sig(&pccmd->cv, &pcs->mtx);
638 
639 			if (error != 0)
640 				cuse_client_got_signal(pccmd);
641 		}
642 		if (CUSE_CLIENT_CLOSING(pccmd->client) || pcs->is_closing) {
643 			error = CUSE_ERR_OTHER;
644 			goto done;
645 		}
646 	}
647 
648 	error = pccmd->error;
649 	pccmd->command = CUSE_CMD_NONE;
650 	cv_signal(&pccmd->cv);
651 
652 done:
653 
654 	/* wait until all process references are gone */
655 
656 	pccmd->proc_curr = NULL;
657 
658 	while (pccmd->proc_refs != 0)
659 		cv_wait(&pccmd->cv, &pcs->mtx);
660 
661 	return (error);
662 }
663 
664 /*------------------------------------------------------------------------*
665  *	CUSE SERVER PART
666  *------------------------------------------------------------------------*/
667 
668 static void
669 cuse_server_free_dev(struct cuse_server_dev *pcsd)
670 {
671 	struct cuse_server *pcs;
672 	struct cuse_client *pcc;
673 
674 	/* get server pointer */
675 	pcs = pcsd->server;
676 
677 	/* prevent creation of more devices */
678 	cuse_server_lock(pcs);
679 	if (pcsd->kern_dev != NULL)
680 		pcsd->kern_dev->si_drv1 = NULL;
681 
682 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
683 		if (pcc->server_dev == pcsd)
684 			cuse_client_is_closing(pcc);
685 	}
686 	cuse_server_unlock(pcs);
687 
688 	/* destroy device, if any */
689 	if (pcsd->kern_dev != NULL) {
690 		/* destroy device synchronously */
691 		destroy_dev(pcsd->kern_dev);
692 	}
693 	free(pcsd, M_CUSE);
694 }
695 
696 static void
697 cuse_server_unref(struct cuse_server *pcs)
698 {
699 	struct cuse_server_dev *pcsd;
700 	struct cuse_memory *mem;
701 
702 	cuse_server_lock(pcs);
703 	if (--(pcs->refs) != 0) {
704 		cuse_server_unlock(pcs);
705 		return;
706 	}
707 	cuse_server_is_closing(pcs);
708 	/* final client wakeup, if any */
709 	cuse_server_wakeup_all_client_locked(pcs);
710 
711 	cuse_global_lock();
712 	TAILQ_REMOVE(&cuse_server_head, pcs, entry);
713 	cuse_global_unlock();
714 
715 	while ((pcsd = TAILQ_FIRST(&pcs->hdev)) != NULL) {
716 		TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
717 		cuse_server_unlock(pcs);
718 		cuse_server_free_dev(pcsd);
719 		cuse_server_lock(pcs);
720 	}
721 
722 	cuse_free_unit_by_id_locked(pcs, -1);
723 
724 	while ((mem = TAILQ_FIRST(&pcs->hmem)) != NULL) {
725 		TAILQ_REMOVE(&pcs->hmem, mem, entry);
726 		cuse_server_unlock(pcs);
727 		cuse_vm_memory_free(mem);
728 		cuse_server_lock(pcs);
729 	}
730 
731 	knlist_clear(&pcs->selinfo.si_note, 1);
732 	knlist_destroy(&pcs->selinfo.si_note);
733 
734 	cuse_server_unlock(pcs);
735 
736 	seldrain(&pcs->selinfo);
737 
738 	cv_destroy(&pcs->cv);
739 
740 	mtx_destroy(&pcs->mtx);
741 
742 	free(pcs, M_CUSE);
743 }
744 
745 static int
746 cuse_server_do_close(struct cuse_server *pcs)
747 {
748 	int retval;
749 
750 	cuse_server_lock(pcs);
751 	cuse_server_is_closing(pcs);
752 	/* final client wakeup, if any */
753 	cuse_server_wakeup_all_client_locked(pcs);
754 
755 	knlist_clear(&pcs->selinfo.si_note, 1);
756 
757 	retval = pcs->refs;
758 	cuse_server_unlock(pcs);
759 
760 	return (retval);
761 }
762 
763 static void
764 cuse_server_free(void *arg)
765 {
766 	struct cuse_server *pcs = arg;
767 
768 	/*
769 	 * The final server unref should be done by the server thread
770 	 * to prevent deadlock in the client cdevpriv destructor,
771 	 * which cannot destroy itself.
772 	 */
773 	while (cuse_server_do_close(pcs) != 1)
774 		pause("W", hz);
775 
776 	/* drop final refcount */
777 	cuse_server_unref(pcs);
778 }
779 
780 static int
781 cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
782 {
783 	struct cuse_server *pcs;
784 
785 	pcs = malloc(sizeof(*pcs), M_CUSE, M_WAITOK | M_ZERO);
786 
787 	if (devfs_set_cdevpriv(pcs, &cuse_server_free)) {
788 		printf("Cuse: Cannot set cdevpriv.\n");
789 		free(pcs, M_CUSE);
790 		return (ENOMEM);
791 	}
792 	/* store current process ID */
793 	pcs->pid = curproc->p_pid;
794 
795 	TAILQ_INIT(&pcs->head);
796 	TAILQ_INIT(&pcs->hdev);
797 	TAILQ_INIT(&pcs->hcli);
798 	TAILQ_INIT(&pcs->hmem);
799 
800 	cv_init(&pcs->cv, "cuse-server-cv");
801 
802 	mtx_init(&pcs->mtx, "cuse-server-mtx", NULL, MTX_DEF);
803 
804 	knlist_init_mtx(&pcs->selinfo.si_note, &pcs->mtx);
805 
806 	cuse_global_lock();
807 	pcs->refs++;
808 	TAILQ_INSERT_TAIL(&cuse_server_head, pcs, entry);
809 	cuse_global_unlock();
810 
811 	return (0);
812 }
813 
/*
 * Close handler of /dev/cuse.  Starts closing the server attached to
 * the descriptor when one can be looked up; always returns 0.
 */
static int
cuse_server_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct cuse_server *server;

	if (cuse_server_get(&server) != 0)
		return (0);

	cuse_server_do_close(server);
	return (0);
}
824 
/* read(2) is not supported on /dev/cuse; always fails with ENXIO. */
static int
cuse_server_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	return (ENXIO);
}
830 
/* write(2) is not supported on /dev/cuse; always fails with ENXIO. */
static int
cuse_server_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	return (ENXIO);
}
836 
837 static int
838 cuse_server_ioctl_copy_locked(struct cuse_server *pcs,
839     struct cuse_client_command *pccmd,
840     struct cuse_data_chunk *pchk, bool isread)
841 {
842 	struct proc *p_proc;
843 	uint32_t offset;
844 	int error;
845 
846 	offset = pchk->peer_ptr - CUSE_BUF_MIN_PTR;
847 
848 	if (pchk->length > CUSE_BUFFER_MAX)
849 		return (EFAULT);
850 
851 	if (offset >= CUSE_BUFFER_MAX)
852 		return (EFAULT);
853 
854 	if ((offset + pchk->length) > CUSE_BUFFER_MAX)
855 		return (EFAULT);
856 
857 	p_proc = pccmd->proc_curr;
858 	if (p_proc == NULL)
859 		return (ENXIO);
860 
861 	if (pccmd->proc_refs < 0)
862 		return (ENOMEM);
863 
864 	pccmd->proc_refs++;
865 
866 	cuse_server_unlock(pcs);
867 
868 	if (!isread) {
869 		error = copyin(
870 		    (void *)pchk->local_ptr,
871 		    pccmd->client->ioctl_buffer + offset,
872 		    pchk->length);
873 	} else {
874 		error = copyout(
875 		    pccmd->client->ioctl_buffer + offset,
876 		    (void *)pchk->local_ptr,
877 		    pchk->length);
878 	}
879 
880 	cuse_server_lock(pcs);
881 
882 	pccmd->proc_refs--;
883 
884 	if (pccmd->proc_curr == NULL)
885 		cv_signal(&pccmd->cv);
886 
887 	return (error);
888 }
889 
890 static int
891 cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
892     struct proc *proc_d, vm_offset_t data_d, size_t len)
893 {
894 	struct thread *td;
895 	struct proc *proc_cur;
896 	int error;
897 
898 	td = curthread;
899 	proc_cur = td->td_proc;
900 
901 	if (proc_cur == proc_d) {
902 		struct iovec iov = {
903 			.iov_base = (caddr_t)data_d,
904 			.iov_len = len,
905 		};
906 		struct uio uio = {
907 			.uio_iov = &iov,
908 			.uio_iovcnt = 1,
909 			.uio_offset = (off_t)data_s,
910 			.uio_resid = len,
911 			.uio_segflg = UIO_USERSPACE,
912 			.uio_rw = UIO_READ,
913 			.uio_td = td,
914 		};
915 
916 		PHOLD(proc_s);
917 		error = proc_rwmem(proc_s, &uio);
918 		PRELE(proc_s);
919 
920 	} else if (proc_cur == proc_s) {
921 		struct iovec iov = {
922 			.iov_base = (caddr_t)data_s,
923 			.iov_len = len,
924 		};
925 		struct uio uio = {
926 			.uio_iov = &iov,
927 			.uio_iovcnt = 1,
928 			.uio_offset = (off_t)data_d,
929 			.uio_resid = len,
930 			.uio_segflg = UIO_USERSPACE,
931 			.uio_rw = UIO_WRITE,
932 			.uio_td = td,
933 		};
934 
935 		PHOLD(proc_d);
936 		error = proc_rwmem(proc_d, &uio);
937 		PRELE(proc_d);
938 	} else {
939 		error = EINVAL;
940 	}
941 	return (error);
942 }
943 
944 static int
945 cuse_server_data_copy_locked(struct cuse_server *pcs,
946     struct cuse_client_command *pccmd,
947     struct cuse_data_chunk *pchk, bool isread)
948 {
949 	struct proc *p_proc;
950 	int error;
951 
952 	p_proc = pccmd->proc_curr;
953 	if (p_proc == NULL)
954 		return (ENXIO);
955 
956 	if (pccmd->proc_refs < 0)
957 		return (ENOMEM);
958 
959 	pccmd->proc_refs++;
960 
961 	cuse_server_unlock(pcs);
962 
963 	if (!isread) {
964 		error = cuse_proc2proc_copy(
965 		    curthread->td_proc, pchk->local_ptr,
966 		    p_proc, pchk->peer_ptr,
967 		    pchk->length);
968 	} else {
969 		error = cuse_proc2proc_copy(
970 		    p_proc, pchk->peer_ptr,
971 		    curthread->td_proc, pchk->local_ptr,
972 		    pchk->length);
973 	}
974 
975 	cuse_server_lock(pcs);
976 
977 	pccmd->proc_refs--;
978 
979 	if (pccmd->proc_curr == NULL)
980 		cv_signal(&pccmd->cv);
981 
982 	return (error);
983 }
984 
985 static int
986 cuse_server_data_copy_optimized_locked(struct cuse_server *pcs,
987     struct cuse_client_command *pccmd,
988     struct cuse_data_chunk *pchk, bool isread)
989 {
990 	uintptr_t offset;
991 	int error;
992 
993 	/*
994 	 * Check if data is stored locally to avoid accessing
995 	 * other process's data space:
996 	 */
997 	if (isread) {
998 		offset = pchk->peer_ptr - pccmd->client->write_base;
999 
1000 		if (offset < (uintptr_t)pccmd->client->write_length &&
1001 		    pchk->length <= (unsigned long)pccmd->client->write_length &&
1002 		    offset + pchk->length <= (uintptr_t)pccmd->client->write_length) {
1003 			cuse_server_unlock(pcs);
1004 			error = copyout(pccmd->client->write_buffer + offset,
1005 			    (void *)pchk->local_ptr, pchk->length);
1006 			goto done;
1007 		}
1008 	} else {
1009 		offset = pchk->peer_ptr - pccmd->client->read_base;
1010 
1011 		if (offset < (uintptr_t)pccmd->client->read_length &&
1012 		    pchk->length <= (unsigned long)pccmd->client->read_length &&
1013 		    offset + pchk->length <= (uintptr_t)pccmd->client->read_length) {
1014 			cuse_server_unlock(pcs);
1015 			error = copyin((void *)pchk->local_ptr,
1016 			    pccmd->client->read_buffer + offset, pchk->length);
1017 			goto done;
1018 		}
1019 	}
1020 
1021 	/* use process to process copy function */
1022 	error = cuse_server_data_copy_locked(pcs, pccmd, pchk, isread);
1023 done:
1024 	return (error);
1025 }
1026 
1027 static int
1028 cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
1029 {
1030 	int n;
1031 	int x = 0;
1032 	int match;
1033 
1034 	do {
1035 		for (match = n = 0; n != CUSE_DEVICES_MAX; n++) {
1036 			if (cuse_alloc_unit[n] != NULL) {
1037 				if ((cuse_alloc_unit_id[n] ^ id) & CUSE_ID_MASK)
1038 					continue;
1039 				if ((cuse_alloc_unit_id[n] & ~CUSE_ID_MASK) == x) {
1040 					x++;
1041 					match = 1;
1042 				}
1043 			}
1044 		}
1045 	} while (match);
1046 
1047 	if (x < 256) {
1048 		for (n = 0; n != CUSE_DEVICES_MAX; n++) {
1049 			if (cuse_alloc_unit[n] == NULL) {
1050 				cuse_alloc_unit[n] = pcs;
1051 				cuse_alloc_unit_id[n] = id | x;
1052 				return (x);
1053 			}
1054 		}
1055 	}
1056 	return (-1);
1057 }
1058 
/*
 * Wake up select/poll sleepers and activate the knotes attached to the
 * server's selinfo.  KNOTE_LOCKED() is used, so the lock backing the
 * knote list (the server mutex, see knlist_init_mtx() in the open
 * handler) is expected to be held by the caller.
 */
static void
cuse_server_wakeup_locked(struct cuse_server *pcs)
{
	selwakeup(&pcs->selinfo);
	KNOTE_LOCKED(&pcs->selinfo.si_note, 0);
}
1065 
1066 static void
1067 cuse_server_wakeup_all_client_locked(struct cuse_server *pcs)
1068 {
1069 	struct cuse_client *pcc;
1070 
1071 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
1072 		pcc->cflags |= (CUSE_CLI_KNOTE_NEED_READ |
1073 		    CUSE_CLI_KNOTE_NEED_WRITE);
1074 	}
1075 	cuse_server_wakeup_locked(pcs);
1076 }
1077 
1078 static int
1079 cuse_free_unit_by_id_locked(struct cuse_server *pcs, int id)
1080 {
1081 	int n;
1082 	int found = 0;
1083 
1084 	for (n = 0; n != CUSE_DEVICES_MAX; n++) {
1085 		if (cuse_alloc_unit[n] == pcs) {
1086 			if (cuse_alloc_unit_id[n] == id || id == -1) {
1087 				cuse_alloc_unit[n] = NULL;
1088 				cuse_alloc_unit_id[n] = 0;
1089 				found = 1;
1090 			}
1091 		}
1092 	}
1093 
1094 	return (found ? 0 : EINVAL);
1095 }
1096 
1097 static int
1098 cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
1099     caddr_t data, int fflag, struct thread *td)
1100 {
1101 	struct cuse_server *pcs;
1102 	int error;
1103 
1104 	error = cuse_server_get(&pcs);
1105 	if (error != 0)
1106 		return (error);
1107 
1108 	switch (cmd) {
1109 		struct cuse_client_command *pccmd;
1110 		struct cuse_client *pcc;
1111 		struct cuse_command *pcmd;
1112 		struct cuse_alloc_info *pai;
1113 		struct cuse_create_dev *pcd;
1114 		struct cuse_server_dev *pcsd;
1115 		struct cuse_data_chunk *pchk;
1116 		int n;
1117 
1118 	case CUSE_IOCTL_GET_COMMAND:
1119 		pcmd = (void *)data;
1120 
1121 		cuse_server_lock(pcs);
1122 
1123 		while ((pccmd = TAILQ_FIRST(&pcs->head)) == NULL) {
1124 			error = cv_wait_sig(&pcs->cv, &pcs->mtx);
1125 
1126 			if (pcs->is_closing)
1127 				error = ENXIO;
1128 
1129 			if (error) {
1130 				cuse_server_unlock(pcs);
1131 				return (error);
1132 			}
1133 		}
1134 
1135 		TAILQ_REMOVE(&pcs->head, pccmd, entry);
1136 		pccmd->entry.tqe_prev = NULL;
1137 
1138 		pccmd->entered = curthread;
1139 
1140 		*pcmd = pccmd->sub;
1141 
1142 		cuse_server_unlock(pcs);
1143 
1144 		break;
1145 
1146 	case CUSE_IOCTL_SYNC_COMMAND:
1147 
1148 		cuse_server_lock(pcs);
1149 		while ((pccmd = cuse_server_find_command(pcs, curthread)) != NULL) {
1150 			/* send sync command */
1151 			pccmd->entered = NULL;
1152 			pccmd->error = *(int *)data;
1153 			pccmd->command = CUSE_CMD_SYNC;
1154 
1155 			/* signal peer, if any */
1156 			cv_signal(&pccmd->cv);
1157 		}
1158 		cuse_server_unlock(pcs);
1159 
1160 		break;
1161 
1162 	case CUSE_IOCTL_ALLOC_UNIT:
1163 
1164 		cuse_server_lock(pcs);
1165 		n = cuse_alloc_unit_by_id_locked(pcs,
1166 		    CUSE_ID_DEFAULT(0));
1167 		cuse_server_unlock(pcs);
1168 
1169 		if (n < 0)
1170 			error = ENOMEM;
1171 		else
1172 			*(int *)data = n;
1173 		break;
1174 
1175 	case CUSE_IOCTL_ALLOC_UNIT_BY_ID:
1176 
1177 		n = *(int *)data;
1178 
1179 		n = (n & CUSE_ID_MASK);
1180 
1181 		cuse_server_lock(pcs);
1182 		n = cuse_alloc_unit_by_id_locked(pcs, n);
1183 		cuse_server_unlock(pcs);
1184 
1185 		if (n < 0)
1186 			error = ENOMEM;
1187 		else
1188 			*(int *)data = n;
1189 		break;
1190 
1191 	case CUSE_IOCTL_FREE_UNIT:
1192 
1193 		n = *(int *)data;
1194 
1195 		n = CUSE_ID_DEFAULT(n);
1196 
1197 		cuse_server_lock(pcs);
1198 		error = cuse_free_unit_by_id_locked(pcs, n);
1199 		cuse_server_unlock(pcs);
1200 		break;
1201 
1202 	case CUSE_IOCTL_FREE_UNIT_BY_ID:
1203 
1204 		n = *(int *)data;
1205 
1206 		cuse_server_lock(pcs);
1207 		error = cuse_free_unit_by_id_locked(pcs, n);
1208 		cuse_server_unlock(pcs);
1209 		break;
1210 
1211 	case CUSE_IOCTL_ALLOC_MEMORY:
1212 
1213 		pai = (void *)data;
1214 
1215 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1216 			error = ENOMEM;
1217 			break;
1218 		}
1219 		if (pai->page_count > CUSE_ALLOC_PAGES_MAX) {
1220 			error = ENOMEM;
1221 			break;
1222 		}
1223 		error = cuse_server_alloc_memory(pcs,
1224 		    pai->alloc_nr, pai->page_count);
1225 		break;
1226 
1227 	case CUSE_IOCTL_FREE_MEMORY:
1228 		pai = (void *)data;
1229 
1230 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1231 			error = ENOMEM;
1232 			break;
1233 		}
1234 		error = cuse_server_free_memory(pcs, pai->alloc_nr);
1235 		break;
1236 
1237 	case CUSE_IOCTL_GET_SIG:
1238 
1239 		cuse_server_lock(pcs);
1240 		pccmd = cuse_server_find_command(pcs, curthread);
1241 
1242 		if (pccmd != NULL) {
1243 			n = pccmd->got_signal;
1244 			pccmd->got_signal = 0;
1245 		} else {
1246 			n = 0;
1247 		}
1248 		cuse_server_unlock(pcs);
1249 
1250 		*(int *)data = n;
1251 
1252 		break;
1253 
1254 	case CUSE_IOCTL_SET_PFH:
1255 
1256 		cuse_server_lock(pcs);
1257 		pccmd = cuse_server_find_command(pcs, curthread);
1258 
1259 		if (pccmd != NULL) {
1260 			pcc = pccmd->client;
1261 			for (n = 0; n != CUSE_CMD_MAX; n++) {
1262 				pcc->cmds[n].sub.per_file_handle = *(uintptr_t *)data;
1263 			}
1264 		} else {
1265 			error = ENXIO;
1266 		}
1267 		cuse_server_unlock(pcs);
1268 		break;
1269 
1270 	case CUSE_IOCTL_CREATE_DEV:
1271 
1272 		error = priv_check(curthread, PRIV_DRIVER);
1273 		if (error)
1274 			break;
1275 
1276 		pcd = (void *)data;
1277 
1278 		/* filter input */
1279 
1280 		pcd->devname[sizeof(pcd->devname) - 1] = 0;
1281 
1282 		if (pcd->devname[0] == 0) {
1283 			error = EINVAL;
1284 			break;
1285 		}
1286 		cuse_str_filter(pcd->devname);
1287 
1288 		pcd->permissions &= 0777;
1289 
1290 		/* try to allocate a character device */
1291 
1292 		pcsd = malloc(sizeof(*pcsd), M_CUSE, M_WAITOK | M_ZERO);
1293 
1294 		pcsd->server = pcs;
1295 
1296 		pcsd->user_dev = pcd->dev;
1297 
1298 		pcsd->kern_dev = make_dev_credf(MAKEDEV_CHECKNAME,
1299 		    &cuse_client_devsw, 0, NULL, pcd->user_id, pcd->group_id,
1300 		    pcd->permissions, "%s", pcd->devname);
1301 
1302 		if (pcsd->kern_dev == NULL) {
1303 			free(pcsd, M_CUSE);
1304 			error = ENOMEM;
1305 			break;
1306 		}
1307 		pcsd->kern_dev->si_drv1 = pcsd;
1308 
1309 		cuse_server_lock(pcs);
1310 		TAILQ_INSERT_TAIL(&pcs->hdev, pcsd, entry);
1311 		cuse_server_unlock(pcs);
1312 
1313 		break;
1314 
1315 	case CUSE_IOCTL_DESTROY_DEV:
1316 
1317 		error = priv_check(curthread, PRIV_DRIVER);
1318 		if (error)
1319 			break;
1320 
1321 		cuse_server_lock(pcs);
1322 
1323 		error = EINVAL;
1324 
1325 		pcsd = TAILQ_FIRST(&pcs->hdev);
1326 		while (pcsd != NULL) {
1327 			if (pcsd->user_dev == *(struct cuse_dev **)data) {
1328 				TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
1329 				cuse_server_unlock(pcs);
1330 				cuse_server_free_dev(pcsd);
1331 				cuse_server_lock(pcs);
1332 				error = 0;
1333 				pcsd = TAILQ_FIRST(&pcs->hdev);
1334 			} else {
1335 				pcsd = TAILQ_NEXT(pcsd, entry);
1336 			}
1337 		}
1338 
1339 		cuse_server_unlock(pcs);
1340 		break;
1341 
1342 	case CUSE_IOCTL_WRITE_DATA:
1343 	case CUSE_IOCTL_READ_DATA:
1344 
1345 		cuse_server_lock(pcs);
1346 		pchk = (struct cuse_data_chunk *)data;
1347 
1348 		pccmd = cuse_server_find_command(pcs, curthread);
1349 
1350 		if (pccmd == NULL) {
1351 			error = ENXIO;	/* invalid request */
1352 		} else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
1353 			error = EFAULT;	/* NULL pointer */
1354 		} else if (pchk->length == 0) {
1355 			/* NOP */
1356 		} else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
1357 			error = cuse_server_ioctl_copy_locked(pcs, pccmd,
1358 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1359 		} else {
1360 			error = cuse_server_data_copy_optimized_locked(
1361 			    pcs, pccmd, pchk, cmd == CUSE_IOCTL_READ_DATA);
1362 		}
1363 
1364 		/*
1365 		 * Sometimes the functions above drop the server lock
1366 		 * early as an optimization:
1367 		 */
1368 		if (cuse_server_is_locked(pcs))
1369 			cuse_server_unlock(pcs);
1370 		break;
1371 
1372 	case CUSE_IOCTL_SELWAKEUP:
1373 		cuse_server_lock(pcs);
1374 		/*
1375 		 * We don't know which direction caused the event.
1376 		 * Wakeup both!
1377 		 */
1378 		cuse_server_wakeup_all_client_locked(pcs);
1379 		cuse_server_unlock(pcs);
1380 		break;
1381 
1382 	default:
1383 		error = ENXIO;
1384 		break;
1385 	}
1386 	return (error);
1387 }
1388 
/*
 * Poll handler for the server device node.
 *
 * Every requested event out of hangup, priority/normal read and normal
 * write is reported back as ready right away. The "dev" and "td"
 * arguments are not used.
 */
static int
cuse_server_poll(struct cdev *dev, int events, struct thread *td)
{
	const int ready_mask = POLLHUP | POLLPRI | POLLIN |
	    POLLRDNORM | POLLOUT | POLLWRNORM;

	return (events & ready_mask);
}
1395 
1396 static int
1397 cuse_common_mmap_single(struct cuse_server *pcs,
1398     vm_ooffset_t *offset, vm_size_t size, struct vm_object **object)
1399 {
1400   	struct cuse_memory *mem;
1401 	int error;
1402 
1403 	/* verify size */
1404 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE))
1405 		return (EINVAL);
1406 
1407 	cuse_server_lock(pcs);
1408 	error = ENOMEM;
1409 
1410 	/* lookup memory structure, if any */
1411 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
1412 		vm_ooffset_t min_off;
1413 		vm_ooffset_t max_off;
1414 
1415 		min_off = (mem->alloc_nr << CUSE_ALLOC_UNIT_SHIFT);
1416 		max_off = min_off + (PAGE_SIZE * mem->page_count);
1417 
1418 		if (*offset >= min_off && *offset < max_off) {
1419 			/* range check size */
1420 			if (size > (max_off - *offset)) {
1421 				error = EINVAL;
1422 			} else {
1423 				/* get new VM object offset to use */
1424 				*offset -= min_off;
1425 				vm_object_reference(mem->object);
1426 				*object = mem->object;
1427 				error = 0;
1428 			}
1429 			break;
1430 		}
1431 	}
1432 	cuse_server_unlock(pcs);
1433 	return (error);
1434 }
1435 
1436 static int
1437 cuse_server_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1438     vm_size_t size, struct vm_object **object, int nprot)
1439 {
1440 	struct cuse_server *pcs;
1441 	int error;
1442 
1443 	error = cuse_server_get(&pcs);
1444 	if (error != 0)
1445 		return (error);
1446 
1447 	return (cuse_common_mmap_single(pcs, offset, size, object));
1448 }
1449 
1450 /*------------------------------------------------------------------------*
1451  *	CUSE CLIENT PART
1452  *------------------------------------------------------------------------*/
1453 static void
1454 cuse_client_free(void *arg)
1455 {
1456 	struct cuse_client *pcc = arg;
1457 	struct cuse_client_command *pccmd;
1458 	struct cuse_server *pcs;
1459 	int n;
1460 
1461 	pcs = pcc->server;
1462 
1463 	cuse_server_lock(pcs);
1464 	cuse_client_is_closing(pcc);
1465 	TAILQ_REMOVE(&pcs->hcli, pcc, entry);
1466 	cuse_server_unlock(pcs);
1467 
1468 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1469 		pccmd = &pcc->cmds[n];
1470 
1471 		sx_destroy(&pccmd->sx);
1472 		cv_destroy(&pccmd->cv);
1473 	}
1474 
1475 	free(pcc, M_CUSE);
1476 
1477 	/* drop reference on server */
1478 	cuse_server_unref(pcs);
1479 }
1480 
1481 static int
1482 cuse_client_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
1483 {
1484 	struct cuse_client_command *pccmd;
1485 	struct cuse_server_dev *pcsd;
1486 	struct cuse_client *pcc;
1487 	struct cuse_server *pcs;
1488 	struct cuse_dev *pcd;
1489 	int error;
1490 	int n;
1491 
1492 	pcsd = dev->si_drv1;
1493 	if (pcsd != NULL) {
1494 		pcs = pcsd->server;
1495 		pcd = pcsd->user_dev;
1496 
1497 		cuse_server_lock(pcs);
1498 		/*
1499 		 * Check that the refcount didn't wrap and that the
1500 		 * same process is not both client and server. This
1501 		 * can easily lead to deadlocks when destroying the
1502 		 * CUSE character device nodes:
1503 		 */
1504 		pcs->refs++;
1505 		if (pcs->refs < 0 || pcs->pid == curproc->p_pid) {
1506 			/* overflow or wrong PID */
1507 			pcs->refs--;
1508 			cuse_server_unlock(pcs);
1509 			return (EINVAL);
1510 		}
1511 		cuse_server_unlock(pcs);
1512 	} else {
1513 		return (EINVAL);
1514 	}
1515 
1516 	pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
1517 	if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
1518 		printf("Cuse: Cannot set cdevpriv.\n");
1519 		/* drop reference on server */
1520 		cuse_server_unref(pcs);
1521 		free(pcc, M_CUSE);
1522 		return (ENOMEM);
1523 	}
1524 	pcc->fflags = fflags;
1525 	pcc->server_dev = pcsd;
1526 	pcc->server = pcs;
1527 
1528 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1529 		pccmd = &pcc->cmds[n];
1530 
1531 		pccmd->sub.dev = pcd;
1532 		pccmd->sub.command = n;
1533 		pccmd->client = pcc;
1534 
1535 		sx_init(&pccmd->sx, "cuse-client-sx");
1536 		cv_init(&pccmd->cv, "cuse-client-cv");
1537 	}
1538 
1539 	cuse_server_lock(pcs);
1540 
1541 	/* cuse_client_free() assumes that the client is listed somewhere! */
1542 	/* always enqueue */
1543 
1544 	TAILQ_INSERT_TAIL(&pcs->hcli, pcc, entry);
1545 
1546 	/* check if server is closing */
1547 	if ((pcs->is_closing != 0) || (dev->si_drv1 == NULL)) {
1548 		error = EINVAL;
1549 	} else {
1550 		error = 0;
1551 	}
1552 	cuse_server_unlock(pcs);
1553 
1554 	if (error) {
1555 		devfs_clear_cdevpriv();	/* XXX bugfix */
1556 		return (error);
1557 	}
1558 	pccmd = &pcc->cmds[CUSE_CMD_OPEN];
1559 
1560 	cuse_cmd_lock(pccmd);
1561 
1562 	cuse_server_lock(pcs);
1563 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1564 
1565 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1566 	cuse_server_unlock(pcs);
1567 
1568 	if (error < 0) {
1569 		error = cuse_convert_error(error);
1570 	} else {
1571 		error = 0;
1572 	}
1573 
1574 	cuse_cmd_unlock(pccmd);
1575 
1576 	if (error)
1577 		devfs_clear_cdevpriv();	/* XXX bugfix */
1578 
1579 	return (error);
1580 }
1581 
1582 static int
1583 cuse_client_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
1584 {
1585 	struct cuse_client_command *pccmd;
1586 	struct cuse_client *pcc;
1587 	struct cuse_server *pcs;
1588 	int error;
1589 
1590 	error = cuse_client_get(&pcc);
1591 	if (error != 0)
1592 		return (0);
1593 
1594 	pccmd = &pcc->cmds[CUSE_CMD_CLOSE];
1595 	pcs = pcc->server;
1596 
1597 	cuse_cmd_lock(pccmd);
1598 
1599 	cuse_server_lock(pcs);
1600 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1601 
1602 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1603 	cuse_cmd_unlock(pccmd);
1604 
1605 	cuse_client_is_closing(pcc);
1606 	cuse_server_unlock(pcs);
1607 
1608 	return (0);
1609 }
1610 
1611 static void
1612 cuse_client_kqfilter_poll(struct cdev *dev, struct cuse_client *pcc)
1613 {
1614 	struct cuse_server *pcs = pcc->server;
1615 	int temp;
1616 
1617 	cuse_server_lock(pcs);
1618 	temp = (pcc->cflags & (CUSE_CLI_KNOTE_HAS_READ |
1619 	    CUSE_CLI_KNOTE_HAS_WRITE));
1620 	pcc->cflags &= ~(CUSE_CLI_KNOTE_NEED_READ |
1621 	    CUSE_CLI_KNOTE_NEED_WRITE);
1622 	cuse_server_unlock(pcs);
1623 
1624 	if (temp != 0) {
1625 		/* get the latest polling state from the server */
1626 		temp = cuse_client_poll(dev, POLLIN | POLLOUT, NULL);
1627 
1628 		if (temp & (POLLIN | POLLOUT)) {
1629 			cuse_server_lock(pcs);
1630 			if (temp & POLLIN)
1631 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_READ;
1632 			if (temp & POLLOUT)
1633 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_WRITE;
1634 
1635 			/* make sure the "knote" gets woken up */
1636 			cuse_server_wakeup_locked(pcc->server);
1637 			cuse_server_unlock(pcs);
1638 		}
1639 	}
1640 }
1641 
1642 static int
1643 cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
1644 {
1645 	struct cuse_client_command *pccmd;
1646 	struct cuse_client *pcc;
1647 	struct cuse_server *pcs;
1648 	int error;
1649 	int temp;
1650 	int len;
1651 
1652 	error = cuse_client_get(&pcc);
1653 	if (error != 0)
1654 		return (error);
1655 
1656 	pccmd = &pcc->cmds[CUSE_CMD_READ];
1657 	pcs = pcc->server;
1658 
1659 	if (uio->uio_segflg != UIO_USERSPACE) {
1660 		return (EINVAL);
1661 	}
1662 	uio->uio_segflg = UIO_NOCOPY;
1663 
1664 	cuse_cmd_lock(pccmd);
1665 
1666 	while (uio->uio_resid != 0) {
1667 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1668 			error = ENOMEM;
1669 			break;
1670 		}
1671 		len = uio->uio_iov->iov_len;
1672 
1673 		cuse_server_lock(pcs);
1674 		if (len <= CUSE_COPY_BUFFER_MAX) {
1675 			/* set read buffer region for small reads */
1676 			pcc->read_base = (uintptr_t)uio->uio_iov->iov_base;
1677 			pcc->read_length = len;
1678 		}
1679 		cuse_client_send_command_locked(pccmd,
1680 		    (uintptr_t)uio->uio_iov->iov_base,
1681 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1682 
1683 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1684 		/*
1685 		 * After finishing reading data, disable the read
1686 		 * region for the cuse_server_data_copy_optimized_locked()
1687 		 * function:
1688 		 */
1689 		pcc->read_base = 0;
1690 		pcc->read_length = 0;
1691 		cuse_server_unlock(pcs);
1692 
1693 		/*
1694 		 * The return value indicates the read length, when
1695 		 * not negative. Range check it just in case to avoid
1696 		 * passing invalid length values to uiomove().
1697 		 */
1698 		if (error > len) {
1699 			error = ERANGE;
1700 			break;
1701 		} else if (error > 0 && len <= CUSE_COPY_BUFFER_MAX) {
1702 			temp = copyout(pcc->read_buffer,
1703 			    uio->uio_iov->iov_base, error);
1704 			if (temp != 0) {
1705 				error = temp;
1706 				break;
1707 			}
1708 		}
1709 		if (error < 0) {
1710 			error = cuse_convert_error(error);
1711 			break;
1712 		} else if (error == len) {
1713 			error = uiomove(NULL, error, uio);
1714 			if (error)
1715 				break;
1716 		} else {
1717 			error = uiomove(NULL, error, uio);
1718 			break;
1719 		}
1720 	}
1721 	cuse_cmd_unlock(pccmd);
1722 
1723 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1724 
1725 	if (error == EWOULDBLOCK)
1726 		cuse_client_kqfilter_poll(dev, pcc);
1727 
1728 	return (error);
1729 }
1730 
1731 static int
1732 cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
1733 {
1734 	struct cuse_client_command *pccmd;
1735 	struct cuse_client *pcc;
1736 	struct cuse_server *pcs;
1737 	int error;
1738 	int len;
1739 
1740 	error = cuse_client_get(&pcc);
1741 	if (error != 0)
1742 		return (error);
1743 
1744 	pccmd = &pcc->cmds[CUSE_CMD_WRITE];
1745 	pcs = pcc->server;
1746 
1747 	if (uio->uio_segflg != UIO_USERSPACE) {
1748 		return (EINVAL);
1749 	}
1750 	uio->uio_segflg = UIO_NOCOPY;
1751 
1752 	cuse_cmd_lock(pccmd);
1753 
1754 	while (uio->uio_resid != 0) {
1755 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1756 			error = ENOMEM;
1757 			break;
1758 		}
1759 		len = uio->uio_iov->iov_len;
1760 
1761 		if (len <= CUSE_COPY_BUFFER_MAX) {
1762 			error = copyin(uio->uio_iov->iov_base,
1763 			    pcc->write_buffer, len);
1764 			if (error != 0)
1765 				break;
1766 		}
1767 
1768 		cuse_server_lock(pcs);
1769 		if (len <= CUSE_COPY_BUFFER_MAX) {
1770 			/* set write buffer region for small writes */
1771 			pcc->write_base = (uintptr_t)uio->uio_iov->iov_base;
1772 			pcc->write_length = len;
1773 		}
1774 		cuse_client_send_command_locked(pccmd,
1775 		    (uintptr_t)uio->uio_iov->iov_base,
1776 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1777 
1778 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1779 
1780 		/*
1781 		 * After finishing writing data, disable the write
1782 		 * region for the cuse_server_data_copy_optimized_locked()
1783 		 * function:
1784 		 */
1785 		pcc->write_base = 0;
1786 		pcc->write_length = 0;
1787 		cuse_server_unlock(pcs);
1788 
1789 		/*
1790 		 * The return value indicates the write length, when
1791 		 * not negative. Range check it just in case to avoid
1792 		 * passing invalid length values to uiomove().
1793 		 */
1794 		if (error > len) {
1795 			error = ERANGE;
1796 			break;
1797 		} else if (error < 0) {
1798 			error = cuse_convert_error(error);
1799 			break;
1800 		} else if (error == len) {
1801 			error = uiomove(NULL, error, uio);
1802 			if (error)
1803 				break;
1804 		} else {
1805 			error = uiomove(NULL, error, uio);
1806 			break;
1807 		}
1808 	}
1809 	cuse_cmd_unlock(pccmd);
1810 
1811 	/* restore segment flag */
1812 	uio->uio_segflg = UIO_USERSPACE;
1813 
1814 	if (error == EWOULDBLOCK)
1815 		cuse_client_kqfilter_poll(dev, pcc);
1816 
1817 	return (error);
1818 }
1819 
1820 int
1821 cuse_client_ioctl(struct cdev *dev, unsigned long cmd,
1822     caddr_t data, int fflag, struct thread *td)
1823 {
1824 	struct cuse_client_command *pccmd;
1825 	struct cuse_client *pcc;
1826 	struct cuse_server *pcs;
1827 	int error;
1828 	int len;
1829 
1830 	error = cuse_client_get(&pcc);
1831 	if (error != 0)
1832 		return (error);
1833 
1834 	len = IOCPARM_LEN(cmd);
1835 	if (len > CUSE_BUFFER_MAX)
1836 		return (ENOMEM);
1837 
1838 	pccmd = &pcc->cmds[CUSE_CMD_IOCTL];
1839 	pcs = pcc->server;
1840 
1841 	cuse_cmd_lock(pccmd);
1842 
1843 	if (cmd & (IOC_IN | IOC_VOID))
1844 		memcpy(pcc->ioctl_buffer, data, len);
1845 
1846 	/*
1847 	 * When the ioctl-length is zero drivers can pass information
1848 	 * through the data pointer of the ioctl. Make sure this information
1849 	 * is forwarded to the driver.
1850 	 */
1851 
1852 	cuse_server_lock(pcs);
1853 	cuse_client_send_command_locked(pccmd,
1854 	    (len == 0) ? *(long *)data : CUSE_BUF_MIN_PTR,
1855 	    (unsigned long)cmd, pcc->fflags,
1856 	    (fflag & O_NONBLOCK) ? IO_NDELAY : 0);
1857 
1858 	error = cuse_client_receive_command_locked(pccmd, data, len);
1859 	cuse_server_unlock(pcs);
1860 
1861 	if (error < 0) {
1862 		error = cuse_convert_error(error);
1863 	} else {
1864 		error = 0;
1865 	}
1866 
1867 	if (cmd & IOC_OUT)
1868 		memcpy(data, pcc->ioctl_buffer, len);
1869 
1870 	cuse_cmd_unlock(pccmd);
1871 
1872 	if (error == EWOULDBLOCK)
1873 		cuse_client_kqfilter_poll(dev, pcc);
1874 
1875 	return (error);
1876 }
1877 
1878 static int
1879 cuse_client_poll(struct cdev *dev, int events, struct thread *td)
1880 {
1881 	struct cuse_client_command *pccmd;
1882 	struct cuse_client *pcc;
1883 	struct cuse_server *pcs;
1884 	unsigned long temp;
1885 	int error;
1886 	int revents;
1887 
1888 	error = cuse_client_get(&pcc);
1889 	if (error != 0)
1890 		goto pollnval;
1891 
1892 	temp = 0;
1893 	pcs = pcc->server;
1894 
1895 	if (events & (POLLPRI | POLLIN | POLLRDNORM))
1896 		temp |= CUSE_POLL_READ;
1897 
1898 	if (events & (POLLOUT | POLLWRNORM))
1899 		temp |= CUSE_POLL_WRITE;
1900 
1901 	if (events & POLLHUP)
1902 		temp |= CUSE_POLL_ERROR;
1903 
1904 	pccmd = &pcc->cmds[CUSE_CMD_POLL];
1905 
1906 	cuse_cmd_lock(pccmd);
1907 
1908 	/* Need to selrecord() first to not loose any events. */
1909 	if (temp != 0 && td != NULL)
1910 		selrecord(td, &pcs->selinfo);
1911 
1912 	cuse_server_lock(pcs);
1913 	cuse_client_send_command_locked(pccmd,
1914 	    0, temp, pcc->fflags, IO_NDELAY);
1915 
1916 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1917 	cuse_server_unlock(pcs);
1918 
1919 	cuse_cmd_unlock(pccmd);
1920 
1921 	if (error < 0) {
1922 		goto pollnval;
1923 	} else {
1924 		revents = 0;
1925 		if (error & CUSE_POLL_READ)
1926 			revents |= (events & (POLLPRI | POLLIN | POLLRDNORM));
1927 		if (error & CUSE_POLL_WRITE)
1928 			revents |= (events & (POLLOUT | POLLWRNORM));
1929 		if (error & CUSE_POLL_ERROR)
1930 			revents |= (events & POLLHUP);
1931 	}
1932 	return (revents);
1933 
1934 pollnval:
1935 	/* XXX many clients don't understand POLLNVAL */
1936 	return (events & (POLLHUP | POLLPRI | POLLIN |
1937 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1938 }
1939 
1940 static int
1941 cuse_client_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1942     vm_size_t size, struct vm_object **object, int nprot)
1943 {
1944 	struct cuse_client *pcc;
1945 	int error;
1946 
1947 	error = cuse_client_get(&pcc);
1948 	if (error != 0)
1949 		return (error);
1950 
1951 	return (cuse_common_mmap_single(pcc->server, offset, size, object));
1952 }
1953 
1954 static void
1955 cuse_client_kqfilter_read_detach(struct knote *kn)
1956 {
1957 	struct cuse_client *pcc;
1958 	struct cuse_server *pcs;
1959 
1960 	pcc = kn->kn_hook;
1961 	pcs = pcc->server;
1962 
1963 	cuse_server_lock(pcs);
1964 	knlist_remove(&pcs->selinfo.si_note, kn, 1);
1965 	cuse_server_unlock(pcs);
1966 }
1967 
1968 static void
1969 cuse_client_kqfilter_write_detach(struct knote *kn)
1970 {
1971 	struct cuse_client *pcc;
1972 	struct cuse_server *pcs;
1973 
1974 	pcc = kn->kn_hook;
1975 	pcs = pcc->server;
1976 
1977 	cuse_server_lock(pcs);
1978 	knlist_remove(&pcs->selinfo.si_note, kn, 1);
1979 	cuse_server_unlock(pcs);
1980 }
1981 
1982 static int
1983 cuse_client_kqfilter_read_event(struct knote *kn, long hint)
1984 {
1985 	struct cuse_client *pcc;
1986 
1987 	pcc = kn->kn_hook;
1988 
1989 	mtx_assert(&pcc->server->mtx, MA_OWNED);
1990 
1991 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_READ) ? 1 : 0);
1992 }
1993 
1994 static int
1995 cuse_client_kqfilter_write_event(struct knote *kn, long hint)
1996 {
1997 	struct cuse_client *pcc;
1998 
1999 	pcc = kn->kn_hook;
2000 
2001 	mtx_assert(&pcc->server->mtx, MA_OWNED);
2002 
2003 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_WRITE) ? 1 : 0);
2004 }
2005 
2006 static int
2007 cuse_client_kqfilter(struct cdev *dev, struct knote *kn)
2008 {
2009 	struct cuse_client *pcc;
2010 	struct cuse_server *pcs;
2011 	int error;
2012 
2013 	error = cuse_client_get(&pcc);
2014 	if (error != 0)
2015 		return (error);
2016 
2017 	pcs = pcc->server;
2018 
2019 	cuse_server_lock(pcs);
2020 	switch (kn->kn_filter) {
2021 	case EVFILT_READ:
2022 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_READ;
2023 		kn->kn_hook = pcc;
2024 		kn->kn_fop = &cuse_client_kqfilter_read_ops;
2025 		knlist_add(&pcs->selinfo.si_note, kn, 1);
2026 		break;
2027 	case EVFILT_WRITE:
2028 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_WRITE;
2029 		kn->kn_hook = pcc;
2030 		kn->kn_fop = &cuse_client_kqfilter_write_ops;
2031 		knlist_add(&pcs->selinfo.si_note, kn, 1);
2032 		break;
2033 	default:
2034 		error = EINVAL;
2035 		break;
2036 	}
2037 	cuse_server_unlock(pcs);
2038 
2039 	if (error == 0)
2040 		cuse_client_kqfilter_poll(dev, pcc);
2041 	return (error);
2042 }
2043