xref: /freebsd/sys/fs/fuse/fuse_ipc.c (revision 036d2e814bf0f5d88ffb4b24c159320894541757)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  * * Redistributions of source code must retain the above copyright
12  *   notice, this list of conditions and the following disclaimer.
13  * * Redistributions in binary form must reproduce the above
14  *   copyright notice, this list of conditions and the following disclaimer
15  *   in the documentation and/or other materials provided with the
16  *   distribution.
17  * * Neither the name of Google Inc. nor the names of its
18  *   contributors may be used to endorse or promote products derived from
19  *   this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Copyright (C) 2005 Csaba Henk.
34  * All rights reserved.
35  *
36  * Copyright (c) 2019 The FreeBSD Foundation
37  *
38  * Portions of this software were developed by BFF Storage Systems, LLC under
39  * sponsorship from the FreeBSD Foundation.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  *
50  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  */
62 
63 #include <sys/cdefs.h>
64 __FBSDID("$FreeBSD$");
65 
66 #include <sys/param.h>
67 #include <sys/module.h>
68 #include <sys/systm.h>
69 #include <sys/counter.h>
70 #include <sys/errno.h>
71 #include <sys/kernel.h>
72 #include <sys/conf.h>
73 #include <sys/uio.h>
74 #include <sys/malloc.h>
75 #include <sys/queue.h>
76 #include <sys/lock.h>
77 #include <sys/sx.h>
78 #include <sys/mutex.h>
79 #include <sys/proc.h>
80 #include <sys/mount.h>
81 #include <sys/sdt.h>
82 #include <sys/vnode.h>
83 #include <sys/signalvar.h>
84 #include <sys/syscallsubr.h>
85 #include <sys/sysctl.h>
86 #include <vm/uma.h>
87 
88 #include "fuse.h"
89 #include "fuse_node.h"
90 #include "fuse_ipc.h"
91 #include "fuse_internal.h"
92 
SDT_PROVIDER_DECLARE(fusefs);
/*
 * Fuse trace probe:
 * arg0: verbosity.  Higher numbers give more verbose messages
 * arg1: Textual message
 */
SDT_PROBE_DEFINE2(fusefs, , ipc, trace, "int", "char*");

/* Forward declarations of file-local helpers that are defined further down */
static void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
    struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred);
static void fuse_interrupt_send(struct fuse_ticket *otick, int err);
static struct fuse_ticket *fticket_alloc(struct fuse_data *data);
static void fticket_refresh(struct fuse_ticket *ftick);
static void fticket_destroy(struct fuse_ticket *ftick);
static int fticket_wait_answer(struct fuse_ticket *ftick);
static inline int
fticket_aw_pull_uio(struct fuse_ticket *ftick,
    struct uio *uio);

static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen);

static fuse_handler_t fuse_standard_handler;

/*
 * Ticket counter, exported read-only through sysctl.  It is incremented in
 * fticket_ctor() and decremented in fticket_dtor().
 */
static counter_u64_t fuse_ticket_count;
SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, ticket_count, CTLFLAG_RD,
    &fuse_ticket_count, "Number of allocated tickets");

/*
 * Run-time tunable consulted by fiov_adjust(): how much slack (allocated
 * minus requested bytes) a fuse_iov may keep before it becomes a candidate
 * for reallocation.  A negative value disables that check.
 */
static long fuse_iov_permanent_bufsize = 1 << 19;

SYSCTL_LONG(_vfs_fusefs, OID_AUTO, iov_permanent_bufsize, CTLFLAG_RW,
    &fuse_iov_permanent_bufsize, 0,
    "limit for permanently stored buffer size for fuse_iovs");
/*
 * Value loaded into fuse_iov.credit by fiov_init() and after every
 * reallocation in fiov_adjust().
 */
static int fuse_iov_credit = 16;

SYSCTL_INT(_vfs_fusefs, OID_AUTO, iov_credit, CTLFLAG_RW,
    &fuse_iov_credit, 0,
    "how many times is an oversized fuse_iov tolerated");

MALLOC_DEFINE(M_FUSEMSG, "fuse_msgbuf", "fuse message buffer");
/* UMA zone that backs struct fuse_ticket; created in fuse_ipc_init() */
static uma_zone_t ticket_zone;
133 
134 /*
135  * TODO: figure out how to timeout INTERRUPT requests, because the daemon may
136  * leagally never respond
137  */
138 static int
139 fuse_interrupt_callback(struct fuse_ticket *tick, struct uio *uio)
140 {
141 	struct fuse_ticket *otick, *x_tick;
142 	struct fuse_interrupt_in *fii;
143 	struct fuse_data *data = tick->tk_data;
144 	bool found = false;
145 
146 	fii = (struct fuse_interrupt_in*)((char*)tick->tk_ms_fiov.base +
147 		sizeof(struct fuse_in_header));
148 
149 	fuse_lck_mtx_lock(data->aw_mtx);
150 	TAILQ_FOREACH_SAFE(otick, &data->aw_head, tk_aw_link, x_tick) {
151 		if (otick->tk_unique == fii->unique) {
152 			found = true;
153 			break;
154 		}
155 	}
156 	fuse_lck_mtx_unlock(data->aw_mtx);
157 
158 	if (!found) {
159 		/* Original is already complete.  Just return */
160 		return 0;
161 	}
162 
163 	/* Clear the original ticket's interrupt association */
164 	otick->irq_unique = 0;
165 
166 	if (tick->tk_aw_ohead.error == ENOSYS) {
167 		fsess_set_notimpl(data->mp, FUSE_INTERRUPT);
168 		return 0;
169 	} else if (tick->tk_aw_ohead.error == EAGAIN) {
170 		/*
171 		 * There are two reasons we might get this:
172 		 * 1) the daemon received the INTERRUPT request before the
173 		 *    original, or
174 		 * 2) the daemon received the INTERRUPT request after it
175 		 *    completed the original request.
176 		 * In the first case we should re-send the INTERRUPT.  In the
177 		 * second, we should ignore it.
178 		 */
179 		/* Resend */
180 		fuse_interrupt_send(otick, EINTR);
181 		return 0;
182 	} else {
183 		/* Illegal FUSE_INTERRUPT response */
184 		return EINVAL;
185 	}
186 }
187 
188 /* Interrupt the operation otick.  Return err as its error code */
189 void
190 fuse_interrupt_send(struct fuse_ticket *otick, int err)
191 {
192 	struct fuse_dispatcher fdi;
193 	struct fuse_interrupt_in *fii;
194 	struct fuse_in_header *ftick_hdr;
195 	struct fuse_data *data = otick->tk_data;
196 	struct fuse_ticket *tick, *xtick;
197 	struct ucred reused_creds;
198 	gid_t reused_groups[1];
199 
200 	if (otick->irq_unique == 0) {
201 		/*
202 		 * If the daemon hasn't yet received otick, then we can answer
203 		 * it ourselves and return.
204 		 */
205 		fuse_lck_mtx_lock(data->ms_mtx);
206 		STAILQ_FOREACH_SAFE(tick, &otick->tk_data->ms_head, tk_ms_link,
207 			xtick) {
208 			if (tick == otick) {
209 				STAILQ_REMOVE(&otick->tk_data->ms_head, tick,
210 					fuse_ticket, tk_ms_link);
211 				otick->tk_data->ms_count--;
212 				otick->tk_ms_link.stqe_next = NULL;
213 				fuse_lck_mtx_unlock(data->ms_mtx);
214 
215 				fuse_lck_mtx_lock(otick->tk_aw_mtx);
216 				if (!fticket_answered(otick)) {
217 					fticket_set_answered(otick);
218 					otick->tk_aw_errno = err;
219 					wakeup(otick);
220 				}
221 				fuse_lck_mtx_unlock(otick->tk_aw_mtx);
222 
223 				fuse_ticket_drop(tick);
224 				return;
225 			}
226 		}
227 		fuse_lck_mtx_unlock(data->ms_mtx);
228 
229 		/*
230 		 * If the fuse daemon doesn't support interrupts, then there's
231 		 * nothing more that we can do
232 		 */
233 		if (!fsess_isimpl(data->mp, FUSE_INTERRUPT))
234 			return;
235 
236 		/*
237 		 * If the fuse daemon has already received otick, then we must
238 		 * send FUSE_INTERRUPT.
239 		 */
240 		ftick_hdr = fticket_in_header(otick);
241 		reused_creds.cr_uid = ftick_hdr->uid;
242 		reused_groups[0] = ftick_hdr->gid;
243 		reused_creds.cr_groups = reused_groups;
244 		fdisp_init(&fdi, sizeof(*fii));
245 		fdisp_make_pid(&fdi, FUSE_INTERRUPT, data, ftick_hdr->nodeid,
246 			ftick_hdr->pid, &reused_creds);
247 
248 		fii = fdi.indata;
249 		fii->unique = otick->tk_unique;
250 		fuse_insert_callback(fdi.tick, fuse_interrupt_callback);
251 
252 		otick->irq_unique = fdi.tick->tk_unique;
253 		/* Interrupt ops should be delivered ASAP */
254 		fuse_insert_message(fdi.tick, true);
255 		fdisp_destroy(&fdi);
256 	} else {
257 		/* This ticket has already been interrupted */
258 	}
259 }
260 
261 void
262 fiov_init(struct fuse_iov *fiov, size_t size)
263 {
264 	uint32_t msize = FU_AT_LEAST(size);
265 
266 	fiov->len = 0;
267 
268 	fiov->base = malloc(msize, M_FUSEMSG, M_WAITOK | M_ZERO);
269 
270 	fiov->allocated_size = msize;
271 	fiov->credit = fuse_iov_credit;
272 }
273 
/*
 * Free the buffer owned by fiov.  The fuse_iov structure itself is not
 * freed, and base is asserted to be non-NULL.
 */
void
fiov_teardown(struct fuse_iov *fiov)
{
	MPASS(fiov->base != NULL);
	free(fiov->base, M_FUSEMSG);
}
280 
281 void
282 fiov_adjust(struct fuse_iov *fiov, size_t size)
283 {
284 	if (fiov->allocated_size < size ||
285 	    (fuse_iov_permanent_bufsize >= 0 &&
286 	    fiov->allocated_size - size > fuse_iov_permanent_bufsize &&
287 	    --fiov->credit < 0)) {
288 
289 		fiov->base = realloc(fiov->base, FU_AT_LEAST(size), M_FUSEMSG,
290 		    M_WAITOK | M_ZERO);
291 		if (!fiov->base) {
292 			panic("FUSE: realloc failed");
293 		}
294 		fiov->allocated_size = FU_AT_LEAST(size);
295 		fiov->credit = fuse_iov_credit;
296 		/* Clear data buffer after reallocation */
297 		bzero(fiov->base, size);
298 	} else if (size > fiov->len) {
299 		/* Clear newly extended portion of data buffer */
300 		bzero((char*)fiov->base + fiov->len, size - fiov->len);
301 	}
302 	fiov->len = size;
303 }
304 
/*
 * Resize the fiov if needed, and reset its length to zero.  This is
 * fiov_adjust() with a requested size of 0.
 */
void
fiov_refresh(struct fuse_iov *fiov)
{
	fiov_adjust(fiov, 0);
}
311 
312 static int
313 fticket_ctor(void *mem, int size, void *arg, int flags)
314 {
315 	struct fuse_ticket *ftick = mem;
316 	struct fuse_data *data = arg;
317 
318 	FUSE_ASSERT_MS_DONE(ftick);
319 	FUSE_ASSERT_AW_DONE(ftick);
320 
321 	ftick->tk_data = data;
322 
323 	if (ftick->tk_unique != 0)
324 		fticket_refresh(ftick);
325 
326 	/* May be truncated to 32 bits */
327 	ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
328 	if (ftick->tk_unique == 0)
329 		ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
330 
331 	ftick->irq_unique = 0;
332 
333 	refcount_init(&ftick->tk_refcount, 1);
334 	counter_u64_add(fuse_ticket_count, 1);
335 
336 	return 0;
337 }
338 
/*
 * Zone destructor for tickets: asserts that the ticket is off both queues
 * and decrements the global ticket counter.
 */
static void
fticket_dtor(void *mem, int size, void *arg)
{
	/*
	 * ftick is only referenced by the assertion macros below; presumably
	 * those expand to nothing without INVARIANTS -- confirm against their
	 * definitions.
	 */
#ifdef INVARIANTS
	struct fuse_ticket *ftick = mem;
#endif

	FUSE_ASSERT_MS_DONE(ftick);
	FUSE_ASSERT_AW_DONE(ftick);

	counter_u64_add(fuse_ticket_count, -1);
}
351 
352 static int
353 fticket_init(void *mem, int size, int flags)
354 {
355 	struct fuse_ticket *ftick = mem;
356 
357 	bzero(ftick, sizeof(struct fuse_ticket));
358 
359 	fiov_init(&ftick->tk_ms_fiov, sizeof(struct fuse_in_header));
360 	ftick->tk_ms_type = FT_M_FIOV;
361 
362 	mtx_init(&ftick->tk_aw_mtx, "fuse answer delivery mutex", NULL, MTX_DEF);
363 	fiov_init(&ftick->tk_aw_fiov, 0);
364 	ftick->tk_aw_type = FT_A_FIOV;
365 
366 	return 0;
367 }
368 
/*
 * Zone fini routine for tickets: releases what fticket_init() set up, i.e.
 * both fuse_iov buffers and the answer mutex.
 */
static void
fticket_fini(void *mem, int size)
{
	struct fuse_ticket *ftick = mem;

	fiov_teardown(&ftick->tk_ms_fiov);
	fiov_teardown(&ftick->tk_aw_fiov);
	mtx_destroy(&ftick->tk_aw_mtx);
}
378 
/*
 * Allocate a ticket from ticket_zone (M_WAITOK).  data is handed to the
 * zone constructor, fticket_ctor(), as its arg parameter.
 */
static inline struct fuse_ticket *
fticket_alloc(struct fuse_data *data)
{
	return uma_zalloc_arg(ticket_zone, data, M_WAITOK);
}
384 
385 static inline void
386 fticket_destroy(struct fuse_ticket *ftick)
387 {
388 	return uma_zfree(ticket_zone, ftick);
389 }
390 
391 static inline
392 void
393 fticket_refresh(struct fuse_ticket *ftick)
394 {
395 	FUSE_ASSERT_MS_DONE(ftick);
396 	FUSE_ASSERT_AW_DONE(ftick);
397 
398 	fiov_refresh(&ftick->tk_ms_fiov);
399 	ftick->tk_ms_bufdata = NULL;
400 	ftick->tk_ms_bufsize = 0;
401 	ftick->tk_ms_type = FT_M_FIOV;
402 
403 	bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header));
404 
405 	fiov_refresh(&ftick->tk_aw_fiov);
406 	ftick->tk_aw_errno = 0;
407 	ftick->tk_aw_bufdata = NULL;
408 	ftick->tk_aw_bufsize = 0;
409 	ftick->tk_aw_type = FT_A_FIOV;
410 
411 	ftick->tk_flag = 0;
412 }
413 
414 /* Prepar the ticket to be reused, but don't clear its data buffers */
415 static inline void
416 fticket_reset(struct fuse_ticket *ftick)
417 {
418 	FUSE_ASSERT_MS_DONE(ftick);
419 	FUSE_ASSERT_AW_DONE(ftick);
420 
421 	ftick->tk_ms_bufdata = NULL;
422 	ftick->tk_ms_bufsize = 0;
423 	ftick->tk_ms_type = FT_M_FIOV;
424 
425 	bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header));
426 
427 	ftick->tk_aw_errno = 0;
428 	ftick->tk_aw_bufdata = NULL;
429 	ftick->tk_aw_bufsize = 0;
430 	ftick->tk_aw_type = FT_A_FIOV;
431 
432 	ftick->tk_flag = 0;
433 }
434 
/*
 * Sleep until ftick has been answered.
 *
 * Returns 0 when the ticket was answered without incident.  Otherwise
 * returns ENOTCONN if the session is dead, ETIMEDOUT if the sleep timed
 * out, the msleep() error (e.g. EINTR/ERESTART) if the wait was interrupted
 * by a signal, or ENXIO if the thread woke up without an error but the
 * ticket is still unanswered.  Note that after an interruption the saved
 * EINTR/ERESTART is returned even if the ticket gets answered during the
 * retry.
 */
static int
fticket_wait_answer(struct fuse_ticket *ftick)
{
	struct thread *td = curthread;
	sigset_t blockedset, oldset;
	int err = 0, stops_deferred;
	struct fuse_data *data = ftick->tk_data;
	bool interrupted = false;

	/*
	 * Pick the set of signals to block while sleeping: none if the session
	 * implements FUSE_INTERRUPT and has FSESS_INTR set, everything
	 * otherwise.
	 */
	if (fsess_isimpl(ftick->tk_data->mp, FUSE_INTERRUPT) &&
	    data->dataflags & FSESS_INTR) {
		SIGEMPTYSET(blockedset);
	} else {
		/* Block all signals except (implicitly) SIGKILL */
		SIGFILLSET(blockedset);
	}
	stops_deferred = sigdeferstop(SIGDEFERSTOP_SILENT);
	/* A NULL new set only fetches the current mask into oldset */
	kern_sigprocmask(td, SIG_BLOCK, NULL, &oldset, 0);

	fuse_lck_mtx_lock(ftick->tk_aw_mtx);

retry:
	if (fticket_answered(ftick)) {
		goto out;
	}

	if (fdata_get_dead(data)) {
		err = ENOTCONN;
		fticket_set_answered(ftick);
		goto out;
	}
	/* Apply the chosen mask only for the duration of the sleep */
	kern_sigprocmask(td, SIG_BLOCK, &blockedset, NULL, 0);
	err = msleep(ftick, &ftick->tk_aw_mtx, PCATCH, "fu_ans",
	    data->daemon_timeout * hz);
	kern_sigprocmask(td, SIG_SETMASK, &oldset, NULL, 0);
	if (err == EWOULDBLOCK) {
		SDT_PROBE2(fusefs, , ipc, trace, 3,
			"fticket_wait_answer: EWOULDBLOCK");
#ifdef XXXIP				/* die conditionally */
		if (!fdata_get_dead(data)) {
			fdata_set_dead(data);
		}
#endif
		err = ETIMEDOUT;
		fticket_set_answered(ftick);
	} else if ((err == EINTR || err == ERESTART)) {
		/*
		 * Whether we get EINTR or ERESTART depends on whether
		 * SA_RESTART was set by sigaction(2).
		 *
		 * Try to interrupt the operation and wait for an EINTR response
		 * to the original operation.  If the file system does not
		 * support FUSE_INTERRUPT, then we'll just wait for it to
		 * complete like normal.  If it does support FUSE_INTERRUPT,
		 * then it will either respond EINTR to the original operation,
		 * or EAGAIN to the interrupt.
		 */
		sigset_t tmpset;

		SDT_PROBE2(fusefs, , ipc, trace, 4,
			"fticket_wait_answer: interrupt");
		/* fuse_interrupt_send() takes tk_aw_mtx itself, so drop it */
		fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
		fuse_interrupt_send(ftick, err);

		/* Snapshot the union of process-wide and per-thread signals */
		PROC_LOCK(td->td_proc);
		mtx_lock(&td->td_proc->p_sigacts->ps_mtx);
		tmpset = td->td_proc->p_siglist;
		SIGSETOR(tmpset, td->td_siglist);
		mtx_unlock(&td->td_proc->p_sigacts->ps_mtx);
		PROC_UNLOCK(td->td_proc);

		fuse_lck_mtx_lock(ftick->tk_aw_mtx);
		if (!interrupted && !SIGISMEMBER(tmpset, SIGKILL)) {
			/*
			 * Block all signals while we wait for an interrupt
			 * response.  The protocol doesn't discriminate between
			 * different signals.
			 */
			SIGFILLSET(blockedset);
			interrupted = true;
			goto retry;
		} else {
			/*
			 * Return immediately for fatal signals, or if this is
			 * the second interruption.  We should only be
			 * interrupted twice if the thread is stopped, for
			 * example during sigexit.
			 */
		}
	} else if (err) {
		SDT_PROBE2(fusefs, , ipc, trace, 6,
			"fticket_wait_answer: other error");
	} else {
		SDT_PROBE2(fusefs, , ipc, trace, 7, "fticket_wait_answer: OK");
	}
out:
	/* No error, yet nobody marked the ticket answered: report ENXIO */
	if (!(err || fticket_answered(ftick))) {
		SDT_PROBE2(fusefs, , ipc, trace, 1,
			"FUSE: requester was woken up but still no answer");
		err = ENXIO;
	}
	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
	sigallowstop(stops_deferred);

	return err;
}
541 
542 static	inline
543 int
544 fticket_aw_pull_uio(struct fuse_ticket *ftick, struct uio *uio)
545 {
546 	int err = 0;
547 	size_t len = uio_resid(uio);
548 
549 	if (len) {
550 		switch (ftick->tk_aw_type) {
551 		case FT_A_FIOV:
552 			fiov_adjust(fticket_resp(ftick), len);
553 			err = uiomove(fticket_resp(ftick)->base, len, uio);
554 			break;
555 
556 		case FT_A_BUF:
557 			ftick->tk_aw_bufsize = len;
558 			err = uiomove(ftick->tk_aw_bufdata, len, uio);
559 			break;
560 
561 		default:
562 			panic("FUSE: unknown answer type for ticket %p", ftick);
563 		}
564 	}
565 	return err;
566 }
567 
568 int
569 fticket_pull(struct fuse_ticket *ftick, struct uio *uio)
570 {
571 	int err = 0;
572 
573 	if (ftick->tk_aw_ohead.error) {
574 		return 0;
575 	}
576 	err = fuse_body_audit(ftick, uio_resid(uio));
577 	if (!err) {
578 		err = fticket_aw_pull_uio(ftick, uio);
579 	}
580 	return err;
581 }
582 
583 struct fuse_data *
584 fdata_alloc(struct cdev *fdev, struct ucred *cred)
585 {
586 	struct fuse_data *data;
587 
588 	data = malloc(sizeof(struct fuse_data), M_FUSEMSG, M_WAITOK | M_ZERO);
589 
590 	data->fdev = fdev;
591 	mtx_init(&data->ms_mtx, "fuse message list mutex", NULL, MTX_DEF);
592 	STAILQ_INIT(&data->ms_head);
593 	data->ms_count = 0;
594 	knlist_init_mtx(&data->ks_rsel.si_note, &data->ms_mtx);
595 	mtx_init(&data->aw_mtx, "fuse answer list mutex", NULL, MTX_DEF);
596 	TAILQ_INIT(&data->aw_head);
597 	data->daemoncred = crhold(cred);
598 	data->daemon_timeout = FUSE_DEFAULT_DAEMON_TIMEOUT;
599 	sx_init(&data->rename_lock, "fuse rename lock");
600 	data->ref = 1;
601 
602 	return data;
603 }
604 
605 void
606 fdata_trydestroy(struct fuse_data *data)
607 {
608 	data->ref--;
609 	MPASS(data->ref >= 0);
610 	if (data->ref != 0)
611 		return;
612 
613 	/* Driving off stage all that stuff thrown at device... */
614 	sx_destroy(&data->rename_lock);
615 	crfree(data->daemoncred);
616 	mtx_destroy(&data->aw_mtx);
617 	knlist_delete(&data->ks_rsel.si_note, curthread, 0);
618 	knlist_destroy(&data->ks_rsel.si_note);
619 	mtx_destroy(&data->ms_mtx);
620 
621 	free(data, M_FUSEMSG);
622 }
623 
624 void
625 fdata_set_dead(struct fuse_data *data)
626 {
627 	FUSE_LOCK();
628 	if (fdata_get_dead(data)) {
629 		FUSE_UNLOCK();
630 		return;
631 	}
632 	fuse_lck_mtx_lock(data->ms_mtx);
633 	data->dataflags |= FSESS_DEAD;
634 	wakeup_one(data);
635 	selwakeuppri(&data->ks_rsel, PZERO + 1);
636 	wakeup(&data->ticketer);
637 	fuse_lck_mtx_unlock(data->ms_mtx);
638 	FUSE_UNLOCK();
639 }
640 
641 struct fuse_ticket *
642 fuse_ticket_fetch(struct fuse_data *data)
643 {
644 	int err = 0;
645 	struct fuse_ticket *ftick;
646 
647 	ftick = fticket_alloc(data);
648 
649 	if (!(data->dataflags & FSESS_INITED)) {
650 		/* Sleep until get answer for INIT messsage */
651 		FUSE_LOCK();
652 		if (!(data->dataflags & FSESS_INITED) && data->ticketer > 2) {
653 			err = msleep(&data->ticketer, &fuse_mtx, PCATCH | PDROP,
654 			    "fu_ini", 0);
655 			if (err)
656 				fdata_set_dead(data);
657 		} else
658 			FUSE_UNLOCK();
659 	}
660 	return ftick;
661 }
662 
663 int
664 fuse_ticket_drop(struct fuse_ticket *ftick)
665 {
666 	int die;
667 
668 	die = refcount_release(&ftick->tk_refcount);
669 	if (die)
670 		fticket_destroy(ftick);
671 
672 	return die;
673 }
674 
675 void
676 fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t * handler)
677 {
678 	if (fdata_get_dead(ftick->tk_data)) {
679 		return;
680 	}
681 	ftick->tk_aw_handler = handler;
682 
683 	fuse_lck_mtx_lock(ftick->tk_data->aw_mtx);
684 	fuse_aw_push(ftick);
685 	fuse_lck_mtx_unlock(ftick->tk_data->aw_mtx);
686 }
687 
688 /*
689  * Insert a new upgoing ticket into the message queue
690  *
691  * If urgent is true, insert at the front of the queue.  Otherwise, insert in
692  * FIFO order.
693  */
694 void
695 fuse_insert_message(struct fuse_ticket *ftick, bool urgent)
696 {
697 	if (ftick->tk_flag & FT_DIRTY) {
698 		panic("FUSE: ticket reused without being refreshed");
699 	}
700 	ftick->tk_flag |= FT_DIRTY;
701 
702 	if (fdata_get_dead(ftick->tk_data)) {
703 		return;
704 	}
705 	fuse_lck_mtx_lock(ftick->tk_data->ms_mtx);
706 	if (urgent)
707 		fuse_ms_push_head(ftick);
708 	else
709 		fuse_ms_push(ftick);
710 	wakeup_one(ftick->tk_data);
711 	selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1);
712 	KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0);
713 	fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx);
714 }
715 
716 static int
717 fuse_body_audit(struct fuse_ticket *ftick, size_t blen)
718 {
719 	int err = 0;
720 	enum fuse_opcode opcode;
721 
722 	opcode = fticket_opcode(ftick);
723 
724 	switch (opcode) {
725 	case FUSE_BMAP:
726 		err = (blen == sizeof(struct fuse_bmap_out)) ? 0 : EINVAL;
727 		break;
728 
729 	case FUSE_LINK:
730 	case FUSE_LOOKUP:
731 	case FUSE_MKDIR:
732 	case FUSE_MKNOD:
733 	case FUSE_SYMLINK:
734 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
735 			err = (blen == sizeof(struct fuse_entry_out)) ?
736 				0 : EINVAL;
737 		} else {
738 			err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE) ? 0 : EINVAL;
739 		}
740 		break;
741 
742 	case FUSE_FORGET:
743 		panic("FUSE: a handler has been intalled for FUSE_FORGET");
744 		break;
745 
746 	case FUSE_GETATTR:
747 	case FUSE_SETATTR:
748 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
749 			err = (blen == sizeof(struct fuse_attr_out)) ?
750 			  0 : EINVAL;
751 		} else {
752 			err = (blen == FUSE_COMPAT_ATTR_OUT_SIZE) ? 0 : EINVAL;
753 		}
754 		break;
755 
756 	case FUSE_READLINK:
757 		err = (PAGE_SIZE >= blen) ? 0 : EINVAL;
758 		break;
759 
760 	case FUSE_UNLINK:
761 		err = (blen == 0) ? 0 : EINVAL;
762 		break;
763 
764 	case FUSE_RMDIR:
765 		err = (blen == 0) ? 0 : EINVAL;
766 		break;
767 
768 	case FUSE_RENAME:
769 		err = (blen == 0) ? 0 : EINVAL;
770 		break;
771 
772 	case FUSE_OPEN:
773 		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
774 		break;
775 
776 	case FUSE_READ:
777 		err = (((struct fuse_read_in *)(
778 		    (char *)ftick->tk_ms_fiov.base +
779 		    sizeof(struct fuse_in_header)
780 		    ))->size >= blen) ? 0 : EINVAL;
781 		break;
782 
783 	case FUSE_WRITE:
784 		err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL;
785 		break;
786 
787 	case FUSE_STATFS:
788 		if (fuse_libabi_geq(ftick->tk_data, 7, 4)) {
789 			err = (blen == sizeof(struct fuse_statfs_out)) ?
790 			  0 : EINVAL;
791 		} else {
792 			err = (blen == FUSE_COMPAT_STATFS_SIZE) ? 0 : EINVAL;
793 		}
794 		break;
795 
796 	case FUSE_RELEASE:
797 		err = (blen == 0) ? 0 : EINVAL;
798 		break;
799 
800 	case FUSE_FSYNC:
801 		err = (blen == 0) ? 0 : EINVAL;
802 		break;
803 
804 	case FUSE_SETXATTR:
805 		err = (blen == 0) ? 0 : EINVAL;
806 		break;
807 
808 	case FUSE_GETXATTR:
809 	case FUSE_LISTXATTR:
810 		/*
811 		 * These can have varying response lengths, and 0 length
812 		 * isn't necessarily invalid.
813 		 */
814 		err = 0;
815 		break;
816 
817 	case FUSE_REMOVEXATTR:
818 		err = (blen == 0) ? 0 : EINVAL;
819 		break;
820 
821 	case FUSE_FLUSH:
822 		err = (blen == 0) ? 0 : EINVAL;
823 		break;
824 
825 	case FUSE_INIT:
826 		if (blen == sizeof(struct fuse_init_out) ||
827 		    blen == FUSE_COMPAT_INIT_OUT_SIZE ||
828 		    blen == FUSE_COMPAT_22_INIT_OUT_SIZE) {
829 			err = 0;
830 		} else {
831 			err = EINVAL;
832 		}
833 		break;
834 
835 	case FUSE_OPENDIR:
836 		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
837 		break;
838 
839 	case FUSE_READDIR:
840 		err = (((struct fuse_read_in *)(
841 		    (char *)ftick->tk_ms_fiov.base +
842 		    sizeof(struct fuse_in_header)
843 		    ))->size >= blen) ? 0 : EINVAL;
844 		break;
845 
846 	case FUSE_RELEASEDIR:
847 		err = (blen == 0) ? 0 : EINVAL;
848 		break;
849 
850 	case FUSE_FSYNCDIR:
851 		err = (blen == 0) ? 0 : EINVAL;
852 		break;
853 
854 	case FUSE_GETLK:
855 		err = (blen == sizeof(struct fuse_lk_out)) ? 0 : EINVAL;
856 		break;
857 
858 	case FUSE_SETLK:
859 		err = (blen == 0) ? 0 : EINVAL;
860 		break;
861 
862 	case FUSE_SETLKW:
863 		err = (blen == 0) ? 0 : EINVAL;
864 		break;
865 
866 	case FUSE_ACCESS:
867 		err = (blen == 0) ? 0 : EINVAL;
868 		break;
869 
870 	case FUSE_CREATE:
871 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
872 			err = (blen == sizeof(struct fuse_entry_out) +
873 			    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
874 		} else {
875 			err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE +
876 			    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
877 		}
878 		break;
879 
880 	case FUSE_DESTROY:
881 		err = (blen == 0) ? 0 : EINVAL;
882 		break;
883 
884 	default:
885 		panic("FUSE: opcodes out of sync (%d)\n", opcode);
886 	}
887 
888 	return err;
889 }
890 
891 static inline void
892 fuse_setup_ihead(struct fuse_in_header *ihead, struct fuse_ticket *ftick,
893     uint64_t nid, enum fuse_opcode op, size_t blen, pid_t pid,
894     struct ucred *cred)
895 {
896 	ihead->len = sizeof(*ihead) + blen;
897 	ihead->unique = ftick->tk_unique;
898 	ihead->nodeid = nid;
899 	ihead->opcode = op;
900 
901 	ihead->pid = pid;
902 	ihead->uid = cred->cr_uid;
903 	ihead->gid = cred->cr_groups[0];
904 }
905 
906 /*
907  * fuse_standard_handler just pulls indata and wakes up pretender.
908  * Doesn't try to interpret data, that's left for the pretender.
909  * Though might do a basic size verification before the pull-in takes place
910  */
911 
912 static int
913 fuse_standard_handler(struct fuse_ticket *ftick, struct uio *uio)
914 {
915 	int err = 0;
916 
917 	err = fticket_pull(ftick, uio);
918 
919 	fuse_lck_mtx_lock(ftick->tk_aw_mtx);
920 
921 	if (!fticket_answered(ftick)) {
922 		fticket_set_answered(ftick);
923 		ftick->tk_aw_errno = err;
924 		wakeup(ftick);
925 	}
926 	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
927 
928 	return err;
929 }
930 
/*
 * Reinitialize a dispatcher from a pid and node id, without resizing or
 * clearing its data buffers
 *
 * The dispatcher must already own a ticket whose request fiov is at least
 * header + fdip->iosize bytes long; both conditions are asserted.  The mp
 * argument is not used by this function.
 */
static void
fdisp_refresh_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
    struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred)
{
	MPASS(fdip->tick);
	MPASS2(sizeof(fdip->finh) + fdip->iosize <= fdip->tick->tk_ms_fiov.len,
		"Must use fdisp_make_pid to increase the size of the fiov");
	/* Reset bookkeeping only; fticket_reset() leaves the buffers alone */
	fticket_reset(fdip->tick);

	/*
	 * NOTE(review): FUSE_DIMALLOC is defined outside this file; it
	 * presumably points finh/indata into the ticket's request fiov --
	 * confirm against its definition.
	 */
	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
	    fdip->indata, fdip->iosize);

	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid,
		cred);
}
950 
951 /* Initialize a dispatcher from a pid and node id */
952 static void
953 fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
954     struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred)
955 {
956 	if (fdip->tick) {
957 		fticket_refresh(fdip->tick);
958 	} else {
959 		fdip->tick = fuse_ticket_fetch(data);
960 	}
961 
962 	/* FUSE_DIMALLOC will bzero the fiovs when it enlarges them */
963 	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
964 	    fdip->indata, fdip->iosize);
965 
966 	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid, cred);
967 }
968 
969 void
970 fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp,
971     uint64_t nid, struct thread *td, struct ucred *cred)
972 {
973 	struct fuse_data *data = fuse_get_mpdata(mp);
974 	RECTIFY_TDCR(td, cred);
975 
976 	return fdisp_make_pid(fdip, op, data, nid, td->td_proc->p_pid, cred);
977 }
978 
979 void
980 fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
981     struct vnode *vp, struct thread *td, struct ucred *cred)
982 {
983 	struct mount *mp = vnode_mount(vp);
984 	struct fuse_data *data = fuse_get_mpdata(mp);
985 
986 	RECTIFY_TDCR(td, cred);
987 	return fdisp_make_pid(fdip, op, data, VTOI(vp),
988 	    td->td_proc->p_pid, cred);
989 }
990 
991 /* Refresh a fuse_dispatcher so it can be reused, but don't zero its data */
992 void
993 fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
994     struct vnode *vp, struct thread *td, struct ucred *cred)
995 {
996 	RECTIFY_TDCR(td, cred);
997 	return fdisp_refresh_pid(fdip, op, vnode_mount(vp), VTOI(vp),
998 	    td->td_proc->p_pid, cred);
999 }
1000 
/*
 * Refresh the dispatcher's ticket (bookkeeping and both data buffers) via
 * fticket_refresh().  The ticket pointer is dereferenced without a check.
 */
void
fdisp_refresh(struct fuse_dispatcher *fdip)
{
	fticket_refresh(fdip->tick);
}
1006 
1007 SDT_PROBE_DEFINE2(fusefs, , ipc, fdisp_wait_answ_error, "char*", "int");
1008 
1009 int
1010 fdisp_wait_answ(struct fuse_dispatcher *fdip)
1011 {
1012 	int err = 0;
1013 
1014 	fdip->answ_stat = 0;
1015 	fuse_insert_callback(fdip->tick, fuse_standard_handler);
1016 	fuse_insert_message(fdip->tick, false);
1017 
1018 	if ((err = fticket_wait_answer(fdip->tick))) {
1019 		fuse_lck_mtx_lock(fdip->tick->tk_aw_mtx);
1020 
1021 		if (fticket_answered(fdip->tick)) {
1022 			/*
1023 	                 * Just between noticing the interrupt and getting here,
1024 	                 * the standard handler has completed his job.
1025 	                 * So we drop the ticket and exit as usual.
1026 	                 */
1027 			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1028 				"IPC: interrupted, already answered", err);
1029 			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
1030 			goto out;
1031 		} else {
1032 			/*
1033 	                 * So we were faster than the standard handler.
1034 	                 * Then by setting the answered flag we get *him*
1035 	                 * to drop the ticket.
1036 	                 */
1037 			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1038 				"IPC: interrupted, setting to answered", err);
1039 			fticket_set_answered(fdip->tick);
1040 			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
1041 			return err;
1042 		}
1043 	}
1044 
1045 	if (fdip->tick->tk_aw_errno == ENOTCONN) {
1046 		/* The daemon died while we were waiting for a response */
1047 		err = ENOTCONN;
1048 		goto out;
1049 	} else if (fdip->tick->tk_aw_errno) {
1050 		/*
1051 		 * There was some sort of communication error with the daemon
1052 		 * that the client wouldn't understand.
1053 		 */
1054 		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1055 			"IPC: explicit EIO-ing", fdip->tick->tk_aw_errno);
1056 		err = EIO;
1057 		goto out;
1058 	}
1059 	if ((err = fdip->tick->tk_aw_ohead.error)) {
1060 		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1061 			"IPC: setting status", fdip->tick->tk_aw_ohead.error);
1062 		/*
1063 	         * This means a "proper" fuse syscall error.
1064 	         * We record this value so the caller will
1065 	         * be able to know it's not a boring messaging
1066 	         * failure, if she wishes so (and if not, she can
1067 	         * just simply propagate the return value of this routine).
1068 	         * [XXX Maybe a bitflag would do the job too,
1069 	         * if other flags needed, this will be converted thusly.]
1070 	         */
1071 		fdip->answ_stat = err;
1072 		goto out;
1073 	}
1074 	fdip->answ = fticket_resp(fdip->tick)->base;
1075 	fdip->iosize = fticket_resp(fdip->tick)->len;
1076 
1077 	return 0;
1078 
1079 out:
1080 	return err;
1081 }
1082 
/*
 * Module-level setup for the IPC layer: create the ticket UMA zone, wired
 * to fticket_ctor/dtor/init/fini, and allocate the ticket counter.
 */
void
fuse_ipc_init(void)
{
	ticket_zone = uma_zcreate("fuse_ticket", sizeof(struct fuse_ticket),
	    fticket_ctor, fticket_dtor, fticket_init, fticket_fini,
	    UMA_ALIGN_PTR, 0);
	fuse_ticket_count = counter_u64_alloc(M_WAITOK);
}
1091 
/*
 * Undo fuse_ipc_init(): free the ticket counter and destroy the ticket
 * zone.
 */
void
fuse_ipc_destroy(void)
{
	counter_u64_free(fuse_ticket_count);
	uma_zdestroy(ticket_zone);
}
1098