xref: /freebsd/sys/fs/fuse/fuse_ipc.c (revision a03411e84728e9b267056fd31c7d1d9d1dc1b01e)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  * * Redistributions of source code must retain the above copyright
12  *   notice, this list of conditions and the following disclaimer.
13  * * Redistributions in binary form must reproduce the above
14  *   copyright notice, this list of conditions and the following disclaimer
15  *   in the documentation and/or other materials provided with the
16  *   distribution.
17  * * Neither the name of Google Inc. nor the names of its
18  *   contributors may be used to endorse or promote products derived from
19  *   this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Copyright (C) 2005 Csaba Henk.
34  * All rights reserved.
35  *
36  * Copyright (c) 2019 The FreeBSD Foundation
37  *
38  * Portions of this software were developed by BFF Storage Systems, LLC under
39  * sponsorship from the FreeBSD Foundation.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  *
50  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  */
62 
63 #include <sys/param.h>
64 #include <sys/module.h>
65 #include <sys/systm.h>
66 #include <sys/counter.h>
67 #include <sys/errno.h>
68 #include <sys/kernel.h>
69 #include <sys/conf.h>
70 #include <sys/uio.h>
71 #include <sys/malloc.h>
72 #include <sys/queue.h>
73 #include <sys/lock.h>
74 #include <sys/sx.h>
75 #include <sys/mutex.h>
76 #include <sys/proc.h>
77 #include <sys/mount.h>
78 #include <sys/sdt.h>
79 #include <sys/vnode.h>
80 #include <sys/signalvar.h>
81 #include <sys/syscallsubr.h>
82 #include <sys/sysctl.h>
83 #include <vm/uma.h>
84 
85 #include "fuse.h"
86 #include "fuse_node.h"
87 #include "fuse_ipc.h"
88 #include "fuse_internal.h"
89 
90 SDT_PROVIDER_DECLARE(fusefs);
91 /*
92  * Fuse trace probe:
93  * arg0: verbosity.  Higher numbers give more verbose messages
94  * arg1: Textual message
95  */
96 SDT_PROBE_DEFINE2(fusefs, , ipc, trace, "int", "char*");
97 
98 static void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
99     struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred);
100 static void fuse_interrupt_send(struct fuse_ticket *otick, int err);
101 static struct fuse_ticket *fticket_alloc(struct fuse_data *data);
102 static void fticket_refresh(struct fuse_ticket *ftick);
103 static inline void fticket_reset(struct fuse_ticket *ftick);
104 static void fticket_destroy(struct fuse_ticket *ftick);
105 static int fticket_wait_answer(struct fuse_ticket *ftick);
106 static inline int
107 fticket_aw_pull_uio(struct fuse_ticket *ftick,
108     struct uio *uio);
109 
110 static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen);
111 
112 static fuse_handler_t fuse_standard_handler;
113 
114 static counter_u64_t fuse_ticket_count;
115 SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, ticket_count, CTLFLAG_RD,
116     &fuse_ticket_count, "Number of allocated tickets");
117 
118 static long fuse_iov_permanent_bufsize = 1 << 19;
119 
120 SYSCTL_LONG(_vfs_fusefs, OID_AUTO, iov_permanent_bufsize, CTLFLAG_RW,
121     &fuse_iov_permanent_bufsize, 0,
122     "limit for permanently stored buffer size for fuse_iovs");
123 static int fuse_iov_credit = 16;
124 
125 SYSCTL_INT(_vfs_fusefs, OID_AUTO, iov_credit, CTLFLAG_RW,
126     &fuse_iov_credit, 0,
127     "how many times is an oversized fuse_iov tolerated");
128 
129 MALLOC_DEFINE(M_FUSEMSG, "fuse_msgbuf", "fuse message buffer");
130 static uma_zone_t ticket_zone;
131 
/*
 * TODO: figure out how to time out INTERRUPT requests, because the daemon may
 * legally never respond
 */
136 static int
137 fuse_interrupt_callback(struct fuse_ticket *tick, struct uio *uio)
138 {
139 	struct fuse_ticket *otick, *x_tick;
140 	struct fuse_interrupt_in *fii;
141 	struct fuse_data *data = tick->tk_data;
142 	bool found = false;
143 
144 	fii = (struct fuse_interrupt_in*)((char*)tick->tk_ms_fiov.base +
145 		sizeof(struct fuse_in_header));
146 
147 	fuse_lck_mtx_lock(data->aw_mtx);
148 	TAILQ_FOREACH_SAFE(otick, &data->aw_head, tk_aw_link, x_tick) {
149 		if (otick->tk_unique == fii->unique) {
150 			found = true;
151 			break;
152 		}
153 	}
154 	fuse_lck_mtx_unlock(data->aw_mtx);
155 
156 	if (!found) {
157 		/* Original is already complete.  Just return */
158 		return 0;
159 	}
160 
161 	/* Clear the original ticket's interrupt association */
162 	otick->irq_unique = 0;
163 
164 	if (tick->tk_aw_ohead.error == ENOSYS) {
165 		fsess_set_notimpl(data->mp, FUSE_INTERRUPT);
166 		return 0;
167 	} else if (tick->tk_aw_ohead.error == EAGAIN) {
168 		/*
169 		 * There are two reasons we might get this:
170 		 * 1) the daemon received the INTERRUPT request before the
171 		 *    original, or
172 		 * 2) the daemon received the INTERRUPT request after it
173 		 *    completed the original request.
174 		 * In the first case we should re-send the INTERRUPT.  In the
175 		 * second, we should ignore it.
176 		 */
177 		/* Resend */
178 		fuse_interrupt_send(otick, EINTR);
179 		return 0;
180 	} else {
181 		/* Illegal FUSE_INTERRUPT response */
182 		return EINVAL;
183 	}
184 }
185 
186 /* Interrupt the operation otick.  Return err as its error code */
187 void
188 fuse_interrupt_send(struct fuse_ticket *otick, int err)
189 {
190 	struct fuse_dispatcher fdi;
191 	struct fuse_interrupt_in *fii;
192 	struct fuse_in_header *ftick_hdr;
193 	struct fuse_data *data = otick->tk_data;
194 	struct fuse_ticket *tick, *xtick;
195 	struct ucred reused_creds;
196 	gid_t reused_groups[1];
197 
198 	if (otick->irq_unique == 0) {
199 		/*
200 		 * If the daemon hasn't yet received otick, then we can answer
201 		 * it ourselves and return.
202 		 */
203 		fuse_lck_mtx_lock(data->ms_mtx);
204 		STAILQ_FOREACH_SAFE(tick, &otick->tk_data->ms_head, tk_ms_link,
205 			xtick) {
206 			if (tick == otick) {
207 				STAILQ_REMOVE(&otick->tk_data->ms_head, tick,
208 					fuse_ticket, tk_ms_link);
209 				otick->tk_data->ms_count--;
210 				otick->tk_ms_link.stqe_next = NULL;
211 				fuse_lck_mtx_unlock(data->ms_mtx);
212 
213 				fuse_lck_mtx_lock(otick->tk_aw_mtx);
214 				if (!fticket_answered(otick)) {
215 					fticket_set_answered(otick);
216 					otick->tk_aw_errno = err;
217 					wakeup(otick);
218 				}
219 				fuse_lck_mtx_unlock(otick->tk_aw_mtx);
220 
221 				fuse_ticket_drop(tick);
222 				return;
223 			}
224 		}
225 		fuse_lck_mtx_unlock(data->ms_mtx);
226 
227 		/*
228 		 * If the fuse daemon doesn't support interrupts, then there's
229 		 * nothing more that we can do
230 		 */
231 		if (fsess_not_impl(data->mp, FUSE_INTERRUPT))
232 			return;
233 
234 		/*
235 		 * If the fuse daemon has already received otick, then we must
236 		 * send FUSE_INTERRUPT.
237 		 */
238 		ftick_hdr = fticket_in_header(otick);
239 		reused_creds.cr_uid = ftick_hdr->uid;
240 		reused_groups[0] = ftick_hdr->gid;
241 		reused_creds.cr_groups = reused_groups;
242 		fdisp_init(&fdi, sizeof(*fii));
243 		fdisp_make_pid(&fdi, FUSE_INTERRUPT, data, ftick_hdr->nodeid,
244 			ftick_hdr->pid, &reused_creds);
245 
246 		fii = fdi.indata;
247 		fii->unique = otick->tk_unique;
248 		fuse_insert_callback(fdi.tick, fuse_interrupt_callback);
249 
250 		otick->irq_unique = fdi.tick->tk_unique;
251 		/* Interrupt ops should be delivered ASAP */
252 		fuse_insert_message(fdi.tick, true);
253 		fdisp_destroy(&fdi);
254 	} else {
255 		/* This ticket has already been interrupted */
256 	}
257 }
258 
259 void
260 fiov_init(struct fuse_iov *fiov, size_t size)
261 {
262 	uint32_t msize = FU_AT_LEAST(size);
263 
264 	fiov->len = 0;
265 
266 	fiov->base = malloc(msize, M_FUSEMSG, M_WAITOK | M_ZERO);
267 
268 	fiov->allocated_size = msize;
269 	fiov->credit = fuse_iov_credit;
270 }
271 
272 void
273 fiov_teardown(struct fuse_iov *fiov)
274 {
275 	MPASS(fiov->base != NULL);
276 	free(fiov->base, M_FUSEMSG);
277 }
278 
279 void
280 fiov_adjust(struct fuse_iov *fiov, size_t size)
281 {
282 	if (fiov->allocated_size < size ||
283 	    (fuse_iov_permanent_bufsize >= 0 &&
284 	    fiov->allocated_size - size > fuse_iov_permanent_bufsize &&
285 	    --fiov->credit < 0)) {
286 		fiov->base = realloc(fiov->base, FU_AT_LEAST(size), M_FUSEMSG,
287 		    M_WAITOK | M_ZERO);
288 		if (!fiov->base) {
289 			panic("FUSE: realloc failed");
290 		}
291 		fiov->allocated_size = FU_AT_LEAST(size);
292 		fiov->credit = fuse_iov_credit;
293 		/* Clear data buffer after reallocation */
294 		bzero(fiov->base, size);
295 	} else if (size > fiov->len) {
296 		/* Clear newly extended portion of data buffer */
297 		bzero((char*)fiov->base + fiov->len, size - fiov->len);
298 	}
299 	fiov->len = size;
300 }
301 
/*
 * Reset the fiov to zero length.  This goes through fiov_adjust(), so an
 * oversized buffer may be shrunk on the way; the contents are cleared by
 * fiov_adjust() when the length grows again.
 */
void
fiov_refresh(struct fuse_iov *fiov)
{

	fiov_adjust(fiov, 0);
}
308 
309 static int
310 fticket_ctor(void *mem, int size, void *arg, int flags)
311 {
312 	struct fuse_ticket *ftick = mem;
313 	struct fuse_data *data = arg;
314 
315 	FUSE_ASSERT_MS_DONE(ftick);
316 	FUSE_ASSERT_AW_DONE(ftick);
317 
318 	ftick->tk_data = data;
319 	ftick->irq_unique = 0;
320 	refcount_init(&ftick->tk_refcount, 1);
321 	counter_u64_add(fuse_ticket_count, 1);
322 
323 	fticket_refresh(ftick);
324 
325 	return 0;
326 }
327 
328 static void
329 fticket_dtor(void *mem, int size, void *arg)
330 {
331 #ifdef INVARIANTS
332 	struct fuse_ticket *ftick = mem;
333 #endif
334 
335 	FUSE_ASSERT_MS_DONE(ftick);
336 	FUSE_ASSERT_AW_DONE(ftick);
337 
338 	counter_u64_add(fuse_ticket_count, -1);
339 }
340 
341 static int
342 fticket_init(void *mem, int size, int flags)
343 {
344 	struct fuse_ticket *ftick = mem;
345 
346 	bzero(ftick, sizeof(struct fuse_ticket));
347 
348 	fiov_init(&ftick->tk_ms_fiov, sizeof(struct fuse_in_header));
349 
350 	mtx_init(&ftick->tk_aw_mtx, "fuse answer delivery mutex", NULL, MTX_DEF);
351 	fiov_init(&ftick->tk_aw_fiov, 0);
352 
353 	return 0;
354 }
355 
356 static void
357 fticket_fini(void *mem, int size)
358 {
359 	struct fuse_ticket *ftick = mem;
360 
361 	fiov_teardown(&ftick->tk_ms_fiov);
362 	fiov_teardown(&ftick->tk_aw_fiov);
363 	mtx_destroy(&ftick->tk_aw_mtx);
364 }
365 
366 static inline struct fuse_ticket *
367 fticket_alloc(struct fuse_data *data)
368 {
369 	return uma_zalloc_arg(ticket_zone, data, M_WAITOK);
370 }
371 
372 static inline void
373 fticket_destroy(struct fuse_ticket *ftick)
374 {
375 	return uma_zfree(ticket_zone, ftick);
376 }
377 
378 /* Prepare the ticket to be reused and clear its data buffers */
379 static inline void
380 fticket_refresh(struct fuse_ticket *ftick)
381 {
382 	fticket_reset(ftick);
383 
384 	fiov_refresh(&ftick->tk_ms_fiov);
385 	fiov_refresh(&ftick->tk_aw_fiov);
386 }
387 
388 /* Prepare the ticket to be reused, but don't clear its data buffers */
389 static inline void
390 fticket_reset(struct fuse_ticket *ftick)
391 {
392 	struct fuse_data *data = ftick->tk_data;
393 
394 	FUSE_ASSERT_MS_DONE(ftick);
395 	FUSE_ASSERT_AW_DONE(ftick);
396 
397 	bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header));
398 
399 	ftick->tk_aw_errno = 0;
400 	ftick->tk_flag = 0;
401 
402 	/* May be truncated to 32 bits on LP32 arches */
403 	ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
404 	if (ftick->tk_unique == 0)
405 		ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
406 }
407 
408 static int
409 fticket_wait_answer(struct fuse_ticket *ftick)
410 {
411 	struct thread *td = curthread;
412 	sigset_t blockedset, oldset;
413 	int err = 0, stops_deferred;
414 	struct fuse_data *data = ftick->tk_data;
415 	bool interrupted = false;
416 
417 	if (fsess_maybe_impl(ftick->tk_data->mp, FUSE_INTERRUPT) &&
418 	    data->dataflags & FSESS_INTR) {
419 		SIGEMPTYSET(blockedset);
420 	} else {
421 		/* Block all signals except (implicitly) SIGKILL */
422 		SIGFILLSET(blockedset);
423 	}
424 	stops_deferred = sigdeferstop(SIGDEFERSTOP_SILENT);
425 	kern_sigprocmask(td, SIG_BLOCK, NULL, &oldset, 0);
426 
427 	fuse_lck_mtx_lock(ftick->tk_aw_mtx);
428 
429 retry:
430 	if (fticket_answered(ftick)) {
431 		goto out;
432 	}
433 
434 	if (fdata_get_dead(data)) {
435 		err = ENOTCONN;
436 		fticket_set_answered(ftick);
437 		goto out;
438 	}
439 	kern_sigprocmask(td, SIG_BLOCK, &blockedset, NULL, 0);
440 	err = msleep(ftick, &ftick->tk_aw_mtx, PCATCH, "fu_ans",
441 	    data->daemon_timeout * hz);
442 	kern_sigprocmask(td, SIG_SETMASK, &oldset, NULL, 0);
443 	if (err == EWOULDBLOCK) {
444 		SDT_PROBE2(fusefs, , ipc, trace, 3,
445 			"fticket_wait_answer: EWOULDBLOCK");
446 #ifdef XXXIP				/* die conditionally */
447 		if (!fdata_get_dead(data)) {
448 			fdata_set_dead(data);
449 		}
450 #endif
451 		err = ETIMEDOUT;
452 		fticket_set_answered(ftick);
453 	} else if ((err == EINTR || err == ERESTART)) {
454 		/*
455 		 * Whether we get EINTR or ERESTART depends on whether
456 		 * SA_RESTART was set by sigaction(2).
457 		 *
458 		 * Try to interrupt the operation and wait for an EINTR response
459 		 * to the original operation.  If the file system does not
460 		 * support FUSE_INTERRUPT, then we'll just wait for it to
461 		 * complete like normal.  If it does support FUSE_INTERRUPT,
462 		 * then it will either respond EINTR to the original operation,
463 		 * or EAGAIN to the interrupt.
464 		 */
465 		sigset_t tmpset;
466 
467 		SDT_PROBE2(fusefs, , ipc, trace, 4,
468 			"fticket_wait_answer: interrupt");
469 		fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
470 		fuse_interrupt_send(ftick, err);
471 
472 		PROC_LOCK(td->td_proc);
473 		mtx_lock(&td->td_proc->p_sigacts->ps_mtx);
474 		tmpset = td->td_proc->p_siglist;
475 		SIGSETOR(tmpset, td->td_siglist);
476 		mtx_unlock(&td->td_proc->p_sigacts->ps_mtx);
477 		PROC_UNLOCK(td->td_proc);
478 
479 		fuse_lck_mtx_lock(ftick->tk_aw_mtx);
480 		if (!interrupted && !SIGISMEMBER(tmpset, SIGKILL)) {
481 			/*
482 			 * Block all signals while we wait for an interrupt
483 			 * response.  The protocol doesn't discriminate between
484 			 * different signals.
485 			 */
486 			SIGFILLSET(blockedset);
487 			interrupted = true;
488 			goto retry;
489 		} else {
490 			/*
491 			 * Return immediately for fatal signals, or if this is
492 			 * the second interruption.  We should only be
493 			 * interrupted twice if the thread is stopped, for
494 			 * example during sigexit.
495 			 */
496 		}
497 	} else if (err) {
498 		SDT_PROBE2(fusefs, , ipc, trace, 6,
499 			"fticket_wait_answer: other error");
500 	} else {
501 		SDT_PROBE2(fusefs, , ipc, trace, 7, "fticket_wait_answer: OK");
502 	}
503 out:
504 	if (!(err || fticket_answered(ftick))) {
505 		SDT_PROBE2(fusefs, , ipc, trace, 1,
506 			"FUSE: requester was woken up but still no answer");
507 		err = ENXIO;
508 	}
509 	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
510 	sigallowstop(stops_deferred);
511 
512 	return err;
513 }
514 
515 static	inline
516 int
517 fticket_aw_pull_uio(struct fuse_ticket *ftick, struct uio *uio)
518 {
519 	int err = 0;
520 	size_t len = uio_resid(uio);
521 
522 	if (len) {
523 		fiov_adjust(fticket_resp(ftick), len);
524 		err = uiomove(fticket_resp(ftick)->base, len, uio);
525 	}
526 	return err;
527 }
528 
529 int
530 fticket_pull(struct fuse_ticket *ftick, struct uio *uio)
531 {
532 	int err = 0;
533 
534 	if (ftick->tk_aw_ohead.error) {
535 		return 0;
536 	}
537 	err = fuse_body_audit(ftick, uio_resid(uio));
538 	if (!err) {
539 		err = fticket_aw_pull_uio(ftick, uio);
540 	}
541 	return err;
542 }
543 
544 struct fuse_data *
545 fdata_alloc(struct cdev *fdev, struct ucred *cred)
546 {
547 	struct fuse_data *data;
548 
549 	data = malloc(sizeof(struct fuse_data), M_FUSEMSG, M_WAITOK | M_ZERO);
550 
551 	data->fdev = fdev;
552 	mtx_init(&data->ms_mtx, "fuse message list mutex", NULL, MTX_DEF);
553 	STAILQ_INIT(&data->ms_head);
554 	data->ms_count = 0;
555 	knlist_init_mtx(&data->ks_rsel.si_note, &data->ms_mtx);
556 	mtx_init(&data->aw_mtx, "fuse answer list mutex", NULL, MTX_DEF);
557 	TAILQ_INIT(&data->aw_head);
558 	data->daemoncred = crhold(cred);
559 	data->daemon_timeout = FUSE_DEFAULT_DAEMON_TIMEOUT;
560 	sx_init(&data->rename_lock, "fuse rename lock");
561 	data->ref = 1;
562 
563 	return data;
564 }
565 
566 void
567 fdata_trydestroy(struct fuse_data *data)
568 {
569 	data->ref--;
570 	MPASS(data->ref >= 0);
571 	if (data->ref != 0)
572 		return;
573 
574 	/* Driving off stage all that stuff thrown at device... */
575 	sx_destroy(&data->rename_lock);
576 	crfree(data->daemoncred);
577 	mtx_destroy(&data->aw_mtx);
578 	knlist_delete(&data->ks_rsel.si_note, curthread, 0);
579 	knlist_destroy(&data->ks_rsel.si_note);
580 	mtx_destroy(&data->ms_mtx);
581 
582 	free(data, M_FUSEMSG);
583 }
584 
585 void
586 fdata_set_dead(struct fuse_data *data)
587 {
588 	FUSE_LOCK();
589 	if (fdata_get_dead(data)) {
590 		FUSE_UNLOCK();
591 		return;
592 	}
593 	fuse_lck_mtx_lock(data->ms_mtx);
594 	data->dataflags |= FSESS_DEAD;
595 	wakeup_one(data);
596 	selwakeuppri(&data->ks_rsel, PZERO + 1);
597 	wakeup(&data->ticketer);
598 	fuse_lck_mtx_unlock(data->ms_mtx);
599 	FUSE_UNLOCK();
600 }
601 
602 struct fuse_ticket *
603 fuse_ticket_fetch(struct fuse_data *data)
604 {
605 	int err = 0;
606 	struct fuse_ticket *ftick;
607 
608 	ftick = fticket_alloc(data);
609 
610 	if (!(data->dataflags & FSESS_INITED)) {
611 		/* Sleep until get answer for INIT message */
612 		FUSE_LOCK();
613 		if (!(data->dataflags & FSESS_INITED) && data->ticketer > 2) {
614 			err = msleep(&data->ticketer, &fuse_mtx, PCATCH | PDROP,
615 			    "fu_ini", 0);
616 			if (err)
617 				fdata_set_dead(data);
618 		} else
619 			FUSE_UNLOCK();
620 	}
621 	return ftick;
622 }
623 
624 int
625 fuse_ticket_drop(struct fuse_ticket *ftick)
626 {
627 	int die;
628 
629 	die = refcount_release(&ftick->tk_refcount);
630 	if (die)
631 		fticket_destroy(ftick);
632 
633 	return die;
634 }
635 
636 void
637 fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t * handler)
638 {
639 	if (fdata_get_dead(ftick->tk_data)) {
640 		return;
641 	}
642 	ftick->tk_aw_handler = handler;
643 
644 	fuse_lck_mtx_lock(ftick->tk_data->aw_mtx);
645 	fuse_aw_push(ftick);
646 	fuse_lck_mtx_unlock(ftick->tk_data->aw_mtx);
647 }
648 
649 /*
650  * Insert a new upgoing ticket into the message queue
651  *
652  * If urgent is true, insert at the front of the queue.  Otherwise, insert in
653  * FIFO order.
654  */
655 void
656 fuse_insert_message(struct fuse_ticket *ftick, bool urgent)
657 {
658 	if (ftick->tk_flag & FT_DIRTY) {
659 		panic("FUSE: ticket reused without being refreshed");
660 	}
661 	ftick->tk_flag |= FT_DIRTY;
662 
663 	if (fdata_get_dead(ftick->tk_data)) {
664 		return;
665 	}
666 	fuse_lck_mtx_lock(ftick->tk_data->ms_mtx);
667 	if (urgent)
668 		fuse_ms_push_head(ftick);
669 	else
670 		fuse_ms_push(ftick);
671 	wakeup_one(ftick->tk_data);
672 	selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1);
673 	KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0);
674 	fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx);
675 }
676 
677 static int
678 fuse_body_audit(struct fuse_ticket *ftick, size_t blen)
679 {
680 	int err = 0;
681 	enum fuse_opcode opcode;
682 
683 	opcode = fticket_opcode(ftick);
684 
685 	switch (opcode) {
686 	case FUSE_BMAP:
687 		err = (blen == sizeof(struct fuse_bmap_out)) ? 0 : EINVAL;
688 		break;
689 
690 	case FUSE_LINK:
691 	case FUSE_LOOKUP:
692 	case FUSE_MKDIR:
693 	case FUSE_MKNOD:
694 	case FUSE_SYMLINK:
695 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
696 			err = (blen == sizeof(struct fuse_entry_out)) ?
697 				0 : EINVAL;
698 		} else {
699 			err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE) ? 0 : EINVAL;
700 		}
701 		break;
702 
703 	case FUSE_FORGET:
704 		panic("FUSE: a handler has been intalled for FUSE_FORGET");
705 		break;
706 
707 	case FUSE_GETATTR:
708 	case FUSE_SETATTR:
709 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
710 			err = (blen == sizeof(struct fuse_attr_out)) ?
711 			  0 : EINVAL;
712 		} else {
713 			err = (blen == FUSE_COMPAT_ATTR_OUT_SIZE) ? 0 : EINVAL;
714 		}
715 		break;
716 
717 	case FUSE_READLINK:
718 		err = (PAGE_SIZE >= blen) ? 0 : EINVAL;
719 		break;
720 
721 	case FUSE_UNLINK:
722 		err = (blen == 0) ? 0 : EINVAL;
723 		break;
724 
725 	case FUSE_RMDIR:
726 		err = (blen == 0) ? 0 : EINVAL;
727 		break;
728 
729 	case FUSE_RENAME:
730 		err = (blen == 0) ? 0 : EINVAL;
731 		break;
732 
733 	case FUSE_OPEN:
734 		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
735 		break;
736 
737 	case FUSE_READ:
738 		err = (((struct fuse_read_in *)(
739 		    (char *)ftick->tk_ms_fiov.base +
740 		    sizeof(struct fuse_in_header)
741 		    ))->size >= blen) ? 0 : EINVAL;
742 		break;
743 
744 	case FUSE_WRITE:
745 		err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL;
746 		break;
747 
748 	case FUSE_STATFS:
749 		if (fuse_libabi_geq(ftick->tk_data, 7, 4)) {
750 			err = (blen == sizeof(struct fuse_statfs_out)) ?
751 			  0 : EINVAL;
752 		} else {
753 			err = (blen == FUSE_COMPAT_STATFS_SIZE) ? 0 : EINVAL;
754 		}
755 		break;
756 
757 	case FUSE_RELEASE:
758 		err = (blen == 0) ? 0 : EINVAL;
759 		break;
760 
761 	case FUSE_FSYNC:
762 		err = (blen == 0) ? 0 : EINVAL;
763 		break;
764 
765 	case FUSE_SETXATTR:
766 		err = (blen == 0) ? 0 : EINVAL;
767 		break;
768 
769 	case FUSE_GETXATTR:
770 	case FUSE_LISTXATTR:
771 		/*
772 		 * These can have varying response lengths, and 0 length
773 		 * isn't necessarily invalid.
774 		 */
775 		err = 0;
776 		break;
777 
778 	case FUSE_REMOVEXATTR:
779 		err = (blen == 0) ? 0 : EINVAL;
780 		break;
781 
782 	case FUSE_FLUSH:
783 		err = (blen == 0) ? 0 : EINVAL;
784 		break;
785 
786 	case FUSE_INIT:
787 		if (blen == sizeof(struct fuse_init_out) ||
788 		    blen == FUSE_COMPAT_INIT_OUT_SIZE ||
789 		    blen == FUSE_COMPAT_22_INIT_OUT_SIZE) {
790 			err = 0;
791 		} else {
792 			err = EINVAL;
793 		}
794 		break;
795 
796 	case FUSE_OPENDIR:
797 		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
798 		break;
799 
800 	case FUSE_READDIR:
801 		err = (((struct fuse_read_in *)(
802 		    (char *)ftick->tk_ms_fiov.base +
803 		    sizeof(struct fuse_in_header)
804 		    ))->size >= blen) ? 0 : EINVAL;
805 		break;
806 
807 	case FUSE_RELEASEDIR:
808 		err = (blen == 0) ? 0 : EINVAL;
809 		break;
810 
811 	case FUSE_FSYNCDIR:
812 		err = (blen == 0) ? 0 : EINVAL;
813 		break;
814 
815 	case FUSE_GETLK:
816 		err = (blen == sizeof(struct fuse_lk_out)) ? 0 : EINVAL;
817 		break;
818 
819 	case FUSE_SETLK:
820 		err = (blen == 0) ? 0 : EINVAL;
821 		break;
822 
823 	case FUSE_SETLKW:
824 		err = (blen == 0) ? 0 : EINVAL;
825 		break;
826 
827 	case FUSE_ACCESS:
828 		err = (blen == 0) ? 0 : EINVAL;
829 		break;
830 
831 	case FUSE_CREATE:
832 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
833 			err = (blen == sizeof(struct fuse_entry_out) +
834 			    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
835 		} else {
836 			err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE +
837 			    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
838 		}
839 		break;
840 
841 	case FUSE_DESTROY:
842 		err = (blen == 0) ? 0 : EINVAL;
843 		break;
844 
845 	case FUSE_FALLOCATE:
846 		err = (blen == 0) ? 0 : EINVAL;
847 		break;
848 
849 	case FUSE_LSEEK:
850 		err = (blen == sizeof(struct fuse_lseek_out)) ? 0 : EINVAL;
851 		break;
852 
853 	case FUSE_COPY_FILE_RANGE:
854 		err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL;
855 		break;
856 
857 	default:
858 		panic("FUSE: opcodes out of sync (%d)\n", opcode);
859 	}
860 
861 	return err;
862 }
863 
864 static inline void
865 fuse_setup_ihead(struct fuse_in_header *ihead, struct fuse_ticket *ftick,
866     uint64_t nid, enum fuse_opcode op, size_t blen, pid_t pid,
867     struct ucred *cred)
868 {
869 	ihead->len = sizeof(*ihead) + blen;
870 	ihead->unique = ftick->tk_unique;
871 	ihead->nodeid = nid;
872 	ihead->opcode = op;
873 
874 	ihead->pid = pid;
875 	ihead->uid = cred->cr_uid;
876 	ihead->gid = cred->cr_groups[0];
877 }
878 
879 /*
880  * fuse_standard_handler just pulls indata and wakes up pretender.
881  * Doesn't try to interpret data, that's left for the pretender.
882  * Though might do a basic size verification before the pull-in takes place
883  */
884 
885 static int
886 fuse_standard_handler(struct fuse_ticket *ftick, struct uio *uio)
887 {
888 	int err = 0;
889 
890 	err = fticket_pull(ftick, uio);
891 
892 	fuse_lck_mtx_lock(ftick->tk_aw_mtx);
893 
894 	if (!fticket_answered(ftick)) {
895 		fticket_set_answered(ftick);
896 		ftick->tk_aw_errno = err;
897 		wakeup(ftick);
898 	}
899 	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
900 
901 	return err;
902 }
903 
904 /*
905  * Reinitialize a dispatcher from a pid and node id, without resizing or
906  * clearing its data buffers
907  */
908 static void
909 fdisp_refresh_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
910     struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred)
911 {
912 	MPASS(fdip->tick);
913 	MPASS2(sizeof(fdip->finh) + fdip->iosize <= fdip->tick->tk_ms_fiov.len,
914 		"Must use fdisp_make_pid to increase the size of the fiov");
915 	fticket_reset(fdip->tick);
916 
917 	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
918 	    fdip->indata, fdip->iosize);
919 
920 	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid,
921 		cred);
922 }
923 
924 /* Initialize a dispatcher from a pid and node id */
925 static void
926 fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
927     struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred)
928 {
929 	if (fdip->tick) {
930 		fticket_refresh(fdip->tick);
931 	} else {
932 		fdip->tick = fuse_ticket_fetch(data);
933 	}
934 
935 	/* FUSE_DIMALLOC will bzero the fiovs when it enlarges them */
936 	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
937 	    fdip->indata, fdip->iosize);
938 
939 	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid, cred);
940 }
941 
942 void
943 fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp,
944     uint64_t nid, struct thread *td, struct ucred *cred)
945 {
946 	struct fuse_data *data = fuse_get_mpdata(mp);
947 	RECTIFY_TDCR(td, cred);
948 
949 	return fdisp_make_pid(fdip, op, data, nid, td->td_proc->p_pid, cred);
950 }
951 
952 void
953 fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
954     struct vnode *vp, struct thread *td, struct ucred *cred)
955 {
956 	struct mount *mp = vnode_mount(vp);
957 	struct fuse_data *data = fuse_get_mpdata(mp);
958 
959 	RECTIFY_TDCR(td, cred);
960 	return fdisp_make_pid(fdip, op, data, VTOI(vp),
961 	    td->td_proc->p_pid, cred);
962 }
963 
964 /* Refresh a fuse_dispatcher so it can be reused, but don't zero its data */
965 void
966 fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
967     struct vnode *vp, struct thread *td, struct ucred *cred)
968 {
969 	RECTIFY_TDCR(td, cred);
970 	return fdisp_refresh_pid(fdip, op, vnode_mount(vp), VTOI(vp),
971 	    td->td_proc->p_pid, cred);
972 }
973 
974 SDT_PROBE_DEFINE2(fusefs, , ipc, fdisp_wait_answ_error, "char*", "int");
975 
976 int
977 fdisp_wait_answ(struct fuse_dispatcher *fdip)
978 {
979 	int err = 0;
980 
981 	fdip->answ_stat = 0;
982 	fuse_insert_callback(fdip->tick, fuse_standard_handler);
983 	fuse_insert_message(fdip->tick, false);
984 
985 	if ((err = fticket_wait_answer(fdip->tick))) {
986 		fuse_lck_mtx_lock(fdip->tick->tk_aw_mtx);
987 
988 		if (fticket_answered(fdip->tick)) {
989 			/*
990 	                 * Just between noticing the interrupt and getting here,
991 	                 * the standard handler has completed his job.
992 	                 * So we drop the ticket and exit as usual.
993 	                 */
994 			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
995 				"IPC: interrupted, already answered", err);
996 			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
997 			goto out;
998 		} else {
999 			/*
1000 	                 * So we were faster than the standard handler.
1001 	                 * Then by setting the answered flag we get *him*
1002 	                 * to drop the ticket.
1003 	                 */
1004 			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1005 				"IPC: interrupted, setting to answered", err);
1006 			fticket_set_answered(fdip->tick);
1007 			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
1008 			return err;
1009 		}
1010 	}
1011 
1012 	if (fdip->tick->tk_aw_errno == ENOTCONN) {
1013 		/* The daemon died while we were waiting for a response */
1014 		err = ENOTCONN;
1015 		goto out;
1016 	} else if (fdip->tick->tk_aw_errno) {
1017 		/*
1018 		 * There was some sort of communication error with the daemon
1019 		 * that the client wouldn't understand.
1020 		 */
1021 		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1022 			"IPC: explicit EIO-ing", fdip->tick->tk_aw_errno);
1023 		err = EIO;
1024 		goto out;
1025 	}
1026 	if ((err = fdip->tick->tk_aw_ohead.error)) {
1027 		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1028 			"IPC: setting status", fdip->tick->tk_aw_ohead.error);
1029 		/*
1030 	         * This means a "proper" fuse syscall error.
1031 	         * We record this value so the caller will
1032 	         * be able to know it's not a boring messaging
1033 	         * failure, if she wishes so (and if not, she can
1034 	         * just simply propagate the return value of this routine).
1035 	         * [XXX Maybe a bitflag would do the job too,
1036 	         * if other flags needed, this will be converted thusly.]
1037 	         */
1038 		fdip->answ_stat = err;
1039 		goto out;
1040 	}
1041 	fdip->answ = fticket_resp(fdip->tick)->base;
1042 	fdip->iosize = fticket_resp(fdip->tick)->len;
1043 
1044 	return 0;
1045 
1046 out:
1047 	return err;
1048 }
1049 
1050 void
1051 fuse_ipc_init(void)
1052 {
1053 	ticket_zone = uma_zcreate("fuse_ticket", sizeof(struct fuse_ticket),
1054 	    fticket_ctor, fticket_dtor, fticket_init, fticket_fini,
1055 	    UMA_ALIGN_PTR, 0);
1056 	fuse_ticket_count = counter_u64_alloc(M_WAITOK);
1057 }
1058 
1059 void
1060 fuse_ipc_destroy(void)
1061 {
1062 	counter_u64_free(fuse_ticket_count);
1063 	uma_zdestroy(ticket_zone);
1064 }
1065 
1066 SDT_PROBE_DEFINE3(fusefs,, ipc, warn, "struct fuse_data*", "unsigned", "char*");
1067 void
1068 fuse_warn(struct fuse_data *data, unsigned flag, const char *msg)
1069 {
1070 	SDT_PROBE3(fusefs, , ipc, warn, data, flag, msg);
1071 	if (!(data->dataflags & flag)) {
1072 		printf("WARNING: FUSE protocol violation for server mounted at "
1073 		    "%s: %s  "
1074 		    "This warning will not be repeated.\n",
1075 		    data->mp->mnt_stat.f_mntonname, msg);
1076 		data->dataflags |= flag;
1077 	}
1078 }
1079