xref: /freebsd/sys/fs/fuse/fuse_ipc.c (revision 22cf89c938886d14f5796fc49f9f020c23ea8eaf)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  * * Redistributions of source code must retain the above copyright
12  *   notice, this list of conditions and the following disclaimer.
13  * * Redistributions in binary form must reproduce the above
14  *   copyright notice, this list of conditions and the following disclaimer
15  *   in the documentation and/or other materials provided with the
16  *   distribution.
17  * * Neither the name of Google Inc. nor the names of its
18  *   contributors may be used to endorse or promote products derived from
19  *   this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Copyright (C) 2005 Csaba Henk.
34  * All rights reserved.
35  *
36  * Copyright (c) 2019 The FreeBSD Foundation
37  *
38  * Portions of this software were developed by BFF Storage Systems, LLC under
39  * sponsorship from the FreeBSD Foundation.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  *
50  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  */
62 
63 #include <sys/cdefs.h>
64 #include <sys/param.h>
65 #include <sys/module.h>
66 #include <sys/systm.h>
67 #include <sys/counter.h>
68 #include <sys/errno.h>
69 #include <sys/kernel.h>
70 #include <sys/conf.h>
71 #include <sys/uio.h>
72 #include <sys/malloc.h>
73 #include <sys/queue.h>
74 #include <sys/lock.h>
75 #include <sys/sx.h>
76 #include <sys/mutex.h>
77 #include <sys/proc.h>
78 #include <sys/mount.h>
79 #include <sys/sdt.h>
80 #include <sys/vnode.h>
81 #include <sys/signalvar.h>
82 #include <sys/syscallsubr.h>
83 #include <sys/sysctl.h>
84 #include <vm/uma.h>
85 
86 #include "fuse.h"
87 #include "fuse_node.h"
88 #include "fuse_ipc.h"
89 #include "fuse_internal.h"
90 
91 SDT_PROVIDER_DECLARE(fusefs);
92 /*
93  * Fuse trace probe:
94  * arg0: verbosity.  Higher numbers give more verbose messages
95  * arg1: Textual message
96  */
97 SDT_PROBE_DEFINE2(fusefs, , ipc, trace, "int", "char*");
98 
/* Forward declarations of the file-local helpers defined further below */
static void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
    struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred);
static void fuse_interrupt_send(struct fuse_ticket *otick, int err);
static struct fuse_ticket *fticket_alloc(struct fuse_data *data);
static void fticket_refresh(struct fuse_ticket *ftick);
static inline void fticket_reset(struct fuse_ticket *ftick);
static void fticket_destroy(struct fuse_ticket *ftick);
static int fticket_wait_answer(struct fuse_ticket *ftick);
static inline int
fticket_aw_pull_uio(struct fuse_ticket *ftick,
    struct uio *uio);

static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen);

static fuse_handler_t fuse_standard_handler;

/* Incremented by fticket_ctor and decremented by fticket_dtor */
static counter_u64_t fuse_ticket_count;
SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, ticket_count, CTLFLAG_RD,
    &fuse_ticket_count, "Number of allocated tickets");

/*
 * Slack threshold consulted by fiov_adjust: a buffer whose unused space
 * exceeds this many bytes is considered oversized.  A negative value disables
 * the check.
 */
static long fuse_iov_permanent_bufsize = 1 << 19;

SYSCTL_LONG(_vfs_fusefs, OID_AUTO, iov_permanent_bufsize, CTLFLAG_RW,
    &fuse_iov_permanent_bufsize, 0,
    "limit for permanently stored buffer size for fuse_iovs");
/*
 * Initial value of fuse_iov.credit, set by fiov_init and restored by
 * fiov_adjust whenever it reallocates the buffer.
 */
static int fuse_iov_credit = 16;

SYSCTL_INT(_vfs_fusefs, OID_AUTO, iov_credit, CTLFLAG_RW,
    &fuse_iov_credit, 0,
    "how many times is an oversized fuse_iov tolerated");

/* Malloc type used for fuse_iov buffers and for struct fuse_data */
MALLOC_DEFINE(M_FUSEMSG, "fuse_msgbuf", "fuse message buffer");
/* UMA zone for struct fuse_ticket; created in fuse_ipc_init */
static uma_zone_t ticket_zone;
132 
133 /*
134  * TODO: figure out how to timeout INTERRUPT requests, because the daemon may
135  * leagally never respond
136  */
137 static int
138 fuse_interrupt_callback(struct fuse_ticket *tick, struct uio *uio)
139 {
140 	struct fuse_ticket *otick, *x_tick;
141 	struct fuse_interrupt_in *fii;
142 	struct fuse_data *data = tick->tk_data;
143 	bool found = false;
144 
145 	fii = (struct fuse_interrupt_in*)((char*)tick->tk_ms_fiov.base +
146 		sizeof(struct fuse_in_header));
147 
148 	fuse_lck_mtx_lock(data->aw_mtx);
149 	TAILQ_FOREACH_SAFE(otick, &data->aw_head, tk_aw_link, x_tick) {
150 		if (otick->tk_unique == fii->unique) {
151 			found = true;
152 			break;
153 		}
154 	}
155 	fuse_lck_mtx_unlock(data->aw_mtx);
156 
157 	if (!found) {
158 		/* Original is already complete.  Just return */
159 		return 0;
160 	}
161 
162 	/* Clear the original ticket's interrupt association */
163 	otick->irq_unique = 0;
164 
165 	if (tick->tk_aw_ohead.error == ENOSYS) {
166 		fsess_set_notimpl(data->mp, FUSE_INTERRUPT);
167 		return 0;
168 	} else if (tick->tk_aw_ohead.error == EAGAIN) {
169 		/*
170 		 * There are two reasons we might get this:
171 		 * 1) the daemon received the INTERRUPT request before the
172 		 *    original, or
173 		 * 2) the daemon received the INTERRUPT request after it
174 		 *    completed the original request.
175 		 * In the first case we should re-send the INTERRUPT.  In the
176 		 * second, we should ignore it.
177 		 */
178 		/* Resend */
179 		fuse_interrupt_send(otick, EINTR);
180 		return 0;
181 	} else {
182 		/* Illegal FUSE_INTERRUPT response */
183 		return EINVAL;
184 	}
185 }
186 
187 /* Interrupt the operation otick.  Return err as its error code */
188 void
189 fuse_interrupt_send(struct fuse_ticket *otick, int err)
190 {
191 	struct fuse_dispatcher fdi;
192 	struct fuse_interrupt_in *fii;
193 	struct fuse_in_header *ftick_hdr;
194 	struct fuse_data *data = otick->tk_data;
195 	struct fuse_ticket *tick, *xtick;
196 	struct ucred reused_creds;
197 	gid_t reused_groups[1];
198 
199 	if (otick->irq_unique == 0) {
200 		/*
201 		 * If the daemon hasn't yet received otick, then we can answer
202 		 * it ourselves and return.
203 		 */
204 		fuse_lck_mtx_lock(data->ms_mtx);
205 		STAILQ_FOREACH_SAFE(tick, &otick->tk_data->ms_head, tk_ms_link,
206 			xtick) {
207 			if (tick == otick) {
208 				STAILQ_REMOVE(&otick->tk_data->ms_head, tick,
209 					fuse_ticket, tk_ms_link);
210 				otick->tk_data->ms_count--;
211 				otick->tk_ms_link.stqe_next = NULL;
212 				fuse_lck_mtx_unlock(data->ms_mtx);
213 
214 				fuse_lck_mtx_lock(otick->tk_aw_mtx);
215 				if (!fticket_answered(otick)) {
216 					fticket_set_answered(otick);
217 					otick->tk_aw_errno = err;
218 					wakeup(otick);
219 				}
220 				fuse_lck_mtx_unlock(otick->tk_aw_mtx);
221 
222 				fuse_ticket_drop(tick);
223 				return;
224 			}
225 		}
226 		fuse_lck_mtx_unlock(data->ms_mtx);
227 
228 		/*
229 		 * If the fuse daemon doesn't support interrupts, then there's
230 		 * nothing more that we can do
231 		 */
232 		if (fsess_not_impl(data->mp, FUSE_INTERRUPT))
233 			return;
234 
235 		/*
236 		 * If the fuse daemon has already received otick, then we must
237 		 * send FUSE_INTERRUPT.
238 		 */
239 		ftick_hdr = fticket_in_header(otick);
240 		reused_creds.cr_uid = ftick_hdr->uid;
241 		reused_groups[0] = ftick_hdr->gid;
242 		reused_creds.cr_groups = reused_groups;
243 		fdisp_init(&fdi, sizeof(*fii));
244 		fdisp_make_pid(&fdi, FUSE_INTERRUPT, data, ftick_hdr->nodeid,
245 			ftick_hdr->pid, &reused_creds);
246 
247 		fii = fdi.indata;
248 		fii->unique = otick->tk_unique;
249 		fuse_insert_callback(fdi.tick, fuse_interrupt_callback);
250 
251 		otick->irq_unique = fdi.tick->tk_unique;
252 		/* Interrupt ops should be delivered ASAP */
253 		fuse_insert_message(fdi.tick, true);
254 		fdisp_destroy(&fdi);
255 	} else {
256 		/* This ticket has already been interrupted */
257 	}
258 }
259 
260 void
261 fiov_init(struct fuse_iov *fiov, size_t size)
262 {
263 	uint32_t msize = FU_AT_LEAST(size);
264 
265 	fiov->len = 0;
266 
267 	fiov->base = malloc(msize, M_FUSEMSG, M_WAITOK | M_ZERO);
268 
269 	fiov->allocated_size = msize;
270 	fiov->credit = fuse_iov_credit;
271 }
272 
/*
 * Release the buffer owned by fiov.  The fiov must have been initialized
 * (base != NULL).  The fiov structure itself is not freed, and base is left
 * pointing at the freed memory.
 */
void
fiov_teardown(struct fuse_iov *fiov)
{
	MPASS(fiov->base != NULL);
	free(fiov->base, M_FUSEMSG);
}
279 
280 void
281 fiov_adjust(struct fuse_iov *fiov, size_t size)
282 {
283 	if (fiov->allocated_size < size ||
284 	    (fuse_iov_permanent_bufsize >= 0 &&
285 	    fiov->allocated_size - size > fuse_iov_permanent_bufsize &&
286 	    --fiov->credit < 0)) {
287 		fiov->base = realloc(fiov->base, FU_AT_LEAST(size), M_FUSEMSG,
288 		    M_WAITOK | M_ZERO);
289 		if (!fiov->base) {
290 			panic("FUSE: realloc failed");
291 		}
292 		fiov->allocated_size = FU_AT_LEAST(size);
293 		fiov->credit = fuse_iov_credit;
294 		/* Clear data buffer after reallocation */
295 		bzero(fiov->base, size);
296 	} else if (size > fiov->len) {
297 		/* Clear newly extended portion of data buffer */
298 		bzero((char*)fiov->base + fiov->len, size - fiov->len);
299 	}
300 	fiov->len = size;
301 }
302 
/*
 * Resize the fiov if needed, and reset its length to 0.  Because the length
 * becomes 0, any later fiov_adjust that grows the fiov zeroes the bytes it
 * exposes, so stale contents are never visible through it.
 */
void
fiov_refresh(struct fuse_iov *fiov)
{
	fiov_adjust(fiov, 0);
}
309 
310 static int
311 fticket_ctor(void *mem, int size, void *arg, int flags)
312 {
313 	struct fuse_ticket *ftick = mem;
314 	struct fuse_data *data = arg;
315 
316 	FUSE_ASSERT_MS_DONE(ftick);
317 	FUSE_ASSERT_AW_DONE(ftick);
318 
319 	ftick->tk_data = data;
320 	ftick->irq_unique = 0;
321 	refcount_init(&ftick->tk_refcount, 1);
322 	counter_u64_add(fuse_ticket_count, 1);
323 
324 	fticket_refresh(ftick);
325 
326 	return 0;
327 }
328 
/*
 * UMA destructor for the ticket zone, run whenever a ticket is freed back to
 * the zone.  Checks the queue-state assertions and decrements the allocated
 * ticket counter.
 */
static void
fticket_dtor(void *mem, int size, void *arg)
{
	/* ftick is only declared when INVARIANTS is defined */
#ifdef INVARIANTS
	struct fuse_ticket *ftick = mem;
#endif

	FUSE_ASSERT_MS_DONE(ftick);
	FUSE_ASSERT_AW_DONE(ftick);

	counter_u64_add(fuse_ticket_count, -1);
}
341 
342 static int
343 fticket_init(void *mem, int size, int flags)
344 {
345 	struct fuse_ticket *ftick = mem;
346 
347 	bzero(ftick, sizeof(struct fuse_ticket));
348 
349 	fiov_init(&ftick->tk_ms_fiov, sizeof(struct fuse_in_header));
350 
351 	mtx_init(&ftick->tk_aw_mtx, "fuse answer delivery mutex", NULL, MTX_DEF);
352 	fiov_init(&ftick->tk_aw_fiov, 0);
353 
354 	return 0;
355 }
356 
357 static void
358 fticket_fini(void *mem, int size)
359 {
360 	struct fuse_ticket *ftick = mem;
361 
362 	fiov_teardown(&ftick->tk_ms_fiov);
363 	fiov_teardown(&ftick->tk_aw_fiov);
364 	mtx_destroy(&ftick->tk_aw_mtx);
365 }
366 
/*
 * Allocate a ticket from the ticket zone.  data is handed to the zone's
 * constructor (fticket_ctor) as its argument.  The allocation uses M_WAITOK.
 */
static inline struct fuse_ticket *
fticket_alloc(struct fuse_data *data)
{
	return uma_zalloc_arg(ticket_zone, data, M_WAITOK);
}
372 
373 static inline void
374 fticket_destroy(struct fuse_ticket *ftick)
375 {
376 	return uma_zfree(ticket_zone, ftick);
377 }
378 
/*
 * Prepare the ticket to be reused and clear its data buffers.
 *
 * Resets the ticket's state and assigns a new unique id (fticket_reset), then
 * resets the length of both the request and the response fiov to 0.
 */
static inline void
fticket_refresh(struct fuse_ticket *ftick)
{
	fticket_reset(ftick);

	fiov_refresh(&ftick->tk_ms_fiov);
	fiov_refresh(&ftick->tk_aw_fiov);
}
388 
/*
 * Prepare the ticket to be reused, but don't clear its data buffers.
 *
 * Clears the response header, the answer errno and the flags, and assigns a
 * fresh unique id taken from the session-wide ticketer counter.
 */
static inline void
fticket_reset(struct fuse_ticket *ftick)
{
	struct fuse_data *data = ftick->tk_data;

	FUSE_ASSERT_MS_DONE(ftick);
	FUSE_ASSERT_AW_DONE(ftick);

	bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header));

	ftick->tk_aw_errno = 0;
	ftick->tk_flag = 0;

	/* May be truncated to 32 bits on LP32 arches */
	ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
	/*
	 * Never hand out 0 as a unique id; draw once more if the counter
	 * yielded it.  Presumably this is because irq_unique uses 0 to mean
	 * "no interrupt outstanding" -- TODO confirm.
	 */
	if (ftick->tk_unique == 0)
		ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
}
408 
/*
 * Sleep until ftick has been answered.
 *
 * Returns 0 once the ticket is answered, or:
 *  - ENOTCONN if the session is marked dead,
 *  - ETIMEDOUT if the sleep timed out (data->daemon_timeout seconds),
 *  - EINTR or ERESTART if the wait was interrupted by a signal and not
 *    answered afterwards,
 *  - any other error returned by msleep,
 *  - ENXIO if the thread was woken without error but the ticket is still
 *    unanswered.
 *
 * On the first signal an interrupt is issued through fuse_interrupt_send and,
 * unless SIGKILL is pending, the wait is retried once with all signals
 * blocked.
 */
static int
fticket_wait_answer(struct fuse_ticket *ftick)
{
	struct thread *td = curthread;
	sigset_t blockedset, oldset;
	int err = 0, stops_deferred;
	struct fuse_data *data = ftick->tk_data;
	bool interrupted = false;

	/*
	 * Only leave signals deliverable if the session was set up for
	 * interruptible operation and FUSE_INTERRUPT may be implemented.
	 */
	if (fsess_maybe_impl(ftick->tk_data->mp, FUSE_INTERRUPT) &&
	    data->dataflags & FSESS_INTR) {
		SIGEMPTYSET(blockedset);
	} else {
		/* Block all signals except (implicitly) SIGKILL */
		SIGFILLSET(blockedset);
	}
	stops_deferred = sigdeferstop(SIGDEFERSTOP_SILENT);
	/* With a NULL set this only fetches the current mask into oldset */
	kern_sigprocmask(td, SIG_BLOCK, NULL, &oldset, 0);

	fuse_lck_mtx_lock(ftick->tk_aw_mtx);

retry:
	if (fticket_answered(ftick)) {
		goto out;
	}

	if (fdata_get_dead(data)) {
		err = ENOTCONN;
		fticket_set_answered(ftick);
		goto out;
	}
	/* Apply the chosen mask only for the duration of the sleep */
	kern_sigprocmask(td, SIG_BLOCK, &blockedset, NULL, 0);
	err = msleep(ftick, &ftick->tk_aw_mtx, PCATCH, "fu_ans",
	    data->daemon_timeout * hz);
	kern_sigprocmask(td, SIG_SETMASK, &oldset, NULL, 0);
	if (err == EWOULDBLOCK) {
		/* The sleep timed out */
		SDT_PROBE2(fusefs, , ipc, trace, 3,
			"fticket_wait_answer: EWOULDBLOCK");
#ifdef XXXIP				/* die conditionally */
		if (!fdata_get_dead(data)) {
			fdata_set_dead(data);
		}
#endif
		err = ETIMEDOUT;
		fticket_set_answered(ftick);
	} else if ((err == EINTR || err == ERESTART)) {
		/*
		 * Whether we get EINTR or ERESTART depends on whether
		 * SA_RESTART was set by sigaction(2).
		 *
		 * Try to interrupt the operation and wait for an EINTR response
		 * to the original operation.  If the file system does not
		 * support FUSE_INTERRUPT, then we'll just wait for it to
		 * complete like normal.  If it does support FUSE_INTERRUPT,
		 * then it will either respond EINTR to the original operation,
		 * or EAGAIN to the interrupt.
		 */
		sigset_t tmpset;

		SDT_PROBE2(fusefs, , ipc, trace, 4,
			"fticket_wait_answer: interrupt");
		/* fuse_interrupt_send takes tk_aw_mtx itself; drop it first */
		fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
		fuse_interrupt_send(ftick, err);

		/* Snapshot the signals pending on the process and the thread */
		PROC_LOCK(td->td_proc);
		mtx_lock(&td->td_proc->p_sigacts->ps_mtx);
		tmpset = td->td_proc->p_siglist;
		SIGSETOR(tmpset, td->td_siglist);
		mtx_unlock(&td->td_proc->p_sigacts->ps_mtx);
		PROC_UNLOCK(td->td_proc);

		fuse_lck_mtx_lock(ftick->tk_aw_mtx);
		if (!interrupted && !SIGISMEMBER(tmpset, SIGKILL)) {
			/*
			 * Block all signals while we wait for an interrupt
			 * response.  The protocol doesn't discriminate between
			 * different signals.
			 */
			SIGFILLSET(blockedset);
			interrupted = true;
			goto retry;
		} else {
			/*
			 * Return immediately for fatal signals, or if this is
			 * the second interruption.  We should only be
			 * interrupted twice if the thread is stopped, for
			 * example during sigexit.
			 */
		}
	} else if (err) {
		SDT_PROBE2(fusefs, , ipc, trace, 6,
			"fticket_wait_answer: other error");
	} else {
		SDT_PROBE2(fusefs, , ipc, trace, 7, "fticket_wait_answer: OK");
	}
out:
	/* A wakeup without error must mean that the ticket was answered */
	if (!(err || fticket_answered(ftick))) {
		SDT_PROBE2(fusefs, , ipc, trace, 1,
			"FUSE: requester was woken up but still no answer");
		err = ENXIO;
	}
	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
	sigallowstop(stops_deferred);

	return err;
}
515 
516 static	inline
517 int
518 fticket_aw_pull_uio(struct fuse_ticket *ftick, struct uio *uio)
519 {
520 	int err = 0;
521 	size_t len = uio_resid(uio);
522 
523 	if (len) {
524 		fiov_adjust(fticket_resp(ftick), len);
525 		err = uiomove(fticket_resp(ftick)->base, len, uio);
526 	}
527 	return err;
528 }
529 
530 int
531 fticket_pull(struct fuse_ticket *ftick, struct uio *uio)
532 {
533 	int err = 0;
534 
535 	if (ftick->tk_aw_ohead.error) {
536 		return 0;
537 	}
538 	err = fuse_body_audit(ftick, uio_resid(uio));
539 	if (!err) {
540 		err = fticket_aw_pull_uio(ftick, uio);
541 	}
542 	return err;
543 }
544 
/*
 * Allocate and initialize the per-session fuse_data for the device fdev.
 *
 * The structure is zero-filled, takes a reference on cred as the daemon's
 * credentials, and is returned with a reference count of 1.
 */
struct fuse_data *
fdata_alloc(struct cdev *fdev, struct ucred *cred)
{
	struct fuse_data *data;

	data = malloc(sizeof(struct fuse_data), M_FUSEMSG, M_WAITOK | M_ZERO);

	data->fdev = fdev;
	/* Outgoing message queue and its lock */
	mtx_init(&data->ms_mtx, "fuse message list mutex", NULL, MTX_DEF);
	STAILQ_INIT(&data->ms_head);
	data->ms_count = 0;
	/* The knote list shares the message queue mutex */
	knlist_init_mtx(&data->ks_rsel.si_note, &data->ms_mtx);
	/* Queue of tickets awaiting an answer, and its lock */
	mtx_init(&data->aw_mtx, "fuse answer list mutex", NULL, MTX_DEF);
	TAILQ_INIT(&data->aw_head);
	data->daemoncred = crhold(cred);
	data->daemon_timeout = FUSE_DEFAULT_DAEMON_TIMEOUT;
	sx_init(&data->rename_lock, "fuse rename lock");
	data->ref = 1;

	return data;
}
566 
/*
 * Drop one reference on data, and tear it down and free it when that was the
 * last one.
 *
 * NOTE(review): ref is decremented without any locking visible here;
 * presumably callers serialize -- confirm.
 */
void
fdata_trydestroy(struct fuse_data *data)
{
	data->ref--;
	MPASS(data->ref >= 0);
	if (data->ref != 0)
		return;

	/* Driving off stage all that stuff thrown at device... */
	sx_destroy(&data->rename_lock);
	crfree(data->daemoncred);
	mtx_destroy(&data->aw_mtx);
	/* The knote list uses ms_mtx, so it goes before that mutex does */
	knlist_delete(&data->ks_rsel.si_note, curthread, 0);
	knlist_destroy(&data->ks_rsel.si_note);
	mtx_destroy(&data->ms_mtx);

	free(data, M_FUSEMSG);
}
585 
/*
 * Mark the session as dead and wake up everything that may be sleeping on
 * it.  Does nothing if the session is already dead.  Takes the global fuse
 * lock, so it must not be called with that lock held.
 */
void
fdata_set_dead(struct fuse_data *data)
{
	FUSE_LOCK();
	if (fdata_get_dead(data)) {
		FUSE_UNLOCK();
		return;
	}
	fuse_lck_mtx_lock(data->ms_mtx);
	data->dataflags |= FSESS_DEAD;
	/* Same wakeups that fuse_insert_message issues for a new message */
	wakeup_one(data);
	selwakeuppri(&data->ks_rsel, PZERO + 1);
	/* Wake threads waiting in fuse_ticket_fetch for INIT to complete */
	wakeup(&data->ticketer);
	fuse_lck_mtx_unlock(data->ms_mtx);
	FUSE_UNLOCK();
}
602 
603 struct fuse_ticket *
604 fuse_ticket_fetch(struct fuse_data *data)
605 {
606 	int err = 0;
607 	struct fuse_ticket *ftick;
608 
609 	ftick = fticket_alloc(data);
610 
611 	if (!(data->dataflags & FSESS_INITED)) {
612 		/* Sleep until get answer for INIT messsage */
613 		FUSE_LOCK();
614 		if (!(data->dataflags & FSESS_INITED) && data->ticketer > 2) {
615 			err = msleep(&data->ticketer, &fuse_mtx, PCATCH | PDROP,
616 			    "fu_ini", 0);
617 			if (err)
618 				fdata_set_dead(data);
619 		} else
620 			FUSE_UNLOCK();
621 	}
622 	return ftick;
623 }
624 
/*
 * Release one reference on ftick, returning it to the ticket zone when the
 * last reference goes away.  Returns nonzero if the ticket was destroyed, in
 * which case the caller must not touch it again.
 */
int
fuse_ticket_drop(struct fuse_ticket *ftick)
{
	int die;

	die = refcount_release(&ftick->tk_refcount);
	if (die)
		fticket_destroy(ftick);

	return die;
}
636 
/*
 * Register handler as the response handler for ftick and place the ticket on
 * the session's answer-wait queue.  Does nothing (the handler is not even
 * recorded) if the session is already dead.
 */
void
fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t * handler)
{
	if (fdata_get_dead(ftick->tk_data)) {
		return;
	}
	ftick->tk_aw_handler = handler;

	fuse_lck_mtx_lock(ftick->tk_data->aw_mtx);
	fuse_aw_push(ftick);
	fuse_lck_mtx_unlock(ftick->tk_data->aw_mtx);
}
649 
650 /*
651  * Insert a new upgoing ticket into the message queue
652  *
653  * If urgent is true, insert at the front of the queue.  Otherwise, insert in
654  * FIFO order.
655  */
656 void
657 fuse_insert_message(struct fuse_ticket *ftick, bool urgent)
658 {
659 	if (ftick->tk_flag & FT_DIRTY) {
660 		panic("FUSE: ticket reused without being refreshed");
661 	}
662 	ftick->tk_flag |= FT_DIRTY;
663 
664 	if (fdata_get_dead(ftick->tk_data)) {
665 		return;
666 	}
667 	fuse_lck_mtx_lock(ftick->tk_data->ms_mtx);
668 	if (urgent)
669 		fuse_ms_push_head(ftick);
670 	else
671 		fuse_ms_push(ftick);
672 	wakeup_one(ftick->tk_data);
673 	selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1);
674 	KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0);
675 	fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx);
676 }
677 
678 static int
679 fuse_body_audit(struct fuse_ticket *ftick, size_t blen)
680 {
681 	int err = 0;
682 	enum fuse_opcode opcode;
683 
684 	opcode = fticket_opcode(ftick);
685 
686 	switch (opcode) {
687 	case FUSE_BMAP:
688 		err = (blen == sizeof(struct fuse_bmap_out)) ? 0 : EINVAL;
689 		break;
690 
691 	case FUSE_LINK:
692 	case FUSE_LOOKUP:
693 	case FUSE_MKDIR:
694 	case FUSE_MKNOD:
695 	case FUSE_SYMLINK:
696 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
697 			err = (blen == sizeof(struct fuse_entry_out)) ?
698 				0 : EINVAL;
699 		} else {
700 			err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE) ? 0 : EINVAL;
701 		}
702 		break;
703 
704 	case FUSE_FORGET:
705 		panic("FUSE: a handler has been intalled for FUSE_FORGET");
706 		break;
707 
708 	case FUSE_GETATTR:
709 	case FUSE_SETATTR:
710 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
711 			err = (blen == sizeof(struct fuse_attr_out)) ?
712 			  0 : EINVAL;
713 		} else {
714 			err = (blen == FUSE_COMPAT_ATTR_OUT_SIZE) ? 0 : EINVAL;
715 		}
716 		break;
717 
718 	case FUSE_READLINK:
719 		err = (PAGE_SIZE >= blen) ? 0 : EINVAL;
720 		break;
721 
722 	case FUSE_UNLINK:
723 		err = (blen == 0) ? 0 : EINVAL;
724 		break;
725 
726 	case FUSE_RMDIR:
727 		err = (blen == 0) ? 0 : EINVAL;
728 		break;
729 
730 	case FUSE_RENAME:
731 		err = (blen == 0) ? 0 : EINVAL;
732 		break;
733 
734 	case FUSE_OPEN:
735 		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
736 		break;
737 
738 	case FUSE_READ:
739 		err = (((struct fuse_read_in *)(
740 		    (char *)ftick->tk_ms_fiov.base +
741 		    sizeof(struct fuse_in_header)
742 		    ))->size >= blen) ? 0 : EINVAL;
743 		break;
744 
745 	case FUSE_WRITE:
746 		err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL;
747 		break;
748 
749 	case FUSE_STATFS:
750 		if (fuse_libabi_geq(ftick->tk_data, 7, 4)) {
751 			err = (blen == sizeof(struct fuse_statfs_out)) ?
752 			  0 : EINVAL;
753 		} else {
754 			err = (blen == FUSE_COMPAT_STATFS_SIZE) ? 0 : EINVAL;
755 		}
756 		break;
757 
758 	case FUSE_RELEASE:
759 		err = (blen == 0) ? 0 : EINVAL;
760 		break;
761 
762 	case FUSE_FSYNC:
763 		err = (blen == 0) ? 0 : EINVAL;
764 		break;
765 
766 	case FUSE_SETXATTR:
767 		err = (blen == 0) ? 0 : EINVAL;
768 		break;
769 
770 	case FUSE_GETXATTR:
771 	case FUSE_LISTXATTR:
772 		/*
773 		 * These can have varying response lengths, and 0 length
774 		 * isn't necessarily invalid.
775 		 */
776 		err = 0;
777 		break;
778 
779 	case FUSE_REMOVEXATTR:
780 		err = (blen == 0) ? 0 : EINVAL;
781 		break;
782 
783 	case FUSE_FLUSH:
784 		err = (blen == 0) ? 0 : EINVAL;
785 		break;
786 
787 	case FUSE_INIT:
788 		if (blen == sizeof(struct fuse_init_out) ||
789 		    blen == FUSE_COMPAT_INIT_OUT_SIZE ||
790 		    blen == FUSE_COMPAT_22_INIT_OUT_SIZE) {
791 			err = 0;
792 		} else {
793 			err = EINVAL;
794 		}
795 		break;
796 
797 	case FUSE_OPENDIR:
798 		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
799 		break;
800 
801 	case FUSE_READDIR:
802 		err = (((struct fuse_read_in *)(
803 		    (char *)ftick->tk_ms_fiov.base +
804 		    sizeof(struct fuse_in_header)
805 		    ))->size >= blen) ? 0 : EINVAL;
806 		break;
807 
808 	case FUSE_RELEASEDIR:
809 		err = (blen == 0) ? 0 : EINVAL;
810 		break;
811 
812 	case FUSE_FSYNCDIR:
813 		err = (blen == 0) ? 0 : EINVAL;
814 		break;
815 
816 	case FUSE_GETLK:
817 		err = (blen == sizeof(struct fuse_lk_out)) ? 0 : EINVAL;
818 		break;
819 
820 	case FUSE_SETLK:
821 		err = (blen == 0) ? 0 : EINVAL;
822 		break;
823 
824 	case FUSE_SETLKW:
825 		err = (blen == 0) ? 0 : EINVAL;
826 		break;
827 
828 	case FUSE_ACCESS:
829 		err = (blen == 0) ? 0 : EINVAL;
830 		break;
831 
832 	case FUSE_CREATE:
833 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
834 			err = (blen == sizeof(struct fuse_entry_out) +
835 			    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
836 		} else {
837 			err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE +
838 			    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
839 		}
840 		break;
841 
842 	case FUSE_DESTROY:
843 		err = (blen == 0) ? 0 : EINVAL;
844 		break;
845 
846 	case FUSE_FALLOCATE:
847 		err = (blen == 0) ? 0 : EINVAL;
848 		break;
849 
850 	case FUSE_LSEEK:
851 		err = (blen == sizeof(struct fuse_lseek_out)) ? 0 : EINVAL;
852 		break;
853 
854 	case FUSE_COPY_FILE_RANGE:
855 		err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL;
856 		break;
857 
858 	default:
859 		panic("FUSE: opcodes out of sync (%d)\n", opcode);
860 	}
861 
862 	return err;
863 }
864 
/*
 * Fill in the request header ihead for the ticket ftick.
 *
 * nid is the target node id, op the opcode and blen the length of the request
 * body that follows the header.  The pid is taken from the argument; uid and
 * gid come from cred (cr_uid and the first entry of cr_groups).
 */
static inline void
fuse_setup_ihead(struct fuse_in_header *ihead, struct fuse_ticket *ftick,
    uint64_t nid, enum fuse_opcode op, size_t blen, pid_t pid,
    struct ucred *cred)
{
	/* Total message length: header plus body */
	ihead->len = sizeof(*ihead) + blen;
	ihead->unique = ftick->tk_unique;
	ihead->nodeid = nid;
	ihead->opcode = op;

	ihead->pid = pid;
	ihead->uid = cred->cr_uid;
	ihead->gid = cred->cr_groups[0];
}
879 
880 /*
881  * fuse_standard_handler just pulls indata and wakes up pretender.
882  * Doesn't try to interpret data, that's left for the pretender.
883  * Though might do a basic size verification before the pull-in takes place
884  */
885 
886 static int
887 fuse_standard_handler(struct fuse_ticket *ftick, struct uio *uio)
888 {
889 	int err = 0;
890 
891 	err = fticket_pull(ftick, uio);
892 
893 	fuse_lck_mtx_lock(ftick->tk_aw_mtx);
894 
895 	if (!fticket_answered(ftick)) {
896 		fticket_set_answered(ftick);
897 		ftick->tk_aw_errno = err;
898 		wakeup(ftick);
899 	}
900 	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
901 
902 	return err;
903 }
904 
/*
 * Reinitialize a dispatcher from a pid and node id, without resizing or
 * clearing its data buffers
 *
 * The dispatcher must already own a ticket.  The ticket is reset (it gets a
 * new unique id) and the request header is rewritten; the request body is
 * left as it is.  The mp argument is not used.
 */
static void
fdisp_refresh_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
    struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred)
{
	MPASS(fdip->tick);
	/*
	 * NOTE(review): fdip->finh is passed to fuse_setup_ihead as a pointer
	 * to the header, so sizeof(fdip->finh) looks like the size of a
	 * pointer rather than of the header -- confirm whether
	 * sizeof(*fdip->finh) was intended.
	 */
	MPASS2(sizeof(fdip->finh) + fdip->iosize <= fdip->tick->tk_ms_fiov.len,
		"Must use fdisp_make_pid to increase the size of the fiov");
	fticket_reset(fdip->tick);

	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
	    fdip->indata, fdip->iosize);

	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid,
		cred);
}
924 
925 /* Initialize a dispatcher from a pid and node id */
926 static void
927 fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
928     struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred)
929 {
930 	if (fdip->tick) {
931 		fticket_refresh(fdip->tick);
932 	} else {
933 		fdip->tick = fuse_ticket_fetch(data);
934 	}
935 
936 	/* FUSE_DIMALLOC will bzero the fiovs when it enlarges them */
937 	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
938 	    fdip->indata, fdip->iosize);
939 
940 	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid, cred);
941 }
942 
943 void
944 fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp,
945     uint64_t nid, struct thread *td, struct ucred *cred)
946 {
947 	struct fuse_data *data = fuse_get_mpdata(mp);
948 	RECTIFY_TDCR(td, cred);
949 
950 	return fdisp_make_pid(fdip, op, data, nid, td->td_proc->p_pid, cred);
951 }
952 
953 void
954 fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
955     struct vnode *vp, struct thread *td, struct ucred *cred)
956 {
957 	struct mount *mp = vnode_mount(vp);
958 	struct fuse_data *data = fuse_get_mpdata(mp);
959 
960 	RECTIFY_TDCR(td, cred);
961 	return fdisp_make_pid(fdip, op, data, VTOI(vp),
962 	    td->td_proc->p_pid, cred);
963 }
964 
965 /* Refresh a fuse_dispatcher so it can be reused, but don't zero its data */
966 void
967 fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
968     struct vnode *vp, struct thread *td, struct ucred *cred)
969 {
970 	RECTIFY_TDCR(td, cred);
971 	return fdisp_refresh_pid(fdip, op, vnode_mount(vp), VTOI(vp),
972 	    td->td_proc->p_pid, cred);
973 }
974 
975 SDT_PROBE_DEFINE2(fusefs, , ipc, fdisp_wait_answ_error, "char*", "int");
976 
977 int
978 fdisp_wait_answ(struct fuse_dispatcher *fdip)
979 {
980 	int err = 0;
981 
982 	fdip->answ_stat = 0;
983 	fuse_insert_callback(fdip->tick, fuse_standard_handler);
984 	fuse_insert_message(fdip->tick, false);
985 
986 	if ((err = fticket_wait_answer(fdip->tick))) {
987 		fuse_lck_mtx_lock(fdip->tick->tk_aw_mtx);
988 
989 		if (fticket_answered(fdip->tick)) {
990 			/*
991 	                 * Just between noticing the interrupt and getting here,
992 	                 * the standard handler has completed his job.
993 	                 * So we drop the ticket and exit as usual.
994 	                 */
995 			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
996 				"IPC: interrupted, already answered", err);
997 			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
998 			goto out;
999 		} else {
1000 			/*
1001 	                 * So we were faster than the standard handler.
1002 	                 * Then by setting the answered flag we get *him*
1003 	                 * to drop the ticket.
1004 	                 */
1005 			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1006 				"IPC: interrupted, setting to answered", err);
1007 			fticket_set_answered(fdip->tick);
1008 			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
1009 			return err;
1010 		}
1011 	}
1012 
1013 	if (fdip->tick->tk_aw_errno == ENOTCONN) {
1014 		/* The daemon died while we were waiting for a response */
1015 		err = ENOTCONN;
1016 		goto out;
1017 	} else if (fdip->tick->tk_aw_errno) {
1018 		/*
1019 		 * There was some sort of communication error with the daemon
1020 		 * that the client wouldn't understand.
1021 		 */
1022 		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1023 			"IPC: explicit EIO-ing", fdip->tick->tk_aw_errno);
1024 		err = EIO;
1025 		goto out;
1026 	}
1027 	if ((err = fdip->tick->tk_aw_ohead.error)) {
1028 		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1029 			"IPC: setting status", fdip->tick->tk_aw_ohead.error);
1030 		/*
1031 	         * This means a "proper" fuse syscall error.
1032 	         * We record this value so the caller will
1033 	         * be able to know it's not a boring messaging
1034 	         * failure, if she wishes so (and if not, she can
1035 	         * just simply propagate the return value of this routine).
1036 	         * [XXX Maybe a bitflag would do the job too,
1037 	         * if other flags needed, this will be converted thusly.]
1038 	         */
1039 		fdip->answ_stat = err;
1040 		goto out;
1041 	}
1042 	fdip->answ = fticket_resp(fdip->tick)->base;
1043 	fdip->iosize = fticket_resp(fdip->tick)->len;
1044 
1045 	return 0;
1046 
1047 out:
1048 	return err;
1049 }
1050 
/*
 * Set up the IPC layer's global state: the UMA zone that tickets are
 * allocated from (with its ctor/dtor/init/fini hooks) and the counter of
 * allocated tickets.
 */
void
fuse_ipc_init(void)
{
	ticket_zone = uma_zcreate("fuse_ticket", sizeof(struct fuse_ticket),
	    fticket_ctor, fticket_dtor, fticket_init, fticket_fini,
	    UMA_ALIGN_PTR, 0);
	fuse_ticket_count = counter_u64_alloc(M_WAITOK);
}
1059 
/*
 * Release the global state created by fuse_ipc_init: the ticket counter and
 * the ticket zone.
 */
void
fuse_ipc_destroy(void)
{
	counter_u64_free(fuse_ticket_count);
	uma_zdestroy(ticket_zone);
}
1066 
1067 SDT_PROBE_DEFINE3(fusefs,, ipc, warn, "struct fuse_data*", "unsigned", "char*");
1068 void
1069 fuse_warn(struct fuse_data *data, unsigned flag, const char *msg)
1070 {
1071 	SDT_PROBE3(fusefs, , ipc, warn, data, flag, msg);
1072 	if (!(data->dataflags & flag)) {
1073 		printf("WARNING: FUSE protocol violation for server mounted at "
1074 		    "%s: %s  "
1075 		    "This warning will not be repeated.\n",
1076 		    data->mp->mnt_stat.f_mntonname, msg);
1077 		data->dataflags |= flag;
1078 	}
1079 }
1080