xref: /freebsd/sys/fs/fuse/fuse_ipc.c (revision 0d8fe2373503aeac48492f28073049a8bfa4feb5)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  * * Redistributions of source code must retain the above copyright
12  *   notice, this list of conditions and the following disclaimer.
13  * * Redistributions in binary form must reproduce the above
14  *   copyright notice, this list of conditions and the following disclaimer
15  *   in the documentation and/or other materials provided with the
16  *   distribution.
17  * * Neither the name of Google Inc. nor the names of its
18  *   contributors may be used to endorse or promote products derived from
19  *   this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Copyright (C) 2005 Csaba Henk.
34  * All rights reserved.
35  *
36  * Copyright (c) 2019 The FreeBSD Foundation
37  *
38  * Portions of this software were developed by BFF Storage Systems, LLC under
39  * sponsorship from the FreeBSD Foundation.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  *
50  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  */
62 
63 #include <sys/cdefs.h>
64 __FBSDID("$FreeBSD$");
65 
66 #include <sys/param.h>
67 #include <sys/module.h>
68 #include <sys/systm.h>
69 #include <sys/counter.h>
70 #include <sys/errno.h>
71 #include <sys/kernel.h>
72 #include <sys/conf.h>
73 #include <sys/uio.h>
74 #include <sys/malloc.h>
75 #include <sys/queue.h>
76 #include <sys/lock.h>
77 #include <sys/sx.h>
78 #include <sys/mutex.h>
79 #include <sys/proc.h>
80 #include <sys/mount.h>
81 #include <sys/sdt.h>
82 #include <sys/vnode.h>
83 #include <sys/signalvar.h>
84 #include <sys/syscallsubr.h>
85 #include <sys/sysctl.h>
86 #include <vm/uma.h>
87 
88 #include "fuse.h"
89 #include "fuse_node.h"
90 #include "fuse_ipc.h"
91 #include "fuse_internal.h"
92 
93 SDT_PROVIDER_DECLARE(fusefs);
94 /*
95  * Fuse trace probe:
96  * arg0: verbosity.  Higher numbers give more verbose messages
97  * arg1: Textual message
98  */
99 SDT_PROBE_DEFINE2(fusefs, , ipc, trace, "int", "char*");
100 
101 static void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
102     struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred);
103 static void fuse_interrupt_send(struct fuse_ticket *otick, int err);
104 static struct fuse_ticket *fticket_alloc(struct fuse_data *data);
105 static void fticket_refresh(struct fuse_ticket *ftick);
106 static inline void fticket_reset(struct fuse_ticket *ftick);
107 static void fticket_destroy(struct fuse_ticket *ftick);
108 static int fticket_wait_answer(struct fuse_ticket *ftick);
109 static inline int
110 fticket_aw_pull_uio(struct fuse_ticket *ftick,
111     struct uio *uio);
112 
113 static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen);
114 
115 static fuse_handler_t fuse_standard_handler;
116 
117 static counter_u64_t fuse_ticket_count;
118 SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, ticket_count, CTLFLAG_RD,
119     &fuse_ticket_count, "Number of allocated tickets");
120 
121 static long fuse_iov_permanent_bufsize = 1 << 19;
122 
123 SYSCTL_LONG(_vfs_fusefs, OID_AUTO, iov_permanent_bufsize, CTLFLAG_RW,
124     &fuse_iov_permanent_bufsize, 0,
125     "limit for permanently stored buffer size for fuse_iovs");
126 static int fuse_iov_credit = 16;
127 
128 SYSCTL_INT(_vfs_fusefs, OID_AUTO, iov_credit, CTLFLAG_RW,
129     &fuse_iov_credit, 0,
130     "how many times is an oversized fuse_iov tolerated");
131 
132 MALLOC_DEFINE(M_FUSEMSG, "fuse_msgbuf", "fuse message buffer");
133 static uma_zone_t ticket_zone;
134 
135 /*
136  * TODO: figure out how to timeout INTERRUPT requests, because the daemon may
137  * leagally never respond
138  */
139 static int
140 fuse_interrupt_callback(struct fuse_ticket *tick, struct uio *uio)
141 {
142 	struct fuse_ticket *otick, *x_tick;
143 	struct fuse_interrupt_in *fii;
144 	struct fuse_data *data = tick->tk_data;
145 	bool found = false;
146 
147 	fii = (struct fuse_interrupt_in*)((char*)tick->tk_ms_fiov.base +
148 		sizeof(struct fuse_in_header));
149 
150 	fuse_lck_mtx_lock(data->aw_mtx);
151 	TAILQ_FOREACH_SAFE(otick, &data->aw_head, tk_aw_link, x_tick) {
152 		if (otick->tk_unique == fii->unique) {
153 			found = true;
154 			break;
155 		}
156 	}
157 	fuse_lck_mtx_unlock(data->aw_mtx);
158 
159 	if (!found) {
160 		/* Original is already complete.  Just return */
161 		return 0;
162 	}
163 
164 	/* Clear the original ticket's interrupt association */
165 	otick->irq_unique = 0;
166 
167 	if (tick->tk_aw_ohead.error == ENOSYS) {
168 		fsess_set_notimpl(data->mp, FUSE_INTERRUPT);
169 		return 0;
170 	} else if (tick->tk_aw_ohead.error == EAGAIN) {
171 		/*
172 		 * There are two reasons we might get this:
173 		 * 1) the daemon received the INTERRUPT request before the
174 		 *    original, or
175 		 * 2) the daemon received the INTERRUPT request after it
176 		 *    completed the original request.
177 		 * In the first case we should re-send the INTERRUPT.  In the
178 		 * second, we should ignore it.
179 		 */
180 		/* Resend */
181 		fuse_interrupt_send(otick, EINTR);
182 		return 0;
183 	} else {
184 		/* Illegal FUSE_INTERRUPT response */
185 		return EINVAL;
186 	}
187 }
188 
189 /* Interrupt the operation otick.  Return err as its error code */
190 void
191 fuse_interrupt_send(struct fuse_ticket *otick, int err)
192 {
193 	struct fuse_dispatcher fdi;
194 	struct fuse_interrupt_in *fii;
195 	struct fuse_in_header *ftick_hdr;
196 	struct fuse_data *data = otick->tk_data;
197 	struct fuse_ticket *tick, *xtick;
198 	struct ucred reused_creds;
199 	gid_t reused_groups[1];
200 
201 	if (otick->irq_unique == 0) {
202 		/*
203 		 * If the daemon hasn't yet received otick, then we can answer
204 		 * it ourselves and return.
205 		 */
206 		fuse_lck_mtx_lock(data->ms_mtx);
207 		STAILQ_FOREACH_SAFE(tick, &otick->tk_data->ms_head, tk_ms_link,
208 			xtick) {
209 			if (tick == otick) {
210 				STAILQ_REMOVE(&otick->tk_data->ms_head, tick,
211 					fuse_ticket, tk_ms_link);
212 				otick->tk_data->ms_count--;
213 				otick->tk_ms_link.stqe_next = NULL;
214 				fuse_lck_mtx_unlock(data->ms_mtx);
215 
216 				fuse_lck_mtx_lock(otick->tk_aw_mtx);
217 				if (!fticket_answered(otick)) {
218 					fticket_set_answered(otick);
219 					otick->tk_aw_errno = err;
220 					wakeup(otick);
221 				}
222 				fuse_lck_mtx_unlock(otick->tk_aw_mtx);
223 
224 				fuse_ticket_drop(tick);
225 				return;
226 			}
227 		}
228 		fuse_lck_mtx_unlock(data->ms_mtx);
229 
230 		/*
231 		 * If the fuse daemon doesn't support interrupts, then there's
232 		 * nothing more that we can do
233 		 */
234 		if (fsess_not_impl(data->mp, FUSE_INTERRUPT))
235 			return;
236 
237 		/*
238 		 * If the fuse daemon has already received otick, then we must
239 		 * send FUSE_INTERRUPT.
240 		 */
241 		ftick_hdr = fticket_in_header(otick);
242 		reused_creds.cr_uid = ftick_hdr->uid;
243 		reused_groups[0] = ftick_hdr->gid;
244 		reused_creds.cr_groups = reused_groups;
245 		fdisp_init(&fdi, sizeof(*fii));
246 		fdisp_make_pid(&fdi, FUSE_INTERRUPT, data, ftick_hdr->nodeid,
247 			ftick_hdr->pid, &reused_creds);
248 
249 		fii = fdi.indata;
250 		fii->unique = otick->tk_unique;
251 		fuse_insert_callback(fdi.tick, fuse_interrupt_callback);
252 
253 		otick->irq_unique = fdi.tick->tk_unique;
254 		/* Interrupt ops should be delivered ASAP */
255 		fuse_insert_message(fdi.tick, true);
256 		fdisp_destroy(&fdi);
257 	} else {
258 		/* This ticket has already been interrupted */
259 	}
260 }
261 
262 void
263 fiov_init(struct fuse_iov *fiov, size_t size)
264 {
265 	uint32_t msize = FU_AT_LEAST(size);
266 
267 	fiov->len = 0;
268 
269 	fiov->base = malloc(msize, M_FUSEMSG, M_WAITOK | M_ZERO);
270 
271 	fiov->allocated_size = msize;
272 	fiov->credit = fuse_iov_credit;
273 }
274 
275 void
276 fiov_teardown(struct fuse_iov *fiov)
277 {
278 	MPASS(fiov->base != NULL);
279 	free(fiov->base, M_FUSEMSG);
280 }
281 
282 void
283 fiov_adjust(struct fuse_iov *fiov, size_t size)
284 {
285 	if (fiov->allocated_size < size ||
286 	    (fuse_iov_permanent_bufsize >= 0 &&
287 	    fiov->allocated_size - size > fuse_iov_permanent_bufsize &&
288 	    --fiov->credit < 0)) {
289 		fiov->base = realloc(fiov->base, FU_AT_LEAST(size), M_FUSEMSG,
290 		    M_WAITOK | M_ZERO);
291 		if (!fiov->base) {
292 			panic("FUSE: realloc failed");
293 		}
294 		fiov->allocated_size = FU_AT_LEAST(size);
295 		fiov->credit = fuse_iov_credit;
296 		/* Clear data buffer after reallocation */
297 		bzero(fiov->base, size);
298 	} else if (size > fiov->len) {
299 		/* Clear newly extended portion of data buffer */
300 		bzero((char*)fiov->base + fiov->len, size - fiov->len);
301 	}
302 	fiov->len = size;
303 }
304 
/*
 * Resize the fiov if needed and set its length back to zero, by adjusting
 * it to a size of 0.
 */
void
fiov_refresh(struct fuse_iov *fiov)
{
	fiov_adjust(fiov, 0);
}
311 
312 static int
313 fticket_ctor(void *mem, int size, void *arg, int flags)
314 {
315 	struct fuse_ticket *ftick = mem;
316 	struct fuse_data *data = arg;
317 
318 	FUSE_ASSERT_MS_DONE(ftick);
319 	FUSE_ASSERT_AW_DONE(ftick);
320 
321 	ftick->tk_data = data;
322 	ftick->irq_unique = 0;
323 	refcount_init(&ftick->tk_refcount, 1);
324 	counter_u64_add(fuse_ticket_count, 1);
325 
326 	fticket_refresh(ftick);
327 
328 	return 0;
329 }
330 
331 static void
332 fticket_dtor(void *mem, int size, void *arg)
333 {
334 #ifdef INVARIANTS
335 	struct fuse_ticket *ftick = mem;
336 #endif
337 
338 	FUSE_ASSERT_MS_DONE(ftick);
339 	FUSE_ASSERT_AW_DONE(ftick);
340 
341 	counter_u64_add(fuse_ticket_count, -1);
342 }
343 
344 static int
345 fticket_init(void *mem, int size, int flags)
346 {
347 	struct fuse_ticket *ftick = mem;
348 
349 	bzero(ftick, sizeof(struct fuse_ticket));
350 
351 	fiov_init(&ftick->tk_ms_fiov, sizeof(struct fuse_in_header));
352 
353 	mtx_init(&ftick->tk_aw_mtx, "fuse answer delivery mutex", NULL, MTX_DEF);
354 	fiov_init(&ftick->tk_aw_fiov, 0);
355 
356 	return 0;
357 }
358 
359 static void
360 fticket_fini(void *mem, int size)
361 {
362 	struct fuse_ticket *ftick = mem;
363 
364 	fiov_teardown(&ftick->tk_ms_fiov);
365 	fiov_teardown(&ftick->tk_aw_fiov);
366 	mtx_destroy(&ftick->tk_aw_mtx);
367 }
368 
369 static inline struct fuse_ticket *
370 fticket_alloc(struct fuse_data *data)
371 {
372 	return uma_zalloc_arg(ticket_zone, data, M_WAITOK);
373 }
374 
375 static inline void
376 fticket_destroy(struct fuse_ticket *ftick)
377 {
378 	return uma_zfree(ticket_zone, ftick);
379 }
380 
381 /* Prepare the ticket to be reused and clear its data buffers */
382 static inline void
383 fticket_refresh(struct fuse_ticket *ftick)
384 {
385 	fticket_reset(ftick);
386 
387 	fiov_refresh(&ftick->tk_ms_fiov);
388 	fiov_refresh(&ftick->tk_aw_fiov);
389 }
390 
391 /* Prepare the ticket to be reused, but don't clear its data buffers */
392 static inline void
393 fticket_reset(struct fuse_ticket *ftick)
394 {
395 	struct fuse_data *data = ftick->tk_data;
396 
397 	FUSE_ASSERT_MS_DONE(ftick);
398 	FUSE_ASSERT_AW_DONE(ftick);
399 
400 	bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header));
401 
402 	ftick->tk_aw_errno = 0;
403 	ftick->tk_flag = 0;
404 
405 	/* May be truncated to 32 bits on LP32 arches */
406 	ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
407 	if (ftick->tk_unique == 0)
408 		ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
409 }
410 
411 static int
412 fticket_wait_answer(struct fuse_ticket *ftick)
413 {
414 	struct thread *td = curthread;
415 	sigset_t blockedset, oldset;
416 	int err = 0, stops_deferred;
417 	struct fuse_data *data = ftick->tk_data;
418 	bool interrupted = false;
419 
420 	if (fsess_maybe_impl(ftick->tk_data->mp, FUSE_INTERRUPT) &&
421 	    data->dataflags & FSESS_INTR) {
422 		SIGEMPTYSET(blockedset);
423 	} else {
424 		/* Block all signals except (implicitly) SIGKILL */
425 		SIGFILLSET(blockedset);
426 	}
427 	stops_deferred = sigdeferstop(SIGDEFERSTOP_SILENT);
428 	kern_sigprocmask(td, SIG_BLOCK, NULL, &oldset, 0);
429 
430 	fuse_lck_mtx_lock(ftick->tk_aw_mtx);
431 
432 retry:
433 	if (fticket_answered(ftick)) {
434 		goto out;
435 	}
436 
437 	if (fdata_get_dead(data)) {
438 		err = ENOTCONN;
439 		fticket_set_answered(ftick);
440 		goto out;
441 	}
442 	kern_sigprocmask(td, SIG_BLOCK, &blockedset, NULL, 0);
443 	err = msleep(ftick, &ftick->tk_aw_mtx, PCATCH, "fu_ans",
444 	    data->daemon_timeout * hz);
445 	kern_sigprocmask(td, SIG_SETMASK, &oldset, NULL, 0);
446 	if (err == EWOULDBLOCK) {
447 		SDT_PROBE2(fusefs, , ipc, trace, 3,
448 			"fticket_wait_answer: EWOULDBLOCK");
449 #ifdef XXXIP				/* die conditionally */
450 		if (!fdata_get_dead(data)) {
451 			fdata_set_dead(data);
452 		}
453 #endif
454 		err = ETIMEDOUT;
455 		fticket_set_answered(ftick);
456 	} else if ((err == EINTR || err == ERESTART)) {
457 		/*
458 		 * Whether we get EINTR or ERESTART depends on whether
459 		 * SA_RESTART was set by sigaction(2).
460 		 *
461 		 * Try to interrupt the operation and wait for an EINTR response
462 		 * to the original operation.  If the file system does not
463 		 * support FUSE_INTERRUPT, then we'll just wait for it to
464 		 * complete like normal.  If it does support FUSE_INTERRUPT,
465 		 * then it will either respond EINTR to the original operation,
466 		 * or EAGAIN to the interrupt.
467 		 */
468 		sigset_t tmpset;
469 
470 		SDT_PROBE2(fusefs, , ipc, trace, 4,
471 			"fticket_wait_answer: interrupt");
472 		fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
473 		fuse_interrupt_send(ftick, err);
474 
475 		PROC_LOCK(td->td_proc);
476 		mtx_lock(&td->td_proc->p_sigacts->ps_mtx);
477 		tmpset = td->td_proc->p_siglist;
478 		SIGSETOR(tmpset, td->td_siglist);
479 		mtx_unlock(&td->td_proc->p_sigacts->ps_mtx);
480 		PROC_UNLOCK(td->td_proc);
481 
482 		fuse_lck_mtx_lock(ftick->tk_aw_mtx);
483 		if (!interrupted && !SIGISMEMBER(tmpset, SIGKILL)) {
484 			/*
485 			 * Block all signals while we wait for an interrupt
486 			 * response.  The protocol doesn't discriminate between
487 			 * different signals.
488 			 */
489 			SIGFILLSET(blockedset);
490 			interrupted = true;
491 			goto retry;
492 		} else {
493 			/*
494 			 * Return immediately for fatal signals, or if this is
495 			 * the second interruption.  We should only be
496 			 * interrupted twice if the thread is stopped, for
497 			 * example during sigexit.
498 			 */
499 		}
500 	} else if (err) {
501 		SDT_PROBE2(fusefs, , ipc, trace, 6,
502 			"fticket_wait_answer: other error");
503 	} else {
504 		SDT_PROBE2(fusefs, , ipc, trace, 7, "fticket_wait_answer: OK");
505 	}
506 out:
507 	if (!(err || fticket_answered(ftick))) {
508 		SDT_PROBE2(fusefs, , ipc, trace, 1,
509 			"FUSE: requester was woken up but still no answer");
510 		err = ENXIO;
511 	}
512 	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
513 	sigallowstop(stops_deferred);
514 
515 	return err;
516 }
517 
518 static	inline
519 int
520 fticket_aw_pull_uio(struct fuse_ticket *ftick, struct uio *uio)
521 {
522 	int err = 0;
523 	size_t len = uio_resid(uio);
524 
525 	if (len) {
526 		fiov_adjust(fticket_resp(ftick), len);
527 		err = uiomove(fticket_resp(ftick)->base, len, uio);
528 	}
529 	return err;
530 }
531 
532 int
533 fticket_pull(struct fuse_ticket *ftick, struct uio *uio)
534 {
535 	int err = 0;
536 
537 	if (ftick->tk_aw_ohead.error) {
538 		return 0;
539 	}
540 	err = fuse_body_audit(ftick, uio_resid(uio));
541 	if (!err) {
542 		err = fticket_aw_pull_uio(ftick, uio);
543 	}
544 	return err;
545 }
546 
547 struct fuse_data *
548 fdata_alloc(struct cdev *fdev, struct ucred *cred)
549 {
550 	struct fuse_data *data;
551 
552 	data = malloc(sizeof(struct fuse_data), M_FUSEMSG, M_WAITOK | M_ZERO);
553 
554 	data->fdev = fdev;
555 	mtx_init(&data->ms_mtx, "fuse message list mutex", NULL, MTX_DEF);
556 	STAILQ_INIT(&data->ms_head);
557 	data->ms_count = 0;
558 	knlist_init_mtx(&data->ks_rsel.si_note, &data->ms_mtx);
559 	mtx_init(&data->aw_mtx, "fuse answer list mutex", NULL, MTX_DEF);
560 	TAILQ_INIT(&data->aw_head);
561 	data->daemoncred = crhold(cred);
562 	data->daemon_timeout = FUSE_DEFAULT_DAEMON_TIMEOUT;
563 	sx_init(&data->rename_lock, "fuse rename lock");
564 	data->ref = 1;
565 
566 	return data;
567 }
568 
569 void
570 fdata_trydestroy(struct fuse_data *data)
571 {
572 	data->ref--;
573 	MPASS(data->ref >= 0);
574 	if (data->ref != 0)
575 		return;
576 
577 	/* Driving off stage all that stuff thrown at device... */
578 	sx_destroy(&data->rename_lock);
579 	crfree(data->daemoncred);
580 	mtx_destroy(&data->aw_mtx);
581 	knlist_delete(&data->ks_rsel.si_note, curthread, 0);
582 	knlist_destroy(&data->ks_rsel.si_note);
583 	mtx_destroy(&data->ms_mtx);
584 
585 	free(data, M_FUSEMSG);
586 }
587 
588 void
589 fdata_set_dead(struct fuse_data *data)
590 {
591 	FUSE_LOCK();
592 	if (fdata_get_dead(data)) {
593 		FUSE_UNLOCK();
594 		return;
595 	}
596 	fuse_lck_mtx_lock(data->ms_mtx);
597 	data->dataflags |= FSESS_DEAD;
598 	wakeup_one(data);
599 	selwakeuppri(&data->ks_rsel, PZERO + 1);
600 	wakeup(&data->ticketer);
601 	fuse_lck_mtx_unlock(data->ms_mtx);
602 	FUSE_UNLOCK();
603 }
604 
605 struct fuse_ticket *
606 fuse_ticket_fetch(struct fuse_data *data)
607 {
608 	int err = 0;
609 	struct fuse_ticket *ftick;
610 
611 	ftick = fticket_alloc(data);
612 
613 	if (!(data->dataflags & FSESS_INITED)) {
614 		/* Sleep until get answer for INIT messsage */
615 		FUSE_LOCK();
616 		if (!(data->dataflags & FSESS_INITED) && data->ticketer > 2) {
617 			err = msleep(&data->ticketer, &fuse_mtx, PCATCH | PDROP,
618 			    "fu_ini", 0);
619 			if (err)
620 				fdata_set_dead(data);
621 		} else
622 			FUSE_UNLOCK();
623 	}
624 	return ftick;
625 }
626 
627 int
628 fuse_ticket_drop(struct fuse_ticket *ftick)
629 {
630 	int die;
631 
632 	die = refcount_release(&ftick->tk_refcount);
633 	if (die)
634 		fticket_destroy(ftick);
635 
636 	return die;
637 }
638 
639 void
640 fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t * handler)
641 {
642 	if (fdata_get_dead(ftick->tk_data)) {
643 		return;
644 	}
645 	ftick->tk_aw_handler = handler;
646 
647 	fuse_lck_mtx_lock(ftick->tk_data->aw_mtx);
648 	fuse_aw_push(ftick);
649 	fuse_lck_mtx_unlock(ftick->tk_data->aw_mtx);
650 }
651 
652 /*
653  * Insert a new upgoing ticket into the message queue
654  *
655  * If urgent is true, insert at the front of the queue.  Otherwise, insert in
656  * FIFO order.
657  */
658 void
659 fuse_insert_message(struct fuse_ticket *ftick, bool urgent)
660 {
661 	if (ftick->tk_flag & FT_DIRTY) {
662 		panic("FUSE: ticket reused without being refreshed");
663 	}
664 	ftick->tk_flag |= FT_DIRTY;
665 
666 	if (fdata_get_dead(ftick->tk_data)) {
667 		return;
668 	}
669 	fuse_lck_mtx_lock(ftick->tk_data->ms_mtx);
670 	if (urgent)
671 		fuse_ms_push_head(ftick);
672 	else
673 		fuse_ms_push(ftick);
674 	wakeup_one(ftick->tk_data);
675 	selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1);
676 	KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0);
677 	fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx);
678 }
679 
680 static int
681 fuse_body_audit(struct fuse_ticket *ftick, size_t blen)
682 {
683 	int err = 0;
684 	enum fuse_opcode opcode;
685 
686 	opcode = fticket_opcode(ftick);
687 
688 	switch (opcode) {
689 	case FUSE_BMAP:
690 		err = (blen == sizeof(struct fuse_bmap_out)) ? 0 : EINVAL;
691 		break;
692 
693 	case FUSE_LINK:
694 	case FUSE_LOOKUP:
695 	case FUSE_MKDIR:
696 	case FUSE_MKNOD:
697 	case FUSE_SYMLINK:
698 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
699 			err = (blen == sizeof(struct fuse_entry_out)) ?
700 				0 : EINVAL;
701 		} else {
702 			err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE) ? 0 : EINVAL;
703 		}
704 		break;
705 
706 	case FUSE_FORGET:
707 		panic("FUSE: a handler has been intalled for FUSE_FORGET");
708 		break;
709 
710 	case FUSE_GETATTR:
711 	case FUSE_SETATTR:
712 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
713 			err = (blen == sizeof(struct fuse_attr_out)) ?
714 			  0 : EINVAL;
715 		} else {
716 			err = (blen == FUSE_COMPAT_ATTR_OUT_SIZE) ? 0 : EINVAL;
717 		}
718 		break;
719 
720 	case FUSE_READLINK:
721 		err = (PAGE_SIZE >= blen) ? 0 : EINVAL;
722 		break;
723 
724 	case FUSE_UNLINK:
725 		err = (blen == 0) ? 0 : EINVAL;
726 		break;
727 
728 	case FUSE_RMDIR:
729 		err = (blen == 0) ? 0 : EINVAL;
730 		break;
731 
732 	case FUSE_RENAME:
733 		err = (blen == 0) ? 0 : EINVAL;
734 		break;
735 
736 	case FUSE_OPEN:
737 		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
738 		break;
739 
740 	case FUSE_READ:
741 		err = (((struct fuse_read_in *)(
742 		    (char *)ftick->tk_ms_fiov.base +
743 		    sizeof(struct fuse_in_header)
744 		    ))->size >= blen) ? 0 : EINVAL;
745 		break;
746 
747 	case FUSE_WRITE:
748 		err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL;
749 		break;
750 
751 	case FUSE_STATFS:
752 		if (fuse_libabi_geq(ftick->tk_data, 7, 4)) {
753 			err = (blen == sizeof(struct fuse_statfs_out)) ?
754 			  0 : EINVAL;
755 		} else {
756 			err = (blen == FUSE_COMPAT_STATFS_SIZE) ? 0 : EINVAL;
757 		}
758 		break;
759 
760 	case FUSE_RELEASE:
761 		err = (blen == 0) ? 0 : EINVAL;
762 		break;
763 
764 	case FUSE_FSYNC:
765 		err = (blen == 0) ? 0 : EINVAL;
766 		break;
767 
768 	case FUSE_SETXATTR:
769 		err = (blen == 0) ? 0 : EINVAL;
770 		break;
771 
772 	case FUSE_GETXATTR:
773 	case FUSE_LISTXATTR:
774 		/*
775 		 * These can have varying response lengths, and 0 length
776 		 * isn't necessarily invalid.
777 		 */
778 		err = 0;
779 		break;
780 
781 	case FUSE_REMOVEXATTR:
782 		err = (blen == 0) ? 0 : EINVAL;
783 		break;
784 
785 	case FUSE_FLUSH:
786 		err = (blen == 0) ? 0 : EINVAL;
787 		break;
788 
789 	case FUSE_INIT:
790 		if (blen == sizeof(struct fuse_init_out) ||
791 		    blen == FUSE_COMPAT_INIT_OUT_SIZE ||
792 		    blen == FUSE_COMPAT_22_INIT_OUT_SIZE) {
793 			err = 0;
794 		} else {
795 			err = EINVAL;
796 		}
797 		break;
798 
799 	case FUSE_OPENDIR:
800 		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
801 		break;
802 
803 	case FUSE_READDIR:
804 		err = (((struct fuse_read_in *)(
805 		    (char *)ftick->tk_ms_fiov.base +
806 		    sizeof(struct fuse_in_header)
807 		    ))->size >= blen) ? 0 : EINVAL;
808 		break;
809 
810 	case FUSE_RELEASEDIR:
811 		err = (blen == 0) ? 0 : EINVAL;
812 		break;
813 
814 	case FUSE_FSYNCDIR:
815 		err = (blen == 0) ? 0 : EINVAL;
816 		break;
817 
818 	case FUSE_GETLK:
819 		err = (blen == sizeof(struct fuse_lk_out)) ? 0 : EINVAL;
820 		break;
821 
822 	case FUSE_SETLK:
823 		err = (blen == 0) ? 0 : EINVAL;
824 		break;
825 
826 	case FUSE_SETLKW:
827 		err = (blen == 0) ? 0 : EINVAL;
828 		break;
829 
830 	case FUSE_ACCESS:
831 		err = (blen == 0) ? 0 : EINVAL;
832 		break;
833 
834 	case FUSE_CREATE:
835 		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
836 			err = (blen == sizeof(struct fuse_entry_out) +
837 			    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
838 		} else {
839 			err = (blen == FUSE_COMPAT_ENTRY_OUT_SIZE +
840 			    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
841 		}
842 		break;
843 
844 	case FUSE_DESTROY:
845 		err = (blen == 0) ? 0 : EINVAL;
846 		break;
847 
848 	case FUSE_LSEEK:
849 		err = (blen == sizeof(struct fuse_lseek_out)) ? 0 : EINVAL;
850 		break;
851 
852 	case FUSE_COPY_FILE_RANGE:
853 		err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL;
854 		break;
855 
856 	default:
857 		panic("FUSE: opcodes out of sync (%d)\n", opcode);
858 	}
859 
860 	return err;
861 }
862 
863 static inline void
864 fuse_setup_ihead(struct fuse_in_header *ihead, struct fuse_ticket *ftick,
865     uint64_t nid, enum fuse_opcode op, size_t blen, pid_t pid,
866     struct ucred *cred)
867 {
868 	ihead->len = sizeof(*ihead) + blen;
869 	ihead->unique = ftick->tk_unique;
870 	ihead->nodeid = nid;
871 	ihead->opcode = op;
872 
873 	ihead->pid = pid;
874 	ihead->uid = cred->cr_uid;
875 	ihead->gid = cred->cr_groups[0];
876 }
877 
878 /*
879  * fuse_standard_handler just pulls indata and wakes up pretender.
880  * Doesn't try to interpret data, that's left for the pretender.
881  * Though might do a basic size verification before the pull-in takes place
882  */
883 
884 static int
885 fuse_standard_handler(struct fuse_ticket *ftick, struct uio *uio)
886 {
887 	int err = 0;
888 
889 	err = fticket_pull(ftick, uio);
890 
891 	fuse_lck_mtx_lock(ftick->tk_aw_mtx);
892 
893 	if (!fticket_answered(ftick)) {
894 		fticket_set_answered(ftick);
895 		ftick->tk_aw_errno = err;
896 		wakeup(ftick);
897 	}
898 	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
899 
900 	return err;
901 }
902 
903 /*
904  * Reinitialize a dispatcher from a pid and node id, without resizing or
905  * clearing its data buffers
906  */
907 static void
908 fdisp_refresh_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
909     struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred)
910 {
911 	MPASS(fdip->tick);
912 	MPASS2(sizeof(fdip->finh) + fdip->iosize <= fdip->tick->tk_ms_fiov.len,
913 		"Must use fdisp_make_pid to increase the size of the fiov");
914 	fticket_reset(fdip->tick);
915 
916 	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
917 	    fdip->indata, fdip->iosize);
918 
919 	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid,
920 		cred);
921 }
922 
923 /* Initialize a dispatcher from a pid and node id */
924 static void
925 fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
926     struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred)
927 {
928 	if (fdip->tick) {
929 		fticket_refresh(fdip->tick);
930 	} else {
931 		fdip->tick = fuse_ticket_fetch(data);
932 	}
933 
934 	/* FUSE_DIMALLOC will bzero the fiovs when it enlarges them */
935 	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
936 	    fdip->indata, fdip->iosize);
937 
938 	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid, cred);
939 }
940 
941 void
942 fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp,
943     uint64_t nid, struct thread *td, struct ucred *cred)
944 {
945 	struct fuse_data *data = fuse_get_mpdata(mp);
946 	RECTIFY_TDCR(td, cred);
947 
948 	return fdisp_make_pid(fdip, op, data, nid, td->td_proc->p_pid, cred);
949 }
950 
951 void
952 fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
953     struct vnode *vp, struct thread *td, struct ucred *cred)
954 {
955 	struct mount *mp = vnode_mount(vp);
956 	struct fuse_data *data = fuse_get_mpdata(mp);
957 
958 	RECTIFY_TDCR(td, cred);
959 	return fdisp_make_pid(fdip, op, data, VTOI(vp),
960 	    td->td_proc->p_pid, cred);
961 }
962 
963 /* Refresh a fuse_dispatcher so it can be reused, but don't zero its data */
964 void
965 fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
966     struct vnode *vp, struct thread *td, struct ucred *cred)
967 {
968 	RECTIFY_TDCR(td, cred);
969 	return fdisp_refresh_pid(fdip, op, vnode_mount(vp), VTOI(vp),
970 	    td->td_proc->p_pid, cred);
971 }
972 
973 SDT_PROBE_DEFINE2(fusefs, , ipc, fdisp_wait_answ_error, "char*", "int");
974 
975 int
976 fdisp_wait_answ(struct fuse_dispatcher *fdip)
977 {
978 	int err = 0;
979 
980 	fdip->answ_stat = 0;
981 	fuse_insert_callback(fdip->tick, fuse_standard_handler);
982 	fuse_insert_message(fdip->tick, false);
983 
984 	if ((err = fticket_wait_answer(fdip->tick))) {
985 		fuse_lck_mtx_lock(fdip->tick->tk_aw_mtx);
986 
987 		if (fticket_answered(fdip->tick)) {
988 			/*
989 	                 * Just between noticing the interrupt and getting here,
990 	                 * the standard handler has completed his job.
991 	                 * So we drop the ticket and exit as usual.
992 	                 */
993 			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
994 				"IPC: interrupted, already answered", err);
995 			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
996 			goto out;
997 		} else {
998 			/*
999 	                 * So we were faster than the standard handler.
1000 	                 * Then by setting the answered flag we get *him*
1001 	                 * to drop the ticket.
1002 	                 */
1003 			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1004 				"IPC: interrupted, setting to answered", err);
1005 			fticket_set_answered(fdip->tick);
1006 			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
1007 			return err;
1008 		}
1009 	}
1010 
1011 	if (fdip->tick->tk_aw_errno == ENOTCONN) {
1012 		/* The daemon died while we were waiting for a response */
1013 		err = ENOTCONN;
1014 		goto out;
1015 	} else if (fdip->tick->tk_aw_errno) {
1016 		/*
1017 		 * There was some sort of communication error with the daemon
1018 		 * that the client wouldn't understand.
1019 		 */
1020 		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1021 			"IPC: explicit EIO-ing", fdip->tick->tk_aw_errno);
1022 		err = EIO;
1023 		goto out;
1024 	}
1025 	if ((err = fdip->tick->tk_aw_ohead.error)) {
1026 		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1027 			"IPC: setting status", fdip->tick->tk_aw_ohead.error);
1028 		/*
1029 	         * This means a "proper" fuse syscall error.
1030 	         * We record this value so the caller will
1031 	         * be able to know it's not a boring messaging
1032 	         * failure, if she wishes so (and if not, she can
1033 	         * just simply propagate the return value of this routine).
1034 	         * [XXX Maybe a bitflag would do the job too,
1035 	         * if other flags needed, this will be converted thusly.]
1036 	         */
1037 		fdip->answ_stat = err;
1038 		goto out;
1039 	}
1040 	fdip->answ = fticket_resp(fdip->tick)->base;
1041 	fdip->iosize = fticket_resp(fdip->tick)->len;
1042 
1043 	return 0;
1044 
1045 out:
1046 	return err;
1047 }
1048 
1049 void
1050 fuse_ipc_init(void)
1051 {
1052 	ticket_zone = uma_zcreate("fuse_ticket", sizeof(struct fuse_ticket),
1053 	    fticket_ctor, fticket_dtor, fticket_init, fticket_fini,
1054 	    UMA_ALIGN_PTR, 0);
1055 	fuse_ticket_count = counter_u64_alloc(M_WAITOK);
1056 }
1057 
1058 void
1059 fuse_ipc_destroy(void)
1060 {
1061 	counter_u64_free(fuse_ticket_count);
1062 	uma_zdestroy(ticket_zone);
1063 }
1064 
1065 SDT_PROBE_DEFINE3(fusefs,, ipc, warn, "struct fuse_data*", "unsigned", "char*");
1066 void
1067 fuse_warn(struct fuse_data *data, unsigned flag, const char *msg)
1068 {
1069 	SDT_PROBE3(fusefs, , ipc, warn, data, flag, msg);
1070 	if (!(data->dataflags & flag)) {
1071 		printf("WARNING: FUSE protocol violation for server mounted at "
1072 		    "%s: %s  "
1073 		    "This warning will not be repeated.\n",
1074 		    data->mp->mnt_stat.f_mntonname, msg);
1075 		data->dataflags |= flag;
1076 	}
1077 }
1078