xref: /linux/fs/fuse/dev.c (revision ac7304fa1de49e4663307dc92f34e455daa15a74)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3   FUSE: Filesystem in Userspace
4   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
5 */
6 
7 #include "dev.h"
8 #include "args.h"
9 #include "dev_uring_i.h"
10 
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/poll.h>
14 #include <linux/sched/signal.h>
15 #include <linux/uio.h>
16 #include <linux/miscdevice.h>
17 #include <linux/pagemap.h>
18 #include <linux/file.h>
19 #include <linux/slab.h>
20 #include <linux/pipe_fs_i.h>
21 #include <linux/swap.h>
22 #include <linux/splice.h>
23 #include <linux/sched.h>
24 #include <linux/seq_file.h>
25 
26 #include "fuse_trace.h"
27 
/* Module aliases for the FUSE misc device minor and the "fuse" device name */
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");

/* Woken (wake_up_all) each time a channel is installed into a fuse_dev */
static DECLARE_WAIT_QUEUE_HEAD(fuse_dev_waitq);

/* Slab cache that struct fuse_req objects are allocated from and freed to */
static struct kmem_cache *fuse_req_cachep;
34 
35 static void fuse_request_init(struct fuse_chan *fch, struct fuse_req *req)
36 {
37 	INIT_LIST_HEAD(&req->list);
38 	INIT_LIST_HEAD(&req->intr_entry);
39 	init_waitqueue_head(&req->waitq);
40 	refcount_set(&req->count, 1);
41 	__set_bit(FR_PENDING, &req->flags);
42 	req->chan = fch;
43 	req->create_time = jiffies;
44 }
45 
46 static struct fuse_req *fuse_request_alloc(struct fuse_chan *fch, gfp_t flags)
47 {
48 	struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
49 	if (req)
50 		fuse_request_init(fch, req);
51 
52 	return req;
53 }
54 
/*
 * Give @req back to the request slab cache.
 *
 * Emits a warning if the request is still linked on an interrupt list
 * (req->intr_entry not empty); the request is freed regardless.
 */
static void fuse_request_free(struct fuse_req *req)
{
	WARN_ON(!list_empty(&req->intr_entry));
	kmem_cache_free(fuse_req_cachep, req);
}
60 
/* Take one additional reference on @req */
static void __fuse_get_request(struct fuse_req *req)
{
	refcount_inc(&req->count);
}
65 
/*
 * Drop one reference on @req without freeing it.
 *
 * Must be called with > 1 refcount: this never frees the request, so the
 * caller must not be dropping the last reference here.
 */
static void __fuse_put_request(struct fuse_req *req)
{
	refcount_dec(&req->count);
}
71 
72 void fuse_chan_set_initialized(struct fuse_chan *fch, struct fuse_chan_param *param)
73 {
74 	if (param) {
75 		fch->minor = param->minor;
76 		fch->max_write = param->max_write;
77 		fch->max_pages = param->max_pages;
78 	}
79 
80 	/* Make sure stores before this are seen on another CPU */
81 	smp_wmb();
82 	fch->initialized = 1;
83 	wake_up_all(&fch->blocked_waitq);
84 }
85 
86 static bool fuse_block_alloc(struct fuse_chan *fch, bool for_background)
87 {
88 	return !fch->initialized || (for_background && fch->blocked) ||
89 	       (fch->io_uring && fch->connected && !fuse_uring_ready(fch));
90 }
91 
92 static void fuse_drop_waiting(struct fuse_chan *fch)
93 {
94 	/*
95 	 * lockess check of fch->connected is okay, because atomic_dec_and_test()
96 	 * provides a memory barrier matched with the one in fuse_chan_wait_aborted()
97 	 * to ensure no wake-up is missed.
98 	 */
99 	if (atomic_dec_and_test(&fch->num_waiting) &&
100 	    !READ_ONCE(fch->connected)) {
101 		/* wake up aborters */
102 		wake_up_all(&fch->blocked_waitq);
103 	}
104 }
105 
106 static void fuse_put_request(struct fuse_req *req);
107 
108 static struct fuse_req *fuse_get_req(struct fuse_chan *fch, bool for_background)
109 {
110 	struct fuse_req *req;
111 	int err;
112 
113 	atomic_inc(&fch->num_waiting);
114 
115 	if (fuse_block_alloc(fch, for_background)) {
116 		err = -EINTR;
117 		if (wait_event_state_exclusive(fch->blocked_waitq,
118 				!fuse_block_alloc(fch, for_background),
119 				(TASK_KILLABLE | TASK_FREEZABLE)))
120 			goto out;
121 	}
122 
123 	/* Matches smp_wmb() in fuse_chan_set_initialized() */
124 	smp_rmb();
125 
126 	err = -ENOTCONN;
127 	if (!fch->connected)
128 		goto out;
129 
130 	req = fuse_request_alloc(fch, GFP_KERNEL);
131 	err = -ENOMEM;
132 	if (!req) {
133 		if (for_background)
134 			wake_up(&fch->blocked_waitq);
135 		goto out;
136 	}
137 
138 	__set_bit(FR_WAITING, &req->flags);
139 	if (for_background)
140 		__set_bit(FR_BACKGROUND, &req->flags);
141 
142 	return req;
143 
144  out:
145 	fuse_drop_waiting(fch);
146 	return ERR_PTR(err);
147 }
148 
149 static void fuse_put_request(struct fuse_req *req)
150 {
151 	struct fuse_chan *fch = req->chan;
152 
153 	if (refcount_dec_and_test(&req->count)) {
154 		if (test_bit(FR_BACKGROUND, &req->flags)) {
155 			/*
156 			 * We get here in the unlikely case that a background
157 			 * request was allocated but not sent
158 			 */
159 			spin_lock(&fch->bg_lock);
160 			if (!fch->blocked)
161 				wake_up(&fch->blocked_waitq);
162 			spin_unlock(&fch->bg_lock);
163 		}
164 
165 		if (test_bit(FR_WAITING, &req->flags)) {
166 			__clear_bit(FR_WAITING, &req->flags);
167 			fuse_drop_waiting(fch);
168 		}
169 
170 		fuse_request_free(req);
171 	}
172 }
173 
174 unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
175 {
176 	unsigned nbytes = 0;
177 	unsigned i;
178 
179 	for (i = 0; i < numargs; i++)
180 		nbytes += args[i].size;
181 
182 	return nbytes;
183 }
184 EXPORT_SYMBOL_GPL(fuse_len_args);
185 
186 static u64 fuse_get_unique_locked(struct fuse_iqueue *fiq)
187 {
188 	fiq->reqctr += FUSE_REQ_ID_STEP;
189 	return fiq->reqctr;
190 }
191 
192 u64 fuse_get_unique(struct fuse_iqueue *fiq)
193 {
194 	u64 ret;
195 
196 	spin_lock(&fiq->lock);
197 	ret = fuse_get_unique_locked(fiq);
198 	spin_unlock(&fiq->lock);
199 
200 	return ret;
201 }
202 EXPORT_SYMBOL_GPL(fuse_get_unique);
203 
204 unsigned int fuse_req_hash(u64 unique)
205 {
206 	return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
207 }
208 EXPORT_SYMBOL_GPL(fuse_req_hash);
209 
/*
 * A new request is available, wake fiq->waitq
 *
 * Also sends SIGIO/POLL_IN to the fasync list of @fiq.  Must be entered
 * with fiq->lock held; the lock is released before returning.
 */
static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	wake_up(&fiq->waitq);
	kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
	spin_unlock(&fiq->lock);
}
220 
221 struct fuse_forget_link *fuse_alloc_forget(void)
222 {
223 	return kzalloc_obj(struct fuse_forget_link, GFP_KERNEL_ACCOUNT);
224 }
225 
226 void fuse_dev_queue_forget(struct fuse_iqueue *fiq,
227 			   struct fuse_forget_link *forget)
228 {
229 	spin_lock(&fiq->lock);
230 	if (fiq->connected) {
231 		fiq->forget_list_tail->next = forget;
232 		fiq->forget_list_tail = forget;
233 		fuse_dev_wake_and_unlock(fiq);
234 	} else {
235 		kfree(forget);
236 		spin_unlock(&fiq->lock);
237 	}
238 }
239 
240 void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
241 {
242 	spin_lock(&fiq->lock);
243 	if (list_empty(&req->intr_entry)) {
244 		list_add_tail(&req->intr_entry, &fiq->interrupts);
245 		/*
246 		 * Pairs with smp_mb() implied by test_and_set_bit()
247 		 * from fuse_request_end().
248 		 */
249 		smp_mb();
250 		if (test_bit(FR_FINISHED, &req->flags)) {
251 			list_del_init(&req->intr_entry);
252 			spin_unlock(&fiq->lock);
253 		} else  {
254 			fuse_dev_wake_and_unlock(fiq);
255 		}
256 	} else {
257 		spin_unlock(&fiq->lock);
258 	}
259 }
260 
261 static inline void fuse_request_assign_unique_locked(struct fuse_iqueue *fiq,
262 						     struct fuse_req *req)
263 {
264 	if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
265 		req->in.h.unique = fuse_get_unique_locked(fiq);
266 
267 	/* tracepoint captures in.h.unique and in.h.len */
268 	trace_fuse_request_send(req);
269 }
270 
271 inline void fuse_request_assign_unique(struct fuse_iqueue *fiq,
272 				       struct fuse_req *req)
273 {
274 	if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
275 		req->in.h.unique = fuse_get_unique(fiq);
276 
277 	/* tracepoint captures in.h.unique and in.h.len */
278 	trace_fuse_request_send(req);
279 }
280 EXPORT_SYMBOL_GPL(fuse_request_assign_unique);
281 
282 static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
283 {
284 	spin_lock(&fiq->lock);
285 	if (fiq->connected) {
286 		fuse_request_assign_unique_locked(fiq, req);
287 		list_add_tail(&req->list, &fiq->pending);
288 		fuse_dev_wake_and_unlock(fiq);
289 	} else {
290 		spin_unlock(&fiq->lock);
291 		req->out.h.error = -ENOTCONN;
292 		clear_bit(FR_PENDING, &req->flags);
293 		fuse_request_end(req);
294 	}
295 }
296 
/*
 * Input queue operations installed by fuse_dev_chan_new(): forgets,
 * interrupts and regular requests are all queued on the fuse_iqueue
 * lists by the fuse_dev_queue_*() helpers above.  No .release hook is set.
 */
static const struct fuse_iqueue_ops fuse_dev_fiq_ops = {
	.send_forget	= fuse_dev_queue_forget,
	.send_interrupt	= fuse_dev_queue_interrupt,
	.send_req	= fuse_dev_queue_req,
};
302 
303 void fuse_iqueue_init(struct fuse_iqueue *fiq, const struct fuse_iqueue_ops *ops, void *priv)
304 {
305 	spin_lock_init(&fiq->lock);
306 	init_waitqueue_head(&fiq->waitq);
307 	INIT_LIST_HEAD(&fiq->pending);
308 	INIT_LIST_HEAD(&fiq->interrupts);
309 	fiq->forget_list_tail = &fiq->forget_list_head;
310 	fiq->connected = 1;
311 	fiq->ops = ops;
312 	fiq->priv = priv;
313 }
314 EXPORT_SYMBOL_GPL(fuse_iqueue_init);
315 
316 void fuse_chan_release(struct fuse_chan *fch)
317 {
318 	struct fuse_iqueue *fiq = &fch->iq;
319 
320 	if (fiq->ops->release)
321 		fiq->ops->release(fiq);
322 
323 	if (fch->timeout.req_timeout)
324 		cancel_delayed_work_sync(&fch->timeout.work);
325 }
326 
/*
 * Free channel @fch together with its preallocated processing queue, if
 * one is still attached (kfree() of a NULL pq_prealloc is a no-op).
 *
 * Warns if devices are still linked on fch->devices.
 */
void fuse_chan_free(struct fuse_chan *fch)
{
	WARN_ON(!list_empty(&fch->devices));
	kfree(fch->pq_prealloc);
	kfree(fch);
}
EXPORT_SYMBOL_GPL(fuse_chan_free);
334 
335 struct fuse_chan *fuse_chan_new(void)
336 {
337 	struct fuse_chan *fch = kzalloc_obj(struct fuse_chan);
338 	if (!fch)
339 		return NULL;
340 
341 	spin_lock_init(&fch->lock);
342 	INIT_LIST_HEAD(&fch->devices);
343 	spin_lock_init(&fch->bg_lock);
344 	INIT_LIST_HEAD(&fch->bg_queue);
345 	init_waitqueue_head(&fch->blocked_waitq);
346 	atomic_set(&fch->num_waiting, 0);
347 	fch->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
348 	fch->initialized = 0;
349 	fch->blocked = 0;
350 	fch->connected = 1;
351 	fch->timeout.req_timeout = 0;
352 
353 	return fch;
354 }
355 EXPORT_SYMBOL_GPL(fuse_chan_new);
356 
357 struct list_head *fuse_pqueue_alloc(void)
358 {
359 	struct list_head *pq = kzalloc_objs(struct list_head, FUSE_PQ_HASH_SIZE);
360 
361 	if (pq) {
362 		for (int i = 0; i < FUSE_PQ_HASH_SIZE; i++)
363 			INIT_LIST_HEAD(&pq[i]);
364 	}
365 	return pq;
366 }
367 
368 struct fuse_chan *fuse_dev_chan_new(void)
369 {
370 	struct fuse_chan *fch __free(kfree) = fuse_chan_new();
371 	if (!fch)
372 		return NULL;
373 
374 	fch->pq_prealloc = fuse_pqueue_alloc();
375 	if (!fch->pq_prealloc)
376 		return NULL;
377 
378 	fuse_iqueue_init(&fch->iq, &fuse_dev_fiq_ops, NULL);
379 
380 	return no_free_ptr(fch);
381 }
382 EXPORT_SYMBOL_GPL(fuse_dev_chan_new);
383 
384 unsigned int fuse_chan_num_background(struct fuse_chan *fch)
385 {
386 	return READ_ONCE(fch->num_background);
387 }
388 
389 unsigned int fuse_chan_max_background(struct fuse_chan *fch)
390 {
391 	return READ_ONCE(fch->max_background);
392 }
393 
394 void fuse_chan_max_background_set(struct fuse_chan *fch, unsigned int val)
395 {
396 	spin_lock(&fch->bg_lock);
397 	fch->max_background = val;
398 	fch->blocked = fch->num_background >= fch->max_background;
399 	if (!fch->blocked)
400 		wake_up(&fch->blocked_waitq);
401 	spin_unlock(&fch->bg_lock);
402 }
403 
404 unsigned int fuse_chan_num_waiting(struct fuse_chan *fch)
405 {
406 	return atomic_read(&fch->num_waiting);
407 }
408 
/*
 * Record @fc as the connection that channel @fch belongs to.
 * No reference is taken on @fc here.
 */
void fuse_chan_set_fc(struct fuse_chan *fch, struct fuse_conn *fc)
{
	fch->conn = fc;
}
413 
/*
 * Flag @fch as using io_uring.  fuse_block_alloc() consults this flag and
 * holds back request allocation on a connected channel until
 * fuse_uring_ready() reports true.
 */
void fuse_chan_io_uring_enable(struct fuse_chan *fch)
{
	fch->io_uring = 1;
}
418 
419 void fuse_pqueue_init(struct fuse_pqueue *fpq)
420 {
421 	spin_lock_init(&fpq->lock);
422 	INIT_LIST_HEAD(&fpq->io);
423 	fpq->connected = 1;
424 	fpq->processing = NULL;
425 }
426 
427 static struct fuse_dev *fuse_dev_alloc_no_pq(void)
428 {
429 	struct fuse_dev *fud;
430 
431 	fud = kzalloc_obj(struct fuse_dev);
432 	if (!fud)
433 		return NULL;
434 
435 	refcount_set(&fud->ref, 1);
436 	fuse_pqueue_init(&fud->pq);
437 
438 	return fud;
439 }
440 
441 struct fuse_dev *fuse_dev_alloc(void)
442 {
443 	struct fuse_dev *fud __free(kfree) = fuse_dev_alloc_no_pq();
444 	if (!fud)
445 		return NULL;
446 
447 	fud->pq.processing = fuse_pqueue_alloc();
448 	if (!fud->pq.processing)
449 		return NULL;
450 
451 	return no_free_ptr(fud);
452 }
453 EXPORT_SYMBOL_GPL(fuse_dev_alloc);
454 
455 /*
456  * Installs @fch into @fud, return true on success.  "Consumes" @pq in either case.
457  */
458 static bool fuse_dev_install_with_pq(struct fuse_dev *fud, struct fuse_chan *fch,
459 				     struct list_head *pq)
460 {
461 	struct fuse_chan *old_fch;
462 
463 	guard(spinlock)(&fch->lock);
464 	/*
465 	 * Pairs with:
466 	 *  - xchg() in fuse_dev_release()
467 	 *  - smp_load_acquire() in fuse_dev_fc_get()
468 	 */
469 	old_fch = cmpxchg(&fud->chan, NULL, fch);
470 	if (old_fch) {
471 		/*
472 		 * failed to set fud->chan because
473 		 *  - it was already set to a different fc
474 		 *  - it was set to disconneted
475 		 */
476 		kfree(pq);
477 		return false;
478 	}
479 	if (pq) {
480 		WARN_ON(fud->pq.processing);
481 		fud->pq.processing = pq;
482 	}
483 	list_add_tail(&fud->entry, &fch->devices);
484 	fuse_conn_get(fch->conn);
485 	wake_up_all(&fuse_dev_waitq);
486 	return true;
487 }
488 
489 void fuse_dev_install(struct fuse_dev *fud, struct fuse_chan *fch)
490 {
491 	struct list_head *pq = fch->pq_prealloc;
492 
493 	fch->pq_prealloc = NULL;
494 	if (!fuse_dev_install_with_pq(fud, fch, pq)) {
495 		/* Channel is not usable without a dev */
496 		fuse_chan_abort(fch, false);
497 	}
498 }
499 EXPORT_SYMBOL_GPL(fuse_dev_install);
500 
/*
 * Allocate a fuse_dev (without its own processing table) and install
 * @fch into it via fuse_dev_install().
 *
 * Returns the device, or NULL if the allocation failed.
 */
struct fuse_dev *fuse_dev_alloc_install(struct fuse_chan *fch)
{
	struct fuse_dev *fud = fuse_dev_alloc_no_pq();

	if (fud)
		fuse_dev_install(fud, fch);

	return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc_install);
513 
514 void fuse_dev_put(struct fuse_dev *fud)
515 {
516 	struct fuse_chan *fch;
517 
518 	if (!refcount_dec_and_test(&fud->ref))
519 		return;
520 
521 	fch = fuse_dev_chan_get(fud);
522 	if (fch && fch != FUSE_DEV_CHAN_DISCONNECTED) {
523 		/* This is the virtiofs case (fuse_dev_release() not called) */
524 		spin_lock(&fch->lock);
525 		list_del(&fud->entry);
526 		spin_unlock(&fch->lock);
527 
528 		fuse_conn_put(fch->conn);
529 	}
530 	kfree(fud->pq.processing);
531 	kfree(fud);
532 }
533 EXPORT_SYMBOL_GPL(fuse_dev_put);
534 
535 bool fuse_dev_is_installed(struct fuse_dev *fud)
536 {
537 	struct fuse_chan *fch = fuse_dev_chan_get(fud);
538 
539 	return fch != NULL && fch != FUSE_DEV_CHAN_DISCONNECTED;
540 }
541 
542 /*
543  * Checks if @fc matches the one installed in @fud
544  */
545 bool fuse_dev_verify(struct fuse_dev *fud, struct fuse_chan *fch)
546 {
547 	return fuse_dev_chan_get(fud) == fch;
548 }
549 
/* Report the sync_init flag stored in @fud */
bool fuse_dev_is_sync_init(struct fuse_dev *fud)
{
	return fud->sync_init;
}
554 
555 struct fuse_dev *fuse_dev_grab(struct file *file)
556 {
557 	struct fuse_dev *fud = fuse_file_to_fud(file);
558 
559 	refcount_inc(&fud->ref);
560 	return fud;
561 }
562 
563 static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req)
564 {
565 	req->in.h.len = sizeof(struct fuse_in_header) +
566 		fuse_len_args(req->args->in_numargs,
567 			      (struct fuse_arg *) req->args->in_args);
568 	fiq->ops->send_req(fiq, req);
569 }
570 
571 void fuse_chan_queue_forget(struct fuse_chan *fch, struct fuse_forget_link *forget,
572 			    u64 nodeid, u64 nlookup)
573 {
574 	struct fuse_iqueue *fiq = &fch->iq;
575 
576 	forget->forget_one.nodeid = nodeid;
577 	forget->forget_one.nlookup = nlookup;
578 
579 	fiq->ops->send_forget(fiq, forget);
580 }
581 
582 static void flush_bg_queue(struct fuse_chan *fch)
583 {
584 	struct fuse_iqueue *fiq = &fch->iq;
585 
586 	while (fch->active_background < fch->max_background &&
587 	       !list_empty(&fch->bg_queue)) {
588 		struct fuse_req *req;
589 
590 		req = list_first_entry(&fch->bg_queue, struct fuse_req, list);
591 		list_del(&req->list);
592 		fch->active_background++;
593 		fuse_send_one(fiq, req);
594 	}
595 }
596 
597 void fuse_request_bg_finish(struct fuse_chan *fch, struct fuse_req *req)
598 {
599 	lockdep_assert_held(&fch->bg_lock);
600 
601 	clear_bit(FR_BACKGROUND, &req->flags);
602 	if (fch->num_background == fch->max_background) {
603 		fch->blocked = 0;
604 		wake_up(&fch->blocked_waitq);
605 	} else if (!fch->blocked) {
606 		/*
607 		 * Wake up next waiter, if any.  It's okay to use
608 		 * waitqueue_active(), as we've already synced up
609 		 * fch->blocked with waiters with the wake_up() call
610 		 * above.
611 		 */
612 		if (waitqueue_active(&fch->blocked_waitq))
613 			wake_up(&fch->blocked_waitq);
614 	}
615 
616 	fch->num_background--;
617 	fch->active_background--;
618 }
619 
/*
 * This function is called when a request is finished.  Either a reply
 * has arrived or it was aborted (and not yet sent) or some error
 * occurred during communication with userspace, or the device file
 * was closed.  The requester thread is woken up (if still waiting),
 * the 'end' callback is called if the request is FR_ASYNC, and one
 * reference to the request is released.
 *
 * If FR_FINISHED was already set, only the reference is dropped.
 */
void fuse_request_end(struct fuse_req *req)
{
	struct fuse_chan *fch = req->chan;
	struct fuse_iqueue *fiq = &fch->iq;

	/* Only the first caller to set FR_FINISHED runs the completion work */
	if (test_and_set_bit(FR_FINISHED, &req->flags))
		goto put_request;

	trace_fuse_request_end(req);
	/*
	 * test_and_set_bit() implies smp_mb() between bit
	 * changing and below FR_INTERRUPTED check. Pairs with
	 * smp_mb() from fuse_dev_queue_interrupt().
	 */
	if (test_bit(FR_INTERRUPTED, &req->flags)) {
		/* Take a possibly queued interrupt for this request back off */
		spin_lock(&fiq->lock);
		list_del_init(&req->intr_entry);
		spin_unlock(&fiq->lock);
	}
	/* A finished request must be neither pending nor marked as sent */
	WARN_ON(test_bit(FR_PENDING, &req->flags));
	WARN_ON(test_bit(FR_SENT, &req->flags));
	if (test_bit(FR_BACKGROUND, &req->flags)) {
		/* Update background accounting and push out queued requests */
		spin_lock(&fch->bg_lock);
		fuse_request_bg_finish(fch, req);
		flush_bg_queue(fch);
		spin_unlock(&fch->bg_lock);
	} else {
		/* Wake up waiter sleeping in request_wait_answer() */
		wake_up(&req->waitq);
	}

	/* FR_ASYNC is set by fuse_args_to_req() when args->end is given */
	if (test_bit(FR_ASYNC, &req->flags))
		req->args->end(req->args, req->out.h.error);
put_request:
	fuse_put_request(req);
}
EXPORT_SYMBOL_GPL(fuse_request_end);
665 
666 static int queue_interrupt(struct fuse_req *req)
667 {
668 	struct fuse_iqueue *fiq = &req->chan->iq;
669 
670 	/* Check for we've sent request to interrupt this req */
671 	if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags)))
672 		return -EINVAL;
673 
674 	fiq->ops->send_interrupt(fiq, req);
675 
676 	return 0;
677 }
678 
679 bool fuse_remove_pending_req(struct fuse_req *req, spinlock_t *lock)
680 {
681 	spin_lock(lock);
682 	if (test_bit(FR_PENDING, &req->flags)) {
683 		/*
684 		 * FR_PENDING does not get cleared as the request will end
685 		 * up in destruction anyway.
686 		 */
687 		list_del(&req->list);
688 		spin_unlock(lock);
689 		__fuse_put_request(req);
690 		req->out.h.error = -EINTR;
691 		return true;
692 	}
693 	spin_unlock(lock);
694 	return false;
695 }
696 
/*
 * Sleep until @req is finished (FR_FINISHED set), in up to three stages:
 *
 *  1. Unless the channel has no_interrupt set: interruptible wait.  If a
 *     signal arrives, the request is marked FR_INTERRUPTED and, if it was
 *     already sent, an interrupt is queued.
 *  2. Unless the request is FR_FORCE: killable wait.  If that wait is
 *     interrupted, either the whole channel is aborted (abort_on_kill) or
 *     the request is pulled back if it is still pending.
 *  3. Otherwise: uninterruptible wait for completion.
 */
static void request_wait_answer(struct fuse_req *req)
{
	struct fuse_chan *fch = req->chan;
	struct fuse_iqueue *fiq = &fch->iq;
	int err;

	if (!fch->no_interrupt) {
		/* Any signal may interrupt this */
		err = wait_event_interruptible(req->waitq,
					test_bit(FR_FINISHED, &req->flags));
		if (!err)
			return;

		set_bit(FR_INTERRUPTED, &req->flags);
		/* matches barrier in fuse_dev_do_read() */
		smp_mb__after_atomic();
		if (test_bit(FR_SENT, &req->flags))
			queue_interrupt(req);
	}

	if (!test_bit(FR_FORCE, &req->flags)) {
		bool removed;

		/* Only fatal signals may interrupt this */
		err = wait_event_killable(req->waitq,
					test_bit(FR_FINISHED, &req->flags));
		if (!err)
			return;

		/* Returns without waiting for FR_FINISHED after the abort */
		if (req->args->abort_on_kill) {
			fuse_chan_abort(fch, false);
			return;
		}

		/* Still pending: dequeue it so it never reaches userspace */
		if (test_bit(FR_URING, &req->flags))
			removed = fuse_uring_remove_pending_req(req);
		else
			removed = fuse_remove_pending_req(req, &fiq->lock);
		if (removed)
			return;
	}

	/*
	 * Either request is already in userspace, or it was forced.
	 * Wait it out.
	 */
	wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
}
745 
746 static void __fuse_request_send(struct fuse_req *req)
747 {
748 	struct fuse_iqueue *fiq = &req->chan->iq;
749 
750 	BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
751 
752 	/* acquire extra reference, since request is still needed after
753 	   fuse_request_end() */
754 	__fuse_get_request(req);
755 	fuse_send_one(fiq, req);
756 
757 	request_wait_answer(req);
758 	/* Pairs with smp_wmb() in fuse_request_end() */
759 	smp_rmb();
760 }
761 
762 static void fuse_adjust_compat(struct fuse_chan *fch, struct fuse_args *args)
763 {
764 	if (fch->minor < 4 && args->opcode == FUSE_STATFS)
765 		args->out_args[0].size = FUSE_COMPAT_STATFS_SIZE;
766 
767 	if (fch->minor < 9) {
768 		switch (args->opcode) {
769 		case FUSE_LOOKUP:
770 		case FUSE_CREATE:
771 		case FUSE_MKNOD:
772 		case FUSE_MKDIR:
773 		case FUSE_SYMLINK:
774 		case FUSE_LINK:
775 			args->out_args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
776 			break;
777 		case FUSE_GETATTR:
778 		case FUSE_SETATTR:
779 			args->out_args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
780 			break;
781 		}
782 	}
783 	if (fch->minor < 12) {
784 		switch (args->opcode) {
785 		case FUSE_CREATE:
786 			args->in_args[0].size = sizeof(struct fuse_open_in);
787 			break;
788 		case FUSE_MKNOD:
789 			args->in_args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
790 			break;
791 		}
792 	}
793 }
794 
795 static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
796 {
797 	req->in.h.opcode = args->opcode;
798 	req->in.h.nodeid = args->nodeid;
799 	req->in.h.uid = args->uid;
800 	req->in.h.gid = args->gid;
801 	req->in.h.pid = args->pid;
802 	req->args = args;
803 	if (args->is_ext)
804 		req->in.h.total_extlen = args->in_args[args->ext_idx].size / 8;
805 	if (args->end)
806 		__set_bit(FR_ASYNC, &req->flags);
807 }
808 
809 ssize_t fuse_chan_send(struct fuse_chan *fch, struct fuse_args *args)
810 {
811 	struct fuse_req *req;
812 	ssize_t ret;
813 
814 	if (args->force) {
815 		atomic_inc(&fch->num_waiting);
816 		req = fuse_request_alloc(fch, GFP_KERNEL | __GFP_NOFAIL);
817 
818 		__set_bit(FR_WAITING, &req->flags);
819 		if (!args->abort_on_kill)
820 			__set_bit(FR_FORCE, &req->flags);
821 	} else {
822 		req = fuse_get_req(fch, false);
823 		if (IS_ERR(req))
824 			return PTR_ERR(req);
825 	}
826 
827 	/* Needs to be done after fuse_get_req() so that fch->minor is valid */
828 	fuse_adjust_compat(fch, args);
829 	fuse_args_to_req(req, args);
830 
831 	if (!args->noreply)
832 		__set_bit(FR_ISREPLY, &req->flags);
833 	__fuse_request_send(req);
834 	ret = req->out.h.error;
835 	if (!ret && args->out_argvar) {
836 		BUG_ON(args->out_numargs == 0);
837 		ret = args->out_args[args->out_numargs - 1].size;
838 	}
839 	fuse_put_request(req);
840 
841 	return ret;
842 }
843 
#ifdef CONFIG_FUSE_IO_URING
/*
 * io_uring variant of background queueing: fill in the input length,
 * assign a unique id and hand @req to fuse_uring_queue_bq_req(), whose
 * result is returned.
 */
static bool fuse_request_queue_background_uring(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	unsigned int payload;

	payload = fuse_len_args(args->in_numargs,
				(struct fuse_arg *) args->in_args);
	req->in.h.len = sizeof(struct fuse_in_header) + payload;

	fuse_request_assign_unique(&req->chan->iq, req);

	return fuse_uring_queue_bq_req(req);
}
#endif
857 
858 /*
859  * @return true if queued
860  */
861 static int fuse_request_queue_background(struct fuse_req *req)
862 {
863 	struct fuse_chan *fch = req->chan;
864 	bool queued = false;
865 
866 	WARN_ON(!test_bit(FR_BACKGROUND, &req->flags));
867 	if (!test_bit(FR_WAITING, &req->flags)) {
868 		__set_bit(FR_WAITING, &req->flags);
869 		atomic_inc(&fch->num_waiting);
870 	}
871 	__set_bit(FR_ISREPLY, &req->flags);
872 
873 #ifdef CONFIG_FUSE_IO_URING
874 	if (fuse_uring_ready(fch))
875 		return fuse_request_queue_background_uring(req);
876 #endif
877 
878 	spin_lock(&fch->bg_lock);
879 	if (likely(fch->connected)) {
880 		fch->num_background++;
881 		if (fch->num_background == fch->max_background)
882 			fch->blocked = 1;
883 		list_add_tail(&req->list, &fch->bg_queue);
884 		flush_bg_queue(fch);
885 		queued = true;
886 	}
887 	spin_unlock(&fch->bg_lock);
888 
889 	return queued;
890 }
891 
892 int fuse_chan_send_bg(struct fuse_chan *fch, struct fuse_args *args, gfp_t gfp_flags)
893 {
894 	struct fuse_req *req;
895 
896 	if (args->force) {
897 		req = fuse_request_alloc(fch, gfp_flags);
898 		if (!req)
899 			return -ENOMEM;
900 		__set_bit(FR_BACKGROUND, &req->flags);
901 	} else {
902 		req = fuse_get_req(fch, true);
903 		if (IS_ERR(req))
904 			return PTR_ERR(req);
905 	}
906 
907 	fuse_args_to_req(req, args);
908 
909 	if (!fuse_request_queue_background(req)) {
910 		fuse_put_request(req);
911 		return -ENOTCONN;
912 	}
913 
914 	return 0;
915 }
916 
917 int fuse_chan_send_notify_reply(struct fuse_chan *fch, struct fuse_args *args, u64 unique)
918 {
919 	struct fuse_req *req;
920 	struct fuse_iqueue *fiq = &fch->iq;
921 
922 	req = fuse_get_req(fch, false);
923 	if (IS_ERR(req))
924 		return PTR_ERR(req);
925 
926 	__clear_bit(FR_ISREPLY, &req->flags);
927 	req->in.h.unique = unique;
928 
929 	fuse_args_to_req(req, args);
930 
931 	fuse_send_one(fiq, req);
932 
933 	return 0;
934 }
935 
936 /*
937  * Lock the request.  Up to the next unlock_request() there mustn't be
938  * anything that could cause a page-fault.  If the request was already
939  * aborted bail out.
940  */
941 static int lock_request(struct fuse_req *req)
942 {
943 	int err = 0;
944 	if (req) {
945 		spin_lock(&req->waitq.lock);
946 		if (test_bit(FR_ABORTED, &req->flags))
947 			err = -ENOENT;
948 		else
949 			set_bit(FR_LOCKED, &req->flags);
950 		spin_unlock(&req->waitq.lock);
951 	}
952 	return err;
953 }
954 
955 /*
956  * Unlock request.  If it was aborted while locked, caller is responsible
957  * for unlocking and ending the request.
958  */
959 static int unlock_request(struct fuse_req *req)
960 {
961 	int err = 0;
962 	if (req) {
963 		spin_lock(&req->waitq.lock);
964 		if (test_bit(FR_ABORTED, &req->flags))
965 			err = -ENOENT;
966 		else
967 			clear_bit(FR_LOCKED, &req->flags);
968 		spin_unlock(&req->waitq.lock);
969 	}
970 	return err;
971 }
972 
973 void fuse_copy_init(struct fuse_copy_state *cs, bool write,
974 		    struct iov_iter *iter)
975 {
976 	memset(cs, 0, sizeof(*cs));
977 	cs->write = write;
978 	cs->iter = iter;
979 }
980 
981 /* Unmap and put previous page of userspace buffer */
982 void fuse_copy_finish(struct fuse_copy_state *cs)
983 {
984 	if (cs->currbuf) {
985 		struct pipe_buffer *buf = cs->currbuf;
986 
987 		if (cs->write)
988 			buf->len = PAGE_SIZE - cs->len;
989 		cs->currbuf = NULL;
990 	} else if (cs->pg) {
991 		if (cs->write) {
992 			flush_dcache_page(cs->pg);
993 			set_page_dirty_lock(cs->pg);
994 		}
995 		put_page(cs->pg);
996 	}
997 	cs->pg = NULL;
998 }
999 
1000 /*
1001  * Get another pagefull of userspace buffer, and map it to kernel
1002  * address space, and lock request
1003  */
1004 static int fuse_copy_fill(struct fuse_copy_state *cs)
1005 {
1006 	struct page *page;
1007 	int err;
1008 
1009 	err = unlock_request(cs->req);
1010 	if (err)
1011 		return err;
1012 
1013 	fuse_copy_finish(cs);
1014 	if (cs->pipebufs) {
1015 		struct pipe_buffer *buf = cs->pipebufs;
1016 
1017 		if (!cs->write) {
1018 			err = pipe_buf_confirm(cs->pipe, buf);
1019 			if (err)
1020 				return err;
1021 
1022 			BUG_ON(!cs->nr_segs);
1023 			cs->currbuf = buf;
1024 			cs->pg = buf->page;
1025 			cs->offset = buf->offset;
1026 			cs->len = buf->len;
1027 			cs->pipebufs++;
1028 			cs->nr_segs--;
1029 		} else {
1030 			if (cs->nr_segs >= cs->pipe->max_usage)
1031 				return -EIO;
1032 
1033 			page = alloc_page(GFP_HIGHUSER);
1034 			if (!page)
1035 				return -ENOMEM;
1036 
1037 			buf->page = page;
1038 			buf->offset = 0;
1039 			buf->len = 0;
1040 
1041 			cs->currbuf = buf;
1042 			cs->pg = page;
1043 			cs->offset = 0;
1044 			cs->len = PAGE_SIZE;
1045 			cs->pipebufs++;
1046 			cs->nr_segs++;
1047 		}
1048 	} else {
1049 		size_t off;
1050 		err = iov_iter_get_pages2(cs->iter, &page, PAGE_SIZE, 1, &off);
1051 		if (err < 0)
1052 			return err;
1053 		BUG_ON(!err);
1054 		cs->len = err;
1055 		cs->offset = off;
1056 		cs->pg = page;
1057 	}
1058 
1059 	return lock_request(cs->req);
1060 }
1061 
1062 /* Do as much copy to/from userspace buffer as we can */
1063 static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
1064 {
1065 	unsigned ncpy = min(*size, cs->len);
1066 	if (val) {
1067 		void *pgaddr = kmap_local_page(cs->pg);
1068 		void *buf = pgaddr + cs->offset;
1069 
1070 		if (cs->write)
1071 			memcpy(buf, *val, ncpy);
1072 		else
1073 			memcpy(*val, buf, ncpy);
1074 
1075 		kunmap_local(pgaddr);
1076 		*val += ncpy;
1077 	}
1078 	*size -= ncpy;
1079 	cs->len -= ncpy;
1080 	cs->offset += ncpy;
1081 	if (cs->is_uring)
1082 		cs->ring.copied_sz += ncpy;
1083 
1084 	return ncpy;
1085 }
1086 
/*
 * Sanity-check a folio that is about to be stolen from a pipe.
 *
 * Returns 1 (after dumping the page) if the folio is mapped, has a mapping
 * set, or carries any PAGE_FLAGS_CHECK_AT_PREP flag other than the ones
 * explicitly tolerated below.  Returns 0 if the folio looks fine.
 */
static int fuse_check_folio(struct folio *folio)
{
	if (folio_mapped(folio) ||
	    folio->mapping != NULL ||
	    (folio->flags.f & PAGE_FLAGS_CHECK_AT_PREP &
	     ~(1 << PG_locked |
	       1 << PG_referenced |
	       1 << PG_lru |
	       1 << PG_active |
	       1 << PG_workingset |
	       1 << PG_reclaim |
	       1 << PG_waiters |
	       LRU_GEN_MASK | LRU_REFS_MASK))) {
		dump_page(&folio->page, "fuse: trying to steal weird page");
		return 1;
	}
	return 0;
}
1105 
/*
 * Attempt to steal a page from the splice() pipe and move it into the
 * pagecache. If successful, the pointer in @pagep will be updated. The
 * folio that was originally in @pagep will lose a reference and the new
 * folio returned in @pagep will carry a reference.
 *
 * (@pagep above refers to the @foliop parameter.)
 *
 * Returns 0 if the folio was replaced, 1 if stealing was not possible and
 * cs->pg/cs->offset were pointed at the pipe buffer's page instead, or a
 * negative error.
 */
static int fuse_try_move_folio(struct fuse_copy_state *cs, struct folio **foliop)
{
	int err;
	struct folio *oldfolio = *foliop;
	struct folio *newfolio;
	struct pipe_buffer *buf = cs->pipebufs;

	/* Temporary reference, dropped at out_put_old on every path */
	folio_get(oldfolio);
	err = unlock_request(cs->req);
	if (err)
		goto out_put_old;

	fuse_copy_finish(cs);

	err = pipe_buf_confirm(cs->pipe, buf);
	if (err)
		goto out_put_old;

	/* Consume the next pipe buffer */
	BUG_ON(!cs->nr_segs);
	cs->currbuf = buf;
	cs->len = buf->len;
	cs->pipebufs++;
	cs->nr_segs--;

	/* Only a buffer covering the whole folio can replace it */
	if (cs->len != folio_size(oldfolio))
		goto out_fallback;

	if (!pipe_buf_try_steal(cs->pipe, buf))
		goto out_fallback;

	newfolio = page_folio(buf->page);

	folio_clear_uptodate(newfolio);
	folio_clear_mappedtodisk(newfolio);

	if (folio_test_large(newfolio))
		goto out_fallback_unlock;

	if (fuse_check_folio(newfolio) != 0)
		goto out_fallback_unlock;

	/*
	 * This is a new and locked page, it shouldn't be mapped or
	 * have any special flags on it
	 */
	if (WARN_ON(folio_mapped(oldfolio)))
		goto out_fallback_unlock;
	if (WARN_ON(folio_has_private(oldfolio)))
		goto out_fallback_unlock;
	if (WARN_ON(folio_test_dirty(oldfolio) ||
				folio_test_writeback(oldfolio)))
		goto out_fallback_unlock;
	if (WARN_ON(folio_test_mlocked(oldfolio)))
		goto out_fallback_unlock;

	err = lock_request(cs->req);
	if (err)
		goto out_fallback_unlock;

	replace_page_cache_folio(oldfolio, newfolio);

	/* Reference carried by the folio handed back through @foliop */
	folio_get(newfolio);

	if (!(buf->flags & PIPE_BUF_FLAG_LRU))
		folio_add_lru(newfolio);

	/*
	 * Release while we have extra ref on stolen page.  Otherwise
	 * anon_pipe_buf_release() might think the page can be reused.
	 */
	pipe_buf_release(cs->pipe, buf);

	*foliop = newfolio;
	folio_unlock(oldfolio);
	/* Drop ref for ap->pages[] array */
	folio_put(oldfolio);
	/* Nothing of this buffer is left to copy */
	cs->len = 0;

	err = 0;
out_put_old:
	/* Drop ref obtained in this function */
	folio_put(oldfolio);
	return err;

out_fallback_unlock:
	folio_unlock(newfolio);
out_fallback:
	/* Point the copy state at the pipe buffer's page instead */
	cs->pg = buf->page;
	cs->offset = buf->offset;

	/* 1 tells the caller that no move happened, unless locking failed */
	err = lock_request(cs->req);
	if (!err)
		err = 1;

	goto out_put_old;
}
1208 
1209 static int fuse_ref_folio(struct fuse_copy_state *cs, struct folio *folio,
1210 			  unsigned offset, unsigned count)
1211 {
1212 	struct pipe_buffer *buf;
1213 	int err;
1214 
1215 	if (cs->nr_segs >= cs->pipe->max_usage)
1216 		return -EIO;
1217 
1218 	folio_get(folio);
1219 	err = unlock_request(cs->req);
1220 	if (err) {
1221 		folio_put(folio);
1222 		return err;
1223 	}
1224 
1225 	fuse_copy_finish(cs);
1226 
1227 	buf = cs->pipebufs;
1228 	buf->page = &folio->page;
1229 	buf->offset = offset;
1230 	buf->len = count;
1231 
1232 	cs->pipebufs++;
1233 	cs->nr_segs++;
1234 	cs->len = 0;
1235 
1236 	return lock_request(cs->req);
1237 }
1238 
1239 /*
1240  * Copy a folio in the request to/from the userspace buffer.  Must be
1241  * done atomically
1242  */
1243 int fuse_copy_folio(struct fuse_copy_state *cs, struct folio **foliop,
1244 		    unsigned offset, unsigned count, int zeroing)
1245 {
1246 	int err;
1247 	struct folio *folio = *foliop;
1248 	size_t size;
1249 
1250 	if (folio) {
1251 		size = folio_size(folio);
1252 		if (zeroing && count < size)
1253 			folio_zero_range(folio, 0, size);
1254 	}
1255 
1256 	while (count) {
1257 		if (cs->write && cs->pipebufs && folio) {
1258 			/*
1259 			 * Can't control lifetime of pipe buffers, so always
1260 			 * copy user pages.
1261 			 */
1262 			if (cs->req->args->user_pages) {
1263 				err = fuse_copy_fill(cs);
1264 				if (err)
1265 					return err;
1266 			} else {
1267 				return fuse_ref_folio(cs, folio, offset, count);
1268 			}
1269 		} else if (!cs->len) {
1270 			if (cs->move_folios && folio &&
1271 			    offset == 0 && count == size) {
1272 				err = fuse_try_move_folio(cs, foliop);
1273 				if (err <= 0)
1274 					return err;
1275 			} else {
1276 				err = fuse_copy_fill(cs);
1277 				if (err)
1278 					return err;
1279 			}
1280 		}
1281 		if (folio) {
1282 			void *mapaddr = kmap_local_folio(folio, offset);
1283 			void *buf = mapaddr;
1284 			unsigned int copy = count;
1285 			unsigned int bytes_copied;
1286 
1287 			if (folio_test_highmem(folio) && count > PAGE_SIZE - offset_in_page(offset))
1288 				copy = PAGE_SIZE - offset_in_page(offset);
1289 
1290 			bytes_copied = fuse_copy_do(cs, &buf, &copy);
1291 			kunmap_local(mapaddr);
1292 			offset += bytes_copied;
1293 			count -= bytes_copied;
1294 		} else
1295 			offset += fuse_copy_do(cs, NULL, &count);
1296 	}
1297 	if (folio && !cs->write)
1298 		flush_dcache_folio(folio);
1299 	return 0;
1300 }
1301 
1302 /* Copy folios in the request to/from userspace buffer */
1303 static int fuse_copy_folios(struct fuse_copy_state *cs, unsigned nbytes,
1304 			    int zeroing)
1305 {
1306 	unsigned i;
1307 	struct fuse_req *req = cs->req;
1308 	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
1309 
1310 	for (i = 0; i < ap->num_folios && (nbytes || zeroing); i++) {
1311 		int err;
1312 		unsigned int offset = ap->descs[i].offset;
1313 		unsigned int count = min(nbytes, ap->descs[i].length);
1314 
1315 		err = fuse_copy_folio(cs, &ap->folios[i], offset, count, zeroing);
1316 		if (err)
1317 			return err;
1318 
1319 		nbytes -= count;
1320 	}
1321 	return 0;
1322 }
1323 
1324 /* Copy a single argument in the request to/from userspace buffer */
1325 int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
1326 {
1327 	while (size) {
1328 		if (!cs->len) {
1329 			int err = fuse_copy_fill(cs);
1330 			if (err)
1331 				return err;
1332 		}
1333 		fuse_copy_do(cs, &val, &size);
1334 	}
1335 	return 0;
1336 }
1337 
1338 /* Copy request arguments to/from userspace buffer */
1339 int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
1340 		   unsigned argpages, struct fuse_arg *args,
1341 		   int zeroing)
1342 {
1343 	int err = 0;
1344 	unsigned i;
1345 
1346 	for (i = 0; !err && i < numargs; i++)  {
1347 		struct fuse_arg *arg = &args[i];
1348 		if (i == numargs - 1 && argpages)
1349 			err = fuse_copy_folios(cs, arg->size, zeroing);
1350 		else
1351 			err = fuse_copy_one(cs, arg->value, arg->size);
1352 	}
1353 	return err;
1354 }
1355 
1356 static int forget_pending(struct fuse_iqueue *fiq)
1357 {
1358 	return fiq->forget_list_head.next != NULL;
1359 }
1360 
1361 static int request_pending(struct fuse_iqueue *fiq)
1362 {
1363 	return !list_empty(&fiq->pending) || !list_empty(&fiq->interrupts) ||
1364 		forget_pending(fiq);
1365 }
1366 
1367 /*
1368  * Transfer an interrupt request to userspace
1369  *
1370  * Unlike other requests this is assembled on demand, without a need
1371  * to allocate a separate fuse_req structure.
1372  *
1373  * Called with fiq->lock held, releases it
1374  */
1375 static int fuse_read_interrupt(struct fuse_iqueue *fiq, struct fuse_copy_state *cs)
1376 __releases(fiq->lock)
1377 {
1378 	struct fuse_req *req = list_first_entry(&fiq->interrupts, struct fuse_req, intr_entry);
1379 	struct fuse_interrupt_in arg = {
1380 		.unique = req->in.h.unique,
1381 	};
1382 	struct fuse_in_header ih = {
1383 		.opcode = FUSE_INTERRUPT,
1384 		.unique = (req->in.h.unique | FUSE_INT_REQ_BIT),
1385 		.len = sizeof(ih) + sizeof(arg),
1386 	};
1387 	int err;
1388 
1389 	list_del_init(&req->intr_entry);
1390 	spin_unlock(&fiq->lock);
1391 
1392 	err = fuse_copy_one(cs, &ih, sizeof(ih));
1393 	if (!err)
1394 		err = fuse_copy_one(cs, &arg, sizeof(arg));
1395 	fuse_copy_finish(cs);
1396 
1397 	return err ? err : ih.len;
1398 }
1399 
1400 static struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
1401 						    unsigned int max,
1402 						    unsigned int *countp)
1403 {
1404 	struct fuse_forget_link *head = fiq->forget_list_head.next;
1405 	struct fuse_forget_link **newhead = &head;
1406 	unsigned count;
1407 
1408 	for (count = 0; *newhead != NULL && count < max; count++)
1409 		newhead = &(*newhead)->next;
1410 
1411 	fiq->forget_list_head.next = *newhead;
1412 	*newhead = NULL;
1413 	if (fiq->forget_list_head.next == NULL)
1414 		fiq->forget_list_tail = &fiq->forget_list_head;
1415 
1416 	if (countp != NULL)
1417 		*countp = count;
1418 
1419 	return head;
1420 }
1421 
1422 static int fuse_read_single_forget(struct fuse_iqueue *fiq,
1423 				   struct fuse_copy_state *cs)
1424 __releases(fiq->lock)
1425 {
1426 	int err;
1427 	struct fuse_forget_link *forget = fuse_dequeue_forget(fiq, 1, NULL);
1428 	struct fuse_forget_in arg = {
1429 		.nlookup = forget->forget_one.nlookup,
1430 	};
1431 	struct fuse_in_header ih = {
1432 		.opcode = FUSE_FORGET,
1433 		.nodeid = forget->forget_one.nodeid,
1434 		.unique = fuse_get_unique_locked(fiq),
1435 		.len = sizeof(ih) + sizeof(arg),
1436 	};
1437 
1438 	spin_unlock(&fiq->lock);
1439 	kfree(forget);
1440 
1441 	err = fuse_copy_one(cs, &ih, sizeof(ih));
1442 	if (!err)
1443 		err = fuse_copy_one(cs, &arg, sizeof(arg));
1444 	fuse_copy_finish(cs);
1445 
1446 	if (err)
1447 		return err;
1448 
1449 	return ih.len;
1450 }
1451 
1452 static int fuse_read_batch_forget(struct fuse_iqueue *fiq,
1453 				   struct fuse_copy_state *cs, size_t nbytes)
1454 __releases(fiq->lock)
1455 {
1456 	int err;
1457 	unsigned max_forgets;
1458 	unsigned count;
1459 	struct fuse_forget_link *head;
1460 	struct fuse_batch_forget_in arg = { .count = 0 };
1461 	struct fuse_in_header ih = {
1462 		.opcode = FUSE_BATCH_FORGET,
1463 		.unique = fuse_get_unique_locked(fiq),
1464 		.len = sizeof(ih) + sizeof(arg),
1465 	};
1466 
1467 	max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1468 	head = fuse_dequeue_forget(fiq, max_forgets, &count);
1469 	spin_unlock(&fiq->lock);
1470 
1471 	arg.count = count;
1472 	ih.len += count * sizeof(struct fuse_forget_one);
1473 	err = fuse_copy_one(cs, &ih, sizeof(ih));
1474 	if (!err)
1475 		err = fuse_copy_one(cs, &arg, sizeof(arg));
1476 
1477 	while (head) {
1478 		struct fuse_forget_link *forget = head;
1479 
1480 		if (!err) {
1481 			err = fuse_copy_one(cs, &forget->forget_one,
1482 					    sizeof(forget->forget_one));
1483 		}
1484 		head = forget->next;
1485 		kfree(forget);
1486 	}
1487 
1488 	fuse_copy_finish(cs);
1489 
1490 	if (err)
1491 		return err;
1492 
1493 	return ih.len;
1494 }
1495 
1496 static int fuse_read_forget(struct fuse_chan *fch, struct fuse_iqueue *fiq,
1497 			    struct fuse_copy_state *cs,
1498 			    size_t nbytes)
1499 __releases(fiq->lock)
1500 {
1501 	if (fch->minor < 16 || fiq->forget_list_head.next->next == NULL)
1502 		return fuse_read_single_forget(fiq, cs);
1503 	else
1504 		return fuse_read_batch_forget(fiq, cs, nbytes);
1505 }
1506 
1507 /*
1508  * Read a single request into the userspace filesystem's buffer.  This
1509  * function waits until a request is available, then removes it from
1510  * the pending list and copies request data to userspace buffer.  If
1511  * no reply is needed (FORGET) or request has been aborted or there
1512  * was an error during the copying then it's finished by calling
1513  * fuse_request_end().  Otherwise add it to the processing list, and set
1514  * the 'sent' flag.
1515  */
1516 static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
1517 				struct fuse_copy_state *cs, size_t nbytes)
1518 {
1519 	ssize_t err;
1520 	struct fuse_chan *fch = fud->chan;
1521 	struct fuse_iqueue *fiq = &fch->iq;
1522 	struct fuse_pqueue *fpq = &fud->pq;
1523 	struct fuse_req *req;
1524 	struct fuse_args *args;
1525 	unsigned reqsize;
1526 	unsigned int hash;
1527 
1528 	/*
1529 	 * Require sane minimum read buffer - that has capacity for fixed part
1530 	 * of any request header + negotiated max_write room for data.
1531 	 *
1532 	 * Historically libfuse reserves 4K for fixed header room, but e.g.
1533 	 * GlusterFS reserves only 80 bytes
1534 	 *
1535 	 *	= `sizeof(fuse_in_header) + sizeof(fuse_write_in)`
1536 	 *
1537 	 * which is the absolute minimum any sane filesystem should be using
1538 	 * for header room.
1539 	 */
1540 	if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER,
1541 			   sizeof(struct fuse_in_header) +
1542 			   sizeof(struct fuse_write_in) +
1543 			   fch->max_write))
1544 		return -EINVAL;
1545 
1546  restart:
1547 	for (;;) {
1548 		spin_lock(&fiq->lock);
1549 		if (!fiq->connected || request_pending(fiq))
1550 			break;
1551 		spin_unlock(&fiq->lock);
1552 
1553 		if (file->f_flags & O_NONBLOCK)
1554 			return -EAGAIN;
1555 		err = wait_event_interruptible_exclusive(fiq->waitq,
1556 				!fiq->connected || request_pending(fiq));
1557 		if (err)
1558 			return err;
1559 	}
1560 
1561 	if (!fiq->connected) {
1562 		err = fch->abort_with_err ? -ECONNABORTED : -ENODEV;
1563 		goto err_unlock;
1564 	}
1565 
1566 	if (!list_empty(&fiq->interrupts))
1567 		return fuse_read_interrupt(fiq, cs);
1568 
1569 	if (forget_pending(fiq)) {
1570 		if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0)
1571 			return fuse_read_forget(fch, fiq, cs, nbytes);
1572 
1573 		if (fiq->forget_batch <= -8)
1574 			fiq->forget_batch = 16;
1575 	}
1576 
1577 	req = list_entry(fiq->pending.next, struct fuse_req, list);
1578 	clear_bit(FR_PENDING, &req->flags);
1579 	list_del_init(&req->list);
1580 	spin_unlock(&fiq->lock);
1581 
1582 	args = req->args;
1583 	reqsize = req->in.h.len;
1584 
1585 	/* If request is too large, reply with an error and restart the read */
1586 	if (nbytes < reqsize) {
1587 		req->out.h.error = -EIO;
1588 		/* SETXATTR is special, since it may contain too large data */
1589 		if (args->opcode == FUSE_SETXATTR)
1590 			req->out.h.error = -E2BIG;
1591 		fuse_request_end(req);
1592 		goto restart;
1593 	}
1594 	spin_lock(&fpq->lock);
1595 	/*
1596 	 *  Must not put request on fpq->io queue after having been shut down by
1597 	 *  fuse_chan_abort()
1598 	 */
1599 	if (!fpq->connected) {
1600 		req->out.h.error = err = -ECONNABORTED;
1601 		goto out_end;
1602 	}
1603 	list_add(&req->list, &fpq->io);
1604 	spin_unlock(&fpq->lock);
1605 	cs->req = req;
1606 	err = fuse_copy_one(cs, &req->in.h, sizeof(req->in.h));
1607 	if (!err)
1608 		err = fuse_copy_args(cs, args->in_numargs, args->in_pages,
1609 				     (struct fuse_arg *) args->in_args, 0);
1610 	fuse_copy_finish(cs);
1611 	spin_lock(&fpq->lock);
1612 	clear_bit(FR_LOCKED, &req->flags);
1613 	if (!fpq->connected) {
1614 		err = fch->abort_with_err ? -ECONNABORTED : -ENODEV;
1615 		goto out_end;
1616 	}
1617 	if (err) {
1618 		req->out.h.error = -EIO;
1619 		goto out_end;
1620 	}
1621 	if (!test_bit(FR_ISREPLY, &req->flags)) {
1622 		err = reqsize;
1623 		goto out_end;
1624 	}
1625 	hash = fuse_req_hash(req->in.h.unique);
1626 	list_move_tail(&req->list, &fpq->processing[hash]);
1627 	__fuse_get_request(req);
1628 	set_bit(FR_SENT, &req->flags);
1629 	trace_fuse_request_sent(req);
1630 	spin_unlock(&fpq->lock);
1631 	/* matches barrier in request_wait_answer() */
1632 	smp_mb__after_atomic();
1633 	if (test_bit(FR_INTERRUPTED, &req->flags))
1634 		queue_interrupt(req);
1635 	fuse_put_request(req);
1636 
1637 	return reqsize;
1638 
1639 out_end:
1640 	if (!test_bit(FR_PRIVATE, &req->flags))
1641 		list_del_init(&req->list);
1642 	spin_unlock(&fpq->lock);
1643 	fuse_request_end(req);
1644 	return err;
1645 
1646  err_unlock:
1647 	spin_unlock(&fiq->lock);
1648 	return err;
1649 }
1650 
1651 static int fuse_dev_open(struct inode *inode, struct file *file)
1652 {
1653 	struct fuse_dev *fud = fuse_dev_alloc_no_pq();
1654 
1655 	if (!fud)
1656 		return -ENOMEM;
1657 
1658 	file->private_data = fud;
1659 	return 0;
1660 }
1661 
1662 struct fuse_dev *fuse_get_dev(struct file *file)
1663 {
1664 	struct fuse_dev *fud = fuse_file_to_fud(file);
1665 	int err;
1666 
1667 	if (unlikely(!fuse_dev_chan_get(fud))) {
1668 		/* only block waiting for mount if sync init was requested */
1669 		if (!fud->sync_init)
1670 			return ERR_PTR(-EPERM);
1671 
1672 		err = wait_event_interruptible(fuse_dev_waitq, fuse_dev_chan_get(fud) != NULL);
1673 		if (err)
1674 			return ERR_PTR(err);
1675 	}
1676 
1677 	return fud;
1678 }
1679 
1680 static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
1681 {
1682 	struct fuse_copy_state cs;
1683 	struct file *file = iocb->ki_filp;
1684 	struct fuse_dev *fud = fuse_get_dev(file);
1685 
1686 	if (IS_ERR(fud))
1687 		return PTR_ERR(fud);
1688 
1689 	if (!user_backed_iter(to))
1690 		return -EINVAL;
1691 
1692 	fuse_copy_init(&cs, true, to);
1693 
1694 	return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
1695 }
1696 
1697 static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1698 				    struct pipe_inode_info *pipe,
1699 				    size_t len, unsigned int flags)
1700 {
1701 	int total, ret;
1702 	int page_nr = 0;
1703 	struct pipe_buffer *bufs;
1704 	struct fuse_copy_state cs;
1705 	struct fuse_dev *fud = fuse_get_dev(in);
1706 
1707 	if (IS_ERR(fud))
1708 		return PTR_ERR(fud);
1709 
1710 	bufs = kvmalloc_objs(struct pipe_buffer, pipe->max_usage);
1711 	if (!bufs)
1712 		return -ENOMEM;
1713 
1714 	fuse_copy_init(&cs, true, NULL);
1715 	cs.pipebufs = bufs;
1716 	cs.pipe = pipe;
1717 	ret = fuse_dev_do_read(fud, in, &cs, len);
1718 	if (ret < 0)
1719 		goto out;
1720 
1721 	if (pipe_buf_usage(pipe) + cs.nr_segs > pipe->max_usage) {
1722 		ret = -EIO;
1723 		goto out;
1724 	}
1725 
1726 	for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
1727 		/*
1728 		 * Need to be careful about this.  Having buf->ops in module
1729 		 * code can Oops if the buffer persists after module unload.
1730 		 */
1731 		bufs[page_nr].ops = &nosteal_pipe_buf_ops;
1732 		bufs[page_nr].flags = 0;
1733 		ret = add_to_pipe(pipe, &bufs[page_nr++]);
1734 		if (unlikely(ret < 0))
1735 			break;
1736 	}
1737 	if (total)
1738 		ret = total;
1739 out:
1740 	for (; page_nr < cs.nr_segs; page_nr++)
1741 		put_page(bufs[page_nr].page);
1742 
1743 	kvfree(bufs);
1744 	return ret;
1745 }
1746 
1747 /*
1748  * Resending all processing queue requests.
1749  *
1750  * During a FUSE daemon panics and failover, it is possible for some inflight
1751  * requests to be lost and never returned. As a result, applications awaiting
1752  * replies would become stuck forever. To address this, we can use notification
1753  * to trigger resending of these pending requests to the FUSE daemon, ensuring
1754  * they are properly processed again.
1755  *
1756  * Please note that this strategy is applicable only to idempotent requests or
1757  * if the FUSE daemon takes careful measures to avoid processing duplicated
1758  * non-idempotent requests.
1759  */
1760 void fuse_chan_resend(struct fuse_chan *fch)
1761 {
1762 	struct fuse_dev *fud;
1763 	struct fuse_req *req, *next;
1764 	struct fuse_iqueue *fiq = &fch->iq;
1765 	LIST_HEAD(to_queue);
1766 	unsigned int i;
1767 
1768 	spin_lock(&fch->lock);
1769 	if (!fch->connected) {
1770 		spin_unlock(&fch->lock);
1771 		return;
1772 	}
1773 
1774 	list_for_each_entry(fud, &fch->devices, entry) {
1775 		struct fuse_pqueue *fpq = &fud->pq;
1776 
1777 		spin_lock(&fpq->lock);
1778 		for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
1779 			list_splice_tail_init(&fpq->processing[i], &to_queue);
1780 		spin_unlock(&fpq->lock);
1781 	}
1782 	spin_unlock(&fch->lock);
1783 
1784 	list_for_each_entry_safe(req, next, &to_queue, list) {
1785 		set_bit(FR_PENDING, &req->flags);
1786 		clear_bit(FR_SENT, &req->flags);
1787 		/* mark the request as resend request */
1788 		req->in.h.unique |= FUSE_UNIQUE_RESEND;
1789 	}
1790 
1791 	spin_lock(&fiq->lock);
1792 	if (!fiq->connected) {
1793 		spin_unlock(&fiq->lock);
1794 		list_for_each_entry(req, &to_queue, list)
1795 			clear_bit(FR_PENDING, &req->flags);
1796 		fuse_dev_end_requests(&to_queue);
1797 		return;
1798 	}
1799 	/*
1800 	 * Remove interrupt entries for resent requests to prevent stale
1801 	 * intr_entry on fiq->interrupts after the request is re-queued.
1802 	 */
1803 	list_for_each_entry(req, &to_queue, list) {
1804 		if (test_bit(FR_INTERRUPTED, &req->flags))
1805 			list_del_init(&req->intr_entry);
1806 	}
1807 	/* iq and pq requests are both oldest to newest */
1808 	list_splice(&to_queue, &fiq->pending);
1809 	fuse_dev_wake_and_unlock(fiq);
1810 }
1811 
1812 /* Look up request on processing list by unique ID */
1813 struct fuse_req *fuse_request_find(struct fuse_pqueue *fpq, u64 unique)
1814 {
1815 	unsigned int hash = fuse_req_hash(unique);
1816 	struct fuse_req *req;
1817 
1818 	list_for_each_entry(req, &fpq->processing[hash], list) {
1819 		if (req->in.h.unique == unique)
1820 			return req;
1821 	}
1822 	return NULL;
1823 }
1824 
1825 int fuse_copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
1826 		       unsigned nbytes)
1827 {
1828 
1829 	unsigned int reqsize = 0;
1830 
1831 	/*
1832 	 * Uring has all headers separated from args - args is payload only
1833 	 */
1834 	if (!cs->is_uring)
1835 		reqsize = sizeof(struct fuse_out_header);
1836 
1837 	reqsize += fuse_len_args(args->out_numargs, args->out_args);
1838 
1839 	if (reqsize < nbytes || (reqsize > nbytes && !args->out_argvar))
1840 		return -EINVAL;
1841 	else if (reqsize > nbytes) {
1842 		struct fuse_arg *lastarg = &args->out_args[args->out_numargs-1];
1843 		unsigned diffsize = reqsize - nbytes;
1844 
1845 		if (diffsize > lastarg->size)
1846 			return -EINVAL;
1847 		lastarg->size -= diffsize;
1848 	}
1849 	return fuse_copy_args(cs, args->out_numargs, args->out_pages,
1850 			      args->out_args, args->page_zeroing);
1851 }
1852 
1853 /*
1854  * Write a single reply to a request.  First the header is copied from
1855  * the write buffer.  The request is then searched on the processing
1856  * list by the unique ID found in the header.  If found, then remove
1857  * it from the list and copy the rest of the buffer to the request.
1858  * The request is finished by calling fuse_request_end().
1859  */
1860 static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
1861 				 struct fuse_copy_state *cs, size_t nbytes)
1862 {
1863 	int err;
1864 	struct fuse_chan *fch = fud->chan;
1865 	struct fuse_pqueue *fpq = &fud->pq;
1866 	struct fuse_req *req;
1867 	struct fuse_out_header oh;
1868 
1869 	err = -EINVAL;
1870 	if (nbytes < sizeof(struct fuse_out_header))
1871 		goto out;
1872 
1873 	err = fuse_copy_one(cs, &oh, sizeof(oh));
1874 	if (err)
1875 		goto copy_finish;
1876 
1877 	err = -EINVAL;
1878 	if (oh.len != nbytes)
1879 		goto copy_finish;
1880 
1881 	/*
1882 	 * Zero oh.unique indicates unsolicited notification message
1883 	 * and error contains notification code.
1884 	 */
1885 	if (!oh.unique) {
1886 		/*
1887 		 * Only allow notifications during while the connection is in an
1888 		 * initialized and connected state
1889 		 */
1890 		err = -EINVAL;
1891 		if (!fch->initialized || !fch->connected)
1892 			goto copy_finish;
1893 
1894 		/* Don't try to move folios (yet) */
1895 		cs->move_folios = false;
1896 
1897 		err = fuse_notify(fch->conn, oh.error, nbytes - sizeof(oh), cs);
1898 		goto copy_finish;
1899 	}
1900 
1901 	err = -EINVAL;
1902 	if (oh.error <= -512 || oh.error > 0)
1903 		goto copy_finish;
1904 
1905 	spin_lock(&fpq->lock);
1906 	req = NULL;
1907 	if (fpq->connected)
1908 		req = fuse_request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT);
1909 
1910 	err = -ENOENT;
1911 	if (!req) {
1912 		spin_unlock(&fpq->lock);
1913 		goto copy_finish;
1914 	}
1915 
1916 	/* Is it an interrupt reply ID? */
1917 	if (oh.unique & FUSE_INT_REQ_BIT) {
1918 		__fuse_get_request(req);
1919 		spin_unlock(&fpq->lock);
1920 
1921 		err = 0;
1922 		if (nbytes != sizeof(struct fuse_out_header))
1923 			err = -EINVAL;
1924 		else if (oh.error == -ENOSYS)
1925 			fch->no_interrupt = 1;
1926 		else if (oh.error == -EAGAIN)
1927 			err = queue_interrupt(req);
1928 
1929 		fuse_put_request(req);
1930 
1931 		goto copy_finish;
1932 	}
1933 
1934 	clear_bit(FR_SENT, &req->flags);
1935 	list_move(&req->list, &fpq->io);
1936 	req->out.h = oh;
1937 	set_bit(FR_LOCKED, &req->flags);
1938 	spin_unlock(&fpq->lock);
1939 	cs->req = req;
1940 	if (!req->args->page_replace)
1941 		cs->move_folios = false;
1942 
1943 	if (oh.error)
1944 		err = nbytes != sizeof(oh) ? -EINVAL : 0;
1945 	else
1946 		err = fuse_copy_out_args(cs, req->args, nbytes);
1947 	fuse_copy_finish(cs);
1948 
1949 	spin_lock(&fpq->lock);
1950 	clear_bit(FR_LOCKED, &req->flags);
1951 	if (!fpq->connected)
1952 		err = -ENOENT;
1953 	else if (err)
1954 		req->out.h.error = -EIO;
1955 	if (!test_bit(FR_PRIVATE, &req->flags))
1956 		list_del_init(&req->list);
1957 	spin_unlock(&fpq->lock);
1958 
1959 	fuse_request_end(req);
1960 out:
1961 	return err ? err : nbytes;
1962 
1963 copy_finish:
1964 	fuse_copy_finish(cs);
1965 	goto out;
1966 }
1967 
1968 static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
1969 {
1970 	struct fuse_copy_state cs;
1971 	struct fuse_dev *fud = __fuse_get_dev(iocb->ki_filp);
1972 
1973 	if (!fud)
1974 		return -EPERM;
1975 
1976 	if (!user_backed_iter(from))
1977 		return -EINVAL;
1978 
1979 	fuse_copy_init(&cs, false, from);
1980 
1981 	return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
1982 }
1983 
1984 static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1985 				     struct file *out, loff_t *ppos,
1986 				     size_t len, unsigned int flags)
1987 {
1988 	unsigned int head, tail, count;
1989 	unsigned nbuf;
1990 	unsigned idx;
1991 	struct pipe_buffer *bufs;
1992 	struct fuse_copy_state cs;
1993 	struct fuse_dev *fud = __fuse_get_dev(out);
1994 	size_t rem;
1995 	ssize_t ret;
1996 
1997 	if (!fud)
1998 		return -EPERM;
1999 
2000 	pipe_lock(pipe);
2001 
2002 	head = pipe->head;
2003 	tail = pipe->tail;
2004 	count = pipe_occupancy(head, tail);
2005 
2006 	bufs = kvmalloc_objs(struct pipe_buffer, count);
2007 	if (!bufs) {
2008 		pipe_unlock(pipe);
2009 		return -ENOMEM;
2010 	}
2011 
2012 	nbuf = 0;
2013 	rem = 0;
2014 	for (idx = tail; !pipe_empty(head, idx) && rem < len; idx++)
2015 		rem += pipe_buf(pipe, idx)->len;
2016 
2017 	ret = -EINVAL;
2018 	if (rem < len)
2019 		goto out_free;
2020 
2021 	rem = len;
2022 	while (rem) {
2023 		struct pipe_buffer *ibuf;
2024 		struct pipe_buffer *obuf;
2025 
2026 		if (WARN_ON(nbuf >= count || pipe_empty(head, tail)))
2027 			goto out_free;
2028 
2029 		ibuf = pipe_buf(pipe, tail);
2030 		obuf = &bufs[nbuf];
2031 
2032 		if (rem >= ibuf->len) {
2033 			*obuf = *ibuf;
2034 			ibuf->ops = NULL;
2035 			tail++;
2036 			pipe->tail = tail;
2037 		} else {
2038 			if (!pipe_buf_get(pipe, ibuf))
2039 				goto out_free;
2040 
2041 			*obuf = *ibuf;
2042 			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
2043 			obuf->len = rem;
2044 			ibuf->offset += obuf->len;
2045 			ibuf->len -= obuf->len;
2046 		}
2047 		nbuf++;
2048 		rem -= obuf->len;
2049 	}
2050 	pipe_unlock(pipe);
2051 
2052 	fuse_copy_init(&cs, false, NULL);
2053 	cs.pipebufs = bufs;
2054 	cs.nr_segs = nbuf;
2055 	cs.pipe = pipe;
2056 
2057 	if (flags & SPLICE_F_MOVE)
2058 		cs.move_folios = true;
2059 
2060 	ret = fuse_dev_do_write(fud, &cs, len);
2061 
2062 	pipe_lock(pipe);
2063 out_free:
2064 	for (idx = 0; idx < nbuf; idx++) {
2065 		struct pipe_buffer *buf = &bufs[idx];
2066 
2067 		if (buf->ops)
2068 			pipe_buf_release(pipe, buf);
2069 	}
2070 	pipe_unlock(pipe);
2071 
2072 	kvfree(bufs);
2073 	return ret;
2074 }
2075 
2076 static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
2077 {
2078 	__poll_t mask = EPOLLOUT | EPOLLWRNORM;
2079 	struct fuse_iqueue *fiq;
2080 	struct fuse_dev *fud = fuse_get_dev(file);
2081 
2082 	if (IS_ERR(fud))
2083 		return EPOLLERR;
2084 
2085 	fiq = &fud->chan->iq;
2086 	poll_wait(file, &fiq->waitq, wait);
2087 
2088 	spin_lock(&fiq->lock);
2089 	if (!fiq->connected)
2090 		mask = EPOLLERR;
2091 	else if (request_pending(fiq))
2092 		mask |= EPOLLIN | EPOLLRDNORM;
2093 	spin_unlock(&fiq->lock);
2094 
2095 	return mask;
2096 }
2097 
2098 /* Abort all requests on the given list (pending or processing) */
2099 void fuse_dev_end_requests(struct list_head *head)
2100 {
2101 	while (!list_empty(head)) {
2102 		struct fuse_req *req;
2103 		req = list_entry(head->next, struct fuse_req, list);
2104 		req->out.h.error = -ECONNABORTED;
2105 		clear_bit(FR_SENT, &req->flags);
2106 		list_del_init(&req->list);
2107 		fuse_request_end(req);
2108 	}
2109 }
2110 
2111 /*
2112  * Abort all requests.
2113  *
2114  * Emergency exit in case of a malicious or accidental deadlock, or just a hung
2115  * filesystem.
2116  *
2117  * The same effect is usually achievable through killing the filesystem daemon
2118  * and all users of the filesystem.  The exception is the combination of an
2119  * asynchronous request and the tricky deadlock (see
2120  * Documentation/filesystems/fuse/fuse.rst).
2121  *
2122  * Aborting requests under I/O goes as follows: 1: Separate out unlocked
2123  * requests, they should be finished off immediately.  Locked requests will be
2124  * finished after unlock; see unlock_request(). 2: Finish off the unlocked
2125  * requests.  It is possible that some request will finish before we can.  This
2126  * is OK, the request will in that case be removed from the list before we touch
2127  * it.
2128  */
2129 void fuse_chan_abort(struct fuse_chan *fch, bool abort_with_err)
2130 {
2131 	struct fuse_iqueue *fiq = &fch->iq;
2132 
2133 	fch->abort_with_err = abort_with_err;
2134 
2135 	spin_lock(&fch->lock);
2136 	if (fch->connected) {
2137 		struct fuse_dev *fud;
2138 		struct fuse_req *req, *next;
2139 		LIST_HEAD(to_end);
2140 		unsigned int i;
2141 
2142 		if (fch->timeout.req_timeout)
2143 			cancel_delayed_work(&fch->timeout.work);
2144 
2145 		/* Background queuing checks fch->connected under bg_lock */
2146 		spin_lock(&fch->bg_lock);
2147 		fch->connected = 0;
2148 		spin_unlock(&fch->bg_lock);
2149 
2150 		fuse_chan_set_initialized(fch, NULL);
2151 		list_for_each_entry(fud, &fch->devices, entry) {
2152 			struct fuse_pqueue *fpq = &fud->pq;
2153 
2154 			spin_lock(&fpq->lock);
2155 			fpq->connected = 0;
2156 			list_for_each_entry_safe(req, next, &fpq->io, list) {
2157 				req->out.h.error = -ECONNABORTED;
2158 				spin_lock(&req->waitq.lock);
2159 				set_bit(FR_ABORTED, &req->flags);
2160 				if (!test_bit(FR_LOCKED, &req->flags)) {
2161 					set_bit(FR_PRIVATE, &req->flags);
2162 					__fuse_get_request(req);
2163 					list_move(&req->list, &to_end);
2164 				}
2165 				spin_unlock(&req->waitq.lock);
2166 			}
2167 			for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
2168 				list_splice_tail_init(&fpq->processing[i],
2169 						      &to_end);
2170 			spin_unlock(&fpq->lock);
2171 		}
2172 		spin_lock(&fch->bg_lock);
2173 		fch->blocked = 0;
2174 		fch->max_background = UINT_MAX;
2175 		flush_bg_queue(fch);
2176 		spin_unlock(&fch->bg_lock);
2177 
2178 		spin_lock(&fiq->lock);
2179 		fiq->connected = 0;
2180 		list_for_each_entry(req, &fiq->pending, list)
2181 			clear_bit(FR_PENDING, &req->flags);
2182 		list_splice_tail_init(&fiq->pending, &to_end);
2183 		while (forget_pending(fiq))
2184 			kfree(fuse_dequeue_forget(fiq, 1, NULL));
2185 		wake_up_all(&fiq->waitq);
2186 		spin_unlock(&fiq->lock);
2187 		kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
2188 		fuse_end_polls(fch->conn);
2189 		wake_up_all(&fch->blocked_waitq);
2190 		spin_unlock(&fch->lock);
2191 
2192 		fuse_dev_end_requests(&to_end);
2193 
2194 		/*
2195 		 * fch->lock must not be taken to avoid conflicts with io-uring
2196 		 * locks
2197 		 */
2198 		fuse_uring_abort(fch);
2199 	} else {
2200 		spin_unlock(&fch->lock);
2201 	}
2202 }
2203 EXPORT_SYMBOL_GPL(fuse_chan_abort);
2204 
2205 void fuse_chan_wait_aborted(struct fuse_chan *fch)
2206 {
2207 	/* matches implicit memory barrier in fuse_drop_waiting() */
2208 	smp_mb();
2209 	wait_event(fch->blocked_waitq, fuse_chan_num_waiting(fch) == 0);
2210 
2211 	fuse_uring_wait_stopped_queues(fch);
2212 }
2213 
2214 int fuse_dev_release(struct inode *inode, struct file *file)
2215 {
2216 	struct fuse_dev *fud = fuse_file_to_fud(file);
2217 	/* Pairs with cmpxchg() in fuse_dev_install() */
2218 	struct fuse_chan *fch = xchg(&fud->chan, FUSE_DEV_CHAN_DISCONNECTED);
2219 
2220 	if (fch) {
2221 		struct fuse_pqueue *fpq = &fud->pq;
2222 		LIST_HEAD(to_end);
2223 		unsigned int i;
2224 		bool last;
2225 
2226 		/* Make sure fuse_dev_install_with_pq() has finished */
2227 		spin_lock(&fch->lock);
2228 		spin_lock(&fpq->lock);
2229 		WARN_ON(!list_empty(&fpq->io));
2230 		for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
2231 			list_splice_init(&fpq->processing[i], &to_end);
2232 		spin_unlock(&fpq->lock);
2233 
2234 		list_del(&fud->entry);
2235 		/* Are we the last open device? */
2236 		last = list_empty(&fch->devices);
2237 		spin_unlock(&fch->lock);
2238 
2239 		fuse_dev_end_requests(&to_end);
2240 
2241 		if (last) {
2242 			WARN_ON(fch->iq.fasync != NULL);
2243 			fuse_chan_abort(fch, false);
2244 		}
2245 		fuse_conn_put(fch->conn);
2246 	}
2247 	fuse_dev_put(fud);
2248 	return 0;
2249 }
2250 EXPORT_SYMBOL_GPL(fuse_dev_release);
2251 
2252 static int fuse_dev_fasync(int fd, struct file *file, int on)
2253 {
2254 	struct fuse_dev *fud = fuse_get_dev(file);
2255 
2256 	if (IS_ERR(fud))
2257 		return PTR_ERR(fud);
2258 
2259 	/* No locking - fasync_helper does its own locking */
2260 	return fasync_helper(fd, file, on, &fud->chan->iq.fasync);
2261 }
2262 
2263 static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp)
2264 {
2265 	int oldfd;
2266 	struct fuse_dev *fud, *new_fud;
2267 	struct list_head *pq;
2268 
2269 	if (get_user(oldfd, argp))
2270 		return -EFAULT;
2271 
2272 	CLASS(fd, f)(oldfd);
2273 	if (fd_empty(f))
2274 		return -EINVAL;
2275 
2276 	/*
2277 	 * Check against file->f_op because CUSE
2278 	 * uses the same ioctl handler.
2279 	 */
2280 	if (fd_file(f)->f_op != file->f_op)
2281 		return -EINVAL;
2282 
2283 	fud = fuse_get_dev(fd_file(f));
2284 	if (IS_ERR(fud))
2285 		return PTR_ERR(fud);
2286 
2287 	pq = fuse_pqueue_alloc();
2288 	if (!pq)
2289 		return -ENOMEM;
2290 
2291 	new_fud = fuse_file_to_fud(file);
2292 	if (!fuse_dev_install_with_pq(new_fud, fud->chan, pq))
2293 		return -EINVAL;
2294 
2295 	return 0;
2296 }
2297 
2298 static long fuse_dev_ioctl_backing_open(struct file *file,
2299 					struct fuse_backing_map __user *argp)
2300 {
2301 	struct fuse_dev *fud = fuse_get_dev(file);
2302 	struct fuse_backing_map map;
2303 
2304 	if (IS_ERR(fud))
2305 		return PTR_ERR(fud);
2306 
2307 	if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
2308 		return -EOPNOTSUPP;
2309 
2310 	if (copy_from_user(&map, argp, sizeof(map)))
2311 		return -EFAULT;
2312 
2313 	return fuse_backing_open(fud->chan->conn, &map);
2314 }
2315 
2316 static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp)
2317 {
2318 	struct fuse_dev *fud = fuse_get_dev(file);
2319 	int backing_id;
2320 
2321 	if (IS_ERR(fud))
2322 		return PTR_ERR(fud);
2323 
2324 	if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
2325 		return -EOPNOTSUPP;
2326 
2327 	if (get_user(backing_id, argp))
2328 		return -EFAULT;
2329 
2330 	return fuse_backing_close(fud->chan->conn, backing_id);
2331 }
2332 
2333 static long fuse_dev_ioctl_sync_init(struct file *file)
2334 {
2335 	struct fuse_dev *fud = fuse_file_to_fud(file);
2336 
2337 	if (fuse_dev_chan_get(fud))
2338 		return -EINVAL;
2339 
2340 	fud->sync_init = true;
2341 	return 0;
2342 }
2343 
2344 static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
2345 			   unsigned long arg)
2346 {
2347 	void __user *argp = (void __user *)arg;
2348 
2349 	switch (cmd) {
2350 	case FUSE_DEV_IOC_CLONE:
2351 		return fuse_dev_ioctl_clone(file, argp);
2352 
2353 	case FUSE_DEV_IOC_BACKING_OPEN:
2354 		return fuse_dev_ioctl_backing_open(file, argp);
2355 
2356 	case FUSE_DEV_IOC_BACKING_CLOSE:
2357 		return fuse_dev_ioctl_backing_close(file, argp);
2358 
2359 	case FUSE_DEV_IOC_SYNC_INIT:
2360 		return fuse_dev_ioctl_sync_init(file);
2361 
2362 	default:
2363 		return -ENOTTY;
2364 	}
2365 }
2366 
#ifdef CONFIG_PROC_FS
/*
 * fdinfo callback: prints the id of the connection this device belongs to.
 * Nothing is printed when __fuse_get_dev() returns no device.
 */
static void fuse_dev_show_fdinfo(struct seq_file *seq, struct file *file)
{
	struct fuse_dev *fud = __fuse_get_dev(file);

	if (fud)
		seq_printf(seq, "fuse_connection:\t%u\n",
			   fuse_conn_get_id(fud->chan->conn));
}
#endif
2377 
2378 const struct file_operations fuse_dev_operations = {
2379 	.owner		= THIS_MODULE,
2380 	.open		= fuse_dev_open,
2381 	.read_iter	= fuse_dev_read,
2382 	.splice_read	= fuse_dev_splice_read,
2383 	.write_iter	= fuse_dev_write,
2384 	.splice_write	= fuse_dev_splice_write,
2385 	.poll		= fuse_dev_poll,
2386 	.release	= fuse_dev_release,
2387 	.fasync		= fuse_dev_fasync,
2388 	.unlocked_ioctl = fuse_dev_ioctl,
2389 	.compat_ioctl   = compat_ptr_ioctl,
2390 #ifdef CONFIG_FUSE_IO_URING
2391 	.uring_cmd	= fuse_uring_cmd,
2392 #endif
2393 #ifdef CONFIG_PROC_FS
2394 	.show_fdinfo	= fuse_dev_show_fdinfo,
2395 #endif
2396 };
2397 EXPORT_SYMBOL_GPL(fuse_dev_operations);
2398 
2399 static struct miscdevice fuse_miscdevice = {
2400 	.minor = FUSE_MINOR,
2401 	.name  = "fuse",
2402 	.fops = &fuse_dev_operations,
2403 };
2404 
2405 int __init fuse_dev_init(void)
2406 {
2407 	int err = -ENOMEM;
2408 	fuse_req_cachep = kmem_cache_create("fuse_request",
2409 					    sizeof(struct fuse_req),
2410 					    0, 0, NULL);
2411 	if (!fuse_req_cachep)
2412 		goto out;
2413 
2414 	err = misc_register(&fuse_miscdevice);
2415 	if (err)
2416 		goto out_cache_clean;
2417 
2418 	return 0;
2419 
2420  out_cache_clean:
2421 	kmem_cache_destroy(fuse_req_cachep);
2422  out:
2423 	return err;
2424 }
2425 
/*
 * Undo fuse_dev_init(): deregister the misc device, then destroy the
 * request slab cache (reverse order of setup).
 */
void fuse_dev_cleanup(void)
{
	misc_deregister(&fuse_miscdevice);
	kmem_cache_destroy(fuse_req_cachep);
}
2431