xref: /linux/fs/fuse/dev.c (revision 6238729bfce13f94b701766996a5d116d2df8bff)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "dev_uring_i.h"
10 #include "fuse_i.h"
11 #include "fuse_dev_i.h"
12 
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/poll.h>
16 #include <linux/sched/signal.h>
17 #include <linux/uio.h>
18 #include <linux/miscdevice.h>
19 #include <linux/pagemap.h>
20 #include <linux/file.h>
21 #include <linux/slab.h>
22 #include <linux/pipe_fs_i.h>
23 #include <linux/swap.h>
24 #include <linux/splice.h>
25 #include <linux/sched.h>
26 #include <linux/seq_file.h>
27 
28 #include "fuse_trace.h"
29 
30 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
31 MODULE_ALIAS("devname:fuse");
32 
33 static struct kmem_cache *fuse_req_cachep;
34 
35 const unsigned long fuse_timeout_timer_freq =
36 	secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ);
37 
38 bool fuse_request_expired(struct fuse_conn *fc, struct list_head *list)
39 {
40 	struct fuse_req *req;
41 
42 	req = list_first_entry_or_null(list, struct fuse_req, list);
43 	if (!req)
44 		return false;
45 	return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout);
46 }
47 
48 static bool fuse_fpq_processing_expired(struct fuse_conn *fc, struct list_head *processing)
49 {
50 	int i;
51 
52 	for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
53 		if (fuse_request_expired(fc, &processing[i]))
54 			return true;
55 
56 	return false;
57 }
58 
59 /*
60  * Check if any requests aren't being completed by the time the request timeout
61  * elapses. To do so, we:
62  * - check the fiq pending list
63  * - check the bg queue
64  * - check the fpq io and processing lists
65  *
66  * To make this fast, we only check against the head request on each list since
67  * these are generally queued in order of creation time (eg newer requests get
68  * queued to the tail). We might miss a few edge cases (eg requests transitioning
69  * between lists, re-sent requests at the head of the pending list having a
70  * later creation time than other requests on that list, etc.) but that is fine
71  * since if the request never gets fulfilled, it will eventually be caught.
72  */
73 void fuse_check_timeout(struct work_struct *work)
74 {
75 	struct delayed_work *dwork = to_delayed_work(work);
76 	struct fuse_conn *fc = container_of(dwork, struct fuse_conn,
77 					    timeout.work);
78 	struct fuse_iqueue *fiq = &fc->iq;
79 	struct fuse_dev *fud;
80 	struct fuse_pqueue *fpq;
81 	bool expired = false;
82 
83 	if (!atomic_read(&fc->num_waiting))
84 		goto out;
85 
86 	spin_lock(&fiq->lock);
87 	expired = fuse_request_expired(fc, &fiq->pending);
88 	spin_unlock(&fiq->lock);
89 	if (expired)
90 		goto abort_conn;
91 
92 	spin_lock(&fc->bg_lock);
93 	expired = fuse_request_expired(fc, &fc->bg_queue);
94 	spin_unlock(&fc->bg_lock);
95 	if (expired)
96 		goto abort_conn;
97 
98 	spin_lock(&fc->lock);
99 	if (!fc->connected) {
100 		spin_unlock(&fc->lock);
101 		return;
102 	}
103 	list_for_each_entry(fud, &fc->devices, entry) {
104 		fpq = &fud->pq;
105 		spin_lock(&fpq->lock);
106 		if (fuse_request_expired(fc, &fpq->io) ||
107 		    fuse_fpq_processing_expired(fc, fpq->processing)) {
108 			spin_unlock(&fpq->lock);
109 			spin_unlock(&fc->lock);
110 			goto abort_conn;
111 		}
112 
113 		spin_unlock(&fpq->lock);
114 	}
115 	spin_unlock(&fc->lock);
116 
117 	if (fuse_uring_request_expired(fc))
118 		goto abort_conn;
119 
120 out:
121 	queue_delayed_work(system_percpu_wq, &fc->timeout.work,
122 			   fuse_timeout_timer_freq);
123 	return;
124 
125 abort_conn:
126 	fuse_abort_conn(fc);
127 }
128 
129 static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
130 {
131 	INIT_LIST_HEAD(&req->list);
132 	INIT_LIST_HEAD(&req->intr_entry);
133 	init_waitqueue_head(&req->waitq);
134 	refcount_set(&req->count, 1);
135 	__set_bit(FR_PENDING, &req->flags);
136 	req->fm = fm;
137 	req->create_time = jiffies;
138 }
139 
140 static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags)
141 {
142 	struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
143 	if (req)
144 		fuse_request_init(fm, req);
145 
146 	return req;
147 }
148 
149 static void fuse_request_free(struct fuse_req *req)
150 {
151 	kmem_cache_free(fuse_req_cachep, req);
152 }
153 
154 static void __fuse_get_request(struct fuse_req *req)
155 {
156 	refcount_inc(&req->count);
157 }
158 
159 /* Must be called with > 1 refcount */
160 static void __fuse_put_request(struct fuse_req *req)
161 {
162 	refcount_dec(&req->count);
163 }
164 
165 void fuse_set_initialized(struct fuse_conn *fc)
166 {
167 	/* Make sure stores before this are seen on another CPU */
168 	smp_wmb();
169 	fc->initialized = 1;
170 }
171 
172 static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
173 {
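	/*
	 * Allocation must wait until the connection is initialized.
	 * Background requests additionally wait while fc->blocked is set,
	 * and io_uring mounts wait until the ring queues are ready.
	 */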
174 	return !fc->initialized || (for_background && fc->blocked) ||
175 	       (fc->io_uring && fc->connected && !fuse_uring_ready(fc));
176 }
177 
178 static void fuse_drop_waiting(struct fuse_conn *fc)
179 {
180 	/*
181 	 * lockless check of fc->connected is okay, because atomic_dec_and_test()
182 	 * provides a memory barrier matched with the one in fuse_wait_aborted()
183 	 * to ensure no wake-up is missed.
184 	 */
185 	if (atomic_dec_and_test(&fc->num_waiting) &&
186 	    !READ_ONCE(fc->connected)) {
187 		/* wake up aborters */
188 		wake_up_all(&fc->blocked_waitq);
189 	}
190 }
191 
192 static void fuse_put_request(struct fuse_req *req);
193 
194 static struct fuse_req *fuse_get_req(struct mnt_idmap *idmap,
195 				     struct fuse_mount *fm,
196 				     bool for_background)
197 {
198 	struct fuse_conn *fc = fm->fc;
199 	struct fuse_req *req;
200 	bool no_idmap = !fm->sb || (fm->sb->s_iflags & SB_I_NOIDMAP);
201 	kuid_t fsuid;
202 	kgid_t fsgid;
203 	int err;
204 
205 	atomic_inc(&fc->num_waiting);
206 
207 	if (fuse_block_alloc(fc, for_background)) {
208 		err = -EINTR;
209 		if (wait_event_state_exclusive(fc->blocked_waitq,
210 				!fuse_block_alloc(fc, for_background),
211 				(TASK_KILLABLE | TASK_FREEZABLE)))
212 			goto out;
213 	}
214 	/* Matches smp_wmb() in fuse_set_initialized() */
215 	smp_rmb();
216 
217 	err = -ENOTCONN;
218 	if (!fc->connected)
219 		goto out;
220 
221 	err = -ECONNREFUSED;
222 	if (fc->conn_error)
223 		goto out;
224 
225 	req = fuse_request_alloc(fm, GFP_KERNEL);
226 	err = -ENOMEM;
227 	if (!req) {
228 		if (for_background)
229 			wake_up(&fc->blocked_waitq);
230 		goto out;
231 	}
232 
233 	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
234 
235 	__set_bit(FR_WAITING, &req->flags);
236 	if (for_background)
237 		__set_bit(FR_BACKGROUND, &req->flags);
238 
239 	/*
240 	 * Keep the old behavior when idmappings support was not
241 	 * declared by a FUSE server.
242 	 *
243 	 * For those FUSE servers who support idmapped mounts,
244 	 * we send UID/GID only along with "inode creation"
245 	 * fuse requests, otherwise idmap == &invalid_mnt_idmap and
246 	 * req->in.h.{u,g}id will be equal to FUSE_INVALID_UIDGID.
247 	 */
248 	fsuid = no_idmap ? current_fsuid() : mapped_fsuid(idmap, fc->user_ns);
249 	fsgid = no_idmap ? current_fsgid() : mapped_fsgid(idmap, fc->user_ns);
250 	req->in.h.uid = from_kuid(fc->user_ns, fsuid);
251 	req->in.h.gid = from_kgid(fc->user_ns, fsgid);
252 
253 	if (no_idmap && unlikely(req->in.h.uid == ((uid_t)-1) ||
254 				 req->in.h.gid == ((gid_t)-1))) {
255 		fuse_put_request(req);
256 		return ERR_PTR(-EOVERFLOW);
257 	}
258 
259 	return req;
260 
261  out:
262 	fuse_drop_waiting(fc);
263 	return ERR_PTR(err);
264 }
265 
266 static void fuse_put_request(struct fuse_req *req)
267 {
268 	struct fuse_conn *fc = req->fm->fc;
269 
270 	if (refcount_dec_and_test(&req->count)) {
271 		if (test_bit(FR_BACKGROUND, &req->flags)) {
272 			/*
273 			 * We get here in the unlikely case that a background
274 			 * request was allocated but not sent
275 			 */
276 			spin_lock(&fc->bg_lock);
277 			if (!fc->blocked)
278 				wake_up(&fc->blocked_waitq);
279 			spin_unlock(&fc->bg_lock);
280 		}
281 
282 		if (test_bit(FR_WAITING, &req->flags)) {
283 			__clear_bit(FR_WAITING, &req->flags);
284 			fuse_drop_waiting(fc);
285 		}
286 
287 		fuse_request_free(req);
288 	}
289 }
290 
291 unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
292 {
293 	unsigned nbytes = 0;
294 	unsigned i;
295 
296 	for (i = 0; i < numargs; i++)
297 		nbytes += args[i].size;
298 
299 	return nbytes;
300 }
301 EXPORT_SYMBOL_GPL(fuse_len_args);
302 
303 static u64 fuse_get_unique_locked(struct fuse_iqueue *fiq)
304 {
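	/*
	 * Advance by FUSE_REQ_ID_STEP so the low bit stays available for
	 * FUSE_INT_REQ_BIT, which tags interrupt requests.
	 */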
305 	fiq->reqctr += FUSE_REQ_ID_STEP;
306 	return fiq->reqctr;
307 }
308 
309 u64 fuse_get_unique(struct fuse_iqueue *fiq)
310 {
311 	u64 ret;
312 
313 	spin_lock(&fiq->lock);
314 	ret = fuse_get_unique_locked(fiq);
315 	spin_unlock(&fiq->lock);
316 
317 	return ret;
318 }
319 EXPORT_SYMBOL_GPL(fuse_get_unique);
320 
321 unsigned int fuse_req_hash(u64 unique)
322 {
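	/* Ignore FUSE_INT_REQ_BIT so an interrupt's id hashes to the same bucket as its request */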
323 	return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
324 }
325 EXPORT_SYMBOL_GPL(fuse_req_hash);
326 
327 /*
328  * A new request is available, wake fiq->waitq
329  */
330 static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq)
331 __releases(fiq->lock)
332 {
333 	wake_up(&fiq->waitq);
334 	kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
335 	spin_unlock(&fiq->lock);
336 }
337 
338 void fuse_dev_queue_forget(struct fuse_iqueue *fiq,
339 			   struct fuse_forget_link *forget)
340 {
341 	spin_lock(&fiq->lock);
342 	if (fiq->connected) {
343 		fiq->forget_list_tail->next = forget;
344 		fiq->forget_list_tail = forget;
345 		fuse_dev_wake_and_unlock(fiq);
346 	} else {
347 		kfree(forget);
348 		spin_unlock(&fiq->lock);
349 	}
350 }
351 
352 void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
353 {
354 	spin_lock(&fiq->lock);
355 	if (list_empty(&req->intr_entry)) {
356 		list_add_tail(&req->intr_entry, &fiq->interrupts);
357 		/*
358 		 * Pairs with smp_mb() implied by test_and_set_bit()
359 		 * from fuse_request_end().
360 		 */
361 		smp_mb();
362 		if (test_bit(FR_FINISHED, &req->flags)) {
363 			list_del_init(&req->intr_entry);
364 			spin_unlock(&fiq->lock);
365 		} else  {
366 			fuse_dev_wake_and_unlock(fiq);
367 		}
368 	} else {
369 		spin_unlock(&fiq->lock);
370 	}
371 }
372 
373 static inline void fuse_request_assign_unique_locked(struct fuse_iqueue *fiq,
374 						     struct fuse_req *req)
375 {
376 	if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
377 		req->in.h.unique = fuse_get_unique_locked(fiq);
378 
379 	/* tracepoint captures in.h.unique and in.h.len */
380 	trace_fuse_request_send(req);
381 }
382 
383 inline void fuse_request_assign_unique(struct fuse_iqueue *fiq,
384 				       struct fuse_req *req)
385 {
386 	if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
387 		req->in.h.unique = fuse_get_unique(fiq);
388 
389 	/* tracepoint captures in.h.unique and in.h.len */
390 	trace_fuse_request_send(req);
391 }
392 EXPORT_SYMBOL_GPL(fuse_request_assign_unique);
393 
394 static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
395 {
396 	spin_lock(&fiq->lock);
397 	if (fiq->connected) {
398 		fuse_request_assign_unique_locked(fiq, req);
399 		list_add_tail(&req->list, &fiq->pending);
400 		fuse_dev_wake_and_unlock(fiq);
401 	} else {
402 		spin_unlock(&fiq->lock);
403 		req->out.h.error = -ENOTCONN;
404 		clear_bit(FR_PENDING, &req->flags);
405 		fuse_request_end(req);
406 	}
407 }
408 
409 const struct fuse_iqueue_ops fuse_dev_fiq_ops = {
410 	.send_forget	= fuse_dev_queue_forget,
411 	.send_interrupt	= fuse_dev_queue_interrupt,
412 	.send_req	= fuse_dev_queue_req,
413 };
414 EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops);
415 
416 static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req)
417 {
418 	req->in.h.len = sizeof(struct fuse_in_header) +
419 		fuse_len_args(req->args->in_numargs,
420 			      (struct fuse_arg *) req->args->in_args);
421 	fiq->ops->send_req(fiq, req);
422 }
423 
424 void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
425 		       u64 nodeid, u64 nlookup)
426 {
427 	struct fuse_iqueue *fiq = &fc->iq;
428 
429 	forget->forget_one.nodeid = nodeid;
430 	forget->forget_one.nlookup = nlookup;
431 
432 	fiq->ops->send_forget(fiq, forget);
433 }
434 
435 static void flush_bg_queue(struct fuse_conn *fc)
436 {
437 	struct fuse_iqueue *fiq = &fc->iq;
438 
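	/* Move queued background requests to the input queue, keeping at most max_background in flight */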
439 	while (fc->active_background < fc->max_background &&
440 	       !list_empty(&fc->bg_queue)) {
441 		struct fuse_req *req;
442 
443 		req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
444 		list_del(&req->list);
445 		fc->active_background++;
446 		fuse_send_one(fiq, req);
447 	}
448 }
449 
450 /*
451  * This function is called when a request is finished.  Either a reply
452  * has arrived or it was aborted (and not yet sent) or some error
453  * occurred during communication with userspace, or the device file
454  * was closed.  The requester thread is woken up (if still waiting),
455  * the 'end' callback is called if given, else the reference to the
456  * request is released
457  */
458 void fuse_request_end(struct fuse_req *req)
459 {
460 	struct fuse_mount *fm = req->fm;
461 	struct fuse_conn *fc = fm->fc;
462 	struct fuse_iqueue *fiq = &fc->iq;
463 
464 	if (test_and_set_bit(FR_FINISHED, &req->flags))
465 		goto put_request;
466 
467 	trace_fuse_request_end(req);
468 	/*
469 	 * test_and_set_bit() implies smp_mb() between bit
470 	 * changing and below FR_INTERRUPTED check. Pairs with
471 	 * smp_mb() from queue_interrupt().
472 	 */
473 	if (test_bit(FR_INTERRUPTED, &req->flags)) {
474 		spin_lock(&fiq->lock);
475 		list_del_init(&req->intr_entry);
476 		spin_unlock(&fiq->lock);
477 	}
478 	WARN_ON(test_bit(FR_PENDING, &req->flags));
479 	WARN_ON(test_bit(FR_SENT, &req->flags));
480 	if (test_bit(FR_BACKGROUND, &req->flags)) {
481 		spin_lock(&fc->bg_lock);
482 		clear_bit(FR_BACKGROUND, &req->flags);
483 		if (fc->num_background == fc->max_background) {
484 			fc->blocked = 0;
485 			wake_up(&fc->blocked_waitq);
486 		} else if (!fc->blocked) {
487 			/*
488 			 * Wake up next waiter, if any.  It's okay to use
489 			 * waitqueue_active(), as we've already synced up
490 			 * fc->blocked with waiters with the wake_up() call
491 			 * above.
492 			 */
493 			if (waitqueue_active(&fc->blocked_waitq))
494 				wake_up(&fc->blocked_waitq);
495 		}
496 
497 		fc->num_background--;
498 		fc->active_background--;
499 		flush_bg_queue(fc);
500 		spin_unlock(&fc->bg_lock);
501 	} else {
502 		/* Wake up waiter sleeping in request_wait_answer() */
503 		wake_up(&req->waitq);
504 	}
505 
506 	if (test_bit(FR_ASYNC, &req->flags))
507 		req->args->end(fm, req->args, req->out.h.error);
508 put_request:
509 	fuse_put_request(req);
510 }
511 EXPORT_SYMBOL_GPL(fuse_request_end);
512 
513 static int queue_interrupt(struct fuse_req *req)
514 {
515 	struct fuse_iqueue *fiq = &req->fm->fc->iq;
516 
517 	/* Check that an interrupt for this request has actually been requested */
518 	if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags)))
519 		return -EINVAL;
520 
521 	fiq->ops->send_interrupt(fiq, req);
522 
523 	return 0;
524 }
525 
526 bool fuse_remove_pending_req(struct fuse_req *req, spinlock_t *lock)
527 {
528 	spin_lock(lock);
529 	if (test_bit(FR_PENDING, &req->flags)) {
530 		/*
531 		 * FR_PENDING does not get cleared as the request will end
532 		 * up in destruction anyway.
533 		 */
534 		list_del(&req->list);
535 		spin_unlock(lock);
536 		__fuse_put_request(req);
537 		req->out.h.error = -EINTR;
538 		return true;
539 	}
540 	spin_unlock(lock);
541 	return false;
542 }
543 
544 static void request_wait_answer(struct fuse_req *req)
545 {
546 	struct fuse_conn *fc = req->fm->fc;
547 	struct fuse_iqueue *fiq = &fc->iq;
548 	int err;
549 
550 	if (!fc->no_interrupt) {
551 		/* Any signal may interrupt this */
552 		err = wait_event_interruptible(req->waitq,
553 					test_bit(FR_FINISHED, &req->flags));
554 		if (!err)
555 			return;
556 
557 		set_bit(FR_INTERRUPTED, &req->flags);
558 		/* matches barrier in fuse_dev_do_read() */
559 		smp_mb__after_atomic();
560 		if (test_bit(FR_SENT, &req->flags))
561 			queue_interrupt(req);
562 	}
563 
564 	if (!test_bit(FR_FORCE, &req->flags)) {
565 		bool removed;
566 
567 		/* Only fatal signals may interrupt this */
568 		err = wait_event_killable(req->waitq,
569 					test_bit(FR_FINISHED, &req->flags));
570 		if (!err)
571 			return;
572 
573 		if (test_bit(FR_URING, &req->flags))
574 			removed = fuse_uring_remove_pending_req(req);
575 		else
576 			removed = fuse_remove_pending_req(req, &fiq->lock);
577 		if (removed)
578 			return;
579 	}
580 
581 	/*
582 	 * Either request is already in userspace, or it was forced.
583 	 * Wait it out.
584 	 */
585 	wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
586 }
587 
588 static void __fuse_request_send(struct fuse_req *req)
589 {
590 	struct fuse_iqueue *fiq = &req->fm->fc->iq;
591 
592 	BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
593 
594 	/* acquire extra reference, since request is still needed after
595 	   fuse_request_end() */
596 	__fuse_get_request(req);
597 	fuse_send_one(fiq, req);
598 
599 	request_wait_answer(req);
600 	/* Pairs with smp_wmb() in fuse_request_end() */
601 	smp_rmb();
602 }
603 
604 static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
605 {
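	/* Servers speaking older protocol minors expect the smaller compat variants of these structs */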
606 	if (fc->minor < 4 && args->opcode == FUSE_STATFS)
607 		args->out_args[0].size = FUSE_COMPAT_STATFS_SIZE;
608 
609 	if (fc->minor < 9) {
610 		switch (args->opcode) {
611 		case FUSE_LOOKUP:
612 		case FUSE_CREATE:
613 		case FUSE_MKNOD:
614 		case FUSE_MKDIR:
615 		case FUSE_SYMLINK:
616 		case FUSE_LINK:
617 			args->out_args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
618 			break;
619 		case FUSE_GETATTR:
620 		case FUSE_SETATTR:
621 			args->out_args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
622 			break;
623 		}
624 	}
625 	if (fc->minor < 12) {
626 		switch (args->opcode) {
627 		case FUSE_CREATE:
628 			args->in_args[0].size = sizeof(struct fuse_open_in);
629 			break;
630 		case FUSE_MKNOD:
631 			args->in_args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
632 			break;
633 		}
634 	}
635 }
636 
637 static void fuse_force_creds(struct fuse_req *req)
638 {
639 	struct fuse_conn *fc = req->fm->fc;
640 
641 	if (!req->fm->sb || req->fm->sb->s_iflags & SB_I_NOIDMAP) {
642 		req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
643 		req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
644 	} else {
645 		req->in.h.uid = FUSE_INVALID_UIDGID;
646 		req->in.h.gid = FUSE_INVALID_UIDGID;
647 	}
648 
649 	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
650 }
651 
652 static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
653 {
654 	req->in.h.opcode = args->opcode;
655 	req->in.h.nodeid = args->nodeid;
656 	req->args = args;
657 	if (args->is_ext)
658 		req->in.h.total_extlen = args->in_args[args->ext_idx].size / 8;
659 	if (args->end)
660 		__set_bit(FR_ASYNC, &req->flags);
661 }
662 
663 ssize_t __fuse_simple_request(struct mnt_idmap *idmap,
664 			      struct fuse_mount *fm,
665 			      struct fuse_args *args)
666 {
667 	struct fuse_conn *fc = fm->fc;
668 	struct fuse_req *req;
669 	ssize_t ret;
670 
671 	if (args->force) {
672 		atomic_inc(&fc->num_waiting);
673 		req = fuse_request_alloc(fm, GFP_KERNEL | __GFP_NOFAIL);
674 
675 		if (!args->nocreds)
676 			fuse_force_creds(req);
677 
678 		__set_bit(FR_WAITING, &req->flags);
679 		__set_bit(FR_FORCE, &req->flags);
680 	} else {
681 		WARN_ON(args->nocreds);
682 		req = fuse_get_req(idmap, fm, false);
683 		if (IS_ERR(req))
684 			return PTR_ERR(req);
685 	}
686 
687 	/* Needs to be done after fuse_get_req() so that fc->minor is valid */
688 	fuse_adjust_compat(fc, args);
689 	fuse_args_to_req(req, args);
690 
691 	if (!args->noreply)
692 		__set_bit(FR_ISREPLY, &req->flags);
693 	__fuse_request_send(req);
694 	ret = req->out.h.error;
695 	if (!ret && args->out_argvar) {
696 		BUG_ON(args->out_numargs == 0);
697 		ret = args->out_args[args->out_numargs - 1].size;
698 	}
699 	fuse_put_request(req);
700 
701 	return ret;
702 }
703 
704 #ifdef CONFIG_FUSE_IO_URING
705 static bool fuse_request_queue_background_uring(struct fuse_conn *fc,
706 					       struct fuse_req *req)
707 {
708 	struct fuse_iqueue *fiq = &fc->iq;
709 
710 	req->in.h.len = sizeof(struct fuse_in_header) +
711 		fuse_len_args(req->args->in_numargs,
712 			      (struct fuse_arg *) req->args->in_args);
713 	fuse_request_assign_unique(fiq, req);
714 
715 	return fuse_uring_queue_bq_req(req);
716 }
717 #endif
718 
719 /*
720  * @return true if queued
721  */
722 static int fuse_request_queue_background(struct fuse_req *req)
723 {
724 	struct fuse_mount *fm = req->fm;
725 	struct fuse_conn *fc = fm->fc;
726 	bool queued = false;
727 
728 	WARN_ON(!test_bit(FR_BACKGROUND, &req->flags));
729 	if (!test_bit(FR_WAITING, &req->flags)) {
730 		__set_bit(FR_WAITING, &req->flags);
731 		atomic_inc(&fc->num_waiting);
732 	}
733 	__set_bit(FR_ISREPLY, &req->flags);
734 
735 #ifdef CONFIG_FUSE_IO_URING
736 	if (fuse_uring_ready(fc))
737 		return fuse_request_queue_background_uring(fc, req);
738 #endif
739 
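	/* Once num_background reaches max_background, fc->blocked throttles further allocations */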
740 	spin_lock(&fc->bg_lock);
741 	if (likely(fc->connected)) {
742 		fc->num_background++;
743 		if (fc->num_background == fc->max_background)
744 			fc->blocked = 1;
745 		list_add_tail(&req->list, &fc->bg_queue);
746 		flush_bg_queue(fc);
747 		queued = true;
748 	}
749 	spin_unlock(&fc->bg_lock);
750 
751 	return queued;
752 }
753 
754 int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
755 			    gfp_t gfp_flags)
756 {
757 	struct fuse_req *req;
758 
759 	if (args->force) {
760 		WARN_ON(!args->nocreds);
761 		req = fuse_request_alloc(fm, gfp_flags);
762 		if (!req)
763 			return -ENOMEM;
764 		__set_bit(FR_BACKGROUND, &req->flags);
765 	} else {
766 		WARN_ON(args->nocreds);
767 		req = fuse_get_req(&invalid_mnt_idmap, fm, true);
768 		if (IS_ERR(req))
769 			return PTR_ERR(req);
770 	}
771 
772 	fuse_args_to_req(req, args);
773 
774 	if (!fuse_request_queue_background(req)) {
775 		fuse_put_request(req);
776 		return -ENOTCONN;
777 	}
778 
779 	return 0;
780 }
781 EXPORT_SYMBOL_GPL(fuse_simple_background);
782 
783 static int fuse_simple_notify_reply(struct fuse_mount *fm,
784 				    struct fuse_args *args, u64 unique)
785 {
786 	struct fuse_req *req;
787 	struct fuse_iqueue *fiq = &fm->fc->iq;
788 
789 	req = fuse_get_req(&invalid_mnt_idmap, fm, false);
790 	if (IS_ERR(req))
791 		return PTR_ERR(req);
792 
793 	__clear_bit(FR_ISREPLY, &req->flags);
794 	req->in.h.unique = unique;
795 
796 	fuse_args_to_req(req, args);
797 
798 	fuse_send_one(fiq, req);
799 
800 	return 0;
801 }
802 
803 /*
804  * Lock the request.  Up to the next unlock_request() there mustn't be
805  * anything that could cause a page-fault.  If the request was already
806  * aborted bail out.
807  */
808 static int lock_request(struct fuse_req *req)
809 {
810 	int err = 0;
811 	if (req) {
812 		spin_lock(&req->waitq.lock);
813 		if (test_bit(FR_ABORTED, &req->flags))
814 			err = -ENOENT;
815 		else
816 			set_bit(FR_LOCKED, &req->flags);
817 		spin_unlock(&req->waitq.lock);
818 	}
819 	return err;
820 }
821 
822 /*
823  * Unlock request.  If it was aborted while locked, caller is responsible
824  * for unlocking and ending the request.
825  */
826 static int unlock_request(struct fuse_req *req)
827 {
828 	int err = 0;
829 	if (req) {
830 		spin_lock(&req->waitq.lock);
831 		if (test_bit(FR_ABORTED, &req->flags))
832 			err = -ENOENT;
833 		else
834 			clear_bit(FR_LOCKED, &req->flags);
835 		spin_unlock(&req->waitq.lock);
836 	}
837 	return err;
838 }
839 
840 void fuse_copy_init(struct fuse_copy_state *cs, bool write,
841 		    struct iov_iter *iter)
842 {
843 	memset(cs, 0, sizeof(*cs));
844 	cs->write = write;
845 	cs->iter = iter;
846 }
847 
848 /* Unmap and put previous page of userspace buffer */
849 static void fuse_copy_finish(struct fuse_copy_state *cs)
850 {
851 	if (cs->currbuf) {
852 		struct pipe_buffer *buf = cs->currbuf;
853 
854 		if (cs->write)
855 			buf->len = PAGE_SIZE - cs->len;
856 		cs->currbuf = NULL;
857 	} else if (cs->pg) {
858 		if (cs->write) {
859 			flush_dcache_page(cs->pg);
860 			set_page_dirty_lock(cs->pg);
861 		}
862 		put_page(cs->pg);
863 	}
864 	cs->pg = NULL;
865 }
866 
867 /*
868  * Get another pagefull of userspace buffer, and map it to kernel
869  * address space, and lock request
870  */
871 static int fuse_copy_fill(struct fuse_copy_state *cs)
872 {
873 	struct page *page;
874 	int err;
875 
876 	err = unlock_request(cs->req);
877 	if (err)
878 		return err;
879 
880 	fuse_copy_finish(cs);
881 	if (cs->pipebufs) {
882 		struct pipe_buffer *buf = cs->pipebufs;
883 
884 		if (!cs->write) {
885 			err = pipe_buf_confirm(cs->pipe, buf);
886 			if (err)
887 				return err;
888 
889 			BUG_ON(!cs->nr_segs);
890 			cs->currbuf = buf;
891 			cs->pg = buf->page;
892 			cs->offset = buf->offset;
893 			cs->len = buf->len;
894 			cs->pipebufs++;
895 			cs->nr_segs--;
896 		} else {
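			/*
			 * Splicing a request out to the pipe: allocate a fresh
			 * page and append it as a new pipe buffer.
			 */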
897 			if (cs->nr_segs >= cs->pipe->max_usage)
898 				return -EIO;
899 
900 			page = alloc_page(GFP_HIGHUSER);
901 			if (!page)
902 				return -ENOMEM;
903 
904 			buf->page = page;
905 			buf->offset = 0;
906 			buf->len = 0;
907 
908 			cs->currbuf = buf;
909 			cs->pg = page;
910 			cs->offset = 0;
911 			cs->len = PAGE_SIZE;
912 			cs->pipebufs++;
913 			cs->nr_segs++;
914 		}
915 	} else {
916 		size_t off;
917 		err = iov_iter_get_pages2(cs->iter, &page, PAGE_SIZE, 1, &off);
918 		if (err < 0)
919 			return err;
920 		BUG_ON(!err);
921 		cs->len = err;
922 		cs->offset = off;
923 		cs->pg = page;
924 	}
925 
926 	return lock_request(cs->req);
927 }
928 
929 /* Do as much copy to/from userspace buffer as we can */
930 static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
931 {
932 	unsigned ncpy = min(*size, cs->len);
933 	if (val) {
934 		void *pgaddr = kmap_local_page(cs->pg);
935 		void *buf = pgaddr + cs->offset;
936 
937 		if (cs->write)
938 			memcpy(buf, *val, ncpy);
939 		else
940 			memcpy(*val, buf, ncpy);
941 
942 		kunmap_local(pgaddr);
943 		*val += ncpy;
944 	}
945 	*size -= ncpy;
946 	cs->len -= ncpy;
947 	cs->offset += ncpy;
948 	if (cs->is_uring)
949 		cs->ring.copied_sz += ncpy;
950 
951 	return ncpy;
952 }
953 
954 static int fuse_check_folio(struct folio *folio)
955 {
956 	if (folio_mapped(folio) ||
957 	    folio->mapping != NULL ||
958 	    (folio->flags.f & PAGE_FLAGS_CHECK_AT_PREP &
959 	     ~(1 << PG_locked |
960 	       1 << PG_referenced |
961 	       1 << PG_lru |
962 	       1 << PG_active |
963 	       1 << PG_workingset |
964 	       1 << PG_reclaim |
965 	       1 << PG_waiters |
966 	       LRU_GEN_MASK | LRU_REFS_MASK))) {
967 		dump_page(&folio->page, "fuse: trying to steal weird page");
968 		return 1;
969 	}
970 	return 0;
971 }
972 
973 /*
974  * Attempt to steal a page from the splice() pipe and move it into the
975  * pagecache. If successful, the pointer in @pagep will be updated. The
976  * folio that was originally in @pagep will lose a reference and the new
977  * folio returned in @pagep will carry a reference.
978  */
979 static int fuse_try_move_folio(struct fuse_copy_state *cs, struct folio **foliop)
980 {
981 	int err;
982 	struct folio *oldfolio = *foliop;
983 	struct folio *newfolio;
984 	struct pipe_buffer *buf = cs->pipebufs;
985 
986 	folio_get(oldfolio);
987 	err = unlock_request(cs->req);
988 	if (err)
989 		goto out_put_old;
990 
991 	fuse_copy_finish(cs);
992 
993 	err = pipe_buf_confirm(cs->pipe, buf);
994 	if (err)
995 		goto out_put_old;
996 
997 	BUG_ON(!cs->nr_segs);
998 	cs->currbuf = buf;
999 	cs->len = buf->len;
1000 	cs->pipebufs++;
1001 	cs->nr_segs--;
1002 
1003 	if (cs->len != folio_size(oldfolio))
1004 		goto out_fallback;
1005 
1006 	if (!pipe_buf_try_steal(cs->pipe, buf))
1007 		goto out_fallback;
1008 
1009 	newfolio = page_folio(buf->page);
1010 
1011 	folio_clear_uptodate(newfolio);
1012 	folio_clear_mappedtodisk(newfolio);
1013 
1014 	if (fuse_check_folio(newfolio) != 0)
1015 		goto out_fallback_unlock;
1016 
1017 	/*
1018 	 * This is a new and locked page, it shouldn't be mapped or
1019 	 * have any special flags on it
1020 	 */
1021 	if (WARN_ON(folio_mapped(oldfolio)))
1022 		goto out_fallback_unlock;
1023 	if (WARN_ON(folio_has_private(oldfolio)))
1024 		goto out_fallback_unlock;
1025 	if (WARN_ON(folio_test_dirty(oldfolio) ||
1026 				folio_test_writeback(oldfolio)))
1027 		goto out_fallback_unlock;
1028 	if (WARN_ON(folio_test_mlocked(oldfolio)))
1029 		goto out_fallback_unlock;
1030 
1031 	replace_page_cache_folio(oldfolio, newfolio);
1032 
1033 	folio_get(newfolio);
1034 
1035 	if (!(buf->flags & PIPE_BUF_FLAG_LRU))
1036 		folio_add_lru(newfolio);
1037 
1038 	/*
1039 	 * Release while we have extra ref on stolen page.  Otherwise
1040 	 * anon_pipe_buf_release() might think the page can be reused.
1041 	 */
1042 	pipe_buf_release(cs->pipe, buf);
1043 
1044 	err = 0;
1045 	spin_lock(&cs->req->waitq.lock);
1046 	if (test_bit(FR_ABORTED, &cs->req->flags))
1047 		err = -ENOENT;
1048 	else
1049 		*foliop = newfolio;
1050 	spin_unlock(&cs->req->waitq.lock);
1051 
1052 	if (err) {
1053 		folio_unlock(newfolio);
1054 		folio_put(newfolio);
1055 		goto out_put_old;
1056 	}
1057 
1058 	folio_unlock(oldfolio);
1059 	/* Drop ref for ap->pages[] array */
1060 	folio_put(oldfolio);
1061 	cs->len = 0;
1062 
1063 	err = 0;
1064 out_put_old:
1065 	/* Drop ref obtained in this function */
1066 	folio_put(oldfolio);
1067 	return err;
1068 
1069 out_fallback_unlock:
1070 	folio_unlock(newfolio);
1071 out_fallback:
1072 	cs->pg = buf->page;
1073 	cs->offset = buf->offset;
1074 
1075 	err = lock_request(cs->req);
1076 	if (!err)
1077 		err = 1;
1078 
1079 	goto out_put_old;
1080 }
1081 
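/*
 * Instead of copying folio data, take a reference and hand the folio to the
 * pipe as a new buffer.
 */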
1082 static int fuse_ref_folio(struct fuse_copy_state *cs, struct folio *folio,
1083 			  unsigned offset, unsigned count)
1084 {
1085 	struct pipe_buffer *buf;
1086 	int err;
1087 
1088 	if (cs->nr_segs >= cs->pipe->max_usage)
1089 		return -EIO;
1090 
1091 	folio_get(folio);
1092 	err = unlock_request(cs->req);
1093 	if (err) {
1094 		folio_put(folio);
1095 		return err;
1096 	}
1097 
1098 	fuse_copy_finish(cs);
1099 
1100 	buf = cs->pipebufs;
1101 	buf->page = &folio->page;
1102 	buf->offset = offset;
1103 	buf->len = count;
1104 
1105 	cs->pipebufs++;
1106 	cs->nr_segs++;
1107 	cs->len = 0;
1108 
1109 	return 0;
1110 }
1111 
1112 /*
1113  * Copy a folio in the request to/from the userspace buffer.  Must be
1114  * done atomically
1115  */
1116 static int fuse_copy_folio(struct fuse_copy_state *cs, struct folio **foliop,
1117 			   unsigned offset, unsigned count, int zeroing)
1118 {
1119 	int err;
1120 	struct folio *folio = *foliop;
1121 	size_t size;
1122 
1123 	if (folio) {
1124 		size = folio_size(folio);
1125 		if (zeroing && count < size)
1126 			folio_zero_range(folio, 0, size);
1127 	}
1128 
1129 	while (count) {
1130 		if (cs->write && cs->pipebufs && folio) {
1131 			/*
1132 			 * Can't control lifetime of pipe buffers, so always
1133 			 * copy user pages.
1134 			 */
1135 			if (cs->req->args->user_pages) {
1136 				err = fuse_copy_fill(cs);
1137 				if (err)
1138 					return err;
1139 			} else {
1140 				return fuse_ref_folio(cs, folio, offset, count);
1141 			}
1142 		} else if (!cs->len) {
1143 			if (cs->move_folios && folio &&
1144 			    offset == 0 && count == size) {
1145 				err = fuse_try_move_folio(cs, foliop);
1146 				if (err <= 0)
1147 					return err;
1148 			} else {
1149 				err = fuse_copy_fill(cs);
1150 				if (err)
1151 					return err;
1152 			}
1153 		}
1154 		if (folio) {
1155 			void *mapaddr = kmap_local_folio(folio, offset);
1156 			void *buf = mapaddr;
1157 			unsigned int copy = count;
1158 			unsigned int bytes_copied;
1159 
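			/*
			 * kmap_local_folio() maps a single page, so for highmem
			 * folios cap each copy at the page boundary.
			 */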
1160 			if (folio_test_highmem(folio) && count > PAGE_SIZE - offset_in_page(offset))
1161 				copy = PAGE_SIZE - offset_in_page(offset);
1162 
1163 			bytes_copied = fuse_copy_do(cs, &buf, &copy);
1164 			kunmap_local(mapaddr);
1165 			offset += bytes_copied;
1166 			count -= bytes_copied;
1167 		} else
1168 			offset += fuse_copy_do(cs, NULL, &count);
1169 	}
1170 	if (folio && !cs->write)
1171 		flush_dcache_folio(folio);
1172 	return 0;
1173 }
1174 
1175 /* Copy folios in the request to/from userspace buffer */
1176 static int fuse_copy_folios(struct fuse_copy_state *cs, unsigned nbytes,
1177 			    int zeroing)
1178 {
1179 	unsigned i;
1180 	struct fuse_req *req = cs->req;
1181 	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
1182 
1183 	for (i = 0; i < ap->num_folios && (nbytes || zeroing); i++) {
1184 		int err;
1185 		unsigned int offset = ap->descs[i].offset;
1186 		unsigned int count = min(nbytes, ap->descs[i].length);
1187 
1188 		err = fuse_copy_folio(cs, &ap->folios[i], offset, count, zeroing);
1189 		if (err)
1190 			return err;
1191 
1192 		nbytes -= count;
1193 	}
1194 	return 0;
1195 }
1196 
1197 /* Copy a single argument in the request to/from userspace buffer */
1198 static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
1199 {
1200 	while (size) {
1201 		if (!cs->len) {
1202 			int err = fuse_copy_fill(cs);
1203 			if (err)
1204 				return err;
1205 		}
1206 		fuse_copy_do(cs, &val, &size);
1207 	}
1208 	return 0;
1209 }
1210 
1211 /* Copy request arguments to/from userspace buffer */
1212 int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
1213 		   unsigned argpages, struct fuse_arg *args,
1214 		   int zeroing)
1215 {
1216 	int err = 0;
1217 	unsigned i;
1218 
1219 	for (i = 0; !err && i < numargs; i++)  {
1220 		struct fuse_arg *arg = &args[i];
1221 		if (i == numargs - 1 && argpages)
1222 			err = fuse_copy_folios(cs, arg->size, zeroing);
1223 		else
1224 			err = fuse_copy_one(cs, arg->value, arg->size);
1225 	}
1226 	return err;
1227 }
1228 
1229 static int forget_pending(struct fuse_iqueue *fiq)
1230 {
1231 	return fiq->forget_list_head.next != NULL;
1232 }
1233 
1234 static int request_pending(struct fuse_iqueue *fiq)
1235 {
1236 	return !list_empty(&fiq->pending) || !list_empty(&fiq->interrupts) ||
1237 		forget_pending(fiq);
1238 }
1239 
1240 /*
1241  * Transfer an interrupt request to userspace
1242  *
1243  * Unlike other requests this is assembled on demand, without a need
1244  * to allocate a separate fuse_req structure.
1245  *
1246  * Called with fiq->lock held, releases it
1247  */
1248 static int fuse_read_interrupt(struct fuse_iqueue *fiq,
1249 			       struct fuse_copy_state *cs,
1250 			       size_t nbytes, struct fuse_req *req)
1251 __releases(fiq->lock)
1252 {
1253 	struct fuse_in_header ih;
1254 	struct fuse_interrupt_in arg;
1255 	unsigned reqsize = sizeof(ih) + sizeof(arg);
1256 	int err;
1257 
1258 	list_del_init(&req->intr_entry);
1259 	memset(&ih, 0, sizeof(ih));
1260 	memset(&arg, 0, sizeof(arg));
1261 	ih.len = reqsize;
1262 	ih.opcode = FUSE_INTERRUPT;
1263 	ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT);
1264 	arg.unique = req->in.h.unique;
1265 
1266 	spin_unlock(&fiq->lock);
1267 	if (nbytes < reqsize)
1268 		return -EINVAL;
1269 
1270 	err = fuse_copy_one(cs, &ih, sizeof(ih));
1271 	if (!err)
1272 		err = fuse_copy_one(cs, &arg, sizeof(arg));
1273 	fuse_copy_finish(cs);
1274 
1275 	return err ? err : reqsize;
1276 }
1277 
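/* Detach up to @max entries from the head of the forget list and return them */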
1278 static struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
1279 						    unsigned int max,
1280 						    unsigned int *countp)
1281 {
1282 	struct fuse_forget_link *head = fiq->forget_list_head.next;
1283 	struct fuse_forget_link **newhead = &head;
1284 	unsigned count;
1285 
1286 	for (count = 0; *newhead != NULL && count < max; count++)
1287 		newhead = &(*newhead)->next;
1288 
1289 	fiq->forget_list_head.next = *newhead;
1290 	*newhead = NULL;
1291 	if (fiq->forget_list_head.next == NULL)
1292 		fiq->forget_list_tail = &fiq->forget_list_head;
1293 
1294 	if (countp != NULL)
1295 		*countp = count;
1296 
1297 	return head;
1298 }
1299 
1300 static int fuse_read_single_forget(struct fuse_iqueue *fiq,
1301 				   struct fuse_copy_state *cs,
1302 				   size_t nbytes)
1303 __releases(fiq->lock)
1304 {
1305 	int err;
1306 	struct fuse_forget_link *forget = fuse_dequeue_forget(fiq, 1, NULL);
1307 	struct fuse_forget_in arg = {
1308 		.nlookup = forget->forget_one.nlookup,
1309 	};
1310 	struct fuse_in_header ih = {
1311 		.opcode = FUSE_FORGET,
1312 		.nodeid = forget->forget_one.nodeid,
1313 		.unique = fuse_get_unique_locked(fiq),
1314 		.len = sizeof(ih) + sizeof(arg),
1315 	};
1316 
1317 	spin_unlock(&fiq->lock);
1318 	kfree(forget);
1319 	if (nbytes < ih.len)
1320 		return -EINVAL;
1321 
1322 	err = fuse_copy_one(cs, &ih, sizeof(ih));
1323 	if (!err)
1324 		err = fuse_copy_one(cs, &arg, sizeof(arg));
1325 	fuse_copy_finish(cs);
1326 
1327 	if (err)
1328 		return err;
1329 
1330 	return ih.len;
1331 }
1332 
1333 static int fuse_read_batch_forget(struct fuse_iqueue *fiq,
1334 				   struct fuse_copy_state *cs, size_t nbytes)
1335 __releases(fiq->lock)
1336 {
1337 	int err;
1338 	unsigned max_forgets;
1339 	unsigned count;
1340 	struct fuse_forget_link *head;
1341 	struct fuse_batch_forget_in arg = { .count = 0 };
1342 	struct fuse_in_header ih = {
1343 		.opcode = FUSE_BATCH_FORGET,
1344 		.unique = fuse_get_unique_locked(fiq),
1345 		.len = sizeof(ih) + sizeof(arg),
1346 	};
1347 
1348 	if (nbytes < ih.len) {
1349 		spin_unlock(&fiq->lock);
1350 		return -EINVAL;
1351 	}
1352 
1353 	max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1354 	head = fuse_dequeue_forget(fiq, max_forgets, &count);
1355 	spin_unlock(&fiq->lock);
1356 
1357 	arg.count = count;
1358 	ih.len += count * sizeof(struct fuse_forget_one);
1359 	err = fuse_copy_one(cs, &ih, sizeof(ih));
1360 	if (!err)
1361 		err = fuse_copy_one(cs, &arg, sizeof(arg));
1362 
1363 	while (head) {
1364 		struct fuse_forget_link *forget = head;
1365 
1366 		if (!err) {
1367 			err = fuse_copy_one(cs, &forget->forget_one,
1368 					    sizeof(forget->forget_one));
1369 		}
1370 		head = forget->next;
1371 		kfree(forget);
1372 	}
1373 
1374 	fuse_copy_finish(cs);
1375 
1376 	if (err)
1377 		return err;
1378 
1379 	return ih.len;
1380 }
1381 
1382 static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq,
1383 			    struct fuse_copy_state *cs,
1384 			    size_t nbytes)
1385 __releases(fiq->lock)
1386 {
1387 	if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL)
1388 		return fuse_read_single_forget(fiq, cs, nbytes);
1389 	else
1390 		return fuse_read_batch_forget(fiq, cs, nbytes);
1391 }
1392 
1393 /*
1394  * Read a single request into the userspace filesystem's buffer.  This
1395  * function waits until a request is available, then removes it from
1396  * the pending list and copies request data to userspace buffer.  If
1397  * no reply is needed (FORGET) or request has been aborted or there
1398  * was an error during the copying then it's finished by calling
1399  * fuse_request_end().  Otherwise add it to the processing list, and set
1400  * the 'sent' flag.
1401  */
1402 static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
1403 				struct fuse_copy_state *cs, size_t nbytes)
1404 {
1405 	ssize_t err;
1406 	struct fuse_conn *fc = fud->fc;
1407 	struct fuse_iqueue *fiq = &fc->iq;
1408 	struct fuse_pqueue *fpq = &fud->pq;
1409 	struct fuse_req *req;
1410 	struct fuse_args *args;
1411 	unsigned reqsize;
1412 	unsigned int hash;
1413 
1414 	/*
1415 	 * Require sane minimum read buffer - that has capacity for fixed part
1416 	 * of any request header + negotiated max_write room for data.
1417 	 *
1418 	 * Historically libfuse reserves 4K for fixed header room, but e.g.
1419 	 * GlusterFS reserves only 80 bytes
1420 	 *
1421 	 *	= `sizeof(fuse_in_header) + sizeof(fuse_write_in)`
1422 	 *
1423 	 * which is the absolute minimum any sane filesystem should be using
1424 	 * for header room.
1425 	 */
1426 	if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER,
1427 			   sizeof(struct fuse_in_header) +
1428 			   sizeof(struct fuse_write_in) +
1429 			   fc->max_write))
1430 		return -EINVAL;
1431 
1432  restart:
1433 	for (;;) {
1434 		spin_lock(&fiq->lock);
1435 		if (!fiq->connected || request_pending(fiq))
1436 			break;
1437 		spin_unlock(&fiq->lock);
1438 
1439 		if (file->f_flags & O_NONBLOCK)
1440 			return -EAGAIN;
1441 		err = wait_event_interruptible_exclusive(fiq->waitq,
1442 				!fiq->connected || request_pending(fiq));
1443 		if (err)
1444 			return err;
1445 	}
1446 
1447 	if (!fiq->connected) {
1448 		err = fc->aborted ? -ECONNABORTED : -ENODEV;
1449 		goto err_unlock;
1450 	}
1451 
1452 	if (!list_empty(&fiq->interrupts)) {
1453 		req = list_entry(fiq->interrupts.next, struct fuse_req,
1454 				 intr_entry);
1455 		return fuse_read_interrupt(fiq, cs, nbytes, req);
1456 	}
1457 
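	/*
	 * Dequeue forgets in batches, but periodically let normal requests
	 * through so neither side starves the other.
	 */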
1458 	if (forget_pending(fiq)) {
1459 		if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0)
1460 			return fuse_read_forget(fc, fiq, cs, nbytes);
1461 
1462 		if (fiq->forget_batch <= -8)
1463 			fiq->forget_batch = 16;
1464 	}
1465 
1466 	req = list_entry(fiq->pending.next, struct fuse_req, list);
1467 	clear_bit(FR_PENDING, &req->flags);
1468 	list_del_init(&req->list);
1469 	spin_unlock(&fiq->lock);
1470 
1471 	args = req->args;
1472 	reqsize = req->in.h.len;
1473 
1474 	/* If request is too large, reply with an error and restart the read */
1475 	if (nbytes < reqsize) {
1476 		req->out.h.error = -EIO;
1477 		/* SETXATTR is special, since it may contain too large data */
1478 		if (args->opcode == FUSE_SETXATTR)
1479 			req->out.h.error = -E2BIG;
1480 		fuse_request_end(req);
1481 		goto restart;
1482 	}
1483 	spin_lock(&fpq->lock);
1484 	/*
1485 	 *  Must not put request on fpq->io queue after having been shut down by
1486 	 *  fuse_abort_conn()
1487 	 */
1488 	if (!fpq->connected) {
1489 		req->out.h.error = err = -ECONNABORTED;
1490 		goto out_end;
1491 
1492 	}
1493 	list_add(&req->list, &fpq->io);
1494 	spin_unlock(&fpq->lock);
1495 	cs->req = req;
1496 	err = fuse_copy_one(cs, &req->in.h, sizeof(req->in.h));
1497 	if (!err)
1498 		err = fuse_copy_args(cs, args->in_numargs, args->in_pages,
1499 				     (struct fuse_arg *) args->in_args, 0);
1500 	fuse_copy_finish(cs);
1501 	spin_lock(&fpq->lock);
1502 	clear_bit(FR_LOCKED, &req->flags);
1503 	if (!fpq->connected) {
1504 		err = fc->aborted ? -ECONNABORTED : -ENODEV;
1505 		goto out_end;
1506 	}
1507 	if (err) {
1508 		req->out.h.error = -EIO;
1509 		goto out_end;
1510 	}
1511 	if (!test_bit(FR_ISREPLY, &req->flags)) {
1512 		err = reqsize;
1513 		goto out_end;
1514 	}
1515 	hash = fuse_req_hash(req->in.h.unique);
1516 	list_move_tail(&req->list, &fpq->processing[hash]);
1517 	__fuse_get_request(req);
1518 	set_bit(FR_SENT, &req->flags);
1519 	spin_unlock(&fpq->lock);
1520 	/* matches barrier in request_wait_answer() */
1521 	smp_mb__after_atomic();
1522 	if (test_bit(FR_INTERRUPTED, &req->flags))
1523 		queue_interrupt(req);
1524 	fuse_put_request(req);
1525 
1526 	return reqsize;
1527 
1528 out_end:
1529 	if (!test_bit(FR_PRIVATE, &req->flags))
1530 		list_del_init(&req->list);
1531 	spin_unlock(&fpq->lock);
1532 	fuse_request_end(req);
1533 	return err;
1534 
1535  err_unlock:
1536 	spin_unlock(&fiq->lock);
1537 	return err;
1538 }
1539 
1540 static int fuse_dev_open(struct inode *inode, struct file *file)
1541 {
1542 	/*
1543 	 * The fuse device's file's private_data is used to hold
1544 	 * the fuse_conn(ection) when it is mounted, and is used to
1545 	 * keep track of whether the file has been mounted already.
1546 	 */
1547 	file->private_data = NULL;
1548 	return 0;
1549 }
1550 
1551 struct fuse_dev *fuse_get_dev(struct file *file)
1552 {
1553 	struct fuse_dev *fud = __fuse_get_dev(file);
1554 	int err;
1555 
1556 	if (likely(fud))
1557 		return fud;
1558 
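	/*
	 * private_data may still be FUSE_DEV_SYNC_INIT while initialization is
	 * in progress; wait for it to be replaced by the fuse_dev, and fail
	 * with -EPERM if no device gets installed.
	 */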
1559 	err = wait_event_interruptible(fuse_dev_waitq,
1560 				       READ_ONCE(file->private_data) != FUSE_DEV_SYNC_INIT);
1561 	if (err)
1562 		return ERR_PTR(err);
1563 
1564 	fud = __fuse_get_dev(file);
1565 	if (!fud)
1566 		return ERR_PTR(-EPERM);
1567 
1568 	return fud;
1569 }
1570 
1571 static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
1572 {
1573 	struct fuse_copy_state cs;
1574 	struct file *file = iocb->ki_filp;
1575 	struct fuse_dev *fud = fuse_get_dev(file);
1576 
1577 	if (IS_ERR(fud))
1578 		return PTR_ERR(fud);
1579 
1580 	if (!user_backed_iter(to))
1581 		return -EINVAL;
1582 
1583 	fuse_copy_init(&cs, true, to);
1584 
1585 	return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
1586 }
1587 
1588 static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1589 				    struct pipe_inode_info *pipe,
1590 				    size_t len, unsigned int flags)
1591 {
1592 	int total, ret;
1593 	int page_nr = 0;
1594 	struct pipe_buffer *bufs;
1595 	struct fuse_copy_state cs;
1596 	struct fuse_dev *fud = fuse_get_dev(in);
1597 
1598 	if (IS_ERR(fud))
1599 		return PTR_ERR(fud);
1600 
1601 	bufs = kvmalloc_array(pipe->max_usage, sizeof(struct pipe_buffer),
1602 			      GFP_KERNEL);
1603 	if (!bufs)
1604 		return -ENOMEM;
1605 
1606 	fuse_copy_init(&cs, true, NULL);
1607 	cs.pipebufs = bufs;
1608 	cs.pipe = pipe;
1609 	ret = fuse_dev_do_read(fud, in, &cs, len);
1610 	if (ret < 0)
1611 		goto out;
1612 
1613 	if (pipe_buf_usage(pipe) + cs.nr_segs > pipe->max_usage) {
1614 		ret = -EIO;
1615 		goto out;
1616 	}
1617 
1618 	for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
1619 		/*
1620 		 * Need to be careful about this.  Having buf->ops in module
1621 		 * code can Oops if the buffer persists after module unload.
1622 		 */
1623 		bufs[page_nr].ops = &nosteal_pipe_buf_ops;
1624 		bufs[page_nr].flags = 0;
1625 		ret = add_to_pipe(pipe, &bufs[page_nr++]);
1626 		if (unlikely(ret < 0))
1627 			break;
1628 	}
1629 	if (total)
1630 		ret = total;
1631 out:
1632 	for (; page_nr < cs.nr_segs; page_nr++)
1633 		put_page(bufs[page_nr].page);
1634 
1635 	kvfree(bufs);
1636 	return ret;
1637 }
1638 
1639 static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
1640 			    struct fuse_copy_state *cs)
1641 {
1642 	struct fuse_notify_poll_wakeup_out outarg;
1643 	int err;
1644 
1645 	if (size != sizeof(outarg))
1646 		return -EINVAL;
1647 
1648 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1649 	if (err)
1650 		return err;
1651 
1652 	fuse_copy_finish(cs);
1653 	return fuse_notify_poll_wakeup(fc, &outarg);
1654 }
1655 
1656 static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
1657 				   struct fuse_copy_state *cs)
1658 {
1659 	struct fuse_notify_inval_inode_out outarg;
1660 	int err;
1661 
1662 	if (size != sizeof(outarg))
1663 		return -EINVAL;
1664 
1665 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1666 	if (err)
1667 		return err;
1668 	fuse_copy_finish(cs);
1669 
1670 	down_read(&fc->killsb);
1671 	err = fuse_reverse_inval_inode(fc, outarg.ino,
1672 				       outarg.off, outarg.len);
1673 	up_read(&fc->killsb);
1674 	return err;
1675 }
1676 
1677 static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
1678 				   struct fuse_copy_state *cs)
1679 {
1680 	struct fuse_notify_inval_entry_out outarg;
1681 	int err;
1682 	char *buf;
1683 	struct qstr name;
1684 
1685 	if (size < sizeof(outarg))
1686 		return -EINVAL;
1687 
1688 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1689 	if (err)
1690 		return err;
1691 
1692 	if (outarg.namelen > fc->name_max)
1693 		return -ENAMETOOLONG;
1694 
1695 	err = -EINVAL;
1696 	if (size != sizeof(outarg) + outarg.namelen + 1)
1697 		return -EINVAL;
1698 
1699 	buf = kzalloc(outarg.namelen + 1, GFP_KERNEL);
1700 	if (!buf)
1701 		return -ENOMEM;
1702 
1703 	name.name = buf;
1704 	name.len = outarg.namelen;
1705 	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1706 	if (err)
1707 		goto err;
1708 	fuse_copy_finish(cs);
1709 	buf[outarg.namelen] = 0;
1710 
1711 	down_read(&fc->killsb);
1712 	err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name, outarg.flags);
1713 	up_read(&fc->killsb);
1714 err:
1715 	kfree(buf);
1716 	return err;
1717 }
1718 
1719 static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
1720 			      struct fuse_copy_state *cs)
1721 {
1722 	struct fuse_notify_delete_out outarg;
1723 	int err;
1724 	char *buf;
1725 	struct qstr name;
1726 
1727 	if (size < sizeof(outarg))
1728 		return -EINVAL;
1729 
1730 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1731 	if (err)
1732 		return err;
1733 
1734 	if (outarg.namelen > fc->name_max)
1735 		return -ENAMETOOLONG;
1736 
1737 	if (size != sizeof(outarg) + outarg.namelen + 1)
1738 		return -EINVAL;
1739 
1740 	buf = kzalloc(outarg.namelen + 1, GFP_KERNEL);
1741 	if (!buf)
1742 		return -ENOMEM;
1743 
1744 	name.name = buf;
1745 	name.len = outarg.namelen;
1746 	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1747 	if (err)
1748 		goto err;
1749 	fuse_copy_finish(cs);
1750 	buf[outarg.namelen] = 0;
1751 
1752 	down_read(&fc->killsb);
1753 	err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name, 0);
1754 	up_read(&fc->killsb);
1755 err:
1756 	kfree(buf);
1757 	return err;
1758 }
1759 
1760 static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
1761 			     struct fuse_copy_state *cs)
1762 {
1763 	struct fuse_notify_store_out outarg;
1764 	struct inode *inode;
1765 	struct address_space *mapping;
1766 	u64 nodeid;
1767 	int err;
1768 	pgoff_t index;
1769 	unsigned int offset;
1770 	unsigned int num;
1771 	loff_t file_size;
1772 	loff_t end;
1773 
1774 	if (size < sizeof(outarg))
1775 		return -EINVAL;
1776 
1777 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1778 	if (err)
1779 		return err;
1780 
1781 	if (size - sizeof(outarg) != outarg.size)
1782 		return -EINVAL;
1783 
1784 	nodeid = outarg.nodeid;
1785 
1786 	down_read(&fc->killsb);
1787 
1788 	err = -ENOENT;
1789 	inode = fuse_ilookup(fc, nodeid,  NULL);
1790 	if (!inode)
1791 		goto out_up_killsb;
1792 
1793 	mapping = inode->i_mapping;
1794 	index = outarg.offset >> PAGE_SHIFT;
1795 	offset = outarg.offset & ~PAGE_MASK;
1796 	file_size = i_size_read(inode);
1797 	end = outarg.offset + outarg.size;
1798 	if (end > file_size) {
1799 		file_size = end;
1800 		fuse_write_update_attr(inode, file_size, outarg.size);
1801 	}
1802 
1803 	num = outarg.size;
1804 	while (num) {
1805 		struct folio *folio;
1806 		unsigned int folio_offset;
1807 		unsigned int nr_bytes;
1808 		unsigned int nr_pages;
1809 
1810 		folio = filemap_grab_folio(mapping, index);
1811 		err = PTR_ERR(folio);
1812 		if (IS_ERR(folio))
1813 			goto out_iput;
1814 
1815 		folio_offset = ((index - folio->index) << PAGE_SHIFT) + offset;
1816 		nr_bytes = min_t(unsigned, num, folio_size(folio) - folio_offset);
1817 		nr_pages = (offset + nr_bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
1818 
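		/*
		 * If the copy starts at offset 0 and either fills the whole
		 * folio or reaches EOF, zero the tail and mark the folio
		 * uptodate.
		 */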
1819 		err = fuse_copy_folio(cs, &folio, folio_offset, nr_bytes, 0);
1820 		if (!folio_test_uptodate(folio) && !err && offset == 0 &&
1821 		    (nr_bytes == folio_size(folio) || file_size == end)) {
1822 			folio_zero_segment(folio, nr_bytes, folio_size(folio));
1823 			folio_mark_uptodate(folio);
1824 		}
1825 		folio_unlock(folio);
1826 		folio_put(folio);
1827 
1828 		if (err)
1829 			goto out_iput;
1830 
1831 		num -= nr_bytes;
1832 		offset = 0;
1833 		index += nr_pages;
1834 	}
1835 
1836 	err = 0;
1837 
1838 out_iput:
1839 	iput(inode);
1840 out_up_killsb:
1841 	up_read(&fc->killsb);
1842 	return err;
1843 }
1844 
1845 struct fuse_retrieve_args {
1846 	struct fuse_args_pages ap;
1847 	struct fuse_notify_retrieve_in inarg;
1848 };
1849 
1850 static void fuse_retrieve_end(struct fuse_mount *fm, struct fuse_args *args,
1851 			      int error)
1852 {
1853 	struct fuse_retrieve_args *ra =
1854 		container_of(args, typeof(*ra), ap.args);
1855 
1856 	release_pages(ra->ap.folios, ra->ap.num_folios);
1857 	kfree(ra);
1858 }
1859 
1860 static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
1861 			 struct fuse_notify_retrieve_out *outarg)
1862 {
1863 	int err;
1864 	struct address_space *mapping = inode->i_mapping;
1865 	pgoff_t index;
1866 	loff_t file_size;
1867 	unsigned int num;
1868 	unsigned int offset;
1869 	size_t total_len = 0;
1870 	unsigned int num_pages;
1871 	struct fuse_conn *fc = fm->fc;
1872 	struct fuse_retrieve_args *ra;
1873 	size_t args_size = sizeof(*ra);
1874 	struct fuse_args_pages *ap;
1875 	struct fuse_args *args;
1876 
1877 	offset = outarg->offset & ~PAGE_MASK;
1878 	file_size = i_size_read(inode);
1879 
1880 	num = min(outarg->size, fc->max_write);
1881 	if (outarg->offset > file_size)
1882 		num = 0;
1883 	else if (outarg->offset + num > file_size)
1884 		num = file_size - outarg->offset;
1885 
1886 	num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1887 	num_pages = min(num_pages, fc->max_pages);
1888 	num = min(num, num_pages << PAGE_SHIFT);
1889 
1890 	args_size += num_pages * (sizeof(ap->folios[0]) + sizeof(ap->descs[0]));
1891 
1892 	ra = kzalloc(args_size, GFP_KERNEL);
1893 	if (!ra)
1894 		return -ENOMEM;
1895 
1896 	ap = &ra->ap;
1897 	ap->folios = (void *) (ra + 1);
1898 	ap->descs = (void *) (ap->folios + num_pages);
1899 
1900 	args = &ap->args;
1901 	args->nodeid = outarg->nodeid;
1902 	args->opcode = FUSE_NOTIFY_REPLY;
1903 	args->in_numargs = 3;
1904 	args->in_pages = true;
1905 	args->end = fuse_retrieve_end;
1906 
1907 	index = outarg->offset >> PAGE_SHIFT;
1908 
1909 	while (num && ap->num_folios < num_pages) {
1910 		struct folio *folio;
1911 		unsigned int folio_offset;
1912 		unsigned int nr_bytes;
1913 		unsigned int nr_pages;
1914 
1915 		folio = filemap_get_folio(mapping, index);
1916 		if (IS_ERR(folio))
1917 			break;
1918 
1919 		folio_offset = ((index - folio->index) << PAGE_SHIFT) + offset;
1920 		nr_bytes = min(folio_size(folio) - folio_offset, num);
1921 		nr_pages = (offset + nr_bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
1922 
1923 		ap->folios[ap->num_folios] = folio;
1924 		ap->descs[ap->num_folios].offset = folio_offset;
1925 		ap->descs[ap->num_folios].length = nr_bytes;
1926 		ap->num_folios++;
1927 
1928 		offset = 0;
1929 		num -= nr_bytes;
1930 		total_len += nr_bytes;
1931 		index += nr_pages;
1932 	}
1933 	ra->inarg.offset = outarg->offset;
1934 	ra->inarg.size = total_len;
1935 	fuse_set_zero_arg0(args);
1936 	args->in_args[1].size = sizeof(ra->inarg);
1937 	args->in_args[1].value = &ra->inarg;
1938 	args->in_args[2].size = total_len;
1939 
1940 	err = fuse_simple_notify_reply(fm, args, outarg->notify_unique);
1941 	if (err)
1942 		fuse_retrieve_end(fm, args, err);
1943 
1944 	return err;
1945 }
1946 
1947 static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1948 				struct fuse_copy_state *cs)
1949 {
1950 	struct fuse_notify_retrieve_out outarg;
1951 	struct fuse_mount *fm;
1952 	struct inode *inode;
1953 	u64 nodeid;
1954 	int err;
1955 
1956 	if (size != sizeof(outarg))
1957 		return -EINVAL;
1958 
1959 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1960 	if (err)
1961 		return err;
1962 
1963 	fuse_copy_finish(cs);
1964 
1965 	down_read(&fc->killsb);
1966 	err = -ENOENT;
1967 	nodeid = outarg.nodeid;
1968 
1969 	inode = fuse_ilookup(fc, nodeid, &fm);
1970 	if (inode) {
1971 		err = fuse_retrieve(fm, inode, &outarg);
1972 		iput(inode);
1973 	}
1974 	up_read(&fc->killsb);
1975 
1976 	return err;
1977 }
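/*
 * Illustrative sketch (not part of this file): retrieve is a round trip.
 * The daemon writes a FUSE_NOTIFY_RETRIEVE notification naming an inode,
 * offset, size and an opaque notify_unique cookie; the kernel answers with a
 * FUSE_NOTIFY_REPLY request (built in fuse_retrieve() above) carrying the
 * cached data and echoing the cookie.  Assuming the current <linux/fuse.h>
 * layout, with ino, off, len, my_cookie and fuse_dev_fd as placeholders:
 *
 *	struct fuse_out_header oh = {
 *		.len    = sizeof(oh) + sizeof(struct fuse_notify_retrieve_out),
 *		.error  = FUSE_NOTIFY_RETRIEVE,
 *		.unique = 0,
 *	};
 *	struct fuse_notify_retrieve_out out = {
 *		.notify_unique = my_cookie,
 *		.nodeid        = ino,
 *		.offset        = off,
 *		.size          = len,
 *	};
 *	struct iovec iov[] = { { &oh, sizeof(oh) }, { &out, sizeof(out) } };
 *
 *	writev(fuse_dev_fd, iov, 2);
 */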
1978 
1979 /*
1980  * Resend all processing queue requests.
1981  *
1982  * If a FUSE daemon panics and fails over, it is possible for some inflight
1983  * requests to be lost and never answered. As a result, applications awaiting
1984  * replies would be stuck forever. To address this, this notification can be
1985  * used to trigger resending of those pending requests to the FUSE daemon,
1986  * ensuring they are properly processed again.
1987  *
1988  * Please note that this strategy is applicable only to idempotent requests or
1989  * if the FUSE daemon takes careful measures to avoid processing duplicated
1990  * non-idempotent requests.
1991  */
1992 static void fuse_resend(struct fuse_conn *fc)
1993 {
1994 	struct fuse_dev *fud;
1995 	struct fuse_req *req, *next;
1996 	struct fuse_iqueue *fiq = &fc->iq;
1997 	LIST_HEAD(to_queue);
1998 	unsigned int i;
1999 
2000 	spin_lock(&fc->lock);
2001 	if (!fc->connected) {
2002 		spin_unlock(&fc->lock);
2003 		return;
2004 	}
2005 
2006 	list_for_each_entry(fud, &fc->devices, entry) {
2007 		struct fuse_pqueue *fpq = &fud->pq;
2008 
2009 		spin_lock(&fpq->lock);
2010 		for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
2011 			list_splice_tail_init(&fpq->processing[i], &to_queue);
2012 		spin_unlock(&fpq->lock);
2013 	}
2014 	spin_unlock(&fc->lock);
2015 
2016 	list_for_each_entry_safe(req, next, &to_queue, list) {
2017 		set_bit(FR_PENDING, &req->flags);
2018 		clear_bit(FR_SENT, &req->flags);
2019 		/* mark the request as a resend request */
2020 		req->in.h.unique |= FUSE_UNIQUE_RESEND;
2021 	}
2022 
2023 	spin_lock(&fiq->lock);
2024 	if (!fiq->connected) {
2025 		spin_unlock(&fiq->lock);
2026 		list_for_each_entry(req, &to_queue, list)
2027 			clear_bit(FR_PENDING, &req->flags);
2028 		fuse_dev_end_requests(&to_queue);
2029 		return;
2030 	}
2031 	/* iq and pq requests are both oldest to newest */
2032 	list_splice(&to_queue, &fiq->pending);
2033 	fuse_dev_wake_and_unlock(fiq);
2034 }
2035 
2036 static int fuse_notify_resend(struct fuse_conn *fc)
2037 {
2038 	fuse_resend(fc);
2039 	return 0;
2040 }
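/*
 * Illustrative sketch (not part of this file): FUSE_NOTIFY_RESEND carries no
 * payload, so a restarted daemon can ask for the processing queue to be
 * replayed with a bare header.  Resent requests reach the daemon with
 * FUSE_UNIQUE_RESEND set in their unique id, letting it detect duplicates.
 * fuse_dev_fd is a placeholder:
 *
 *	struct fuse_out_header oh = {
 *		.len    = sizeof(oh),
 *		.error  = FUSE_NOTIFY_RESEND,
 *		.unique = 0,
 *	};
 *
 *	write(fuse_dev_fd, &oh, sizeof(oh));
 */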
2041 
2042 /*
2043  * Increments the fuse connection epoch.  This causes dentries from
2044  * previous epochs to be invalidated.
2045  *
2046  * XXX optimization: add call to shrink_dcache_sb()?
2047  */
2048 static int fuse_notify_inc_epoch(struct fuse_conn *fc)
2049 {
2050 	atomic_inc(&fc->epoch);
2051 
2052 	return 0;
2053 }
2054 
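/*
 * FUSE_NOTIFY_PRUNE: the daemon names inodes it would like the kernel to try
 * to drop.  The payload is a fuse_notify_prune_out carrying a count followed
 * by that many 64-bit nodeids; they are handled in batches of 512 so that
 * killsb is not held across an arbitrarily large notification.
 */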
2055 static int fuse_notify_prune(struct fuse_conn *fc, unsigned int size,
2056 			     struct fuse_copy_state *cs)
2057 {
2058 	struct fuse_notify_prune_out outarg;
2059 	const unsigned int batch = 512;
2060 	u64 *nodeids __free(kfree) = kmalloc(sizeof(u64) * batch, GFP_KERNEL);
2061 	unsigned int num, i;
2062 	int err;
2063 
2064 	if (!nodeids)
2065 		return -ENOMEM;
2066 
2067 	if (size < sizeof(outarg))
2068 		return -EINVAL;
2069 
2070 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
2071 	if (err)
2072 		return err;
2073 
2074 	if (size - sizeof(outarg) != outarg.count * sizeof(u64))
2075 		return -EINVAL;
2076 
2077 	for (; outarg.count; outarg.count -= num) {
2078 		num = min(batch, outarg.count);
2079 		err = fuse_copy_one(cs, nodeids, num * sizeof(u64));
2080 		if (err)
2081 			return err;
2082 
2083 		scoped_guard(rwsem_read, &fc->killsb) {
2084 			for (i = 0; i < num; i++)
2085 				fuse_try_prune_one_inode(fc, nodeids[i]);
2086 		}
2087 	}
2088 	return 0;
2089 }
2090 
2091 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
2092 		       unsigned int size, struct fuse_copy_state *cs)
2093 {
2094 	/* Don't try to move folios (yet) */
2095 	cs->move_folios = false;
2096 
2097 	switch (code) {
2098 	case FUSE_NOTIFY_POLL:
2099 		return fuse_notify_poll(fc, size, cs);
2100 
2101 	case FUSE_NOTIFY_INVAL_INODE:
2102 		return fuse_notify_inval_inode(fc, size, cs);
2103 
2104 	case FUSE_NOTIFY_INVAL_ENTRY:
2105 		return fuse_notify_inval_entry(fc, size, cs);
2106 
2107 	case FUSE_NOTIFY_STORE:
2108 		return fuse_notify_store(fc, size, cs);
2109 
2110 	case FUSE_NOTIFY_RETRIEVE:
2111 		return fuse_notify_retrieve(fc, size, cs);
2112 
2113 	case FUSE_NOTIFY_DELETE:
2114 		return fuse_notify_delete(fc, size, cs);
2115 
2116 	case FUSE_NOTIFY_RESEND:
2117 		return fuse_notify_resend(fc);
2118 
2119 	case FUSE_NOTIFY_INC_EPOCH:
2120 		return fuse_notify_inc_epoch(fc);
2121 
2122 	case FUSE_NOTIFY_PRUNE:
2123 		return fuse_notify_prune(fc, size, cs);
2124 
2125 	default:
2126 		return -EINVAL;
2127 	}
2128 }
2129 
2130 /* Look up request on processing list by unique ID */
2131 struct fuse_req *fuse_request_find(struct fuse_pqueue *fpq, u64 unique)
2132 {
2133 	unsigned int hash = fuse_req_hash(unique);
2134 	struct fuse_req *req;
2135 
2136 	list_for_each_entry(req, &fpq->processing[hash], list) {
2137 		if (req->in.h.unique == unique)
2138 			return req;
2139 	}
2140 	return NULL;
2141 }
2142 
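/*
 * Copy the reply payload into the request's output arguments.  The reply
 * must match the expected size exactly unless out_argvar is set, in which
 * case a shorter reply trims the last (variable length) argument - e.g. a
 * READ that returns fewer bytes than were requested.
 */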
2143 int fuse_copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
2144 		       unsigned nbytes)
2145 {
2146 
2147 	unsigned int reqsize = 0;
2148 
2149 	/*
2150 	 * The io_uring path carries the headers separately from the args - args is payload only
2151 	 */
2152 	if (!cs->is_uring)
2153 		reqsize = sizeof(struct fuse_out_header);
2154 
2155 	reqsize += fuse_len_args(args->out_numargs, args->out_args);
2156 
2157 	if (reqsize < nbytes || (reqsize > nbytes && !args->out_argvar))
2158 		return -EINVAL;
2159 	else if (reqsize > nbytes) {
2160 		struct fuse_arg *lastarg = &args->out_args[args->out_numargs-1];
2161 		unsigned diffsize = reqsize - nbytes;
2162 
2163 		if (diffsize > lastarg->size)
2164 			return -EINVAL;
2165 		lastarg->size -= diffsize;
2166 	}
2167 	return fuse_copy_args(cs, args->out_numargs, args->out_pages,
2168 			      args->out_args, args->page_zeroing);
2169 }
2170 
2171 /*
2172  * Write a single reply to a request.  First the header is copied from
2173  * the write buffer.  The request is then searched on the processing
2174  * list by the unique ID found in the header.  If found, then remove
2175  * it from the list and copy the rest of the buffer to the request.
2176  * The request is finished by calling fuse_request_end().
2177  */
2178 static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
2179 				 struct fuse_copy_state *cs, size_t nbytes)
2180 {
2181 	int err;
2182 	struct fuse_conn *fc = fud->fc;
2183 	struct fuse_pqueue *fpq = &fud->pq;
2184 	struct fuse_req *req;
2185 	struct fuse_out_header oh;
2186 
2187 	err = -EINVAL;
2188 	if (nbytes < sizeof(struct fuse_out_header))
2189 		goto out;
2190 
2191 	err = fuse_copy_one(cs, &oh, sizeof(oh));
2192 	if (err)
2193 		goto copy_finish;
2194 
2195 	err = -EINVAL;
2196 	if (oh.len != nbytes)
2197 		goto copy_finish;
2198 
2199 	/*
2200 	 * A zero oh.unique indicates an unsolicited notification message;
2201 	 * the error field then carries the notification code.
2202 	 */
2203 	if (!oh.unique) {
2204 		err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
2205 		goto copy_finish;
2206 	}
2207 
2208 	err = -EINVAL;
2209 	if (oh.error <= -512 || oh.error > 0)
2210 		goto copy_finish;
2211 
2212 	spin_lock(&fpq->lock);
2213 	req = NULL;
2214 	if (fpq->connected)
2215 		req = fuse_request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT);
2216 
2217 	err = -ENOENT;
2218 	if (!req) {
2219 		spin_unlock(&fpq->lock);
2220 		goto copy_finish;
2221 	}
2222 
2223 	/* Is it an interrupt reply ID? */
2224 	if (oh.unique & FUSE_INT_REQ_BIT) {
2225 		__fuse_get_request(req);
2226 		spin_unlock(&fpq->lock);
2227 
2228 		err = 0;
2229 		if (nbytes != sizeof(struct fuse_out_header))
2230 			err = -EINVAL;
2231 		else if (oh.error == -ENOSYS)
2232 			fc->no_interrupt = 1;
2233 		else if (oh.error == -EAGAIN)
2234 			err = queue_interrupt(req);
2235 
2236 		fuse_put_request(req);
2237 
2238 		goto copy_finish;
2239 	}
2240 
2241 	clear_bit(FR_SENT, &req->flags);
2242 	list_move(&req->list, &fpq->io);
2243 	req->out.h = oh;
2244 	set_bit(FR_LOCKED, &req->flags);
2245 	spin_unlock(&fpq->lock);
2246 	cs->req = req;
2247 	if (!req->args->page_replace)
2248 		cs->move_folios = false;
2249 
2250 	if (oh.error)
2251 		err = nbytes != sizeof(oh) ? -EINVAL : 0;
2252 	else
2253 		err = fuse_copy_out_args(cs, req->args, nbytes);
2254 	fuse_copy_finish(cs);
2255 
2256 	spin_lock(&fpq->lock);
2257 	clear_bit(FR_LOCKED, &req->flags);
2258 	if (!fpq->connected)
2259 		err = -ENOENT;
2260 	else if (err)
2261 		req->out.h.error = -EIO;
2262 	if (!test_bit(FR_PRIVATE, &req->flags))
2263 		list_del_init(&req->list);
2264 	spin_unlock(&fpq->lock);
2265 
2266 	fuse_request_end(req);
2267 out:
2268 	return err ? err : nbytes;
2269 
2270 copy_finish:
2271 	fuse_copy_finish(cs);
2272 	goto out;
2273 }
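/*
 * Illustrative sketch (not part of this file): a reply from the daemon is a
 * single write of a fuse_out_header followed by the reply body, with oh.len
 * equal to the number of bytes written and oh.unique echoing the request's
 * id.  For example, answering a FUSE_GETATTR request (request_unique, ino
 * and fuse_dev_fd are placeholders; current uapi layout assumed):
 *
 *	struct fuse_out_header oh = {
 *		.len    = sizeof(oh) + sizeof(struct fuse_attr_out),
 *		.error  = 0,
 *		.unique = request_unique,
 *	};
 *	struct fuse_attr_out arg = {
 *		.attr_valid = 1,
 *		.attr = { .ino = ino, .mode = S_IFREG | 0644, .nlink = 1 },
 *	};
 *	struct iovec iov[] = { { &oh, sizeof(oh) }, { &arg, sizeof(arg) } };
 *
 *	writev(fuse_dev_fd, iov, 2);
 *
 * An error reply is just the header with a negative errno in oh.error.
 */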
2274 
2275 static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
2276 {
2277 	struct fuse_copy_state cs;
2278 	struct fuse_dev *fud = __fuse_get_dev(iocb->ki_filp);
2279 
2280 	if (!fud)
2281 		return -EPERM;
2282 
2283 	if (!user_backed_iter(from))
2284 		return -EINVAL;
2285 
2286 	fuse_copy_init(&cs, false, from);
2287 
2288 	return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
2289 }
2290 
2291 static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
2292 				     struct file *out, loff_t *ppos,
2293 				     size_t len, unsigned int flags)
2294 {
2295 	unsigned int head, tail, count;
2296 	unsigned nbuf;
2297 	unsigned idx;
2298 	struct pipe_buffer *bufs;
2299 	struct fuse_copy_state cs;
2300 	struct fuse_dev *fud = __fuse_get_dev(out);
2301 	size_t rem;
2302 	ssize_t ret;
2303 
2304 	if (!fud)
2305 		return -EPERM;
2306 
2307 	pipe_lock(pipe);
2308 
2309 	head = pipe->head;
2310 	tail = pipe->tail;
2311 	count = pipe_occupancy(head, tail);
2312 
2313 	bufs = kvmalloc_array(count, sizeof(struct pipe_buffer), GFP_KERNEL);
2314 	if (!bufs) {
2315 		pipe_unlock(pipe);
2316 		return -ENOMEM;
2317 	}
2318 
2319 	nbuf = 0;
2320 	rem = 0;
2321 	for (idx = tail; !pipe_empty(head, idx) && rem < len; idx++)
2322 		rem += pipe_buf(pipe, idx)->len;
2323 
2324 	ret = -EINVAL;
2325 	if (rem < len)
2326 		goto out_free;
2327 
2328 	rem = len;
2329 	while (rem) {
2330 		struct pipe_buffer *ibuf;
2331 		struct pipe_buffer *obuf;
2332 
2333 		if (WARN_ON(nbuf >= count || pipe_empty(head, tail)))
2334 			goto out_free;
2335 
2336 		ibuf = pipe_buf(pipe, tail);
2337 		obuf = &bufs[nbuf];
2338 
2339 		if (rem >= ibuf->len) {
2340 			*obuf = *ibuf;
2341 			ibuf->ops = NULL;
2342 			tail++;
2343 			pipe->tail = tail;
2344 		} else {
2345 			if (!pipe_buf_get(pipe, ibuf))
2346 				goto out_free;
2347 
2348 			*obuf = *ibuf;
2349 			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
2350 			obuf->len = rem;
2351 			ibuf->offset += obuf->len;
2352 			ibuf->len -= obuf->len;
2353 		}
2354 		nbuf++;
2355 		rem -= obuf->len;
2356 	}
2357 	pipe_unlock(pipe);
2358 
2359 	fuse_copy_init(&cs, false, NULL);
2360 	cs.pipebufs = bufs;
2361 	cs.nr_segs = nbuf;
2362 	cs.pipe = pipe;
2363 
2364 	if (flags & SPLICE_F_MOVE)
2365 		cs.move_folios = true;
2366 
2367 	ret = fuse_dev_do_write(fud, &cs, len);
2368 
2369 	pipe_lock(pipe);
2370 out_free:
2371 	for (idx = 0; idx < nbuf; idx++) {
2372 		struct pipe_buffer *buf = &bufs[idx];
2373 
2374 		if (buf->ops)
2375 			pipe_buf_release(pipe, buf);
2376 	}
2377 	pipe_unlock(pipe);
2378 
2379 	kvfree(bufs);
2380 	return ret;
2381 }
2382 
2383 static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
2384 {
2385 	__poll_t mask = EPOLLOUT | EPOLLWRNORM;
2386 	struct fuse_iqueue *fiq;
2387 	struct fuse_dev *fud = fuse_get_dev(file);
2388 
2389 	if (IS_ERR(fud))
2390 		return EPOLLERR;
2391 
2392 	fiq = &fud->fc->iq;
2393 	poll_wait(file, &fiq->waitq, wait);
2394 
2395 	spin_lock(&fiq->lock);
2396 	if (!fiq->connected)
2397 		mask = EPOLLERR;
2398 	else if (request_pending(fiq))
2399 		mask |= EPOLLIN | EPOLLRDNORM;
2400 	spin_unlock(&fiq->lock);
2401 
2402 	return mask;
2403 }
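/*
 * Illustrative sketch (not part of this file): a daemon can multiplex
 * /dev/fuse with other work via poll()/epoll.  POLLIN means a request is
 * pending; an aborted or disconnected connection is reported as POLLERR.
 * fuse_dev_fd, buf and bufsize are placeholders:
 *
 *	struct pollfd pfd = { .fd = fuse_dev_fd, .events = POLLIN };
 *
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *		len = read(fuse_dev_fd, buf, bufsize);
 */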
2404 
2405 /* Abort all requests on the given list (pending or processing) */
2406 void fuse_dev_end_requests(struct list_head *head)
2407 {
2408 	while (!list_empty(head)) {
2409 		struct fuse_req *req;
2410 		req = list_entry(head->next, struct fuse_req, list);
2411 		req->out.h.error = -ECONNABORTED;
2412 		clear_bit(FR_SENT, &req->flags);
2413 		list_del_init(&req->list);
2414 		fuse_request_end(req);
2415 	}
2416 }
2417 
2418 static void end_polls(struct fuse_conn *fc)
2419 {
2420 	struct rb_node *p;
2421 
2422 	p = rb_first(&fc->polled_files);
2423 
2424 	while (p) {
2425 		struct fuse_file *ff;
2426 		ff = rb_entry(p, struct fuse_file, polled_node);
2427 		wake_up_interruptible_all(&ff->poll_wait);
2428 
2429 		p = rb_next(p);
2430 	}
2431 }
2432 
2433 /*
2434  * Abort all requests.
2435  *
2436  * Emergency exit in case of a malicious or accidental deadlock, or just a hung
2437  * filesystem.
2438  *
2439  * The same effect is usually achievable through killing the filesystem daemon
2440  * and all users of the filesystem.  The exception is the combination of an
2441  * asynchronous request and the tricky deadlock (see
2442  * Documentation/filesystems/fuse/fuse.rst).
2443  *
2444  * Aborting requests under I/O goes as follows:
2445  *  1. Separate out the unlocked requests; they should be finished off
2446  *     immediately.  Locked requests will be finished after unlock; see
2447  *     unlock_request().
2448  *  2. Finish off the unlocked requests.  Some requests may finish before we
2449  *     can; that is OK, they are then removed from the list before we touch them.
2450  */
2451 void fuse_abort_conn(struct fuse_conn *fc)
2452 {
2453 	struct fuse_iqueue *fiq = &fc->iq;
2454 
2455 	spin_lock(&fc->lock);
2456 	if (fc->connected) {
2457 		struct fuse_dev *fud;
2458 		struct fuse_req *req, *next;
2459 		LIST_HEAD(to_end);
2460 		unsigned int i;
2461 
2462 		if (fc->timeout.req_timeout)
2463 			cancel_delayed_work(&fc->timeout.work);
2464 
2465 		/* Background queuing checks fc->connected under bg_lock */
2466 		spin_lock(&fc->bg_lock);
2467 		fc->connected = 0;
2468 		spin_unlock(&fc->bg_lock);
2469 
2470 		fuse_set_initialized(fc);
2471 		list_for_each_entry(fud, &fc->devices, entry) {
2472 			struct fuse_pqueue *fpq = &fud->pq;
2473 
2474 			spin_lock(&fpq->lock);
2475 			fpq->connected = 0;
2476 			list_for_each_entry_safe(req, next, &fpq->io, list) {
2477 				req->out.h.error = -ECONNABORTED;
2478 				spin_lock(&req->waitq.lock);
2479 				set_bit(FR_ABORTED, &req->flags);
2480 				if (!test_bit(FR_LOCKED, &req->flags)) {
2481 					set_bit(FR_PRIVATE, &req->flags);
2482 					__fuse_get_request(req);
2483 					list_move(&req->list, &to_end);
2484 				}
2485 				spin_unlock(&req->waitq.lock);
2486 			}
2487 			for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
2488 				list_splice_tail_init(&fpq->processing[i],
2489 						      &to_end);
2490 			spin_unlock(&fpq->lock);
2491 		}
2492 		spin_lock(&fc->bg_lock);
2493 		fc->blocked = 0;
2494 		fc->max_background = UINT_MAX;
2495 		flush_bg_queue(fc);
2496 		spin_unlock(&fc->bg_lock);
2497 
2498 		spin_lock(&fiq->lock);
2499 		fiq->connected = 0;
2500 		list_for_each_entry(req, &fiq->pending, list)
2501 			clear_bit(FR_PENDING, &req->flags);
2502 		list_splice_tail_init(&fiq->pending, &to_end);
2503 		while (forget_pending(fiq))
2504 			kfree(fuse_dequeue_forget(fiq, 1, NULL));
2505 		wake_up_all(&fiq->waitq);
2506 		spin_unlock(&fiq->lock);
2507 		kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
2508 		end_polls(fc);
2509 		wake_up_all(&fc->blocked_waitq);
2510 		spin_unlock(&fc->lock);
2511 
2512 		fuse_dev_end_requests(&to_end);
2513 
2514 		/*
2515 		 * fc->lock must not be taken to avoid conflicts with io-uring
2516 		 * locks
2517 		 */
2518 		fuse_uring_abort(fc);
2519 	} else {
2520 		spin_unlock(&fc->lock);
2521 	}
2522 }
2523 EXPORT_SYMBOL_GPL(fuse_abort_conn);
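/*
 * Note: besides the in-kernel callers (request timeouts, last device
 * release, forced umount), an administrator can reach this path through the
 * fuse control filesystem, typically:
 *
 *	echo 1 > /sys/fs/fuse/connections/<id>/abort
 */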
2524 
2525 void fuse_wait_aborted(struct fuse_conn *fc)
2526 {
2527 	/* matches implicit memory barrier in fuse_drop_waiting() */
2528 	smp_mb();
2529 	wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
2530 
2531 	fuse_uring_wait_stopped_queues(fc);
2532 }
2533 
2534 int fuse_dev_release(struct inode *inode, struct file *file)
2535 {
2536 	struct fuse_dev *fud = __fuse_get_dev(file);
2537 
2538 	if (fud) {
2539 		struct fuse_conn *fc = fud->fc;
2540 		struct fuse_pqueue *fpq = &fud->pq;
2541 		LIST_HEAD(to_end);
2542 		unsigned int i;
2543 
2544 		spin_lock(&fpq->lock);
2545 		WARN_ON(!list_empty(&fpq->io));
2546 		for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
2547 			list_splice_init(&fpq->processing[i], &to_end);
2548 		spin_unlock(&fpq->lock);
2549 
2550 		fuse_dev_end_requests(&to_end);
2551 
2552 		/* Are we the last open device? */
2553 		if (atomic_dec_and_test(&fc->dev_count)) {
2554 			WARN_ON(fc->iq.fasync != NULL);
2555 			fuse_abort_conn(fc);
2556 		}
2557 		fuse_dev_free(fud);
2558 	}
2559 	return 0;
2560 }
2561 EXPORT_SYMBOL_GPL(fuse_dev_release);
2562 
2563 static int fuse_dev_fasync(int fd, struct file *file, int on)
2564 {
2565 	struct fuse_dev *fud = fuse_get_dev(file);
2566 
2567 	if (IS_ERR(fud))
2568 		return PTR_ERR(fud);
2569 
2570 	/* No locking - fasync_helper does its own locking */
2571 	return fasync_helper(fd, file, on, &fud->fc->iq.fasync);
2572 }
2573 
2574 static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
2575 {
2576 	struct fuse_dev *fud;
2577 
2578 	if (__fuse_get_dev(new))
2579 		return -EINVAL;
2580 
2581 	fud = fuse_dev_alloc_install(fc);
2582 	if (!fud)
2583 		return -ENOMEM;
2584 
2585 	new->private_data = fud;
2586 	atomic_inc(&fc->dev_count);
2587 
2588 	return 0;
2589 }
2590 
2591 static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp)
2592 {
2593 	int res;
2594 	int oldfd;
2595 	struct fuse_dev *fud = NULL;
2596 
2597 	if (get_user(oldfd, argp))
2598 		return -EFAULT;
2599 
2600 	CLASS(fd, f)(oldfd);
2601 	if (fd_empty(f))
2602 		return -EINVAL;
2603 
2604 	/*
2605 	 * Check against file->f_op because CUSE
2606 	 * uses the same ioctl handler.
2607 	 */
2608 	if (fd_file(f)->f_op == file->f_op)
2609 		fud = __fuse_get_dev(fd_file(f));
2610 
2611 	res = -EINVAL;
2612 	if (fud) {
2613 		mutex_lock(&fuse_mutex);
2614 		res = fuse_device_clone(fud->fc, file);
2615 		mutex_unlock(&fuse_mutex);
2616 	}
2617 
2618 	return res;
2619 }
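/*
 * Illustrative sketch (not part of this file): cloning lets a daemon serve
 * one connection through several /dev/fuse descriptors, e.g. one per worker
 * thread.  The fresh descriptor is bound to an existing session by passing
 * the already-mounted descriptor's number to the ioctl (session_fd is a
 * placeholder):
 *
 *	int clonefd = open("/dev/fuse", O_RDWR | O_CLOEXEC);
 *
 *	if (ioctl(clonefd, FUSE_DEV_IOC_CLONE, &session_fd) == -1)
 *		err(1, "FUSE_DEV_IOC_CLONE");
 */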
2620 
2621 static long fuse_dev_ioctl_backing_open(struct file *file,
2622 					struct fuse_backing_map __user *argp)
2623 {
2624 	struct fuse_dev *fud = fuse_get_dev(file);
2625 	struct fuse_backing_map map;
2626 
2627 	if (IS_ERR(fud))
2628 		return PTR_ERR(fud);
2629 
2630 	if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
2631 		return -EOPNOTSUPP;
2632 
2633 	if (copy_from_user(&map, argp, sizeof(map)))
2634 		return -EFAULT;
2635 
2636 	return fuse_backing_open(fud->fc, &map);
2637 }
2638 
2639 static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp)
2640 {
2641 	struct fuse_dev *fud = fuse_get_dev(file);
2642 	int backing_id;
2643 
2644 	if (IS_ERR(fud))
2645 		return PTR_ERR(fud);
2646 
2647 	if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
2648 		return -EOPNOTSUPP;
2649 
2650 	if (get_user(backing_id, argp))
2651 		return -EFAULT;
2652 
2653 	return fuse_backing_close(fud->fc, backing_id);
2654 }
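/*
 * Illustrative sketch (not part of this file, assuming the current
 * passthrough uapi): the daemon registers a backing file and gets back a
 * backing id, which it can later return in an open reply together with
 * FOPEN_PASSTHROUGH, and eventually drop again.  fuse_dev_fd and the backing
 * path are placeholders:
 *
 *	struct fuse_backing_map map = {
 *		.fd = open("/some/backing/file", O_RDWR),
 *	};
 *	int backing_id = ioctl(fuse_dev_fd, FUSE_DEV_IOC_BACKING_OPEN, &map);
 *
 *	ioctl(fuse_dev_fd, FUSE_DEV_IOC_BACKING_CLOSE, &backing_id);
 */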
2655 
2656 static long fuse_dev_ioctl_sync_init(struct file *file)
2657 {
2658 	int err = -EINVAL;
2659 
2660 	mutex_lock(&fuse_mutex);
2661 	if (!__fuse_get_dev(file)) {
2662 		WRITE_ONCE(file->private_data, FUSE_DEV_SYNC_INIT);
2663 		err = 0;
2664 	}
2665 	mutex_unlock(&fuse_mutex);
2666 	return err;
2667 }
2668 
2669 static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
2670 			   unsigned long arg)
2671 {
2672 	void __user *argp = (void __user *)arg;
2673 
2674 	switch (cmd) {
2675 	case FUSE_DEV_IOC_CLONE:
2676 		return fuse_dev_ioctl_clone(file, argp);
2677 
2678 	case FUSE_DEV_IOC_BACKING_OPEN:
2679 		return fuse_dev_ioctl_backing_open(file, argp);
2680 
2681 	case FUSE_DEV_IOC_BACKING_CLOSE:
2682 		return fuse_dev_ioctl_backing_close(file, argp);
2683 
2684 	case FUSE_DEV_IOC_SYNC_INIT:
2685 		return fuse_dev_ioctl_sync_init(file);
2686 
2687 	default:
2688 		return -ENOTTY;
2689 	}
2690 }
2691 
2692 #ifdef CONFIG_PROC_FS
2693 static void fuse_dev_show_fdinfo(struct seq_file *seq, struct file *file)
2694 {
2695 	struct fuse_dev *fud = __fuse_get_dev(file);
2696 	if (!fud)
2697 		return;
2698 
2699 	seq_printf(seq, "fuse_connection:\t%u\n", fud->fc->dev);
2700 }
2701 #endif
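/*
 * With CONFIG_PROC_FS the connection id is visible in
 * /proc/<pid>/fdinfo/<fd> of an open /dev/fuse descriptor, e.g.
 *
 *	fuse_connection:	42
 *
 * matching the directory name used under the fuse control filesystem
 * (conventionally mounted at /sys/fs/fuse/connections).
 */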
2702 
2703 const struct file_operations fuse_dev_operations = {
2704 	.owner		= THIS_MODULE,
2705 	.open		= fuse_dev_open,
2706 	.read_iter	= fuse_dev_read,
2707 	.splice_read	= fuse_dev_splice_read,
2708 	.write_iter	= fuse_dev_write,
2709 	.splice_write	= fuse_dev_splice_write,
2710 	.poll		= fuse_dev_poll,
2711 	.release	= fuse_dev_release,
2712 	.fasync		= fuse_dev_fasync,
2713 	.unlocked_ioctl = fuse_dev_ioctl,
2714 	.compat_ioctl   = compat_ptr_ioctl,
2715 #ifdef CONFIG_FUSE_IO_URING
2716 	.uring_cmd	= fuse_uring_cmd,
2717 #endif
2718 #ifdef CONFIG_PROC_FS
2719 	.show_fdinfo	= fuse_dev_show_fdinfo,
2720 #endif
2721 };
2722 EXPORT_SYMBOL_GPL(fuse_dev_operations);
2723 
2724 static struct miscdevice fuse_miscdevice = {
2725 	.minor = FUSE_MINOR,
2726 	.name  = "fuse",
2727 	.fops = &fuse_dev_operations,
2728 };
2729 
2730 int __init fuse_dev_init(void)
2731 {
2732 	int err = -ENOMEM;
2733 	fuse_req_cachep = kmem_cache_create("fuse_request",
2734 					    sizeof(struct fuse_req),
2735 					    0, 0, NULL);
2736 	if (!fuse_req_cachep)
2737 		goto out;
2738 
2739 	err = misc_register(&fuse_miscdevice);
2740 	if (err)
2741 		goto out_cache_clean;
2742 
2743 	return 0;
2744 
2745  out_cache_clean:
2746 	kmem_cache_destroy(fuse_req_cachep);
2747  out:
2748 	return err;
2749 }
2750 
2751 void fuse_dev_cleanup(void)
2752 {
2753 	misc_deregister(&fuse_miscdevice);
2754 	kmem_cache_destroy(fuse_req_cachep);
2755 }
2756