xref: /linux/fs/fuse/dev.c (revision 6a61b70b43c9c4cbc7314bf6c8b5ba8b0d6e1e7b)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/poll.h>
14 #include <linux/sched/signal.h>
15 #include <linux/uio.h>
16 #include <linux/miscdevice.h>
17 #include <linux/pagemap.h>
18 #include <linux/file.h>
19 #include <linux/slab.h>
20 #include <linux/pipe_fs_i.h>
21 #include <linux/swap.h>
22 #include <linux/splice.h>
23 #include <linux/sched.h>
24 
25 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
26 MODULE_ALIAS("devname:fuse");
27 
28 static struct kmem_cache *fuse_req_cachep;
29 
30 static struct fuse_dev *fuse_get_dev(struct file *file)
31 {
32 	/*
33 	 * Lockless access is OK, because file->private_data is set
34 	 * once during mount and is valid until the file is released.
35 	 */
36 	return READ_ONCE(file->private_data);
37 }
38 
39 static void fuse_request_init(struct fuse_req *req, struct page **pages,
40 			      struct fuse_page_desc *page_descs,
41 			      unsigned npages)
42 {
43 	memset(req, 0, sizeof(*req));
44 	memset(pages, 0, sizeof(*pages) * npages);
45 	memset(page_descs, 0, sizeof(*page_descs) * npages);
46 	INIT_LIST_HEAD(&req->list);
47 	INIT_LIST_HEAD(&req->intr_entry);
48 	init_waitqueue_head(&req->waitq);
49 	refcount_set(&req->count, 1);
50 	req->pages = pages;
51 	req->page_descs = page_descs;
52 	req->max_pages = npages;
53 	__set_bit(FR_PENDING, &req->flags);
54 }
55 
56 static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
57 {
58 	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
59 	if (req) {
60 		struct page **pages;
61 		struct fuse_page_desc *page_descs;
62 
63 		if (npages <= FUSE_REQ_INLINE_PAGES) {
64 			pages = req->inline_pages;
65 			page_descs = req->inline_page_descs;
66 		} else {
67 			pages = kmalloc(sizeof(struct page *) * npages, flags);
68 			page_descs = kmalloc(sizeof(struct fuse_page_desc) *
69 					     npages, flags);
70 		}
71 
72 		if (!pages || !page_descs) {
73 			kfree(pages);
74 			kfree(page_descs);
75 			kmem_cache_free(fuse_req_cachep, req);
76 			return NULL;
77 		}
78 
79 		fuse_request_init(req, pages, page_descs, npages);
80 	}
81 	return req;
82 }
83 
84 struct fuse_req *fuse_request_alloc(unsigned npages)
85 {
86 	return __fuse_request_alloc(npages, GFP_KERNEL);
87 }
88 EXPORT_SYMBOL_GPL(fuse_request_alloc);
89 
90 struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
91 {
92 	return __fuse_request_alloc(npages, GFP_NOFS);
93 }
94 
95 void fuse_request_free(struct fuse_req *req)
96 {
97 	if (req->pages != req->inline_pages) {
98 		kfree(req->pages);
99 		kfree(req->page_descs);
100 	}
101 	kmem_cache_free(fuse_req_cachep, req);
102 }
103 
104 void __fuse_get_request(struct fuse_req *req)
105 {
106 	refcount_inc(&req->count);
107 }
108 
109 /* Must be called with > 1 refcount */
110 static void __fuse_put_request(struct fuse_req *req)
111 {
112 	refcount_dec(&req->count);
113 }
114 
115 void fuse_set_initialized(struct fuse_conn *fc)
116 {
117 	/* Make sure stores before this are seen on another CPU */
118 	smp_wmb();
119 	fc->initialized = 1;
120 }
121 
122 static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
123 {
124 	return !fc->initialized || (for_background && fc->blocked);
125 }
126 
127 static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
128 				       bool for_background)
129 {
130 	struct fuse_req *req;
131 	int err;
132 	atomic_inc(&fc->num_waiting);
133 
134 	if (fuse_block_alloc(fc, for_background)) {
135 		err = -EINTR;
136 		if (wait_event_killable_exclusive(fc->blocked_waitq,
137 				!fuse_block_alloc(fc, for_background)))
138 			goto out;
139 	}
140 	/* Matches smp_wmb() in fuse_set_initialized() */
141 	smp_rmb();
142 
143 	err = -ENOTCONN;
144 	if (!fc->connected)
145 		goto out;
146 
147 	err = -ECONNREFUSED;
148 	if (fc->conn_error)
149 		goto out;
150 
151 	req = fuse_request_alloc(npages);
152 	err = -ENOMEM;
153 	if (!req) {
154 		if (for_background)
155 			wake_up(&fc->blocked_waitq);
156 		goto out;
157 	}
158 
159 	req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
160 	req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
161 	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
162 
163 	__set_bit(FR_WAITING, &req->flags);
164 	if (for_background)
165 		__set_bit(FR_BACKGROUND, &req->flags);
166 
167 	if (unlikely(req->in.h.uid == ((uid_t)-1) ||
168 		     req->in.h.gid == ((gid_t)-1))) {
169 		fuse_put_request(fc, req);
170 		return ERR_PTR(-EOVERFLOW);
171 	}
172 	return req;
173 
174  out:
175 	atomic_dec(&fc->num_waiting);
176 	return ERR_PTR(err);
177 }
178 
179 struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
180 {
181 	return __fuse_get_req(fc, npages, false);
182 }
183 EXPORT_SYMBOL_GPL(fuse_get_req);
184 
185 struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
186 					     unsigned npages)
187 {
188 	return __fuse_get_req(fc, npages, true);
189 }
190 EXPORT_SYMBOL_GPL(fuse_get_req_for_background);
191 
192 /*
193  * Return request in fuse_file->reserved_req.  However that may
194  * currently be in use.  If that is the case, wait for it to become
195  * available.
196  */
197 static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
198 					 struct file *file)
199 {
200 	struct fuse_req *req = NULL;
201 	struct fuse_file *ff = file->private_data;
202 
203 	do {
204 		wait_event(fc->reserved_req_waitq, ff->reserved_req);
205 		spin_lock(&fc->lock);
206 		if (ff->reserved_req) {
207 			req = ff->reserved_req;
208 			ff->reserved_req = NULL;
209 			req->stolen_file = get_file(file);
210 		}
211 		spin_unlock(&fc->lock);
212 	} while (!req);
213 
214 	return req;
215 }
216 
217 /*
218  * Put stolen request back into fuse_file->reserved_req
219  */
220 static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
221 {
222 	struct file *file = req->stolen_file;
223 	struct fuse_file *ff = file->private_data;
224 
225 	spin_lock(&fc->lock);
226 	fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
227 	BUG_ON(ff->reserved_req);
228 	ff->reserved_req = req;
229 	wake_up_all(&fc->reserved_req_waitq);
230 	spin_unlock(&fc->lock);
231 	fput(file);
232 }
233 
234 /*
235  * Gets a request for a file operation, always succeeds
236  *
237  * This is used for sending the FLUSH request, which must get to
238  * userspace, due to POSIX locks which may need to be unlocked.
239  *
240  * If allocation fails due to OOM, use the reserved request in
241  * fuse_file.
242  *
243  * This is very unlikely to deadlock accidentally, since the
244  * filesystem should not have its own file open.  If deadlock is
245  * intentional, it can still be broken by "aborting" the filesystem.
246  */
247 struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
248 					     struct file *file)
249 {
250 	struct fuse_req *req;
251 
252 	atomic_inc(&fc->num_waiting);
253 	wait_event(fc->blocked_waitq, fc->initialized);
254 	/* Matches smp_wmb() in fuse_set_initialized() */
255 	smp_rmb();
256 	req = fuse_request_alloc(0);
257 	if (!req)
258 		req = get_reserved_req(fc, file);
259 
260 	req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
261 	req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
262 	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
263 
264 	__set_bit(FR_WAITING, &req->flags);
265 	__clear_bit(FR_BACKGROUND, &req->flags);
266 	return req;
267 }
268 
269 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
270 {
271 	if (refcount_dec_and_test(&req->count)) {
272 		if (test_bit(FR_BACKGROUND, &req->flags)) {
273 			/*
274 			 * We get here in the unlikely case that a background
275 			 * request was allocated but not sent
276 			 */
277 			spin_lock(&fc->lock);
278 			if (!fc->blocked)
279 				wake_up(&fc->blocked_waitq);
280 			spin_unlock(&fc->lock);
281 		}
282 
283 		if (test_bit(FR_WAITING, &req->flags)) {
284 			__clear_bit(FR_WAITING, &req->flags);
285 			atomic_dec(&fc->num_waiting);
286 		}
287 
288 		if (req->stolen_file)
289 			put_reserved_req(fc, req);
290 		else
291 			fuse_request_free(req);
292 	}
293 }
294 EXPORT_SYMBOL_GPL(fuse_put_request);
295 
296 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
297 {
298 	unsigned nbytes = 0;
299 	unsigned i;
300 
301 	for (i = 0; i < numargs; i++)
302 		nbytes += args[i].size;
303 
304 	return nbytes;
305 }
306 
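/*
 * Hand out the next unique request ID.  The caller must hold
 * fiq->waitq.lock.
 */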
307 static u64 fuse_get_unique(struct fuse_iqueue *fiq)
308 {
309 	return ++fiq->reqctr;
310 }
311 
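/*
 * Add a request to the input queue and wake up a waiting reader.
 * The caller must hold fiq->waitq.lock.
 */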
312 static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req)
313 {
314 	req->in.h.len = sizeof(struct fuse_in_header) +
315 		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
316 	list_add_tail(&req->list, &fiq->pending);
317 	wake_up_locked(&fiq->waitq);
318 	kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
319 }
320 
321 void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
322 		       u64 nodeid, u64 nlookup)
323 {
324 	struct fuse_iqueue *fiq = &fc->iq;
325 
326 	forget->forget_one.nodeid = nodeid;
327 	forget->forget_one.nlookup = nlookup;
328 
329 	spin_lock(&fiq->waitq.lock);
330 	if (fiq->connected) {
331 		fiq->forget_list_tail->next = forget;
332 		fiq->forget_list_tail = forget;
333 		wake_up_locked(&fiq->waitq);
334 		kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
335 	} else {
336 		kfree(forget);
337 	}
338 	spin_unlock(&fiq->waitq.lock);
339 }
340 
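/*
 * Move queued background requests to the input queue, as long as the
 * number of active background requests stays below max_background.
 * Called with fc->lock held.
 */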
341 static void flush_bg_queue(struct fuse_conn *fc)
342 {
343 	while (fc->active_background < fc->max_background &&
344 	       !list_empty(&fc->bg_queue)) {
345 		struct fuse_req *req;
346 		struct fuse_iqueue *fiq = &fc->iq;
347 
348 		req = list_entry(fc->bg_queue.next, struct fuse_req, list);
349 		list_del(&req->list);
350 		fc->active_background++;
351 		spin_lock(&fiq->waitq.lock);
352 		req->in.h.unique = fuse_get_unique(fiq);
353 		queue_request(fiq, req);
354 		spin_unlock(&fiq->waitq.lock);
355 	}
356 }
357 
358 /*
359  * This function is called when a request is finished.  Either a reply
360  * has arrived or it was aborted (and not yet sent) or some error
361  * occurred during communication with userspace, or the device file
362  * was closed.  The requester thread is woken up (if still waiting),
363  * the 'end' callback is called if given, and the reference to the
364  * request is released.
365  */
366 static void request_end(struct fuse_conn *fc, struct fuse_req *req)
367 {
368 	struct fuse_iqueue *fiq = &fc->iq;
369 
370 	if (test_and_set_bit(FR_FINISHED, &req->flags))
371 		return;
372 
373 	spin_lock(&fiq->waitq.lock);
374 	list_del_init(&req->intr_entry);
375 	spin_unlock(&fiq->waitq.lock);
376 	WARN_ON(test_bit(FR_PENDING, &req->flags));
377 	WARN_ON(test_bit(FR_SENT, &req->flags));
378 	if (test_bit(FR_BACKGROUND, &req->flags)) {
379 		spin_lock(&fc->lock);
380 		clear_bit(FR_BACKGROUND, &req->flags);
381 		if (fc->num_background == fc->max_background)
382 			fc->blocked = 0;
383 
384 		/* Wake up next waiter, if any */
385 		if (!fc->blocked && waitqueue_active(&fc->blocked_waitq))
386 			wake_up(&fc->blocked_waitq);
387 
388 		if (fc->num_background == fc->congestion_threshold && fc->sb) {
389 			clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
390 			clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
391 		}
392 		fc->num_background--;
393 		fc->active_background--;
394 		flush_bg_queue(fc);
395 		spin_unlock(&fc->lock);
396 	}
397 	wake_up(&req->waitq);
398 	if (req->end)
399 		req->end(fc, req);
400 	fuse_put_request(fc, req);
401 }
402 
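/*
 * Queue an INTERRUPT for a request that has already been handed to
 * userspace, unless the request has already finished.
 */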
403 static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
404 {
405 	spin_lock(&fiq->waitq.lock);
406 	if (test_bit(FR_FINISHED, &req->flags)) {
407 		spin_unlock(&fiq->waitq.lock);
408 		return;
409 	}
410 	if (list_empty(&req->intr_entry)) {
411 		list_add_tail(&req->intr_entry, &fiq->interrupts);
412 		wake_up_locked(&fiq->waitq);
413 	}
414 	spin_unlock(&fiq->waitq.lock);
415 	kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
416 }
417 
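/*
 * Wait for the request to be answered.  A non-fatal signal queues an
 * INTERRUPT; a fatal signal aborts the request with -EINTR if it is
 * still pending; forced requests and requests already in userspace
 * are waited out unconditionally.
 */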
418 static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
419 {
420 	struct fuse_iqueue *fiq = &fc->iq;
421 	int err;
422 
423 	if (!fc->no_interrupt) {
424 		/* Any signal may interrupt this */
425 		err = wait_event_interruptible(req->waitq,
426 					test_bit(FR_FINISHED, &req->flags));
427 		if (!err)
428 			return;
429 
430 		set_bit(FR_INTERRUPTED, &req->flags);
431 		/* matches barrier in fuse_dev_do_read() */
432 		smp_mb__after_atomic();
433 		if (test_bit(FR_SENT, &req->flags))
434 			queue_interrupt(fiq, req);
435 	}
436 
437 	if (!test_bit(FR_FORCE, &req->flags)) {
438 		/* Only fatal signals may interrupt this */
439 		err = wait_event_killable(req->waitq,
440 					test_bit(FR_FINISHED, &req->flags));
441 		if (!err)
442 			return;
443 
444 		spin_lock(&fiq->waitq.lock);
445 		/* Request is not yet in userspace, bail out */
446 		if (test_bit(FR_PENDING, &req->flags)) {
447 			list_del(&req->list);
448 			spin_unlock(&fiq->waitq.lock);
449 			__fuse_put_request(req);
450 			req->out.h.error = -EINTR;
451 			return;
452 		}
453 		spin_unlock(&fiq->waitq.lock);
454 	}
455 
456 	/*
457 	 * Either request is already in userspace, or it was forced.
458 	 * Wait it out.
459 	 */
460 	wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
461 }
462 
463 static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
464 {
465 	struct fuse_iqueue *fiq = &fc->iq;
466 
467 	BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
468 	spin_lock(&fiq->waitq.lock);
469 	if (!fiq->connected) {
470 		spin_unlock(&fiq->waitq.lock);
471 		req->out.h.error = -ENOTCONN;
472 	} else {
473 		req->in.h.unique = fuse_get_unique(fiq);
474 		queue_request(fiq, req);
475 		/* acquire extra reference, since request is still needed
476 		   after request_end() */
477 		__fuse_get_request(req);
478 		spin_unlock(&fiq->waitq.lock);
479 
480 		request_wait_answer(fc, req);
481 		/* Pairs with smp_wmb() in request_end() */
482 		smp_rmb();
483 	}
484 }
485 
486 void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
487 {
488 	__set_bit(FR_ISREPLY, &req->flags);
489 	if (!test_bit(FR_WAITING, &req->flags)) {
490 		__set_bit(FR_WAITING, &req->flags);
491 		atomic_inc(&fc->num_waiting);
492 	}
493 	__fuse_request_send(fc, req);
494 }
495 EXPORT_SYMBOL_GPL(fuse_request_send);
496 
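/*
 * Shrink argument sizes for connections speaking an older protocol
 * minor version, so that only the compatible part is transferred.
 */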
497 static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
498 {
499 	if (fc->minor < 4 && args->in.h.opcode == FUSE_STATFS)
500 		args->out.args[0].size = FUSE_COMPAT_STATFS_SIZE;
501 
502 	if (fc->minor < 9) {
503 		switch (args->in.h.opcode) {
504 		case FUSE_LOOKUP:
505 		case FUSE_CREATE:
506 		case FUSE_MKNOD:
507 		case FUSE_MKDIR:
508 		case FUSE_SYMLINK:
509 		case FUSE_LINK:
510 			args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
511 			break;
512 		case FUSE_GETATTR:
513 		case FUSE_SETATTR:
514 			args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
515 			break;
516 		}
517 	}
518 	if (fc->minor < 12) {
519 		switch (args->in.h.opcode) {
520 		case FUSE_CREATE:
521 			args->in.args[0].size = sizeof(struct fuse_open_in);
522 			break;
523 		case FUSE_MKNOD:
524 			args->in.args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
525 			break;
526 		}
527 	}
528 }
529 
530 ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
531 {
532 	struct fuse_req *req;
533 	ssize_t ret;
534 
535 	req = fuse_get_req(fc, 0);
536 	if (IS_ERR(req))
537 		return PTR_ERR(req);
538 
539 	/* Needs to be done after fuse_get_req() so that fc->minor is valid */
540 	fuse_adjust_compat(fc, args);
541 
542 	req->in.h.opcode = args->in.h.opcode;
543 	req->in.h.nodeid = args->in.h.nodeid;
544 	req->in.numargs = args->in.numargs;
545 	memcpy(req->in.args, args->in.args,
546 	       args->in.numargs * sizeof(struct fuse_in_arg));
547 	req->out.argvar = args->out.argvar;
548 	req->out.numargs = args->out.numargs;
549 	memcpy(req->out.args, args->out.args,
550 	       args->out.numargs * sizeof(struct fuse_arg));
551 	fuse_request_send(fc, req);
552 	ret = req->out.h.error;
553 	if (!ret && args->out.argvar) {
554 		BUG_ON(args->out.numargs != 1);
555 		ret = req->out.args[0].size;
556 	}
557 	fuse_put_request(fc, req);
558 
559 	return ret;
560 }
561 
562 /*
563  * Called under fc->lock
564  *
565  * fc->connected must have been checked previously
566  */
567 void fuse_request_send_background_locked(struct fuse_conn *fc,
568 					 struct fuse_req *req)
569 {
570 	BUG_ON(!test_bit(FR_BACKGROUND, &req->flags));
571 	if (!test_bit(FR_WAITING, &req->flags)) {
572 		__set_bit(FR_WAITING, &req->flags);
573 		atomic_inc(&fc->num_waiting);
574 	}
575 	__set_bit(FR_ISREPLY, &req->flags);
576 	fc->num_background++;
577 	if (fc->num_background == fc->max_background)
578 		fc->blocked = 1;
579 	if (fc->num_background == fc->congestion_threshold && fc->sb) {
580 		set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
581 		set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
582 	}
583 	list_add_tail(&req->list, &fc->bg_queue);
584 	flush_bg_queue(fc);
585 }
586 
587 void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
588 {
589 	BUG_ON(!req->end);
590 	spin_lock(&fc->lock);
591 	if (fc->connected) {
592 		fuse_request_send_background_locked(fc, req);
593 		spin_unlock(&fc->lock);
594 	} else {
595 		spin_unlock(&fc->lock);
596 		req->out.h.error = -ENOTCONN;
597 		req->end(fc, req);
598 		fuse_put_request(fc, req);
599 	}
600 }
601 EXPORT_SYMBOL_GPL(fuse_request_send_background);
602 
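/*
 * Queue a NOTIFY_REPLY request without waiting for an answer.
 * Returns -ENODEV if the connection is no longer up.
 */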
603 static int fuse_request_send_notify_reply(struct fuse_conn *fc,
604 					  struct fuse_req *req, u64 unique)
605 {
606 	int err = -ENODEV;
607 	struct fuse_iqueue *fiq = &fc->iq;
608 
609 	__clear_bit(FR_ISREPLY, &req->flags);
610 	req->in.h.unique = unique;
611 	spin_lock(&fiq->waitq.lock);
612 	if (fiq->connected) {
613 		queue_request(fiq, req);
614 		err = 0;
615 	}
616 	spin_unlock(&fiq->waitq.lock);
617 
618 	return err;
619 }
620 
621 void fuse_force_forget(struct file *file, u64 nodeid)
622 {
623 	struct inode *inode = file_inode(file);
624 	struct fuse_conn *fc = get_fuse_conn(inode);
625 	struct fuse_req *req;
626 	struct fuse_forget_in inarg;
627 
628 	memset(&inarg, 0, sizeof(inarg));
629 	inarg.nlookup = 1;
630 	req = fuse_get_req_nofail_nopages(fc, file);
631 	req->in.h.opcode = FUSE_FORGET;
632 	req->in.h.nodeid = nodeid;
633 	req->in.numargs = 1;
634 	req->in.args[0].size = sizeof(inarg);
635 	req->in.args[0].value = &inarg;
636 	__clear_bit(FR_ISREPLY, &req->flags);
637 	__fuse_request_send(fc, req);
638 	/* ignore errors */
639 	fuse_put_request(fc, req);
640 }
641 
642 /*
643  * Lock the request.  Up to the next unlock_request() there mustn't be
644  * anything that could cause a page-fault.  If the request was already
645  * aborted, bail out.
646  */
647 static int lock_request(struct fuse_req *req)
648 {
649 	int err = 0;
650 	if (req) {
651 		spin_lock(&req->waitq.lock);
652 		if (test_bit(FR_ABORTED, &req->flags))
653 			err = -ENOENT;
654 		else
655 			set_bit(FR_LOCKED, &req->flags);
656 		spin_unlock(&req->waitq.lock);
657 	}
658 	return err;
659 }
660 
661 /*
662  * Unlock request.  If it was aborted while locked, caller is responsible
663  * for unlocking and ending the request.
664  */
665 static int unlock_request(struct fuse_req *req)
666 {
667 	int err = 0;
668 	if (req) {
669 		spin_lock(&req->waitq.lock);
670 		if (test_bit(FR_ABORTED, &req->flags))
671 			err = -ENOENT;
672 		else
673 			clear_bit(FR_LOCKED, &req->flags);
674 		spin_unlock(&req->waitq.lock);
675 	}
676 	return err;
677 }
678 
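/*
 * State of an ongoing copy between the kernel and a userspace buffer
 * or a pipe, used by the /dev/fuse read and write paths.
 */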
679 struct fuse_copy_state {
680 	int write;
681 	struct fuse_req *req;
682 	struct iov_iter *iter;
683 	struct pipe_buffer *pipebufs;
684 	struct pipe_buffer *currbuf;
685 	struct pipe_inode_info *pipe;
686 	unsigned long nr_segs;
687 	struct page *pg;
688 	unsigned len;
689 	unsigned offset;
690 	unsigned move_pages:1;
691 };
692 
693 static void fuse_copy_init(struct fuse_copy_state *cs, int write,
694 			   struct iov_iter *iter)
695 {
696 	memset(cs, 0, sizeof(*cs));
697 	cs->write = write;
698 	cs->iter = iter;
699 }
700 
701 /* Unmap and put previous page of userspace buffer */
702 static void fuse_copy_finish(struct fuse_copy_state *cs)
703 {
704 	if (cs->currbuf) {
705 		struct pipe_buffer *buf = cs->currbuf;
706 
707 		if (cs->write)
708 			buf->len = PAGE_SIZE - cs->len;
709 		cs->currbuf = NULL;
710 	} else if (cs->pg) {
711 		if (cs->write) {
712 			flush_dcache_page(cs->pg);
713 			set_page_dirty_lock(cs->pg);
714 		}
715 		put_page(cs->pg);
716 	}
717 	cs->pg = NULL;
718 }
719 
720 /*
721  * Get another pageful of the userspace buffer, map it into kernel
722  * address space and lock the request
723  */
724 static int fuse_copy_fill(struct fuse_copy_state *cs)
725 {
726 	struct page *page;
727 	int err;
728 
729 	err = unlock_request(cs->req);
730 	if (err)
731 		return err;
732 
733 	fuse_copy_finish(cs);
734 	if (cs->pipebufs) {
735 		struct pipe_buffer *buf = cs->pipebufs;
736 
737 		if (!cs->write) {
738 			err = pipe_buf_confirm(cs->pipe, buf);
739 			if (err)
740 				return err;
741 
742 			BUG_ON(!cs->nr_segs);
743 			cs->currbuf = buf;
744 			cs->pg = buf->page;
745 			cs->offset = buf->offset;
746 			cs->len = buf->len;
747 			cs->pipebufs++;
748 			cs->nr_segs--;
749 		} else {
750 			if (cs->nr_segs == cs->pipe->buffers)
751 				return -EIO;
752 
753 			page = alloc_page(GFP_HIGHUSER);
754 			if (!page)
755 				return -ENOMEM;
756 
757 			buf->page = page;
758 			buf->offset = 0;
759 			buf->len = 0;
760 
761 			cs->currbuf = buf;
762 			cs->pg = page;
763 			cs->offset = 0;
764 			cs->len = PAGE_SIZE;
765 			cs->pipebufs++;
766 			cs->nr_segs++;
767 		}
768 	} else {
769 		size_t off;
770 		err = iov_iter_get_pages(cs->iter, &page, PAGE_SIZE, 1, &off);
771 		if (err < 0)
772 			return err;
773 		BUG_ON(!err);
774 		cs->len = err;
775 		cs->offset = off;
776 		cs->pg = page;
777 		iov_iter_advance(cs->iter, err);
778 	}
779 
780 	return lock_request(cs->req);
781 }
782 
783 /* Do as much copy to/from userspace buffer as we can */
784 static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
785 {
786 	unsigned ncpy = min(*size, cs->len);
787 	if (val) {
788 		void *pgaddr = kmap_atomic(cs->pg);
789 		void *buf = pgaddr + cs->offset;
790 
791 		if (cs->write)
792 			memcpy(buf, *val, ncpy);
793 		else
794 			memcpy(*val, buf, ncpy);
795 
796 		kunmap_atomic(pgaddr);
797 		*val += ncpy;
798 	}
799 	*size -= ncpy;
800 	cs->len -= ncpy;
801 	cs->offset += ncpy;
802 	return ncpy;
803 }
804 
805 static int fuse_check_page(struct page *page)
806 {
807 	if (page_mapcount(page) ||
808 	    page->mapping != NULL ||
809 	    page_count(page) != 1 ||
810 	    (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
811 	     ~(1 << PG_locked |
812 	       1 << PG_referenced |
813 	       1 << PG_uptodate |
814 	       1 << PG_lru |
815 	       1 << PG_active |
816 	       1 << PG_reclaim))) {
817 		printk(KERN_WARNING "fuse: trying to steal weird page\n");
818 		printk(KERN_WARNING "  page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
819 		return 1;
820 	}
821 	return 0;
822 }
823 
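/*
 * Try to steal the page backing the current pipe buffer and splice it
 * into the request in place of *pagep, avoiding a data copy.
 * Returns 0 on success, a negative error, or 1 if the caller should
 * fall back to an ordinary copy.
 */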
824 static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
825 {
826 	int err;
827 	struct page *oldpage = *pagep;
828 	struct page *newpage;
829 	struct pipe_buffer *buf = cs->pipebufs;
830 
831 	err = unlock_request(cs->req);
832 	if (err)
833 		return err;
834 
835 	fuse_copy_finish(cs);
836 
837 	err = pipe_buf_confirm(cs->pipe, buf);
838 	if (err)
839 		return err;
840 
841 	BUG_ON(!cs->nr_segs);
842 	cs->currbuf = buf;
843 	cs->len = buf->len;
844 	cs->pipebufs++;
845 	cs->nr_segs--;
846 
847 	if (cs->len != PAGE_SIZE)
848 		goto out_fallback;
849 
850 	if (pipe_buf_steal(cs->pipe, buf) != 0)
851 		goto out_fallback;
852 
853 	newpage = buf->page;
854 
855 	if (!PageUptodate(newpage))
856 		SetPageUptodate(newpage);
857 
858 	ClearPageMappedToDisk(newpage);
859 
860 	if (fuse_check_page(newpage) != 0)
861 		goto out_fallback_unlock;
862 
863 	/*
864 	 * This is a new and locked page, it shouldn't be mapped or
865 	 * have any special flags on it
866 	 */
867 	if (WARN_ON(page_mapped(oldpage)))
868 		goto out_fallback_unlock;
869 	if (WARN_ON(page_has_private(oldpage)))
870 		goto out_fallback_unlock;
871 	if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
872 		goto out_fallback_unlock;
873 	if (WARN_ON(PageMlocked(oldpage)))
874 		goto out_fallback_unlock;
875 
876 	err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
877 	if (err) {
878 		unlock_page(newpage);
879 		return err;
880 	}
881 
882 	get_page(newpage);
883 
884 	if (!(buf->flags & PIPE_BUF_FLAG_LRU))
885 		lru_cache_add_file(newpage);
886 
887 	err = 0;
888 	spin_lock(&cs->req->waitq.lock);
889 	if (test_bit(FR_ABORTED, &cs->req->flags))
890 		err = -ENOENT;
891 	else
892 		*pagep = newpage;
893 	spin_unlock(&cs->req->waitq.lock);
894 
895 	if (err) {
896 		unlock_page(newpage);
897 		put_page(newpage);
898 		return err;
899 	}
900 
901 	unlock_page(oldpage);
902 	put_page(oldpage);
903 	cs->len = 0;
904 
905 	return 0;
906 
907 out_fallback_unlock:
908 	unlock_page(newpage);
909 out_fallback:
910 	cs->pg = buf->page;
911 	cs->offset = buf->offset;
912 
913 	err = lock_request(cs->req);
914 	if (err)
915 		return err;
916 
917 	return 1;
918 }
919 
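/*
 * Reference a request page directly as a pipe buffer instead of
 * copying its contents (splice read path).
 */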
920 static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
921 			 unsigned offset, unsigned count)
922 {
923 	struct pipe_buffer *buf;
924 	int err;
925 
926 	if (cs->nr_segs == cs->pipe->buffers)
927 		return -EIO;
928 
929 	err = unlock_request(cs->req);
930 	if (err)
931 		return err;
932 
933 	fuse_copy_finish(cs);
934 
935 	buf = cs->pipebufs;
936 	get_page(page);
937 	buf->page = page;
938 	buf->offset = offset;
939 	buf->len = count;
940 
941 	cs->pipebufs++;
942 	cs->nr_segs++;
943 	cs->len = 0;
944 
945 	return 0;
946 }
947 
948 /*
949  * Copy a page in the request to/from the userspace buffer.  Must be
950  * done atomically
951  */
952 static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
953 			  unsigned offset, unsigned count, int zeroing)
954 {
955 	int err;
956 	struct page *page = *pagep;
957 
958 	if (page && zeroing && count < PAGE_SIZE)
959 		clear_highpage(page);
960 
961 	while (count) {
962 		if (cs->write && cs->pipebufs && page) {
963 			return fuse_ref_page(cs, page, offset, count);
964 		} else if (!cs->len) {
965 			if (cs->move_pages && page &&
966 			    offset == 0 && count == PAGE_SIZE) {
967 				err = fuse_try_move_page(cs, pagep);
968 				if (err <= 0)
969 					return err;
970 			} else {
971 				err = fuse_copy_fill(cs);
972 				if (err)
973 					return err;
974 			}
975 		}
976 		if (page) {
977 			void *mapaddr = kmap_atomic(page);
978 			void *buf = mapaddr + offset;
979 			offset += fuse_copy_do(cs, &buf, &count);
980 			kunmap_atomic(mapaddr);
981 		} else
982 			offset += fuse_copy_do(cs, NULL, &count);
983 	}
984 	if (page && !cs->write)
985 		flush_dcache_page(page);
986 	return 0;
987 }
988 
989 /* Copy pages in the request to/from userspace buffer */
990 static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
991 			   int zeroing)
992 {
993 	unsigned i;
994 	struct fuse_req *req = cs->req;
995 
996 	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
997 		int err;
998 		unsigned offset = req->page_descs[i].offset;
999 		unsigned count = min(nbytes, req->page_descs[i].length);
1000 
1001 		err = fuse_copy_page(cs, &req->pages[i], offset, count,
1002 				     zeroing);
1003 		if (err)
1004 			return err;
1005 
1006 		nbytes -= count;
1007 	}
1008 	return 0;
1009 }
1010 
1011 /* Copy a single argument in the request to/from userspace buffer */
1012 static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
1013 {
1014 	while (size) {
1015 		if (!cs->len) {
1016 			int err = fuse_copy_fill(cs);
1017 			if (err)
1018 				return err;
1019 		}
1020 		fuse_copy_do(cs, &val, &size);
1021 	}
1022 	return 0;
1023 }
1024 
1025 /* Copy request arguments to/from userspace buffer */
1026 static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
1027 			  unsigned argpages, struct fuse_arg *args,
1028 			  int zeroing)
1029 {
1030 	int err = 0;
1031 	unsigned i;
1032 
1033 	for (i = 0; !err && i < numargs; i++)  {
1034 		struct fuse_arg *arg = &args[i];
1035 		if (i == numargs - 1 && argpages)
1036 			err = fuse_copy_pages(cs, arg->size, zeroing);
1037 		else
1038 			err = fuse_copy_one(cs, arg->value, arg->size);
1039 	}
1040 	return err;
1041 }
1042 
1043 static int forget_pending(struct fuse_iqueue *fiq)
1044 {
1045 	return fiq->forget_list_head.next != NULL;
1046 }
1047 
1048 static int request_pending(struct fuse_iqueue *fiq)
1049 {
1050 	return !list_empty(&fiq->pending) || !list_empty(&fiq->interrupts) ||
1051 		forget_pending(fiq);
1052 }
1053 
1054 /*
1055  * Transfer an interrupt request to userspace
1056  *
1057  * Unlike other requests this is assembled on demand, without a need
1058  * to allocate a separate fuse_req structure.
1059  *
1060  * Called with fiq->waitq.lock held, releases it
1061  */
1062 static int fuse_read_interrupt(struct fuse_iqueue *fiq,
1063 			       struct fuse_copy_state *cs,
1064 			       size_t nbytes, struct fuse_req *req)
1065 __releases(fiq->waitq.lock)
1066 {
1067 	struct fuse_in_header ih;
1068 	struct fuse_interrupt_in arg;
1069 	unsigned reqsize = sizeof(ih) + sizeof(arg);
1070 	int err;
1071 
1072 	list_del_init(&req->intr_entry);
1073 	req->intr_unique = fuse_get_unique(fiq);
1074 	memset(&ih, 0, sizeof(ih));
1075 	memset(&arg, 0, sizeof(arg));
1076 	ih.len = reqsize;
1077 	ih.opcode = FUSE_INTERRUPT;
1078 	ih.unique = req->intr_unique;
1079 	arg.unique = req->in.h.unique;
1080 
1081 	spin_unlock(&fiq->waitq.lock);
1082 	if (nbytes < reqsize)
1083 		return -EINVAL;
1084 
1085 	err = fuse_copy_one(cs, &ih, sizeof(ih));
1086 	if (!err)
1087 		err = fuse_copy_one(cs, &arg, sizeof(arg));
1088 	fuse_copy_finish(cs);
1089 
1090 	return err ? err : reqsize;
1091 }
1092 
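/*
 * Detach up to 'max' queued forget requests from the list and return
 * them.  The caller must hold fiq->waitq.lock.
 */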
1093 static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq,
1094 					       unsigned max,
1095 					       unsigned *countp)
1096 {
1097 	struct fuse_forget_link *head = fiq->forget_list_head.next;
1098 	struct fuse_forget_link **newhead = &head;
1099 	unsigned count;
1100 
1101 	for (count = 0; *newhead != NULL && count < max; count++)
1102 		newhead = &(*newhead)->next;
1103 
1104 	fiq->forget_list_head.next = *newhead;
1105 	*newhead = NULL;
1106 	if (fiq->forget_list_head.next == NULL)
1107 		fiq->forget_list_tail = &fiq->forget_list_head;
1108 
1109 	if (countp != NULL)
1110 		*countp = count;
1111 
1112 	return head;
1113 }
1114 
1115 static int fuse_read_single_forget(struct fuse_iqueue *fiq,
1116 				   struct fuse_copy_state *cs,
1117 				   size_t nbytes)
1118 __releases(fiq->waitq.lock)
1119 {
1120 	int err;
1121 	struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL);
1122 	struct fuse_forget_in arg = {
1123 		.nlookup = forget->forget_one.nlookup,
1124 	};
1125 	struct fuse_in_header ih = {
1126 		.opcode = FUSE_FORGET,
1127 		.nodeid = forget->forget_one.nodeid,
1128 		.unique = fuse_get_unique(fiq),
1129 		.len = sizeof(ih) + sizeof(arg),
1130 	};
1131 
1132 	spin_unlock(&fiq->waitq.lock);
1133 	kfree(forget);
1134 	if (nbytes < ih.len)
1135 		return -EINVAL;
1136 
1137 	err = fuse_copy_one(cs, &ih, sizeof(ih));
1138 	if (!err)
1139 		err = fuse_copy_one(cs, &arg, sizeof(arg));
1140 	fuse_copy_finish(cs);
1141 
1142 	if (err)
1143 		return err;
1144 
1145 	return ih.len;
1146 }
1147 
1148 static int fuse_read_batch_forget(struct fuse_iqueue *fiq,
1149 				   struct fuse_copy_state *cs, size_t nbytes)
1150 __releases(fiq->waitq.lock)
1151 {
1152 	int err;
1153 	unsigned max_forgets;
1154 	unsigned count;
1155 	struct fuse_forget_link *head;
1156 	struct fuse_batch_forget_in arg = { .count = 0 };
1157 	struct fuse_in_header ih = {
1158 		.opcode = FUSE_BATCH_FORGET,
1159 		.unique = fuse_get_unique(fiq),
1160 		.len = sizeof(ih) + sizeof(arg),
1161 	};
1162 
1163 	if (nbytes < ih.len) {
1164 		spin_unlock(&fiq->waitq.lock);
1165 		return -EINVAL;
1166 	}
1167 
1168 	max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1169 	head = dequeue_forget(fiq, max_forgets, &count);
1170 	spin_unlock(&fiq->waitq.lock);
1171 
1172 	arg.count = count;
1173 	ih.len += count * sizeof(struct fuse_forget_one);
1174 	err = fuse_copy_one(cs, &ih, sizeof(ih));
1175 	if (!err)
1176 		err = fuse_copy_one(cs, &arg, sizeof(arg));
1177 
1178 	while (head) {
1179 		struct fuse_forget_link *forget = head;
1180 
1181 		if (!err) {
1182 			err = fuse_copy_one(cs, &forget->forget_one,
1183 					    sizeof(forget->forget_one));
1184 		}
1185 		head = forget->next;
1186 		kfree(forget);
1187 	}
1188 
1189 	fuse_copy_finish(cs);
1190 
1191 	if (err)
1192 		return err;
1193 
1194 	return ih.len;
1195 }
1196 
1197 static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq,
1198 			    struct fuse_copy_state *cs,
1199 			    size_t nbytes)
1200 __releases(fiq->waitq.lock)
1201 {
1202 	if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL)
1203 		return fuse_read_single_forget(fiq, cs, nbytes);
1204 	else
1205 		return fuse_read_batch_forget(fiq, cs, nbytes);
1206 }
1207 
1208 /*
1209  * Read a single request into the userspace filesystem's buffer.  This
1210  * function waits until a request is available, then removes it from
1211  * the pending list and copies the request data to the userspace buffer.
1212  * If no reply is needed (FORGET) or the request has been aborted or there
1213  * was an error during the copying, then it is finished by calling
1214  * request_end().  Otherwise it is added to the processing list and the
1215  * 'sent' flag is set.
1216  */
1217 static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
1218 				struct fuse_copy_state *cs, size_t nbytes)
1219 {
1220 	ssize_t err;
1221 	struct fuse_conn *fc = fud->fc;
1222 	struct fuse_iqueue *fiq = &fc->iq;
1223 	struct fuse_pqueue *fpq = &fud->pq;
1224 	struct fuse_req *req;
1225 	struct fuse_in *in;
1226 	unsigned reqsize;
1227 
1228  restart:
1229 	spin_lock(&fiq->waitq.lock);
1230 	err = -EAGAIN;
1231 	if ((file->f_flags & O_NONBLOCK) && fiq->connected &&
1232 	    !request_pending(fiq))
1233 		goto err_unlock;
1234 
1235 	err = wait_event_interruptible_exclusive_locked(fiq->waitq,
1236 				!fiq->connected || request_pending(fiq));
1237 	if (err)
1238 		goto err_unlock;
1239 
1240 	if (!fiq->connected) {
1241 		err = (fc->aborted && fc->abort_err) ? -ECONNABORTED : -ENODEV;
1242 		goto err_unlock;
1243 	}
1244 
1245 	if (!list_empty(&fiq->interrupts)) {
1246 		req = list_entry(fiq->interrupts.next, struct fuse_req,
1247 				 intr_entry);
1248 		return fuse_read_interrupt(fiq, cs, nbytes, req);
1249 	}
1250 
1251 	if (forget_pending(fiq)) {
1252 		if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0)
1253 			return fuse_read_forget(fc, fiq, cs, nbytes);
1254 
1255 		if (fiq->forget_batch <= -8)
1256 			fiq->forget_batch = 16;
1257 	}
1258 
1259 	req = list_entry(fiq->pending.next, struct fuse_req, list);
1260 	clear_bit(FR_PENDING, &req->flags);
1261 	list_del_init(&req->list);
1262 	spin_unlock(&fiq->waitq.lock);
1263 
1264 	in = &req->in;
1265 	reqsize = in->h.len;
1266 
1267 	/* If request is too large, reply with an error and restart the read */
1268 	if (nbytes < reqsize) {
1269 		req->out.h.error = -EIO;
1270 		/* SETXATTR is special, since it may contain too large data */
1271 		/* SETXATTR is special, since its data may be too large */
1272 			req->out.h.error = -E2BIG;
1273 		request_end(fc, req);
1274 		goto restart;
1275 	}
1276 	spin_lock(&fpq->lock);
1277 	list_add(&req->list, &fpq->io);
1278 	spin_unlock(&fpq->lock);
1279 	cs->req = req;
1280 	err = fuse_copy_one(cs, &in->h, sizeof(in->h));
1281 	if (!err)
1282 		err = fuse_copy_args(cs, in->numargs, in->argpages,
1283 				     (struct fuse_arg *) in->args, 0);
1284 	fuse_copy_finish(cs);
1285 	spin_lock(&fpq->lock);
1286 	clear_bit(FR_LOCKED, &req->flags);
1287 	if (!fpq->connected) {
1288 		err = (fc->aborted && fc->abort_err) ? -ECONNABORTED : -ENODEV;
1289 		goto out_end;
1290 	}
1291 	if (err) {
1292 		req->out.h.error = -EIO;
1293 		goto out_end;
1294 	}
1295 	if (!test_bit(FR_ISREPLY, &req->flags)) {
1296 		err = reqsize;
1297 		goto out_end;
1298 	}
1299 	list_move_tail(&req->list, &fpq->processing);
1300 	spin_unlock(&fpq->lock);
1301 	set_bit(FR_SENT, &req->flags);
1302 	/* matches barrier in request_wait_answer() */
1303 	smp_mb__after_atomic();
1304 	if (test_bit(FR_INTERRUPTED, &req->flags))
1305 		queue_interrupt(fiq, req);
1306 
1307 	return reqsize;
1308 
1309 out_end:
1310 	if (!test_bit(FR_PRIVATE, &req->flags))
1311 		list_del_init(&req->list);
1312 	spin_unlock(&fpq->lock);
1313 	request_end(fc, req);
1314 	return err;
1315 
1316  err_unlock:
1317 	spin_unlock(&fiq->waitq.lock);
1318 	return err;
1319 }
1320 
1321 static int fuse_dev_open(struct inode *inode, struct file *file)
1322 {
1323 	/*
1324 	 * The private_data of a fuse device file is used to hold the
1325 	 * fuse_conn(ection) once it is mounted, and to keep track of
1326 	 * whether the file has been mounted already.
1327 	 */
1328 	file->private_data = NULL;
1329 	return 0;
1330 }
1331 
1332 static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
1333 {
1334 	struct fuse_copy_state cs;
1335 	struct file *file = iocb->ki_filp;
1336 	struct fuse_dev *fud = fuse_get_dev(file);
1337 
1338 	if (!fud)
1339 		return -EPERM;
1340 
1341 	if (!iter_is_iovec(to))
1342 		return -EINVAL;
1343 
1344 	fuse_copy_init(&cs, 1, to);
1345 
1346 	return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
1347 }
1348 
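/*
 * Read a request into a pipe.  The header and inline arguments are
 * copied into newly allocated pipe pages, while data pages of the
 * request are referenced directly, avoiding a copy.
 */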
1349 static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1350 				    struct pipe_inode_info *pipe,
1351 				    size_t len, unsigned int flags)
1352 {
1353 	int total, ret;
1354 	int page_nr = 0;
1355 	struct pipe_buffer *bufs;
1356 	struct fuse_copy_state cs;
1357 	struct fuse_dev *fud = fuse_get_dev(in);
1358 
1359 	if (!fud)
1360 		return -EPERM;
1361 
1362 	bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1363 	if (!bufs)
1364 		return -ENOMEM;
1365 
1366 	fuse_copy_init(&cs, 1, NULL);
1367 	cs.pipebufs = bufs;
1368 	cs.pipe = pipe;
1369 	ret = fuse_dev_do_read(fud, in, &cs, len);
1370 	if (ret < 0)
1371 		goto out;
1372 
1373 	if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
1374 		ret = -EIO;
1375 		goto out;
1376 	}
1377 
1378 	for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
1379 		/*
1380 		 * Need to be careful about this.  Having buf->ops in module
1381 		 * code can Oops if the buffer persists after module unload.
1382 		 */
1383 		bufs[page_nr].ops = &nosteal_pipe_buf_ops;
1384 		bufs[page_nr].flags = 0;
1385 		ret = add_to_pipe(pipe, &bufs[page_nr++]);
1386 		if (unlikely(ret < 0))
1387 			break;
1388 	}
1389 	if (total)
1390 		ret = total;
1391 out:
1392 	for (; page_nr < cs.nr_segs; page_nr++)
1393 		put_page(bufs[page_nr].page);
1394 
1395 	kfree(bufs);
1396 	return ret;
1397 }
1398 
1399 static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
1400 			    struct fuse_copy_state *cs)
1401 {
1402 	struct fuse_notify_poll_wakeup_out outarg;
1403 	int err = -EINVAL;
1404 
1405 	if (size != sizeof(outarg))
1406 		goto err;
1407 
1408 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1409 	if (err)
1410 		goto err;
1411 
1412 	fuse_copy_finish(cs);
1413 	return fuse_notify_poll_wakeup(fc, &outarg);
1414 
1415 err:
1416 	fuse_copy_finish(cs);
1417 	return err;
1418 }
1419 
1420 static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
1421 				   struct fuse_copy_state *cs)
1422 {
1423 	struct fuse_notify_inval_inode_out outarg;
1424 	int err = -EINVAL;
1425 
1426 	if (size != sizeof(outarg))
1427 		goto err;
1428 
1429 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1430 	if (err)
1431 		goto err;
1432 	fuse_copy_finish(cs);
1433 
1434 	down_read(&fc->killsb);
1435 	err = -ENOENT;
1436 	if (fc->sb) {
1437 		err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
1438 					       outarg.off, outarg.len);
1439 	}
1440 	up_read(&fc->killsb);
1441 	return err;
1442 
1443 err:
1444 	fuse_copy_finish(cs);
1445 	return err;
1446 }
1447 
1448 static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
1449 				   struct fuse_copy_state *cs)
1450 {
1451 	struct fuse_notify_inval_entry_out outarg;
1452 	int err = -ENOMEM;
1453 	char *buf;
1454 	struct qstr name;
1455 
1456 	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1457 	if (!buf)
1458 		goto err;
1459 
1460 	err = -EINVAL;
1461 	if (size < sizeof(outarg))
1462 		goto err;
1463 
1464 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1465 	if (err)
1466 		goto err;
1467 
1468 	err = -ENAMETOOLONG;
1469 	if (outarg.namelen > FUSE_NAME_MAX)
1470 		goto err;
1471 
1472 	err = -EINVAL;
1473 	if (size != sizeof(outarg) + outarg.namelen + 1)
1474 		goto err;
1475 
1476 	name.name = buf;
1477 	name.len = outarg.namelen;
1478 	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1479 	if (err)
1480 		goto err;
1481 	fuse_copy_finish(cs);
1482 	buf[outarg.namelen] = 0;
1483 
1484 	down_read(&fc->killsb);
1485 	err = -ENOENT;
1486 	if (fc->sb)
1487 		err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
1488 	up_read(&fc->killsb);
1489 	kfree(buf);
1490 	return err;
1491 
1492 err:
1493 	kfree(buf);
1494 	fuse_copy_finish(cs);
1495 	return err;
1496 }
1497 
1498 static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
1499 			      struct fuse_copy_state *cs)
1500 {
1501 	struct fuse_notify_delete_out outarg;
1502 	int err = -ENOMEM;
1503 	char *buf;
1504 	struct qstr name;
1505 
1506 	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1507 	if (!buf)
1508 		goto err;
1509 
1510 	err = -EINVAL;
1511 	if (size < sizeof(outarg))
1512 		goto err;
1513 
1514 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1515 	if (err)
1516 		goto err;
1517 
1518 	err = -ENAMETOOLONG;
1519 	if (outarg.namelen > FUSE_NAME_MAX)
1520 		goto err;
1521 
1522 	err = -EINVAL;
1523 	if (size != sizeof(outarg) + outarg.namelen + 1)
1524 		goto err;
1525 
1526 	name.name = buf;
1527 	name.len = outarg.namelen;
1528 	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1529 	if (err)
1530 		goto err;
1531 	fuse_copy_finish(cs);
1532 	buf[outarg.namelen] = 0;
1533 
1534 	down_read(&fc->killsb);
1535 	err = -ENOENT;
1536 	if (fc->sb)
1537 		err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
1538 					       outarg.child, &name);
1539 	up_read(&fc->killsb);
1540 	kfree(buf);
1541 	return err;
1542 
1543 err:
1544 	kfree(buf);
1545 	fuse_copy_finish(cs);
1546 	return err;
1547 }
1548 
1549 static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
1550 			     struct fuse_copy_state *cs)
1551 {
1552 	struct fuse_notify_store_out outarg;
1553 	struct inode *inode;
1554 	struct address_space *mapping;
1555 	u64 nodeid;
1556 	int err;
1557 	pgoff_t index;
1558 	unsigned int offset;
1559 	unsigned int num;
1560 	loff_t file_size;
1561 	loff_t end;
1562 
1563 	err = -EINVAL;
1564 	if (size < sizeof(outarg))
1565 		goto out_finish;
1566 
1567 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1568 	if (err)
1569 		goto out_finish;
1570 
1571 	err = -EINVAL;
1572 	if (size - sizeof(outarg) != outarg.size)
1573 		goto out_finish;
1574 
1575 	nodeid = outarg.nodeid;
1576 
1577 	down_read(&fc->killsb);
1578 
1579 	err = -ENOENT;
1580 	if (!fc->sb)
1581 		goto out_up_killsb;
1582 
1583 	inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1584 	if (!inode)
1585 		goto out_up_killsb;
1586 
1587 	mapping = inode->i_mapping;
1588 	index = outarg.offset >> PAGE_SHIFT;
1589 	offset = outarg.offset & ~PAGE_MASK;
1590 	file_size = i_size_read(inode);
1591 	end = outarg.offset + outarg.size;
1592 	if (end > file_size) {
1593 		file_size = end;
1594 		fuse_write_update_size(inode, file_size);
1595 	}
1596 
1597 	num = outarg.size;
1598 	while (num) {
1599 		struct page *page;
1600 		unsigned int this_num;
1601 
1602 		err = -ENOMEM;
1603 		page = find_or_create_page(mapping, index,
1604 					   mapping_gfp_mask(mapping));
1605 		if (!page)
1606 			goto out_iput;
1607 
1608 		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
1609 		err = fuse_copy_page(cs, &page, offset, this_num, 0);
1610 		if (!err && offset == 0 &&
1611 		    (this_num == PAGE_SIZE || file_size == end))
1612 			SetPageUptodate(page);
1613 		unlock_page(page);
1614 		put_page(page);
1615 
1616 		if (err)
1617 			goto out_iput;
1618 
1619 		num -= this_num;
1620 		offset = 0;
1621 		index++;
1622 	}
1623 
1624 	err = 0;
1625 
1626 out_iput:
1627 	iput(inode);
1628 out_up_killsb:
1629 	up_read(&fc->killsb);
1630 out_finish:
1631 	fuse_copy_finish(cs);
1632 	return err;
1633 }
1634 
1635 static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
1636 {
1637 	release_pages(req->pages, req->num_pages);
1638 }
1639 
1640 static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1641 			 struct fuse_notify_retrieve_out *outarg)
1642 {
1643 	int err;
1644 	struct address_space *mapping = inode->i_mapping;
1645 	struct fuse_req *req;
1646 	pgoff_t index;
1647 	loff_t file_size;
1648 	unsigned int num;
1649 	unsigned int offset;
1650 	size_t total_len = 0;
1651 	int num_pages;
1652 
1653 	offset = outarg->offset & ~PAGE_MASK;
1654 	file_size = i_size_read(inode);
1655 
1656 	num = outarg->size;
1657 	if (outarg->offset > file_size)
1658 		num = 0;
1659 	else if (outarg->offset + num > file_size)
1660 		num = file_size - outarg->offset;
1661 
1662 	num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1663 	num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
1664 
1665 	req = fuse_get_req(fc, num_pages);
1666 	if (IS_ERR(req))
1667 		return PTR_ERR(req);
1668 
1669 	req->in.h.opcode = FUSE_NOTIFY_REPLY;
1670 	req->in.h.nodeid = outarg->nodeid;
1671 	req->in.numargs = 2;
1672 	req->in.argpages = 1;
1673 	req->page_descs[0].offset = offset;
1674 	req->end = fuse_retrieve_end;
1675 
1676 	index = outarg->offset >> PAGE_SHIFT;
1677 
1678 	while (num && req->num_pages < num_pages) {
1679 		struct page *page;
1680 		unsigned int this_num;
1681 
1682 		page = find_get_page(mapping, index);
1683 		if (!page)
1684 			break;
1685 
1686 		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
1687 		req->pages[req->num_pages] = page;
1688 		req->page_descs[req->num_pages].length = this_num;
1689 		req->num_pages++;
1690 
1691 		offset = 0;
1692 		num -= this_num;
1693 		total_len += this_num;
1694 		index++;
1695 	}
1696 	req->misc.retrieve_in.offset = outarg->offset;
1697 	req->misc.retrieve_in.size = total_len;
1698 	req->in.args[0].size = sizeof(req->misc.retrieve_in);
1699 	req->in.args[0].value = &req->misc.retrieve_in;
1700 	req->in.args[1].size = total_len;
1701 
1702 	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
1703 	if (err)
1704 		fuse_retrieve_end(fc, req);
1705 
1706 	return err;
1707 }
1708 
1709 static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1710 				struct fuse_copy_state *cs)
1711 {
1712 	struct fuse_notify_retrieve_out outarg;
1713 	struct inode *inode;
1714 	int err;
1715 
1716 	err = -EINVAL;
1717 	if (size != sizeof(outarg))
1718 		goto copy_finish;
1719 
1720 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1721 	if (err)
1722 		goto copy_finish;
1723 
1724 	fuse_copy_finish(cs);
1725 
1726 	down_read(&fc->killsb);
1727 	err = -ENOENT;
1728 	if (fc->sb) {
1729 		u64 nodeid = outarg.nodeid;
1730 
1731 		inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1732 		if (inode) {
1733 			err = fuse_retrieve(fc, inode, &outarg);
1734 			iput(inode);
1735 		}
1736 	}
1737 	up_read(&fc->killsb);
1738 
1739 	return err;
1740 
1741 copy_finish:
1742 	fuse_copy_finish(cs);
1743 	return err;
1744 }
1745 
1746 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1747 		       unsigned int size, struct fuse_copy_state *cs)
1748 {
1749 	/* Don't try to move pages (yet) */
1750 	cs->move_pages = 0;
1751 
1752 	switch (code) {
1753 	case FUSE_NOTIFY_POLL:
1754 		return fuse_notify_poll(fc, size, cs);
1755 
1756 	case FUSE_NOTIFY_INVAL_INODE:
1757 		return fuse_notify_inval_inode(fc, size, cs);
1758 
1759 	case FUSE_NOTIFY_INVAL_ENTRY:
1760 		return fuse_notify_inval_entry(fc, size, cs);
1761 
1762 	case FUSE_NOTIFY_STORE:
1763 		return fuse_notify_store(fc, size, cs);
1764 
1765 	case FUSE_NOTIFY_RETRIEVE:
1766 		return fuse_notify_retrieve(fc, size, cs);
1767 
1768 	case FUSE_NOTIFY_DELETE:
1769 		return fuse_notify_delete(fc, size, cs);
1770 
1771 	default:
1772 		fuse_copy_finish(cs);
1773 		return -EINVAL;
1774 	}
1775 }
1776 
1777 /* Look up request on processing list by unique ID */
1778 static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
1779 {
1780 	struct fuse_req *req;
1781 
1782 	list_for_each_entry(req, &fpq->processing, list) {
1783 		if (req->in.h.unique == unique || req->intr_unique == unique)
1784 			return req;
1785 	}
1786 	return NULL;
1787 }
1788 
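/*
 * Copy the reply arguments from the userspace buffer into the request,
 * after validating the reply size against the declared output args.
 */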
1789 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
1790 			 unsigned nbytes)
1791 {
1792 	unsigned reqsize = sizeof(struct fuse_out_header);
1793 
1794 	if (out->h.error)
1795 		return nbytes != reqsize ? -EINVAL : 0;
1796 
1797 	reqsize += len_args(out->numargs, out->args);
1798 
1799 	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
1800 		return -EINVAL;
1801 	else if (reqsize > nbytes) {
1802 		struct fuse_arg *lastarg = &out->args[out->numargs-1];
1803 		unsigned diffsize = reqsize - nbytes;
1804 		if (diffsize > lastarg->size)
1805 			return -EINVAL;
1806 		lastarg->size -= diffsize;
1807 	}
1808 	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
1809 			      out->page_zeroing);
1810 }
1811 
1812 /*
1813  * Write a single reply to a request.  First the header is copied from
1814  * the write buffer.  The request is then looked up on the processing
1815  * list by the unique ID found in the header.  If found, it is removed
1816  * from the list and the rest of the buffer is copied to the request.
1817  * The request is finished by calling request_end().
1818  */
1819 static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
1820 				 struct fuse_copy_state *cs, size_t nbytes)
1821 {
1822 	int err;
1823 	struct fuse_conn *fc = fud->fc;
1824 	struct fuse_pqueue *fpq = &fud->pq;
1825 	struct fuse_req *req;
1826 	struct fuse_out_header oh;
1827 
1828 	if (nbytes < sizeof(struct fuse_out_header))
1829 		return -EINVAL;
1830 
1831 	err = fuse_copy_one(cs, &oh, sizeof(oh));
1832 	if (err)
1833 		goto err_finish;
1834 
1835 	err = -EINVAL;
1836 	if (oh.len != nbytes)
1837 		goto err_finish;
1838 
1839 	/*
1840 	 * A zero oh.unique indicates an unsolicited notification message,
1841 	 * in which case oh.error contains the notification code.
1842 	 */
1843 	if (!oh.unique) {
1844 		err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1845 		return err ? err : nbytes;
1846 	}
1847 
1848 	err = -EINVAL;
1849 	if (oh.error <= -1000 || oh.error > 0)
1850 		goto err_finish;
1851 
1852 	spin_lock(&fpq->lock);
1853 	err = -ENOENT;
1854 	if (!fpq->connected)
1855 		goto err_unlock_pq;
1856 
1857 	req = request_find(fpq, oh.unique);
1858 	if (!req)
1859 		goto err_unlock_pq;
1860 
1861 	/* Is it an interrupt reply? */
1862 	if (req->intr_unique == oh.unique) {
1863 		spin_unlock(&fpq->lock);
1864 
1865 		err = -EINVAL;
1866 		if (nbytes != sizeof(struct fuse_out_header))
1867 			goto err_finish;
1868 
1869 		if (oh.error == -ENOSYS)
1870 			fc->no_interrupt = 1;
1871 		else if (oh.error == -EAGAIN)
1872 			queue_interrupt(&fc->iq, req);
1873 
1874 		fuse_copy_finish(cs);
1875 		return nbytes;
1876 	}
1877 
1878 	clear_bit(FR_SENT, &req->flags);
1879 	list_move(&req->list, &fpq->io);
1880 	req->out.h = oh;
1881 	set_bit(FR_LOCKED, &req->flags);
1882 	spin_unlock(&fpq->lock);
1883 	cs->req = req;
1884 	if (!req->out.page_replace)
1885 		cs->move_pages = 0;
1886 
1887 	err = copy_out_args(cs, &req->out, nbytes);
1888 	fuse_copy_finish(cs);
1889 
1890 	spin_lock(&fpq->lock);
1891 	clear_bit(FR_LOCKED, &req->flags);
1892 	if (!fpq->connected)
1893 		err = -ENOENT;
1894 	else if (err)
1895 		req->out.h.error = -EIO;
1896 	if (!test_bit(FR_PRIVATE, &req->flags))
1897 		list_del_init(&req->list);
1898 	spin_unlock(&fpq->lock);
1899 
1900 	request_end(fc, req);
1901 
1902 	return err ? err : nbytes;
1903 
1904  err_unlock_pq:
1905 	spin_unlock(&fpq->lock);
1906  err_finish:
1907 	fuse_copy_finish(cs);
1908 	return err;
1909 }
1910 
1911 static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
1912 {
1913 	struct fuse_copy_state cs;
1914 	struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp);
1915 
1916 	if (!fud)
1917 		return -EPERM;
1918 
1919 	if (!iter_is_iovec(from))
1920 		return -EINVAL;
1921 
1922 	fuse_copy_init(&cs, 0, from);
1923 
1924 	return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
1925 }
1926 
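/*
 * Write a reply from a pipe.  Enough pipe buffers to cover 'len' bytes
 * are collected and fed to fuse_dev_do_write(); with SPLICE_F_MOVE the
 * pages may be moved directly into the page cache.
 */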
1927 static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1928 				     struct file *out, loff_t *ppos,
1929 				     size_t len, unsigned int flags)
1930 {
1931 	unsigned nbuf;
1932 	unsigned idx;
1933 	struct pipe_buffer *bufs;
1934 	struct fuse_copy_state cs;
1935 	struct fuse_dev *fud;
1936 	size_t rem;
1937 	ssize_t ret;
1938 
1939 	fud = fuse_get_dev(out);
1940 	if (!fud)
1941 		return -EPERM;
1942 
1943 	bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1944 	if (!bufs)
1945 		return -ENOMEM;
1946 
1947 	pipe_lock(pipe);
1948 	nbuf = 0;
1949 	rem = 0;
1950 	for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
1951 		rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
1952 
1953 	ret = -EINVAL;
1954 	if (rem < len) {
1955 		pipe_unlock(pipe);
1956 		goto out;
1957 	}
1958 
1959 	rem = len;
1960 	while (rem) {
1961 		struct pipe_buffer *ibuf;
1962 		struct pipe_buffer *obuf;
1963 
1964 		BUG_ON(nbuf >= pipe->buffers);
1965 		BUG_ON(!pipe->nrbufs);
1966 		ibuf = &pipe->bufs[pipe->curbuf];
1967 		obuf = &bufs[nbuf];
1968 
1969 		if (rem >= ibuf->len) {
1970 			*obuf = *ibuf;
1971 			ibuf->ops = NULL;
1972 			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
1973 			pipe->nrbufs--;
1974 		} else {
1975 			pipe_buf_get(pipe, ibuf);
1976 			*obuf = *ibuf;
1977 			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1978 			obuf->len = rem;
1979 			ibuf->offset += obuf->len;
1980 			ibuf->len -= obuf->len;
1981 		}
1982 		nbuf++;
1983 		rem -= obuf->len;
1984 	}
1985 	pipe_unlock(pipe);
1986 
1987 	fuse_copy_init(&cs, 0, NULL);
1988 	cs.pipebufs = bufs;
1989 	cs.nr_segs = nbuf;
1990 	cs.pipe = pipe;
1991 
1992 	if (flags & SPLICE_F_MOVE)
1993 		cs.move_pages = 1;
1994 
1995 	ret = fuse_dev_do_write(fud, &cs, len);
1996 
1997 	for (idx = 0; idx < nbuf; idx++)
1998 		pipe_buf_release(pipe, &bufs[idx]);
1999 
2000 out:
2001 	kfree(bufs);
2002 	return ret;
2003 }
2004 
2005 static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
2006 {
2007 	__poll_t mask = EPOLLOUT | EPOLLWRNORM;
2008 	struct fuse_iqueue *fiq;
2009 	struct fuse_dev *fud = fuse_get_dev(file);
2010 
2011 	if (!fud)
2012 		return EPOLLERR;
2013 
2014 	fiq = &fud->fc->iq;
2015 	poll_wait(file, &fiq->waitq, wait);
2016 
2017 	spin_lock(&fiq->waitq.lock);
2018 	if (!fiq->connected)
2019 		mask = EPOLLERR;
2020 	else if (request_pending(fiq))
2021 		mask |= EPOLLIN | EPOLLRDNORM;
2022 	spin_unlock(&fiq->waitq.lock);
2023 
2024 	return mask;
2025 }
2026 
2027 /*
2028  * Abort all requests on the given list (pending or processing)
2029  *
2030  * Called without fc->lock held; request_end() may take it internally.
2031  */
2032 static void end_requests(struct fuse_conn *fc, struct list_head *head)
2033 {
2034 	while (!list_empty(head)) {
2035 		struct fuse_req *req;
2036 		req = list_entry(head->next, struct fuse_req, list);
2037 		req->out.h.error = -ECONNABORTED;
2038 		clear_bit(FR_SENT, &req->flags);
2039 		list_del_init(&req->list);
2040 		request_end(fc, req);
2041 	}
2042 }
2043 
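/*
 * Wake up everybody currently polling on files of this connection, so
 * that they re-evaluate the poll state (called when the connection is
 * aborted).
 */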
2044 static void end_polls(struct fuse_conn *fc)
2045 {
2046 	struct rb_node *p;
2047 
2048 	p = rb_first(&fc->polled_files);
2049 
2050 	while (p) {
2051 		struct fuse_file *ff;
2052 		ff = rb_entry(p, struct fuse_file, polled_node);
2053 		wake_up_interruptible_all(&ff->poll_wait);
2054 
2055 		p = rb_next(p);
2056 	}
2057 }
2058 
2059 /*
2060  * Abort all requests.
2061  *
2062  * Emergency exit in case of a malicious or accidental deadlock, or just a hung
2063  * filesystem.
2064  *
2065  * The same effect is usually achievable through killing the filesystem daemon
2066  * and all users of the filesystem.  The exception is the combination of an
2067  * asynchronous request and the tricky deadlock (see
2068  * Documentation/filesystems/fuse.txt).
2069  *
2070  * Aborting requests under I/O goes as follows: 1: Separate out unlocked
2071  * requests, they should be finished off immediately.  Locked requests will be
2072  * finished after unlock; see unlock_request(). 2: Finish off the unlocked
2073  * requests.  It is possible that some request will finish before we can
2074  * finish it ourselves.  This is OK: in that case the request is removed from
2075  * the list before we touch it.
2076  */
2077 void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
2078 {
2079 	struct fuse_iqueue *fiq = &fc->iq;
2080 
2081 	spin_lock(&fc->lock);
2082 	if (fc->connected) {
2083 		struct fuse_dev *fud;
2084 		struct fuse_req *req, *next;
2085 		LIST_HEAD(to_end1);
2086 		LIST_HEAD(to_end2);
2087 
2088 		fc->connected = 0;
2089 		fc->blocked = 0;
2090 		fc->aborted = is_abort;
2091 		fuse_set_initialized(fc);
2092 		list_for_each_entry(fud, &fc->devices, entry) {
2093 			struct fuse_pqueue *fpq = &fud->pq;
2094 
2095 			spin_lock(&fpq->lock);
2096 			fpq->connected = 0;
2097 			list_for_each_entry_safe(req, next, &fpq->io, list) {
2098 				req->out.h.error = -ECONNABORTED;
2099 				spin_lock(&req->waitq.lock);
2100 				set_bit(FR_ABORTED, &req->flags);
2101 				if (!test_bit(FR_LOCKED, &req->flags)) {
2102 					set_bit(FR_PRIVATE, &req->flags);
2103 					list_move(&req->list, &to_end1);
2104 				}
2105 				spin_unlock(&req->waitq.lock);
2106 			}
2107 			list_splice_init(&fpq->processing, &to_end2);
2108 			spin_unlock(&fpq->lock);
2109 		}
2110 		fc->max_background = UINT_MAX;
2111 		flush_bg_queue(fc);
2112 
2113 		spin_lock(&fiq->waitq.lock);
2114 		fiq->connected = 0;
2115 		list_splice_init(&fiq->pending, &to_end2);
2116 		list_for_each_entry(req, &to_end2, list)
2117 			clear_bit(FR_PENDING, &req->flags);
2118 		while (forget_pending(fiq))
2119 			kfree(dequeue_forget(fiq, 1, NULL));
2120 		wake_up_all_locked(&fiq->waitq);
2121 		spin_unlock(&fiq->waitq.lock);
2122 		kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
2123 		end_polls(fc);
2124 		wake_up_all(&fc->blocked_waitq);
2125 		spin_unlock(&fc->lock);
2126 
2127 		while (!list_empty(&to_end1)) {
2128 			req = list_first_entry(&to_end1, struct fuse_req, list);
2129 			__fuse_get_request(req);
2130 			list_del_init(&req->list);
2131 			request_end(fc, req);
2132 		}
2133 		end_requests(fc, &to_end2);
2134 	} else {
2135 		spin_unlock(&fc->lock);
2136 	}
2137 }
2138 EXPORT_SYMBOL_GPL(fuse_abort_conn);
2139 
2140 int fuse_dev_release(struct inode *inode, struct file *file)
2141 {
2142 	struct fuse_dev *fud = fuse_get_dev(file);
2143 
2144 	if (fud) {
2145 		struct fuse_conn *fc = fud->fc;
2146 		struct fuse_pqueue *fpq = &fud->pq;
2147 
2148 		WARN_ON(!list_empty(&fpq->io));
2149 		end_requests(fc, &fpq->processing);
2150 		/* Are we the last open device? */
2151 		if (atomic_dec_and_test(&fc->dev_count)) {
2152 			WARN_ON(fc->iq.fasync != NULL);
2153 			fuse_abort_conn(fc, false);
2154 		}
2155 		fuse_dev_free(fud);
2156 	}
2157 	return 0;
2158 }
2159 EXPORT_SYMBOL_GPL(fuse_dev_release);
2160 
2161 static int fuse_dev_fasync(int fd, struct file *file, int on)
2162 {
2163 	struct fuse_dev *fud = fuse_get_dev(file);
2164 
2165 	if (!fud)
2166 		return -EPERM;
2167 
2168 	/* No locking - fasync_helper does its own locking */
2169 	return fasync_helper(fd, file, on, &fud->fc->iq.fasync);
2170 }
2171 
2172 static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
2173 {
2174 	struct fuse_dev *fud;
2175 
2176 	if (new->private_data)
2177 		return -EINVAL;
2178 
2179 	fud = fuse_dev_alloc(fc);
2180 	if (!fud)
2181 		return -ENOMEM;
2182 
2183 	new->private_data = fud;
2184 	atomic_inc(&fc->dev_count);
2185 
2186 	return 0;
2187 }
2188 
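/*
 * FUSE_DEV_IOC_CLONE attaches an additional /dev/fuse file to an
 * existing connection, giving it its own processing queue so that
 * several threads can service requests in parallel.
 *
 * Illustrative userspace sketch (not part of this file; 'masterfd' is
 * assumed to be an already mounted /dev/fuse descriptor):
 *
 *	int clonefd = open("/dev/fuse", O_RDWR | O_CLOEXEC);
 *	uint32_t src = masterfd;
 *	ioctl(clonefd, FUSE_DEV_IOC_CLONE, &src);
 */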
2189 static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
2190 			   unsigned long arg)
2191 {
2192 	int err = -ENOTTY;
2193 
2194 	if (cmd == FUSE_DEV_IOC_CLONE) {
2195 		int oldfd;
2196 
2197 		err = -EFAULT;
2198 		if (!get_user(oldfd, (__u32 __user *) arg)) {
2199 			struct file *old = fget(oldfd);
2200 
2201 			err = -EINVAL;
2202 			if (old) {
2203 				struct fuse_dev *fud = NULL;
2204 
2205 				/*
2206 				 * Check against file->f_op because CUSE
2207 				 * uses the same ioctl handler.
2208 				 */
2209 				if (old->f_op == file->f_op &&
2210 				    old->f_cred->user_ns == file->f_cred->user_ns)
2211 					fud = fuse_get_dev(old);
2212 
2213 				if (fud) {
2214 					mutex_lock(&fuse_mutex);
2215 					err = fuse_device_clone(fud->fc, file);
2216 					mutex_unlock(&fuse_mutex);
2217 				}
2218 				fput(old);
2219 			}
2220 		}
2221 	}
2222 	return err;
2223 }
2224 
2225 const struct file_operations fuse_dev_operations = {
2226 	.owner		= THIS_MODULE,
2227 	.open		= fuse_dev_open,
2228 	.llseek		= no_llseek,
2229 	.read_iter	= fuse_dev_read,
2230 	.splice_read	= fuse_dev_splice_read,
2231 	.write_iter	= fuse_dev_write,
2232 	.splice_write	= fuse_dev_splice_write,
2233 	.poll		= fuse_dev_poll,
2234 	.release	= fuse_dev_release,
2235 	.fasync		= fuse_dev_fasync,
2236 	.unlocked_ioctl = fuse_dev_ioctl,
2237 	.compat_ioctl   = fuse_dev_ioctl,
2238 };
2239 EXPORT_SYMBOL_GPL(fuse_dev_operations);
2240 
2241 static struct miscdevice fuse_miscdevice = {
2242 	.minor = FUSE_MINOR,
2243 	.name  = "fuse",
2244 	.fops = &fuse_dev_operations,
2245 };
2246 
2247 int __init fuse_dev_init(void)
2248 {
2249 	int err = -ENOMEM;
2250 	fuse_req_cachep = kmem_cache_create("fuse_request",
2251 					    sizeof(struct fuse_req),
2252 					    0, 0, NULL);
2253 	if (!fuse_req_cachep)
2254 		goto out;
2255 
2256 	err = misc_register(&fuse_miscdevice);
2257 	if (err)
2258 		goto out_cache_clean;
2259 
2260 	return 0;
2261 
2262  out_cache_clean:
2263 	kmem_cache_destroy(fuse_req_cachep);
2264  out:
2265 	return err;
2266 }
2267 
2268 void fuse_dev_cleanup(void)
2269 {
2270 	misc_deregister(&fuse_miscdevice);
2271 	kmem_cache_destroy(fuse_req_cachep);
2272 }
2273