1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/poll.h>
14 #include <linux/sched/signal.h>
15 #include <linux/uio.h>
16 #include <linux/miscdevice.h>
17 #include <linux/pagemap.h>
18 #include <linux/file.h>
19 #include <linux/slab.h>
20 #include <linux/pipe_fs_i.h>
21 #include <linux/swap.h>
22 #include <linux/splice.h>
23 #include <linux/sched.h>
24 
25 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
26 MODULE_ALIAS("devname:fuse");
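/*
 * Note (hedged): the miscdevice alias above is what allows the module
 * to be loaded on demand when the character device is opened, and the
 * "devname:fuse" alias lets devtmpfs create /dev/fuse before the
 * module is loaded.
 */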
27 
28 static struct kmem_cache *fuse_req_cachep;
29 
30 static struct fuse_dev *fuse_get_dev(struct file *file)
31 {
32 	/*
33 	 * Lockless access is OK, because file->private_data is set
34 	 * once during mount and is valid until the file is released.
35 	 */
36 	return READ_ONCE(file->private_data);
37 }
38 
39 static void fuse_request_init(struct fuse_req *req, struct page **pages,
40 			      struct fuse_page_desc *page_descs,
41 			      unsigned npages)
42 {
43 	memset(req, 0, sizeof(*req));
44 	memset(pages, 0, sizeof(*pages) * npages);
45 	memset(page_descs, 0, sizeof(*page_descs) * npages);
46 	INIT_LIST_HEAD(&req->list);
47 	INIT_LIST_HEAD(&req->intr_entry);
48 	init_waitqueue_head(&req->waitq);
49 	refcount_set(&req->count, 1);
50 	req->pages = pages;
51 	req->page_descs = page_descs;
52 	req->max_pages = npages;
53 	__set_bit(FR_PENDING, &req->flags);
54 }
55 
56 static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
57 {
58 	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
59 	if (req) {
60 		struct page **pages;
61 		struct fuse_page_desc *page_descs;
62 
63 		if (npages <= FUSE_REQ_INLINE_PAGES) {
64 			pages = req->inline_pages;
65 			page_descs = req->inline_page_descs;
66 		} else {
67 			pages = kmalloc_array(npages, sizeof(struct page *),
68 					      flags);
69 			page_descs =
70 				kmalloc_array(npages,
71 					      sizeof(struct fuse_page_desc),
72 					      flags);
73 		}
74 
75 		if (!pages || !page_descs) {
76 			kfree(pages);
77 			kfree(page_descs);
78 			kmem_cache_free(fuse_req_cachep, req);
79 			return NULL;
80 		}
81 
82 		fuse_request_init(req, pages, page_descs, npages);
83 	}
84 	return req;
85 }
86 
87 struct fuse_req *fuse_request_alloc(unsigned npages)
88 {
89 	return __fuse_request_alloc(npages, GFP_KERNEL);
90 }
91 EXPORT_SYMBOL_GPL(fuse_request_alloc);
92 
93 struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
94 {
95 	return __fuse_request_alloc(npages, GFP_NOFS);
96 }
97 
98 void fuse_request_free(struct fuse_req *req)
99 {
100 	if (req->pages != req->inline_pages) {
101 		kfree(req->pages);
102 		kfree(req->page_descs);
103 	}
104 	kmem_cache_free(fuse_req_cachep, req);
105 }
106 
107 void __fuse_get_request(struct fuse_req *req)
108 {
109 	refcount_inc(&req->count);
110 }
111 
112 /* Must be called with > 1 refcount */
113 static void __fuse_put_request(struct fuse_req *req)
114 {
115 	refcount_dec(&req->count);
116 }
117 
118 void fuse_set_initialized(struct fuse_conn *fc)
119 {
120 	/* Make sure stores before this are seen on another CPU */
121 	smp_wmb();
122 	fc->initialized = 1;
123 }
124 
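/*
 * New requests must wait until the connection is marked initialized
 * (see fuse_set_initialized()); background requests must additionally
 * wait while fc->blocked is set, which happens when num_background
 * reaches max_background and is cleared again in request_end().
 */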
125 static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
126 {
127 	return !fc->initialized || (for_background && fc->blocked);
128 }
129 
130 static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
131 				       bool for_background)
132 {
133 	struct fuse_req *req;
134 	int err;
135 	atomic_inc(&fc->num_waiting);
136 
137 	if (fuse_block_alloc(fc, for_background)) {
138 		err = -EINTR;
139 		if (wait_event_killable_exclusive(fc->blocked_waitq,
140 				!fuse_block_alloc(fc, for_background)))
141 			goto out;
142 	}
143 	/* Matches smp_wmb() in fuse_set_initialized() */
144 	smp_rmb();
145 
146 	err = -ENOTCONN;
147 	if (!fc->connected)
148 		goto out;
149 
150 	err = -ECONNREFUSED;
151 	if (fc->conn_error)
152 		goto out;
153 
154 	req = fuse_request_alloc(npages);
155 	err = -ENOMEM;
156 	if (!req) {
157 		if (for_background)
158 			wake_up(&fc->blocked_waitq);
159 		goto out;
160 	}
161 
162 	req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
163 	req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
164 	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
165 
166 	__set_bit(FR_WAITING, &req->flags);
167 	if (for_background)
168 		__set_bit(FR_BACKGROUND, &req->flags);
169 
170 	if (unlikely(req->in.h.uid == ((uid_t)-1) ||
171 		     req->in.h.gid == ((gid_t)-1))) {
172 		fuse_put_request(fc, req);
173 		return ERR_PTR(-EOVERFLOW);
174 	}
175 	return req;
176 
177  out:
178 	atomic_dec(&fc->num_waiting);
179 	return ERR_PTR(err);
180 }
181 
182 struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
183 {
184 	return __fuse_get_req(fc, npages, false);
185 }
186 EXPORT_SYMBOL_GPL(fuse_get_req);
187 
188 struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
189 					     unsigned npages)
190 {
191 	return __fuse_get_req(fc, npages, true);
192 }
193 EXPORT_SYMBOL_GPL(fuse_get_req_for_background);
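/*
 * A hedged sketch of the request life cycle, mirroring what
 * fuse_simple_request() below does; error handling and the actual
 * opcode/argument setup are abbreviated:
 *
 *	struct fuse_req *req = fuse_get_req(fc, 0);
 *	if (IS_ERR(req))
 *		return PTR_ERR(req);
 *	req->in.h.opcode = opcode;
 *	req->in.numargs = 0;
 *	fuse_request_send(fc, req);
 *	err = req->out.h.error;
 *	fuse_put_request(fc, req);
 *
 * fuse_request_send() waits for the answer; fuse_put_request() drops
 * the caller's reference and the num_waiting accounting.
 */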
194 
195 /*
196  * Return the request in fuse_file->reserved_req.  However, that may
197  * currently be in use.  If that is the case, wait for it to become
198  * available.
199  */
200 static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
201 					 struct file *file)
202 {
203 	struct fuse_req *req = NULL;
204 	struct fuse_file *ff = file->private_data;
205 
206 	do {
207 		wait_event(fc->reserved_req_waitq, ff->reserved_req);
208 		spin_lock(&fc->lock);
209 		if (ff->reserved_req) {
210 			req = ff->reserved_req;
211 			ff->reserved_req = NULL;
212 			req->stolen_file = get_file(file);
213 		}
214 		spin_unlock(&fc->lock);
215 	} while (!req);
216 
217 	return req;
218 }
219 
220 /*
221  * Put stolen request back into fuse_file->reserved_req
222  */
223 static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
224 {
225 	struct file *file = req->stolen_file;
226 	struct fuse_file *ff = file->private_data;
227 
228 	spin_lock(&fc->lock);
229 	fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
230 	BUG_ON(ff->reserved_req);
231 	ff->reserved_req = req;
232 	wake_up_all(&fc->reserved_req_waitq);
233 	spin_unlock(&fc->lock);
234 	fput(file);
235 }
236 
237 /*
238  * Gets a request for a file operation; always succeeds
239  *
240  * This is used for sending the FLUSH request, which must get to
241  * userspace, due to POSIX locks which may need to be unlocked.
242  *
243  * If allocation fails due to OOM, use the reserved request in
244  * fuse_file.
245  *
246  * This is very unlikely to deadlock accidentally, since the
247  * filesystem should not have its own file open.  If deadlock is
248  * intentional, it can still be broken by "aborting" the filesystem.
249  */
250 struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
251 					     struct file *file)
252 {
253 	struct fuse_req *req;
254 
255 	atomic_inc(&fc->num_waiting);
256 	wait_event(fc->blocked_waitq, fc->initialized);
257 	/* Matches smp_wmb() in fuse_set_initialized() */
258 	smp_rmb();
259 	req = fuse_request_alloc(0);
260 	if (!req)
261 		req = get_reserved_req(fc, file);
262 
263 	req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
264 	req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
265 	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
266 
267 	__set_bit(FR_WAITING, &req->flags);
268 	__clear_bit(FR_BACKGROUND, &req->flags);
269 	return req;
270 }
271 
272 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
273 {
274 	if (refcount_dec_and_test(&req->count)) {
275 		if (test_bit(FR_BACKGROUND, &req->flags)) {
276 			/*
277 			 * We get here in the unlikely case that a background
278 			 * request was allocated but not sent
279 			 */
280 			spin_lock(&fc->lock);
281 			if (!fc->blocked)
282 				wake_up(&fc->blocked_waitq);
283 			spin_unlock(&fc->lock);
284 		}
285 
286 		if (test_bit(FR_WAITING, &req->flags)) {
287 			__clear_bit(FR_WAITING, &req->flags);
288 			atomic_dec(&fc->num_waiting);
289 		}
290 
291 		if (req->stolen_file)
292 			put_reserved_req(fc, req);
293 		else
294 			fuse_request_free(req);
295 	}
296 }
297 EXPORT_SYMBOL_GPL(fuse_put_request);
298 
299 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
300 {
301 	unsigned nbytes = 0;
302 	unsigned i;
303 
304 	for (i = 0; i < numargs; i++)
305 		nbytes += args[i].size;
306 
307 	return nbytes;
308 }
309 
310 static u64 fuse_get_unique(struct fuse_iqueue *fiq)
311 {
312 	return ++fiq->reqctr;
313 }
314 
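/*
 * Unique request IDs come from a simple per-connection counter; both
 * fuse_get_unique() and queue_request() rely on the caller holding
 * fiq->waitq.lock.
 */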
315 static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req)
316 {
317 	req->in.h.len = sizeof(struct fuse_in_header) +
318 		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
319 	list_add_tail(&req->list, &fiq->pending);
320 	wake_up_locked(&fiq->waitq);
321 	kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
322 }
323 
324 void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
325 		       u64 nodeid, u64 nlookup)
326 {
327 	struct fuse_iqueue *fiq = &fc->iq;
328 
329 	forget->forget_one.nodeid = nodeid;
330 	forget->forget_one.nlookup = nlookup;
331 
332 	spin_lock(&fiq->waitq.lock);
333 	if (fiq->connected) {
334 		fiq->forget_list_tail->next = forget;
335 		fiq->forget_list_tail = forget;
336 		wake_up_locked(&fiq->waitq);
337 		kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
338 	} else {
339 		kfree(forget);
340 	}
341 	spin_unlock(&fiq->waitq.lock);
342 }
343 
344 static void flush_bg_queue(struct fuse_conn *fc)
345 {
346 	while (fc->active_background < fc->max_background &&
347 	       !list_empty(&fc->bg_queue)) {
348 		struct fuse_req *req;
349 		struct fuse_iqueue *fiq = &fc->iq;
350 
351 		req = list_entry(fc->bg_queue.next, struct fuse_req, list);
352 		list_del(&req->list);
353 		fc->active_background++;
354 		spin_lock(&fiq->waitq.lock);
355 		req->in.h.unique = fuse_get_unique(fiq);
356 		queue_request(fiq, req);
357 		spin_unlock(&fiq->waitq.lock);
358 	}
359 }
360 
361 /*
362  * This function is called when a request is finished.  Either a reply
363  * has arrived, or the request was aborted (and not yet sent), or some
364  * error occurred during communication with userspace, or the device
365  * file was closed.  The requester thread is woken up (if still
366  * waiting), the 'end' callback is called if given, and the reference
367  * to the request is released.
368  */
369 static void request_end(struct fuse_conn *fc, struct fuse_req *req)
370 {
371 	struct fuse_iqueue *fiq = &fc->iq;
372 
373 	if (test_and_set_bit(FR_FINISHED, &req->flags))
374 		return;
375 
376 	spin_lock(&fiq->waitq.lock);
377 	list_del_init(&req->intr_entry);
378 	spin_unlock(&fiq->waitq.lock);
379 	WARN_ON(test_bit(FR_PENDING, &req->flags));
380 	WARN_ON(test_bit(FR_SENT, &req->flags));
381 	if (test_bit(FR_BACKGROUND, &req->flags)) {
382 		spin_lock(&fc->lock);
383 		clear_bit(FR_BACKGROUND, &req->flags);
384 		if (fc->num_background == fc->max_background)
385 			fc->blocked = 0;
386 
387 		/* Wake up next waiter, if any */
388 		if (!fc->blocked && waitqueue_active(&fc->blocked_waitq))
389 			wake_up(&fc->blocked_waitq);
390 
391 		if (fc->num_background == fc->congestion_threshold && fc->sb) {
392 			clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
393 			clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
394 		}
395 		fc->num_background--;
396 		fc->active_background--;
397 		flush_bg_queue(fc);
398 		spin_unlock(&fc->lock);
399 	}
400 	wake_up(&req->waitq);
401 	if (req->end)
402 		req->end(fc, req);
403 	fuse_put_request(fc, req);
404 }
405 
406 static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
407 {
408 	spin_lock(&fiq->waitq.lock);
409 	if (test_bit(FR_FINISHED, &req->flags)) {
410 		spin_unlock(&fiq->waitq.lock);
411 		return;
412 	}
413 	if (list_empty(&req->intr_entry)) {
414 		list_add_tail(&req->intr_entry, &fiq->interrupts);
415 		wake_up_locked(&fiq->waitq);
416 	}
417 	spin_unlock(&fiq->waitq.lock);
418 	kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
419 }
420 
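/*
 * Wait for the answer to a request.  The wait has up to three stages:
 * first interruptible by any signal (skipped once the server has
 * answered FUSE_INTERRUPT with -ENOSYS), then killable only (skipped
 * for forced requests), and finally uninterruptible once the request
 * has reached userspace or was forced.
 */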
421 static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
422 {
423 	struct fuse_iqueue *fiq = &fc->iq;
424 	int err;
425 
426 	if (!fc->no_interrupt) {
427 		/* Any signal may interrupt this */
428 		err = wait_event_interruptible(req->waitq,
429 					test_bit(FR_FINISHED, &req->flags));
430 		if (!err)
431 			return;
432 
433 		set_bit(FR_INTERRUPTED, &req->flags);
434 		/* matches barrier in fuse_dev_do_read() */
435 		smp_mb__after_atomic();
436 		if (test_bit(FR_SENT, &req->flags))
437 			queue_interrupt(fiq, req);
438 	}
439 
440 	if (!test_bit(FR_FORCE, &req->flags)) {
441 		/* Only fatal signals may interrupt this */
442 		err = wait_event_killable(req->waitq,
443 					test_bit(FR_FINISHED, &req->flags));
444 		if (!err)
445 			return;
446 
447 		spin_lock(&fiq->waitq.lock);
448 		/* Request is not yet in userspace, bail out */
449 		if (test_bit(FR_PENDING, &req->flags)) {
450 			list_del(&req->list);
451 			spin_unlock(&fiq->waitq.lock);
452 			__fuse_put_request(req);
453 			req->out.h.error = -EINTR;
454 			return;
455 		}
456 		spin_unlock(&fiq->waitq.lock);
457 	}
458 
459 	/*
460 	 * Either request is already in userspace, or it was forced.
461 	 * Wait it out.
462 	 */
463 	wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
464 }
465 
466 static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
467 {
468 	struct fuse_iqueue *fiq = &fc->iq;
469 
470 	BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
471 	spin_lock(&fiq->waitq.lock);
472 	if (!fiq->connected) {
473 		spin_unlock(&fiq->waitq.lock);
474 		req->out.h.error = -ENOTCONN;
475 	} else {
476 		req->in.h.unique = fuse_get_unique(fiq);
477 		queue_request(fiq, req);
478 		/* acquire extra reference, since request is still needed
479 		   after request_end() */
480 		__fuse_get_request(req);
481 		spin_unlock(&fiq->waitq.lock);
482 
483 		request_wait_answer(fc, req);
484 		/* Pairs with smp_wmb() in request_end() */
485 		smp_rmb();
486 	}
487 }
488 
489 void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
490 {
491 	__set_bit(FR_ISREPLY, &req->flags);
492 	if (!test_bit(FR_WAITING, &req->flags)) {
493 		__set_bit(FR_WAITING, &req->flags);
494 		atomic_inc(&fc->num_waiting);
495 	}
496 	__fuse_request_send(fc, req);
497 }
498 EXPORT_SYMBOL_GPL(fuse_request_send);
499 
500 static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
501 {
502 	if (fc->minor < 4 && args->in.h.opcode == FUSE_STATFS)
503 		args->out.args[0].size = FUSE_COMPAT_STATFS_SIZE;
504 
505 	if (fc->minor < 9) {
506 		switch (args->in.h.opcode) {
507 		case FUSE_LOOKUP:
508 		case FUSE_CREATE:
509 		case FUSE_MKNOD:
510 		case FUSE_MKDIR:
511 		case FUSE_SYMLINK:
512 		case FUSE_LINK:
513 			args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
514 			break;
515 		case FUSE_GETATTR:
516 		case FUSE_SETATTR:
517 			args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
518 			break;
519 		}
520 	}
521 	if (fc->minor < 12) {
522 		switch (args->in.h.opcode) {
523 		case FUSE_CREATE:
524 			args->in.args[0].size = sizeof(struct fuse_open_in);
525 			break;
526 		case FUSE_MKNOD:
527 			args->in.args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
528 			break;
529 		}
530 	}
531 }
532 
533 ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
534 {
535 	struct fuse_req *req;
536 	ssize_t ret;
537 
538 	req = fuse_get_req(fc, 0);
539 	if (IS_ERR(req))
540 		return PTR_ERR(req);
541 
542 	/* Needs to be done after fuse_get_req() so that fc->minor is valid */
543 	fuse_adjust_compat(fc, args);
544 
545 	req->in.h.opcode = args->in.h.opcode;
546 	req->in.h.nodeid = args->in.h.nodeid;
547 	req->in.numargs = args->in.numargs;
548 	memcpy(req->in.args, args->in.args,
549 	       args->in.numargs * sizeof(struct fuse_in_arg));
550 	req->out.argvar = args->out.argvar;
551 	req->out.numargs = args->out.numargs;
552 	memcpy(req->out.args, args->out.args,
553 	       args->out.numargs * sizeof(struct fuse_arg));
554 	fuse_request_send(fc, req);
555 	ret = req->out.h.error;
556 	if (!ret && args->out.argvar) {
557 		BUG_ON(args->out.numargs != 1);
558 		ret = req->out.args[0].size;
559 	}
560 	fuse_put_request(fc, req);
561 
562 	return ret;
563 }
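/*
 * Typical usage from other parts of fuse (a hedged sketch; the
 * FUSE_GETATTR layout shown is illustrative and comes from the uapi
 * headers, not from this file):
 *
 *	struct fuse_args args = {};
 *	struct fuse_getattr_in inarg = {};
 *	struct fuse_attr_out outarg;
 *
 *	args.in.h.opcode = FUSE_GETATTR;
 *	args.in.h.nodeid = nodeid;
 *	args.in.numargs = 1;
 *	args.in.args[0].size = sizeof(inarg);
 *	args.in.args[0].value = &inarg;
 *	args.out.numargs = 1;
 *	args.out.args[0].size = sizeof(outarg);
 *	args.out.args[0].value = &outarg;
 *	err = fuse_simple_request(fc, &args);
 */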
564 
565 /*
566  * Called under fc->lock
567  *
568  * fc->connected must have been checked previously
569  */
570 void fuse_request_send_background_locked(struct fuse_conn *fc,
571 					 struct fuse_req *req)
572 {
573 	BUG_ON(!test_bit(FR_BACKGROUND, &req->flags));
574 	if (!test_bit(FR_WAITING, &req->flags)) {
575 		__set_bit(FR_WAITING, &req->flags);
576 		atomic_inc(&fc->num_waiting);
577 	}
578 	__set_bit(FR_ISREPLY, &req->flags);
579 	fc->num_background++;
580 	if (fc->num_background == fc->max_background)
581 		fc->blocked = 1;
582 	if (fc->num_background == fc->congestion_threshold && fc->sb) {
583 		set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
584 		set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
585 	}
586 	list_add_tail(&req->list, &fc->bg_queue);
587 	flush_bg_queue(fc);
588 }
589 
590 void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
591 {
592 	BUG_ON(!req->end);
593 	spin_lock(&fc->lock);
594 	if (fc->connected) {
595 		fuse_request_send_background_locked(fc, req);
596 		spin_unlock(&fc->lock);
597 	} else {
598 		spin_unlock(&fc->lock);
599 		req->out.h.error = -ENOTCONN;
600 		req->end(fc, req);
601 		fuse_put_request(fc, req);
602 	}
603 }
604 EXPORT_SYMBOL_GPL(fuse_request_send_background);
605 
606 static int fuse_request_send_notify_reply(struct fuse_conn *fc,
607 					  struct fuse_req *req, u64 unique)
608 {
609 	int err = -ENODEV;
610 	struct fuse_iqueue *fiq = &fc->iq;
611 
612 	__clear_bit(FR_ISREPLY, &req->flags);
613 	req->in.h.unique = unique;
614 	spin_lock(&fiq->waitq.lock);
615 	if (fiq->connected) {
616 		queue_request(fiq, req);
617 		err = 0;
618 	}
619 	spin_unlock(&fiq->waitq.lock);
620 
621 	return err;
622 }
623 
624 void fuse_force_forget(struct file *file, u64 nodeid)
625 {
626 	struct inode *inode = file_inode(file);
627 	struct fuse_conn *fc = get_fuse_conn(inode);
628 	struct fuse_req *req;
629 	struct fuse_forget_in inarg;
630 
631 	memset(&inarg, 0, sizeof(inarg));
632 	inarg.nlookup = 1;
633 	req = fuse_get_req_nofail_nopages(fc, file);
634 	req->in.h.opcode = FUSE_FORGET;
635 	req->in.h.nodeid = nodeid;
636 	req->in.numargs = 1;
637 	req->in.args[0].size = sizeof(inarg);
638 	req->in.args[0].value = &inarg;
639 	__clear_bit(FR_ISREPLY, &req->flags);
640 	__fuse_request_send(fc, req);
641 	/* ignore errors */
642 	fuse_put_request(fc, req);
643 }
644 
645 /*
646  * Lock the request.  Up to the next unlock_request() there mustn't be
647  * anything that could cause a page-fault.  If the request was already
648  * aborted bail out.
649  */
650 static int lock_request(struct fuse_req *req)
651 {
652 	int err = 0;
653 	if (req) {
654 		spin_lock(&req->waitq.lock);
655 		if (test_bit(FR_ABORTED, &req->flags))
656 			err = -ENOENT;
657 		else
658 			set_bit(FR_LOCKED, &req->flags);
659 		spin_unlock(&req->waitq.lock);
660 	}
661 	return err;
662 }
663 
664 /*
665  * Unlock request.  If it was aborted while locked, caller is responsible
666  * for unlocking and ending the request.
667  */
668 static int unlock_request(struct fuse_req *req)
669 {
670 	int err = 0;
671 	if (req) {
672 		spin_lock(&req->waitq.lock);
673 		if (test_bit(FR_ABORTED, &req->flags))
674 			err = -ENOENT;
675 		else
676 			clear_bit(FR_LOCKED, &req->flags);
677 		spin_unlock(&req->waitq.lock);
678 	}
679 	return err;
680 }
681 
682 struct fuse_copy_state {
683 	int write;
684 	struct fuse_req *req;
685 	struct iov_iter *iter;
686 	struct pipe_buffer *pipebufs;
687 	struct pipe_buffer *currbuf;
688 	struct pipe_inode_info *pipe;
689 	unsigned long nr_segs;
690 	struct page *pg;
691 	unsigned len;
692 	unsigned offset;
693 	unsigned move_pages:1;
694 };
695 
696 static void fuse_copy_init(struct fuse_copy_state *cs, int write,
697 			   struct iov_iter *iter)
698 {
699 	memset(cs, 0, sizeof(*cs));
700 	cs->write = write;
701 	cs->iter = iter;
702 }
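/*
 * cs->write == 1 means data flows from the kernel to the server
 * (device read and splice_read); cs->write == 0 means the server is
 * writing back (device write and splice_write).  The bytes come either
 * from cs->iter or, on the splice paths, from cs->pipebufs.
 */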
703 
704 /* Unmap and put previous page of userspace buffer */
705 static void fuse_copy_finish(struct fuse_copy_state *cs)
706 {
707 	if (cs->currbuf) {
708 		struct pipe_buffer *buf = cs->currbuf;
709 
710 		if (cs->write)
711 			buf->len = PAGE_SIZE - cs->len;
712 		cs->currbuf = NULL;
713 	} else if (cs->pg) {
714 		if (cs->write) {
715 			flush_dcache_page(cs->pg);
716 			set_page_dirty_lock(cs->pg);
717 		}
718 		put_page(cs->pg);
719 	}
720 	cs->pg = NULL;
721 }
722 
723 /*
724  * Get another pageful of the userspace buffer, map it to kernel
725  * address space, and lock the request
726  */
727 static int fuse_copy_fill(struct fuse_copy_state *cs)
728 {
729 	struct page *page;
730 	int err;
731 
732 	err = unlock_request(cs->req);
733 	if (err)
734 		return err;
735 
736 	fuse_copy_finish(cs);
737 	if (cs->pipebufs) {
738 		struct pipe_buffer *buf = cs->pipebufs;
739 
740 		if (!cs->write) {
741 			err = pipe_buf_confirm(cs->pipe, buf);
742 			if (err)
743 				return err;
744 
745 			BUG_ON(!cs->nr_segs);
746 			cs->currbuf = buf;
747 			cs->pg = buf->page;
748 			cs->offset = buf->offset;
749 			cs->len = buf->len;
750 			cs->pipebufs++;
751 			cs->nr_segs--;
752 		} else {
753 			if (cs->nr_segs == cs->pipe->buffers)
754 				return -EIO;
755 
756 			page = alloc_page(GFP_HIGHUSER);
757 			if (!page)
758 				return -ENOMEM;
759 
760 			buf->page = page;
761 			buf->offset = 0;
762 			buf->len = 0;
763 
764 			cs->currbuf = buf;
765 			cs->pg = page;
766 			cs->offset = 0;
767 			cs->len = PAGE_SIZE;
768 			cs->pipebufs++;
769 			cs->nr_segs++;
770 		}
771 	} else {
772 		size_t off;
773 		err = iov_iter_get_pages(cs->iter, &page, PAGE_SIZE, 1, &off);
774 		if (err < 0)
775 			return err;
776 		BUG_ON(!err);
777 		cs->len = err;
778 		cs->offset = off;
779 		cs->pg = page;
780 		iov_iter_advance(cs->iter, err);
781 	}
782 
783 	return lock_request(cs->req);
784 }
785 
786 /* Do as much copy to/from userspace buffer as we can */
787 static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
788 {
789 	unsigned ncpy = min(*size, cs->len);
790 	if (val) {
791 		void *pgaddr = kmap_atomic(cs->pg);
792 		void *buf = pgaddr + cs->offset;
793 
794 		if (cs->write)
795 			memcpy(buf, *val, ncpy);
796 		else
797 			memcpy(*val, buf, ncpy);
798 
799 		kunmap_atomic(pgaddr);
800 		*val += ncpy;
801 	}
802 	*size -= ncpy;
803 	cs->len -= ncpy;
804 	cs->offset += ncpy;
805 	return ncpy;
806 }
807 
808 static int fuse_check_page(struct page *page)
809 {
810 	if (page_mapcount(page) ||
811 	    page->mapping != NULL ||
812 	    page_count(page) != 1 ||
813 	    (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
814 	     ~(1 << PG_locked |
815 	       1 << PG_referenced |
816 	       1 << PG_uptodate |
817 	       1 << PG_lru |
818 	       1 << PG_active |
819 	       1 << PG_reclaim))) {
820 		printk(KERN_WARNING "fuse: trying to steal weird page\n");
821 		printk(KERN_WARNING "  page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
822 		return 1;
823 	}
824 	return 0;
825 }
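/*
 * Only pages that look completely ordinary may be stolen from the
 * pipe and inserted into the page cache by fuse_try_move_page()
 * below; anything suspicious falls back to copying.
 */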
826 
827 static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
828 {
829 	int err;
830 	struct page *oldpage = *pagep;
831 	struct page *newpage;
832 	struct pipe_buffer *buf = cs->pipebufs;
833 
834 	err = unlock_request(cs->req);
835 	if (err)
836 		return err;
837 
838 	fuse_copy_finish(cs);
839 
840 	err = pipe_buf_confirm(cs->pipe, buf);
841 	if (err)
842 		return err;
843 
844 	BUG_ON(!cs->nr_segs);
845 	cs->currbuf = buf;
846 	cs->len = buf->len;
847 	cs->pipebufs++;
848 	cs->nr_segs--;
849 
850 	if (cs->len != PAGE_SIZE)
851 		goto out_fallback;
852 
853 	if (pipe_buf_steal(cs->pipe, buf) != 0)
854 		goto out_fallback;
855 
856 	newpage = buf->page;
857 
858 	if (!PageUptodate(newpage))
859 		SetPageUptodate(newpage);
860 
861 	ClearPageMappedToDisk(newpage);
862 
863 	if (fuse_check_page(newpage) != 0)
864 		goto out_fallback_unlock;
865 
866 	/*
867 	 * This is a new and locked page; it shouldn't be mapped or
868 	 * have any special flags on it.
869 	 */
870 	if (WARN_ON(page_mapped(oldpage)))
871 		goto out_fallback_unlock;
872 	if (WARN_ON(page_has_private(oldpage)))
873 		goto out_fallback_unlock;
874 	if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
875 		goto out_fallback_unlock;
876 	if (WARN_ON(PageMlocked(oldpage)))
877 		goto out_fallback_unlock;
878 
879 	err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
880 	if (err) {
881 		unlock_page(newpage);
882 		return err;
883 	}
884 
885 	get_page(newpage);
886 
887 	if (!(buf->flags & PIPE_BUF_FLAG_LRU))
888 		lru_cache_add_file(newpage);
889 
890 	err = 0;
891 	spin_lock(&cs->req->waitq.lock);
892 	if (test_bit(FR_ABORTED, &cs->req->flags))
893 		err = -ENOENT;
894 	else
895 		*pagep = newpage;
896 	spin_unlock(&cs->req->waitq.lock);
897 
898 	if (err) {
899 		unlock_page(newpage);
900 		put_page(newpage);
901 		return err;
902 	}
903 
904 	unlock_page(oldpage);
905 	put_page(oldpage);
906 	cs->len = 0;
907 
908 	return 0;
909 
910 out_fallback_unlock:
911 	unlock_page(newpage);
912 out_fallback:
913 	cs->pg = buf->page;
914 	cs->offset = buf->offset;
915 
916 	err = lock_request(cs->req);
917 	if (err)
918 		return err;
919 
920 	return 1;
921 }
922 
923 static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
924 			 unsigned offset, unsigned count)
925 {
926 	struct pipe_buffer *buf;
927 	int err;
928 
929 	if (cs->nr_segs == cs->pipe->buffers)
930 		return -EIO;
931 
932 	err = unlock_request(cs->req);
933 	if (err)
934 		return err;
935 
936 	fuse_copy_finish(cs);
937 
938 	buf = cs->pipebufs;
939 	get_page(page);
940 	buf->page = page;
941 	buf->offset = offset;
942 	buf->len = count;
943 
944 	cs->pipebufs++;
945 	cs->nr_segs++;
946 	cs->len = 0;
947 
948 	return 0;
949 }
950 
951 /*
952  * Copy a page in the request to/from the userspace buffer.  Must be
953  * done atomically
954  */
955 static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
956 			  unsigned offset, unsigned count, int zeroing)
957 {
958 	int err;
959 	struct page *page = *pagep;
960 
961 	if (page && zeroing && count < PAGE_SIZE)
962 		clear_highpage(page);
963 
964 	while (count) {
965 		if (cs->write && cs->pipebufs && page) {
966 			return fuse_ref_page(cs, page, offset, count);
967 		} else if (!cs->len) {
968 			if (cs->move_pages && page &&
969 			    offset == 0 && count == PAGE_SIZE) {
970 				err = fuse_try_move_page(cs, pagep);
971 				if (err <= 0)
972 					return err;
973 			} else {
974 				err = fuse_copy_fill(cs);
975 				if (err)
976 					return err;
977 			}
978 		}
979 		if (page) {
980 			void *mapaddr = kmap_atomic(page);
981 			void *buf = mapaddr + offset;
982 			offset += fuse_copy_do(cs, &buf, &count);
983 			kunmap_atomic(mapaddr);
984 		} else
985 			offset += fuse_copy_do(cs, NULL, &count);
986 	}
987 	if (page && !cs->write)
988 		flush_dcache_page(page);
989 	return 0;
990 }
991 
992 /* Copy pages in the request to/from userspace buffer */
993 static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
994 			   int zeroing)
995 {
996 	unsigned i;
997 	struct fuse_req *req = cs->req;
998 
999 	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
1000 		int err;
1001 		unsigned offset = req->page_descs[i].offset;
1002 		unsigned count = min(nbytes, req->page_descs[i].length);
1003 
1004 		err = fuse_copy_page(cs, &req->pages[i], offset, count,
1005 				     zeroing);
1006 		if (err)
1007 			return err;
1008 
1009 		nbytes -= count;
1010 	}
1011 	return 0;
1012 }
1013 
1014 /* Copy a single argument in the request to/from userspace buffer */
1015 static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
1016 {
1017 	while (size) {
1018 		if (!cs->len) {
1019 			int err = fuse_copy_fill(cs);
1020 			if (err)
1021 				return err;
1022 		}
1023 		fuse_copy_do(cs, &val, &size);
1024 	}
1025 	return 0;
1026 }
1027 
1028 /* Copy request arguments to/from userspace buffer */
1029 static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
1030 			  unsigned argpages, struct fuse_arg *args,
1031 			  int zeroing)
1032 {
1033 	int err = 0;
1034 	unsigned i;
1035 
1036 	for (i = 0; !err && i < numargs; i++)  {
1037 		struct fuse_arg *arg = &args[i];
1038 		if (i == numargs - 1 && argpages)
1039 			err = fuse_copy_pages(cs, arg->size, zeroing);
1040 		else
1041 			err = fuse_copy_one(cs, arg->value, arg->size);
1042 	}
1043 	return err;
1044 }
1045 
1046 static int forget_pending(struct fuse_iqueue *fiq)
1047 {
1048 	return fiq->forget_list_head.next != NULL;
1049 }
1050 
1051 static int request_pending(struct fuse_iqueue *fiq)
1052 {
1053 	return !list_empty(&fiq->pending) || !list_empty(&fiq->interrupts) ||
1054 		forget_pending(fiq);
1055 }
1056 
1057 /*
1058  * Transfer an interrupt request to userspace
1059  *
1060  * Unlike other requests this is assembled on demand, without a need
1061  * to allocate a separate fuse_req structure.
1062  *
1063  * Called with fiq->waitq.lock held, releases it
1064  */
1065 static int fuse_read_interrupt(struct fuse_iqueue *fiq,
1066 			       struct fuse_copy_state *cs,
1067 			       size_t nbytes, struct fuse_req *req)
1068 __releases(fiq->waitq.lock)
1069 {
1070 	struct fuse_in_header ih;
1071 	struct fuse_interrupt_in arg;
1072 	unsigned reqsize = sizeof(ih) + sizeof(arg);
1073 	int err;
1074 
1075 	list_del_init(&req->intr_entry);
1076 	req->intr_unique = fuse_get_unique(fiq);
1077 	memset(&ih, 0, sizeof(ih));
1078 	memset(&arg, 0, sizeof(arg));
1079 	ih.len = reqsize;
1080 	ih.opcode = FUSE_INTERRUPT;
1081 	ih.unique = req->intr_unique;
1082 	arg.unique = req->in.h.unique;
1083 
1084 	spin_unlock(&fiq->waitq.lock);
1085 	if (nbytes < reqsize)
1086 		return -EINVAL;
1087 
1088 	err = fuse_copy_one(cs, &ih, sizeof(ih));
1089 	if (!err)
1090 		err = fuse_copy_one(cs, &arg, sizeof(arg));
1091 	fuse_copy_finish(cs);
1092 
1093 	return err ? err : reqsize;
1094 }
1095 
1096 static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq,
1097 					       unsigned max,
1098 					       unsigned *countp)
1099 {
1100 	struct fuse_forget_link *head = fiq->forget_list_head.next;
1101 	struct fuse_forget_link **newhead = &head;
1102 	unsigned count;
1103 
1104 	for (count = 0; *newhead != NULL && count < max; count++)
1105 		newhead = &(*newhead)->next;
1106 
1107 	fiq->forget_list_head.next = *newhead;
1108 	*newhead = NULL;
1109 	if (fiq->forget_list_head.next == NULL)
1110 		fiq->forget_list_tail = &fiq->forget_list_head;
1111 
1112 	if (countp != NULL)
1113 		*countp = count;
1114 
1115 	return head;
1116 }
1117 
1118 static int fuse_read_single_forget(struct fuse_iqueue *fiq,
1119 				   struct fuse_copy_state *cs,
1120 				   size_t nbytes)
1121 __releases(fiq->waitq.lock)
1122 {
1123 	int err;
1124 	struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL);
1125 	struct fuse_forget_in arg = {
1126 		.nlookup = forget->forget_one.nlookup,
1127 	};
1128 	struct fuse_in_header ih = {
1129 		.opcode = FUSE_FORGET,
1130 		.nodeid = forget->forget_one.nodeid,
1131 		.unique = fuse_get_unique(fiq),
1132 		.len = sizeof(ih) + sizeof(arg),
1133 	};
1134 
1135 	spin_unlock(&fiq->waitq.lock);
1136 	kfree(forget);
1137 	if (nbytes < ih.len)
1138 		return -EINVAL;
1139 
1140 	err = fuse_copy_one(cs, &ih, sizeof(ih));
1141 	if (!err)
1142 		err = fuse_copy_one(cs, &arg, sizeof(arg));
1143 	fuse_copy_finish(cs);
1144 
1145 	if (err)
1146 		return err;
1147 
1148 	return ih.len;
1149 }
1150 
1151 static int fuse_read_batch_forget(struct fuse_iqueue *fiq,
1152 				   struct fuse_copy_state *cs, size_t nbytes)
1153 __releases(fiq->waitq.lock)
1154 {
1155 	int err;
1156 	unsigned max_forgets;
1157 	unsigned count;
1158 	struct fuse_forget_link *head;
1159 	struct fuse_batch_forget_in arg = { .count = 0 };
1160 	struct fuse_in_header ih = {
1161 		.opcode = FUSE_BATCH_FORGET,
1162 		.unique = fuse_get_unique(fiq),
1163 		.len = sizeof(ih) + sizeof(arg),
1164 	};
1165 
1166 	if (nbytes < ih.len) {
1167 		spin_unlock(&fiq->waitq.lock);
1168 		return -EINVAL;
1169 	}
1170 
1171 	max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1172 	head = dequeue_forget(fiq, max_forgets, &count);
1173 	spin_unlock(&fiq->waitq.lock);
1174 
1175 	arg.count = count;
1176 	ih.len += count * sizeof(struct fuse_forget_one);
1177 	err = fuse_copy_one(cs, &ih, sizeof(ih));
1178 	if (!err)
1179 		err = fuse_copy_one(cs, &arg, sizeof(arg));
1180 
1181 	while (head) {
1182 		struct fuse_forget_link *forget = head;
1183 
1184 		if (!err) {
1185 			err = fuse_copy_one(cs, &forget->forget_one,
1186 					    sizeof(forget->forget_one));
1187 		}
1188 		head = forget->next;
1189 		kfree(forget);
1190 	}
1191 
1192 	fuse_copy_finish(cs);
1193 
1194 	if (err)
1195 		return err;
1196 
1197 	return ih.len;
1198 }
1199 
1200 static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq,
1201 			    struct fuse_copy_state *cs,
1202 			    size_t nbytes)
1203 __releases(fiq->waitq.lock)
1204 {
1205 	if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL)
1206 		return fuse_read_single_forget(fiq, cs, nbytes);
1207 	else
1208 		return fuse_read_batch_forget(fiq, cs, nbytes);
1209 }
1210 
1211 /*
1212  * Read a single request into the userspace filesystem's buffer.  This
1213  * function waits until a request is available, then removes it from
1214  * the pending list and copies the request data to the userspace
1215  * buffer.  If no reply is needed (FORGET), or the request has been
1216  * aborted, or there was an error during the copying, then it is
1217  * finished by calling request_end().  Otherwise it is added to the
1218  * processing list and the 'sent' flag is set.
1219  */
1220 static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
1221 				struct fuse_copy_state *cs, size_t nbytes)
1222 {
1223 	ssize_t err;
1224 	struct fuse_conn *fc = fud->fc;
1225 	struct fuse_iqueue *fiq = &fc->iq;
1226 	struct fuse_pqueue *fpq = &fud->pq;
1227 	struct fuse_req *req;
1228 	struct fuse_in *in;
1229 	unsigned reqsize;
1230 
1231  restart:
1232 	spin_lock(&fiq->waitq.lock);
1233 	err = -EAGAIN;
1234 	if ((file->f_flags & O_NONBLOCK) && fiq->connected &&
1235 	    !request_pending(fiq))
1236 		goto err_unlock;
1237 
1238 	err = wait_event_interruptible_exclusive_locked(fiq->waitq,
1239 				!fiq->connected || request_pending(fiq));
1240 	if (err)
1241 		goto err_unlock;
1242 
1243 	if (!fiq->connected) {
1244 		err = (fc->aborted && fc->abort_err) ? -ECONNABORTED : -ENODEV;
1245 		goto err_unlock;
1246 	}
1247 
1248 	if (!list_empty(&fiq->interrupts)) {
1249 		req = list_entry(fiq->interrupts.next, struct fuse_req,
1250 				 intr_entry);
1251 		return fuse_read_interrupt(fiq, cs, nbytes, req);
1252 	}
1253 
1254 	if (forget_pending(fiq)) {
1255 		if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0)
1256 			return fuse_read_forget(fc, fiq, cs, nbytes);
1257 
1258 		if (fiq->forget_batch <= -8)
1259 			fiq->forget_batch = 16;
1260 	}
1261 
1262 	req = list_entry(fiq->pending.next, struct fuse_req, list);
1263 	clear_bit(FR_PENDING, &req->flags);
1264 	list_del_init(&req->list);
1265 	spin_unlock(&fiq->waitq.lock);
1266 
1267 	in = &req->in;
1268 	reqsize = in->h.len;
1269 
1270 	/* If request is too large, reply with an error and restart the read */
1271 	if (nbytes < reqsize) {
1272 		req->out.h.error = -EIO;
1273 		/* SETXATTR is special, since its data may be too large */
1274 		if (in->h.opcode == FUSE_SETXATTR)
1275 			req->out.h.error = -E2BIG;
1276 		request_end(fc, req);
1277 		goto restart;
1278 	}
1279 	spin_lock(&fpq->lock);
1280 	list_add(&req->list, &fpq->io);
1281 	spin_unlock(&fpq->lock);
1282 	cs->req = req;
1283 	err = fuse_copy_one(cs, &in->h, sizeof(in->h));
1284 	if (!err)
1285 		err = fuse_copy_args(cs, in->numargs, in->argpages,
1286 				     (struct fuse_arg *) in->args, 0);
1287 	fuse_copy_finish(cs);
1288 	spin_lock(&fpq->lock);
1289 	clear_bit(FR_LOCKED, &req->flags);
1290 	if (!fpq->connected) {
1291 		err = (fc->aborted && fc->abort_err) ? -ECONNABORTED : -ENODEV;
1292 		goto out_end;
1293 	}
1294 	if (err) {
1295 		req->out.h.error = -EIO;
1296 		goto out_end;
1297 	}
1298 	if (!test_bit(FR_ISREPLY, &req->flags)) {
1299 		err = reqsize;
1300 		goto out_end;
1301 	}
1302 	list_move_tail(&req->list, &fpq->processing);
1303 	spin_unlock(&fpq->lock);
1304 	set_bit(FR_SENT, &req->flags);
1305 	/* matches barrier in request_wait_answer() */
1306 	smp_mb__after_atomic();
1307 	if (test_bit(FR_INTERRUPTED, &req->flags))
1308 		queue_interrupt(fiq, req);
1309 
1310 	return reqsize;
1311 
1312 out_end:
1313 	if (!test_bit(FR_PRIVATE, &req->flags))
1314 		list_del_init(&req->list);
1315 	spin_unlock(&fpq->lock);
1316 	request_end(fc, req);
1317 	return err;
1318 
1319  err_unlock:
1320 	spin_unlock(&fiq->waitq.lock);
1321 	return err;
1322 }
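/*
 * From the server's side, the read path above looks roughly like the
 * userspace sketch below (not kernel code; the buffer size, the error
 * policy and handle_request() are illustrative placeholders, with
 * FUSE_MIN_READ_BUFFER taken from the uapi header):
 *
 *	struct fuse_in_header *in;
 *	char buf[FUSE_MIN_READ_BUFFER];
 *	ssize_t n;
 *
 *	for (;;) {
 *		n = read(fuse_fd, buf, sizeof(buf));
 *		if (n <= 0)
 *			break;
 *		in = (struct fuse_in_header *) buf;
 *		handle_request(in);
 *	}
 *
 * Each buffer starts with a struct fuse_in_header whose 'unique' value
 * must be echoed back in the reply (see fuse_dev_do_write() below).
 */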
1323 
1324 static int fuse_dev_open(struct inode *inode, struct file *file)
1325 {
1326 	/*
1327 	 * The private_data field of the fuse device file is used to hold
1328 	 * the fuse_conn(ection) when it is mounted, and to keep track of
1329 	 * whether the file has been mounted already.
1330 	 */
1331 	file->private_data = NULL;
1332 	return 0;
1333 }
1334 
1335 static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
1336 {
1337 	struct fuse_copy_state cs;
1338 	struct file *file = iocb->ki_filp;
1339 	struct fuse_dev *fud = fuse_get_dev(file);
1340 
1341 	if (!fud)
1342 		return -EPERM;
1343 
1344 	if (!iter_is_iovec(to))
1345 		return -EINVAL;
1346 
1347 	fuse_copy_init(&cs, 1, to);
1348 
1349 	return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
1350 }
1351 
1352 static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1353 				    struct pipe_inode_info *pipe,
1354 				    size_t len, unsigned int flags)
1355 {
1356 	int total, ret;
1357 	int page_nr = 0;
1358 	struct pipe_buffer *bufs;
1359 	struct fuse_copy_state cs;
1360 	struct fuse_dev *fud = fuse_get_dev(in);
1361 
1362 	if (!fud)
1363 		return -EPERM;
1364 
1365 	bufs = kmalloc_array(pipe->buffers, sizeof(struct pipe_buffer),
1366 			     GFP_KERNEL);
1367 	if (!bufs)
1368 		return -ENOMEM;
1369 
1370 	fuse_copy_init(&cs, 1, NULL);
1371 	cs.pipebufs = bufs;
1372 	cs.pipe = pipe;
1373 	ret = fuse_dev_do_read(fud, in, &cs, len);
1374 	if (ret < 0)
1375 		goto out;
1376 
1377 	if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
1378 		ret = -EIO;
1379 		goto out;
1380 	}
1381 
1382 	for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
1383 		/*
1384 		 * Need to be careful about this.  Having buf->ops in module
1385 		 * code can Oops if the buffer persists after module unload.
1386 		 */
1387 		bufs[page_nr].ops = &nosteal_pipe_buf_ops;
1388 		bufs[page_nr].flags = 0;
1389 		ret = add_to_pipe(pipe, &bufs[page_nr++]);
1390 		if (unlikely(ret < 0))
1391 			break;
1392 	}
1393 	if (total)
1394 		ret = total;
1395 out:
1396 	for (; page_nr < cs.nr_segs; page_nr++)
1397 		put_page(bufs[page_nr].page);
1398 
1399 	kfree(bufs);
1400 	return ret;
1401 }
1402 
1403 static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
1404 			    struct fuse_copy_state *cs)
1405 {
1406 	struct fuse_notify_poll_wakeup_out outarg;
1407 	int err = -EINVAL;
1408 
1409 	if (size != sizeof(outarg))
1410 		goto err;
1411 
1412 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1413 	if (err)
1414 		goto err;
1415 
1416 	fuse_copy_finish(cs);
1417 	return fuse_notify_poll_wakeup(fc, &outarg);
1418 
1419 err:
1420 	fuse_copy_finish(cs);
1421 	return err;
1422 }
1423 
1424 static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
1425 				   struct fuse_copy_state *cs)
1426 {
1427 	struct fuse_notify_inval_inode_out outarg;
1428 	int err = -EINVAL;
1429 
1430 	if (size != sizeof(outarg))
1431 		goto err;
1432 
1433 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1434 	if (err)
1435 		goto err;
1436 	fuse_copy_finish(cs);
1437 
1438 	down_read(&fc->killsb);
1439 	err = -ENOENT;
1440 	if (fc->sb) {
1441 		err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
1442 					       outarg.off, outarg.len);
1443 	}
1444 	up_read(&fc->killsb);
1445 	return err;
1446 
1447 err:
1448 	fuse_copy_finish(cs);
1449 	return err;
1450 }
1451 
1452 static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
1453 				   struct fuse_copy_state *cs)
1454 {
1455 	struct fuse_notify_inval_entry_out outarg;
1456 	int err = -ENOMEM;
1457 	char *buf;
1458 	struct qstr name;
1459 
1460 	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1461 	if (!buf)
1462 		goto err;
1463 
1464 	err = -EINVAL;
1465 	if (size < sizeof(outarg))
1466 		goto err;
1467 
1468 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1469 	if (err)
1470 		goto err;
1471 
1472 	err = -ENAMETOOLONG;
1473 	if (outarg.namelen > FUSE_NAME_MAX)
1474 		goto err;
1475 
1476 	err = -EINVAL;
1477 	if (size != sizeof(outarg) + outarg.namelen + 1)
1478 		goto err;
1479 
1480 	name.name = buf;
1481 	name.len = outarg.namelen;
1482 	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1483 	if (err)
1484 		goto err;
1485 	fuse_copy_finish(cs);
1486 	buf[outarg.namelen] = 0;
1487 
1488 	down_read(&fc->killsb);
1489 	err = -ENOENT;
1490 	if (fc->sb)
1491 		err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
1492 	up_read(&fc->killsb);
1493 	kfree(buf);
1494 	return err;
1495 
1496 err:
1497 	kfree(buf);
1498 	fuse_copy_finish(cs);
1499 	return err;
1500 }
1501 
1502 static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
1503 			      struct fuse_copy_state *cs)
1504 {
1505 	struct fuse_notify_delete_out outarg;
1506 	int err = -ENOMEM;
1507 	char *buf;
1508 	struct qstr name;
1509 
1510 	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1511 	if (!buf)
1512 		goto err;
1513 
1514 	err = -EINVAL;
1515 	if (size < sizeof(outarg))
1516 		goto err;
1517 
1518 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1519 	if (err)
1520 		goto err;
1521 
1522 	err = -ENAMETOOLONG;
1523 	if (outarg.namelen > FUSE_NAME_MAX)
1524 		goto err;
1525 
1526 	err = -EINVAL;
1527 	if (size != sizeof(outarg) + outarg.namelen + 1)
1528 		goto err;
1529 
1530 	name.name = buf;
1531 	name.len = outarg.namelen;
1532 	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1533 	if (err)
1534 		goto err;
1535 	fuse_copy_finish(cs);
1536 	buf[outarg.namelen] = 0;
1537 
1538 	down_read(&fc->killsb);
1539 	err = -ENOENT;
1540 	if (fc->sb)
1541 		err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
1542 					       outarg.child, &name);
1543 	up_read(&fc->killsb);
1544 	kfree(buf);
1545 	return err;
1546 
1547 err:
1548 	kfree(buf);
1549 	fuse_copy_finish(cs);
1550 	return err;
1551 }
1552 
1553 static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
1554 			     struct fuse_copy_state *cs)
1555 {
1556 	struct fuse_notify_store_out outarg;
1557 	struct inode *inode;
1558 	struct address_space *mapping;
1559 	u64 nodeid;
1560 	int err;
1561 	pgoff_t index;
1562 	unsigned int offset;
1563 	unsigned int num;
1564 	loff_t file_size;
1565 	loff_t end;
1566 
1567 	err = -EINVAL;
1568 	if (size < sizeof(outarg))
1569 		goto out_finish;
1570 
1571 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1572 	if (err)
1573 		goto out_finish;
1574 
1575 	err = -EINVAL;
1576 	if (size - sizeof(outarg) != outarg.size)
1577 		goto out_finish;
1578 
1579 	nodeid = outarg.nodeid;
1580 
1581 	down_read(&fc->killsb);
1582 
1583 	err = -ENOENT;
1584 	if (!fc->sb)
1585 		goto out_up_killsb;
1586 
1587 	inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1588 	if (!inode)
1589 		goto out_up_killsb;
1590 
1591 	mapping = inode->i_mapping;
1592 	index = outarg.offset >> PAGE_SHIFT;
1593 	offset = outarg.offset & ~PAGE_MASK;
1594 	file_size = i_size_read(inode);
1595 	end = outarg.offset + outarg.size;
1596 	if (end > file_size) {
1597 		file_size = end;
1598 		fuse_write_update_size(inode, file_size);
1599 	}
1600 
1601 	num = outarg.size;
1602 	while (num) {
1603 		struct page *page;
1604 		unsigned int this_num;
1605 
1606 		err = -ENOMEM;
1607 		page = find_or_create_page(mapping, index,
1608 					   mapping_gfp_mask(mapping));
1609 		if (!page)
1610 			goto out_iput;
1611 
1612 		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
1613 		err = fuse_copy_page(cs, &page, offset, this_num, 0);
1614 		if (!err && offset == 0 &&
1615 		    (this_num == PAGE_SIZE || file_size == end))
1616 			SetPageUptodate(page);
1617 		unlock_page(page);
1618 		put_page(page);
1619 
1620 		if (err)
1621 			goto out_iput;
1622 
1623 		num -= this_num;
1624 		offset = 0;
1625 		index++;
1626 	}
1627 
1628 	err = 0;
1629 
1630 out_iput:
1631 	iput(inode);
1632 out_up_killsb:
1633 	up_read(&fc->killsb);
1634 out_finish:
1635 	fuse_copy_finish(cs);
1636 	return err;
1637 }
1638 
1639 static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
1640 {
1641 	release_pages(req->pages, req->num_pages);
1642 }
1643 
1644 static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1645 			 struct fuse_notify_retrieve_out *outarg)
1646 {
1647 	int err;
1648 	struct address_space *mapping = inode->i_mapping;
1649 	struct fuse_req *req;
1650 	pgoff_t index;
1651 	loff_t file_size;
1652 	unsigned int num;
1653 	unsigned int offset;
1654 	size_t total_len = 0;
1655 	int num_pages;
1656 
1657 	offset = outarg->offset & ~PAGE_MASK;
1658 	file_size = i_size_read(inode);
1659 
1660 	num = outarg->size;
1661 	if (outarg->offset > file_size)
1662 		num = 0;
1663 	else if (outarg->offset + num > file_size)
1664 		num = file_size - outarg->offset;
1665 
1666 	num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1667 	num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
1668 
1669 	req = fuse_get_req(fc, num_pages);
1670 	if (IS_ERR(req))
1671 		return PTR_ERR(req);
1672 
1673 	req->in.h.opcode = FUSE_NOTIFY_REPLY;
1674 	req->in.h.nodeid = outarg->nodeid;
1675 	req->in.numargs = 2;
1676 	req->in.argpages = 1;
1677 	req->page_descs[0].offset = offset;
1678 	req->end = fuse_retrieve_end;
1679 
1680 	index = outarg->offset >> PAGE_SHIFT;
1681 
1682 	while (num && req->num_pages < num_pages) {
1683 		struct page *page;
1684 		unsigned int this_num;
1685 
1686 		page = find_get_page(mapping, index);
1687 		if (!page)
1688 			break;
1689 
1690 		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
1691 		req->pages[req->num_pages] = page;
1692 		req->page_descs[req->num_pages].length = this_num;
1693 		req->num_pages++;
1694 
1695 		offset = 0;
1696 		num -= this_num;
1697 		total_len += this_num;
1698 		index++;
1699 	}
1700 	req->misc.retrieve_in.offset = outarg->offset;
1701 	req->misc.retrieve_in.size = total_len;
1702 	req->in.args[0].size = sizeof(req->misc.retrieve_in);
1703 	req->in.args[0].value = &req->misc.retrieve_in;
1704 	req->in.args[1].size = total_len;
1705 
1706 	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
1707 	if (err)
1708 		fuse_retrieve_end(fc, req);
1709 
1710 	return err;
1711 }
1712 
1713 static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1714 				struct fuse_copy_state *cs)
1715 {
1716 	struct fuse_notify_retrieve_out outarg;
1717 	struct inode *inode;
1718 	int err;
1719 
1720 	err = -EINVAL;
1721 	if (size != sizeof(outarg))
1722 		goto copy_finish;
1723 
1724 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1725 	if (err)
1726 		goto copy_finish;
1727 
1728 	fuse_copy_finish(cs);
1729 
1730 	down_read(&fc->killsb);
1731 	err = -ENOENT;
1732 	if (fc->sb) {
1733 		u64 nodeid = outarg.nodeid;
1734 
1735 		inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1736 		if (inode) {
1737 			err = fuse_retrieve(fc, inode, &outarg);
1738 			iput(inode);
1739 		}
1740 	}
1741 	up_read(&fc->killsb);
1742 
1743 	return err;
1744 
1745 copy_finish:
1746 	fuse_copy_finish(cs);
1747 	return err;
1748 }
1749 
1750 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1751 		       unsigned int size, struct fuse_copy_state *cs)
1752 {
1753 	/* Don't try to move pages (yet) */
1754 	cs->move_pages = 0;
1755 
1756 	switch (code) {
1757 	case FUSE_NOTIFY_POLL:
1758 		return fuse_notify_poll(fc, size, cs);
1759 
1760 	case FUSE_NOTIFY_INVAL_INODE:
1761 		return fuse_notify_inval_inode(fc, size, cs);
1762 
1763 	case FUSE_NOTIFY_INVAL_ENTRY:
1764 		return fuse_notify_inval_entry(fc, size, cs);
1765 
1766 	case FUSE_NOTIFY_STORE:
1767 		return fuse_notify_store(fc, size, cs);
1768 
1769 	case FUSE_NOTIFY_RETRIEVE:
1770 		return fuse_notify_retrieve(fc, size, cs);
1771 
1772 	case FUSE_NOTIFY_DELETE:
1773 		return fuse_notify_delete(fc, size, cs);
1774 
1775 	default:
1776 		fuse_copy_finish(cs);
1777 		return -EINVAL;
1778 	}
1779 }
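/*
 * Notifications travel in the other direction as an ordinary write
 * with oh.unique == 0 and the notify code in oh.error (see
 * fuse_dev_do_write() below).  A hedged userspace sketch, with the
 * field values purely illustrative:
 *
 *	struct fuse_out_header oh = {
 *		.error	= FUSE_NOTIFY_INVAL_INODE,
 *		.unique	= 0,
 *		.len	= sizeof(oh) + sizeof(struct fuse_notify_inval_inode_out),
 *	};
 *	struct fuse_notify_inval_inode_out arg = { .ino = ino };
 *	struct iovec iov[2] = {
 *		{ .iov_base = &oh,  .iov_len = sizeof(oh)  },
 *		{ .iov_base = &arg, .iov_len = sizeof(arg) },
 *	};
 *	writev(fuse_fd, iov, 2);
 */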
1780 
1781 /* Look up request on processing list by unique ID */
1782 static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
1783 {
1784 	struct fuse_req *req;
1785 
1786 	list_for_each_entry(req, &fpq->processing, list) {
1787 		if (req->in.h.unique == unique || req->intr_unique == unique)
1788 			return req;
1789 	}
1790 	return NULL;
1791 }
1792 
1793 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
1794 			 unsigned nbytes)
1795 {
1796 	unsigned reqsize = sizeof(struct fuse_out_header);
1797 
1798 	if (out->h.error)
1799 		return nbytes != reqsize ? -EINVAL : 0;
1800 
1801 	reqsize += len_args(out->numargs, out->args);
1802 
1803 	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
1804 		return -EINVAL;
1805 	else if (reqsize > nbytes) {
1806 		struct fuse_arg *lastarg = &out->args[out->numargs-1];
1807 		unsigned diffsize = reqsize - nbytes;
1808 		if (diffsize > lastarg->size)
1809 			return -EINVAL;
1810 		lastarg->size -= diffsize;
1811 	}
1812 	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
1813 			      out->page_zeroing);
1814 }
1815 
1816 /*
1817  * Write a single reply to a request.  First the header is copied from
1818  * the write buffer.  The request is then searched on the processing
1819  * list by the unique ID found in the header.  If found, then remove
1820  * it from the list and copy the rest of the buffer to the request.
1821  * The request is finished by calling request_end()
1822  */
1823 static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
1824 				 struct fuse_copy_state *cs, size_t nbytes)
1825 {
1826 	int err;
1827 	struct fuse_conn *fc = fud->fc;
1828 	struct fuse_pqueue *fpq = &fud->pq;
1829 	struct fuse_req *req;
1830 	struct fuse_out_header oh;
1831 
1832 	if (nbytes < sizeof(struct fuse_out_header))
1833 		return -EINVAL;
1834 
1835 	err = fuse_copy_one(cs, &oh, sizeof(oh));
1836 	if (err)
1837 		goto err_finish;
1838 
1839 	err = -EINVAL;
1840 	if (oh.len != nbytes)
1841 		goto err_finish;
1842 
1843 	/*
1844 	 * A zero oh.unique indicates an unsolicited notification message,
1845 	 * and the error field contains the notification code.
1846 	 */
1847 	if (!oh.unique) {
1848 		err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1849 		return err ? err : nbytes;
1850 	}
1851 
1852 	err = -EINVAL;
1853 	if (oh.error <= -1000 || oh.error > 0)
1854 		goto err_finish;
1855 
1856 	spin_lock(&fpq->lock);
1857 	err = -ENOENT;
1858 	if (!fpq->connected)
1859 		goto err_unlock_pq;
1860 
1861 	req = request_find(fpq, oh.unique);
1862 	if (!req)
1863 		goto err_unlock_pq;
1864 
1865 	/* Is it an interrupt reply? */
1866 	if (req->intr_unique == oh.unique) {
1867 		spin_unlock(&fpq->lock);
1868 
1869 		err = -EINVAL;
1870 		if (nbytes != sizeof(struct fuse_out_header))
1871 			goto err_finish;
1872 
1873 		if (oh.error == -ENOSYS)
1874 			fc->no_interrupt = 1;
1875 		else if (oh.error == -EAGAIN)
1876 			queue_interrupt(&fc->iq, req);
1877 
1878 		fuse_copy_finish(cs);
1879 		return nbytes;
1880 	}
1881 
1882 	clear_bit(FR_SENT, &req->flags);
1883 	list_move(&req->list, &fpq->io);
1884 	req->out.h = oh;
1885 	set_bit(FR_LOCKED, &req->flags);
1886 	spin_unlock(&fpq->lock);
1887 	cs->req = req;
1888 	if (!req->out.page_replace)
1889 		cs->move_pages = 0;
1890 
1891 	err = copy_out_args(cs, &req->out, nbytes);
1892 	fuse_copy_finish(cs);
1893 
1894 	spin_lock(&fpq->lock);
1895 	clear_bit(FR_LOCKED, &req->flags);
1896 	if (!fpq->connected)
1897 		err = -ENOENT;
1898 	else if (err)
1899 		req->out.h.error = -EIO;
1900 	if (!test_bit(FR_PRIVATE, &req->flags))
1901 		list_del_init(&req->list);
1902 	spin_unlock(&fpq->lock);
1903 
1904 	request_end(fc, req);
1905 
1906 	return err ? err : nbytes;
1907 
1908  err_unlock_pq:
1909 	spin_unlock(&fpq->lock);
1910  err_finish:
1911 	fuse_copy_finish(cs);
1912 	return err;
1913 }
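/*
 * A hedged sketch of the matching reply from userspace: a single write
 * of a fuse_out_header (with 'unique' copied from the request and
 * 'len' covering everything written), followed by the out arguments,
 * if any.  Shown here for an opcode with no reply payload:
 *
 *	struct fuse_out_header oh = {
 *		.unique	= in->unique,
 *		.error	= 0,
 *		.len	= sizeof(oh),
 *	};
 *	write(fuse_fd, &oh, sizeof(oh));
 */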
1914 
1915 static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
1916 {
1917 	struct fuse_copy_state cs;
1918 	struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp);
1919 
1920 	if (!fud)
1921 		return -EPERM;
1922 
1923 	if (!iter_is_iovec(from))
1924 		return -EINVAL;
1925 
1926 	fuse_copy_init(&cs, 0, from);
1927 
1928 	return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
1929 }
1930 
1931 static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1932 				     struct file *out, loff_t *ppos,
1933 				     size_t len, unsigned int flags)
1934 {
1935 	unsigned nbuf;
1936 	unsigned idx;
1937 	struct pipe_buffer *bufs;
1938 	struct fuse_copy_state cs;
1939 	struct fuse_dev *fud;
1940 	size_t rem;
1941 	ssize_t ret;
1942 
1943 	fud = fuse_get_dev(out);
1944 	if (!fud)
1945 		return -EPERM;
1946 
1947 	bufs = kmalloc_array(pipe->buffers, sizeof(struct pipe_buffer),
1948 			     GFP_KERNEL);
1949 	if (!bufs)
1950 		return -ENOMEM;
1951 
1952 	pipe_lock(pipe);
1953 	nbuf = 0;
1954 	rem = 0;
1955 	for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
1956 		rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
1957 
1958 	ret = -EINVAL;
1959 	if (rem < len) {
1960 		pipe_unlock(pipe);
1961 		goto out;
1962 	}
1963 
1964 	rem = len;
1965 	while (rem) {
1966 		struct pipe_buffer *ibuf;
1967 		struct pipe_buffer *obuf;
1968 
1969 		BUG_ON(nbuf >= pipe->buffers);
1970 		BUG_ON(!pipe->nrbufs);
1971 		ibuf = &pipe->bufs[pipe->curbuf];
1972 		obuf = &bufs[nbuf];
1973 
1974 		if (rem >= ibuf->len) {
1975 			*obuf = *ibuf;
1976 			ibuf->ops = NULL;
1977 			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
1978 			pipe->nrbufs--;
1979 		} else {
1980 			pipe_buf_get(pipe, ibuf);
1981 			*obuf = *ibuf;
1982 			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1983 			obuf->len = rem;
1984 			ibuf->offset += obuf->len;
1985 			ibuf->len -= obuf->len;
1986 		}
1987 		nbuf++;
1988 		rem -= obuf->len;
1989 	}
1990 	pipe_unlock(pipe);
1991 
1992 	fuse_copy_init(&cs, 0, NULL);
1993 	cs.pipebufs = bufs;
1994 	cs.nr_segs = nbuf;
1995 	cs.pipe = pipe;
1996 
1997 	if (flags & SPLICE_F_MOVE)
1998 		cs.move_pages = 1;
1999 
2000 	ret = fuse_dev_do_write(fud, &cs, len);
2001 
2002 	for (idx = 0; idx < nbuf; idx++)
2003 		pipe_buf_release(pipe, &bufs[idx]);
2004 
2005 out:
2006 	kfree(bufs);
2007 	return ret;
2008 }
2009 
2010 static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
2011 {
2012 	__poll_t mask = EPOLLOUT | EPOLLWRNORM;
2013 	struct fuse_iqueue *fiq;
2014 	struct fuse_dev *fud = fuse_get_dev(file);
2015 
2016 	if (!fud)
2017 		return EPOLLERR;
2018 
2019 	fiq = &fud->fc->iq;
2020 	poll_wait(file, &fiq->waitq, wait);
2021 
2022 	spin_lock(&fiq->waitq.lock);
2023 	if (!fiq->connected)
2024 		mask = EPOLLERR;
2025 	else if (request_pending(fiq))
2026 		mask |= EPOLLIN | EPOLLRDNORM;
2027 	spin_unlock(&fiq->waitq.lock);
2028 
2029 	return mask;
2030 }
2031 
2032 /*
2033  * Abort all requests on the given list (pending or processing)
2034  *
2035  * Called without fc->lock held; request_end() takes the locks it needs.
2036  */
2037 static void end_requests(struct fuse_conn *fc, struct list_head *head)
2038 {
2039 	while (!list_empty(head)) {
2040 		struct fuse_req *req;
2041 		req = list_entry(head->next, struct fuse_req, list);
2042 		req->out.h.error = -ECONNABORTED;
2043 		clear_bit(FR_SENT, &req->flags);
2044 		list_del_init(&req->list);
2045 		request_end(fc, req);
2046 	}
2047 }
2048 
2049 static void end_polls(struct fuse_conn *fc)
2050 {
2051 	struct rb_node *p;
2052 
2053 	p = rb_first(&fc->polled_files);
2054 
2055 	while (p) {
2056 		struct fuse_file *ff;
2057 		ff = rb_entry(p, struct fuse_file, polled_node);
2058 		wake_up_interruptible_all(&ff->poll_wait);
2059 
2060 		p = rb_next(p);
2061 	}
2062 }
2063 
2064 /*
2065  * Abort all requests.
2066  *
2067  * Emergency exit in case of a malicious or accidental deadlock, or just a hung
2068  * filesystem.
2069  *
2070  * The same effect is usually achievable through killing the filesystem daemon
2071  * and all users of the filesystem.  The exception is the combination of an
2072  * asynchronous request and the tricky deadlock (see
2073  * Documentation/filesystems/fuse.txt).
2074  *
2075  * Aborting requests under I/O goes as follows: 1: Separate out unlocked
2076  * requests; they should be finished off immediately.  Locked requests will be
2077  * finished after unlock; see unlock_request(). 2: Finish off the unlocked
2078  * requests.  It is possible that some request will finish before we can.  This
2079  * is OK; in that case the request will be removed from the list before we
2080  * touch it.
2081  */
2082 void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
2083 {
2084 	struct fuse_iqueue *fiq = &fc->iq;
2085 
2086 	spin_lock(&fc->lock);
2087 	if (fc->connected) {
2088 		struct fuse_dev *fud;
2089 		struct fuse_req *req, *next;
2090 		LIST_HEAD(to_end1);
2091 		LIST_HEAD(to_end2);
2092 
2093 		fc->connected = 0;
2094 		fc->blocked = 0;
2095 		fc->aborted = is_abort;
2096 		fuse_set_initialized(fc);
2097 		list_for_each_entry(fud, &fc->devices, entry) {
2098 			struct fuse_pqueue *fpq = &fud->pq;
2099 
2100 			spin_lock(&fpq->lock);
2101 			fpq->connected = 0;
2102 			list_for_each_entry_safe(req, next, &fpq->io, list) {
2103 				req->out.h.error = -ECONNABORTED;
2104 				spin_lock(&req->waitq.lock);
2105 				set_bit(FR_ABORTED, &req->flags);
2106 				if (!test_bit(FR_LOCKED, &req->flags)) {
2107 					set_bit(FR_PRIVATE, &req->flags);
2108 					list_move(&req->list, &to_end1);
2109 				}
2110 				spin_unlock(&req->waitq.lock);
2111 			}
2112 			list_splice_init(&fpq->processing, &to_end2);
2113 			spin_unlock(&fpq->lock);
2114 		}
2115 		fc->max_background = UINT_MAX;
2116 		flush_bg_queue(fc);
2117 
2118 		spin_lock(&fiq->waitq.lock);
2119 		fiq->connected = 0;
2120 		list_splice_init(&fiq->pending, &to_end2);
2121 		list_for_each_entry(req, &to_end2, list)
2122 			clear_bit(FR_PENDING, &req->flags);
2123 		while (forget_pending(fiq))
2124 			kfree(dequeue_forget(fiq, 1, NULL));
2125 		wake_up_all_locked(&fiq->waitq);
2126 		spin_unlock(&fiq->waitq.lock);
2127 		kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
2128 		end_polls(fc);
2129 		wake_up_all(&fc->blocked_waitq);
2130 		spin_unlock(&fc->lock);
2131 
2132 		while (!list_empty(&to_end1)) {
2133 			req = list_first_entry(&to_end1, struct fuse_req, list);
2134 			__fuse_get_request(req);
2135 			list_del_init(&req->list);
2136 			request_end(fc, req);
2137 		}
2138 		end_requests(fc, &to_end2);
2139 	} else {
2140 		spin_unlock(&fc->lock);
2141 	}
2142 }
2143 EXPORT_SYMBOL_GPL(fuse_abort_conn);
2144 
2145 int fuse_dev_release(struct inode *inode, struct file *file)
2146 {
2147 	struct fuse_dev *fud = fuse_get_dev(file);
2148 
2149 	if (fud) {
2150 		struct fuse_conn *fc = fud->fc;
2151 		struct fuse_pqueue *fpq = &fud->pq;
2152 
2153 		WARN_ON(!list_empty(&fpq->io));
2154 		end_requests(fc, &fpq->processing);
2155 		/* Are we the last open device? */
2156 		if (atomic_dec_and_test(&fc->dev_count)) {
2157 			WARN_ON(fc->iq.fasync != NULL);
2158 			fuse_abort_conn(fc, false);
2159 		}
2160 		fuse_dev_free(fud);
2161 	}
2162 	return 0;
2163 }
2164 EXPORT_SYMBOL_GPL(fuse_dev_release);
2165 
2166 static int fuse_dev_fasync(int fd, struct file *file, int on)
2167 {
2168 	struct fuse_dev *fud = fuse_get_dev(file);
2169 
2170 	if (!fud)
2171 		return -EPERM;
2172 
2173 	/* No locking - fasync_helper does its own locking */
2174 	return fasync_helper(fd, file, on, &fud->fc->iq.fasync);
2175 }
2176 
2177 static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
2178 {
2179 	struct fuse_dev *fud;
2180 
2181 	if (new->private_data)
2182 		return -EINVAL;
2183 
2184 	fud = fuse_dev_alloc(fc);
2185 	if (!fud)
2186 		return -ENOMEM;
2187 
2188 	new->private_data = fud;
2189 	atomic_inc(&fc->dev_count);
2190 
2191 	return 0;
2192 }
2193 
2194 static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
2195 			   unsigned long arg)
2196 {
2197 	int err = -ENOTTY;
2198 
2199 	if (cmd == FUSE_DEV_IOC_CLONE) {
2200 		int oldfd;
2201 
2202 		err = -EFAULT;
2203 		if (!get_user(oldfd, (__u32 __user *) arg)) {
2204 			struct file *old = fget(oldfd);
2205 
2206 			err = -EINVAL;
2207 			if (old) {
2208 				struct fuse_dev *fud = NULL;
2209 
2210 				/*
2211 				 * Check against file->f_op because CUSE
2212 				 * uses the same ioctl handler.
2213 				 */
2214 				if (old->f_op == file->f_op &&
2215 				    old->f_cred->user_ns == file->f_cred->user_ns)
2216 					fud = fuse_get_dev(old);
2217 
2218 				if (fud) {
2219 					mutex_lock(&fuse_mutex);
2220 					err = fuse_device_clone(fud->fc, file);
2221 					mutex_unlock(&fuse_mutex);
2222 				}
2223 				fput(old);
2224 			}
2225 		}
2226 	}
2227 	return err;
2228 }
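/*
 * FUSE_DEV_IOC_CLONE lets a server attach extra /dev/fuse descriptors
 * to an existing connection, each with its own processing queue.  A
 * hedged userspace sketch (error handling omitted, 'session_fd' is an
 * already-mounted /dev/fuse descriptor):
 *
 *	uint32_t oldfd = session_fd;
 *	int clonefd = open("/dev/fuse", O_RDWR);
 *
 *	ioctl(clonefd, FUSE_DEV_IOC_CLONE, &oldfd);
 */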
2229 
2230 const struct file_operations fuse_dev_operations = {
2231 	.owner		= THIS_MODULE,
2232 	.open		= fuse_dev_open,
2233 	.llseek		= no_llseek,
2234 	.read_iter	= fuse_dev_read,
2235 	.splice_read	= fuse_dev_splice_read,
2236 	.write_iter	= fuse_dev_write,
2237 	.splice_write	= fuse_dev_splice_write,
2238 	.poll		= fuse_dev_poll,
2239 	.release	= fuse_dev_release,
2240 	.fasync		= fuse_dev_fasync,
2241 	.unlocked_ioctl = fuse_dev_ioctl,
2242 	.compat_ioctl   = fuse_dev_ioctl,
2243 };
2244 EXPORT_SYMBOL_GPL(fuse_dev_operations);
2245 
2246 static struct miscdevice fuse_miscdevice = {
2247 	.minor = FUSE_MINOR,
2248 	.name  = "fuse",
2249 	.fops = &fuse_dev_operations,
2250 };
2251 
2252 int __init fuse_dev_init(void)
2253 {
2254 	int err = -ENOMEM;
2255 	fuse_req_cachep = kmem_cache_create("fuse_request",
2256 					    sizeof(struct fuse_req),
2257 					    0, 0, NULL);
2258 	if (!fuse_req_cachep)
2259 		goto out;
2260 
2261 	err = misc_register(&fuse_miscdevice);
2262 	if (err)
2263 		goto out_cache_clean;
2264 
2265 	return 0;
2266 
2267  out_cache_clean:
2268 	kmem_cache_destroy(fuse_req_cachep);
2269  out:
2270 	return err;
2271 }
2272 
2273 void fuse_dev_cleanup(void)
2274 {
2275 	misc_deregister(&fuse_miscdevice);
2276 	kmem_cache_destroy(fuse_req_cachep);
2277 }
2278