xref: /linux/fs/fuse/dev.c (revision de2fe5e07d58424bc286fff3fd3c1b0bf933cd58)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2005  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/poll.h>
14 #include <linux/uio.h>
15 #include <linux/miscdevice.h>
16 #include <linux/pagemap.h>
17 #include <linux/file.h>
18 #include <linux/slab.h>
19 
20 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
21 
22 static kmem_cache_t *fuse_req_cachep;
23 
24 static struct fuse_conn *fuse_get_conn(struct file *file)
25 {
26 	struct fuse_conn *fc;
27 	spin_lock(&fuse_lock);
28 	fc = file->private_data;
29 	if (fc && !fc->connected)
30 		fc = NULL;
31 	spin_unlock(&fuse_lock);
32 	return fc;
33 }
34 
35 static void fuse_request_init(struct fuse_req *req)
36 {
37 	memset(req, 0, sizeof(*req));
38 	INIT_LIST_HEAD(&req->list);
39 	init_waitqueue_head(&req->waitq);
40 	atomic_set(&req->count, 1);
41 }
42 
43 struct fuse_req *fuse_request_alloc(void)
44 {
45 	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
46 	if (req)
47 		fuse_request_init(req);
48 	return req;
49 }
50 
51 void fuse_request_free(struct fuse_req *req)
52 {
53 	kmem_cache_free(fuse_req_cachep, req);
54 }
55 
56 static void block_sigs(sigset_t *oldset)
57 {
58 	sigset_t mask;
59 
60 	siginitsetinv(&mask, sigmask(SIGKILL));
61 	sigprocmask(SIG_BLOCK, &mask, oldset);
62 }
63 
64 static void restore_sigs(sigset_t *oldset)
65 {
66 	sigprocmask(SIG_SETMASK, oldset, NULL);
67 }
68 
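/*
 * Note: block_sigs() blocks every signal except SIGKILL, so the
 * "interruptible" sleeps guarded by it (in fuse_get_request() and
 * request_wait_answer() below) can in practice only be interrupted
 * by SIGKILL.
 */
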
69 /*
70  * Reset request, so that it can be reused
71  *
72  * The caller must be _very_ careful to make sure that it is holding
73  * the only reference to req
74  */
75 void fuse_reset_request(struct fuse_req *req)
76 {
77 	int preallocated = req->preallocated;
78 	BUG_ON(atomic_read(&req->count) != 1);
79 	fuse_request_init(req);
80 	req->preallocated = preallocated;
81 }
82 
83 static void __fuse_get_request(struct fuse_req *req)
84 {
85 	atomic_inc(&req->count);
86 }
87 
88 /* Must be called with > 1 refcount */
89 static void __fuse_put_request(struct fuse_req *req)
90 {
91 	BUG_ON(atomic_read(&req->count) < 2);
92 	atomic_dec(&req->count);
93 }
94 
95 static struct fuse_req *do_get_request(struct fuse_conn *fc)
96 {
97 	struct fuse_req *req;
98 
99 	spin_lock(&fuse_lock);
100 	BUG_ON(list_empty(&fc->unused_list));
101 	req = list_entry(fc->unused_list.next, struct fuse_req, list);
102 	list_del_init(&req->list);
103 	spin_unlock(&fuse_lock);
104 	fuse_request_init(req);
105 	req->preallocated = 1;
106 	req->in.h.uid = current->fsuid;
107 	req->in.h.gid = current->fsgid;
108 	req->in.h.pid = current->pid;
109 	return req;
110 }
111 
112 /* This can return NULL, but only if it is interrupted by a SIGKILL */
113 struct fuse_req *fuse_get_request(struct fuse_conn *fc)
114 {
115 	int intr;
116 	sigset_t oldset;
117 
118 	atomic_inc(&fc->num_waiting);
119 	block_sigs(&oldset);
120 	intr = down_interruptible(&fc->outstanding_sem);
121 	restore_sigs(&oldset);
122 	if (intr) {
123 		atomic_dec(&fc->num_waiting);
124 		return NULL;
125 	}
126 	return do_get_request(fc);
127 }
128 
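/*
 * A typical in-kernel caller uses these request helpers roughly as
 * follows (a sketch only; the opcode and argument setup shown are
 * illustrative, see dir.c and file.c for the real users):
 *
 *	struct fuse_req *req = fuse_get_request(fc);
 *	if (!req)
 *		return -EINTR;			(killed while waiting)
 *	req->in.h.opcode = FUSE_GETATTR;
 *	req->in.h.nodeid = get_node_id(inode);
 *	request_send(fc, req);			(waits for the reply)
 *	err = req->out.h.error;
 *	fuse_put_request(fc, req);
 */
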
129 /* Must be called with fuse_lock held */
130 static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
131 {
132 	if (req->preallocated) {
133 		atomic_dec(&fc->num_waiting);
134 		list_add(&req->list, &fc->unused_list);
135 	} else
136 		fuse_request_free(req);
137 
138 	/* If we are in debt, decrease that first */
139 	if (fc->outstanding_debt)
140 		fc->outstanding_debt--;
141 	else
142 		up(&fc->outstanding_sem);
143 }
144 
145 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
146 {
147 	if (atomic_dec_and_test(&req->count)) {
148 		spin_lock(&fuse_lock);
149 		fuse_putback_request(fc, req);
150 		spin_unlock(&fuse_lock);
151 	}
152 }
153 
154 static void fuse_put_request_locked(struct fuse_conn *fc, struct fuse_req *req)
155 {
156 	if (atomic_dec_and_test(&req->count))
157 		fuse_putback_request(fc, req);
158 }
159 
160 void fuse_release_background(struct fuse_req *req)
161 {
162 	iput(req->inode);
163 	iput(req->inode2);
164 	if (req->file)
165 		fput(req->file);
166 	spin_lock(&fuse_lock);
167 	list_del(&req->bg_entry);
168 	spin_unlock(&fuse_lock);
169 }
170 
171 /*
172  * This function is called when a request is finished.  Either a reply
173  * has arrived or it was interrupted (and not yet sent) or some error
174  * occurred during communication with userspace, or the device file
175  * was closed.  In case of a background request the reference to the
176  * stored objects are released.  The requester thread is woken up (if
177  * still waiting), the 'end' callback is called if given, else the
178  * reference to the request is released
179  *
180  * Releasing extra reference for foreground requests must be done
181  * within the same locked region as setting state to finished.  This
182  * is because fuse_reset_request() may be called after request is
183  * finished and it must be the sole possessor.  If request is
184  * interrupted and put in the background, it will return with an error
185  * and hence never be reset and reused.
186  *
187  * Called with fuse_lock, unlocks it
188  */
189 static void request_end(struct fuse_conn *fc, struct fuse_req *req)
190 {
191 	list_del(&req->list);
192 	req->state = FUSE_REQ_FINISHED;
193 	if (!req->background) {
194 		wake_up(&req->waitq);
195 		fuse_put_request_locked(fc, req);
196 		spin_unlock(&fuse_lock);
197 	} else {
198 		void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
199 		req->end = NULL;
200 		spin_unlock(&fuse_lock);
201 		down_read(&fc->sbput_sem);
202 		if (fc->mounted)
203 			fuse_release_background(req);
204 		up_read(&fc->sbput_sem);
205 		if (end)
206 			end(fc, req);
207 		else
208 			fuse_put_request(fc, req);
209 	}
210 }
211 
212 /*
213  * Unfortunately request interruption does not just solve the deadlock
214  * problem, it causes problems too.  These stem from the fact that an
215  * interrupted request continues to be processed in userspace, while
216  * all the locks and object references (inode and file) held during
217  * the operation are released.
218  *
219  * Releasing the locks is exactly why the request needs to be
220  * interrupted, so there's not a lot that can be done about this,
221  * except introducing additional locking in userspace.
222  *
223  * More important is to keep inode and file references until userspace
224  * has replied, otherwise FORGET and RELEASE could be sent while the
225  * inode/file is still used by the filesystem.
226  *
227  * For this reason the concept of a "background" request is introduced.
228  * An interrupted request is backgrounded if it has already been sent
229  * to userspace.  Backgrounding involves getting an extra reference to
230  * the inode(s) or file used in the request, and adding the request to
231  * the fc->background list.  When a reply is received for a background
232  * request, the object references are released, and the request is
233  * removed from the list.  If the filesystem is unmounted while there
234  * are still background requests, the list is walked and references
235  * are released as if a reply was received.
236  *
237  * There's one more use for a background request.  The RELEASE message is
238  * always sent as background, since it doesn't return an error or
239  * data.
240  */
241 static void background_request(struct fuse_conn *fc, struct fuse_req *req)
242 {
243 	req->background = 1;
244 	list_add(&req->bg_entry, &fc->background);
245 	if (req->inode)
246 		req->inode = igrab(req->inode);
247 	if (req->inode2)
248 		req->inode2 = igrab(req->inode2);
249 	if (req->file)
250 		get_file(req->file);
251 }
252 
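/*
 * To summarize the request life cycle as implemented in this file:
 * a request is queued as FUSE_REQ_PENDING on fc->pending, moves to
 * FUSE_REQ_READING on fc->io while being copied to userspace, then to
 * FUSE_REQ_SENT on fc->processing while waiting for the reply, and
 * finally to FUSE_REQ_FINISHED in request_end().  A request
 * interrupted while still pending is simply dequeued; one interrupted
 * after being sent is backgrounded as described above.
 */
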
253 /* Called with fuse_lock held.  Releases, and then reacquires it. */
254 static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
255 {
256 	sigset_t oldset;
257 
258 	spin_unlock(&fuse_lock);
259 	block_sigs(&oldset);
260 	wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
261 	restore_sigs(&oldset);
262 	spin_lock(&fuse_lock);
263 	if (req->state == FUSE_REQ_FINISHED && !req->interrupted)
264 		return;
265 
266 	if (!req->interrupted) {
267 		req->out.h.error = -EINTR;
268 		req->interrupted = 1;
269 	}
270 	if (req->locked) {
271 		/* This is an uninterruptible sleep, because data is
272 		   being copied to/from the buffers of req.  While the
273 		   request is locked, there mustn't be any filesystem
274 		   operation (e.g. page fault), since that could lead
275 		   to deadlock */
276 		spin_unlock(&fuse_lock);
277 		wait_event(req->waitq, !req->locked);
278 		spin_lock(&fuse_lock);
279 	}
280 	if (req->state == FUSE_REQ_PENDING) {
281 		list_del(&req->list);
282 		__fuse_put_request(req);
283 	} else if (req->state == FUSE_REQ_SENT)
284 		background_request(fc, req);
285 }
286 
287 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
288 {
289 	unsigned nbytes = 0;
290 	unsigned i;
291 
292 	for (i = 0; i < numargs; i++)
293 		nbytes += args[i].size;
294 
295 	return nbytes;
296 }
297 
298 static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
299 {
300 	fc->reqctr++;
301 	/* zero is special */
302 	if (fc->reqctr == 0)
303 		fc->reqctr = 1;
304 	req->in.h.unique = fc->reqctr;
305 	req->in.h.len = sizeof(struct fuse_in_header) +
306 		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
307 	if (!req->preallocated) {
308 		/* If the request is not preallocated (either FORGET
309 		   or RELEASE), then still decrease outstanding_sem, so
310 		   the user can't open an infinite number of files while
311 		   not processing the RELEASE requests.  However, for
312 		   efficiency do it without blocking, so if down()
313 		   would block, just increase the debt instead */
314 		if (down_trylock(&fc->outstanding_sem))
315 			fc->outstanding_debt++;
316 	}
317 	list_add_tail(&req->list, &fc->pending);
318 	req->state = FUSE_REQ_PENDING;
319 	wake_up(&fc->waitq);
320 }
321 
322 /*
323  * This can only be interrupted by a SIGKILL
324  */
325 void request_send(struct fuse_conn *fc, struct fuse_req *req)
326 {
327 	req->isreply = 1;
328 	spin_lock(&fuse_lock);
329 	if (!fc->connected)
330 		req->out.h.error = -ENOTCONN;
331 	else if (fc->conn_error)
332 		req->out.h.error = -ECONNREFUSED;
333 	else {
334 		queue_request(fc, req);
335 		/* acquire an extra reference, since the request is still
336 		   needed after request_end() */
337 		__fuse_get_request(req);
338 
339 		request_wait_answer(fc, req);
340 	}
341 	spin_unlock(&fuse_lock);
342 }
343 
344 static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
345 {
346 	spin_lock(&fuse_lock);
347 	if (fc->connected) {
348 		queue_request(fc, req);
349 		spin_unlock(&fuse_lock);
350 	} else {
351 		req->out.h.error = -ENOTCONN;
352 		request_end(fc, req);
353 	}
354 }
355 
356 void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
357 {
358 	req->isreply = 0;
359 	request_send_nowait(fc, req);
360 }
361 
362 void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
363 {
364 	req->isreply = 1;
365 	spin_lock(&fuse_lock);
366 	background_request(fc, req);
367 	spin_unlock(&fuse_lock);
368 	request_send_nowait(fc, req);
369 }
370 
371 /*
372  * Lock the request.  Up to the next unlock_request() there mustn't be
373  * anything that could cause a page-fault.  If the request was already
374  * interrupted, bail out.
375  */
376 static int lock_request(struct fuse_req *req)
377 {
378 	int err = 0;
379 	if (req) {
380 		spin_lock(&fuse_lock);
381 		if (req->interrupted)
382 			err = -ENOENT;
383 		else
384 			req->locked = 1;
385 		spin_unlock(&fuse_lock);
386 	}
387 	return err;
388 }
389 
390 /*
391  * Unlock request.  If it was interrupted while it was locked, the
392  * requester thread is currently waiting for it to be unlocked, so
393  * wake it up.
394  */
395 static void unlock_request(struct fuse_req *req)
396 {
397 	if (req) {
398 		spin_lock(&fuse_lock);
399 		req->locked = 0;
400 		if (req->interrupted)
401 			wake_up(&req->waitq);
402 		spin_unlock(&fuse_lock);
403 	}
404 }
405 
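/*
 * State used while copying data between a request and the userspace
 * buffer described by an iovec:
 *
 *	write		nonzero when copying to the user buffer
 *			(i.e. the daemon is reading from the device)
 *	req		the request being transferred (NULL for a device
 *			write until the header has been parsed)
 *	iov, nr_segs	the remaining iovec segments
 *	seglen, addr	unconsumed length and user address of the
 *			current segment
 *	pg, mapaddr	the currently pinned and kmapped user page
 *	buf, len	current copy position and bytes left in that page
 */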
406 struct fuse_copy_state {
407 	int write;
408 	struct fuse_req *req;
409 	const struct iovec *iov;
410 	unsigned long nr_segs;
411 	unsigned long seglen;
412 	unsigned long addr;
413 	struct page *pg;
414 	void *mapaddr;
415 	void *buf;
416 	unsigned len;
417 };
418 
419 static void fuse_copy_init(struct fuse_copy_state *cs, int write,
420 			   struct fuse_req *req, const struct iovec *iov,
421 			   unsigned long nr_segs)
422 {
423 	memset(cs, 0, sizeof(*cs));
424 	cs->write = write;
425 	cs->req = req;
426 	cs->iov = iov;
427 	cs->nr_segs = nr_segs;
428 }
429 
430 /* Unmap and put previous page of userspace buffer */
431 static void fuse_copy_finish(struct fuse_copy_state *cs)
432 {
433 	if (cs->mapaddr) {
434 		kunmap_atomic(cs->mapaddr, KM_USER0);
435 		if (cs->write) {
436 			flush_dcache_page(cs->pg);
437 			set_page_dirty_lock(cs->pg);
438 		}
439 		put_page(cs->pg);
440 		cs->mapaddr = NULL;
441 	}
442 }
443 
444 /*
445  * Get another pageful of the userspace buffer, map it to kernel
446  * address space, and lock the request
447  */
448 static int fuse_copy_fill(struct fuse_copy_state *cs)
449 {
450 	unsigned long offset;
451 	int err;
452 
453 	unlock_request(cs->req);
454 	fuse_copy_finish(cs);
455 	if (!cs->seglen) {
456 		BUG_ON(!cs->nr_segs);
457 		cs->seglen = cs->iov[0].iov_len;
458 		cs->addr = (unsigned long) cs->iov[0].iov_base;
459 		cs->iov++;
460 		cs->nr_segs--;
461 	}
462 	down_read(&current->mm->mmap_sem);
463 	err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
464 			     &cs->pg, NULL);
465 	up_read(&current->mm->mmap_sem);
466 	if (err < 0)
467 		return err;
468 	BUG_ON(err != 1);
469 	offset = cs->addr % PAGE_SIZE;
470 	cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
471 	cs->buf = cs->mapaddr + offset;
472 	cs->len = min(PAGE_SIZE - offset, cs->seglen);
473 	cs->seglen -= cs->len;
474 	cs->addr += cs->len;
475 
476 	return lock_request(cs->req);
477 }
478 
479 /* Do as much copying to/from the userspace buffer as we can */
480 static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
481 {
482 	unsigned ncpy = min(*size, cs->len);
483 	if (val) {
484 		if (cs->write)
485 			memcpy(cs->buf, *val, ncpy);
486 		else
487 			memcpy(*val, cs->buf, ncpy);
488 		*val += ncpy;
489 	}
490 	*size -= ncpy;
491 	cs->len -= ncpy;
492 	cs->buf += ncpy;
493 	return ncpy;
494 }
495 
496 /*
497  * Copy a page in the request to/from the userspace buffer.  Must be
498  * done atomically
499  */
500 static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
501 			  unsigned offset, unsigned count, int zeroing)
502 {
503 	if (page && zeroing && count < PAGE_SIZE) {
504 		void *mapaddr = kmap_atomic(page, KM_USER1);
505 		memset(mapaddr, 0, PAGE_SIZE);
506 		kunmap_atomic(mapaddr, KM_USER1);
507 	}
508 	while (count) {
509 		int err;
510 		if (!cs->len && (err = fuse_copy_fill(cs)))
511 			return err;
512 		if (page) {
513 			void *mapaddr = kmap_atomic(page, KM_USER1);
514 			void *buf = mapaddr + offset;
515 			offset += fuse_copy_do(cs, &buf, &count);
516 			kunmap_atomic(mapaddr, KM_USER1);
517 		} else
518 			offset += fuse_copy_do(cs, NULL, &count);
519 	}
520 	if (page && !cs->write)
521 		flush_dcache_page(page);
522 	return 0;
523 }
524 
525 /* Copy pages in the request to/from userspace buffer */
526 static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
527 			   int zeroing)
528 {
529 	unsigned i;
530 	struct fuse_req *req = cs->req;
531 	unsigned offset = req->page_offset;
532 	unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
533 
534 	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
535 		struct page *page = req->pages[i];
536 		int err = fuse_copy_page(cs, page, offset, count, zeroing);
537 		if (err)
538 			return err;
539 
540 		nbytes -= count;
541 		count = min(nbytes, (unsigned) PAGE_SIZE);
542 		offset = 0;
543 	}
544 	return 0;
545 }
546 
547 /* Copy a single argument in the request to/from userspace buffer */
548 static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
549 {
550 	while (size) {
551 		int err;
552 		if (!cs->len && (err = fuse_copy_fill(cs)))
553 			return err;
554 		fuse_copy_do(cs, &val, &size);
555 	}
556 	return 0;
557 }
558 
559 /* Copy request arguments to/from userspace buffer */
560 static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
561 			  unsigned argpages, struct fuse_arg *args,
562 			  int zeroing)
563 {
564 	int err = 0;
565 	unsigned i;
566 
567 	for (i = 0; !err && i < numargs; i++)  {
568 		struct fuse_arg *arg = &args[i];
569 		if (i == numargs - 1 && argpages)
570 			err = fuse_copy_pages(cs, arg->size, zeroing);
571 		else
572 			err = fuse_copy_one(cs, arg->value, arg->size);
573 	}
574 	return err;
575 }
576 
577 /* Wait until a request is available on the pending list */
578 static void request_wait(struct fuse_conn *fc)
579 {
580 	DECLARE_WAITQUEUE(wait, current);
581 
582 	add_wait_queue_exclusive(&fc->waitq, &wait);
583 	while (fc->connected && list_empty(&fc->pending)) {
584 		set_current_state(TASK_INTERRUPTIBLE);
585 		if (signal_pending(current))
586 			break;
587 
588 		spin_unlock(&fuse_lock);
589 		schedule();
590 		spin_lock(&fuse_lock);
591 	}
592 	set_current_state(TASK_RUNNING);
593 	remove_wait_queue(&fc->waitq, &wait);
594 }
595 
596 /*
597  * Read a single request into the userspace filesystem's buffer.  This
598  * function waits until a request is available, then removes it from
599  * the pending list and copies the request data to the userspace
600  * buffer.  If no reply is needed (FORGET), or the request has been
601  * interrupted, or there was an error during the copying, then it is
602  * finished by calling request_end().  Otherwise add it to the
603  * processing list and set its state to 'sent'.
604  */
605 static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
606 			      unsigned long nr_segs, loff_t *off)
607 {
608 	int err;
609 	struct fuse_conn *fc;
610 	struct fuse_req *req;
611 	struct fuse_in *in;
612 	struct fuse_copy_state cs;
613 	unsigned reqsize;
614 
615  restart:
616 	spin_lock(&fuse_lock);
617 	fc = file->private_data;
618 	err = -EPERM;
619 	if (!fc)
620 		goto err_unlock;
621 	request_wait(fc);
622 	err = -ENODEV;
623 	if (!fc->connected)
624 		goto err_unlock;
625 	err = -ERESTARTSYS;
626 	if (list_empty(&fc->pending))
627 		goto err_unlock;
628 
629 	req = list_entry(fc->pending.next, struct fuse_req, list);
630 	req->state = FUSE_REQ_READING;
631 	list_move(&req->list, &fc->io);
632 
633 	in = &req->in;
634 	reqsize = in->h.len;
635 	/* If the request is too large, reply with an error and restart the read */
636 	if (iov_length(iov, nr_segs) < reqsize) {
637 		req->out.h.error = -EIO;
638 		/* SETXATTR is special, since its data may be too large */
639 		if (in->h.opcode == FUSE_SETXATTR)
640 			req->out.h.error = -E2BIG;
641 		request_end(fc, req);
642 		goto restart;
643 	}
644 	spin_unlock(&fuse_lock);
645 	fuse_copy_init(&cs, 1, req, iov, nr_segs);
646 	err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
647 	if (!err)
648 		err = fuse_copy_args(&cs, in->numargs, in->argpages,
649 				     (struct fuse_arg *) in->args, 0);
650 	fuse_copy_finish(&cs);
651 	spin_lock(&fuse_lock);
652 	req->locked = 0;
653 	if (!err && req->interrupted)
654 		err = -ENOENT;
655 	if (err) {
656 		if (!req->interrupted)
657 			req->out.h.error = -EIO;
658 		request_end(fc, req);
659 		return err;
660 	}
661 	if (!req->isreply)
662 		request_end(fc, req);
663 	else {
664 		req->state = FUSE_REQ_SENT;
665 		list_move_tail(&req->list, &fc->processing);
666 		spin_unlock(&fuse_lock);
667 	}
668 	return reqsize;
669 
670  err_unlock:
671 	spin_unlock(&fuse_lock);
672 	return err;
673 }
674 
675 static ssize_t fuse_dev_read(struct file *file, char __user *buf,
676 			     size_t nbytes, loff_t *off)
677 {
678 	struct iovec iov;
679 	iov.iov_len = nbytes;
680 	iov.iov_base = buf;
681 	return fuse_dev_readv(file, &iov, 1, off);
682 }
683 
684 /* Look up request on processing list by unique ID */
685 static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
686 {
687 	struct list_head *entry;
688 
689 	list_for_each(entry, &fc->processing) {
690 		struct fuse_req *req;
691 		req = list_entry(entry, struct fuse_req, list);
692 		if (req->in.h.unique == unique)
693 			return req;
694 	}
695 	return NULL;
696 }
697 
698 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
699 			 unsigned nbytes)
700 {
701 	unsigned reqsize = sizeof(struct fuse_out_header);
702 
703 	if (out->h.error)
704 		return nbytes != reqsize ? -EINVAL : 0;
705 
706 	reqsize += len_args(out->numargs, out->args);
707 
708 	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
709 		return -EINVAL;
710 	else if (reqsize > nbytes) {
711 		struct fuse_arg *lastarg = &out->args[out->numargs-1];
712 		unsigned diffsize = reqsize - nbytes;
713 		if (diffsize > lastarg->size)
714 			return -EINVAL;
715 		lastarg->size -= diffsize;
716 	}
717 	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
718 			      out->page_zeroing);
719 }
720 
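/*
 * For example, a READ request typically declares a single variable
 * length reply argument (out->argvar set, args[0].size holding the
 * maximum transfer size).  If the daemon replies with fewer bytes
 * than that maximum, copy_out_args() shrinks the last argument to
 * what was actually written; replying with more than the declared
 * maximum is rejected with -EINVAL.
 */
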
721 /*
722  * Write a single reply to a request.  First the header is copied from
723  * the write buffer.  The request is then looked up on the processing
724  * list by the unique ID found in the header.  If found, it is removed
725  * from the list and the rest of the buffer is copied to the request.
726  * The request is finished by calling request_end().
727  */
728 static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
729 			       unsigned long nr_segs, loff_t *off)
730 {
731 	int err;
732 	unsigned nbytes = iov_length(iov, nr_segs);
733 	struct fuse_req *req;
734 	struct fuse_out_header oh;
735 	struct fuse_copy_state cs;
736 	struct fuse_conn *fc = fuse_get_conn(file);
737 	if (!fc)
738 		return -ENODEV;
739 
740 	fuse_copy_init(&cs, 0, NULL, iov, nr_segs);
741 	if (nbytes < sizeof(struct fuse_out_header))
742 		return -EINVAL;
743 
744 	err = fuse_copy_one(&cs, &oh, sizeof(oh));
745 	if (err)
746 		goto err_finish;
747 	err = -EINVAL;
748 	if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
749 	    oh.len != nbytes)
750 		goto err_finish;
751 
752 	spin_lock(&fuse_lock);
753 	err = -ENOENT;
754 	if (!fc->connected)
755 		goto err_unlock;
756 
757 	req = request_find(fc, oh.unique);
758 	err = -EINVAL;
759 	if (!req)
760 		goto err_unlock;
761 
762 	if (req->interrupted) {
763 		spin_unlock(&fuse_lock);
764 		fuse_copy_finish(&cs);
765 		spin_lock(&fuse_lock);
766 		request_end(fc, req);
767 		return -ENOENT;
768 	}
769 	list_move(&req->list, &fc->io);
770 	req->out.h = oh;
771 	req->locked = 1;
772 	cs.req = req;
773 	spin_unlock(&fuse_lock);
774 
775 	err = copy_out_args(&cs, &req->out, nbytes);
776 	fuse_copy_finish(&cs);
777 
778 	spin_lock(&fuse_lock);
779 	req->locked = 0;
780 	if (!err) {
781 		if (req->interrupted)
782 			err = -ENOENT;
783 	} else if (!req->interrupted)
784 		req->out.h.error = -EIO;
785 	request_end(fc, req);
786 
787 	return err ? err : nbytes;
788 
789  err_unlock:
790 	spin_unlock(&fuse_lock);
791  err_finish:
792 	fuse_copy_finish(&cs);
793 	return err;
794 }
795 
796 static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
797 			      size_t nbytes, loff_t *off)
798 {
799 	struct iovec iov;
800 	iov.iov_len = nbytes;
801 	iov.iov_base = (char __user *) buf;
802 	return fuse_dev_writev(file, &iov, 1, off);
803 }
804 
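/*
 * Seen from the other end of the device, a filesystem daemon
 * (normally through libfuse) opens /dev/fuse, passes the descriptor
 * to the mount as the "fd=" option, and then drives the read/write
 * pair above with a loop roughly like this sketch (buffer sizing and
 * error handling omitted; the names are illustrative):
 *
 *	for (;;) {
 *		ssize_t n = read(fd, buf, bufsize);
 *		struct fuse_in_header *in = (struct fuse_in_header *) buf;
 *		... dispatch on in->opcode and build a reply starting
 *		    with a struct fuse_out_header whose 'unique' field
 *		    echoes in->unique ...
 *		write(fd, reply, replylen);	(FORGET gets no reply)
 *	}
 */
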
805 static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
806 {
807 	struct fuse_conn *fc = fuse_get_conn(file);
808 	unsigned mask = POLLOUT | POLLWRNORM;
809 
810 	if (!fc)
811 		return -ENODEV;
812 
813 	poll_wait(file, &fc->waitq, wait);
814 
815 	spin_lock(&fuse_lock);
816 	if (!list_empty(&fc->pending))
817 		mask |= POLLIN | POLLRDNORM;
818 	spin_unlock(&fuse_lock);
819 
820 	return mask;
821 }
822 
823 /*
824  * Abort all requests on the given list (pending or processing)
825  *
826  * This function releases and reacquires fuse_lock
827  */
828 static void end_requests(struct fuse_conn *fc, struct list_head *head)
829 {
830 	while (!list_empty(head)) {
831 		struct fuse_req *req;
832 		req = list_entry(head->next, struct fuse_req, list);
833 		req->out.h.error = -ECONNABORTED;
834 		request_end(fc, req);
835 		spin_lock(&fuse_lock);
836 	}
837 }
838 
839 /*
840  * Abort requests under I/O
841  *
842  * The requests are marked interrupted and finished, and the request
843  * waiter is woken up.  This makes request_wait_answer() wait until
844  * the request is unlocked and then return.
845  *
846  * If the request is asynchronous, then the end function needs to be
847  * called after waiting for the request to be unlocked (if it was
848  * locked).
849  */
850 static void end_io_requests(struct fuse_conn *fc)
851 {
852 	while (!list_empty(&fc->io)) {
853 		struct fuse_req *req =
854 			list_entry(fc->io.next, struct fuse_req, list);
855 		void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
856 
857 		req->interrupted = 1;
858 		req->out.h.error = -ECONNABORTED;
859 		req->state = FUSE_REQ_FINISHED;
860 		list_del_init(&req->list);
861 		wake_up(&req->waitq);
862 		if (end) {
863 			req->end = NULL;
864 			/* The end function will consume this reference */
865 			__fuse_get_request(req);
866 			spin_unlock(&fuse_lock);
867 			wait_event(req->waitq, !req->locked);
868 			end(fc, req);
869 			spin_lock(&fuse_lock);
870 		}
871 	}
872 }
873 
874 /*
875  * Abort all requests.
876  *
877  * Emergency exit in case of a malicious or accidental deadlock, or
878  * just a hung filesystem.
879  *
880  * The same effect is usually achievable through killing the
881  * filesystem daemon and all users of the filesystem.  The exception
882  * is the combination of an asynchronous request and the tricky
883  * deadlock (see Documentation/filesystems/fuse.txt).
884  *
885  * During the aborting, progression of requests from the pending and
886  * processing lists onto the io list, and progression of new requests
887  * onto the pending list, are prevented by fc->connected being false.
888  *
889  * Progression of requests under I/O to the processing list is
890  * prevented by the req->interrupted flag being true for these
891  * requests.  For this reason requests on the io list must be aborted
892  * first.
893  */
894 void fuse_abort_conn(struct fuse_conn *fc)
895 {
896 	spin_lock(&fuse_lock);
897 	if (fc->connected) {
898 		fc->connected = 0;
899 		end_io_requests(fc);
900 		end_requests(fc, &fc->pending);
901 		end_requests(fc, &fc->processing);
902 		wake_up_all(&fc->waitq);
903 	}
904 	spin_unlock(&fuse_lock);
905 }
906 
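/*
 * Note that fuse_abort_conn() is not reachable through the device
 * file itself; it is normally triggered from the connection's sysfs
 * "abort" attribute (set up in inode.c), as described in
 * Documentation/filesystems/fuse.txt.
 */
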
907 static int fuse_dev_release(struct inode *inode, struct file *file)
908 {
909 	struct fuse_conn *fc;
910 
911 	spin_lock(&fuse_lock);
912 	fc = file->private_data;
913 	if (fc) {
914 		fc->connected = 0;
915 		end_requests(fc, &fc->pending);
916 		end_requests(fc, &fc->processing);
917 	}
918 	spin_unlock(&fuse_lock);
919 	if (fc)
920 		kobject_put(&fc->kobj);
921 
922 	return 0;
923 }
924 
925 const struct file_operations fuse_dev_operations = {
926 	.owner		= THIS_MODULE,
927 	.llseek		= no_llseek,
928 	.read		= fuse_dev_read,
929 	.readv		= fuse_dev_readv,
930 	.write		= fuse_dev_write,
931 	.writev		= fuse_dev_writev,
932 	.poll		= fuse_dev_poll,
933 	.release	= fuse_dev_release,
934 };
935 
936 static struct miscdevice fuse_miscdevice = {
937 	.minor = FUSE_MINOR,
938 	.name  = "fuse",
939 	.fops = &fuse_dev_operations,
940 };
941 
942 int __init fuse_dev_init(void)
943 {
944 	int err = -ENOMEM;
945 	fuse_req_cachep = kmem_cache_create("fuse_request",
946 					    sizeof(struct fuse_req),
947 					    0, 0, NULL, NULL);
948 	if (!fuse_req_cachep)
949 		goto out;
950 
951 	err = misc_register(&fuse_miscdevice);
952 	if (err)
953 		goto out_cache_clean;
954 
955 	return 0;
956 
957  out_cache_clean:
958 	kmem_cache_destroy(fuse_req_cachep);
959  out:
960 	return err;
961 }
962 
963 void fuse_dev_cleanup(void)
964 {
965 	misc_deregister(&fuse_miscdevice);
966 	kmem_cache_destroy(fuse_req_cachep);
967 }
968