xref: /linux/fs/fuse/dev.c (revision f3d9478b2ce468c3115b02ecae7e975990697f15)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2006  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/poll.h>
14 #include <linux/uio.h>
15 #include <linux/miscdevice.h>
16 #include <linux/pagemap.h>
17 #include <linux/file.h>
18 #include <linux/slab.h>
19 
20 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
21 
22 static kmem_cache_t *fuse_req_cachep;
23 
24 static struct fuse_conn *fuse_get_conn(struct file *file)
25 {
26 	/*
27 	 * Lockless access is OK, because file->private_data is set
28 	 * once during mount and is valid until the file is released.
29 	 */
30 	return file->private_data;
31 }
32 
33 static void fuse_request_init(struct fuse_req *req)
34 {
35 	memset(req, 0, sizeof(*req));
36 	INIT_LIST_HEAD(&req->list);
37 	init_waitqueue_head(&req->waitq);
38 	atomic_set(&req->count, 1);
39 }
40 
41 struct fuse_req *fuse_request_alloc(void)
42 {
43 	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
44 	if (req)
45 		fuse_request_init(req);
46 	return req;
47 }
48 
49 void fuse_request_free(struct fuse_req *req)
50 {
51 	kmem_cache_free(fuse_req_cachep, req);
52 }
53 
54 static void block_sigs(sigset_t *oldset)
55 {
56 	sigset_t mask;
57 
58 	siginitsetinv(&mask, sigmask(SIGKILL));
59 	sigprocmask(SIG_BLOCK, &mask, oldset);
60 }
61 
62 static void restore_sigs(sigset_t *oldset)
63 {
64 	sigprocmask(SIG_SETMASK, oldset, NULL);
65 }
66 
67 /*
68  * Reset the request so that it can be reused
69  *
70  * The caller must be _very_ careful to make sure that it is holding
71  * the only reference to req
72  */
73 void fuse_reset_request(struct fuse_req *req)
74 {
75 	BUG_ON(atomic_read(&req->count) != 1);
76 	fuse_request_init(req);
77 }
78 
79 static void __fuse_get_request(struct fuse_req *req)
80 {
81 	atomic_inc(&req->count);
82 }
83 
84 /* Must be called with > 1 refcount */
85 static void __fuse_put_request(struct fuse_req *req)
86 {
87 	BUG_ON(atomic_read(&req->count) < 2);
88 	atomic_dec(&req->count);
89 }
90 
91 struct fuse_req *fuse_get_req(struct fuse_conn *fc)
92 {
93 	struct fuse_req *req;
94 	sigset_t oldset;
95 	int intr;
96 	int err;
97 
98 	atomic_inc(&fc->num_waiting);
99 	block_sigs(&oldset);
100 	intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
101 	restore_sigs(&oldset);
102 	err = -EINTR;
103 	if (intr)
104 		goto out;
105 
106 	req = fuse_request_alloc();
107 	err = -ENOMEM;
108 	if (!req)
109 		goto out;
110 
111 	req->in.h.uid = current->fsuid;
112 	req->in.h.gid = current->fsgid;
113 	req->in.h.pid = current->pid;
114 	req->waiting = 1;
115 	return req;
116 
117  out:
118 	atomic_dec(&fc->num_waiting);
119 	return ERR_PTR(err);
120 }
121 
122 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
123 {
124 	if (atomic_dec_and_test(&req->count)) {
125 		if (req->waiting)
126 			atomic_dec(&fc->num_waiting);
127 		fuse_request_free(req);
128 	}
129 }
130 
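/*
 * Illustrative sketch, not part of the original source: how an
 * in-kernel caller typically drives the request lifecycle implemented
 * above.  The FUSE_GETATTR opcode and the "inode", "outarg" and "err"
 * locals are made up for the example; the functions and fields are
 * the ones defined in this file and in fuse_i.h.  request_send()
 * blocks until userspace replies (or the caller is killed), after
 * which the result is available in req->out.h.error.
 *
 *	struct fuse_req *req = fuse_get_req(fc);
 *	if (IS_ERR(req))
 *		return PTR_ERR(req);
 *
 *	req->in.h.opcode = FUSE_GETATTR;
 *	req->in.h.nodeid = get_node_id(inode);
 *	req->out.numargs = 1;
 *	req->out.args[0].size = sizeof(outarg);
 *	req->out.args[0].value = &outarg;
 *	request_send(fc, req);
 *	err = req->out.h.error;
 *	fuse_put_request(fc, req);
 */
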
131 /*
132  * Called with sbput_sem held for read (request_end) or write
133  * (fuse_put_super).  By the time fuse_put_super() is finished, all
134  * inodes belonging to background requests must be released, so the
135  * iputs have to be done within the locked region.
136  */
137 void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req)
138 {
139 	iput(req->inode);
140 	iput(req->inode2);
141 	spin_lock(&fc->lock);
142 	list_del(&req->bg_entry);
143 	if (fc->num_background == FUSE_MAX_BACKGROUND) {
144 		fc->blocked = 0;
145 		wake_up_all(&fc->blocked_waitq);
146 	}
147 	fc->num_background--;
148 	spin_unlock(&fc->lock);
149 }
150 
151 /*
152  * This function is called when a request is finished.  Either a reply
153  * has arrived, or the request was interrupted (and not yet sent), or
154  * an error occurred during communication with userspace, or the
155  * device file was closed.  In the case of a background request the
156  * references to the stored objects are released.  The requester thread
157  * is woken up (if still waiting), the 'end' callback is called if
158  * given, else the reference to the request is released.
159  *
160  * Releasing the extra reference for foreground requests must be done
161  * within the same locked region as setting the state to finished.
162  * This is because fuse_reset_request() may be called after the request
163  * is finished, and it must then be the sole possessor.  If the request
164  * is interrupted and put in the background, it will return with an
165  * error and hence never be reset and reused.
166  *
167  * Called with fc->lock held; unlocks it
168  */
169 static void request_end(struct fuse_conn *fc, struct fuse_req *req)
170 {
171 	list_del(&req->list);
172 	req->state = FUSE_REQ_FINISHED;
173 	if (!req->background) {
174 		spin_unlock(&fc->lock);
175 		wake_up(&req->waitq);
176 		fuse_put_request(fc, req);
177 	} else {
178 		void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
179 		req->end = NULL;
180 		spin_unlock(&fc->lock);
181 		down_read(&fc->sbput_sem);
182 		if (fc->mounted)
183 			fuse_release_background(fc, req);
184 		up_read(&fc->sbput_sem);
185 
186 		/* fput must go outside sbput_sem, otherwise it can deadlock */
187 		if (req->file)
188 			fput(req->file);
189 
190 		if (end)
191 			end(fc, req);
192 		else
193 			fuse_put_request(fc, req);
194 	}
195 }
196 
197 /*
198  * Unfortunately request interruption doesn't just solve the deadlock
199  * problem, it causes problems too.  These stem from the fact that an
200  * interrupted request continues to be processed in userspace, while
201  * all the locks and object references (inode and file) held during
202  * the operation are released.
203  *
204  * Releasing the locks is exactly why the request needs to be
205  * interrupted, so there's not a lot that can be done about this,
206  * except to introduce additional locking in userspace.
207  *
208  * More important is to keep the inode and file references until
209  * userspace has replied, otherwise FORGET and RELEASE could be sent
210  * while the inode/file is still used by the filesystem.
211  *
212  * For this reason the concept of a "background" request is introduced.
213  * An interrupted request is backgrounded if it has already been sent
214  * to userspace.  Backgrounding involves getting an extra reference to
215  * the inode(s) and file used in the request, and adding the request
216  * to the fc->background list.  When a reply is received for a
217  * background request, the object references are released and the
218  * request is removed from the list.  If the filesystem is unmounted
219  * while there are still background requests, the list is walked and
220  * the references are released as if a reply had been received.
221  *
222  * There's one more use for a background request.  The RELEASE message
223  * is always sent as a background request, since it doesn't return an
224  * error or data.
225  */
226 static void background_request(struct fuse_conn *fc, struct fuse_req *req)
227 {
228 	req->background = 1;
229 	list_add(&req->bg_entry, &fc->background);
230 	fc->num_background++;
231 	if (fc->num_background == FUSE_MAX_BACKGROUND)
232 		fc->blocked = 1;
233 	if (req->inode)
234 		req->inode = igrab(req->inode);
235 	if (req->inode2)
236 		req->inode2 = igrab(req->inode2);
237 	if (req->file)
238 		get_file(req->file);
239 }
240 
241 /* Called with fc->lock held.  Releases, and then reacquires it. */
242 static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
243 {
244 	sigset_t oldset;
245 
246 	spin_unlock(&fc->lock);
247 	block_sigs(&oldset);
248 	wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
249 	restore_sigs(&oldset);
250 	spin_lock(&fc->lock);
251 	if (req->state == FUSE_REQ_FINISHED && !req->interrupted)
252 		return;
253 
254 	if (!req->interrupted) {
255 		req->out.h.error = -EINTR;
256 		req->interrupted = 1;
257 	}
258 	if (req->locked) {
259 		/* This is an uninterruptible sleep, because data is
260 		   being copied to/from the buffers of req.  While in
261 		   the locked state, there mustn't be any filesystem
262 		   operation (e.g. a page fault), since that could
263 		   lead to deadlock */
264 		spin_unlock(&fc->lock);
265 		wait_event(req->waitq, !req->locked);
266 		spin_lock(&fc->lock);
267 	}
268 	if (req->state == FUSE_REQ_PENDING) {
269 		list_del(&req->list);
270 		__fuse_put_request(req);
271 	} else if (req->state == FUSE_REQ_SENT)
272 		background_request(fc, req);
273 }
274 
275 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
276 {
277 	unsigned nbytes = 0;
278 	unsigned i;
279 
280 	for (i = 0; i < numargs; i++)
281 		nbytes += args[i].size;
282 
283 	return nbytes;
284 }
285 
286 static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
287 {
288 	fc->reqctr++;
289 	/* zero is special */
290 	if (fc->reqctr == 0)
291 		fc->reqctr = 1;
292 	req->in.h.unique = fc->reqctr;
293 	req->in.h.len = sizeof(struct fuse_in_header) +
294 		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
295 	list_add_tail(&req->list, &fc->pending);
296 	req->state = FUSE_REQ_PENDING;
297 	if (!req->waiting) {
298 		req->waiting = 1;
299 		atomic_inc(&fc->num_waiting);
300 	}
301 	wake_up(&fc->waitq);
302 	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
303 }
304 
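/*
 * For reference, a sketch of the header that queue_request() has just
 * filled in and that userspace reads at the start of every request.
 * The layout is reproduced from <linux/fuse.h> rather than defined
 * here, so treat the exact field order as an approximation:
 *
 *	struct fuse_in_header {
 *		__u32	len;		total size, set by queue_request()
 *		__u32	opcode;		e.g. FUSE_LOOKUP, FUSE_READ
 *		__u64	unique;		copied back in the reply header
 *		__u64	nodeid;
 *		__u32	uid;		filled in by fuse_get_req()
 *		__u32	gid;
 *		__u32	pid;
 *		__u32	padding;
 *	};
 */
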
305 /*
306  * This can only be interrupted by a SIGKILL
307  */
308 void request_send(struct fuse_conn *fc, struct fuse_req *req)
309 {
310 	req->isreply = 1;
311 	spin_lock(&fc->lock);
312 	if (!fc->connected)
313 		req->out.h.error = -ENOTCONN;
314 	else if (fc->conn_error)
315 		req->out.h.error = -ECONNREFUSED;
316 	else {
317 		queue_request(fc, req);
318 		/* acquire an extra reference, since the request is still
319 		   needed after request_end() */
320 		__fuse_get_request(req);
321 
322 		request_wait_answer(fc, req);
323 	}
324 	spin_unlock(&fc->lock);
325 }
326 
327 static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
328 {
329 	spin_lock(&fc->lock);
330 	background_request(fc, req);
331 	if (fc->connected) {
332 		queue_request(fc, req);
333 		spin_unlock(&fc->lock);
334 	} else {
335 		req->out.h.error = -ENOTCONN;
336 		request_end(fc, req);
337 	}
338 }
339 
340 void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
341 {
342 	req->isreply = 0;
343 	request_send_nowait(fc, req);
344 }
345 
346 void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
347 {
348 	req->isreply = 1;
349 	request_send_nowait(fc, req);
350 }
351 
352 /*
353  * Lock the request.  Up to the next unlock_request() there mustn't be
354  * anything that could cause a page fault.  If the request was already
355  * interrupted, bail out.
356  */
357 static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
358 {
359 	int err = 0;
360 	if (req) {
361 		spin_lock(&fc->lock);
362 		if (req->interrupted)
363 			err = -ENOENT;
364 		else
365 			req->locked = 1;
366 		spin_unlock(&fc->lock);
367 	}
368 	return err;
369 }
370 
371 /*
372  * Unlock the request.  If it was interrupted while locked, the
373  * requester thread is currently waiting for it to be unlocked, so
374  * wake it up.
375  */
376 static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
377 {
378 	if (req) {
379 		spin_lock(&fc->lock);
380 		req->locked = 0;
381 		if (req->interrupted)
382 			wake_up(&req->waitq);
383 		spin_unlock(&fc->lock);
384 	}
385 }
386 
387 struct fuse_copy_state {
388 	struct fuse_conn *fc;
389 	int write;
390 	struct fuse_req *req;
391 	const struct iovec *iov;
392 	unsigned long nr_segs;
393 	unsigned long seglen;
394 	unsigned long addr;
395 	struct page *pg;
396 	void *mapaddr;
397 	void *buf;
398 	unsigned len;
399 };
400 
401 static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
402 			   int write, struct fuse_req *req,
403 			   const struct iovec *iov, unsigned long nr_segs)
404 {
405 	memset(cs, 0, sizeof(*cs));
406 	cs->fc = fc;
407 	cs->write = write;
408 	cs->req = req;
409 	cs->iov = iov;
410 	cs->nr_segs = nr_segs;
411 }
412 
413 /* Unmap and put previous page of userspace buffer */
414 static void fuse_copy_finish(struct fuse_copy_state *cs)
415 {
416 	if (cs->mapaddr) {
417 		kunmap_atomic(cs->mapaddr, KM_USER0);
418 		if (cs->write) {
419 			flush_dcache_page(cs->pg);
420 			set_page_dirty_lock(cs->pg);
421 		}
422 		put_page(cs->pg);
423 		cs->mapaddr = NULL;
424 	}
425 }
426 
427 /*
428  * Get another pageful of the userspace buffer, map it into the kernel
429  * address space, and lock the request
430  */
431 static int fuse_copy_fill(struct fuse_copy_state *cs)
432 {
433 	unsigned long offset;
434 	int err;
435 
436 	unlock_request(cs->fc, cs->req);
437 	fuse_copy_finish(cs);
438 	if (!cs->seglen) {
439 		BUG_ON(!cs->nr_segs);
440 		cs->seglen = cs->iov[0].iov_len;
441 		cs->addr = (unsigned long) cs->iov[0].iov_base;
442 		cs->iov++;
443 		cs->nr_segs--;
444 	}
445 	down_read(&current->mm->mmap_sem);
446 	err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
447 			     &cs->pg, NULL);
448 	up_read(&current->mm->mmap_sem);
449 	if (err < 0)
450 		return err;
451 	BUG_ON(err != 1);
452 	offset = cs->addr % PAGE_SIZE;
453 	cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
454 	cs->buf = cs->mapaddr + offset;
455 	cs->len = min(PAGE_SIZE - offset, cs->seglen);
456 	cs->seglen -= cs->len;
457 	cs->addr += cs->len;
458 
459 	return lock_request(cs->fc, cs->req);
460 }
461 
462 /* Do as much copying to/from the userspace buffer as we can */
463 static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
464 {
465 	unsigned ncpy = min(*size, cs->len);
466 	if (val) {
467 		if (cs->write)
468 			memcpy(cs->buf, *val, ncpy);
469 		else
470 			memcpy(*val, cs->buf, ncpy);
471 		*val += ncpy;
472 	}
473 	*size -= ncpy;
474 	cs->len -= ncpy;
475 	cs->buf += ncpy;
476 	return ncpy;
477 }
478 
479 /*
480  * Copy a page in the request to/from the userspace buffer.  Must be
481  * done atomically
482  */
483 static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
484 			  unsigned offset, unsigned count, int zeroing)
485 {
486 	if (page && zeroing && count < PAGE_SIZE) {
487 		void *mapaddr = kmap_atomic(page, KM_USER1);
488 		memset(mapaddr, 0, PAGE_SIZE);
489 		kunmap_atomic(mapaddr, KM_USER1);
490 	}
491 	while (count) {
492 		int err;
493 		if (!cs->len && (err = fuse_copy_fill(cs)))
494 			return err;
495 		if (page) {
496 			void *mapaddr = kmap_atomic(page, KM_USER1);
497 			void *buf = mapaddr + offset;
498 			offset += fuse_copy_do(cs, &buf, &count);
499 			kunmap_atomic(mapaddr, KM_USER1);
500 		} else
501 			offset += fuse_copy_do(cs, NULL, &count);
502 	}
503 	if (page && !cs->write)
504 		flush_dcache_page(page);
505 	return 0;
506 }
507 
508 /* Copy pages in the request to/from userspace buffer */
509 static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
510 			   int zeroing)
511 {
512 	unsigned i;
513 	struct fuse_req *req = cs->req;
514 	unsigned offset = req->page_offset;
515 	unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
516 
517 	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
518 		struct page *page = req->pages[i];
519 		int err = fuse_copy_page(cs, page, offset, count, zeroing);
520 		if (err)
521 			return err;
522 
523 		nbytes -= count;
524 		count = min(nbytes, (unsigned) PAGE_SIZE);
525 		offset = 0;
526 	}
527 	return 0;
528 }
529 
530 /* Copy a single argument in the request to/from userspace buffer */
531 static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
532 {
533 	while (size) {
534 		int err;
535 		if (!cs->len && (err = fuse_copy_fill(cs)))
536 			return err;
537 		fuse_copy_do(cs, &val, &size);
538 	}
539 	return 0;
540 }
541 
542 /* Copy request arguments to/from userspace buffer */
543 static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
544 			  unsigned argpages, struct fuse_arg *args,
545 			  int zeroing)
546 {
547 	int err = 0;
548 	unsigned i;
549 
550 	for (i = 0; !err && i < numargs; i++)  {
551 		struct fuse_arg *arg = &args[i];
552 		if (i == numargs - 1 && argpages)
553 			err = fuse_copy_pages(cs, arg->size, zeroing);
554 		else
555 			err = fuse_copy_one(cs, arg->value, arg->size);
556 	}
557 	return err;
558 }
559 
560 /* Wait until a request is available on the pending list */
561 static void request_wait(struct fuse_conn *fc)
562 {
563 	DECLARE_WAITQUEUE(wait, current);
564 
565 	add_wait_queue_exclusive(&fc->waitq, &wait);
566 	while (fc->connected && list_empty(&fc->pending)) {
567 		set_current_state(TASK_INTERRUPTIBLE);
568 		if (signal_pending(current))
569 			break;
570 
571 		spin_unlock(&fc->lock);
572 		schedule();
573 		spin_lock(&fc->lock);
574 	}
575 	set_current_state(TASK_RUNNING);
576 	remove_wait_queue(&fc->waitq, &wait);
577 }
578 
579 /*
580  * Read a single request into the userspace filesystem's buffer.  This
581  * function waits until a request is available, then removes it from
582  * the pending list and copies the request data to the userspace
583  * buffer.  If no reply is needed (FORGET), or the request has been
584  * interrupted, or there was an error during the copying, then it is
585  * finished by calling request_end().  Otherwise it is added to the
586  * processing list and the 'sent' flag is set.
587  */
588 static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
589 			      unsigned long nr_segs, loff_t *off)
590 {
591 	int err;
592 	struct fuse_req *req;
593 	struct fuse_in *in;
594 	struct fuse_copy_state cs;
595 	unsigned reqsize;
596 	struct fuse_conn *fc = fuse_get_conn(file);
597 	if (!fc)
598 		return -EPERM;
599 
600  restart:
601 	spin_lock(&fc->lock);
602 	err = -EAGAIN;
603 	if ((file->f_flags & O_NONBLOCK) && fc->connected &&
604 	    list_empty(&fc->pending))
605 		goto err_unlock;
606 
607 	request_wait(fc);
608 	err = -ENODEV;
609 	if (!fc->connected)
610 		goto err_unlock;
611 	err = -ERESTARTSYS;
612 	if (list_empty(&fc->pending))
613 		goto err_unlock;
614 
615 	req = list_entry(fc->pending.next, struct fuse_req, list);
616 	req->state = FUSE_REQ_READING;
617 	list_move(&req->list, &fc->io);
618 
619 	in = &req->in;
620 	reqsize = in->h.len;
621 	/* If request is too large, reply with an error and restart the read */
622 	if (iov_length(iov, nr_segs) < reqsize) {
623 		req->out.h.error = -EIO;
624 		/* SETXATTR is special, since its data may be too large */
625 		if (in->h.opcode == FUSE_SETXATTR)
626 			req->out.h.error = -E2BIG;
627 		request_end(fc, req);
628 		goto restart;
629 	}
630 	spin_unlock(&fc->lock);
631 	fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);
632 	err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
633 	if (!err)
634 		err = fuse_copy_args(&cs, in->numargs, in->argpages,
635 				     (struct fuse_arg *) in->args, 0);
636 	fuse_copy_finish(&cs);
637 	spin_lock(&fc->lock);
638 	req->locked = 0;
639 	if (!err && req->interrupted)
640 		err = -ENOENT;
641 	if (err) {
642 		if (!req->interrupted)
643 			req->out.h.error = -EIO;
644 		request_end(fc, req);
645 		return err;
646 	}
647 	if (!req->isreply)
648 		request_end(fc, req);
649 	else {
650 		req->state = FUSE_REQ_SENT;
651 		list_move_tail(&req->list, &fc->processing);
652 		spin_unlock(&fc->lock);
653 	}
654 	return reqsize;
655 
656  err_unlock:
657 	spin_unlock(&fc->lock);
658 	return err;
659 }
660 
661 static ssize_t fuse_dev_read(struct file *file, char __user *buf,
662 			     size_t nbytes, loff_t *off)
663 {
664 	struct iovec iov;
665 	iov.iov_len = nbytes;
666 	iov.iov_base = buf;
667 	return fuse_dev_readv(file, &iov, 1, off);
668 }
669 
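/*
 * Illustrative userspace counterpart, not part of this file: a minimal
 * daemon loop reading requests from the device.  The buffer size, the
 * "fuse_fd" descriptor and the handle_request() helper are made-up
 * names; a real filesystem would normally use libfuse instead of
 * talking to /dev/fuse directly.
 *
 *	char buf[128 * 1024];	must hold at least one complete request
 *
 *	for (;;) {
 *		ssize_t n = read(fuse_fd, buf, sizeof(buf));
 *		if (n < 0 && errno == EINTR)
 *			continue;
 *		if (n < (ssize_t) sizeof(struct fuse_in_header))
 *			break;
 *		handle_request((struct fuse_in_header *) buf, n);
 *	}
 */
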
670 /* Look up a request on the processing list by its unique ID */
671 static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
672 {
673 	struct list_head *entry;
674 
675 	list_for_each(entry, &fc->processing) {
676 		struct fuse_req *req;
677 		req = list_entry(entry, struct fuse_req, list);
678 		if (req->in.h.unique == unique)
679 			return req;
680 	}
681 	return NULL;
682 }
683 
684 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
685 			 unsigned nbytes)
686 {
687 	unsigned reqsize = sizeof(struct fuse_out_header);
688 
689 	if (out->h.error)
690 		return nbytes != reqsize ? -EINVAL : 0;
691 
692 	reqsize += len_args(out->numargs, out->args);
693 
694 	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
695 		return -EINVAL;
696 	else if (reqsize > nbytes) {
697 		struct fuse_arg *lastarg = &out->args[out->numargs-1];
698 		unsigned diffsize = reqsize - nbytes;
699 		if (diffsize > lastarg->size)
700 			return -EINVAL;
701 		lastarg->size -= diffsize;
702 	}
703 	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
704 			      out->page_zeroing);
705 }
706 
707 /*
708  * Write a single reply to a request.  First the header is copied from
709  * the write buffer.  The request is then looked up on the processing
710  * list by the unique ID found in the header.  If found, it is removed
711  * from the list and the rest of the buffer is copied to the request.
712  * The request is finished by calling request_end().
713  */
714 static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
715 			       unsigned long nr_segs, loff_t *off)
716 {
717 	int err;
718 	unsigned nbytes = iov_length(iov, nr_segs);
719 	struct fuse_req *req;
720 	struct fuse_out_header oh;
721 	struct fuse_copy_state cs;
722 	struct fuse_conn *fc = fuse_get_conn(file);
723 	if (!fc)
724 		return -EPERM;
725 
726 	fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
727 	if (nbytes < sizeof(struct fuse_out_header))
728 		return -EINVAL;
729 
730 	err = fuse_copy_one(&cs, &oh, sizeof(oh));
731 	if (err)
732 		goto err_finish;
733 	err = -EINVAL;
734 	if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
735 	    oh.len != nbytes)
736 		goto err_finish;
737 
738 	spin_lock(&fc->lock);
739 	err = -ENOENT;
740 	if (!fc->connected)
741 		goto err_unlock;
742 
743 	req = request_find(fc, oh.unique);
744 	err = -EINVAL;
745 	if (!req)
746 		goto err_unlock;
747 
748 	if (req->interrupted) {
749 		spin_unlock(&fc->lock);
750 		fuse_copy_finish(&cs);
751 		spin_lock(&fc->lock);
752 		request_end(fc, req);
753 		return -ENOENT;
754 	}
755 	list_move(&req->list, &fc->io);
756 	req->out.h = oh;
757 	req->locked = 1;
758 	cs.req = req;
759 	spin_unlock(&fc->lock);
760 
761 	err = copy_out_args(&cs, &req->out, nbytes);
762 	fuse_copy_finish(&cs);
763 
764 	spin_lock(&fc->lock);
765 	req->locked = 0;
766 	if (!err) {
767 		if (req->interrupted)
768 			err = -ENOENT;
769 	} else if (!req->interrupted)
770 		req->out.h.error = -EIO;
771 	request_end(fc, req);
772 
773 	return err ? err : nbytes;
774 
775  err_unlock:
776 	spin_unlock(&fc->lock);
777  err_finish:
778 	fuse_copy_finish(&cs);
779 	return err;
780 }
781 
782 static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
783 			      size_t nbytes, loff_t *off)
784 {
785 	struct iovec iov;
786 	iov.iov_len = nbytes;
787 	iov.iov_base = (char __user *) buf;
788 	return fuse_dev_writev(file, &iov, 1, off);
789 }
790 
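/*
 * Illustrative userspace counterpart, not part of this file: sending a
 * reply that the write path above will parse.  "fuse_fd", "in_header"
 * and the fuse_attr_out payload are assumptions chosen for a GETATTR
 * reply; the whole reply must be submitted in a single write, since
 * oh.len is checked against the number of bytes written.
 *
 *	struct fuse_out_header oh;
 *	struct fuse_attr_out arg;
 *	struct iovec iov[2] = {
 *		{ &oh,  sizeof(oh)  },
 *		{ &arg, sizeof(arg) },
 *	};
 *
 *	oh.unique = in_header->unique;
 *	oh.error  = 0;
 *	oh.len    = sizeof(oh) + sizeof(arg);
 *	writev(fuse_fd, iov, 2);
 */
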
791 static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
792 {
793 	unsigned mask = POLLOUT | POLLWRNORM;
794 	struct fuse_conn *fc = fuse_get_conn(file);
795 	if (!fc)
796 		return POLLERR;
797 
798 	poll_wait(file, &fc->waitq, wait);
799 
800 	spin_lock(&fc->lock);
801 	if (!fc->connected)
802 		mask = POLLERR;
803 	else if (!list_empty(&fc->pending))
804 		mask |= POLLIN | POLLRDNORM;
805 	spin_unlock(&fc->lock);
806 
807 	return mask;
808 }
809 
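/*
 * Illustrative userspace counterpart, not part of this file: using
 * poll() together with O_NONBLOCK, which corresponds to the -EAGAIN
 * path in fuse_dev_readv() above.  "fuse_fd", "buf" and
 * handle_request() are made-up names; EAGAIN after POLLIN can happen
 * when another reader has already picked up the request.
 *
 *	struct pollfd pfd = { .fd = fuse_fd, .events = POLLIN };
 *
 *	while (poll(&pfd, 1, -1) > 0) {
 *		ssize_t n = read(fuse_fd, buf, sizeof(buf));
 *		if (n < 0 && errno == EAGAIN)
 *			continue;
 *		if (n > 0)
 *			handle_request((struct fuse_in_header *) buf, n);
 *	}
 */
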
810 /*
811  * Abort all requests on the given list (pending or processing)
812  *
813  * This function releases and reacquires fc->lock
814  */
815 static void end_requests(struct fuse_conn *fc, struct list_head *head)
816 {
817 	while (!list_empty(head)) {
818 		struct fuse_req *req;
819 		req = list_entry(head->next, struct fuse_req, list);
820 		req->out.h.error = -ECONNABORTED;
821 		request_end(fc, req);
822 		spin_lock(&fc->lock);
823 	}
824 }
825 
826 /*
827  * Abort requests under I/O
828  *
829  * The requests are set to interrupted and finished, and the request
830  * waiter is woken up.  This will make request_wait_answer() wait
831  * until the request is unlocked and then return.
832  *
833  * If the request is asynchronous, then the end function needs to be
834  * called after waiting for the request to be unlocked (if it was
835  * locked).
836  */
837 static void end_io_requests(struct fuse_conn *fc)
838 {
839 	while (!list_empty(&fc->io)) {
840 		struct fuse_req *req =
841 			list_entry(fc->io.next, struct fuse_req, list);
842 		void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
843 
844 		req->interrupted = 1;
845 		req->out.h.error = -ECONNABORTED;
846 		req->state = FUSE_REQ_FINISHED;
847 		list_del_init(&req->list);
848 		wake_up(&req->waitq);
849 		if (end) {
850 			req->end = NULL;
851 			/* The end function will consume this reference */
852 			__fuse_get_request(req);
853 			spin_unlock(&fc->lock);
854 			wait_event(req->waitq, !req->locked);
855 			end(fc, req);
856 			spin_lock(&fc->lock);
857 		}
858 	}
859 }
860 
861 /*
862  * Abort all requests.
863  *
864  * Emergency exit in case of a malicious or accidental deadlock, or
865  * just a hung filesystem.
866  *
867  * The same effect is usually achievable through killing the
868  * filesystem daemon and all users of the filesystem.  The exception
869  * is the combination of an asynchronous request and the tricky
870  * deadlock (see Documentation/filesystems/fuse.txt).
871  *
872  * During the aborting, progression of requests from the pending and
873  * processing lists onto the io list, and progression of new requests
874  * onto the pending list is prevented by req->connected being false.
875  *
876  * Progression of requests under I/O to the processing list is
877  * prevented by the req->interrupted flag being true for these
878  * requests.  For this reason requests on the io list must be aborted
879  * first.
880  */
881 void fuse_abort_conn(struct fuse_conn *fc)
882 {
883 	spin_lock(&fc->lock);
884 	if (fc->connected) {
885 		fc->connected = 0;
886 		end_io_requests(fc);
887 		end_requests(fc, &fc->pending);
888 		end_requests(fc, &fc->processing);
889 		wake_up_all(&fc->waitq);
890 		kill_fasync(&fc->fasync, SIGIO, POLL_IN);
891 	}
892 	spin_unlock(&fc->lock);
893 }
894 
895 static int fuse_dev_release(struct inode *inode, struct file *file)
896 {
897 	struct fuse_conn *fc = fuse_get_conn(file);
898 	if (fc) {
899 		spin_lock(&fc->lock);
900 		fc->connected = 0;
901 		end_requests(fc, &fc->pending);
902 		end_requests(fc, &fc->processing);
903 		spin_unlock(&fc->lock);
904 		fasync_helper(-1, file, 0, &fc->fasync);
905 		kobject_put(&fc->kobj);
906 	}
907 
908 	return 0;
909 }
910 
911 static int fuse_dev_fasync(int fd, struct file *file, int on)
912 {
913 	struct fuse_conn *fc = fuse_get_conn(file);
914 	if (!fc)
915 		return -EPERM;
916 
917 	/* No locking - fasync_helper does its own locking */
918 	return fasync_helper(fd, file, on, &fc->fasync);
919 }
920 
921 const struct file_operations fuse_dev_operations = {
922 	.owner		= THIS_MODULE,
923 	.llseek		= no_llseek,
924 	.read		= fuse_dev_read,
925 	.readv		= fuse_dev_readv,
926 	.write		= fuse_dev_write,
927 	.writev		= fuse_dev_writev,
928 	.poll		= fuse_dev_poll,
929 	.release	= fuse_dev_release,
930 	.fasync		= fuse_dev_fasync,
931 };
932 
933 static struct miscdevice fuse_miscdevice = {
934 	.minor = FUSE_MINOR,
935 	.name  = "fuse",
936 	.fops = &fuse_dev_operations,
937 };
938 
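/*
 * Illustrative note, not part of this file: userspace obtains a
 * descriptor for this device by opening /dev/fuse and passes it back
 * to the kernel in the mount data, which is how file->private_data
 * ends up pointing at a fuse_conn (see fuse_fill_super() in inode.c).
 * The mount source, target and rootmode below are made-up example
 * values.
 *
 *	int fd = open("/dev/fuse", O_RDWR);
 *	char opts[128];
 *
 *	snprintf(opts, sizeof(opts), "fd=%d,rootmode=40000,user_id=%d,group_id=%d",
 *		 fd, getuid(), getgid());
 *	mount("myfs", "/mnt/myfs", "fuse", 0, opts);
 */
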
939 int __init fuse_dev_init(void)
940 {
941 	int err = -ENOMEM;
942 	fuse_req_cachep = kmem_cache_create("fuse_request",
943 					    sizeof(struct fuse_req),
944 					    0, 0, NULL, NULL);
945 	if (!fuse_req_cachep)
946 		goto out;
947 
948 	err = misc_register(&fuse_miscdevice);
949 	if (err)
950 		goto out_cache_clean;
951 
952 	return 0;
953 
954  out_cache_clean:
955 	kmem_cache_destroy(fuse_req_cachep);
956  out:
957 	return err;
958 }
959 
960 void fuse_dev_cleanup(void)
961 {
962 	misc_deregister(&fuse_miscdevice);
963 	kmem_cache_destroy(fuse_req_cachep);
964 }
965