/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2005  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/uio.h>
#include <linux/miscdevice.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/slab.h>

MODULE_ALIAS_MISCDEV(FUSE_MINOR);

static kmem_cache_t *fuse_req_cachep;

static inline struct fuse_conn *fuse_get_conn(struct file *file)
{
	struct fuse_conn *fc;
	spin_lock(&fuse_lock);
	fc = file->private_data;
	if (fc && !fc->mounted)
		fc = NULL;
	spin_unlock(&fuse_lock);
	return fc;
}

static inline void fuse_request_init(struct fuse_req *req)
{
	memset(req, 0, sizeof(*req));
	INIT_LIST_HEAD(&req->list);
	init_waitqueue_head(&req->waitq);
	atomic_set(&req->count, 1);
}

struct fuse_req *fuse_request_alloc(void)
{
	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
	if (req)
		fuse_request_init(req);
	return req;
}

void fuse_request_free(struct fuse_req *req)
{
	kmem_cache_free(fuse_req_cachep, req);
}

static inline void block_sigs(sigset_t *oldset)
{
	sigset_t mask;

	siginitsetinv(&mask, sigmask(SIGKILL));
	sigprocmask(SIG_BLOCK, &mask, oldset);
}

static inline void restore_sigs(sigset_t *oldset)
{
	sigprocmask(SIG_SETMASK, oldset, NULL);
}

void fuse_reset_request(struct fuse_req *req)
{
	int preallocated = req->preallocated;
	BUG_ON(atomic_read(&req->count) != 1);
	fuse_request_init(req);
	req->preallocated = preallocated;
}

static void __fuse_get_request(struct fuse_req *req)
{
	atomic_inc(&req->count);
}

/* Must be called with > 1 refcount */
static void __fuse_put_request(struct fuse_req *req)
{
	BUG_ON(atomic_read(&req->count) < 2);
	atomic_dec(&req->count);
}

static struct fuse_req *do_get_request(struct fuse_conn *fc)
{
	struct fuse_req *req;

	spin_lock(&fuse_lock);
	BUG_ON(list_empty(&fc->unused_list));
	req = list_entry(fc->unused_list.next, struct fuse_req, list);
	list_del_init(&req->list);
	spin_unlock(&fuse_lock);
	fuse_request_init(req);
	req->preallocated = 1;
	req->in.h.uid = current->fsuid;
	req->in.h.gid = current->fsgid;
	req->in.h.pid = current->pid;
	return req;
}

/* This can return NULL, but only if it is interrupted by a SIGKILL */
struct fuse_req *fuse_get_request(struct fuse_conn *fc)
{
	int intr;
	sigset_t oldset;

	block_sigs(&oldset);
	intr = down_interruptible(&fc->outstanding_sem);
	restore_sigs(&oldset);
	return intr ? NULL : do_get_request(fc);
}
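
/*
 * Illustrative sketch, not part of the original file: the pattern a
 * hypothetical caller is expected to follow, treating a NULL return
 * as interruption by SIGKILL:
 *
 *	struct fuse_req *req = fuse_get_request(fc);
 *	if (!req)
 *		return -EINTR;
 *	...fill in req->in, then...
 *	request_send(fc, req);
 *	err = req->out.h.error;
 *	fuse_put_request(fc, req);
 */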

static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
{
	spin_lock(&fuse_lock);
	if (req->preallocated)
		list_add(&req->list, &fc->unused_list);
	else
		fuse_request_free(req);

	/* If we are in debt, decrease that first */
	if (fc->outstanding_debt)
		fc->outstanding_debt--;
	else
		up(&fc->outstanding_sem);
	spin_unlock(&fuse_lock);
}

void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
	if (atomic_dec_and_test(&req->count))
		fuse_putback_request(fc, req);
}

void fuse_release_background(struct fuse_req *req)
{
	iput(req->inode);
	iput(req->inode2);
	if (req->file)
		fput(req->file);
	spin_lock(&fuse_lock);
	list_del(&req->bg_entry);
	spin_unlock(&fuse_lock);
}

static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
{
	int i;
	struct fuse_init_out *arg = &req->misc.init_out;

	if (arg->major != FUSE_KERNEL_VERSION)
		fc->conn_error = 1;
	else {
		fc->minor = arg->minor;
		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
	}

	/* After the INIT reply is received, other requests can go
	   out.  So do (FUSE_MAX_OUTSTANDING - 1) up()s on
	   outstanding_sem.  The last up() is done in
	   fuse_putback_request() */
	for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
		up(&fc->outstanding_sem);
}

/*
 * This function is called when a request is finished.  Either a reply
 * has arrived or it was interrupted (and not yet sent) or some error
 * occurred during communication with userspace, or the device file
 * was closed.  It decreases the reference count for the request.  In
 * case of a background request the references to the stored objects
 * are released.  The requester thread is woken up (if still waiting),
 * and finally the request is either freed or put on the unused_list.
 *
 * Called with fuse_lock, unlocks it
 */
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
{
	int putback;
	req->finished = 1;
	putback = atomic_dec_and_test(&req->count);
	spin_unlock(&fuse_lock);
	if (req->background) {
		down_read(&fc->sbput_sem);
		if (fc->mounted)
			fuse_release_background(req);
		up_read(&fc->sbput_sem);
	}
	wake_up(&req->waitq);
	if (req->in.h.opcode == FUSE_INIT)
		process_init_reply(fc, req);
	else if (req->in.h.opcode == FUSE_RELEASE && req->inode == NULL) {
		/* Special case for failed iget in CREATE */
		u64 nodeid = req->in.h.nodeid;
		__fuse_get_request(req);
		fuse_reset_request(req);
		fuse_send_forget(fc, req, nodeid, 1);
		putback = 0;
	}
	if (putback)
		fuse_putback_request(fc, req);
}

/*
 * Unfortunately, request interruption does not just solve the
 * deadlock problem, it causes problems too.  These stem from the fact
 * that an interrupted request continues to be processed in userspace,
 * while all the locks and object references (inode and file) held
 * during the operation are released.
 *
 * Releasing the locks is exactly why the request needs to be
 * interrupted, so there's not a lot that can be done about this,
 * except to introduce additional locking in userspace.
 *
 * More important is to keep the inode and file references until
 * userspace has replied, otherwise FORGET and RELEASE could be sent
 * while the inode/file is still used by the filesystem.
 *
 * For this reason the concept of a "background" request is
 * introduced.  An interrupted request is backgrounded if it has
 * already been sent to userspace.  Backgrounding involves getting an
 * extra reference to the inode(s) or file used in the request, and
 * adding the request to the fc->background list.  When a reply is
 * received for a background request, the object references are
 * released, and the request is removed from the list.  If the
 * filesystem is unmounted while there are still background requests,
 * the list is walked and the references are released as if a reply
 * had been received.
 *
 * There's one more use for a background request.  The RELEASE message
 * is always sent as background, since it doesn't return an error or
 * data.
 */
static void background_request(struct fuse_conn *fc, struct fuse_req *req)
{
	req->background = 1;
	list_add(&req->bg_entry, &fc->background);
	if (req->inode)
		req->inode = igrab(req->inode);
	if (req->inode2)
		req->inode2 = igrab(req->inode2);
	if (req->file)
		get_file(req->file);
}

/* Called with fuse_lock held.  Releases, and then reacquires it. */
static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
{
	sigset_t oldset;

	spin_unlock(&fuse_lock);
	block_sigs(&oldset);
	wait_event_interruptible(req->waitq, req->finished);
	restore_sigs(&oldset);
	spin_lock(&fuse_lock);
	if (req->finished)
		return;

	req->out.h.error = -EINTR;
	req->interrupted = 1;
	if (req->locked) {
		/* This is an uninterruptible sleep, because data is
		   being copied to/from the buffers of req.  While the
		   request is locked, there mustn't be any filesystem
		   operation (e.g. a page fault), since that could
		   lead to deadlock */
		spin_unlock(&fuse_lock);
		wait_event(req->waitq, !req->locked);
		spin_lock(&fuse_lock);
	}
	if (!req->sent && !list_empty(&req->list)) {
		list_del(&req->list);
		__fuse_put_request(req);
	} else if (!req->finished && req->sent)
		background_request(fc, req);
}

static unsigned len_args(unsigned numargs, struct fuse_arg *args)
{
	unsigned nbytes = 0;
	unsigned i;

	for (i = 0; i < numargs; i++)
		nbytes += args[i].size;

	return nbytes;
}

static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
{
	fc->reqctr++;
	/* zero is special */
	if (fc->reqctr == 0)
		fc->reqctr = 1;
	req->in.h.unique = fc->reqctr;
	req->in.h.len = sizeof(struct fuse_in_header) +
		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
	if (!req->preallocated) {
		/* If the request is not preallocated (either FORGET
		   or RELEASE), then still decrease outstanding_sem,
		   so the user can't open an infinite number of files
		   while not processing the RELEASE requests.  However,
		   for efficiency do it without blocking: if down()
		   would block, just increase the debt instead */
		if (down_trylock(&fc->outstanding_sem))
			fc->outstanding_debt++;
	}
	list_add_tail(&req->list, &fc->pending);
	wake_up(&fc->waitq);
}

/*
 * This can only be interrupted by a SIGKILL
 */
void request_send(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;
	spin_lock(&fuse_lock);
	if (!fc->connected)
		req->out.h.error = -ENOTCONN;
	else if (fc->conn_error)
		req->out.h.error = -ECONNREFUSED;
	else {
		queue_request(fc, req);
		/* acquire extra reference, since request is still needed
		   after request_end() */
		__fuse_get_request(req);

		request_wait_answer(fc, req);
	}
	spin_unlock(&fuse_lock);
}
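
/*
 * Illustrative sketch, not part of the original file: roughly how a
 * hypothetical synchronous operation might use request_send().  The
 * opcode and argument layout below are assumptions for illustration,
 * not copied from the FUSE sources:
 *
 *	req->in.h.opcode = FUSE_GETATTR;
 *	req->in.h.nodeid = get_node_id(inode);
 *	req->inode = inode;
 *	req->out.numargs = 1;
 *	req->out.args[0].size = sizeof(struct fuse_attr_out);
 *	req->out.args[0].value = &outarg;
 *	request_send(fc, req);
 *	err = req->out.h.error;
 *	fuse_put_request(fc, req);
 */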

static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
{
	spin_lock(&fuse_lock);
	if (fc->connected) {
		queue_request(fc, req);
		spin_unlock(&fuse_lock);
	} else {
		req->out.h.error = -ENOTCONN;
		request_end(fc, req);
	}
}

void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 0;
	request_send_nowait(fc, req);
}

void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;
	spin_lock(&fuse_lock);
	background_request(fc, req);
	spin_unlock(&fuse_lock);
	request_send_nowait(fc, req);
}
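
/*
 * Illustrative sketch, not part of the original file: RELEASE is the
 * typical background request, since no caller waits for its answer.
 * A hypothetical sender might look like this (the argument layout is
 * an assumption):
 *
 *	req->in.h.opcode = FUSE_RELEASE;
 *	req->in.h.nodeid = get_node_id(inode);
 *	req->inode = inode;
 *	req->in.numargs = 1;
 *	req->in.args[0].size = sizeof(struct fuse_release_in);
 *	req->in.args[0].value = inarg;
 *	request_send_background(fc, req);
 */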

void fuse_send_init(struct fuse_conn *fc)
{
	/* This is called from fuse_read_super() so there's guaranteed
	   to be a request available */
	struct fuse_req *req = do_get_request(fc);
	struct fuse_init_in *arg = &req->misc.init_in;
	arg->major = FUSE_KERNEL_VERSION;
	arg->minor = FUSE_KERNEL_MINOR_VERSION;
	req->in.h.opcode = FUSE_INIT;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(*arg);
	req->in.args[0].value = arg;
	req->out.numargs = 1;
	/* A variable length argument is used for backward
	   compatibility with interface version < 7.5.  The rest of
	   init_out is zeroed by do_get_request(), so a short reply is
	   not a problem */
	req->out.argvar = 1;
	req->out.args[0].size = sizeof(struct fuse_init_out);
	req->out.args[0].value = &req->misc.init_out;
	request_send_background(fc, req);
}

/*
 * Lock the request.  Up to the next unlock_request() there mustn't be
 * anything that could cause a page-fault.  If the request was already
 * interrupted, bail out.
 */
static inline int lock_request(struct fuse_req *req)
{
	int err = 0;
	if (req) {
		spin_lock(&fuse_lock);
		if (req->interrupted)
			err = -ENOENT;
		else
			req->locked = 1;
		spin_unlock(&fuse_lock);
	}
	return err;
}

/*
 * Unlock the request.  If it was interrupted while locked, the
 * requester thread is currently waiting for it to be unlocked, so
 * wake it up.
 */
static inline void unlock_request(struct fuse_req *req)
{
	if (req) {
		spin_lock(&fuse_lock);
		req->locked = 0;
		if (req->interrupted)
			wake_up(&req->waitq);
		spin_unlock(&fuse_lock);
	}
}

struct fuse_copy_state {
	int write;			/* nonzero: copying to userspace */
	struct fuse_req *req;
	const struct iovec *iov;	/* remaining userspace segments */
	unsigned long nr_segs;
	unsigned long seglen;		/* bytes left in current segment */
	unsigned long addr;		/* user address within the segment */
	struct page *pg;		/* currently pinned user page */
	void *mapaddr;			/* kernel mapping of pg */
	void *buf;			/* current position within mapaddr */
	unsigned len;			/* bytes left in the mapped chunk */
};

static void fuse_copy_init(struct fuse_copy_state *cs, int write,
			   struct fuse_req *req, const struct iovec *iov,
			   unsigned long nr_segs)
{
	memset(cs, 0, sizeof(*cs));
	cs->write = write;
	cs->req = req;
	cs->iov = iov;
	cs->nr_segs = nr_segs;
}

/* Unmap and put previous page of userspace buffer */
static inline void fuse_copy_finish(struct fuse_copy_state *cs)
{
	if (cs->mapaddr) {
		kunmap_atomic(cs->mapaddr, KM_USER0);
		if (cs->write) {
			flush_dcache_page(cs->pg);
			set_page_dirty_lock(cs->pg);
		}
		put_page(cs->pg);
		cs->mapaddr = NULL;
	}
}

/*
 * Get another pageful of the userspace buffer, map it into kernel
 * address space, and lock the request
 */
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
	unsigned long offset;
	int err;

	unlock_request(cs->req);
	fuse_copy_finish(cs);
	if (!cs->seglen) {
		BUG_ON(!cs->nr_segs);
		cs->seglen = cs->iov[0].iov_len;
		cs->addr = (unsigned long) cs->iov[0].iov_base;
		cs->iov++;
		cs->nr_segs--;
	}
	down_read(&current->mm->mmap_sem);
	err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
			     &cs->pg, NULL);
	up_read(&current->mm->mmap_sem);
	if (err < 0)
		return err;
	BUG_ON(err != 1);
	offset = cs->addr % PAGE_SIZE;
	cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
	cs->buf = cs->mapaddr + offset;
	cs->len = min(PAGE_SIZE - offset, cs->seglen);
	cs->seglen -= cs->len;
	cs->addr += cs->len;

	return lock_request(cs->req);
}

/* Do as much copy to/from userspace buffer as we can */
static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val,
			       unsigned *size)
{
	unsigned ncpy = min(*size, cs->len);
	if (val) {
		if (cs->write)
			memcpy(cs->buf, *val, ncpy);
		else
			memcpy(*val, cs->buf, ncpy);
		*val += ncpy;
	}
	*size -= ncpy;
	cs->len -= ncpy;
	cs->buf += ncpy;
	return ncpy;
}

/*
 * Copy a page in the request to/from the userspace buffer.  Must be
 * done atomically
 */
static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
				 unsigned offset, unsigned count, int zeroing)
{
	if (page && zeroing && count < PAGE_SIZE) {
		void *mapaddr = kmap_atomic(page, KM_USER1);
		memset(mapaddr, 0, PAGE_SIZE);
		kunmap_atomic(mapaddr, KM_USER1);
	}
	while (count) {
		int err;
		if (!cs->len && (err = fuse_copy_fill(cs)))
			return err;
		if (page) {
			void *mapaddr = kmap_atomic(page, KM_USER1);
			void *buf = mapaddr + offset;
			offset += fuse_copy_do(cs, &buf, &count);
			kunmap_atomic(mapaddr, KM_USER1);
		} else
			offset += fuse_copy_do(cs, NULL, &count);
	}
	if (page && !cs->write)
		flush_dcache_page(page);
	return 0;
}

/* Copy pages in the request to/from userspace buffer */
static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
			   int zeroing)
{
	unsigned i;
	struct fuse_req *req = cs->req;
	unsigned offset = req->page_offset;
	unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);

	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
		struct page *page = req->pages[i];
		int err = fuse_copy_page(cs, page, offset, count, zeroing);
		if (err)
			return err;

		nbytes -= count;
		count = min(nbytes, (unsigned) PAGE_SIZE);
		offset = 0;
	}
	return 0;
}

/* Copy a single argument in the request to/from userspace buffer */
static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
{
	while (size) {
		int err;
		if (!cs->len && (err = fuse_copy_fill(cs)))
			return err;
		fuse_copy_do(cs, &val, &size);
	}
	return 0;
}

/* Copy request arguments to/from userspace buffer */
static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
			  unsigned argpages, struct fuse_arg *args,
			  int zeroing)
{
	int err = 0;
	unsigned i;

	for (i = 0; !err && i < numargs; i++) {
		struct fuse_arg *arg = &args[i];
		if (i == numargs - 1 && argpages)
			err = fuse_copy_pages(cs, arg->size, zeroing);
		else
			err = fuse_copy_one(cs, arg->value, arg->size);
	}
	return err;
}
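
/*
 * Illustrative sketch, not part of the original file: on the wire a
 * request is just the header followed by the concatenated arguments,
 * which is what len_args() sizes and fuse_copy_args() copies.  For
 * FUSE_INIT, for example, the daemon reads back to back:
 *
 *	struct fuse_in_header	(len, opcode, unique, nodeid, uid, ...)
 *	struct fuse_init_in	(major, minor)
 *
 * with in.h.len covering the total of the two.
 */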

/* Wait until a request is available on the pending list */
static void request_wait(struct fuse_conn *fc)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue_exclusive(&fc->waitq, &wait);
	while (fc->mounted && list_empty(&fc->pending)) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (signal_pending(current))
			break;

		spin_unlock(&fuse_lock);
		schedule();
		spin_lock(&fuse_lock);
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(&fc->waitq, &wait);
}

/*
 * Read a single request into the userspace filesystem's buffer.  This
 * function waits until a request is available, then removes it from
 * the pending list and copies the request data to the userspace
 * buffer.  If no reply is needed (FORGET) or the request has been
 * interrupted or there was an error during the copying then it's
 * finished by calling request_end().  Otherwise add it to the
 * processing list, and set the 'sent' flag.
 */
static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
			      unsigned long nr_segs, loff_t *off)
{
	int err;
	struct fuse_conn *fc;
	struct fuse_req *req;
	struct fuse_in *in;
	struct fuse_copy_state cs;
	unsigned reqsize;

 restart:
	spin_lock(&fuse_lock);
	fc = file->private_data;
	err = -EPERM;
	if (!fc)
		goto err_unlock;
	request_wait(fc);
	err = -ENODEV;
	if (!fc->mounted)
		goto err_unlock;
	err = -ERESTARTSYS;
	if (list_empty(&fc->pending))
		goto err_unlock;

	req = list_entry(fc->pending.next, struct fuse_req, list);
	list_del_init(&req->list);

	in = &req->in;
	reqsize = in->h.len;
	/* If request is too large, reply with an error and restart the read */
	if (iov_length(iov, nr_segs) < reqsize) {
		req->out.h.error = -EIO;
		/* SETXATTR is special, since its data may be too large */
		if (in->h.opcode == FUSE_SETXATTR)
			req->out.h.error = -E2BIG;
		request_end(fc, req);
		goto restart;
	}
	spin_unlock(&fuse_lock);
	fuse_copy_init(&cs, 1, req, iov, nr_segs);
	err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
	if (!err)
		err = fuse_copy_args(&cs, in->numargs, in->argpages,
				     (struct fuse_arg *) in->args, 0);
	fuse_copy_finish(&cs);
	spin_lock(&fuse_lock);
	req->locked = 0;
	if (!err && req->interrupted)
		err = -ENOENT;
	if (err) {
		if (!req->interrupted)
			req->out.h.error = -EIO;
		request_end(fc, req);
		return err;
	}
	if (!req->isreply)
		request_end(fc, req);
	else {
		req->sent = 1;
		list_add_tail(&req->list, &fc->processing);
		spin_unlock(&fuse_lock);
	}
	return reqsize;

 err_unlock:
	spin_unlock(&fuse_lock);
	return err;
}

static ssize_t fuse_dev_read(struct file *file, char __user *buf,
			     size_t nbytes, loff_t *off)
{
	struct iovec iov;
	iov.iov_len = nbytes;
	iov.iov_base = buf;
	return fuse_dev_readv(file, &iov, 1, off);
}
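
/*
 * Illustrative sketch, not part of the original file: a minimal
 * userspace read loop against /dev/fuse, matching the semantics of
 * fuse_dev_readv() above.  The names fuse_fd and BUFSIZE are
 * hypothetical:
 *
 *	char buf[BUFSIZE];
 *	ssize_t n = read(fuse_fd, buf, sizeof(buf));
 *	struct fuse_in_header *in = (struct fuse_in_header *) buf;
 *	// on success n == in->len; in->opcode and in->unique
 *	// identify the request
 *
 * If the buffer is smaller than the request, the request itself is
 * failed with EIO (E2BIG for SETXATTR) and the read blocks for the
 * next one.
 */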

/* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
{
	struct list_head *entry;

	list_for_each(entry, &fc->processing) {
		struct fuse_req *req;
		req = list_entry(entry, struct fuse_req, list);
		if (req->in.h.unique == unique)
			return req;
	}
	return NULL;
}

static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
			 unsigned nbytes)
{
	unsigned reqsize = sizeof(struct fuse_out_header);

	if (out->h.error)
		return nbytes != reqsize ? -EINVAL : 0;

	reqsize += len_args(out->numargs, out->args);

	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
		return -EINVAL;
	else if (reqsize > nbytes) {
		struct fuse_arg *lastarg = &out->args[out->numargs-1];
		unsigned diffsize = reqsize - nbytes;
		if (diffsize > lastarg->size)
			return -EINVAL;
		lastarg->size -= diffsize;
	}
	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
			      out->page_zeroing);
}

/*
 * Write a single reply to a request.  First the header is copied from
 * the write buffer.  The request is then looked up on the processing
 * list by the unique ID found in the header.  If found, it is removed
 * from the list and the rest of the buffer is copied to the request.
 * The request is finished by calling request_end()
 */
static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
			       unsigned long nr_segs, loff_t *off)
{
	int err;
	unsigned nbytes = iov_length(iov, nr_segs);
	struct fuse_req *req;
	struct fuse_out_header oh;
	struct fuse_copy_state cs;
	struct fuse_conn *fc = fuse_get_conn(file);
	if (!fc)
		return -ENODEV;

	fuse_copy_init(&cs, 0, NULL, iov, nr_segs);
	if (nbytes < sizeof(struct fuse_out_header))
		return -EINVAL;

	err = fuse_copy_one(&cs, &oh, sizeof(oh));
	if (err)
		goto err_finish;
	err = -EINVAL;
	if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
	    oh.len != nbytes)
		goto err_finish;

	spin_lock(&fuse_lock);
	req = request_find(fc, oh.unique);
	err = -EINVAL;
	if (!req)
		goto err_unlock;

	list_del_init(&req->list);
	if (req->interrupted) {
		request_end(fc, req);
		fuse_copy_finish(&cs);
		return -ENOENT;
	}
	req->out.h = oh;
	req->locked = 1;
	cs.req = req;
	spin_unlock(&fuse_lock);

	err = copy_out_args(&cs, &req->out, nbytes);
	fuse_copy_finish(&cs);

	spin_lock(&fuse_lock);
	req->locked = 0;
	if (!err) {
		if (req->interrupted)
			err = -ENOENT;
	} else if (!req->interrupted)
		req->out.h.error = -EIO;
	request_end(fc, req);

	return err ? err : nbytes;

 err_unlock:
	spin_unlock(&fuse_lock);
 err_finish:
	fuse_copy_finish(&cs);
	return err;
}

static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
			      size_t nbytes, loff_t *off)
{
	struct iovec iov;
	iov.iov_len = nbytes;
	iov.iov_base = (char __user *) buf;
	return fuse_dev_writev(file, &iov, 1, off);
}
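
/*
 * Illustrative sketch, not part of the original file: the matching
 * userspace reply, as parsed by fuse_dev_writev() above.  unique must
 * echo the request, error must be 0 or a negative errno above -1000,
 * and len must equal the total number of bytes written (payload and
 * payload_size are hypothetical):
 *
 *	struct fuse_out_header out;
 *	out.unique = in->unique;
 *	out.error = 0;
 *	out.len = sizeof(out) + payload_size;
 *	struct iovec iov[2] = {
 *		{ .iov_base = &out, .iov_len = sizeof(out) },
 *		{ .iov_base = payload, .iov_len = payload_size },
 *	};
 *	writev(fuse_fd, iov, 2);
 */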

static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
	struct fuse_conn *fc = fuse_get_conn(file);
	unsigned mask = POLLOUT | POLLWRNORM;

	if (!fc)
		return -ENODEV;

	poll_wait(file, &fc->waitq, wait);

	spin_lock(&fuse_lock);
	if (!list_empty(&fc->pending))
		mask |= POLLIN | POLLRDNORM;
	spin_unlock(&fuse_lock);

	return mask;
}
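
/*
 * Illustrative sketch, not part of the original file: a daemon can
 * avoid blocking in read by polling the device first; POLLIN means a
 * request is waiting on the pending list (fuse_fd is hypothetical):
 *
 *	struct pollfd pfd = { .fd = fuse_fd, .events = POLLIN };
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *		n = read(fuse_fd, buf, sizeof(buf));
 */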

/* Abort all requests on the given list (pending or processing) */
static void end_requests(struct fuse_conn *fc, struct list_head *head)
{
	while (!list_empty(head)) {
		struct fuse_req *req;
		req = list_entry(head->next, struct fuse_req, list);
		list_del_init(&req->list);
		req->out.h.error = -ECONNABORTED;
		request_end(fc, req);
		spin_lock(&fuse_lock);
	}
}

static int fuse_dev_release(struct inode *inode, struct file *file)
{
	struct fuse_conn *fc;

	spin_lock(&fuse_lock);
	fc = file->private_data;
	if (fc) {
		fc->connected = 0;
		end_requests(fc, &fc->pending);
		end_requests(fc, &fc->processing);
		fuse_release_conn(fc);
	}
	spin_unlock(&fuse_lock);
	return 0;
}

struct file_operations fuse_dev_operations = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,
	.read		= fuse_dev_read,
	.readv		= fuse_dev_readv,
	.write		= fuse_dev_write,
	.writev		= fuse_dev_writev,
	.poll		= fuse_dev_poll,
	.release	= fuse_dev_release,
};

static struct miscdevice fuse_miscdevice = {
	.minor = FUSE_MINOR,
	.name  = "fuse",
	.fops = &fuse_dev_operations,
};

int __init fuse_dev_init(void)
{
	int err = -ENOMEM;
	fuse_req_cachep = kmem_cache_create("fuse_request",
					    sizeof(struct fuse_req),
					    0, 0, NULL, NULL);
	if (!fuse_req_cachep)
		goto out;

	err = misc_register(&fuse_miscdevice);
	if (err)
		goto out_cache_clean;

	return 0;

 out_cache_clean:
	kmem_cache_destroy(fuse_req_cachep);
 out:
	return err;
}

void fuse_dev_cleanup(void)
{
	misc_deregister(&fuse_miscdevice);
	kmem_cache_destroy(fuse_req_cachep);
}
905