xref: /linux/fs/fuse/dev.c (revision 13da9e200fe4740b02cd51e07ab454627e228920)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/poll.h>
14 #include <linux/uio.h>
15 #include <linux/miscdevice.h>
16 #include <linux/pagemap.h>
17 #include <linux/file.h>
18 #include <linux/slab.h>
19 
20 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
21 MODULE_ALIAS("devname:fuse");
22 
23 static struct kmem_cache *fuse_req_cachep;
24 
25 static struct fuse_conn *fuse_get_conn(struct file *file)
26 {
27 	/*
28 	 * Lockless access is OK, because file->private_data is set
29 	 * once during mount and is valid until the file is released.
30 	 */
31 	return file->private_data;
32 }
33 
34 static void fuse_request_init(struct fuse_req *req)
35 {
36 	memset(req, 0, sizeof(*req));
37 	INIT_LIST_HEAD(&req->list);
38 	INIT_LIST_HEAD(&req->intr_entry);
39 	init_waitqueue_head(&req->waitq);
40 	atomic_set(&req->count, 1);
41 }
42 
43 struct fuse_req *fuse_request_alloc(void)
44 {
45 	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
46 	if (req)
47 		fuse_request_init(req);
48 	return req;
49 }
50 EXPORT_SYMBOL_GPL(fuse_request_alloc);
51 
52 struct fuse_req *fuse_request_alloc_nofs(void)
53 {
54 	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
55 	if (req)
56 		fuse_request_init(req);
57 	return req;
58 }
59 
60 void fuse_request_free(struct fuse_req *req)
61 {
62 	kmem_cache_free(fuse_req_cachep, req);
63 }
64 
65 static void block_sigs(sigset_t *oldset)
66 {
67 	sigset_t mask;
68 
69 	siginitsetinv(&mask, sigmask(SIGKILL));
70 	sigprocmask(SIG_BLOCK, &mask, oldset);
71 }
72 
73 static void restore_sigs(sigset_t *oldset)
74 {
75 	sigprocmask(SIG_SETMASK, oldset, NULL);
76 }
77 
78 static void __fuse_get_request(struct fuse_req *req)
79 {
80 	atomic_inc(&req->count);
81 }
82 
83 /* Must be called with > 1 refcount */
84 static void __fuse_put_request(struct fuse_req *req)
85 {
86 	BUG_ON(atomic_read(&req->count) < 2);
87 	atomic_dec(&req->count);
88 }
89 
90 static void fuse_req_init_context(struct fuse_req *req)
91 {
92 	req->in.h.uid = current_fsuid();
93 	req->in.h.gid = current_fsgid();
94 	req->in.h.pid = current->pid;
95 }
96 
97 struct fuse_req *fuse_get_req(struct fuse_conn *fc)
98 {
99 	struct fuse_req *req;
100 	sigset_t oldset;
101 	int intr;
102 	int err;
103 
104 	atomic_inc(&fc->num_waiting);
105 	block_sigs(&oldset);
106 	intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
107 	restore_sigs(&oldset);
108 	err = -EINTR;
109 	if (intr)
110 		goto out;
111 
112 	err = -ENOTCONN;
113 	if (!fc->connected)
114 		goto out;
115 
116 	req = fuse_request_alloc();
117 	err = -ENOMEM;
118 	if (!req)
119 		goto out;
120 
121 	fuse_req_init_context(req);
122 	req->waiting = 1;
123 	return req;
124 
125  out:
126 	atomic_dec(&fc->num_waiting);
127 	return ERR_PTR(err);
128 }
129 EXPORT_SYMBOL_GPL(fuse_get_req);
130 
131 /*
132  * Return the request in fuse_file->reserved_req.  However, that may
133  * currently be in use.  If that is the case, wait for it to become
134  * available.
135  */
136 static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
137 					 struct file *file)
138 {
139 	struct fuse_req *req = NULL;
140 	struct fuse_file *ff = file->private_data;
141 
142 	do {
143 		wait_event(fc->reserved_req_waitq, ff->reserved_req);
144 		spin_lock(&fc->lock);
145 		if (ff->reserved_req) {
146 			req = ff->reserved_req;
147 			ff->reserved_req = NULL;
148 			get_file(file);
149 			req->stolen_file = file;
150 		}
151 		spin_unlock(&fc->lock);
152 	} while (!req);
153 
154 	return req;
155 }
156 
157 /*
158  * Put stolen request back into fuse_file->reserved_req
159  */
160 static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
161 {
162 	struct file *file = req->stolen_file;
163 	struct fuse_file *ff = file->private_data;
164 
165 	spin_lock(&fc->lock);
166 	fuse_request_init(req);
167 	BUG_ON(ff->reserved_req);
168 	ff->reserved_req = req;
169 	wake_up_all(&fc->reserved_req_waitq);
170 	spin_unlock(&fc->lock);
171 	fput(file);
172 }
173 
174 /*
175  * Gets a request for a file operation; always succeeds.
176  *
177  * This is used for sending the FLUSH request, which must get to
178  * userspace, due to POSIX locks which may need to be unlocked.
179  *
180  * If allocation fails due to OOM, use the reserved request in
181  * fuse_file.
182  *
183  * This is very unlikely to deadlock accidentally, since the
184  * filesystem should not have its own file open.  If deadlock is
185  * intentional, it can still be broken by "aborting" the filesystem.
186  */
187 struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
188 {
189 	struct fuse_req *req;
190 
191 	atomic_inc(&fc->num_waiting);
192 	wait_event(fc->blocked_waitq, !fc->blocked);
193 	req = fuse_request_alloc();
194 	if (!req)
195 		req = get_reserved_req(fc, file);
196 
197 	fuse_req_init_context(req);
198 	req->waiting = 1;
199 	return req;
200 }
201 
202 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
203 {
204 	if (atomic_dec_and_test(&req->count)) {
205 		if (req->waiting)
206 			atomic_dec(&fc->num_waiting);
207 
208 		if (req->stolen_file)
209 			put_reserved_req(fc, req);
210 		else
211 			fuse_request_free(req);
212 	}
213 }
214 EXPORT_SYMBOL_GPL(fuse_put_request);
215 
216 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
217 {
218 	unsigned nbytes = 0;
219 	unsigned i;
220 
221 	for (i = 0; i < numargs; i++)
222 		nbytes += args[i].size;
223 
224 	return nbytes;
225 }
226 
227 static u64 fuse_get_unique(struct fuse_conn *fc)
228 {
229 	fc->reqctr++;
230 	/* zero is special */
231 	if (fc->reqctr == 0)
232 		fc->reqctr = 1;
233 
234 	return fc->reqctr;
235 }
236 
237 static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
238 {
239 	req->in.h.unique = fuse_get_unique(fc);
240 	req->in.h.len = sizeof(struct fuse_in_header) +
241 		len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
242 	list_add_tail(&req->list, &fc->pending);
243 	req->state = FUSE_REQ_PENDING;
244 	if (!req->waiting) {
245 		req->waiting = 1;
246 		atomic_inc(&fc->num_waiting);
247 	}
248 	wake_up(&fc->waitq);
249 	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
250 }
251 
252 static void flush_bg_queue(struct fuse_conn *fc)
253 {
254 	while (fc->active_background < fc->max_background &&
255 	       !list_empty(&fc->bg_queue)) {
256 		struct fuse_req *req;
257 
258 		req = list_entry(fc->bg_queue.next, struct fuse_req, list);
259 		list_del(&req->list);
260 		fc->active_background++;
261 		queue_request(fc, req);
262 	}
263 }
264 
265 /*
266  * This function is called when a request is finished.  Either a reply
267  * has arrived, or the request was aborted (and not yet sent), or some
268  * error occurred during communication with userspace, or the device
269  * file was closed.  The requester thread is woken up (if still waiting),
270  * the 'end' callback is called if given, otherwise the reference to the
271  * request is released.
272  *
273  * Called with fc->lock, unlocks it
274  */
275 static void request_end(struct fuse_conn *fc, struct fuse_req *req)
276 __releases(&fc->lock)
277 {
278 	void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
279 	req->end = NULL;
280 	list_del(&req->list);
281 	list_del(&req->intr_entry);
282 	req->state = FUSE_REQ_FINISHED;
283 	if (req->background) {
284 		if (fc->num_background == fc->max_background) {
285 			fc->blocked = 0;
286 			wake_up_all(&fc->blocked_waitq);
287 		}
288 		if (fc->num_background == fc->congestion_threshold &&
289 		    fc->connected && fc->bdi_initialized) {
290 			clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
291 			clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
292 		}
293 		fc->num_background--;
294 		fc->active_background--;
295 		flush_bg_queue(fc);
296 	}
297 	spin_unlock(&fc->lock);
298 	wake_up(&req->waitq);
299 	if (end)
300 		end(fc, req);
301 	fuse_put_request(fc, req);
302 }
303 
304 static void wait_answer_interruptible(struct fuse_conn *fc,
305 				      struct fuse_req *req)
306 __releases(&fc->lock)
307 __acquires(&fc->lock)
308 {
309 	if (signal_pending(current))
310 		return;
311 
312 	spin_unlock(&fc->lock);
313 	wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
314 	spin_lock(&fc->lock);
315 }
316 
317 static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
318 {
319 	list_add_tail(&req->intr_entry, &fc->interrupts);
320 	wake_up(&fc->waitq);
321 	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
322 }
323 
324 static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
325 __releases(&fc->lock)
326 __acquires(&fc->lock)
327 {
328 	if (!fc->no_interrupt) {
329 		/* Any signal may interrupt this */
330 		wait_answer_interruptible(fc, req);
331 
332 		if (req->aborted)
333 			goto aborted;
334 		if (req->state == FUSE_REQ_FINISHED)
335 			return;
336 
337 		req->interrupted = 1;
338 		if (req->state == FUSE_REQ_SENT)
339 			queue_interrupt(fc, req);
340 	}
341 
342 	if (!req->force) {
343 		sigset_t oldset;
344 
345 		/* Only fatal signals may interrupt this */
346 		block_sigs(&oldset);
347 		wait_answer_interruptible(fc, req);
348 		restore_sigs(&oldset);
349 
350 		if (req->aborted)
351 			goto aborted;
352 		if (req->state == FUSE_REQ_FINISHED)
353 			return;
354 
355 		/* Request is not yet in userspace, bail out */
356 		if (req->state == FUSE_REQ_PENDING) {
357 			list_del(&req->list);
358 			__fuse_put_request(req);
359 			req->out.h.error = -EINTR;
360 			return;
361 		}
362 	}
363 
364 	/*
365 	 * Either request is already in userspace, or it was forced.
366 	 * Wait it out.
367 	 */
368 	spin_unlock(&fc->lock);
369 	wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
370 	spin_lock(&fc->lock);
371 
372 	if (!req->aborted)
373 		return;
374 
375  aborted:
376 	BUG_ON(req->state != FUSE_REQ_FINISHED);
377 	if (req->locked) {
378 		/* This is an uninterruptible sleep, because data is
379 		   being copied to/from the buffers of req.  While the
380 		   request is locked there must not be any filesystem
381 		   operation (e.g. a page fault), since that could lead
382 		   to deadlock */
383 		spin_unlock(&fc->lock);
384 		wait_event(req->waitq, !req->locked);
385 		spin_lock(&fc->lock);
386 	}
387 }
388 
389 void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
390 {
391 	req->isreply = 1;
392 	spin_lock(&fc->lock);
393 	if (!fc->connected)
394 		req->out.h.error = -ENOTCONN;
395 	else if (fc->conn_error)
396 		req->out.h.error = -ECONNREFUSED;
397 	else {
398 		queue_request(fc, req);
399 		/* acquire extra reference, since request is still needed
400 		   after request_end() */
401 		__fuse_get_request(req);
402 
403 		request_wait_answer(fc, req);
404 	}
405 	spin_unlock(&fc->lock);
406 }
407 EXPORT_SYMBOL_GPL(fuse_request_send);
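
For context, here is a minimal sketch of how a synchronous caller elsewhere in
fs/fuse typically uses the helpers exported above (fuse_get_req(),
fuse_request_send(), fuse_put_request()).  The example_getattr() name is
hypothetical; the opcode and the get_node_id() helper come from fuse.h and
fuse_i.h, and real callers such as those in dir.c set up more arguments.

static int example_getattr(struct fuse_conn *fc, struct inode *inode,
			   struct fuse_attr_out *outarg)
{
	struct fuse_req *req = fuse_get_req(fc);
	int err;

	if (IS_ERR(req))
		return PTR_ERR(req);

	req->in.h.opcode = FUSE_GETATTR;
	req->in.h.nodeid = get_node_id(inode);	/* helper from fuse_i.h */
	req->out.numargs = 1;
	req->out.args[0].size = sizeof(*outarg);
	req->out.args[0].value = outarg;

	fuse_request_send(fc, req);	/* queues the request and waits */
	err = req->out.h.error;		/* answer (or abort) status */
	fuse_put_request(fc, req);	/* drop the caller's reference */
	return err;
}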
408 
409 static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
410 					    struct fuse_req *req)
411 {
412 	req->background = 1;
413 	fc->num_background++;
414 	if (fc->num_background == fc->max_background)
415 		fc->blocked = 1;
416 	if (fc->num_background == fc->congestion_threshold &&
417 	    fc->bdi_initialized) {
418 		set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
419 		set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
420 	}
421 	list_add_tail(&req->list, &fc->bg_queue);
422 	flush_bg_queue(fc);
423 }
424 
425 static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
426 {
427 	spin_lock(&fc->lock);
428 	if (fc->connected) {
429 		fuse_request_send_nowait_locked(fc, req);
430 		spin_unlock(&fc->lock);
431 	} else {
432 		req->out.h.error = -ENOTCONN;
433 		request_end(fc, req);
434 	}
435 }
436 
437 void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
438 {
439 	req->isreply = 0;
440 	fuse_request_send_nowait(fc, req);
441 }
442 
443 void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
444 {
445 	req->isreply = 1;
446 	fuse_request_send_nowait(fc, req);
447 }
448 EXPORT_SYMBOL_GPL(fuse_request_send_background);
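
And a sketch of the asynchronous pattern built on
fuse_request_send_background(): the caller installs an 'end' callback and
hands its reference over to the queue, matching what request_end() above does
on completion.  The example_* names are placeholders.

static void example_async_end(struct fuse_conn *fc, struct fuse_req *req)
{
	/* Called from request_end() when the answer arrives or the
	   connection is aborted; req->out.h.error holds the result.
	   request_end() drops the reference afterwards, so the callback
	   must not call fuse_put_request() itself. */
	if (req->out.h.error)
		printk(KERN_DEBUG "example request failed: %d\n",
		       req->out.h.error);
}

static void example_send_async(struct fuse_conn *fc, struct fuse_req *req)
{
	req->end = example_async_end;		/* completion callback */
	fuse_request_send_background(fc, req);	/* ownership passes here */
}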
449 
450 /*
451  * Called under fc->lock
452  *
453  * fc->connected must have been checked previously
454  */
455 void fuse_request_send_background_locked(struct fuse_conn *fc,
456 					 struct fuse_req *req)
457 {
458 	req->isreply = 1;
459 	fuse_request_send_nowait_locked(fc, req);
460 }
461 
462 /*
463  * Lock the request.  Up to the next unlock_request() there mustn't be
464  * anything that could cause a page-fault.  If the request was already
465  * aborted, bail out.
466  */
467 static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
468 {
469 	int err = 0;
470 	if (req) {
471 		spin_lock(&fc->lock);
472 		if (req->aborted)
473 			err = -ENOENT;
474 		else
475 			req->locked = 1;
476 		spin_unlock(&fc->lock);
477 	}
478 	return err;
479 }
480 
481 /*
482  * Unlock request.  If it was aborted during being locked, the
483  * Unlock the request.  If it was aborted while it was locked, the
484  * wake it up.
485  */
486 static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
487 {
488 	if (req) {
489 		spin_lock(&fc->lock);
490 		req->locked = 0;
491 		if (req->aborted)
492 			wake_up(&req->waitq);
493 		spin_unlock(&fc->lock);
494 	}
495 }
496 
497 struct fuse_copy_state {
498 	struct fuse_conn *fc;
499 	int write;
500 	struct fuse_req *req;
501 	const struct iovec *iov;
502 	unsigned long nr_segs;
503 	unsigned long seglen;
504 	unsigned long addr;
505 	struct page *pg;
506 	void *mapaddr;
507 	void *buf;
508 	unsigned len;
509 };
510 
511 static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
512 			   int write, struct fuse_req *req,
513 			   const struct iovec *iov, unsigned long nr_segs)
514 {
515 	memset(cs, 0, sizeof(*cs));
516 	cs->fc = fc;
517 	cs->write = write;
518 	cs->req = req;
519 	cs->iov = iov;
520 	cs->nr_segs = nr_segs;
521 }
522 
523 /* Unmap and put previous page of userspace buffer */
524 static void fuse_copy_finish(struct fuse_copy_state *cs)
525 {
526 	if (cs->mapaddr) {
527 		kunmap_atomic(cs->mapaddr, KM_USER0);
528 		if (cs->write) {
529 			flush_dcache_page(cs->pg);
530 			set_page_dirty_lock(cs->pg);
531 		}
532 		put_page(cs->pg);
533 		cs->mapaddr = NULL;
534 	}
535 }
536 
537 /*
538  * Get another pageful of the userspace buffer, map it to kernel
539  * address space, and lock the request
540  */
541 static int fuse_copy_fill(struct fuse_copy_state *cs)
542 {
543 	unsigned long offset;
544 	int err;
545 
546 	unlock_request(cs->fc, cs->req);
547 	fuse_copy_finish(cs);
548 	if (!cs->seglen) {
549 		BUG_ON(!cs->nr_segs);
550 		cs->seglen = cs->iov[0].iov_len;
551 		cs->addr = (unsigned long) cs->iov[0].iov_base;
552 		cs->iov++;
553 		cs->nr_segs--;
554 	}
555 	down_read(&current->mm->mmap_sem);
556 	err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
557 			     &cs->pg, NULL);
558 	up_read(&current->mm->mmap_sem);
559 	if (err < 0)
560 		return err;
561 	BUG_ON(err != 1);
562 	offset = cs->addr % PAGE_SIZE;
563 	cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
564 	cs->buf = cs->mapaddr + offset;
565 	cs->len = min(PAGE_SIZE - offset, cs->seglen);
566 	cs->seglen -= cs->len;
567 	cs->addr += cs->len;
568 
569 	return lock_request(cs->fc, cs->req);
570 }
571 
572 /* Do as much copy to/from userspace buffer as we can */
573 static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
574 {
575 	unsigned ncpy = min(*size, cs->len);
576 	if (val) {
577 		if (cs->write)
578 			memcpy(cs->buf, *val, ncpy);
579 		else
580 			memcpy(*val, cs->buf, ncpy);
581 		*val += ncpy;
582 	}
583 	*size -= ncpy;
584 	cs->len -= ncpy;
585 	cs->buf += ncpy;
586 	return ncpy;
587 }
588 
589 /*
590  * Copy a page in the request to/from the userspace buffer.  Must be
591  * done atomically
592  */
593 static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
594 			  unsigned offset, unsigned count, int zeroing)
595 {
596 	if (page && zeroing && count < PAGE_SIZE) {
597 		void *mapaddr = kmap_atomic(page, KM_USER1);
598 		memset(mapaddr, 0, PAGE_SIZE);
599 		kunmap_atomic(mapaddr, KM_USER1);
600 	}
601 	while (count) {
602 		if (!cs->len) {
603 			int err = fuse_copy_fill(cs);
604 			if (err)
605 				return err;
606 		}
607 		if (page) {
608 			void *mapaddr = kmap_atomic(page, KM_USER1);
609 			void *buf = mapaddr + offset;
610 			offset += fuse_copy_do(cs, &buf, &count);
611 			kunmap_atomic(mapaddr, KM_USER1);
612 		} else
613 			offset += fuse_copy_do(cs, NULL, &count);
614 	}
615 	if (page && !cs->write)
616 		flush_dcache_page(page);
617 	return 0;
618 }
619 
620 /* Copy pages in the request to/from userspace buffer */
621 static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
622 			   int zeroing)
623 {
624 	unsigned i;
625 	struct fuse_req *req = cs->req;
626 	unsigned offset = req->page_offset;
627 	unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
628 
629 	for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
630 		struct page *page = req->pages[i];
631 		int err = fuse_copy_page(cs, page, offset, count, zeroing);
632 		if (err)
633 			return err;
634 
635 		nbytes -= count;
636 		count = min(nbytes, (unsigned) PAGE_SIZE);
637 		offset = 0;
638 	}
639 	return 0;
640 }
641 
642 /* Copy a single argument in the request to/from userspace buffer */
643 static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
644 {
645 	while (size) {
646 		if (!cs->len) {
647 			int err = fuse_copy_fill(cs);
648 			if (err)
649 				return err;
650 		}
651 		fuse_copy_do(cs, &val, &size);
652 	}
653 	return 0;
654 }
655 
656 /* Copy request arguments to/from userspace buffer */
657 static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
658 			  unsigned argpages, struct fuse_arg *args,
659 			  int zeroing)
660 {
661 	int err = 0;
662 	unsigned i;
663 
664 	for (i = 0; !err && i < numargs; i++)  {
665 		struct fuse_arg *arg = &args[i];
666 		if (i == numargs - 1 && argpages)
667 			err = fuse_copy_pages(cs, arg->size, zeroing);
668 		else
669 			err = fuse_copy_one(cs, arg->value, arg->size);
670 	}
671 	return err;
672 }
673 
674 static int request_pending(struct fuse_conn *fc)
675 {
676 	return !list_empty(&fc->pending) || !list_empty(&fc->interrupts);
677 }
678 
679 /* Wait until a request is available on the pending list */
680 static void request_wait(struct fuse_conn *fc)
681 __releases(&fc->lock)
682 __acquires(&fc->lock)
683 {
684 	DECLARE_WAITQUEUE(wait, current);
685 
686 	add_wait_queue_exclusive(&fc->waitq, &wait);
687 	while (fc->connected && !request_pending(fc)) {
688 		set_current_state(TASK_INTERRUPTIBLE);
689 		if (signal_pending(current))
690 			break;
691 
692 		spin_unlock(&fc->lock);
693 		schedule();
694 		spin_lock(&fc->lock);
695 	}
696 	set_current_state(TASK_RUNNING);
697 	remove_wait_queue(&fc->waitq, &wait);
698 }
699 
700 /*
701  * Transfer an interrupt request to userspace
702  *
703  * Unlike other requests, this is assembled on demand, without a need
704  * to allocate a separate fuse_req structure.
705  *
706  * Called with fc->lock held, releases it
707  */
708 static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req,
709 			       const struct iovec *iov, unsigned long nr_segs)
710 __releases(&fc->lock)
711 {
712 	struct fuse_copy_state cs;
713 	struct fuse_in_header ih;
714 	struct fuse_interrupt_in arg;
715 	unsigned reqsize = sizeof(ih) + sizeof(arg);
716 	int err;
717 
718 	list_del_init(&req->intr_entry);
719 	req->intr_unique = fuse_get_unique(fc);
720 	memset(&ih, 0, sizeof(ih));
721 	memset(&arg, 0, sizeof(arg));
722 	ih.len = reqsize;
723 	ih.opcode = FUSE_INTERRUPT;
724 	ih.unique = req->intr_unique;
725 	arg.unique = req->in.h.unique;
726 
727 	spin_unlock(&fc->lock);
728 	if (iov_length(iov, nr_segs) < reqsize)
729 		return -EINVAL;
730 
731 	fuse_copy_init(&cs, fc, 1, NULL, iov, nr_segs);
732 	err = fuse_copy_one(&cs, &ih, sizeof(ih));
733 	if (!err)
734 		err = fuse_copy_one(&cs, &arg, sizeof(arg));
735 	fuse_copy_finish(&cs);
736 
737 	return err ? err : reqsize;
738 }
739 
740 /*
741  * Read a single request into the userspace filesystem's buffer.  This
742  * function waits until a request is available, then removes it from
743  * the pending list and copies the request data to the userspace buffer.
744  * If no reply is needed (FORGET), or the request has been aborted, or
745  * there was an error during the copying, then it is finished by calling
746  * request_end().  Otherwise add it to the processing list, and set
747  * the 'sent' flag.
748  */
749 static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
750 			      unsigned long nr_segs, loff_t pos)
751 {
752 	int err;
753 	struct fuse_req *req;
754 	struct fuse_in *in;
755 	struct fuse_copy_state cs;
756 	unsigned reqsize;
757 	struct file *file = iocb->ki_filp;
758 	struct fuse_conn *fc = fuse_get_conn(file);
759 	if (!fc)
760 		return -EPERM;
761 
762  restart:
763 	spin_lock(&fc->lock);
764 	err = -EAGAIN;
765 	if ((file->f_flags & O_NONBLOCK) && fc->connected &&
766 	    !request_pending(fc))
767 		goto err_unlock;
768 
769 	request_wait(fc);
770 	err = -ENODEV;
771 	if (!fc->connected)
772 		goto err_unlock;
773 	err = -ERESTARTSYS;
774 	if (!request_pending(fc))
775 		goto err_unlock;
776 
777 	if (!list_empty(&fc->interrupts)) {
778 		req = list_entry(fc->interrupts.next, struct fuse_req,
779 				 intr_entry);
780 		return fuse_read_interrupt(fc, req, iov, nr_segs);
781 	}
782 
783 	req = list_entry(fc->pending.next, struct fuse_req, list);
784 	req->state = FUSE_REQ_READING;
785 	list_move(&req->list, &fc->io);
786 
787 	in = &req->in;
788 	reqsize = in->h.len;
789 	/* If request is too large, reply with an error and restart the read */
790 	if (iov_length(iov, nr_segs) < reqsize) {
791 		req->out.h.error = -EIO;
792 		/* SETXATTR is special, since its data may be too large */
793 		if (in->h.opcode == FUSE_SETXATTR)
794 			req->out.h.error = -E2BIG;
795 		request_end(fc, req);
796 		goto restart;
797 	}
798 	spin_unlock(&fc->lock);
799 	fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);
800 	err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
801 	if (!err)
802 		err = fuse_copy_args(&cs, in->numargs, in->argpages,
803 				     (struct fuse_arg *) in->args, 0);
804 	fuse_copy_finish(&cs);
805 	spin_lock(&fc->lock);
806 	req->locked = 0;
807 	if (req->aborted) {
808 		request_end(fc, req);
809 		return -ENODEV;
810 	}
811 	if (err) {
812 		req->out.h.error = -EIO;
813 		request_end(fc, req);
814 		return err;
815 	}
816 	if (!req->isreply)
817 		request_end(fc, req);
818 	else {
819 		req->state = FUSE_REQ_SENT;
820 		list_move_tail(&req->list, &fc->processing);
821 		if (req->interrupted)
822 			queue_interrupt(fc, req);
823 		spin_unlock(&fc->lock);
824 	}
825 	return reqsize;
826 
827  err_unlock:
828 	spin_unlock(&fc->lock);
829 	return err;
830 }
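
Seen from the other end of the device, a daemon's read loop is roughly the
mirror image of fuse_dev_read() above.  A userspace sketch, with a made-up
buffer size and example_* naming; real daemons (libfuse) size the buffer from
the negotiated max_write plus the header:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <linux/fuse.h>

#define EXAMPLE_BUFSIZE (1024 * 1024)	/* placeholder size */

static void example_read_loop(int fuse_fd)
{
	char *buf = malloc(EXAMPLE_BUFSIZE);

	if (!buf)
		return;
	for (;;) {
		ssize_t res = read(fuse_fd, buf, EXAMPLE_BUFSIZE);
		struct fuse_in_header *in = (struct fuse_in_header *) buf;

		if (res < 0)
			break;	/* e.g. ENODEV once the filesystem is unmounted */
		/* in->opcode says what is asked for; in->unique must be
		   echoed back in the reply's fuse_out_header.unique. */
		printf("opcode %u, unique %llu, len %u\n",
		       in->opcode, (unsigned long long) in->unique, in->len);
		/* ... dispatch the request and write a reply ... */
	}
	free(buf);
}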
831 
832 static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
833 			    struct fuse_copy_state *cs)
834 {
835 	struct fuse_notify_poll_wakeup_out outarg;
836 	int err = -EINVAL;
837 
838 	if (size != sizeof(outarg))
839 		goto err;
840 
841 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
842 	if (err)
843 		goto err;
844 
845 	fuse_copy_finish(cs);
846 	return fuse_notify_poll_wakeup(fc, &outarg);
847 
848 err:
849 	fuse_copy_finish(cs);
850 	return err;
851 }
852 
853 static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
854 				   struct fuse_copy_state *cs)
855 {
856 	struct fuse_notify_inval_inode_out outarg;
857 	int err = -EINVAL;
858 
859 	if (size != sizeof(outarg))
860 		goto err;
861 
862 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
863 	if (err)
864 		goto err;
865 	fuse_copy_finish(cs);
866 
867 	down_read(&fc->killsb);
868 	err = -ENOENT;
869 	if (fc->sb) {
870 		err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
871 					       outarg.off, outarg.len);
872 	}
873 	up_read(&fc->killsb);
874 	return err;
875 
876 err:
877 	fuse_copy_finish(cs);
878 	return err;
879 }
880 
881 static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
882 				   struct fuse_copy_state *cs)
883 {
884 	struct fuse_notify_inval_entry_out outarg;
885 	int err = -ENOMEM;
886 	char *buf;
887 	struct qstr name;
888 
889 	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
890 	if (!buf)
891 		goto err;
892 
893 	err = -EINVAL;
894 	if (size < sizeof(outarg))
895 		goto err;
896 
897 	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
898 	if (err)
899 		goto err;
900 
901 	err = -ENAMETOOLONG;
902 	if (outarg.namelen > FUSE_NAME_MAX)
903 		goto err;
904 
905 	name.name = buf;
906 	name.len = outarg.namelen;
907 	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
908 	if (err)
909 		goto err;
910 	fuse_copy_finish(cs);
911 	buf[outarg.namelen] = 0;
912 	name.hash = full_name_hash(name.name, name.len);
913 
914 	down_read(&fc->killsb);
915 	err = -ENOENT;
916 	if (fc->sb)
917 		err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
918 	up_read(&fc->killsb);
919 	kfree(buf);
920 	return err;
921 
922 err:
923 	kfree(buf);
924 	fuse_copy_finish(cs);
925 	return err;
926 }
927 
928 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
929 		       unsigned int size, struct fuse_copy_state *cs)
930 {
931 	switch (code) {
932 	case FUSE_NOTIFY_POLL:
933 		return fuse_notify_poll(fc, size, cs);
934 
935 	case FUSE_NOTIFY_INVAL_INODE:
936 		return fuse_notify_inval_inode(fc, size, cs);
937 
938 	case FUSE_NOTIFY_INVAL_ENTRY:
939 		return fuse_notify_inval_entry(fc, size, cs);
940 
941 	default:
942 		fuse_copy_finish(cs);
943 		return -EINVAL;
944 	}
945 }
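
For illustration, this is what one of these notifications looks like from the
daemon side.  The layout follows fuse_dev_write() below: a fuse_out_header
with unique == 0 and the notification code in the error field, followed by
the code-specific payload.  A userspace sketch with a hypothetical
example_notify_inval_inode() helper:

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <linux/fuse.h>

static int example_notify_inval_inode(int fuse_fd, uint64_t nodeid,
				      int64_t off, int64_t len)
{
	struct fuse_out_header oh;
	struct fuse_notify_inval_inode_out arg;
	char buf[sizeof(oh) + sizeof(arg)];

	memset(&oh, 0, sizeof(oh));
	memset(&arg, 0, sizeof(arg));
	oh.unique = 0;				/* marks a notification */
	oh.error = FUSE_NOTIFY_INVAL_INODE;	/* notification code */
	oh.len = sizeof(buf);
	arg.ino = nodeid;
	arg.off = off;
	arg.len = len;

	memcpy(buf, &oh, sizeof(oh));
	memcpy(buf + sizeof(oh), &arg, sizeof(arg));
	return write(fuse_fd, buf, sizeof(buf)) == (ssize_t) sizeof(buf) ? 0 : -1;
}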
946 
947 /* Look up request on processing list by unique ID */
948 static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
949 {
950 	struct list_head *entry;
951 
952 	list_for_each(entry, &fc->processing) {
953 		struct fuse_req *req;
954 		req = list_entry(entry, struct fuse_req, list);
955 		if (req->in.h.unique == unique || req->intr_unique == unique)
956 			return req;
957 	}
958 	return NULL;
959 }
960 
961 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
962 			 unsigned nbytes)
963 {
964 	unsigned reqsize = sizeof(struct fuse_out_header);
965 
966 	if (out->h.error)
967 		return nbytes != reqsize ? -EINVAL : 0;
968 
969 	reqsize += len_args(out->numargs, out->args);
970 
971 	if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
972 		return -EINVAL;
973 	else if (reqsize > nbytes) {
974 		struct fuse_arg *lastarg = &out->args[out->numargs-1];
975 		unsigned diffsize = reqsize - nbytes;
976 		if (diffsize > lastarg->size)
977 			return -EINVAL;
978 		lastarg->size -= diffsize;
979 	}
980 	return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
981 			      out->page_zeroing);
982 }
983 
984 /*
985  * Write a single reply to a request.  First the header is copied from
986  * the write buffer.  The request is then searched on the processing
987  * list by the unique ID found in the header.  If found, it is removed
988  * from the list and the rest of the buffer is copied to the request.
989  * The request is then finished by calling request_end().
990  */
991 static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
992 			       unsigned long nr_segs, loff_t pos)
993 {
994 	int err;
995 	size_t nbytes = iov_length(iov, nr_segs);
996 	struct fuse_req *req;
997 	struct fuse_out_header oh;
998 	struct fuse_copy_state cs;
999 	struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
1000 	if (!fc)
1001 		return -EPERM;
1002 
1003 	fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
1004 	if (nbytes < sizeof(struct fuse_out_header))
1005 		return -EINVAL;
1006 
1007 	err = fuse_copy_one(&cs, &oh, sizeof(oh));
1008 	if (err)
1009 		goto err_finish;
1010 
1011 	err = -EINVAL;
1012 	if (oh.len != nbytes)
1013 		goto err_finish;
1014 
1015 	/*
1016 	 * A zero oh.unique indicates an unsolicited notification
1017 	 * message, and the error field contains the notification code.
1018 	 */
1019 	if (!oh.unique) {
1020 		err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), &cs);
1021 		return err ? err : nbytes;
1022 	}
1023 
1024 	err = -EINVAL;
1025 	if (oh.error <= -1000 || oh.error > 0)
1026 		goto err_finish;
1027 
1028 	spin_lock(&fc->lock);
1029 	err = -ENOENT;
1030 	if (!fc->connected)
1031 		goto err_unlock;
1032 
1033 	req = request_find(fc, oh.unique);
1034 	if (!req)
1035 		goto err_unlock;
1036 
1037 	if (req->aborted) {
1038 		spin_unlock(&fc->lock);
1039 		fuse_copy_finish(&cs);
1040 		spin_lock(&fc->lock);
1041 		request_end(fc, req);
1042 		return -ENOENT;
1043 	}
1044 	/* Is it an interrupt reply? */
1045 	if (req->intr_unique == oh.unique) {
1046 		err = -EINVAL;
1047 		if (nbytes != sizeof(struct fuse_out_header))
1048 			goto err_unlock;
1049 
1050 		if (oh.error == -ENOSYS)
1051 			fc->no_interrupt = 1;
1052 		else if (oh.error == -EAGAIN)
1053 			queue_interrupt(fc, req);
1054 
1055 		spin_unlock(&fc->lock);
1056 		fuse_copy_finish(&cs);
1057 		return nbytes;
1058 	}
1059 
1060 	req->state = FUSE_REQ_WRITING;
1061 	list_move(&req->list, &fc->io);
1062 	req->out.h = oh;
1063 	req->locked = 1;
1064 	cs.req = req;
1065 	spin_unlock(&fc->lock);
1066 
1067 	err = copy_out_args(&cs, &req->out, nbytes);
1068 	fuse_copy_finish(&cs);
1069 
1070 	spin_lock(&fc->lock);
1071 	req->locked = 0;
1072 	if (!err) {
1073 		if (req->aborted)
1074 			err = -ENOENT;
1075 	} else if (!req->aborted)
1076 		req->out.h.error = -EIO;
1077 	request_end(fc, req);
1078 
1079 	return err ? err : nbytes;
1080 
1081  err_unlock:
1082 	spin_unlock(&fc->lock);
1083  err_finish:
1084 	fuse_copy_finish(&cs);
1085 	return err;
1086 }
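
The matching daemon-side write is a fuse_out_header whose unique field echoes
the request's unique id, followed by the opcode-specific payload; an error
reply carries the header only, as copy_out_args() above requires.  A
userspace sketch with a hypothetical example_reply() helper:

#include <stdint.h>
#include <string.h>
#include <sys/uio.h>
#include <linux/fuse.h>

static int example_reply(int fuse_fd, uint64_t unique, int error,
			 const void *payload, size_t payload_len)
{
	struct fuse_out_header oh;
	struct iovec iov[2];

	memset(&oh, 0, sizeof(oh));
	oh.unique = unique;			/* copied from the request */
	oh.error = error;			/* 0 or a negated errno */
	oh.len = sizeof(oh) + (error ? 0 : payload_len);

	iov[0].iov_base = &oh;
	iov[0].iov_len = sizeof(oh);
	iov[1].iov_base = (void *) payload;
	iov[1].iov_len = payload_len;

	return writev(fuse_fd, iov, error ? 1 : 2) < 0 ? -1 : 0;
}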
1087 
1088 static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
1089 {
1090 	unsigned mask = POLLOUT | POLLWRNORM;
1091 	struct fuse_conn *fc = fuse_get_conn(file);
1092 	if (!fc)
1093 		return POLLERR;
1094 
1095 	poll_wait(file, &fc->waitq, wait);
1096 
1097 	spin_lock(&fc->lock);
1098 	if (!fc->connected)
1099 		mask = POLLERR;
1100 	else if (request_pending(fc))
1101 		mask |= POLLIN | POLLRDNORM;
1102 	spin_unlock(&fc->lock);
1103 
1104 	return mask;
1105 }
1106 
1107 /*
1108  * Abort all requests on the given list (pending or processing)
1109  *
1110  * This function releases and reacquires fc->lock
1111  */
1112 static void end_requests(struct fuse_conn *fc, struct list_head *head)
1113 __releases(&fc->lock)
1114 __acquires(&fc->lock)
1115 {
1116 	while (!list_empty(head)) {
1117 		struct fuse_req *req;
1118 		req = list_entry(head->next, struct fuse_req, list);
1119 		req->out.h.error = -ECONNABORTED;
1120 		request_end(fc, req);
1121 		spin_lock(&fc->lock);
1122 	}
1123 }
1124 
1125 /*
1126  * Abort requests under I/O
1127  *
1128  * The requests are set to aborted and finished, and the request
1129  * waiter is woken up.  This will make request_wait_answer() wait
1130  * until the request is unlocked and then return.
1131  *
1132  * If the request is asynchronous, then the end function needs to be
1133  * called after waiting for the request to be unlocked (if it was
1134  * locked).
1135  */
1136 static void end_io_requests(struct fuse_conn *fc)
1137 __releases(&fc->lock)
1138 __acquires(&fc->lock)
1139 {
1140 	while (!list_empty(&fc->io)) {
1141 		struct fuse_req *req =
1142 			list_entry(fc->io.next, struct fuse_req, list);
1143 		void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
1144 
1145 		req->aborted = 1;
1146 		req->out.h.error = -ECONNABORTED;
1147 		req->state = FUSE_REQ_FINISHED;
1148 		list_del_init(&req->list);
1149 		wake_up(&req->waitq);
1150 		if (end) {
1151 			req->end = NULL;
1152 			__fuse_get_request(req);
1153 			spin_unlock(&fc->lock);
1154 			wait_event(req->waitq, !req->locked);
1155 			end(fc, req);
1156 			fuse_put_request(fc, req);
1157 			spin_lock(&fc->lock);
1158 		}
1159 	}
1160 }
1161 
1162 /*
1163  * Abort all requests.
1164  *
1165  * Emergency exit in case of a malicious or accidental deadlock, or
1166  * just a hung filesystem.
1167  *
1168  * The same effect is usually achievable through killing the
1169  * filesystem daemon and all users of the filesystem.  The exception
1170  * is the combination of an asynchronous request and the tricky
1171  * deadlock (see Documentation/filesystems/fuse.txt).
1172  *
1173  * During the aborting, progression of requests from the pending and
1174  * processing lists onto the io list, and progression of new requests
1175  * onto the pending list is prevented by fc->connected being false.
1176  *
1177  * Progression of requests under I/O to the processing list is
1178  * prevented by the req->aborted flag being true for these requests.
1179  * For this reason requests on the io list must be aborted first.
1180  */
1181 void fuse_abort_conn(struct fuse_conn *fc)
1182 {
1183 	spin_lock(&fc->lock);
1184 	if (fc->connected) {
1185 		fc->connected = 0;
1186 		fc->blocked = 0;
1187 		end_io_requests(fc);
1188 		end_requests(fc, &fc->pending);
1189 		end_requests(fc, &fc->processing);
1190 		wake_up_all(&fc->waitq);
1191 		wake_up_all(&fc->blocked_waitq);
1192 		kill_fasync(&fc->fasync, SIGIO, POLL_IN);
1193 	}
1194 	spin_unlock(&fc->lock);
1195 }
1196 EXPORT_SYMBOL_GPL(fuse_abort_conn);
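
The usual way to reach fuse_abort_conn() from userspace is the control
filesystem described in Documentation/filesystems/fuse.txt: writing to the
connection's 'abort' file under /sys/fs/fuse/connections/.  A rough sketch;
the connection number "42" stands in for the filesystem's device number:

#include <fcntl.h>
#include <unistd.h>

static int example_abort_connection(void)
{
	int fd = open("/sys/fs/fuse/connections/42/abort", O_WRONLY);

	if (fd < 0)
		return -1;
	(void) write(fd, "1", 1);	/* any write aborts the connection */
	close(fd);
	return 0;
}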
1197 
1198 int fuse_dev_release(struct inode *inode, struct file *file)
1199 {
1200 	struct fuse_conn *fc = fuse_get_conn(file);
1201 	if (fc) {
1202 		spin_lock(&fc->lock);
1203 		fc->connected = 0;
1204 		end_requests(fc, &fc->pending);
1205 		end_requests(fc, &fc->processing);
1206 		spin_unlock(&fc->lock);
1207 		fuse_conn_put(fc);
1208 	}
1209 
1210 	return 0;
1211 }
1212 EXPORT_SYMBOL_GPL(fuse_dev_release);
1213 
1214 static int fuse_dev_fasync(int fd, struct file *file, int on)
1215 {
1216 	struct fuse_conn *fc = fuse_get_conn(file);
1217 	if (!fc)
1218 		return -EPERM;
1219 
1220 	/* No locking - fasync_helper does its own locking */
1221 	return fasync_helper(fd, file, on, &fc->fasync);
1222 }
1223 
1224 const struct file_operations fuse_dev_operations = {
1225 	.owner		= THIS_MODULE,
1226 	.llseek		= no_llseek,
1227 	.read		= do_sync_read,
1228 	.aio_read	= fuse_dev_read,
1229 	.write		= do_sync_write,
1230 	.aio_write	= fuse_dev_write,
1231 	.poll		= fuse_dev_poll,
1232 	.release	= fuse_dev_release,
1233 	.fasync		= fuse_dev_fasync,
1234 };
1235 EXPORT_SYMBOL_GPL(fuse_dev_operations);
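
For completeness, userspace obtains a file backed by these operations by
opening /dev/fuse and handing the descriptor to mount(2) in the "fd=" mount
option; this is what fusermount and libfuse do on the daemon's behalf.  A
simplified, privileged sketch; rootmode, user_id and group_id are the
mandatory mount options parsed in inode.c, and the "example" source name is
arbitrary:

#include <fcntl.h>
#include <stdio.h>
#include <sys/mount.h>
#include <unistd.h>

static int example_mount(const char *mountpoint)
{
	char opts[128];
	int fd = open("/dev/fuse", O_RDWR);

	if (fd < 0)
		return -1;
	snprintf(opts, sizeof(opts),
		 "fd=%d,rootmode=40000,user_id=%u,group_id=%u",
		 fd, (unsigned) getuid(), (unsigned) getgid());
	if (mount("example", mountpoint, "fuse", 0, opts) < 0) {
		close(fd);
		return -1;
	}
	return fd;	/* the daemon now serves requests on this descriptor */
}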
1236 
1237 static struct miscdevice fuse_miscdevice = {
1238 	.minor = FUSE_MINOR,
1239 	.name  = "fuse",
1240 	.fops = &fuse_dev_operations,
1241 };
1242 
1243 int __init fuse_dev_init(void)
1244 {
1245 	int err = -ENOMEM;
1246 	fuse_req_cachep = kmem_cache_create("fuse_request",
1247 					    sizeof(struct fuse_req),
1248 					    0, 0, NULL);
1249 	if (!fuse_req_cachep)
1250 		goto out;
1251 
1252 	err = misc_register(&fuse_miscdevice);
1253 	if (err)
1254 		goto out_cache_clean;
1255 
1256 	return 0;
1257 
1258  out_cache_clean:
1259 	kmem_cache_destroy(fuse_req_cachep);
1260  out:
1261 	return err;
1262 }
1263 
1264 void fuse_dev_cleanup(void)
1265 {
1266 	misc_deregister(&fuse_miscdevice);
1267 	kmem_cache_destroy(fuse_req_cachep);
1268 }
1269