xref: /linux/fs/fuse/virtio_fs.c (revision 17cfcb68af3bc7d5e8ae08779b1853310a2949f3)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * virtio-fs: Virtio Filesystem
4  * Copyright (C) 2018 Red Hat, Inc.
5  */
6 
7 #include <linux/fs.h>
8 #include <linux/module.h>
9 #include <linux/virtio.h>
10 #include <linux/virtio_fs.h>
11 #include <linux/delay.h>
12 #include <linux/fs_context.h>
13 #include <linux/highmem.h>
14 #include "fuse_i.h"
15 
/*
 * List of virtio-fs device instances and a lock for the list. The mutex also
 * provides mutual exclusion between the device removal and mounting paths.
 */
static DEFINE_MUTEX(virtio_fs_mutex);
static LIST_HEAD(virtio_fs_instances);
21 
/*
 * Virtqueue indices into virtio_fs->vqs[]. The hiprio queue is first; request
 * queues start at VQ_REQUEST.
 */
enum {
	VQ_HIPRIO,
	VQ_REQUEST
};
26 
/*
 * Per-virtqueue state.
 *
 * One instance exists for the hiprio queue and one for each request queue.
 */
struct virtio_fs_vq {
	/* Taken around virtqueue operations and updates to the fields below */
	spinlock_t lock;
	struct virtqueue *vq;     /* protected by ->lock */
	/* Completion work, scheduled from virtio_fs_vq_done() */
	struct work_struct done_work;
	/* FORGET requests waiting to be (re)submitted on the hiprio queue */
	struct list_head queued_reqs;
	/* Delayed work that retries queued_reqs; a no-op on request queues */
	struct delayed_work dispatch_work;
	/* fuse_dev attached to this queue while mounted, NULL otherwise */
	struct fuse_dev *fud;
	/* False once the queue is stopped; submitters check it under ->lock */
	bool connected;
	/* Number of buffers handed to the device and not yet completed */
	long in_flight;
	/* Queue name passed to virtio_find_vqs() */
	char name[24];
} ____cacheline_aligned_in_smp;
39 
/*
 * A virtio-fs device instance.
 *
 * The object is reference counted: the probing device holds one reference
 * and each lookup through virtio_fs_find_instance() takes another.
 */
struct virtio_fs {
	struct kref refcount;
	struct list_head list;    /* on virtio_fs_instances */
	/* NUL-terminated tag read from the device config space */
	char *tag;
	/* Array of nvqs per-queue states, indexed by virtqueue index */
	struct virtio_fs_vq *vqs;
	unsigned int nvqs;               /* number of virtqueues */
	unsigned int num_request_queues; /* number of request queues */
};
49 
/*
 * A FUSE_FORGET request as submitted on the hiprio queue. The whole structure
 * (sizeof(*forget) bytes) is mapped into the scatterlist, and its pointer is
 * used as the virtqueue token so the completion handler can kfree() it.
 */
struct virtio_fs_forget {
	struct fuse_in_header ih;
	struct fuse_forget_in arg;
	/* This request can be temporarily queued on virt queue */
	struct list_head list;
};
56 
57 static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
58 {
59 	struct virtio_fs *fs = vq->vdev->priv;
60 
61 	return &fs->vqs[vq->index];
62 }
63 
64 static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
65 {
66 	return &vq_to_fsvq(vq)->fud->pq;
67 }
68 
69 static void release_virtio_fs_obj(struct kref *ref)
70 {
71 	struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);
72 
73 	kfree(vfs->vqs);
74 	kfree(vfs);
75 }
76 
77 /* Make sure virtiofs_mutex is held */
78 static void virtio_fs_put(struct virtio_fs *fs)
79 {
80 	kref_put(&fs->refcount, release_virtio_fs_obj);
81 }
82 
83 static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
84 {
85 	struct virtio_fs *vfs = fiq->priv;
86 
87 	mutex_lock(&virtio_fs_mutex);
88 	virtio_fs_put(vfs);
89 	mutex_unlock(&virtio_fs_mutex);
90 }
91 
92 static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
93 {
94 	WARN_ON(fsvq->in_flight < 0);
95 
96 	/* Wait for in flight requests to finish.*/
97 	while (1) {
98 		spin_lock(&fsvq->lock);
99 		if (!fsvq->in_flight) {
100 			spin_unlock(&fsvq->lock);
101 			break;
102 		}
103 		spin_unlock(&fsvq->lock);
104 		/* TODO use completion instead of timeout */
105 		usleep_range(1000, 2000);
106 	}
107 
108 	flush_work(&fsvq->done_work);
109 	flush_delayed_work(&fsvq->dispatch_work);
110 }
111 
112 static inline void drain_hiprio_queued_reqs(struct virtio_fs_vq *fsvq)
113 {
114 	struct virtio_fs_forget *forget;
115 
116 	spin_lock(&fsvq->lock);
117 	while (1) {
118 		forget = list_first_entry_or_null(&fsvq->queued_reqs,
119 						struct virtio_fs_forget, list);
120 		if (!forget)
121 			break;
122 		list_del(&forget->list);
123 		kfree(forget);
124 	}
125 	spin_unlock(&fsvq->lock);
126 }
127 
128 static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
129 {
130 	struct virtio_fs_vq *fsvq;
131 	int i;
132 
133 	for (i = 0; i < fs->nvqs; i++) {
134 		fsvq = &fs->vqs[i];
135 		if (i == VQ_HIPRIO)
136 			drain_hiprio_queued_reqs(fsvq);
137 
138 		virtio_fs_drain_queue(fsvq);
139 	}
140 }
141 
142 static void virtio_fs_start_all_queues(struct virtio_fs *fs)
143 {
144 	struct virtio_fs_vq *fsvq;
145 	int i;
146 
147 	for (i = 0; i < fs->nvqs; i++) {
148 		fsvq = &fs->vqs[i];
149 		spin_lock(&fsvq->lock);
150 		fsvq->connected = true;
151 		spin_unlock(&fsvq->lock);
152 	}
153 }
154 
155 /* Add a new instance to the list or return -EEXIST if tag name exists*/
156 static int virtio_fs_add_instance(struct virtio_fs *fs)
157 {
158 	struct virtio_fs *fs2;
159 	bool duplicate = false;
160 
161 	mutex_lock(&virtio_fs_mutex);
162 
163 	list_for_each_entry(fs2, &virtio_fs_instances, list) {
164 		if (strcmp(fs->tag, fs2->tag) == 0)
165 			duplicate = true;
166 	}
167 
168 	if (!duplicate)
169 		list_add_tail(&fs->list, &virtio_fs_instances);
170 
171 	mutex_unlock(&virtio_fs_mutex);
172 
173 	if (duplicate)
174 		return -EEXIST;
175 	return 0;
176 }
177 
178 /* Return the virtio_fs with a given tag, or NULL */
179 static struct virtio_fs *virtio_fs_find_instance(const char *tag)
180 {
181 	struct virtio_fs *fs;
182 
183 	mutex_lock(&virtio_fs_mutex);
184 
185 	list_for_each_entry(fs, &virtio_fs_instances, list) {
186 		if (strcmp(fs->tag, tag) == 0) {
187 			kref_get(&fs->refcount);
188 			goto found;
189 		}
190 	}
191 
192 	fs = NULL; /* not found */
193 
194 found:
195 	mutex_unlock(&virtio_fs_mutex);
196 
197 	return fs;
198 }
199 
200 static void virtio_fs_free_devs(struct virtio_fs *fs)
201 {
202 	unsigned int i;
203 
204 	for (i = 0; i < fs->nvqs; i++) {
205 		struct virtio_fs_vq *fsvq = &fs->vqs[i];
206 
207 		if (!fsvq->fud)
208 			continue;
209 
210 		fuse_dev_free(fsvq->fud);
211 		fsvq->fud = NULL;
212 	}
213 }
214 
215 /* Read filesystem name from virtio config into fs->tag (must kfree()). */
216 static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
217 {
218 	char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
219 	char *end;
220 	size_t len;
221 
222 	virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
223 			   &tag_buf, sizeof(tag_buf));
224 	end = memchr(tag_buf, '\0', sizeof(tag_buf));
225 	if (end == tag_buf)
226 		return -EINVAL; /* empty tag */
227 	if (!end)
228 		end = &tag_buf[sizeof(tag_buf)];
229 
230 	len = end - tag_buf;
231 	fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
232 	if (!fs->tag)
233 		return -ENOMEM;
234 	memcpy(fs->tag, tag_buf, len);
235 	fs->tag[len] = '\0';
236 	return 0;
237 }
238 
/*
 * Work function for hiprio completion.
 *
 * Reclaims every buffer the device has returned on the hiprio queue. Each
 * returned buffer is kfree()d and in_flight is decremented once per buffer,
 * all while holding fsvq->lock.
 */
static void virtio_fs_hiprio_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct virtqueue *vq = fsvq->vq;

	/* Free completed FUSE_FORGET requests */
	spin_lock(&fsvq->lock);
	do {
		unsigned int len;
		void *req;

		/* No callbacks needed while we are actively reaping buffers */
		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			kfree(req);
			fsvq->in_flight--;
		}
	/*
	 * Go around again when re-enabling callbacks reports failure
	 * (presumably more buffers completed meanwhile — see the virtio API),
	 * unless the virtqueue is broken.
	 */
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	spin_unlock(&fsvq->lock);
}
261 
/*
 * No-op dispatch work. Installed as the dispatch_work handler of request
 * queues so that flushing dispatch_work is valid for every queue.
 */
static void virtio_fs_dummy_dispatch_work(struct work_struct *work)
{
}
265 
/*
 * Delayed work that (re)submits FORGET requests parked on queued_reqs.
 *
 * Requests are taken from the head of the list one at a time. If the queue
 * has been disconnected the request is simply freed. If the virtqueue
 * rejects the request with -ENOMEM or -ENOSPC it is put back on the tail of
 * the list and this work is rescheduled; any other error drops the request.
 * The function returns when the list is empty or after the first failed
 * submission.
 */
static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
{
	struct virtio_fs_forget *forget;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work.work);
	struct virtqueue *vq = fsvq->vq;
	struct scatterlist sg;
	struct scatterlist *sgs[] = {&sg};
	bool notify;
	int ret;

	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		forget = list_first_entry_or_null(&fsvq->queued_reqs,
					struct virtio_fs_forget, list);
		if (!forget) {
			/* Nothing left to dispatch */
			spin_unlock(&fsvq->lock);
			return;
		}

		list_del(&forget->list);
		if (!fsvq->connected) {
			/* Queue stopped: drop the request and keep emptying the list */
			spin_unlock(&fsvq->lock);
			kfree(forget);
			continue;
		}

		sg_init_one(&sg, forget, sizeof(*forget));

		/* Enqueue the request */
		dev_dbg(&vq->vdev->dev, "%s\n", __func__);
		ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
		if (ret < 0) {
			if (ret == -ENOMEM || ret == -ENOSPC) {
				pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
					 ret);
				/* Park it again and retry from a later work run */
				list_add_tail(&forget->list,
						&fsvq->queued_reqs);
				schedule_delayed_work(&fsvq->dispatch_work,
						msecs_to_jiffies(1));
			} else {
				pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
					 ret);
				kfree(forget);
			}
			spin_unlock(&fsvq->lock);
			return;
		}

		/* Accounted as in flight until the completion handler frees it */
		fsvq->in_flight++;
		notify = virtqueue_kick_prepare(vq);
		spin_unlock(&fsvq->lock);

		/* The notification itself is issued outside the lock */
		if (notify)
			virtqueue_notify(vq);
		pr_debug("virtio-fs: worker %s dispatched one forget request.\n",
			 __func__);
	}
}
326 
327 /* Allocate and copy args into req->argbuf */
328 static int copy_args_to_argbuf(struct fuse_req *req)
329 {
330 	struct fuse_args *args = req->args;
331 	unsigned int offset = 0;
332 	unsigned int num_in;
333 	unsigned int num_out;
334 	unsigned int len;
335 	unsigned int i;
336 
337 	num_in = args->in_numargs - args->in_pages;
338 	num_out = args->out_numargs - args->out_pages;
339 	len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
340 	      fuse_len_args(num_out, args->out_args);
341 
342 	req->argbuf = kmalloc(len, GFP_ATOMIC);
343 	if (!req->argbuf)
344 		return -ENOMEM;
345 
346 	for (i = 0; i < num_in; i++) {
347 		memcpy(req->argbuf + offset,
348 		       args->in_args[i].value,
349 		       args->in_args[i].size);
350 		offset += args->in_args[i].size;
351 	}
352 
353 	return 0;
354 }
355 
356 /* Copy args out of and free req->argbuf */
357 static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
358 {
359 	unsigned int remaining;
360 	unsigned int offset;
361 	unsigned int num_in;
362 	unsigned int num_out;
363 	unsigned int i;
364 
365 	remaining = req->out.h.len - sizeof(req->out.h);
366 	num_in = args->in_numargs - args->in_pages;
367 	num_out = args->out_numargs - args->out_pages;
368 	offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);
369 
370 	for (i = 0; i < num_out; i++) {
371 		unsigned int argsize = args->out_args[i].size;
372 
373 		if (args->out_argvar &&
374 		    i == args->out_numargs - 1 &&
375 		    argsize > remaining) {
376 			argsize = remaining;
377 		}
378 
379 		memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
380 		offset += argsize;
381 
382 		if (i != args->out_numargs - 1)
383 			remaining -= argsize;
384 	}
385 
386 	/* Store the actual size of the variable-length arg */
387 	if (args->out_argvar)
388 		args->out_args[args->out_numargs - 1].size = remaining;
389 
390 	kfree(req->argbuf);
391 	req->argbuf = NULL;
392 }
393 
/*
 * Work function for request completion.
 *
 * Phase 1 (under fsvq->lock): pull every completed fuse_req off the
 * virtqueue and move it onto a private list.
 * Phase 2 (lock dropped): for each collected request copy the reply
 * arguments out of the bounce buffer, zero the unfilled tail of reply pages
 * when requested, end the request and decrement in_flight.
 */
static void virtio_fs_requests_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct fuse_conn *fc = fsvq->fud->fc;
	struct virtqueue *vq = fsvq->vq;
	struct fuse_req *req;
	struct fuse_args_pages *ap;
	struct fuse_req *next;
	struct fuse_args *args;
	unsigned int len, i, thislen;
	struct page *page;
	LIST_HEAD(reqs);

	/* Collect completed requests off the virtqueue */
	spin_lock(&fsvq->lock);
	do {
		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			/* req->list is manipulated under fpq->lock */
			spin_lock(&fpq->lock);
			list_move_tail(&req->list, &reqs);
			spin_unlock(&fpq->lock);
		}
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	spin_unlock(&fsvq->lock);

	/* End requests */
	list_for_each_entry_safe(req, next, &reqs, list) {
		/*
		 * TODO verify that server properly follows FUSE protocol
		 * (oh.uniq, oh.len)
		 */
		args = req->args;
		copy_args_from_argbuf(args, req);

		if (args->out_pages && args->page_zeroing) {
			/* len starts as the size recorded for the last output arg */
			len = args->out_args[args->out_numargs - 1].size;
			ap = container_of(args, typeof(*ap), args);
			for (i = 0; i < ap->num_pages; i++) {
				thislen = ap->descs[i].length;
				if (len < thislen) {
					/* Zero from offset len to thislen in this page */
					WARN_ON(ap->descs[i].offset);
					page = ap->pages[i];
					zero_user_segment(page, len, thislen);
					len = 0;
				} else {
					len -= thislen;
				}
			}
		}

		spin_lock(&fpq->lock);
		clear_bit(FR_SENT, &req->flags);
		list_del_init(&req->list);
		spin_unlock(&fpq->lock);

		fuse_request_end(fc, req);
		/* The request no longer counts as in flight for drain purposes */
		spin_lock(&fsvq->lock);
		fsvq->in_flight--;
		spin_unlock(&fsvq->lock);
	}
}
459 
460 /* Virtqueue interrupt handler */
461 static void virtio_fs_vq_done(struct virtqueue *vq)
462 {
463 	struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);
464 
465 	dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);
466 
467 	schedule_work(&fsvq->done_work);
468 }
469 
470 /* Initialize virtqueues */
471 static int virtio_fs_setup_vqs(struct virtio_device *vdev,
472 			       struct virtio_fs *fs)
473 {
474 	struct virtqueue **vqs;
475 	vq_callback_t **callbacks;
476 	const char **names;
477 	unsigned int i;
478 	int ret = 0;
479 
480 	virtio_cread(vdev, struct virtio_fs_config, num_request_queues,
481 		     &fs->num_request_queues);
482 	if (fs->num_request_queues == 0)
483 		return -EINVAL;
484 
485 	fs->nvqs = 1 + fs->num_request_queues;
486 	fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
487 	if (!fs->vqs)
488 		return -ENOMEM;
489 
490 	vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
491 	callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
492 					GFP_KERNEL);
493 	names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
494 	if (!vqs || !callbacks || !names) {
495 		ret = -ENOMEM;
496 		goto out;
497 	}
498 
499 	callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
500 	snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name),
501 			"hiprio");
502 	names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
503 	INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
504 	INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
505 	INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
506 			virtio_fs_hiprio_dispatch_work);
507 	spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);
508 
509 	/* Initialize the requests virtqueues */
510 	for (i = VQ_REQUEST; i < fs->nvqs; i++) {
511 		spin_lock_init(&fs->vqs[i].lock);
512 		INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
513 		INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
514 					virtio_fs_dummy_dispatch_work);
515 		INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
516 		snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
517 			 "requests.%u", i - VQ_REQUEST);
518 		callbacks[i] = virtio_fs_vq_done;
519 		names[i] = fs->vqs[i].name;
520 	}
521 
522 	ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
523 	if (ret < 0)
524 		goto out;
525 
526 	for (i = 0; i < fs->nvqs; i++)
527 		fs->vqs[i].vq = vqs[i];
528 
529 	virtio_fs_start_all_queues(fs);
530 out:
531 	kfree(names);
532 	kfree(callbacks);
533 	kfree(vqs);
534 	if (ret)
535 		kfree(fs->vqs);
536 	return ret;
537 }
538 
539 /* Free virtqueues (device must already be reset) */
540 static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
541 				  struct virtio_fs *fs)
542 {
543 	vdev->config->del_vqs(vdev);
544 }
545 
546 static int virtio_fs_probe(struct virtio_device *vdev)
547 {
548 	struct virtio_fs *fs;
549 	int ret;
550 
551 	fs = kzalloc(sizeof(*fs), GFP_KERNEL);
552 	if (!fs)
553 		return -ENOMEM;
554 	kref_init(&fs->refcount);
555 	vdev->priv = fs;
556 
557 	ret = virtio_fs_read_tag(vdev, fs);
558 	if (ret < 0)
559 		goto out;
560 
561 	ret = virtio_fs_setup_vqs(vdev, fs);
562 	if (ret < 0)
563 		goto out;
564 
565 	/* TODO vq affinity */
566 
567 	/* Bring the device online in case the filesystem is mounted and
568 	 * requests need to be sent before we return.
569 	 */
570 	virtio_device_ready(vdev);
571 
572 	ret = virtio_fs_add_instance(fs);
573 	if (ret < 0)
574 		goto out_vqs;
575 
576 	return 0;
577 
578 out_vqs:
579 	vdev->config->reset(vdev);
580 	virtio_fs_cleanup_vqs(vdev, fs);
581 
582 out:
583 	vdev->priv = NULL;
584 	kfree(fs);
585 	return ret;
586 }
587 
588 static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
589 {
590 	struct virtio_fs_vq *fsvq;
591 	int i;
592 
593 	for (i = 0; i < fs->nvqs; i++) {
594 		fsvq = &fs->vqs[i];
595 		spin_lock(&fsvq->lock);
596 		fsvq->connected = false;
597 		spin_unlock(&fsvq->lock);
598 	}
599 }
600 
/*
 * Device removal.
 *
 * Under virtio_fs_mutex: unlink the instance from the global list, stop and
 * drain all queues, reset the device, delete the virtqueues and drop the
 * reference taken at probe time. The virtio_fs object itself is only freed
 * once the last reference goes away.
 */
static void virtio_fs_remove(struct virtio_device *vdev)
{
	struct virtio_fs *fs = vdev->priv;

	mutex_lock(&virtio_fs_mutex);
	/* This device is going away. No one should get new reference */
	list_del_init(&fs->list);
	/* Refuse new submissions first, then wait for outstanding ones */
	virtio_fs_stop_all_queues(fs);
	virtio_fs_drain_all_queues(fs);
	/* Virtqueues may only be deleted after the device has been reset */
	vdev->config->reset(vdev);
	virtio_fs_cleanup_vqs(vdev, fs);

	vdev->priv = NULL;
	/* Put device reference on virtio_fs object */
	virtio_fs_put(fs);
	mutex_unlock(&virtio_fs_mutex);
}
618 
#ifdef CONFIG_PM_SLEEP
/*
 * Suspend hook. Suspend is not supported yet, so this warns and returns
 * -EOPNOTSUPP.
 */
static int virtio_fs_freeze(struct virtio_device *vdev)
{
	/* TODO need to save state here */
	pr_warn("virtio-fs: suspend/resume not yet supported\n");
	return -EOPNOTSUPP;
}

/* Resume hook. Nothing is restored yet; always returns 0. */
static int virtio_fs_restore(struct virtio_device *vdev)
{
	 /* TODO need to restore state here */
	return 0;
}
#endif /* CONFIG_PM_SLEEP */
633 
634 const static struct virtio_device_id id_table[] = {
635 	{ VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
636 	{},
637 };
638 
639 const static unsigned int feature_table[] = {};
640 
/*
 * virtio driver registration record. The freeze/restore hooks are only
 * present when CONFIG_PM_SLEEP is enabled.
 */
static struct virtio_driver virtio_fs_driver = {
	.driver.name		= KBUILD_MODNAME,
	.driver.owner		= THIS_MODULE,
	.id_table		= id_table,
	.feature_table		= feature_table,
	.feature_table_size	= ARRAY_SIZE(feature_table),
	.probe			= virtio_fs_probe,
	.remove			= virtio_fs_remove,
#ifdef CONFIG_PM_SLEEP
	.freeze			= virtio_fs_freeze,
	.restore		= virtio_fs_restore,
#endif
};
654 
/*
 * fuse_iqueue hook: submit one FORGET request on the hiprio queue.
 *
 * Called with fiq->lock held; the lock is released before the request buffer
 * is allocated. One forget link is dequeued, turned into a virtio_fs_forget
 * and added to the virtqueue. If the queue is disconnected the request is
 * dropped. If the virtqueue returns -ENOMEM or -ENOSPC the request is parked
 * on queued_reqs and the dispatch work is scheduled to retry; other errors
 * drop the request. The dequeued link is freed in every case.
 */
static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	struct fuse_forget_link *link;
	struct virtio_fs_forget *forget;
	struct scatterlist sg;
	struct scatterlist *sgs[] = {&sg};
	struct virtio_fs *fs;
	struct virtqueue *vq;
	struct virtio_fs_vq *fsvq;
	bool notify;
	u64 unique;
	int ret;

	/* Both calls are made while fiq->lock is still held */
	link = fuse_dequeue_forget(fiq, 1, NULL);
	unique = fuse_get_unique(fiq);

	fs = fiq->priv;
	fsvq = &fs->vqs[VQ_HIPRIO];
	spin_unlock(&fiq->lock);

	/* Allocate a buffer for the request */
	forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);

	forget->ih = (struct fuse_in_header){
		.opcode = FUSE_FORGET,
		.nodeid = link->forget_one.nodeid,
		.unique = unique,
		.len = sizeof(*forget),
	};
	forget->arg = (struct fuse_forget_in){
		.nlookup = link->forget_one.nlookup,
	};

	sg_init_one(&sg, forget, sizeof(*forget));

	/* Enqueue the request */
	spin_lock(&fsvq->lock);

	if (!fsvq->connected) {
		/* Queue has been stopped: silently drop the FORGET */
		kfree(forget);
		spin_unlock(&fsvq->lock);
		goto out;
	}

	vq = fsvq->vq;
	dev_dbg(&vq->vdev->dev, "%s\n", __func__);

	ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
	if (ret < 0) {
		if (ret == -ENOMEM || ret == -ENOSPC) {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later.\n",
				 ret);
			/* Retried later by virtio_fs_hiprio_dispatch_work() */
			list_add_tail(&forget->list, &fsvq->queued_reqs);
			schedule_delayed_work(&fsvq->dispatch_work,
					msecs_to_jiffies(1));
		} else {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
				 ret);
			kfree(forget);
		}
		spin_unlock(&fsvq->lock);
		goto out;
	}

	/* Freed and un-accounted by virtio_fs_hiprio_done_work() */
	fsvq->in_flight++;
	notify = virtqueue_kick_prepare(vq);

	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);
out:
	kfree(link);
}
730 
/*
 * fuse_iqueue hook for interrupt requests. Interrupts are not implemented;
 * the hook only releases fiq->lock, which it is entered with.
 */
static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	/*
	 * TODO interrupts.
	 *
	 * Normal fs operations on a local filesystems aren't interruptible.
	 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
	 * with shared lock between host and guest.
	 */
	spin_unlock(&fiq->lock);
}
743 
744 /* Return the number of scatter-gather list elements required */
745 static unsigned int sg_count_fuse_req(struct fuse_req *req)
746 {
747 	struct fuse_args *args = req->args;
748 	struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
749 	unsigned int total_sgs = 1 /* fuse_in_header */;
750 
751 	if (args->in_numargs - args->in_pages)
752 		total_sgs += 1;
753 
754 	if (args->in_pages)
755 		total_sgs += ap->num_pages;
756 
757 	if (!test_bit(FR_ISREPLY, &req->flags))
758 		return total_sgs;
759 
760 	total_sgs += 1 /* fuse_out_header */;
761 
762 	if (args->out_numargs - args->out_pages)
763 		total_sgs += 1;
764 
765 	if (args->out_pages)
766 		total_sgs += ap->num_pages;
767 
768 	return total_sgs;
769 }
770 
771 /* Add pages to scatter-gather list and return number of elements used */
772 static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
773 				       struct page **pages,
774 				       struct fuse_page_desc *page_descs,
775 				       unsigned int num_pages,
776 				       unsigned int total_len)
777 {
778 	unsigned int i;
779 	unsigned int this_len;
780 
781 	for (i = 0; i < num_pages && total_len; i++) {
782 		sg_init_table(&sg[i], 1);
783 		this_len =  min(page_descs[i].length, total_len);
784 		sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
785 		total_len -= this_len;
786 	}
787 
788 	return i;
789 }
790 
791 /* Add args to scatter-gather list and return number of elements used */
792 static unsigned int sg_init_fuse_args(struct scatterlist *sg,
793 				      struct fuse_req *req,
794 				      struct fuse_arg *args,
795 				      unsigned int numargs,
796 				      bool argpages,
797 				      void *argbuf,
798 				      unsigned int *len_used)
799 {
800 	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
801 	unsigned int total_sgs = 0;
802 	unsigned int len;
803 
804 	len = fuse_len_args(numargs - argpages, args);
805 	if (len)
806 		sg_init_one(&sg[total_sgs++], argbuf, len);
807 
808 	if (argpages)
809 		total_sgs += sg_init_fuse_pages(&sg[total_sgs],
810 						ap->pages, ap->descs,
811 						ap->num_pages,
812 						args[numargs - 1].size);
813 
814 	if (len_used)
815 		*len_used = len;
816 
817 	return total_sgs;
818 }
819 
/*
 * Add a request to a virtqueue and kick the device.
 *
 * Builds the scatter-gather list for the request (header, bounce-buffered
 * inline args and pages, plus the reply elements when a reply is expected)
 * and submits it under fsvq->lock.
 *
 * Returns 0 on success. Returns -ENOMEM if a temporary allocation fails,
 * -ENOTCONN if the queue is disconnected, or the virtqueue_add_sgs() error.
 * On any failure req->argbuf is freed and reset to NULL, so the call can be
 * retried.
 */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req)
{
	/* requests need at least 4 elements */
	struct scatterlist *stack_sgs[6];
	struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
	struct scatterlist **sgs = stack_sgs;
	struct scatterlist *sg = stack_sg;
	struct virtqueue *vq;
	struct fuse_args *args = req->args;
	unsigned int argbuf_used = 0;
	unsigned int out_sgs = 0;
	unsigned int in_sgs = 0;
	unsigned int total_sgs;
	unsigned int i;
	int ret;
	bool notify;

	/* Does the sglist fit on the stack? */
	total_sgs = sg_count_fuse_req(req);
	if (total_sgs > ARRAY_SIZE(stack_sgs)) {
		sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
		sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
		if (!sgs || !sg) {
			ret = -ENOMEM;
			goto out;
		}
	}

	/* Use a bounce buffer since stack args cannot be mapped */
	ret = copy_args_to_argbuf(req);
	if (ret < 0)
		goto out;

	/* Request elements */
	sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
	out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
				     (struct fuse_arg *)args->in_args,
				     args->in_numargs, args->in_pages,
				     req->argbuf, &argbuf_used);

	/* Reply elements */
	if (test_bit(FR_ISREPLY, &req->flags)) {
		sg_init_one(&sg[out_sgs + in_sgs++],
			    &req->out.h, sizeof(req->out.h));
		/* Reply args land in argbuf right after the request args */
		in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
					    args->out_args, args->out_numargs,
					    args->out_pages,
					    req->argbuf + argbuf_used, NULL);
	}

	WARN_ON(out_sgs + in_sgs != total_sgs);

	for (i = 0; i < total_sgs; i++)
		sgs[i] = &sg[i];

	spin_lock(&fsvq->lock);

	if (!fsvq->connected) {
		spin_unlock(&fsvq->lock);
		ret = -ENOTCONN;
		goto out;
	}

	vq = fsvq->vq;
	ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
	if (ret < 0) {
		spin_unlock(&fsvq->lock);
		goto out;
	}

	/* Decremented again by virtio_fs_requests_done_work() */
	fsvq->in_flight++;
	notify = virtqueue_kick_prepare(vq);

	spin_unlock(&fsvq->lock);

	/* The notification itself is issued outside the lock */
	if (notify)
		virtqueue_notify(vq);

out:
	/* The bounce buffer is only kept while the request is queued */
	if (ret < 0 && req->argbuf) {
		kfree(req->argbuf);
		req->argbuf = NULL;
	}
	/* Both arrays are heap-allocated together whenever sgs is */
	if (sgs != stack_sgs) {
		kfree(sgs);
		kfree(sg);
	}

	return ret;
}
912 
/*
 * fuse_iqueue hook: submit the pending request on the first request queue.
 *
 * Called with fiq->lock held; exactly one request is expected on
 * fiq->pending. The request is removed from the pending list, the lock is
 * dropped, and the request is added to the processing queue and submitted
 * to the virtqueue. Submission is retried after a short sleep on -ENOMEM or
 * -ENOSPC; any other failure ends the request with that error.
 */
static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
	struct virtio_fs *fs;
	struct fuse_conn *fc;
	struct fuse_req *req;
	struct fuse_pqueue *fpq;
	int ret;

	WARN_ON(list_empty(&fiq->pending));
	req = list_last_entry(&fiq->pending, struct fuse_req, list);
	clear_bit(FR_PENDING, &req->flags);
	list_del_init(&req->list);
	/* The list is expected to be empty once the one request is removed */
	WARN_ON(!list_empty(&fiq->pending));
	spin_unlock(&fiq->lock);

	fs = fiq->priv;
	fc = fs->vqs[queue_id].fud->fc;

	pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
		  __func__, req->in.h.opcode, req->in.h.unique,
		 req->in.h.nodeid, req->in.h.len,
		 fuse_len_args(req->args->out_numargs, req->args->out_args));

	fpq = &fs->vqs[queue_id].fud->pq;
	spin_lock(&fpq->lock);
	if (!fpq->connected) {
		/* Processing queue already disconnected: fail the request */
		spin_unlock(&fpq->lock);
		req->out.h.error = -ENODEV;
		pr_err("virtio-fs: %s disconnected\n", __func__);
		fuse_request_end(fc, req);
		return;
	}
	list_add_tail(&req->list, fpq->processing);
	spin_unlock(&fpq->lock);
	set_bit(FR_SENT, &req->flags);
	/* matches barrier in request_wait_answer() */
	smp_mb__after_atomic();

retry:
	ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req);
	if (ret < 0) {
		if (ret == -ENOMEM || ret == -ENOSPC) {
			/* Virtqueue full. Retry submission */
			/* TODO use completion instead of timeout */
			usleep_range(20, 30);
			goto retry;
		}
		/* Hard failure: undo the processing-queue insertion and end it */
		req->out.h.error = ret;
		pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
		spin_lock(&fpq->lock);
		clear_bit(FR_SENT, &req->flags);
		list_del_init(&req->list);
		spin_unlock(&fpq->lock);
		fuse_request_end(fc, req);
		return;
	}
}
972 
973 const static struct fuse_iqueue_ops virtio_fs_fiq_ops = {
974 	.wake_forget_and_unlock		= virtio_fs_wake_forget_and_unlock,
975 	.wake_interrupt_and_unlock	= virtio_fs_wake_interrupt_and_unlock,
976 	.wake_pending_and_unlock	= virtio_fs_wake_pending_and_unlock,
977 	.release			= virtio_fs_fiq_release,
978 };
979 
980 static int virtio_fs_fill_super(struct super_block *sb)
981 {
982 	struct fuse_conn *fc = get_fuse_conn_super(sb);
983 	struct virtio_fs *fs = fc->iq.priv;
984 	unsigned int i;
985 	int err;
986 	struct fuse_fs_context ctx = {
987 		.rootmode = S_IFDIR,
988 		.default_permissions = 1,
989 		.allow_other = 1,
990 		.max_read = UINT_MAX,
991 		.blksize = 512,
992 		.destroy = true,
993 		.no_control = true,
994 		.no_force_umount = true,
995 	};
996 
997 	mutex_lock(&virtio_fs_mutex);
998 
999 	/* After holding mutex, make sure virtiofs device is still there.
1000 	 * Though we are holding a reference to it, drive ->remove might
1001 	 * still have cleaned up virtual queues. In that case bail out.
1002 	 */
1003 	err = -EINVAL;
1004 	if (list_empty(&fs->list)) {
1005 		pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
1006 		goto err;
1007 	}
1008 
1009 	err = -ENOMEM;
1010 	/* Allocate fuse_dev for hiprio and notification queues */
1011 	for (i = 0; i < VQ_REQUEST; i++) {
1012 		struct virtio_fs_vq *fsvq = &fs->vqs[i];
1013 
1014 		fsvq->fud = fuse_dev_alloc();
1015 		if (!fsvq->fud)
1016 			goto err_free_fuse_devs;
1017 	}
1018 
1019 	ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud;
1020 	err = fuse_fill_super_common(sb, &ctx);
1021 	if (err < 0)
1022 		goto err_free_fuse_devs;
1023 
1024 	fc = fs->vqs[VQ_REQUEST].fud->fc;
1025 
1026 	for (i = 0; i < fs->nvqs; i++) {
1027 		struct virtio_fs_vq *fsvq = &fs->vqs[i];
1028 
1029 		if (i == VQ_REQUEST)
1030 			continue; /* already initialized */
1031 		fuse_dev_install(fsvq->fud, fc);
1032 	}
1033 
1034 	/* Previous unmount will stop all queues. Start these again */
1035 	virtio_fs_start_all_queues(fs);
1036 	fuse_send_init(fc);
1037 	mutex_unlock(&virtio_fs_mutex);
1038 	return 0;
1039 
1040 err_free_fuse_devs:
1041 	virtio_fs_free_devs(fs);
1042 err:
1043 	mutex_unlock(&virtio_fs_mutex);
1044 	return err;
1045 }
1046 
/*
 * Tear down a virtio-fs superblock.
 *
 * The hiprio (forget) queue is stopped and all queues are drained before the
 * generic FUSE teardown runs; afterwards every queue is stopped and drained
 * again and the per-queue fuse_devs are freed.
 */
static void virtio_kill_sb(struct super_block *sb)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct virtio_fs *vfs;
	struct virtio_fs_vq *fsvq;

	/* If mount failed, we can still be called without any fc */
	if (!fc)
		return fuse_kill_sb_anon(sb);

	vfs = fc->iq.priv;
	fsvq = &vfs->vqs[VQ_HIPRIO];

	/* Stop forget queue. Soon destroy will be sent */
	spin_lock(&fsvq->lock);
	fsvq->connected = false;
	spin_unlock(&fsvq->lock);
	virtio_fs_drain_all_queues(vfs);

	fuse_kill_sb_anon(sb);

	/* fuse_kill_sb_anon() must have sent destroy. Stop all queues
	 * and drain one more time and free fuse devices. Freeing fuse
	 * devices will drop their reference on fuse_conn and that in
	 * turn will drop its reference on virtio_fs object.
	 */
	virtio_fs_stop_all_queues(vfs);
	virtio_fs_drain_all_queues(vfs);
	virtio_fs_free_devs(vfs);
}
1077 
1078 static int virtio_fs_test_super(struct super_block *sb,
1079 				struct fs_context *fsc)
1080 {
1081 	struct fuse_conn *fc = fsc->s_fs_info;
1082 
1083 	return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv;
1084 }
1085 
1086 static int virtio_fs_set_super(struct super_block *sb,
1087 			       struct fs_context *fsc)
1088 {
1089 	int err;
1090 
1091 	err = get_anon_bdev(&sb->s_dev);
1092 	if (!err)
1093 		fuse_conn_get(fsc->s_fs_info);
1094 
1095 	return err;
1096 }
1097 
1098 static int virtio_fs_get_tree(struct fs_context *fsc)
1099 {
1100 	struct virtio_fs *fs;
1101 	struct super_block *sb;
1102 	struct fuse_conn *fc;
1103 	int err;
1104 
1105 	/* This gets a reference on virtio_fs object. This ptr gets installed
1106 	 * in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
1107 	 * to drop the reference to this object.
1108 	 */
1109 	fs = virtio_fs_find_instance(fsc->source);
1110 	if (!fs) {
1111 		pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
1112 		return -EINVAL;
1113 	}
1114 
1115 	fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
1116 	if (!fc) {
1117 		mutex_lock(&virtio_fs_mutex);
1118 		virtio_fs_put(fs);
1119 		mutex_unlock(&virtio_fs_mutex);
1120 		return -ENOMEM;
1121 	}
1122 
1123 	fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops,
1124 		       fs);
1125 	fc->release = fuse_free_conn;
1126 	fc->delete_stale = true;
1127 
1128 	fsc->s_fs_info = fc;
1129 	sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super);
1130 	fuse_conn_put(fc);
1131 	if (IS_ERR(sb))
1132 		return PTR_ERR(sb);
1133 
1134 	if (!sb->s_root) {
1135 		err = virtio_fs_fill_super(sb);
1136 		if (err) {
1137 			deactivate_locked_super(sb);
1138 			return err;
1139 		}
1140 
1141 		sb->s_flags |= SB_ACTIVE;
1142 	}
1143 
1144 	WARN_ON(fsc->root);
1145 	fsc->root = dget(sb->s_root);
1146 	return 0;
1147 }
1148 
/* fs_context operations; only ->get_tree is provided */
static const struct fs_context_operations virtio_fs_context_ops = {
	.get_tree	= virtio_fs_get_tree,
};
1152 
/* Install the virtio-fs context operations on a new fs_context. Returns 0. */
static int virtio_fs_init_fs_context(struct fs_context *fsc)
{
	fsc->ops = &virtio_fs_context_ops;
	return 0;
}
1158 
/* Filesystem type registered under the name "virtiofs" */
static struct file_system_type virtio_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "virtiofs",
	.init_fs_context = virtio_fs_init_fs_context,
	.kill_sb	= virtio_kill_sb,
};
1165 
1166 static int __init virtio_fs_init(void)
1167 {
1168 	int ret;
1169 
1170 	ret = register_virtio_driver(&virtio_fs_driver);
1171 	if (ret < 0)
1172 		return ret;
1173 
1174 	ret = register_filesystem(&virtio_fs_type);
1175 	if (ret < 0) {
1176 		unregister_virtio_driver(&virtio_fs_driver);
1177 		return ret;
1178 	}
1179 
1180 	return 0;
1181 }
1182 module_init(virtio_fs_init);
1183 
/*
 * Module exit: unregister in the reverse order of virtio_fs_init() — the
 * filesystem type first, then the virtio driver.
 */
static void __exit virtio_fs_exit(void)
{
	unregister_filesystem(&virtio_fs_type);
	unregister_virtio_driver(&virtio_fs_driver);
}
module_exit(virtio_fs_exit);
1190 
/* Module metadata, filesystem alias and virtio device table export */
MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
MODULE_DESCRIPTION("Virtio Filesystem");
MODULE_LICENSE("GPL");
MODULE_ALIAS_FS(KBUILD_MODNAME);
MODULE_DEVICE_TABLE(virtio, id_table);
1196