1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * virtio-fs: Virtio Filesystem 4 * Copyright (C) 2018 Red Hat, Inc. 5 */ 6 7 #include <linux/fs.h> 8 #include <linux/module.h> 9 #include <linux/virtio.h> 10 #include <linux/virtio_fs.h> 11 #include <linux/delay.h> 12 #include <linux/fs_context.h> 13 #include <linux/highmem.h> 14 #include "fuse_i.h" 15 16 /* List of virtio-fs device instances and a lock for the list. Also provides 17 * mutual exclusion in device removal and mounting path 18 */ 19 static DEFINE_MUTEX(virtio_fs_mutex); 20 static LIST_HEAD(virtio_fs_instances); 21 22 enum { 23 VQ_HIPRIO, 24 VQ_REQUEST 25 }; 26 27 /* Per-virtqueue state */ 28 struct virtio_fs_vq { 29 spinlock_t lock; 30 struct virtqueue *vq; /* protected by ->lock */ 31 struct work_struct done_work; 32 struct list_head queued_reqs; 33 struct list_head end_reqs; /* End these requests */ 34 struct delayed_work dispatch_work; 35 struct fuse_dev *fud; 36 bool connected; 37 long in_flight; 38 struct completion in_flight_zero; /* No inflight requests */ 39 char name[24]; 40 } ____cacheline_aligned_in_smp; 41 42 /* A virtio-fs device instance */ 43 struct virtio_fs { 44 struct kref refcount; 45 struct list_head list; /* on virtio_fs_instances */ 46 char *tag; 47 struct virtio_fs_vq *vqs; 48 unsigned int nvqs; /* number of virtqueues */ 49 unsigned int num_request_queues; /* number of request queues */ 50 }; 51 52 struct virtio_fs_forget_req { 53 struct fuse_in_header ih; 54 struct fuse_forget_in arg; 55 }; 56 57 struct virtio_fs_forget { 58 /* This request can be temporarily queued on virt queue */ 59 struct list_head list; 60 struct virtio_fs_forget_req req; 61 }; 62 63 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 64 struct fuse_req *req, bool in_flight); 65 66 static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) 67 { 68 struct virtio_fs *fs = vq->vdev->priv; 69 70 return &fs->vqs[vq->index]; 71 } 72 73 static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq) 74 { 75 return &vq_to_fsvq(vq)->fud->pq; 76 } 77 78 /* Should be called with fsvq->lock held. */ 79 static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq) 80 { 81 fsvq->in_flight++; 82 } 83 84 /* Should be called with fsvq->lock held. */ 85 static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq) 86 { 87 WARN_ON(fsvq->in_flight <= 0); 88 fsvq->in_flight--; 89 if (!fsvq->in_flight) 90 complete(&fsvq->in_flight_zero); 91 } 92 93 static void release_virtio_fs_obj(struct kref *ref) 94 { 95 struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount); 96 97 kfree(vfs->vqs); 98 kfree(vfs); 99 } 100 101 /* Make sure virtiofs_mutex is held */ 102 static void virtio_fs_put(struct virtio_fs *fs) 103 { 104 kref_put(&fs->refcount, release_virtio_fs_obj); 105 } 106 107 static void virtio_fs_fiq_release(struct fuse_iqueue *fiq) 108 { 109 struct virtio_fs *vfs = fiq->priv; 110 111 mutex_lock(&virtio_fs_mutex); 112 virtio_fs_put(vfs); 113 mutex_unlock(&virtio_fs_mutex); 114 } 115 116 static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) 117 { 118 WARN_ON(fsvq->in_flight < 0); 119 120 /* Wait for in flight requests to finish.*/ 121 spin_lock(&fsvq->lock); 122 if (fsvq->in_flight) { 123 /* We are holding virtio_fs_mutex. There should not be any 124 * waiters waiting for completion. 125 */ 126 reinit_completion(&fsvq->in_flight_zero); 127 spin_unlock(&fsvq->lock); 128 wait_for_completion(&fsvq->in_flight_zero); 129 } else { 130 spin_unlock(&fsvq->lock); 131 } 132 133 flush_work(&fsvq->done_work); 134 flush_delayed_work(&fsvq->dispatch_work); 135 } 136 137 static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs) 138 { 139 struct virtio_fs_vq *fsvq; 140 int i; 141 142 for (i = 0; i < fs->nvqs; i++) { 143 fsvq = &fs->vqs[i]; 144 virtio_fs_drain_queue(fsvq); 145 } 146 } 147 148 static void virtio_fs_drain_all_queues(struct virtio_fs *fs) 149 { 150 /* Provides mutual exclusion between ->remove and ->kill_sb 151 * paths. We don't want both of these draining queue at the 152 * same time. Current completion logic reinits completion 153 * and that means there should not be any other thread 154 * doing reinit or waiting for completion already. 155 */ 156 mutex_lock(&virtio_fs_mutex); 157 virtio_fs_drain_all_queues_locked(fs); 158 mutex_unlock(&virtio_fs_mutex); 159 } 160 161 static void virtio_fs_start_all_queues(struct virtio_fs *fs) 162 { 163 struct virtio_fs_vq *fsvq; 164 int i; 165 166 for (i = 0; i < fs->nvqs; i++) { 167 fsvq = &fs->vqs[i]; 168 spin_lock(&fsvq->lock); 169 fsvq->connected = true; 170 spin_unlock(&fsvq->lock); 171 } 172 } 173 174 /* Add a new instance to the list or return -EEXIST if tag name exists*/ 175 static int virtio_fs_add_instance(struct virtio_fs *fs) 176 { 177 struct virtio_fs *fs2; 178 bool duplicate = false; 179 180 mutex_lock(&virtio_fs_mutex); 181 182 list_for_each_entry(fs2, &virtio_fs_instances, list) { 183 if (strcmp(fs->tag, fs2->tag) == 0) 184 duplicate = true; 185 } 186 187 if (!duplicate) 188 list_add_tail(&fs->list, &virtio_fs_instances); 189 190 mutex_unlock(&virtio_fs_mutex); 191 192 if (duplicate) 193 return -EEXIST; 194 return 0; 195 } 196 197 /* Return the virtio_fs with a given tag, or NULL */ 198 static struct virtio_fs *virtio_fs_find_instance(const char *tag) 199 { 200 struct virtio_fs *fs; 201 202 mutex_lock(&virtio_fs_mutex); 203 204 list_for_each_entry(fs, &virtio_fs_instances, list) { 205 if (strcmp(fs->tag, tag) == 0) { 206 kref_get(&fs->refcount); 207 goto found; 208 } 209 } 210 211 fs = NULL; /* not found */ 212 213 found: 214 mutex_unlock(&virtio_fs_mutex); 215 216 return fs; 217 } 218 219 static void virtio_fs_free_devs(struct virtio_fs *fs) 220 { 221 unsigned int i; 222 223 for (i = 0; i < fs->nvqs; i++) { 224 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 225 226 if (!fsvq->fud) 227 continue; 228 229 fuse_dev_free(fsvq->fud); 230 fsvq->fud = NULL; 231 } 232 } 233 234 /* Read filesystem name from virtio config into fs->tag (must kfree()). */ 235 static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs) 236 { 237 char tag_buf[sizeof_field(struct virtio_fs_config, tag)]; 238 char *end; 239 size_t len; 240 241 virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag), 242 &tag_buf, sizeof(tag_buf)); 243 end = memchr(tag_buf, '\0', sizeof(tag_buf)); 244 if (end == tag_buf) 245 return -EINVAL; /* empty tag */ 246 if (!end) 247 end = &tag_buf[sizeof(tag_buf)]; 248 249 len = end - tag_buf; 250 fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL); 251 if (!fs->tag) 252 return -ENOMEM; 253 memcpy(fs->tag, tag_buf, len); 254 fs->tag[len] = '\0'; 255 return 0; 256 } 257 258 /* Work function for hiprio completion */ 259 static void virtio_fs_hiprio_done_work(struct work_struct *work) 260 { 261 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 262 done_work); 263 struct virtqueue *vq = fsvq->vq; 264 265 /* Free completed FUSE_FORGET requests */ 266 spin_lock(&fsvq->lock); 267 do { 268 unsigned int len; 269 void *req; 270 271 virtqueue_disable_cb(vq); 272 273 while ((req = virtqueue_get_buf(vq, &len)) != NULL) { 274 kfree(req); 275 dec_in_flight_req(fsvq); 276 } 277 } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); 278 spin_unlock(&fsvq->lock); 279 } 280 281 static void virtio_fs_request_dispatch_work(struct work_struct *work) 282 { 283 struct fuse_req *req; 284 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 285 dispatch_work.work); 286 struct fuse_conn *fc = fsvq->fud->fc; 287 int ret; 288 289 pr_debug("virtio-fs: worker %s called.\n", __func__); 290 while (1) { 291 spin_lock(&fsvq->lock); 292 req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req, 293 list); 294 if (!req) { 295 spin_unlock(&fsvq->lock); 296 break; 297 } 298 299 list_del_init(&req->list); 300 spin_unlock(&fsvq->lock); 301 fuse_request_end(fc, req); 302 } 303 304 /* Dispatch pending requests */ 305 while (1) { 306 spin_lock(&fsvq->lock); 307 req = list_first_entry_or_null(&fsvq->queued_reqs, 308 struct fuse_req, list); 309 if (!req) { 310 spin_unlock(&fsvq->lock); 311 return; 312 } 313 list_del_init(&req->list); 314 spin_unlock(&fsvq->lock); 315 316 ret = virtio_fs_enqueue_req(fsvq, req, true); 317 if (ret < 0) { 318 if (ret == -ENOMEM || ret == -ENOSPC) { 319 spin_lock(&fsvq->lock); 320 list_add_tail(&req->list, &fsvq->queued_reqs); 321 schedule_delayed_work(&fsvq->dispatch_work, 322 msecs_to_jiffies(1)); 323 spin_unlock(&fsvq->lock); 324 return; 325 } 326 req->out.h.error = ret; 327 spin_lock(&fsvq->lock); 328 dec_in_flight_req(fsvq); 329 spin_unlock(&fsvq->lock); 330 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", 331 ret); 332 fuse_request_end(fc, req); 333 } 334 } 335 } 336 337 /* 338 * Returns 1 if queue is full and sender should wait a bit before sending 339 * next request, 0 otherwise. 340 */ 341 static int send_forget_request(struct virtio_fs_vq *fsvq, 342 struct virtio_fs_forget *forget, 343 bool in_flight) 344 { 345 struct scatterlist sg; 346 struct virtqueue *vq; 347 int ret = 0; 348 bool notify; 349 struct virtio_fs_forget_req *req = &forget->req; 350 351 spin_lock(&fsvq->lock); 352 if (!fsvq->connected) { 353 if (in_flight) 354 dec_in_flight_req(fsvq); 355 kfree(forget); 356 goto out; 357 } 358 359 sg_init_one(&sg, req, sizeof(*req)); 360 vq = fsvq->vq; 361 dev_dbg(&vq->vdev->dev, "%s\n", __func__); 362 363 ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC); 364 if (ret < 0) { 365 if (ret == -ENOMEM || ret == -ENOSPC) { 366 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n", 367 ret); 368 list_add_tail(&forget->list, &fsvq->queued_reqs); 369 schedule_delayed_work(&fsvq->dispatch_work, 370 msecs_to_jiffies(1)); 371 if (!in_flight) 372 inc_in_flight_req(fsvq); 373 /* Queue is full */ 374 ret = 1; 375 } else { 376 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n", 377 ret); 378 kfree(forget); 379 if (in_flight) 380 dec_in_flight_req(fsvq); 381 } 382 goto out; 383 } 384 385 if (!in_flight) 386 inc_in_flight_req(fsvq); 387 notify = virtqueue_kick_prepare(vq); 388 spin_unlock(&fsvq->lock); 389 390 if (notify) 391 virtqueue_notify(vq); 392 return ret; 393 out: 394 spin_unlock(&fsvq->lock); 395 return ret; 396 } 397 398 static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) 399 { 400 struct virtio_fs_forget *forget; 401 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 402 dispatch_work.work); 403 pr_debug("virtio-fs: worker %s called.\n", __func__); 404 while (1) { 405 spin_lock(&fsvq->lock); 406 forget = list_first_entry_or_null(&fsvq->queued_reqs, 407 struct virtio_fs_forget, list); 408 if (!forget) { 409 spin_unlock(&fsvq->lock); 410 return; 411 } 412 413 list_del(&forget->list); 414 spin_unlock(&fsvq->lock); 415 if (send_forget_request(fsvq, forget, true)) 416 return; 417 } 418 } 419 420 /* Allocate and copy args into req->argbuf */ 421 static int copy_args_to_argbuf(struct fuse_req *req) 422 { 423 struct fuse_args *args = req->args; 424 unsigned int offset = 0; 425 unsigned int num_in; 426 unsigned int num_out; 427 unsigned int len; 428 unsigned int i; 429 430 num_in = args->in_numargs - args->in_pages; 431 num_out = args->out_numargs - args->out_pages; 432 len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) + 433 fuse_len_args(num_out, args->out_args); 434 435 req->argbuf = kmalloc(len, GFP_ATOMIC); 436 if (!req->argbuf) 437 return -ENOMEM; 438 439 for (i = 0; i < num_in; i++) { 440 memcpy(req->argbuf + offset, 441 args->in_args[i].value, 442 args->in_args[i].size); 443 offset += args->in_args[i].size; 444 } 445 446 return 0; 447 } 448 449 /* Copy args out of and free req->argbuf */ 450 static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req) 451 { 452 unsigned int remaining; 453 unsigned int offset; 454 unsigned int num_in; 455 unsigned int num_out; 456 unsigned int i; 457 458 remaining = req->out.h.len - sizeof(req->out.h); 459 num_in = args->in_numargs - args->in_pages; 460 num_out = args->out_numargs - args->out_pages; 461 offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args); 462 463 for (i = 0; i < num_out; i++) { 464 unsigned int argsize = args->out_args[i].size; 465 466 if (args->out_argvar && 467 i == args->out_numargs - 1 && 468 argsize > remaining) { 469 argsize = remaining; 470 } 471 472 memcpy(args->out_args[i].value, req->argbuf + offset, argsize); 473 offset += argsize; 474 475 if (i != args->out_numargs - 1) 476 remaining -= argsize; 477 } 478 479 /* Store the actual size of the variable-length arg */ 480 if (args->out_argvar) 481 args->out_args[args->out_numargs - 1].size = remaining; 482 483 kfree(req->argbuf); 484 req->argbuf = NULL; 485 } 486 487 /* Work function for request completion */ 488 static void virtio_fs_requests_done_work(struct work_struct *work) 489 { 490 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 491 done_work); 492 struct fuse_pqueue *fpq = &fsvq->fud->pq; 493 struct fuse_conn *fc = fsvq->fud->fc; 494 struct virtqueue *vq = fsvq->vq; 495 struct fuse_req *req; 496 struct fuse_args_pages *ap; 497 struct fuse_req *next; 498 struct fuse_args *args; 499 unsigned int len, i, thislen; 500 struct page *page; 501 LIST_HEAD(reqs); 502 503 /* Collect completed requests off the virtqueue */ 504 spin_lock(&fsvq->lock); 505 do { 506 virtqueue_disable_cb(vq); 507 508 while ((req = virtqueue_get_buf(vq, &len)) != NULL) { 509 spin_lock(&fpq->lock); 510 list_move_tail(&req->list, &reqs); 511 spin_unlock(&fpq->lock); 512 } 513 } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); 514 spin_unlock(&fsvq->lock); 515 516 /* End requests */ 517 list_for_each_entry_safe(req, next, &reqs, list) { 518 /* 519 * TODO verify that server properly follows FUSE protocol 520 * (oh.uniq, oh.len) 521 */ 522 args = req->args; 523 copy_args_from_argbuf(args, req); 524 525 if (args->out_pages && args->page_zeroing) { 526 len = args->out_args[args->out_numargs - 1].size; 527 ap = container_of(args, typeof(*ap), args); 528 for (i = 0; i < ap->num_pages; i++) { 529 thislen = ap->descs[i].length; 530 if (len < thislen) { 531 WARN_ON(ap->descs[i].offset); 532 page = ap->pages[i]; 533 zero_user_segment(page, len, thislen); 534 len = 0; 535 } else { 536 len -= thislen; 537 } 538 } 539 } 540 541 spin_lock(&fpq->lock); 542 clear_bit(FR_SENT, &req->flags); 543 list_del_init(&req->list); 544 spin_unlock(&fpq->lock); 545 546 fuse_request_end(fc, req); 547 spin_lock(&fsvq->lock); 548 dec_in_flight_req(fsvq); 549 spin_unlock(&fsvq->lock); 550 } 551 } 552 553 /* Virtqueue interrupt handler */ 554 static void virtio_fs_vq_done(struct virtqueue *vq) 555 { 556 struct virtio_fs_vq *fsvq = vq_to_fsvq(vq); 557 558 dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name); 559 560 schedule_work(&fsvq->done_work); 561 } 562 563 /* Initialize virtqueues */ 564 static int virtio_fs_setup_vqs(struct virtio_device *vdev, 565 struct virtio_fs *fs) 566 { 567 struct virtqueue **vqs; 568 vq_callback_t **callbacks; 569 const char **names; 570 unsigned int i; 571 int ret = 0; 572 573 virtio_cread(vdev, struct virtio_fs_config, num_request_queues, 574 &fs->num_request_queues); 575 if (fs->num_request_queues == 0) 576 return -EINVAL; 577 578 fs->nvqs = 1 + fs->num_request_queues; 579 fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL); 580 if (!fs->vqs) 581 return -ENOMEM; 582 583 vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL); 584 callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]), 585 GFP_KERNEL); 586 names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL); 587 if (!vqs || !callbacks || !names) { 588 ret = -ENOMEM; 589 goto out; 590 } 591 592 callbacks[VQ_HIPRIO] = virtio_fs_vq_done; 593 snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name), 594 "hiprio"); 595 names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; 596 INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work); 597 INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs); 598 INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs); 599 INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work, 600 virtio_fs_hiprio_dispatch_work); 601 init_completion(&fs->vqs[VQ_HIPRIO].in_flight_zero); 602 spin_lock_init(&fs->vqs[VQ_HIPRIO].lock); 603 604 /* Initialize the requests virtqueues */ 605 for (i = VQ_REQUEST; i < fs->nvqs; i++) { 606 spin_lock_init(&fs->vqs[i].lock); 607 INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work); 608 INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work, 609 virtio_fs_request_dispatch_work); 610 INIT_LIST_HEAD(&fs->vqs[i].queued_reqs); 611 INIT_LIST_HEAD(&fs->vqs[i].end_reqs); 612 init_completion(&fs->vqs[i].in_flight_zero); 613 snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name), 614 "requests.%u", i - VQ_REQUEST); 615 callbacks[i] = virtio_fs_vq_done; 616 names[i] = fs->vqs[i].name; 617 } 618 619 ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL); 620 if (ret < 0) 621 goto out; 622 623 for (i = 0; i < fs->nvqs; i++) 624 fs->vqs[i].vq = vqs[i]; 625 626 virtio_fs_start_all_queues(fs); 627 out: 628 kfree(names); 629 kfree(callbacks); 630 kfree(vqs); 631 if (ret) 632 kfree(fs->vqs); 633 return ret; 634 } 635 636 /* Free virtqueues (device must already be reset) */ 637 static void virtio_fs_cleanup_vqs(struct virtio_device *vdev, 638 struct virtio_fs *fs) 639 { 640 vdev->config->del_vqs(vdev); 641 } 642 643 static int virtio_fs_probe(struct virtio_device *vdev) 644 { 645 struct virtio_fs *fs; 646 int ret; 647 648 fs = kzalloc(sizeof(*fs), GFP_KERNEL); 649 if (!fs) 650 return -ENOMEM; 651 kref_init(&fs->refcount); 652 vdev->priv = fs; 653 654 ret = virtio_fs_read_tag(vdev, fs); 655 if (ret < 0) 656 goto out; 657 658 ret = virtio_fs_setup_vqs(vdev, fs); 659 if (ret < 0) 660 goto out; 661 662 /* TODO vq affinity */ 663 664 /* Bring the device online in case the filesystem is mounted and 665 * requests need to be sent before we return. 666 */ 667 virtio_device_ready(vdev); 668 669 ret = virtio_fs_add_instance(fs); 670 if (ret < 0) 671 goto out_vqs; 672 673 return 0; 674 675 out_vqs: 676 vdev->config->reset(vdev); 677 virtio_fs_cleanup_vqs(vdev, fs); 678 679 out: 680 vdev->priv = NULL; 681 kfree(fs); 682 return ret; 683 } 684 685 static void virtio_fs_stop_all_queues(struct virtio_fs *fs) 686 { 687 struct virtio_fs_vq *fsvq; 688 int i; 689 690 for (i = 0; i < fs->nvqs; i++) { 691 fsvq = &fs->vqs[i]; 692 spin_lock(&fsvq->lock); 693 fsvq->connected = false; 694 spin_unlock(&fsvq->lock); 695 } 696 } 697 698 static void virtio_fs_remove(struct virtio_device *vdev) 699 { 700 struct virtio_fs *fs = vdev->priv; 701 702 mutex_lock(&virtio_fs_mutex); 703 /* This device is going away. No one should get new reference */ 704 list_del_init(&fs->list); 705 virtio_fs_stop_all_queues(fs); 706 virtio_fs_drain_all_queues_locked(fs); 707 vdev->config->reset(vdev); 708 virtio_fs_cleanup_vqs(vdev, fs); 709 710 vdev->priv = NULL; 711 /* Put device reference on virtio_fs object */ 712 virtio_fs_put(fs); 713 mutex_unlock(&virtio_fs_mutex); 714 } 715 716 #ifdef CONFIG_PM_SLEEP 717 static int virtio_fs_freeze(struct virtio_device *vdev) 718 { 719 /* TODO need to save state here */ 720 pr_warn("virtio-fs: suspend/resume not yet supported\n"); 721 return -EOPNOTSUPP; 722 } 723 724 static int virtio_fs_restore(struct virtio_device *vdev) 725 { 726 /* TODO need to restore state here */ 727 return 0; 728 } 729 #endif /* CONFIG_PM_SLEEP */ 730 731 static const struct virtio_device_id id_table[] = { 732 { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID }, 733 {}, 734 }; 735 736 static const unsigned int feature_table[] = {}; 737 738 static struct virtio_driver virtio_fs_driver = { 739 .driver.name = KBUILD_MODNAME, 740 .driver.owner = THIS_MODULE, 741 .id_table = id_table, 742 .feature_table = feature_table, 743 .feature_table_size = ARRAY_SIZE(feature_table), 744 .probe = virtio_fs_probe, 745 .remove = virtio_fs_remove, 746 #ifdef CONFIG_PM_SLEEP 747 .freeze = virtio_fs_freeze, 748 .restore = virtio_fs_restore, 749 #endif 750 }; 751 752 static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq) 753 __releases(fiq->lock) 754 { 755 struct fuse_forget_link *link; 756 struct virtio_fs_forget *forget; 757 struct virtio_fs_forget_req *req; 758 struct virtio_fs *fs; 759 struct virtio_fs_vq *fsvq; 760 u64 unique; 761 762 link = fuse_dequeue_forget(fiq, 1, NULL); 763 unique = fuse_get_unique(fiq); 764 765 fs = fiq->priv; 766 fsvq = &fs->vqs[VQ_HIPRIO]; 767 spin_unlock(&fiq->lock); 768 769 /* Allocate a buffer for the request */ 770 forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL); 771 req = &forget->req; 772 773 req->ih = (struct fuse_in_header){ 774 .opcode = FUSE_FORGET, 775 .nodeid = link->forget_one.nodeid, 776 .unique = unique, 777 .len = sizeof(*req), 778 }; 779 req->arg = (struct fuse_forget_in){ 780 .nlookup = link->forget_one.nlookup, 781 }; 782 783 send_forget_request(fsvq, forget, false); 784 kfree(link); 785 } 786 787 static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq) 788 __releases(fiq->lock) 789 { 790 /* 791 * TODO interrupts. 792 * 793 * Normal fs operations on a local filesystems aren't interruptible. 794 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW) 795 * with shared lock between host and guest. 796 */ 797 spin_unlock(&fiq->lock); 798 } 799 800 /* Return the number of scatter-gather list elements required */ 801 static unsigned int sg_count_fuse_req(struct fuse_req *req) 802 { 803 struct fuse_args *args = req->args; 804 struct fuse_args_pages *ap = container_of(args, typeof(*ap), args); 805 unsigned int total_sgs = 1 /* fuse_in_header */; 806 807 if (args->in_numargs - args->in_pages) 808 total_sgs += 1; 809 810 if (args->in_pages) 811 total_sgs += ap->num_pages; 812 813 if (!test_bit(FR_ISREPLY, &req->flags)) 814 return total_sgs; 815 816 total_sgs += 1 /* fuse_out_header */; 817 818 if (args->out_numargs - args->out_pages) 819 total_sgs += 1; 820 821 if (args->out_pages) 822 total_sgs += ap->num_pages; 823 824 return total_sgs; 825 } 826 827 /* Add pages to scatter-gather list and return number of elements used */ 828 static unsigned int sg_init_fuse_pages(struct scatterlist *sg, 829 struct page **pages, 830 struct fuse_page_desc *page_descs, 831 unsigned int num_pages, 832 unsigned int total_len) 833 { 834 unsigned int i; 835 unsigned int this_len; 836 837 for (i = 0; i < num_pages && total_len; i++) { 838 sg_init_table(&sg[i], 1); 839 this_len = min(page_descs[i].length, total_len); 840 sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset); 841 total_len -= this_len; 842 } 843 844 return i; 845 } 846 847 /* Add args to scatter-gather list and return number of elements used */ 848 static unsigned int sg_init_fuse_args(struct scatterlist *sg, 849 struct fuse_req *req, 850 struct fuse_arg *args, 851 unsigned int numargs, 852 bool argpages, 853 void *argbuf, 854 unsigned int *len_used) 855 { 856 struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args); 857 unsigned int total_sgs = 0; 858 unsigned int len; 859 860 len = fuse_len_args(numargs - argpages, args); 861 if (len) 862 sg_init_one(&sg[total_sgs++], argbuf, len); 863 864 if (argpages) 865 total_sgs += sg_init_fuse_pages(&sg[total_sgs], 866 ap->pages, ap->descs, 867 ap->num_pages, 868 args[numargs - 1].size); 869 870 if (len_used) 871 *len_used = len; 872 873 return total_sgs; 874 } 875 876 /* Add a request to a virtqueue and kick the device */ 877 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 878 struct fuse_req *req, bool in_flight) 879 { 880 /* requests need at least 4 elements */ 881 struct scatterlist *stack_sgs[6]; 882 struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)]; 883 struct scatterlist **sgs = stack_sgs; 884 struct scatterlist *sg = stack_sg; 885 struct virtqueue *vq; 886 struct fuse_args *args = req->args; 887 unsigned int argbuf_used = 0; 888 unsigned int out_sgs = 0; 889 unsigned int in_sgs = 0; 890 unsigned int total_sgs; 891 unsigned int i; 892 int ret; 893 bool notify; 894 struct fuse_pqueue *fpq; 895 896 /* Does the sglist fit on the stack? */ 897 total_sgs = sg_count_fuse_req(req); 898 if (total_sgs > ARRAY_SIZE(stack_sgs)) { 899 sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC); 900 sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC); 901 if (!sgs || !sg) { 902 ret = -ENOMEM; 903 goto out; 904 } 905 } 906 907 /* Use a bounce buffer since stack args cannot be mapped */ 908 ret = copy_args_to_argbuf(req); 909 if (ret < 0) 910 goto out; 911 912 /* Request elements */ 913 sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h)); 914 out_sgs += sg_init_fuse_args(&sg[out_sgs], req, 915 (struct fuse_arg *)args->in_args, 916 args->in_numargs, args->in_pages, 917 req->argbuf, &argbuf_used); 918 919 /* Reply elements */ 920 if (test_bit(FR_ISREPLY, &req->flags)) { 921 sg_init_one(&sg[out_sgs + in_sgs++], 922 &req->out.h, sizeof(req->out.h)); 923 in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req, 924 args->out_args, args->out_numargs, 925 args->out_pages, 926 req->argbuf + argbuf_used, NULL); 927 } 928 929 WARN_ON(out_sgs + in_sgs != total_sgs); 930 931 for (i = 0; i < total_sgs; i++) 932 sgs[i] = &sg[i]; 933 934 spin_lock(&fsvq->lock); 935 936 if (!fsvq->connected) { 937 spin_unlock(&fsvq->lock); 938 ret = -ENOTCONN; 939 goto out; 940 } 941 942 vq = fsvq->vq; 943 ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC); 944 if (ret < 0) { 945 spin_unlock(&fsvq->lock); 946 goto out; 947 } 948 949 /* Request successfully sent. */ 950 fpq = &fsvq->fud->pq; 951 spin_lock(&fpq->lock); 952 list_add_tail(&req->list, fpq->processing); 953 spin_unlock(&fpq->lock); 954 set_bit(FR_SENT, &req->flags); 955 /* matches barrier in request_wait_answer() */ 956 smp_mb__after_atomic(); 957 958 if (!in_flight) 959 inc_in_flight_req(fsvq); 960 notify = virtqueue_kick_prepare(vq); 961 962 spin_unlock(&fsvq->lock); 963 964 if (notify) 965 virtqueue_notify(vq); 966 967 out: 968 if (ret < 0 && req->argbuf) { 969 kfree(req->argbuf); 970 req->argbuf = NULL; 971 } 972 if (sgs != stack_sgs) { 973 kfree(sgs); 974 kfree(sg); 975 } 976 977 return ret; 978 } 979 980 static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq) 981 __releases(fiq->lock) 982 { 983 unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ 984 struct virtio_fs *fs; 985 struct fuse_req *req; 986 struct virtio_fs_vq *fsvq; 987 int ret; 988 989 WARN_ON(list_empty(&fiq->pending)); 990 req = list_last_entry(&fiq->pending, struct fuse_req, list); 991 clear_bit(FR_PENDING, &req->flags); 992 list_del_init(&req->list); 993 WARN_ON(!list_empty(&fiq->pending)); 994 spin_unlock(&fiq->lock); 995 996 fs = fiq->priv; 997 998 pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n", 999 __func__, req->in.h.opcode, req->in.h.unique, 1000 req->in.h.nodeid, req->in.h.len, 1001 fuse_len_args(req->args->out_numargs, req->args->out_args)); 1002 1003 fsvq = &fs->vqs[queue_id]; 1004 ret = virtio_fs_enqueue_req(fsvq, req, false); 1005 if (ret < 0) { 1006 if (ret == -ENOMEM || ret == -ENOSPC) { 1007 /* 1008 * Virtqueue full. Retry submission from worker 1009 * context as we might be holding fc->bg_lock. 1010 */ 1011 spin_lock(&fsvq->lock); 1012 list_add_tail(&req->list, &fsvq->queued_reqs); 1013 inc_in_flight_req(fsvq); 1014 schedule_delayed_work(&fsvq->dispatch_work, 1015 msecs_to_jiffies(1)); 1016 spin_unlock(&fsvq->lock); 1017 return; 1018 } 1019 req->out.h.error = ret; 1020 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); 1021 1022 /* Can't end request in submission context. Use a worker */ 1023 spin_lock(&fsvq->lock); 1024 list_add_tail(&req->list, &fsvq->end_reqs); 1025 schedule_delayed_work(&fsvq->dispatch_work, 0); 1026 spin_unlock(&fsvq->lock); 1027 return; 1028 } 1029 } 1030 1031 static const struct fuse_iqueue_ops virtio_fs_fiq_ops = { 1032 .wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock, 1033 .wake_interrupt_and_unlock = virtio_fs_wake_interrupt_and_unlock, 1034 .wake_pending_and_unlock = virtio_fs_wake_pending_and_unlock, 1035 .release = virtio_fs_fiq_release, 1036 }; 1037 1038 static int virtio_fs_fill_super(struct super_block *sb) 1039 { 1040 struct fuse_conn *fc = get_fuse_conn_super(sb); 1041 struct virtio_fs *fs = fc->iq.priv; 1042 unsigned int i; 1043 int err; 1044 struct fuse_fs_context ctx = { 1045 .rootmode = S_IFDIR, 1046 .default_permissions = 1, 1047 .allow_other = 1, 1048 .max_read = UINT_MAX, 1049 .blksize = 512, 1050 .destroy = true, 1051 .no_control = true, 1052 .no_force_umount = true, 1053 .no_mount_options = true, 1054 }; 1055 1056 mutex_lock(&virtio_fs_mutex); 1057 1058 /* After holding mutex, make sure virtiofs device is still there. 1059 * Though we are holding a reference to it, drive ->remove might 1060 * still have cleaned up virtual queues. In that case bail out. 1061 */ 1062 err = -EINVAL; 1063 if (list_empty(&fs->list)) { 1064 pr_info("virtio-fs: tag <%s> not found\n", fs->tag); 1065 goto err; 1066 } 1067 1068 err = -ENOMEM; 1069 /* Allocate fuse_dev for hiprio and notification queues */ 1070 for (i = 0; i < VQ_REQUEST; i++) { 1071 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 1072 1073 fsvq->fud = fuse_dev_alloc(); 1074 if (!fsvq->fud) 1075 goto err_free_fuse_devs; 1076 } 1077 1078 ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud; 1079 err = fuse_fill_super_common(sb, &ctx); 1080 if (err < 0) 1081 goto err_free_fuse_devs; 1082 1083 fc = fs->vqs[VQ_REQUEST].fud->fc; 1084 1085 for (i = 0; i < fs->nvqs; i++) { 1086 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 1087 1088 if (i == VQ_REQUEST) 1089 continue; /* already initialized */ 1090 fuse_dev_install(fsvq->fud, fc); 1091 } 1092 1093 /* Previous unmount will stop all queues. Start these again */ 1094 virtio_fs_start_all_queues(fs); 1095 fuse_send_init(fc); 1096 mutex_unlock(&virtio_fs_mutex); 1097 return 0; 1098 1099 err_free_fuse_devs: 1100 virtio_fs_free_devs(fs); 1101 err: 1102 mutex_unlock(&virtio_fs_mutex); 1103 return err; 1104 } 1105 1106 static void virtio_kill_sb(struct super_block *sb) 1107 { 1108 struct fuse_conn *fc = get_fuse_conn_super(sb); 1109 struct virtio_fs *vfs; 1110 struct virtio_fs_vq *fsvq; 1111 1112 /* If mount failed, we can still be called without any fc */ 1113 if (!fc) 1114 return fuse_kill_sb_anon(sb); 1115 1116 vfs = fc->iq.priv; 1117 fsvq = &vfs->vqs[VQ_HIPRIO]; 1118 1119 /* Stop forget queue. Soon destroy will be sent */ 1120 spin_lock(&fsvq->lock); 1121 fsvq->connected = false; 1122 spin_unlock(&fsvq->lock); 1123 virtio_fs_drain_all_queues(vfs); 1124 1125 fuse_kill_sb_anon(sb); 1126 1127 /* fuse_kill_sb_anon() must have sent destroy. Stop all queues 1128 * and drain one more time and free fuse devices. Freeing fuse 1129 * devices will drop their reference on fuse_conn and that in 1130 * turn will drop its reference on virtio_fs object. 1131 */ 1132 virtio_fs_stop_all_queues(vfs); 1133 virtio_fs_drain_all_queues(vfs); 1134 virtio_fs_free_devs(vfs); 1135 } 1136 1137 static int virtio_fs_test_super(struct super_block *sb, 1138 struct fs_context *fsc) 1139 { 1140 struct fuse_conn *fc = fsc->s_fs_info; 1141 1142 return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv; 1143 } 1144 1145 static int virtio_fs_set_super(struct super_block *sb, 1146 struct fs_context *fsc) 1147 { 1148 int err; 1149 1150 err = get_anon_bdev(&sb->s_dev); 1151 if (!err) 1152 fuse_conn_get(fsc->s_fs_info); 1153 1154 return err; 1155 } 1156 1157 static int virtio_fs_get_tree(struct fs_context *fsc) 1158 { 1159 struct virtio_fs *fs; 1160 struct super_block *sb; 1161 struct fuse_conn *fc; 1162 int err; 1163 1164 /* This gets a reference on virtio_fs object. This ptr gets installed 1165 * in fc->iq->priv. Once fuse_conn is going away, it calls ->put() 1166 * to drop the reference to this object. 1167 */ 1168 fs = virtio_fs_find_instance(fsc->source); 1169 if (!fs) { 1170 pr_info("virtio-fs: tag <%s> not found\n", fsc->source); 1171 return -EINVAL; 1172 } 1173 1174 fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL); 1175 if (!fc) { 1176 mutex_lock(&virtio_fs_mutex); 1177 virtio_fs_put(fs); 1178 mutex_unlock(&virtio_fs_mutex); 1179 return -ENOMEM; 1180 } 1181 1182 fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops, 1183 fs); 1184 fc->release = fuse_free_conn; 1185 fc->delete_stale = true; 1186 1187 fsc->s_fs_info = fc; 1188 sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super); 1189 fuse_conn_put(fc); 1190 if (IS_ERR(sb)) 1191 return PTR_ERR(sb); 1192 1193 if (!sb->s_root) { 1194 err = virtio_fs_fill_super(sb); 1195 if (err) { 1196 deactivate_locked_super(sb); 1197 return err; 1198 } 1199 1200 sb->s_flags |= SB_ACTIVE; 1201 } 1202 1203 WARN_ON(fsc->root); 1204 fsc->root = dget(sb->s_root); 1205 return 0; 1206 } 1207 1208 static const struct fs_context_operations virtio_fs_context_ops = { 1209 .get_tree = virtio_fs_get_tree, 1210 }; 1211 1212 static int virtio_fs_init_fs_context(struct fs_context *fsc) 1213 { 1214 fsc->ops = &virtio_fs_context_ops; 1215 return 0; 1216 } 1217 1218 static struct file_system_type virtio_fs_type = { 1219 .owner = THIS_MODULE, 1220 .name = "virtiofs", 1221 .init_fs_context = virtio_fs_init_fs_context, 1222 .kill_sb = virtio_kill_sb, 1223 }; 1224 1225 static int __init virtio_fs_init(void) 1226 { 1227 int ret; 1228 1229 ret = register_virtio_driver(&virtio_fs_driver); 1230 if (ret < 0) 1231 return ret; 1232 1233 ret = register_filesystem(&virtio_fs_type); 1234 if (ret < 0) { 1235 unregister_virtio_driver(&virtio_fs_driver); 1236 return ret; 1237 } 1238 1239 return 0; 1240 } 1241 module_init(virtio_fs_init); 1242 1243 static void __exit virtio_fs_exit(void) 1244 { 1245 unregister_filesystem(&virtio_fs_type); 1246 unregister_virtio_driver(&virtio_fs_driver); 1247 } 1248 module_exit(virtio_fs_exit); 1249 1250 MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>"); 1251 MODULE_DESCRIPTION("Virtio Filesystem"); 1252 MODULE_LICENSE("GPL"); 1253 MODULE_ALIAS_FS(KBUILD_MODNAME); 1254 MODULE_DEVICE_TABLE(virtio, id_table); 1255