1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * virtio-fs: Virtio Filesystem 4 * Copyright (C) 2018 Red Hat, Inc. 5 */ 6 7 #include <linux/fs.h> 8 #include <linux/dax.h> 9 #include <linux/pci.h> 10 #include <linux/pfn_t.h> 11 #include <linux/memremap.h> 12 #include <linux/module.h> 13 #include <linux/virtio.h> 14 #include <linux/virtio_fs.h> 15 #include <linux/delay.h> 16 #include <linux/fs_context.h> 17 #include <linux/fs_parser.h> 18 #include <linux/highmem.h> 19 #include <linux/uio.h> 20 #include "fuse_i.h" 21 22 /* Used to help calculate the FUSE connection's max_pages limit for a request's 23 * size. Parts of the struct fuse_req are sliced into scattergather lists in 24 * addition to the pages used, so this can help account for that overhead. 25 */ 26 #define FUSE_HEADER_OVERHEAD 4 27 28 /* List of virtio-fs device instances and a lock for the list. Also provides 29 * mutual exclusion in device removal and mounting path 30 */ 31 static DEFINE_MUTEX(virtio_fs_mutex); 32 static LIST_HEAD(virtio_fs_instances); 33 34 enum { 35 VQ_HIPRIO, 36 VQ_REQUEST 37 }; 38 39 #define VQ_NAME_LEN 24 40 41 /* Per-virtqueue state */ 42 struct virtio_fs_vq { 43 spinlock_t lock; 44 struct virtqueue *vq; /* protected by ->lock */ 45 struct work_struct done_work; 46 struct list_head queued_reqs; 47 struct list_head end_reqs; /* End these requests */ 48 struct delayed_work dispatch_work; 49 struct fuse_dev *fud; 50 bool connected; 51 long in_flight; 52 struct completion in_flight_zero; /* No inflight requests */ 53 char name[VQ_NAME_LEN]; 54 } ____cacheline_aligned_in_smp; 55 56 /* A virtio-fs device instance */ 57 struct virtio_fs { 58 struct kref refcount; 59 struct list_head list; /* on virtio_fs_instances */ 60 char *tag; 61 struct virtio_fs_vq *vqs; 62 unsigned int nvqs; /* number of virtqueues */ 63 unsigned int num_request_queues; /* number of request queues */ 64 struct dax_device *dax_dev; 65 66 /* DAX memory window where file contents are mapped */ 67 void *window_kaddr; 68 phys_addr_t window_phys_addr; 69 size_t window_len; 70 }; 71 72 struct virtio_fs_forget_req { 73 struct fuse_in_header ih; 74 struct fuse_forget_in arg; 75 }; 76 77 struct virtio_fs_forget { 78 /* This request can be temporarily queued on virt queue */ 79 struct list_head list; 80 struct virtio_fs_forget_req req; 81 }; 82 83 struct virtio_fs_req_work { 84 struct fuse_req *req; 85 struct virtio_fs_vq *fsvq; 86 struct work_struct done_work; 87 }; 88 89 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 90 struct fuse_req *req, bool in_flight); 91 92 static const struct constant_table dax_param_enums[] = { 93 {"always", FUSE_DAX_ALWAYS }, 94 {"never", FUSE_DAX_NEVER }, 95 {"inode", FUSE_DAX_INODE_USER }, 96 {} 97 }; 98 99 enum { 100 OPT_DAX, 101 OPT_DAX_ENUM, 102 }; 103 104 static const struct fs_parameter_spec virtio_fs_parameters[] = { 105 fsparam_flag("dax", OPT_DAX), 106 fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums), 107 {} 108 }; 109 110 static int virtio_fs_parse_param(struct fs_context *fsc, 111 struct fs_parameter *param) 112 { 113 struct fs_parse_result result; 114 struct fuse_fs_context *ctx = fsc->fs_private; 115 int opt; 116 117 opt = fs_parse(fsc, virtio_fs_parameters, param, &result); 118 if (opt < 0) 119 return opt; 120 121 switch (opt) { 122 case OPT_DAX: 123 ctx->dax_mode = FUSE_DAX_ALWAYS; 124 break; 125 case OPT_DAX_ENUM: 126 ctx->dax_mode = result.uint_32; 127 break; 128 default: 129 return -EINVAL; 130 } 131 132 return 0; 133 } 134 135 static void virtio_fs_free_fsc(struct fs_context *fsc) 136 { 137 struct fuse_fs_context *ctx = fsc->fs_private; 138 139 kfree(ctx); 140 } 141 142 static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) 143 { 144 struct virtio_fs *fs = vq->vdev->priv; 145 146 return &fs->vqs[vq->index]; 147 } 148 149 /* Should be called with fsvq->lock held. */ 150 static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq) 151 { 152 fsvq->in_flight++; 153 } 154 155 /* Should be called with fsvq->lock held. */ 156 static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq) 157 { 158 WARN_ON(fsvq->in_flight <= 0); 159 fsvq->in_flight--; 160 if (!fsvq->in_flight) 161 complete(&fsvq->in_flight_zero); 162 } 163 164 static void release_virtio_fs_obj(struct kref *ref) 165 { 166 struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount); 167 168 kfree(vfs->vqs); 169 kfree(vfs); 170 } 171 172 /* Make sure virtiofs_mutex is held */ 173 static void virtio_fs_put(struct virtio_fs *fs) 174 { 175 kref_put(&fs->refcount, release_virtio_fs_obj); 176 } 177 178 static void virtio_fs_fiq_release(struct fuse_iqueue *fiq) 179 { 180 struct virtio_fs *vfs = fiq->priv; 181 182 mutex_lock(&virtio_fs_mutex); 183 virtio_fs_put(vfs); 184 mutex_unlock(&virtio_fs_mutex); 185 } 186 187 static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) 188 { 189 WARN_ON(fsvq->in_flight < 0); 190 191 /* Wait for in flight requests to finish.*/ 192 spin_lock(&fsvq->lock); 193 if (fsvq->in_flight) { 194 /* We are holding virtio_fs_mutex. There should not be any 195 * waiters waiting for completion. 196 */ 197 reinit_completion(&fsvq->in_flight_zero); 198 spin_unlock(&fsvq->lock); 199 wait_for_completion(&fsvq->in_flight_zero); 200 } else { 201 spin_unlock(&fsvq->lock); 202 } 203 204 flush_work(&fsvq->done_work); 205 flush_delayed_work(&fsvq->dispatch_work); 206 } 207 208 static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs) 209 { 210 struct virtio_fs_vq *fsvq; 211 int i; 212 213 for (i = 0; i < fs->nvqs; i++) { 214 fsvq = &fs->vqs[i]; 215 virtio_fs_drain_queue(fsvq); 216 } 217 } 218 219 static void virtio_fs_drain_all_queues(struct virtio_fs *fs) 220 { 221 /* Provides mutual exclusion between ->remove and ->kill_sb 222 * paths. We don't want both of these draining queue at the 223 * same time. Current completion logic reinits completion 224 * and that means there should not be any other thread 225 * doing reinit or waiting for completion already. 226 */ 227 mutex_lock(&virtio_fs_mutex); 228 virtio_fs_drain_all_queues_locked(fs); 229 mutex_unlock(&virtio_fs_mutex); 230 } 231 232 static void virtio_fs_start_all_queues(struct virtio_fs *fs) 233 { 234 struct virtio_fs_vq *fsvq; 235 int i; 236 237 for (i = 0; i < fs->nvqs; i++) { 238 fsvq = &fs->vqs[i]; 239 spin_lock(&fsvq->lock); 240 fsvq->connected = true; 241 spin_unlock(&fsvq->lock); 242 } 243 } 244 245 /* Add a new instance to the list or return -EEXIST if tag name exists*/ 246 static int virtio_fs_add_instance(struct virtio_fs *fs) 247 { 248 struct virtio_fs *fs2; 249 bool duplicate = false; 250 251 mutex_lock(&virtio_fs_mutex); 252 253 list_for_each_entry(fs2, &virtio_fs_instances, list) { 254 if (strcmp(fs->tag, fs2->tag) == 0) 255 duplicate = true; 256 } 257 258 if (!duplicate) 259 list_add_tail(&fs->list, &virtio_fs_instances); 260 261 mutex_unlock(&virtio_fs_mutex); 262 263 if (duplicate) 264 return -EEXIST; 265 return 0; 266 } 267 268 /* Return the virtio_fs with a given tag, or NULL */ 269 static struct virtio_fs *virtio_fs_find_instance(const char *tag) 270 { 271 struct virtio_fs *fs; 272 273 mutex_lock(&virtio_fs_mutex); 274 275 list_for_each_entry(fs, &virtio_fs_instances, list) { 276 if (strcmp(fs->tag, tag) == 0) { 277 kref_get(&fs->refcount); 278 goto found; 279 } 280 } 281 282 fs = NULL; /* not found */ 283 284 found: 285 mutex_unlock(&virtio_fs_mutex); 286 287 return fs; 288 } 289 290 static void virtio_fs_free_devs(struct virtio_fs *fs) 291 { 292 unsigned int i; 293 294 for (i = 0; i < fs->nvqs; i++) { 295 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 296 297 if (!fsvq->fud) 298 continue; 299 300 fuse_dev_free(fsvq->fud); 301 fsvq->fud = NULL; 302 } 303 } 304 305 /* Read filesystem name from virtio config into fs->tag (must kfree()). */ 306 static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs) 307 { 308 char tag_buf[sizeof_field(struct virtio_fs_config, tag)]; 309 char *end; 310 size_t len; 311 312 virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag), 313 &tag_buf, sizeof(tag_buf)); 314 end = memchr(tag_buf, '\0', sizeof(tag_buf)); 315 if (end == tag_buf) 316 return -EINVAL; /* empty tag */ 317 if (!end) 318 end = &tag_buf[sizeof(tag_buf)]; 319 320 len = end - tag_buf; 321 fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL); 322 if (!fs->tag) 323 return -ENOMEM; 324 memcpy(fs->tag, tag_buf, len); 325 fs->tag[len] = '\0'; 326 return 0; 327 } 328 329 /* Work function for hiprio completion */ 330 static void virtio_fs_hiprio_done_work(struct work_struct *work) 331 { 332 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 333 done_work); 334 struct virtqueue *vq = fsvq->vq; 335 336 /* Free completed FUSE_FORGET requests */ 337 spin_lock(&fsvq->lock); 338 do { 339 unsigned int len; 340 void *req; 341 342 virtqueue_disable_cb(vq); 343 344 while ((req = virtqueue_get_buf(vq, &len)) != NULL) { 345 kfree(req); 346 dec_in_flight_req(fsvq); 347 } 348 } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); 349 spin_unlock(&fsvq->lock); 350 } 351 352 static void virtio_fs_request_dispatch_work(struct work_struct *work) 353 { 354 struct fuse_req *req; 355 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 356 dispatch_work.work); 357 int ret; 358 359 pr_debug("virtio-fs: worker %s called.\n", __func__); 360 while (1) { 361 spin_lock(&fsvq->lock); 362 req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req, 363 list); 364 if (!req) { 365 spin_unlock(&fsvq->lock); 366 break; 367 } 368 369 list_del_init(&req->list); 370 spin_unlock(&fsvq->lock); 371 fuse_request_end(req); 372 } 373 374 /* Dispatch pending requests */ 375 while (1) { 376 spin_lock(&fsvq->lock); 377 req = list_first_entry_or_null(&fsvq->queued_reqs, 378 struct fuse_req, list); 379 if (!req) { 380 spin_unlock(&fsvq->lock); 381 return; 382 } 383 list_del_init(&req->list); 384 spin_unlock(&fsvq->lock); 385 386 ret = virtio_fs_enqueue_req(fsvq, req, true); 387 if (ret < 0) { 388 if (ret == -ENOMEM || ret == -ENOSPC) { 389 spin_lock(&fsvq->lock); 390 list_add_tail(&req->list, &fsvq->queued_reqs); 391 schedule_delayed_work(&fsvq->dispatch_work, 392 msecs_to_jiffies(1)); 393 spin_unlock(&fsvq->lock); 394 return; 395 } 396 req->out.h.error = ret; 397 spin_lock(&fsvq->lock); 398 dec_in_flight_req(fsvq); 399 spin_unlock(&fsvq->lock); 400 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", 401 ret); 402 fuse_request_end(req); 403 } 404 } 405 } 406 407 /* 408 * Returns 1 if queue is full and sender should wait a bit before sending 409 * next request, 0 otherwise. 410 */ 411 static int send_forget_request(struct virtio_fs_vq *fsvq, 412 struct virtio_fs_forget *forget, 413 bool in_flight) 414 { 415 struct scatterlist sg; 416 struct virtqueue *vq; 417 int ret = 0; 418 bool notify; 419 struct virtio_fs_forget_req *req = &forget->req; 420 421 spin_lock(&fsvq->lock); 422 if (!fsvq->connected) { 423 if (in_flight) 424 dec_in_flight_req(fsvq); 425 kfree(forget); 426 goto out; 427 } 428 429 sg_init_one(&sg, req, sizeof(*req)); 430 vq = fsvq->vq; 431 dev_dbg(&vq->vdev->dev, "%s\n", __func__); 432 433 ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC); 434 if (ret < 0) { 435 if (ret == -ENOMEM || ret == -ENOSPC) { 436 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n", 437 ret); 438 list_add_tail(&forget->list, &fsvq->queued_reqs); 439 schedule_delayed_work(&fsvq->dispatch_work, 440 msecs_to_jiffies(1)); 441 if (!in_flight) 442 inc_in_flight_req(fsvq); 443 /* Queue is full */ 444 ret = 1; 445 } else { 446 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n", 447 ret); 448 kfree(forget); 449 if (in_flight) 450 dec_in_flight_req(fsvq); 451 } 452 goto out; 453 } 454 455 if (!in_flight) 456 inc_in_flight_req(fsvq); 457 notify = virtqueue_kick_prepare(vq); 458 spin_unlock(&fsvq->lock); 459 460 if (notify) 461 virtqueue_notify(vq); 462 return ret; 463 out: 464 spin_unlock(&fsvq->lock); 465 return ret; 466 } 467 468 static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) 469 { 470 struct virtio_fs_forget *forget; 471 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 472 dispatch_work.work); 473 pr_debug("virtio-fs: worker %s called.\n", __func__); 474 while (1) { 475 spin_lock(&fsvq->lock); 476 forget = list_first_entry_or_null(&fsvq->queued_reqs, 477 struct virtio_fs_forget, list); 478 if (!forget) { 479 spin_unlock(&fsvq->lock); 480 return; 481 } 482 483 list_del(&forget->list); 484 spin_unlock(&fsvq->lock); 485 if (send_forget_request(fsvq, forget, true)) 486 return; 487 } 488 } 489 490 /* Allocate and copy args into req->argbuf */ 491 static int copy_args_to_argbuf(struct fuse_req *req) 492 { 493 struct fuse_args *args = req->args; 494 unsigned int offset = 0; 495 unsigned int num_in; 496 unsigned int num_out; 497 unsigned int len; 498 unsigned int i; 499 500 num_in = args->in_numargs - args->in_pages; 501 num_out = args->out_numargs - args->out_pages; 502 len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) + 503 fuse_len_args(num_out, args->out_args); 504 505 req->argbuf = kmalloc(len, GFP_ATOMIC); 506 if (!req->argbuf) 507 return -ENOMEM; 508 509 for (i = 0; i < num_in; i++) { 510 memcpy(req->argbuf + offset, 511 args->in_args[i].value, 512 args->in_args[i].size); 513 offset += args->in_args[i].size; 514 } 515 516 return 0; 517 } 518 519 /* Copy args out of and free req->argbuf */ 520 static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req) 521 { 522 unsigned int remaining; 523 unsigned int offset; 524 unsigned int num_in; 525 unsigned int num_out; 526 unsigned int i; 527 528 remaining = req->out.h.len - sizeof(req->out.h); 529 num_in = args->in_numargs - args->in_pages; 530 num_out = args->out_numargs - args->out_pages; 531 offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args); 532 533 for (i = 0; i < num_out; i++) { 534 unsigned int argsize = args->out_args[i].size; 535 536 if (args->out_argvar && 537 i == args->out_numargs - 1 && 538 argsize > remaining) { 539 argsize = remaining; 540 } 541 542 memcpy(args->out_args[i].value, req->argbuf + offset, argsize); 543 offset += argsize; 544 545 if (i != args->out_numargs - 1) 546 remaining -= argsize; 547 } 548 549 /* Store the actual size of the variable-length arg */ 550 if (args->out_argvar) 551 args->out_args[args->out_numargs - 1].size = remaining; 552 553 kfree(req->argbuf); 554 req->argbuf = NULL; 555 } 556 557 /* Work function for request completion */ 558 static void virtio_fs_request_complete(struct fuse_req *req, 559 struct virtio_fs_vq *fsvq) 560 { 561 struct fuse_pqueue *fpq = &fsvq->fud->pq; 562 struct fuse_args *args; 563 struct fuse_args_pages *ap; 564 unsigned int len, i, thislen; 565 struct page *page; 566 567 /* 568 * TODO verify that server properly follows FUSE protocol 569 * (oh.uniq, oh.len) 570 */ 571 args = req->args; 572 copy_args_from_argbuf(args, req); 573 574 if (args->out_pages && args->page_zeroing) { 575 len = args->out_args[args->out_numargs - 1].size; 576 ap = container_of(args, typeof(*ap), args); 577 for (i = 0; i < ap->num_pages; i++) { 578 thislen = ap->descs[i].length; 579 if (len < thislen) { 580 WARN_ON(ap->descs[i].offset); 581 page = ap->pages[i]; 582 zero_user_segment(page, len, thislen); 583 len = 0; 584 } else { 585 len -= thislen; 586 } 587 } 588 } 589 590 spin_lock(&fpq->lock); 591 clear_bit(FR_SENT, &req->flags); 592 spin_unlock(&fpq->lock); 593 594 fuse_request_end(req); 595 spin_lock(&fsvq->lock); 596 dec_in_flight_req(fsvq); 597 spin_unlock(&fsvq->lock); 598 } 599 600 static void virtio_fs_complete_req_work(struct work_struct *work) 601 { 602 struct virtio_fs_req_work *w = 603 container_of(work, typeof(*w), done_work); 604 605 virtio_fs_request_complete(w->req, w->fsvq); 606 kfree(w); 607 } 608 609 static void virtio_fs_requests_done_work(struct work_struct *work) 610 { 611 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 612 done_work); 613 struct fuse_pqueue *fpq = &fsvq->fud->pq; 614 struct virtqueue *vq = fsvq->vq; 615 struct fuse_req *req; 616 struct fuse_req *next; 617 unsigned int len; 618 LIST_HEAD(reqs); 619 620 /* Collect completed requests off the virtqueue */ 621 spin_lock(&fsvq->lock); 622 do { 623 virtqueue_disable_cb(vq); 624 625 while ((req = virtqueue_get_buf(vq, &len)) != NULL) { 626 spin_lock(&fpq->lock); 627 list_move_tail(&req->list, &reqs); 628 spin_unlock(&fpq->lock); 629 } 630 } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); 631 spin_unlock(&fsvq->lock); 632 633 /* End requests */ 634 list_for_each_entry_safe(req, next, &reqs, list) { 635 list_del_init(&req->list); 636 637 /* blocking async request completes in a worker context */ 638 if (req->args->may_block) { 639 struct virtio_fs_req_work *w; 640 641 w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL); 642 INIT_WORK(&w->done_work, virtio_fs_complete_req_work); 643 w->fsvq = fsvq; 644 w->req = req; 645 schedule_work(&w->done_work); 646 } else { 647 virtio_fs_request_complete(req, fsvq); 648 } 649 } 650 } 651 652 /* Virtqueue interrupt handler */ 653 static void virtio_fs_vq_done(struct virtqueue *vq) 654 { 655 struct virtio_fs_vq *fsvq = vq_to_fsvq(vq); 656 657 dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name); 658 659 schedule_work(&fsvq->done_work); 660 } 661 662 static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name, 663 int vq_type) 664 { 665 strscpy(fsvq->name, name, VQ_NAME_LEN); 666 spin_lock_init(&fsvq->lock); 667 INIT_LIST_HEAD(&fsvq->queued_reqs); 668 INIT_LIST_HEAD(&fsvq->end_reqs); 669 init_completion(&fsvq->in_flight_zero); 670 671 if (vq_type == VQ_REQUEST) { 672 INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work); 673 INIT_DELAYED_WORK(&fsvq->dispatch_work, 674 virtio_fs_request_dispatch_work); 675 } else { 676 INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work); 677 INIT_DELAYED_WORK(&fsvq->dispatch_work, 678 virtio_fs_hiprio_dispatch_work); 679 } 680 } 681 682 /* Initialize virtqueues */ 683 static int virtio_fs_setup_vqs(struct virtio_device *vdev, 684 struct virtio_fs *fs) 685 { 686 struct virtqueue **vqs; 687 vq_callback_t **callbacks; 688 const char **names; 689 unsigned int i; 690 int ret = 0; 691 692 virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues, 693 &fs->num_request_queues); 694 if (fs->num_request_queues == 0) 695 return -EINVAL; 696 697 fs->nvqs = VQ_REQUEST + fs->num_request_queues; 698 fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL); 699 if (!fs->vqs) 700 return -ENOMEM; 701 702 vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL); 703 callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]), 704 GFP_KERNEL); 705 names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL); 706 if (!vqs || !callbacks || !names) { 707 ret = -ENOMEM; 708 goto out; 709 } 710 711 /* Initialize the hiprio/forget request virtqueue */ 712 callbacks[VQ_HIPRIO] = virtio_fs_vq_done; 713 virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO); 714 names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; 715 716 /* Initialize the requests virtqueues */ 717 for (i = VQ_REQUEST; i < fs->nvqs; i++) { 718 char vq_name[VQ_NAME_LEN]; 719 720 snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST); 721 virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST); 722 callbacks[i] = virtio_fs_vq_done; 723 names[i] = fs->vqs[i].name; 724 } 725 726 ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL); 727 if (ret < 0) 728 goto out; 729 730 for (i = 0; i < fs->nvqs; i++) 731 fs->vqs[i].vq = vqs[i]; 732 733 virtio_fs_start_all_queues(fs); 734 out: 735 kfree(names); 736 kfree(callbacks); 737 kfree(vqs); 738 if (ret) 739 kfree(fs->vqs); 740 return ret; 741 } 742 743 /* Free virtqueues (device must already be reset) */ 744 static void virtio_fs_cleanup_vqs(struct virtio_device *vdev) 745 { 746 vdev->config->del_vqs(vdev); 747 } 748 749 /* Map a window offset to a page frame number. The window offset will have 750 * been produced by .iomap_begin(), which maps a file offset to a window 751 * offset. 752 */ 753 static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, 754 long nr_pages, enum dax_access_mode mode, 755 void **kaddr, pfn_t *pfn) 756 { 757 struct virtio_fs *fs = dax_get_private(dax_dev); 758 phys_addr_t offset = PFN_PHYS(pgoff); 759 size_t max_nr_pages = fs->window_len / PAGE_SIZE - pgoff; 760 761 if (kaddr) 762 *kaddr = fs->window_kaddr + offset; 763 if (pfn) 764 *pfn = phys_to_pfn_t(fs->window_phys_addr + offset, 765 PFN_DEV | PFN_MAP); 766 return nr_pages > max_nr_pages ? max_nr_pages : nr_pages; 767 } 768 769 static int virtio_fs_zero_page_range(struct dax_device *dax_dev, 770 pgoff_t pgoff, size_t nr_pages) 771 { 772 long rc; 773 void *kaddr; 774 775 rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr, 776 NULL); 777 if (rc < 0) 778 return dax_mem2blk_err(rc); 779 780 memset(kaddr, 0, nr_pages << PAGE_SHIFT); 781 dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); 782 return 0; 783 } 784 785 static const struct dax_operations virtio_fs_dax_ops = { 786 .direct_access = virtio_fs_direct_access, 787 .zero_page_range = virtio_fs_zero_page_range, 788 }; 789 790 static void virtio_fs_cleanup_dax(void *data) 791 { 792 struct dax_device *dax_dev = data; 793 794 kill_dax(dax_dev); 795 put_dax(dax_dev); 796 } 797 798 static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs) 799 { 800 struct virtio_shm_region cache_reg; 801 struct dev_pagemap *pgmap; 802 bool have_cache; 803 804 if (!IS_ENABLED(CONFIG_FUSE_DAX)) 805 return 0; 806 807 /* Get cache region */ 808 have_cache = virtio_get_shm_region(vdev, &cache_reg, 809 (u8)VIRTIO_FS_SHMCAP_ID_CACHE); 810 if (!have_cache) { 811 dev_notice(&vdev->dev, "%s: No cache capability\n", __func__); 812 return 0; 813 } 814 815 if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len, 816 dev_name(&vdev->dev))) { 817 dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n", 818 cache_reg.addr, cache_reg.len); 819 return -EBUSY; 820 } 821 822 dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len, 823 cache_reg.addr); 824 825 pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL); 826 if (!pgmap) 827 return -ENOMEM; 828 829 pgmap->type = MEMORY_DEVICE_FS_DAX; 830 831 /* Ideally we would directly use the PCI BAR resource but 832 * devm_memremap_pages() wants its own copy in pgmap. So 833 * initialize a struct resource from scratch (only the start 834 * and end fields will be used). 835 */ 836 pgmap->range = (struct range) { 837 .start = (phys_addr_t) cache_reg.addr, 838 .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1, 839 }; 840 pgmap->nr_range = 1; 841 842 fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap); 843 if (IS_ERR(fs->window_kaddr)) 844 return PTR_ERR(fs->window_kaddr); 845 846 fs->window_phys_addr = (phys_addr_t) cache_reg.addr; 847 fs->window_len = (phys_addr_t) cache_reg.len; 848 849 dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n", 850 __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len); 851 852 fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops); 853 if (IS_ERR(fs->dax_dev)) 854 return PTR_ERR(fs->dax_dev); 855 856 return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax, 857 fs->dax_dev); 858 } 859 860 static int virtio_fs_probe(struct virtio_device *vdev) 861 { 862 struct virtio_fs *fs; 863 int ret; 864 865 fs = kzalloc(sizeof(*fs), GFP_KERNEL); 866 if (!fs) 867 return -ENOMEM; 868 kref_init(&fs->refcount); 869 vdev->priv = fs; 870 871 ret = virtio_fs_read_tag(vdev, fs); 872 if (ret < 0) 873 goto out; 874 875 ret = virtio_fs_setup_vqs(vdev, fs); 876 if (ret < 0) 877 goto out; 878 879 /* TODO vq affinity */ 880 881 ret = virtio_fs_setup_dax(vdev, fs); 882 if (ret < 0) 883 goto out_vqs; 884 885 /* Bring the device online in case the filesystem is mounted and 886 * requests need to be sent before we return. 887 */ 888 virtio_device_ready(vdev); 889 890 ret = virtio_fs_add_instance(fs); 891 if (ret < 0) 892 goto out_vqs; 893 894 return 0; 895 896 out_vqs: 897 virtio_reset_device(vdev); 898 virtio_fs_cleanup_vqs(vdev); 899 kfree(fs->vqs); 900 901 out: 902 vdev->priv = NULL; 903 kfree(fs); 904 return ret; 905 } 906 907 static void virtio_fs_stop_all_queues(struct virtio_fs *fs) 908 { 909 struct virtio_fs_vq *fsvq; 910 int i; 911 912 for (i = 0; i < fs->nvqs; i++) { 913 fsvq = &fs->vqs[i]; 914 spin_lock(&fsvq->lock); 915 fsvq->connected = false; 916 spin_unlock(&fsvq->lock); 917 } 918 } 919 920 static void virtio_fs_remove(struct virtio_device *vdev) 921 { 922 struct virtio_fs *fs = vdev->priv; 923 924 mutex_lock(&virtio_fs_mutex); 925 /* This device is going away. No one should get new reference */ 926 list_del_init(&fs->list); 927 virtio_fs_stop_all_queues(fs); 928 virtio_fs_drain_all_queues_locked(fs); 929 virtio_reset_device(vdev); 930 virtio_fs_cleanup_vqs(vdev); 931 932 vdev->priv = NULL; 933 /* Put device reference on virtio_fs object */ 934 virtio_fs_put(fs); 935 mutex_unlock(&virtio_fs_mutex); 936 } 937 938 #ifdef CONFIG_PM_SLEEP 939 static int virtio_fs_freeze(struct virtio_device *vdev) 940 { 941 /* TODO need to save state here */ 942 pr_warn("virtio-fs: suspend/resume not yet supported\n"); 943 return -EOPNOTSUPP; 944 } 945 946 static int virtio_fs_restore(struct virtio_device *vdev) 947 { 948 /* TODO need to restore state here */ 949 return 0; 950 } 951 #endif /* CONFIG_PM_SLEEP */ 952 953 static const struct virtio_device_id id_table[] = { 954 { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID }, 955 {}, 956 }; 957 958 static const unsigned int feature_table[] = {}; 959 960 static struct virtio_driver virtio_fs_driver = { 961 .driver.name = KBUILD_MODNAME, 962 .driver.owner = THIS_MODULE, 963 .id_table = id_table, 964 .feature_table = feature_table, 965 .feature_table_size = ARRAY_SIZE(feature_table), 966 .probe = virtio_fs_probe, 967 .remove = virtio_fs_remove, 968 #ifdef CONFIG_PM_SLEEP 969 .freeze = virtio_fs_freeze, 970 .restore = virtio_fs_restore, 971 #endif 972 }; 973 974 static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq) 975 __releases(fiq->lock) 976 { 977 struct fuse_forget_link *link; 978 struct virtio_fs_forget *forget; 979 struct virtio_fs_forget_req *req; 980 struct virtio_fs *fs; 981 struct virtio_fs_vq *fsvq; 982 u64 unique; 983 984 link = fuse_dequeue_forget(fiq, 1, NULL); 985 unique = fuse_get_unique(fiq); 986 987 fs = fiq->priv; 988 fsvq = &fs->vqs[VQ_HIPRIO]; 989 spin_unlock(&fiq->lock); 990 991 /* Allocate a buffer for the request */ 992 forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL); 993 req = &forget->req; 994 995 req->ih = (struct fuse_in_header){ 996 .opcode = FUSE_FORGET, 997 .nodeid = link->forget_one.nodeid, 998 .unique = unique, 999 .len = sizeof(*req), 1000 }; 1001 req->arg = (struct fuse_forget_in){ 1002 .nlookup = link->forget_one.nlookup, 1003 }; 1004 1005 send_forget_request(fsvq, forget, false); 1006 kfree(link); 1007 } 1008 1009 static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq) 1010 __releases(fiq->lock) 1011 { 1012 /* 1013 * TODO interrupts. 1014 * 1015 * Normal fs operations on a local filesystems aren't interruptible. 1016 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW) 1017 * with shared lock between host and guest. 1018 */ 1019 spin_unlock(&fiq->lock); 1020 } 1021 1022 /* Count number of scatter-gather elements required */ 1023 static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs, 1024 unsigned int num_pages, 1025 unsigned int total_len) 1026 { 1027 unsigned int i; 1028 unsigned int this_len; 1029 1030 for (i = 0; i < num_pages && total_len; i++) { 1031 this_len = min(page_descs[i].length, total_len); 1032 total_len -= this_len; 1033 } 1034 1035 return i; 1036 } 1037 1038 /* Return the number of scatter-gather list elements required */ 1039 static unsigned int sg_count_fuse_req(struct fuse_req *req) 1040 { 1041 struct fuse_args *args = req->args; 1042 struct fuse_args_pages *ap = container_of(args, typeof(*ap), args); 1043 unsigned int size, total_sgs = 1 /* fuse_in_header */; 1044 1045 if (args->in_numargs - args->in_pages) 1046 total_sgs += 1; 1047 1048 if (args->in_pages) { 1049 size = args->in_args[args->in_numargs - 1].size; 1050 total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages, 1051 size); 1052 } 1053 1054 if (!test_bit(FR_ISREPLY, &req->flags)) 1055 return total_sgs; 1056 1057 total_sgs += 1 /* fuse_out_header */; 1058 1059 if (args->out_numargs - args->out_pages) 1060 total_sgs += 1; 1061 1062 if (args->out_pages) { 1063 size = args->out_args[args->out_numargs - 1].size; 1064 total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages, 1065 size); 1066 } 1067 1068 return total_sgs; 1069 } 1070 1071 /* Add pages to scatter-gather list and return number of elements used */ 1072 static unsigned int sg_init_fuse_pages(struct scatterlist *sg, 1073 struct page **pages, 1074 struct fuse_page_desc *page_descs, 1075 unsigned int num_pages, 1076 unsigned int total_len) 1077 { 1078 unsigned int i; 1079 unsigned int this_len; 1080 1081 for (i = 0; i < num_pages && total_len; i++) { 1082 sg_init_table(&sg[i], 1); 1083 this_len = min(page_descs[i].length, total_len); 1084 sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset); 1085 total_len -= this_len; 1086 } 1087 1088 return i; 1089 } 1090 1091 /* Add args to scatter-gather list and return number of elements used */ 1092 static unsigned int sg_init_fuse_args(struct scatterlist *sg, 1093 struct fuse_req *req, 1094 struct fuse_arg *args, 1095 unsigned int numargs, 1096 bool argpages, 1097 void *argbuf, 1098 unsigned int *len_used) 1099 { 1100 struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args); 1101 unsigned int total_sgs = 0; 1102 unsigned int len; 1103 1104 len = fuse_len_args(numargs - argpages, args); 1105 if (len) 1106 sg_init_one(&sg[total_sgs++], argbuf, len); 1107 1108 if (argpages) 1109 total_sgs += sg_init_fuse_pages(&sg[total_sgs], 1110 ap->pages, ap->descs, 1111 ap->num_pages, 1112 args[numargs - 1].size); 1113 1114 if (len_used) 1115 *len_used = len; 1116 1117 return total_sgs; 1118 } 1119 1120 /* Add a request to a virtqueue and kick the device */ 1121 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 1122 struct fuse_req *req, bool in_flight) 1123 { 1124 /* requests need at least 4 elements */ 1125 struct scatterlist *stack_sgs[6]; 1126 struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)]; 1127 struct scatterlist **sgs = stack_sgs; 1128 struct scatterlist *sg = stack_sg; 1129 struct virtqueue *vq; 1130 struct fuse_args *args = req->args; 1131 unsigned int argbuf_used = 0; 1132 unsigned int out_sgs = 0; 1133 unsigned int in_sgs = 0; 1134 unsigned int total_sgs; 1135 unsigned int i; 1136 int ret; 1137 bool notify; 1138 struct fuse_pqueue *fpq; 1139 1140 /* Does the sglist fit on the stack? */ 1141 total_sgs = sg_count_fuse_req(req); 1142 if (total_sgs > ARRAY_SIZE(stack_sgs)) { 1143 sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC); 1144 sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC); 1145 if (!sgs || !sg) { 1146 ret = -ENOMEM; 1147 goto out; 1148 } 1149 } 1150 1151 /* Use a bounce buffer since stack args cannot be mapped */ 1152 ret = copy_args_to_argbuf(req); 1153 if (ret < 0) 1154 goto out; 1155 1156 /* Request elements */ 1157 sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h)); 1158 out_sgs += sg_init_fuse_args(&sg[out_sgs], req, 1159 (struct fuse_arg *)args->in_args, 1160 args->in_numargs, args->in_pages, 1161 req->argbuf, &argbuf_used); 1162 1163 /* Reply elements */ 1164 if (test_bit(FR_ISREPLY, &req->flags)) { 1165 sg_init_one(&sg[out_sgs + in_sgs++], 1166 &req->out.h, sizeof(req->out.h)); 1167 in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req, 1168 args->out_args, args->out_numargs, 1169 args->out_pages, 1170 req->argbuf + argbuf_used, NULL); 1171 } 1172 1173 WARN_ON(out_sgs + in_sgs != total_sgs); 1174 1175 for (i = 0; i < total_sgs; i++) 1176 sgs[i] = &sg[i]; 1177 1178 spin_lock(&fsvq->lock); 1179 1180 if (!fsvq->connected) { 1181 spin_unlock(&fsvq->lock); 1182 ret = -ENOTCONN; 1183 goto out; 1184 } 1185 1186 vq = fsvq->vq; 1187 ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC); 1188 if (ret < 0) { 1189 spin_unlock(&fsvq->lock); 1190 goto out; 1191 } 1192 1193 /* Request successfully sent. */ 1194 fpq = &fsvq->fud->pq; 1195 spin_lock(&fpq->lock); 1196 list_add_tail(&req->list, fpq->processing); 1197 spin_unlock(&fpq->lock); 1198 set_bit(FR_SENT, &req->flags); 1199 /* matches barrier in request_wait_answer() */ 1200 smp_mb__after_atomic(); 1201 1202 if (!in_flight) 1203 inc_in_flight_req(fsvq); 1204 notify = virtqueue_kick_prepare(vq); 1205 1206 spin_unlock(&fsvq->lock); 1207 1208 if (notify) 1209 virtqueue_notify(vq); 1210 1211 out: 1212 if (ret < 0 && req->argbuf) { 1213 kfree(req->argbuf); 1214 req->argbuf = NULL; 1215 } 1216 if (sgs != stack_sgs) { 1217 kfree(sgs); 1218 kfree(sg); 1219 } 1220 1221 return ret; 1222 } 1223 1224 static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq) 1225 __releases(fiq->lock) 1226 { 1227 unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ 1228 struct virtio_fs *fs; 1229 struct fuse_req *req; 1230 struct virtio_fs_vq *fsvq; 1231 int ret; 1232 1233 WARN_ON(list_empty(&fiq->pending)); 1234 req = list_last_entry(&fiq->pending, struct fuse_req, list); 1235 clear_bit(FR_PENDING, &req->flags); 1236 list_del_init(&req->list); 1237 WARN_ON(!list_empty(&fiq->pending)); 1238 spin_unlock(&fiq->lock); 1239 1240 fs = fiq->priv; 1241 1242 pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n", 1243 __func__, req->in.h.opcode, req->in.h.unique, 1244 req->in.h.nodeid, req->in.h.len, 1245 fuse_len_args(req->args->out_numargs, req->args->out_args)); 1246 1247 fsvq = &fs->vqs[queue_id]; 1248 ret = virtio_fs_enqueue_req(fsvq, req, false); 1249 if (ret < 0) { 1250 if (ret == -ENOMEM || ret == -ENOSPC) { 1251 /* 1252 * Virtqueue full. Retry submission from worker 1253 * context as we might be holding fc->bg_lock. 1254 */ 1255 spin_lock(&fsvq->lock); 1256 list_add_tail(&req->list, &fsvq->queued_reqs); 1257 inc_in_flight_req(fsvq); 1258 schedule_delayed_work(&fsvq->dispatch_work, 1259 msecs_to_jiffies(1)); 1260 spin_unlock(&fsvq->lock); 1261 return; 1262 } 1263 req->out.h.error = ret; 1264 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); 1265 1266 /* Can't end request in submission context. Use a worker */ 1267 spin_lock(&fsvq->lock); 1268 list_add_tail(&req->list, &fsvq->end_reqs); 1269 schedule_delayed_work(&fsvq->dispatch_work, 0); 1270 spin_unlock(&fsvq->lock); 1271 return; 1272 } 1273 } 1274 1275 static const struct fuse_iqueue_ops virtio_fs_fiq_ops = { 1276 .wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock, 1277 .wake_interrupt_and_unlock = virtio_fs_wake_interrupt_and_unlock, 1278 .wake_pending_and_unlock = virtio_fs_wake_pending_and_unlock, 1279 .release = virtio_fs_fiq_release, 1280 }; 1281 1282 static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx) 1283 { 1284 ctx->rootmode = S_IFDIR; 1285 ctx->default_permissions = 1; 1286 ctx->allow_other = 1; 1287 ctx->max_read = UINT_MAX; 1288 ctx->blksize = 512; 1289 ctx->destroy = true; 1290 ctx->no_control = true; 1291 ctx->no_force_umount = true; 1292 } 1293 1294 static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) 1295 { 1296 struct fuse_mount *fm = get_fuse_mount_super(sb); 1297 struct fuse_conn *fc = fm->fc; 1298 struct virtio_fs *fs = fc->iq.priv; 1299 struct fuse_fs_context *ctx = fsc->fs_private; 1300 unsigned int i; 1301 int err; 1302 1303 virtio_fs_ctx_set_defaults(ctx); 1304 mutex_lock(&virtio_fs_mutex); 1305 1306 /* After holding mutex, make sure virtiofs device is still there. 1307 * Though we are holding a reference to it, drive ->remove might 1308 * still have cleaned up virtual queues. In that case bail out. 1309 */ 1310 err = -EINVAL; 1311 if (list_empty(&fs->list)) { 1312 pr_info("virtio-fs: tag <%s> not found\n", fs->tag); 1313 goto err; 1314 } 1315 1316 err = -ENOMEM; 1317 /* Allocate fuse_dev for hiprio and notification queues */ 1318 for (i = 0; i < fs->nvqs; i++) { 1319 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 1320 1321 fsvq->fud = fuse_dev_alloc(); 1322 if (!fsvq->fud) 1323 goto err_free_fuse_devs; 1324 } 1325 1326 /* virtiofs allocates and installs its own fuse devices */ 1327 ctx->fudptr = NULL; 1328 if (ctx->dax_mode != FUSE_DAX_NEVER) { 1329 if (ctx->dax_mode == FUSE_DAX_ALWAYS && !fs->dax_dev) { 1330 err = -EINVAL; 1331 pr_err("virtio-fs: dax can't be enabled as filesystem" 1332 " device does not support it.\n"); 1333 goto err_free_fuse_devs; 1334 } 1335 ctx->dax_dev = fs->dax_dev; 1336 } 1337 err = fuse_fill_super_common(sb, ctx); 1338 if (err < 0) 1339 goto err_free_fuse_devs; 1340 1341 for (i = 0; i < fs->nvqs; i++) { 1342 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 1343 1344 fuse_dev_install(fsvq->fud, fc); 1345 } 1346 1347 /* Previous unmount will stop all queues. Start these again */ 1348 virtio_fs_start_all_queues(fs); 1349 fuse_send_init(fm); 1350 mutex_unlock(&virtio_fs_mutex); 1351 return 0; 1352 1353 err_free_fuse_devs: 1354 virtio_fs_free_devs(fs); 1355 err: 1356 mutex_unlock(&virtio_fs_mutex); 1357 return err; 1358 } 1359 1360 static void virtio_fs_conn_destroy(struct fuse_mount *fm) 1361 { 1362 struct fuse_conn *fc = fm->fc; 1363 struct virtio_fs *vfs = fc->iq.priv; 1364 struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO]; 1365 1366 /* Stop dax worker. Soon evict_inodes() will be called which 1367 * will free all memory ranges belonging to all inodes. 1368 */ 1369 if (IS_ENABLED(CONFIG_FUSE_DAX)) 1370 fuse_dax_cancel_work(fc); 1371 1372 /* Stop forget queue. Soon destroy will be sent */ 1373 spin_lock(&fsvq->lock); 1374 fsvq->connected = false; 1375 spin_unlock(&fsvq->lock); 1376 virtio_fs_drain_all_queues(vfs); 1377 1378 fuse_conn_destroy(fm); 1379 1380 /* fuse_conn_destroy() must have sent destroy. Stop all queues 1381 * and drain one more time and free fuse devices. Freeing fuse 1382 * devices will drop their reference on fuse_conn and that in 1383 * turn will drop its reference on virtio_fs object. 1384 */ 1385 virtio_fs_stop_all_queues(vfs); 1386 virtio_fs_drain_all_queues(vfs); 1387 virtio_fs_free_devs(vfs); 1388 } 1389 1390 static void virtio_kill_sb(struct super_block *sb) 1391 { 1392 struct fuse_mount *fm = get_fuse_mount_super(sb); 1393 bool last; 1394 1395 /* If mount failed, we can still be called without any fc */ 1396 if (sb->s_root) { 1397 last = fuse_mount_remove(fm); 1398 if (last) 1399 virtio_fs_conn_destroy(fm); 1400 } 1401 kill_anon_super(sb); 1402 fuse_mount_destroy(fm); 1403 } 1404 1405 static int virtio_fs_test_super(struct super_block *sb, 1406 struct fs_context *fsc) 1407 { 1408 struct fuse_mount *fsc_fm = fsc->s_fs_info; 1409 struct fuse_mount *sb_fm = get_fuse_mount_super(sb); 1410 1411 return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv; 1412 } 1413 1414 static int virtio_fs_get_tree(struct fs_context *fsc) 1415 { 1416 struct virtio_fs *fs; 1417 struct super_block *sb; 1418 struct fuse_conn *fc = NULL; 1419 struct fuse_mount *fm; 1420 unsigned int virtqueue_size; 1421 int err = -EIO; 1422 1423 /* This gets a reference on virtio_fs object. This ptr gets installed 1424 * in fc->iq->priv. Once fuse_conn is going away, it calls ->put() 1425 * to drop the reference to this object. 1426 */ 1427 fs = virtio_fs_find_instance(fsc->source); 1428 if (!fs) { 1429 pr_info("virtio-fs: tag <%s> not found\n", fsc->source); 1430 return -EINVAL; 1431 } 1432 1433 virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq); 1434 if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD)) 1435 goto out_err; 1436 1437 err = -ENOMEM; 1438 fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL); 1439 if (!fc) 1440 goto out_err; 1441 1442 fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL); 1443 if (!fm) 1444 goto out_err; 1445 1446 fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs); 1447 fc->release = fuse_free_conn; 1448 fc->delete_stale = true; 1449 fc->auto_submounts = true; 1450 fc->sync_fs = true; 1451 1452 /* Tell FUSE to split requests that exceed the virtqueue's size */ 1453 fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit, 1454 virtqueue_size - FUSE_HEADER_OVERHEAD); 1455 1456 fsc->s_fs_info = fm; 1457 sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc); 1458 if (fsc->s_fs_info) 1459 fuse_mount_destroy(fm); 1460 if (IS_ERR(sb)) 1461 return PTR_ERR(sb); 1462 1463 if (!sb->s_root) { 1464 err = virtio_fs_fill_super(sb, fsc); 1465 if (err) { 1466 deactivate_locked_super(sb); 1467 return err; 1468 } 1469 1470 sb->s_flags |= SB_ACTIVE; 1471 } 1472 1473 WARN_ON(fsc->root); 1474 fsc->root = dget(sb->s_root); 1475 return 0; 1476 1477 out_err: 1478 kfree(fc); 1479 mutex_lock(&virtio_fs_mutex); 1480 virtio_fs_put(fs); 1481 mutex_unlock(&virtio_fs_mutex); 1482 return err; 1483 } 1484 1485 static const struct fs_context_operations virtio_fs_context_ops = { 1486 .free = virtio_fs_free_fsc, 1487 .parse_param = virtio_fs_parse_param, 1488 .get_tree = virtio_fs_get_tree, 1489 }; 1490 1491 static int virtio_fs_init_fs_context(struct fs_context *fsc) 1492 { 1493 struct fuse_fs_context *ctx; 1494 1495 if (fsc->purpose == FS_CONTEXT_FOR_SUBMOUNT) 1496 return fuse_init_fs_context_submount(fsc); 1497 1498 ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL); 1499 if (!ctx) 1500 return -ENOMEM; 1501 fsc->fs_private = ctx; 1502 fsc->ops = &virtio_fs_context_ops; 1503 return 0; 1504 } 1505 1506 static struct file_system_type virtio_fs_type = { 1507 .owner = THIS_MODULE, 1508 .name = "virtiofs", 1509 .init_fs_context = virtio_fs_init_fs_context, 1510 .kill_sb = virtio_kill_sb, 1511 }; 1512 1513 static int __init virtio_fs_init(void) 1514 { 1515 int ret; 1516 1517 ret = register_virtio_driver(&virtio_fs_driver); 1518 if (ret < 0) 1519 return ret; 1520 1521 ret = register_filesystem(&virtio_fs_type); 1522 if (ret < 0) { 1523 unregister_virtio_driver(&virtio_fs_driver); 1524 return ret; 1525 } 1526 1527 return 0; 1528 } 1529 module_init(virtio_fs_init); 1530 1531 static void __exit virtio_fs_exit(void) 1532 { 1533 unregister_filesystem(&virtio_fs_type); 1534 unregister_virtio_driver(&virtio_fs_driver); 1535 } 1536 module_exit(virtio_fs_exit); 1537 1538 MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>"); 1539 MODULE_DESCRIPTION("Virtio Filesystem"); 1540 MODULE_LICENSE("GPL"); 1541 MODULE_ALIAS_FS(KBUILD_MODNAME); 1542 MODULE_DEVICE_TABLE(virtio, id_table); 1543