1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * virtio-fs: Virtio Filesystem 4 * Copyright (C) 2018 Red Hat, Inc. 5 */ 6 7 #include <linux/fs.h> 8 #include <linux/dax.h> 9 #include <linux/pci.h> 10 #include <linux/pfn_t.h> 11 #include <linux/memremap.h> 12 #include <linux/module.h> 13 #include <linux/virtio.h> 14 #include <linux/virtio_fs.h> 15 #include <linux/delay.h> 16 #include <linux/fs_context.h> 17 #include <linux/fs_parser.h> 18 #include <linux/highmem.h> 19 #include <linux/cleanup.h> 20 #include <linux/uio.h> 21 #include "fuse_i.h" 22 23 /* Used to help calculate the FUSE connection's max_pages limit for a request's 24 * size. Parts of the struct fuse_req are sliced into scattergather lists in 25 * addition to the pages used, so this can help account for that overhead. 26 */ 27 #define FUSE_HEADER_OVERHEAD 4 28 29 /* List of virtio-fs device instances and a lock for the list. Also provides 30 * mutual exclusion in device removal and mounting path 31 */ 32 static DEFINE_MUTEX(virtio_fs_mutex); 33 static LIST_HEAD(virtio_fs_instances); 34 35 /* The /sys/fs/virtio_fs/ kset */ 36 static struct kset *virtio_fs_kset; 37 38 enum { 39 VQ_HIPRIO, 40 VQ_REQUEST 41 }; 42 43 #define VQ_NAME_LEN 24 44 45 /* Per-virtqueue state */ 46 struct virtio_fs_vq { 47 spinlock_t lock; 48 struct virtqueue *vq; /* protected by ->lock */ 49 struct work_struct done_work; 50 struct list_head queued_reqs; 51 struct list_head end_reqs; /* End these requests */ 52 struct delayed_work dispatch_work; 53 struct fuse_dev *fud; 54 bool connected; 55 long in_flight; 56 struct completion in_flight_zero; /* No inflight requests */ 57 char name[VQ_NAME_LEN]; 58 } ____cacheline_aligned_in_smp; 59 60 /* A virtio-fs device instance */ 61 struct virtio_fs { 62 struct kobject kobj; 63 struct list_head list; /* on virtio_fs_instances */ 64 char *tag; 65 struct virtio_fs_vq *vqs; 66 unsigned int nvqs; /* number of virtqueues */ 67 unsigned int num_request_queues; /* number of request queues */ 68 struct dax_device *dax_dev; 69 70 /* DAX memory window where file contents are mapped */ 71 void *window_kaddr; 72 phys_addr_t window_phys_addr; 73 size_t window_len; 74 }; 75 76 struct virtio_fs_forget_req { 77 struct fuse_in_header ih; 78 struct fuse_forget_in arg; 79 }; 80 81 struct virtio_fs_forget { 82 /* This request can be temporarily queued on virt queue */ 83 struct list_head list; 84 struct virtio_fs_forget_req req; 85 }; 86 87 struct virtio_fs_req_work { 88 struct fuse_req *req; 89 struct virtio_fs_vq *fsvq; 90 struct work_struct done_work; 91 }; 92 93 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 94 struct fuse_req *req, bool in_flight); 95 96 static const struct constant_table dax_param_enums[] = { 97 {"always", FUSE_DAX_ALWAYS }, 98 {"never", FUSE_DAX_NEVER }, 99 {"inode", FUSE_DAX_INODE_USER }, 100 {} 101 }; 102 103 enum { 104 OPT_DAX, 105 OPT_DAX_ENUM, 106 }; 107 108 static const struct fs_parameter_spec virtio_fs_parameters[] = { 109 fsparam_flag("dax", OPT_DAX), 110 fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums), 111 {} 112 }; 113 114 static int virtio_fs_parse_param(struct fs_context *fsc, 115 struct fs_parameter *param) 116 { 117 struct fs_parse_result result; 118 struct fuse_fs_context *ctx = fsc->fs_private; 119 int opt; 120 121 opt = fs_parse(fsc, virtio_fs_parameters, param, &result); 122 if (opt < 0) 123 return opt; 124 125 switch (opt) { 126 case OPT_DAX: 127 ctx->dax_mode = FUSE_DAX_ALWAYS; 128 break; 129 case OPT_DAX_ENUM: 130 ctx->dax_mode = result.uint_32; 131 break; 132 default: 133 return -EINVAL; 134 } 135 136 return 0; 137 } 138 139 static void virtio_fs_free_fsc(struct fs_context *fsc) 140 { 141 struct fuse_fs_context *ctx = fsc->fs_private; 142 143 kfree(ctx); 144 } 145 146 static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) 147 { 148 struct virtio_fs *fs = vq->vdev->priv; 149 150 return &fs->vqs[vq->index]; 151 } 152 153 /* Should be called with fsvq->lock held. */ 154 static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq) 155 { 156 fsvq->in_flight++; 157 } 158 159 /* Should be called with fsvq->lock held. */ 160 static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq) 161 { 162 WARN_ON(fsvq->in_flight <= 0); 163 fsvq->in_flight--; 164 if (!fsvq->in_flight) 165 complete(&fsvq->in_flight_zero); 166 } 167 168 static ssize_t tag_show(struct kobject *kobj, 169 struct kobj_attribute *attr, char *buf) 170 { 171 struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj); 172 173 return sysfs_emit(buf, "%s\n", fs->tag); 174 } 175 176 static struct kobj_attribute virtio_fs_tag_attr = __ATTR_RO(tag); 177 178 static struct attribute *virtio_fs_attrs[] = { 179 &virtio_fs_tag_attr.attr, 180 NULL 181 }; 182 ATTRIBUTE_GROUPS(virtio_fs); 183 184 static void virtio_fs_ktype_release(struct kobject *kobj) 185 { 186 struct virtio_fs *vfs = container_of(kobj, struct virtio_fs, kobj); 187 188 kfree(vfs->vqs); 189 kfree(vfs); 190 } 191 192 static const struct kobj_type virtio_fs_ktype = { 193 .release = virtio_fs_ktype_release, 194 .sysfs_ops = &kobj_sysfs_ops, 195 .default_groups = virtio_fs_groups, 196 }; 197 198 /* Make sure virtiofs_mutex is held */ 199 static void virtio_fs_put(struct virtio_fs *fs) 200 { 201 kobject_put(&fs->kobj); 202 } 203 204 static void virtio_fs_fiq_release(struct fuse_iqueue *fiq) 205 { 206 struct virtio_fs *vfs = fiq->priv; 207 208 mutex_lock(&virtio_fs_mutex); 209 virtio_fs_put(vfs); 210 mutex_unlock(&virtio_fs_mutex); 211 } 212 213 static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) 214 { 215 WARN_ON(fsvq->in_flight < 0); 216 217 /* Wait for in flight requests to finish.*/ 218 spin_lock(&fsvq->lock); 219 if (fsvq->in_flight) { 220 /* We are holding virtio_fs_mutex. There should not be any 221 * waiters waiting for completion. 222 */ 223 reinit_completion(&fsvq->in_flight_zero); 224 spin_unlock(&fsvq->lock); 225 wait_for_completion(&fsvq->in_flight_zero); 226 } else { 227 spin_unlock(&fsvq->lock); 228 } 229 230 flush_work(&fsvq->done_work); 231 flush_delayed_work(&fsvq->dispatch_work); 232 } 233 234 static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs) 235 { 236 struct virtio_fs_vq *fsvq; 237 int i; 238 239 for (i = 0; i < fs->nvqs; i++) { 240 fsvq = &fs->vqs[i]; 241 virtio_fs_drain_queue(fsvq); 242 } 243 } 244 245 static void virtio_fs_drain_all_queues(struct virtio_fs *fs) 246 { 247 /* Provides mutual exclusion between ->remove and ->kill_sb 248 * paths. We don't want both of these draining queue at the 249 * same time. Current completion logic reinits completion 250 * and that means there should not be any other thread 251 * doing reinit or waiting for completion already. 252 */ 253 mutex_lock(&virtio_fs_mutex); 254 virtio_fs_drain_all_queues_locked(fs); 255 mutex_unlock(&virtio_fs_mutex); 256 } 257 258 static void virtio_fs_start_all_queues(struct virtio_fs *fs) 259 { 260 struct virtio_fs_vq *fsvq; 261 int i; 262 263 for (i = 0; i < fs->nvqs; i++) { 264 fsvq = &fs->vqs[i]; 265 spin_lock(&fsvq->lock); 266 fsvq->connected = true; 267 spin_unlock(&fsvq->lock); 268 } 269 } 270 271 /* Add a new instance to the list or return -EEXIST if tag name exists*/ 272 static int virtio_fs_add_instance(struct virtio_device *vdev, 273 struct virtio_fs *fs) 274 { 275 struct virtio_fs *fs2; 276 int ret; 277 278 mutex_lock(&virtio_fs_mutex); 279 280 list_for_each_entry(fs2, &virtio_fs_instances, list) { 281 if (strcmp(fs->tag, fs2->tag) == 0) { 282 mutex_unlock(&virtio_fs_mutex); 283 return -EEXIST; 284 } 285 } 286 287 /* Use the virtio_device's index as a unique identifier, there is no 288 * need to allocate our own identifiers because the virtio_fs instance 289 * is only visible to userspace as long as the underlying virtio_device 290 * exists. 291 */ 292 fs->kobj.kset = virtio_fs_kset; 293 ret = kobject_add(&fs->kobj, NULL, "%d", vdev->index); 294 if (ret < 0) { 295 mutex_unlock(&virtio_fs_mutex); 296 return ret; 297 } 298 299 ret = sysfs_create_link(&fs->kobj, &vdev->dev.kobj, "device"); 300 if (ret < 0) { 301 kobject_del(&fs->kobj); 302 mutex_unlock(&virtio_fs_mutex); 303 return ret; 304 } 305 306 list_add_tail(&fs->list, &virtio_fs_instances); 307 308 mutex_unlock(&virtio_fs_mutex); 309 310 kobject_uevent(&fs->kobj, KOBJ_ADD); 311 312 return 0; 313 } 314 315 /* Return the virtio_fs with a given tag, or NULL */ 316 static struct virtio_fs *virtio_fs_find_instance(const char *tag) 317 { 318 struct virtio_fs *fs; 319 320 mutex_lock(&virtio_fs_mutex); 321 322 list_for_each_entry(fs, &virtio_fs_instances, list) { 323 if (strcmp(fs->tag, tag) == 0) { 324 kobject_get(&fs->kobj); 325 goto found; 326 } 327 } 328 329 fs = NULL; /* not found */ 330 331 found: 332 mutex_unlock(&virtio_fs_mutex); 333 334 return fs; 335 } 336 337 static void virtio_fs_free_devs(struct virtio_fs *fs) 338 { 339 unsigned int i; 340 341 for (i = 0; i < fs->nvqs; i++) { 342 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 343 344 if (!fsvq->fud) 345 continue; 346 347 fuse_dev_free(fsvq->fud); 348 fsvq->fud = NULL; 349 } 350 } 351 352 /* Read filesystem name from virtio config into fs->tag (must kfree()). */ 353 static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs) 354 { 355 char tag_buf[sizeof_field(struct virtio_fs_config, tag)]; 356 char *end; 357 size_t len; 358 359 virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag), 360 &tag_buf, sizeof(tag_buf)); 361 end = memchr(tag_buf, '\0', sizeof(tag_buf)); 362 if (end == tag_buf) 363 return -EINVAL; /* empty tag */ 364 if (!end) 365 end = &tag_buf[sizeof(tag_buf)]; 366 367 len = end - tag_buf; 368 fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL); 369 if (!fs->tag) 370 return -ENOMEM; 371 memcpy(fs->tag, tag_buf, len); 372 fs->tag[len] = '\0'; 373 374 /* While the VIRTIO specification allows any character, newlines are 375 * awkward on mount(8) command-lines and cause problems in the sysfs 376 * "tag" attr and uevent TAG= properties. Forbid them. 377 */ 378 if (strchr(fs->tag, '\n')) { 379 dev_dbg(&vdev->dev, "refusing virtiofs tag with newline character\n"); 380 return -EINVAL; 381 } 382 383 return 0; 384 } 385 386 /* Work function for hiprio completion */ 387 static void virtio_fs_hiprio_done_work(struct work_struct *work) 388 { 389 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 390 done_work); 391 struct virtqueue *vq = fsvq->vq; 392 393 /* Free completed FUSE_FORGET requests */ 394 spin_lock(&fsvq->lock); 395 do { 396 unsigned int len; 397 void *req; 398 399 virtqueue_disable_cb(vq); 400 401 while ((req = virtqueue_get_buf(vq, &len)) != NULL) { 402 kfree(req); 403 dec_in_flight_req(fsvq); 404 } 405 } while (!virtqueue_enable_cb(vq)); 406 spin_unlock(&fsvq->lock); 407 } 408 409 static void virtio_fs_request_dispatch_work(struct work_struct *work) 410 { 411 struct fuse_req *req; 412 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 413 dispatch_work.work); 414 int ret; 415 416 pr_debug("virtio-fs: worker %s called.\n", __func__); 417 while (1) { 418 spin_lock(&fsvq->lock); 419 req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req, 420 list); 421 if (!req) { 422 spin_unlock(&fsvq->lock); 423 break; 424 } 425 426 list_del_init(&req->list); 427 spin_unlock(&fsvq->lock); 428 fuse_request_end(req); 429 } 430 431 /* Dispatch pending requests */ 432 while (1) { 433 spin_lock(&fsvq->lock); 434 req = list_first_entry_or_null(&fsvq->queued_reqs, 435 struct fuse_req, list); 436 if (!req) { 437 spin_unlock(&fsvq->lock); 438 return; 439 } 440 list_del_init(&req->list); 441 spin_unlock(&fsvq->lock); 442 443 ret = virtio_fs_enqueue_req(fsvq, req, true); 444 if (ret < 0) { 445 if (ret == -ENOMEM || ret == -ENOSPC) { 446 spin_lock(&fsvq->lock); 447 list_add_tail(&req->list, &fsvq->queued_reqs); 448 schedule_delayed_work(&fsvq->dispatch_work, 449 msecs_to_jiffies(1)); 450 spin_unlock(&fsvq->lock); 451 return; 452 } 453 req->out.h.error = ret; 454 spin_lock(&fsvq->lock); 455 dec_in_flight_req(fsvq); 456 spin_unlock(&fsvq->lock); 457 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", 458 ret); 459 fuse_request_end(req); 460 } 461 } 462 } 463 464 /* 465 * Returns 1 if queue is full and sender should wait a bit before sending 466 * next request, 0 otherwise. 467 */ 468 static int send_forget_request(struct virtio_fs_vq *fsvq, 469 struct virtio_fs_forget *forget, 470 bool in_flight) 471 { 472 struct scatterlist sg; 473 struct virtqueue *vq; 474 int ret = 0; 475 bool notify; 476 struct virtio_fs_forget_req *req = &forget->req; 477 478 spin_lock(&fsvq->lock); 479 if (!fsvq->connected) { 480 if (in_flight) 481 dec_in_flight_req(fsvq); 482 kfree(forget); 483 goto out; 484 } 485 486 sg_init_one(&sg, req, sizeof(*req)); 487 vq = fsvq->vq; 488 dev_dbg(&vq->vdev->dev, "%s\n", __func__); 489 490 ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC); 491 if (ret < 0) { 492 if (ret == -ENOMEM || ret == -ENOSPC) { 493 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n", 494 ret); 495 list_add_tail(&forget->list, &fsvq->queued_reqs); 496 schedule_delayed_work(&fsvq->dispatch_work, 497 msecs_to_jiffies(1)); 498 if (!in_flight) 499 inc_in_flight_req(fsvq); 500 /* Queue is full */ 501 ret = 1; 502 } else { 503 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n", 504 ret); 505 kfree(forget); 506 if (in_flight) 507 dec_in_flight_req(fsvq); 508 } 509 goto out; 510 } 511 512 if (!in_flight) 513 inc_in_flight_req(fsvq); 514 notify = virtqueue_kick_prepare(vq); 515 spin_unlock(&fsvq->lock); 516 517 if (notify) 518 virtqueue_notify(vq); 519 return ret; 520 out: 521 spin_unlock(&fsvq->lock); 522 return ret; 523 } 524 525 static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) 526 { 527 struct virtio_fs_forget *forget; 528 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 529 dispatch_work.work); 530 pr_debug("virtio-fs: worker %s called.\n", __func__); 531 while (1) { 532 spin_lock(&fsvq->lock); 533 forget = list_first_entry_or_null(&fsvq->queued_reqs, 534 struct virtio_fs_forget, list); 535 if (!forget) { 536 spin_unlock(&fsvq->lock); 537 return; 538 } 539 540 list_del(&forget->list); 541 spin_unlock(&fsvq->lock); 542 if (send_forget_request(fsvq, forget, true)) 543 return; 544 } 545 } 546 547 /* Allocate and copy args into req->argbuf */ 548 static int copy_args_to_argbuf(struct fuse_req *req) 549 { 550 struct fuse_args *args = req->args; 551 unsigned int offset = 0; 552 unsigned int num_in; 553 unsigned int num_out; 554 unsigned int len; 555 unsigned int i; 556 557 num_in = args->in_numargs - args->in_pages; 558 num_out = args->out_numargs - args->out_pages; 559 len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) + 560 fuse_len_args(num_out, args->out_args); 561 562 req->argbuf = kmalloc(len, GFP_ATOMIC); 563 if (!req->argbuf) 564 return -ENOMEM; 565 566 for (i = 0; i < num_in; i++) { 567 memcpy(req->argbuf + offset, 568 args->in_args[i].value, 569 args->in_args[i].size); 570 offset += args->in_args[i].size; 571 } 572 573 return 0; 574 } 575 576 /* Copy args out of and free req->argbuf */ 577 static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req) 578 { 579 unsigned int remaining; 580 unsigned int offset; 581 unsigned int num_in; 582 unsigned int num_out; 583 unsigned int i; 584 585 remaining = req->out.h.len - sizeof(req->out.h); 586 num_in = args->in_numargs - args->in_pages; 587 num_out = args->out_numargs - args->out_pages; 588 offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args); 589 590 for (i = 0; i < num_out; i++) { 591 unsigned int argsize = args->out_args[i].size; 592 593 if (args->out_argvar && 594 i == args->out_numargs - 1 && 595 argsize > remaining) { 596 argsize = remaining; 597 } 598 599 memcpy(args->out_args[i].value, req->argbuf + offset, argsize); 600 offset += argsize; 601 602 if (i != args->out_numargs - 1) 603 remaining -= argsize; 604 } 605 606 /* Store the actual size of the variable-length arg */ 607 if (args->out_argvar) 608 args->out_args[args->out_numargs - 1].size = remaining; 609 610 kfree(req->argbuf); 611 req->argbuf = NULL; 612 } 613 614 /* Work function for request completion */ 615 static void virtio_fs_request_complete(struct fuse_req *req, 616 struct virtio_fs_vq *fsvq) 617 { 618 struct fuse_pqueue *fpq = &fsvq->fud->pq; 619 struct fuse_args *args; 620 struct fuse_args_pages *ap; 621 unsigned int len, i, thislen; 622 struct page *page; 623 624 /* 625 * TODO verify that server properly follows FUSE protocol 626 * (oh.uniq, oh.len) 627 */ 628 args = req->args; 629 copy_args_from_argbuf(args, req); 630 631 if (args->out_pages && args->page_zeroing) { 632 len = args->out_args[args->out_numargs - 1].size; 633 ap = container_of(args, typeof(*ap), args); 634 for (i = 0; i < ap->num_pages; i++) { 635 thislen = ap->descs[i].length; 636 if (len < thislen) { 637 WARN_ON(ap->descs[i].offset); 638 page = ap->pages[i]; 639 zero_user_segment(page, len, thislen); 640 len = 0; 641 } else { 642 len -= thislen; 643 } 644 } 645 } 646 647 spin_lock(&fpq->lock); 648 clear_bit(FR_SENT, &req->flags); 649 spin_unlock(&fpq->lock); 650 651 fuse_request_end(req); 652 spin_lock(&fsvq->lock); 653 dec_in_flight_req(fsvq); 654 spin_unlock(&fsvq->lock); 655 } 656 657 static void virtio_fs_complete_req_work(struct work_struct *work) 658 { 659 struct virtio_fs_req_work *w = 660 container_of(work, typeof(*w), done_work); 661 662 virtio_fs_request_complete(w->req, w->fsvq); 663 kfree(w); 664 } 665 666 static void virtio_fs_requests_done_work(struct work_struct *work) 667 { 668 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 669 done_work); 670 struct fuse_pqueue *fpq = &fsvq->fud->pq; 671 struct virtqueue *vq = fsvq->vq; 672 struct fuse_req *req; 673 struct fuse_req *next; 674 unsigned int len; 675 LIST_HEAD(reqs); 676 677 /* Collect completed requests off the virtqueue */ 678 spin_lock(&fsvq->lock); 679 do { 680 virtqueue_disable_cb(vq); 681 682 while ((req = virtqueue_get_buf(vq, &len)) != NULL) { 683 spin_lock(&fpq->lock); 684 list_move_tail(&req->list, &reqs); 685 spin_unlock(&fpq->lock); 686 } 687 } while (!virtqueue_enable_cb(vq)); 688 spin_unlock(&fsvq->lock); 689 690 /* End requests */ 691 list_for_each_entry_safe(req, next, &reqs, list) { 692 list_del_init(&req->list); 693 694 /* blocking async request completes in a worker context */ 695 if (req->args->may_block) { 696 struct virtio_fs_req_work *w; 697 698 w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL); 699 INIT_WORK(&w->done_work, virtio_fs_complete_req_work); 700 w->fsvq = fsvq; 701 w->req = req; 702 schedule_work(&w->done_work); 703 } else { 704 virtio_fs_request_complete(req, fsvq); 705 } 706 } 707 } 708 709 /* Virtqueue interrupt handler */ 710 static void virtio_fs_vq_done(struct virtqueue *vq) 711 { 712 struct virtio_fs_vq *fsvq = vq_to_fsvq(vq); 713 714 dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name); 715 716 schedule_work(&fsvq->done_work); 717 } 718 719 static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name, 720 int vq_type) 721 { 722 strscpy(fsvq->name, name, VQ_NAME_LEN); 723 spin_lock_init(&fsvq->lock); 724 INIT_LIST_HEAD(&fsvq->queued_reqs); 725 INIT_LIST_HEAD(&fsvq->end_reqs); 726 init_completion(&fsvq->in_flight_zero); 727 728 if (vq_type == VQ_REQUEST) { 729 INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work); 730 INIT_DELAYED_WORK(&fsvq->dispatch_work, 731 virtio_fs_request_dispatch_work); 732 } else { 733 INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work); 734 INIT_DELAYED_WORK(&fsvq->dispatch_work, 735 virtio_fs_hiprio_dispatch_work); 736 } 737 } 738 739 /* Initialize virtqueues */ 740 static int virtio_fs_setup_vqs(struct virtio_device *vdev, 741 struct virtio_fs *fs) 742 { 743 struct virtqueue **vqs; 744 vq_callback_t **callbacks; 745 const char **names; 746 unsigned int i; 747 int ret = 0; 748 749 virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues, 750 &fs->num_request_queues); 751 if (fs->num_request_queues == 0) 752 return -EINVAL; 753 754 fs->nvqs = VQ_REQUEST + fs->num_request_queues; 755 fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL); 756 if (!fs->vqs) 757 return -ENOMEM; 758 759 vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL); 760 callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]), 761 GFP_KERNEL); 762 names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL); 763 if (!vqs || !callbacks || !names) { 764 ret = -ENOMEM; 765 goto out; 766 } 767 768 /* Initialize the hiprio/forget request virtqueue */ 769 callbacks[VQ_HIPRIO] = virtio_fs_vq_done; 770 virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO); 771 names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; 772 773 /* Initialize the requests virtqueues */ 774 for (i = VQ_REQUEST; i < fs->nvqs; i++) { 775 char vq_name[VQ_NAME_LEN]; 776 777 snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST); 778 virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST); 779 callbacks[i] = virtio_fs_vq_done; 780 names[i] = fs->vqs[i].name; 781 } 782 783 ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL); 784 if (ret < 0) 785 goto out; 786 787 for (i = 0; i < fs->nvqs; i++) 788 fs->vqs[i].vq = vqs[i]; 789 790 virtio_fs_start_all_queues(fs); 791 out: 792 kfree(names); 793 kfree(callbacks); 794 kfree(vqs); 795 if (ret) 796 kfree(fs->vqs); 797 return ret; 798 } 799 800 /* Free virtqueues (device must already be reset) */ 801 static void virtio_fs_cleanup_vqs(struct virtio_device *vdev) 802 { 803 vdev->config->del_vqs(vdev); 804 } 805 806 /* Map a window offset to a page frame number. The window offset will have 807 * been produced by .iomap_begin(), which maps a file offset to a window 808 * offset. 809 */ 810 static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, 811 long nr_pages, enum dax_access_mode mode, 812 void **kaddr, pfn_t *pfn) 813 { 814 struct virtio_fs *fs = dax_get_private(dax_dev); 815 phys_addr_t offset = PFN_PHYS(pgoff); 816 size_t max_nr_pages = fs->window_len / PAGE_SIZE - pgoff; 817 818 if (kaddr) 819 *kaddr = fs->window_kaddr + offset; 820 if (pfn) 821 *pfn = phys_to_pfn_t(fs->window_phys_addr + offset, 822 PFN_DEV | PFN_MAP); 823 return nr_pages > max_nr_pages ? max_nr_pages : nr_pages; 824 } 825 826 static int virtio_fs_zero_page_range(struct dax_device *dax_dev, 827 pgoff_t pgoff, size_t nr_pages) 828 { 829 long rc; 830 void *kaddr; 831 832 rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr, 833 NULL); 834 if (rc < 0) 835 return dax_mem2blk_err(rc); 836 837 memset(kaddr, 0, nr_pages << PAGE_SHIFT); 838 dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); 839 return 0; 840 } 841 842 static const struct dax_operations virtio_fs_dax_ops = { 843 .direct_access = virtio_fs_direct_access, 844 .zero_page_range = virtio_fs_zero_page_range, 845 }; 846 847 static void virtio_fs_cleanup_dax(void *data) 848 { 849 struct dax_device *dax_dev = data; 850 851 kill_dax(dax_dev); 852 put_dax(dax_dev); 853 } 854 855 DEFINE_FREE(cleanup_dax, struct dax_dev *, if (!IS_ERR_OR_NULL(_T)) virtio_fs_cleanup_dax(_T)) 856 857 static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs) 858 { 859 struct dax_device *dax_dev __free(cleanup_dax) = NULL; 860 struct virtio_shm_region cache_reg; 861 struct dev_pagemap *pgmap; 862 bool have_cache; 863 864 if (!IS_ENABLED(CONFIG_FUSE_DAX)) 865 return 0; 866 867 dax_dev = alloc_dax(fs, &virtio_fs_dax_ops); 868 if (IS_ERR(dax_dev)) { 869 int rc = PTR_ERR(dax_dev); 870 return rc == -EOPNOTSUPP ? 0 : rc; 871 } 872 873 /* Get cache region */ 874 have_cache = virtio_get_shm_region(vdev, &cache_reg, 875 (u8)VIRTIO_FS_SHMCAP_ID_CACHE); 876 if (!have_cache) { 877 dev_notice(&vdev->dev, "%s: No cache capability\n", __func__); 878 return 0; 879 } 880 881 if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len, 882 dev_name(&vdev->dev))) { 883 dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n", 884 cache_reg.addr, cache_reg.len); 885 return -EBUSY; 886 } 887 888 dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len, 889 cache_reg.addr); 890 891 pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL); 892 if (!pgmap) 893 return -ENOMEM; 894 895 pgmap->type = MEMORY_DEVICE_FS_DAX; 896 897 /* Ideally we would directly use the PCI BAR resource but 898 * devm_memremap_pages() wants its own copy in pgmap. So 899 * initialize a struct resource from scratch (only the start 900 * and end fields will be used). 901 */ 902 pgmap->range = (struct range) { 903 .start = (phys_addr_t) cache_reg.addr, 904 .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1, 905 }; 906 pgmap->nr_range = 1; 907 908 fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap); 909 if (IS_ERR(fs->window_kaddr)) 910 return PTR_ERR(fs->window_kaddr); 911 912 fs->window_phys_addr = (phys_addr_t) cache_reg.addr; 913 fs->window_len = (phys_addr_t) cache_reg.len; 914 915 dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n", 916 __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len); 917 918 fs->dax_dev = no_free_ptr(dax_dev); 919 return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax, 920 fs->dax_dev); 921 } 922 923 static int virtio_fs_probe(struct virtio_device *vdev) 924 { 925 struct virtio_fs *fs; 926 int ret; 927 928 fs = kzalloc(sizeof(*fs), GFP_KERNEL); 929 if (!fs) 930 return -ENOMEM; 931 kobject_init(&fs->kobj, &virtio_fs_ktype); 932 vdev->priv = fs; 933 934 ret = virtio_fs_read_tag(vdev, fs); 935 if (ret < 0) 936 goto out; 937 938 ret = virtio_fs_setup_vqs(vdev, fs); 939 if (ret < 0) 940 goto out; 941 942 /* TODO vq affinity */ 943 944 ret = virtio_fs_setup_dax(vdev, fs); 945 if (ret < 0) 946 goto out_vqs; 947 948 /* Bring the device online in case the filesystem is mounted and 949 * requests need to be sent before we return. 950 */ 951 virtio_device_ready(vdev); 952 953 ret = virtio_fs_add_instance(vdev, fs); 954 if (ret < 0) 955 goto out_vqs; 956 957 return 0; 958 959 out_vqs: 960 virtio_reset_device(vdev); 961 virtio_fs_cleanup_vqs(vdev); 962 963 out: 964 vdev->priv = NULL; 965 kobject_put(&fs->kobj); 966 return ret; 967 } 968 969 static void virtio_fs_stop_all_queues(struct virtio_fs *fs) 970 { 971 struct virtio_fs_vq *fsvq; 972 int i; 973 974 for (i = 0; i < fs->nvqs; i++) { 975 fsvq = &fs->vqs[i]; 976 spin_lock(&fsvq->lock); 977 fsvq->connected = false; 978 spin_unlock(&fsvq->lock); 979 } 980 } 981 982 static void virtio_fs_remove(struct virtio_device *vdev) 983 { 984 struct virtio_fs *fs = vdev->priv; 985 986 mutex_lock(&virtio_fs_mutex); 987 /* This device is going away. No one should get new reference */ 988 list_del_init(&fs->list); 989 sysfs_remove_link(&fs->kobj, "device"); 990 kobject_del(&fs->kobj); 991 virtio_fs_stop_all_queues(fs); 992 virtio_fs_drain_all_queues_locked(fs); 993 virtio_reset_device(vdev); 994 virtio_fs_cleanup_vqs(vdev); 995 996 vdev->priv = NULL; 997 /* Put device reference on virtio_fs object */ 998 virtio_fs_put(fs); 999 mutex_unlock(&virtio_fs_mutex); 1000 } 1001 1002 #ifdef CONFIG_PM_SLEEP 1003 static int virtio_fs_freeze(struct virtio_device *vdev) 1004 { 1005 /* TODO need to save state here */ 1006 pr_warn("virtio-fs: suspend/resume not yet supported\n"); 1007 return -EOPNOTSUPP; 1008 } 1009 1010 static int virtio_fs_restore(struct virtio_device *vdev) 1011 { 1012 /* TODO need to restore state here */ 1013 return 0; 1014 } 1015 #endif /* CONFIG_PM_SLEEP */ 1016 1017 static const struct virtio_device_id id_table[] = { 1018 { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID }, 1019 {}, 1020 }; 1021 1022 static const unsigned int feature_table[] = {}; 1023 1024 static struct virtio_driver virtio_fs_driver = { 1025 .driver.name = KBUILD_MODNAME, 1026 .driver.owner = THIS_MODULE, 1027 .id_table = id_table, 1028 .feature_table = feature_table, 1029 .feature_table_size = ARRAY_SIZE(feature_table), 1030 .probe = virtio_fs_probe, 1031 .remove = virtio_fs_remove, 1032 #ifdef CONFIG_PM_SLEEP 1033 .freeze = virtio_fs_freeze, 1034 .restore = virtio_fs_restore, 1035 #endif 1036 }; 1037 1038 static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq) 1039 __releases(fiq->lock) 1040 { 1041 struct fuse_forget_link *link; 1042 struct virtio_fs_forget *forget; 1043 struct virtio_fs_forget_req *req; 1044 struct virtio_fs *fs; 1045 struct virtio_fs_vq *fsvq; 1046 u64 unique; 1047 1048 link = fuse_dequeue_forget(fiq, 1, NULL); 1049 unique = fuse_get_unique(fiq); 1050 1051 fs = fiq->priv; 1052 fsvq = &fs->vqs[VQ_HIPRIO]; 1053 spin_unlock(&fiq->lock); 1054 1055 /* Allocate a buffer for the request */ 1056 forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL); 1057 req = &forget->req; 1058 1059 req->ih = (struct fuse_in_header){ 1060 .opcode = FUSE_FORGET, 1061 .nodeid = link->forget_one.nodeid, 1062 .unique = unique, 1063 .len = sizeof(*req), 1064 }; 1065 req->arg = (struct fuse_forget_in){ 1066 .nlookup = link->forget_one.nlookup, 1067 }; 1068 1069 send_forget_request(fsvq, forget, false); 1070 kfree(link); 1071 } 1072 1073 static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq) 1074 __releases(fiq->lock) 1075 { 1076 /* 1077 * TODO interrupts. 1078 * 1079 * Normal fs operations on a local filesystems aren't interruptible. 1080 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW) 1081 * with shared lock between host and guest. 1082 */ 1083 spin_unlock(&fiq->lock); 1084 } 1085 1086 /* Count number of scatter-gather elements required */ 1087 static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs, 1088 unsigned int num_pages, 1089 unsigned int total_len) 1090 { 1091 unsigned int i; 1092 unsigned int this_len; 1093 1094 for (i = 0; i < num_pages && total_len; i++) { 1095 this_len = min(page_descs[i].length, total_len); 1096 total_len -= this_len; 1097 } 1098 1099 return i; 1100 } 1101 1102 /* Return the number of scatter-gather list elements required */ 1103 static unsigned int sg_count_fuse_req(struct fuse_req *req) 1104 { 1105 struct fuse_args *args = req->args; 1106 struct fuse_args_pages *ap = container_of(args, typeof(*ap), args); 1107 unsigned int size, total_sgs = 1 /* fuse_in_header */; 1108 1109 if (args->in_numargs - args->in_pages) 1110 total_sgs += 1; 1111 1112 if (args->in_pages) { 1113 size = args->in_args[args->in_numargs - 1].size; 1114 total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages, 1115 size); 1116 } 1117 1118 if (!test_bit(FR_ISREPLY, &req->flags)) 1119 return total_sgs; 1120 1121 total_sgs += 1 /* fuse_out_header */; 1122 1123 if (args->out_numargs - args->out_pages) 1124 total_sgs += 1; 1125 1126 if (args->out_pages) { 1127 size = args->out_args[args->out_numargs - 1].size; 1128 total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages, 1129 size); 1130 } 1131 1132 return total_sgs; 1133 } 1134 1135 /* Add pages to scatter-gather list and return number of elements used */ 1136 static unsigned int sg_init_fuse_pages(struct scatterlist *sg, 1137 struct page **pages, 1138 struct fuse_page_desc *page_descs, 1139 unsigned int num_pages, 1140 unsigned int total_len) 1141 { 1142 unsigned int i; 1143 unsigned int this_len; 1144 1145 for (i = 0; i < num_pages && total_len; i++) { 1146 sg_init_table(&sg[i], 1); 1147 this_len = min(page_descs[i].length, total_len); 1148 sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset); 1149 total_len -= this_len; 1150 } 1151 1152 return i; 1153 } 1154 1155 /* Add args to scatter-gather list and return number of elements used */ 1156 static unsigned int sg_init_fuse_args(struct scatterlist *sg, 1157 struct fuse_req *req, 1158 struct fuse_arg *args, 1159 unsigned int numargs, 1160 bool argpages, 1161 void *argbuf, 1162 unsigned int *len_used) 1163 { 1164 struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args); 1165 unsigned int total_sgs = 0; 1166 unsigned int len; 1167 1168 len = fuse_len_args(numargs - argpages, args); 1169 if (len) 1170 sg_init_one(&sg[total_sgs++], argbuf, len); 1171 1172 if (argpages) 1173 total_sgs += sg_init_fuse_pages(&sg[total_sgs], 1174 ap->pages, ap->descs, 1175 ap->num_pages, 1176 args[numargs - 1].size); 1177 1178 if (len_used) 1179 *len_used = len; 1180 1181 return total_sgs; 1182 } 1183 1184 /* Add a request to a virtqueue and kick the device */ 1185 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 1186 struct fuse_req *req, bool in_flight) 1187 { 1188 /* requests need at least 4 elements */ 1189 struct scatterlist *stack_sgs[6]; 1190 struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)]; 1191 struct scatterlist **sgs = stack_sgs; 1192 struct scatterlist *sg = stack_sg; 1193 struct virtqueue *vq; 1194 struct fuse_args *args = req->args; 1195 unsigned int argbuf_used = 0; 1196 unsigned int out_sgs = 0; 1197 unsigned int in_sgs = 0; 1198 unsigned int total_sgs; 1199 unsigned int i; 1200 int ret; 1201 bool notify; 1202 struct fuse_pqueue *fpq; 1203 1204 /* Does the sglist fit on the stack? */ 1205 total_sgs = sg_count_fuse_req(req); 1206 if (total_sgs > ARRAY_SIZE(stack_sgs)) { 1207 sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC); 1208 sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC); 1209 if (!sgs || !sg) { 1210 ret = -ENOMEM; 1211 goto out; 1212 } 1213 } 1214 1215 /* Use a bounce buffer since stack args cannot be mapped */ 1216 ret = copy_args_to_argbuf(req); 1217 if (ret < 0) 1218 goto out; 1219 1220 /* Request elements */ 1221 sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h)); 1222 out_sgs += sg_init_fuse_args(&sg[out_sgs], req, 1223 (struct fuse_arg *)args->in_args, 1224 args->in_numargs, args->in_pages, 1225 req->argbuf, &argbuf_used); 1226 1227 /* Reply elements */ 1228 if (test_bit(FR_ISREPLY, &req->flags)) { 1229 sg_init_one(&sg[out_sgs + in_sgs++], 1230 &req->out.h, sizeof(req->out.h)); 1231 in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req, 1232 args->out_args, args->out_numargs, 1233 args->out_pages, 1234 req->argbuf + argbuf_used, NULL); 1235 } 1236 1237 WARN_ON(out_sgs + in_sgs != total_sgs); 1238 1239 for (i = 0; i < total_sgs; i++) 1240 sgs[i] = &sg[i]; 1241 1242 spin_lock(&fsvq->lock); 1243 1244 if (!fsvq->connected) { 1245 spin_unlock(&fsvq->lock); 1246 ret = -ENOTCONN; 1247 goto out; 1248 } 1249 1250 vq = fsvq->vq; 1251 ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC); 1252 if (ret < 0) { 1253 spin_unlock(&fsvq->lock); 1254 goto out; 1255 } 1256 1257 /* Request successfully sent. */ 1258 fpq = &fsvq->fud->pq; 1259 spin_lock(&fpq->lock); 1260 list_add_tail(&req->list, fpq->processing); 1261 spin_unlock(&fpq->lock); 1262 set_bit(FR_SENT, &req->flags); 1263 /* matches barrier in request_wait_answer() */ 1264 smp_mb__after_atomic(); 1265 1266 if (!in_flight) 1267 inc_in_flight_req(fsvq); 1268 notify = virtqueue_kick_prepare(vq); 1269 1270 spin_unlock(&fsvq->lock); 1271 1272 if (notify) 1273 virtqueue_notify(vq); 1274 1275 out: 1276 if (ret < 0 && req->argbuf) { 1277 kfree(req->argbuf); 1278 req->argbuf = NULL; 1279 } 1280 if (sgs != stack_sgs) { 1281 kfree(sgs); 1282 kfree(sg); 1283 } 1284 1285 return ret; 1286 } 1287 1288 static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq) 1289 __releases(fiq->lock) 1290 { 1291 unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ 1292 struct virtio_fs *fs; 1293 struct fuse_req *req; 1294 struct virtio_fs_vq *fsvq; 1295 int ret; 1296 1297 WARN_ON(list_empty(&fiq->pending)); 1298 req = list_last_entry(&fiq->pending, struct fuse_req, list); 1299 clear_bit(FR_PENDING, &req->flags); 1300 list_del_init(&req->list); 1301 WARN_ON(!list_empty(&fiq->pending)); 1302 spin_unlock(&fiq->lock); 1303 1304 fs = fiq->priv; 1305 1306 pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n", 1307 __func__, req->in.h.opcode, req->in.h.unique, 1308 req->in.h.nodeid, req->in.h.len, 1309 fuse_len_args(req->args->out_numargs, req->args->out_args)); 1310 1311 fsvq = &fs->vqs[queue_id]; 1312 ret = virtio_fs_enqueue_req(fsvq, req, false); 1313 if (ret < 0) { 1314 if (ret == -ENOMEM || ret == -ENOSPC) { 1315 /* 1316 * Virtqueue full. Retry submission from worker 1317 * context as we might be holding fc->bg_lock. 1318 */ 1319 spin_lock(&fsvq->lock); 1320 list_add_tail(&req->list, &fsvq->queued_reqs); 1321 inc_in_flight_req(fsvq); 1322 schedule_delayed_work(&fsvq->dispatch_work, 1323 msecs_to_jiffies(1)); 1324 spin_unlock(&fsvq->lock); 1325 return; 1326 } 1327 req->out.h.error = ret; 1328 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); 1329 1330 /* Can't end request in submission context. Use a worker */ 1331 spin_lock(&fsvq->lock); 1332 list_add_tail(&req->list, &fsvq->end_reqs); 1333 schedule_delayed_work(&fsvq->dispatch_work, 0); 1334 spin_unlock(&fsvq->lock); 1335 return; 1336 } 1337 } 1338 1339 static const struct fuse_iqueue_ops virtio_fs_fiq_ops = { 1340 .wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock, 1341 .wake_interrupt_and_unlock = virtio_fs_wake_interrupt_and_unlock, 1342 .wake_pending_and_unlock = virtio_fs_wake_pending_and_unlock, 1343 .release = virtio_fs_fiq_release, 1344 }; 1345 1346 static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx) 1347 { 1348 ctx->rootmode = S_IFDIR; 1349 ctx->default_permissions = 1; 1350 ctx->allow_other = 1; 1351 ctx->max_read = UINT_MAX; 1352 ctx->blksize = 512; 1353 ctx->destroy = true; 1354 ctx->no_control = true; 1355 ctx->no_force_umount = true; 1356 } 1357 1358 static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) 1359 { 1360 struct fuse_mount *fm = get_fuse_mount_super(sb); 1361 struct fuse_conn *fc = fm->fc; 1362 struct virtio_fs *fs = fc->iq.priv; 1363 struct fuse_fs_context *ctx = fsc->fs_private; 1364 unsigned int i; 1365 int err; 1366 1367 virtio_fs_ctx_set_defaults(ctx); 1368 mutex_lock(&virtio_fs_mutex); 1369 1370 /* After holding mutex, make sure virtiofs device is still there. 1371 * Though we are holding a reference to it, drive ->remove might 1372 * still have cleaned up virtual queues. In that case bail out. 1373 */ 1374 err = -EINVAL; 1375 if (list_empty(&fs->list)) { 1376 pr_info("virtio-fs: tag <%s> not found\n", fs->tag); 1377 goto err; 1378 } 1379 1380 err = -ENOMEM; 1381 /* Allocate fuse_dev for hiprio and notification queues */ 1382 for (i = 0; i < fs->nvqs; i++) { 1383 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 1384 1385 fsvq->fud = fuse_dev_alloc(); 1386 if (!fsvq->fud) 1387 goto err_free_fuse_devs; 1388 } 1389 1390 /* virtiofs allocates and installs its own fuse devices */ 1391 ctx->fudptr = NULL; 1392 if (ctx->dax_mode != FUSE_DAX_NEVER) { 1393 if (ctx->dax_mode == FUSE_DAX_ALWAYS && !fs->dax_dev) { 1394 err = -EINVAL; 1395 pr_err("virtio-fs: dax can't be enabled as filesystem" 1396 " device does not support it.\n"); 1397 goto err_free_fuse_devs; 1398 } 1399 ctx->dax_dev = fs->dax_dev; 1400 } 1401 err = fuse_fill_super_common(sb, ctx); 1402 if (err < 0) 1403 goto err_free_fuse_devs; 1404 1405 for (i = 0; i < fs->nvqs; i++) { 1406 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 1407 1408 fuse_dev_install(fsvq->fud, fc); 1409 } 1410 1411 /* Previous unmount will stop all queues. Start these again */ 1412 virtio_fs_start_all_queues(fs); 1413 fuse_send_init(fm); 1414 mutex_unlock(&virtio_fs_mutex); 1415 return 0; 1416 1417 err_free_fuse_devs: 1418 virtio_fs_free_devs(fs); 1419 err: 1420 mutex_unlock(&virtio_fs_mutex); 1421 return err; 1422 } 1423 1424 static void virtio_fs_conn_destroy(struct fuse_mount *fm) 1425 { 1426 struct fuse_conn *fc = fm->fc; 1427 struct virtio_fs *vfs = fc->iq.priv; 1428 struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO]; 1429 1430 /* Stop dax worker. Soon evict_inodes() will be called which 1431 * will free all memory ranges belonging to all inodes. 1432 */ 1433 if (IS_ENABLED(CONFIG_FUSE_DAX)) 1434 fuse_dax_cancel_work(fc); 1435 1436 /* Stop forget queue. Soon destroy will be sent */ 1437 spin_lock(&fsvq->lock); 1438 fsvq->connected = false; 1439 spin_unlock(&fsvq->lock); 1440 virtio_fs_drain_all_queues(vfs); 1441 1442 fuse_conn_destroy(fm); 1443 1444 /* fuse_conn_destroy() must have sent destroy. Stop all queues 1445 * and drain one more time and free fuse devices. Freeing fuse 1446 * devices will drop their reference on fuse_conn and that in 1447 * turn will drop its reference on virtio_fs object. 1448 */ 1449 virtio_fs_stop_all_queues(vfs); 1450 virtio_fs_drain_all_queues(vfs); 1451 virtio_fs_free_devs(vfs); 1452 } 1453 1454 static void virtio_kill_sb(struct super_block *sb) 1455 { 1456 struct fuse_mount *fm = get_fuse_mount_super(sb); 1457 bool last; 1458 1459 /* If mount failed, we can still be called without any fc */ 1460 if (sb->s_root) { 1461 last = fuse_mount_remove(fm); 1462 if (last) 1463 virtio_fs_conn_destroy(fm); 1464 } 1465 kill_anon_super(sb); 1466 fuse_mount_destroy(fm); 1467 } 1468 1469 static int virtio_fs_test_super(struct super_block *sb, 1470 struct fs_context *fsc) 1471 { 1472 struct fuse_mount *fsc_fm = fsc->s_fs_info; 1473 struct fuse_mount *sb_fm = get_fuse_mount_super(sb); 1474 1475 return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv; 1476 } 1477 1478 static int virtio_fs_get_tree(struct fs_context *fsc) 1479 { 1480 struct virtio_fs *fs; 1481 struct super_block *sb; 1482 struct fuse_conn *fc = NULL; 1483 struct fuse_mount *fm; 1484 unsigned int virtqueue_size; 1485 int err = -EIO; 1486 1487 /* This gets a reference on virtio_fs object. This ptr gets installed 1488 * in fc->iq->priv. Once fuse_conn is going away, it calls ->put() 1489 * to drop the reference to this object. 1490 */ 1491 fs = virtio_fs_find_instance(fsc->source); 1492 if (!fs) { 1493 pr_info("virtio-fs: tag <%s> not found\n", fsc->source); 1494 return -EINVAL; 1495 } 1496 1497 virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq); 1498 if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD)) 1499 goto out_err; 1500 1501 err = -ENOMEM; 1502 fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL); 1503 if (!fc) 1504 goto out_err; 1505 1506 fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL); 1507 if (!fm) 1508 goto out_err; 1509 1510 fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs); 1511 fc->release = fuse_free_conn; 1512 fc->delete_stale = true; 1513 fc->auto_submounts = true; 1514 fc->sync_fs = true; 1515 1516 /* Tell FUSE to split requests that exceed the virtqueue's size */ 1517 fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit, 1518 virtqueue_size - FUSE_HEADER_OVERHEAD); 1519 1520 fsc->s_fs_info = fm; 1521 sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc); 1522 if (fsc->s_fs_info) 1523 fuse_mount_destroy(fm); 1524 if (IS_ERR(sb)) 1525 return PTR_ERR(sb); 1526 1527 if (!sb->s_root) { 1528 err = virtio_fs_fill_super(sb, fsc); 1529 if (err) { 1530 deactivate_locked_super(sb); 1531 return err; 1532 } 1533 1534 sb->s_flags |= SB_ACTIVE; 1535 } 1536 1537 WARN_ON(fsc->root); 1538 fsc->root = dget(sb->s_root); 1539 return 0; 1540 1541 out_err: 1542 kfree(fc); 1543 mutex_lock(&virtio_fs_mutex); 1544 virtio_fs_put(fs); 1545 mutex_unlock(&virtio_fs_mutex); 1546 return err; 1547 } 1548 1549 static const struct fs_context_operations virtio_fs_context_ops = { 1550 .free = virtio_fs_free_fsc, 1551 .parse_param = virtio_fs_parse_param, 1552 .get_tree = virtio_fs_get_tree, 1553 }; 1554 1555 static int virtio_fs_init_fs_context(struct fs_context *fsc) 1556 { 1557 struct fuse_fs_context *ctx; 1558 1559 if (fsc->purpose == FS_CONTEXT_FOR_SUBMOUNT) 1560 return fuse_init_fs_context_submount(fsc); 1561 1562 ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL); 1563 if (!ctx) 1564 return -ENOMEM; 1565 fsc->fs_private = ctx; 1566 fsc->ops = &virtio_fs_context_ops; 1567 return 0; 1568 } 1569 1570 static struct file_system_type virtio_fs_type = { 1571 .owner = THIS_MODULE, 1572 .name = "virtiofs", 1573 .init_fs_context = virtio_fs_init_fs_context, 1574 .kill_sb = virtio_kill_sb, 1575 }; 1576 1577 static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) 1578 { 1579 const struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj); 1580 1581 add_uevent_var(env, "TAG=%s", fs->tag); 1582 return 0; 1583 } 1584 1585 static const struct kset_uevent_ops virtio_fs_uevent_ops = { 1586 .uevent = virtio_fs_uevent, 1587 }; 1588 1589 static int __init virtio_fs_sysfs_init(void) 1590 { 1591 virtio_fs_kset = kset_create_and_add("virtiofs", &virtio_fs_uevent_ops, 1592 fs_kobj); 1593 if (!virtio_fs_kset) 1594 return -ENOMEM; 1595 return 0; 1596 } 1597 1598 static void virtio_fs_sysfs_exit(void) 1599 { 1600 kset_unregister(virtio_fs_kset); 1601 virtio_fs_kset = NULL; 1602 } 1603 1604 static int __init virtio_fs_init(void) 1605 { 1606 int ret; 1607 1608 ret = virtio_fs_sysfs_init(); 1609 if (ret < 0) 1610 return ret; 1611 1612 ret = register_virtio_driver(&virtio_fs_driver); 1613 if (ret < 0) 1614 goto sysfs_exit; 1615 1616 ret = register_filesystem(&virtio_fs_type); 1617 if (ret < 0) 1618 goto unregister_virtio_driver; 1619 1620 return 0; 1621 1622 unregister_virtio_driver: 1623 unregister_virtio_driver(&virtio_fs_driver); 1624 sysfs_exit: 1625 virtio_fs_sysfs_exit(); 1626 return ret; 1627 } 1628 module_init(virtio_fs_init); 1629 1630 static void __exit virtio_fs_exit(void) 1631 { 1632 unregister_filesystem(&virtio_fs_type); 1633 unregister_virtio_driver(&virtio_fs_driver); 1634 virtio_fs_sysfs_exit(); 1635 } 1636 module_exit(virtio_fs_exit); 1637 1638 MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>"); 1639 MODULE_DESCRIPTION("Virtio Filesystem"); 1640 MODULE_LICENSE("GPL"); 1641 MODULE_ALIAS_FS(KBUILD_MODNAME); 1642 MODULE_DEVICE_TABLE(virtio, id_table); 1643