1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * virtio-fs: Virtio Filesystem 4 * Copyright (C) 2018 Red Hat, Inc. 5 */ 6 7 #include <linux/fs.h> 8 #include <linux/dax.h> 9 #include <linux/pci.h> 10 #include <linux/interrupt.h> 11 #include <linux/group_cpus.h> 12 #include <linux/memremap.h> 13 #include <linux/module.h> 14 #include <linux/virtio.h> 15 #include <linux/virtio_fs.h> 16 #include <linux/delay.h> 17 #include <linux/fs_context.h> 18 #include <linux/fs_parser.h> 19 #include <linux/highmem.h> 20 #include <linux/cleanup.h> 21 #include <linux/uio.h> 22 #include "dev.h" 23 #include "fuse_i.h" 24 #include "fuse_dev_i.h" 25 26 /* Used to help calculate the FUSE connection's max_pages limit for a request's 27 * size. Parts of the struct fuse_req are sliced into scattergather lists in 28 * addition to the pages used, so this can help account for that overhead. 29 */ 30 #define FUSE_HEADER_OVERHEAD 4 31 32 /* List of virtio-fs device instances and a lock for the list. Also provides 33 * mutual exclusion in device removal and mounting path 34 */ 35 static DEFINE_MUTEX(virtio_fs_mutex); 36 static LIST_HEAD(virtio_fs_instances); 37 38 /* The /sys/fs/virtio_fs/ kset */ 39 static struct kset *virtio_fs_kset; 40 41 enum { 42 VQ_HIPRIO, 43 VQ_REQUEST 44 }; 45 46 #define VQ_NAME_LEN 24 47 48 /* Per-virtqueue state */ 49 struct virtio_fs_vq { 50 spinlock_t lock; 51 struct virtqueue *vq; /* protected by ->lock */ 52 struct work_struct done_work; 53 struct list_head queued_reqs; 54 struct list_head end_reqs; /* End these requests */ 55 struct work_struct dispatch_work; 56 struct fuse_dev *fud; 57 bool connected; 58 long in_flight; 59 struct completion in_flight_zero; /* No inflight requests */ 60 struct kobject *kobj; 61 char name[VQ_NAME_LEN]; 62 } ____cacheline_aligned_in_smp; 63 64 /* A virtio-fs device instance */ 65 struct virtio_fs { 66 struct kobject kobj; 67 struct kobject *mqs_kobj; 68 struct list_head list; /* on virtio_fs_instances */ 69 char *tag; 70 struct virtio_fs_vq *vqs; 71 unsigned int nvqs; /* number of virtqueues */ 72 unsigned int num_request_queues; /* number of request queues */ 73 struct dax_device *dax_dev; 74 75 unsigned int *mq_map; /* index = cpu id, value = request vq id */ 76 77 /* DAX memory window where file contents are mapped */ 78 void *window_kaddr; 79 phys_addr_t window_phys_addr; 80 size_t window_len; 81 }; 82 83 struct virtio_fs_forget_req { 84 struct fuse_in_header ih; 85 struct fuse_forget_in arg; 86 }; 87 88 struct virtio_fs_forget { 89 /* This request can be temporarily queued on virt queue */ 90 struct list_head list; 91 struct virtio_fs_forget_req req; 92 }; 93 94 struct virtio_fs_req_work { 95 struct fuse_req *req; 96 struct virtio_fs_vq *fsvq; 97 struct work_struct done_work; 98 }; 99 100 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 101 struct fuse_req *req, bool in_flight, 102 gfp_t gfp); 103 104 static const struct constant_table dax_param_enums[] = { 105 {"always", FUSE_DAX_ALWAYS }, 106 {"never", FUSE_DAX_NEVER }, 107 {"inode", FUSE_DAX_INODE_USER }, 108 {} 109 }; 110 111 enum { 112 OPT_DAX, 113 OPT_DAX_ENUM, 114 }; 115 116 static const struct fs_parameter_spec virtio_fs_parameters[] = { 117 fsparam_flag("dax", OPT_DAX), 118 fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums), 119 {} 120 }; 121 122 static int virtio_fs_parse_param(struct fs_context *fsc, 123 struct fs_parameter *param) 124 { 125 struct fs_parse_result result; 126 struct fuse_fs_context *ctx = fsc->fs_private; 127 int opt; 128 129 opt = fs_parse(fsc, virtio_fs_parameters, param, &result); 130 if (opt < 0) 131 return opt; 132 133 switch (opt) { 134 case OPT_DAX: 135 ctx->dax_mode = FUSE_DAX_ALWAYS; 136 break; 137 case OPT_DAX_ENUM: 138 ctx->dax_mode = result.uint_32; 139 break; 140 default: 141 return -EINVAL; 142 } 143 144 return 0; 145 } 146 147 static void virtio_fs_free_fsc(struct fs_context *fsc) 148 { 149 struct fuse_fs_context *ctx = fsc->fs_private; 150 151 kfree(ctx); 152 } 153 154 static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) 155 { 156 struct virtio_fs *fs = vq->vdev->priv; 157 158 return &fs->vqs[vq->index]; 159 } 160 161 /* Should be called with fsvq->lock held. */ 162 static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq) 163 { 164 fsvq->in_flight++; 165 } 166 167 /* Should be called with fsvq->lock held. */ 168 static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq) 169 { 170 WARN_ON(fsvq->in_flight <= 0); 171 fsvq->in_flight--; 172 if (!fsvq->in_flight) 173 complete(&fsvq->in_flight_zero); 174 } 175 176 static ssize_t tag_show(struct kobject *kobj, 177 struct kobj_attribute *attr, char *buf) 178 { 179 struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj); 180 181 return sysfs_emit(buf, "%s\n", fs->tag); 182 } 183 184 static struct kobj_attribute virtio_fs_tag_attr = __ATTR_RO(tag); 185 186 static struct attribute *virtio_fs_attrs[] = { 187 &virtio_fs_tag_attr.attr, 188 NULL 189 }; 190 ATTRIBUTE_GROUPS(virtio_fs); 191 192 static void virtio_fs_ktype_release(struct kobject *kobj) 193 { 194 struct virtio_fs *vfs = container_of(kobj, struct virtio_fs, kobj); 195 196 kfree(vfs->mq_map); 197 kfree(vfs->vqs); 198 kfree(vfs); 199 } 200 201 static const struct kobj_type virtio_fs_ktype = { 202 .release = virtio_fs_ktype_release, 203 .sysfs_ops = &kobj_sysfs_ops, 204 .default_groups = virtio_fs_groups, 205 }; 206 207 static struct virtio_fs_vq *virtio_fs_kobj_to_vq(struct virtio_fs *fs, 208 struct kobject *kobj) 209 { 210 int i; 211 212 for (i = 0; i < fs->nvqs; i++) { 213 if (kobj == fs->vqs[i].kobj) 214 return &fs->vqs[i]; 215 } 216 return NULL; 217 } 218 219 static ssize_t name_show(struct kobject *kobj, 220 struct kobj_attribute *attr, char *buf) 221 { 222 struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj); 223 struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj); 224 225 if (!fsvq) 226 return -EINVAL; 227 return sysfs_emit(buf, "%s\n", fsvq->name); 228 } 229 230 static struct kobj_attribute virtio_fs_vq_name_attr = __ATTR_RO(name); 231 232 static ssize_t cpu_list_show(struct kobject *kobj, 233 struct kobj_attribute *attr, char *buf) 234 { 235 struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj); 236 struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj); 237 unsigned int cpu, qid; 238 const size_t size = PAGE_SIZE - 1; 239 bool first = true; 240 int ret = 0, pos = 0; 241 242 if (!fsvq) 243 return -EINVAL; 244 245 qid = fsvq->vq->index; 246 for (cpu = 0; cpu < nr_cpu_ids; cpu++) { 247 if (qid < VQ_REQUEST || (fs->mq_map[cpu] == qid)) { 248 if (first) 249 ret = snprintf(buf + pos, size - pos, "%u", cpu); 250 else 251 ret = snprintf(buf + pos, size - pos, ", %u", cpu); 252 253 if (ret >= size - pos) 254 break; 255 first = false; 256 pos += ret; 257 } 258 } 259 ret = snprintf(buf + pos, size + 1 - pos, "\n"); 260 return pos + ret; 261 } 262 263 static struct kobj_attribute virtio_fs_vq_cpu_list_attr = __ATTR_RO(cpu_list); 264 265 static struct attribute *virtio_fs_vq_attrs[] = { 266 &virtio_fs_vq_name_attr.attr, 267 &virtio_fs_vq_cpu_list_attr.attr, 268 NULL 269 }; 270 271 static struct attribute_group virtio_fs_vq_attr_group = { 272 .attrs = virtio_fs_vq_attrs, 273 }; 274 275 /* Make sure virtiofs_mutex is held */ 276 static void virtio_fs_put_locked(struct virtio_fs *fs) 277 { 278 lockdep_assert_held(&virtio_fs_mutex); 279 280 kobject_put(&fs->kobj); 281 } 282 283 static void virtio_fs_put(struct virtio_fs *fs) 284 { 285 mutex_lock(&virtio_fs_mutex); 286 virtio_fs_put_locked(fs); 287 mutex_unlock(&virtio_fs_mutex); 288 } 289 290 static void virtio_fs_fiq_release(struct fuse_iqueue *fiq) 291 { 292 struct virtio_fs *vfs = fiq->priv; 293 294 virtio_fs_put(vfs); 295 } 296 297 static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) 298 { 299 WARN_ON(fsvq->in_flight < 0); 300 301 /* Wait for in flight requests to finish.*/ 302 spin_lock(&fsvq->lock); 303 if (fsvq->in_flight) { 304 /* We are holding virtio_fs_mutex. There should not be any 305 * waiters waiting for completion. 306 */ 307 reinit_completion(&fsvq->in_flight_zero); 308 spin_unlock(&fsvq->lock); 309 wait_for_completion(&fsvq->in_flight_zero); 310 } else { 311 spin_unlock(&fsvq->lock); 312 } 313 314 flush_work(&fsvq->done_work); 315 flush_work(&fsvq->dispatch_work); 316 } 317 318 static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs) 319 { 320 struct virtio_fs_vq *fsvq; 321 int i; 322 323 for (i = 0; i < fs->nvqs; i++) { 324 fsvq = &fs->vqs[i]; 325 virtio_fs_drain_queue(fsvq); 326 } 327 } 328 329 static void virtio_fs_drain_all_queues(struct virtio_fs *fs) 330 { 331 /* Provides mutual exclusion between ->remove and ->kill_sb 332 * paths. We don't want both of these draining queue at the 333 * same time. Current completion logic reinits completion 334 * and that means there should not be any other thread 335 * doing reinit or waiting for completion already. 336 */ 337 mutex_lock(&virtio_fs_mutex); 338 virtio_fs_drain_all_queues_locked(fs); 339 mutex_unlock(&virtio_fs_mutex); 340 } 341 342 static void virtio_fs_start_all_queues(struct virtio_fs *fs) 343 { 344 struct virtio_fs_vq *fsvq; 345 int i; 346 347 for (i = 0; i < fs->nvqs; i++) { 348 fsvq = &fs->vqs[i]; 349 spin_lock(&fsvq->lock); 350 fsvq->connected = true; 351 spin_unlock(&fsvq->lock); 352 } 353 } 354 355 static void virtio_fs_delete_queues_sysfs(struct virtio_fs *fs) 356 { 357 struct virtio_fs_vq *fsvq; 358 int i; 359 360 for (i = 0; i < fs->nvqs; i++) { 361 fsvq = &fs->vqs[i]; 362 kobject_put(fsvq->kobj); 363 } 364 } 365 366 static int virtio_fs_add_queues_sysfs(struct virtio_fs *fs) 367 { 368 struct virtio_fs_vq *fsvq; 369 char buff[12]; 370 int i, j, ret; 371 372 for (i = 0; i < fs->nvqs; i++) { 373 fsvq = &fs->vqs[i]; 374 375 sprintf(buff, "%d", i); 376 fsvq->kobj = kobject_create_and_add(buff, fs->mqs_kobj); 377 if (!fsvq->kobj) { 378 ret = -ENOMEM; 379 goto out_del; 380 } 381 382 ret = sysfs_create_group(fsvq->kobj, &virtio_fs_vq_attr_group); 383 if (ret) { 384 kobject_put(fsvq->kobj); 385 goto out_del; 386 } 387 } 388 389 return 0; 390 391 out_del: 392 for (j = 0; j < i; j++) { 393 fsvq = &fs->vqs[j]; 394 kobject_put(fsvq->kobj); 395 } 396 return ret; 397 } 398 399 /* Add a new instance to the list or return -EEXIST if tag name exists*/ 400 static int virtio_fs_add_instance(struct virtio_device *vdev, 401 struct virtio_fs *fs) 402 { 403 struct virtio_fs *fs2; 404 int ret; 405 406 mutex_lock(&virtio_fs_mutex); 407 408 list_for_each_entry(fs2, &virtio_fs_instances, list) { 409 if (strcmp(fs->tag, fs2->tag) == 0) { 410 mutex_unlock(&virtio_fs_mutex); 411 return -EEXIST; 412 } 413 } 414 415 /* Use the virtio_device's index as a unique identifier, there is no 416 * need to allocate our own identifiers because the virtio_fs instance 417 * is only visible to userspace as long as the underlying virtio_device 418 * exists. 419 */ 420 fs->kobj.kset = virtio_fs_kset; 421 ret = kobject_add(&fs->kobj, NULL, "%d", vdev->index); 422 if (ret < 0) 423 goto out_unlock; 424 425 fs->mqs_kobj = kobject_create_and_add("mqs", &fs->kobj); 426 if (!fs->mqs_kobj) { 427 ret = -ENOMEM; 428 goto out_del; 429 } 430 431 ret = sysfs_create_link(&fs->kobj, &vdev->dev.kobj, "device"); 432 if (ret < 0) 433 goto out_put; 434 435 ret = virtio_fs_add_queues_sysfs(fs); 436 if (ret) 437 goto out_remove; 438 439 list_add_tail(&fs->list, &virtio_fs_instances); 440 441 mutex_unlock(&virtio_fs_mutex); 442 443 kobject_uevent(&fs->kobj, KOBJ_ADD); 444 445 return 0; 446 447 out_remove: 448 sysfs_remove_link(&fs->kobj, "device"); 449 out_put: 450 kobject_put(fs->mqs_kobj); 451 out_del: 452 kobject_del(&fs->kobj); 453 out_unlock: 454 mutex_unlock(&virtio_fs_mutex); 455 return ret; 456 } 457 458 /* Return the virtio_fs with a given tag, or NULL */ 459 static struct virtio_fs *virtio_fs_find_instance(const char *tag) 460 { 461 struct virtio_fs *fs; 462 463 mutex_lock(&virtio_fs_mutex); 464 465 list_for_each_entry(fs, &virtio_fs_instances, list) { 466 if (strcmp(fs->tag, tag) == 0) { 467 kobject_get(&fs->kobj); 468 goto found; 469 } 470 } 471 472 fs = NULL; /* not found */ 473 474 found: 475 mutex_unlock(&virtio_fs_mutex); 476 477 return fs; 478 } 479 480 static void virtio_fs_free_devs(struct virtio_fs *fs) 481 { 482 unsigned int i; 483 484 for (i = 0; i < fs->nvqs; i++) { 485 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 486 487 if (!fsvq->fud) 488 continue; 489 490 fuse_dev_put(fsvq->fud); 491 fsvq->fud = NULL; 492 } 493 } 494 495 /* Read filesystem name from virtio config into fs->tag (must kfree()). */ 496 static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs) 497 { 498 char tag_buf[sizeof_field(struct virtio_fs_config, tag)]; 499 char *end; 500 size_t len; 501 502 virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag), 503 &tag_buf, sizeof(tag_buf)); 504 end = memchr(tag_buf, '\0', sizeof(tag_buf)); 505 if (end == tag_buf) 506 return -EINVAL; /* empty tag */ 507 if (!end) 508 end = &tag_buf[sizeof(tag_buf)]; 509 510 len = end - tag_buf; 511 fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL); 512 if (!fs->tag) 513 return -ENOMEM; 514 memcpy(fs->tag, tag_buf, len); 515 fs->tag[len] = '\0'; 516 517 /* While the VIRTIO specification allows any character, newlines are 518 * awkward on mount(8) command-lines and cause problems in the sysfs 519 * "tag" attr and uevent TAG= properties. Forbid them. 520 */ 521 if (strchr(fs->tag, '\n')) { 522 dev_dbg(&vdev->dev, "refusing virtiofs tag with newline character\n"); 523 return -EINVAL; 524 } 525 526 dev_info(&vdev->dev, "discovered new tag: %s\n", fs->tag); 527 return 0; 528 } 529 530 /* Work function for hiprio completion */ 531 static void virtio_fs_hiprio_done_work(struct work_struct *work) 532 { 533 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 534 done_work); 535 struct virtqueue *vq = fsvq->vq; 536 537 /* Free completed FUSE_FORGET requests */ 538 spin_lock(&fsvq->lock); 539 do { 540 unsigned int len; 541 void *req; 542 543 virtqueue_disable_cb(vq); 544 545 while ((req = virtqueue_get_buf(vq, &len)) != NULL) { 546 kfree(req); 547 dec_in_flight_req(fsvq); 548 } 549 } while (!virtqueue_enable_cb(vq)); 550 551 if (!list_empty(&fsvq->queued_reqs)) 552 schedule_work(&fsvq->dispatch_work); 553 554 spin_unlock(&fsvq->lock); 555 } 556 557 static void virtio_fs_request_dispatch_work(struct work_struct *work) 558 { 559 struct fuse_req *req; 560 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 561 dispatch_work); 562 int ret; 563 564 pr_debug("virtio-fs: worker %s called.\n", __func__); 565 while (1) { 566 spin_lock(&fsvq->lock); 567 req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req, 568 list); 569 if (!req) { 570 spin_unlock(&fsvq->lock); 571 break; 572 } 573 574 list_del_init(&req->list); 575 spin_unlock(&fsvq->lock); 576 fuse_request_end(req); 577 } 578 579 /* Dispatch pending requests */ 580 while (1) { 581 unsigned int flags; 582 583 spin_lock(&fsvq->lock); 584 req = list_first_entry_or_null(&fsvq->queued_reqs, 585 struct fuse_req, list); 586 if (!req) { 587 spin_unlock(&fsvq->lock); 588 return; 589 } 590 list_del_init(&req->list); 591 spin_unlock(&fsvq->lock); 592 593 flags = memalloc_nofs_save(); 594 ret = virtio_fs_enqueue_req(fsvq, req, true, GFP_KERNEL); 595 memalloc_nofs_restore(flags); 596 if (ret < 0) { 597 if (ret == -ENOSPC) { 598 spin_lock(&fsvq->lock); 599 list_add_tail(&req->list, &fsvq->queued_reqs); 600 spin_unlock(&fsvq->lock); 601 return; 602 } 603 req->out.h.error = ret; 604 spin_lock(&fsvq->lock); 605 dec_in_flight_req(fsvq); 606 spin_unlock(&fsvq->lock); 607 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", 608 ret); 609 fuse_request_end(req); 610 } 611 } 612 } 613 614 /* 615 * Returns 1 if queue is full and sender should wait a bit before sending 616 * next request, 0 otherwise. 617 */ 618 static int send_forget_request(struct virtio_fs_vq *fsvq, 619 struct virtio_fs_forget *forget, 620 bool in_flight) 621 { 622 struct scatterlist sg; 623 struct virtqueue *vq; 624 int ret = 0; 625 bool notify; 626 struct virtio_fs_forget_req *req = &forget->req; 627 628 spin_lock(&fsvq->lock); 629 if (!fsvq->connected) { 630 if (in_flight) 631 dec_in_flight_req(fsvq); 632 kfree(forget); 633 goto out; 634 } 635 636 sg_init_one(&sg, req, sizeof(*req)); 637 vq = fsvq->vq; 638 dev_dbg(&vq->vdev->dev, "%s\n", __func__); 639 640 ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC); 641 if (ret < 0) { 642 if (ret == -ENOSPC) { 643 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n", 644 ret); 645 list_add_tail(&forget->list, &fsvq->queued_reqs); 646 if (!in_flight) 647 inc_in_flight_req(fsvq); 648 /* Queue is full */ 649 ret = 1; 650 } else { 651 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n", 652 ret); 653 kfree(forget); 654 if (in_flight) 655 dec_in_flight_req(fsvq); 656 } 657 goto out; 658 } 659 660 if (!in_flight) 661 inc_in_flight_req(fsvq); 662 notify = virtqueue_kick_prepare(vq); 663 spin_unlock(&fsvq->lock); 664 665 if (notify) 666 virtqueue_notify(vq); 667 return ret; 668 out: 669 spin_unlock(&fsvq->lock); 670 return ret; 671 } 672 673 static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) 674 { 675 struct virtio_fs_forget *forget; 676 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 677 dispatch_work); 678 pr_debug("virtio-fs: worker %s called.\n", __func__); 679 while (1) { 680 spin_lock(&fsvq->lock); 681 forget = list_first_entry_or_null(&fsvq->queued_reqs, 682 struct virtio_fs_forget, list); 683 if (!forget) { 684 spin_unlock(&fsvq->lock); 685 return; 686 } 687 688 list_del(&forget->list); 689 spin_unlock(&fsvq->lock); 690 if (send_forget_request(fsvq, forget, true)) 691 return; 692 } 693 } 694 695 /* Allocate and copy args into req->argbuf */ 696 static int copy_args_to_argbuf(struct fuse_req *req, gfp_t gfp) 697 { 698 struct fuse_args *args = req->args; 699 unsigned int offset = 0; 700 unsigned int num_in; 701 unsigned int num_out; 702 unsigned int len; 703 unsigned int i; 704 705 num_in = args->in_numargs - args->in_pages; 706 num_out = args->out_numargs - args->out_pages; 707 len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) + 708 fuse_len_args(num_out, args->out_args); 709 710 req->argbuf = kmalloc(len, gfp); 711 if (!req->argbuf) 712 return -ENOMEM; 713 714 for (i = 0; i < num_in; i++) { 715 memcpy(req->argbuf + offset, 716 args->in_args[i].value, 717 args->in_args[i].size); 718 offset += args->in_args[i].size; 719 } 720 721 return 0; 722 } 723 724 /* Copy args out of and free req->argbuf */ 725 static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req) 726 { 727 unsigned int remaining; 728 unsigned int offset; 729 unsigned int num_in; 730 unsigned int num_out; 731 unsigned int i; 732 733 remaining = req->out.h.len - sizeof(req->out.h); 734 num_in = args->in_numargs - args->in_pages; 735 num_out = args->out_numargs - args->out_pages; 736 offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args); 737 738 for (i = 0; i < num_out; i++) { 739 unsigned int argsize = args->out_args[i].size; 740 741 if (args->out_argvar && 742 i == args->out_numargs - 1 && 743 argsize > remaining) { 744 argsize = remaining; 745 } 746 747 memcpy(args->out_args[i].value, req->argbuf + offset, argsize); 748 offset += argsize; 749 750 if (i != args->out_numargs - 1) 751 remaining -= argsize; 752 } 753 754 /* Store the actual size of the variable-length arg */ 755 if (args->out_argvar) 756 args->out_args[args->out_numargs - 1].size = remaining; 757 758 kfree(req->argbuf); 759 req->argbuf = NULL; 760 } 761 762 /* Verify that the server properly follows the FUSE protocol */ 763 static bool virtio_fs_verify_response(struct fuse_req *req, unsigned int len) 764 { 765 struct fuse_out_header *oh = &req->out.h; 766 767 if (len < sizeof(*oh)) { 768 pr_warn("virtio-fs: response too short (%u)\n", len); 769 return false; 770 } 771 if (oh->len != len) { 772 pr_warn("virtio-fs: oh.len mismatch (%u != %u)\n", oh->len, len); 773 return false; 774 } 775 if (oh->unique != req->in.h.unique) { 776 pr_warn("virtio-fs: oh.unique mismatch (%llu != %llu)\n", 777 oh->unique, req->in.h.unique); 778 return false; 779 } 780 return true; 781 } 782 783 /* Work function for request completion */ 784 static void virtio_fs_request_complete(struct fuse_req *req, 785 struct virtio_fs_vq *fsvq) 786 { 787 struct fuse_args *args; 788 struct fuse_args_pages *ap; 789 unsigned int len, i, thislen; 790 struct folio *folio; 791 792 args = req->args; 793 copy_args_from_argbuf(args, req); 794 795 if (args->out_pages && args->page_zeroing) { 796 len = args->out_args[args->out_numargs - 1].size; 797 ap = container_of(args, typeof(*ap), args); 798 for (i = 0; i < ap->num_folios; i++) { 799 thislen = ap->descs[i].length; 800 if (len < thislen) { 801 WARN_ON(ap->descs[i].offset); 802 folio = ap->folios[i]; 803 folio_zero_segment(folio, len, thislen); 804 len = 0; 805 } else { 806 len -= thislen; 807 } 808 } 809 } 810 811 clear_bit(FR_SENT, &req->flags); 812 813 fuse_request_end(req); 814 spin_lock(&fsvq->lock); 815 dec_in_flight_req(fsvq); 816 spin_unlock(&fsvq->lock); 817 } 818 819 static void virtio_fs_complete_req_work(struct work_struct *work) 820 { 821 struct virtio_fs_req_work *w = 822 container_of(work, typeof(*w), done_work); 823 824 virtio_fs_request_complete(w->req, w->fsvq); 825 kfree(w); 826 } 827 828 static void virtio_fs_requests_done_work(struct work_struct *work) 829 { 830 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 831 done_work); 832 struct fuse_pqueue *fpq = &fsvq->fud->pq; 833 struct virtqueue *vq = fsvq->vq; 834 struct fuse_req *req; 835 struct fuse_req *next; 836 unsigned int len; 837 LIST_HEAD(reqs); 838 839 /* Collect completed requests off the virtqueue */ 840 spin_lock(&fsvq->lock); 841 do { 842 virtqueue_disable_cb(vq); 843 844 while ((req = virtqueue_get_buf(vq, &len)) != NULL) { 845 if (!virtio_fs_verify_response(req, len)) { 846 req->out.h.error = -EIO; 847 req->out.h.len = sizeof(struct fuse_out_header); 848 } 849 spin_lock(&fpq->lock); 850 list_move_tail(&req->list, &reqs); 851 spin_unlock(&fpq->lock); 852 } 853 } while (!virtqueue_enable_cb(vq)); 854 spin_unlock(&fsvq->lock); 855 856 /* End requests */ 857 list_for_each_entry_safe(req, next, &reqs, list) { 858 list_del_init(&req->list); 859 860 /* blocking async request completes in a worker context */ 861 if (req->args->may_block) { 862 struct virtio_fs_req_work *w; 863 864 w = kzalloc_obj(*w, GFP_NOFS | __GFP_NOFAIL); 865 INIT_WORK(&w->done_work, virtio_fs_complete_req_work); 866 w->fsvq = fsvq; 867 w->req = req; 868 schedule_work(&w->done_work); 869 } else { 870 virtio_fs_request_complete(req, fsvq); 871 } 872 } 873 874 /* Try to push previously queued requests, as the queue might no longer be full */ 875 spin_lock(&fsvq->lock); 876 if (!list_empty(&fsvq->queued_reqs)) 877 schedule_work(&fsvq->dispatch_work); 878 spin_unlock(&fsvq->lock); 879 } 880 881 static void virtio_fs_map_queues(struct virtio_device *vdev, struct virtio_fs *fs) 882 { 883 const struct cpumask *mask, *masks; 884 unsigned int q, cpu, nr_masks; 885 886 /* First attempt to map using existing transport layer affinities 887 * e.g. PCIe MSI-X 888 */ 889 if (!vdev->config->get_vq_affinity) 890 goto fallback; 891 892 for (q = 0; q < fs->num_request_queues; q++) { 893 mask = vdev->config->get_vq_affinity(vdev, VQ_REQUEST + q); 894 if (!mask) 895 goto fallback; 896 897 for_each_cpu(cpu, mask) 898 fs->mq_map[cpu] = q + VQ_REQUEST; 899 } 900 901 return; 902 fallback: 903 /* Attempt to map evenly in groups over the CPUs */ 904 masks = group_cpus_evenly(fs->num_request_queues, &nr_masks); 905 /* If even this fails we default to all CPUs use first request queue */ 906 if (!masks) { 907 for_each_possible_cpu(cpu) 908 fs->mq_map[cpu] = VQ_REQUEST; 909 return; 910 } 911 912 for (q = 0; q < fs->num_request_queues; q++) { 913 for_each_cpu(cpu, &masks[q % nr_masks]) 914 fs->mq_map[cpu] = q + VQ_REQUEST; 915 } 916 kfree(masks); 917 } 918 919 /* Virtqueue interrupt handler */ 920 static void virtio_fs_vq_done(struct virtqueue *vq) 921 { 922 struct virtio_fs_vq *fsvq = vq_to_fsvq(vq); 923 924 dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name); 925 926 schedule_work(&fsvq->done_work); 927 } 928 929 static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name, 930 int vq_type) 931 { 932 strscpy(fsvq->name, name, VQ_NAME_LEN); 933 spin_lock_init(&fsvq->lock); 934 INIT_LIST_HEAD(&fsvq->queued_reqs); 935 INIT_LIST_HEAD(&fsvq->end_reqs); 936 init_completion(&fsvq->in_flight_zero); 937 938 if (vq_type == VQ_REQUEST) { 939 INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work); 940 INIT_WORK(&fsvq->dispatch_work, 941 virtio_fs_request_dispatch_work); 942 } else { 943 INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work); 944 INIT_WORK(&fsvq->dispatch_work, 945 virtio_fs_hiprio_dispatch_work); 946 } 947 } 948 949 /* Initialize virtqueues */ 950 static int virtio_fs_setup_vqs(struct virtio_device *vdev, 951 struct virtio_fs *fs) 952 { 953 struct virtqueue_info *vqs_info; 954 struct virtqueue **vqs; 955 /* Specify pre_vectors to ensure that the queues before the 956 * request queues (e.g. hiprio) don't claim any of the CPUs in 957 * the multi-queue mapping and interrupt affinities 958 */ 959 struct irq_affinity desc = { .pre_vectors = VQ_REQUEST }; 960 unsigned int i; 961 int ret = 0; 962 963 virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues, 964 &fs->num_request_queues); 965 if (fs->num_request_queues == 0) 966 return -EINVAL; 967 968 /* Truncate nr of request queues to nr_cpu_id */ 969 fs->num_request_queues = min_t(unsigned int, fs->num_request_queues, 970 nr_cpu_ids); 971 fs->nvqs = VQ_REQUEST + fs->num_request_queues; 972 fs->vqs = kzalloc_objs(fs->vqs[VQ_HIPRIO], fs->nvqs); 973 if (!fs->vqs) 974 return -ENOMEM; 975 976 vqs = kmalloc_objs(vqs[VQ_HIPRIO], fs->nvqs); 977 fs->mq_map = kcalloc_node(nr_cpu_ids, sizeof(*fs->mq_map), GFP_KERNEL, 978 dev_to_node(&vdev->dev)); 979 vqs_info = kzalloc_objs(*vqs_info, fs->nvqs); 980 if (!vqs || !vqs_info || !fs->mq_map) { 981 ret = -ENOMEM; 982 goto out; 983 } 984 985 /* Initialize the hiprio/forget request virtqueue */ 986 vqs_info[VQ_HIPRIO].callback = virtio_fs_vq_done; 987 virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO); 988 vqs_info[VQ_HIPRIO].name = fs->vqs[VQ_HIPRIO].name; 989 990 /* Initialize the requests virtqueues */ 991 for (i = VQ_REQUEST; i < fs->nvqs; i++) { 992 char vq_name[VQ_NAME_LEN]; 993 994 snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST); 995 virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST); 996 vqs_info[i].callback = virtio_fs_vq_done; 997 vqs_info[i].name = fs->vqs[i].name; 998 } 999 1000 ret = virtio_find_vqs(vdev, fs->nvqs, vqs, vqs_info, &desc); 1001 if (ret < 0) 1002 goto out; 1003 1004 for (i = 0; i < fs->nvqs; i++) 1005 fs->vqs[i].vq = vqs[i]; 1006 1007 virtio_fs_start_all_queues(fs); 1008 out: 1009 kfree(vqs_info); 1010 kfree(vqs); 1011 if (ret) { 1012 kfree(fs->vqs); 1013 fs->vqs = NULL; 1014 kfree(fs->mq_map); 1015 fs->mq_map = NULL; 1016 } 1017 return ret; 1018 } 1019 1020 /* Free virtqueues (device must already be reset) */ 1021 static void virtio_fs_cleanup_vqs(struct virtio_device *vdev) 1022 { 1023 vdev->config->del_vqs(vdev); 1024 } 1025 1026 /* Map a window offset to a page frame number. The window offset will have 1027 * been produced by .iomap_begin(), which maps a file offset to a window 1028 * offset. 1029 */ 1030 static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, 1031 long nr_pages, enum dax_access_mode mode, 1032 void **kaddr, unsigned long *pfn) 1033 { 1034 struct virtio_fs *fs = dax_get_private(dax_dev); 1035 phys_addr_t offset = PFN_PHYS(pgoff); 1036 size_t max_nr_pages = fs->window_len / PAGE_SIZE - pgoff; 1037 1038 if (kaddr) 1039 *kaddr = fs->window_kaddr + offset; 1040 if (pfn) 1041 *pfn = PHYS_PFN(fs->window_phys_addr + offset); 1042 return nr_pages > max_nr_pages ? max_nr_pages : nr_pages; 1043 } 1044 1045 static int virtio_fs_zero_page_range(struct dax_device *dax_dev, 1046 pgoff_t pgoff, size_t nr_pages) 1047 { 1048 long rc; 1049 void *kaddr; 1050 1051 rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr, 1052 NULL); 1053 if (rc < 0) 1054 return dax_mem2blk_err(rc); 1055 1056 memset(kaddr, 0, nr_pages << PAGE_SHIFT); 1057 dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); 1058 return 0; 1059 } 1060 1061 static const struct dax_operations virtio_fs_dax_ops = { 1062 .direct_access = virtio_fs_direct_access, 1063 .zero_page_range = virtio_fs_zero_page_range, 1064 }; 1065 1066 static void virtio_fs_cleanup_dax(void *data) 1067 { 1068 struct dax_device *dax_dev = data; 1069 1070 kill_dax(dax_dev); 1071 put_dax(dax_dev); 1072 } 1073 1074 DEFINE_FREE(cleanup_dax, struct dax_dev *, if (!IS_ERR_OR_NULL(_T)) virtio_fs_cleanup_dax(_T)) 1075 1076 static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs) 1077 { 1078 struct dax_device *dax_dev __free(cleanup_dax) = NULL; 1079 struct virtio_shm_region cache_reg; 1080 struct dev_pagemap *pgmap; 1081 bool have_cache; 1082 1083 if (!IS_ENABLED(CONFIG_FUSE_DAX)) 1084 return 0; 1085 1086 dax_dev = alloc_dax(fs, &virtio_fs_dax_ops); 1087 if (IS_ERR(dax_dev)) { 1088 int rc = PTR_ERR(dax_dev); 1089 return rc == -EOPNOTSUPP ? 0 : rc; 1090 } 1091 1092 /* Get cache region */ 1093 have_cache = virtio_get_shm_region(vdev, &cache_reg, 1094 (u8)VIRTIO_FS_SHMCAP_ID_CACHE); 1095 if (!have_cache) { 1096 dev_notice(&vdev->dev, "%s: No cache capability\n", __func__); 1097 return 0; 1098 } 1099 1100 if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len, 1101 dev_name(&vdev->dev))) { 1102 dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n", 1103 cache_reg.addr, cache_reg.len); 1104 return -EBUSY; 1105 } 1106 1107 dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len, 1108 cache_reg.addr); 1109 1110 pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL); 1111 if (!pgmap) 1112 return -ENOMEM; 1113 1114 pgmap->type = MEMORY_DEVICE_FS_DAX; 1115 1116 /* Ideally we would directly use the PCI BAR resource but 1117 * devm_memremap_pages() wants its own copy in pgmap. So 1118 * initialize a struct resource from scratch (only the start 1119 * and end fields will be used). 1120 */ 1121 pgmap->range = (struct range) { 1122 .start = (phys_addr_t) cache_reg.addr, 1123 .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1, 1124 }; 1125 pgmap->nr_range = 1; 1126 1127 fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap); 1128 if (IS_ERR(fs->window_kaddr)) 1129 return PTR_ERR(fs->window_kaddr); 1130 1131 fs->window_phys_addr = (phys_addr_t) cache_reg.addr; 1132 fs->window_len = (phys_addr_t) cache_reg.len; 1133 1134 dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n", 1135 __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len); 1136 1137 fs->dax_dev = no_free_ptr(dax_dev); 1138 return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax, 1139 fs->dax_dev); 1140 } 1141 1142 static int virtio_fs_probe(struct virtio_device *vdev) 1143 { 1144 struct virtio_fs *fs; 1145 int ret; 1146 1147 fs = kzalloc_obj(*fs); 1148 if (!fs) 1149 return -ENOMEM; 1150 kobject_init(&fs->kobj, &virtio_fs_ktype); 1151 vdev->priv = fs; 1152 1153 ret = virtio_fs_read_tag(vdev, fs); 1154 if (ret < 0) 1155 goto out; 1156 1157 ret = virtio_fs_setup_vqs(vdev, fs); 1158 if (ret < 0) 1159 goto out; 1160 1161 virtio_fs_map_queues(vdev, fs); 1162 1163 ret = virtio_fs_setup_dax(vdev, fs); 1164 if (ret < 0) 1165 goto out_vqs; 1166 1167 /* Bring the device online in case the filesystem is mounted and 1168 * requests need to be sent before we return. 1169 */ 1170 virtio_device_ready(vdev); 1171 1172 ret = virtio_fs_add_instance(vdev, fs); 1173 if (ret < 0) 1174 goto out_vqs; 1175 1176 return 0; 1177 1178 out_vqs: 1179 virtio_reset_device(vdev); 1180 virtio_fs_cleanup_vqs(vdev); 1181 1182 out: 1183 vdev->priv = NULL; 1184 kobject_put(&fs->kobj); 1185 return ret; 1186 } 1187 1188 static void virtio_fs_stop_all_queues(struct virtio_fs *fs) 1189 { 1190 struct virtio_fs_vq *fsvq; 1191 int i; 1192 1193 for (i = 0; i < fs->nvqs; i++) { 1194 fsvq = &fs->vqs[i]; 1195 spin_lock(&fsvq->lock); 1196 fsvq->connected = false; 1197 spin_unlock(&fsvq->lock); 1198 } 1199 } 1200 1201 static void virtio_fs_remove(struct virtio_device *vdev) 1202 { 1203 struct virtio_fs *fs = vdev->priv; 1204 1205 mutex_lock(&virtio_fs_mutex); 1206 /* This device is going away. No one should get new reference */ 1207 list_del_init(&fs->list); 1208 virtio_fs_delete_queues_sysfs(fs); 1209 sysfs_remove_link(&fs->kobj, "device"); 1210 kobject_put(fs->mqs_kobj); 1211 kobject_del(&fs->kobj); 1212 virtio_fs_stop_all_queues(fs); 1213 virtio_fs_drain_all_queues_locked(fs); 1214 virtio_reset_device(vdev); 1215 virtio_fs_cleanup_vqs(vdev); 1216 1217 vdev->priv = NULL; 1218 /* Put device reference on virtio_fs object */ 1219 virtio_fs_put_locked(fs); 1220 mutex_unlock(&virtio_fs_mutex); 1221 } 1222 1223 #ifdef CONFIG_PM_SLEEP 1224 static int virtio_fs_freeze(struct virtio_device *vdev) 1225 { 1226 /* TODO need to save state here */ 1227 pr_warn("virtio-fs: suspend/resume not yet supported\n"); 1228 return -EOPNOTSUPP; 1229 } 1230 1231 static int virtio_fs_restore(struct virtio_device *vdev) 1232 { 1233 /* TODO need to restore state here */ 1234 return 0; 1235 } 1236 #endif /* CONFIG_PM_SLEEP */ 1237 1238 static const struct virtio_device_id id_table[] = { 1239 { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID }, 1240 {}, 1241 }; 1242 1243 static const unsigned int feature_table[] = {}; 1244 1245 static struct virtio_driver virtio_fs_driver = { 1246 .driver.name = KBUILD_MODNAME, 1247 .id_table = id_table, 1248 .feature_table = feature_table, 1249 .feature_table_size = ARRAY_SIZE(feature_table), 1250 .probe = virtio_fs_probe, 1251 .remove = virtio_fs_remove, 1252 #ifdef CONFIG_PM_SLEEP 1253 .freeze = virtio_fs_freeze, 1254 .restore = virtio_fs_restore, 1255 #endif 1256 }; 1257 1258 static void virtio_fs_send_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *link) 1259 { 1260 struct virtio_fs_forget *forget; 1261 struct virtio_fs_forget_req *req; 1262 struct virtio_fs *fs = fiq->priv; 1263 struct virtio_fs_vq *fsvq = &fs->vqs[VQ_HIPRIO]; 1264 u64 unique = fuse_get_unique(fiq); 1265 1266 /* Allocate a buffer for the request */ 1267 forget = kmalloc_obj(*forget, GFP_NOFS | __GFP_NOFAIL); 1268 req = &forget->req; 1269 1270 req->ih = (struct fuse_in_header){ 1271 .opcode = FUSE_FORGET, 1272 .nodeid = link->forget_one.nodeid, 1273 .unique = unique, 1274 .len = sizeof(*req), 1275 }; 1276 req->arg = (struct fuse_forget_in){ 1277 .nlookup = link->forget_one.nlookup, 1278 }; 1279 1280 send_forget_request(fsvq, forget, false); 1281 kfree(link); 1282 } 1283 1284 static void virtio_fs_send_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) 1285 { 1286 /* 1287 * TODO interrupts. 1288 * 1289 * Normal fs operations on a local filesystems aren't interruptible. 1290 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW) 1291 * with shared lock between host and guest. 1292 */ 1293 } 1294 1295 /* Count number of scatter-gather elements required */ 1296 static unsigned int sg_count_fuse_folios(struct fuse_folio_desc *folio_descs, 1297 unsigned int num_folios, 1298 unsigned int total_len) 1299 { 1300 unsigned int i; 1301 unsigned int this_len; 1302 1303 for (i = 0; i < num_folios && total_len; i++) { 1304 this_len = min(folio_descs[i].length, total_len); 1305 total_len -= this_len; 1306 } 1307 1308 return i; 1309 } 1310 1311 /* Return the number of scatter-gather list elements required */ 1312 static unsigned int sg_count_fuse_req(struct fuse_req *req) 1313 { 1314 struct fuse_args *args = req->args; 1315 struct fuse_args_pages *ap = container_of(args, typeof(*ap), args); 1316 unsigned int size, total_sgs = 1 /* fuse_in_header */; 1317 1318 if (args->in_numargs - args->in_pages) 1319 total_sgs += 1; 1320 1321 if (args->in_pages) { 1322 size = args->in_args[args->in_numargs - 1].size; 1323 total_sgs += sg_count_fuse_folios(ap->descs, ap->num_folios, 1324 size); 1325 } 1326 1327 if (!test_bit(FR_ISREPLY, &req->flags)) 1328 return total_sgs; 1329 1330 total_sgs += 1 /* fuse_out_header */; 1331 1332 if (args->out_numargs - args->out_pages) 1333 total_sgs += 1; 1334 1335 if (args->out_pages) { 1336 size = args->out_args[args->out_numargs - 1].size; 1337 total_sgs += sg_count_fuse_folios(ap->descs, ap->num_folios, 1338 size); 1339 } 1340 1341 return total_sgs; 1342 } 1343 1344 /* Add folios to scatter-gather list and return number of elements used */ 1345 static unsigned int sg_init_fuse_folios(struct scatterlist *sg, 1346 struct folio **folios, 1347 struct fuse_folio_desc *folio_descs, 1348 unsigned int num_folios, 1349 unsigned int total_len) 1350 { 1351 unsigned int i; 1352 unsigned int this_len; 1353 1354 for (i = 0; i < num_folios && total_len; i++) { 1355 sg_init_table(&sg[i], 1); 1356 this_len = min(folio_descs[i].length, total_len); 1357 sg_set_folio(&sg[i], folios[i], this_len, folio_descs[i].offset); 1358 total_len -= this_len; 1359 } 1360 1361 return i; 1362 } 1363 1364 /* Add args to scatter-gather list and return number of elements used */ 1365 static unsigned int sg_init_fuse_args(struct scatterlist *sg, 1366 struct fuse_req *req, 1367 struct fuse_arg *args, 1368 unsigned int numargs, 1369 bool argpages, 1370 void *argbuf, 1371 unsigned int *len_used) 1372 { 1373 struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args); 1374 unsigned int total_sgs = 0; 1375 unsigned int len; 1376 1377 len = fuse_len_args(numargs - argpages, args); 1378 if (len) 1379 sg_init_one(&sg[total_sgs++], argbuf, len); 1380 1381 if (argpages) 1382 total_sgs += sg_init_fuse_folios(&sg[total_sgs], 1383 ap->folios, ap->descs, 1384 ap->num_folios, 1385 args[numargs - 1].size); 1386 1387 if (len_used) 1388 *len_used = len; 1389 1390 return total_sgs; 1391 } 1392 1393 /* Add a request to a virtqueue and kick the device */ 1394 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 1395 struct fuse_req *req, bool in_flight, 1396 gfp_t gfp) 1397 { 1398 /* requests need at least 4 elements */ 1399 struct scatterlist *stack_sgs[6]; 1400 struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)]; 1401 struct scatterlist **sgs = stack_sgs; 1402 struct scatterlist *sg = stack_sg; 1403 struct virtqueue *vq; 1404 struct fuse_args *args = req->args; 1405 unsigned int argbuf_used = 0; 1406 unsigned int out_sgs = 0; 1407 unsigned int in_sgs = 0; 1408 unsigned int total_sgs; 1409 unsigned int i, hash; 1410 int ret; 1411 bool notify; 1412 struct fuse_pqueue *fpq; 1413 1414 /* Does the sglist fit on the stack? */ 1415 total_sgs = sg_count_fuse_req(req); 1416 if (total_sgs > ARRAY_SIZE(stack_sgs)) { 1417 sgs = kmalloc_objs(sgs[0], total_sgs, gfp); 1418 sg = kmalloc_objs(sg[0], total_sgs, gfp); 1419 if (!sgs || !sg) { 1420 ret = -ENOMEM; 1421 goto out; 1422 } 1423 } 1424 1425 /* Use a bounce buffer since stack args cannot be mapped */ 1426 ret = copy_args_to_argbuf(req, gfp); 1427 if (ret < 0) 1428 goto out; 1429 1430 /* Request elements */ 1431 sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h)); 1432 out_sgs += sg_init_fuse_args(&sg[out_sgs], req, 1433 (struct fuse_arg *)args->in_args, 1434 args->in_numargs, args->in_pages, 1435 req->argbuf, &argbuf_used); 1436 1437 /* Reply elements */ 1438 if (test_bit(FR_ISREPLY, &req->flags)) { 1439 sg_init_one(&sg[out_sgs + in_sgs++], 1440 &req->out.h, sizeof(req->out.h)); 1441 in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req, 1442 args->out_args, args->out_numargs, 1443 args->out_pages, 1444 req->argbuf + argbuf_used, NULL); 1445 } 1446 1447 WARN_ON(out_sgs + in_sgs != total_sgs); 1448 1449 for (i = 0; i < total_sgs; i++) 1450 sgs[i] = &sg[i]; 1451 1452 spin_lock(&fsvq->lock); 1453 1454 if (!fsvq->connected) { 1455 spin_unlock(&fsvq->lock); 1456 ret = -ENOTCONN; 1457 goto out; 1458 } 1459 1460 vq = fsvq->vq; 1461 ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC); 1462 if (ret < 0) { 1463 spin_unlock(&fsvq->lock); 1464 goto out; 1465 } 1466 1467 /* Request successfully sent. */ 1468 fpq = &fsvq->fud->pq; 1469 hash = fuse_req_hash(req->in.h.unique); 1470 spin_lock(&fpq->lock); 1471 list_add_tail(&req->list, &fpq->processing[hash]); 1472 spin_unlock(&fpq->lock); 1473 set_bit(FR_SENT, &req->flags); 1474 /* matches barrier in request_wait_answer() */ 1475 smp_mb__after_atomic(); 1476 1477 if (!in_flight) 1478 inc_in_flight_req(fsvq); 1479 notify = virtqueue_kick_prepare(vq); 1480 1481 spin_unlock(&fsvq->lock); 1482 1483 if (notify) 1484 virtqueue_notify(vq); 1485 1486 out: 1487 if (ret < 0 && req->argbuf) { 1488 kfree(req->argbuf); 1489 req->argbuf = NULL; 1490 } 1491 if (sgs != stack_sgs) { 1492 kfree(sgs); 1493 kfree(sg); 1494 } 1495 1496 return ret; 1497 } 1498 1499 static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req) 1500 { 1501 unsigned int queue_id; 1502 struct virtio_fs *fs; 1503 struct virtio_fs_vq *fsvq; 1504 int ret; 1505 1506 fuse_request_assign_unique(fiq, req); 1507 1508 clear_bit(FR_PENDING, &req->flags); 1509 1510 fs = fiq->priv; 1511 queue_id = fs->mq_map[raw_smp_processor_id()]; 1512 1513 pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u queue_id %u\n", 1514 __func__, req->in.h.opcode, req->in.h.unique, 1515 req->in.h.nodeid, req->in.h.len, 1516 fuse_len_args(req->args->out_numargs, req->args->out_args), 1517 queue_id); 1518 1519 fsvq = &fs->vqs[queue_id]; 1520 ret = virtio_fs_enqueue_req(fsvq, req, false, GFP_ATOMIC); 1521 if (ret < 0) { 1522 if (ret == -ENOSPC) { 1523 /* 1524 * Virtqueue full. Retry submission from worker 1525 * context as we might be holding fc->chan->bg_lock. 1526 */ 1527 spin_lock(&fsvq->lock); 1528 list_add_tail(&req->list, &fsvq->queued_reqs); 1529 inc_in_flight_req(fsvq); 1530 spin_unlock(&fsvq->lock); 1531 return; 1532 } 1533 req->out.h.error = ret; 1534 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); 1535 1536 /* Can't end request in submission context. Use a worker */ 1537 spin_lock(&fsvq->lock); 1538 list_add_tail(&req->list, &fsvq->end_reqs); 1539 schedule_work(&fsvq->dispatch_work); 1540 spin_unlock(&fsvq->lock); 1541 return; 1542 } 1543 } 1544 1545 static const struct fuse_iqueue_ops virtio_fs_fiq_ops = { 1546 .send_forget = virtio_fs_send_forget, 1547 .send_interrupt = virtio_fs_send_interrupt, 1548 .send_req = virtio_fs_send_req, 1549 .release = virtio_fs_fiq_release, 1550 }; 1551 1552 static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx) 1553 { 1554 ctx->rootmode = S_IFDIR; 1555 ctx->default_permissions = 1; 1556 ctx->allow_other = 1; 1557 ctx->max_read = UINT_MAX; 1558 ctx->blksize = 512; 1559 ctx->destroy = true; 1560 ctx->no_control = true; 1561 ctx->no_force_umount = true; 1562 } 1563 1564 static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) 1565 { 1566 struct fuse_mount *fm = get_fuse_mount_super(sb); 1567 struct fuse_conn *fc = fm->fc; 1568 struct virtio_fs *fs = fc->chan->iq.priv; 1569 struct fuse_fs_context *ctx = fsc->fs_private; 1570 unsigned int i; 1571 int err; 1572 1573 virtio_fs_ctx_set_defaults(ctx); 1574 mutex_lock(&virtio_fs_mutex); 1575 1576 /* After holding mutex, make sure virtiofs device is still there. 1577 * Though we are holding a reference to it, drive ->remove might 1578 * still have cleaned up virtual queues. In that case bail out. 1579 */ 1580 err = -EINVAL; 1581 if (list_empty(&fs->list)) { 1582 pr_info("virtio-fs: tag <%s> not found\n", fs->tag); 1583 goto err; 1584 } 1585 1586 err = -ENOMEM; 1587 /* Allocate fuse_dev for hiprio and notification queues */ 1588 for (i = 0; i < fs->nvqs; i++) { 1589 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 1590 1591 fsvq->fud = fuse_dev_alloc(); 1592 if (!fsvq->fud) 1593 goto err_free_fuse_devs; 1594 } 1595 1596 if (ctx->dax_mode != FUSE_DAX_NEVER) { 1597 if (ctx->dax_mode == FUSE_DAX_ALWAYS && !fs->dax_dev) { 1598 err = -EINVAL; 1599 pr_err("virtio-fs: dax can't be enabled as filesystem" 1600 " device does not support it.\n"); 1601 goto err_free_fuse_devs; 1602 } 1603 ctx->dax_dev = fs->dax_dev; 1604 } 1605 err = fuse_fill_super_common(sb, ctx); 1606 if (err < 0) 1607 goto err_free_fuse_devs; 1608 1609 for (i = 0; i < fs->nvqs; i++) { 1610 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 1611 1612 fuse_dev_install(fsvq->fud, fc->chan); 1613 } 1614 1615 /* Previous unmount will stop all queues. Start these again */ 1616 virtio_fs_start_all_queues(fs); 1617 fuse_send_init(fm); 1618 mutex_unlock(&virtio_fs_mutex); 1619 return 0; 1620 1621 err_free_fuse_devs: 1622 virtio_fs_free_devs(fs); 1623 err: 1624 mutex_unlock(&virtio_fs_mutex); 1625 return err; 1626 } 1627 1628 static void virtio_fs_conn_destroy(struct fuse_mount *fm) 1629 { 1630 struct fuse_conn *fc = fm->fc; 1631 struct virtio_fs *vfs = fc->chan->iq.priv; 1632 struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO]; 1633 1634 /* Stop dax worker. Soon evict_inodes() will be called which 1635 * will free all memory ranges belonging to all inodes. 1636 */ 1637 if (IS_ENABLED(CONFIG_FUSE_DAX)) 1638 fuse_dax_cancel_work(fc); 1639 1640 /* Stop forget queue. Soon destroy will be sent */ 1641 spin_lock(&fsvq->lock); 1642 fsvq->connected = false; 1643 spin_unlock(&fsvq->lock); 1644 virtio_fs_drain_all_queues(vfs); 1645 1646 fuse_conn_destroy(fm); 1647 1648 /* fuse_conn_destroy() must have sent destroy. Stop all queues 1649 * and drain one more time and free fuse devices. Freeing fuse 1650 * devices will drop their reference on fuse_conn and that in 1651 * turn will drop its reference on virtio_fs object. 1652 */ 1653 virtio_fs_stop_all_queues(vfs); 1654 virtio_fs_drain_all_queues(vfs); 1655 virtio_fs_free_devs(vfs); 1656 } 1657 1658 static void virtio_kill_sb(struct super_block *sb) 1659 { 1660 struct fuse_mount *fm = get_fuse_mount_super(sb); 1661 bool last; 1662 1663 /* If mount failed, we can still be called without any fc */ 1664 if (sb->s_root) { 1665 last = fuse_mount_remove(fm); 1666 if (last) 1667 virtio_fs_conn_destroy(fm); 1668 } 1669 kill_anon_super(sb); 1670 fuse_mount_destroy(fm); 1671 } 1672 1673 static int virtio_fs_test_super(struct super_block *sb, 1674 struct fs_context *fsc) 1675 { 1676 struct fuse_mount *fsc_fm = fsc->s_fs_info; 1677 struct fuse_mount *sb_fm = get_fuse_mount_super(sb); 1678 1679 return fsc_fm->fc->chan->iq.priv == sb_fm->fc->chan->iq.priv; 1680 } 1681 1682 static int virtio_fs_get_tree(struct fs_context *fsc) 1683 { 1684 struct virtio_fs *fs; 1685 struct super_block *sb; 1686 struct fuse_conn *fc = NULL; 1687 struct fuse_mount *fm; 1688 unsigned int virtqueue_size; 1689 struct fuse_chan *fch __free(fuse_chan_free) = fuse_chan_new(); 1690 int err = -EIO; 1691 1692 if (!fch) 1693 return -ENOMEM; 1694 1695 if (!fsc->source) 1696 return invalf(fsc, "No source specified"); 1697 1698 /* This gets a reference on virtio_fs object. This ptr gets installed 1699 * in chan->iq->priv. Once fuse_conn is going away, it calls ->put() 1700 * to drop the reference to this object. 1701 */ 1702 fs = virtio_fs_find_instance(fsc->source); 1703 if (!fs) { 1704 pr_info("virtio-fs: tag <%s> not found\n", fsc->source); 1705 return -EINVAL; 1706 } 1707 1708 virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq); 1709 if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD)) 1710 goto out_err; 1711 1712 err = -ENOMEM; 1713 fc = kzalloc_obj(struct fuse_conn); 1714 if (!fc) 1715 goto out_err; 1716 1717 fm = kzalloc_obj(struct fuse_mount); 1718 if (!fm) 1719 goto out_err; 1720 1721 fuse_iqueue_init(&fch->iq, &virtio_fs_fiq_ops, fs); 1722 fuse_conn_init(fc, fm, fsc->user_ns, no_free_ptr(fch)); 1723 1724 fc->release = fuse_free_conn; 1725 fc->delete_stale = true; 1726 fc->auto_submounts = true; 1727 fc->sync_fs = true; 1728 fc->use_pages_for_kvec_io = true; 1729 1730 /* Tell FUSE to split requests that exceed the virtqueue's size */ 1731 fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit, 1732 virtqueue_size - FUSE_HEADER_OVERHEAD); 1733 1734 fsc->s_fs_info = fm; 1735 sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc); 1736 if (fsc->s_fs_info) 1737 fuse_mount_destroy(fm); 1738 if (IS_ERR(sb)) 1739 return PTR_ERR(sb); 1740 1741 if (!sb->s_root) { 1742 err = virtio_fs_fill_super(sb, fsc); 1743 if (err) { 1744 deactivate_locked_super(sb); 1745 return err; 1746 } 1747 1748 sb->s_flags |= SB_ACTIVE; 1749 } 1750 1751 WARN_ON(fsc->root); 1752 fsc->root = dget(sb->s_root); 1753 return 0; 1754 1755 out_err: 1756 kfree(fc); 1757 virtio_fs_put(fs); 1758 return err; 1759 } 1760 1761 static const struct fs_context_operations virtio_fs_context_ops = { 1762 .free = virtio_fs_free_fsc, 1763 .parse_param = virtio_fs_parse_param, 1764 .get_tree = virtio_fs_get_tree, 1765 }; 1766 1767 static int virtio_fs_init_fs_context(struct fs_context *fsc) 1768 { 1769 struct fuse_fs_context *ctx; 1770 1771 if (fsc->purpose == FS_CONTEXT_FOR_SUBMOUNT) 1772 return fuse_init_fs_context_submount(fsc); 1773 1774 ctx = kzalloc_obj(struct fuse_fs_context); 1775 if (!ctx) 1776 return -ENOMEM; 1777 fsc->fs_private = ctx; 1778 fsc->ops = &virtio_fs_context_ops; 1779 return 0; 1780 } 1781 1782 static struct file_system_type virtio_fs_type = { 1783 .owner = THIS_MODULE, 1784 .name = "virtiofs", 1785 .init_fs_context = virtio_fs_init_fs_context, 1786 .kill_sb = virtio_kill_sb, 1787 .fs_flags = FS_ALLOW_IDMAP, 1788 }; 1789 1790 static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) 1791 { 1792 const struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj); 1793 1794 add_uevent_var(env, "TAG=%s", fs->tag); 1795 return 0; 1796 } 1797 1798 static const struct kset_uevent_ops virtio_fs_uevent_ops = { 1799 .uevent = virtio_fs_uevent, 1800 }; 1801 1802 static int __init virtio_fs_sysfs_init(void) 1803 { 1804 virtio_fs_kset = kset_create_and_add("virtiofs", &virtio_fs_uevent_ops, 1805 fs_kobj); 1806 if (!virtio_fs_kset) 1807 return -ENOMEM; 1808 return 0; 1809 } 1810 1811 static void virtio_fs_sysfs_exit(void) 1812 { 1813 kset_unregister(virtio_fs_kset); 1814 virtio_fs_kset = NULL; 1815 } 1816 1817 static int __init virtio_fs_init(void) 1818 { 1819 int ret; 1820 1821 ret = virtio_fs_sysfs_init(); 1822 if (ret < 0) 1823 return ret; 1824 1825 ret = register_virtio_driver(&virtio_fs_driver); 1826 if (ret < 0) 1827 goto sysfs_exit; 1828 1829 ret = register_filesystem(&virtio_fs_type); 1830 if (ret < 0) 1831 goto unregister_virtio_driver; 1832 1833 return 0; 1834 1835 unregister_virtio_driver: 1836 unregister_virtio_driver(&virtio_fs_driver); 1837 sysfs_exit: 1838 virtio_fs_sysfs_exit(); 1839 return ret; 1840 } 1841 module_init(virtio_fs_init); 1842 1843 static void __exit virtio_fs_exit(void) 1844 { 1845 unregister_filesystem(&virtio_fs_type); 1846 unregister_virtio_driver(&virtio_fs_driver); 1847 virtio_fs_sysfs_exit(); 1848 } 1849 module_exit(virtio_fs_exit); 1850 1851 MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>"); 1852 MODULE_DESCRIPTION("Virtio Filesystem"); 1853 MODULE_LICENSE("GPL"); 1854 MODULE_ALIAS_FS(KBUILD_MODNAME); 1855 MODULE_DEVICE_TABLE(virtio, id_table); 1856