// SPDX-License-Identifier: GPL-2.0
/*
 * virtio-fs: Virtio Filesystem
 * Copyright (C) 2018 Red Hat, Inc.
 */

#include <linux/fs.h>
#include <linux/dax.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/group_cpus.h>
#include <linux/memremap.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_fs.h>
#include <linux/delay.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/highmem.h>
#include <linux/cleanup.h>
#include <linux/uio.h>
#include "fuse_i.h"

/* Used to help calculate the FUSE connection's max_pages limit for a request's
 * size. Parts of the struct fuse_req are sliced into scatter-gather lists in
 * addition to the pages used, so this can help account for that overhead.
 */
#define FUSE_HEADER_OVERHEAD    4
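
/*
 * Illustrative example (the numbers are examples, not spec values): if the
 * request virtqueue has 128 descriptors, virtio_fs_get_tree() below caps
 * fc->max_pages_limit at 128 - FUSE_HEADER_OVERHEAD = 124 pages, so the
 * header/argument scatter-gather elements plus the data pages of a single
 * request always fit into one virtqueue submission.
 */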

/* List of virtio-fs device instances and a lock for the list. Also provides
 * mutual exclusion in device removal and mounting paths
 */
static DEFINE_MUTEX(virtio_fs_mutex);
static LIST_HEAD(virtio_fs_instances);

/* The /sys/fs/virtiofs/ kset */
static struct kset *virtio_fs_kset;

enum {
	VQ_HIPRIO,
	VQ_REQUEST
};

#define VQ_NAME_LEN	24

/* Per-virtqueue state */
struct virtio_fs_vq {
	spinlock_t lock;
	struct virtqueue *vq;     /* protected by ->lock */
	struct work_struct done_work;
	struct list_head queued_reqs;
	struct list_head end_reqs;	/* End these requests */
	struct work_struct dispatch_work;
	struct fuse_dev *fud;
	bool connected;
	long in_flight;
	struct completion in_flight_zero; /* No inflight requests */
	struct kobject *kobj;
	char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;

/* A virtio-fs device instance */
struct virtio_fs {
	struct kobject kobj;
	struct kobject *mqs_kobj;
	struct list_head list;    /* on virtio_fs_instances */
	char *tag;
	struct virtio_fs_vq *vqs;
	unsigned int nvqs;               /* number of virtqueues */
	unsigned int num_request_queues; /* number of request queues */
	struct dax_device *dax_dev;

	unsigned int *mq_map; /* index = cpu id, value = request vq id */

	/* DAX memory window where file contents are mapped */
	void *window_kaddr;
	phys_addr_t window_phys_addr;
	size_t window_len;
};

struct virtio_fs_forget_req {
	struct fuse_in_header ih;
	struct fuse_forget_in arg;
};

struct virtio_fs_forget {
	/* This request can be temporarily queued on virt queue */
	struct list_head list;
	struct virtio_fs_forget_req req;
};

struct virtio_fs_req_work {
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq;
	struct work_struct done_work;
};

static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight,
				 gfp_t gfp);

static const struct constant_table dax_param_enums[] = {
	{"always",	FUSE_DAX_ALWAYS },
	{"never",	FUSE_DAX_NEVER },
	{"inode",	FUSE_DAX_INODE_USER },
	{}
};

enum {
	OPT_DAX,
	OPT_DAX_ENUM,
};

static const struct fs_parameter_spec virtio_fs_parameters[] = {
	fsparam_flag("dax", OPT_DAX),
	fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums),
	{}
};

static int virtio_fs_parse_param(struct fs_context *fsc,
				 struct fs_parameter *param)
{
	struct fs_parse_result result;
	struct fuse_fs_context *ctx = fsc->fs_private;
	int opt;

	opt = fs_parse(fsc, virtio_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case OPT_DAX:
		ctx->dax_mode = FUSE_DAX_ALWAYS;
		break;
	case OPT_DAX_ENUM:
		ctx->dax_mode = result.uint_32;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static void virtio_fs_free_fsc(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;

	kfree(ctx);
}

static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
	struct virtio_fs *fs = vq->vdev->priv;

	return &fs->vqs[vq->index];
}

/* Should be called with fsvq->lock held. */
static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
{
	fsvq->in_flight++;
}

/* Should be called with fsvq->lock held. */
static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight <= 0);
	fsvq->in_flight--;
	if (!fsvq->in_flight)
		complete(&fsvq->in_flight_zero);
}

static ssize_t tag_show(struct kobject *kobj,
			struct kobj_attribute *attr, char *buf)
{
	struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj);

	return sysfs_emit(buf, "%s\n", fs->tag);
}

static struct kobj_attribute virtio_fs_tag_attr = __ATTR_RO(tag);

static struct attribute *virtio_fs_attrs[] = {
	&virtio_fs_tag_attr.attr,
	NULL
};
ATTRIBUTE_GROUPS(virtio_fs);

static void virtio_fs_ktype_release(struct kobject *kobj)
{
	struct virtio_fs *vfs = container_of(kobj, struct virtio_fs, kobj);

	kfree(vfs->mq_map);
	kfree(vfs->vqs);
	kfree(vfs);
}

static const struct kobj_type virtio_fs_ktype = {
	.release = virtio_fs_ktype_release,
	.sysfs_ops = &kobj_sysfs_ops,
	.default_groups = virtio_fs_groups,
};

static struct virtio_fs_vq *virtio_fs_kobj_to_vq(struct virtio_fs *fs,
						 struct kobject *kobj)
{
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		if (kobj == fs->vqs[i].kobj)
			return &fs->vqs[i];
	}
	return NULL;
}

static ssize_t name_show(struct kobject *kobj,
			 struct kobj_attribute *attr, char *buf)
{
	struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj);
	struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj);

	if (!fsvq)
		return -EINVAL;
	return sysfs_emit(buf, "%s\n", fsvq->name);
}

static struct kobj_attribute virtio_fs_vq_name_attr = __ATTR_RO(name);

static ssize_t cpu_list_show(struct kobject *kobj,
			     struct kobj_attribute *attr, char *buf)
{
	struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj);
	struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj);
	unsigned int cpu, qid;
	const size_t size = PAGE_SIZE - 1;
	bool first = true;
	int ret = 0, pos = 0;

	if (!fsvq)
		return -EINVAL;

	qid = fsvq->vq->index;
	for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
		if (qid < VQ_REQUEST || (fs->mq_map[cpu] == qid)) {
			if (first)
				ret = snprintf(buf + pos, size - pos, "%u", cpu);
			else
				ret = snprintf(buf + pos, size - pos, ", %u", cpu);

			if (ret >= size - pos)
				break;
			first = false;
			pos += ret;
		}
	}
	ret = snprintf(buf + pos, size + 1 - pos, "\n");
	return pos + ret;
}

static struct kobj_attribute virtio_fs_vq_cpu_list_attr = __ATTR_RO(cpu_list);

static struct attribute *virtio_fs_vq_attrs[] = {
	&virtio_fs_vq_name_attr.attr,
	&virtio_fs_vq_cpu_list_attr.attr,
	NULL
};

static struct attribute_group virtio_fs_vq_attr_group = {
	.attrs = virtio_fs_vq_attrs,
};

/* Make sure virtio_fs_mutex is held */
static void virtio_fs_put_locked(struct virtio_fs *fs)
{
	lockdep_assert_held(&virtio_fs_mutex);

	kobject_put(&fs->kobj);
}

static void virtio_fs_put(struct virtio_fs *fs)
{
	mutex_lock(&virtio_fs_mutex);
	virtio_fs_put_locked(fs);
	mutex_unlock(&virtio_fs_mutex);
}

static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
{
	struct virtio_fs *vfs = fiq->priv;

	virtio_fs_put(vfs);
}

static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight < 0);

	/* Wait for in flight requests to finish. */
	spin_lock(&fsvq->lock);
	if (fsvq->in_flight) {
		/* We are holding virtio_fs_mutex. There should not be any
		 * waiters waiting for completion.
		 */
		reinit_completion(&fsvq->in_flight_zero);
		spin_unlock(&fsvq->lock);
		wait_for_completion(&fsvq->in_flight_zero);
	} else {
		spin_unlock(&fsvq->lock);
	}

	flush_work(&fsvq->done_work);
	flush_work(&fsvq->dispatch_work);
}

static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		virtio_fs_drain_queue(fsvq);
	}
}

static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
{
	/* Provides mutual exclusion between ->remove and ->kill_sb
	 * paths. We don't want both of these draining queue at the
	 * same time. Current completion logic reinits completion
	 * and that means there should not be any other thread
	 * doing reinit or waiting for completion already.
	 */
	mutex_lock(&virtio_fs_mutex);
	virtio_fs_drain_all_queues_locked(fs);
	mutex_unlock(&virtio_fs_mutex);
}

static void virtio_fs_start_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = true;
		spin_unlock(&fsvq->lock);
	}
}

static void virtio_fs_delete_queues_sysfs(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		kobject_put(fsvq->kobj);
	}
}

static int virtio_fs_add_queues_sysfs(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	char buff[12];
	int i, j, ret;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];

		sprintf(buff, "%d", i);
		fsvq->kobj = kobject_create_and_add(buff, fs->mqs_kobj);
		if (!fsvq->kobj) {
			ret = -ENOMEM;
			goto out_del;
		}

		ret = sysfs_create_group(fsvq->kobj, &virtio_fs_vq_attr_group);
		if (ret) {
			kobject_put(fsvq->kobj);
			goto out_del;
		}
	}

	return 0;

out_del:
	for (j = 0; j < i; j++) {
		fsvq = &fs->vqs[j];
		kobject_put(fsvq->kobj);
	}
	return ret;
}
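
/*
 * Sketch of the sysfs layout built by the helpers above and by
 * virtio_fs_add_instance() below (device index and queue ids are
 * illustrative only):
 *
 *   /sys/fs/virtiofs/<vdev-index>/tag
 *   /sys/fs/virtiofs/<vdev-index>/device           -> symlink to the virtio device
 *   /sys/fs/virtiofs/<vdev-index>/mqs/<qid>/name
 *   /sys/fs/virtiofs/<vdev-index>/mqs/<qid>/cpu_list
 */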

/* Add a new instance to the list or return -EEXIST if tag name exists */
static int virtio_fs_add_instance(struct virtio_device *vdev,
				  struct virtio_fs *fs)
{
	struct virtio_fs *fs2;
	int ret;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs2, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, fs2->tag) == 0) {
			mutex_unlock(&virtio_fs_mutex);
			return -EEXIST;
		}
	}

	/* Use the virtio_device's index as a unique identifier, there is no
	 * need to allocate our own identifiers because the virtio_fs instance
	 * is only visible to userspace as long as the underlying virtio_device
	 * exists.
417 */ 418 fs->kobj.kset = virtio_fs_kset; 419 ret = kobject_add(&fs->kobj, NULL, "%d", vdev->index); 420 if (ret < 0) 421 goto out_unlock; 422 423 fs->mqs_kobj = kobject_create_and_add("mqs", &fs->kobj); 424 if (!fs->mqs_kobj) { 425 ret = -ENOMEM; 426 goto out_del; 427 } 428 429 ret = sysfs_create_link(&fs->kobj, &vdev->dev.kobj, "device"); 430 if (ret < 0) 431 goto out_put; 432 433 ret = virtio_fs_add_queues_sysfs(fs); 434 if (ret) 435 goto out_remove; 436 437 list_add_tail(&fs->list, &virtio_fs_instances); 438 439 mutex_unlock(&virtio_fs_mutex); 440 441 kobject_uevent(&fs->kobj, KOBJ_ADD); 442 443 return 0; 444 445 out_remove: 446 sysfs_remove_link(&fs->kobj, "device"); 447 out_put: 448 kobject_put(fs->mqs_kobj); 449 out_del: 450 kobject_del(&fs->kobj); 451 out_unlock: 452 mutex_unlock(&virtio_fs_mutex); 453 return ret; 454 } 455 456 /* Return the virtio_fs with a given tag, or NULL */ 457 static struct virtio_fs *virtio_fs_find_instance(const char *tag) 458 { 459 struct virtio_fs *fs; 460 461 mutex_lock(&virtio_fs_mutex); 462 463 list_for_each_entry(fs, &virtio_fs_instances, list) { 464 if (strcmp(fs->tag, tag) == 0) { 465 kobject_get(&fs->kobj); 466 goto found; 467 } 468 } 469 470 fs = NULL; /* not found */ 471 472 found: 473 mutex_unlock(&virtio_fs_mutex); 474 475 return fs; 476 } 477 478 static void virtio_fs_free_devs(struct virtio_fs *fs) 479 { 480 unsigned int i; 481 482 for (i = 0; i < fs->nvqs; i++) { 483 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 484 485 if (!fsvq->fud) 486 continue; 487 488 fuse_dev_free(fsvq->fud); 489 fsvq->fud = NULL; 490 } 491 } 492 493 /* Read filesystem name from virtio config into fs->tag (must kfree()). */ 494 static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs) 495 { 496 char tag_buf[sizeof_field(struct virtio_fs_config, tag)]; 497 char *end; 498 size_t len; 499 500 virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag), 501 &tag_buf, sizeof(tag_buf)); 502 end = memchr(tag_buf, '\0', sizeof(tag_buf)); 503 if (end == tag_buf) 504 return -EINVAL; /* empty tag */ 505 if (!end) 506 end = &tag_buf[sizeof(tag_buf)]; 507 508 len = end - tag_buf; 509 fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL); 510 if (!fs->tag) 511 return -ENOMEM; 512 memcpy(fs->tag, tag_buf, len); 513 fs->tag[len] = '\0'; 514 515 /* While the VIRTIO specification allows any character, newlines are 516 * awkward on mount(8) command-lines and cause problems in the sysfs 517 * "tag" attr and uevent TAG= properties. Forbid them. 
518 */ 519 if (strchr(fs->tag, '\n')) { 520 dev_dbg(&vdev->dev, "refusing virtiofs tag with newline character\n"); 521 return -EINVAL; 522 } 523 524 dev_info(&vdev->dev, "discovered new tag: %s\n", fs->tag); 525 return 0; 526 } 527 528 /* Work function for hiprio completion */ 529 static void virtio_fs_hiprio_done_work(struct work_struct *work) 530 { 531 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 532 done_work); 533 struct virtqueue *vq = fsvq->vq; 534 535 /* Free completed FUSE_FORGET requests */ 536 spin_lock(&fsvq->lock); 537 do { 538 unsigned int len; 539 void *req; 540 541 virtqueue_disable_cb(vq); 542 543 while ((req = virtqueue_get_buf(vq, &len)) != NULL) { 544 kfree(req); 545 dec_in_flight_req(fsvq); 546 } 547 } while (!virtqueue_enable_cb(vq)); 548 549 if (!list_empty(&fsvq->queued_reqs)) 550 schedule_work(&fsvq->dispatch_work); 551 552 spin_unlock(&fsvq->lock); 553 } 554 555 static void virtio_fs_request_dispatch_work(struct work_struct *work) 556 { 557 struct fuse_req *req; 558 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 559 dispatch_work); 560 int ret; 561 562 pr_debug("virtio-fs: worker %s called.\n", __func__); 563 while (1) { 564 spin_lock(&fsvq->lock); 565 req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req, 566 list); 567 if (!req) { 568 spin_unlock(&fsvq->lock); 569 break; 570 } 571 572 list_del_init(&req->list); 573 spin_unlock(&fsvq->lock); 574 fuse_request_end(req); 575 } 576 577 /* Dispatch pending requests */ 578 while (1) { 579 unsigned int flags; 580 581 spin_lock(&fsvq->lock); 582 req = list_first_entry_or_null(&fsvq->queued_reqs, 583 struct fuse_req, list); 584 if (!req) { 585 spin_unlock(&fsvq->lock); 586 return; 587 } 588 list_del_init(&req->list); 589 spin_unlock(&fsvq->lock); 590 591 flags = memalloc_nofs_save(); 592 ret = virtio_fs_enqueue_req(fsvq, req, true, GFP_KERNEL); 593 memalloc_nofs_restore(flags); 594 if (ret < 0) { 595 if (ret == -ENOSPC) { 596 spin_lock(&fsvq->lock); 597 list_add_tail(&req->list, &fsvq->queued_reqs); 598 spin_unlock(&fsvq->lock); 599 return; 600 } 601 req->out.h.error = ret; 602 spin_lock(&fsvq->lock); 603 dec_in_flight_req(fsvq); 604 spin_unlock(&fsvq->lock); 605 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", 606 ret); 607 fuse_request_end(req); 608 } 609 } 610 } 611 612 /* 613 * Returns 1 if queue is full and sender should wait a bit before sending 614 * next request, 0 otherwise. 615 */ 616 static int send_forget_request(struct virtio_fs_vq *fsvq, 617 struct virtio_fs_forget *forget, 618 bool in_flight) 619 { 620 struct scatterlist sg; 621 struct virtqueue *vq; 622 int ret = 0; 623 bool notify; 624 struct virtio_fs_forget_req *req = &forget->req; 625 626 spin_lock(&fsvq->lock); 627 if (!fsvq->connected) { 628 if (in_flight) 629 dec_in_flight_req(fsvq); 630 kfree(forget); 631 goto out; 632 } 633 634 sg_init_one(&sg, req, sizeof(*req)); 635 vq = fsvq->vq; 636 dev_dbg(&vq->vdev->dev, "%s\n", __func__); 637 638 ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC); 639 if (ret < 0) { 640 if (ret == -ENOSPC) { 641 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n", 642 ret); 643 list_add_tail(&forget->list, &fsvq->queued_reqs); 644 if (!in_flight) 645 inc_in_flight_req(fsvq); 646 /* Queue is full */ 647 ret = 1; 648 } else { 649 pr_debug("virtio-fs: Could not queue FORGET: err=%d. 
Dropping it.\n", 650 ret); 651 kfree(forget); 652 if (in_flight) 653 dec_in_flight_req(fsvq); 654 } 655 goto out; 656 } 657 658 if (!in_flight) 659 inc_in_flight_req(fsvq); 660 notify = virtqueue_kick_prepare(vq); 661 spin_unlock(&fsvq->lock); 662 663 if (notify) 664 virtqueue_notify(vq); 665 return ret; 666 out: 667 spin_unlock(&fsvq->lock); 668 return ret; 669 } 670 671 static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) 672 { 673 struct virtio_fs_forget *forget; 674 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 675 dispatch_work); 676 pr_debug("virtio-fs: worker %s called.\n", __func__); 677 while (1) { 678 spin_lock(&fsvq->lock); 679 forget = list_first_entry_or_null(&fsvq->queued_reqs, 680 struct virtio_fs_forget, list); 681 if (!forget) { 682 spin_unlock(&fsvq->lock); 683 return; 684 } 685 686 list_del(&forget->list); 687 spin_unlock(&fsvq->lock); 688 if (send_forget_request(fsvq, forget, true)) 689 return; 690 } 691 } 692 693 /* Allocate and copy args into req->argbuf */ 694 static int copy_args_to_argbuf(struct fuse_req *req, gfp_t gfp) 695 { 696 struct fuse_args *args = req->args; 697 unsigned int offset = 0; 698 unsigned int num_in; 699 unsigned int num_out; 700 unsigned int len; 701 unsigned int i; 702 703 num_in = args->in_numargs - args->in_pages; 704 num_out = args->out_numargs - args->out_pages; 705 len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) + 706 fuse_len_args(num_out, args->out_args); 707 708 req->argbuf = kmalloc(len, gfp); 709 if (!req->argbuf) 710 return -ENOMEM; 711 712 for (i = 0; i < num_in; i++) { 713 memcpy(req->argbuf + offset, 714 args->in_args[i].value, 715 args->in_args[i].size); 716 offset += args->in_args[i].size; 717 } 718 719 return 0; 720 } 721 722 /* Copy args out of and free req->argbuf */ 723 static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req) 724 { 725 unsigned int remaining; 726 unsigned int offset; 727 unsigned int num_in; 728 unsigned int num_out; 729 unsigned int i; 730 731 remaining = req->out.h.len - sizeof(req->out.h); 732 num_in = args->in_numargs - args->in_pages; 733 num_out = args->out_numargs - args->out_pages; 734 offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args); 735 736 for (i = 0; i < num_out; i++) { 737 unsigned int argsize = args->out_args[i].size; 738 739 if (args->out_argvar && 740 i == args->out_numargs - 1 && 741 argsize > remaining) { 742 argsize = remaining; 743 } 744 745 memcpy(args->out_args[i].value, req->argbuf + offset, argsize); 746 offset += argsize; 747 748 if (i != args->out_numargs - 1) 749 remaining -= argsize; 750 } 751 752 /* Store the actual size of the variable-length arg */ 753 if (args->out_argvar) 754 args->out_args[args->out_numargs - 1].size = remaining; 755 756 kfree(req->argbuf); 757 req->argbuf = NULL; 758 } 759 760 /* Work function for request completion */ 761 static void virtio_fs_request_complete(struct fuse_req *req, 762 struct virtio_fs_vq *fsvq) 763 { 764 struct fuse_pqueue *fpq = &fsvq->fud->pq; 765 struct fuse_args *args; 766 struct fuse_args_pages *ap; 767 unsigned int len, i, thislen; 768 struct folio *folio; 769 770 /* 771 * TODO verify that server properly follows FUSE protocol 772 * (oh.uniq, oh.len) 773 */ 774 args = req->args; 775 copy_args_from_argbuf(args, req); 776 777 if (args->out_pages && args->page_zeroing) { 778 len = args->out_args[args->out_numargs - 1].size; 779 ap = container_of(args, typeof(*ap), args); 780 for (i = 0; i < ap->num_folios; i++) { 781 thislen = 
ap->descs[i].length; 782 if (len < thislen) { 783 WARN_ON(ap->descs[i].offset); 784 folio = ap->folios[i]; 785 folio_zero_segment(folio, len, thislen); 786 len = 0; 787 } else { 788 len -= thislen; 789 } 790 } 791 } 792 793 spin_lock(&fpq->lock); 794 clear_bit(FR_SENT, &req->flags); 795 spin_unlock(&fpq->lock); 796 797 fuse_request_end(req); 798 spin_lock(&fsvq->lock); 799 dec_in_flight_req(fsvq); 800 spin_unlock(&fsvq->lock); 801 } 802 803 static void virtio_fs_complete_req_work(struct work_struct *work) 804 { 805 struct virtio_fs_req_work *w = 806 container_of(work, typeof(*w), done_work); 807 808 virtio_fs_request_complete(w->req, w->fsvq); 809 kfree(w); 810 } 811 812 static void virtio_fs_requests_done_work(struct work_struct *work) 813 { 814 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 815 done_work); 816 struct fuse_pqueue *fpq = &fsvq->fud->pq; 817 struct virtqueue *vq = fsvq->vq; 818 struct fuse_req *req; 819 struct fuse_req *next; 820 unsigned int len; 821 LIST_HEAD(reqs); 822 823 /* Collect completed requests off the virtqueue */ 824 spin_lock(&fsvq->lock); 825 do { 826 virtqueue_disable_cb(vq); 827 828 while ((req = virtqueue_get_buf(vq, &len)) != NULL) { 829 spin_lock(&fpq->lock); 830 list_move_tail(&req->list, &reqs); 831 spin_unlock(&fpq->lock); 832 } 833 } while (!virtqueue_enable_cb(vq)); 834 spin_unlock(&fsvq->lock); 835 836 /* End requests */ 837 list_for_each_entry_safe(req, next, &reqs, list) { 838 list_del_init(&req->list); 839 840 /* blocking async request completes in a worker context */ 841 if (req->args->may_block) { 842 struct virtio_fs_req_work *w; 843 844 w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL); 845 INIT_WORK(&w->done_work, virtio_fs_complete_req_work); 846 w->fsvq = fsvq; 847 w->req = req; 848 schedule_work(&w->done_work); 849 } else { 850 virtio_fs_request_complete(req, fsvq); 851 } 852 } 853 854 /* Try to push previously queued requests, as the queue might no longer be full */ 855 spin_lock(&fsvq->lock); 856 if (!list_empty(&fsvq->queued_reqs)) 857 schedule_work(&fsvq->dispatch_work); 858 spin_unlock(&fsvq->lock); 859 } 860 861 static void virtio_fs_map_queues(struct virtio_device *vdev, struct virtio_fs *fs) 862 { 863 const struct cpumask *mask, *masks; 864 unsigned int q, cpu, nr_masks; 865 866 /* First attempt to map using existing transport layer affinities 867 * e.g. 
PCIe MSI-X 868 */ 869 if (!vdev->config->get_vq_affinity) 870 goto fallback; 871 872 for (q = 0; q < fs->num_request_queues; q++) { 873 mask = vdev->config->get_vq_affinity(vdev, VQ_REQUEST + q); 874 if (!mask) 875 goto fallback; 876 877 for_each_cpu(cpu, mask) 878 fs->mq_map[cpu] = q + VQ_REQUEST; 879 } 880 881 return; 882 fallback: 883 /* Attempt to map evenly in groups over the CPUs */ 884 masks = group_cpus_evenly(fs->num_request_queues, &nr_masks); 885 /* If even this fails we default to all CPUs use first request queue */ 886 if (!masks) { 887 for_each_possible_cpu(cpu) 888 fs->mq_map[cpu] = VQ_REQUEST; 889 return; 890 } 891 892 for (q = 0; q < fs->num_request_queues; q++) { 893 for_each_cpu(cpu, &masks[q % nr_masks]) 894 fs->mq_map[cpu] = q + VQ_REQUEST; 895 } 896 kfree(masks); 897 } 898 899 /* Virtqueue interrupt handler */ 900 static void virtio_fs_vq_done(struct virtqueue *vq) 901 { 902 struct virtio_fs_vq *fsvq = vq_to_fsvq(vq); 903 904 dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name); 905 906 schedule_work(&fsvq->done_work); 907 } 908 909 static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name, 910 int vq_type) 911 { 912 strscpy(fsvq->name, name, VQ_NAME_LEN); 913 spin_lock_init(&fsvq->lock); 914 INIT_LIST_HEAD(&fsvq->queued_reqs); 915 INIT_LIST_HEAD(&fsvq->end_reqs); 916 init_completion(&fsvq->in_flight_zero); 917 918 if (vq_type == VQ_REQUEST) { 919 INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work); 920 INIT_WORK(&fsvq->dispatch_work, 921 virtio_fs_request_dispatch_work); 922 } else { 923 INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work); 924 INIT_WORK(&fsvq->dispatch_work, 925 virtio_fs_hiprio_dispatch_work); 926 } 927 } 928 929 /* Initialize virtqueues */ 930 static int virtio_fs_setup_vqs(struct virtio_device *vdev, 931 struct virtio_fs *fs) 932 { 933 struct virtqueue_info *vqs_info; 934 struct virtqueue **vqs; 935 /* Specify pre_vectors to ensure that the queues before the 936 * request queues (e.g. 
hiprio) don't claim any of the CPUs in 937 * the multi-queue mapping and interrupt affinities 938 */ 939 struct irq_affinity desc = { .pre_vectors = VQ_REQUEST }; 940 unsigned int i; 941 int ret = 0; 942 943 virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues, 944 &fs->num_request_queues); 945 if (fs->num_request_queues == 0) 946 return -EINVAL; 947 948 /* Truncate nr of request queues to nr_cpu_id */ 949 fs->num_request_queues = min_t(unsigned int, fs->num_request_queues, 950 nr_cpu_ids); 951 fs->nvqs = VQ_REQUEST + fs->num_request_queues; 952 fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL); 953 if (!fs->vqs) 954 return -ENOMEM; 955 956 vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL); 957 fs->mq_map = kcalloc_node(nr_cpu_ids, sizeof(*fs->mq_map), GFP_KERNEL, 958 dev_to_node(&vdev->dev)); 959 vqs_info = kcalloc(fs->nvqs, sizeof(*vqs_info), GFP_KERNEL); 960 if (!vqs || !vqs_info || !fs->mq_map) { 961 ret = -ENOMEM; 962 goto out; 963 } 964 965 /* Initialize the hiprio/forget request virtqueue */ 966 vqs_info[VQ_HIPRIO].callback = virtio_fs_vq_done; 967 virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO); 968 vqs_info[VQ_HIPRIO].name = fs->vqs[VQ_HIPRIO].name; 969 970 /* Initialize the requests virtqueues */ 971 for (i = VQ_REQUEST; i < fs->nvqs; i++) { 972 char vq_name[VQ_NAME_LEN]; 973 974 snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST); 975 virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST); 976 vqs_info[i].callback = virtio_fs_vq_done; 977 vqs_info[i].name = fs->vqs[i].name; 978 } 979 980 ret = virtio_find_vqs(vdev, fs->nvqs, vqs, vqs_info, &desc); 981 if (ret < 0) 982 goto out; 983 984 for (i = 0; i < fs->nvqs; i++) 985 fs->vqs[i].vq = vqs[i]; 986 987 virtio_fs_start_all_queues(fs); 988 out: 989 kfree(vqs_info); 990 kfree(vqs); 991 if (ret) { 992 kfree(fs->vqs); 993 kfree(fs->mq_map); 994 } 995 return ret; 996 } 997 998 /* Free virtqueues (device must already be reset) */ 999 static void virtio_fs_cleanup_vqs(struct virtio_device *vdev) 1000 { 1001 vdev->config->del_vqs(vdev); 1002 } 1003 1004 /* Map a window offset to a page frame number. The window offset will have 1005 * been produced by .iomap_begin(), which maps a file offset to a window 1006 * offset. 1007 */ 1008 static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, 1009 long nr_pages, enum dax_access_mode mode, 1010 void **kaddr, unsigned long *pfn) 1011 { 1012 struct virtio_fs *fs = dax_get_private(dax_dev); 1013 phys_addr_t offset = PFN_PHYS(pgoff); 1014 size_t max_nr_pages = fs->window_len / PAGE_SIZE - pgoff; 1015 1016 if (kaddr) 1017 *kaddr = fs->window_kaddr + offset; 1018 if (pfn) 1019 *pfn = fs->window_phys_addr + offset; 1020 return nr_pages > max_nr_pages ? 
max_nr_pages : nr_pages; 1021 } 1022 1023 static int virtio_fs_zero_page_range(struct dax_device *dax_dev, 1024 pgoff_t pgoff, size_t nr_pages) 1025 { 1026 long rc; 1027 void *kaddr; 1028 1029 rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr, 1030 NULL); 1031 if (rc < 0) 1032 return dax_mem2blk_err(rc); 1033 1034 memset(kaddr, 0, nr_pages << PAGE_SHIFT); 1035 dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); 1036 return 0; 1037 } 1038 1039 static const struct dax_operations virtio_fs_dax_ops = { 1040 .direct_access = virtio_fs_direct_access, 1041 .zero_page_range = virtio_fs_zero_page_range, 1042 }; 1043 1044 static void virtio_fs_cleanup_dax(void *data) 1045 { 1046 struct dax_device *dax_dev = data; 1047 1048 kill_dax(dax_dev); 1049 put_dax(dax_dev); 1050 } 1051 1052 DEFINE_FREE(cleanup_dax, struct dax_dev *, if (!IS_ERR_OR_NULL(_T)) virtio_fs_cleanup_dax(_T)) 1053 1054 static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs) 1055 { 1056 struct dax_device *dax_dev __free(cleanup_dax) = NULL; 1057 struct virtio_shm_region cache_reg; 1058 struct dev_pagemap *pgmap; 1059 bool have_cache; 1060 1061 if (!IS_ENABLED(CONFIG_FUSE_DAX)) 1062 return 0; 1063 1064 dax_dev = alloc_dax(fs, &virtio_fs_dax_ops); 1065 if (IS_ERR(dax_dev)) { 1066 int rc = PTR_ERR(dax_dev); 1067 return rc == -EOPNOTSUPP ? 0 : rc; 1068 } 1069 1070 /* Get cache region */ 1071 have_cache = virtio_get_shm_region(vdev, &cache_reg, 1072 (u8)VIRTIO_FS_SHMCAP_ID_CACHE); 1073 if (!have_cache) { 1074 dev_notice(&vdev->dev, "%s: No cache capability\n", __func__); 1075 return 0; 1076 } 1077 1078 if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len, 1079 dev_name(&vdev->dev))) { 1080 dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n", 1081 cache_reg.addr, cache_reg.len); 1082 return -EBUSY; 1083 } 1084 1085 dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len, 1086 cache_reg.addr); 1087 1088 pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL); 1089 if (!pgmap) 1090 return -ENOMEM; 1091 1092 pgmap->type = MEMORY_DEVICE_FS_DAX; 1093 1094 /* Ideally we would directly use the PCI BAR resource but 1095 * devm_memremap_pages() wants its own copy in pgmap. So 1096 * initialize a struct resource from scratch (only the start 1097 * and end fields will be used). 
1098 */ 1099 pgmap->range = (struct range) { 1100 .start = (phys_addr_t) cache_reg.addr, 1101 .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1, 1102 }; 1103 pgmap->nr_range = 1; 1104 1105 fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap); 1106 if (IS_ERR(fs->window_kaddr)) 1107 return PTR_ERR(fs->window_kaddr); 1108 1109 fs->window_phys_addr = (phys_addr_t) cache_reg.addr; 1110 fs->window_len = (phys_addr_t) cache_reg.len; 1111 1112 dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n", 1113 __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len); 1114 1115 fs->dax_dev = no_free_ptr(dax_dev); 1116 return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax, 1117 fs->dax_dev); 1118 } 1119 1120 static int virtio_fs_probe(struct virtio_device *vdev) 1121 { 1122 struct virtio_fs *fs; 1123 int ret; 1124 1125 fs = kzalloc(sizeof(*fs), GFP_KERNEL); 1126 if (!fs) 1127 return -ENOMEM; 1128 kobject_init(&fs->kobj, &virtio_fs_ktype); 1129 vdev->priv = fs; 1130 1131 ret = virtio_fs_read_tag(vdev, fs); 1132 if (ret < 0) 1133 goto out; 1134 1135 ret = virtio_fs_setup_vqs(vdev, fs); 1136 if (ret < 0) 1137 goto out; 1138 1139 virtio_fs_map_queues(vdev, fs); 1140 1141 ret = virtio_fs_setup_dax(vdev, fs); 1142 if (ret < 0) 1143 goto out_vqs; 1144 1145 /* Bring the device online in case the filesystem is mounted and 1146 * requests need to be sent before we return. 1147 */ 1148 virtio_device_ready(vdev); 1149 1150 ret = virtio_fs_add_instance(vdev, fs); 1151 if (ret < 0) 1152 goto out_vqs; 1153 1154 return 0; 1155 1156 out_vqs: 1157 virtio_reset_device(vdev); 1158 virtio_fs_cleanup_vqs(vdev); 1159 1160 out: 1161 vdev->priv = NULL; 1162 kobject_put(&fs->kobj); 1163 return ret; 1164 } 1165 1166 static void virtio_fs_stop_all_queues(struct virtio_fs *fs) 1167 { 1168 struct virtio_fs_vq *fsvq; 1169 int i; 1170 1171 for (i = 0; i < fs->nvqs; i++) { 1172 fsvq = &fs->vqs[i]; 1173 spin_lock(&fsvq->lock); 1174 fsvq->connected = false; 1175 spin_unlock(&fsvq->lock); 1176 } 1177 } 1178 1179 static void virtio_fs_remove(struct virtio_device *vdev) 1180 { 1181 struct virtio_fs *fs = vdev->priv; 1182 1183 mutex_lock(&virtio_fs_mutex); 1184 /* This device is going away. 
No one should get new reference */ 1185 list_del_init(&fs->list); 1186 virtio_fs_delete_queues_sysfs(fs); 1187 sysfs_remove_link(&fs->kobj, "device"); 1188 kobject_put(fs->mqs_kobj); 1189 kobject_del(&fs->kobj); 1190 virtio_fs_stop_all_queues(fs); 1191 virtio_fs_drain_all_queues_locked(fs); 1192 virtio_reset_device(vdev); 1193 virtio_fs_cleanup_vqs(vdev); 1194 1195 vdev->priv = NULL; 1196 /* Put device reference on virtio_fs object */ 1197 virtio_fs_put_locked(fs); 1198 mutex_unlock(&virtio_fs_mutex); 1199 } 1200 1201 #ifdef CONFIG_PM_SLEEP 1202 static int virtio_fs_freeze(struct virtio_device *vdev) 1203 { 1204 /* TODO need to save state here */ 1205 pr_warn("virtio-fs: suspend/resume not yet supported\n"); 1206 return -EOPNOTSUPP; 1207 } 1208 1209 static int virtio_fs_restore(struct virtio_device *vdev) 1210 { 1211 /* TODO need to restore state here */ 1212 return 0; 1213 } 1214 #endif /* CONFIG_PM_SLEEP */ 1215 1216 static const struct virtio_device_id id_table[] = { 1217 { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID }, 1218 {}, 1219 }; 1220 1221 static const unsigned int feature_table[] = {}; 1222 1223 static struct virtio_driver virtio_fs_driver = { 1224 .driver.name = KBUILD_MODNAME, 1225 .id_table = id_table, 1226 .feature_table = feature_table, 1227 .feature_table_size = ARRAY_SIZE(feature_table), 1228 .probe = virtio_fs_probe, 1229 .remove = virtio_fs_remove, 1230 #ifdef CONFIG_PM_SLEEP 1231 .freeze = virtio_fs_freeze, 1232 .restore = virtio_fs_restore, 1233 #endif 1234 }; 1235 1236 static void virtio_fs_send_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *link) 1237 { 1238 struct virtio_fs_forget *forget; 1239 struct virtio_fs_forget_req *req; 1240 struct virtio_fs *fs = fiq->priv; 1241 struct virtio_fs_vq *fsvq = &fs->vqs[VQ_HIPRIO]; 1242 u64 unique = fuse_get_unique(fiq); 1243 1244 /* Allocate a buffer for the request */ 1245 forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL); 1246 req = &forget->req; 1247 1248 req->ih = (struct fuse_in_header){ 1249 .opcode = FUSE_FORGET, 1250 .nodeid = link->forget_one.nodeid, 1251 .unique = unique, 1252 .len = sizeof(*req), 1253 }; 1254 req->arg = (struct fuse_forget_in){ 1255 .nlookup = link->forget_one.nlookup, 1256 }; 1257 1258 send_forget_request(fsvq, forget, false); 1259 kfree(link); 1260 } 1261 1262 static void virtio_fs_send_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) 1263 { 1264 /* 1265 * TODO interrupts. 1266 * 1267 * Normal fs operations on a local filesystems aren't interruptible. 1268 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW) 1269 * with shared lock between host and guest. 
1270 */ 1271 } 1272 1273 /* Count number of scatter-gather elements required */ 1274 static unsigned int sg_count_fuse_folios(struct fuse_folio_desc *folio_descs, 1275 unsigned int num_folios, 1276 unsigned int total_len) 1277 { 1278 unsigned int i; 1279 unsigned int this_len; 1280 1281 for (i = 0; i < num_folios && total_len; i++) { 1282 this_len = min(folio_descs[i].length, total_len); 1283 total_len -= this_len; 1284 } 1285 1286 return i; 1287 } 1288 1289 /* Return the number of scatter-gather list elements required */ 1290 static unsigned int sg_count_fuse_req(struct fuse_req *req) 1291 { 1292 struct fuse_args *args = req->args; 1293 struct fuse_args_pages *ap = container_of(args, typeof(*ap), args); 1294 unsigned int size, total_sgs = 1 /* fuse_in_header */; 1295 1296 if (args->in_numargs - args->in_pages) 1297 total_sgs += 1; 1298 1299 if (args->in_pages) { 1300 size = args->in_args[args->in_numargs - 1].size; 1301 total_sgs += sg_count_fuse_folios(ap->descs, ap->num_folios, 1302 size); 1303 } 1304 1305 if (!test_bit(FR_ISREPLY, &req->flags)) 1306 return total_sgs; 1307 1308 total_sgs += 1 /* fuse_out_header */; 1309 1310 if (args->out_numargs - args->out_pages) 1311 total_sgs += 1; 1312 1313 if (args->out_pages) { 1314 size = args->out_args[args->out_numargs - 1].size; 1315 total_sgs += sg_count_fuse_folios(ap->descs, ap->num_folios, 1316 size); 1317 } 1318 1319 return total_sgs; 1320 } 1321 1322 /* Add folios to scatter-gather list and return number of elements used */ 1323 static unsigned int sg_init_fuse_folios(struct scatterlist *sg, 1324 struct folio **folios, 1325 struct fuse_folio_desc *folio_descs, 1326 unsigned int num_folios, 1327 unsigned int total_len) 1328 { 1329 unsigned int i; 1330 unsigned int this_len; 1331 1332 for (i = 0; i < num_folios && total_len; i++) { 1333 sg_init_table(&sg[i], 1); 1334 this_len = min(folio_descs[i].length, total_len); 1335 sg_set_folio(&sg[i], folios[i], this_len, folio_descs[i].offset); 1336 total_len -= this_len; 1337 } 1338 1339 return i; 1340 } 1341 1342 /* Add args to scatter-gather list and return number of elements used */ 1343 static unsigned int sg_init_fuse_args(struct scatterlist *sg, 1344 struct fuse_req *req, 1345 struct fuse_arg *args, 1346 unsigned int numargs, 1347 bool argpages, 1348 void *argbuf, 1349 unsigned int *len_used) 1350 { 1351 struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args); 1352 unsigned int total_sgs = 0; 1353 unsigned int len; 1354 1355 len = fuse_len_args(numargs - argpages, args); 1356 if (len) 1357 sg_init_one(&sg[total_sgs++], argbuf, len); 1358 1359 if (argpages) 1360 total_sgs += sg_init_fuse_folios(&sg[total_sgs], 1361 ap->folios, ap->descs, 1362 ap->num_folios, 1363 args[numargs - 1].size); 1364 1365 if (len_used) 1366 *len_used = len; 1367 1368 return total_sgs; 1369 } 1370 1371 /* Add a request to a virtqueue and kick the device */ 1372 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 1373 struct fuse_req *req, bool in_flight, 1374 gfp_t gfp) 1375 { 1376 /* requests need at least 4 elements */ 1377 struct scatterlist *stack_sgs[6]; 1378 struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)]; 1379 struct scatterlist **sgs = stack_sgs; 1380 struct scatterlist *sg = stack_sg; 1381 struct virtqueue *vq; 1382 struct fuse_args *args = req->args; 1383 unsigned int argbuf_used = 0; 1384 unsigned int out_sgs = 0; 1385 unsigned int in_sgs = 0; 1386 unsigned int total_sgs; 1387 unsigned int i; 1388 int ret; 1389 bool notify; 1390 struct fuse_pqueue *fpq; 1391 1392 /* Does the 
sglist fit on the stack? */ 1393 total_sgs = sg_count_fuse_req(req); 1394 if (total_sgs > ARRAY_SIZE(stack_sgs)) { 1395 sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), gfp); 1396 sg = kmalloc_array(total_sgs, sizeof(sg[0]), gfp); 1397 if (!sgs || !sg) { 1398 ret = -ENOMEM; 1399 goto out; 1400 } 1401 } 1402 1403 /* Use a bounce buffer since stack args cannot be mapped */ 1404 ret = copy_args_to_argbuf(req, gfp); 1405 if (ret < 0) 1406 goto out; 1407 1408 /* Request elements */ 1409 sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h)); 1410 out_sgs += sg_init_fuse_args(&sg[out_sgs], req, 1411 (struct fuse_arg *)args->in_args, 1412 args->in_numargs, args->in_pages, 1413 req->argbuf, &argbuf_used); 1414 1415 /* Reply elements */ 1416 if (test_bit(FR_ISREPLY, &req->flags)) { 1417 sg_init_one(&sg[out_sgs + in_sgs++], 1418 &req->out.h, sizeof(req->out.h)); 1419 in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req, 1420 args->out_args, args->out_numargs, 1421 args->out_pages, 1422 req->argbuf + argbuf_used, NULL); 1423 } 1424 1425 WARN_ON(out_sgs + in_sgs != total_sgs); 1426 1427 for (i = 0; i < total_sgs; i++) 1428 sgs[i] = &sg[i]; 1429 1430 spin_lock(&fsvq->lock); 1431 1432 if (!fsvq->connected) { 1433 spin_unlock(&fsvq->lock); 1434 ret = -ENOTCONN; 1435 goto out; 1436 } 1437 1438 vq = fsvq->vq; 1439 ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC); 1440 if (ret < 0) { 1441 spin_unlock(&fsvq->lock); 1442 goto out; 1443 } 1444 1445 /* Request successfully sent. */ 1446 fpq = &fsvq->fud->pq; 1447 spin_lock(&fpq->lock); 1448 list_add_tail(&req->list, fpq->processing); 1449 spin_unlock(&fpq->lock); 1450 set_bit(FR_SENT, &req->flags); 1451 /* matches barrier in request_wait_answer() */ 1452 smp_mb__after_atomic(); 1453 1454 if (!in_flight) 1455 inc_in_flight_req(fsvq); 1456 notify = virtqueue_kick_prepare(vq); 1457 1458 spin_unlock(&fsvq->lock); 1459 1460 if (notify) 1461 virtqueue_notify(vq); 1462 1463 out: 1464 if (ret < 0 && req->argbuf) { 1465 kfree(req->argbuf); 1466 req->argbuf = NULL; 1467 } 1468 if (sgs != stack_sgs) { 1469 kfree(sgs); 1470 kfree(sg); 1471 } 1472 1473 return ret; 1474 } 1475 1476 static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req) 1477 { 1478 unsigned int queue_id; 1479 struct virtio_fs *fs; 1480 struct virtio_fs_vq *fsvq; 1481 int ret; 1482 1483 if (req->in.h.opcode != FUSE_NOTIFY_REPLY) 1484 req->in.h.unique = fuse_get_unique(fiq); 1485 1486 clear_bit(FR_PENDING, &req->flags); 1487 1488 fs = fiq->priv; 1489 queue_id = fs->mq_map[raw_smp_processor_id()]; 1490 1491 pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u queue_id %u\n", 1492 __func__, req->in.h.opcode, req->in.h.unique, 1493 req->in.h.nodeid, req->in.h.len, 1494 fuse_len_args(req->args->out_numargs, req->args->out_args), 1495 queue_id); 1496 1497 fsvq = &fs->vqs[queue_id]; 1498 ret = virtio_fs_enqueue_req(fsvq, req, false, GFP_ATOMIC); 1499 if (ret < 0) { 1500 if (ret == -ENOSPC) { 1501 /* 1502 * Virtqueue full. Retry submission from worker 1503 * context as we might be holding fc->bg_lock. 1504 */ 1505 spin_lock(&fsvq->lock); 1506 list_add_tail(&req->list, &fsvq->queued_reqs); 1507 inc_in_flight_req(fsvq); 1508 spin_unlock(&fsvq->lock); 1509 return; 1510 } 1511 req->out.h.error = ret; 1512 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); 1513 1514 /* Can't end request in submission context. 
Use a worker */
		spin_lock(&fsvq->lock);
		list_add_tail(&req->list, &fsvq->end_reqs);
		schedule_work(&fsvq->dispatch_work);
		spin_unlock(&fsvq->lock);
		return;
	}
}

static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
	.send_forget	= virtio_fs_send_forget,
	.send_interrupt	= virtio_fs_send_interrupt,
	.send_req	= virtio_fs_send_req,
	.release	= virtio_fs_fiq_release,
};

static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx)
{
	ctx->rootmode = S_IFDIR;
	ctx->default_permissions = 1;
	ctx->allow_other = 1;
	ctx->max_read = UINT_MAX;
	ctx->blksize = 512;
	ctx->destroy = true;
	ctx->no_control = true;
	ctx->no_force_umount = true;
}

static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
	struct virtio_fs *fs = fc->iq.priv;
	struct fuse_fs_context *ctx = fsc->fs_private;
	unsigned int i;
	int err;

	virtio_fs_ctx_set_defaults(ctx);
	mutex_lock(&virtio_fs_mutex);

	/* After holding mutex, make sure virtiofs device is still there.
	 * Though we are holding a reference to it, driver ->remove might
	 * still have cleaned up virtual queues. In that case bail out.
	 */
	err = -EINVAL;
	if (list_empty(&fs->list)) {
		pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
		goto err;
	}

	err = -ENOMEM;
	/* Allocate fuse_dev for hiprio and request queues */
	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fsvq->fud = fuse_dev_alloc();
		if (!fsvq->fud)
			goto err_free_fuse_devs;
	}

	/* virtiofs allocates and installs its own fuse devices */
	ctx->fudptr = NULL;
	if (ctx->dax_mode != FUSE_DAX_NEVER) {
		if (ctx->dax_mode == FUSE_DAX_ALWAYS && !fs->dax_dev) {
			err = -EINVAL;
			pr_err("virtio-fs: dax can't be enabled as filesystem device does not support it.\n");
			goto err_free_fuse_devs;
		}
		ctx->dax_dev = fs->dax_dev;
	}
	err = fuse_fill_super_common(sb, ctx);
	if (err < 0)
		goto err_free_fuse_devs;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fuse_dev_install(fsvq->fud, fc);
	}

	/* Previous unmount will stop all queues. Start these again */
	virtio_fs_start_all_queues(fs);
	fuse_send_init(fm);
	mutex_unlock(&virtio_fs_mutex);
	return 0;

err_free_fuse_devs:
	virtio_fs_free_devs(fs);
err:
	mutex_unlock(&virtio_fs_mutex);
	return err;
}

static void virtio_fs_conn_destroy(struct fuse_mount *fm)
{
	struct fuse_conn *fc = fm->fc;
	struct virtio_fs *vfs = fc->iq.priv;
	struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO];

	/* Stop dax worker. Soon evict_inodes() will be called which
	 * will free all memory ranges belonging to all inodes.
	 */
	if (IS_ENABLED(CONFIG_FUSE_DAX))
		fuse_dax_cancel_work(fc);

	/* Stop forget queue. Soon destroy will be sent */
	spin_lock(&fsvq->lock);
	fsvq->connected = false;
	spin_unlock(&fsvq->lock);
	virtio_fs_drain_all_queues(vfs);

	fuse_conn_destroy(fm);

	/* fuse_conn_destroy() must have sent destroy. Stop all queues
	 * and drain one more time and free fuse devices. Freeing fuse
Freeing fuse 1630 * devices will drop their reference on fuse_conn and that in 1631 * turn will drop its reference on virtio_fs object. 1632 */ 1633 virtio_fs_stop_all_queues(vfs); 1634 virtio_fs_drain_all_queues(vfs); 1635 virtio_fs_free_devs(vfs); 1636 } 1637 1638 static void virtio_kill_sb(struct super_block *sb) 1639 { 1640 struct fuse_mount *fm = get_fuse_mount_super(sb); 1641 bool last; 1642 1643 /* If mount failed, we can still be called without any fc */ 1644 if (sb->s_root) { 1645 last = fuse_mount_remove(fm); 1646 if (last) 1647 virtio_fs_conn_destroy(fm); 1648 } 1649 kill_anon_super(sb); 1650 fuse_mount_destroy(fm); 1651 } 1652 1653 static int virtio_fs_test_super(struct super_block *sb, 1654 struct fs_context *fsc) 1655 { 1656 struct fuse_mount *fsc_fm = fsc->s_fs_info; 1657 struct fuse_mount *sb_fm = get_fuse_mount_super(sb); 1658 1659 return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv; 1660 } 1661 1662 static int virtio_fs_get_tree(struct fs_context *fsc) 1663 { 1664 struct virtio_fs *fs; 1665 struct super_block *sb; 1666 struct fuse_conn *fc = NULL; 1667 struct fuse_mount *fm; 1668 unsigned int virtqueue_size; 1669 int err = -EIO; 1670 1671 if (!fsc->source) 1672 return invalf(fsc, "No source specified"); 1673 1674 /* This gets a reference on virtio_fs object. This ptr gets installed 1675 * in fc->iq->priv. Once fuse_conn is going away, it calls ->put() 1676 * to drop the reference to this object. 1677 */ 1678 fs = virtio_fs_find_instance(fsc->source); 1679 if (!fs) { 1680 pr_info("virtio-fs: tag <%s> not found\n", fsc->source); 1681 return -EINVAL; 1682 } 1683 1684 virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq); 1685 if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD)) 1686 goto out_err; 1687 1688 err = -ENOMEM; 1689 fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL); 1690 if (!fc) 1691 goto out_err; 1692 1693 fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL); 1694 if (!fm) 1695 goto out_err; 1696 1697 fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs); 1698 fc->release = fuse_free_conn; 1699 fc->delete_stale = true; 1700 fc->auto_submounts = true; 1701 fc->sync_fs = true; 1702 fc->use_pages_for_kvec_io = true; 1703 1704 /* Tell FUSE to split requests that exceed the virtqueue's size */ 1705 fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit, 1706 virtqueue_size - FUSE_HEADER_OVERHEAD); 1707 1708 fsc->s_fs_info = fm; 1709 sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc); 1710 if (fsc->s_fs_info) 1711 fuse_mount_destroy(fm); 1712 if (IS_ERR(sb)) 1713 return PTR_ERR(sb); 1714 1715 if (!sb->s_root) { 1716 err = virtio_fs_fill_super(sb, fsc); 1717 if (err) { 1718 deactivate_locked_super(sb); 1719 return err; 1720 } 1721 1722 sb->s_flags |= SB_ACTIVE; 1723 } 1724 1725 WARN_ON(fsc->root); 1726 fsc->root = dget(sb->s_root); 1727 return 0; 1728 1729 out_err: 1730 kfree(fc); 1731 virtio_fs_put(fs); 1732 return err; 1733 } 1734 1735 static const struct fs_context_operations virtio_fs_context_ops = { 1736 .free = virtio_fs_free_fsc, 1737 .parse_param = virtio_fs_parse_param, 1738 .get_tree = virtio_fs_get_tree, 1739 }; 1740 1741 static int virtio_fs_init_fs_context(struct fs_context *fsc) 1742 { 1743 struct fuse_fs_context *ctx; 1744 1745 if (fsc->purpose == FS_CONTEXT_FOR_SUBMOUNT) 1746 return fuse_init_fs_context_submount(fsc); 1747 1748 ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL); 1749 if (!ctx) 1750 return -ENOMEM; 1751 fsc->fs_private = ctx; 1752 fsc->ops = &virtio_fs_context_ops; 1753 return 0; 1754 } 1755 1756 
static struct file_system_type virtio_fs_type = { 1757 .owner = THIS_MODULE, 1758 .name = "virtiofs", 1759 .init_fs_context = virtio_fs_init_fs_context, 1760 .kill_sb = virtio_kill_sb, 1761 .fs_flags = FS_ALLOW_IDMAP, 1762 }; 1763 1764 static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) 1765 { 1766 const struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj); 1767 1768 add_uevent_var(env, "TAG=%s", fs->tag); 1769 return 0; 1770 } 1771 1772 static const struct kset_uevent_ops virtio_fs_uevent_ops = { 1773 .uevent = virtio_fs_uevent, 1774 }; 1775 1776 static int __init virtio_fs_sysfs_init(void) 1777 { 1778 virtio_fs_kset = kset_create_and_add("virtiofs", &virtio_fs_uevent_ops, 1779 fs_kobj); 1780 if (!virtio_fs_kset) 1781 return -ENOMEM; 1782 return 0; 1783 } 1784 1785 static void virtio_fs_sysfs_exit(void) 1786 { 1787 kset_unregister(virtio_fs_kset); 1788 virtio_fs_kset = NULL; 1789 } 1790 1791 static int __init virtio_fs_init(void) 1792 { 1793 int ret; 1794 1795 ret = virtio_fs_sysfs_init(); 1796 if (ret < 0) 1797 return ret; 1798 1799 ret = register_virtio_driver(&virtio_fs_driver); 1800 if (ret < 0) 1801 goto sysfs_exit; 1802 1803 ret = register_filesystem(&virtio_fs_type); 1804 if (ret < 0) 1805 goto unregister_virtio_driver; 1806 1807 return 0; 1808 1809 unregister_virtio_driver: 1810 unregister_virtio_driver(&virtio_fs_driver); 1811 sysfs_exit: 1812 virtio_fs_sysfs_exit(); 1813 return ret; 1814 } 1815 module_init(virtio_fs_init); 1816 1817 static void __exit virtio_fs_exit(void) 1818 { 1819 unregister_filesystem(&virtio_fs_type); 1820 unregister_virtio_driver(&virtio_fs_driver); 1821 virtio_fs_sysfs_exit(); 1822 } 1823 module_exit(virtio_fs_exit); 1824 1825 MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>"); 1826 MODULE_DESCRIPTION("Virtio Filesystem"); 1827 MODULE_LICENSE("GPL"); 1828 MODULE_ALIAS_FS(KBUILD_MODNAME); 1829 MODULE_DEVICE_TABLE(virtio, id_table); 1830