// SPDX-License-Identifier: GPL-2.0
/*
 * virtio-fs: Virtio Filesystem
 * Copyright (C) 2018 Red Hat, Inc.
 */

#include <linux/fs.h>
#include <linux/dax.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/group_cpus.h>
#include <linux/pfn_t.h>
#include <linux/memremap.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_fs.h>
#include <linux/delay.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/highmem.h>
#include <linux/cleanup.h>
#include <linux/uio.h>
#include "fuse_i.h"

/* Used to help calculate the FUSE connection's max_pages limit for a request's
 * size. Parts of the struct fuse_req are sliced into scatter-gather lists in
 * addition to the pages used, so this can help account for that overhead.
 */
#define FUSE_HEADER_OVERHEAD	4

/* List of virtio-fs device instances and a lock for the list. Also provides
 * mutual exclusion in device removal and mounting path
 */
static DEFINE_MUTEX(virtio_fs_mutex);
static LIST_HEAD(virtio_fs_instances);

/* The /sys/fs/virtiofs/ kset */
static struct kset *virtio_fs_kset;

enum {
	VQ_HIPRIO,
	VQ_REQUEST
};

#define VQ_NAME_LEN	24

/* Per-virtqueue state */
struct virtio_fs_vq {
	spinlock_t lock;
	struct virtqueue *vq;     /* protected by ->lock */
	struct work_struct done_work;
	struct list_head queued_reqs;
	struct list_head end_reqs;	/* End these requests */
	struct work_struct dispatch_work;
	struct fuse_dev *fud;
	bool connected;
	long in_flight;
	struct completion in_flight_zero; /* No inflight requests */
	struct kobject *kobj;
	char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;

/* A virtio-fs device instance */
struct virtio_fs {
	struct kobject kobj;
	struct kobject *mqs_kobj;
	struct list_head list;    /* on virtio_fs_instances */
	char *tag;
	struct virtio_fs_vq *vqs;
	unsigned int nvqs;               /* number of virtqueues */
	unsigned int num_request_queues; /* number of request queues */
	struct dax_device *dax_dev;

	unsigned int *mq_map; /* index = cpu id, value = request vq id */

	/* DAX memory window where file contents are mapped */
	void *window_kaddr;
	phys_addr_t window_phys_addr;
	size_t window_len;
};

struct virtio_fs_forget_req {
	struct fuse_in_header ih;
	struct fuse_forget_in arg;
};

struct virtio_fs_forget {
	/* This request can be temporarily queued on virt queue */
	struct list_head list;
	struct virtio_fs_forget_req req;
};

struct virtio_fs_req_work {
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq;
	struct work_struct done_work;
};

static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight);

static const struct constant_table dax_param_enums[] = {
	{"always",	FUSE_DAX_ALWAYS },
	{"never",	FUSE_DAX_NEVER },
	{"inode",	FUSE_DAX_INODE_USER },
	{}
};

enum {
	OPT_DAX,
	OPT_DAX_ENUM,
};

static const struct fs_parameter_spec virtio_fs_parameters[] = {
	fsparam_flag("dax", OPT_DAX),
	fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums),
	{}
};
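/*
 * The "dax" parameter above corresponds to mount-time options, e.g.
 * (the tag and mount point below are arbitrary examples):
 *
 *	mount -t virtiofs mytag /mnt
 *	mount -t virtiofs -o dax=always mytag /mnt
 *
 * A bare "dax" behaves like "dax=always"; the accepted values are the
 * dax_param_enums entries above (always/never/inode).
 */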
static int virtio_fs_parse_param(struct fs_context *fsc,
				 struct fs_parameter *param)
{
	struct fs_parse_result result;
	struct fuse_fs_context *ctx = fsc->fs_private;
	int opt;

	opt = fs_parse(fsc, virtio_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case OPT_DAX:
		ctx->dax_mode = FUSE_DAX_ALWAYS;
		break;
	case OPT_DAX_ENUM:
		ctx->dax_mode = result.uint_32;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static void virtio_fs_free_fsc(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;

	kfree(ctx);
}

static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
	struct virtio_fs *fs = vq->vdev->priv;

	return &fs->vqs[vq->index];
}

/* Should be called with fsvq->lock held. */
static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
{
	fsvq->in_flight++;
}

/* Should be called with fsvq->lock held. */
static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight <= 0);
	fsvq->in_flight--;
	if (!fsvq->in_flight)
		complete(&fsvq->in_flight_zero);
}

static ssize_t tag_show(struct kobject *kobj,
			struct kobj_attribute *attr, char *buf)
{
	struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj);

	return sysfs_emit(buf, "%s\n", fs->tag);
}

static struct kobj_attribute virtio_fs_tag_attr = __ATTR_RO(tag);

static struct attribute *virtio_fs_attrs[] = {
	&virtio_fs_tag_attr.attr,
	NULL
};
ATTRIBUTE_GROUPS(virtio_fs);

static void virtio_fs_ktype_release(struct kobject *kobj)
{
	struct virtio_fs *vfs = container_of(kobj, struct virtio_fs, kobj);

	kfree(vfs->mq_map);
	kfree(vfs->vqs);
	kfree(vfs);
}

static const struct kobj_type virtio_fs_ktype = {
	.release = virtio_fs_ktype_release,
	.sysfs_ops = &kobj_sysfs_ops,
	.default_groups = virtio_fs_groups,
};

static struct virtio_fs_vq *virtio_fs_kobj_to_vq(struct virtio_fs *fs,
						 struct kobject *kobj)
{
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		if (kobj == fs->vqs[i].kobj)
			return &fs->vqs[i];
	}
	return NULL;
}

static ssize_t name_show(struct kobject *kobj,
			 struct kobj_attribute *attr, char *buf)
{
	struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj);
	struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj);

	if (!fsvq)
		return -EINVAL;
	return sysfs_emit(buf, "%s\n", fsvq->name);
}

static struct kobj_attribute virtio_fs_vq_name_attr = __ATTR_RO(name);

static ssize_t cpu_list_show(struct kobject *kobj,
			     struct kobj_attribute *attr, char *buf)
{
	struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj);
	struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj);
	unsigned int cpu, qid;
	const size_t size = PAGE_SIZE - 1;
	bool first = true;
	int ret = 0, pos = 0;

	if (!fsvq)
		return -EINVAL;

	qid = fsvq->vq->index;
	for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
		if (qid < VQ_REQUEST || (fs->mq_map[cpu] == qid - VQ_REQUEST)) {
			if (first)
				ret = snprintf(buf + pos, size - pos, "%u", cpu);
			else
				ret = snprintf(buf + pos, size - pos, ", %u", cpu);

			if (ret >= size - pos)
				break;
			first = false;
			pos += ret;
		}
	}
	ret = snprintf(buf + pos, size + 1 - pos, "\n");
	return pos + ret;
}

static struct kobj_attribute virtio_fs_vq_cpu_list_attr = __ATTR_RO(cpu_list);
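/*
 * For illustration: the attributes above end up under the per-device kobject
 * created in virtio_fs_add_instance(), so a device with index 0 and two
 * request queues is exposed roughly as
 *
 *	/sys/fs/virtiofs/0/tag
 *	/sys/fs/virtiofs/0/device            (symlink to the virtio device)
 *	/sys/fs/virtiofs/0/mqs/0/name        ("hiprio")
 *	/sys/fs/virtiofs/0/mqs/1/cpu_list    (CPUs mapped to "requests.0")
 *
 * The numeric directory name comes from the underlying virtio device index.
 */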
static struct attribute *virtio_fs_vq_attrs[] = {
	&virtio_fs_vq_name_attr.attr,
	&virtio_fs_vq_cpu_list_attr.attr,
	NULL
};

static struct attribute_group virtio_fs_vq_attr_group = {
	.attrs = virtio_fs_vq_attrs,
};

/* Make sure virtio_fs_mutex is held */
static void virtio_fs_put_locked(struct virtio_fs *fs)
{
	lockdep_assert_held(&virtio_fs_mutex);

	kobject_put(&fs->kobj);
}

static void virtio_fs_put(struct virtio_fs *fs)
{
	mutex_lock(&virtio_fs_mutex);
	virtio_fs_put_locked(fs);
	mutex_unlock(&virtio_fs_mutex);
}

static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
{
	struct virtio_fs *vfs = fiq->priv;

	virtio_fs_put(vfs);
}

static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight < 0);

	/* Wait for in flight requests to finish.*/
	spin_lock(&fsvq->lock);
	if (fsvq->in_flight) {
		/* We are holding virtio_fs_mutex. There should not be any
		 * waiters waiting for completion.
		 */
		reinit_completion(&fsvq->in_flight_zero);
		spin_unlock(&fsvq->lock);
		wait_for_completion(&fsvq->in_flight_zero);
	} else {
		spin_unlock(&fsvq->lock);
	}

	flush_work(&fsvq->done_work);
	flush_work(&fsvq->dispatch_work);
}

static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		virtio_fs_drain_queue(fsvq);
	}
}

static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
{
	/* Provides mutual exclusion between ->remove and ->kill_sb
	 * paths. We don't want both of these draining queue at the
	 * same time. Current completion logic reinits completion
	 * and that means there should not be any other thread
	 * doing reinit or waiting for completion already.
	 */
	mutex_lock(&virtio_fs_mutex);
	virtio_fs_drain_all_queues_locked(fs);
	mutex_unlock(&virtio_fs_mutex);
}

static void virtio_fs_start_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = true;
		spin_unlock(&fsvq->lock);
	}
}

static void virtio_fs_delete_queues_sysfs(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		kobject_put(fsvq->kobj);
	}
}

static int virtio_fs_add_queues_sysfs(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	char buff[12];
	int i, j, ret;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];

		sprintf(buff, "%d", i);
		fsvq->kobj = kobject_create_and_add(buff, fs->mqs_kobj);
		if (!fsvq->kobj) {
			ret = -ENOMEM;
			goto out_del;
		}

		ret = sysfs_create_group(fsvq->kobj, &virtio_fs_vq_attr_group);
		if (ret) {
			kobject_put(fsvq->kobj);
			goto out_del;
		}
	}

	return 0;

out_del:
	for (j = 0; j < i; j++) {
		fsvq = &fs->vqs[j];
		kobject_put(fsvq->kobj);
	}
	return ret;
}

/* Add a new instance to the list or return -EEXIST if tag name exists*/
static int virtio_fs_add_instance(struct virtio_device *vdev,
				  struct virtio_fs *fs)
{
	struct virtio_fs *fs2;
	int ret;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs2, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, fs2->tag) == 0) {
			mutex_unlock(&virtio_fs_mutex);
			return -EEXIST;
		}
	}

	/* Use the virtio_device's index as a unique identifier, there is no
	 * need to allocate our own identifiers because the virtio_fs instance
	 * is only visible to userspace as long as the underlying virtio_device
	 * exists.
	 */
	fs->kobj.kset = virtio_fs_kset;
	ret = kobject_add(&fs->kobj, NULL, "%d", vdev->index);
	if (ret < 0)
		goto out_unlock;

	fs->mqs_kobj = kobject_create_and_add("mqs", &fs->kobj);
	if (!fs->mqs_kobj) {
		ret = -ENOMEM;
		goto out_del;
	}

	ret = sysfs_create_link(&fs->kobj, &vdev->dev.kobj, "device");
	if (ret < 0)
		goto out_put;

	ret = virtio_fs_add_queues_sysfs(fs);
	if (ret)
		goto out_remove;

	list_add_tail(&fs->list, &virtio_fs_instances);

	mutex_unlock(&virtio_fs_mutex);

	kobject_uevent(&fs->kobj, KOBJ_ADD);

	return 0;

out_remove:
	sysfs_remove_link(&fs->kobj, "device");
out_put:
	kobject_put(fs->mqs_kobj);
out_del:
	kobject_del(&fs->kobj);
out_unlock:
	mutex_unlock(&virtio_fs_mutex);
	return ret;
}

/* Return the virtio_fs with a given tag, or NULL */
static struct virtio_fs *virtio_fs_find_instance(const char *tag)
{
	struct virtio_fs *fs;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, tag) == 0) {
			kobject_get(&fs->kobj);
			goto found;
		}
	}

	fs = NULL; /* not found */

found:
	mutex_unlock(&virtio_fs_mutex);

	return fs;
}

static void virtio_fs_free_devs(struct virtio_fs *fs)
{
	unsigned int i;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		if (!fsvq->fud)
			continue;

		fuse_dev_free(fsvq->fud);
		fsvq->fud = NULL;
	}
}

/* Read filesystem name from virtio config into fs->tag (must kfree()). */
static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
{
	char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
	char *end;
	size_t len;

	virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
			   &tag_buf, sizeof(tag_buf));
	end = memchr(tag_buf, '\0', sizeof(tag_buf));
	if (end == tag_buf)
		return -EINVAL; /* empty tag */
	if (!end)
		end = &tag_buf[sizeof(tag_buf)];

	len = end - tag_buf;
	fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
	if (!fs->tag)
		return -ENOMEM;
	memcpy(fs->tag, tag_buf, len);
	fs->tag[len] = '\0';

	/* While the VIRTIO specification allows any character, newlines are
	 * awkward on mount(8) command-lines and cause problems in the sysfs
	 * "tag" attr and uevent TAG= properties. Forbid them.
	 */
	if (strchr(fs->tag, '\n')) {
		dev_dbg(&vdev->dev, "refusing virtiofs tag with newline character\n");
		return -EINVAL;
	}

	return 0;
}

/* Work function for hiprio completion */
static void virtio_fs_hiprio_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct virtqueue *vq = fsvq->vq;

	/* Free completed FUSE_FORGET requests */
	spin_lock(&fsvq->lock);
	do {
		unsigned int len;
		void *req;

		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			kfree(req);
			dec_in_flight_req(fsvq);
		}
	} while (!virtqueue_enable_cb(vq));

	if (!list_empty(&fsvq->queued_reqs))
		schedule_work(&fsvq->dispatch_work);

	spin_unlock(&fsvq->lock);
}

static void virtio_fs_request_dispatch_work(struct work_struct *work)
{
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work);
	int ret;

	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
					       list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			break;
		}

		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);
		fuse_request_end(req);
	}

	/* Dispatch pending requests */
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->queued_reqs,
					       struct fuse_req, list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			return;
		}
		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);

		ret = virtio_fs_enqueue_req(fsvq, req, true);
		if (ret < 0) {
			if (ret == -ENOSPC) {
				spin_lock(&fsvq->lock);
				list_add_tail(&req->list, &fsvq->queued_reqs);
				spin_unlock(&fsvq->lock);
				return;
			}
			req->out.h.error = ret;
			spin_lock(&fsvq->lock);
			dec_in_flight_req(fsvq);
			spin_unlock(&fsvq->lock);
			pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
			       ret);
			fuse_request_end(req);
		}
	}
}
/*
 * Returns 1 if queue is full and sender should wait a bit before sending
 * next request, 0 otherwise.
 */
static int send_forget_request(struct virtio_fs_vq *fsvq,
			       struct virtio_fs_forget *forget,
			       bool in_flight)
{
	struct scatterlist sg;
	struct virtqueue *vq;
	int ret = 0;
	bool notify;
	struct virtio_fs_forget_req *req = &forget->req;

	spin_lock(&fsvq->lock);
	if (!fsvq->connected) {
		if (in_flight)
			dec_in_flight_req(fsvq);
		kfree(forget);
		goto out;
	}

	sg_init_one(&sg, req, sizeof(*req));
	vq = fsvq->vq;
	dev_dbg(&vq->vdev->dev, "%s\n", __func__);

	ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC);
	if (ret < 0) {
		if (ret == -ENOSPC) {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
				 ret);
			list_add_tail(&forget->list, &fsvq->queued_reqs);
			if (!in_flight)
				inc_in_flight_req(fsvq);
			/* Queue is full */
			ret = 1;
		} else {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
				 ret);
			kfree(forget);
			if (in_flight)
				dec_in_flight_req(fsvq);
		}
		goto out;
	}

	if (!in_flight)
		inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);
	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);
	return ret;
out:
	spin_unlock(&fsvq->lock);
	return ret;
}

static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
{
	struct virtio_fs_forget *forget;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work);
	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		forget = list_first_entry_or_null(&fsvq->queued_reqs,
						  struct virtio_fs_forget, list);
		if (!forget) {
			spin_unlock(&fsvq->lock);
			return;
		}

		list_del(&forget->list);
		spin_unlock(&fsvq->lock);
		if (send_forget_request(fsvq, forget, true))
			return;
	}
}

/* Allocate and copy args into req->argbuf */
static int copy_args_to_argbuf(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	unsigned int offset = 0;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int len;
	unsigned int i;

	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
	      fuse_len_args(num_out, args->out_args);

	req->argbuf = kmalloc(len, GFP_ATOMIC);
	if (!req->argbuf)
		return -ENOMEM;

	for (i = 0; i < num_in; i++) {
		memcpy(req->argbuf + offset,
		       args->in_args[i].value,
		       args->in_args[i].size);
		offset += args->in_args[i].size;
	}

	return 0;
}

/* Copy args out of and free req->argbuf */
static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
{
	unsigned int remaining;
	unsigned int offset;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int i;

	remaining = req->out.h.len - sizeof(req->out.h);
	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);

	for (i = 0; i < num_out; i++) {
		unsigned int argsize = args->out_args[i].size;

		if (args->out_argvar &&
		    i == args->out_numargs - 1 &&
		    argsize > remaining) {
			argsize = remaining;
		}

		memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
		offset += argsize;

		if (i != args->out_numargs - 1)
			remaining -= argsize;
	}

	/* Store the actual size of the variable-length arg */
	if (args->out_argvar)
		args->out_args[args->out_numargs - 1].size = remaining;

	kfree(req->argbuf);
	req->argbuf = NULL;
}
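/*
 * Rough picture of the bounce buffer managed by the two helpers above
 * (illustrative only): req->argbuf packs the non-page "in" args back to
 * back, immediately followed by room for the non-page "out" args, e.g.
 *
 *	argbuf: [in_args[0]][in_args[1]][out_args[0]]
 *	                                ^ offset = fuse_len_args(num_in, ...)
 *
 * Page-backed args are never copied here; they go into the scatter-gather
 * list directly from the request's pages.
 */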
/* Work function for request completion */
static void virtio_fs_request_complete(struct fuse_req *req,
				       struct virtio_fs_vq *fsvq)
{
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct fuse_args *args;
	struct fuse_args_pages *ap;
	unsigned int len, i, thislen;
	struct page *page;

	/*
	 * TODO verify that server properly follows FUSE protocol
	 * (oh.uniq, oh.len)
	 */
	args = req->args;
	copy_args_from_argbuf(args, req);

	if (args->out_pages && args->page_zeroing) {
		len = args->out_args[args->out_numargs - 1].size;
		ap = container_of(args, typeof(*ap), args);
		for (i = 0; i < ap->num_pages; i++) {
			thislen = ap->descs[i].length;
			if (len < thislen) {
				WARN_ON(ap->descs[i].offset);
				page = ap->pages[i];
				zero_user_segment(page, len, thislen);
				len = 0;
			} else {
				len -= thislen;
			}
		}
	}

	spin_lock(&fpq->lock);
	clear_bit(FR_SENT, &req->flags);
	spin_unlock(&fpq->lock);

	fuse_request_end(req);
	spin_lock(&fsvq->lock);
	dec_in_flight_req(fsvq);
	spin_unlock(&fsvq->lock);
}

static void virtio_fs_complete_req_work(struct work_struct *work)
{
	struct virtio_fs_req_work *w =
		container_of(work, typeof(*w), done_work);

	virtio_fs_request_complete(w->req, w->fsvq);
	kfree(w);
}

static void virtio_fs_requests_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct virtqueue *vq = fsvq->vq;
	struct fuse_req *req;
	struct fuse_req *next;
	unsigned int len;
	LIST_HEAD(reqs);

	/* Collect completed requests off the virtqueue */
	spin_lock(&fsvq->lock);
	do {
		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			spin_lock(&fpq->lock);
			list_move_tail(&req->list, &reqs);
			spin_unlock(&fpq->lock);
		}
	} while (!virtqueue_enable_cb(vq));
	spin_unlock(&fsvq->lock);

	/* End requests */
	list_for_each_entry_safe(req, next, &reqs, list) {
		list_del_init(&req->list);

		/* blocking async request completes in a worker context */
		if (req->args->may_block) {
			struct virtio_fs_req_work *w;

			w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
			INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
			w->fsvq = fsvq;
			w->req = req;
			schedule_work(&w->done_work);
		} else {
			virtio_fs_request_complete(req, fsvq);
		}
	}

	/* Try to push previously queued requests, as the queue might no longer be full */
	spin_lock(&fsvq->lock);
	if (!list_empty(&fsvq->queued_reqs))
		schedule_work(&fsvq->dispatch_work);
	spin_unlock(&fsvq->lock);
}
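/*
 * Illustration of the CPU-to-queue map built by virtio_fs_map_queues()
 * below (numbers are only an example): with 8 possible CPUs and
 * num_request_queues == 2, the transport affinities or group_cpus_evenly()
 * would typically yield
 *
 *	fs->mq_map[0..3] = 0	-> "requests.0"
 *	fs->mq_map[4..7] = 1	-> "requests.1"
 *
 * virtio_fs_send_req() later picks a queue via fs->mq_map[raw_smp_processor_id()].
 */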
static void virtio_fs_map_queues(struct virtio_device *vdev, struct virtio_fs *fs)
{
	const struct cpumask *mask, *masks;
	unsigned int q, cpu;

	/* First attempt to map using existing transport layer affinities
	 * e.g. PCIe MSI-X
	 */
	if (!vdev->config->get_vq_affinity)
		goto fallback;

	for (q = 0; q < fs->num_request_queues; q++) {
		mask = vdev->config->get_vq_affinity(vdev, VQ_REQUEST + q);
		if (!mask)
			goto fallback;

		for_each_cpu(cpu, mask)
			fs->mq_map[cpu] = q;
	}

	return;
fallback:
	/* Attempt to map evenly in groups over the CPUs */
	masks = group_cpus_evenly(fs->num_request_queues);
	/* If even this fails we default to all CPUs use queue zero */
	if (!masks) {
		for_each_possible_cpu(cpu)
			fs->mq_map[cpu] = 0;
		return;
	}

	for (q = 0; q < fs->num_request_queues; q++) {
		for_each_cpu(cpu, &masks[q])
			fs->mq_map[cpu] = q;
	}
	kfree(masks);
}

/* Virtqueue interrupt handler */
static void virtio_fs_vq_done(struct virtqueue *vq)
{
	struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);

	dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);

	schedule_work(&fsvq->done_work);
}

static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name,
			      int vq_type)
{
	strscpy(fsvq->name, name, VQ_NAME_LEN);
	spin_lock_init(&fsvq->lock);
	INIT_LIST_HEAD(&fsvq->queued_reqs);
	INIT_LIST_HEAD(&fsvq->end_reqs);
	init_completion(&fsvq->in_flight_zero);

	if (vq_type == VQ_REQUEST) {
		INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work);
		INIT_WORK(&fsvq->dispatch_work,
			  virtio_fs_request_dispatch_work);
	} else {
		INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work);
		INIT_WORK(&fsvq->dispatch_work,
			  virtio_fs_hiprio_dispatch_work);
	}
}
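/*
 * Virtqueue layout used by the setup code below, for orientation (it follows
 * the VQ_HIPRIO/VQ_REQUEST enum at the top of the file):
 *
 *	vq 0:      "hiprio"                        forget requests only
 *	vq 1..N:   "requests.0" .. "requests.N-1"  normal FUSE requests
 *
 * where N is num_request_queues read from the device config space.
 */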
/* Initialize virtqueues */
static int virtio_fs_setup_vqs(struct virtio_device *vdev,
			       struct virtio_fs *fs)
{
	struct virtqueue_info *vqs_info;
	struct virtqueue **vqs;
	/* Specify pre_vectors to ensure that the queues before the
	 * request queues (e.g. hiprio) don't claim any of the CPUs in
	 * the multi-queue mapping and interrupt affinities
	 */
	struct irq_affinity desc = { .pre_vectors = VQ_REQUEST };
	unsigned int i;
	int ret = 0;

	virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues,
			&fs->num_request_queues);
	if (fs->num_request_queues == 0)
		return -EINVAL;

	/* Truncate nr of request queues to nr_cpu_ids */
	fs->num_request_queues = min_t(unsigned int, fs->num_request_queues,
				       nr_cpu_ids);
	fs->nvqs = VQ_REQUEST + fs->num_request_queues;
	fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
	if (!fs->vqs)
		return -ENOMEM;

	vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
	fs->mq_map = kcalloc_node(nr_cpu_ids, sizeof(*fs->mq_map), GFP_KERNEL,
				  dev_to_node(&vdev->dev));
	vqs_info = kcalloc(fs->nvqs, sizeof(*vqs_info), GFP_KERNEL);
	if (!vqs || !vqs_info || !fs->mq_map) {
		ret = -ENOMEM;
		goto out;
	}

	/* Initialize the hiprio/forget request virtqueue */
	vqs_info[VQ_HIPRIO].callback = virtio_fs_vq_done;
	virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO);
	vqs_info[VQ_HIPRIO].name = fs->vqs[VQ_HIPRIO].name;

	/* Initialize the requests virtqueues */
	for (i = VQ_REQUEST; i < fs->nvqs; i++) {
		char vq_name[VQ_NAME_LEN];

		snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST);
		virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST);
		vqs_info[i].callback = virtio_fs_vq_done;
		vqs_info[i].name = fs->vqs[i].name;
	}

	ret = virtio_find_vqs(vdev, fs->nvqs, vqs, vqs_info, &desc);
	if (ret < 0)
		goto out;

	for (i = 0; i < fs->nvqs; i++)
		fs->vqs[i].vq = vqs[i];

	virtio_fs_start_all_queues(fs);
out:
	kfree(vqs_info);
	kfree(vqs);
	if (ret) {
		kfree(fs->vqs);
		kfree(fs->mq_map);
	}
	return ret;
}

/* Free virtqueues (device must already be reset) */
static void virtio_fs_cleanup_vqs(struct virtio_device *vdev)
{
	vdev->config->del_vqs(vdev);
}
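/*
 * Note on the DAX window (illustrative): virtio_fs_setup_dax() further below
 * memremaps the device's cache shared-memory region and records it as
 * window_kaddr/window_phys_addr/window_len. virtio_fs_direct_access() then
 * resolves a window page offset by plain arithmetic, e.g. pgoff 0 corresponds
 * to window_kaddr and window_phys_addr themselves.
 */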
/* Map a window offset to a page frame number. The window offset will have
 * been produced by .iomap_begin(), which maps a file offset to a window
 * offset.
 */
static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
				    long nr_pages, enum dax_access_mode mode,
				    void **kaddr, pfn_t *pfn)
{
	struct virtio_fs *fs = dax_get_private(dax_dev);
	phys_addr_t offset = PFN_PHYS(pgoff);
	size_t max_nr_pages = fs->window_len / PAGE_SIZE - pgoff;

	if (kaddr)
		*kaddr = fs->window_kaddr + offset;
	if (pfn)
		*pfn = phys_to_pfn_t(fs->window_phys_addr + offset,
				     PFN_DEV | PFN_MAP);
	return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
}

static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
				     pgoff_t pgoff, size_t nr_pages)
{
	long rc;
	void *kaddr;

	rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr,
			       NULL);
	if (rc < 0)
		return dax_mem2blk_err(rc);

	memset(kaddr, 0, nr_pages << PAGE_SHIFT);
	dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
	return 0;
}

static const struct dax_operations virtio_fs_dax_ops = {
	.direct_access = virtio_fs_direct_access,
	.zero_page_range = virtio_fs_zero_page_range,
};

static void virtio_fs_cleanup_dax(void *data)
{
	struct dax_device *dax_dev = data;

	kill_dax(dax_dev);
	put_dax(dax_dev);
}

DEFINE_FREE(cleanup_dax, struct dax_device *, if (!IS_ERR_OR_NULL(_T)) virtio_fs_cleanup_dax(_T))

static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
{
	struct dax_device *dax_dev __free(cleanup_dax) = NULL;
	struct virtio_shm_region cache_reg;
	struct dev_pagemap *pgmap;
	bool have_cache;

	if (!IS_ENABLED(CONFIG_FUSE_DAX))
		return 0;

	dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
	if (IS_ERR(dax_dev)) {
		int rc = PTR_ERR(dax_dev);
		return rc == -EOPNOTSUPP ? 0 : rc;
	}

	/* Get cache region */
	have_cache = virtio_get_shm_region(vdev, &cache_reg,
					   (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
	if (!have_cache) {
		dev_notice(&vdev->dev, "%s: No cache capability\n", __func__);
		return 0;
	}

	if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len,
				     dev_name(&vdev->dev))) {
		dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n",
			 cache_reg.addr, cache_reg.len);
		return -EBUSY;
	}

	dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len,
		   cache_reg.addr);

	pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL);
	if (!pgmap)
		return -ENOMEM;

	pgmap->type = MEMORY_DEVICE_FS_DAX;

	/* Ideally we would directly use the PCI BAR resource but
	 * devm_memremap_pages() wants its own copy in pgmap. So
	 * initialize a struct resource from scratch (only the start
	 * and end fields will be used).
	 */
	pgmap->range = (struct range) {
		.start = (phys_addr_t) cache_reg.addr,
		.end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1,
	};
	pgmap->nr_range = 1;

	fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap);
	if (IS_ERR(fs->window_kaddr))
		return PTR_ERR(fs->window_kaddr);

	fs->window_phys_addr = (phys_addr_t) cache_reg.addr;
	fs->window_len = (phys_addr_t) cache_reg.len;

	dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
		__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);

	fs->dax_dev = no_free_ptr(dax_dev);
	return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
					fs->dax_dev);
}

static int virtio_fs_probe(struct virtio_device *vdev)
{
	struct virtio_fs *fs;
	int ret;

	fs = kzalloc(sizeof(*fs), GFP_KERNEL);
	if (!fs)
		return -ENOMEM;
	kobject_init(&fs->kobj, &virtio_fs_ktype);
	vdev->priv = fs;

	ret = virtio_fs_read_tag(vdev, fs);
	if (ret < 0)
		goto out;

	ret = virtio_fs_setup_vqs(vdev, fs);
	if (ret < 0)
		goto out;

	virtio_fs_map_queues(vdev, fs);

	ret = virtio_fs_setup_dax(vdev, fs);
	if (ret < 0)
		goto out_vqs;

	/* Bring the device online in case the filesystem is mounted and
	 * requests need to be sent before we return.
	 */
	virtio_device_ready(vdev);

	ret = virtio_fs_add_instance(vdev, fs);
	if (ret < 0)
		goto out_vqs;

	return 0;

out_vqs:
	virtio_reset_device(vdev);
	virtio_fs_cleanup_vqs(vdev);

out:
	vdev->priv = NULL;
	kobject_put(&fs->kobj);
	return ret;
}

static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = false;
		spin_unlock(&fsvq->lock);
	}
}
static void virtio_fs_remove(struct virtio_device *vdev)
{
	struct virtio_fs *fs = vdev->priv;

	mutex_lock(&virtio_fs_mutex);
	/* This device is going away. No one should get new reference */
	list_del_init(&fs->list);
	virtio_fs_delete_queues_sysfs(fs);
	sysfs_remove_link(&fs->kobj, "device");
	kobject_put(fs->mqs_kobj);
	kobject_del(&fs->kobj);
	virtio_fs_stop_all_queues(fs);
	virtio_fs_drain_all_queues_locked(fs);
	virtio_reset_device(vdev);
	virtio_fs_cleanup_vqs(vdev);

	vdev->priv = NULL;
	/* Put device reference on virtio_fs object */
	virtio_fs_put_locked(fs);
	mutex_unlock(&virtio_fs_mutex);
}

#ifdef CONFIG_PM_SLEEP
static int virtio_fs_freeze(struct virtio_device *vdev)
{
	/* TODO need to save state here */
	pr_warn("virtio-fs: suspend/resume not yet supported\n");
	return -EOPNOTSUPP;
}

static int virtio_fs_restore(struct virtio_device *vdev)
{
	/* TODO need to restore state here */
	return 0;
}
#endif /* CONFIG_PM_SLEEP */

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
	{},
};

static const unsigned int feature_table[] = {};

static struct virtio_driver virtio_fs_driver = {
	.driver.name		= KBUILD_MODNAME,
	.id_table		= id_table,
	.feature_table		= feature_table,
	.feature_table_size	= ARRAY_SIZE(feature_table),
	.probe			= virtio_fs_probe,
	.remove			= virtio_fs_remove,
#ifdef CONFIG_PM_SLEEP
	.freeze			= virtio_fs_freeze,
	.restore		= virtio_fs_restore,
#endif
};

static void virtio_fs_send_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *link)
{
	struct virtio_fs_forget *forget;
	struct virtio_fs_forget_req *req;
	struct virtio_fs *fs = fiq->priv;
	struct virtio_fs_vq *fsvq = &fs->vqs[VQ_HIPRIO];
	u64 unique = fuse_get_unique(fiq);

	/* Allocate a buffer for the request */
	forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
	req = &forget->req;

	req->ih = (struct fuse_in_header){
		.opcode = FUSE_FORGET,
		.nodeid = link->forget_one.nodeid,
		.unique = unique,
		.len = sizeof(*req),
	};
	req->arg = (struct fuse_forget_in){
		.nlookup = link->forget_one.nlookup,
	};

	send_forget_request(fsvq, forget, false);
	kfree(link);
}

static void virtio_fs_send_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
{
	/*
	 * TODO interrupts.
	 *
	 * Normal fs operations on a local filesystem aren't interruptible.
	 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
	 * with a shared lock between host and guest.
	 */
}

/* Count number of scatter-gather elements required */
static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs,
					unsigned int num_pages,
					unsigned int total_len)
{
	unsigned int i;
	unsigned int this_len;

	for (i = 0; i < num_pages && total_len; i++) {
		this_len = min(page_descs[i].length, total_len);
		total_len -= this_len;
	}

	return i;
}

/* Return the number of scatter-gather list elements required */
static unsigned int sg_count_fuse_req(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
	unsigned int size, total_sgs = 1 /* fuse_in_header */;

	if (args->in_numargs - args->in_pages)
		total_sgs += 1;

	if (args->in_pages) {
		size = args->in_args[args->in_numargs - 1].size;
		total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
						 size);
	}

	if (!test_bit(FR_ISREPLY, &req->flags))
		return total_sgs;

	total_sgs += 1 /* fuse_out_header */;

	if (args->out_numargs - args->out_pages)
		total_sgs += 1;

	if (args->out_pages) {
		size = args->out_args[args->out_numargs - 1].size;
		total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
						 size);
	}

	return total_sgs;
}

/* Add pages to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
				       struct page **pages,
				       struct fuse_page_desc *page_descs,
				       unsigned int num_pages,
				       unsigned int total_len)
{
	unsigned int i;
	unsigned int this_len;

	for (i = 0; i < num_pages && total_len; i++) {
		sg_init_table(&sg[i], 1);
		this_len = min(page_descs[i].length, total_len);
		sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
		total_len -= this_len;
	}

	return i;
}

/* Add args to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_args(struct scatterlist *sg,
				      struct fuse_req *req,
				      struct fuse_arg *args,
				      unsigned int numargs,
				      bool argpages,
				      void *argbuf,
				      unsigned int *len_used)
{
	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
	unsigned int total_sgs = 0;
	unsigned int len;

	len = fuse_len_args(numargs - argpages, args);
	if (len)
		sg_init_one(&sg[total_sgs++], argbuf, len);

	if (argpages)
		total_sgs += sg_init_fuse_pages(&sg[total_sgs],
						ap->pages, ap->descs,
						ap->num_pages,
						args[numargs - 1].size);

	if (len_used)
		*len_used = len;

	return total_sgs;
}
/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight)
{
	/* requests need at least 4 elements */
	struct scatterlist *stack_sgs[6];
	struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
	struct scatterlist **sgs = stack_sgs;
	struct scatterlist *sg = stack_sg;
	struct virtqueue *vq;
	struct fuse_args *args = req->args;
	unsigned int argbuf_used = 0;
	unsigned int out_sgs = 0;
	unsigned int in_sgs = 0;
	unsigned int total_sgs;
	unsigned int i;
	int ret;
	bool notify;
	struct fuse_pqueue *fpq;

	/* Does the sglist fit on the stack? */
	total_sgs = sg_count_fuse_req(req);
	if (total_sgs > ARRAY_SIZE(stack_sgs)) {
		sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
		sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
		if (!sgs || !sg) {
			ret = -ENOMEM;
			goto out;
		}
	}

	/* Use a bounce buffer since stack args cannot be mapped */
	ret = copy_args_to_argbuf(req);
	if (ret < 0)
		goto out;

	/* Request elements */
	sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
	out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
				     (struct fuse_arg *)args->in_args,
				     args->in_numargs, args->in_pages,
				     req->argbuf, &argbuf_used);

	/* Reply elements */
	if (test_bit(FR_ISREPLY, &req->flags)) {
		sg_init_one(&sg[out_sgs + in_sgs++],
			    &req->out.h, sizeof(req->out.h));
		in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
					    args->out_args, args->out_numargs,
					    args->out_pages,
					    req->argbuf + argbuf_used, NULL);
	}

	WARN_ON(out_sgs + in_sgs != total_sgs);

	for (i = 0; i < total_sgs; i++)
		sgs[i] = &sg[i];

	spin_lock(&fsvq->lock);

	if (!fsvq->connected) {
		spin_unlock(&fsvq->lock);
		ret = -ENOTCONN;
		goto out;
	}

	vq = fsvq->vq;
	ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
	if (ret < 0) {
		spin_unlock(&fsvq->lock);
		goto out;
	}

	/* Request successfully sent. */
	fpq = &fsvq->fud->pq;
	spin_lock(&fpq->lock);
	list_add_tail(&req->list, fpq->processing);
	spin_unlock(&fpq->lock);
	set_bit(FR_SENT, &req->flags);
	/* matches barrier in request_wait_answer() */
	smp_mb__after_atomic();

	if (!in_flight)
		inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);

	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);

out:
	if (ret < 0 && req->argbuf) {
		kfree(req->argbuf);
		req->argbuf = NULL;
	}
	if (sgs != stack_sgs) {
		kfree(sgs);
		kfree(sg);
	}

	return ret;
}
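/*
 * Sketch of the scatter-gather layout virtio_fs_enqueue_req() above builds
 * for one request (element counts vary per request):
 *
 *	out (driver to device): [fuse_in_header][in args bounce buffer][in pages ...]
 *	in  (device to driver): [fuse_out_header][out args bounce buffer][out pages ...]
 *
 * The "in" half is only added when FR_ISREPLY is set, matching
 * sg_count_fuse_req().
 */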
static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req)
{
	unsigned int queue_id;
	struct virtio_fs *fs;
	struct virtio_fs_vq *fsvq;
	int ret;

	if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
		req->in.h.unique = fuse_get_unique(fiq);

	clear_bit(FR_PENDING, &req->flags);

	fs = fiq->priv;
	queue_id = VQ_REQUEST + fs->mq_map[raw_smp_processor_id()];

	pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u queue_id %u\n",
		 __func__, req->in.h.opcode, req->in.h.unique,
		 req->in.h.nodeid, req->in.h.len,
		 fuse_len_args(req->args->out_numargs, req->args->out_args),
		 queue_id);

	fsvq = &fs->vqs[queue_id];
	ret = virtio_fs_enqueue_req(fsvq, req, false);
	if (ret < 0) {
		if (ret == -ENOSPC) {
			/*
			 * Virtqueue full. Retry submission from worker
			 * context as we might be holding fc->bg_lock.
			 */
			spin_lock(&fsvq->lock);
			list_add_tail(&req->list, &fsvq->queued_reqs);
			inc_in_flight_req(fsvq);
			spin_unlock(&fsvq->lock);
			return;
		}
		req->out.h.error = ret;
		pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);

		/* Can't end request in submission context. Use a worker */
		spin_lock(&fsvq->lock);
		list_add_tail(&req->list, &fsvq->end_reqs);
		schedule_work(&fsvq->dispatch_work);
		spin_unlock(&fsvq->lock);
		return;
	}
}

static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
	.send_forget	= virtio_fs_send_forget,
	.send_interrupt	= virtio_fs_send_interrupt,
	.send_req	= virtio_fs_send_req,
	.release	= virtio_fs_fiq_release,
};

static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx)
{
	ctx->rootmode = S_IFDIR;
	ctx->default_permissions = 1;
	ctx->allow_other = 1;
	ctx->max_read = UINT_MAX;
	ctx->blksize = 512;
	ctx->destroy = true;
	ctx->no_control = true;
	ctx->no_force_umount = true;
}

static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
	struct virtio_fs *fs = fc->iq.priv;
	struct fuse_fs_context *ctx = fsc->fs_private;
	unsigned int i;
	int err;

	virtio_fs_ctx_set_defaults(ctx);
	mutex_lock(&virtio_fs_mutex);

	/* After holding mutex, make sure virtiofs device is still there.
	 * Though we are holding a reference to it, driver ->remove might
	 * still have cleaned up virtual queues. In that case bail out.
	 */
	err = -EINVAL;
	if (list_empty(&fs->list)) {
		pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
		goto err;
	}

	err = -ENOMEM;
	/* Allocate fuse_dev for hiprio and notification queues */
	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fsvq->fud = fuse_dev_alloc();
		if (!fsvq->fud)
			goto err_free_fuse_devs;
	}

	/* virtiofs allocates and installs its own fuse devices */
	ctx->fudptr = NULL;
	if (ctx->dax_mode != FUSE_DAX_NEVER) {
		if (ctx->dax_mode == FUSE_DAX_ALWAYS && !fs->dax_dev) {
			err = -EINVAL;
			pr_err("virtio-fs: dax can't be enabled as filesystem"
			       " device does not support it.\n");
			goto err_free_fuse_devs;
		}
		ctx->dax_dev = fs->dax_dev;
	}
	err = fuse_fill_super_common(sb, ctx);
	if (err < 0)
		goto err_free_fuse_devs;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fuse_dev_install(fsvq->fud, fc);
	}

	/* Previous unmount will stop all queues. Start these again */
	virtio_fs_start_all_queues(fs);
	fuse_send_init(fm);
	mutex_unlock(&virtio_fs_mutex);
	return 0;

err_free_fuse_devs:
	virtio_fs_free_devs(fs);
err:
	mutex_unlock(&virtio_fs_mutex);
	return err;
}
static void virtio_fs_conn_destroy(struct fuse_mount *fm)
{
	struct fuse_conn *fc = fm->fc;
	struct virtio_fs *vfs = fc->iq.priv;
	struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO];

	/* Stop dax worker. Soon evict_inodes() will be called which
	 * will free all memory ranges belonging to all inodes.
	 */
	if (IS_ENABLED(CONFIG_FUSE_DAX))
		fuse_dax_cancel_work(fc);

	/* Stop forget queue. Soon destroy will be sent */
	spin_lock(&fsvq->lock);
	fsvq->connected = false;
	spin_unlock(&fsvq->lock);
	virtio_fs_drain_all_queues(vfs);

	fuse_conn_destroy(fm);

	/* fuse_conn_destroy() must have sent destroy. Stop all queues
	 * and drain one more time and free fuse devices. Freeing fuse
	 * devices will drop their reference on fuse_conn and that in
	 * turn will drop its reference on virtio_fs object.
	 */
	virtio_fs_stop_all_queues(vfs);
	virtio_fs_drain_all_queues(vfs);
	virtio_fs_free_devs(vfs);
}

static void virtio_kill_sb(struct super_block *sb)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	bool last;

	/* If mount failed, we can still be called without any fc */
	if (sb->s_root) {
		last = fuse_mount_remove(fm);
		if (last)
			virtio_fs_conn_destroy(fm);
	}
	kill_anon_super(sb);
	fuse_mount_destroy(fm);
}

static int virtio_fs_test_super(struct super_block *sb,
				struct fs_context *fsc)
{
	struct fuse_mount *fsc_fm = fsc->s_fs_info;
	struct fuse_mount *sb_fm = get_fuse_mount_super(sb);

	return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv;
}

static int virtio_fs_get_tree(struct fs_context *fsc)
{
	struct virtio_fs *fs;
	struct super_block *sb;
	struct fuse_conn *fc = NULL;
	struct fuse_mount *fm;
	unsigned int virtqueue_size;
	int err = -EIO;

	/* This gets a reference on virtio_fs object. This ptr gets installed
	 * in fc->iq.priv. Once fuse_conn is going away, it calls ->put()
	 * to drop the reference to this object.
	 */
	fs = virtio_fs_find_instance(fsc->source);
	if (!fs) {
		pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
		return -EINVAL;
	}

	virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq);
	if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD))
		goto out_err;

	err = -ENOMEM;
	fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
	if (!fc)
		goto out_err;

	fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
	if (!fm)
		goto out_err;

	fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs);
	fc->release = fuse_free_conn;
	fc->delete_stale = true;
	fc->auto_submounts = true;
	fc->sync_fs = true;

	/* Tell FUSE to split requests that exceed the virtqueue's size */
	fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
				    virtqueue_size - FUSE_HEADER_OVERHEAD);

	fsc->s_fs_info = fm;
	sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc);
	if (fsc->s_fs_info)
		fuse_mount_destroy(fm);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	if (!sb->s_root) {
		err = virtio_fs_fill_super(sb, fsc);
		if (err) {
			deactivate_locked_super(sb);
			return err;
		}

		sb->s_flags |= SB_ACTIVE;
	}

	WARN_ON(fsc->root);
	fsc->root = dget(sb->s_root);
	return 0;

out_err:
	kfree(fc);
	virtio_fs_put(fs);
	return err;
}

static const struct fs_context_operations virtio_fs_context_ops = {
	.free		= virtio_fs_free_fsc,
	.parse_param	= virtio_fs_parse_param,
	.get_tree	= virtio_fs_get_tree,
};

static int virtio_fs_init_fs_context(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx;

	if (fsc->purpose == FS_CONTEXT_FOR_SUBMOUNT)
		return fuse_init_fs_context_submount(fsc);

	ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;
	fsc->fs_private = ctx;
	fsc->ops = &virtio_fs_context_ops;
	return 0;
}

static struct file_system_type virtio_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "virtiofs",
	.init_fs_context = virtio_fs_init_fs_context,
	.kill_sb	= virtio_kill_sb,
	.fs_flags	= FS_ALLOW_IDMAP,
};

static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
{
	const struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj);

	add_uevent_var(env, "TAG=%s", fs->tag);
	return 0;
}

static const struct kset_uevent_ops virtio_fs_uevent_ops = {
	.uevent = virtio_fs_uevent,
};

static int __init virtio_fs_sysfs_init(void)
{
	virtio_fs_kset = kset_create_and_add("virtiofs", &virtio_fs_uevent_ops,
					     fs_kobj);
	if (!virtio_fs_kset)
		return -ENOMEM;
	return 0;
}

static void virtio_fs_sysfs_exit(void)
{
	kset_unregister(virtio_fs_kset);
	virtio_fs_kset = NULL;
}

static int __init virtio_fs_init(void)
{
	int ret;

	ret = virtio_fs_sysfs_init();
	if (ret < 0)
		return ret;

	ret = register_virtio_driver(&virtio_fs_driver);
	if (ret < 0)
		goto sysfs_exit;

	ret = register_filesystem(&virtio_fs_type);
	if (ret < 0)
		goto unregister_virtio_driver;

	return 0;

unregister_virtio_driver:
	unregister_virtio_driver(&virtio_fs_driver);
sysfs_exit:
	virtio_fs_sysfs_exit();
	return ret;
}
module_init(virtio_fs_init);

static void __exit virtio_fs_exit(void)
{
	unregister_filesystem(&virtio_fs_type);
	unregister_virtio_driver(&virtio_fs_driver);
	virtio_fs_sysfs_exit();
}
module_exit(virtio_fs_exit);

MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
MODULE_DESCRIPTION("Virtio Filesystem");
MODULE_LICENSE("GPL");
MODULE_ALIAS_FS(KBUILD_MODNAME);
MODULE_DEVICE_TABLE(virtio, id_table);