// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018-2020 Intel Corporation.
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * Author: Tiwei Bie <tiwei.bie@intel.com>
 *         Jason Wang <jasowang@redhat.com>
 *
 * Thanks to Michael S. Tsirkin for the valuable comments and
 * suggestions, and to Cunming Liang and Zhihong Wang for all
 * their support.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>

#include "vhost.h"

enum {
	VHOST_VDPA_BACKEND_FEATURES =
	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
	(1ULL << VHOST_BACKEND_F_IOTLB_ASID),
};

#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)

#define VHOST_VDPA_IOTLB_BUCKETS 16

struct vhost_vdpa_as {
	struct hlist_node hash_link;
	struct vhost_iotlb iotlb;
	u32 id;
};

struct vhost_vdpa {
	struct vhost_dev vdev;
	struct iommu_domain *domain;
	struct vhost_virtqueue *vqs;
	struct completion completion;
	struct vdpa_device *vdpa;
	struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
	struct device dev;
	struct cdev cdev;
	atomic_t opened;
	u32 nvqs;
	int virtio_id;
	int minor;
	struct eventfd_ctx *config_ctx;
	int in_batch;
	struct vdpa_iova_range range;
	u32 batch_asid;
	bool suspended;
};

static DEFINE_IDA(vhost_vdpa_ida);

static dev_t vhost_vdpa_major;

static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
				   struct vhost_iotlb *iotlb, u64 start,
				   u64 last, u32 asid);

static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
{
	struct vhost_vdpa_as *as = container_of(iotlb, struct vhost_vdpa_as,
						iotlb);
	return as->id;
}

static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid)
{
	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
	struct vhost_vdpa_as *as;

	hlist_for_each_entry(as, head, hash_link)
		if (as->id == asid)
			return as;

	return NULL;
}

static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid)
{
	struct vhost_vdpa_as *as = asid_to_as(v, asid);

	if (!as)
		return NULL;

	return &as->iotlb;
}

static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
{
	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
	struct vhost_vdpa_as *as;

	if (asid_to_as(v, asid))
		return NULL;

	if (asid >= v->vdpa->nas)
		return NULL;

	as = kmalloc(sizeof(*as), GFP_KERNEL);
	if (!as)
		return NULL;

	vhost_iotlb_init(&as->iotlb, 0, 0);
	as->id = asid;
	hlist_add_head(&as->hash_link, head);

	return as;
}

static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
						      u32 asid)
{
	struct vhost_vdpa_as *as = asid_to_as(v, asid);

	if (as)
		return as;

	return vhost_vdpa_alloc_as(v, asid);
}

static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->reset_map)
		ops->reset_map(vdpa, asid);
}

static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
{
	struct vhost_vdpa_as *as = asid_to_as(v, asid);

	if (!as)
		return -EINVAL;

	hlist_del(&as->hash_link);
	vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
	/*
	 * Devices with a vendor specific IOMMU may need to restore the
	 * iotlb to its initial or default state, which cannot be cleaned
	 * up by the whole-range unmap call above. Give them a chance to
	 * clean up or reset the map to the desired state.
	 */
	vhost_vdpa_reset_map(v, asid);
	kfree(as);

	return 0;
}

static void handle_vq_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
	const struct vdpa_config_ops *ops = v->vdpa->config;

	ops->kick_vq(v->vdpa, vq - v->vqs);
}

static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
{
	struct vhost_virtqueue *vq = private;
	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;

	if (call_ctx)
		eventfd_signal(call_ctx);

	return IRQ_HANDLED;
}

static irqreturn_t vhost_vdpa_config_cb(void *private)
{
	struct vhost_vdpa *v = private;
	struct eventfd_ctx *config_ctx = v->config_ctx;

	if (config_ctx)
		eventfd_signal(config_ctx);

	return IRQ_HANDLED;
}

static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];
	const struct vdpa_config_ops *ops = v->vdpa->config;
	struct vdpa_device *vdpa = v->vdpa;
	int ret, irq;

	if (!ops->get_vq_irq)
		return;

	irq = ops->get_vq_irq(vdpa, qid);
	if (irq < 0)
		return;

	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	if (!vq->call_ctx.ctx)
		return;

	vq->call_ctx.producer.token = vq->call_ctx.ctx;
	vq->call_ctx.producer.irq = irq;
	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
	if (unlikely(ret))
		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n",
			 qid, vq->call_ctx.producer.token, ret);
}

static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];

	irq_bypass_unregister_producer(&vq->call_ctx.producer);
}

static int _compat_vdpa_reset(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	u32 flags = 0;

	v->suspended = false;

	if (v->vdev.vqs) {
		flags |= !vhost_backend_has_feature(v->vdev.vqs[0],
						    VHOST_BACKEND_F_IOTLB_PERSIST) ?
			 VDPA_RESET_F_CLEAN_MAP : 0;
	}

	return vdpa_reset(vdpa, flags);
}

static int vhost_vdpa_reset(struct vhost_vdpa *v)
{
	v->in_batch = 0;
	return _compat_vdpa_reset(v);
}

static long vhost_vdpa_bind_mm(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (!vdpa->use_va || !ops->bind_mm)
		return 0;

	return ops->bind_mm(vdpa, v->vdev.mm);
}

static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (!vdpa->use_va || !ops->unbind_mm)
		return;

	ops->unbind_mm(vdpa);
}

static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 device_id;

	device_id = ops->get_device_id(vdpa);

	if (copy_to_user(argp, &device_id, sizeof(device_id)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status;

	status = ops->get_status(vdpa);

	if (copy_to_user(statusp, &status, sizeof(status)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status, status_old;
	u32 nvqs = v->nvqs;
	int ret;
	u16 i;

	if (copy_from_user(&status, statusp, sizeof(status)))
		return -EFAULT;

	status_old = ops->get_status(vdpa);

	/*
	 * Userspace shouldn't remove status bits unless it resets the
	 * status to 0.
	 */
	if (status != 0 && (status_old & ~status) != 0)
		return -EINVAL;

	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_unsetup_vq_irq(v, i);

	if (status == 0) {
		ret = _compat_vdpa_reset(v);
		if (ret)
			return ret;
	} else
		vdpa_set_status(vdpa, status);

	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_setup_vq_irq(v, i);

	return 0;
}

static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
				      struct vhost_vdpa_config *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	size_t size = vdpa->config->get_config_size(vdpa);

	if (c->len == 0 || c->off > size)
		return -EINVAL;

	if (c->len > size - c->off)
		return -E2BIG;

	return 0;
}

static long vhost_vdpa_get_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vdpa_get_config(vdpa, config.off, buf, config.len);

	if (copy_to_user(c->buf, buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_set_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;

	buf = vmemdup_user(c->buf, config.len);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	vdpa_set_config(vdpa, config.off, buf, config.len);

	kvfree(buf);
	return 0;
}

static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	return ops->suspend;
}

static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	return ops->resume;
}

static bool vhost_vdpa_has_desc_group(const struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	return ops->get_vq_desc_group;
}

static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	features = ops->get_device_features(vdpa);

	if (copy_to_user(featurep, &features, sizeof(features)))
		return -EFAULT;

	return 0;
}

static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (!ops->get_backend_features)
		return 0;
	else
		return ops->get_backend_features(vdpa);
}

static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v)
{
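	/*
	 * Mappings can persist across reset when the device relies on the
	 * platform IOMMU (neither set_map nor dma_map), implements
	 * reset_map, or advertises VHOST_BACKEND_F_IOTLB_PERSIST.
	 */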
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	return (!ops->set_map && !ops->dma_map) || ops->reset_map ||
	       vhost_vdpa_get_backend_features(v) & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
}

static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_dev *d = &v->vdev;
	u64 actual_features;
	u64 features;
	int i;

	/*
	 * It's not allowed to change the features after they have
	 * been negotiated.
	 */
	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
		return -EBUSY;

	if (copy_from_user(&features, featurep, sizeof(features)))
		return -EFAULT;

	if (vdpa_set_features(vdpa, features))
		return -EINVAL;

	/* let the vqs know what has been configured */
	actual_features = ops->get_driver_features(vdpa);
	for (i = 0; i < d->nvqs; ++i) {
		struct vhost_virtqueue *vq = d->vqs[i];

		mutex_lock(&vq->mutex);
		vq->acked_features = actual_features;
		mutex_unlock(&vq->mutex);
	}

	return 0;
}

static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u16 num;

	num = ops->get_vq_num_max(vdpa);

	if (copy_to_user(argp, &num, sizeof(num)))
		return -EFAULT;

	return 0;
}

static void vhost_vdpa_config_put(struct vhost_vdpa *v)
{
	if (v->config_ctx) {
		eventfd_ctx_put(v->config_ctx);
		v->config_ctx = NULL;
	}
}

static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_callback cb;
	int fd;
	struct eventfd_ctx *ctx;

	cb.callback = vhost_vdpa_config_cb;
	cb.private = v;
	if (copy_from_user(&fd, argp, sizeof(fd)))
		return -EFAULT;

	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
	swap(ctx, v->config_ctx);

	if (!IS_ERR_OR_NULL(ctx))
		eventfd_ctx_put(ctx);

	if (IS_ERR(v->config_ctx)) {
		long ret = PTR_ERR(v->config_ctx);

		v->config_ctx = NULL;
		return ret;
	}

	v->vdpa->config->set_config_cb(v->vdpa, &cb);

	return 0;
}

static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vhost_vdpa_iova_range range = {
		.first = v->range.first,
		.last = v->range.last,
	};

	if (copy_to_user(argp, &range, sizeof(range)))
		return -EFAULT;
	return 0;
}

static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 size;

	size = ops->get_config_size(vdpa);

	if (copy_to_user(argp, &size, sizeof(size)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;

	if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs)))
		return -EFAULT;

	return 0;
}

/* After a successful return of this ioctl the device must not process more
 * virtqueue descriptors. The device can answer to reads or writes of config
 * fields as if it were not suspended. In particular, writing to "queue_enable"
 * with a value of 1 will not make the device start processing buffers.
 */
static long vhost_vdpa_suspend(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int ret;

	if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK))
		return 0;

	if (!ops->suspend)
		return -EOPNOTSUPP;

	ret = ops->suspend(vdpa);
	if (!ret)
		v->suspended = true;

	return ret;
}

/* After a successful return of this ioctl the device resumes processing
 * virtqueue descriptors. The device becomes fully operational the same way it
 * was before it was suspended.
 */
static long vhost_vdpa_resume(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int ret;

	if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK))
		return 0;

	if (!ops->resume)
		return -EOPNOTSUPP;

	ret = ops->resume(vdpa);
	if (!ret)
		v->suspended = false;

	return ret;
}

static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
				   void __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_vq_state vq_state;
	struct vdpa_callback cb;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	u32 idx;
	long r;

	r = get_user(idx, (u32 __user *)argp);
	if (r < 0)
		return r;

	if (idx >= v->nvqs)
		return -ENOBUFS;

	idx = array_index_nospec(idx, v->nvqs);
	vq = &v->vqs[idx];

	switch (cmd) {
	case VHOST_VDPA_SET_VRING_ENABLE:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		ops->set_vq_ready(vdpa, idx, s.num);
		return 0;
	case VHOST_VDPA_GET_VRING_GROUP:
		if (!ops->get_vq_group)
			return -EOPNOTSUPP;
		s.index = idx;
		s.num = ops->get_vq_group(vdpa, idx);
		if (s.num >= vdpa->ngroups)
			return -EIO;
		else if (copy_to_user(argp, &s, sizeof(s)))
			return -EFAULT;
		return 0;
	case VHOST_VDPA_GET_VRING_DESC_GROUP:
		if (!vhost_vdpa_has_desc_group(v))
			return -EOPNOTSUPP;
		s.index = idx;
		s.num = ops->get_vq_desc_group(vdpa, idx);
		if (s.num >= vdpa->ngroups)
			return -EIO;
		else if (copy_to_user(argp, &s, sizeof(s)))
			return -EFAULT;
		return 0;
	case VHOST_VDPA_SET_GROUP_ASID:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		if (s.num >= vdpa->nas)
			return -EINVAL;
		if (!ops->set_group_asid)
			return -EOPNOTSUPP;
		return ops->set_group_asid(vdpa, idx, s.num);
	case VHOST_VDPA_GET_VRING_SIZE:
		if (!ops->get_vq_size)
			return -EOPNOTSUPP;
		s.index = idx;
		s.num = ops->get_vq_size(vdpa, idx);
		if (copy_to_user(argp, &s, sizeof(s)))
			return -EFAULT;
		return 0;
	case VHOST_GET_VRING_BASE:
		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
		if (r)
			return r;

		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
			vq->last_avail_idx = vq_state.packed.last_avail_idx |
					     (vq_state.packed.last_avail_counter << 15);
			vq->last_used_idx = vq_state.packed.last_used_idx |
					    (vq_state.packed.last_used_counter << 15);
		} else {
			vq->last_avail_idx = vq_state.split.avail_index;
		}
		break;
	}

	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
	if (r)
		return r;

	switch (cmd) {
	case VHOST_SET_VRING_ADDR:
		if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
			return -EINVAL;

		if (ops->set_vq_address(vdpa, idx,
					(u64)(uintptr_t)vq->desc,
					(u64)(uintptr_t)vq->avail,
					(u64)(uintptr_t)vq->used))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_BASE:
		if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
			return -EINVAL;

		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
			vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
			vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
			vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
			vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
		} else {
			vq_state.split.avail_index = vq->last_avail_idx;
		}
		r = ops->set_vq_state(vdpa, idx, &vq_state);
		break;

	case VHOST_SET_VRING_CALL:
		if (vq->call_ctx.ctx) {
			cb.callback = vhost_vdpa_virtqueue_cb;
			cb.private = vq;
			cb.trigger = vq->call_ctx.ctx;
		} else {
			cb.callback = NULL;
			cb.private = NULL;
			cb.trigger = NULL;
		}
		ops->set_vq_cb(vdpa, idx, &cb);
		vhost_vdpa_setup_vq_irq(v, idx);
		break;

	case VHOST_SET_VRING_NUM:
		ops->set_vq_num(vdpa, idx, vq->num);
		break;
	}

	return r;
}

static long vhost_vdpa_unlocked_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;
	void __user *argp = (void __user *)arg;
	u64 __user *featurep = argp;
	u64 features;
	long r = 0;

	if (cmd == VHOST_SET_BACKEND_FEATURES) {
		if (copy_from_user(&features, featurep, sizeof(features)))
			return -EFAULT;
		if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
				 BIT_ULL(VHOST_BACKEND_F_DESC_ASID) |
				 BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST) |
				 BIT_ULL(VHOST_BACKEND_F_SUSPEND) |
				 BIT_ULL(VHOST_BACKEND_F_RESUME) |
				 BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK)))
			return -EOPNOTSUPP;
		if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
		    !vhost_vdpa_can_suspend(v))
			return -EOPNOTSUPP;
		if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) &&
		    !vhost_vdpa_can_resume(v))
			return -EOPNOTSUPP;
		if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
		    !(features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)))
			return -EINVAL;
		if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
		    !vhost_vdpa_has_desc_group(v))
			return -EOPNOTSUPP;
		if ((features & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)) &&
		    !vhost_vdpa_has_persistent_map(v))
			return -EOPNOTSUPP;
		vhost_set_backend_features(&v->vdev, features);
		return 0;
	}

	mutex_lock(&d->mutex);

	switch (cmd) {
	case VHOST_VDPA_GET_DEVICE_ID:
		r = vhost_vdpa_get_device_id(v, argp);
		break;
	case VHOST_VDPA_GET_STATUS:
		r = vhost_vdpa_get_status(v, argp);
		break;
	case VHOST_VDPA_SET_STATUS:
		r = vhost_vdpa_set_status(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG:
		r = vhost_vdpa_get_config(v, argp);
		break;
	case VHOST_VDPA_SET_CONFIG:
		r = vhost_vdpa_set_config(v, argp);
		break;
	case VHOST_GET_FEATURES:
		r = vhost_vdpa_get_features(v, argp);
		break;
	case VHOST_SET_FEATURES:
		r = vhost_vdpa_set_features(v, argp);
		break;
	case VHOST_VDPA_GET_VRING_NUM:
		r = vhost_vdpa_get_vring_num(v, argp);
		break;
	case VHOST_VDPA_GET_GROUP_NUM:
		if (copy_to_user(argp, &v->vdpa->ngroups,
				 sizeof(v->vdpa->ngroups)))
			r = -EFAULT;
		break;
	case VHOST_VDPA_GET_AS_NUM:
		if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)))
			r = -EFAULT;
		break;
	case VHOST_SET_LOG_BASE:
	case VHOST_SET_LOG_FD:
		r = -ENOIOCTLCMD;
		break;
	case VHOST_VDPA_SET_CONFIG_CALL:
		r = vhost_vdpa_set_config_call(v, argp);
		break;
	case VHOST_GET_BACKEND_FEATURES:
		features = VHOST_VDPA_BACKEND_FEATURES;
		if (vhost_vdpa_can_suspend(v))
			features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
		if (vhost_vdpa_can_resume(v))
			features |= BIT_ULL(VHOST_BACKEND_F_RESUME);
		if (vhost_vdpa_has_desc_group(v))
			features |= BIT_ULL(VHOST_BACKEND_F_DESC_ASID);
		if (vhost_vdpa_has_persistent_map(v))
			features |= BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
		features |= vhost_vdpa_get_backend_features(v);
		if (copy_to_user(featurep, &features, sizeof(features)))
			r = -EFAULT;
		break;
	case VHOST_VDPA_GET_IOVA_RANGE:
		r = vhost_vdpa_get_iova_range(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG_SIZE:
		r = vhost_vdpa_get_config_size(v, argp);
		break;
	case VHOST_VDPA_GET_VQS_COUNT:
		r = vhost_vdpa_get_vqs_count(v, argp);
		break;
	case VHOST_VDPA_SUSPEND:
		r = vhost_vdpa_suspend(v);
		break;
	case VHOST_VDPA_RESUME:
		r = vhost_vdpa_resume(v);
		break;
	default:
		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
		break;
	}

	if (r)
		goto out;

	switch (cmd) {
	case VHOST_SET_OWNER:
		r = vhost_vdpa_bind_mm(v);
		if (r)
			vhost_dev_reset_owner(d, NULL);
		break;
	}
out:
	mutex_unlock(&d->mutex);
	return r;
}

static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
				     struct vhost_iotlb_map *map, u32 asid)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->dma_map) {
		ops->dma_unmap(vdpa, asid, map->start, map->size);
	} else if (ops->set_map == NULL) {
		iommu_unmap(v->domain, map->start, map->size);
	}
}

static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
				u64 start, u64 last, u32 asid)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb_map *map;
	struct page *page;
	unsigned long pfn, pinned;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		pinned = PFN_DOWN(map->size);
		for (pfn = PFN_DOWN(map->addr);
		     pinned > 0; pfn++, pinned--) {
			page = pfn_to_page(pfn);
			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(page);
			unpin_user_page(page);
		}
		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
		vhost_vdpa_general_unmap(v, map, asid);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
				u64 start, u64 last, u32 asid)
{
	struct vhost_iotlb_map *map;
	struct vdpa_map_file *map_file;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		map_file = (struct vdpa_map_file *)map->opaque;
		fput(map_file->file);
		kfree(map_file);
		vhost_vdpa_general_unmap(v, map, asid);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
				   struct vhost_iotlb *iotlb, u64 start,
				   u64 last, u32 asid)
{
	struct vdpa_device *vdpa = v->vdpa;

	if (vdpa->use_va)
		return vhost_vdpa_va_unmap(v, iotlb, start, last, asid);

	return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid);
}

static int perm_to_iommu_flags(u32 perm)
{
	int flags = 0;
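
	/* Translate vhost access permissions into IOMMU mapping flags. */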
	switch (perm) {
	case VHOST_ACCESS_WO:
		flags |= IOMMU_WRITE;
		break;
	case VHOST_ACCESS_RO:
		flags |= IOMMU_READ;
		break;
	case VHOST_ACCESS_RW:
		flags |= (IOMMU_WRITE | IOMMU_READ);
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags | IOMMU_CACHE;
}

static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
			  u64 iova, u64 size, u64 pa, u32 perm, void *opaque)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 asid = iotlb_to_asid(iotlb);
	int r = 0;

	r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1,
				      pa, perm, opaque);
	if (r)
		return r;

	if (ops->dma_map) {
		r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
	} else if (ops->set_map) {
		if (!v->in_batch)
			r = ops->set_map(vdpa, asid, iotlb);
	} else {
		r = iommu_map(v->domain, iova, pa, size,
			      perm_to_iommu_flags(perm),
			      GFP_KERNEL_ACCOUNT);
	}
	if (r) {
		vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
		return r;
	}

	if (!vdpa->use_va)
		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);

	return 0;
}

static void vhost_vdpa_unmap(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 asid = iotlb_to_asid(iotlb);

	vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid);

	if (ops->set_map) {
		if (!v->in_batch)
			ops->set_map(vdpa, asid, iotlb);
	}
}

static int vhost_vdpa_va_map(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size, u64 uaddr, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	u64 offset, map_size, map_iova = iova;
	struct vdpa_map_file *map_file;
	struct vm_area_struct *vma;
	int ret = 0;

	mmap_read_lock(dev->mm);

	while (size) {
		vma = find_vma(dev->mm, uaddr);
		if (!vma) {
			ret = -EINVAL;
			break;
		}
		map_size = min(size, vma->vm_end - uaddr);
		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
		      !(vma->vm_flags & (VM_IO | VM_PFNMAP))))
			goto next;

		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
		if (!map_file) {
			ret = -ENOMEM;
			break;
		}
		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
		map_file->offset = offset;
		map_file->file = get_file(vma->vm_file);
		ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr,
				     perm, map_file);
		if (ret) {
			fput(map_file->file);
			kfree(map_file);
			break;
		}
next:
		size -= map_size;
		uaddr += map_size;
		map_iova += map_size;
	}
	if (ret)
		vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova);

	mmap_read_unlock(dev->mm);

	return ret;
}

static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size, u64 uaddr, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	struct page **page_list;
	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
	unsigned int gup_flags = FOLL_LONGTERM;
	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
	unsigned long lock_limit, sz2pin, nchunks, i;
	u64 start = iova;
	long pinned;
	int ret = 0;

	/* Limit the use of memory for bookkeeping */
	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list)
		return -ENOMEM;

	if (perm & VHOST_ACCESS_WO)
		gup_flags |= FOLL_WRITE;

	npages = PFN_UP(size + (iova & ~PAGE_MASK));
	if (!npages) {
		ret = -EINVAL;
		goto free;
	}

	mmap_read_lock(dev->mm);

	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
		ret = -ENOMEM;
		goto unlock;
	}

	cur_base = uaddr & PAGE_MASK;
	iova &= PAGE_MASK;
	nchunks = 0;

	while (npages) {
		sz2pin = min_t(unsigned long, npages, list_size);
		pinned = pin_user_pages(cur_base, sz2pin,
					gup_flags, page_list);
		if (sz2pin != pinned) {
			if (pinned < 0) {
				ret = pinned;
			} else {
				unpin_user_pages(page_list, pinned);
				ret = -ENOMEM;
			}
			goto out;
		}
		nchunks++;

		if (!last_pfn)
			map_pfn = page_to_pfn(page_list[0]);

		for (i = 0; i < pinned; i++) {
			unsigned long this_pfn = page_to_pfn(page_list[i]);
			u64 csize;

			if (last_pfn && (this_pfn != last_pfn + 1)) {
				/* Pin a contiguous chunk of memory */
				csize = PFN_PHYS(last_pfn - map_pfn + 1);
				ret = vhost_vdpa_map(v, iotlb, iova, csize,
						     PFN_PHYS(map_pfn),
						     perm, NULL);
				if (ret) {
					/*
					 * Unpin the pages that are left unmapped
					 * from this point on in the current
					 * page_list. The remaining outstanding
					 * ones which may stride across several
					 * chunks will be covered in the common
					 * error path subsequently.
					 */
					unpin_user_pages(&page_list[i],
							 pinned - i);
					goto out;
				}

				map_pfn = this_pfn;
				iova += csize;
				nchunks = 0;
			}

			last_pfn = this_pfn;
		}

		cur_base += PFN_PHYS(pinned);
		npages -= pinned;
	}

	/* Pin the rest chunk */
	ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1),
			     PFN_PHYS(map_pfn), perm, NULL);
out:
	if (ret) {
		if (nchunks) {
			unsigned long pfn;

			/*
			 * Unpin the outstanding pages which are yet to be
			 * mapped but haven't due to vdpa_map() or
			 * pin_user_pages() failure.
			 *
			 * Mapped pages are accounted in vdpa_map(), hence
			 * the corresponding unpinning will be handled by
			 * vdpa_unmap().
			 */
			WARN_ON(!last_pfn);
			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
				unpin_user_page(pfn_to_page(pfn));
		}
		vhost_vdpa_unmap(v, iotlb, start, size);
	}
unlock:
	mmap_read_unlock(dev->mm);
free:
	free_page((unsigned long)page_list);
	return ret;
}

static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
					   struct vhost_iotlb *iotlb,
					   struct vhost_iotlb_msg *msg)
{
	struct vdpa_device *vdpa = v->vdpa;

	if (msg->iova < v->range.first || !msg->size ||
	    msg->iova > U64_MAX - msg->size + 1 ||
	    msg->iova + msg->size - 1 > v->range.last)
		return -EINVAL;

	if (vhost_iotlb_itree_first(iotlb, msg->iova,
				    msg->iova + msg->size - 1))
		return -EEXIST;

	if (vdpa->use_va)
		return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size,
					 msg->uaddr, msg->perm);

	return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr,
				 msg->perm);
}

static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
					struct vhost_iotlb_msg *msg)
{
	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_iotlb *iotlb = NULL;
	struct vhost_vdpa_as *as = NULL;
	int r = 0;

	mutex_lock(&dev->mutex);

	r = vhost_dev_check_owner(dev);
	if (r)
		goto unlock;

	if (msg->type == VHOST_IOTLB_UPDATE ||
	    msg->type == VHOST_IOTLB_BATCH_BEGIN) {
		as = vhost_vdpa_find_alloc_as(v, asid);
		if (!as) {
			dev_err(&v->dev, "can't find and alloc asid %d\n",
				asid);
			r = -EINVAL;
			goto unlock;
		}
		iotlb = &as->iotlb;
	} else
		iotlb = asid_to_iotlb(v, asid);

	if ((v->in_batch && v->batch_asid != asid) || !iotlb) {
		if (v->in_batch && v->batch_asid != asid) {
			dev_info(&v->dev, "batch id %d asid %d\n",
				 v->batch_asid, asid);
		}
		if (!iotlb)
			dev_err(&v->dev, "no iotlb for asid %d\n", asid);
		r = -EINVAL;
		goto unlock;
	}

	switch (msg->type) {
	case VHOST_IOTLB_UPDATE:
		r = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
		break;
	case VHOST_IOTLB_INVALIDATE:
		vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
		break;
	case VHOST_IOTLB_BATCH_BEGIN:
		v->batch_asid = asid;
		v->in_batch = true;
		break;
	case VHOST_IOTLB_BATCH_END:
		if (v->in_batch && ops->set_map)
			ops->set_map(vdpa, asid, iotlb);
		v->in_batch = false;
		break;
	default:
		r = -EINVAL;
		break;
	}
unlock:
	mutex_unlock(&dev->mutex);

	return r;
}

static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
					 struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vhost_vdpa *v = file->private_data;
	struct vhost_dev *dev = &v->vdev;

	return vhost_chr_write_iter(dev, from);
}

static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
	int ret;

	/* Device wants to do DMA by itself */
	if (ops->set_map || ops->dma_map)
		return 0;

	if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) {
		dev_warn_once(&v->dev,
			      "Failed to allocate domain, device is not IOMMU cache coherent capable\n");
		return -ENOTSUPP;
	}

	v->domain = iommu_paging_domain_alloc(dma_dev);
	if (IS_ERR(v->domain)) {
		ret = PTR_ERR(v->domain);
		v->domain = NULL;
		return ret;
	}

	ret = iommu_attach_device(v->domain, dma_dev);
	if (ret)
		goto err_attach;

	return 0;

err_attach:
	iommu_domain_free(v->domain);
	v->domain = NULL;
	return ret;
}

static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);

	if (v->domain) {
		iommu_detach_device(v->domain, dma_dev);
		iommu_domain_free(v->domain);
	}

	v->domain = NULL;
}

static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
{
	struct vdpa_iova_range *range = &v->range;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->get_iova_range) {
		*range = ops->get_iova_range(vdpa);
	} else if (v->domain && v->domain->geometry.force_aperture) {
		range->first = v->domain->geometry.aperture_start;
		range->last = v->domain->geometry.aperture_end;
	} else {
		range->first = 0;
		range->last = ULLONG_MAX;
	}
}

static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
{
	struct vhost_vdpa_as *as;
	u32 asid;

	for (asid = 0; asid < v->vdpa->nas; asid++) {
		as = asid_to_as(v, asid);
		if (as)
			vhost_vdpa_remove_as(v, asid);
	}

	vhost_vdpa_free_domain(v);
	vhost_dev_cleanup(&v->vdev);
	kfree(v->vdev.vqs);
	v->vdev.vqs = NULL;
}

static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v;
	struct vhost_dev *dev;
	struct vhost_virtqueue **vqs;
	int r, opened;
	u32 i, nvqs;

	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);

	opened = atomic_cmpxchg(&v->opened, 0, 1);
	if (opened)
		return -EBUSY;

	nvqs = v->nvqs;
	r = vhost_vdpa_reset(v);
	if (r)
		goto err;

	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		r = -ENOMEM;
		goto err;
	}

	dev = &v->vdev;
	for (i = 0; i < nvqs; i++) {
		vqs[i] = &v->vqs[i];
		vqs[i]->handle_kick = handle_vq_kick;
	}
	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
		       vhost_vdpa_process_iotlb_msg);

	r = vhost_vdpa_alloc_domain(v);
	if (r)
		goto err_alloc_domain;

	vhost_vdpa_set_iova_range(v);

	filep->private_data = v;

	return 0;

err_alloc_domain:
	vhost_vdpa_cleanup(v);
err:
	atomic_dec(&v->opened);
	return r;
}

static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
{
	u32 i;

	for (i = 0; i < v->nvqs; i++)
		vhost_vdpa_unsetup_vq_irq(v, i);
}

static int vhost_vdpa_release(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;

	mutex_lock(&d->mutex);
	filep->private_data = NULL;
	vhost_vdpa_clean_irq(v);
	vhost_vdpa_reset(v);
	vhost_dev_stop(&v->vdev);
	vhost_vdpa_unbind_mm(v);
	vhost_vdpa_config_put(v);
	vhost_vdpa_cleanup(v);
	mutex_unlock(&d->mutex);

	atomic_dec(&v->opened);
	complete(&v->completion);

	return 0;
}

#ifdef CONFIG_MMU
static vm_fault_t
vhost_vdpa_fault(struct vm_fault *vmf)
{
	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	struct vm_area_struct *vma = vmf->vma;
	u16 index = vma->vm_pgoff;

	notify = ops->get_vq_notification(vdpa, index);

	return vmf_insert_pfn(vma, vmf->address & PAGE_MASK, PFN_DOWN(notify.addr));
}

static const struct vm_operations_struct vhost_vdpa_vm_ops = {
	.fault = vhost_vdpa_fault,
};

static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vhost_vdpa *v = vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	unsigned long index = vma->vm_pgoff;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (vma->vm_flags & VM_READ)
		return -EINVAL;
	if (index > 65535)
		return -EINVAL;
	if (!ops->get_vq_notification)
		return -ENOTSUPP;

	/* To be safe and easily modelled by userspace, we only
	 * support the doorbell which sits on the page boundary and
	 * does not share the page with other registers.
	 */
	notify = ops->get_vq_notification(vdpa, index);
	if (notify.addr & (PAGE_SIZE - 1))
		return -EINVAL;
	if (vma->vm_end - vma->vm_start != notify.size)
		return -ENOTSUPP;

	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
	vma->vm_ops = &vhost_vdpa_vm_ops;
	return 0;
}
#endif /* CONFIG_MMU */

static const struct file_operations vhost_vdpa_fops = {
	.owner = THIS_MODULE,
	.open = vhost_vdpa_open,
	.release = vhost_vdpa_release,
	.write_iter = vhost_vdpa_chr_write_iter,
	.unlocked_ioctl = vhost_vdpa_unlocked_ioctl,
#ifdef CONFIG_MMU
	.mmap = vhost_vdpa_mmap,
#endif /* CONFIG_MMU */
	.compat_ioctl = compat_ptr_ioctl,
};

static void vhost_vdpa_release_dev(struct device *device)
{
	struct vhost_vdpa *v =
	       container_of(device, struct vhost_vdpa, dev);

	ida_free(&vhost_vdpa_ida, v->minor);
	kfree(v->vqs);
	kfree(v);
}

static int vhost_vdpa_probe(struct vdpa_device *vdpa)
{
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa *v;
	int minor;
	int i, r;

	/*
	 * We can't support a platform IOMMU device with more than one
	 * group or address space.
	 */
	if (!ops->set_map && !ops->dma_map &&
	    (vdpa->ngroups > 1 || vdpa->nas > 1))
		return -EOPNOTSUPP;

	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!v)
		return -ENOMEM;

	minor = ida_alloc_max(&vhost_vdpa_ida, VHOST_VDPA_DEV_MAX - 1,
			      GFP_KERNEL);
	if (minor < 0) {
		kfree(v);
		return minor;
	}

	atomic_set(&v->opened, 0);
	v->minor = minor;
	v->vdpa = vdpa;
	v->nvqs = vdpa->nvqs;
	v->virtio_id = ops->get_device_id(vdpa);

	device_initialize(&v->dev);
	v->dev.release = vhost_vdpa_release_dev;
	v->dev.parent = &vdpa->dev;
	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
			       GFP_KERNEL);
	if (!v->vqs) {
		r = -ENOMEM;
		goto err;
	}

	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
	if (r)
		goto err;

	cdev_init(&v->cdev, &vhost_vdpa_fops);
	v->cdev.owner = THIS_MODULE;

	r = cdev_device_add(&v->cdev, &v->dev);
	if (r)
		goto err;

	init_completion(&v->completion);
	vdpa_set_drvdata(vdpa, v);

	for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++)
		INIT_HLIST_HEAD(&v->as[i]);

	return 0;

err:
	put_device(&v->dev);
	return r;
}

static void vhost_vdpa_remove(struct vdpa_device *vdpa)
{
	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
	int opened;

	cdev_device_del(&v->cdev, &v->dev);

	do {
		opened = atomic_cmpxchg(&v->opened, 0, 1);
		if (!opened)
			break;
		wait_for_completion(&v->completion);
	} while (1);

	put_device(&v->dev);
}

static struct vdpa_driver vhost_vdpa_driver = {
	.driver = {
		.name = "vhost_vdpa",
	},
	.probe = vhost_vdpa_probe,
	.remove = vhost_vdpa_remove,
};

static int __init vhost_vdpa_init(void)
{
	int r;

	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
				"vhost-vdpa");
	if (r)
		goto err_alloc_chrdev;

	r = vdpa_register_driver(&vhost_vdpa_driver);
	if (r)
		goto err_vdpa_register_driver;

	return 0;

err_vdpa_register_driver:
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
err_alloc_chrdev:
	return r;
}
module_init(vhost_vdpa_init);

static void __exit vhost_vdpa_exit(void)
{
	vdpa_unregister_driver(&vhost_vdpa_driver);
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
}
module_exit(vhost_vdpa_exit);

MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");