// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018-2020 Intel Corporation.
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * Author: Tiwei Bie <tiwei.bie@intel.com>
 *         Jason Wang <jasowang@redhat.com>
 *
 * Thanks to Michael S. Tsirkin for the valuable comments and
 * suggestions, and to Cunming Liang and Zhihong Wang for all
 * their support.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>

#include "vhost.h"

enum {
	VHOST_VDPA_BACKEND_FEATURES =
	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
	(1ULL << VHOST_BACKEND_F_IOTLB_ASID),
};

#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)

#define VHOST_VDPA_IOTLB_BUCKETS 16

struct vhost_vdpa_as {
	struct hlist_node hash_link;
	struct vhost_iotlb iotlb;
	u32 id;
};

struct vhost_vdpa {
	struct vhost_dev vdev;
	struct iommu_domain *domain;
	struct vhost_virtqueue *vqs;
	struct completion completion;
	struct vdpa_device *vdpa;
	struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
	struct device dev;
	struct cdev cdev;
	atomic_t opened;
	u32 nvqs;
	int virtio_id;
	int minor;
	struct eventfd_ctx *config_ctx;
	int in_batch;
	struct vdpa_iova_range range;
	u32 batch_asid;
	bool suspended;
};

static DEFINE_IDA(vhost_vdpa_ida);

static dev_t vhost_vdpa_major;

static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
				   struct vhost_iotlb *iotlb, u64 start,
				   u64 last, u32 asid);

static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
{
	struct vhost_vdpa_as *as = container_of(iotlb, struct
						vhost_vdpa_as, iotlb);
	return as->id;
}

static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid)
{
	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
	struct vhost_vdpa_as *as;

	hlist_for_each_entry(as, head, hash_link)
		if (as->id == asid)
			return as;

	return NULL;
}

static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid)
{
	struct vhost_vdpa_as *as = asid_to_as(v, asid);

	if (!as)
		return NULL;

	return &as->iotlb;
}

static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
{
	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
	struct vhost_vdpa_as *as;

	if (asid_to_as(v, asid))
		return NULL;

	if (asid >= v->vdpa->nas)
		return NULL;

	as = kmalloc(sizeof(*as), GFP_KERNEL);
	if (!as)
		return NULL;

	vhost_iotlb_init(&as->iotlb, 0, 0);
	as->id = asid;
	hlist_add_head(&as->hash_link, head);

	return as;
}

static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
						      u32 asid)
{
	struct vhost_vdpa_as *as = asid_to_as(v, asid);

	if (as)
		return as;

	return vhost_vdpa_alloc_as(v, asid);
}

static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->reset_map)
		ops->reset_map(vdpa, asid);
}

static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
{
	struct vhost_vdpa_as *as = asid_to_as(v, asid);

	if (!as)
		return -EINVAL;

	hlist_del(&as->hash_link);
	vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
	/*
	 * Devices with a vendor-specific IOMMU may need to restore the
	 * iotlb to its initial or default state, which the whole-range
	 * unmap call above cannot do. Give them a chance to clean up
	 * or reset the map to the desired state.
	 */
	vhost_vdpa_reset_map(v, asid);
	kfree(as);

	return 0;
}

static void handle_vq_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
	const struct vdpa_config_ops *ops = v->vdpa->config;

	ops->kick_vq(v->vdpa, vq - v->vqs);
}

static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
{
	struct vhost_virtqueue *vq = private;
	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;

	if (call_ctx)
		eventfd_signal(call_ctx);

	return IRQ_HANDLED;
}

static irqreturn_t vhost_vdpa_config_cb(void *private)
{
	struct vhost_vdpa *v = private;
	struct eventfd_ctx *config_ctx = v->config_ctx;

	if (config_ctx)
		eventfd_signal(config_ctx);

	return IRQ_HANDLED;
}

static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];
	const struct vdpa_config_ops *ops = v->vdpa->config;
	struct vdpa_device *vdpa = v->vdpa;
	int ret, irq;

	if (!ops->get_vq_irq)
		return;

	irq = ops->get_vq_irq(vdpa, qid);
	if (irq < 0)
		return;

	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	if (!vq->call_ctx.ctx)
		return;

	vq->call_ctx.producer.token = vq->call_ctx.ctx;
	vq->call_ctx.producer.irq = irq;
	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
	if (unlikely(ret))
		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n",
			 qid, vq->call_ctx.producer.token, ret);
}

static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];

	irq_bypass_unregister_producer(&vq->call_ctx.producer);
}

static int _compat_vdpa_reset(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	u32 flags = 0;

	v->suspended = false;

	if (v->vdev.vqs) {
		flags |= !vhost_backend_has_feature(v->vdev.vqs[0],
						    VHOST_BACKEND_F_IOTLB_PERSIST) ?
			 VDPA_RESET_F_CLEAN_MAP : 0;
	}

	return vdpa_reset(vdpa, flags);
}

static int vhost_vdpa_reset(struct vhost_vdpa *v)
{
	v->in_batch = 0;
	return _compat_vdpa_reset(v);
}

static long vhost_vdpa_bind_mm(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (!vdpa->use_va || !ops->bind_mm)
		return 0;

	return ops->bind_mm(vdpa, v->vdev.mm);
}

static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (!vdpa->use_va || !ops->unbind_mm)
		return;

	ops->unbind_mm(vdpa);
}

static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 device_id;

	device_id = ops->get_device_id(vdpa);

	if (copy_to_user(argp, &device_id, sizeof(device_id)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status;

	status = ops->get_status(vdpa);

	if (copy_to_user(statusp, &status, sizeof(status)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status, status_old;
	u32 nvqs = v->nvqs;
	int ret;
	u16 i;

	if (copy_from_user(&status, statusp, sizeof(status)))
		return -EFAULT;

	status_old = ops->get_status(vdpa);

	/*
	 * Userspace shouldn't remove status bits unless resetting the
	 * status to 0.
	 */
	if (status != 0 && (status_old & ~status) != 0)
		return -EINVAL;

	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_unsetup_vq_irq(v, i);

	if (status == 0) {
		ret = _compat_vdpa_reset(v);
		if (ret)
			return ret;
	} else
		vdpa_set_status(vdpa, status);

	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_setup_vq_irq(v, i);

	return 0;
}

static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
				      struct vhost_vdpa_config *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	size_t size = vdpa->config->get_config_size(vdpa);

	if (c->len == 0 || c->off > size)
		return -EINVAL;

	if (c->len > size - c->off)
		return -E2BIG;

	return 0;
}

static long vhost_vdpa_get_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vdpa_get_config(vdpa, config.off, buf, config.len);

	if (copy_to_user(c->buf, buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_set_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;

	buf = vmemdup_user(c->buf, config.len);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	vdpa_set_config(vdpa, config.off, buf, config.len);

	kvfree(buf);
	return 0;
}

static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	return ops->suspend;
}

static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	return ops->resume;
}

static bool vhost_vdpa_has_desc_group(const struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	return ops->get_vq_desc_group;
}

static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	features = ops->get_device_features(vdpa);

	if (copy_to_user(featurep, &features, sizeof(features)))
		return -EFAULT;

	return 0;
}

static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (!ops->get_backend_features)
		return 0;
	else
		return ops->get_backend_features(vdpa);
}

static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v)
{
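	/*
	 * IOTLB mappings can be kept across a device reset when the device
	 * relies on the platform IOMMU (no set_map/dma_map), provides
	 * reset_map to restore its default mapping itself, or explicitly
	 * advertises VHOST_BACKEND_F_IOTLB_PERSIST in its backend features.
	 */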
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	return (!ops->set_map && !ops->dma_map) || ops->reset_map ||
	       vhost_vdpa_get_backend_features(v) & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
}

static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_dev *d = &v->vdev;
	u64 actual_features;
	u64 features;
	int i;

	/*
	 * It's not allowed to change the features after they have
	 * been negotiated.
	 */
	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
		return -EBUSY;

	if (copy_from_user(&features, featurep, sizeof(features)))
		return -EFAULT;

	if (vdpa_set_features(vdpa, features))
		return -EINVAL;

	/* let the vqs know what has been configured */
	actual_features = ops->get_driver_features(vdpa);
	for (i = 0; i < d->nvqs; ++i) {
		struct vhost_virtqueue *vq = d->vqs[i];

		mutex_lock(&vq->mutex);
		vq->acked_features = actual_features;
		mutex_unlock(&vq->mutex);
	}

	return 0;
}

static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u16 num;

	num = ops->get_vq_num_max(vdpa);

	if (copy_to_user(argp, &num, sizeof(num)))
		return -EFAULT;

	return 0;
}

static void vhost_vdpa_config_put(struct vhost_vdpa *v)
{
	if (v->config_ctx) {
		eventfd_ctx_put(v->config_ctx);
		v->config_ctx = NULL;
	}
}

static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_callback cb;
	int fd;
	struct eventfd_ctx *ctx;

	cb.callback = vhost_vdpa_config_cb;
	cb.private = v;
	if (copy_from_user(&fd, argp, sizeof(fd)))
		return -EFAULT;

	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
	swap(ctx, v->config_ctx);

	if (!IS_ERR_OR_NULL(ctx))
		eventfd_ctx_put(ctx);

	if (IS_ERR(v->config_ctx)) {
		long ret = PTR_ERR(v->config_ctx);

		v->config_ctx = NULL;
		return ret;
	}

	v->vdpa->config->set_config_cb(v->vdpa, &cb);

	return 0;
}

static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vhost_vdpa_iova_range range = {
		.first = v->range.first,
		.last = v->range.last,
	};

	if (copy_to_user(argp, &range, sizeof(range)))
		return -EFAULT;
	return 0;
}

static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 size;

	size = ops->get_config_size(vdpa);

	if (copy_to_user(argp, &size, sizeof(size)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;

	if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs)))
		return -EFAULT;

	return 0;
}

/* After a successful return of this ioctl the device must not process more
 * virtqueue descriptors. The device can answer reads or writes of config
 * fields as if it were not suspended. In particular, writing to "queue_enable"
 * with a value of 1 will not make the device start processing buffers.
 */
static long vhost_vdpa_suspend(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int ret;

	if (!ops->suspend)
		return -EOPNOTSUPP;

	ret = ops->suspend(vdpa);
	if (!ret)
		v->suspended = true;

	return ret;
}

/* After a successful return of this ioctl the device resumes processing
 * virtqueue descriptors. The device becomes fully operational the same way it
 * was before it was suspended.
 */
static long vhost_vdpa_resume(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int ret;

	if (!ops->resume)
		return -EOPNOTSUPP;

	ret = ops->resume(vdpa);
	if (!ret)
		v->suspended = false;

	return ret;
}

static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
				   void __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_vq_state vq_state;
	struct vdpa_callback cb;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	u32 idx;
	long r;

	r = get_user(idx, (u32 __user *)argp);
	if (r < 0)
		return r;

	if (idx >= v->nvqs)
		return -ENOBUFS;

	idx = array_index_nospec(idx, v->nvqs);
	vq = &v->vqs[idx];

	switch (cmd) {
	case VHOST_VDPA_SET_VRING_ENABLE:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		ops->set_vq_ready(vdpa, idx, s.num);
		return 0;
	case VHOST_VDPA_GET_VRING_GROUP:
		if (!ops->get_vq_group)
			return -EOPNOTSUPP;
		s.index = idx;
		s.num = ops->get_vq_group(vdpa, idx);
		if (s.num >= vdpa->ngroups)
			return -EIO;
		else if (copy_to_user(argp, &s, sizeof(s)))
			return -EFAULT;
		return 0;
	case VHOST_VDPA_GET_VRING_DESC_GROUP:
		if (!vhost_vdpa_has_desc_group(v))
			return -EOPNOTSUPP;
		s.index = idx;
		s.num = ops->get_vq_desc_group(vdpa, idx);
		if (s.num >= vdpa->ngroups)
			return -EIO;
		else if (copy_to_user(argp, &s, sizeof(s)))
			return -EFAULT;
		return 0;
	case VHOST_VDPA_SET_GROUP_ASID:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		if (s.num >= vdpa->nas)
			return -EINVAL;
		if (!ops->set_group_asid)
			return -EOPNOTSUPP;
		return ops->set_group_asid(vdpa, idx, s.num);
	case VHOST_GET_VRING_BASE:
		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
		if (r)
			return r;

		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
			vq->last_avail_idx = vq_state.packed.last_avail_idx |
					     (vq_state.packed.last_avail_counter << 15);
			vq->last_used_idx = vq_state.packed.last_used_idx |
					    (vq_state.packed.last_used_counter << 15);
		} else {
			vq->last_avail_idx = vq_state.split.avail_index;
		}
		break;
	}

	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
	if (r)
		return r;

	switch (cmd) {
	case VHOST_SET_VRING_ADDR:
		if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
			return -EINVAL;

		if (ops->set_vq_address(vdpa, idx,
					(u64)(uintptr_t)vq->desc,
					(u64)(uintptr_t)vq->avail,
					(u64)(uintptr_t)vq->used))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_BASE:
		if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
			return -EINVAL;

		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
			vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
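			/* bit 15 of the vhost index carries the packed ring wrap counter */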
			vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
			vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
			vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
		} else {
			vq_state.split.avail_index = vq->last_avail_idx;
		}
		r = ops->set_vq_state(vdpa, idx, &vq_state);
		break;

	case VHOST_SET_VRING_CALL:
		if (vq->call_ctx.ctx) {
			cb.callback = vhost_vdpa_virtqueue_cb;
			cb.private = vq;
			cb.trigger = vq->call_ctx.ctx;
		} else {
			cb.callback = NULL;
			cb.private = NULL;
			cb.trigger = NULL;
		}
		ops->set_vq_cb(vdpa, idx, &cb);
		vhost_vdpa_setup_vq_irq(v, idx);
		break;

	case VHOST_SET_VRING_NUM:
		ops->set_vq_num(vdpa, idx, vq->num);
		break;
	}

	return r;
}

static long vhost_vdpa_unlocked_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;
	void __user *argp = (void __user *)arg;
	u64 __user *featurep = argp;
	u64 features;
	long r = 0;

	if (cmd == VHOST_SET_BACKEND_FEATURES) {
		if (copy_from_user(&features, featurep, sizeof(features)))
			return -EFAULT;
		if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
				 BIT_ULL(VHOST_BACKEND_F_DESC_ASID) |
				 BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST) |
				 BIT_ULL(VHOST_BACKEND_F_SUSPEND) |
				 BIT_ULL(VHOST_BACKEND_F_RESUME) |
				 BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK)))
			return -EOPNOTSUPP;
		if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
		    !vhost_vdpa_can_suspend(v))
			return -EOPNOTSUPP;
		if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) &&
		    !vhost_vdpa_can_resume(v))
			return -EOPNOTSUPP;
		if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
		    !(features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)))
			return -EINVAL;
		if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
		    !vhost_vdpa_has_desc_group(v))
			return -EOPNOTSUPP;
		if ((features & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)) &&
		    !vhost_vdpa_has_persistent_map(v))
			return -EOPNOTSUPP;
		vhost_set_backend_features(&v->vdev, features);
		return 0;
	}

	mutex_lock(&d->mutex);

	switch (cmd) {
	case VHOST_VDPA_GET_DEVICE_ID:
		r = vhost_vdpa_get_device_id(v, argp);
		break;
	case VHOST_VDPA_GET_STATUS:
		r = vhost_vdpa_get_status(v, argp);
		break;
	case VHOST_VDPA_SET_STATUS:
		r = vhost_vdpa_set_status(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG:
		r = vhost_vdpa_get_config(v, argp);
		break;
	case VHOST_VDPA_SET_CONFIG:
		r = vhost_vdpa_set_config(v, argp);
		break;
	case VHOST_GET_FEATURES:
		r = vhost_vdpa_get_features(v, argp);
		break;
	case VHOST_SET_FEATURES:
		r = vhost_vdpa_set_features(v, argp);
		break;
	case VHOST_VDPA_GET_VRING_NUM:
		r = vhost_vdpa_get_vring_num(v, argp);
		break;
	case VHOST_VDPA_GET_GROUP_NUM:
		if (copy_to_user(argp, &v->vdpa->ngroups,
				 sizeof(v->vdpa->ngroups)))
			r = -EFAULT;
		break;
	case VHOST_VDPA_GET_AS_NUM:
		if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)))
			r = -EFAULT;
		break;
	case VHOST_SET_LOG_BASE:
	case VHOST_SET_LOG_FD:
		r = -ENOIOCTLCMD;
		break;
	case VHOST_VDPA_SET_CONFIG_CALL:
		r = vhost_vdpa_set_config_call(v, argp);
		break;
	case VHOST_GET_BACKEND_FEATURES:
		features = VHOST_VDPA_BACKEND_FEATURES;
		if (vhost_vdpa_can_suspend(v))
			features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
		if (vhost_vdpa_can_resume(v))
			features |= BIT_ULL(VHOST_BACKEND_F_RESUME);
		if (vhost_vdpa_has_desc_group(v))
			features |= BIT_ULL(VHOST_BACKEND_F_DESC_ASID);
		if (vhost_vdpa_has_persistent_map(v))
			features |= BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
		features |= vhost_vdpa_get_backend_features(v);
		if (copy_to_user(featurep, &features, sizeof(features)))
			r = -EFAULT;
		break;
	case VHOST_VDPA_GET_IOVA_RANGE:
		r = vhost_vdpa_get_iova_range(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG_SIZE:
		r = vhost_vdpa_get_config_size(v, argp);
		break;
	case VHOST_VDPA_GET_VQS_COUNT:
		r = vhost_vdpa_get_vqs_count(v, argp);
		break;
	case VHOST_VDPA_SUSPEND:
		r = vhost_vdpa_suspend(v);
		break;
	case VHOST_VDPA_RESUME:
		r = vhost_vdpa_resume(v);
		break;
	default:
		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
		break;
	}

	if (r)
		goto out;

	switch (cmd) {
	case VHOST_SET_OWNER:
		r = vhost_vdpa_bind_mm(v);
		if (r)
			vhost_dev_reset_owner(d, NULL);
		break;
	}
out:
	mutex_unlock(&d->mutex);
	return r;
}

static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
				     struct vhost_iotlb_map *map, u32 asid)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->dma_map) {
		ops->dma_unmap(vdpa, asid, map->start, map->size);
	} else if (ops->set_map == NULL) {
		iommu_unmap(v->domain, map->start, map->size);
	}
}

static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
				u64 start, u64 last, u32 asid)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb_map *map;
	struct page *page;
	unsigned long pfn, pinned;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		pinned = PFN_DOWN(map->size);
		for (pfn = PFN_DOWN(map->addr);
		     pinned > 0; pfn++, pinned--) {
			page = pfn_to_page(pfn);
			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(page);
			unpin_user_page(page);
		}
		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
		vhost_vdpa_general_unmap(v, map, asid);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
				u64 start, u64 last, u32 asid)
{
	struct vhost_iotlb_map *map;
	struct vdpa_map_file *map_file;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		map_file = (struct vdpa_map_file *)map->opaque;
		fput(map_file->file);
		kfree(map_file);
		vhost_vdpa_general_unmap(v, map, asid);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
				   struct vhost_iotlb *iotlb, u64 start,
				   u64 last, u32 asid)
{
	struct vdpa_device *vdpa = v->vdpa;

	if (vdpa->use_va)
		return vhost_vdpa_va_unmap(v, iotlb, start, last, asid);

	return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid);
}

static int perm_to_iommu_flags(u32 perm)
{
	int flags = 0;

	switch (perm) {
	case VHOST_ACCESS_WO:
		flags |= IOMMU_WRITE;
		break;
	case VHOST_ACCESS_RO:
		flags |= IOMMU_READ;
		break;
	case VHOST_ACCESS_RW:
		flags |= (IOMMU_WRITE | IOMMU_READ);
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags | IOMMU_CACHE;
}

static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
			  u64 iova, u64 size, u64 pa, u32 perm, void *opaque)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 asid = iotlb_to_asid(iotlb);
	int r = 0;

	r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1,
				      pa, perm, opaque);
	if (r)
		return r;

	if (ops->dma_map) {
		r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
	} else if (ops->set_map) {
		if (!v->in_batch)
			r = ops->set_map(vdpa, asid, iotlb);
	} else {
		r = iommu_map(v->domain, iova, pa, size,
			      perm_to_iommu_flags(perm),
			      GFP_KERNEL_ACCOUNT);
	}
	if (r) {
		vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
		return r;
	}

	if (!vdpa->use_va)
		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);

	return 0;
}

static void vhost_vdpa_unmap(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 asid = iotlb_to_asid(iotlb);

	vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid);

	if (ops->set_map) {
		if (!v->in_batch)
			ops->set_map(vdpa, asid, iotlb);
	}
}

static int vhost_vdpa_va_map(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size, u64 uaddr, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	u64 offset, map_size, map_iova = iova;
	struct vdpa_map_file *map_file;
	struct vm_area_struct *vma;
	int ret = 0;

	mmap_read_lock(dev->mm);

	while (size) {
		vma = find_vma(dev->mm, uaddr);
		if (!vma) {
			ret = -EINVAL;
			break;
		}
		map_size = min(size, vma->vm_end - uaddr);
		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
		      !(vma->vm_flags & (VM_IO | VM_PFNMAP))))
			goto next;

		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
		if (!map_file) {
			ret = -ENOMEM;
			break;
		}
		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
		map_file->offset = offset;
		map_file->file = get_file(vma->vm_file);
		ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr,
				     perm, map_file);
		if (ret) {
			fput(map_file->file);
			kfree(map_file);
			break;
		}
next:
		size -= map_size;
		uaddr += map_size;
		map_iova += map_size;
	}
	if (ret)
		vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova);

	mmap_read_unlock(dev->mm);

	return ret;
}

static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size, u64 uaddr, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	struct page **page_list;
	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
	unsigned int gup_flags = FOLL_LONGTERM;
	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
	unsigned long lock_limit, sz2pin, nchunks, i;
	u64 start = iova;
	long pinned;
	int ret = 0;

	/* Limit the use of memory for bookkeeping */
	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list)
		return -ENOMEM;

	if (perm & VHOST_ACCESS_WO)
		gup_flags |= FOLL_WRITE;

	npages = PFN_UP(size + (iova & ~PAGE_MASK));
	if (!npages) {
		ret = -EINVAL;
		goto free;
	}

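	/*
	 * Pin the user range in page_list-sized batches while holding
	 * mmap_read_lock, coalesce physically contiguous pfn runs, and
	 * map each run with vhost_vdpa_map(); pinned pages are accounted
	 * against RLIMIT_MEMLOCK via mm->pinned_vm.
	 */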
	mmap_read_lock(dev->mm);

	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
		ret = -ENOMEM;
		goto unlock;
	}

	cur_base = uaddr & PAGE_MASK;
	iova &= PAGE_MASK;
	nchunks = 0;

	while (npages) {
		sz2pin = min_t(unsigned long, npages, list_size);
		pinned = pin_user_pages(cur_base, sz2pin,
					gup_flags, page_list);
		if (sz2pin != pinned) {
			if (pinned < 0) {
				ret = pinned;
			} else {
				unpin_user_pages(page_list, pinned);
				ret = -ENOMEM;
			}
			goto out;
		}
		nchunks++;

		if (!last_pfn)
			map_pfn = page_to_pfn(page_list[0]);

		for (i = 0; i < pinned; i++) {
			unsigned long this_pfn = page_to_pfn(page_list[i]);
			u64 csize;

			if (last_pfn && (this_pfn != last_pfn + 1)) {
				/* Pin a contiguous chunk of memory */
				csize = PFN_PHYS(last_pfn - map_pfn + 1);
				ret = vhost_vdpa_map(v, iotlb, iova, csize,
						     PFN_PHYS(map_pfn),
						     perm, NULL);
				if (ret) {
					/*
					 * Unpin the pages that are left unmapped
					 * from this point on in the current
					 * page_list. The remaining outstanding
					 * ones which may stride across several
					 * chunks will be covered in the common
					 * error path subsequently.
					 */
					unpin_user_pages(&page_list[i],
							 pinned - i);
					goto out;
				}

				map_pfn = this_pfn;
				iova += csize;
				nchunks = 0;
			}

			last_pfn = this_pfn;
		}

		cur_base += PFN_PHYS(pinned);
		npages -= pinned;
	}

	/* Pin the remaining chunk */
	ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1),
			     PFN_PHYS(map_pfn), perm, NULL);
out:
	if (ret) {
		if (nchunks) {
			unsigned long pfn;

			/*
			 * Unpin the outstanding pages that have not been
			 * mapped, due to vdpa_map() or pin_user_pages()
			 * failure.
			 *
			 * Mapped pages are accounted in vdpa_map(), hence
			 * the corresponding unpinning will be handled by
			 * vdpa_unmap().
			 */
			WARN_ON(!last_pfn);
			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
				unpin_user_page(pfn_to_page(pfn));
		}
		vhost_vdpa_unmap(v, iotlb, start, size);
	}
unlock:
	mmap_read_unlock(dev->mm);
free:
	free_page((unsigned long)page_list);
	return ret;
}

static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
					   struct vhost_iotlb *iotlb,
					   struct vhost_iotlb_msg *msg)
{
	struct vdpa_device *vdpa = v->vdpa;

	if (msg->iova < v->range.first || !msg->size ||
	    msg->iova > U64_MAX - msg->size + 1 ||
	    msg->iova + msg->size - 1 > v->range.last)
		return -EINVAL;

	if (vhost_iotlb_itree_first(iotlb, msg->iova,
				    msg->iova + msg->size - 1))
		return -EEXIST;

	if (vdpa->use_va)
		return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size,
					 msg->uaddr, msg->perm);

	return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr,
				 msg->perm);
}

static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
					struct vhost_iotlb_msg *msg)
{
	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_iotlb *iotlb = NULL;
	struct vhost_vdpa_as *as = NULL;
	int r = 0;

	mutex_lock(&dev->mutex);

	r = vhost_dev_check_owner(dev);
	if (r)
		goto unlock;

	if (msg->type == VHOST_IOTLB_UPDATE ||
	    msg->type == VHOST_IOTLB_BATCH_BEGIN) {
		as = vhost_vdpa_find_alloc_as(v, asid);
		if (!as) {
			dev_err(&v->dev, "can't find and alloc asid %d\n",
				asid);
			r = -EINVAL;
			goto unlock;
		}
		iotlb = &as->iotlb;
	} else
		iotlb = asid_to_iotlb(v, asid);

	if ((v->in_batch && v->batch_asid != asid) || !iotlb) {
		if (v->in_batch && v->batch_asid != asid) {
			dev_info(&v->dev, "batch id %d asid %d\n",
				 v->batch_asid, asid);
		}
		if (!iotlb)
			dev_err(&v->dev, "no iotlb for asid %d\n", asid);
		r = -EINVAL;
		goto unlock;
	}

	switch (msg->type) {
	case VHOST_IOTLB_UPDATE:
		r = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
		break;
	case VHOST_IOTLB_INVALIDATE:
		vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
		break;
	case VHOST_IOTLB_BATCH_BEGIN:
		v->batch_asid = asid;
		v->in_batch = true;
		break;
	case VHOST_IOTLB_BATCH_END:
		if (v->in_batch && ops->set_map)
			ops->set_map(vdpa, asid, iotlb);
		v->in_batch = false;
		break;
	default:
		r = -EINVAL;
		break;
	}
unlock:
	mutex_unlock(&dev->mutex);

	return r;
}

static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
					 struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vhost_vdpa *v = file->private_data;
	struct vhost_dev *dev = &v->vdev;

	return vhost_chr_write_iter(dev, from);
}

static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
	const struct bus_type *bus;
	int ret;

	/* The device wants to do DMA by itself */
	if (ops->set_map || ops->dma_map)
		return 0;

	bus = dma_dev->bus;
	if (!bus)
		return -EFAULT;

	if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) {
		dev_warn_once(&v->dev,
			      "Failed to allocate domain, device is not IOMMU cache coherent capable\n");
		return -ENOTSUPP;
	}

	v->domain = iommu_domain_alloc(bus);
	if (!v->domain)
		return -EIO;

	ret = iommu_attach_device(v->domain, dma_dev);
	if (ret)
		goto err_attach;

	return 0;

err_attach:
	iommu_domain_free(v->domain);
	v->domain = NULL;
	return ret;
}

static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);

	if (v->domain) {
		iommu_detach_device(v->domain, dma_dev);
		iommu_domain_free(v->domain);
	}

	v->domain = NULL;
}

static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
{
	struct vdpa_iova_range *range = &v->range;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->get_iova_range) {
		*range = ops->get_iova_range(vdpa);
	} else if (v->domain && v->domain->geometry.force_aperture) {
		range->first = v->domain->geometry.aperture_start;
		range->last = v->domain->geometry.aperture_end;
	} else {
		range->first = 0;
		range->last = ULLONG_MAX;
	}
}

static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
{
	struct vhost_vdpa_as *as;
	u32 asid;

	for (asid = 0; asid < v->vdpa->nas; asid++) {
		as = asid_to_as(v, asid);
		if (as)
			vhost_vdpa_remove_as(v, asid);
	}

	vhost_vdpa_free_domain(v);
	vhost_dev_cleanup(&v->vdev);
	kfree(v->vdev.vqs);
	v->vdev.vqs = NULL;
}

static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v;
	struct vhost_dev *dev;
	struct vhost_virtqueue **vqs;
	int r, opened;
	u32 i, nvqs;

	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);

	opened = atomic_cmpxchg(&v->opened, 0, 1);
	if (opened)
		return -EBUSY;

	nvqs = v->nvqs;
	r = vhost_vdpa_reset(v);
	if (r)
		goto err;

	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		r = -ENOMEM;
		goto err;
	}

	dev = &v->vdev;
	for (i = 0; i < nvqs; i++) {
		vqs[i] = &v->vqs[i];
		vqs[i]->handle_kick = handle_vq_kick;
	}
	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
		       vhost_vdpa_process_iotlb_msg);

	r = vhost_vdpa_alloc_domain(v);
	if (r)
		goto err_alloc_domain;

	vhost_vdpa_set_iova_range(v);

	filep->private_data = v;

	return 0;

err_alloc_domain:
	vhost_vdpa_cleanup(v);
err:
	atomic_dec(&v->opened);
	return r;
}

static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
{
	u32 i;

	for (i = 0; i < v->nvqs; i++)
		vhost_vdpa_unsetup_vq_irq(v, i);
}

static int vhost_vdpa_release(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;

	mutex_lock(&d->mutex);
	filep->private_data = NULL;
	vhost_vdpa_clean_irq(v);
	vhost_vdpa_reset(v);
	vhost_dev_stop(&v->vdev);
	vhost_vdpa_unbind_mm(v);
	vhost_vdpa_config_put(v);
	vhost_vdpa_cleanup(v);
	mutex_unlock(&d->mutex);

	atomic_dec(&v->opened);
	complete(&v->completion);

	return 0;
}

#ifdef CONFIG_MMU
static vm_fault_t
vhost_vdpa_fault(struct vm_fault *vmf)
{
	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	struct vm_area_struct *vma = vmf->vma;
	u16 index = vma->vm_pgoff;

	notify = ops->get_vq_notification(vdpa, index);

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
			    PFN_DOWN(notify.addr), PAGE_SIZE,
			    vma->vm_page_prot))
		return VM_FAULT_SIGBUS;

	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct vhost_vdpa_vm_ops = {
	.fault = vhost_vdpa_fault,
};

static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vhost_vdpa *v = vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	unsigned long index = vma->vm_pgoff;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (vma->vm_flags & VM_READ)
		return -EINVAL;
	if (index > 65535)
		return -EINVAL;
	if (!ops->get_vq_notification)
		return -ENOTSUPP;

	/* To be safe and easily modelled by userspace, we only support a
	 * doorbell that sits on a page boundary and does not share the
	 * page with other registers.
	 */
	notify = ops->get_vq_notification(vdpa, index);
	if (notify.addr & (PAGE_SIZE - 1))
		return -EINVAL;
	if (vma->vm_end - vma->vm_start != notify.size)
		return -ENOTSUPP;

	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
	vma->vm_ops = &vhost_vdpa_vm_ops;
	return 0;
}
#endif /* CONFIG_MMU */

static const struct file_operations vhost_vdpa_fops = {
	.owner = THIS_MODULE,
	.open = vhost_vdpa_open,
	.release = vhost_vdpa_release,
	.write_iter = vhost_vdpa_chr_write_iter,
	.unlocked_ioctl = vhost_vdpa_unlocked_ioctl,
#ifdef CONFIG_MMU
	.mmap = vhost_vdpa_mmap,
#endif /* CONFIG_MMU */
	.compat_ioctl = compat_ptr_ioctl,
};

static void vhost_vdpa_release_dev(struct device *device)
{
	struct vhost_vdpa *v =
	       container_of(device, struct vhost_vdpa, dev);

	ida_simple_remove(&vhost_vdpa_ida, v->minor);
	kfree(v->vqs);
	kfree(v);
}

static int vhost_vdpa_probe(struct vdpa_device *vdpa)
{
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa *v;
	int minor;
	int i, r;

	/* We can't support a platform IOMMU device with more than one
	 * group or address space.
	 */
	if (!ops->set_map && !ops->dma_map &&
	    (vdpa->ngroups > 1 || vdpa->nas > 1))
		return -EOPNOTSUPP;

	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!v)
		return -ENOMEM;

	minor = ida_simple_get(&vhost_vdpa_ida, 0,
			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
	if (minor < 0) {
		kfree(v);
		return minor;
	}

	atomic_set(&v->opened, 0);
	v->minor = minor;
	v->vdpa = vdpa;
	v->nvqs = vdpa->nvqs;
	v->virtio_id = ops->get_device_id(vdpa);

	device_initialize(&v->dev);
	v->dev.release = vhost_vdpa_release_dev;
	v->dev.parent = &vdpa->dev;
	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
			       GFP_KERNEL);
	if (!v->vqs) {
		r = -ENOMEM;
		goto err;
	}

	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
	if (r)
		goto err;

	cdev_init(&v->cdev, &vhost_vdpa_fops);
	v->cdev.owner = THIS_MODULE;

	r = cdev_device_add(&v->cdev, &v->dev);
	if (r)
		goto err;

	init_completion(&v->completion);
	vdpa_set_drvdata(vdpa, v);

	for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++)
		INIT_HLIST_HEAD(&v->as[i]);

	return 0;

err:
	put_device(&v->dev);
	return r;
}

static void vhost_vdpa_remove(struct vdpa_device *vdpa)
{
	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
	int opened;

	cdev_device_del(&v->cdev, &v->dev);

	do {
		opened = atomic_cmpxchg(&v->opened, 0, 1);
		if (!opened)
			break;
		wait_for_completion(&v->completion);
	} while (1);

	put_device(&v->dev);
}

static struct vdpa_driver vhost_vdpa_driver = {
	.driver = {
		.name = "vhost_vdpa",
	},
	.probe = vhost_vdpa_probe,
	.remove = vhost_vdpa_remove,
};

static int __init vhost_vdpa_init(void)
{
	int r;

	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
				"vhost-vdpa");
	if (r)
		goto err_alloc_chrdev;

	r = vdpa_register_driver(&vhost_vdpa_driver);
	if (r)
		goto err_vdpa_register_driver;

	return 0;

err_vdpa_register_driver:
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
err_alloc_chrdev:
	return r;
}
module_init(vhost_vdpa_init);

static void __exit vhost_vdpa_exit(void)
{
	vdpa_unregister_driver(&vhost_vdpa_driver);
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
}
module_exit(vhost_vdpa_exit);

MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");
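
/*
 * Illustrative userspace sketch only; the exact order depends on the VMM
 * and the device. A minimal sequence against /dev/vhost-vdpa-N typically
 * looks like:
 *
 *	int fd = open("/dev/vhost-vdpa-0", O_RDWR);
 *	ioctl(fd, VHOST_SET_OWNER);
 *	ioctl(fd, VHOST_GET_FEATURES, &features);
 *	ioctl(fd, VHOST_SET_FEATURES, &features);
 *	// per-vq setup: VHOST_SET_VRING_NUM / _ADDR / _BASE / _KICK / _CALL
 *	// IOTLB updates: write() of struct vhost_msg_v2 to the same fd
 *	status = VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER |
 *		 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_DRIVER_OK;
 *	ioctl(fd, VHOST_VDPA_SET_STATUS, &status);
 */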