// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018-2020 Intel Corporation.
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * Author: Tiwei Bie <tiwei.bie@intel.com>
 *         Jason Wang <jasowang@redhat.com>
 *
 * Thanks to Michael S. Tsirkin for the valuable comments and
 * suggestions, and to Cunming Liang and Zhihong Wang for all
 * their support.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>

#include "vhost.h"

enum {
	VHOST_VDPA_BACKEND_FEATURES =
	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
	(1ULL << VHOST_BACKEND_F_IOTLB_ASID),
};

#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)

#define VHOST_VDPA_IOTLB_BUCKETS 16

struct vhost_vdpa_as {
	struct hlist_node hash_link;
	struct vhost_iotlb iotlb;
	u32 id;
};

struct vhost_vdpa {
	struct vhost_dev vdev;
	struct iommu_domain *domain;
	struct vhost_virtqueue *vqs;
	struct completion completion;
	struct vdpa_device *vdpa;
	struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
	struct device dev;
	struct cdev cdev;
	atomic_t opened;
	u32 nvqs;
	int virtio_id;
	int minor;
	struct eventfd_ctx *config_ctx;
	int in_batch;
	struct vdpa_iova_range range;
	u32 batch_asid;
};

static DEFINE_IDA(vhost_vdpa_ida);

static dev_t vhost_vdpa_major;

static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
				   struct vhost_iotlb *iotlb, u64 start,
				   u64 last, u32 asid);

static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
{
	struct vhost_vdpa_as *as = container_of(iotlb, struct
						vhost_vdpa_as, iotlb);
	return as->id;
}

static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid)
{
	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
	struct vhost_vdpa_as *as;

	hlist_for_each_entry(as, head, hash_link)
		if (as->id == asid)
			return as;

	return NULL;
}

static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid)
{
	struct vhost_vdpa_as *as = asid_to_as(v, asid);

	if (!as)
		return NULL;

	return &as->iotlb;
}

static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
{
	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
	struct vhost_vdpa_as *as;

	if (asid_to_as(v, asid))
		return NULL;

	if (asid >= v->vdpa->nas)
		return NULL;

	as = kmalloc(sizeof(*as), GFP_KERNEL);
	if (!as)
		return NULL;

	vhost_iotlb_init(&as->iotlb, 0, 0);
	as->id = asid;
	hlist_add_head(&as->hash_link, head);

	return as;
}

static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
						      u32 asid)
{
	struct vhost_vdpa_as *as = asid_to_as(v, asid);

	if (as)
		return as;

	return vhost_vdpa_alloc_as(v, asid);
}

static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
{
	struct vhost_vdpa_as *as = asid_to_as(v, asid);

	if (!as)
		return -EINVAL;

	hlist_del(&as->hash_link);
	vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
	kfree(as);

	return 0;
}

static void handle_vq_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
	const struct vdpa_config_ops *ops = v->vdpa->config;

	ops->kick_vq(v->vdpa, vq - v->vqs);
}

static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
{
	struct vhost_virtqueue *vq = private;
	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;

	if (call_ctx)
		eventfd_signal(call_ctx, 1);

	return IRQ_HANDLED;
}

static irqreturn_t vhost_vdpa_config_cb(void *private)
{
	struct vhost_vdpa *v = private;
	struct eventfd_ctx *config_ctx = v->config_ctx;

	if (config_ctx)
		eventfd_signal(config_ctx, 1);

	return IRQ_HANDLED;
}

static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];
	const struct vdpa_config_ops *ops = v->vdpa->config;
	struct vdpa_device *vdpa = v->vdpa;
	int ret, irq;

	if (!ops->get_vq_irq)
		return;

	irq = ops->get_vq_irq(vdpa, qid);
	if (irq < 0)
		return;

	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	if (!vq->call_ctx.ctx)
		return;

	vq->call_ctx.producer.token = vq->call_ctx.ctx;
	vq->call_ctx.producer.irq = irq;
	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
	if (unlikely(ret))
		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n",
			 qid, vq->call_ctx.producer.token, ret);
}

static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];

	irq_bypass_unregister_producer(&vq->call_ctx.producer);
}

static int vhost_vdpa_reset(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;

	v->in_batch = 0;

	return vdpa_reset(vdpa);
}

static long vhost_vdpa_bind_mm(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (!vdpa->use_va || !ops->bind_mm)
		return 0;

	return ops->bind_mm(vdpa, v->vdev.mm);
}

static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (!vdpa->use_va || !ops->unbind_mm)
		return;

	ops->unbind_mm(vdpa);
}

static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 device_id;

	device_id = ops->get_device_id(vdpa);

	if (copy_to_user(argp, &device_id, sizeof(device_id)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status;

	status = ops->get_status(vdpa);

	if (copy_to_user(statusp, &status, sizeof(status)))
		return -EFAULT;

	return 0;
}

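/*
 * VHOST_VDPA_SET_STATUS handler: userspace may only add status bits; the
 * single exception is writing 0, which resets the device. When DRIVER_OK
 * is cleared (via reset) the vq irq bypass producers are torn down first,
 * and when DRIVER_OK becomes newly set they are registered so interrupts
 * can be delivered directly.
 */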
static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status, status_old;
	u32 nvqs = v->nvqs;
	int ret;
	u16 i;

	if (copy_from_user(&status, statusp, sizeof(status)))
		return -EFAULT;

	status_old = ops->get_status(vdpa);

	/*
	 * Userspace shouldn't remove status bits unless it resets the
	 * status to 0.
	 */
	if (status != 0 && (status_old & ~status) != 0)
		return -EINVAL;

	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_unsetup_vq_irq(v, i);

	if (status == 0) {
		ret = vdpa_reset(vdpa);
		if (ret)
			return ret;
	} else
		vdpa_set_status(vdpa, status);

	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_setup_vq_irq(v, i);

	return 0;
}

static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
				      struct vhost_vdpa_config *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	size_t size = vdpa->config->get_config_size(vdpa);

	if (c->len == 0 || c->off > size)
		return -EINVAL;

	if (c->len > size - c->off)
		return -E2BIG;

	return 0;
}

static long vhost_vdpa_get_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vdpa_get_config(vdpa, config.off, buf, config.len);

	if (copy_to_user(c->buf, buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_set_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;

	buf = vmemdup_user(c->buf, config.len);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	vdpa_set_config(vdpa, config.off, buf, config.len);

	kvfree(buf);
	return 0;
}

static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	return ops->suspend;
}

static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	return ops->resume;
}

static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	features = ops->get_device_features(vdpa);

	if (copy_to_user(featurep, &features, sizeof(features)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	/*
	 * It's not allowed to change the features after they have
	 * been negotiated.
	 */
	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
		return -EBUSY;

	if (copy_from_user(&features, featurep, sizeof(features)))
		return -EFAULT;

	if (vdpa_set_features(vdpa, features))
		return -EINVAL;

	return 0;
}

static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u16 num;

	num = ops->get_vq_num_max(vdpa);

	if (copy_to_user(argp, &num, sizeof(num)))
		return -EFAULT;

	return 0;
}

static void vhost_vdpa_config_put(struct vhost_vdpa *v)
{
	if (v->config_ctx) {
		eventfd_ctx_put(v->config_ctx);
		v->config_ctx = NULL;
	}
}

static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_callback cb;
	int fd;
	struct eventfd_ctx *ctx;

	cb.callback = vhost_vdpa_config_cb;
	cb.private = v;
	if (copy_from_user(&fd, argp, sizeof(fd)))
		return -EFAULT;

	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
	swap(ctx, v->config_ctx);

	if (!IS_ERR_OR_NULL(ctx))
		eventfd_ctx_put(ctx);

	if (IS_ERR(v->config_ctx)) {
		long ret = PTR_ERR(v->config_ctx);

		v->config_ctx = NULL;
		return ret;
	}

	v->vdpa->config->set_config_cb(v->vdpa, &cb);

	return 0;
}

static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vhost_vdpa_iova_range range = {
		.first = v->range.first,
		.last = v->range.last,
	};

	if (copy_to_user(argp, &range, sizeof(range)))
		return -EFAULT;
	return 0;
}

static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 size;

	size = ops->get_config_size(vdpa);

	if (copy_to_user(argp, &size, sizeof(size)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;

	if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs)))
		return -EFAULT;

	return 0;
}

/* After a successful return of this ioctl the device must not process more
 * virtqueue descriptors. The device can answer reads or writes of config
 * fields as if it were not suspended. In particular, writing to "queue_enable"
 * with a value of 1 will not make the device start processing buffers.
 */
static long vhost_vdpa_suspend(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (!ops->suspend)
		return -EOPNOTSUPP;

	return ops->suspend(vdpa);
}

/* After a successful return of this ioctl the device resumes processing
 * virtqueue descriptors. The device becomes fully operational the same way it
 * was before it was suspended.
 */
static long vhost_vdpa_resume(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (!ops->resume)
		return -EOPNOTSUPP;

	return ops->resume(vdpa);
}

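/*
 * Vring ioctls are handled in two phases: a few commands (enable, group
 * query, ASID assignment, reading the vring base) are translated directly
 * into vdpa config ops here, while the rest are first processed by the
 * generic vhost_vring_ioctl() and the resulting vq state is then
 * propagated to the parent device (address, base, call eventfd, size).
 */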
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
				   void __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_vq_state vq_state;
	struct vdpa_callback cb;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	u32 idx;
	long r;

	r = get_user(idx, (u32 __user *)argp);
	if (r < 0)
		return r;

	if (idx >= v->nvqs)
		return -ENOBUFS;

	idx = array_index_nospec(idx, v->nvqs);
	vq = &v->vqs[idx];

	switch (cmd) {
	case VHOST_VDPA_SET_VRING_ENABLE:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		ops->set_vq_ready(vdpa, idx, s.num);
		return 0;
	case VHOST_VDPA_GET_VRING_GROUP:
		if (!ops->get_vq_group)
			return -EOPNOTSUPP;
		s.index = idx;
		s.num = ops->get_vq_group(vdpa, idx);
		if (s.num >= vdpa->ngroups)
			return -EIO;
		else if (copy_to_user(argp, &s, sizeof(s)))
			return -EFAULT;
		return 0;
	case VHOST_VDPA_SET_GROUP_ASID:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		if (s.num >= vdpa->nas)
			return -EINVAL;
		if (!ops->set_group_asid)
			return -EOPNOTSUPP;
		return ops->set_group_asid(vdpa, idx, s.num);
	case VHOST_GET_VRING_BASE:
		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
		if (r)
			return r;

		vq->last_avail_idx = vq_state.split.avail_index;
		break;
	}

	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
	if (r)
		return r;

	switch (cmd) {
	case VHOST_SET_VRING_ADDR:
		if (ops->set_vq_address(vdpa, idx,
					(u64)(uintptr_t)vq->desc,
					(u64)(uintptr_t)vq->avail,
					(u64)(uintptr_t)vq->used))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_BASE:
		vq_state.split.avail_index = vq->last_avail_idx;
		if (ops->set_vq_state(vdpa, idx, &vq_state))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_CALL:
		if (vq->call_ctx.ctx) {
			cb.callback = vhost_vdpa_virtqueue_cb;
			cb.private = vq;
			cb.trigger = vq->call_ctx.ctx;
		} else {
			cb.callback = NULL;
			cb.private = NULL;
			cb.trigger = NULL;
		}
		ops->set_vq_cb(vdpa, idx, &cb);
		vhost_vdpa_setup_vq_irq(v, idx);
		break;

	case VHOST_SET_VRING_NUM:
		ops->set_vq_num(vdpa, idx, vq->num);
		break;
	}

	return r;
}

static long vhost_vdpa_unlocked_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;
	void __user *argp = (void __user *)arg;
	u64 __user *featurep = argp;
	u64 features;
	long r = 0;

	if (cmd == VHOST_SET_BACKEND_FEATURES) {
		if (copy_from_user(&features, featurep, sizeof(features)))
			return -EFAULT;
		if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
				 BIT_ULL(VHOST_BACKEND_F_SUSPEND) |
				 BIT_ULL(VHOST_BACKEND_F_RESUME)))
			return -EOPNOTSUPP;
		if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
		     !vhost_vdpa_can_suspend(v))
			return -EOPNOTSUPP;
		if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) &&
		     !vhost_vdpa_can_resume(v))
			return -EOPNOTSUPP;
		vhost_set_backend_features(&v->vdev, features);
		return 0;
	}

	mutex_lock(&d->mutex);

	switch (cmd) {
	case VHOST_VDPA_GET_DEVICE_ID:
		r = vhost_vdpa_get_device_id(v, argp);
		break;
	case VHOST_VDPA_GET_STATUS:
		r = vhost_vdpa_get_status(v, argp);
		break;
	case VHOST_VDPA_SET_STATUS:
		r = vhost_vdpa_set_status(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG:
		r = vhost_vdpa_get_config(v, argp);
		break;
	case VHOST_VDPA_SET_CONFIG:
		r = vhost_vdpa_set_config(v, argp);
		break;
	case VHOST_GET_FEATURES:
		r = vhost_vdpa_get_features(v, argp);
		break;
	case VHOST_SET_FEATURES:
		r = vhost_vdpa_set_features(v, argp);
		break;
	case VHOST_VDPA_GET_VRING_NUM:
		r = vhost_vdpa_get_vring_num(v, argp);
		break;
	case VHOST_VDPA_GET_GROUP_NUM:
		if (copy_to_user(argp, &v->vdpa->ngroups,
				 sizeof(v->vdpa->ngroups)))
			r = -EFAULT;
		break;
	case VHOST_VDPA_GET_AS_NUM:
		if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)))
			r = -EFAULT;
		break;
	case VHOST_SET_LOG_BASE:
	case VHOST_SET_LOG_FD:
		r = -ENOIOCTLCMD;
		break;
	case VHOST_VDPA_SET_CONFIG_CALL:
		r = vhost_vdpa_set_config_call(v, argp);
		break;
	case VHOST_GET_BACKEND_FEATURES:
		features = VHOST_VDPA_BACKEND_FEATURES;
		if (vhost_vdpa_can_suspend(v))
			features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
		if (vhost_vdpa_can_resume(v))
			features |= BIT_ULL(VHOST_BACKEND_F_RESUME);
		if (copy_to_user(featurep, &features, sizeof(features)))
			r = -EFAULT;
		break;
	case VHOST_VDPA_GET_IOVA_RANGE:
		r = vhost_vdpa_get_iova_range(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG_SIZE:
		r = vhost_vdpa_get_config_size(v, argp);
		break;
	case VHOST_VDPA_GET_VQS_COUNT:
		r = vhost_vdpa_get_vqs_count(v, argp);
		break;
	case VHOST_VDPA_SUSPEND:
		r = vhost_vdpa_suspend(v);
		break;
	case VHOST_VDPA_RESUME:
		r = vhost_vdpa_resume(v);
		break;
	default:
		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
		break;
	}

	if (r)
		goto out;

	switch (cmd) {
	case VHOST_SET_OWNER:
		r = vhost_vdpa_bind_mm(v);
		if (r)
			vhost_dev_reset_owner(d, NULL);
		break;
	}
out:
	mutex_unlock(&d->mutex);
	return r;
}

static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
				     struct vhost_iotlb_map *map, u32 asid)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->dma_map) {
		ops->dma_unmap(vdpa, asid, map->start, map->size);
	} else if (ops->set_map == NULL) {
		iommu_unmap(v->domain, map->start, map->size);
	}
}

static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
				u64 start, u64 last, u32 asid)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb_map *map;
	struct page *page;
	unsigned long pfn, pinned;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		pinned = PFN_DOWN(map->size);
		for (pfn = PFN_DOWN(map->addr);
		     pinned > 0; pfn++, pinned--) {
			page = pfn_to_page(pfn);
			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(page);
			unpin_user_page(page);
		}
		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
		vhost_vdpa_general_unmap(v, map, asid);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
				u64 start, u64 last, u32 asid)
{
	struct vhost_iotlb_map *map;
	struct vdpa_map_file *map_file;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		map_file = (struct vdpa_map_file *)map->opaque;
		fput(map_file->file);
		kfree(map_file);
		vhost_vdpa_general_unmap(v, map, asid);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
				   struct vhost_iotlb *iotlb, u64 start,
				   u64 last, u32 asid)
{
	struct vdpa_device *vdpa = v->vdpa;

	if (vdpa->use_va)
		return vhost_vdpa_va_unmap(v, iotlb, start, last, asid);

	return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid);
}

static int perm_to_iommu_flags(u32 perm)
{
	int flags = 0;

	switch (perm) {
	case VHOST_ACCESS_WO:
		flags |= IOMMU_WRITE;
		break;
	case VHOST_ACCESS_RO:
		flags |= IOMMU_READ;
		break;
	case VHOST_ACCESS_RW:
		flags |= (IOMMU_WRITE | IOMMU_READ);
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags | IOMMU_CACHE;
}

static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
			  u64 iova, u64 size, u64 pa, u32 perm, void *opaque)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 asid = iotlb_to_asid(iotlb);
	int r = 0;

	r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1,
				      pa, perm, opaque);
	if (r)
		return r;

	if (ops->dma_map) {
		r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
	} else if (ops->set_map) {
		if (!v->in_batch)
			r = ops->set_map(vdpa, asid, iotlb);
	} else {
		r = iommu_map(v->domain, iova, pa, size,
			      perm_to_iommu_flags(perm), GFP_KERNEL);
	}
	if (r) {
		vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
		return r;
	}

	if (!vdpa->use_va)
		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);

	return 0;
}

static void vhost_vdpa_unmap(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 asid = iotlb_to_asid(iotlb);

	vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid);

	if (ops->set_map) {
		if (!v->in_batch)
			ops->set_map(vdpa, asid, iotlb);
	}
}

static int vhost_vdpa_va_map(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size, u64 uaddr, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	u64 offset, map_size, map_iova = iova;
	struct vdpa_map_file *map_file;
	struct vm_area_struct *vma;
	int ret = 0;

	mmap_read_lock(dev->mm);

	while (size) {
		vma = find_vma(dev->mm, uaddr);
		if (!vma) {
			ret = -EINVAL;
			break;
		}
		map_size = min(size, vma->vm_end - uaddr);
		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
			!(vma->vm_flags & (VM_IO | VM_PFNMAP))))
			goto next;

		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
		if (!map_file) {
			ret = -ENOMEM;
			break;
		}
		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
		map_file->offset = offset;
		map_file->file = get_file(vma->vm_file);
		ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr,
				     perm, map_file);
		if (ret) {
			fput(map_file->file);
			kfree(map_file);
			break;
		}
next:
		size -= map_size;
		uaddr += map_size;
		map_iova += map_size;
	}
	if (ret)
		vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova);

	mmap_read_unlock(dev->mm);

	return ret;
}

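/*
 * Translate a userspace VA range into physical pages for the device:
 * pages are pinned in page-sized batches with pin_user_pages(), runs of
 * physically contiguous PFNs are coalesced, and each contiguous chunk is
 * mapped through vhost_vdpa_map(). On failure, every page that was pinned
 * but not yet mapped is unpinned and the already-mapped prefix is torn
 * down again.
 */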
static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size, u64 uaddr, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	struct page **page_list;
	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
	unsigned int gup_flags = FOLL_LONGTERM;
	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
	unsigned long lock_limit, sz2pin, nchunks, i;
	u64 start = iova;
	long pinned;
	int ret = 0;

	/* Limit the use of memory for bookkeeping */
	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list)
		return -ENOMEM;

	if (perm & VHOST_ACCESS_WO)
		gup_flags |= FOLL_WRITE;

	npages = PFN_UP(size + (iova & ~PAGE_MASK));
	if (!npages) {
		ret = -EINVAL;
		goto free;
	}

	mmap_read_lock(dev->mm);

	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
		ret = -ENOMEM;
		goto unlock;
	}

	cur_base = uaddr & PAGE_MASK;
	iova &= PAGE_MASK;
	nchunks = 0;

	while (npages) {
		sz2pin = min_t(unsigned long, npages, list_size);
		pinned = pin_user_pages(cur_base, sz2pin,
					gup_flags, page_list, NULL);
		if (sz2pin != pinned) {
			if (pinned < 0) {
				ret = pinned;
			} else {
				unpin_user_pages(page_list, pinned);
				ret = -ENOMEM;
			}
			goto out;
		}
		nchunks++;

		if (!last_pfn)
			map_pfn = page_to_pfn(page_list[0]);

		for (i = 0; i < pinned; i++) {
			unsigned long this_pfn = page_to_pfn(page_list[i]);
			u64 csize;

			if (last_pfn && (this_pfn != last_pfn + 1)) {
				/* Pin a contiguous chunk of memory */
				csize = PFN_PHYS(last_pfn - map_pfn + 1);
				ret = vhost_vdpa_map(v, iotlb, iova, csize,
						     PFN_PHYS(map_pfn),
						     perm, NULL);
				if (ret) {
					/*
					 * Unpin the pages that are left unmapped
					 * from this point on in the current
					 * page_list. The remaining outstanding
					 * ones which may stride across several
					 * chunks will be covered in the common
					 * error path subsequently.
					 */
					unpin_user_pages(&page_list[i],
							 pinned - i);
					goto out;
				}

				map_pfn = this_pfn;
				iova += csize;
				nchunks = 0;
			}

			last_pfn = this_pfn;
		}

		cur_base += PFN_PHYS(pinned);
		npages -= pinned;
	}

	/* Pin the rest of the chunk */
	ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1),
			     PFN_PHYS(map_pfn), perm, NULL);
out:
	if (ret) {
		if (nchunks) {
			unsigned long pfn;

			/*
			 * Unpin the outstanding pages which are yet to be
			 * mapped but haven't due to vdpa_map() or
			 * pin_user_pages() failure.
			 *
			 * Mapped pages are accounted in vdpa_map(), hence
			 * the corresponding unpinning will be handled by
			 * vdpa_unmap().
			 */
			WARN_ON(!last_pfn);
			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
				unpin_user_page(pfn_to_page(pfn));
		}
		vhost_vdpa_unmap(v, iotlb, start, size);
	}
unlock:
	mmap_read_unlock(dev->mm);
free:
	free_page((unsigned long)page_list);
	return ret;
}

static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
					   struct vhost_iotlb *iotlb,
					   struct vhost_iotlb_msg *msg)
{
	struct vdpa_device *vdpa = v->vdpa;

	if (msg->iova < v->range.first || !msg->size ||
	    msg->iova > U64_MAX - msg->size + 1 ||
	    msg->iova + msg->size - 1 > v->range.last)
		return -EINVAL;

	if (vhost_iotlb_itree_first(iotlb, msg->iova,
				    msg->iova + msg->size - 1))
		return -EEXIST;

	if (vdpa->use_va)
		return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size,
					 msg->uaddr, msg->perm);

	return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr,
				 msg->perm);
}

static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
					struct vhost_iotlb_msg *msg)
{
	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_iotlb *iotlb = NULL;
	struct vhost_vdpa_as *as = NULL;
	int r = 0;

	mutex_lock(&dev->mutex);

	r = vhost_dev_check_owner(dev);
	if (r)
		goto unlock;

	if (msg->type == VHOST_IOTLB_UPDATE ||
	    msg->type == VHOST_IOTLB_BATCH_BEGIN) {
		as = vhost_vdpa_find_alloc_as(v, asid);
		if (!as) {
			dev_err(&v->dev, "can't find and alloc asid %d\n",
				asid);
			r = -EINVAL;
			goto unlock;
		}
		iotlb = &as->iotlb;
	} else
		iotlb = asid_to_iotlb(v, asid);

	if ((v->in_batch && v->batch_asid != asid) || !iotlb) {
		if (v->in_batch && v->batch_asid != asid) {
			dev_info(&v->dev, "batch id %d asid %d\n",
				 v->batch_asid, asid);
		}
		if (!iotlb)
			dev_err(&v->dev, "no iotlb for asid %d\n", asid);
		r = -EINVAL;
		goto unlock;
	}

	switch (msg->type) {
	case VHOST_IOTLB_UPDATE:
		r = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
		break;
	case VHOST_IOTLB_INVALIDATE:
		vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
		break;
	case VHOST_IOTLB_BATCH_BEGIN:
		v->batch_asid = asid;
		v->in_batch = true;
		break;
	case VHOST_IOTLB_BATCH_END:
		if (v->in_batch && ops->set_map)
			ops->set_map(vdpa, asid, iotlb);
		v->in_batch = false;
		break;
	default:
		r = -EINVAL;
		break;
	}
unlock:
	mutex_unlock(&dev->mutex);

	return r;
}

static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
					 struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vhost_vdpa *v = file->private_data;
	struct vhost_dev *dev = &v->vdev;

	return vhost_chr_write_iter(dev, from);
}

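/*
 * A platform IOMMU domain is only needed when the parent device cannot
 * translate DMA on its own (i.e. it provides neither dma_map nor set_map);
 * in that case a domain is allocated on the parent's bus and attached to
 * its DMA device so that IOTLB updates can be backed by iommu_map() and
 * iommu_unmap().
 */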
static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
	const struct bus_type *bus;
	int ret;

	/* Device wants to do DMA by itself */
	if (ops->set_map || ops->dma_map)
		return 0;

	bus = dma_dev->bus;
	if (!bus)
		return -EFAULT;

	if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) {
		dev_warn_once(&v->dev,
			      "Failed to allocate domain, device is not IOMMU cache coherent capable\n");
		return -ENOTSUPP;
	}

	v->domain = iommu_domain_alloc(bus);
	if (!v->domain)
		return -EIO;

	ret = iommu_attach_device(v->domain, dma_dev);
	if (ret)
		goto err_attach;

	return 0;

err_attach:
	iommu_domain_free(v->domain);
	v->domain = NULL;
	return ret;
}

static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);

	if (v->domain) {
		iommu_detach_device(v->domain, dma_dev);
		iommu_domain_free(v->domain);
	}

	v->domain = NULL;
}

static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
{
	struct vdpa_iova_range *range = &v->range;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->get_iova_range) {
		*range = ops->get_iova_range(vdpa);
	} else if (v->domain && v->domain->geometry.force_aperture) {
		range->first = v->domain->geometry.aperture_start;
		range->last = v->domain->geometry.aperture_end;
	} else {
		range->first = 0;
		range->last = ULLONG_MAX;
	}
}

static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
{
	struct vhost_vdpa_as *as;
	u32 asid;

	for (asid = 0; asid < v->vdpa->nas; asid++) {
		as = asid_to_as(v, asid);
		if (as)
			vhost_vdpa_remove_as(v, asid);
	}

	vhost_vdpa_free_domain(v);
	vhost_dev_cleanup(&v->vdev);
	kfree(v->vdev.vqs);
}

static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v;
	struct vhost_dev *dev;
	struct vhost_virtqueue **vqs;
	int r, opened;
	u32 i, nvqs;

	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);

	opened = atomic_cmpxchg(&v->opened, 0, 1);
	if (opened)
		return -EBUSY;

	nvqs = v->nvqs;
	r = vhost_vdpa_reset(v);
	if (r)
		goto err;

	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		r = -ENOMEM;
		goto err;
	}

	dev = &v->vdev;
	for (i = 0; i < nvqs; i++) {
		vqs[i] = &v->vqs[i];
		vqs[i]->handle_kick = handle_vq_kick;
	}
	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
		       vhost_vdpa_process_iotlb_msg);

	r = vhost_vdpa_alloc_domain(v);
	if (r)
		goto err_alloc_domain;

	vhost_vdpa_set_iova_range(v);

	filep->private_data = v;

	return 0;

err_alloc_domain:
	vhost_vdpa_cleanup(v);
err:
	atomic_dec(&v->opened);
	return r;
}

static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
{
	u32 i;

	for (i = 0; i < v->nvqs; i++)
		vhost_vdpa_unsetup_vq_irq(v, i);
}

static int vhost_vdpa_release(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;

	mutex_lock(&d->mutex);
	filep->private_data = NULL;
	vhost_vdpa_clean_irq(v);
	vhost_vdpa_reset(v);
	vhost_dev_stop(&v->vdev);
	vhost_vdpa_unbind_mm(v);
	vhost_vdpa_config_put(v);
	vhost_vdpa_cleanup(v);
	mutex_unlock(&d->mutex);

	atomic_dec(&v->opened);
	complete(&v->completion);

	return 0;
}

#ifdef CONFIG_MMU
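/*
 * Fault handler for the doorbell mapping set up in vhost_vdpa_mmap(): on
 * first access, remap the page containing the vq notification (doorbell)
 * register of the vq selected by vm_pgoff into the faulting VMA as
 * uncached I/O memory.
 */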
static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
{
	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	struct vm_area_struct *vma = vmf->vma;
	u16 index = vma->vm_pgoff;

	notify = ops->get_vq_notification(vdpa, index);

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
			    PFN_DOWN(notify.addr), PAGE_SIZE,
			    vma->vm_page_prot))
		return VM_FAULT_SIGBUS;

	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct vhost_vdpa_vm_ops = {
	.fault = vhost_vdpa_fault,
};

static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vhost_vdpa *v = vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	unsigned long index = vma->vm_pgoff;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (vma->vm_flags & VM_READ)
		return -EINVAL;
	if (index > 65535)
		return -EINVAL;
	if (!ops->get_vq_notification)
		return -ENOTSUPP;

	/* To be safe and easily modelled by userspace, we only support
	 * doorbells that sit on a page boundary and do not share the
	 * page with other registers.
	 */
	notify = ops->get_vq_notification(vdpa, index);
	if (notify.addr & (PAGE_SIZE - 1))
		return -EINVAL;
	if (vma->vm_end - vma->vm_start != notify.size)
		return -ENOTSUPP;

	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
	vma->vm_ops = &vhost_vdpa_vm_ops;
	return 0;
}
#endif /* CONFIG_MMU */

static const struct file_operations vhost_vdpa_fops = {
	.owner = THIS_MODULE,
	.open = vhost_vdpa_open,
	.release = vhost_vdpa_release,
	.write_iter = vhost_vdpa_chr_write_iter,
	.unlocked_ioctl = vhost_vdpa_unlocked_ioctl,
#ifdef CONFIG_MMU
	.mmap = vhost_vdpa_mmap,
#endif /* CONFIG_MMU */
	.compat_ioctl = compat_ptr_ioctl,
};

static void vhost_vdpa_release_dev(struct device *device)
{
	struct vhost_vdpa *v =
	       container_of(device, struct vhost_vdpa, dev);

	ida_simple_remove(&vhost_vdpa_ida, v->minor);
	kfree(v->vqs);
	kfree(v);
}

static int vhost_vdpa_probe(struct vdpa_device *vdpa)
{
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa *v;
	int minor;
	int i, r;

	/* We can't support platform IOMMU devices with more than one
	 * group or address space.
	 */
	if (!ops->set_map && !ops->dma_map &&
	    (vdpa->ngroups > 1 || vdpa->nas > 1))
		return -EOPNOTSUPP;

	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!v)
		return -ENOMEM;

	minor = ida_simple_get(&vhost_vdpa_ida, 0,
			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
	if (minor < 0) {
		kfree(v);
		return minor;
	}

	atomic_set(&v->opened, 0);
	v->minor = minor;
	v->vdpa = vdpa;
	v->nvqs = vdpa->nvqs;
	v->virtio_id = ops->get_device_id(vdpa);

	device_initialize(&v->dev);
	v->dev.release = vhost_vdpa_release_dev;
	v->dev.parent = &vdpa->dev;
	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
			       GFP_KERNEL);
	if (!v->vqs) {
		r = -ENOMEM;
		goto err;
	}

	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
	if (r)
		goto err;

	cdev_init(&v->cdev, &vhost_vdpa_fops);
	v->cdev.owner = THIS_MODULE;

	r = cdev_device_add(&v->cdev, &v->dev);
	if (r)
		goto err;

	init_completion(&v->completion);
	vdpa_set_drvdata(vdpa, v);

	for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++)
		INIT_HLIST_HEAD(&v->as[i]);

	return 0;

err:
	put_device(&v->dev);
	ida_simple_remove(&vhost_vdpa_ida, v->minor);
	return r;
}

static void vhost_vdpa_remove(struct vdpa_device *vdpa)
{
	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
	int opened;

	cdev_device_del(&v->cdev, &v->dev);

	do {
		opened = atomic_cmpxchg(&v->opened, 0, 1);
		if (!opened)
			break;
		wait_for_completion(&v->completion);
	} while (1);

	put_device(&v->dev);
}

static struct vdpa_driver vhost_vdpa_driver = {
	.driver = {
		.name = "vhost_vdpa",
	},
	.probe = vhost_vdpa_probe,
	.remove = vhost_vdpa_remove,
};

static int __init vhost_vdpa_init(void)
{
	int r;

	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
				"vhost-vdpa");
	if (r)
		goto err_alloc_chrdev;

	r = vdpa_register_driver(&vhost_vdpa_driver);
	if (r)
		goto err_vdpa_register_driver;

	return 0;

err_vdpa_register_driver:
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
err_alloc_chrdev:
	return r;
}
module_init(vhost_vdpa_init);

static void __exit vhost_vdpa_exit(void)
{
	vdpa_unregister_driver(&vhost_vdpa_driver);
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
}
module_exit(vhost_vdpa_exit);

MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");