1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2018-2020 Intel Corporation. 4 * Copyright (C) 2020 Red Hat, Inc. 5 * 6 * Author: Tiwei Bie <tiwei.bie@intel.com> 7 * Jason Wang <jasowang@redhat.com> 8 * 9 * Thanks Michael S. Tsirkin for the valuable comments and 10 * suggestions. And thanks to Cunming Liang and Zhihong Wang for all 11 * their supports. 12 */ 13 14 #include <linux/kernel.h> 15 #include <linux/module.h> 16 #include <linux/cdev.h> 17 #include <linux/device.h> 18 #include <linux/mm.h> 19 #include <linux/slab.h> 20 #include <linux/iommu.h> 21 #include <linux/uuid.h> 22 #include <linux/vdpa.h> 23 #include <linux/nospec.h> 24 #include <linux/vhost.h> 25 26 #include "vhost.h" 27 28 enum { 29 VHOST_VDPA_BACKEND_FEATURES = 30 (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) | 31 (1ULL << VHOST_BACKEND_F_IOTLB_BATCH) | 32 (1ULL << VHOST_BACKEND_F_IOTLB_ASID), 33 }; 34 35 #define VHOST_VDPA_DEV_MAX (1U << MINORBITS) 36 37 #define VHOST_VDPA_IOTLB_BUCKETS 16 38 39 struct vhost_vdpa_as { 40 struct hlist_node hash_link; 41 struct vhost_iotlb iotlb; 42 u32 id; 43 }; 44 45 struct vhost_vdpa { 46 struct vhost_dev vdev; 47 struct iommu_domain *domain; 48 struct vhost_virtqueue *vqs; 49 struct completion completion; 50 struct vdpa_device *vdpa; 51 struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS]; 52 struct device dev; 53 struct cdev cdev; 54 atomic_t opened; 55 u32 nvqs; 56 int virtio_id; 57 int minor; 58 struct eventfd_ctx *config_ctx; 59 int in_batch; 60 struct vdpa_iova_range range; 61 u32 batch_asid; 62 }; 63 64 static DEFINE_IDA(vhost_vdpa_ida); 65 66 static dev_t vhost_vdpa_major; 67 68 static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, 69 struct vhost_iotlb *iotlb, u64 start, 70 u64 last, u32 asid); 71 72 static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb) 73 { 74 struct vhost_vdpa_as *as = container_of(iotlb, struct 75 vhost_vdpa_as, iotlb); 76 return as->id; 77 } 78 79 static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid) 80 { 81 struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS]; 82 struct vhost_vdpa_as *as; 83 84 hlist_for_each_entry(as, head, hash_link) 85 if (as->id == asid) 86 return as; 87 88 return NULL; 89 } 90 91 static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid) 92 { 93 struct vhost_vdpa_as *as = asid_to_as(v, asid); 94 95 if (!as) 96 return NULL; 97 98 return &as->iotlb; 99 } 100 101 static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid) 102 { 103 struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS]; 104 struct vhost_vdpa_as *as; 105 106 if (asid_to_as(v, asid)) 107 return NULL; 108 109 if (asid >= v->vdpa->nas) 110 return NULL; 111 112 as = kmalloc(sizeof(*as), GFP_KERNEL); 113 if (!as) 114 return NULL; 115 116 vhost_iotlb_init(&as->iotlb, 0, 0); 117 as->id = asid; 118 hlist_add_head(&as->hash_link, head); 119 120 return as; 121 } 122 123 static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v, 124 u32 asid) 125 { 126 struct vhost_vdpa_as *as = asid_to_as(v, asid); 127 128 if (as) 129 return as; 130 131 return vhost_vdpa_alloc_as(v, asid); 132 } 133 134 static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) 135 { 136 struct vhost_vdpa_as *as = asid_to_as(v, asid); 137 138 if (!as) 139 return -EINVAL; 140 141 hlist_del(&as->hash_link); 142 vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid); 143 kfree(as); 144 145 return 0; 146 } 147 148 static void handle_vq_kick(struct vhost_work *work) 149 { 150 struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, 151 poll.work); 152 struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev); 153 const struct vdpa_config_ops *ops = v->vdpa->config; 154 155 ops->kick_vq(v->vdpa, vq - v->vqs); 156 } 157 158 static irqreturn_t vhost_vdpa_virtqueue_cb(void *private) 159 { 160 struct vhost_virtqueue *vq = private; 161 struct eventfd_ctx *call_ctx = vq->call_ctx.ctx; 162 163 if (call_ctx) 164 eventfd_signal(call_ctx, 1); 165 166 return IRQ_HANDLED; 167 } 168 169 static irqreturn_t vhost_vdpa_config_cb(void *private) 170 { 171 struct vhost_vdpa *v = private; 172 struct eventfd_ctx *config_ctx = v->config_ctx; 173 174 if (config_ctx) 175 eventfd_signal(config_ctx, 1); 176 177 return IRQ_HANDLED; 178 } 179 180 static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid) 181 { 182 struct vhost_virtqueue *vq = &v->vqs[qid]; 183 const struct vdpa_config_ops *ops = v->vdpa->config; 184 struct vdpa_device *vdpa = v->vdpa; 185 int ret, irq; 186 187 if (!ops->get_vq_irq) 188 return; 189 190 irq = ops->get_vq_irq(vdpa, qid); 191 if (irq < 0) 192 return; 193 194 irq_bypass_unregister_producer(&vq->call_ctx.producer); 195 if (!vq->call_ctx.ctx) 196 return; 197 198 vq->call_ctx.producer.token = vq->call_ctx.ctx; 199 vq->call_ctx.producer.irq = irq; 200 ret = irq_bypass_register_producer(&vq->call_ctx.producer); 201 if (unlikely(ret)) 202 dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n", 203 qid, vq->call_ctx.producer.token, ret); 204 } 205 206 static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid) 207 { 208 struct vhost_virtqueue *vq = &v->vqs[qid]; 209 210 irq_bypass_unregister_producer(&vq->call_ctx.producer); 211 } 212 213 static int vhost_vdpa_reset(struct vhost_vdpa *v) 214 { 215 struct vdpa_device *vdpa = v->vdpa; 216 217 v->in_batch = 0; 218 219 return vdpa_reset(vdpa); 220 } 221 222 static long vhost_vdpa_bind_mm(struct vhost_vdpa *v) 223 { 224 struct vdpa_device *vdpa = v->vdpa; 225 const struct vdpa_config_ops *ops = vdpa->config; 226 227 if (!vdpa->use_va || !ops->bind_mm) 228 return 0; 229 230 return ops->bind_mm(vdpa, v->vdev.mm); 231 } 232 233 static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v) 234 { 235 struct vdpa_device *vdpa = v->vdpa; 236 const struct vdpa_config_ops *ops = vdpa->config; 237 238 if (!vdpa->use_va || !ops->unbind_mm) 239 return; 240 241 ops->unbind_mm(vdpa); 242 } 243 244 static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp) 245 { 246 struct vdpa_device *vdpa = v->vdpa; 247 const struct vdpa_config_ops *ops = vdpa->config; 248 u32 device_id; 249 250 device_id = ops->get_device_id(vdpa); 251 252 if (copy_to_user(argp, &device_id, sizeof(device_id))) 253 return -EFAULT; 254 255 return 0; 256 } 257 258 static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp) 259 { 260 struct vdpa_device *vdpa = v->vdpa; 261 const struct vdpa_config_ops *ops = vdpa->config; 262 u8 status; 263 264 status = ops->get_status(vdpa); 265 266 if (copy_to_user(statusp, &status, sizeof(status))) 267 return -EFAULT; 268 269 return 0; 270 } 271 272 static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) 273 { 274 struct vdpa_device *vdpa = v->vdpa; 275 const struct vdpa_config_ops *ops = vdpa->config; 276 u8 status, status_old; 277 u32 nvqs = v->nvqs; 278 int ret; 279 u16 i; 280 281 if (copy_from_user(&status, statusp, sizeof(status))) 282 return -EFAULT; 283 284 status_old = ops->get_status(vdpa); 285 286 /* 287 * Userspace shouldn't remove status bits unless reset the 288 * status to 0. 289 */ 290 if (status != 0 && (status_old & ~status) != 0) 291 return -EINVAL; 292 293 if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK)) 294 for (i = 0; i < nvqs; i++) 295 vhost_vdpa_unsetup_vq_irq(v, i); 296 297 if (status == 0) { 298 ret = vdpa_reset(vdpa); 299 if (ret) 300 return ret; 301 } else 302 vdpa_set_status(vdpa, status); 303 304 if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) 305 for (i = 0; i < nvqs; i++) 306 vhost_vdpa_setup_vq_irq(v, i); 307 308 return 0; 309 } 310 311 static int vhost_vdpa_config_validate(struct vhost_vdpa *v, 312 struct vhost_vdpa_config *c) 313 { 314 struct vdpa_device *vdpa = v->vdpa; 315 size_t size = vdpa->config->get_config_size(vdpa); 316 317 if (c->len == 0 || c->off > size) 318 return -EINVAL; 319 320 if (c->len > size - c->off) 321 return -E2BIG; 322 323 return 0; 324 } 325 326 static long vhost_vdpa_get_config(struct vhost_vdpa *v, 327 struct vhost_vdpa_config __user *c) 328 { 329 struct vdpa_device *vdpa = v->vdpa; 330 struct vhost_vdpa_config config; 331 unsigned long size = offsetof(struct vhost_vdpa_config, buf); 332 u8 *buf; 333 334 if (copy_from_user(&config, c, size)) 335 return -EFAULT; 336 if (vhost_vdpa_config_validate(v, &config)) 337 return -EINVAL; 338 buf = kvzalloc(config.len, GFP_KERNEL); 339 if (!buf) 340 return -ENOMEM; 341 342 vdpa_get_config(vdpa, config.off, buf, config.len); 343 344 if (copy_to_user(c->buf, buf, config.len)) { 345 kvfree(buf); 346 return -EFAULT; 347 } 348 349 kvfree(buf); 350 return 0; 351 } 352 353 static long vhost_vdpa_set_config(struct vhost_vdpa *v, 354 struct vhost_vdpa_config __user *c) 355 { 356 struct vdpa_device *vdpa = v->vdpa; 357 struct vhost_vdpa_config config; 358 unsigned long size = offsetof(struct vhost_vdpa_config, buf); 359 u8 *buf; 360 361 if (copy_from_user(&config, c, size)) 362 return -EFAULT; 363 if (vhost_vdpa_config_validate(v, &config)) 364 return -EINVAL; 365 366 buf = vmemdup_user(c->buf, config.len); 367 if (IS_ERR(buf)) 368 return PTR_ERR(buf); 369 370 vdpa_set_config(vdpa, config.off, buf, config.len); 371 372 kvfree(buf); 373 return 0; 374 } 375 376 static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v) 377 { 378 struct vdpa_device *vdpa = v->vdpa; 379 const struct vdpa_config_ops *ops = vdpa->config; 380 381 return ops->suspend; 382 } 383 384 static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v) 385 { 386 struct vdpa_device *vdpa = v->vdpa; 387 const struct vdpa_config_ops *ops = vdpa->config; 388 389 return ops->resume; 390 } 391 392 static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) 393 { 394 struct vdpa_device *vdpa = v->vdpa; 395 const struct vdpa_config_ops *ops = vdpa->config; 396 u64 features; 397 398 features = ops->get_device_features(vdpa); 399 400 if (copy_to_user(featurep, &features, sizeof(features))) 401 return -EFAULT; 402 403 return 0; 404 } 405 406 static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) 407 { 408 struct vdpa_device *vdpa = v->vdpa; 409 const struct vdpa_config_ops *ops = vdpa->config; 410 u64 features; 411 412 /* 413 * It's not allowed to change the features after they have 414 * been negotiated. 415 */ 416 if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK) 417 return -EBUSY; 418 419 if (copy_from_user(&features, featurep, sizeof(features))) 420 return -EFAULT; 421 422 if (vdpa_set_features(vdpa, features)) 423 return -EINVAL; 424 425 return 0; 426 } 427 428 static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp) 429 { 430 struct vdpa_device *vdpa = v->vdpa; 431 const struct vdpa_config_ops *ops = vdpa->config; 432 u16 num; 433 434 num = ops->get_vq_num_max(vdpa); 435 436 if (copy_to_user(argp, &num, sizeof(num))) 437 return -EFAULT; 438 439 return 0; 440 } 441 442 static void vhost_vdpa_config_put(struct vhost_vdpa *v) 443 { 444 if (v->config_ctx) { 445 eventfd_ctx_put(v->config_ctx); 446 v->config_ctx = NULL; 447 } 448 } 449 450 static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) 451 { 452 struct vdpa_callback cb; 453 int fd; 454 struct eventfd_ctx *ctx; 455 456 cb.callback = vhost_vdpa_config_cb; 457 cb.private = v; 458 if (copy_from_user(&fd, argp, sizeof(fd))) 459 return -EFAULT; 460 461 ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd); 462 swap(ctx, v->config_ctx); 463 464 if (!IS_ERR_OR_NULL(ctx)) 465 eventfd_ctx_put(ctx); 466 467 if (IS_ERR(v->config_ctx)) { 468 long ret = PTR_ERR(v->config_ctx); 469 470 v->config_ctx = NULL; 471 return ret; 472 } 473 474 v->vdpa->config->set_config_cb(v->vdpa, &cb); 475 476 return 0; 477 } 478 479 static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp) 480 { 481 struct vhost_vdpa_iova_range range = { 482 .first = v->range.first, 483 .last = v->range.last, 484 }; 485 486 if (copy_to_user(argp, &range, sizeof(range))) 487 return -EFAULT; 488 return 0; 489 } 490 491 static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp) 492 { 493 struct vdpa_device *vdpa = v->vdpa; 494 const struct vdpa_config_ops *ops = vdpa->config; 495 u32 size; 496 497 size = ops->get_config_size(vdpa); 498 499 if (copy_to_user(argp, &size, sizeof(size))) 500 return -EFAULT; 501 502 return 0; 503 } 504 505 static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp) 506 { 507 struct vdpa_device *vdpa = v->vdpa; 508 509 if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs))) 510 return -EFAULT; 511 512 return 0; 513 } 514 515 /* After a successful return of ioctl the device must not process more 516 * virtqueue descriptors. The device can answer to read or writes of config 517 * fields as if it were not suspended. In particular, writing to "queue_enable" 518 * with a value of 1 will not make the device start processing buffers. 519 */ 520 static long vhost_vdpa_suspend(struct vhost_vdpa *v) 521 { 522 struct vdpa_device *vdpa = v->vdpa; 523 const struct vdpa_config_ops *ops = vdpa->config; 524 525 if (!ops->suspend) 526 return -EOPNOTSUPP; 527 528 return ops->suspend(vdpa); 529 } 530 531 /* After a successful return of this ioctl the device resumes processing 532 * virtqueue descriptors. The device becomes fully operational the same way it 533 * was before it was suspended. 534 */ 535 static long vhost_vdpa_resume(struct vhost_vdpa *v) 536 { 537 struct vdpa_device *vdpa = v->vdpa; 538 const struct vdpa_config_ops *ops = vdpa->config; 539 540 if (!ops->resume) 541 return -EOPNOTSUPP; 542 543 return ops->resume(vdpa); 544 } 545 546 static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, 547 void __user *argp) 548 { 549 struct vdpa_device *vdpa = v->vdpa; 550 const struct vdpa_config_ops *ops = vdpa->config; 551 struct vdpa_vq_state vq_state; 552 struct vdpa_callback cb; 553 struct vhost_virtqueue *vq; 554 struct vhost_vring_state s; 555 u32 idx; 556 long r; 557 558 r = get_user(idx, (u32 __user *)argp); 559 if (r < 0) 560 return r; 561 562 if (idx >= v->nvqs) 563 return -ENOBUFS; 564 565 idx = array_index_nospec(idx, v->nvqs); 566 vq = &v->vqs[idx]; 567 568 switch (cmd) { 569 case VHOST_VDPA_SET_VRING_ENABLE: 570 if (copy_from_user(&s, argp, sizeof(s))) 571 return -EFAULT; 572 ops->set_vq_ready(vdpa, idx, s.num); 573 return 0; 574 case VHOST_VDPA_GET_VRING_GROUP: 575 if (!ops->get_vq_group) 576 return -EOPNOTSUPP; 577 s.index = idx; 578 s.num = ops->get_vq_group(vdpa, idx); 579 if (s.num >= vdpa->ngroups) 580 return -EIO; 581 else if (copy_to_user(argp, &s, sizeof(s))) 582 return -EFAULT; 583 return 0; 584 case VHOST_VDPA_SET_GROUP_ASID: 585 if (copy_from_user(&s, argp, sizeof(s))) 586 return -EFAULT; 587 if (s.num >= vdpa->nas) 588 return -EINVAL; 589 if (!ops->set_group_asid) 590 return -EOPNOTSUPP; 591 return ops->set_group_asid(vdpa, idx, s.num); 592 case VHOST_GET_VRING_BASE: 593 r = ops->get_vq_state(v->vdpa, idx, &vq_state); 594 if (r) 595 return r; 596 597 vq->last_avail_idx = vq_state.split.avail_index; 598 break; 599 } 600 601 r = vhost_vring_ioctl(&v->vdev, cmd, argp); 602 if (r) 603 return r; 604 605 switch (cmd) { 606 case VHOST_SET_VRING_ADDR: 607 if (ops->set_vq_address(vdpa, idx, 608 (u64)(uintptr_t)vq->desc, 609 (u64)(uintptr_t)vq->avail, 610 (u64)(uintptr_t)vq->used)) 611 r = -EINVAL; 612 break; 613 614 case VHOST_SET_VRING_BASE: 615 vq_state.split.avail_index = vq->last_avail_idx; 616 if (ops->set_vq_state(vdpa, idx, &vq_state)) 617 r = -EINVAL; 618 break; 619 620 case VHOST_SET_VRING_CALL: 621 if (vq->call_ctx.ctx) { 622 cb.callback = vhost_vdpa_virtqueue_cb; 623 cb.private = vq; 624 cb.trigger = vq->call_ctx.ctx; 625 } else { 626 cb.callback = NULL; 627 cb.private = NULL; 628 cb.trigger = NULL; 629 } 630 ops->set_vq_cb(vdpa, idx, &cb); 631 vhost_vdpa_setup_vq_irq(v, idx); 632 break; 633 634 case VHOST_SET_VRING_NUM: 635 ops->set_vq_num(vdpa, idx, vq->num); 636 break; 637 } 638 639 return r; 640 } 641 642 static long vhost_vdpa_unlocked_ioctl(struct file *filep, 643 unsigned int cmd, unsigned long arg) 644 { 645 struct vhost_vdpa *v = filep->private_data; 646 struct vhost_dev *d = &v->vdev; 647 void __user *argp = (void __user *)arg; 648 u64 __user *featurep = argp; 649 u64 features; 650 long r = 0; 651 652 if (cmd == VHOST_SET_BACKEND_FEATURES) { 653 if (copy_from_user(&features, featurep, sizeof(features))) 654 return -EFAULT; 655 if (features & ~(VHOST_VDPA_BACKEND_FEATURES | 656 BIT_ULL(VHOST_BACKEND_F_SUSPEND) | 657 BIT_ULL(VHOST_BACKEND_F_RESUME))) 658 return -EOPNOTSUPP; 659 if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) && 660 !vhost_vdpa_can_suspend(v)) 661 return -EOPNOTSUPP; 662 if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) && 663 !vhost_vdpa_can_resume(v)) 664 return -EOPNOTSUPP; 665 vhost_set_backend_features(&v->vdev, features); 666 return 0; 667 } 668 669 mutex_lock(&d->mutex); 670 671 switch (cmd) { 672 case VHOST_VDPA_GET_DEVICE_ID: 673 r = vhost_vdpa_get_device_id(v, argp); 674 break; 675 case VHOST_VDPA_GET_STATUS: 676 r = vhost_vdpa_get_status(v, argp); 677 break; 678 case VHOST_VDPA_SET_STATUS: 679 r = vhost_vdpa_set_status(v, argp); 680 break; 681 case VHOST_VDPA_GET_CONFIG: 682 r = vhost_vdpa_get_config(v, argp); 683 break; 684 case VHOST_VDPA_SET_CONFIG: 685 r = vhost_vdpa_set_config(v, argp); 686 break; 687 case VHOST_GET_FEATURES: 688 r = vhost_vdpa_get_features(v, argp); 689 break; 690 case VHOST_SET_FEATURES: 691 r = vhost_vdpa_set_features(v, argp); 692 break; 693 case VHOST_VDPA_GET_VRING_NUM: 694 r = vhost_vdpa_get_vring_num(v, argp); 695 break; 696 case VHOST_VDPA_GET_GROUP_NUM: 697 if (copy_to_user(argp, &v->vdpa->ngroups, 698 sizeof(v->vdpa->ngroups))) 699 r = -EFAULT; 700 break; 701 case VHOST_VDPA_GET_AS_NUM: 702 if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas))) 703 r = -EFAULT; 704 break; 705 case VHOST_SET_LOG_BASE: 706 case VHOST_SET_LOG_FD: 707 r = -ENOIOCTLCMD; 708 break; 709 case VHOST_VDPA_SET_CONFIG_CALL: 710 r = vhost_vdpa_set_config_call(v, argp); 711 break; 712 case VHOST_GET_BACKEND_FEATURES: 713 features = VHOST_VDPA_BACKEND_FEATURES; 714 if (vhost_vdpa_can_suspend(v)) 715 features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND); 716 if (vhost_vdpa_can_resume(v)) 717 features |= BIT_ULL(VHOST_BACKEND_F_RESUME); 718 if (copy_to_user(featurep, &features, sizeof(features))) 719 r = -EFAULT; 720 break; 721 case VHOST_VDPA_GET_IOVA_RANGE: 722 r = vhost_vdpa_get_iova_range(v, argp); 723 break; 724 case VHOST_VDPA_GET_CONFIG_SIZE: 725 r = vhost_vdpa_get_config_size(v, argp); 726 break; 727 case VHOST_VDPA_GET_VQS_COUNT: 728 r = vhost_vdpa_get_vqs_count(v, argp); 729 break; 730 case VHOST_VDPA_SUSPEND: 731 r = vhost_vdpa_suspend(v); 732 break; 733 case VHOST_VDPA_RESUME: 734 r = vhost_vdpa_resume(v); 735 break; 736 default: 737 r = vhost_dev_ioctl(&v->vdev, cmd, argp); 738 if (r == -ENOIOCTLCMD) 739 r = vhost_vdpa_vring_ioctl(v, cmd, argp); 740 break; 741 } 742 743 if (r) 744 goto out; 745 746 switch (cmd) { 747 case VHOST_SET_OWNER: 748 r = vhost_vdpa_bind_mm(v); 749 if (r) 750 vhost_dev_reset_owner(d, NULL); 751 break; 752 } 753 out: 754 mutex_unlock(&d->mutex); 755 return r; 756 } 757 static void vhost_vdpa_general_unmap(struct vhost_vdpa *v, 758 struct vhost_iotlb_map *map, u32 asid) 759 { 760 struct vdpa_device *vdpa = v->vdpa; 761 const struct vdpa_config_ops *ops = vdpa->config; 762 if (ops->dma_map) { 763 ops->dma_unmap(vdpa, asid, map->start, map->size); 764 } else if (ops->set_map == NULL) { 765 iommu_unmap(v->domain, map->start, map->size); 766 } 767 } 768 769 static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, 770 u64 start, u64 last, u32 asid) 771 { 772 struct vhost_dev *dev = &v->vdev; 773 struct vhost_iotlb_map *map; 774 struct page *page; 775 unsigned long pfn, pinned; 776 777 while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) { 778 pinned = PFN_DOWN(map->size); 779 for (pfn = PFN_DOWN(map->addr); 780 pinned > 0; pfn++, pinned--) { 781 page = pfn_to_page(pfn); 782 if (map->perm & VHOST_ACCESS_WO) 783 set_page_dirty_lock(page); 784 unpin_user_page(page); 785 } 786 atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm); 787 vhost_vdpa_general_unmap(v, map, asid); 788 vhost_iotlb_map_free(iotlb, map); 789 } 790 } 791 792 static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, 793 u64 start, u64 last, u32 asid) 794 { 795 struct vhost_iotlb_map *map; 796 struct vdpa_map_file *map_file; 797 798 while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) { 799 map_file = (struct vdpa_map_file *)map->opaque; 800 fput(map_file->file); 801 kfree(map_file); 802 vhost_vdpa_general_unmap(v, map, asid); 803 vhost_iotlb_map_free(iotlb, map); 804 } 805 } 806 807 static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, 808 struct vhost_iotlb *iotlb, u64 start, 809 u64 last, u32 asid) 810 { 811 struct vdpa_device *vdpa = v->vdpa; 812 813 if (vdpa->use_va) 814 return vhost_vdpa_va_unmap(v, iotlb, start, last, asid); 815 816 return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid); 817 } 818 819 static int perm_to_iommu_flags(u32 perm) 820 { 821 int flags = 0; 822 823 switch (perm) { 824 case VHOST_ACCESS_WO: 825 flags |= IOMMU_WRITE; 826 break; 827 case VHOST_ACCESS_RO: 828 flags |= IOMMU_READ; 829 break; 830 case VHOST_ACCESS_RW: 831 flags |= (IOMMU_WRITE | IOMMU_READ); 832 break; 833 default: 834 WARN(1, "invalidate vhost IOTLB permission\n"); 835 break; 836 } 837 838 return flags | IOMMU_CACHE; 839 } 840 841 static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, 842 u64 iova, u64 size, u64 pa, u32 perm, void *opaque) 843 { 844 struct vhost_dev *dev = &v->vdev; 845 struct vdpa_device *vdpa = v->vdpa; 846 const struct vdpa_config_ops *ops = vdpa->config; 847 u32 asid = iotlb_to_asid(iotlb); 848 int r = 0; 849 850 r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1, 851 pa, perm, opaque); 852 if (r) 853 return r; 854 855 if (ops->dma_map) { 856 r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque); 857 } else if (ops->set_map) { 858 if (!v->in_batch) 859 r = ops->set_map(vdpa, asid, iotlb); 860 } else { 861 r = iommu_map(v->domain, iova, pa, size, 862 perm_to_iommu_flags(perm), GFP_KERNEL); 863 } 864 if (r) { 865 vhost_iotlb_del_range(iotlb, iova, iova + size - 1); 866 return r; 867 } 868 869 if (!vdpa->use_va) 870 atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm); 871 872 return 0; 873 } 874 875 static void vhost_vdpa_unmap(struct vhost_vdpa *v, 876 struct vhost_iotlb *iotlb, 877 u64 iova, u64 size) 878 { 879 struct vdpa_device *vdpa = v->vdpa; 880 const struct vdpa_config_ops *ops = vdpa->config; 881 u32 asid = iotlb_to_asid(iotlb); 882 883 vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid); 884 885 if (ops->set_map) { 886 if (!v->in_batch) 887 ops->set_map(vdpa, asid, iotlb); 888 } 889 /* If we are in the middle of batch processing, delay the free 890 * of AS until BATCH_END. 891 */ 892 if (!v->in_batch && !iotlb->nmaps) 893 vhost_vdpa_remove_as(v, asid); 894 } 895 896 static int vhost_vdpa_va_map(struct vhost_vdpa *v, 897 struct vhost_iotlb *iotlb, 898 u64 iova, u64 size, u64 uaddr, u32 perm) 899 { 900 struct vhost_dev *dev = &v->vdev; 901 u64 offset, map_size, map_iova = iova; 902 struct vdpa_map_file *map_file; 903 struct vm_area_struct *vma; 904 int ret = 0; 905 906 mmap_read_lock(dev->mm); 907 908 while (size) { 909 vma = find_vma(dev->mm, uaddr); 910 if (!vma) { 911 ret = -EINVAL; 912 break; 913 } 914 map_size = min(size, vma->vm_end - uaddr); 915 if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) && 916 !(vma->vm_flags & (VM_IO | VM_PFNMAP)))) 917 goto next; 918 919 map_file = kzalloc(sizeof(*map_file), GFP_KERNEL); 920 if (!map_file) { 921 ret = -ENOMEM; 922 break; 923 } 924 offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start; 925 map_file->offset = offset; 926 map_file->file = get_file(vma->vm_file); 927 ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr, 928 perm, map_file); 929 if (ret) { 930 fput(map_file->file); 931 kfree(map_file); 932 break; 933 } 934 next: 935 size -= map_size; 936 uaddr += map_size; 937 map_iova += map_size; 938 } 939 if (ret) 940 vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova); 941 942 mmap_read_unlock(dev->mm); 943 944 return ret; 945 } 946 947 static int vhost_vdpa_pa_map(struct vhost_vdpa *v, 948 struct vhost_iotlb *iotlb, 949 u64 iova, u64 size, u64 uaddr, u32 perm) 950 { 951 struct vhost_dev *dev = &v->vdev; 952 struct page **page_list; 953 unsigned long list_size = PAGE_SIZE / sizeof(struct page *); 954 unsigned int gup_flags = FOLL_LONGTERM; 955 unsigned long npages, cur_base, map_pfn, last_pfn = 0; 956 unsigned long lock_limit, sz2pin, nchunks, i; 957 u64 start = iova; 958 long pinned; 959 int ret = 0; 960 961 /* Limit the use of memory for bookkeeping */ 962 page_list = (struct page **) __get_free_page(GFP_KERNEL); 963 if (!page_list) 964 return -ENOMEM; 965 966 if (perm & VHOST_ACCESS_WO) 967 gup_flags |= FOLL_WRITE; 968 969 npages = PFN_UP(size + (iova & ~PAGE_MASK)); 970 if (!npages) { 971 ret = -EINVAL; 972 goto free; 973 } 974 975 mmap_read_lock(dev->mm); 976 977 lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK)); 978 if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { 979 ret = -ENOMEM; 980 goto unlock; 981 } 982 983 cur_base = uaddr & PAGE_MASK; 984 iova &= PAGE_MASK; 985 nchunks = 0; 986 987 while (npages) { 988 sz2pin = min_t(unsigned long, npages, list_size); 989 pinned = pin_user_pages(cur_base, sz2pin, 990 gup_flags, page_list, NULL); 991 if (sz2pin != pinned) { 992 if (pinned < 0) { 993 ret = pinned; 994 } else { 995 unpin_user_pages(page_list, pinned); 996 ret = -ENOMEM; 997 } 998 goto out; 999 } 1000 nchunks++; 1001 1002 if (!last_pfn) 1003 map_pfn = page_to_pfn(page_list[0]); 1004 1005 for (i = 0; i < pinned; i++) { 1006 unsigned long this_pfn = page_to_pfn(page_list[i]); 1007 u64 csize; 1008 1009 if (last_pfn && (this_pfn != last_pfn + 1)) { 1010 /* Pin a contiguous chunk of memory */ 1011 csize = PFN_PHYS(last_pfn - map_pfn + 1); 1012 ret = vhost_vdpa_map(v, iotlb, iova, csize, 1013 PFN_PHYS(map_pfn), 1014 perm, NULL); 1015 if (ret) { 1016 /* 1017 * Unpin the pages that are left unmapped 1018 * from this point on in the current 1019 * page_list. The remaining outstanding 1020 * ones which may stride across several 1021 * chunks will be covered in the common 1022 * error path subsequently. 1023 */ 1024 unpin_user_pages(&page_list[i], 1025 pinned - i); 1026 goto out; 1027 } 1028 1029 map_pfn = this_pfn; 1030 iova += csize; 1031 nchunks = 0; 1032 } 1033 1034 last_pfn = this_pfn; 1035 } 1036 1037 cur_base += PFN_PHYS(pinned); 1038 npages -= pinned; 1039 } 1040 1041 /* Pin the rest chunk */ 1042 ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1), 1043 PFN_PHYS(map_pfn), perm, NULL); 1044 out: 1045 if (ret) { 1046 if (nchunks) { 1047 unsigned long pfn; 1048 1049 /* 1050 * Unpin the outstanding pages which are yet to be 1051 * mapped but haven't due to vdpa_map() or 1052 * pin_user_pages() failure. 1053 * 1054 * Mapped pages are accounted in vdpa_map(), hence 1055 * the corresponding unpinning will be handled by 1056 * vdpa_unmap(). 1057 */ 1058 WARN_ON(!last_pfn); 1059 for (pfn = map_pfn; pfn <= last_pfn; pfn++) 1060 unpin_user_page(pfn_to_page(pfn)); 1061 } 1062 vhost_vdpa_unmap(v, iotlb, start, size); 1063 } 1064 unlock: 1065 mmap_read_unlock(dev->mm); 1066 free: 1067 free_page((unsigned long)page_list); 1068 return ret; 1069 1070 } 1071 1072 static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, 1073 struct vhost_iotlb *iotlb, 1074 struct vhost_iotlb_msg *msg) 1075 { 1076 struct vdpa_device *vdpa = v->vdpa; 1077 1078 if (msg->iova < v->range.first || !msg->size || 1079 msg->iova > U64_MAX - msg->size + 1 || 1080 msg->iova + msg->size - 1 > v->range.last) 1081 return -EINVAL; 1082 1083 if (vhost_iotlb_itree_first(iotlb, msg->iova, 1084 msg->iova + msg->size - 1)) 1085 return -EEXIST; 1086 1087 if (vdpa->use_va) 1088 return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size, 1089 msg->uaddr, msg->perm); 1090 1091 return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr, 1092 msg->perm); 1093 } 1094 1095 static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid, 1096 struct vhost_iotlb_msg *msg) 1097 { 1098 struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev); 1099 struct vdpa_device *vdpa = v->vdpa; 1100 const struct vdpa_config_ops *ops = vdpa->config; 1101 struct vhost_iotlb *iotlb = NULL; 1102 struct vhost_vdpa_as *as = NULL; 1103 int r = 0; 1104 1105 mutex_lock(&dev->mutex); 1106 1107 r = vhost_dev_check_owner(dev); 1108 if (r) 1109 goto unlock; 1110 1111 if (msg->type == VHOST_IOTLB_UPDATE || 1112 msg->type == VHOST_IOTLB_BATCH_BEGIN) { 1113 as = vhost_vdpa_find_alloc_as(v, asid); 1114 if (!as) { 1115 dev_err(&v->dev, "can't find and alloc asid %d\n", 1116 asid); 1117 r = -EINVAL; 1118 goto unlock; 1119 } 1120 iotlb = &as->iotlb; 1121 } else 1122 iotlb = asid_to_iotlb(v, asid); 1123 1124 if ((v->in_batch && v->batch_asid != asid) || !iotlb) { 1125 if (v->in_batch && v->batch_asid != asid) { 1126 dev_info(&v->dev, "batch id %d asid %d\n", 1127 v->batch_asid, asid); 1128 } 1129 if (!iotlb) 1130 dev_err(&v->dev, "no iotlb for asid %d\n", asid); 1131 r = -EINVAL; 1132 goto unlock; 1133 } 1134 1135 switch (msg->type) { 1136 case VHOST_IOTLB_UPDATE: 1137 r = vhost_vdpa_process_iotlb_update(v, iotlb, msg); 1138 break; 1139 case VHOST_IOTLB_INVALIDATE: 1140 vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size); 1141 break; 1142 case VHOST_IOTLB_BATCH_BEGIN: 1143 v->batch_asid = asid; 1144 v->in_batch = true; 1145 break; 1146 case VHOST_IOTLB_BATCH_END: 1147 if (v->in_batch && ops->set_map) 1148 ops->set_map(vdpa, asid, iotlb); 1149 v->in_batch = false; 1150 if (!iotlb->nmaps) 1151 vhost_vdpa_remove_as(v, asid); 1152 break; 1153 default: 1154 r = -EINVAL; 1155 break; 1156 } 1157 unlock: 1158 mutex_unlock(&dev->mutex); 1159 1160 return r; 1161 } 1162 1163 static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb, 1164 struct iov_iter *from) 1165 { 1166 struct file *file = iocb->ki_filp; 1167 struct vhost_vdpa *v = file->private_data; 1168 struct vhost_dev *dev = &v->vdev; 1169 1170 return vhost_chr_write_iter(dev, from); 1171 } 1172 1173 static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v) 1174 { 1175 struct vdpa_device *vdpa = v->vdpa; 1176 const struct vdpa_config_ops *ops = vdpa->config; 1177 struct device *dma_dev = vdpa_get_dma_dev(vdpa); 1178 struct bus_type *bus; 1179 int ret; 1180 1181 /* Device want to do DMA by itself */ 1182 if (ops->set_map || ops->dma_map) 1183 return 0; 1184 1185 bus = dma_dev->bus; 1186 if (!bus) 1187 return -EFAULT; 1188 1189 if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) { 1190 dev_warn_once(&v->dev, 1191 "Failed to allocate domain, device is not IOMMU cache coherent capable\n"); 1192 return -ENOTSUPP; 1193 } 1194 1195 v->domain = iommu_domain_alloc(bus); 1196 if (!v->domain) 1197 return -EIO; 1198 1199 ret = iommu_attach_device(v->domain, dma_dev); 1200 if (ret) 1201 goto err_attach; 1202 1203 return 0; 1204 1205 err_attach: 1206 iommu_domain_free(v->domain); 1207 v->domain = NULL; 1208 return ret; 1209 } 1210 1211 static void vhost_vdpa_free_domain(struct vhost_vdpa *v) 1212 { 1213 struct vdpa_device *vdpa = v->vdpa; 1214 struct device *dma_dev = vdpa_get_dma_dev(vdpa); 1215 1216 if (v->domain) { 1217 iommu_detach_device(v->domain, dma_dev); 1218 iommu_domain_free(v->domain); 1219 } 1220 1221 v->domain = NULL; 1222 } 1223 1224 static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v) 1225 { 1226 struct vdpa_iova_range *range = &v->range; 1227 struct vdpa_device *vdpa = v->vdpa; 1228 const struct vdpa_config_ops *ops = vdpa->config; 1229 1230 if (ops->get_iova_range) { 1231 *range = ops->get_iova_range(vdpa); 1232 } else if (v->domain && v->domain->geometry.force_aperture) { 1233 range->first = v->domain->geometry.aperture_start; 1234 range->last = v->domain->geometry.aperture_end; 1235 } else { 1236 range->first = 0; 1237 range->last = ULLONG_MAX; 1238 } 1239 } 1240 1241 static void vhost_vdpa_cleanup(struct vhost_vdpa *v) 1242 { 1243 struct vhost_vdpa_as *as; 1244 u32 asid; 1245 1246 for (asid = 0; asid < v->vdpa->nas; asid++) { 1247 as = asid_to_as(v, asid); 1248 if (as) 1249 vhost_vdpa_remove_as(v, asid); 1250 } 1251 1252 vhost_vdpa_free_domain(v); 1253 vhost_dev_cleanup(&v->vdev); 1254 kfree(v->vdev.vqs); 1255 } 1256 1257 static int vhost_vdpa_open(struct inode *inode, struct file *filep) 1258 { 1259 struct vhost_vdpa *v; 1260 struct vhost_dev *dev; 1261 struct vhost_virtqueue **vqs; 1262 int r, opened; 1263 u32 i, nvqs; 1264 1265 v = container_of(inode->i_cdev, struct vhost_vdpa, cdev); 1266 1267 opened = atomic_cmpxchg(&v->opened, 0, 1); 1268 if (opened) 1269 return -EBUSY; 1270 1271 nvqs = v->nvqs; 1272 r = vhost_vdpa_reset(v); 1273 if (r) 1274 goto err; 1275 1276 vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL); 1277 if (!vqs) { 1278 r = -ENOMEM; 1279 goto err; 1280 } 1281 1282 dev = &v->vdev; 1283 for (i = 0; i < nvqs; i++) { 1284 vqs[i] = &v->vqs[i]; 1285 vqs[i]->handle_kick = handle_vq_kick; 1286 } 1287 vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false, 1288 vhost_vdpa_process_iotlb_msg); 1289 1290 r = vhost_vdpa_alloc_domain(v); 1291 if (r) 1292 goto err_alloc_domain; 1293 1294 vhost_vdpa_set_iova_range(v); 1295 1296 filep->private_data = v; 1297 1298 return 0; 1299 1300 err_alloc_domain: 1301 vhost_vdpa_cleanup(v); 1302 err: 1303 atomic_dec(&v->opened); 1304 return r; 1305 } 1306 1307 static void vhost_vdpa_clean_irq(struct vhost_vdpa *v) 1308 { 1309 u32 i; 1310 1311 for (i = 0; i < v->nvqs; i++) 1312 vhost_vdpa_unsetup_vq_irq(v, i); 1313 } 1314 1315 static int vhost_vdpa_release(struct inode *inode, struct file *filep) 1316 { 1317 struct vhost_vdpa *v = filep->private_data; 1318 struct vhost_dev *d = &v->vdev; 1319 1320 mutex_lock(&d->mutex); 1321 filep->private_data = NULL; 1322 vhost_vdpa_clean_irq(v); 1323 vhost_vdpa_reset(v); 1324 vhost_dev_stop(&v->vdev); 1325 vhost_vdpa_unbind_mm(v); 1326 vhost_vdpa_config_put(v); 1327 vhost_vdpa_cleanup(v); 1328 mutex_unlock(&d->mutex); 1329 1330 atomic_dec(&v->opened); 1331 complete(&v->completion); 1332 1333 return 0; 1334 } 1335 1336 #ifdef CONFIG_MMU 1337 static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf) 1338 { 1339 struct vhost_vdpa *v = vmf->vma->vm_file->private_data; 1340 struct vdpa_device *vdpa = v->vdpa; 1341 const struct vdpa_config_ops *ops = vdpa->config; 1342 struct vdpa_notification_area notify; 1343 struct vm_area_struct *vma = vmf->vma; 1344 u16 index = vma->vm_pgoff; 1345 1346 notify = ops->get_vq_notification(vdpa, index); 1347 1348 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 1349 if (remap_pfn_range(vma, vmf->address & PAGE_MASK, 1350 PFN_DOWN(notify.addr), PAGE_SIZE, 1351 vma->vm_page_prot)) 1352 return VM_FAULT_SIGBUS; 1353 1354 return VM_FAULT_NOPAGE; 1355 } 1356 1357 static const struct vm_operations_struct vhost_vdpa_vm_ops = { 1358 .fault = vhost_vdpa_fault, 1359 }; 1360 1361 static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma) 1362 { 1363 struct vhost_vdpa *v = vma->vm_file->private_data; 1364 struct vdpa_device *vdpa = v->vdpa; 1365 const struct vdpa_config_ops *ops = vdpa->config; 1366 struct vdpa_notification_area notify; 1367 unsigned long index = vma->vm_pgoff; 1368 1369 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 1370 return -EINVAL; 1371 if ((vma->vm_flags & VM_SHARED) == 0) 1372 return -EINVAL; 1373 if (vma->vm_flags & VM_READ) 1374 return -EINVAL; 1375 if (index > 65535) 1376 return -EINVAL; 1377 if (!ops->get_vq_notification) 1378 return -ENOTSUPP; 1379 1380 /* To be safe and easily modelled by userspace, We only 1381 * support the doorbell which sits on the page boundary and 1382 * does not share the page with other registers. 1383 */ 1384 notify = ops->get_vq_notification(vdpa, index); 1385 if (notify.addr & (PAGE_SIZE - 1)) 1386 return -EINVAL; 1387 if (vma->vm_end - vma->vm_start != notify.size) 1388 return -ENOTSUPP; 1389 1390 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); 1391 vma->vm_ops = &vhost_vdpa_vm_ops; 1392 return 0; 1393 } 1394 #endif /* CONFIG_MMU */ 1395 1396 static const struct file_operations vhost_vdpa_fops = { 1397 .owner = THIS_MODULE, 1398 .open = vhost_vdpa_open, 1399 .release = vhost_vdpa_release, 1400 .write_iter = vhost_vdpa_chr_write_iter, 1401 .unlocked_ioctl = vhost_vdpa_unlocked_ioctl, 1402 #ifdef CONFIG_MMU 1403 .mmap = vhost_vdpa_mmap, 1404 #endif /* CONFIG_MMU */ 1405 .compat_ioctl = compat_ptr_ioctl, 1406 }; 1407 1408 static void vhost_vdpa_release_dev(struct device *device) 1409 { 1410 struct vhost_vdpa *v = 1411 container_of(device, struct vhost_vdpa, dev); 1412 1413 ida_simple_remove(&vhost_vdpa_ida, v->minor); 1414 kfree(v->vqs); 1415 kfree(v); 1416 } 1417 1418 static int vhost_vdpa_probe(struct vdpa_device *vdpa) 1419 { 1420 const struct vdpa_config_ops *ops = vdpa->config; 1421 struct vhost_vdpa *v; 1422 int minor; 1423 int i, r; 1424 1425 /* We can't support platform IOMMU device with more than 1 1426 * group or as 1427 */ 1428 if (!ops->set_map && !ops->dma_map && 1429 (vdpa->ngroups > 1 || vdpa->nas > 1)) 1430 return -EOPNOTSUPP; 1431 1432 v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL); 1433 if (!v) 1434 return -ENOMEM; 1435 1436 minor = ida_simple_get(&vhost_vdpa_ida, 0, 1437 VHOST_VDPA_DEV_MAX, GFP_KERNEL); 1438 if (minor < 0) { 1439 kfree(v); 1440 return minor; 1441 } 1442 1443 atomic_set(&v->opened, 0); 1444 v->minor = minor; 1445 v->vdpa = vdpa; 1446 v->nvqs = vdpa->nvqs; 1447 v->virtio_id = ops->get_device_id(vdpa); 1448 1449 device_initialize(&v->dev); 1450 v->dev.release = vhost_vdpa_release_dev; 1451 v->dev.parent = &vdpa->dev; 1452 v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor); 1453 v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue), 1454 GFP_KERNEL); 1455 if (!v->vqs) { 1456 r = -ENOMEM; 1457 goto err; 1458 } 1459 1460 r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor); 1461 if (r) 1462 goto err; 1463 1464 cdev_init(&v->cdev, &vhost_vdpa_fops); 1465 v->cdev.owner = THIS_MODULE; 1466 1467 r = cdev_device_add(&v->cdev, &v->dev); 1468 if (r) 1469 goto err; 1470 1471 init_completion(&v->completion); 1472 vdpa_set_drvdata(vdpa, v); 1473 1474 for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++) 1475 INIT_HLIST_HEAD(&v->as[i]); 1476 1477 return 0; 1478 1479 err: 1480 put_device(&v->dev); 1481 ida_simple_remove(&vhost_vdpa_ida, v->minor); 1482 return r; 1483 } 1484 1485 static void vhost_vdpa_remove(struct vdpa_device *vdpa) 1486 { 1487 struct vhost_vdpa *v = vdpa_get_drvdata(vdpa); 1488 int opened; 1489 1490 cdev_device_del(&v->cdev, &v->dev); 1491 1492 do { 1493 opened = atomic_cmpxchg(&v->opened, 0, 1); 1494 if (!opened) 1495 break; 1496 wait_for_completion(&v->completion); 1497 } while (1); 1498 1499 put_device(&v->dev); 1500 } 1501 1502 static struct vdpa_driver vhost_vdpa_driver = { 1503 .driver = { 1504 .name = "vhost_vdpa", 1505 }, 1506 .probe = vhost_vdpa_probe, 1507 .remove = vhost_vdpa_remove, 1508 }; 1509 1510 static int __init vhost_vdpa_init(void) 1511 { 1512 int r; 1513 1514 r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX, 1515 "vhost-vdpa"); 1516 if (r) 1517 goto err_alloc_chrdev; 1518 1519 r = vdpa_register_driver(&vhost_vdpa_driver); 1520 if (r) 1521 goto err_vdpa_register_driver; 1522 1523 return 0; 1524 1525 err_vdpa_register_driver: 1526 unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX); 1527 err_alloc_chrdev: 1528 return r; 1529 } 1530 module_init(vhost_vdpa_init); 1531 1532 static void __exit vhost_vdpa_exit(void) 1533 { 1534 vdpa_unregister_driver(&vhost_vdpa_driver); 1535 unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX); 1536 } 1537 module_exit(vhost_vdpa_exit); 1538 1539 MODULE_VERSION("0.0.1"); 1540 MODULE_LICENSE("GPL v2"); 1541 MODULE_AUTHOR("Intel Corporation"); 1542 MODULE_DESCRIPTION("vDPA-based vhost backend for virtio"); 1543