// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018-2020 Intel Corporation.
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * Author: Tiwei Bie <tiwei.bie@intel.com>
 *         Jason Wang <jasowang@redhat.com>
 *
 * Thanks to Michael S. Tsirkin for the valuable comments and
 * suggestions, and thanks to Cunming Liang and Zhihong Wang for all
 * their support.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>

#include "vhost.h"

enum {
	VHOST_VDPA_BACKEND_FEATURES =
	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
};

#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)

struct vhost_vdpa {
	struct vhost_dev vdev;
	struct iommu_domain *domain;
	struct vhost_virtqueue *vqs;
	struct completion completion;
	struct vdpa_device *vdpa;
	struct device dev;
	struct cdev cdev;
	atomic_t opened;
	u32 nvqs;
	int virtio_id;
	int minor;
	struct eventfd_ctx *config_ctx;
	int in_batch;
	struct vdpa_iova_range range;
};

static DEFINE_IDA(vhost_vdpa_ida);

static dev_t vhost_vdpa_major;

static void handle_vq_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
	const struct vdpa_config_ops *ops = v->vdpa->config;

	ops->kick_vq(v->vdpa, vq - v->vqs);
}

static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
{
	struct vhost_virtqueue *vq = private;
	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;

	if (call_ctx)
		eventfd_signal(call_ctx, 1);

	return IRQ_HANDLED;
}

static irqreturn_t vhost_vdpa_config_cb(void *private)
{
	struct vhost_vdpa *v = private;
	struct eventfd_ctx *config_ctx = v->config_ctx;

	if (config_ctx)
		eventfd_signal(config_ctx, 1);

	return IRQ_HANDLED;
}

static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];
	const struct vdpa_config_ops *ops = v->vdpa->config;
	struct vdpa_device *vdpa = v->vdpa;
	int ret, irq;

	if (!ops->get_vq_irq)
		return;

	irq = ops->get_vq_irq(vdpa, qid);
	if (irq < 0)
		return;

	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	if (!vq->call_ctx.ctx)
		return;

	vq->call_ctx.producer.token = vq->call_ctx.ctx;
	vq->call_ctx.producer.irq = irq;
	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
	if (unlikely(ret))
		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n",
			 qid, vq->call_ctx.producer.token, ret);
}

static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];

	irq_bypass_unregister_producer(&vq->call_ctx.producer);
}

static int vhost_vdpa_reset(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;

	v->in_batch = 0;

	return vdpa_reset(vdpa);
}

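/*
 * VHOST_VDPA_GET_DEVICE_ID: report the virtio device ID (as returned by
 * the parent's get_device_id op) to userspace.
 */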
static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 device_id;

	device_id = ops->get_device_id(vdpa);

	if (copy_to_user(argp, &device_id, sizeof(device_id)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status;

	status = ops->get_status(vdpa);

	if (copy_to_user(statusp, &status, sizeof(status)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status, status_old;
	u32 nvqs = v->nvqs;
	int ret;
	u16 i;

	if (copy_from_user(&status, statusp, sizeof(status)))
		return -EFAULT;

	status_old = ops->get_status(vdpa);

	/*
	 * Userspace shouldn't remove status bits unless resetting
	 * the status to 0.
	 */
	if (status != 0 && (status_old & ~status) != 0)
		return -EINVAL;

	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_unsetup_vq_irq(v, i);

	if (status == 0) {
		ret = vdpa_reset(vdpa);
		if (ret)
			return ret;
	} else
		vdpa_set_status(vdpa, status);

	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_setup_vq_irq(v, i);

	return 0;
}

static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
				      struct vhost_vdpa_config *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	size_t size = vdpa->config->get_config_size(vdpa);

	if (c->len == 0 || c->off > size)
		return -EINVAL;

	if (c->len > size - c->off)
		return -E2BIG;

	return 0;
}

static long vhost_vdpa_get_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vdpa_get_config(vdpa, config.off, buf, config.len);

	if (copy_to_user(c->buf, buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_set_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;

	buf = vmemdup_user(c->buf, config.len);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	vdpa_set_config(vdpa, config.off, buf, config.len);

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	features = ops->get_device_features(vdpa);

	if (copy_to_user(featurep, &features, sizeof(features)))
		return -EFAULT;

	return 0;
}

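/*
 * VHOST_SET_FEATURES: commit the driver feature bits to the vDPA device.
 * Rejected with -EBUSY once FEATURES_OK has been set, since features
 * cannot change after negotiation has completed.
 */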
static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	/*
	 * It's not allowed to change the features after they have
	 * been negotiated.
	 */
	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
		return -EBUSY;

	if (copy_from_user(&features, featurep, sizeof(features)))
		return -EFAULT;

	if (vdpa_set_features(vdpa, features))
		return -EINVAL;

	return 0;
}

static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u16 num;

	num = ops->get_vq_num_max(vdpa);

	if (copy_to_user(argp, &num, sizeof(num)))
		return -EFAULT;

	return 0;
}

static void vhost_vdpa_config_put(struct vhost_vdpa *v)
{
	if (v->config_ctx) {
		eventfd_ctx_put(v->config_ctx);
		v->config_ctx = NULL;
	}
}

static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_callback cb;
	int fd;
	struct eventfd_ctx *ctx;

	cb.callback = vhost_vdpa_config_cb;
	cb.private = v;
	if (copy_from_user(&fd, argp, sizeof(fd)))
		return -EFAULT;

	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
	swap(ctx, v->config_ctx);

	if (!IS_ERR_OR_NULL(ctx))
		eventfd_ctx_put(ctx);

	if (IS_ERR(v->config_ctx)) {
		long ret = PTR_ERR(v->config_ctx);

		v->config_ctx = NULL;
		return ret;
	}

	v->vdpa->config->set_config_cb(v->vdpa, &cb);

	return 0;
}

static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vhost_vdpa_iova_range range = {
		.first = v->range.first,
		.last = v->range.last,
	};

	if (copy_to_user(argp, &range, sizeof(range)))
		return -EFAULT;
	return 0;
}

static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 size;

	size = ops->get_config_size(vdpa);

	if (copy_to_user(argp, &size, sizeof(size)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;

	if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs)))
		return -EFAULT;

	return 0;
}

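/*
 * Per-virtqueue ioctls: the virtqueue index is read and bounds-checked
 * first, generic vring state is handled by vhost_vring_ioctl(), and the
 * result is then propagated to the device through the vdpa config ops
 * (vring address, base, call eventfd and size).
 */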
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
				   void __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_vq_state vq_state;
	struct vdpa_callback cb;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	u32 idx;
	long r;

	r = get_user(idx, (u32 __user *)argp);
	if (r < 0)
		return r;

	if (idx >= v->nvqs)
		return -ENOBUFS;

	idx = array_index_nospec(idx, v->nvqs);
	vq = &v->vqs[idx];

	switch (cmd) {
	case VHOST_VDPA_SET_VRING_ENABLE:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		ops->set_vq_ready(vdpa, idx, s.num);
		return 0;
	case VHOST_GET_VRING_BASE:
		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
		if (r)
			return r;

		vq->last_avail_idx = vq_state.split.avail_index;
		break;
	}

	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
	if (r)
		return r;

	switch (cmd) {
	case VHOST_SET_VRING_ADDR:
		if (ops->set_vq_address(vdpa, idx,
					(u64)(uintptr_t)vq->desc,
					(u64)(uintptr_t)vq->avail,
					(u64)(uintptr_t)vq->used))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_BASE:
		vq_state.split.avail_index = vq->last_avail_idx;
		if (ops->set_vq_state(vdpa, idx, &vq_state))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_CALL:
		if (vq->call_ctx.ctx) {
			cb.callback = vhost_vdpa_virtqueue_cb;
			cb.private = vq;
		} else {
			cb.callback = NULL;
			cb.private = NULL;
		}
		ops->set_vq_cb(vdpa, idx, &cb);
		vhost_vdpa_setup_vq_irq(v, idx);
		break;

	case VHOST_SET_VRING_NUM:
		ops->set_vq_num(vdpa, idx, vq->num);
		break;
	}

	return r;
}

static long vhost_vdpa_unlocked_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;
	void __user *argp = (void __user *)arg;
	u64 __user *featurep = argp;
	u64 features;
	long r = 0;

	if (cmd == VHOST_SET_BACKEND_FEATURES) {
		if (copy_from_user(&features, featurep, sizeof(features)))
			return -EFAULT;
		if (features & ~VHOST_VDPA_BACKEND_FEATURES)
			return -EOPNOTSUPP;
		vhost_set_backend_features(&v->vdev, features);
		return 0;
	}

	mutex_lock(&d->mutex);

	switch (cmd) {
	case VHOST_VDPA_GET_DEVICE_ID:
		r = vhost_vdpa_get_device_id(v, argp);
		break;
	case VHOST_VDPA_GET_STATUS:
		r = vhost_vdpa_get_status(v, argp);
		break;
	case VHOST_VDPA_SET_STATUS:
		r = vhost_vdpa_set_status(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG:
		r = vhost_vdpa_get_config(v, argp);
		break;
	case VHOST_VDPA_SET_CONFIG:
		r = vhost_vdpa_set_config(v, argp);
		break;
	case VHOST_GET_FEATURES:
		r = vhost_vdpa_get_features(v, argp);
		break;
	case VHOST_SET_FEATURES:
		r = vhost_vdpa_set_features(v, argp);
		break;
	case VHOST_VDPA_GET_VRING_NUM:
		r = vhost_vdpa_get_vring_num(v, argp);
		break;
	case VHOST_SET_LOG_BASE:
	case VHOST_SET_LOG_FD:
		r = -ENOIOCTLCMD;
		break;
	case VHOST_VDPA_SET_CONFIG_CALL:
		r = vhost_vdpa_set_config_call(v, argp);
		break;
	case VHOST_GET_BACKEND_FEATURES:
		features = VHOST_VDPA_BACKEND_FEATURES;
		if (copy_to_user(featurep, &features, sizeof(features)))
			r = -EFAULT;
		break;
	case VHOST_VDPA_GET_IOVA_RANGE:
		r = vhost_vdpa_get_iova_range(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG_SIZE:
		r = vhost_vdpa_get_config_size(v, argp);
		break;
	case VHOST_VDPA_GET_VQS_COUNT:
		r = vhost_vdpa_get_vqs_count(v, argp);
		break;
	default:
		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
		break;
	}

	mutex_unlock(&d->mutex);
	return r;
}

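/*
 * IOTLB teardown helpers: for PA-backed mappings the pinned pages are
 * dirtied (if writable) and unpinned and the pinned-page accounting is
 * updated; for VA-backed (shared file) mappings the file references are
 * dropped instead.
 */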
static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct vhost_iotlb_map *map;
	struct page *page;
	unsigned long pfn, pinned;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		pinned = PFN_DOWN(map->size);
		for (pfn = PFN_DOWN(map->addr);
		     pinned > 0; pfn++, pinned--) {
			page = pfn_to_page(pfn);
			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(page);
			unpin_user_page(page);
		}
		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct vhost_iotlb_map *map;
	struct vdpa_map_file *map_file;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		map_file = (struct vdpa_map_file *)map->opaque;
		fput(map_file->file);
		kfree(map_file);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
	struct vdpa_device *vdpa = v->vdpa;

	if (vdpa->use_va)
		return vhost_vdpa_va_unmap(v, start, last);

	return vhost_vdpa_pa_unmap(v, start, last);
}

static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
{
	struct vhost_dev *dev = &v->vdev;

	vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1);
	kfree(dev->iotlb);
	dev->iotlb = NULL;
}

static int perm_to_iommu_flags(u32 perm)
{
	int flags = 0;

	switch (perm) {
	case VHOST_ACCESS_WO:
		flags |= IOMMU_WRITE;
		break;
	case VHOST_ACCESS_RO:
		flags |= IOMMU_READ;
		break;
	case VHOST_ACCESS_RW:
		flags |= (IOMMU_WRITE | IOMMU_READ);
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags | IOMMU_CACHE;
}

static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
			  u64 size, u64 pa, u32 perm, void *opaque)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int r = 0;

	r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1,
				      pa, perm, opaque);
	if (r)
		return r;

	if (ops->dma_map) {
		r = ops->dma_map(vdpa, iova, size, pa, perm, opaque);
	} else if (ops->set_map) {
		if (!v->in_batch)
			r = ops->set_map(vdpa, dev->iotlb);
	} else {
		r = iommu_map(v->domain, iova, pa, size,
			      perm_to_iommu_flags(perm));
	}
	if (r) {
		vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
		return r;
	}

	if (!vdpa->use_va)
		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);

	return 0;
}

static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1);

	if (ops->dma_map) {
		ops->dma_unmap(vdpa, iova, size);
	} else if (ops->set_map) {
		if (!v->in_batch)
			ops->set_map(vdpa, dev->iotlb);
	} else {
		iommu_unmap(v->domain, iova, size);
	}
}

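/*
 * Map a userspace virtual address range for a device that uses VA
 * (vdpa->use_va): walk the VMAs backing the range, take a reference on
 * each shared file mapping and record the file offset as the opaque
 * context of the IOTLB entry; non-file or private VMAs are skipped.
 */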
static int vhost_vdpa_va_map(struct vhost_vdpa *v,
			     u64 iova, u64 size, u64 uaddr, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	u64 offset, map_size, map_iova = iova;
	struct vdpa_map_file *map_file;
	struct vm_area_struct *vma;
	int ret = 0;

	mmap_read_lock(dev->mm);

	while (size) {
		vma = find_vma(dev->mm, uaddr);
		if (!vma) {
			ret = -EINVAL;
			break;
		}
		map_size = min(size, vma->vm_end - uaddr);
		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
			!(vma->vm_flags & (VM_IO | VM_PFNMAP))))
			goto next;

		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
		if (!map_file) {
			ret = -ENOMEM;
			break;
		}
		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
		map_file->offset = offset;
		map_file->file = get_file(vma->vm_file);
		ret = vhost_vdpa_map(v, map_iova, map_size, uaddr,
				     perm, map_file);
		if (ret) {
			fput(map_file->file);
			kfree(map_file);
			break;
		}
next:
		size -= map_size;
		uaddr += map_size;
		map_iova += map_size;
	}
	if (ret)
		vhost_vdpa_unmap(v, iova, map_iova - iova);

	mmap_read_unlock(dev->mm);

	return ret;
}

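/*
 * Map a userspace range by physical address: pin the backing pages with
 * pin_user_pages() (honouring RLIMIT_MEMLOCK), coalesce physically
 * contiguous runs of pages and hand each run to vhost_vdpa_map() as a
 * single IOTLB entry.
 */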
static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
			     u64 iova, u64 size, u64 uaddr, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	struct page **page_list;
	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
	unsigned int gup_flags = FOLL_LONGTERM;
	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
	unsigned long lock_limit, sz2pin, nchunks, i;
	u64 start = iova;
	long pinned;
	int ret = 0;

	/* Limit the use of memory for bookkeeping */
	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list)
		return -ENOMEM;

	if (perm & VHOST_ACCESS_WO)
		gup_flags |= FOLL_WRITE;

	npages = PFN_UP(size + (iova & ~PAGE_MASK));
	if (!npages) {
		ret = -EINVAL;
		goto free;
	}

	mmap_read_lock(dev->mm);

	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
		ret = -ENOMEM;
		goto unlock;
	}

	cur_base = uaddr & PAGE_MASK;
	iova &= PAGE_MASK;
	nchunks = 0;

	while (npages) {
		sz2pin = min_t(unsigned long, npages, list_size);
		pinned = pin_user_pages(cur_base, sz2pin,
					gup_flags, page_list, NULL);
		if (sz2pin != pinned) {
			if (pinned < 0) {
				ret = pinned;
			} else {
				unpin_user_pages(page_list, pinned);
				ret = -ENOMEM;
			}
			goto out;
		}
		nchunks++;

		if (!last_pfn)
			map_pfn = page_to_pfn(page_list[0]);

		for (i = 0; i < pinned; i++) {
			unsigned long this_pfn = page_to_pfn(page_list[i]);
			u64 csize;

			if (last_pfn && (this_pfn != last_pfn + 1)) {
				/* Map the contiguous chunk pinned so far */
				csize = PFN_PHYS(last_pfn - map_pfn + 1);
				ret = vhost_vdpa_map(v, iova, csize,
						     PFN_PHYS(map_pfn),
						     perm, NULL);
				if (ret) {
					/*
					 * Unpin the pages that are left unmapped
					 * from this point on in the current
					 * page_list. The remaining outstanding
					 * ones which may stride across several
					 * chunks will be covered in the common
					 * error path subsequently.
					 */
					unpin_user_pages(&page_list[i],
							 pinned - i);
					goto out;
				}

				map_pfn = this_pfn;
				iova += csize;
				nchunks = 0;
			}

			last_pfn = this_pfn;
		}

		cur_base += PFN_PHYS(pinned);
		npages -= pinned;
	}

	/* Map the remaining chunk */
	ret = vhost_vdpa_map(v, iova, PFN_PHYS(last_pfn - map_pfn + 1),
			     PFN_PHYS(map_pfn), perm, NULL);
out:
	if (ret) {
		if (nchunks) {
			unsigned long pfn;

			/*
			 * Unpin the outstanding pages which were pinned but
			 * not yet mapped because vhost_vdpa_map() or
			 * pin_user_pages() failed.
			 *
			 * Mapped pages are accounted in vhost_vdpa_map(),
			 * hence the corresponding unpinning will be handled
			 * by vhost_vdpa_unmap().
			 */
			WARN_ON(!last_pfn);
			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
				unpin_user_page(pfn_to_page(pfn));
		}
		vhost_vdpa_unmap(v, start, size);
	}
unlock:
	mmap_read_unlock(dev->mm);
free:
	free_page((unsigned long)page_list);
	return ret;
}

static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
					   struct vhost_iotlb_msg *msg)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_iotlb *iotlb = dev->iotlb;

	if (msg->iova < v->range.first || !msg->size ||
	    msg->iova > U64_MAX - msg->size + 1 ||
	    msg->iova + msg->size - 1 > v->range.last)
		return -EINVAL;

	if (vhost_iotlb_itree_first(iotlb, msg->iova,
				    msg->iova + msg->size - 1))
		return -EEXIST;

	if (vdpa->use_va)
		return vhost_vdpa_va_map(v, msg->iova, msg->size,
					 msg->uaddr, msg->perm);

	return vhost_vdpa_pa_map(v, msg->iova, msg->size, msg->uaddr,
				 msg->perm);
}

static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
					struct vhost_iotlb_msg *msg)
{
	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int r = 0;

	mutex_lock(&dev->mutex);

	r = vhost_dev_check_owner(dev);
	if (r)
		goto unlock;

	switch (msg->type) {
	case VHOST_IOTLB_UPDATE:
		r = vhost_vdpa_process_iotlb_update(v, msg);
		break;
	case VHOST_IOTLB_INVALIDATE:
		vhost_vdpa_unmap(v, msg->iova, msg->size);
		break;
	case VHOST_IOTLB_BATCH_BEGIN:
		v->in_batch = true;
		break;
	case VHOST_IOTLB_BATCH_END:
		if (v->in_batch && ops->set_map)
			ops->set_map(vdpa, dev->iotlb);
		v->in_batch = false;
		break;
	default:
		r = -EINVAL;
		break;
	}
unlock:
	mutex_unlock(&dev->mutex);

	return r;
}

static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
					 struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vhost_vdpa *v = file->private_data;
	struct vhost_dev *dev = &v->vdev;

	return vhost_chr_write_iter(dev, from);
}

static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
	struct bus_type *bus;
	int ret;

	/* Device wants to do DMA by itself */
	if (ops->set_map || ops->dma_map)
		return 0;

	bus = dma_dev->bus;
	if (!bus)
		return -EFAULT;

	if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
		return -ENOTSUPP;

	v->domain = iommu_domain_alloc(bus);
	if (!v->domain)
		return -EIO;

	ret = iommu_attach_device(v->domain, dma_dev);
	if (ret)
		goto err_attach;

	return 0;

err_attach:
	iommu_domain_free(v->domain);
	return ret;
}

static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);

	if (v->domain) {
		iommu_detach_device(v->domain, dma_dev);
		iommu_domain_free(v->domain);
	}

	v->domain = NULL;
}

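/*
 * Advertise the usable IOVA range: prefer the range reported by the
 * device, fall back to the IOMMU aperture when a domain is attached,
 * and otherwise allow the whole 64-bit space.
 */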
static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
{
	struct vdpa_iova_range *range = &v->range;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->get_iova_range) {
		*range = ops->get_iova_range(vdpa);
	} else if (v->domain && v->domain->geometry.force_aperture) {
		range->first = v->domain->geometry.aperture_start;
		range->last = v->domain->geometry.aperture_end;
	} else {
		range->first = 0;
		range->last = ULLONG_MAX;
	}
}

static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v;
	struct vhost_dev *dev;
	struct vhost_virtqueue **vqs;
	int r, opened;
	u32 i, nvqs;

	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);

	opened = atomic_cmpxchg(&v->opened, 0, 1);
	if (opened)
		return -EBUSY;

	nvqs = v->nvqs;
	r = vhost_vdpa_reset(v);
	if (r)
		goto err;

	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		r = -ENOMEM;
		goto err;
	}

	dev = &v->vdev;
	for (i = 0; i < nvqs; i++) {
		vqs[i] = &v->vqs[i];
		vqs[i]->handle_kick = handle_vq_kick;
	}
	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
		       vhost_vdpa_process_iotlb_msg);

	dev->iotlb = vhost_iotlb_alloc(0, 0);
	if (!dev->iotlb) {
		r = -ENOMEM;
		goto err_init_iotlb;
	}

	r = vhost_vdpa_alloc_domain(v);
	if (r)
		goto err_init_iotlb;

	vhost_vdpa_set_iova_range(v);

	filep->private_data = v;

	return 0;

err_init_iotlb:
	vhost_dev_cleanup(&v->vdev);
	kfree(vqs);
err:
	atomic_dec(&v->opened);
	return r;
}

static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
{
	u32 i;

	for (i = 0; i < v->nvqs; i++)
		vhost_vdpa_unsetup_vq_irq(v, i);
}

static int vhost_vdpa_release(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;

	mutex_lock(&d->mutex);
	filep->private_data = NULL;
	vhost_vdpa_clean_irq(v);
	vhost_vdpa_reset(v);
	vhost_dev_stop(&v->vdev);
	vhost_vdpa_iotlb_free(v);
	vhost_vdpa_free_domain(v);
	vhost_vdpa_config_put(v);
	vhost_dev_cleanup(&v->vdev);
	kfree(v->vdev.vqs);
	mutex_unlock(&d->mutex);

	atomic_dec(&v->opened);
	complete(&v->completion);

	return 0;
}

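/*
 * Doorbell mapping: userspace may mmap() a virtqueue notification area
 * directly, one page per virtqueue indexed by vm_pgoff. Only write-only,
 * shared, page-sized and page-aligned doorbells are accepted.
 */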
#ifdef CONFIG_MMU
static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
{
	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	struct vm_area_struct *vma = vmf->vma;
	u16 index = vma->vm_pgoff;

	notify = ops->get_vq_notification(vdpa, index);

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
			    PFN_DOWN(notify.addr), PAGE_SIZE,
			    vma->vm_page_prot))
		return VM_FAULT_SIGBUS;

	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct vhost_vdpa_vm_ops = {
	.fault = vhost_vdpa_fault,
};

static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vhost_vdpa *v = vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	unsigned long index = vma->vm_pgoff;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (vma->vm_flags & VM_READ)
		return -EINVAL;
	if (index > 65535)
		return -EINVAL;
	if (!ops->get_vq_notification)
		return -ENOTSUPP;

	/* To be safe and easily modelled by userspace, we only
	 * support doorbells that sit on a page boundary and do
	 * not share the page with other registers.
	 */
	notify = ops->get_vq_notification(vdpa, index);
	if (notify.addr & (PAGE_SIZE - 1))
		return -EINVAL;
	if (vma->vm_end - vma->vm_start != notify.size)
		return -ENOTSUPP;

	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &vhost_vdpa_vm_ops;
	return 0;
}
#endif /* CONFIG_MMU */

static const struct file_operations vhost_vdpa_fops = {
	.owner		= THIS_MODULE,
	.open		= vhost_vdpa_open,
	.release	= vhost_vdpa_release,
	.write_iter	= vhost_vdpa_chr_write_iter,
	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
#ifdef CONFIG_MMU
	.mmap		= vhost_vdpa_mmap,
#endif /* CONFIG_MMU */
	.compat_ioctl	= compat_ptr_ioctl,
};

static void vhost_vdpa_release_dev(struct device *device)
{
	struct vhost_vdpa *v =
	       container_of(device, struct vhost_vdpa, dev);

	ida_simple_remove(&vhost_vdpa_ida, v->minor);
	kfree(v->vqs);
	kfree(v);
}

static int vhost_vdpa_probe(struct vdpa_device *vdpa)
{
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa *v;
	int minor;
	int r;

	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!v)
		return -ENOMEM;

	minor = ida_simple_get(&vhost_vdpa_ida, 0,
			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
	if (minor < 0) {
		kfree(v);
		return minor;
	}

	atomic_set(&v->opened, 0);
	v->minor = minor;
	v->vdpa = vdpa;
	v->nvqs = vdpa->nvqs;
	v->virtio_id = ops->get_device_id(vdpa);

	device_initialize(&v->dev);
	v->dev.release = vhost_vdpa_release_dev;
	v->dev.parent = &vdpa->dev;
	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
			       GFP_KERNEL);
	if (!v->vqs) {
		r = -ENOMEM;
		goto err;
	}

	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
	if (r)
		goto err;

	cdev_init(&v->cdev, &vhost_vdpa_fops);
	v->cdev.owner = THIS_MODULE;

	r = cdev_device_add(&v->cdev, &v->dev);
	if (r)
		goto err;

	init_completion(&v->completion);
	vdpa_set_drvdata(vdpa, v);

	return 0;

err:
	put_device(&v->dev);
	return r;
}

static void vhost_vdpa_remove(struct vdpa_device *vdpa)
{
	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
	int opened;

	cdev_device_del(&v->cdev, &v->dev);

	do {
		opened = atomic_cmpxchg(&v->opened, 0, 1);
		if (!opened)
			break;
		wait_for_completion(&v->completion);
	} while (1);

	put_device(&v->dev);
}

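/*
 * Bus glue: vhost_vdpa binds to devices on the vDPA bus and exposes each
 * one as a "vhost-vdpa-<minor>" character device, carved out of the
 * chrdev region allocated at module init.
 */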
static struct vdpa_driver vhost_vdpa_driver = {
	.driver = {
		.name	= "vhost_vdpa",
	},
	.probe	= vhost_vdpa_probe,
	.remove	= vhost_vdpa_remove,
};

static int __init vhost_vdpa_init(void)
{
	int r;

	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
				"vhost-vdpa");
	if (r)
		goto err_alloc_chrdev;

	r = vdpa_register_driver(&vhost_vdpa_driver);
	if (r)
		goto err_vdpa_register_driver;

	return 0;

err_vdpa_register_driver:
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
err_alloc_chrdev:
	return r;
}
module_init(vhost_vdpa_init);

static void __exit vhost_vdpa_exit(void)
{
	vdpa_unregister_driver(&vhost_vdpa_driver);
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
}
module_exit(vhost_vdpa_exit);

MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");