// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>
#include "vfio.h"

#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"

static struct vfio {
	struct class *class;
	struct list_head iommu_drivers_list;
	struct mutex iommu_drivers_lock;
	struct list_head group_list;
	struct mutex group_lock; /* locks group_list */
	struct ida group_ida;
	dev_t group_devt;
} vfio;

struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops *ops;
	struct list_head vfio_next;
};

struct vfio_container {
	struct kref kref;
	struct list_head group_list;
	struct rw_semaphore group_lock;
	struct vfio_iommu_driver *iommu_driver;
	void *iommu_data;
	bool noiommu;
};

struct vfio_group {
	struct device dev;
	struct cdev cdev;
	refcount_t users;
	unsigned int container_users;
	struct iommu_group *iommu_group;
	struct vfio_container *container;
	struct list_head device_list;
	struct mutex device_lock;
	struct list_head vfio_next;
	struct list_head container_next;
	enum vfio_group_type type;
	unsigned int dev_counter;
	struct rw_semaphore group_rwsem;
	struct kvm *kvm;
	struct file *opened_file;
	struct blocking_notifier_head notifier;
};

#ifdef CONFIG_VFIO_NOIOMMU
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
#endif
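/*
 * Illustrative only (not part of the original file): the unsafe no-IOMMU mode
 * above is normally enabled by an administrator either at module load time,
 * e.g. "modprobe vfio enable_unsafe_noiommu_mode=1", or at runtime through
 * /sys/module/vfio/parameters/enable_unsafe_noiommu_mode, since the parameter
 * is declared writable (S_IWUSR) above.
 */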

static DEFINE_XARRAY(vfio_device_set_xa);
static const struct file_operations vfio_group_fops;

int vfio_assign_device_set(struct vfio_device *device, void *set_id)
{
	unsigned long idx = (unsigned long)set_id;
	struct vfio_device_set *new_dev_set;
	struct vfio_device_set *dev_set;

	if (WARN_ON(!set_id))
		return -EINVAL;

	/*
	 * Atomically acquire a singleton object in the xarray for this set_id
	 */
	xa_lock(&vfio_device_set_xa);
	dev_set = xa_load(&vfio_device_set_xa, idx);
	if (dev_set)
		goto found_get_ref;
	xa_unlock(&vfio_device_set_xa);

	new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
	if (!new_dev_set)
		return -ENOMEM;
	mutex_init(&new_dev_set->lock);
	INIT_LIST_HEAD(&new_dev_set->device_list);
	new_dev_set->set_id = set_id;

	xa_lock(&vfio_device_set_xa);
	dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
			       GFP_KERNEL);
	if (!dev_set) {
		dev_set = new_dev_set;
		goto found_get_ref;
	}

	kfree(new_dev_set);
	if (xa_is_err(dev_set)) {
		xa_unlock(&vfio_device_set_xa);
		return xa_err(dev_set);
	}

found_get_ref:
	dev_set->device_count++;
	xa_unlock(&vfio_device_set_xa);
	mutex_lock(&dev_set->lock);
	device->dev_set = dev_set;
	list_add_tail(&device->dev_set_list, &dev_set->device_list);
	mutex_unlock(&dev_set->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(vfio_assign_device_set);

static void vfio_release_device_set(struct vfio_device *device)
{
	struct vfio_device_set *dev_set = device->dev_set;

	if (!dev_set)
		return;

	mutex_lock(&dev_set->lock);
	list_del(&device->dev_set_list);
	mutex_unlock(&dev_set->lock);

	xa_lock(&vfio_device_set_xa);
	if (!--dev_set->device_count) {
		__xa_erase(&vfio_device_set_xa,
			   (unsigned long)dev_set->set_id);
		mutex_destroy(&dev_set->lock);
		kfree(dev_set);
	}
	xa_unlock(&vfio_device_set_xa);
}

#ifdef CONFIG_VFIO_NOIOMMU
static void *vfio_noiommu_open(unsigned long arg)
{
	if (arg != VFIO_NOIOMMU_IOMMU)
		return ERR_PTR(-EINVAL);
	if (!capable(CAP_SYS_RAWIO))
		return ERR_PTR(-EPERM);

	return NULL;
}

static void vfio_noiommu_release(void *iommu_data)
{
}

static long vfio_noiommu_ioctl(void *iommu_data,
			       unsigned int cmd, unsigned long arg)
{
	if (cmd == VFIO_CHECK_EXTENSION)
		return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;

	return -ENOTTY;
}

static int vfio_noiommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group, enum vfio_group_type type)
{
	return 0;
}

static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}

static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};

/*
 * Only noiommu containers can use vfio-noiommu and noiommu containers can only
 * use vfio-noiommu.
 */
static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
					     const struct vfio_iommu_driver *driver)
{
	return container->noiommu == (driver->ops == &vfio_noiommu_ops);
}
#else
static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
					     const struct vfio_iommu_driver *driver)
{
	return true;
}
#endif /* CONFIG_VFIO_NOIOMMU */

/*
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	if (WARN_ON(!ops->register_device != !ops->unregister_device))
		return -EINVAL;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);

void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);

static void vfio_group_get(struct vfio_group *group);

/*
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref.  Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}

/*
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *
__vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group) {
			vfio_group_get(group);
			return group;
		}
	}
	return NULL;
}

static struct vfio_group *
vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	group = __vfio_group_get_from_iommu(iommu_group);
	mutex_unlock(&vfio.group_lock);
	return group;
}

static void vfio_group_release(struct device *dev)
{
	struct vfio_group *group = container_of(dev, struct vfio_group, dev);

	mutex_destroy(&group->device_lock);
	iommu_group_put(group->iommu_group);
	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
	kfree(group);
}

static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
					   enum vfio_group_type type)
{
	struct vfio_group *group;
	int minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
	if (minor < 0) {
		kfree(group);
		return ERR_PTR(minor);
	}

	device_initialize(&group->dev);
	group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
	group->dev.class = vfio.class;
	group->dev.release = vfio_group_release;
	cdev_init(&group->cdev, &vfio_group_fops);
	group->cdev.owner = THIS_MODULE;

	refcount_set(&group->users, 1);
	init_rwsem(&group->group_rwsem);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	group->iommu_group = iommu_group;
	/* put in vfio_group_release() */
	iommu_group_ref_get(iommu_group);
	group->type = type;
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	return group;
}

static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
		enum vfio_group_type type)
{
	struct vfio_group *group;
	struct vfio_group *ret;
	int err;

	group = vfio_group_alloc(iommu_group, type);
	if (IS_ERR(group))
		return group;

	err = dev_set_name(&group->dev, "%s%d",
			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
			   iommu_group_id(iommu_group));
	if (err) {
		ret = ERR_PTR(err);
		goto err_put;
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	ret = __vfio_group_get_from_iommu(iommu_group);
	if (ret)
		goto err_unlock;

	err = cdev_device_add(&group->cdev, &group->dev);
	if (err) {
		ret = ERR_PTR(err);
		goto err_unlock;
	}

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);
	return group;

err_unlock:
	mutex_unlock(&vfio.group_lock);
err_put:
	put_device(&group->dev);
	return ret;
}

static void vfio_group_put(struct vfio_group *group)
{
	if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock))
		return;

	/*
	 * These data structures all have paired operations that can only be
	 * undone when the caller holds a live reference on the group. Since all
	 * pairs must be undone these WARN_ON's indicate some caller did not
	 * properly hold the group reference.
	 */
	WARN_ON(!list_empty(&group->device_list));
	WARN_ON(group->container || group->container_users);
	WARN_ON(group->notifier.head);

	list_del(&group->vfio_next);
	cdev_device_del(&group->cdev, &group->dev);
	mutex_unlock(&vfio.group_lock);

	put_device(&group->dev);
}

static void vfio_group_get(struct vfio_group *group)
{
	refcount_inc(&group->users);
}

/*
 * Device objects - create, release, get, put, search
 */
/* Device reference always implies a group reference */
static void vfio_device_put(struct vfio_device *device)
{
	if (refcount_dec_and_test(&device->refcount))
		complete(&device->comp);
}

static bool vfio_device_try_get(struct vfio_device *device)
{
	return refcount_inc_not_zero(&device->refcount);
}

static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
						 struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev && vfio_device_try_get(device)) {
			mutex_unlock(&group->device_lock);
			return device;
		}
	}
	mutex_unlock(&group->device_lock);
	return NULL;
}

/*
 * VFIO driver API
 */
void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
			 const struct vfio_device_ops *ops)
{
	init_completion(&device->comp);
	device->dev = dev;
	device->ops = ops;
}
EXPORT_SYMBOL_GPL(vfio_init_group_dev);

void vfio_uninit_group_dev(struct vfio_device *device)
{
	vfio_release_device_set(device);
}
EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);

static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
		enum vfio_group_type type)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	int ret;

	iommu_group = iommu_group_alloc();
	if (IS_ERR(iommu_group))
		return ERR_CAST(iommu_group);

	ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
	if (ret)
		goto out_put_group;
	ret = iommu_group_add_device(iommu_group, dev);
	if (ret)
		goto out_put_group;

	group = vfio_create_group(iommu_group, type);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_remove_device;
	}
	iommu_group_put(iommu_group);
	return group;

out_remove_device:
	iommu_group_remove_device(dev);
out_put_group:
	iommu_group_put(iommu_group);
	return ERR_PTR(ret);
}

static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
#ifdef CONFIG_VFIO_NOIOMMU
	if (!iommu_group && noiommu) {
		/*
		 * With noiommu enabled, create an IOMMU group for devices that
		 * don't already have one, implying no IOMMU hardware/driver
		 * exists.  Taint the kernel because we're about to give a DMA
		 * capable device to a user without IOMMU protection.
		 */
		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
		if (!IS_ERR(group)) {
			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
		}
		return group;
	}
#endif
	if (!iommu_group)
		return ERR_PTR(-EINVAL);

	/*
	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
	 * restore cache coherency. It has to be checked here because it is only
	 * valid for cases where we are using iommu groups.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
		iommu_group_put(iommu_group);
		return ERR_PTR(-EINVAL);
	}

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group)
		group = vfio_create_group(iommu_group, VFIO_IOMMU);

	/* The vfio_group holds a reference to the iommu_group */
	iommu_group_put(iommu_group);
	return group;
}

static int __vfio_register_dev(struct vfio_device *device,
		struct vfio_group *group)
{
	struct vfio_device *existing_device;

	if (IS_ERR(group))
		return PTR_ERR(group);

	/*
	 * If the driver doesn't specify a set then the device is added to a
	 * singleton set just for itself.
	 */
	if (!device->dev_set)
		vfio_assign_device_set(device, device);

	existing_device = vfio_group_get_device(group, device->dev);
	if (existing_device) {
		dev_WARN(device->dev, "Device already exists on group %d\n",
			 iommu_group_id(group->iommu_group));
		vfio_device_put(existing_device);
		if (group->type == VFIO_NO_IOMMU ||
		    group->type == VFIO_EMULATED_IOMMU)
			iommu_group_remove_device(device->dev);
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Our reference on group is moved to the device */
	device->group = group;

	/* Refcounting can't start until the driver calls register */
	refcount_set(&device->refcount, 1);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	group->dev_counter++;
	mutex_unlock(&group->device_lock);

	return 0;
}

int vfio_register_group_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device,
		vfio_group_find_or_alloc(device->dev));
}
EXPORT_SYMBOL_GPL(vfio_register_group_dev);
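
/*
 * Minimal sketch (not part of this file) of how a bus driver typically wires
 * a device into VFIO with the registration API above.  The my_vfio_device
 * structure, my_ops and my_probe() names are hypothetical:
 *
 *	static int my_probe(struct device *dev)
 *	{
 *		struct my_vfio_device *mvdev;
 *		int ret;
 *
 *		mvdev = kzalloc(sizeof(*mvdev), GFP_KERNEL);
 *		if (!mvdev)
 *			return -ENOMEM;
 *
 *		vfio_init_group_dev(&mvdev->vdev, dev, &my_ops);
 *		ret = vfio_register_group_dev(&mvdev->vdev);
 *		if (ret) {
 *			vfio_uninit_group_dev(&mvdev->vdev);
 *			kfree(mvdev);
 *		}
 *		return ret;
 *	}
 *
 * The remove path mirrors this with vfio_unregister_group_dev() followed by
 * vfio_uninit_group_dev() and the driver's own teardown.
 */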

/*
 * Register a virtual device without IOMMU backing.  The user of this
 * device must not be able to directly trigger unmediated DMA.
 */
int vfio_register_emulated_iommu_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device,
		vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
}
EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);

static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = ERR_PTR(-ENODEV);

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		int ret;

		if (it->ops->match) {
			ret = it->ops->match(it, buf);
			if (ret < 0) {
				device = ERR_PTR(ret);
				break;
			}
		} else {
			ret = !strcmp(dev_name(it->dev), buf);
		}

		if (ret && vfio_device_try_get(it)) {
			device = it;
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}

/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device... */
void vfio_unregister_group_dev(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	unsigned int i = 0;
	bool interrupted = false;
	long rc;

	vfio_device_put(device);
	rc = try_wait_for_completion(&device->comp);
	while (rc <= 0) {
		if (device->ops->request)
			device->ops->request(device, i++);

		if (interrupted) {
			rc = wait_for_completion_timeout(&device->comp,
							 HZ * 10);
		} else {
			rc = wait_for_completion_interruptible_timeout(
				&device->comp, HZ * 10);
			if (rc < 0) {
				interrupted = true;
				dev_warn(device->dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	}

	mutex_lock(&group->device_lock);
	list_del(&device->group_next);
	group->dev_counter--;
	mutex_unlock(&group->device_lock);

	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
		iommu_group_remove_device(device->dev);

	/* Matches the get in vfio_register_group_dev() */
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);

/*
 * VFIO base fd, /dev/vfio/vfio
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

				if (!list_empty(&container->group_list) &&
				    !vfio_iommu_driver_allowed(container,
							       driver))
					continue;
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}

/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group,
						group->type);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!vfio_iommu_driver_allowed(container, driver))
			continue;
		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}

static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);
	}

	return ret;
}

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (driver && driver->ops->notify)
		driver->ops->notify(container->iommu_data,
				    VFIO_IOMMU_CONTAINER_CLOSE);

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

static const struct file_operations vfio_fops = {
	.owner = THIS_MODULE,
	.open = vfio_fops_open,
	.release = vfio_fops_release,
	.unlocked_ioctl = vfio_fops_unl_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
};

/*
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	lockdep_assert_held_write(&group->group_rwsem);

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	if (group->type == VFIO_IOMMU)
		iommu_group_release_dma_owner(group->iommu_group);

	group->container = NULL;
	group->container_users = 0;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}
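
/*
 * For orientation, an illustrative userspace sequence that exercises the
 * container and group interfaces implemented above and below (group number,
 * device name, and IOMMU type are examples only; argsz setup and error
 * handling are omitted):
 *
 *	container = open("/dev/vfio/vfio", O_RDWR);
 *	group = open("/dev/vfio/26", O_RDWR);
 *	ioctl(group, VFIO_GROUP_GET_STATUS, &status);	// expect VIABLE
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1v2_IOMMU);
 *	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
 *
 * See Documentation/driver-api/vfio.rst for the full flow.
 */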
/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_unset_container(struct vfio_group *group)
{
	lockdep_assert_held_write(&group->group_rwsem);

	if (!group->container)
		return -EINVAL;
	if (group->container_users != 1)
		return -EBUSY;
	__vfio_group_unset_container(group);
	return 0;
}

static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	lockdep_assert_held_write(&group->group_rwsem);

	if (group->container || WARN_ON(group->container_users))
		return -EINVAL;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		fdput(f);
		return -EINVAL;
	}

	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
		ret = -EPERM;
		goto unlock_out;
	}

	if (group->type == VFIO_IOMMU) {
		ret = iommu_group_claim_dma_owner(group->iommu_group, f.file);
		if (ret)
			goto unlock_out;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group,
						group->type);
		if (ret) {
			if (group->type == VFIO_IOMMU)
				iommu_group_release_dma_owner(
					group->iommu_group);
			goto unlock_out;
		}
	}

	group->container = container;
	group->container_users = 1;
	container->noiommu = (group->type == VFIO_NO_IOMMU);
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);

unlock_out:
	up_write(&container->group_lock);
	fdput(f);
	return ret;
}

static const struct file_operations vfio_device_fops;

/* true if the vfio_device has open_device() called but not close_device() */
static bool vfio_assert_device_open(struct vfio_device *device)
{
	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}

static int vfio_device_assign_container(struct vfio_device *device)
{
	struct vfio_group *group = device->group;

	lockdep_assert_held_write(&group->group_rwsem);

	if (!group->container || !group->container->iommu_driver ||
	    WARN_ON(!group->container_users))
		return -EINVAL;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	get_file(group->opened_file);
	group->container_users++;
	return 0;
}

static void vfio_device_unassign_container(struct vfio_device *device)
{
	down_write(&device->group->group_rwsem);
	WARN_ON(device->group->container_users <= 1);
	device->group->container_users--;
	fput(device->group->opened_file);
	up_write(&device->group->group_rwsem);
}

static struct file *vfio_device_open(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver;
	struct file *filep;
	int ret;

	down_write(&device->group->group_rwsem);
	ret = vfio_device_assign_container(device);
	up_write(&device->group->group_rwsem);
	if (ret)
		return ERR_PTR(ret);

	if (!try_module_get(device->dev->driver->owner)) {
		ret = -ENODEV;
		goto err_unassign_container;
	}

	mutex_lock(&device->dev_set->lock);
	device->open_count++;
	if (device->open_count == 1) {
		/*
		 * Here we pass the KVM pointer with the group under the read
		 * lock.  If the device driver will use it, it must obtain a
		 * reference and release it during close_device.
		 */
		down_read(&device->group->group_rwsem);
		device->kvm = device->group->kvm;

		if (device->ops->open_device) {
			ret = device->ops->open_device(device);
			if (ret)
				goto err_undo_count;
		}

		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->register_device)
			iommu_driver->ops->register_device(
				device->group->container->iommu_data, device);

		up_read(&device->group->group_rwsem);
	}
	mutex_unlock(&device->dev_set->lock);

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_close_device;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented.  Now there's need.
	 */
	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);

	if (device->group->type == VFIO_NO_IOMMU)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));
	/*
	 * On success the ref of device is moved to the file and
	 * put in vfio_device_fops_release()
	 */
	return filep;

err_close_device:
	mutex_lock(&device->dev_set->lock);
	down_read(&device->group->group_rwsem);
	if (device->open_count == 1 && device->ops->close_device) {
		device->ops->close_device(device);

		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->unregister_device)
			iommu_driver->ops->unregister_device(
				device->group->container->iommu_data, device);
	}
err_undo_count:
	up_read(&device->group->group_rwsem);
	device->open_count--;
	if (device->open_count == 0 && device->kvm)
		device->kvm = NULL;
	mutex_unlock(&device->dev_set->lock);
	module_put(device->dev->driver->owner);
err_unassign_container:
	vfio_device_unassign_container(device);
	return ERR_PTR(ret);
}

static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
	struct vfio_device *device;
	struct file *filep;
	int fdno;
	int ret;

	device = vfio_device_get_from_name(group, buf);
	if (IS_ERR(device))
		return PTR_ERR(device);

	fdno = get_unused_fd_flags(O_CLOEXEC);
	if (fdno < 0) {
		ret = fdno;
		goto err_put_device;
	}

	filep = vfio_device_open(device);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_put_fdno;
	}

	fd_install(fdno, filep);
	return fdno;

err_put_fdno:
	put_unused_fd(fdno);
err_put_device:
	vfio_device_put(device);
	return ret;
}

static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	long ret = -ENOTTY;

	switch (cmd) {
	case VFIO_GROUP_GET_STATUS:
	{
		struct vfio_group_status status;
		unsigned long minsz;

		minsz = offsetofend(struct vfio_group_status, flags);

		if (copy_from_user(&status, (void __user *)arg, minsz))
			return -EFAULT;

		if (status.argsz < minsz)
			return -EINVAL;

		status.flags = 0;

		down_read(&group->group_rwsem);
		if (group->container)
			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
					VFIO_GROUP_FLAGS_VIABLE;
		else if (!iommu_group_dma_owner_claimed(group->iommu_group))
			status.flags |= VFIO_GROUP_FLAGS_VIABLE;
		up_read(&group->group_rwsem);

		if (copy_to_user((void __user *)arg, &status, minsz))
			return -EFAULT;

		ret = 0;
		break;
	}
	case VFIO_GROUP_SET_CONTAINER:
	{
		int fd;

		if (get_user(fd, (int __user *)arg))
			return -EFAULT;

		if (fd < 0)
			return -EINVAL;

		down_write(&group->group_rwsem);
		ret = vfio_group_set_container(group, fd);
		up_write(&group->group_rwsem);
		break;
	}
	case VFIO_GROUP_UNSET_CONTAINER:
		down_write(&group->group_rwsem);
		ret = vfio_group_unset_container(group);
		up_write(&group->group_rwsem);
		break;
	case VFIO_GROUP_GET_DEVICE_FD:
	{
		char *buf;

		buf = strndup_user((const char __user *)arg, PAGE_SIZE);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		ret = vfio_group_get_device_fd(group, buf);
		kfree(buf);
		break;
	}
	}

	return ret;
}

static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group =
		container_of(inode->i_cdev, struct vfio_group, cdev);
	int ret;

	down_write(&group->group_rwsem);

	/* users can be zero if this races with vfio_group_put() */
	if (!refcount_inc_not_zero(&group->users)) {
		ret = -ENODEV;
		goto err_unlock;
	}

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
		ret = -EPERM;
		goto err_put;
	}

	/*
	 * Do we need multiple instances of the group open?  Seems not.
	 */
	if (group->opened_file) {
		ret = -EBUSY;
		goto err_put;
	}
	group->opened_file = filep;
	filep->private_data = group;

	up_write(&group->group_rwsem);
	return 0;
err_put:
	vfio_group_put(group);
err_unlock:
	up_write(&group->group_rwsem);
	return ret;
}

static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	down_write(&group->group_rwsem);
	/*
	 * Device FDs hold a group file reference, therefore the group release
	 * is only called when there are no open devices.
	 */
	WARN_ON(group->notifier.head);
	if (group->container) {
		WARN_ON(group->container_users != 1);
		__vfio_group_unset_container(group);
	}
	group->opened_file = NULL;
	up_write(&group->group_rwsem);

	vfio_group_put(group);

	return 0;
}

static const struct file_operations vfio_group_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = vfio_group_fops_unl_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.open = vfio_group_fops_open,
	.release = vfio_group_fops_release,
};

/*
 * VFIO Device fd
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;
	struct vfio_iommu_driver *iommu_driver;

	mutex_lock(&device->dev_set->lock);
	vfio_assert_device_open(device);
	down_read(&device->group->group_rwsem);
	if (device->open_count == 1 && device->ops->close_device)
		device->ops->close_device(device);

	iommu_driver = device->group->container->iommu_driver;
	if (iommu_driver && iommu_driver->ops->unregister_device)
		iommu_driver->ops->unregister_device(
			device->group->container->iommu_data, device);
	up_read(&device->group->group_rwsem);
	device->open_count--;
	if (device->open_count == 0)
		device->kvm = NULL;
	mutex_unlock(&device->dev_set->lock);

	module_put(device->dev->driver->owner);

	vfio_device_unassign_container(device);

	vfio_device_put(device);

	return 0;
}

/*
 * vfio_mig_get_next_state - Compute the next step in the FSM
 * @cur_fsm - The current state the device is in
 * @new_fsm - The target state to reach
 * @next_fsm - Pointer to the next step to get to new_fsm
 *
 * Return 0 upon success, otherwise -errno
 * Upon success the next step in the state progression between cur_fsm and
 * new_fsm will be set in next_fsm.
 *
 * This breaks down requests for combination transitions into smaller steps and
 * returns the next step to get to new_fsm. The function may need to be called
The function may need to be called 1402 * multiple times before reaching new_fsm. 1403 * 1404 */ 1405 int vfio_mig_get_next_state(struct vfio_device *device, 1406 enum vfio_device_mig_state cur_fsm, 1407 enum vfio_device_mig_state new_fsm, 1408 enum vfio_device_mig_state *next_fsm) 1409 { 1410 enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 }; 1411 /* 1412 * The coding in this table requires the driver to implement the 1413 * following FSM arcs: 1414 * RESUMING -> STOP 1415 * STOP -> RESUMING 1416 * STOP -> STOP_COPY 1417 * STOP_COPY -> STOP 1418 * 1419 * If P2P is supported then the driver must also implement these FSM 1420 * arcs: 1421 * RUNNING -> RUNNING_P2P 1422 * RUNNING_P2P -> RUNNING 1423 * RUNNING_P2P -> STOP 1424 * STOP -> RUNNING_P2P 1425 * Without P2P the driver must implement: 1426 * RUNNING -> STOP 1427 * STOP -> RUNNING 1428 * 1429 * The coding will step through multiple states for some combination 1430 * transitions; if all optional features are supported, this means the 1431 * following ones: 1432 * RESUMING -> STOP -> RUNNING_P2P 1433 * RESUMING -> STOP -> RUNNING_P2P -> RUNNING 1434 * RESUMING -> STOP -> STOP_COPY 1435 * RUNNING -> RUNNING_P2P -> STOP 1436 * RUNNING -> RUNNING_P2P -> STOP -> RESUMING 1437 * RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY 1438 * RUNNING_P2P -> STOP -> RESUMING 1439 * RUNNING_P2P -> STOP -> STOP_COPY 1440 * STOP -> RUNNING_P2P -> RUNNING 1441 * STOP_COPY -> STOP -> RESUMING 1442 * STOP_COPY -> STOP -> RUNNING_P2P 1443 * STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING 1444 */ 1445 static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = { 1446 [VFIO_DEVICE_STATE_STOP] = { 1447 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 1448 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, 1449 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 1450 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, 1451 [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 1452 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 1453 }, 1454 [VFIO_DEVICE_STATE_RUNNING] = { 1455 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, 1456 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 1457 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, 1458 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, 1459 [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 1460 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 1461 }, 1462 [VFIO_DEVICE_STATE_STOP_COPY] = { 1463 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 1464 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, 1465 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 1466 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, 1467 [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, 1468 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 1469 }, 1470 [VFIO_DEVICE_STATE_RESUMING] = { 1471 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 1472 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, 1473 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, 1474 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, 1475 [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, 1476 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 1477 }, 1478 [VFIO_DEVICE_STATE_RUNNING_P2P] = { 1479 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 1480 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 1481 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, 1482 
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_ERROR] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
	};

	static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING_P2P] =
			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
		[VFIO_DEVICE_STATE_ERROR] = ~0U,
	};

	if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
		    (state_flags_table[cur_fsm] & device->migration_flags) !=
			state_flags_table[cur_fsm]))
		return -EINVAL;

	if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
	    (state_flags_table[new_fsm] & device->migration_flags) !=
			state_flags_table[new_fsm])
		return -EINVAL;

	/*
	 * Arcs touching optional and unsupported states are skipped over. The
	 * driver will instead see an arc from the original state to the next
	 * logical state, as per the above comment.
	 */
	*next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
	while ((state_flags_table[*next_fsm] & device->migration_flags) !=
			state_flags_table[*next_fsm])
		*next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];

	return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(vfio_mig_get_next_state);
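
/*
 * Sketch (illustrative, not a driver in this file) of how a migration
 * driver's migration_set_state() op typically consumes
 * vfio_mig_get_next_state(), stepping one supported arc at a time until the
 * requested state is reached.  my_step_one_arc() is a hypothetical per-arc
 * handler:
 *
 *	while (cur != new_state) {
 *		enum vfio_device_mig_state next;
 *		int ret;
 *
 *		ret = vfio_mig_get_next_state(device, cur, new_state, &next);
 *		if (ret)
 *			return ERR_PTR(ret);
 *		ret = my_step_one_arc(device, cur, next);
 *		if (ret)
 *			return ERR_PTR(ret);
 *		cur = next;
 *	}
 */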

/*
 * Convert the driver's struct file into a FD number and return it to userspace
 */
static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
				   struct vfio_device_feature_mig_state *mig)
{
	int ret;
	int fd;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		ret = fd;
		goto out_fput;
	}

	mig->data_fd = fd;
	if (copy_to_user(arg, mig, sizeof(*mig))) {
		ret = -EFAULT;
		goto out_put_unused;
	}
	fd_install(fd, filp);
	return 0;

out_put_unused:
	put_unused_fd(fd);
out_fput:
	fput(filp);
	return ret;
}

static int
vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
					   u32 flags, void __user *arg,
					   size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_mig_state, data_fd);
	struct vfio_device_feature_mig_state mig;
	struct file *filp = NULL;
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET |
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;

	if (copy_from_user(&mig, arg, minsz))
		return -EFAULT;

	if (flags & VFIO_DEVICE_FEATURE_GET) {
		enum vfio_device_mig_state curr_state;

		ret = device->mig_ops->migration_get_state(device,
							   &curr_state);
		if (ret)
			return ret;
		mig.device_state = curr_state;
		goto out_copy;
	}

	/* Handle the VFIO_DEVICE_FEATURE_SET */
	filp = device->mig_ops->migration_set_state(device, mig.device_state);
	if (IS_ERR(filp) || !filp)
		goto out_copy;

	return vfio_ioct_mig_return_fd(filp, arg, &mig);
out_copy:
	mig.data_fd = -1;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	if (IS_ERR(filp))
		return PTR_ERR(filp);
	return 0;
}

static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
					       u32 flags, void __user *arg,
					       size_t argsz)
{
	struct vfio_device_feature_migration mig = {
		.flags = device->migration_flags,
	};
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int vfio_ioctl_device_feature(struct vfio_device *device,
				     struct vfio_device_feature __user *arg)
{
	size_t minsz = offsetofend(struct vfio_device_feature, flags);
	struct vfio_device_feature feature;

	if (copy_from_user(&feature, arg, minsz))
		return -EFAULT;

	if (feature.argsz < minsz)
		return -EINVAL;

	/* Check unknown flags */
	if (feature.flags &
	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
		return -EINVAL;

	/* GET & SET are mutually exclusive except with PROBE */
	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
		return -EINVAL;

	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
	case VFIO_DEVICE_FEATURE_MIGRATION:
		return vfio_ioctl_device_feature_migration(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
		return vfio_ioctl_device_feature_mig_device_state(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	default:
		if (unlikely(!device->ops->device_feature))
			return -EINVAL;
		return device->ops->device_feature(device, feature.flags,
						   arg->data,
						   feature.argsz - minsz);
	}
}

static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device *device = filep->private_data;

	switch (cmd) {
	case VFIO_DEVICE_FEATURE:
		return vfio_ioctl_device_feature(device, (void __user *)arg);
	default:
		if (unlikely(!device->ops->ioctl))
			return -EINVAL;
		return device->ops->ioctl(device, cmd, arg);
	}
}

static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->read))
		return -EINVAL;

	return device->ops->read(device, buf, count, ppos);
}

static ssize_t vfio_device_fops_write(struct file *filep,
				      const char __user *buf,
				      size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->write))
		return -EINVAL;

	return device->ops->write(device, buf, count, ppos);
}

static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->mmap))
		return -EINVAL;

	return device->ops->mmap(device, vma);
}

static const struct file_operations vfio_device_fops = {
	.owner = THIS_MODULE,
	.release = vfio_device_fops_release,
	.read = vfio_device_fops_read,
	.write = vfio_device_fops_write,
	.unlocked_ioctl = vfio_device_fops_unl_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.mmap = vfio_device_fops_mmap,
};

/**
 * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
 * @file: VFIO group file
 *
 * The returned iommu_group is valid as long as a ref is held on the file.
 */
struct iommu_group *vfio_file_iommu_group(struct file *file)
{
	struct vfio_group *group = file->private_data;

	if (file->f_op != &vfio_group_fops)
		return NULL;
	return group->iommu_group;
}
EXPORT_SYMBOL_GPL(vfio_file_iommu_group);

/**
 * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
 *        is always CPU cache coherent
 * @file: VFIO group file
 *
 * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
 * bit in DMA transactions. A return of false indicates that the user has
 * rights to access additional instructions such as wbinvd on x86.
 */
bool vfio_file_enforced_coherent(struct file *file)
{
	struct vfio_group *group = file->private_data;
	bool ret;

	if (file->f_op != &vfio_group_fops)
		return true;

	down_read(&group->group_rwsem);
	if (group->container) {
		ret = vfio_ioctl_check_extension(group->container,
						 VFIO_DMA_CC_IOMMU);
	} else {
		/*
		 * Since the coherency state is determined only once a container
		 * is attached the user must do so before they can prove they
		 * have permission.
		 */
		ret = true;
	}
	up_read(&group->group_rwsem);
	return ret;
}
EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);

/**
 * vfio_file_set_kvm - Link a kvm with VFIO drivers
 * @file: VFIO group file
 * @kvm: KVM to link
 *
 * When a VFIO device is first opened the KVM will be available in
 * device->kvm if one was associated with the group.
 */
void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
{
	struct vfio_group *group = file->private_data;

	if (file->f_op != &vfio_group_fops)
		return;

	down_write(&group->group_rwsem);
	group->kvm = kvm;
	up_write(&group->group_rwsem);
}
EXPORT_SYMBOL_GPL(vfio_file_set_kvm);

/**
 * vfio_file_has_dev - True if the VFIO file is a handle for device
 * @file: VFIO file to check
 * @device: Device that must be part of the file
 *
 * Returns true if given file has permission to manipulate the given device.
 */
bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
{
	struct vfio_group *group = file->private_data;

	if (file->f_op != &vfio_group_fops)
		return false;

	return group == device->group;
}
EXPORT_SYMBOL_GPL(vfio_file_has_dev);

/*
 * Sub-module support
 */
/*
 * Helper for managing a buffer of info chain capabilities, allocate or
 * reallocate a buffer with additional @size, filling in @id and @version
 * of the capability.  A pointer to the new capability is returned.
 *
 * NB. The chain is based at the head of the buffer, so new entries are
 * added to the tail, vfio_info_cap_shift() should be called to fixup the
 * next offsets prior to copying to the user buffer.
 */
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id, u16 version)
{
	void *buf;
	struct vfio_info_cap_header *header, *tmp;

	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
	if (!buf) {
		kfree(caps->buf);
		caps->buf = NULL;
		caps->size = 0;
		return ERR_PTR(-ENOMEM);
	}

	caps->buf = buf;
	header = buf + caps->size;

	/* Eventually copied to user buffer, zero */
	memset(header, 0, size);

	header->id = id;
	header->version = version;

	/* Add to the end of the capability chain */
	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
		; /* nothing */

	tmp->next = caps->size;
	caps->size += size;

	return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);

void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	struct vfio_info_cap_header *tmp;
	void *buf = (void *)caps->buf;

	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
		tmp->next += offset;
}
EXPORT_SYMBOL(vfio_info_cap_shift);

int vfio_info_add_capability(struct vfio_info_cap *caps,
			     struct vfio_info_cap_header *cap, size_t size)
{
	struct vfio_info_cap_header *header;

	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
	if (IS_ERR(header))
		return PTR_ERR(header);

	memcpy(header + 1, cap + 1, size - sizeof(*header));

	return 0;
}
EXPORT_SYMBOL(vfio_info_add_capability);

int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	unsigned long minsz;
	size_t size;

	minsz = offsetofend(struct vfio_irq_set, count);

	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
	    (hdr->count >= (U32_MAX - hdr->start)) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
			    VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	if (data_size)
		*data_size = 0;

	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		size = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		size = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		size = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (size) {
		if (hdr->argsz - minsz < hdr->count * size)
			return -EINVAL;

		if (!data_size)
			return -EINVAL;

		*data_size = hdr->count * size;
	}

	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
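
/*
 * Illustrative use of the capability chain helpers above when a bus driver
 * builds an INFO ioctl reply, e.g. for VFIO_DEVICE_GET_REGION_INFO (the caps
 * and info variables are hypothetical locals of the caller):
 *
 *	struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
 *
 *	ret = vfio_info_add_capability(&caps, &cap.header, sizeof(cap));
 *	...
 *	if (caps.size) {
 *		info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
 *		info.cap_offset = sizeof(info);
 *		vfio_info_cap_shift(&caps, sizeof(info));
 *		// copy info, then caps.buf at info.cap_offset, to userspace
 *		kfree(caps.buf);
 *	}
 *
 * The shift fixes up the chain's next offsets so they are relative to the
 * start of the user buffer rather than caps.buf.
 */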

/*
 * Pin contiguous user pages and return their associated host pages for local
 * domain only.
 * @device [in]  : device
 * @iova [in]    : starting IOVA of user pages to be pinned.
 * @npage [in]   : count of pages to be pinned.  This count should not
 *                 be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * @prot [in]    : protection flags
 * @pages[out]   : array of host pages
 * Return error or number of pages pinned.
 */
int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
		   int npage, int prot, struct page **pages)
{
	struct vfio_container *container;
	struct vfio_group *group = device->group;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!pages || !npage || !vfio_assert_device_open(device))
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	if (group->dev_counter > 1)
		return -EINVAL;

	/* group->container cannot change while a vfio device is open */
	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->pin_pages))
		ret = driver->ops->pin_pages(container->iommu_data,
					     group->iommu_group, iova,
					     npage, prot, pages);
	else
		ret = -ENOTTY;

	return ret;
}
EXPORT_SYMBOL(vfio_pin_pages);

/*
 * Unpin contiguous host pages for local domain only.
 * @device [in]  : device
 * @iova [in]    : starting address of user pages to be unpinned.
 * @npage [in]   : count of pages to be unpinned.  This count should not
 *                 be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 */
void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;

	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
		return;

	if (WARN_ON(!vfio_assert_device_open(device)))
		return;

	/* group->container cannot change while a vfio device is open */
	container = device->group->container;
	driver = container->iommu_driver;

	driver->ops->unpin_pages(container->iommu_data, iova, npage);
}
EXPORT_SYMBOL(vfio_unpin_pages);
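
/*
 * Minimal sketch of the pin/unpin pattern used by mediated drivers (the
 * my_mdev name is hypothetical): pin the user page backing an IOVA before
 * touching it, and unpin it once the software-emulated DMA is done.
 *
 *	struct page *page;
 *	int ret;
 *
 *	ret = vfio_pin_pages(&my_mdev->vdev, iova, 1,
 *			     IOMMU_READ | IOMMU_WRITE, &page);
 *	if (ret != 1)
 *		return ret < 0 ? ret : -EFAULT;
 *	... access the page contents ...
 *	vfio_unpin_pages(&my_mdev->vdev, iova, 1);
 */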

/*
 * This interface allows the CPUs to perform some sort of virtual DMA on
 * behalf of the device.
 *
 * CPUs read/write from/into a range of IOVAs pointing to user space memory
 * into/from a kernel buffer.
 *
 * As the read/write of user space memory is conducted via the CPUs and is
 * not a real device DMA, it is not necessary to pin the user space memory.
 *
 * @device [in]  : VFIO device
 * @iova [in]    : base IOVA of a user space buffer
 * @data [in]    : pointer to kernel buffer
 * @len [in]     : kernel buffer length
 * @write        : indicate read or write
 * Return error code on failure or 0 on success.
 */
int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
		size_t len, bool write)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	if (!data || len <= 0 || !vfio_assert_device_open(device))
		return -EINVAL;

	/* group->container cannot change while a vfio device is open */
	container = device->group->container;
	driver = container->iommu_driver;

	if (likely(driver && driver->ops->dma_rw))
		ret = driver->ops->dma_rw(container->iommu_data,
					  iova, data, len, write);
	else
		ret = -ENOTTY;
	return ret;
}
EXPORT_SYMBOL(vfio_dma_rw);

/*
 * Module/class support
 */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};

static int __init vfio_init(void)
{
	int ret;

	ida_init(&vfio.group_ida);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
	if (ret)
		goto err_alloc_chrdev;

#ifdef CONFIG_VFIO_NOIOMMU
	ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
#endif
	if (ret)
		goto err_driver_register;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
	return 0;

err_driver_register:
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	misc_deregister(&vfio_dev);
	return ret;
}

static void __exit vfio_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	ida_destroy(&vfio.group_ida);
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
	xa_destroy(&vfio_device_set_xa);
}

module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");