1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 4 * 5 * VFIO container (/dev/vfio/vfio) 6 */ 7 #include <linux/file.h> 8 #include <linux/slab.h> 9 #include <linux/fs.h> 10 #include <linux/capability.h> 11 #include <linux/iommu.h> 12 #include <linux/miscdevice.h> 13 #include <linux/vfio.h> 14 #include <uapi/linux/vfio.h> 15 16 #include "vfio.h" 17 18 struct vfio_container { 19 struct kref kref; 20 struct list_head group_list; 21 struct rw_semaphore group_lock; 22 struct vfio_iommu_driver *iommu_driver; 23 void *iommu_data; 24 bool noiommu; 25 }; 26 27 static struct vfio { 28 struct list_head iommu_drivers_list; 29 struct mutex iommu_drivers_lock; 30 } vfio; 31 32 static void *vfio_noiommu_open(unsigned long arg) 33 { 34 if (arg != VFIO_NOIOMMU_IOMMU) 35 return ERR_PTR(-EINVAL); 36 if (!capable(CAP_SYS_RAWIO)) 37 return ERR_PTR(-EPERM); 38 39 return NULL; 40 } 41 42 static void vfio_noiommu_release(void *iommu_data) 43 { 44 } 45 46 static long vfio_noiommu_ioctl(void *iommu_data, 47 unsigned int cmd, unsigned long arg) 48 { 49 if (cmd == VFIO_CHECK_EXTENSION) 50 return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0; 51 52 return -ENOTTY; 53 } 54 55 static int vfio_noiommu_attach_group(void *iommu_data, 56 struct iommu_group *iommu_group, enum vfio_group_type type) 57 { 58 return 0; 59 } 60 61 static void vfio_noiommu_detach_group(void *iommu_data, 62 struct iommu_group *iommu_group) 63 { 64 } 65 66 static const struct vfio_iommu_driver_ops vfio_noiommu_ops = { 67 .name = "vfio-noiommu", 68 .owner = THIS_MODULE, 69 .open = vfio_noiommu_open, 70 .release = vfio_noiommu_release, 71 .ioctl = vfio_noiommu_ioctl, 72 .attach_group = vfio_noiommu_attach_group, 73 .detach_group = vfio_noiommu_detach_group, 74 }; 75 76 /* 77 * Only noiommu containers can use vfio-noiommu and noiommu containers can only 78 * use vfio-noiommu. 79 */ 80 static bool vfio_iommu_driver_allowed(struct vfio_container *container, 81 const struct vfio_iommu_driver *driver) 82 { 83 if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU)) 84 return true; 85 return container->noiommu == (driver->ops == &vfio_noiommu_ops); 86 } 87 88 /* 89 * IOMMU driver registration 90 */ 91 int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops) 92 { 93 struct vfio_iommu_driver *driver, *tmp; 94 95 if (WARN_ON(!ops->register_device != !ops->unregister_device)) 96 return -EINVAL; 97 98 driver = kzalloc(sizeof(*driver), GFP_KERNEL); 99 if (!driver) 100 return -ENOMEM; 101 102 driver->ops = ops; 103 104 mutex_lock(&vfio.iommu_drivers_lock); 105 106 /* Check for duplicates */ 107 list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) { 108 if (tmp->ops == ops) { 109 mutex_unlock(&vfio.iommu_drivers_lock); 110 kfree(driver); 111 return -EINVAL; 112 } 113 } 114 115 list_add(&driver->vfio_next, &vfio.iommu_drivers_list); 116 117 mutex_unlock(&vfio.iommu_drivers_lock); 118 119 return 0; 120 } 121 EXPORT_SYMBOL_GPL(vfio_register_iommu_driver); 122 123 void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops) 124 { 125 struct vfio_iommu_driver *driver; 126 127 mutex_lock(&vfio.iommu_drivers_lock); 128 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { 129 if (driver->ops == ops) { 130 list_del(&driver->vfio_next); 131 mutex_unlock(&vfio.iommu_drivers_lock); 132 kfree(driver); 133 return; 134 } 135 } 136 mutex_unlock(&vfio.iommu_drivers_lock); 137 } 138 EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); 139 140 /* 141 * Container objects - containers are created when /dev/vfio/vfio is 142 * opened, but their lifecycle extends until the last user is done, so 143 * it's freed via kref. Must support container/group/device being 144 * closed in any order. 145 */ 146 static void vfio_container_release(struct kref *kref) 147 { 148 struct vfio_container *container; 149 container = container_of(kref, struct vfio_container, kref); 150 151 kfree(container); 152 } 153 154 static void vfio_container_get(struct vfio_container *container) 155 { 156 kref_get(&container->kref); 157 } 158 159 static void vfio_container_put(struct vfio_container *container) 160 { 161 kref_put(&container->kref, vfio_container_release); 162 } 163 164 void vfio_device_container_register(struct vfio_device *device) 165 { 166 struct vfio_iommu_driver *iommu_driver = 167 device->group->container->iommu_driver; 168 169 if (iommu_driver && iommu_driver->ops->register_device) 170 iommu_driver->ops->register_device( 171 device->group->container->iommu_data, device); 172 } 173 174 void vfio_device_container_unregister(struct vfio_device *device) 175 { 176 struct vfio_iommu_driver *iommu_driver = 177 device->group->container->iommu_driver; 178 179 if (iommu_driver && iommu_driver->ops->unregister_device) 180 iommu_driver->ops->unregister_device( 181 device->group->container->iommu_data, device); 182 } 183 184 static long 185 vfio_container_ioctl_check_extension(struct vfio_container *container, 186 unsigned long arg) 187 { 188 struct vfio_iommu_driver *driver; 189 long ret = 0; 190 191 down_read(&container->group_lock); 192 193 driver = container->iommu_driver; 194 195 switch (arg) { 196 /* No base extensions yet */ 197 default: 198 /* 199 * If no driver is set, poll all registered drivers for 200 * extensions and return the first positive result. If 201 * a driver is already set, further queries will be passed 202 * only to that driver. 203 */ 204 if (!driver) { 205 mutex_lock(&vfio.iommu_drivers_lock); 206 list_for_each_entry(driver, &vfio.iommu_drivers_list, 207 vfio_next) { 208 209 if (!list_empty(&container->group_list) && 210 !vfio_iommu_driver_allowed(container, 211 driver)) 212 continue; 213 if (!try_module_get(driver->ops->owner)) 214 continue; 215 216 ret = driver->ops->ioctl(NULL, 217 VFIO_CHECK_EXTENSION, 218 arg); 219 module_put(driver->ops->owner); 220 if (ret > 0) 221 break; 222 } 223 mutex_unlock(&vfio.iommu_drivers_lock); 224 } else 225 ret = driver->ops->ioctl(container->iommu_data, 226 VFIO_CHECK_EXTENSION, arg); 227 } 228 229 up_read(&container->group_lock); 230 231 return ret; 232 } 233 234 /* hold write lock on container->group_lock */ 235 static int __vfio_container_attach_groups(struct vfio_container *container, 236 struct vfio_iommu_driver *driver, 237 void *data) 238 { 239 struct vfio_group *group; 240 int ret = -ENODEV; 241 242 list_for_each_entry(group, &container->group_list, container_next) { 243 ret = driver->ops->attach_group(data, group->iommu_group, 244 group->type); 245 if (ret) 246 goto unwind; 247 } 248 249 return ret; 250 251 unwind: 252 list_for_each_entry_continue_reverse(group, &container->group_list, 253 container_next) { 254 driver->ops->detach_group(data, group->iommu_group); 255 } 256 257 return ret; 258 } 259 260 static long vfio_ioctl_set_iommu(struct vfio_container *container, 261 unsigned long arg) 262 { 263 struct vfio_iommu_driver *driver; 264 long ret = -ENODEV; 265 266 down_write(&container->group_lock); 267 268 /* 269 * The container is designed to be an unprivileged interface while 270 * the group can be assigned to specific users. Therefore, only by 271 * adding a group to a container does the user get the privilege of 272 * enabling the iommu, which may allocate finite resources. There 273 * is no unset_iommu, but by removing all the groups from a container, 274 * the container is deprivileged and returns to an unset state. 275 */ 276 if (list_empty(&container->group_list) || container->iommu_driver) { 277 up_write(&container->group_lock); 278 return -EINVAL; 279 } 280 281 mutex_lock(&vfio.iommu_drivers_lock); 282 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { 283 void *data; 284 285 if (!vfio_iommu_driver_allowed(container, driver)) 286 continue; 287 if (!try_module_get(driver->ops->owner)) 288 continue; 289 290 /* 291 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION, 292 * so test which iommu driver reported support for this 293 * extension and call open on them. We also pass them the 294 * magic, allowing a single driver to support multiple 295 * interfaces if they'd like. 296 */ 297 if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) { 298 module_put(driver->ops->owner); 299 continue; 300 } 301 302 data = driver->ops->open(arg); 303 if (IS_ERR(data)) { 304 ret = PTR_ERR(data); 305 module_put(driver->ops->owner); 306 continue; 307 } 308 309 ret = __vfio_container_attach_groups(container, driver, data); 310 if (ret) { 311 driver->ops->release(data); 312 module_put(driver->ops->owner); 313 continue; 314 } 315 316 container->iommu_driver = driver; 317 container->iommu_data = data; 318 break; 319 } 320 321 mutex_unlock(&vfio.iommu_drivers_lock); 322 up_write(&container->group_lock); 323 324 return ret; 325 } 326 327 static long vfio_fops_unl_ioctl(struct file *filep, 328 unsigned int cmd, unsigned long arg) 329 { 330 struct vfio_container *container = filep->private_data; 331 struct vfio_iommu_driver *driver; 332 void *data; 333 long ret = -EINVAL; 334 335 if (!container) 336 return ret; 337 338 switch (cmd) { 339 case VFIO_GET_API_VERSION: 340 ret = VFIO_API_VERSION; 341 break; 342 case VFIO_CHECK_EXTENSION: 343 ret = vfio_container_ioctl_check_extension(container, arg); 344 break; 345 case VFIO_SET_IOMMU: 346 ret = vfio_ioctl_set_iommu(container, arg); 347 break; 348 default: 349 driver = container->iommu_driver; 350 data = container->iommu_data; 351 352 if (driver) /* passthrough all unrecognized ioctls */ 353 ret = driver->ops->ioctl(data, cmd, arg); 354 } 355 356 return ret; 357 } 358 359 static int vfio_fops_open(struct inode *inode, struct file *filep) 360 { 361 struct vfio_container *container; 362 363 container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT); 364 if (!container) 365 return -ENOMEM; 366 367 INIT_LIST_HEAD(&container->group_list); 368 init_rwsem(&container->group_lock); 369 kref_init(&container->kref); 370 371 filep->private_data = container; 372 373 return 0; 374 } 375 376 static int vfio_fops_release(struct inode *inode, struct file *filep) 377 { 378 struct vfio_container *container = filep->private_data; 379 380 filep->private_data = NULL; 381 382 vfio_container_put(container); 383 384 return 0; 385 } 386 387 static const struct file_operations vfio_fops = { 388 .owner = THIS_MODULE, 389 .open = vfio_fops_open, 390 .release = vfio_fops_release, 391 .unlocked_ioctl = vfio_fops_unl_ioctl, 392 .compat_ioctl = compat_ptr_ioctl, 393 }; 394 395 struct vfio_container *vfio_container_from_file(struct file *file) 396 { 397 struct vfio_container *container; 398 399 /* Sanity check, is this really our fd? */ 400 if (file->f_op != &vfio_fops) 401 return NULL; 402 403 container = file->private_data; 404 WARN_ON(!container); /* fget ensures we don't race vfio_release */ 405 return container; 406 } 407 408 static struct miscdevice vfio_dev = { 409 .minor = VFIO_MINOR, 410 .name = "vfio", 411 .fops = &vfio_fops, 412 .nodename = "vfio/vfio", 413 .mode = S_IRUGO | S_IWUGO, 414 }; 415 416 int vfio_container_attach_group(struct vfio_container *container, 417 struct vfio_group *group) 418 { 419 struct vfio_iommu_driver *driver; 420 int ret = 0; 421 422 lockdep_assert_held(&group->group_lock); 423 424 if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) 425 return -EPERM; 426 427 down_write(&container->group_lock); 428 429 /* Real groups and fake groups cannot mix */ 430 if (!list_empty(&container->group_list) && 431 container->noiommu != (group->type == VFIO_NO_IOMMU)) { 432 ret = -EPERM; 433 goto out_unlock_container; 434 } 435 436 if (group->type == VFIO_IOMMU) { 437 ret = iommu_group_claim_dma_owner(group->iommu_group, group); 438 if (ret) 439 goto out_unlock_container; 440 } 441 442 driver = container->iommu_driver; 443 if (driver) { 444 ret = driver->ops->attach_group(container->iommu_data, 445 group->iommu_group, 446 group->type); 447 if (ret) { 448 if (group->type == VFIO_IOMMU) 449 iommu_group_release_dma_owner( 450 group->iommu_group); 451 goto out_unlock_container; 452 } 453 } 454 455 group->container = container; 456 group->container_users = 1; 457 container->noiommu = (group->type == VFIO_NO_IOMMU); 458 list_add(&group->container_next, &container->group_list); 459 460 /* Get a reference on the container and mark a user within the group */ 461 vfio_container_get(container); 462 463 out_unlock_container: 464 up_write(&container->group_lock); 465 return ret; 466 } 467 468 void vfio_group_detach_container(struct vfio_group *group) 469 { 470 struct vfio_container *container = group->container; 471 struct vfio_iommu_driver *driver; 472 473 lockdep_assert_held(&group->group_lock); 474 WARN_ON(group->container_users != 1); 475 476 down_write(&container->group_lock); 477 478 driver = container->iommu_driver; 479 if (driver) 480 driver->ops->detach_group(container->iommu_data, 481 group->iommu_group); 482 483 if (group->type == VFIO_IOMMU) 484 iommu_group_release_dma_owner(group->iommu_group); 485 486 group->container = NULL; 487 group->container_users = 0; 488 list_del(&group->container_next); 489 490 /* Detaching the last group deprivileges a container, remove iommu */ 491 if (driver && list_empty(&container->group_list)) { 492 driver->ops->release(container->iommu_data); 493 module_put(driver->ops->owner); 494 container->iommu_driver = NULL; 495 container->iommu_data = NULL; 496 } 497 498 up_write(&container->group_lock); 499 500 vfio_container_put(container); 501 } 502 503 int vfio_group_use_container(struct vfio_group *group) 504 { 505 lockdep_assert_held(&group->group_lock); 506 507 /* 508 * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but 509 * VFIO_SET_IOMMU hasn't been done yet. 510 */ 511 if (!group->container->iommu_driver) 512 return -EINVAL; 513 514 if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) 515 return -EPERM; 516 517 get_file(group->opened_file); 518 group->container_users++; 519 return 0; 520 } 521 522 void vfio_group_unuse_container(struct vfio_group *group) 523 { 524 lockdep_assert_held(&group->group_lock); 525 526 WARN_ON(group->container_users <= 1); 527 group->container_users--; 528 fput(group->opened_file); 529 } 530 531 int vfio_device_container_pin_pages(struct vfio_device *device, 532 dma_addr_t iova, int npage, 533 int prot, struct page **pages) 534 { 535 struct vfio_container *container = device->group->container; 536 struct iommu_group *iommu_group = device->group->iommu_group; 537 struct vfio_iommu_driver *driver = container->iommu_driver; 538 539 if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) 540 return -E2BIG; 541 542 if (unlikely(!driver || !driver->ops->pin_pages)) 543 return -ENOTTY; 544 return driver->ops->pin_pages(container->iommu_data, iommu_group, iova, 545 npage, prot, pages); 546 } 547 548 void vfio_device_container_unpin_pages(struct vfio_device *device, 549 dma_addr_t iova, int npage) 550 { 551 struct vfio_container *container = device->group->container; 552 553 if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES)) 554 return; 555 556 container->iommu_driver->ops->unpin_pages(container->iommu_data, iova, 557 npage); 558 } 559 560 int vfio_device_container_dma_rw(struct vfio_device *device, 561 dma_addr_t iova, void *data, 562 size_t len, bool write) 563 { 564 struct vfio_container *container = device->group->container; 565 struct vfio_iommu_driver *driver = container->iommu_driver; 566 567 if (unlikely(!driver || !driver->ops->dma_rw)) 568 return -ENOTTY; 569 return driver->ops->dma_rw(container->iommu_data, iova, data, len, 570 write); 571 } 572 573 int __init vfio_container_init(void) 574 { 575 int ret; 576 577 mutex_init(&vfio.iommu_drivers_lock); 578 INIT_LIST_HEAD(&vfio.iommu_drivers_list); 579 580 ret = misc_register(&vfio_dev); 581 if (ret) { 582 pr_err("vfio: misc device register failed\n"); 583 return ret; 584 } 585 586 if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) { 587 ret = vfio_register_iommu_driver(&vfio_noiommu_ops); 588 if (ret) 589 goto err_misc; 590 } 591 return 0; 592 593 err_misc: 594 misc_deregister(&vfio_dev); 595 return ret; 596 } 597 598 void vfio_container_cleanup(void) 599 { 600 if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) 601 vfio_unregister_iommu_driver(&vfio_noiommu_ops); 602 misc_deregister(&vfio_dev); 603 mutex_destroy(&vfio.iommu_drivers_lock); 604 } 605 606 MODULE_ALIAS_MISCDEV(VFIO_MINOR); 607 MODULE_ALIAS("devname:vfio/vfio"); 608