// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2021 Intel Corporation
 * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
 *
 * iommufd provides control over the IOMMU HW objects created by IOMMU kernel
 * drivers. IOMMU HW objects revolve around IO page tables that map incoming
 * DMA addresses (IOVA) to CPU addresses.
 */
#define pr_fmt(fmt) "iommufd: " fmt

#include <linux/bug.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/iommufd.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <uapi/linux/iommufd.h>

#include "io_pagetable.h"
#include "iommufd_private.h"
#include "iommufd_test.h"

struct iommufd_object_ops {
	size_t file_offset;
	void (*pre_destroy)(struct iommufd_object *obj);
	void (*destroy)(struct iommufd_object *obj);
	void (*abort)(struct iommufd_object *obj);
};
static const struct iommufd_object_ops iommufd_object_ops[];
static struct miscdevice vfio_misc_dev;

struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
					     size_t size,
					     enum iommufd_object_type type)
{
	struct iommufd_object *obj;
	int rc;

	obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
	if (!obj)
		return ERR_PTR(-ENOMEM);
	obj->type = type;
	/* Starts out biased by 1 until it is removed from the xarray */
	refcount_set(&obj->wait_cnt, 1);
	refcount_set(&obj->users, 1);

	/*
	 * Reserve an ID in the xarray but do not publish the pointer yet since
	 * the caller hasn't initialized it yet. Once the pointer is published
	 * in the xarray and visible to other threads we can't reliably destroy
	 * it anymore, so the caller must complete all errorable operations
	 * before calling iommufd_object_finalize().
	 */
	rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, xa_limit_31b,
		      GFP_KERNEL_ACCOUNT);
	if (rc)
		goto out_free;
	return obj;
out_free:
	kfree(obj);
	return ERR_PTR(rc);
}

struct iommufd_object *_iommufd_object_alloc_ucmd(struct iommufd_ucmd *ucmd,
						  size_t size,
						  enum iommufd_object_type type)
{
	struct iommufd_object *new_obj;

	/* Something is coded wrong if this is hit */
	if (WARN_ON(ucmd->new_obj))
		return ERR_PTR(-EBUSY);

	/*
	 * An abort op means that its caller must invoke it while holding the
	 * caller's lock. That doesn't work with _iommufd_object_alloc_ucmd(),
	 * which invokes the abort op from iommufd_object_abort_and_destroy(),
	 * outside the caller's lock.
	 */
	if (WARN_ON(iommufd_object_ops[type].abort))
		return ERR_PTR(-EOPNOTSUPP);

	new_obj = _iommufd_object_alloc(ucmd->ictx, size, type);
	if (IS_ERR(new_obj))
		return new_obj;

	ucmd->new_obj = new_obj;
	return new_obj;
}

/*
 * Allow concurrent access to the object.
 *
 * Once another thread can see the object pointer it can prevent object
 * destruction. Except for special kernel-only objects there is no in-kernel
 * way to reliably destroy a single object. Thus all APIs that are creating
 * objects must use iommufd_object_abort() to handle their errors and only call
 * iommufd_object_finalize() once object creation cannot fail.
 */
void iommufd_object_finalize(struct iommufd_ctx *ictx,
			     struct iommufd_object *obj)
{
	XA_STATE(xas, &ictx->objects, obj->id);
	void *old;

	xa_lock(&ictx->objects);
	old = xas_store(&xas, obj);
	xa_unlock(&ictx->objects);
	/* obj->id was returned from xa_alloc() so the xas_store() cannot fail */
	WARN_ON(old != XA_ZERO_ENTRY);
}

/* Undo _iommufd_object_alloc() if iommufd_object_finalize() was not called */
void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj)
{
	XA_STATE(xas, &ictx->objects, obj->id);
	void *old;

	xa_lock(&ictx->objects);
	old = xas_store(&xas, NULL);
	xa_unlock(&ictx->objects);
	WARN_ON(old != XA_ZERO_ENTRY);

	if (WARN_ON(!refcount_dec_and_test(&obj->users)))
		return;

	kfree(obj);
}

/*
 * Abort an object that has been fully initialized and needs destroy, but has
 * not been finalized.
 */
void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
				      struct iommufd_object *obj)
{
	const struct iommufd_object_ops *ops = &iommufd_object_ops[obj->type];

	if (ops->file_offset) {
		struct file **filep = ((void *)obj) + ops->file_offset;

		/*
		 * A file should hold a users refcount while the file is open
		 * and put it back in its release. The file should hold a
		 * pointer to obj in its private data. Normal fput() is
		 * deferred to a workqueue and can get out of order with the
		 * following kfree(obj). Using the sync version ensures the
		 * release happens immediately. During abort we require the
		 * file refcount to be one at this point - meaning the object
		 * alloc function cannot do anything to allow another thread to
		 * take a refcount prior to a guaranteed success.
		 */
		if (*filep)
			__fput_sync(*filep);
	}

	if (ops->abort)
		ops->abort(obj);
	else
		ops->destroy(obj);
	iommufd_object_abort(ictx, obj);
}

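/*
 * Illustrative only, not used by this file: the typical shape of an
 * object-creating path built on the helpers above. "struct iommufd_foo",
 * IOMMUFD_OBJ_FOO and foo_setup() are hypothetical stand-ins for an object
 * type that embeds a struct iommufd_object named "obj" as its first member.
 *
 *	struct iommufd_object *obj;
 *	struct iommufd_foo *foo;
 *	int rc;
 *
 *	obj = _iommufd_object_alloc(ictx, sizeof(*foo), IOMMUFD_OBJ_FOO);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *	foo = container_of(obj, struct iommufd_foo, obj);
 *
 *	rc = foo_setup(foo);	// any step that can still fail
 *	if (rc) {
 *		// use iommufd_object_abort_and_destroy() instead once the
 *		// object is fully initialized and its destroy() must run
 *		iommufd_object_abort(ictx, obj);
 *		return rc;
 *	}
 *	iommufd_object_finalize(ictx, obj);	// publish; no failure after this
 */
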
struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
					  enum iommufd_object_type type)
{
	struct iommufd_object *obj;

	if (iommufd_should_fail())
		return ERR_PTR(-ENOENT);

	xa_lock(&ictx->objects);
	obj = xa_load(&ictx->objects, id);
	if (!obj || (type != IOMMUFD_OBJ_ANY && obj->type != type) ||
	    !iommufd_lock_obj(obj))
		obj = ERR_PTR(-ENOENT);
	xa_unlock(&ictx->objects);
	return obj;
}

static int iommufd_object_dec_wait(struct iommufd_ctx *ictx,
				   struct iommufd_object *to_destroy)
{
	if (refcount_dec_and_test(&to_destroy->wait_cnt))
		return 0;

	if (iommufd_object_ops[to_destroy->type].pre_destroy)
		iommufd_object_ops[to_destroy->type].pre_destroy(to_destroy);

	if (wait_event_timeout(ictx->destroy_wait,
			       refcount_read(&to_destroy->wait_cnt) == 0,
			       msecs_to_jiffies(60000)))
		return 0;

	pr_crit("Time out waiting for iommufd object to become free\n");
	refcount_inc(&to_destroy->wait_cnt);
	return -EBUSY;
}

/*
 * Remove the given object id from the xarray if the only reference to the
 * object is held by the xarray.
 */
int iommufd_object_remove(struct iommufd_ctx *ictx,
			  struct iommufd_object *to_destroy, u32 id,
			  unsigned int flags)
{
	struct iommufd_object *obj;
	XA_STATE(xas, &ictx->objects, id);
	bool zerod_wait_cnt = false;
	int ret;

	/*
	 * The purpose of the wait_cnt is to ensure deterministic destruction
	 * of objects used by external drivers and destroyed by this function.
	 * An elevated wait_cnt should either be short-lived, such as during
	 * ioctl execution, or be revoked and blocked during pre_destroy(),
	 * such as a vdev holding the idev's refcount.
	 */
	if (flags & REMOVE_WAIT) {
		ret = iommufd_object_dec_wait(ictx, to_destroy);
		if (ret) {
			/*
			 * We have a bug. Put back the caller's reference and
			 * defer cleaning this object until close.
			 */
			refcount_dec(&to_destroy->users);
			return ret;
		}
		zerod_wait_cnt = true;
	}

	xa_lock(&ictx->objects);
	obj = xas_load(&xas);
	if (to_destroy) {
		/*
		 * If the caller is holding a ref on obj we put it here under
		 * the spinlock.
		 */
		refcount_dec(&obj->users);

		if (WARN_ON(obj != to_destroy)) {
			ret = -ENOENT;
			goto err_xa;
		}
	} else if (xa_is_zero(obj) || !obj) {
		ret = -ENOENT;
		goto err_xa;
	}

	if (!refcount_dec_if_one(&obj->users)) {
		ret = -EBUSY;
		goto err_xa;
	}

	xas_store(&xas, (flags & REMOVE_OBJ_TOMBSTONE) ? XA_ZERO_ENTRY : NULL);
	if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj))
		ictx->vfio_ioas = NULL;
	xa_unlock(&ictx->objects);

	/*
	 * Since users is zero any positive wait_cnt must be racing
	 * iommufd_put_object(), or we have a bug.
	 */
	if (!zerod_wait_cnt) {
		ret = iommufd_object_dec_wait(ictx, obj);
		if (WARN_ON(ret))
			return ret;
	}

	iommufd_object_ops[obj->type].destroy(obj);
	kfree(obj);
	return 0;

err_xa:
	if (zerod_wait_cnt) {
		/* Restore the xarray-owned reference */
		refcount_set(&obj->wait_cnt, 1);
	}
	xa_unlock(&ictx->objects);

	/* The returned object reference count is zero */
	return ret;
}

static int iommufd_destroy(struct iommufd_ucmd *ucmd)
{
	struct iommu_destroy *cmd = ucmd->cmd;

	return iommufd_object_remove(ucmd->ictx, NULL, cmd->id, 0);
}

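/*
 * Illustrative only: how userspace reaches iommufd_destroy() above. The
 * object ID is one previously returned by a creating ioctl; the layout of
 * struct iommu_destroy follows include/uapi/linux/iommufd.h.
 *
 *	struct iommu_destroy cmd = {
 *		.size = sizeof(cmd),
 *		.id = object_id,	// e.g. an IOAS or HWPT ID
 *	};
 *
 *	if (ioctl(iommufd_fd, IOMMU_DESTROY, &cmd))
 *		// fails with EBUSY while other users hold references
 */
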
static int iommufd_fops_open(struct inode *inode, struct file *filp)
{
	struct iommufd_ctx *ictx;

	ictx = kzalloc(sizeof(*ictx), GFP_KERNEL_ACCOUNT);
	if (!ictx)
		return -ENOMEM;

	/*
	 * For compatibility with VFIO, when /dev/vfio/vfio is opened we
	 * default to the same rlimit accounting that VFIO uses.
	 */
	if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER) &&
	    filp->private_data == &vfio_misc_dev) {
		ictx->account_mode = IOPT_PAGES_ACCOUNT_MM;
		pr_info_once("IOMMUFD is providing /dev/vfio/vfio, not VFIO.\n");
	}

	init_rwsem(&ictx->ioas_creation_lock);
	xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
	xa_init(&ictx->groups);
	ictx->file = filp;
	mt_init_flags(&ictx->mt_mmap, MT_FLAGS_ALLOC_RANGE);
	init_waitqueue_head(&ictx->destroy_wait);
	mutex_init(&ictx->sw_msi_lock);
	INIT_LIST_HEAD(&ictx->sw_msi_list);
	filp->private_data = ictx;
	return 0;
}

static int iommufd_fops_release(struct inode *inode, struct file *filp)
{
	struct iommufd_ctx *ictx = filp->private_data;
	struct iommufd_sw_msi_map *next;
	struct iommufd_sw_msi_map *cur;
	struct iommufd_object *obj;

	/*
	 * The objects in the xarray form a graph of "users" counts, and we
	 * have to destroy them in a depth-first manner. Leaf objects will
	 * reduce the users count of interior objects when they are destroyed.
	 *
	 * Repeatedly destroying all the "1 users" leaf objects will progress
	 * until the entire list is destroyed. If this can't progress then
	 * there is some bug related to object refcounting.
	 */
	while (!xa_empty(&ictx->objects)) {
		unsigned int destroyed = 0;
		unsigned long index;
		bool empty = true;

		/*
		 * We can't use xa_empty() to end the loop as the tombstones
		 * are stored as XA_ZERO_ENTRY in the xarray. However,
		 * xa_for_each() automatically converts them to NULL and skips
		 * them, which keeps xa_empty() false even when only tombstones
		 * remain. Thus once xa_for_each() finds no further !NULL
		 * entries the loop is done.
		 */
		xa_for_each(&ictx->objects, index, obj) {
			empty = false;
			if (!refcount_dec_if_one(&obj->users))
				continue;

			destroyed++;
			xa_erase(&ictx->objects, index);
			iommufd_object_ops[obj->type].destroy(obj);
			kfree(obj);
		}

		if (empty)
			break;

		/* Bug related to users refcount */
		if (WARN_ON(!destroyed))
			break;
	}

	/*
	 * There may be some tombstones left over from
	 * iommufd_object_tombstone_user()
	 */
	xa_destroy(&ictx->objects);

	WARN_ON(!xa_empty(&ictx->groups));

	mutex_destroy(&ictx->sw_msi_lock);
	list_for_each_entry_safe(cur, next, &ictx->sw_msi_list, sw_msi_item)
		kfree(cur);

	kfree(ictx);
	return 0;
}

static int iommufd_option(struct iommufd_ucmd *ucmd)
{
	struct iommu_option *cmd = ucmd->cmd;
	int rc;

	if (cmd->__reserved)
		return -EOPNOTSUPP;

	switch (cmd->option_id) {
	case IOMMU_OPTION_RLIMIT_MODE:
		rc = iommufd_option_rlimit_mode(cmd, ucmd->ictx);
		break;
	case IOMMU_OPTION_HUGE_PAGES:
		rc = iommufd_ioas_option(ucmd);
		break;
	default:
		return -EOPNOTSUPP;
	}
	if (rc)
		return rc;
	if (copy_to_user(&((struct iommu_option __user *)ucmd->ubuffer)->val64,
			 &cmd->val64, sizeof(cmd->val64)))
		return -EFAULT;
	return 0;
}

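/*
 * Illustrative only: a userspace sketch of the IOMMU_OPTION call handled
 * above, reading the context-wide rlimit accounting mode. Field names follow
 * include/uapi/linux/iommufd.h at the time of writing.
 *
 *	struct iommu_option cmd = {
 *		.size = sizeof(cmd),
 *		.option_id = IOMMU_OPTION_RLIMIT_MODE,
 *		.op = IOMMU_OPTION_OP_GET,
 *		.object_id = 0,		// context-wide, not per-IOAS
 *	};
 *
 *	if (!ioctl(iommufd_fd, IOMMU_OPTION, &cmd))
 *		// cmd.val64 now holds the current accounting mode
 */
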
union ucmd_buffer {
	struct iommu_destroy destroy;
	struct iommu_fault_alloc fault;
	struct iommu_hw_info info;
	struct iommu_hw_queue_alloc hw_queue;
	struct iommu_hwpt_alloc hwpt;
	struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap;
	struct iommu_hwpt_invalidate cache;
	struct iommu_hwpt_set_dirty_tracking set_dirty_tracking;
	struct iommu_ioas_alloc alloc;
	struct iommu_ioas_allow_iovas allow_iovas;
	struct iommu_ioas_copy ioas_copy;
	struct iommu_ioas_iova_ranges iova_ranges;
	struct iommu_ioas_map map;
	struct iommu_ioas_unmap unmap;
	struct iommu_option option;
	struct iommu_vdevice_alloc vdev;
	struct iommu_veventq_alloc veventq;
	struct iommu_vfio_ioas vfio_ioas;
	struct iommu_viommu_alloc viommu;
#ifdef CONFIG_IOMMUFD_TEST
	struct iommu_test_cmd test;
#endif
};

struct iommufd_ioctl_op {
	unsigned int size;
	unsigned int min_size;
	unsigned int ioctl_num;
	int (*execute)(struct iommufd_ucmd *ucmd);
};

#define IOCTL_OP(_ioctl, _fn, _struct, _last)                          \
	[_IOC_NR(_ioctl) - IOMMUFD_CMD_BASE] = {                       \
		.size = sizeof(_struct) +                              \
			BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) <  \
					  sizeof(_struct)),            \
		.min_size = offsetofend(_struct, _last),               \
		.ioctl_num = _ioctl,                                   \
		.execute = _fn,                                        \
	}
static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
	IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id),
	IOCTL_OP(IOMMU_FAULT_QUEUE_ALLOC, iommufd_fault_alloc,
		 struct iommu_fault_alloc, out_fault_fd),
	IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info,
		 __reserved),
	IOCTL_OP(IOMMU_HW_QUEUE_ALLOC, iommufd_hw_queue_alloc_ioctl,
		 struct iommu_hw_queue_alloc, length),
	IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc,
		 __reserved),
	IOCTL_OP(IOMMU_HWPT_GET_DIRTY_BITMAP, iommufd_hwpt_get_dirty_bitmap,
		 struct iommu_hwpt_get_dirty_bitmap, data),
	IOCTL_OP(IOMMU_HWPT_INVALIDATE, iommufd_hwpt_invalidate,
		 struct iommu_hwpt_invalidate, __reserved),
	IOCTL_OP(IOMMU_HWPT_SET_DIRTY_TRACKING, iommufd_hwpt_set_dirty_tracking,
		 struct iommu_hwpt_set_dirty_tracking, __reserved),
	IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl,
		 struct iommu_ioas_alloc, out_ioas_id),
	IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas,
		 struct iommu_ioas_allow_iovas, allowed_iovas),
	IOCTL_OP(IOMMU_IOAS_CHANGE_PROCESS, iommufd_ioas_change_process,
		 struct iommu_ioas_change_process, __reserved),
	IOCTL_OP(IOMMU_IOAS_COPY, iommufd_ioas_copy, struct iommu_ioas_copy,
		 src_iova),
	IOCTL_OP(IOMMU_IOAS_IOVA_RANGES, iommufd_ioas_iova_ranges,
		 struct iommu_ioas_iova_ranges, out_iova_alignment),
	IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map, iova),
	IOCTL_OP(IOMMU_IOAS_MAP_FILE, iommufd_ioas_map_file,
		 struct iommu_ioas_map_file, iova),
	IOCTL_OP(IOMMU_IOAS_UNMAP, iommufd_ioas_unmap, struct iommu_ioas_unmap,
		 length),
	IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, val64),
	IOCTL_OP(IOMMU_VDEVICE_ALLOC, iommufd_vdevice_alloc_ioctl,
		 struct iommu_vdevice_alloc, virt_id),
	IOCTL_OP(IOMMU_VEVENTQ_ALLOC, iommufd_veventq_alloc,
		 struct iommu_veventq_alloc, out_veventq_fd),
	IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas,
		 __reserved),
	IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl,
		 struct iommu_viommu_alloc, out_viommu_id),
#ifdef CONFIG_IOMMUFD_TEST
	IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last),
#endif
};

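/*
 * A note on the sizing convention enforced by iommufd_fops_ioctl() below
 * (illustrative, not authoritative): every uAPI struct starts with a __u32
 * size that userspace sets to the sizeof() it was built against. The kernel
 * accepts any size >= min_size (offsetofend() of the last required field),
 * and per copy_struct_from_user()'s documented behaviour the fields an older,
 * smaller userspace struct did not supply are zero-filled, while a newer,
 * larger userspace struct is only accepted if the trailing bytes the kernel
 * does not understand are all zero.
 *
 *	struct iommu_ioas_alloc cmd = {
 *		.size = sizeof(cmd),	// may be smaller than the kernel's view
 *	};
 *
 *	if (!ioctl(iommufd_fd, IOMMU_IOAS_ALLOC, &cmd))
 *		// cmd.out_ioas_id is valid
 */
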
static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd,
			       unsigned long arg)
{
	struct iommufd_ctx *ictx = filp->private_data;
	const struct iommufd_ioctl_op *op;
	struct iommufd_ucmd ucmd = {};
	union ucmd_buffer buf;
	unsigned int nr;
	int ret;

	nr = _IOC_NR(cmd);
	if (nr < IOMMUFD_CMD_BASE ||
	    (nr - IOMMUFD_CMD_BASE) >= ARRAY_SIZE(iommufd_ioctl_ops))
		return iommufd_vfio_ioctl(ictx, cmd, arg);

	ucmd.ictx = ictx;
	ucmd.ubuffer = (void __user *)arg;
	ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
	if (ret)
		return ret;

	op = &iommufd_ioctl_ops[nr - IOMMUFD_CMD_BASE];
	if (op->ioctl_num != cmd)
		return -ENOIOCTLCMD;
	if (ucmd.user_size < op->min_size)
		return -EINVAL;

	ucmd.cmd = &buf;
	ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
				    ucmd.user_size);
	if (ret)
		return ret;
	ret = op->execute(&ucmd);

	if (ucmd.new_obj) {
		if (ret)
			iommufd_object_abort_and_destroy(ictx, ucmd.new_obj);
		else
			iommufd_object_finalize(ictx, ucmd.new_obj);
	}
	return ret;
}

static void iommufd_fops_vma_open(struct vm_area_struct *vma)
{
	struct iommufd_mmap *immap = vma->vm_private_data;

	refcount_inc(&immap->owner->users);
}

static void iommufd_fops_vma_close(struct vm_area_struct *vma)
{
	struct iommufd_mmap *immap = vma->vm_private_data;

	refcount_dec(&immap->owner->users);
}

static const struct vm_operations_struct iommufd_vma_ops = {
	.open = iommufd_fops_vma_open,
	.close = iommufd_fops_vma_close,
};

/* The vm_pgoff must be pre-allocated from mt_mmap, and given to user space */
static int iommufd_fops_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct iommufd_ctx *ictx = filp->private_data;
	size_t length = vma->vm_end - vma->vm_start;
	struct iommufd_mmap *immap;
	int rc;

	if (!PAGE_ALIGNED(length))
		return -EINVAL;
	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;
	if (vma->vm_flags & VM_EXEC)
		return -EPERM;

	mtree_lock(&ictx->mt_mmap);
	/* vma->vm_pgoff carries a page-shifted start position to an immap */
	immap = mtree_load(&ictx->mt_mmap, vma->vm_pgoff << PAGE_SHIFT);
	if (!immap || !refcount_inc_not_zero(&immap->owner->users)) {
		mtree_unlock(&ictx->mt_mmap);
		return -ENXIO;
	}
	mtree_unlock(&ictx->mt_mmap);

	/*
	 * mtree_load() returns the immap for any contained mmio_addr, so only
	 * allow the exact immap to be mapped
	 */
	if (vma->vm_pgoff != immap->vm_pgoff || length != immap->length) {
		rc = -ENXIO;
		goto err_refcount;
	}

	vma->vm_pgoff = 0;
	vma->vm_private_data = immap;
	vma->vm_ops = &iommufd_vma_ops;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	rc = io_remap_pfn_range(vma, vma->vm_start,
				immap->mmio_addr >> PAGE_SHIFT, length,
				vma->vm_page_prot);
	if (rc)
		goto err_refcount;
	return 0;

err_refcount:
	refcount_dec(&immap->owner->users);
	return rc;
}

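/*
 * Illustrative only: the matching userspace call for iommufd_fops_mmap()
 * above. "mmap_offset" and "mmap_length" stand for values a driver reported
 * for an mmappable region; the mapping must be MAP_SHARED, non-executable,
 * and cover the reported region exactly.
 *
 *	void *mmio = mmap(NULL, mmap_length, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, iommufd_fd, mmap_offset);
 *	if (mmio == MAP_FAILED)
 *		// ENXIO if offset/length do not match a reported region
 */
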
static const struct file_operations iommufd_fops = {
	.owner = THIS_MODULE,
	.open = iommufd_fops_open,
	.release = iommufd_fops_release,
	.unlocked_ioctl = iommufd_fops_ioctl,
	.mmap = iommufd_fops_mmap,
};

/**
 * iommufd_ctx_get - Get a context reference
 * @ictx: Context to get
 *
 * The caller must already hold a valid reference to ictx.
 */
void iommufd_ctx_get(struct iommufd_ctx *ictx)
{
	get_file(ictx->file);
}
EXPORT_SYMBOL_NS_GPL(iommufd_ctx_get, "IOMMUFD");

/**
 * iommufd_ctx_from_file - Acquires a reference to the iommufd context
 * @file: File to obtain the reference from
 *
 * Returns a pointer to the iommufd_ctx, otherwise ERR_PTR. The struct file
 * remains owned by the caller and the caller must still call fput(). On
 * success the caller is responsible for calling iommufd_ctx_put().
 */
struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
{
	struct iommufd_ctx *ictx;

	if (file->f_op != &iommufd_fops)
		return ERR_PTR(-EBADFD);
	ictx = file->private_data;
	iommufd_ctx_get(ictx);
	return ictx;
}
EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_file, "IOMMUFD");

/**
 * iommufd_ctx_from_fd - Acquires a reference to the iommufd context
 * @fd: File descriptor to obtain the reference from
 *
 * Returns a pointer to the iommufd_ctx, otherwise ERR_PTR. On success
 * the caller is responsible for calling iommufd_ctx_put().
 */
struct iommufd_ctx *iommufd_ctx_from_fd(int fd)
{
	struct file *file;

	file = fget(fd);
	if (!file)
		return ERR_PTR(-EBADF);

	if (file->f_op != &iommufd_fops) {
		fput(file);
		return ERR_PTR(-EBADFD);
	}
	/* fget is the same as iommufd_ctx_get() */
	return file->private_data;
}
EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_fd, "IOMMUFD");

/**
 * iommufd_ctx_put - Put back a reference
 * @ictx: Context to put back
 */
void iommufd_ctx_put(struct iommufd_ctx *ictx)
{
	fput(ictx->file);
}
EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, "IOMMUFD");

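/*
 * Illustrative only: how an external module typically uses the reference
 * helpers above when userspace hands it an iommufd file descriptor.
 *
 *	struct iommufd_ctx *ictx;
 *
 *	ictx = iommufd_ctx_from_fd(fd);	// takes a reference on success
 *	if (IS_ERR(ictx))
 *		return PTR_ERR(ictx);
 *	// ... use ictx ...
 *	iommufd_ctx_put(ictx);		// drop the reference when done
 */
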
#define IOMMUFD_FILE_OFFSET(_struct, _filep, _obj)                            \
	.file_offset = (offsetof(_struct, _filep) +                           \
			BUILD_BUG_ON_ZERO(!__same_type(                       \
				struct file *, ((_struct *)NULL)->_filep)) +  \
			BUILD_BUG_ON_ZERO(offsetof(_struct, _obj)))

static const struct iommufd_object_ops iommufd_object_ops[] = {
	[IOMMUFD_OBJ_ACCESS] = {
		.destroy = iommufd_access_destroy_object,
	},
	[IOMMUFD_OBJ_DEVICE] = {
		.pre_destroy = iommufd_device_pre_destroy,
		.destroy = iommufd_device_destroy,
	},
	[IOMMUFD_OBJ_FAULT] = {
		.destroy = iommufd_fault_destroy,
		IOMMUFD_FILE_OFFSET(struct iommufd_fault, common.filep, common.obj),
	},
	[IOMMUFD_OBJ_HW_QUEUE] = {
		.destroy = iommufd_hw_queue_destroy,
	},
	[IOMMUFD_OBJ_HWPT_PAGING] = {
		.destroy = iommufd_hwpt_paging_destroy,
		.abort = iommufd_hwpt_paging_abort,
	},
	[IOMMUFD_OBJ_HWPT_NESTED] = {
		.destroy = iommufd_hwpt_nested_destroy,
		.abort = iommufd_hwpt_nested_abort,
	},
	[IOMMUFD_OBJ_IOAS] = {
		.destroy = iommufd_ioas_destroy,
	},
	[IOMMUFD_OBJ_VDEVICE] = {
		.destroy = iommufd_vdevice_destroy,
		.abort = iommufd_vdevice_abort,
	},
	[IOMMUFD_OBJ_VEVENTQ] = {
		.destroy = iommufd_veventq_destroy,
		.abort = iommufd_veventq_abort,
		IOMMUFD_FILE_OFFSET(struct iommufd_veventq, common.filep, common.obj),
	},
	[IOMMUFD_OBJ_VIOMMU] = {
		.destroy = iommufd_viommu_destroy,
	},
#ifdef CONFIG_IOMMUFD_TEST
	[IOMMUFD_OBJ_SELFTEST] = {
		.destroy = iommufd_selftest_destroy,
	},
#endif
};

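/*
 * Illustrative only: what a new entry in iommufd_object_ops[] above would
 * look like. IOMMUFD_OBJ_FOO and the iommufd_foo_* callbacks are
 * hypothetical; a real object type must at least provide .destroy, adds
 * .abort only if its creation path needs cleanup under the caller's lock,
 * and uses IOMMUFD_FILE_OFFSET() only if it owns a struct file.
 *
 *	[IOMMUFD_OBJ_FOO] = {
 *		.destroy = iommufd_foo_destroy,
 *		.abort = iommufd_foo_abort,
 *	},
 */
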
static struct miscdevice iommu_misc_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "iommu",
	.fops = &iommufd_fops,
	.nodename = "iommu",
	.mode = 0660,
};

static struct miscdevice vfio_misc_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &iommufd_fops,
	.nodename = "vfio/vfio",
	.mode = 0666,
};

static int __init iommufd_init(void)
{
	int ret;

	ret = misc_register(&iommu_misc_dev);
	if (ret)
		return ret;

	if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)) {
		ret = misc_register(&vfio_misc_dev);
		if (ret)
			goto err_misc;
	}
	ret = iommufd_test_init();
	if (ret)
		goto err_vfio_misc;
	return 0;

err_vfio_misc:
	if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER))
		misc_deregister(&vfio_misc_dev);
err_misc:
	misc_deregister(&iommu_misc_dev);
	return ret;
}

static void __exit iommufd_exit(void)
{
	iommufd_test_exit();
	if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER))
		misc_deregister(&vfio_misc_dev);
	misc_deregister(&iommu_misc_dev);
}

module_init(iommufd_init);
module_exit(iommufd_exit);

#if IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
#endif
MODULE_IMPORT_NS("IOMMUFD_INTERNAL");
MODULE_IMPORT_NS("IOMMUFD");
MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices");
MODULE_LICENSE("GPL");