// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
 */
#include <linux/iommufd.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <uapi/linux/iommufd.h>
#include "../iommu-priv.h"

#include "io_pagetable.h"
#include "iommufd_private.h"

static bool allow_unsafe_interrupts;
module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(
	allow_unsafe_interrupts,
	"Allow IOMMUFD to bind to devices even if the platform cannot isolate "
	"the MSI interrupt window. Enabling this is a security weakness.");

static void iommufd_group_release(struct kref *kref)
{
	struct iommufd_group *igroup =
		container_of(kref, struct iommufd_group, ref);

	WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list));

	xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup,
		   NULL, GFP_KERNEL);
	iommu_group_put(igroup->group);
	mutex_destroy(&igroup->lock);
	kfree(igroup);
}

static void iommufd_put_group(struct iommufd_group *group)
{
	kref_put(&group->ref, iommufd_group_release);
}

static bool iommufd_group_try_get(struct iommufd_group *igroup,
				  struct iommu_group *group)
{
	if (!igroup)
		return false;
	/*
	 * Group IDs cannot be reused until the group is put back, which does
	 * not happen if we could get an igroup pointer under the xa_lock.
	 */
	if (WARN_ON(igroup->group != group))
		return false;
	return kref_get_unless_zero(&igroup->ref);
}

/*
 * iommufd needs to store some more data for each iommu_group, so we keep a
 * parallel xarray indexed by iommu_group id to hold this instead of putting it
 * in the core structure. To keep things simple the iommufd_group memory is
 * unique within the iommufd_ctx. This makes it easy to check there are no
 * memory leaks.
 */
static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
					       struct device *dev)
{
	struct iommufd_group *new_igroup;
	struct iommufd_group *cur_igroup;
	struct iommufd_group *igroup;
	struct iommu_group *group;
	unsigned int id;

	group = iommu_group_get(dev);
	if (!group)
		return ERR_PTR(-ENODEV);

	id = iommu_group_id(group);

	xa_lock(&ictx->groups);
	igroup = xa_load(&ictx->groups, id);
	if (iommufd_group_try_get(igroup, group)) {
		xa_unlock(&ictx->groups);
		iommu_group_put(group);
		return igroup;
	}
	xa_unlock(&ictx->groups);

	new_igroup = kzalloc(sizeof(*new_igroup), GFP_KERNEL);
	if (!new_igroup) {
		iommu_group_put(group);
		return ERR_PTR(-ENOMEM);
	}

	kref_init(&new_igroup->ref);
	mutex_init(&new_igroup->lock);
	INIT_LIST_HEAD(&new_igroup->device_list);
	new_igroup->sw_msi_start = PHYS_ADDR_MAX;
	/* group reference moves into new_igroup */
	new_igroup->group = group;

	/*
	 * The ictx is not additionally refcounted here because all objects
	 * using an igroup must put it before their destroy completes.
	 */
	new_igroup->ictx = ictx;

	/*
	 * We dropped the lock so igroup is invalid. NULL is a safe and likely
	 * value to assume for the xa_cmpxchg algorithm.
	 */
	cur_igroup = NULL;
	xa_lock(&ictx->groups);
	while (true) {
		igroup = __xa_cmpxchg(&ictx->groups, id, cur_igroup, new_igroup,
				      GFP_KERNEL);
		if (xa_is_err(igroup)) {
			xa_unlock(&ictx->groups);
			iommufd_put_group(new_igroup);
			return ERR_PTR(xa_err(igroup));
		}

		/* new_igroup was successfully installed */
		if (cur_igroup == igroup) {
			xa_unlock(&ictx->groups);
			return new_igroup;
		}

		/* Check again if the current group is any good */
		if (iommufd_group_try_get(igroup, group)) {
			xa_unlock(&ictx->groups);
			iommufd_put_group(new_igroup);
			return igroup;
		}
		cur_igroup = igroup;
	}
}

void iommufd_device_destroy(struct iommufd_object *obj)
{
	struct iommufd_device *idev =
		container_of(obj, struct iommufd_device, obj);

	iommu_device_release_dma_owner(idev->dev);
	iommufd_put_group(idev->igroup);
	if (!iommufd_selftest_is_mock_dev(idev->dev))
		iommufd_ctx_put(idev->ictx);
}

/**
 * iommufd_device_bind - Bind a physical device to an iommu fd
 * @ictx: iommufd file descriptor
 * @dev: Pointer to a physical device struct
 * @id: Output ID number to return to userspace for this device
 *
 * A successful bind establishes ownership over the device and returns a
 * struct iommufd_device pointer, otherwise it returns an error pointer.
 *
 * A driver using this API must set driver_managed_dma and must not touch
 * the device until this routine succeeds and establishes ownership.
 *
 * Binding a PCI device places the entire RID under iommufd control.
 *
 * The caller must undo this with iommufd_device_unbind()
 */
struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
					   struct device *dev, u32 *id)
{
	struct iommufd_device *idev;
	struct iommufd_group *igroup;
	int rc;

	/*
	 * iommufd always sets IOMMU_CACHE because we offer no way for
	 * userspace to restore cache coherency.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
		return ERR_PTR(-EINVAL);

	igroup = iommufd_get_group(ictx, dev);
	if (IS_ERR(igroup))
		return ERR_CAST(igroup);

	/*
	 * For historical compat with VFIO the insecure interrupt path is
	 * allowed if the module parameter is set. Secure/Isolated means that a
	 * MemWr operation from the device (eg a simple DMA) cannot trigger an
	 * interrupt outside this iommufd context.
	 */
	if (!iommufd_selftest_is_mock_dev(dev) &&
	    !iommu_group_has_isolated_msi(igroup->group)) {
		if (!allow_unsafe_interrupts) {
			rc = -EPERM;
			goto out_group_put;
		}

		dev_warn(
			dev,
			"MSI interrupts are not secure, they cannot be isolated by the platform. "
			"Check that platform features like interrupt remapping are enabled. "
			"Use the \"allow_unsafe_interrupts\" module parameter to override\n");
	}

	rc = iommu_device_claim_dma_owner(dev, ictx);
	if (rc)
		goto out_group_put;

	idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE);
	if (IS_ERR(idev)) {
		rc = PTR_ERR(idev);
		goto out_release_owner;
	}
	idev->ictx = ictx;
	if (!iommufd_selftest_is_mock_dev(dev))
		iommufd_ctx_get(ictx);
	idev->dev = dev;
	idev->enforce_cache_coherency =
		device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
	/* The calling driver is a user until iommufd_device_unbind() */
	refcount_inc(&idev->obj.users);
	/* igroup refcount moves into iommufd_device */
	idev->igroup = igroup;
	mutex_init(&idev->iopf_lock);

	/*
	 * If the caller fails after this success it must call
	 * iommufd_unbind_device() which is safe since we hold this refcount.
	 * This also means the device is a leaf in the graph and no other object
	 * can take a reference on it.
	 */
	iommufd_object_finalize(ictx, &idev->obj);
	*id = idev->obj.id;
	return idev;

out_release_owner:
	iommu_device_release_dma_owner(dev);
out_group_put:
	iommufd_put_group(igroup);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD);

/**
 * iommufd_ctx_has_group - True if any device within the group is bound
 *                         to the ictx
 * @ictx: iommufd file descriptor
 * @group: Pointer to a physical iommu_group struct
 *
 * True if any device within the group has been bound to this ictx, e.g. via
 * iommufd_device_bind(), therefore implying ictx ownership of the group.
 */
bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group)
{
	struct iommufd_object *obj;
	unsigned long index;

	if (!ictx || !group)
		return false;

	xa_lock(&ictx->objects);
	xa_for_each(&ictx->objects, index, obj) {
		if (obj->type == IOMMUFD_OBJ_DEVICE &&
		    container_of(obj, struct iommufd_device, obj)
				    ->igroup->group == group) {
			xa_unlock(&ictx->objects);
			return true;
		}
	}
	xa_unlock(&ictx->objects);
	return false;
}
EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD);

/**
 * iommufd_device_unbind - Undo iommufd_device_bind()
 * @idev: Device returned by iommufd_device_bind()
 *
 * Release the device from iommufd control. The DMA ownership will return to
 * unowned with DMA controlled by the DMA API. This invalidates the
 * iommufd_device pointer, other APIs that consume it must not be called
 * concurrently.
 */
void iommufd_device_unbind(struct iommufd_device *idev)
{
	iommufd_object_destroy_user(idev->ictx, &idev->obj);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);

struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev)
{
	return idev->ictx;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD);

u32 iommufd_device_to_id(struct iommufd_device *idev)
{
	return idev->obj.id;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD);
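
/*
 * Illustrative usage sketch (not part of the API above): roughly how a
 * driver-facing consumer might pair iommufd_device_bind() with
 * iommufd_device_unbind(). The my_state/my_bind_one()/my_unbind_one() names
 * are hypothetical and error handling is trimmed; a real caller must also set
 * driver_managed_dma before probing, as required by the kernel-doc above.
 *
 *	struct my_state {
 *		struct iommufd_device *idev;
 *		u32 dev_id;
 *	};
 *
 *	static int my_bind_one(struct my_state *st, struct iommufd_ctx *ictx,
 *			       struct device *dev)
 *	{
 *		st->idev = iommufd_device_bind(ictx, dev, &st->dev_id);
 *		if (IS_ERR(st->idev))
 *			return PTR_ERR(st->idev);
 *		// st->dev_id is the object ID to report back to userspace
 *		return 0;
 *	}
 *
 *	static void my_unbind_one(struct my_state *st)
 *	{
 *		iommufd_device_unbind(st->idev);
 *	}
 */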

static int iommufd_group_setup_msi(struct iommufd_group *igroup,
				   struct iommufd_hwpt_paging *hwpt_paging)
{
	phys_addr_t sw_msi_start = igroup->sw_msi_start;
	int rc;

	/*
	 * If the IOMMU driver gives an IOMMU_RESV_SW_MSI then it is asking us
	 * to call iommu_get_msi_cookie() on its behalf. This is necessary to
	 * setup the MSI window so iommu_dma_prepare_msi() can install pages
	 * into our domain after request_irq(). If it is not done interrupts
	 * will not work on this domain.
	 *
	 * FIXME: This is conceptually broken for iommufd since we want to allow
	 * userspace to change the domains, eg switch from an identity IOAS to a
	 * DMA IOAS. There is currently no way to create a MSI window that
	 * matches what the IRQ layer actually expects in a newly created
	 * domain.
	 */
	if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) {
		rc = iommu_get_msi_cookie(hwpt_paging->common.domain,
					  sw_msi_start);
		if (rc)
			return rc;

		/*
		 * iommu_get_msi_cookie() can only be called once per domain,
		 * it returns -EBUSY on later calls.
		 */
		hwpt_paging->msi_cookie = true;
	}
	return 0;
}

static int iommufd_hwpt_paging_attach(struct iommufd_hwpt_paging *hwpt_paging,
				      struct iommufd_device *idev)
{
	int rc;

	lockdep_assert_held(&idev->igroup->lock);

	rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt,
						 idev->dev,
						 &idev->igroup->sw_msi_start);
	if (rc)
		return rc;

	if (list_empty(&idev->igroup->device_list)) {
		rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging);
		if (rc) {
			iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt,
						  idev->dev);
			return rc;
		}
	}
	return 0;
}

int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
				struct iommufd_device *idev)
{
	int rc;

	mutex_lock(&idev->igroup->lock);

	if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) {
		rc = -EINVAL;
		goto err_unlock;
	}

	if (hwpt_is_paging(hwpt)) {
		rc = iommufd_hwpt_paging_attach(to_hwpt_paging(hwpt), idev);
		if (rc)
			goto err_unlock;
	}

	/*
	 * Only attach to the group once for the first device that is in the
	 * group. All the other devices will follow this attachment. The user
	 * should attach every device individually to the hwpt as the per-device
	 * reserved regions are only updated during individual device
	 * attachment.
	 */
	if (list_empty(&idev->igroup->device_list)) {
		rc = iommufd_hwpt_attach_device(hwpt, idev);
		if (rc)
			goto err_unresv;
		idev->igroup->hwpt = hwpt;
	}
	refcount_inc(&hwpt->obj.users);
	list_add_tail(&idev->group_item, &idev->igroup->device_list);
	mutex_unlock(&idev->igroup->lock);
	return 0;
err_unresv:
	if (hwpt_is_paging(hwpt))
		iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt,
					  idev->dev);
err_unlock:
	mutex_unlock(&idev->igroup->lock);
	return rc;
}

struct iommufd_hw_pagetable *
iommufd_hw_pagetable_detach(struct iommufd_device *idev)
{
	struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt;

	mutex_lock(&idev->igroup->lock);
	list_del(&idev->group_item);
	if (list_empty(&idev->igroup->device_list)) {
		iommufd_hwpt_detach_device(hwpt, idev);
		idev->igroup->hwpt = NULL;
	}
	if (hwpt_is_paging(hwpt))
		iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt,
					  idev->dev);
	mutex_unlock(&idev->igroup->lock);

	/* Caller must destroy hwpt */
	return hwpt;
}

static struct iommufd_hw_pagetable *
iommufd_device_do_attach(struct iommufd_device *idev,
			 struct iommufd_hw_pagetable *hwpt)
{
	int rc;

	rc = iommufd_hw_pagetable_attach(hwpt, idev);
	if (rc)
		return ERR_PTR(rc);
	return NULL;
}

static void
iommufd_group_remove_reserved_iova(struct iommufd_group *igroup,
				   struct iommufd_hwpt_paging *hwpt_paging)
{
	struct iommufd_device *cur;

	lockdep_assert_held(&igroup->lock);

	list_for_each_entry(cur, &igroup->device_list, group_item)
		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev);
}

static int
iommufd_group_do_replace_paging(struct iommufd_group *igroup,
				struct iommufd_hwpt_paging *hwpt_paging)
{
	struct iommufd_hw_pagetable *old_hwpt = igroup->hwpt;
	struct iommufd_device *cur;
	int rc;

	lockdep_assert_held(&igroup->lock);

	if (!hwpt_is_paging(old_hwpt) ||
	    hwpt_paging->ioas != to_hwpt_paging(old_hwpt)->ioas) {
		list_for_each_entry(cur, &igroup->device_list, group_item) {
			rc = iopt_table_enforce_dev_resv_regions(
				&hwpt_paging->ioas->iopt, cur->dev, NULL);
			if (rc)
				goto err_unresv;
		}
	}

	rc = iommufd_group_setup_msi(igroup, hwpt_paging);
	if (rc)
		goto err_unresv;
	return 0;

err_unresv:
	iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
	return rc;
}

static struct iommufd_hw_pagetable *
iommufd_device_do_replace(struct iommufd_device *idev,
			  struct iommufd_hw_pagetable *hwpt)
{
	struct iommufd_group *igroup = idev->igroup;
	struct iommufd_hw_pagetable *old_hwpt;
	unsigned int num_devices;
	int rc;

	mutex_lock(&idev->igroup->lock);

	if (igroup->hwpt == NULL) {
		rc = -EINVAL;
		goto err_unlock;
	}

	if (hwpt == igroup->hwpt) {
		mutex_unlock(&idev->igroup->lock);
		return NULL;
	}

	old_hwpt = igroup->hwpt;
	if (hwpt_is_paging(hwpt)) {
		rc = iommufd_group_do_replace_paging(igroup,
						     to_hwpt_paging(hwpt));
		if (rc)
			goto err_unlock;
	}

	rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt);
	if (rc)
		goto err_unresv;

	if (hwpt_is_paging(old_hwpt) &&
	    (!hwpt_is_paging(hwpt) ||
	     to_hwpt_paging(hwpt)->ioas != to_hwpt_paging(old_hwpt)->ioas))
		iommufd_group_remove_reserved_iova(igroup,
						   to_hwpt_paging(old_hwpt));

	igroup->hwpt = hwpt;

	num_devices = list_count_nodes(&igroup->device_list);
	/*
	 * Move the refcounts held by the device_list to the new hwpt. Retain a
	 * refcount for this thread as the caller will free it.
	 */
	refcount_add(num_devices, &hwpt->obj.users);
	if (num_devices > 1)
		WARN_ON(refcount_sub_and_test(num_devices - 1,
					      &old_hwpt->obj.users));
	mutex_unlock(&idev->igroup->lock);

	/* Caller must destroy old_hwpt */
	return old_hwpt;
err_unresv:
	if (hwpt_is_paging(hwpt))
		iommufd_group_remove_reserved_iova(igroup,
						   to_hwpt_paging(old_hwpt));
err_unlock:
	mutex_unlock(&idev->igroup->lock);
	return ERR_PTR(rc);
}

typedef struct iommufd_hw_pagetable *(*attach_fn)(
	struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt);

/*
 * When automatically managing the domains we search for a compatible domain in
 * the iopt and if one is found use it, otherwise create a new domain.
 * Automatic domain selection will never pick a manually created domain.
 */
static struct iommufd_hw_pagetable *
iommufd_device_auto_get_domain(struct iommufd_device *idev,
			       struct iommufd_ioas *ioas, u32 *pt_id,
			       attach_fn do_attach)
{
	/*
	 * iommufd_hw_pagetable_attach() is called by
	 * iommufd_hw_pagetable_alloc() in immediate attachment mode, same as
	 * iommufd_device_do_attach(). So if we are in this mode then we prefer
	 * to use the immediate_attach path as it supports drivers that can't
	 * directly allocate a domain.
	 */
	bool immediate_attach = do_attach == iommufd_device_do_attach;
	struct iommufd_hw_pagetable *destroy_hwpt;
	struct iommufd_hwpt_paging *hwpt_paging;
	struct iommufd_hw_pagetable *hwpt;

	/*
	 * There is no differentiation when domains are allocated, so any
	 * domain that is willing to attach to the device is interchangeable
	 * with any other.
	 */
	mutex_lock(&ioas->mutex);
	list_for_each_entry(hwpt_paging, &ioas->hwpt_list, hwpt_item) {
		if (!hwpt_paging->auto_domain)
			continue;

		hwpt = &hwpt_paging->common;
		if (!iommufd_lock_obj(&hwpt->obj))
			continue;
		destroy_hwpt = (*do_attach)(idev, hwpt);
		if (IS_ERR(destroy_hwpt)) {
			iommufd_put_object(idev->ictx, &hwpt->obj);
			/*
			 * -EINVAL means the domain is incompatible with the
			 * device. Other error codes should propagate to
			 * userspace as failure. Success means the domain is
			 * attached.
			 */
			if (PTR_ERR(destroy_hwpt) == -EINVAL)
				continue;
			goto out_unlock;
		}
		*pt_id = hwpt->obj.id;
		iommufd_put_object(idev->ictx, &hwpt->obj);
		goto out_unlock;
	}

	hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0,
						immediate_attach, NULL);
	if (IS_ERR(hwpt_paging)) {
		destroy_hwpt = ERR_CAST(hwpt_paging);
		goto out_unlock;
	}
	hwpt = &hwpt_paging->common;

	if (!immediate_attach) {
		destroy_hwpt = (*do_attach)(idev, hwpt);
		if (IS_ERR(destroy_hwpt))
			goto out_abort;
	} else {
		destroy_hwpt = NULL;
	}

	hwpt_paging->auto_domain = true;
	*pt_id = hwpt->obj.id;

	iommufd_object_finalize(idev->ictx, &hwpt->obj);
	mutex_unlock(&ioas->mutex);
	return destroy_hwpt;

out_abort:
	iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj);
out_unlock:
	mutex_unlock(&ioas->mutex);
	return destroy_hwpt;
}

static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
				    attach_fn do_attach)
{
	struct iommufd_hw_pagetable *destroy_hwpt;
	struct iommufd_object *pt_obj;

	pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY);
	if (IS_ERR(pt_obj))
		return PTR_ERR(pt_obj);

	switch (pt_obj->type) {
	case IOMMUFD_OBJ_HWPT_NESTED:
	case IOMMUFD_OBJ_HWPT_PAGING: {
		struct iommufd_hw_pagetable *hwpt =
			container_of(pt_obj, struct iommufd_hw_pagetable, obj);

		destroy_hwpt = (*do_attach)(idev, hwpt);
		if (IS_ERR(destroy_hwpt))
			goto out_put_pt_obj;
		break;
	}
	case IOMMUFD_OBJ_IOAS: {
		struct iommufd_ioas *ioas =
			container_of(pt_obj, struct iommufd_ioas, obj);

		destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id,
							      do_attach);
		if (IS_ERR(destroy_hwpt))
			goto out_put_pt_obj;
		break;
	}
	default:
		destroy_hwpt = ERR_PTR(-EINVAL);
		goto out_put_pt_obj;
	}
	iommufd_put_object(idev->ictx, pt_obj);

	/* This destruction has to be after we unlock everything */
	if (destroy_hwpt)
		iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt);
	return 0;

out_put_pt_obj:
	iommufd_put_object(idev->ictx, pt_obj);
	return PTR_ERR(destroy_hwpt);
}

/**
 * iommufd_device_attach - Connect a device to an iommu_domain
 * @idev: device to attach
 * @pt_id: Input an IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
 *         Output the IOMMUFD_OBJ_HWPT_PAGING ID
 *
 * This connects the device to an iommu_domain, either automatically or manually
 * selected. Once this completes the device could do DMA.
 *
 * The caller should return the resulting pt_id back to userspace.
 * This function is undone by calling iommufd_device_detach().
 */
int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
{
	int rc;

	rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach);
	if (rc)
		return rc;

	/*
	 * Pairs with iommufd_device_detach() - catches caller bugs attempting
	 * to destroy a device with an attachment.
	 */
	refcount_inc(&idev->obj.users);
	return 0;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);

/**
 * iommufd_device_replace - Change the device's iommu_domain
 * @idev: device to change
 * @pt_id: Input an IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
 *         Output the IOMMUFD_OBJ_HWPT_PAGING ID
 *
 * This is the same as::
 *
 *   iommufd_device_detach();
 *   iommufd_device_attach();
 *
 * If it fails then no change is made to the attachment. The iommu driver may
 * implement this so there is no disruption in translation. This can only be
 * called if iommufd_device_attach() has already succeeded.
 */
int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id)
{
	return iommufd_device_change_pt(idev, pt_id,
					&iommufd_device_do_replace);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD);

/**
 * iommufd_device_detach - Disconnect a device from an iommu_domain
 * @idev: device to detach
 *
 * Undo iommufd_device_attach(). This disconnects the idev from the previously
 * attached pt_id. The device returns to a blocked DMA translation.
 */
void iommufd_device_detach(struct iommufd_device *idev)
{
	struct iommufd_hw_pagetable *hwpt;

	hwpt = iommufd_hw_pagetable_detach(idev);
	iommufd_hw_pagetable_put(idev->ictx, hwpt);
	refcount_dec(&idev->obj.users);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD);
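
/*
 * Illustrative sketch (not part of the code above): roughly how a consumer
 * ties the attach/replace/detach calls together for a device that was bound
 * with iommufd_device_bind(). The my_attach()/my_switch()/my_teardown()
 * wrappers are hypothetical; in a real driver the IDs come from userspace.
 *
 *	static int my_attach(struct iommufd_device *idev, u32 ioas_id)
 *	{
 *		u32 pt_id = ioas_id;
 *		int rc;
 *
 *		rc = iommufd_device_attach(idev, &pt_id);
 *		if (rc)
 *			return rc;
 *		// pt_id now holds the HWPT_PAGING ID actually attached;
 *		// report it back to userspace.
 *		return 0;
 *	}
 *
 *	static int my_switch(struct iommufd_device *idev, u32 new_ioas_id)
 *	{
 *		u32 pt_id = new_ioas_id;
 *
 *		// Move the device to a different IOAS/HWPT without a detach
 *		// window in between.
 *		return iommufd_device_replace(idev, &pt_id);
 *	}
 *
 *	static void my_teardown(struct iommufd_device *idev)
 *	{
 *		iommufd_device_detach(idev);
 *	}
 */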

/*
 * On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at
 * a valid cur_ioas (access->ioas). A caller passing in a valid new_ioas should
 * call iommufd_put_object() if it does an iommufd_get_object() for a new_ioas.
 */
static int iommufd_access_change_ioas(struct iommufd_access *access,
				      struct iommufd_ioas *new_ioas)
{
	u32 iopt_access_list_id = access->iopt_access_list_id;
	struct iommufd_ioas *cur_ioas = access->ioas;
	int rc;

	lockdep_assert_held(&access->ioas_lock);

	/* We are racing with a concurrent detach, bail */
	if (cur_ioas != access->ioas_unpin)
		return -EBUSY;

	if (cur_ioas == new_ioas)
		return 0;

	/*
	 * Set ioas to NULL to block any further iommufd_access_pin_pages().
	 * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
	 */
	access->ioas = NULL;

	if (new_ioas) {
		rc = iopt_add_access(&new_ioas->iopt, access);
		if (rc) {
			access->ioas = cur_ioas;
			return rc;
		}
		refcount_inc(&new_ioas->obj.users);
	}

	if (cur_ioas) {
		if (access->ops->unmap) {
			mutex_unlock(&access->ioas_lock);
			access->ops->unmap(access->data, 0, ULONG_MAX);
			mutex_lock(&access->ioas_lock);
		}
		iopt_remove_access(&cur_ioas->iopt, access, iopt_access_list_id);
		refcount_dec(&cur_ioas->obj.users);
	}

	access->ioas = new_ioas;
	access->ioas_unpin = new_ioas;

	return 0;
}

static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id)
{
	struct iommufd_ioas *ioas = iommufd_get_ioas(access->ictx, id);
	int rc;

	if (IS_ERR(ioas))
		return PTR_ERR(ioas);
	rc = iommufd_access_change_ioas(access, ioas);
	iommufd_put_object(access->ictx, &ioas->obj);
	return rc;
}

void iommufd_access_destroy_object(struct iommufd_object *obj)
{
	struct iommufd_access *access =
		container_of(obj, struct iommufd_access, obj);

	mutex_lock(&access->ioas_lock);
	if (access->ioas)
		WARN_ON(iommufd_access_change_ioas(access, NULL));
	mutex_unlock(&access->ioas_lock);
	iommufd_ctx_put(access->ictx);
}

/**
 * iommufd_access_create - Create an iommufd_access
 * @ictx: iommufd file descriptor
 * @ops: Driver's ops to associate with the access
 * @data: Opaque data to pass into ops functions
 * @id: Output ID number to return to userspace for this access
 *
 * An iommufd_access allows a driver to read/write to the IOAS without using
 * DMA. The underlying CPU memory can be accessed using the
 * iommufd_access_pin_pages() or iommufd_access_rw() functions.
 *
 * The provided ops are required to use iommufd_access_pin_pages().
 */
struct iommufd_access *
iommufd_access_create(struct iommufd_ctx *ictx,
		      const struct iommufd_access_ops *ops, void *data, u32 *id)
{
	struct iommufd_access *access;

	/*
	 * There is no uAPI for the access object, but to keep things symmetric
	 * use the object infrastructure anyhow.
	 */
	access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
	if (IS_ERR(access))
		return access;

	access->data = data;
	access->ops = ops;

	if (ops->needs_pin_pages)
		access->iova_alignment = PAGE_SIZE;
	else
		access->iova_alignment = 1;

	/* The calling driver is a user until iommufd_access_destroy() */
	refcount_inc(&access->obj.users);
	access->ictx = ictx;
	iommufd_ctx_get(ictx);
	iommufd_object_finalize(ictx, &access->obj);
	*id = access->obj.id;
	mutex_init(&access->ioas_lock);
	return access;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD);
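
/*
 * Illustrative sketch (not part of the code above): a minimal emulated-DMA
 * consumer creating an access. The ops table layout is assumed to follow
 * struct iommufd_access_ops in <linux/iommufd.h>; my_unmap(), my_driver and
 * my_access_id are hypothetical names.
 *
 *	static void my_unmap(void *data, unsigned long iova,
 *			     unsigned long length)
 *	{
 *		// Stop using and unpin any pages in [iova, iova + length - 1]
 *		// before returning, as iommufd_access_notify_unmap() requires.
 *	}
 *
 *	static const struct iommufd_access_ops my_access_ops = {
 *		.needs_pin_pages = 1,
 *		.unmap = my_unmap,
 *	};
 *
 *	struct iommufd_access *acc;
 *	u32 my_access_id;
 *
 *	acc = iommufd_access_create(ictx, &my_access_ops, my_driver,
 *				    &my_access_id);
 *	if (IS_ERR(acc))
 *		return PTR_ERR(acc);
 *	// Later: iommufd_access_attach(acc, ioas_id) to pick the IOAS, and
 *	// iommufd_access_destroy(acc) when the driver is done.
 */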

/**
 * iommufd_access_destroy - Destroy an iommufd_access
 * @access: The access to destroy
 *
 * The caller must stop using the access before destroying it.
 */
void iommufd_access_destroy(struct iommufd_access *access)
{
	iommufd_object_destroy_user(access->ictx, &access->obj);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);

void iommufd_access_detach(struct iommufd_access *access)
{
	mutex_lock(&access->ioas_lock);
	if (WARN_ON(!access->ioas)) {
		mutex_unlock(&access->ioas_lock);
		return;
	}
	WARN_ON(iommufd_access_change_ioas(access, NULL));
	mutex_unlock(&access->ioas_lock);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD);

int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
{
	int rc;

	mutex_lock(&access->ioas_lock);
	if (WARN_ON(access->ioas)) {
		mutex_unlock(&access->ioas_lock);
		return -EINVAL;
	}

	rc = iommufd_access_change_ioas_id(access, ioas_id);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD);

int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id)
{
	int rc;

	mutex_lock(&access->ioas_lock);
	if (!access->ioas) {
		mutex_unlock(&access->ioas_lock);
		return -ENOENT;
	}
	rc = iommufd_access_change_ioas_id(access, ioas_id);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, IOMMUFD);

/**
 * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
 * @iopt: iopt to work on
 * @iova: Starting iova in the iopt
 * @length: Number of bytes
 *
 * After this function returns there should be no users attached to the pages
 * linked to this iopt that intersect with iova,length. Anyone that has attached
 * a user through iopt_access_pages() needs to detach it through
 * iommufd_access_unpin_pages() before this function returns.
 *
 * iommufd_access_destroy() will wait for any outstanding unmap callback to
 * complete. Once iommufd_access_destroy() returns no unmap ops are running or
 * will run in the future. Due to this a driver must not create locking that
 * prevents unmap from completing while iommufd_access_destroy() is running.
 */
void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
				 unsigned long length)
{
	struct iommufd_ioas *ioas =
		container_of(iopt, struct iommufd_ioas, iopt);
	struct iommufd_access *access;
	unsigned long index;

	xa_lock(&ioas->iopt.access_list);
	xa_for_each(&ioas->iopt.access_list, index, access) {
		if (!iommufd_lock_obj(&access->obj))
			continue;
		xa_unlock(&ioas->iopt.access_list);

		access->ops->unmap(access->data, iova, length);

		iommufd_put_object(access->ictx, &access->obj);
		xa_lock(&ioas->iopt.access_list);
	}
	xa_unlock(&ioas->iopt.access_list);
}

/**
 * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @length: Number of bytes to access
 *
 * The caller must stop accessing the pinned pages before calling this. The
 * iova/length must exactly match the ones provided to iommufd_access_pin_pages().
 */
void iommufd_access_unpin_pages(struct iommufd_access *access,
				unsigned long iova, unsigned long length)
{
	struct iopt_area_contig_iter iter;
	struct io_pagetable *iopt;
	unsigned long last_iova;
	struct iopt_area *area;

	if (WARN_ON(!length) ||
	    WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
		return;

	mutex_lock(&access->ioas_lock);
	/*
	 * The driver must be doing something wrong if it calls this before an
	 * iommufd_access_attach() or after an iommufd_access_detach().
	 */
	if (WARN_ON(!access->ioas_unpin)) {
		mutex_unlock(&access->ioas_lock);
		return;
	}
	iopt = &access->ioas_unpin->iopt;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
		iopt_area_remove_access(
			area, iopt_area_iova_to_index(area, iter.cur_iova),
			iopt_area_iova_to_index(
				area,
				min(last_iova, iopt_area_last_iova(area))));
	WARN_ON(!iopt_area_contig_done(&iter));
	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD);

static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter)
{
	if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE)
		return false;

	if (!iopt_area_contig_done(iter) &&
	    (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) %
	     PAGE_SIZE) != (PAGE_SIZE - 1))
		return false;
	return true;
}

static bool check_area_prot(struct iopt_area *area, unsigned int flags)
{
	if (flags & IOMMUFD_ACCESS_RW_WRITE)
		return area->iommu_prot & IOMMU_WRITE;
	return area->iommu_prot & IOMMU_READ;
}

/**
 * iommufd_access_pin_pages() - Return a list of pages under the iova
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @length: Number of bytes to access
 * @out_pages: Output page list
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * Reads @length bytes starting at iova and returns the struct page * pointers.
 * These can be kmap'd by the caller for CPU access.
 *
 * The caller must perform iommufd_access_unpin_pages() when done to balance
 * this.
 *
 * This API always requires a page aligned iova. This happens naturally if the
 * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However
 * smaller alignments have corner cases where this API can fail on otherwise
 * aligned iova.
 */
int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
			     unsigned long length, struct page **out_pages,
			     unsigned int flags)
{
	struct iopt_area_contig_iter iter;
	struct io_pagetable *iopt;
	unsigned long last_iova;
	struct iopt_area *area;
	int rc;

	/* Driver's ops don't support pin_pages */
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
	    WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap))
		return -EINVAL;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	mutex_lock(&access->ioas_lock);
	if (!access->ioas) {
		mutex_unlock(&access->ioas_lock);
		return -ENOENT;
	}
	iopt = &access->ioas->iopt;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));
		unsigned long last_index = iopt_area_iova_to_index(area, last);
		unsigned long index =
			iopt_area_iova_to_index(area, iter.cur_iova);

		if (area->prevent_access ||
		    !iopt_area_contig_is_aligned(&iter)) {
			rc = -EINVAL;
			goto err_remove;
		}

		if (!check_area_prot(area, flags)) {
			rc = -EPERM;
			goto err_remove;
		}

		rc = iopt_area_add_access(area, index, last_index, out_pages,
					  flags);
		if (rc)
			goto err_remove;
		out_pages += last_index - index + 1;
	}
	if (!iopt_area_contig_done(&iter)) {
		rc = -ENOENT;
		goto err_remove;
	}

	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
	return 0;

err_remove:
	if (iova < iter.cur_iova) {
		last_iova = iter.cur_iova - 1;
		iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
			iopt_area_remove_access(
				area,
				iopt_area_iova_to_index(area, iter.cur_iova),
				iopt_area_iova_to_index(
					area, min(last_iova,
						  iopt_area_last_iova(area))));
	}
	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD);
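
/*
 * Illustrative sketch (not part of the code above): pinning a page-aligned
 * IOVA range, touching it from the CPU, then unpinning. my_touch_range() is a
 * hypothetical helper; it assumes iova/length are PAGE_SIZE aligned as the
 * kernel-doc above requires, and that "acc" was created with needs_pin_pages.
 *
 *	static int my_touch_range(struct iommufd_access *acc,
 *				  unsigned long iova, unsigned long length)
 *	{
 *		unsigned long npages = length / PAGE_SIZE;
 *		struct page **pages;
 *		int rc;
 *
 *		pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
 *		if (!pages)
 *			return -ENOMEM;
 *
 *		rc = iommufd_access_pin_pages(acc, iova, length, pages,
 *					      IOMMUFD_ACCESS_RW_WRITE);
 *		if (rc)
 *			goto out_free;
 *
 *		// CPU access to the pinned pages goes here, e.g. via
 *		// kmap_local_page(pages[0]) / kunmap_local().
 *
 *		iommufd_access_unpin_pages(acc, iova, length);
 *	out_free:
 *		kfree(pages);
 *		return rc;
 *	}
 */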

/**
 * iommufd_access_rw - Read or write data under the iova
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @data: Kernel buffer to copy to/from
 * @length: Number of bytes to access
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * Copy kernel data to/from the range given by IOVA/length. If flags indicates
 * IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be optimized by changing it
 * into copy_to/from_user().
 */
int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
		      void *data, size_t length, unsigned int flags)
{
	struct iopt_area_contig_iter iter;
	struct io_pagetable *iopt;
	struct iopt_area *area;
	unsigned long last_iova;
	int rc;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	mutex_lock(&access->ioas_lock);
	if (!access->ioas) {
		mutex_unlock(&access->ioas_lock);
		return -ENOENT;
	}
	iopt = &access->ioas->iopt;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));
		unsigned long bytes = (last - iter.cur_iova) + 1;

		if (area->prevent_access) {
			rc = -EINVAL;
			goto err_out;
		}

		if (!check_area_prot(area, flags)) {
			rc = -EPERM;
			goto err_out;
		}

		rc = iopt_pages_rw_access(
			area->pages, iopt_area_start_byte(area, iter.cur_iova),
			data, bytes, flags);
		if (rc)
			goto err_out;
		data += bytes;
	}
	if (!iopt_area_contig_done(&iter))
		rc = -ENOENT;
err_out:
	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD);

int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
{
	struct iommu_hw_info *cmd = ucmd->cmd;
	void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr);
	const struct iommu_ops *ops;
	struct iommufd_device *idev;
	unsigned int data_len;
	unsigned int copy_len;
	void *data;
	int rc;

	if (cmd->flags || cmd->__reserved)
		return -EOPNOTSUPP;

	idev = iommufd_get_device(ucmd, cmd->dev_id);
	if (IS_ERR(idev))
		return PTR_ERR(idev);

	ops = dev_iommu_ops(idev->dev);
	if (ops->hw_info) {
		data = ops->hw_info(idev->dev, &data_len, &cmd->out_data_type);
		if (IS_ERR(data)) {
			rc = PTR_ERR(data);
			goto out_put;
		}

		/*
		 * Drivers that have a hw_info callback should have a unique
		 * iommu_hw_info_type.
		 */
		if (WARN_ON_ONCE(cmd->out_data_type ==
				 IOMMU_HW_INFO_TYPE_NONE)) {
			rc = -ENODEV;
			goto out_free;
		}
	} else {
		cmd->out_data_type = IOMMU_HW_INFO_TYPE_NONE;
		data_len = 0;
		data = NULL;
	}

	copy_len = min(cmd->data_len, data_len);
	if (copy_to_user(user_ptr, data, copy_len)) {
		rc = -EFAULT;
		goto out_free;
	}

	/*
	 * Zero the trailing bytes if the user buffer is bigger than the
	 * data size kernel actually has.
	 */
	if (copy_len < cmd->data_len) {
		if (clear_user(user_ptr + copy_len, cmd->data_len - copy_len)) {
			rc = -EFAULT;
			goto out_free;
		}
	}

	/*
	 * We return the length the kernel supports so userspace may know what
	 * the kernel capability is. It could be larger than the input buffer.
	 */
	cmd->data_len = data_len;

	cmd->out_capabilities = 0;
	if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
		cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING;

	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
out_free:
	kfree(data);
out_put:
	iommufd_put_object(ucmd->ictx, &idev->obj);
	return rc;
}
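
/*
 * Illustrative sketch (not part of the code above): using iommufd_access_rw()
 * to copy a kernel buffer into the IOAS without pinning. my_write_buf() is a
 * hypothetical helper; the access must already be attached to an IOAS.
 *
 *	static int my_write_buf(struct iommufd_access *acc, unsigned long iova,
 *				void *buf, size_t len)
 *	{
 *		return iommufd_access_rw(acc, iova, buf, len,
 *					 IOMMUFD_ACCESS_RW_WRITE);
 *	}
 */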