// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
 */
#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/pci-ats.h>
#include <linux/slab.h>
#include <uapi/linux/iommufd.h>

#include "../iommu-priv.h"
#include "io_pagetable.h"
#include "iommufd_private.h"

static bool allow_unsafe_interrupts;
module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(
	allow_unsafe_interrupts,
	"Allow IOMMUFD to bind to devices even if the platform cannot isolate "
	"the MSI interrupt window. Enabling this is a security weakness.");

struct iommufd_attach {
	struct iommufd_hw_pagetable *hwpt;
	struct xarray device_array;
};

static void iommufd_group_release(struct kref *kref)
{
	struct iommufd_group *igroup =
		container_of(kref, struct iommufd_group, ref);

	WARN_ON(!xa_empty(&igroup->pasid_attach));

	xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup,
		   NULL, GFP_KERNEL);
	iommu_group_put(igroup->group);
	mutex_destroy(&igroup->lock);
	kfree(igroup);
}

static void iommufd_put_group(struct iommufd_group *group)
{
	kref_put(&group->ref, iommufd_group_release);
}

static bool iommufd_group_try_get(struct iommufd_group *igroup,
				  struct iommu_group *group)
{
	if (!igroup)
		return false;
	/*
	 * Group IDs cannot be re-used until the group is put back, which does
	 * not happen if we could get an igroup pointer under the xa_lock.
	 */
	if (WARN_ON(igroup->group != group))
		return false;
	return kref_get_unless_zero(&igroup->ref);
}

/*
 * iommufd needs to store some more data for each iommu_group, so we keep a
 * parallel xarray indexed by iommu_group id to hold this instead of putting it
 * in the core structure. To keep things simple the iommufd_group memory is
 * unique within the iommufd_ctx. This makes it easy to check there are no
 * memory leaks.
 */
static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
					       struct device *dev)
{
	struct iommufd_group *new_igroup;
	struct iommufd_group *cur_igroup;
	struct iommufd_group *igroup;
	struct iommu_group *group;
	unsigned int id;

	group = iommu_group_get(dev);
	if (!group)
		return ERR_PTR(-ENODEV);

	id = iommu_group_id(group);

	xa_lock(&ictx->groups);
	igroup = xa_load(&ictx->groups, id);
	if (iommufd_group_try_get(igroup, group)) {
		xa_unlock(&ictx->groups);
		iommu_group_put(group);
		return igroup;
	}
	xa_unlock(&ictx->groups);

	new_igroup = kzalloc(sizeof(*new_igroup), GFP_KERNEL);
	if (!new_igroup) {
		iommu_group_put(group);
		return ERR_PTR(-ENOMEM);
	}

	kref_init(&new_igroup->ref);
	mutex_init(&new_igroup->lock);
	xa_init(&new_igroup->pasid_attach);
	new_igroup->sw_msi_start = PHYS_ADDR_MAX;
	/* group reference moves into new_igroup */
	new_igroup->group = group;

	/*
	 * The ictx is not additionally refcounted here because all objects
	 * using an igroup must put it before their destroy completes.
	 */
	new_igroup->ictx = ictx;

	/*
	 * We dropped the lock so igroup is invalid. NULL is a safe and likely
	 * value to assume for the xa_cmpxchg algorithm.
	 */
	cur_igroup = NULL;
	xa_lock(&ictx->groups);
	while (true) {
		igroup = __xa_cmpxchg(&ictx->groups, id, cur_igroup, new_igroup,
				      GFP_KERNEL);
		if (xa_is_err(igroup)) {
			xa_unlock(&ictx->groups);
			iommufd_put_group(new_igroup);
			return ERR_PTR(xa_err(igroup));
		}

		/* new_igroup was successfully installed */
		if (cur_igroup == igroup) {
			xa_unlock(&ictx->groups);
			return new_igroup;
		}

		/* Check again if the current group is any good */
		if (iommufd_group_try_get(igroup, group)) {
			xa_unlock(&ictx->groups);
			iommufd_put_group(new_igroup);
			return igroup;
		}
		cur_igroup = igroup;
	}
}

void iommufd_device_destroy(struct iommufd_object *obj)
{
	struct iommufd_device *idev =
		container_of(obj, struct iommufd_device, obj);

	iommu_device_release_dma_owner(idev->dev);
	iommufd_put_group(idev->igroup);
	if (!iommufd_selftest_is_mock_dev(idev->dev))
		iommufd_ctx_put(idev->ictx);
}

/**
 * iommufd_device_bind - Bind a physical device to an iommu fd
 * @ictx: iommufd file descriptor
 * @dev: Pointer to a physical device struct
 * @id: Output ID number to return to userspace for this device
 *
 * A successful bind establishes ownership over the device and returns a
 * struct iommufd_device pointer, otherwise returns an error pointer.
 *
 * A driver using this API must set driver_managed_dma and must not touch
 * the device until this routine succeeds and establishes ownership.
 *
 * Binding a PCI device places the entire RID under iommufd control.
 *
 * The caller must undo this with iommufd_device_unbind().
 */
struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
					   struct device *dev, u32 *id)
{
	struct iommufd_device *idev;
	struct iommufd_group *igroup;
	int rc;

	/*
	 * iommufd always sets IOMMU_CACHE because we offer no way for userspace
	 * to restore cache coherency.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
		return ERR_PTR(-EINVAL);

	igroup = iommufd_get_group(ictx, dev);
	if (IS_ERR(igroup))
		return ERR_CAST(igroup);

	/*
	 * For historical compat with VFIO the insecure interrupt path is
	 * allowed if the module parameter is set. Secure/Isolated means that a
	 * MemWr operation from the device (eg a simple DMA) cannot trigger an
	 * interrupt outside this iommufd context.
	 */
	if (!iommufd_selftest_is_mock_dev(dev) &&
	    !iommu_group_has_isolated_msi(igroup->group)) {
		if (!allow_unsafe_interrupts) {
			rc = -EPERM;
			goto out_group_put;
		}

		dev_warn(
			dev,
			"MSI interrupts are not secure, they cannot be isolated by the platform. "
			"Check that platform features like interrupt remapping are enabled. "
			"Use the \"allow_unsafe_interrupts\" module parameter to override\n");
	}

	rc = iommu_device_claim_dma_owner(dev, ictx);
	if (rc)
		goto out_group_put;

	idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE);
	if (IS_ERR(idev)) {
		rc = PTR_ERR(idev);
		goto out_release_owner;
	}
	idev->ictx = ictx;
	if (!iommufd_selftest_is_mock_dev(dev))
		iommufd_ctx_get(ictx);
	idev->dev = dev;
	idev->enforce_cache_coherency =
		device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
	/* The calling driver is a user until iommufd_device_unbind() */
	refcount_inc(&idev->obj.users);
	/* igroup refcount moves into iommufd_device */
	idev->igroup = igroup;
	mutex_init(&idev->iopf_lock);

	/*
	 * If the caller fails after this success it must call
	 * iommufd_device_unbind() which is safe since we hold this refcount.
	 * This also means the device is a leaf in the graph and no other object
	 * can take a reference on it.
	 */
	iommufd_object_finalize(ictx, &idev->obj);
	*id = idev->obj.id;
	return idev;

out_release_owner:
	iommu_device_release_dma_owner(dev);
out_group_put:
	iommufd_put_group(igroup);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, "IOMMUFD");

/**
 * iommufd_ctx_has_group - True if any device within the group is bound
 *                         to the ictx
 * @ictx: iommufd file descriptor
 * @group: Pointer to a physical iommu_group struct
 *
 * True if any device within the group has been bound to this ictx, e.g. via
 * iommufd_device_bind(), therefore implying ictx ownership of the group.
 */
bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group)
{
	struct iommufd_object *obj;
	unsigned long index;

	if (!ictx || !group)
		return false;

	xa_lock(&ictx->objects);
	xa_for_each(&ictx->objects, index, obj) {
		if (obj->type == IOMMUFD_OBJ_DEVICE &&
		    container_of(obj, struct iommufd_device, obj)
				    ->igroup->group == group) {
			xa_unlock(&ictx->objects);
			return true;
		}
	}
	xa_unlock(&ictx->objects);
	return false;
}
EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, "IOMMUFD");

/**
 * iommufd_device_unbind - Undo iommufd_device_bind()
 * @idev: Device returned by iommufd_device_bind()
 *
 * Release the device from iommufd control. The DMA ownership will return back
 * to unowned with DMA controlled by the DMA API. This invalidates the
 * iommufd_device pointer, other APIs that consume it must not be called
 * concurrently.
 */
void iommufd_device_unbind(struct iommufd_device *idev)
{
	iommufd_object_destroy_user(idev->ictx, &idev->obj);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, "IOMMUFD");

struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev)
{
	return idev->ictx;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, "IOMMUFD");

u32 iommufd_device_to_id(struct iommufd_device *idev)
{
	return idev->obj.id;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD");

static unsigned int iommufd_group_device_num(struct iommufd_group *igroup,
					     ioasid_t pasid)
{
	struct iommufd_attach *attach;
	struct iommufd_device *idev;
	unsigned int count = 0;
	unsigned long index;

	lockdep_assert_held(&igroup->lock);

	attach = xa_load(&igroup->pasid_attach, pasid);
	if (attach)
		xa_for_each(&attach->device_array, index, idev)
			count++;
	return count;
}

#ifdef CONFIG_IRQ_MSI_IOMMU
static int iommufd_group_setup_msi(struct iommufd_group *igroup,
				   struct iommufd_hwpt_paging *hwpt_paging)
{
	struct iommufd_ctx *ictx = igroup->ictx;
	struct iommufd_sw_msi_map *cur;

	if (igroup->sw_msi_start == PHYS_ADDR_MAX)
		return 0;

	/*
	 * Install all the MSI pages the device has been using into the domain
	 */
	guard(mutex)(&ictx->sw_msi_lock);
	list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) {
		int rc;

		if (cur->sw_msi_start != igroup->sw_msi_start ||
		    !test_bit(cur->id, igroup->required_sw_msi.bitmap))
			continue;

		rc = iommufd_sw_msi_install(ictx, hwpt_paging, cur);
		if (rc)
			return rc;
	}
	return 0;
}
#else
static inline int
iommufd_group_setup_msi(struct iommufd_group *igroup,
			struct iommufd_hwpt_paging *hwpt_paging)
{
	return 0;
}
#endif

static bool
iommufd_group_first_attach(struct iommufd_group *igroup, ioasid_t pasid)
{
	lockdep_assert_held(&igroup->lock);
	return !xa_load(&igroup->pasid_attach, pasid);
}

static int
iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
				    struct iommufd_hwpt_paging *hwpt_paging)
{
	struct iommufd_group *igroup = idev->igroup;
	int rc;

	lockdep_assert_held(&igroup->lock);

	rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt,
						 idev->dev,
						 &igroup->sw_msi_start);
	if (rc)
		return rc;

	if (iommufd_group_first_attach(igroup, IOMMU_NO_PASID)) {
		rc = iommufd_group_setup_msi(igroup, hwpt_paging);
		if (rc) {
			iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt,
						  idev->dev);
			return rc;
		}
	}
	return 0;
}

/* The device attach/detach/replace helpers for attach_handle */

static bool iommufd_device_is_attached(struct iommufd_device *idev,
				       ioasid_t pasid)
{
	struct iommufd_attach *attach;

	attach = xa_load(&idev->igroup->pasid_attach, pasid);
	return xa_load(&attach->device_array, idev->obj.id);
}

static int iommufd_hwpt_pasid_compat(struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_device *idev,
				     ioasid_t pasid)
{
	struct iommufd_group *igroup = idev->igroup;

	lockdep_assert_held(&igroup->lock);

	if (pasid == IOMMU_NO_PASID) {
		unsigned long start = IOMMU_NO_PASID;

		if (!hwpt->pasid_compat &&
		    xa_find_after(&igroup->pasid_attach,
				  &start, UINT_MAX, XA_PRESENT))
			return -EINVAL;
	} else {
		struct iommufd_attach *attach;

		if (!hwpt->pasid_compat)
			return -EINVAL;

		attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
		if (attach && attach->hwpt && !attach->hwpt->pasid_compat)
			return -EINVAL;
	}

	return 0;
}

static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
				      struct iommufd_device *idev,
				      ioasid_t pasid)
{
	struct iommufd_attach_handle *handle;
	int rc;

	rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
	if (rc)
		return rc;

	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
	if (!handle)
		return -ENOMEM;

	if (hwpt->fault) {
		rc = iommufd_fault_iopf_enable(idev);
		if (rc)
			goto out_free_handle;
	}

	handle->idev = idev;
	if (pasid == IOMMU_NO_PASID)
		rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group,
					       &handle->handle);
	else
		rc = iommu_attach_device_pasid(hwpt->domain, idev->dev, pasid,
					       &handle->handle);
	if (rc)
		goto out_disable_iopf;

	return 0;

out_disable_iopf:
	if (hwpt->fault)
		iommufd_fault_iopf_disable(idev);
out_free_handle:
	kfree(handle);
	return rc;
}

static struct iommufd_attach_handle *
iommufd_device_get_attach_handle(struct iommufd_device *idev, ioasid_t pasid)
{
	struct iommu_attach_handle *handle;

	lockdep_assert_held(&idev->igroup->lock);

	handle =
		iommu_attach_handle_get(idev->igroup->group, pasid, 0);
	if (IS_ERR(handle))
		return NULL;
	return to_iommufd_handle(handle);
}

static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
				       struct iommufd_device *idev,
				       ioasid_t pasid)
{
	struct iommufd_attach_handle *handle;

	handle = iommufd_device_get_attach_handle(idev, pasid);
	if (pasid == IOMMU_NO_PASID)
		iommu_detach_group_handle(hwpt->domain, idev->igroup->group);
	else
		iommu_detach_device_pasid(hwpt->domain, idev->dev, pasid);

	if (hwpt->fault) {
		iommufd_auto_response_faults(hwpt, handle);
		iommufd_fault_iopf_disable(idev);
	}
	kfree(handle);
}

static int iommufd_hwpt_replace_device(struct iommufd_device *idev,
				       ioasid_t pasid,
				       struct iommufd_hw_pagetable *hwpt,
				       struct iommufd_hw_pagetable *old)
{
	struct iommufd_attach_handle *handle, *old_handle;
	int rc;

	rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
	if (rc)
		return rc;

	old_handle = iommufd_device_get_attach_handle(idev, pasid);

	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
	if (!handle)
		return -ENOMEM;

	if (hwpt->fault && !old->fault) {
		rc = iommufd_fault_iopf_enable(idev);
		if (rc)
			goto out_free_handle;
	}

	handle->idev = idev;
	if (pasid == IOMMU_NO_PASID)
		rc = iommu_replace_group_handle(idev->igroup->group,
						hwpt->domain, &handle->handle);
	else
		rc = iommu_replace_device_pasid(hwpt->domain, idev->dev,
						pasid, &handle->handle);
	if (rc)
		goto out_disable_iopf;

	if (old->fault) {
		iommufd_auto_response_faults(hwpt, old_handle);
		if (!hwpt->fault)
			iommufd_fault_iopf_disable(idev);
	}
	kfree(old_handle);

	return 0;

out_disable_iopf:
	if (hwpt->fault && !old->fault)
		iommufd_fault_iopf_disable(idev);
out_free_handle:
	kfree(handle);
	return rc;
}

int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
				struct iommufd_device *idev, ioasid_t pasid)
{
	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
	bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID;
	struct iommufd_group *igroup = idev->igroup;
	struct iommufd_hw_pagetable *old_hwpt;
	struct iommufd_attach *attach;
	int rc;

	mutex_lock(&igroup->lock);

	attach = xa_cmpxchg(&igroup->pasid_attach, pasid, NULL,
			    XA_ZERO_ENTRY, GFP_KERNEL);
	if (xa_is_err(attach)) {
		rc = xa_err(attach);
		goto err_unlock;
	}

	if (!attach) {
		attach = kzalloc(sizeof(*attach), GFP_KERNEL);
		if (!attach) {
			rc = -ENOMEM;
			goto err_release_pasid;
		}
		xa_init(&attach->device_array);
	}

	old_hwpt = attach->hwpt;

	rc = xa_insert(&attach->device_array, idev->obj.id, XA_ZERO_ENTRY,
		       GFP_KERNEL);
	if (rc) {
		WARN_ON(rc == -EBUSY && !old_hwpt);
		goto err_free_attach;
	}

	if (old_hwpt && old_hwpt != hwpt) {
		rc = -EINVAL;
		goto err_release_devid;
	}

	if (attach_resv) {
		rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging);
		if (rc)
			goto err_release_devid;
	}

	/*
	 * Only attach to the group once for the first device that is in the
	 * group. All the other devices will follow this attachment. The user
	 * should attach every device individually to the hwpt as the per-device
	 * reserved regions are only updated during individual device
	 * attachment.
	 */
	if (iommufd_group_first_attach(igroup, pasid)) {
		rc = iommufd_hwpt_attach_device(hwpt, idev, pasid);
		if (rc)
			goto err_unresv;
		attach->hwpt = hwpt;
		WARN_ON(xa_is_err(xa_store(&igroup->pasid_attach, pasid, attach,
					   GFP_KERNEL)));
	}
	refcount_inc(&hwpt->obj.users);
	WARN_ON(xa_is_err(xa_store(&attach->device_array, idev->obj.id,
				   idev, GFP_KERNEL)));
	mutex_unlock(&igroup->lock);
	return 0;
err_unresv:
	if (attach_resv)
		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
err_release_devid:
	xa_release(&attach->device_array, idev->obj.id);
err_free_attach:
	if (iommufd_group_first_attach(igroup, pasid))
		kfree(attach);
err_release_pasid:
	if (iommufd_group_first_attach(igroup, pasid))
		xa_release(&igroup->pasid_attach, pasid);
err_unlock:
	mutex_unlock(&igroup->lock);
	return rc;
}

struct iommufd_hw_pagetable *
iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid)
{
	struct iommufd_group *igroup = idev->igroup;
	struct iommufd_hwpt_paging *hwpt_paging;
	struct iommufd_hw_pagetable *hwpt;
	struct iommufd_attach *attach;

	mutex_lock(&igroup->lock);
	attach = xa_load(&igroup->pasid_attach, pasid);
	if (!attach) {
		mutex_unlock(&igroup->lock);
		return NULL;
	}

	hwpt = attach->hwpt;
	hwpt_paging = find_hwpt_paging(hwpt);

	xa_erase(&attach->device_array, idev->obj.id);
	if (xa_empty(&attach->device_array)) {
		iommufd_hwpt_detach_device(hwpt, idev, pasid);
		xa_erase(&igroup->pasid_attach, pasid);
		kfree(attach);
	}
	if (hwpt_paging && pasid == IOMMU_NO_PASID)
		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
	mutex_unlock(&igroup->lock);

	/* Caller must destroy hwpt */
	return hwpt;
}

static struct iommufd_hw_pagetable *
iommufd_device_do_attach(struct iommufd_device *idev, ioasid_t pasid,
			 struct iommufd_hw_pagetable *hwpt)
{
	int rc;

	rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid);
	if (rc)
		return ERR_PTR(rc);
	return NULL;
}

static void
iommufd_group_remove_reserved_iova(struct iommufd_group *igroup,
				   struct iommufd_hwpt_paging *hwpt_paging)
{
	struct iommufd_attach *attach;
	struct iommufd_device *cur;
	unsigned long index;

	lockdep_assert_held(&igroup->lock);

	attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
	xa_for_each(&attach->device_array, index, cur)
		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev);
}

static int
iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup,
				       struct iommufd_hwpt_paging *hwpt_paging)
{
	struct iommufd_hwpt_paging *old_hwpt_paging;
	struct iommufd_attach *attach;
	struct iommufd_device *cur;
	unsigned long index;
	int rc;

	lockdep_assert_held(&igroup->lock);

	attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
	old_hwpt_paging = find_hwpt_paging(attach->hwpt);
	if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) {
		xa_for_each(&attach->device_array, index, cur) {
			rc = iopt_table_enforce_dev_resv_regions(
				&hwpt_paging->ioas->iopt, cur->dev, NULL);
			if (rc)
				goto err_unresv;
		}
	}

	rc = iommufd_group_setup_msi(igroup, hwpt_paging);
	if (rc)
		goto err_unresv;
	return 0;

err_unresv:
	iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
	return rc;
}

static struct iommufd_hw_pagetable *
iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid,
			  struct iommufd_hw_pagetable *hwpt)
{
	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
	bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID;
	struct iommufd_hwpt_paging *old_hwpt_paging;
	struct iommufd_group *igroup = idev->igroup;
	struct iommufd_hw_pagetable *old_hwpt;
	struct iommufd_attach *attach;
	unsigned int num_devices;
	int rc;

	mutex_lock(&igroup->lock);

	attach = xa_load(&igroup->pasid_attach, pasid);
	if (!attach) {
		rc = -EINVAL;
		goto err_unlock;
	}

	old_hwpt = attach->hwpt;

	WARN_ON(!old_hwpt || xa_empty(&attach->device_array));

	if (!iommufd_device_is_attached(idev, pasid)) {
		rc = -EINVAL;
		goto err_unlock;
	}

	if (hwpt == old_hwpt) {
		mutex_unlock(&igroup->lock);
		return NULL;
	}

	if (attach_resv) {
		rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt_paging);
		if (rc)
			goto err_unlock;
	}

	rc = iommufd_hwpt_replace_device(idev, pasid, hwpt, old_hwpt);
	if (rc)
		goto err_unresv;

	old_hwpt_paging = find_hwpt_paging(old_hwpt);
	if (old_hwpt_paging && pasid == IOMMU_NO_PASID &&
	    (!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas))
		iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging);

	attach->hwpt = hwpt;

	num_devices = iommufd_group_device_num(igroup, pasid);
	/*
	 * Move the refcounts held by the device_array to the new hwpt. Retain a
	 * refcount for this thread as the caller will free it.
	 */
	refcount_add(num_devices, &hwpt->obj.users);
	if (num_devices > 1)
		WARN_ON(refcount_sub_and_test(num_devices - 1,
					      &old_hwpt->obj.users));
	mutex_unlock(&igroup->lock);

	/* Caller must destroy old_hwpt */
	return old_hwpt;
err_unresv:
	if (attach_resv)
		iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
err_unlock:
	mutex_unlock(&igroup->lock);
	return ERR_PTR(rc);
}

typedef struct iommufd_hw_pagetable *(*attach_fn)(
	struct iommufd_device *idev, ioasid_t pasid,
	struct iommufd_hw_pagetable *hwpt);

/*
 * When automatically managing the domains we search for a compatible domain in
 * the iopt and if one is found use it, otherwise create a new domain.
 * Automatic domain selection will never pick a manually created domain.
 */
static struct iommufd_hw_pagetable *
iommufd_device_auto_get_domain(struct iommufd_device *idev, ioasid_t pasid,
			       struct iommufd_ioas *ioas, u32 *pt_id,
			       attach_fn do_attach)
{
	/*
	 * iommufd_hw_pagetable_attach() is called by
	 * iommufd_hw_pagetable_alloc() in immediate attachment mode, same as
	 * iommufd_device_do_attach(). So if we are in this mode then we prefer
	 * to use the immediate_attach path as it supports drivers that can't
	 * directly allocate a domain.
	 */
	bool immediate_attach = do_attach == iommufd_device_do_attach;
	struct iommufd_hw_pagetable *destroy_hwpt;
	struct iommufd_hwpt_paging *hwpt_paging;
	struct iommufd_hw_pagetable *hwpt;

	/*
	 * There is no differentiation when domains are allocated, so any domain
	 * that is willing to attach to the device is interchangeable with any
	 * other.
	 */
	mutex_lock(&ioas->mutex);
	list_for_each_entry(hwpt_paging, &ioas->hwpt_list, hwpt_item) {
		if (!hwpt_paging->auto_domain)
			continue;

		hwpt = &hwpt_paging->common;
		if (!iommufd_lock_obj(&hwpt->obj))
			continue;
		destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
		if (IS_ERR(destroy_hwpt)) {
			iommufd_put_object(idev->ictx, &hwpt->obj);
			/*
			 * -EINVAL means the domain is incompatible with the
			 * device. Other error codes should propagate to
			 * userspace as failure. Success means the domain is
			 * attached.
			 */
			if (PTR_ERR(destroy_hwpt) == -EINVAL)
				continue;
			goto out_unlock;
		}
		*pt_id = hwpt->obj.id;
		iommufd_put_object(idev->ictx, &hwpt->obj);
		goto out_unlock;
	}

	hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, pasid,
						0, immediate_attach, NULL);
	if (IS_ERR(hwpt_paging)) {
		destroy_hwpt = ERR_CAST(hwpt_paging);
		goto out_unlock;
	}
	hwpt = &hwpt_paging->common;

	if (!immediate_attach) {
		destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
		if (IS_ERR(destroy_hwpt))
			goto out_abort;
	} else {
		destroy_hwpt = NULL;
	}

	hwpt_paging->auto_domain = true;
	*pt_id = hwpt->obj.id;

	iommufd_object_finalize(idev->ictx, &hwpt->obj);
	mutex_unlock(&ioas->mutex);
	return destroy_hwpt;

out_abort:
	iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj);
out_unlock:
	mutex_unlock(&ioas->mutex);
	return destroy_hwpt;
}

static int iommufd_device_change_pt(struct iommufd_device *idev,
				    ioasid_t pasid,
				    u32 *pt_id, attach_fn do_attach)
{
	struct iommufd_hw_pagetable *destroy_hwpt;
	struct iommufd_object *pt_obj;

	pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY);
	if (IS_ERR(pt_obj))
		return PTR_ERR(pt_obj);

	switch (pt_obj->type) {
	case IOMMUFD_OBJ_HWPT_NESTED:
	case IOMMUFD_OBJ_HWPT_PAGING: {
		struct iommufd_hw_pagetable *hwpt =
			container_of(pt_obj, struct iommufd_hw_pagetable, obj);

		destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
		if (IS_ERR(destroy_hwpt))
			goto out_put_pt_obj;
		break;
	}
	case IOMMUFD_OBJ_IOAS: {
		struct iommufd_ioas *ioas =
			container_of(pt_obj, struct iommufd_ioas, obj);

		destroy_hwpt = iommufd_device_auto_get_domain(idev, pasid, ioas,
							      pt_id, do_attach);
		if (IS_ERR(destroy_hwpt))
			goto out_put_pt_obj;
		break;
	}
	default:
		destroy_hwpt = ERR_PTR(-EINVAL);
		goto out_put_pt_obj;
	}
	iommufd_put_object(idev->ictx, pt_obj);

	/* This destruction has to be after we unlock everything */
	if (destroy_hwpt)
		iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt);
	return 0;

out_put_pt_obj:
	iommufd_put_object(idev->ictx, pt_obj);
	return PTR_ERR(destroy_hwpt);
}

/**
 * iommufd_device_attach - Connect a device/pasid to an iommu_domain
 * @idev: device to attach
 * @pasid: pasid to attach
 * @pt_id: Input an IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
 *         Output the IOMMUFD_OBJ_HWPT_PAGING ID
 *
 * This connects the device/pasid to an iommu_domain, either automatically
 * or manually selected. Once this completes the device can do DMA with
 * @pasid. @pasid is IOMMU_NO_PASID if this attach is for no pasid usage.
 *
 * The caller should return the resulting pt_id back to userspace.
 * This function is undone by calling iommufd_device_detach().
 */
int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid,
			  u32 *pt_id)
{
	int rc;

	rc = iommufd_device_change_pt(idev, pasid, pt_id,
				      &iommufd_device_do_attach);
	if (rc)
		return rc;

	/*
	 * Pairs with iommufd_device_detach() - catches caller bugs attempting
	 * to destroy a device with an attachment.
	 */
	refcount_inc(&idev->obj.users);
	return 0;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD");

/**
 * iommufd_device_replace - Change the device/pasid's iommu_domain
 * @idev: device to change
 * @pasid: pasid to change
 * @pt_id: Input an IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
 *         Output the IOMMUFD_OBJ_HWPT_PAGING ID
 *
 * This is the same as::
 *
 *   iommufd_device_detach();
 *   iommufd_device_attach();
 *
 * If it fails then no change is made to the attachment. The iommu driver may
 * implement this so there is no disruption in translation. This can only be
 * called if iommufd_device_attach() has already succeeded. @pasid is
 * IOMMU_NO_PASID for no pasid usage.
 */
int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid,
			   u32 *pt_id)
{
	return iommufd_device_change_pt(idev, pasid, pt_id,
					&iommufd_device_do_replace);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, "IOMMUFD");

/**
 * iommufd_device_detach - Disconnect a device/pasid from an iommu_domain
 * @idev: device to detach
 * @pasid: pasid to detach
 *
 * Undo iommufd_device_attach(). This disconnects the idev from the previously
 * attached pt_id. The device returns to blocked DMA translation.
 * @pasid is IOMMU_NO_PASID for no pasid usage.
 */
void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid)
{
	struct iommufd_hw_pagetable *hwpt;

	hwpt = iommufd_hw_pagetable_detach(idev, pasid);
	if (!hwpt)
		return;
	iommufd_hw_pagetable_put(idev->ictx, hwpt);
	refcount_dec(&idev->obj.users);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, "IOMMUFD");
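
/*
 * Illustrative sketch (not part of the original file, not compiled): how the
 * attach/replace/detach trio is typically used by an in-kernel consumer once
 * iommufd_device_bind() has succeeded. The example_* names are hypothetical;
 * ioas_id would normally come from userspace via the driver's own uAPI.
 */
#if 0
static int example_attach_ioas(struct iommufd_device *idev, u32 ioas_id)
{
	u32 pt_id = ioas_id;

	/* pt_id is updated to the auto-selected HWPT_PAGING object ID */
	return iommufd_device_attach(idev, IOMMU_NO_PASID, &pt_id);
}

static int example_switch_ioas(struct iommufd_device *idev, u32 new_ioas_id)
{
	u32 pt_id = new_ioas_id;

	/* May be implemented by the iommu driver with no translation disruption */
	return iommufd_device_replace(idev, IOMMU_NO_PASID, &pt_id);
}

static void example_teardown(struct iommufd_device *idev)
{
	iommufd_device_detach(idev, IOMMU_NO_PASID);
	iommufd_device_unbind(idev);
}
#endif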

/*
 * On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at
 * a valid cur_ioas (access->ioas). A caller passing in a valid new_ioas should
 * call iommufd_put_object() if it does an iommufd_get_object() for a new_ioas.
 */
static int iommufd_access_change_ioas(struct iommufd_access *access,
				      struct iommufd_ioas *new_ioas)
{
	u32 iopt_access_list_id = access->iopt_access_list_id;
	struct iommufd_ioas *cur_ioas = access->ioas;
	int rc;

	lockdep_assert_held(&access->ioas_lock);

	/* We are racing with a concurrent detach, bail */
	if (cur_ioas != access->ioas_unpin)
		return -EBUSY;

	if (cur_ioas == new_ioas)
		return 0;

	/*
	 * Set ioas to NULL to block any further iommufd_access_pin_pages().
	 * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
	 */
	access->ioas = NULL;

	if (new_ioas) {
		rc = iopt_add_access(&new_ioas->iopt, access);
		if (rc) {
			access->ioas = cur_ioas;
			return rc;
		}
		refcount_inc(&new_ioas->obj.users);
	}

	if (cur_ioas) {
		if (access->ops->unmap) {
			mutex_unlock(&access->ioas_lock);
			access->ops->unmap(access->data, 0, ULONG_MAX);
			mutex_lock(&access->ioas_lock);
		}
		iopt_remove_access(&cur_ioas->iopt, access, iopt_access_list_id);
		refcount_dec(&cur_ioas->obj.users);
	}

	access->ioas = new_ioas;
	access->ioas_unpin = new_ioas;

	return 0;
}

static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id)
{
	struct iommufd_ioas *ioas = iommufd_get_ioas(access->ictx, id);
	int rc;

	if (IS_ERR(ioas))
		return PTR_ERR(ioas);
	rc = iommufd_access_change_ioas(access, ioas);
	iommufd_put_object(access->ictx, &ioas->obj);
	return rc;
}

void iommufd_access_destroy_object(struct iommufd_object *obj)
{
	struct iommufd_access *access =
		container_of(obj, struct iommufd_access, obj);

	mutex_lock(&access->ioas_lock);
	if (access->ioas)
		WARN_ON(iommufd_access_change_ioas(access, NULL));
	mutex_unlock(&access->ioas_lock);
	iommufd_ctx_put(access->ictx);
}

/**
 * iommufd_access_create - Create an iommufd_access
 * @ictx: iommufd file descriptor
 * @ops: Driver's ops to associate with the access
 * @data: Opaque data to pass into ops functions
 * @id: Output ID number to return to userspace for this access
 *
 * An iommufd_access allows a driver to read/write to the IOAS without using
 * DMA. The underlying CPU memory can be accessed using the
 * iommufd_access_pin_pages() or iommufd_access_rw() functions.
 *
 * The provided ops are required to use iommufd_access_pin_pages().
 */
struct iommufd_access *
iommufd_access_create(struct iommufd_ctx *ictx,
		      const struct iommufd_access_ops *ops, void *data, u32 *id)
{
	struct iommufd_access *access;

	/*
	 * There is no uAPI for the access object, but to keep things symmetric
	 * use the object infrastructure anyhow.
	 */
	access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
	if (IS_ERR(access))
		return access;

	access->data = data;
	access->ops = ops;

	if (ops->needs_pin_pages)
		access->iova_alignment = PAGE_SIZE;
	else
		access->iova_alignment = 1;

	/* The calling driver is a user until iommufd_access_destroy() */
	refcount_inc(&access->obj.users);
	access->ictx = ictx;
	iommufd_ctx_get(ictx);
	iommufd_object_finalize(ictx, &access->obj);
	*id = access->obj.id;
	mutex_init(&access->ioas_lock);
	return access;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_create, "IOMMUFD");

/**
 * iommufd_access_destroy - Destroy an iommufd_access
 * @access: The access to destroy
 *
 * The caller must stop using the access before destroying it.
 */
void iommufd_access_destroy(struct iommufd_access *access)
{
	iommufd_object_destroy_user(access->ictx, &access->obj);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, "IOMMUFD");

void iommufd_access_detach(struct iommufd_access *access)
{
	mutex_lock(&access->ioas_lock);
	if (WARN_ON(!access->ioas)) {
		mutex_unlock(&access->ioas_lock);
		return;
	}
	WARN_ON(iommufd_access_change_ioas(access, NULL));
	mutex_unlock(&access->ioas_lock);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, "IOMMUFD");

int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
{
	int rc;

	mutex_lock(&access->ioas_lock);
	if (WARN_ON(access->ioas)) {
		mutex_unlock(&access->ioas_lock);
		return -EINVAL;
	}

	rc = iommufd_access_change_ioas_id(access, ioas_id);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, "IOMMUFD");

int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id)
{
	int rc;

	mutex_lock(&access->ioas_lock);
	if (!access->ioas) {
		mutex_unlock(&access->ioas_lock);
		return -ENOENT;
	}
	rc = iommufd_access_change_ioas_id(access, ioas_id);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, "IOMMUFD");
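
/*
 * Illustrative sketch (not part of the original file, not compiled): typical
 * lifecycle of an iommufd_access for an emulated/mediated device that needs
 * CPU access to IOVA ranges instead of DMA. "example_access_ops", the unmap
 * callback body and the example_* names are hypothetical.
 */
#if 0
static void example_unmap(void *data, unsigned long iova, unsigned long length)
{
	/* Drop any pins overlapping [iova, iova + length - 1] held by "data" */
}

static const struct iommufd_access_ops example_access_ops = {
	.needs_pin_pages = 1,
	.unmap = example_unmap,
};

static struct iommufd_access *example_access_setup(struct iommufd_ctx *ictx,
						   void *drv_data, u32 ioas_id)
{
	struct iommufd_access *access;
	u32 access_id;
	int rc;

	access = iommufd_access_create(ictx, &example_access_ops, drv_data,
				       &access_id);
	if (IS_ERR(access))
		return access;

	rc = iommufd_access_attach(access, ioas_id);
	if (rc) {
		iommufd_access_destroy(access);
		return ERR_PTR(rc);
	}
	return access;
}
#endif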

/**
 * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
 * @iopt: iopt to work on
 * @iova: Starting iova in the iopt
 * @length: Number of bytes
 *
 * After this function returns there should be no users attached to the pages
 * linked to this iopt that intersect with iova, length. Anyone that has
 * attached a user through iopt_access_pages() needs to detach it through
 * iommufd_access_unpin_pages() before this function returns.
 *
 * iommufd_access_destroy() will wait for any outstanding unmap callback to
 * complete. Once iommufd_access_destroy() returns, no unmap ops are running or
 * will run in the future. Due to this a driver must not create locking that
 * prevents unmap from completing while iommufd_access_destroy() is running.
 */
void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
				 unsigned long length)
{
	struct iommufd_ioas *ioas =
		container_of(iopt, struct iommufd_ioas, iopt);
	struct iommufd_access *access;
	unsigned long index;

	xa_lock(&ioas->iopt.access_list);
	xa_for_each(&ioas->iopt.access_list, index, access) {
		if (!iommufd_lock_obj(&access->obj))
			continue;
		xa_unlock(&ioas->iopt.access_list);

		access->ops->unmap(access->data, iova, length);

		iommufd_put_object(access->ictx, &access->obj);
		xa_lock(&ioas->iopt.access_list);
	}
	xa_unlock(&ioas->iopt.access_list);
}

/**
 * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @length: Number of bytes to access
 *
 * Return the struct pages to the system. The caller must stop accessing them
 * before calling this. The iova/length must exactly match the one provided
 * to iommufd_access_pin_pages().
 */
void iommufd_access_unpin_pages(struct iommufd_access *access,
				unsigned long iova, unsigned long length)
{
	struct iopt_area_contig_iter iter;
	struct io_pagetable *iopt;
	unsigned long last_iova;
	struct iopt_area *area;

	if (WARN_ON(!length) ||
	    WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
		return;

	mutex_lock(&access->ioas_lock);
	/*
	 * The driver must be doing something wrong if it calls this before an
	 * iommufd_access_attach() or after an iommufd_access_detach().
	 */
	if (WARN_ON(!access->ioas_unpin)) {
		mutex_unlock(&access->ioas_lock);
		return;
	}
	iopt = &access->ioas_unpin->iopt;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
		iopt_area_remove_access(
			area, iopt_area_iova_to_index(area, iter.cur_iova),
			iopt_area_iova_to_index(
				area,
				min(last_iova, iopt_area_last_iova(area))));
	WARN_ON(!iopt_area_contig_done(&iter));
	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, "IOMMUFD");

static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter)
{
	if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE)
		return false;

	if (!iopt_area_contig_done(iter) &&
	    (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) %
	     PAGE_SIZE) != (PAGE_SIZE - 1))
		return false;
	return true;
}

static bool check_area_prot(struct iopt_area *area, unsigned int flags)
{
	if (flags & IOMMUFD_ACCESS_RW_WRITE)
		return area->iommu_prot & IOMMU_WRITE;
	return area->iommu_prot & IOMMU_READ;
}

/**
 * iommufd_access_pin_pages() - Return a list of pages under the iova
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @length: Number of bytes to access
 * @out_pages: Output page list
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * Reads @length bytes starting at iova and returns the struct page * pointers.
 * These can be kmap'd by the caller for CPU access.
 *
 * The caller must perform iommufd_access_unpin_pages() when done to balance
 * this.
 *
 * This API always requires a page aligned iova. This happens naturally if the
 * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However
 * smaller alignments have corner cases where this API can fail on otherwise
 * aligned iova.
 */
int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
			     unsigned long length, struct page **out_pages,
			     unsigned int flags)
{
	struct iopt_area_contig_iter iter;
	struct io_pagetable *iopt;
	unsigned long last_iova;
	struct iopt_area *area;
	int rc;

	/* Driver's ops don't support pin_pages */
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
	    WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap))
		return -EINVAL;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	mutex_lock(&access->ioas_lock);
	if (!access->ioas) {
		mutex_unlock(&access->ioas_lock);
		return -ENOENT;
	}
	iopt = &access->ioas->iopt;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));
		unsigned long last_index = iopt_area_iova_to_index(area, last);
		unsigned long index =
			iopt_area_iova_to_index(area, iter.cur_iova);

		if (area->prevent_access ||
		    !iopt_area_contig_is_aligned(&iter)) {
			rc = -EINVAL;
			goto err_remove;
		}

		if (!check_area_prot(area, flags)) {
			rc = -EPERM;
			goto err_remove;
		}

		rc = iopt_area_add_access(area, index, last_index, out_pages,
					  flags);
		if (rc)
			goto err_remove;
		out_pages += last_index - index + 1;
	}
	if (!iopt_area_contig_done(&iter)) {
		rc = -ENOENT;
		goto err_remove;
	}

	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
	return 0;

err_remove:
	if (iova < iter.cur_iova) {
		last_iova = iter.cur_iova - 1;
		iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
			iopt_area_remove_access(
				area,
				iopt_area_iova_to_index(area, iter.cur_iova),
				iopt_area_iova_to_index(
					area, min(last_iova,
						  iopt_area_last_iova(area))));
	}
	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, "IOMMUFD");
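
/*
 * Illustrative sketch (not part of the original file, not compiled): pinning a
 * page-aligned IOVA range, touching it through the CPU, then unpinning with
 * exactly the same iova/length. The example_read_first_page() name and the
 * fixed two-page range are hypothetical.
 */
#if 0
static int example_read_first_page(struct iommufd_access *access,
				   unsigned long iova, u8 *out, size_t len)
{
	struct page *pages[2];
	void *va;
	int rc;

	/* iova and length must be PAGE_SIZE aligned for pin_pages */
	rc = iommufd_access_pin_pages(access, iova, 2 * PAGE_SIZE, pages,
				      IOMMUFD_ACCESS_RW_READ);
	if (rc)
		return rc;

	va = kmap_local_page(pages[0]);
	memcpy(out, va, min_t(size_t, len, PAGE_SIZE));
	kunmap_local(va);

	/* Must match the iova/length passed to iommufd_access_pin_pages() */
	iommufd_access_unpin_pages(access, iova, 2 * PAGE_SIZE);
	return 0;
}
#endif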

/**
 * iommufd_access_rw - Read or write data under the iova
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @data: Kernel buffer to copy to/from
 * @length: Number of bytes to access
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * Copy kernel to/from data into the range given by IOVA/length. If flags
 * indicates IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be optimized
 * by changing it into copy_to/from_user().
 */
int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
		      void *data, size_t length, unsigned int flags)
{
	struct iopt_area_contig_iter iter;
	struct io_pagetable *iopt;
	struct iopt_area *area;
	unsigned long last_iova;
	int rc = -EINVAL;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	mutex_lock(&access->ioas_lock);
	if (!access->ioas) {
		mutex_unlock(&access->ioas_lock);
		return -ENOENT;
	}
	iopt = &access->ioas->iopt;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));
		unsigned long bytes = (last - iter.cur_iova) + 1;

		if (area->prevent_access) {
			rc = -EINVAL;
			goto err_out;
		}

		if (!check_area_prot(area, flags)) {
			rc = -EPERM;
			goto err_out;
		}

		rc = iopt_pages_rw_access(
			area->pages, iopt_area_start_byte(area, iter.cur_iova),
			data, bytes, flags);
		if (rc)
			goto err_out;
		data += bytes;
	}
	if (!iopt_area_contig_done(&iter))
		rc = -ENOENT;
err_out:
	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, "IOMMUFD");
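
/*
 * Illustrative sketch (not part of the original file, not compiled): using
 * iommufd_access_rw() to copy a small structure out of IOAS memory without
 * pinning. "struct example_desc" and example_fetch_desc() are hypothetical.
 */
#if 0
struct example_desc {
	__le64 addr;
	__le32 len;
	__le32 flags;
};

static int example_fetch_desc(struct iommufd_access *access,
			      unsigned long desc_iova,
			      struct example_desc *desc)
{
	/* Reads sizeof(*desc) bytes from the IOAS into the kernel buffer */
	return iommufd_access_rw(access, desc_iova, desc, sizeof(*desc),
				 IOMMUFD_ACCESS_RW_READ);
}
#endif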

int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
{
	struct iommu_hw_info *cmd = ucmd->cmd;
	void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr);
	const struct iommu_ops *ops;
	struct iommufd_device *idev;
	unsigned int data_len;
	unsigned int copy_len;
	void *data;
	int rc;

	if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] ||
	    cmd->__reserved[2])
		return -EOPNOTSUPP;

	idev = iommufd_get_device(ucmd, cmd->dev_id);
	if (IS_ERR(idev))
		return PTR_ERR(idev);

	ops = dev_iommu_ops(idev->dev);
	if (ops->hw_info) {
		data = ops->hw_info(idev->dev, &data_len, &cmd->out_data_type);
		if (IS_ERR(data)) {
			rc = PTR_ERR(data);
			goto out_put;
		}

		/*
		 * Drivers that have a hw_info callback should report a unique
		 * iommu_hw_info_type.
		 */
		if (WARN_ON_ONCE(cmd->out_data_type ==
				 IOMMU_HW_INFO_TYPE_NONE)) {
			rc = -ENODEV;
			goto out_free;
		}
	} else {
		cmd->out_data_type = IOMMU_HW_INFO_TYPE_NONE;
		data_len = 0;
		data = NULL;
	}

	copy_len = min(cmd->data_len, data_len);
	if (copy_to_user(user_ptr, data, copy_len)) {
		rc = -EFAULT;
		goto out_free;
	}

	/*
	 * Zero the trailing bytes if the user buffer is bigger than the
	 * data size the kernel actually has.
	 */
	if (copy_len < cmd->data_len) {
		if (clear_user(user_ptr + copy_len, cmd->data_len - copy_len)) {
			rc = -EFAULT;
			goto out_free;
		}
	}

	/*
	 * We return the length the kernel supports so userspace may know what
	 * the kernel capability is. It could be larger than the input buffer.
	 */
	cmd->data_len = data_len;

	cmd->out_capabilities = 0;
	if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
		cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING;

	cmd->out_max_pasid_log2 = 0;
	/*
	 * Currently, all iommu drivers enable PASID in the probe_device()
	 * op if the iommu and the device support it. So the max_pasids stored
	 * in dev->iommu indicates both PASID support and enable status. A
	 * non-zero dev->iommu->max_pasids means PASID is supported and
	 * enabled. iommufd only reports the PASID capability to userspace if
	 * it's enabled.
	 */
	if (idev->dev->iommu->max_pasids) {
		cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids);

		if (dev_is_pci(idev->dev)) {
			struct pci_dev *pdev = to_pci_dev(idev->dev);
			int ctrl;

			ctrl = pci_pasid_status(pdev);

			WARN_ON_ONCE(ctrl < 0 ||
				     !(ctrl & PCI_PASID_CTRL_ENABLE));

			if (ctrl & PCI_PASID_CTRL_EXEC)
				cmd->out_capabilities |=
					IOMMU_HW_CAP_PCI_PASID_EXEC;
			if (ctrl & PCI_PASID_CTRL_PRIV)
				cmd->out_capabilities |=
					IOMMU_HW_CAP_PCI_PASID_PRIV;
		}
	}

	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
out_free:
	kfree(data);
out_put:
	iommufd_put_object(ucmd->ictx, &idev->obj);
	return rc;
}
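
/*
 * Illustrative userspace-side sketch (not part of the original file, not
 * compiled): how IOMMU_GET_HW_INFO's data_len negotiation works. The caller
 * supplies a buffer; the kernel copies min(data_len, kernel_len), zero-fills
 * any tail of the user buffer, and writes back the length it supports.
 * "iommufd_fd" and "dev_id" are hypothetical inputs.
 */
#if 0
static int example_get_hw_info(int iommufd_fd, __u32 dev_id)
{
	__u8 buf[256] = {};
	struct iommu_hw_info cmd = {
		.size = sizeof(cmd),
		.dev_id = dev_id,
		.data_len = sizeof(buf),
		.data_uptr = (__u64)(uintptr_t)buf,
	};

	if (ioctl(iommufd_fd, IOMMU_GET_HW_INFO, &cmd))
		return -1;

	/*
	 * cmd.data_len now holds the length the kernel supports, which may be
	 * larger than sizeof(buf); cmd.out_data_type says how to parse buf.
	 */
	return 0;
}
#endif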