// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 *
 * The io_pagetable is the top of the data structure that maps IOVAs to PFNs.
 * The PFNs can be placed into an iommu_domain, or returned to the caller as a
 * page list for access by an in-kernel user.
 *
 * The data structure uses the iopt_pages to optimize the storage of the PFNs
 * between the domains and xarray.
 */
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/lockdep.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <uapi/linux/iommufd.h>

#include "double_span.h"
#include "io_pagetable.h"

struct iopt_pages_list {
	struct iopt_pages *pages;
	struct iopt_area *area;
	struct list_head next;
	unsigned long start_byte;
	unsigned long length;
};

struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter,
					struct io_pagetable *iopt,
					unsigned long iova,
					unsigned long last_iova)
{
	lockdep_assert_held(&iopt->iova_rwsem);

	iter->cur_iova = iova;
	iter->last_iova = last_iova;
	iter->area = iopt_area_iter_first(iopt, iova, iova);
	if (!iter->area)
		return NULL;
	if (!iter->area->pages) {
		iter->area = NULL;
		return NULL;
	}
	return iter->area;
}

struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter)
{
	unsigned long last_iova;

	if (!iter->area)
		return NULL;
	last_iova = iopt_area_last_iova(iter->area);
	if (iter->last_iova <= last_iova)
		return NULL;

	iter->cur_iova = last_iova + 1;
	iter->area = iopt_area_iter_next(iter->area, iter->cur_iova,
					 iter->last_iova);
	if (!iter->area)
		return NULL;
	if (iter->cur_iova != iopt_area_iova(iter->area) ||
	    !iter->area->pages) {
		iter->area = NULL;
		return NULL;
	}
	return iter->area;
}
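
/*
 * Illustrative sketch (not called here): the two helpers above are normally
 * used through the iopt_for_each_contig_area() macro, following the same
 * pattern as __iommu_read_and_clear_dirty() and iopt_get_pages() below:
 *
 *	struct iopt_area_contig_iter iter;
 *	struct iopt_area *area;
 *
 *	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
 *		unsigned long last = min(last_iova, iopt_area_last_iova(area));
 *
 *		(operate on [iter.cur_iova, last] within this area)
 *	}
 *	if (!iopt_area_contig_done(&iter))
 *		return -EINVAL;	(a gap was found in the requested range)
 */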

static bool __alloc_iova_check_range(unsigned long *start, unsigned long last,
				     unsigned long length,
				     unsigned long iova_alignment,
				     unsigned long page_offset)
{
	unsigned long aligned_start;

	/* ALIGN_UP() */
	if (check_add_overflow(*start, iova_alignment - 1, &aligned_start))
		return false;
	aligned_start &= ~(iova_alignment - 1);
	aligned_start |= page_offset;

	if (aligned_start >= last || last - aligned_start < length - 1)
		return false;
	*start = aligned_start;
	return true;
}

static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span,
				    unsigned long length,
				    unsigned long iova_alignment,
				    unsigned long page_offset)
{
	if (span->is_used)
		return false;
	return __alloc_iova_check_range(&span->start_hole, span->last_hole,
					length, iova_alignment, page_offset);
}

static bool __alloc_iova_check_used(struct interval_tree_span_iter *span,
				    unsigned long length,
				    unsigned long iova_alignment,
				    unsigned long page_offset)
{
	if (span->is_hole)
		return false;
	return __alloc_iova_check_range(&span->start_used, span->last_used,
					length, iova_alignment, page_offset);
}

/*
 * Automatically find a block of IOVA that is not being used and not reserved.
 * Does not return a 0 IOVA even if it is valid.
 */
static int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova,
			   unsigned long addr, unsigned long length)
{
	unsigned long page_offset = addr % PAGE_SIZE;
	struct interval_tree_double_span_iter used_span;
	struct interval_tree_span_iter allowed_span;
	unsigned long max_alignment = PAGE_SIZE;
	unsigned long iova_alignment;

	lockdep_assert_held(&iopt->iova_rwsem);

	/* Protect roundup_pow_of_two() from overflow */
	if (length == 0 || length >= ULONG_MAX / 2)
		return -EOVERFLOW;

	/*
	 * Keep alignment present in addr when building the IOVA, which
	 * increases the chance we can map a THP.
	 */
	if (!addr)
		iova_alignment = roundup_pow_of_two(length);
	else
		iova_alignment = min_t(unsigned long,
				       roundup_pow_of_two(length),
				       1UL << __ffs64(addr));

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	max_alignment = HPAGE_SIZE;
#endif
	/* Protect against ALIGN() overflow */
	if (iova_alignment >= max_alignment)
		iova_alignment = max_alignment;

	if (iova_alignment < iopt->iova_alignment)
		return -EINVAL;

	interval_tree_for_each_span(&allowed_span, &iopt->allowed_itree,
				    PAGE_SIZE, ULONG_MAX - PAGE_SIZE) {
		if (RB_EMPTY_ROOT(&iopt->allowed_itree.rb_root)) {
			allowed_span.start_used = PAGE_SIZE;
			allowed_span.last_used = ULONG_MAX - PAGE_SIZE;
			allowed_span.is_hole = false;
		}

		if (!__alloc_iova_check_used(&allowed_span, length,
					     iova_alignment, page_offset))
			continue;

		interval_tree_for_each_double_span(
			&used_span, &iopt->reserved_itree, &iopt->area_itree,
			allowed_span.start_used, allowed_span.last_used) {
			if (!__alloc_iova_check_hole(&used_span, length,
						     iova_alignment,
						     page_offset))
				continue;

			*iova = used_span.start_hole;
			return 0;
		}
	}
	return -ENOSPC;
}
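
/*
 * Worked example for the alignment selection above (illustrative numbers
 * only): with addr = 0x7f1234567000 and length = 0x10000,
 * roundup_pow_of_two(length) is 0x10000 and 1UL << __ffs64(addr) is 0x1000,
 * so iova_alignment becomes 0x1000. The allocator then searches for a hole
 * whose aligned start also carries page_offset (addr % PAGE_SIZE, here 0), so
 * the user VA and the IOVA share the same offset within a page.
 */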

static int iopt_check_iova(struct io_pagetable *iopt, unsigned long iova,
			   unsigned long length)
{
	unsigned long last;

	lockdep_assert_held(&iopt->iova_rwsem);

	if ((iova & (iopt->iova_alignment - 1)))
		return -EINVAL;

	if (check_add_overflow(iova, length - 1, &last))
		return -EOVERFLOW;

	/* No reserved IOVA intersects the range */
	if (iopt_reserved_iter_first(iopt, iova, last))
		return -EINVAL;

	/* Check that there is not already a mapping in the range */
	if (iopt_area_iter_first(iopt, iova, last))
		return -EEXIST;
	return 0;
}

/*
 * The area takes a slice of the pages from start_byte to start_byte + length
 */
static int iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area,
			    struct iopt_pages *pages, unsigned long iova,
			    unsigned long start_byte, unsigned long length,
			    int iommu_prot)
{
	lockdep_assert_held_write(&iopt->iova_rwsem);

	if ((iommu_prot & IOMMU_WRITE) && !pages->writable)
		return -EPERM;

	area->iommu_prot = iommu_prot;
	area->page_offset = start_byte % PAGE_SIZE;
	if (area->page_offset & (iopt->iova_alignment - 1))
		return -EINVAL;

	area->node.start = iova;
	if (check_add_overflow(iova, length - 1, &area->node.last))
		return -EOVERFLOW;

	area->pages_node.start = start_byte / PAGE_SIZE;
	if (check_add_overflow(start_byte, length - 1, &area->pages_node.last))
		return -EOVERFLOW;
	area->pages_node.last = area->pages_node.last / PAGE_SIZE;
	if (WARN_ON(area->pages_node.last >= pages->npages))
		return -EOVERFLOW;

	/*
	 * The area is inserted with a NULL pages, indicating it is not fully
	 * initialized yet.
	 */
	area->iopt = iopt;
	interval_tree_insert(&area->node, &iopt->area_itree);
	return 0;
}

static struct iopt_area *iopt_area_alloc(void)
{
	struct iopt_area *area;

	area = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT);
	if (!area)
		return NULL;
	RB_CLEAR_NODE(&area->node.rb);
	RB_CLEAR_NODE(&area->pages_node.rb);
	return area;
}

static int iopt_alloc_area_pages(struct io_pagetable *iopt,
				 struct list_head *pages_list,
				 unsigned long length, unsigned long *dst_iova,
				 int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list *elm;
	unsigned long start;
	unsigned long iova;
	int rc = 0;

	list_for_each_entry(elm, pages_list, next) {
		elm->area = iopt_area_alloc();
		if (!elm->area)
			return -ENOMEM;
	}

	down_write(&iopt->iova_rwsem);
	if ((length & (iopt->iova_alignment - 1)) || !length) {
		rc = -EINVAL;
		goto out_unlock;
	}

	if (flags & IOPT_ALLOC_IOVA) {
		/* Use the first entry to guess the ideal IOVA alignment */
		elm = list_first_entry(pages_list, struct iopt_pages_list,
				       next);
		switch (elm->pages->type) {
		case IOPT_ADDRESS_USER:
			start = elm->start_byte + (uintptr_t)elm->pages->uptr;
			break;
		case IOPT_ADDRESS_FILE:
			start = elm->start_byte + elm->pages->start;
			break;
		}
		rc = iopt_alloc_iova(iopt, dst_iova, start, length);
		if (rc)
			goto out_unlock;
		if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
		    WARN_ON(iopt_check_iova(iopt, *dst_iova, length))) {
			rc = -EINVAL;
			goto out_unlock;
		}
	} else {
		rc = iopt_check_iova(iopt, *dst_iova, length);
		if (rc)
			goto out_unlock;
	}

	/*
	 * Areas are created with a NULL pages so that the IOVA space is
	 * reserved and we can unlock the iova_rwsem.
	 */
	iova = *dst_iova;
	list_for_each_entry(elm, pages_list, next) {
		rc = iopt_insert_area(iopt, elm->area, elm->pages, iova,
				      elm->start_byte, elm->length, iommu_prot);
		if (rc)
			goto out_unlock;
		iova += elm->length;
	}

out_unlock:
	up_write(&iopt->iova_rwsem);
	return rc;
}

static void iopt_abort_area(struct iopt_area *area)
{
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(area->pages);
	if (area->iopt) {
		down_write(&area->iopt->iova_rwsem);
		interval_tree_remove(&area->node, &area->iopt->area_itree);
		up_write(&area->iopt->iova_rwsem);
	}
	kfree(area);
}

void iopt_free_pages_list(struct list_head *pages_list)
{
	struct iopt_pages_list *elm;

	while ((elm = list_first_entry_or_null(pages_list,
					       struct iopt_pages_list, next))) {
		if (elm->area)
			iopt_abort_area(elm->area);
		if (elm->pages)
			iopt_put_pages(elm->pages);
		list_del(&elm->next);
		kfree(elm);
	}
}

static int iopt_fill_domains_pages(struct list_head *pages_list)
{
	struct iopt_pages_list *undo_elm;
	struct iopt_pages_list *elm;
	int rc;

	list_for_each_entry(elm, pages_list, next) {
		rc = iopt_area_fill_domains(elm->area, elm->pages);
		if (rc)
			goto err_undo;
	}
	return 0;

err_undo:
	list_for_each_entry(undo_elm, pages_list, next) {
		if (undo_elm == elm)
			break;
		iopt_area_unfill_domains(undo_elm->area, undo_elm->pages);
	}
	return rc;
}
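
/*
 * Mapping is done in two phases: iopt_alloc_area_pages() reserves the IOVA
 * range by inserting areas that still have a NULL area->pages while holding
 * the iova_rwsem, then iopt_map_pages() fills every attached domain under the
 * domains_rwsem and finally commits area->pages to mark the areas fully
 * initialized.
 */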

int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
		   unsigned long length, unsigned long *dst_iova,
		   int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list *elm;
	int rc;

	rc = iopt_alloc_area_pages(iopt, pages_list, length, dst_iova,
				   iommu_prot, flags);
	if (rc)
		return rc;

	down_read(&iopt->domains_rwsem);
	rc = iopt_fill_domains_pages(pages_list);
	if (rc)
		goto out_unlock_domains;

	down_write(&iopt->iova_rwsem);
	list_for_each_entry(elm, pages_list, next) {
		/*
		 * area->pages must be set inside the domains_rwsem to ensure
		 * any newly added domains will get filled. Moves the reference
		 * in from the list.
		 */
		elm->area->pages = elm->pages;
		elm->pages = NULL;
		elm->area = NULL;
	}
	up_write(&iopt->iova_rwsem);
out_unlock_domains:
	up_read(&iopt->domains_rwsem);
	return rc;
}

static int iopt_map_common(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			   struct iopt_pages *pages, unsigned long *iova,
			   unsigned long length, unsigned long start_byte,
			   int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list elm = {};
	LIST_HEAD(pages_list);
	int rc;

	elm.pages = pages;
	elm.start_byte = start_byte;
	if (ictx->account_mode == IOPT_PAGES_ACCOUNT_MM &&
	    elm.pages->account_mode == IOPT_PAGES_ACCOUNT_USER)
		elm.pages->account_mode = IOPT_PAGES_ACCOUNT_MM;
	elm.length = length;
	list_add(&elm.next, &pages_list);

	rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags);
	if (rc) {
		if (elm.area)
			iopt_abort_area(elm.area);
		if (elm.pages)
			iopt_put_pages(elm.pages);
		return rc;
	}
	return 0;
}

/**
 * iopt_map_user_pages() - Map a user VA to an iova in the io page table
 * @ictx: iommufd_ctx the iopt is part of
 * @iopt: io_pagetable to act on
 * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains
 *        the chosen iova on output. Otherwise it is the iova to map to on
 *        input
 * @uptr: User VA to map
 * @length: Number of bytes to map
 * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping
 * @flags: IOPT_ALLOC_IOVA or zero
 *
 * iova, uptr, and length must be aligned to iova_alignment. For domain backed
 * page tables this will pin the pages and load them into the domain at iova.
 * For non-domain page tables this will only setup a lazy reference and the
 * caller must use iopt_access_pages() to touch them.
 *
 * iopt_unmap_iova() must be called to undo this before the io_pagetable can be
 * destroyed.
 */
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, void __user *uptr,
			unsigned long length, int iommu_prot,
			unsigned int flags)
{
	struct iopt_pages *pages;

	pages = iopt_alloc_user_pages(uptr, length, iommu_prot & IOMMU_WRITE);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	return iopt_map_common(ictx, iopt, pages, iova, length,
			       uptr - pages->uptr, iommu_prot, flags);
}
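
/*
 * Illustrative caller sketch (hypothetical, not part of this file): a typical
 * user of iopt_map_user_pages() lets the allocator pick the IOVA:
 *
 *	unsigned long iova = 0;
 *	int rc;
 *
 *	rc = iopt_map_user_pages(ictx, iopt, &iova, uptr, length,
 *				 IOMMU_READ | IOMMU_WRITE, IOPT_ALLOC_IOVA);
 *	if (rc)
 *		return rc;
 *	(use the mapping, then iopt_unmap_iova(iopt, iova, length, NULL))
 */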

/**
 * iopt_map_file_pages() - Like iopt_map_user_pages, but map a file.
 * @ictx: iommufd_ctx the iopt is part of
 * @iopt: io_pagetable to act on
 * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains
 *        the chosen iova on output. Otherwise it is the iova to map to on
 *        input
 * @file: file to map
 * @start: map file starting at this byte offset
 * @length: Number of bytes to map
 * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping
 * @flags: IOPT_ALLOC_IOVA or zero
 */
int iopt_map_file_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, struct file *file,
			unsigned long start, unsigned long length,
			int iommu_prot, unsigned int flags)
{
	struct iopt_pages *pages;

	pages = iopt_alloc_file_pages(file, start, length,
				      iommu_prot & IOMMU_WRITE);
	if (IS_ERR(pages))
		return PTR_ERR(pages);
	return iopt_map_common(ictx, iopt, pages, iova, length,
			       start - pages->start, iommu_prot, flags);
}

struct iova_bitmap_fn_arg {
	unsigned long flags;
	struct io_pagetable *iopt;
	struct iommu_domain *domain;
	struct iommu_dirty_bitmap *dirty;
};

static int __iommu_read_and_clear_dirty(struct iova_bitmap *bitmap,
					unsigned long iova, size_t length,
					void *opaque)
{
	struct iopt_area *area;
	struct iopt_area_contig_iter iter;
	struct iova_bitmap_fn_arg *arg = opaque;
	struct iommu_domain *domain = arg->domain;
	struct iommu_dirty_bitmap *dirty = arg->dirty;
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	unsigned long last_iova = iova + length - 1;
	unsigned long flags = arg->flags;
	int ret;

	iopt_for_each_contig_area(&iter, area, arg->iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));

		ret = ops->read_and_clear_dirty(domain, iter.cur_iova,
						last - iter.cur_iova + 1, flags,
						dirty);
		if (ret)
			return ret;
	}

	if (!iopt_area_contig_done(&iter))
		return -EINVAL;
	return 0;
}

static int
iommu_read_and_clear_dirty(struct iommu_domain *domain,
			   struct io_pagetable *iopt, unsigned long flags,
			   struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	struct iommu_iotlb_gather gather;
	struct iommu_dirty_bitmap dirty;
	struct iova_bitmap_fn_arg arg;
	struct iova_bitmap *iter;
	int ret = 0;

	if (!ops || !ops->read_and_clear_dirty)
		return -EOPNOTSUPP;

	iter = iova_bitmap_alloc(bitmap->iova, bitmap->length,
				 bitmap->page_size,
				 u64_to_user_ptr(bitmap->data));
	if (IS_ERR(iter))
		return -ENOMEM;

	iommu_dirty_bitmap_init(&dirty, iter, &gather);

	arg.flags = flags;
	arg.iopt = iopt;
	arg.domain = domain;
	arg.dirty = &dirty;
	iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty);

	if (!(flags & IOMMU_DIRTY_NO_CLEAR))
		iommu_iotlb_sync(domain, &gather);

	iova_bitmap_free(iter);

	return ret;
}

int iommufd_check_iova_range(struct io_pagetable *iopt,
			     struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	size_t iommu_pgsize = iopt->iova_alignment;
	u64 last_iova;

	if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova))
		return -EOVERFLOW;

	if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX)
		return -EOVERFLOW;

	if ((bitmap->iova & (iommu_pgsize - 1)) ||
	    ((last_iova + 1) & (iommu_pgsize - 1)))
		return -EINVAL;

	if (!bitmap->page_size)
		return -EINVAL;

	if ((bitmap->iova & (bitmap->page_size - 1)) ||
	    ((last_iova + 1) & (bitmap->page_size - 1)))
		return -EINVAL;

	return 0;
}

int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
				   struct iommu_domain *domain,
				   unsigned long flags,
				   struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	int ret;

	ret = iommufd_check_iova_range(iopt, bitmap);
	if (ret)
		return ret;

	down_read(&iopt->iova_rwsem);
	ret = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap);
	up_read(&iopt->iova_rwsem);

	return ret;
}

static int iopt_clear_dirty_data(struct io_pagetable *iopt,
				 struct iommu_domain *domain)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	struct iommu_iotlb_gather gather;
	struct iommu_dirty_bitmap dirty;
	struct iopt_area *area;
	int ret = 0;

	lockdep_assert_held_read(&iopt->iova_rwsem);

	iommu_dirty_bitmap_init(&dirty, NULL, &gather);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		if (!area->pages)
			continue;

		ret = ops->read_and_clear_dirty(domain, iopt_area_iova(area),
						iopt_area_length(area), 0,
						&dirty);
		if (ret)
			break;
	}

	iommu_iotlb_sync(domain, &gather);
	return ret;
}

int iopt_set_dirty_tracking(struct io_pagetable *iopt,
			    struct iommu_domain *domain, bool enable)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	int ret = 0;

	if (!ops)
		return -EOPNOTSUPP;

	down_read(&iopt->iova_rwsem);

	/* Clear dirty bits from PTEs to ensure a clean snapshot */
	if (enable) {
		ret = iopt_clear_dirty_data(iopt, domain);
		if (ret)
			goto out_unlock;
	}

	ret = ops->set_dirty_tracking(domain, enable);

out_unlock:
	up_read(&iopt->iova_rwsem);
	return ret;
}

int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
		   unsigned long length, struct list_head *pages_list)
{
	struct iopt_area_contig_iter iter;
	unsigned long last_iova;
	struct iopt_area *area;
	int rc;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		struct iopt_pages_list *elm;
		unsigned long last = min(last_iova, iopt_area_last_iova(area));

		elm = kzalloc(sizeof(*elm), GFP_KERNEL_ACCOUNT);
		if (!elm) {
			rc = -ENOMEM;
			goto err_free;
		}
		elm->start_byte = iopt_area_start_byte(area, iter.cur_iova);
		elm->pages = area->pages;
		elm->length = (last - iter.cur_iova) + 1;
		kref_get(&elm->pages->kref);
		list_add_tail(&elm->next, pages_list);
	}
	if (!iopt_area_contig_done(&iter)) {
		rc = -ENOENT;
		goto err_free;
	}
	up_read(&iopt->iova_rwsem);
	return 0;
err_free:
	up_read(&iopt->iova_rwsem);
	iopt_free_pages_list(pages_list);
	return rc;
}

static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start,
				 unsigned long last, unsigned long *unmapped)
{
	struct iopt_area *area;
	unsigned long unmapped_bytes = 0;
	unsigned int tries = 0;
	int rc = -ENOENT;

	/*
	 * The domains_rwsem must be held in read mode any time any area->pages
	 * is NULL. This prevents domain attach/detach from running
	 * concurrently with cleaning up the area.
	 */
again:
	down_read(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	while ((area = iopt_area_iter_first(iopt, start, last))) {
		unsigned long area_last = iopt_area_last_iova(area);
		unsigned long area_first = iopt_area_iova(area);
		struct iopt_pages *pages;

		/* Userspace should not race map/unmap's of the same area */
		if (!area->pages) {
			rc = -EBUSY;
			goto out_unlock_iova;
		}

		/* The area is locked by an object that has not been destroyed */
		if (area->num_locks) {
			rc = -EBUSY;
			goto out_unlock_iova;
		}

		if (area_first < start || area_last > last) {
			rc = -ENOENT;
			goto out_unlock_iova;
		}

		if (area_first != start)
			tries = 0;

		/*
		 * num_accesses writers must hold the iova_rwsem too, so we can
		 * safely read it under the write side of the iova_rwsem
		 * without the pages->mutex.
		 */
		if (area->num_accesses) {
			size_t length = iopt_area_length(area);

			start = area_first;
			area->prevent_access = true;
			up_write(&iopt->iova_rwsem);
			up_read(&iopt->domains_rwsem);

			iommufd_access_notify_unmap(iopt, area_first, length);
			/* Something is not responding to unmap requests. */
			tries++;
			if (WARN_ON(tries > 100)) {
				rc = -EDEADLOCK;
				goto out_unmapped;
			}
			goto again;
		}

		pages = area->pages;
		area->pages = NULL;
		up_write(&iopt->iova_rwsem);

		iopt_area_unfill_domains(area, pages);
		iopt_abort_area(area);
		iopt_put_pages(pages);

		unmapped_bytes += area_last - area_first + 1;

		down_write(&iopt->iova_rwsem);
	}
	if (unmapped_bytes)
		rc = 0;

out_unlock_iova:
	up_write(&iopt->iova_rwsem);
	up_read(&iopt->domains_rwsem);
out_unmapped:
	if (unmapped)
		*unmapped = unmapped_bytes;
	return rc;
}

/**
 * iopt_unmap_iova() - Remove a range of iova
 * @iopt: io_pagetable to act on
 * @iova: Starting iova to unmap
 * @length: Number of bytes to unmap
 * @unmapped: Return number of bytes unmapped
 *
 * The requested range must be a superset of existing ranges.
 * Splitting/truncating IOVA mappings is not allowed.
 */
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
		    unsigned long length, unsigned long *unmapped)
{
	unsigned long iova_last;

	if (!length)
		return -EINVAL;

	if (check_add_overflow(iova, length - 1, &iova_last))
		return -EOVERFLOW;

	return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped);
}

int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped)
{
	int rc;

	rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped);
	/* If the IOVAs are empty then unmap all succeeds */
	if (rc == -ENOENT)
		return 0;
	return rc;
}

/* The caller must always free all the nodes in the allowed_iova rb_root. */
int iopt_set_allow_iova(struct io_pagetable *iopt,
			struct rb_root_cached *allowed_iova)
{
	struct iopt_allowed *allowed;

	down_write(&iopt->iova_rwsem);
	swap(*allowed_iova, iopt->allowed_itree);

	for (allowed = iopt_allowed_iter_first(iopt, 0, ULONG_MAX); allowed;
	     allowed = iopt_allowed_iter_next(allowed, 0, ULONG_MAX)) {
		if (iopt_reserved_iter_first(iopt, allowed->node.start,
					     allowed->node.last)) {
			swap(*allowed_iova, iopt->allowed_itree);
			up_write(&iopt->iova_rwsem);
			return -EADDRINUSE;
		}
	}
	up_write(&iopt->iova_rwsem);
	return 0;
}

int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
		      unsigned long last, void *owner)
{
	struct iopt_reserved *reserved;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	if (iopt_area_iter_first(iopt, start, last) ||
	    iopt_allowed_iter_first(iopt, start, last))
		return -EADDRINUSE;

	reserved = kzalloc(sizeof(*reserved), GFP_KERNEL_ACCOUNT);
	if (!reserved)
		return -ENOMEM;
	reserved->node.start = start;
	reserved->node.last = last;
	reserved->owner = owner;
	interval_tree_insert(&reserved->node, &iopt->reserved_itree);
	return 0;
}

static void __iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
	struct iopt_reserved *reserved, *next;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	for (reserved = iopt_reserved_iter_first(iopt, 0, ULONG_MAX); reserved;
	     reserved = next) {
		next = iopt_reserved_iter_next(reserved, 0, ULONG_MAX);

		if (reserved->owner == owner) {
			interval_tree_remove(&reserved->node,
					     &iopt->reserved_itree);
			kfree(reserved);
		}
	}
}

void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
	down_write(&iopt->iova_rwsem);
	__iopt_remove_reserved_iova(iopt, owner);
	up_write(&iopt->iova_rwsem);
}

void iopt_init_table(struct io_pagetable *iopt)
{
	init_rwsem(&iopt->iova_rwsem);
	init_rwsem(&iopt->domains_rwsem);
	iopt->area_itree = RB_ROOT_CACHED;
	iopt->allowed_itree = RB_ROOT_CACHED;
	iopt->reserved_itree = RB_ROOT_CACHED;
	xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT);
	xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC);

	/*
	 * iopts start as SW tables that can use the entire size_t IOVA space
	 * due to the use of size_t in the APIs. They have no alignment
	 * restriction.
	 */
	iopt->iova_alignment = 1;
}

void iopt_destroy_table(struct io_pagetable *iopt)
{
	struct interval_tree_node *node;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		iopt_remove_reserved_iova(iopt, NULL);

	while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0,
						ULONG_MAX))) {
		interval_tree_remove(node, &iopt->allowed_itree);
		kfree(container_of(node, struct iopt_allowed, node));
	}

	WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root));
	WARN_ON(!xa_empty(&iopt->domains));
	WARN_ON(!xa_empty(&iopt->access_list));
	WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root));
}
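
/*
 * Illustrative sketch (hypothetical caller): reserved ranges are keyed by an
 * opaque owner pointer so they can be dropped together later, as
 * iopt_table_enforce_dev_resv_regions() below does with the device pointer:
 *
 *	down_write(&iopt->iova_rwsem);
 *	rc = iopt_reserve_iova(iopt, start, last, owner);
 *	up_write(&iopt->iova_rwsem);
 *	...
 *	iopt_remove_reserved_iova(iopt, owner);
 */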

/**
 * iopt_unfill_domain() - Unfill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to unfill
 *
 * This is used when removing a domain from the iopt. Every area in the iopt
 * will be unmapped from the domain. The domain must already be removed from
 * the domains xarray.
 */
static void iopt_unfill_domain(struct io_pagetable *iopt,
			       struct iommu_domain *domain)
{
	struct iopt_area *area;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held_write(&iopt->domains_rwsem);

	/*
	 * Some other domain is holding all the pfns still, rapidly unmap this
	 * domain.
	 */
	if (iopt->next_domain_id != 0) {
		/* Pick an arbitrary remaining domain to act as storage */
		struct iommu_domain *storage_domain =
			xa_load(&iopt->domains, 0);

		for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
		     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
			struct iopt_pages *pages = area->pages;

			if (!pages)
				continue;

			mutex_lock(&pages->mutex);
			if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
				WARN_ON(!area->storage_domain);
			if (area->storage_domain == domain)
				area->storage_domain = storage_domain;
			mutex_unlock(&pages->mutex);

			iopt_area_unmap_domain(area, domain);
		}
		return;
	}

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		interval_tree_remove(&area->pages_node, &pages->domains_itree);
		WARN_ON(area->storage_domain != domain);
		area->storage_domain = NULL;
		iopt_area_unfill_domain(area, pages, domain);
		mutex_unlock(&pages->mutex);
	}
}

/**
 * iopt_fill_domain() - Fill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to fill
 *
 * Fill the domain with PFNs from every area in the iopt. On failure the domain
 * is left unchanged.
 */
static int iopt_fill_domain(struct io_pagetable *iopt,
			    struct iommu_domain *domain)
{
	struct iopt_area *end_area;
	struct iopt_area *area;
	int rc;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held_write(&iopt->domains_rwsem);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		rc = iopt_area_fill_domain(area, domain);
		if (rc) {
			mutex_unlock(&pages->mutex);
			goto out_unfill;
		}
		if (!area->storage_domain) {
			WARN_ON(iopt->next_domain_id != 0);
			area->storage_domain = domain;
			interval_tree_insert(&area->pages_node,
					     &pages->domains_itree);
		}
		mutex_unlock(&pages->mutex);
	}
	return 0;

out_unfill:
	end_area = area;
	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (area == end_area)
			break;
		if (!pages)
			continue;
		mutex_lock(&pages->mutex);
		if (iopt->next_domain_id == 0) {
			interval_tree_remove(&area->pages_node,
					     &pages->domains_itree);
			area->storage_domain = NULL;
		}
		iopt_area_unfill_domain(area, pages, domain);
		mutex_unlock(&pages->mutex);
	}
	return rc;
}

/* All existing areas must conform to an increased page size */
static int iopt_check_iova_alignment(struct io_pagetable *iopt,
				     unsigned long new_iova_alignment)
{
	unsigned long align_mask = new_iova_alignment - 1;
	struct iopt_area *area;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held(&iopt->domains_rwsem);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX))
		if ((iopt_area_iova(area) & align_mask) ||
		    (iopt_area_length(area) & align_mask) ||
		    (area->page_offset & align_mask))
			return -EADDRINUSE;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) {
		struct iommufd_access *access;
		unsigned long index;

		xa_for_each(&iopt->access_list, index, access)
			if (WARN_ON(access->iova_alignment >
				    new_iova_alignment))
				return -EADDRINUSE;
	}
	return 0;
}

int iopt_table_add_domain(struct io_pagetable *iopt,
			  struct iommu_domain *domain)
{
	const struct iommu_domain_geometry *geometry = &domain->geometry;
	struct iommu_domain *iter_domain;
	unsigned int new_iova_alignment;
	unsigned long index;
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);

	xa_for_each(&iopt->domains, index, iter_domain) {
		if (WARN_ON(iter_domain == domain)) {
			rc = -EEXIST;
			goto out_unlock;
		}
	}

	/*
	 * The io page size drives the iova_alignment. Internally the iopt_pages
	 * works in PAGE_SIZE units and we adjust when mapping sub-PAGE_SIZE
	 * objects into the iommu_domain.
	 *
	 * An iommu_domain must always be able to accept PAGE_SIZE to be
	 * compatible as we can't guarantee higher contiguity.
	 */
	new_iova_alignment = max_t(unsigned long,
				   1UL << __ffs(domain->pgsize_bitmap),
				   iopt->iova_alignment);
	if (new_iova_alignment > PAGE_SIZE) {
		rc = -EINVAL;
		goto out_unlock;
	}
	if (new_iova_alignment != iopt->iova_alignment) {
		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
		if (rc)
			goto out_unlock;
	}

	/* No area exists that is outside the allowed domain aperture */
	if (geometry->aperture_start != 0) {
		rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1,
				       domain);
		if (rc)
			goto out_reserved;
	}
	if (geometry->aperture_end != ULONG_MAX) {
		rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1,
				       ULONG_MAX, domain);
		if (rc)
			goto out_reserved;
	}

	rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL);
	if (rc)
		goto out_reserved;

	rc = iopt_fill_domain(iopt, domain);
	if (rc)
		goto out_release;

	iopt->iova_alignment = new_iova_alignment;
	xa_store(&iopt->domains, iopt->next_domain_id, domain, GFP_KERNEL);
	iopt->next_domain_id++;
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return 0;
out_release:
	xa_release(&iopt->domains, iopt->next_domain_id);
out_reserved:
	__iopt_remove_reserved_iova(iopt, domain);
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}
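
/*
 * Worked example for the alignment rule above (illustrative only): a domain
 * advertising pgsize_bitmap = SZ_4K | SZ_2M has a smallest IO page size of
 * 1UL << __ffs(pgsize_bitmap) = 4KiB, so new_iova_alignment stays at
 * PAGE_SIZE on a 4KiB-page host and the domain can be added. A hypothetical
 * domain that only supported 64KiB pages on the same host would compute a
 * 64KiB alignment, exceed PAGE_SIZE, and be rejected with -EINVAL.
 */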

static int iopt_calculate_iova_alignment(struct io_pagetable *iopt)
{
	unsigned long new_iova_alignment;
	struct iommufd_access *access;
	struct iommu_domain *domain;
	unsigned long index;

	lockdep_assert_held_write(&iopt->iova_rwsem);
	lockdep_assert_held(&iopt->domains_rwsem);

	/* See batch_iommu_map_small() */
	if (iopt->disable_large_pages)
		new_iova_alignment = PAGE_SIZE;
	else
		new_iova_alignment = 1;

	xa_for_each(&iopt->domains, index, domain)
		new_iova_alignment = max_t(unsigned long,
					   1UL << __ffs(domain->pgsize_bitmap),
					   new_iova_alignment);
	xa_for_each(&iopt->access_list, index, access)
		new_iova_alignment = max_t(unsigned long,
					   access->iova_alignment,
					   new_iova_alignment);

	if (new_iova_alignment > iopt->iova_alignment) {
		int rc;

		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
		if (rc)
			return rc;
	}
	iopt->iova_alignment = new_iova_alignment;
	return 0;
}

void iopt_table_remove_domain(struct io_pagetable *iopt,
			      struct iommu_domain *domain)
{
	struct iommu_domain *iter_domain = NULL;
	unsigned long index;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);

	xa_for_each(&iopt->domains, index, iter_domain)
		if (iter_domain == domain)
			break;
	if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id)
		goto out_unlock;

	/*
	 * Compress the xarray to keep it linear by swapping the entry to erase
	 * with the tail entry and shrinking the tail.
	 */
	iopt->next_domain_id--;
	iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id);
	if (index != iopt->next_domain_id)
		xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL);

	iopt_unfill_domain(iopt, domain);
	__iopt_remove_reserved_iova(iopt, domain);

	WARN_ON(iopt_calculate_iova_alignment(iopt));
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}
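
/*
 * Illustrative sketch (hypothetical caller, following the VFIO compatibility
 * use described below): to punch a hole [hole_start, hole_last] out of a
 * larger mapping, the area is first cut at both edges and the middle is then
 * unmapped (assuming hole_start != 0):
 *
 *	unsigned long iovas[] = { hole_start - 1, hole_last };
 *
 *	rc = iopt_cut_iova(iopt, iovas, ARRAY_SIZE(iovas));
 *	if (!rc)
 *		rc = iopt_unmap_iova(iopt, hole_start,
 *				     hole_last - hole_start + 1, NULL);
 */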

/**
 * iopt_area_split - Split an area into two parts at iova
 * @area: The area to split
 * @iova: Becomes the last of a new area
 *
 * This splits an area into two. It is part of the VFIO compatibility to allow
 * poking a hole in the mapping. The two areas continue to point at the same
 * iopt_pages, just with different starting bytes.
 */
static int iopt_area_split(struct iopt_area *area, unsigned long iova)
{
	unsigned long alignment = area->iopt->iova_alignment;
	unsigned long last_iova = iopt_area_last_iova(area);
	unsigned long start_iova = iopt_area_iova(area);
	unsigned long new_start = iova + 1;
	struct io_pagetable *iopt = area->iopt;
	struct iopt_pages *pages = area->pages;
	struct iopt_area *lhs;
	struct iopt_area *rhs;
	int rc;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	if (iova == start_iova || iova == last_iova)
		return 0;

	if (!pages || area->prevent_access)
		return -EBUSY;

	if (new_start & (alignment - 1) ||
	    iopt_area_start_byte(area, new_start) & (alignment - 1))
		return -EINVAL;

	lhs = iopt_area_alloc();
	if (!lhs)
		return -ENOMEM;

	rhs = iopt_area_alloc();
	if (!rhs) {
		rc = -ENOMEM;
		goto err_free_lhs;
	}

	mutex_lock(&pages->mutex);
	/*
	 * Splitting is not permitted if an access exists; we don't track
	 * enough information to split existing accesses.
	 */
	if (area->num_accesses) {
		rc = -EINVAL;
		goto err_unlock;
	}

	/*
	 * Splitting is not permitted if a domain could have been mapped with
	 * huge pages.
	 */
	if (area->storage_domain && !iopt->disable_large_pages) {
		rc = -EINVAL;
		goto err_unlock;
	}

	interval_tree_remove(&area->node, &iopt->area_itree);
	rc = iopt_insert_area(iopt, lhs, area->pages, start_iova,
			      iopt_area_start_byte(area, start_iova),
			      (new_start - 1) - start_iova + 1,
			      area->iommu_prot);
	if (WARN_ON(rc))
		goto err_insert;

	rc = iopt_insert_area(iopt, rhs, area->pages, new_start,
			      iopt_area_start_byte(area, new_start),
			      last_iova - new_start + 1, area->iommu_prot);
	if (WARN_ON(rc))
		goto err_remove_lhs;

	/*
	 * If the original area has filled a domain, domains_itree has to be
	 * updated.
	 */
	if (area->storage_domain) {
		interval_tree_remove(&area->pages_node, &pages->domains_itree);
		interval_tree_insert(&lhs->pages_node, &pages->domains_itree);
		interval_tree_insert(&rhs->pages_node, &pages->domains_itree);
	}

	lhs->storage_domain = area->storage_domain;
	lhs->pages = area->pages;
	rhs->storage_domain = area->storage_domain;
	rhs->pages = area->pages;
	kref_get(&rhs->pages->kref);
	kfree(area);
	mutex_unlock(&pages->mutex);

	/*
	 * No change to domains or accesses because the pages haven't been
	 * changed.
	 */
	return 0;

err_remove_lhs:
	interval_tree_remove(&lhs->node, &iopt->area_itree);
err_insert:
	interval_tree_insert(&area->node, &iopt->area_itree);
err_unlock:
	mutex_unlock(&pages->mutex);
	kfree(rhs);
err_free_lhs:
	kfree(lhs);
	return rc;
}

int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
		  size_t num_iovas)
{
	int rc = 0;
	int i;

	down_write(&iopt->iova_rwsem);
	for (i = 0; i < num_iovas; i++) {
		struct iopt_area *area;

		area = iopt_area_iter_first(iopt, iovas[i], iovas[i]);
		if (!area)
			continue;
		rc = iopt_area_split(area, iovas[i]);
		if (rc)
			break;
	}
	up_write(&iopt->iova_rwsem);
	return rc;
}

void iopt_enable_large_pages(struct io_pagetable *iopt)
{
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	WRITE_ONCE(iopt->disable_large_pages, false);
	rc = iopt_calculate_iova_alignment(iopt);
	WARN_ON(rc);
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}

int iopt_disable_large_pages(struct io_pagetable *iopt)
{
	int rc = 0;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	if (iopt->disable_large_pages)
		goto out_unlock;

	/* Won't do it if domains already have pages mapped in them */
	if (!xa_empty(&iopt->domains) &&
	    !RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) {
		rc = -EINVAL;
		goto out_unlock;
	}

	WRITE_ONCE(iopt->disable_large_pages, true);
	rc = iopt_calculate_iova_alignment(iopt);
	if (rc)
		WRITE_ONCE(iopt->disable_large_pages, false);
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}

int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access)
{
	u32 new_id;
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	rc = xa_alloc(&iopt->access_list, &new_id, access, xa_limit_16b,
		      GFP_KERNEL_ACCOUNT);
	if (rc)
		goto out_unlock;

	rc = iopt_calculate_iova_alignment(iopt);
	if (rc) {
		xa_erase(&iopt->access_list, new_id);
		goto out_unlock;
	}
	access->iopt_access_list_id = new_id;

out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}

void iopt_remove_access(struct io_pagetable *iopt,
			struct iommufd_access *access, u32 iopt_access_list_id)
{
	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	WARN_ON(xa_erase(&iopt->access_list, iopt_access_list_id) != access);
	WARN_ON(iopt_calculate_iova_alignment(iopt));
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}

/* Narrow the valid_iova_itree to include reserved ranges from a device. */
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
					struct device *dev,
					phys_addr_t *sw_msi_start)
{
	struct iommu_resv_region *resv;
	LIST_HEAD(resv_regions);
	unsigned int num_hw_msi = 0;
	unsigned int num_sw_msi = 0;
	int rc;

	if (iommufd_should_fail())
		return -EINVAL;

	down_write(&iopt->iova_rwsem);
	/* FIXME: drivers allocate memory but there is no failure propagated */
	iommu_get_resv_regions(dev, &resv_regions);

	list_for_each_entry(resv, &resv_regions, list) {
		if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
			continue;

		if (sw_msi_start && resv->type == IOMMU_RESV_MSI)
			num_hw_msi++;
		if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) {
			*sw_msi_start = resv->start;
			num_sw_msi++;
		}

		rc = iopt_reserve_iova(iopt, resv->start,
				       resv->length - 1 + resv->start, dev);
		if (rc)
			goto out_reserved;
	}

	/* Drivers must offer sane combinations of regions */
	if (WARN_ON(num_sw_msi && num_hw_msi) || WARN_ON(num_sw_msi > 1)) {
		rc = -EINVAL;
		goto out_reserved;
	}

	rc = 0;
	goto out_free_resv;

out_reserved:
	__iopt_remove_reserved_iova(iopt, dev);
out_free_resv:
	iommu_put_resv_regions(dev, &resv_regions);
	up_write(&iopt->iova_rwsem);
	return rc;
}