// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 *
 * The io_pagetable is the top-level data structure that maps IOVAs to PFNs.
 * The PFNs can be placed into an iommu_domain, or returned to the caller as a
 * page list for access by an in-kernel user.
 *
 * The data structure uses the iopt_pages to optimize the storage of the PFNs
 * between the domains and the xarray.
 */
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/lockdep.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <uapi/linux/iommufd.h>

#include "double_span.h"
#include "io_pagetable.h"

struct iopt_pages_list {
	struct iopt_pages *pages;
	struct iopt_area *area;
	struct list_head next;
	unsigned long start_byte;
	unsigned long length;
};

struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter,
					struct io_pagetable *iopt,
					unsigned long iova,
					unsigned long last_iova)
{
	lockdep_assert_held(&iopt->iova_rwsem);

	iter->cur_iova = iova;
	iter->last_iova = last_iova;
	iter->area = iopt_area_iter_first(iopt, iova, iova);
	if (!iter->area)
		return NULL;
	if (!iter->area->pages) {
		iter->area = NULL;
		return NULL;
	}
	return iter->area;
}

struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter)
{
	unsigned long last_iova;

	if (!iter->area)
		return NULL;
	last_iova = iopt_area_last_iova(iter->area);
	if (iter->last_iova <= last_iova)
		return NULL;

	iter->cur_iova = last_iova + 1;
	iter->area = iopt_area_iter_next(iter->area, iter->cur_iova,
					 iter->last_iova);
	if (!iter->area)
		return NULL;
	if (iter->cur_iova != iopt_area_iova(iter->area) ||
	    !iter->area->pages) {
		iter->area = NULL;
		return NULL;
	}
	return iter->area;
}

static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span,
				    unsigned long length,
				    unsigned long iova_alignment,
				    unsigned long page_offset)
{
	if (span->is_used || span->last_hole - span->start_hole < length - 1)
		return false;

	span->start_hole = ALIGN(span->start_hole, iova_alignment) |
			   page_offset;
	if (span->start_hole > span->last_hole ||
	    span->last_hole - span->start_hole < length - 1)
		return false;
	return true;
}

static bool __alloc_iova_check_used(struct interval_tree_span_iter *span,
				    unsigned long length,
				    unsigned long iova_alignment,
				    unsigned long page_offset)
{
	if (span->is_hole || span->last_used - span->start_used < length - 1)
		return false;

	span->start_used = ALIGN(span->start_used, iova_alignment) |
			   page_offset;
	if (span->start_used > span->last_used ||
	    span->last_used - span->start_used < length - 1)
		return false;
	return true;
}

/*
 * Automatically find a block of IOVA that is not being used and not reserved.
 * Does not return a 0 IOVA even if it is valid.
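 *
 * The search walks each allowed range (or the whole usable range when no
 * allowed ranges are set) and, within it, looks for a hole that is covered by
 * neither the reserved ranges nor an existing area, aligned so that the
 * sub-page offset of @addr is preserved.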
 */
static int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova,
			   unsigned long addr, unsigned long length)
{
	unsigned long page_offset = addr % PAGE_SIZE;
	struct interval_tree_double_span_iter used_span;
	struct interval_tree_span_iter allowed_span;
	unsigned long max_alignment = PAGE_SIZE;
	unsigned long iova_alignment;

	lockdep_assert_held(&iopt->iova_rwsem);

	/* Protect roundup_pow_of_two() from overflow */
	if (length == 0 || length >= ULONG_MAX / 2)
		return -EOVERFLOW;

	/*
	 * Keep alignment present in addr when building the IOVA, which
	 * increases the chance we can map a THP.
	 */
	if (!addr)
		iova_alignment = roundup_pow_of_two(length);
	else
		iova_alignment = min_t(unsigned long,
				       roundup_pow_of_two(length),
				       1UL << __ffs64(addr));

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	max_alignment = HPAGE_SIZE;
#endif
	/* Protect against ALIGN() overflow */
	if (iova_alignment >= max_alignment)
		iova_alignment = max_alignment;

	if (iova_alignment < iopt->iova_alignment)
		return -EINVAL;

	interval_tree_for_each_span(&allowed_span, &iopt->allowed_itree,
				    PAGE_SIZE, ULONG_MAX - PAGE_SIZE) {
		if (RB_EMPTY_ROOT(&iopt->allowed_itree.rb_root)) {
			allowed_span.start_used = PAGE_SIZE;
			allowed_span.last_used = ULONG_MAX - PAGE_SIZE;
			allowed_span.is_hole = false;
		}

		if (!__alloc_iova_check_used(&allowed_span, length,
					     iova_alignment, page_offset))
			continue;

		interval_tree_for_each_double_span(
			&used_span, &iopt->reserved_itree, &iopt->area_itree,
			allowed_span.start_used, allowed_span.last_used) {
			if (!__alloc_iova_check_hole(&used_span, length,
						     iova_alignment,
						     page_offset))
				continue;

			*iova = used_span.start_hole;
			return 0;
		}
	}
	return -ENOSPC;
}

static int iopt_check_iova(struct io_pagetable *iopt, unsigned long iova,
			   unsigned long length)
{
	unsigned long last;

	lockdep_assert_held(&iopt->iova_rwsem);

	if ((iova & (iopt->iova_alignment - 1)))
		return -EINVAL;

	if (check_add_overflow(iova, length - 1, &last))
		return -EOVERFLOW;

	/* No reserved IOVA intersects the range */
	if (iopt_reserved_iter_first(iopt, iova, last))
		return -EINVAL;

	/* Check that there is not already a mapping in the range */
	if (iopt_area_iter_first(iopt, iova, last))
		return -EEXIST;
	return 0;
}

/*
 * The area takes a slice of the pages from start_byte to start_byte + length
 */
static int iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area,
			    struct iopt_pages *pages, unsigned long iova,
			    unsigned long start_byte, unsigned long length,
			    int iommu_prot)
{
	lockdep_assert_held_write(&iopt->iova_rwsem);

	if ((iommu_prot & IOMMU_WRITE) && !pages->writable)
		return -EPERM;

	area->iommu_prot = iommu_prot;
	area->page_offset = start_byte % PAGE_SIZE;
	if (area->page_offset & (iopt->iova_alignment - 1))
		return -EINVAL;

	area->node.start = iova;
	if (check_add_overflow(iova, length - 1, &area->node.last))
		return -EOVERFLOW;

	area->pages_node.start = start_byte / PAGE_SIZE;
	if (check_add_overflow(start_byte, length - 1, &area->pages_node.last))
		return -EOVERFLOW;
	area->pages_node.last = area->pages_node.last / PAGE_SIZE;
	if (WARN_ON(area->pages_node.last >= pages->npages))
		return -EOVERFLOW;

	/*
	 * The area is inserted with a NULL pages indicating it is not fully
	 * initialized yet.
	 */
	area->iopt = iopt;
	interval_tree_insert(&area->node, &iopt->area_itree);
	return 0;
}

static struct iopt_area *iopt_area_alloc(void)
{
	struct iopt_area *area;

	area = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT);
	if (!area)
		return NULL;
	RB_CLEAR_NODE(&area->node.rb);
	RB_CLEAR_NODE(&area->pages_node.rb);
	return area;
}
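
/*
 * Allocate an iopt_area for each entry on the pages list and reserve the IOVA
 * range for it. The areas are inserted into the area_itree with a NULL pages
 * pointer; the caller finishes initialization by assigning area->pages once
 * the domains have been filled.
 */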
static int iopt_alloc_area_pages(struct io_pagetable *iopt,
				 struct list_head *pages_list,
				 unsigned long length, unsigned long *dst_iova,
				 int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list *elm;
	unsigned long start;
	unsigned long iova;
	int rc = 0;

	list_for_each_entry(elm, pages_list, next) {
		elm->area = iopt_area_alloc();
		if (!elm->area)
			return -ENOMEM;
	}

	down_write(&iopt->iova_rwsem);
	if ((length & (iopt->iova_alignment - 1)) || !length) {
		rc = -EINVAL;
		goto out_unlock;
	}

	if (flags & IOPT_ALLOC_IOVA) {
		/* Use the first entry to guess the ideal IOVA alignment */
		elm = list_first_entry(pages_list, struct iopt_pages_list,
				       next);
		switch (elm->pages->type) {
		case IOPT_ADDRESS_USER:
			start = elm->start_byte + (uintptr_t)elm->pages->uptr;
			break;
		case IOPT_ADDRESS_FILE:
			start = elm->start_byte + elm->pages->start;
			break;
		}
		rc = iopt_alloc_iova(iopt, dst_iova, start, length);
		if (rc)
			goto out_unlock;
		if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
		    WARN_ON(iopt_check_iova(iopt, *dst_iova, length))) {
			rc = -EINVAL;
			goto out_unlock;
		}
	} else {
		rc = iopt_check_iova(iopt, *dst_iova, length);
		if (rc)
			goto out_unlock;
	}

	/*
	 * Areas are created with a NULL pages so that the IOVA space is
	 * reserved and we can unlock the iova_rwsem.
	 */
	iova = *dst_iova;
	list_for_each_entry(elm, pages_list, next) {
		rc = iopt_insert_area(iopt, elm->area, elm->pages, iova,
				      elm->start_byte, elm->length, iommu_prot);
		if (rc)
			goto out_unlock;
		iova += elm->length;
	}

out_unlock:
	up_write(&iopt->iova_rwsem);
	return rc;
}
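
/*
 * Undo iopt_insert_area() for an area that never had its pages attached,
 * removing it from the area_itree and freeing it.
 */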
static void iopt_abort_area(struct iopt_area *area)
{
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(area->pages);
	if (area->iopt) {
		down_write(&area->iopt->iova_rwsem);
		interval_tree_remove(&area->node, &area->iopt->area_itree);
		up_write(&area->iopt->iova_rwsem);
	}
	kfree(area);
}

void iopt_free_pages_list(struct list_head *pages_list)
{
	struct iopt_pages_list *elm;

	while ((elm = list_first_entry_or_null(pages_list,
					       struct iopt_pages_list, next))) {
		if (elm->area)
			iopt_abort_area(elm->area);
		if (elm->pages)
			iopt_put_pages(elm->pages);
		list_del(&elm->next);
		kfree(elm);
	}
}

static int iopt_fill_domains_pages(struct list_head *pages_list)
{
	struct iopt_pages_list *undo_elm;
	struct iopt_pages_list *elm;
	int rc;

	list_for_each_entry(elm, pages_list, next) {
		rc = iopt_area_fill_domains(elm->area, elm->pages);
		if (rc)
			goto err_undo;
	}
	return 0;

err_undo:
	list_for_each_entry(undo_elm, pages_list, next) {
		if (undo_elm == elm)
			break;
		iopt_area_unfill_domains(undo_elm->area, undo_elm->pages);
	}
	return rc;
}

int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
		   unsigned long length, unsigned long *dst_iova,
		   int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list *elm;
	int rc;

	rc = iopt_alloc_area_pages(iopt, pages_list, length, dst_iova,
				   iommu_prot, flags);
	if (rc)
		return rc;

	down_read(&iopt->domains_rwsem);
	rc = iopt_fill_domains_pages(pages_list);
	if (rc)
		goto out_unlock_domains;

	down_write(&iopt->iova_rwsem);
	list_for_each_entry(elm, pages_list, next) {
		/*
		 * area->pages must be set inside the domains_rwsem to ensure
		 * any newly added domains will get filled. Moves the reference
		 * in from the list.
		 */
		elm->area->pages = elm->pages;
		elm->pages = NULL;
		elm->area = NULL;
	}
	up_write(&iopt->iova_rwsem);
out_unlock_domains:
	up_read(&iopt->domains_rwsem);
	return rc;
}
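
/*
 * Common helper for the map paths below. Consumes the caller's reference on
 * @pages: on success ownership moves into the io_pagetable, on failure the
 * area is aborted and the pages are released.
 */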
static int iopt_map_common(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			   struct iopt_pages *pages, unsigned long *iova,
			   unsigned long length, unsigned long start_byte,
			   int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list elm = {};
	LIST_HEAD(pages_list);
	int rc;

	elm.pages = pages;
	elm.start_byte = start_byte;
	if (ictx->account_mode == IOPT_PAGES_ACCOUNT_MM &&
	    elm.pages->account_mode == IOPT_PAGES_ACCOUNT_USER)
		elm.pages->account_mode = IOPT_PAGES_ACCOUNT_MM;
	elm.length = length;
	list_add(&elm.next, &pages_list);

	rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags);
	if (rc) {
		if (elm.area)
			iopt_abort_area(elm.area);
		if (elm.pages)
			iopt_put_pages(elm.pages);
		return rc;
	}
	return 0;
}

/**
 * iopt_map_user_pages() - Map a user VA to an iova in the io page table
 * @ictx: iommufd_ctx the iopt is part of
 * @iopt: io_pagetable to act on
 * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains
 *        the chosen iova on output. Otherwise it is the iova to map to on input
 * @uptr: User VA to map
 * @length: Number of bytes to map
 * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping
 * @flags: IOPT_ALLOC_IOVA or zero
 *
 * iova, uptr, and length must be aligned to iova_alignment. For domain backed
 * page tables this will pin the pages and load them into the domain at iova.
 * For non-domain page tables this will only setup a lazy reference and the
 * caller must use iopt_access_pages() to touch them.
 *
 * iopt_unmap_iova() must be called to undo this before the io_pagetable can be
 * destroyed.
 */
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, void __user *uptr,
			unsigned long length, int iommu_prot,
			unsigned int flags)
{
	struct iopt_pages *pages;

	pages = iopt_alloc_user_pages(uptr, length, iommu_prot & IOMMU_WRITE);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	return iopt_map_common(ictx, iopt, pages, iova, length,
			       uptr - pages->uptr, iommu_prot, flags);
}

/**
 * iopt_map_file_pages() - Like iopt_map_user_pages, but map a file.
 * @ictx: iommufd_ctx the iopt is part of
 * @iopt: io_pagetable to act on
 * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains
 *        the chosen iova on output. Otherwise it is the iova to map to on input
 * @file: file to map
 * @start: map file starting at this byte offset
 * @length: Number of bytes to map
 * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping
 * @flags: IOPT_ALLOC_IOVA or zero
 */
int iopt_map_file_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, struct file *file,
			unsigned long start, unsigned long length,
			int iommu_prot, unsigned int flags)
{
	struct iopt_pages *pages;

	pages = iopt_alloc_file_pages(file, start, length,
				      iommu_prot & IOMMU_WRITE);
	if (IS_ERR(pages))
		return PTR_ERR(pages);
	return iopt_map_common(ictx, iopt, pages, iova, length,
			       start - pages->start, iommu_prot, flags);
}
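
/*
 * Carries the state that iova_bitmap_for_each() passes to
 * __iommu_read_and_clear_dirty() for each IOVA chunk of the bitmap.
 */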
struct iova_bitmap_fn_arg {
	unsigned long flags;
	struct io_pagetable *iopt;
	struct iommu_domain *domain;
	struct iommu_dirty_bitmap *dirty;
};

static int __iommu_read_and_clear_dirty(struct iova_bitmap *bitmap,
					unsigned long iova, size_t length,
					void *opaque)
{
	struct iopt_area *area;
	struct iopt_area_contig_iter iter;
	struct iova_bitmap_fn_arg *arg = opaque;
	struct iommu_domain *domain = arg->domain;
	struct iommu_dirty_bitmap *dirty = arg->dirty;
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	unsigned long last_iova = iova + length - 1;
	unsigned long flags = arg->flags;
	int ret;

	iopt_for_each_contig_area(&iter, area, arg->iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));

		ret = ops->read_and_clear_dirty(domain, iter.cur_iova,
						last - iter.cur_iova + 1, flags,
						dirty);
		if (ret)
			return ret;
	}

	if (!iopt_area_contig_done(&iter))
		return -EINVAL;
	return 0;
}

static int
iommu_read_and_clear_dirty(struct iommu_domain *domain,
			   struct io_pagetable *iopt, unsigned long flags,
			   struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	struct iommu_iotlb_gather gather;
	struct iommu_dirty_bitmap dirty;
	struct iova_bitmap_fn_arg arg;
	struct iova_bitmap *iter;
	int ret = 0;

	if (!ops || !ops->read_and_clear_dirty)
		return -EOPNOTSUPP;

	iter = iova_bitmap_alloc(bitmap->iova, bitmap->length,
				 bitmap->page_size,
				 u64_to_user_ptr(bitmap->data));
	if (IS_ERR(iter))
		return -ENOMEM;

	iommu_dirty_bitmap_init(&dirty, iter, &gather);

	arg.flags = flags;
	arg.iopt = iopt;
	arg.domain = domain;
	arg.dirty = &dirty;
	iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty);

	if (!(flags & IOMMU_DIRTY_NO_CLEAR))
		iommu_iotlb_sync(domain, &gather);

	iova_bitmap_free(iter);

	return ret;
}

int iommufd_check_iova_range(struct io_pagetable *iopt,
			     struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	size_t iommu_pgsize = iopt->iova_alignment;
	u64 last_iova;

	if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova))
		return -EOVERFLOW;

	if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX)
		return -EOVERFLOW;

	if ((bitmap->iova & (iommu_pgsize - 1)) ||
	    ((last_iova + 1) & (iommu_pgsize - 1)))
		return -EINVAL;

	if (!bitmap->page_size)
		return -EINVAL;

	if ((bitmap->iova & (bitmap->page_size - 1)) ||
	    ((last_iova + 1) & (bitmap->page_size - 1)))
		return -EINVAL;

	return 0;
}
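
/*
 * Read the dirty bits for the requested IOVA range into the user supplied
 * bitmap; @flags may include IOMMU_DIRTY_NO_CLEAR to leave the hardware dirty
 * bits set.
 */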
int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
				   struct iommu_domain *domain,
				   unsigned long flags,
				   struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	int ret;

	ret = iommufd_check_iova_range(iopt, bitmap);
	if (ret)
		return ret;

	down_read(&iopt->iova_rwsem);
	ret = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap);
	up_read(&iopt->iova_rwsem);

	return ret;
}

static int iopt_clear_dirty_data(struct io_pagetable *iopt,
				 struct iommu_domain *domain)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	struct iommu_iotlb_gather gather;
	struct iommu_dirty_bitmap dirty;
	struct iopt_area *area;
	int ret = 0;

	lockdep_assert_held_read(&iopt->iova_rwsem);

	iommu_dirty_bitmap_init(&dirty, NULL, &gather);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		if (!area->pages)
			continue;

		ret = ops->read_and_clear_dirty(domain, iopt_area_iova(area),
						iopt_area_length(area), 0,
						&dirty);
		if (ret)
			break;
	}

	iommu_iotlb_sync(domain, &gather);
	return ret;
}

int iopt_set_dirty_tracking(struct io_pagetable *iopt,
			    struct iommu_domain *domain, bool enable)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	int ret = 0;

	if (!ops)
		return -EOPNOTSUPP;

	down_read(&iopt->iova_rwsem);

	/* Clear dirty bits from PTEs to ensure a clean snapshot */
	if (enable) {
		ret = iopt_clear_dirty_data(iopt, domain);
		if (ret)
			goto out_unlock;
	}

	ret = ops->set_dirty_tracking(domain, enable);

out_unlock:
	up_read(&iopt->iova_rwsem);
	return ret;
}

int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
		   unsigned long length, struct list_head *pages_list)
{
	struct iopt_area_contig_iter iter;
	unsigned long last_iova;
	struct iopt_area *area;
	int rc;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		struct iopt_pages_list *elm;
		unsigned long last = min(last_iova, iopt_area_last_iova(area));

		elm = kzalloc(sizeof(*elm), GFP_KERNEL_ACCOUNT);
		if (!elm) {
			rc = -ENOMEM;
			goto err_free;
		}
		elm->start_byte = iopt_area_start_byte(area, iter.cur_iova);
		elm->pages = area->pages;
		elm->length = (last - iter.cur_iova) + 1;
		kref_get(&elm->pages->kref);
		list_add_tail(&elm->next, pages_list);
	}
	if (!iopt_area_contig_done(&iter)) {
		rc = -ENOENT;
		goto err_free;
	}
	up_read(&iopt->iova_rwsem);
	return 0;
err_free:
	up_read(&iopt->iova_rwsem);
	iopt_free_pages_list(pages_list);
	return rc;
}
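
/*
 * Unmap whole areas covering [start, last]. If an area is still in use by an
 * access it is flagged with prevent_access, the access is notified, and the
 * walk is retried until the access drops its use of the area.
 */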
static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start,
				 unsigned long last, unsigned long *unmapped)
{
	struct iopt_area *area;
	unsigned long unmapped_bytes = 0;
	unsigned int tries = 0;
	int rc = -ENOENT;

	/*
	 * The domains_rwsem must be held in read mode any time any area->pages
	 * is NULL. This prevents domain attach/detach from running
	 * concurrently with cleaning up the area.
	 */
again:
	down_read(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	while ((area = iopt_area_iter_first(iopt, start, last))) {
		unsigned long area_last = iopt_area_last_iova(area);
		unsigned long area_first = iopt_area_iova(area);
		struct iopt_pages *pages;

		/* Userspace should not race map/unmap of the same area */
		if (!area->pages) {
			rc = -EBUSY;
			goto out_unlock_iova;
		}

		if (area_first < start || area_last > last) {
			rc = -ENOENT;
			goto out_unlock_iova;
		}

		if (area_first != start)
			tries = 0;

		/*
		 * num_accesses writers must hold the iova_rwsem too, so we can
		 * safely read it under the write side of the iova_rwsem
		 * without the pages->mutex.
		 */
		if (area->num_accesses) {
			size_t length = iopt_area_length(area);

			start = area_first;
			area->prevent_access = true;
			up_write(&iopt->iova_rwsem);
			up_read(&iopt->domains_rwsem);

			iommufd_access_notify_unmap(iopt, area_first, length);
			/* Something is not responding to unmap requests. */
			tries++;
			if (WARN_ON(tries > 100))
				return -EDEADLOCK;
			goto again;
		}

		pages = area->pages;
		area->pages = NULL;
		up_write(&iopt->iova_rwsem);

		iopt_area_unfill_domains(area, pages);
		iopt_abort_area(area);
		iopt_put_pages(pages);

		unmapped_bytes += area_last - area_first + 1;

		down_write(&iopt->iova_rwsem);
	}
	if (unmapped_bytes)
		rc = 0;

out_unlock_iova:
	up_write(&iopt->iova_rwsem);
	up_read(&iopt->domains_rwsem);
	if (unmapped)
		*unmapped = unmapped_bytes;
	return rc;
}

/**
 * iopt_unmap_iova() - Remove a range of iova
 * @iopt: io_pagetable to act on
 * @iova: Starting iova to unmap
 * @length: Number of bytes to unmap
 * @unmapped: Return number of bytes unmapped
 *
 * The requested range must be a superset of existing ranges.
 * Splitting/truncating IOVA mappings is not allowed.
 */
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
		    unsigned long length, unsigned long *unmapped)
{
	unsigned long iova_last;

	if (!length)
		return -EINVAL;

	if (check_add_overflow(iova, length - 1, &iova_last))
		return -EOVERFLOW;

	return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped);
}
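
/* Unmap every area in the table; an already empty table is not an error. */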
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped)
{
	int rc;

	rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped);
	/* If the IOVAs are empty then unmap all succeeds */
	if (rc == -ENOENT)
		return 0;
	return rc;
}

/* The caller must always free all the nodes in the allowed_iova rb_root. */
int iopt_set_allow_iova(struct io_pagetable *iopt,
			struct rb_root_cached *allowed_iova)
{
	struct iopt_allowed *allowed;

	down_write(&iopt->iova_rwsem);
	swap(*allowed_iova, iopt->allowed_itree);

	for (allowed = iopt_allowed_iter_first(iopt, 0, ULONG_MAX); allowed;
	     allowed = iopt_allowed_iter_next(allowed, 0, ULONG_MAX)) {
		if (iopt_reserved_iter_first(iopt, allowed->node.start,
					     allowed->node.last)) {
			swap(*allowed_iova, iopt->allowed_itree);
			up_write(&iopt->iova_rwsem);
			return -EADDRINUSE;
		}
	}
	up_write(&iopt->iova_rwsem);
	return 0;
}

int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
		      unsigned long last, void *owner)
{
	struct iopt_reserved *reserved;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	if (iopt_area_iter_first(iopt, start, last) ||
	    iopt_allowed_iter_first(iopt, start, last))
		return -EADDRINUSE;

	reserved = kzalloc(sizeof(*reserved), GFP_KERNEL_ACCOUNT);
	if (!reserved)
		return -ENOMEM;
	reserved->node.start = start;
	reserved->node.last = last;
	reserved->owner = owner;
	interval_tree_insert(&reserved->node, &iopt->reserved_itree);
	return 0;
}

static void __iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
	struct iopt_reserved *reserved, *next;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	for (reserved = iopt_reserved_iter_first(iopt, 0, ULONG_MAX); reserved;
	     reserved = next) {
		next = iopt_reserved_iter_next(reserved, 0, ULONG_MAX);

		if (reserved->owner == owner) {
			interval_tree_remove(&reserved->node,
					     &iopt->reserved_itree);
			kfree(reserved);
		}
	}
}

void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
	down_write(&iopt->iova_rwsem);
	__iopt_remove_reserved_iova(iopt, owner);
	up_write(&iopt->iova_rwsem);
}

void iopt_init_table(struct io_pagetable *iopt)
{
	init_rwsem(&iopt->iova_rwsem);
	init_rwsem(&iopt->domains_rwsem);
	iopt->area_itree = RB_ROOT_CACHED;
	iopt->allowed_itree = RB_ROOT_CACHED;
	iopt->reserved_itree = RB_ROOT_CACHED;
	xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT);
	xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC);

	/*
	 * iopts start as SW tables that can use the entire size_t IOVA space
	 * due to the use of size_t in the APIs. They have no alignment
	 * restriction.
	 */
	iopt->iova_alignment = 1;
}
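
/*
 * Called when the io_pagetable is being torn down; every domain and access
 * must already have been removed and every area unmapped.
 */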
void iopt_destroy_table(struct io_pagetable *iopt)
{
	struct interval_tree_node *node;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		iopt_remove_reserved_iova(iopt, NULL);

	while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0,
						ULONG_MAX))) {
		interval_tree_remove(node, &iopt->allowed_itree);
		kfree(container_of(node, struct iopt_allowed, node));
	}

	WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root));
	WARN_ON(!xa_empty(&iopt->domains));
	WARN_ON(!xa_empty(&iopt->access_list));
	WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root));
}

/**
 * iopt_unfill_domain() - Unfill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to unfill
 *
 * This is used when removing a domain from the iopt. Every area in the iopt
 * will be unmapped from the domain. The domain must already be removed from
 * the domains xarray.
 */
static void iopt_unfill_domain(struct io_pagetable *iopt,
			       struct iommu_domain *domain)
{
	struct iopt_area *area;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held_write(&iopt->domains_rwsem);

	/*
	 * Some other domain is holding all the pfns still, rapidly unmap this
	 * domain.
	 */
	if (iopt->next_domain_id != 0) {
		/* Pick an arbitrary remaining domain to act as storage */
		struct iommu_domain *storage_domain =
			xa_load(&iopt->domains, 0);

		for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
		     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
			struct iopt_pages *pages = area->pages;

			if (!pages)
				continue;

			mutex_lock(&pages->mutex);
			if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
				WARN_ON(!area->storage_domain);
			if (area->storage_domain == domain)
				area->storage_domain = storage_domain;
			mutex_unlock(&pages->mutex);

			iopt_area_unmap_domain(area, domain);
		}
		return;
	}

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		interval_tree_remove(&area->pages_node, &pages->domains_itree);
		WARN_ON(area->storage_domain != domain);
		area->storage_domain = NULL;
		iopt_area_unfill_domain(area, pages, domain);
		mutex_unlock(&pages->mutex);
	}
}

/**
 * iopt_fill_domain() - Fill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to fill
 *
 * Fill the domain with PFNs from every area in the iopt. On failure the domain
 * is left unchanged.
 */
static int iopt_fill_domain(struct io_pagetable *iopt,
			    struct iommu_domain *domain)
{
	struct iopt_area *end_area;
	struct iopt_area *area;
	int rc;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held_write(&iopt->domains_rwsem);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		rc = iopt_area_fill_domain(area, domain);
		if (rc) {
			mutex_unlock(&pages->mutex);
			goto out_unfill;
		}
		if (!area->storage_domain) {
			WARN_ON(iopt->next_domain_id != 0);
			area->storage_domain = domain;
			interval_tree_insert(&area->pages_node,
					     &pages->domains_itree);
		}
		mutex_unlock(&pages->mutex);
	}
	return 0;

out_unfill:
	end_area = area;
	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (area == end_area)
			break;
		if (!pages)
			continue;
		mutex_lock(&pages->mutex);
		if (iopt->next_domain_id == 0) {
			interval_tree_remove(&area->pages_node,
					     &pages->domains_itree);
			area->storage_domain = NULL;
		}
		iopt_area_unfill_domain(area, pages, domain);
		mutex_unlock(&pages->mutex);
	}
	return rc;
}

/* All existing areas must conform to an increased page size */
static int iopt_check_iova_alignment(struct io_pagetable *iopt,
				     unsigned long new_iova_alignment)
{
	unsigned long align_mask = new_iova_alignment - 1;
	struct iopt_area *area;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held(&iopt->domains_rwsem);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX))
		if ((iopt_area_iova(area) & align_mask) ||
		    (iopt_area_length(area) & align_mask) ||
		    (area->page_offset & align_mask))
			return -EADDRINUSE;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) {
		struct iommufd_access *access;
		unsigned long index;

		xa_for_each(&iopt->access_list, index, access)
			if (WARN_ON(access->iova_alignment >
				    new_iova_alignment))
				return -EADDRINUSE;
	}
	return 0;
}
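
/*
 * Add @domain to the table: reserve the IOVA outside its aperture, raise
 * iova_alignment to the domain's minimum page size if needed, fill the domain
 * with the PFNs of every existing area and store it in the domains xarray.
 */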
int iopt_table_add_domain(struct io_pagetable *iopt,
			  struct iommu_domain *domain)
{
	const struct iommu_domain_geometry *geometry = &domain->geometry;
	struct iommu_domain *iter_domain;
	unsigned int new_iova_alignment;
	unsigned long index;
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);

	xa_for_each(&iopt->domains, index, iter_domain) {
		if (WARN_ON(iter_domain == domain)) {
			rc = -EEXIST;
			goto out_unlock;
		}
	}

	/*
	 * The io page size drives the iova_alignment. Internally the iopt_pages
	 * works in PAGE_SIZE units and we adjust when mapping sub-PAGE_SIZE
	 * objects into the iommu_domain.
	 *
	 * An iommu_domain must always be able to accept PAGE_SIZE to be
	 * compatible as we can't guarantee higher contiguity.
	 */
	new_iova_alignment = max_t(unsigned long,
				   1UL << __ffs(domain->pgsize_bitmap),
				   iopt->iova_alignment);
	if (new_iova_alignment > PAGE_SIZE) {
		rc = -EINVAL;
		goto out_unlock;
	}
	if (new_iova_alignment != iopt->iova_alignment) {
		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
		if (rc)
			goto out_unlock;
	}

	/* No area exists that is outside the allowed domain aperture */
	if (geometry->aperture_start != 0) {
		rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1,
				       domain);
		if (rc)
			goto out_reserved;
	}
	if (geometry->aperture_end != ULONG_MAX) {
		rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1,
				       ULONG_MAX, domain);
		if (rc)
			goto out_reserved;
	}

	rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL);
	if (rc)
		goto out_reserved;

	rc = iopt_fill_domain(iopt, domain);
	if (rc)
		goto out_release;

	iopt->iova_alignment = new_iova_alignment;
	xa_store(&iopt->domains, iopt->next_domain_id, domain, GFP_KERNEL);
	iopt->next_domain_id++;
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return 0;
out_release:
	xa_release(&iopt->domains, iopt->next_domain_id);
out_reserved:
	__iopt_remove_reserved_iova(iopt, domain);
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}

static int iopt_calculate_iova_alignment(struct io_pagetable *iopt)
{
	unsigned long new_iova_alignment;
	struct iommufd_access *access;
	struct iommu_domain *domain;
	unsigned long index;

	lockdep_assert_held_write(&iopt->iova_rwsem);
	lockdep_assert_held(&iopt->domains_rwsem);

	/* See batch_iommu_map_small() */
	if (iopt->disable_large_pages)
		new_iova_alignment = PAGE_SIZE;
	else
		new_iova_alignment = 1;

	xa_for_each(&iopt->domains, index, domain)
		new_iova_alignment = max_t(unsigned long,
					   1UL << __ffs(domain->pgsize_bitmap),
					   new_iova_alignment);
	xa_for_each(&iopt->access_list, index, access)
		new_iova_alignment = max_t(unsigned long,
					   access->iova_alignment,
					   new_iova_alignment);

	if (new_iova_alignment > iopt->iova_alignment) {
		int rc;

		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
		if (rc)
			return rc;
	}
	iopt->iova_alignment = new_iova_alignment;
	return 0;
}
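
/*
 * Remove @domain from the table: unmap every area from it, drop the reserved
 * ranges it contributed and recompute iova_alignment from what remains.
 */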
void iopt_table_remove_domain(struct io_pagetable *iopt,
			      struct iommu_domain *domain)
{
	struct iommu_domain *iter_domain = NULL;
	unsigned long index;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);

	xa_for_each(&iopt->domains, index, iter_domain)
		if (iter_domain == domain)
			break;
	if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id)
		goto out_unlock;

	/*
	 * Compress the xarray to keep it linear by swapping the entry to erase
	 * with the tail entry and shrinking the tail.
	 */
	iopt->next_domain_id--;
	iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id);
	if (index != iopt->next_domain_id)
		xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL);

	iopt_unfill_domain(iopt, domain);
	__iopt_remove_reserved_iova(iopt, domain);

	WARN_ON(iopt_calculate_iova_alignment(iopt));
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}

/**
 * iopt_area_split - Split an area into two parts at iova
 * @area: The area to split
 * @iova: Becomes the last of a new area
 *
 * This splits an area into two. It is part of the VFIO compatibility to allow
 * poking a hole in the mapping. The two areas continue to point at the same
 * iopt_pages, just with different starting bytes.
 */
static int iopt_area_split(struct iopt_area *area, unsigned long iova)
{
	unsigned long alignment = area->iopt->iova_alignment;
	unsigned long last_iova = iopt_area_last_iova(area);
	unsigned long start_iova = iopt_area_iova(area);
	unsigned long new_start = iova + 1;
	struct io_pagetable *iopt = area->iopt;
	struct iopt_pages *pages = area->pages;
	struct iopt_area *lhs;
	struct iopt_area *rhs;
	int rc;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	if (iova == start_iova || iova == last_iova)
		return 0;

	if (!pages || area->prevent_access)
		return -EBUSY;

	if (new_start & (alignment - 1) ||
	    iopt_area_start_byte(area, new_start) & (alignment - 1))
		return -EINVAL;

	lhs = iopt_area_alloc();
	if (!lhs)
		return -ENOMEM;

	rhs = iopt_area_alloc();
	if (!rhs) {
		rc = -ENOMEM;
		goto err_free_lhs;
	}

	mutex_lock(&pages->mutex);
	/*
	 * Splitting is not permitted if an access exists; we don't track
	 * enough information to split existing accesses.
	 */
	if (area->num_accesses) {
		rc = -EINVAL;
		goto err_unlock;
	}

	/*
	 * Splitting is not permitted if a domain could have been mapped with
	 * huge pages.
	 */
	if (area->storage_domain && !iopt->disable_large_pages) {
		rc = -EINVAL;
		goto err_unlock;
	}

	interval_tree_remove(&area->node, &iopt->area_itree);
	rc = iopt_insert_area(iopt, lhs, area->pages, start_iova,
			      iopt_area_start_byte(area, start_iova),
			      (new_start - 1) - start_iova + 1,
			      area->iommu_prot);
	if (WARN_ON(rc))
		goto err_insert;

	rc = iopt_insert_area(iopt, rhs, area->pages, new_start,
			      iopt_area_start_byte(area, new_start),
			      last_iova - new_start + 1, area->iommu_prot);
	if (WARN_ON(rc))
		goto err_remove_lhs;

	/*
	 * If the original area has filled a domain, domains_itree has to be
	 * updated.
	 */
	if (area->storage_domain) {
		interval_tree_remove(&area->pages_node, &pages->domains_itree);
		interval_tree_insert(&lhs->pages_node, &pages->domains_itree);
		interval_tree_insert(&rhs->pages_node, &pages->domains_itree);
	}

	lhs->storage_domain = area->storage_domain;
	lhs->pages = area->pages;
	rhs->storage_domain = area->storage_domain;
	rhs->pages = area->pages;
	kref_get(&rhs->pages->kref);
	kfree(area);
	mutex_unlock(&pages->mutex);

	/*
	 * No change to domains or accesses because the pages have not been
	 * changed.
	 */
	return 0;

err_remove_lhs:
	interval_tree_remove(&lhs->node, &iopt->area_itree);
err_insert:
	interval_tree_insert(&area->node, &iopt->area_itree);
err_unlock:
	mutex_unlock(&pages->mutex);
	kfree(rhs);
err_free_lhs:
	kfree(lhs);
	return rc;
}
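
/*
 * Ensure each IOVA in @iovas lies on an area boundary by splitting the area
 * that contains it, so a later unmap can remove just that part of the mapping.
 */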
int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
		  size_t num_iovas)
{
	int rc = 0;
	int i;

	down_write(&iopt->iova_rwsem);
	for (i = 0; i < num_iovas; i++) {
		struct iopt_area *area;

		area = iopt_area_iter_first(iopt, iovas[i], iovas[i]);
		if (!area)
			continue;
		rc = iopt_area_split(area, iovas[i]);
		if (rc)
			break;
	}
	up_write(&iopt->iova_rwsem);
	return rc;
}

void iopt_enable_large_pages(struct io_pagetable *iopt)
{
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	WRITE_ONCE(iopt->disable_large_pages, false);
	rc = iopt_calculate_iova_alignment(iopt);
	WARN_ON(rc);
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}

int iopt_disable_large_pages(struct io_pagetable *iopt)
{
	int rc = 0;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	if (iopt->disable_large_pages)
		goto out_unlock;

	/* Won't do it if domains already have pages mapped in them */
	if (!xa_empty(&iopt->domains) &&
	    !RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) {
		rc = -EINVAL;
		goto out_unlock;
	}

	WRITE_ONCE(iopt->disable_large_pages, true);
	rc = iopt_calculate_iova_alignment(iopt);
	if (rc)
		WRITE_ONCE(iopt->disable_large_pages, false);
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}
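
/*
 * Register an access object with the table. The table's iova_alignment may
 * grow to honour access->iova_alignment; existing areas that would violate
 * the new alignment cause the registration to fail.
 */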
int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access)
{
	u32 new_id;
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	rc = xa_alloc(&iopt->access_list, &new_id, access, xa_limit_16b,
		      GFP_KERNEL_ACCOUNT);

	if (rc)
		goto out_unlock;

	rc = iopt_calculate_iova_alignment(iopt);
	if (rc) {
		xa_erase(&iopt->access_list, new_id);
		goto out_unlock;
	}
	access->iopt_access_list_id = new_id;

out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}

void iopt_remove_access(struct io_pagetable *iopt,
			struct iommufd_access *access,
			u32 iopt_access_list_id)
{
	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	WARN_ON(xa_erase(&iopt->access_list, iopt_access_list_id) != access);
	WARN_ON(iopt_calculate_iova_alignment(iopt));
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}

/* Narrow the valid_iova_itree to include reserved ranges from a device. */
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
					struct device *dev,
					phys_addr_t *sw_msi_start)
{
	struct iommu_resv_region *resv;
	LIST_HEAD(resv_regions);
	unsigned int num_hw_msi = 0;
	unsigned int num_sw_msi = 0;
	int rc;

	if (iommufd_should_fail())
		return -EINVAL;

	down_write(&iopt->iova_rwsem);
	/* FIXME: drivers allocate memory but there is no failure propagated */
	iommu_get_resv_regions(dev, &resv_regions);

	list_for_each_entry(resv, &resv_regions, list) {
		if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
			continue;

		if (sw_msi_start && resv->type == IOMMU_RESV_MSI)
			num_hw_msi++;
		if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) {
			*sw_msi_start = resv->start;
			num_sw_msi++;
		}

		rc = iopt_reserve_iova(iopt, resv->start,
				       resv->length - 1 + resv->start, dev);
		if (rc)
			goto out_reserved;
	}

	/* Drivers must offer sane combinations of regions */
	if (WARN_ON(num_sw_msi && num_hw_msi) || WARN_ON(num_sw_msi > 1)) {
		rc = -EINVAL;
		goto out_reserved;
	}

	rc = 0;
	goto out_free_resv;

out_reserved:
	__iopt_remove_reserved_iova(iopt, dev);
out_free_resv:
	iommu_put_resv_regions(dev, &resv_regions);
	up_write(&iopt->iova_rwsem);
	return rc;
}