// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 *
 * The io_pagetable is the top of the data structure that maps IOVAs to PFNs.
 * The PFNs can be placed into an iommu_domain, or returned to the caller as a
 * page list for access by an in-kernel user.
 *
 * The data structure uses iopt_pages to optimize the storage of the PFNs
 * between the domains and the xarray.
 */
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/lockdep.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <uapi/linux/iommufd.h>

#include "double_span.h"
#include "io_pagetable.h"

struct iopt_pages_list {
	struct iopt_pages *pages;
	struct iopt_area *area;
	struct list_head next;
	unsigned long start_byte;
	unsigned long length;
};

struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter,
					struct io_pagetable *iopt,
					unsigned long iova,
					unsigned long last_iova)
{
	lockdep_assert_held(&iopt->iova_rwsem);

	iter->cur_iova = iova;
	iter->last_iova = last_iova;
	iter->area = iopt_area_iter_first(iopt, iova, iova);
	if (!iter->area)
		return NULL;
	if (!iter->area->pages) {
		iter->area = NULL;
		return NULL;
	}
	return iter->area;
}

struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter)
{
	unsigned long last_iova;

	if (!iter->area)
		return NULL;
	last_iova = iopt_area_last_iova(iter->area);
	if (iter->last_iova <= last_iova)
		return NULL;

	iter->cur_iova = last_iova + 1;
	iter->area = iopt_area_iter_next(iter->area, iter->cur_iova,
					 iter->last_iova);
	if (!iter->area)
		return NULL;
	if (iter->cur_iova != iopt_area_iova(iter->area) ||
	    !iter->area->pages) {
		iter->area = NULL;
		return NULL;
	}
	return iter->area;
}

static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span,
				    unsigned long length,
				    unsigned long iova_alignment,
				    unsigned long page_offset)
{
	if (span->is_used || span->last_hole - span->start_hole < length - 1)
		return false;

	span->start_hole = ALIGN(span->start_hole, iova_alignment) |
			   page_offset;
	if (span->start_hole > span->last_hole ||
	    span->last_hole - span->start_hole < length - 1)
		return false;
	return true;
}

static bool __alloc_iova_check_used(struct interval_tree_span_iter *span,
				    unsigned long length,
				    unsigned long iova_alignment,
				    unsigned long page_offset)
{
	if (span->is_hole || span->last_used - span->start_used < length - 1)
		return false;

	span->start_used = ALIGN(span->start_used, iova_alignment) |
			   page_offset;
	if (span->start_used > span->last_used ||
	    span->last_used - span->start_used < length - 1)
		return false;
	return true;
}
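/*
 * Worked example (illustrative only, values chosen for exposition): with
 * iova_alignment == 0x1000 and page_offset == 0x800, a hole starting at
 * 0x12345 becomes ALIGN(0x12345, 0x1000) == 0x13000, then | 0x800 gives
 * 0x13800. The candidate IOVA therefore keeps the same sub-page offset as
 * the user pointer, and the span is rejected if the adjusted start no
 * longer leaves room for the requested length.
 */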
/*
 * Automatically find a block of IOVA that is not being used and not reserved.
 * Does not return a 0 IOVA even if it is valid.
 */
static int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova,
			   unsigned long uptr, unsigned long length)
{
	unsigned long page_offset = uptr % PAGE_SIZE;
	struct interval_tree_double_span_iter used_span;
	struct interval_tree_span_iter allowed_span;
	unsigned long max_alignment = PAGE_SIZE;
	unsigned long iova_alignment;

	lockdep_assert_held(&iopt->iova_rwsem);

	/* Protect roundup_pow_of_two() from overflow */
	if (length == 0 || length >= ULONG_MAX / 2)
		return -EOVERFLOW;

	/*
	 * Keep the alignment present in the uptr when building the IOVA, as
	 * this increases the chance we can map a THP.
	 */
	if (!uptr)
		iova_alignment = roundup_pow_of_two(length);
	else
		iova_alignment = min_t(unsigned long,
				       roundup_pow_of_two(length),
				       1UL << __ffs64(uptr));

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	max_alignment = HPAGE_SIZE;
#endif
	/* Protect against ALIGN() overflow */
	if (iova_alignment >= max_alignment)
		iova_alignment = max_alignment;

	if (iova_alignment < iopt->iova_alignment)
		return -EINVAL;

	interval_tree_for_each_span(&allowed_span, &iopt->allowed_itree,
				    PAGE_SIZE, ULONG_MAX - PAGE_SIZE) {
		if (RB_EMPTY_ROOT(&iopt->allowed_itree.rb_root)) {
			allowed_span.start_used = PAGE_SIZE;
			allowed_span.last_used = ULONG_MAX - PAGE_SIZE;
			allowed_span.is_hole = false;
		}

		if (!__alloc_iova_check_used(&allowed_span, length,
					     iova_alignment, page_offset))
			continue;

		interval_tree_for_each_double_span(
			&used_span, &iopt->reserved_itree, &iopt->area_itree,
			allowed_span.start_used, allowed_span.last_used) {
			if (!__alloc_iova_check_hole(&used_span, length,
						     iova_alignment,
						     page_offset))
				continue;

			*iova = used_span.start_hole;
			return 0;
		}
	}
	return -ENOSPC;
}

static int iopt_check_iova(struct io_pagetable *iopt, unsigned long iova,
			   unsigned long length)
{
	unsigned long last;

	lockdep_assert_held(&iopt->iova_rwsem);

	if ((iova & (iopt->iova_alignment - 1)))
		return -EINVAL;

	if (check_add_overflow(iova, length - 1, &last))
		return -EOVERFLOW;

	/* No reserved IOVA intersects the range */
	if (iopt_reserved_iter_first(iopt, iova, last))
		return -EINVAL;

	/* Check that there is not already a mapping in the range */
	if (iopt_area_iter_first(iopt, iova, last))
		return -EEXIST;
	return 0;
}
/*
 * The area takes a slice of the pages from start_byte to start_byte + length
 */
static int iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area,
			    struct iopt_pages *pages, unsigned long iova,
			    unsigned long start_byte, unsigned long length,
			    int iommu_prot)
{
	lockdep_assert_held_write(&iopt->iova_rwsem);

	if ((iommu_prot & IOMMU_WRITE) && !pages->writable)
		return -EPERM;

	area->iommu_prot = iommu_prot;
	area->page_offset = start_byte % PAGE_SIZE;
	if (area->page_offset & (iopt->iova_alignment - 1))
		return -EINVAL;

	area->node.start = iova;
	if (check_add_overflow(iova, length - 1, &area->node.last))
		return -EOVERFLOW;

	area->pages_node.start = start_byte / PAGE_SIZE;
	if (check_add_overflow(start_byte, length - 1, &area->pages_node.last))
		return -EOVERFLOW;
	area->pages_node.last = area->pages_node.last / PAGE_SIZE;
	if (WARN_ON(area->pages_node.last >= pages->npages))
		return -EOVERFLOW;

	/*
	 * The area is inserted with a NULL pages, indicating it is not fully
	 * initialized yet.
	 */
	area->iopt = iopt;
	interval_tree_insert(&area->node, &iopt->area_itree);
	return 0;
}

static struct iopt_area *iopt_area_alloc(void)
{
	struct iopt_area *area;

	area = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT);
	if (!area)
		return NULL;
	RB_CLEAR_NODE(&area->node.rb);
	RB_CLEAR_NODE(&area->pages_node.rb);
	return area;
}

static int iopt_alloc_area_pages(struct io_pagetable *iopt,
				 struct list_head *pages_list,
				 unsigned long length, unsigned long *dst_iova,
				 int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list *elm;
	unsigned long iova;
	int rc = 0;

	list_for_each_entry(elm, pages_list, next) {
		elm->area = iopt_area_alloc();
		if (!elm->area)
			return -ENOMEM;
	}

	down_write(&iopt->iova_rwsem);
	if ((length & (iopt->iova_alignment - 1)) || !length) {
		rc = -EINVAL;
		goto out_unlock;
	}

	if (flags & IOPT_ALLOC_IOVA) {
		/* Use the first entry to guess the ideal IOVA alignment */
		elm = list_first_entry(pages_list, struct iopt_pages_list,
				       next);
		rc = iopt_alloc_iova(
			iopt, dst_iova,
			(uintptr_t)elm->pages->uptr + elm->start_byte, length);
		if (rc)
			goto out_unlock;
		if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
		    WARN_ON(iopt_check_iova(iopt, *dst_iova, length))) {
			rc = -EINVAL;
			goto out_unlock;
		}
	} else {
		rc = iopt_check_iova(iopt, *dst_iova, length);
		if (rc)
			goto out_unlock;
	}

	/*
	 * Areas are created with a NULL pages so that the IOVA space is
	 * reserved and we can unlock the iova_rwsem.
	 */
	iova = *dst_iova;
	list_for_each_entry(elm, pages_list, next) {
		rc = iopt_insert_area(iopt, elm->area, elm->pages, iova,
				      elm->start_byte, elm->length, iommu_prot);
		if (rc)
			goto out_unlock;
		iova += elm->length;
	}

out_unlock:
	up_write(&iopt->iova_rwsem);
	return rc;
}
static void iopt_abort_area(struct iopt_area *area)
{
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(area->pages);
	if (area->iopt) {
		down_write(&area->iopt->iova_rwsem);
		interval_tree_remove(&area->node, &area->iopt->area_itree);
		up_write(&area->iopt->iova_rwsem);
	}
	kfree(area);
}

void iopt_free_pages_list(struct list_head *pages_list)
{
	struct iopt_pages_list *elm;

	while ((elm = list_first_entry_or_null(pages_list,
					       struct iopt_pages_list, next))) {
		if (elm->area)
			iopt_abort_area(elm->area);
		if (elm->pages)
			iopt_put_pages(elm->pages);
		list_del(&elm->next);
		kfree(elm);
	}
}

static int iopt_fill_domains_pages(struct list_head *pages_list)
{
	struct iopt_pages_list *undo_elm;
	struct iopt_pages_list *elm;
	int rc;

	list_for_each_entry(elm, pages_list, next) {
		rc = iopt_area_fill_domains(elm->area, elm->pages);
		if (rc)
			goto err_undo;
	}
	return 0;

err_undo:
	list_for_each_entry(undo_elm, pages_list, next) {
		if (undo_elm == elm)
			break;
		iopt_area_unfill_domains(undo_elm->area, undo_elm->pages);
	}
	return rc;
}

int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
		   unsigned long length, unsigned long *dst_iova,
		   int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list *elm;
	int rc;

	rc = iopt_alloc_area_pages(iopt, pages_list, length, dst_iova,
				   iommu_prot, flags);
	if (rc)
		return rc;

	down_read(&iopt->domains_rwsem);
	rc = iopt_fill_domains_pages(pages_list);
	if (rc)
		goto out_unlock_domains;

	down_write(&iopt->iova_rwsem);
	list_for_each_entry(elm, pages_list, next) {
		/*
		 * area->pages must be set inside the domains_rwsem to ensure
		 * any newly added domains will get filled. Moves the reference
		 * in from the list.
		 */
		elm->area->pages = elm->pages;
		elm->pages = NULL;
		elm->area = NULL;
	}
	up_write(&iopt->iova_rwsem);
out_unlock_domains:
	up_read(&iopt->domains_rwsem);
	return rc;
}

/**
 * iopt_map_user_pages() - Map a user VA to an iova in the io page table
 * @ictx: iommufd_ctx the iopt is part of
 * @iopt: io_pagetable to act on
 * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains
 *        the chosen iova on output. Otherwise it is the iova to map to on
 *        input
 * @uptr: User VA to map
 * @length: Number of bytes to map
 * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping
 * @flags: IOPT_ALLOC_IOVA or zero
 *
 * iova, uptr, and length must be aligned to iova_alignment. For domain backed
 * page tables this will pin the pages and load them into the domain at iova.
 * For non-domain page tables this will only set up a lazy reference and the
 * caller must use iopt_access_pages() to touch them.
 *
 * iopt_unmap_iova() must be called to undo this before the io_pagetable can be
 * destroyed.
 */
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, void __user *uptr,
			unsigned long length, int iommu_prot,
			unsigned int flags)
{
	struct iopt_pages_list elm = {};
	LIST_HEAD(pages_list);
	int rc;

	elm.pages = iopt_alloc_pages(uptr, length, iommu_prot & IOMMU_WRITE);
	if (IS_ERR(elm.pages))
		return PTR_ERR(elm.pages);
	if (ictx->account_mode == IOPT_PAGES_ACCOUNT_MM &&
	    elm.pages->account_mode == IOPT_PAGES_ACCOUNT_USER)
		elm.pages->account_mode = IOPT_PAGES_ACCOUNT_MM;
	elm.start_byte = uptr - elm.pages->uptr;
	elm.length = length;
	list_add(&elm.next, &pages_list);

	rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags);
	if (rc) {
		if (elm.area)
			iopt_abort_area(elm.area);
		if (elm.pages)
			iopt_put_pages(elm.pages);
		return rc;
	}
	return 0;
}
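/*
 * Example (for illustration only; the real callers live outside this file):
 * a user of iopt_map_user_pages() that wants the kernel to pick the IOVA
 * would do roughly:
 *
 *	unsigned long iova;
 *	int rc;
 *
 *	rc = iopt_map_user_pages(ictx, iopt, &iova, uptr, length,
 *				 IOMMU_READ | IOMMU_WRITE, IOPT_ALLOC_IOVA);
 *	if (rc)
 *		return rc;
 *	...
 *	rc = iopt_unmap_iova(iopt, iova, length, NULL);
 *
 * With flags == 0 the caller supplies *iova instead and it is validated
 * against reserved ranges and existing areas by iopt_check_iova().
 */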
struct iova_bitmap_fn_arg {
	unsigned long flags;
	struct io_pagetable *iopt;
	struct iommu_domain *domain;
	struct iommu_dirty_bitmap *dirty;
};

static int __iommu_read_and_clear_dirty(struct iova_bitmap *bitmap,
					unsigned long iova, size_t length,
					void *opaque)
{
	struct iopt_area *area;
	struct iopt_area_contig_iter iter;
	struct iova_bitmap_fn_arg *arg = opaque;
	struct iommu_domain *domain = arg->domain;
	struct iommu_dirty_bitmap *dirty = arg->dirty;
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	unsigned long last_iova = iova + length - 1;
	unsigned long flags = arg->flags;
	int ret;

	iopt_for_each_contig_area(&iter, area, arg->iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));

		ret = ops->read_and_clear_dirty(domain, iter.cur_iova,
						last - iter.cur_iova + 1, flags,
						dirty);
		if (ret)
			return ret;
	}

	if (!iopt_area_contig_done(&iter))
		return -EINVAL;
	return 0;
}

static int
iommu_read_and_clear_dirty(struct iommu_domain *domain,
			   struct io_pagetable *iopt, unsigned long flags,
			   struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	struct iommu_iotlb_gather gather;
	struct iommu_dirty_bitmap dirty;
	struct iova_bitmap_fn_arg arg;
	struct iova_bitmap *iter;
	int ret = 0;

	if (!ops || !ops->read_and_clear_dirty)
		return -EOPNOTSUPP;

	iter = iova_bitmap_alloc(bitmap->iova, bitmap->length,
				 bitmap->page_size,
				 u64_to_user_ptr(bitmap->data));
	if (IS_ERR(iter))
		return -ENOMEM;

	iommu_dirty_bitmap_init(&dirty, iter, &gather);

	arg.flags = flags;
	arg.iopt = iopt;
	arg.domain = domain;
	arg.dirty = &dirty;
	iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty);

	if (!(flags & IOMMU_DIRTY_NO_CLEAR))
		iommu_iotlb_sync(domain, &gather);

	iova_bitmap_free(iter);

	return ret;
}

int iommufd_check_iova_range(struct io_pagetable *iopt,
			     struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	size_t iommu_pgsize = iopt->iova_alignment;
	u64 last_iova;

	if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova))
		return -EOVERFLOW;

	if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX)
		return -EOVERFLOW;

	if ((bitmap->iova & (iommu_pgsize - 1)) ||
	    ((last_iova + 1) & (iommu_pgsize - 1)))
		return -EINVAL;

	if (!bitmap->page_size)
		return -EINVAL;

	if ((bitmap->iova & (bitmap->page_size - 1)) ||
	    ((last_iova + 1) & (bitmap->page_size - 1)))
		return -EINVAL;

	return 0;
}

int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
				   struct iommu_domain *domain,
				   unsigned long flags,
				   struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	int ret;

	ret = iommufd_check_iova_range(iopt, bitmap);
	if (ret)
		return ret;

	down_read(&iopt->iova_rwsem);
	ret = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap);
	up_read(&iopt->iova_rwsem);

	return ret;
}

static int iopt_clear_dirty_data(struct io_pagetable *iopt,
				 struct iommu_domain *domain)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	struct iommu_iotlb_gather gather;
	struct iommu_dirty_bitmap dirty;
	struct iopt_area *area;
	int ret = 0;

	lockdep_assert_held_read(&iopt->iova_rwsem);

	iommu_dirty_bitmap_init(&dirty, NULL, &gather);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		if (!area->pages)
			continue;

		ret = ops->read_and_clear_dirty(domain, iopt_area_iova(area),
						iopt_area_length(area), 0,
						&dirty);
		if (ret)
			break;
	}

	iommu_iotlb_sync(domain, &gather);
	return ret;
}

int iopt_set_dirty_tracking(struct io_pagetable *iopt,
			    struct iommu_domain *domain, bool enable)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	int ret = 0;

	if (!ops)
		return -EOPNOTSUPP;

	down_read(&iopt->iova_rwsem);

	/* Clear dirty bits from PTEs to ensure a clean snapshot */
	if (enable) {
		ret = iopt_clear_dirty_data(iopt, domain);
		if (ret)
			goto out_unlock;
	}

	ret = ops->set_dirty_tracking(domain, enable);

out_unlock:
	up_read(&iopt->iova_rwsem);
	return ret;
}
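/*
 * Dirty tracking flow (illustrative only): a caller monitoring writes would
 * first enable tracking, then periodically harvest the dirty bits:
 *
 *	rc = iopt_set_dirty_tracking(iopt, domain, true);
 *	...
 *	rc = iopt_read_and_clear_dirty_data(iopt, domain, flags, bitmap);
 *	...
 *	rc = iopt_set_dirty_tracking(iopt, domain, false);
 *
 * Enabling clears any stale dirty bits first so the first report is a clean
 * snapshot; passing IOMMU_DIRTY_NO_CLEAR in flags reads the bits without
 * resetting them.
 */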
int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
		   unsigned long length, struct list_head *pages_list)
{
	struct iopt_area_contig_iter iter;
	unsigned long last_iova;
	struct iopt_area *area;
	int rc;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		struct iopt_pages_list *elm;
		unsigned long last = min(last_iova, iopt_area_last_iova(area));

		elm = kzalloc(sizeof(*elm), GFP_KERNEL_ACCOUNT);
		if (!elm) {
			rc = -ENOMEM;
			goto err_free;
		}
		elm->start_byte = iopt_area_start_byte(area, iter.cur_iova);
		elm->pages = area->pages;
		elm->length = (last - iter.cur_iova) + 1;
		kref_get(&elm->pages->kref);
		list_add_tail(&elm->next, pages_list);
	}
	if (!iopt_area_contig_done(&iter)) {
		rc = -ENOENT;
		goto err_free;
	}
	up_read(&iopt->iova_rwsem);
	return 0;
err_free:
	up_read(&iopt->iova_rwsem);
	iopt_free_pages_list(pages_list);
	return rc;
}
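/*
 * Illustrative sketch: a consumer of iopt_get_pages() builds a list of
 * iopt_pages references covering a contiguous IOVA range and must drop them
 * with iopt_free_pages_list() when done:
 *
 *	LIST_HEAD(pages_list);
 *
 *	rc = iopt_get_pages(iopt, iova, length, &pages_list);
 *	if (rc)
 *		return rc;
 *	...
 *	iopt_free_pages_list(&pages_list);
 *
 * The range must be fully covered by areas or -ENOENT is returned.
 */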
static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start,
				 unsigned long last, unsigned long *unmapped)
{
	struct iopt_area *area;
	unsigned long unmapped_bytes = 0;
	unsigned int tries = 0;
	int rc = -ENOENT;

	/*
	 * The domains_rwsem must be held in read mode any time any area->pages
	 * is NULL. This prevents domain attach/detach from running
	 * concurrently with cleaning up the area.
	 */
again:
	down_read(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	while ((area = iopt_area_iter_first(iopt, start, last))) {
		unsigned long area_last = iopt_area_last_iova(area);
		unsigned long area_first = iopt_area_iova(area);
		struct iopt_pages *pages;

		/* Userspace should not race map/unmaps of the same area */
		if (!area->pages) {
			rc = -EBUSY;
			goto out_unlock_iova;
		}

		if (area_first < start || area_last > last) {
			rc = -ENOENT;
			goto out_unlock_iova;
		}

		if (area_first != start)
			tries = 0;

		/*
		 * num_accesses writers must hold the iova_rwsem too, so we can
		 * safely read it under the write side of the iova_rwsem
		 * without the pages->mutex.
		 */
		if (area->num_accesses) {
			size_t length = iopt_area_length(area);

			start = area_first;
			area->prevent_access = true;
			up_write(&iopt->iova_rwsem);
			up_read(&iopt->domains_rwsem);

			iommufd_access_notify_unmap(iopt, area_first, length);
			/* Something is not responding to unmap requests. */
			tries++;
			if (WARN_ON(tries > 100))
				return -EDEADLOCK;
			goto again;
		}

		pages = area->pages;
		area->pages = NULL;
		up_write(&iopt->iova_rwsem);

		iopt_area_unfill_domains(area, pages);
		iopt_abort_area(area);
		iopt_put_pages(pages);

		unmapped_bytes += area_last - area_first + 1;

		down_write(&iopt->iova_rwsem);
	}
	if (unmapped_bytes)
		rc = 0;

out_unlock_iova:
	up_write(&iopt->iova_rwsem);
	up_read(&iopt->domains_rwsem);
	if (unmapped)
		*unmapped = unmapped_bytes;
	return rc;
}

/**
 * iopt_unmap_iova() - Remove a range of iova
 * @iopt: io_pagetable to act on
 * @iova: Starting iova to unmap
 * @length: Number of bytes to unmap
 * @unmapped: Return number of bytes unmapped
 *
 * The requested range must be a superset of existing ranges.
 * Splitting/truncating IOVA mappings is not allowed.
 */
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
		    unsigned long length, unsigned long *unmapped)
{
	unsigned long iova_last;

	if (!length)
		return -EINVAL;

	if (check_add_overflow(iova, length - 1, &iova_last))
		return -EOVERFLOW;

	return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped);
}

int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped)
{
	int rc;

	rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped);
	/* If the IOVAs are empty then unmap all succeeds */
	if (rc == -ENOENT)
		return 0;
	return rc;
}
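/*
 * Illustrative sketch: tearing down a previously created mapping. The range
 * passed to iopt_unmap_iova() may span several areas but must not partially
 * cover any area:
 *
 *	unsigned long unmapped;
 *
 *	rc = iopt_unmap_iova(iopt, iova, length, &unmapped);
 *
 * iopt_unmap_all() is the same operation over the entire IOVA space and
 * treats an already-empty table as success.
 */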
/* The caller must always free all the nodes in the allowed_iova rb_root. */
int iopt_set_allow_iova(struct io_pagetable *iopt,
			struct rb_root_cached *allowed_iova)
{
	struct iopt_allowed *allowed;

	down_write(&iopt->iova_rwsem);
	swap(*allowed_iova, iopt->allowed_itree);

	for (allowed = iopt_allowed_iter_first(iopt, 0, ULONG_MAX); allowed;
	     allowed = iopt_allowed_iter_next(allowed, 0, ULONG_MAX)) {
		if (iopt_reserved_iter_first(iopt, allowed->node.start,
					     allowed->node.last)) {
			swap(*allowed_iova, iopt->allowed_itree);
			up_write(&iopt->iova_rwsem);
			return -EADDRINUSE;
		}
	}
	up_write(&iopt->iova_rwsem);
	return 0;
}

int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
		      unsigned long last, void *owner)
{
	struct iopt_reserved *reserved;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	if (iopt_area_iter_first(iopt, start, last) ||
	    iopt_allowed_iter_first(iopt, start, last))
		return -EADDRINUSE;

	reserved = kzalloc(sizeof(*reserved), GFP_KERNEL_ACCOUNT);
	if (!reserved)
		return -ENOMEM;
	reserved->node.start = start;
	reserved->node.last = last;
	reserved->owner = owner;
	interval_tree_insert(&reserved->node, &iopt->reserved_itree);
	return 0;
}

static void __iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
	struct iopt_reserved *reserved, *next;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	for (reserved = iopt_reserved_iter_first(iopt, 0, ULONG_MAX); reserved;
	     reserved = next) {
		next = iopt_reserved_iter_next(reserved, 0, ULONG_MAX);

		if (reserved->owner == owner) {
			interval_tree_remove(&reserved->node,
					     &iopt->reserved_itree);
			kfree(reserved);
		}
	}
}

void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
	down_write(&iopt->iova_rwsem);
	__iopt_remove_reserved_iova(iopt, owner);
	up_write(&iopt->iova_rwsem);
}

void iopt_init_table(struct io_pagetable *iopt)
{
	init_rwsem(&iopt->iova_rwsem);
	init_rwsem(&iopt->domains_rwsem);
	iopt->area_itree = RB_ROOT_CACHED;
	iopt->allowed_itree = RB_ROOT_CACHED;
	iopt->reserved_itree = RB_ROOT_CACHED;
	xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT);
	xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC);

	/*
	 * iopts start as SW tables that can use the entire size_t IOVA space
	 * due to the use of size_t in the APIs. They have no alignment
	 * restriction.
	 */
	iopt->iova_alignment = 1;
}
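/*
 * Illustrative sketch: reserved ranges are keyed by an owner pointer so they
 * can be dropped as a group. iopt_reserve_iova() expects the caller to hold
 * iova_rwsem for write, while iopt_remove_reserved_iova() takes it
 * internally:
 *
 *	down_write(&iopt->iova_rwsem);
 *	rc = iopt_reserve_iova(iopt, start, last, owner);
 *	up_write(&iopt->iova_rwsem);
 *	...
 *	iopt_remove_reserved_iova(iopt, owner);
 */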
void iopt_destroy_table(struct io_pagetable *iopt)
{
	struct interval_tree_node *node;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		iopt_remove_reserved_iova(iopt, NULL);

	while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0,
						ULONG_MAX))) {
		interval_tree_remove(node, &iopt->allowed_itree);
		kfree(container_of(node, struct iopt_allowed, node));
	}

	WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root));
	WARN_ON(!xa_empty(&iopt->domains));
	WARN_ON(!xa_empty(&iopt->access_list));
	WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root));
}

/**
 * iopt_unfill_domain() - Unfill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to unfill
 *
 * This is used when removing a domain from the iopt. Every area in the iopt
 * will be unmapped from the domain. The domain must already be removed from
 * the domains xarray.
 */
static void iopt_unfill_domain(struct io_pagetable *iopt,
			       struct iommu_domain *domain)
{
	struct iopt_area *area;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held_write(&iopt->domains_rwsem);

	/*
	 * Some other domain is holding all the pfns still; rapidly unmap this
	 * domain.
	 */
	if (iopt->next_domain_id != 0) {
		/* Pick an arbitrary remaining domain to act as storage */
		struct iommu_domain *storage_domain =
			xa_load(&iopt->domains, 0);

		for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
		     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
			struct iopt_pages *pages = area->pages;

			if (!pages)
				continue;

			mutex_lock(&pages->mutex);
			if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
				WARN_ON(!area->storage_domain);
			if (area->storage_domain == domain)
				area->storage_domain = storage_domain;
			mutex_unlock(&pages->mutex);

			iopt_area_unmap_domain(area, domain);
		}
		return;
	}

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		interval_tree_remove(&area->pages_node, &pages->domains_itree);
		WARN_ON(area->storage_domain != domain);
		area->storage_domain = NULL;
		iopt_area_unfill_domain(area, pages, domain);
		mutex_unlock(&pages->mutex);
	}
}

/**
 * iopt_fill_domain() - Fill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to fill
 *
 * Fill the domain with PFNs from every area in the iopt. On failure the domain
 * is left unchanged.
 */
static int iopt_fill_domain(struct io_pagetable *iopt,
			    struct iommu_domain *domain)
{
	struct iopt_area *end_area;
	struct iopt_area *area;
	int rc;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held_write(&iopt->domains_rwsem);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		rc = iopt_area_fill_domain(area, domain);
		if (rc) {
			mutex_unlock(&pages->mutex);
			goto out_unfill;
		}
		if (!area->storage_domain) {
			WARN_ON(iopt->next_domain_id != 0);
			area->storage_domain = domain;
			interval_tree_insert(&area->pages_node,
					     &pages->domains_itree);
		}
		mutex_unlock(&pages->mutex);
	}
	return 0;

out_unfill:
	end_area = area;
	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (area == end_area)
			break;
		if (!pages)
			continue;
		mutex_lock(&pages->mutex);
		if (iopt->next_domain_id == 0) {
			interval_tree_remove(&area->pages_node,
					     &pages->domains_itree);
			area->storage_domain = NULL;
		}
		iopt_area_unfill_domain(area, pages, domain);
		mutex_unlock(&pages->mutex);
	}
	return rc;
}
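/*
 * Note on storage domains (summary of the two functions above): when the
 * first domain is attached it becomes the "storage domain" of every
 * populated area, and such areas are indexed in pages->domains_itree. When
 * that domain is later removed while other domains remain,
 * iopt_unfill_domain() hands the role to an arbitrary remaining domain so
 * the PFNs stay pinned through the swap.
 */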
/* All existing areas must conform to an increased page size */
static int iopt_check_iova_alignment(struct io_pagetable *iopt,
				     unsigned long new_iova_alignment)
{
	unsigned long align_mask = new_iova_alignment - 1;
	struct iopt_area *area;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held(&iopt->domains_rwsem);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX))
		if ((iopt_area_iova(area) & align_mask) ||
		    (iopt_area_length(area) & align_mask) ||
		    (area->page_offset & align_mask))
			return -EADDRINUSE;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) {
		struct iommufd_access *access;
		unsigned long index;

		xa_for_each(&iopt->access_list, index, access)
			if (WARN_ON(access->iova_alignment >
				    new_iova_alignment))
				return -EADDRINUSE;
	}
	return 0;
}

int iopt_table_add_domain(struct io_pagetable *iopt,
			  struct iommu_domain *domain)
{
	const struct iommu_domain_geometry *geometry = &domain->geometry;
	struct iommu_domain *iter_domain;
	unsigned int new_iova_alignment;
	unsigned long index;
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);

	xa_for_each(&iopt->domains, index, iter_domain) {
		if (WARN_ON(iter_domain == domain)) {
			rc = -EEXIST;
			goto out_unlock;
		}
	}

	/*
	 * The io page size drives the iova_alignment. Internally the
	 * iopt_pages works in PAGE_SIZE units and we adjust when mapping
	 * sub-PAGE_SIZE objects into the iommu_domain.
	 *
	 * An iommu_domain must always be able to accept PAGE_SIZE to be
	 * compatible as we can't guarantee higher contiguity.
	 */
	new_iova_alignment = max_t(unsigned long,
				   1UL << __ffs(domain->pgsize_bitmap),
				   iopt->iova_alignment);
	if (new_iova_alignment > PAGE_SIZE) {
		rc = -EINVAL;
		goto out_unlock;
	}
	if (new_iova_alignment != iopt->iova_alignment) {
		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
		if (rc)
			goto out_unlock;
	}

	/* No area exists that is outside the allowed domain aperture */
	if (geometry->aperture_start != 0) {
		rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1,
				       domain);
		if (rc)
			goto out_reserved;
	}
	if (geometry->aperture_end != ULONG_MAX) {
		rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1,
				       ULONG_MAX, domain);
		if (rc)
			goto out_reserved;
	}

	rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL);
	if (rc)
		goto out_reserved;

	rc = iopt_fill_domain(iopt, domain);
	if (rc)
		goto out_release;

	iopt->iova_alignment = new_iova_alignment;
	xa_store(&iopt->domains, iopt->next_domain_id, domain, GFP_KERNEL);
	iopt->next_domain_id++;
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return 0;
out_release:
	xa_release(&iopt->domains, iopt->next_domain_id);
out_reserved:
	__iopt_remove_reserved_iova(iopt, domain);
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}
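/*
 * Illustrative sketch: attaching and detaching a domain is symmetric.
 * iopt_table_add_domain() reserves the IOVA outside the domain's aperture
 * and fills the domain with the existing areas; the matching teardown is
 * iopt_table_remove_domain():
 *
 *	rc = iopt_table_add_domain(iopt, domain);
 *	...
 *	iopt_table_remove_domain(iopt, domain);
 */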
static int iopt_calculate_iova_alignment(struct io_pagetable *iopt)
{
	unsigned long new_iova_alignment;
	struct iommufd_access *access;
	struct iommu_domain *domain;
	unsigned long index;

	lockdep_assert_held_write(&iopt->iova_rwsem);
	lockdep_assert_held(&iopt->domains_rwsem);

	/* See batch_iommu_map_small() */
	if (iopt->disable_large_pages)
		new_iova_alignment = PAGE_SIZE;
	else
		new_iova_alignment = 1;

	xa_for_each(&iopt->domains, index, domain)
		new_iova_alignment = max_t(unsigned long,
					   1UL << __ffs(domain->pgsize_bitmap),
					   new_iova_alignment);
	xa_for_each(&iopt->access_list, index, access)
		new_iova_alignment = max_t(unsigned long,
					   access->iova_alignment,
					   new_iova_alignment);

	if (new_iova_alignment > iopt->iova_alignment) {
		int rc;

		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
		if (rc)
			return rc;
	}
	iopt->iova_alignment = new_iova_alignment;
	return 0;
}

void iopt_table_remove_domain(struct io_pagetable *iopt,
			      struct iommu_domain *domain)
{
	struct iommu_domain *iter_domain = NULL;
	unsigned long index;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);

	xa_for_each(&iopt->domains, index, iter_domain)
		if (iter_domain == domain)
			break;
	if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id)
		goto out_unlock;

	/*
	 * Compress the xarray to keep it linear by swapping the entry to erase
	 * with the tail entry and shrinking the tail.
	 */
	iopt->next_domain_id--;
	iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id);
	if (index != iopt->next_domain_id)
		xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL);

	iopt_unfill_domain(iopt, domain);
	__iopt_remove_reserved_iova(iopt, domain);

	WARN_ON(iopt_calculate_iova_alignment(iopt));
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}
/**
 * iopt_area_split - Split an area into two parts at iova
 * @area: The area to split
 * @iova: Becomes the last of a new area
 *
 * This splits an area into two. It is part of the VFIO compatibility support
 * that allows poking a hole in the mapping. The two areas continue to point at
 * the same iopt_pages, just with different starting bytes.
 */
static int iopt_area_split(struct iopt_area *area, unsigned long iova)
{
	unsigned long alignment = area->iopt->iova_alignment;
	unsigned long last_iova = iopt_area_last_iova(area);
	unsigned long start_iova = iopt_area_iova(area);
	unsigned long new_start = iova + 1;
	struct io_pagetable *iopt = area->iopt;
	struct iopt_pages *pages = area->pages;
	struct iopt_area *lhs;
	struct iopt_area *rhs;
	int rc;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	if (iova == start_iova || iova == last_iova)
		return 0;

	if (!pages || area->prevent_access)
		return -EBUSY;

	if (new_start & (alignment - 1) ||
	    iopt_area_start_byte(area, new_start) & (alignment - 1))
		return -EINVAL;

	lhs = iopt_area_alloc();
	if (!lhs)
		return -ENOMEM;

	rhs = iopt_area_alloc();
	if (!rhs) {
		rc = -ENOMEM;
		goto err_free_lhs;
	}

	mutex_lock(&pages->mutex);
	/*
	 * Splitting is not permitted if an access exists; we don't track
	 * enough information to split existing accesses.
	 */
	if (area->num_accesses) {
		rc = -EINVAL;
		goto err_unlock;
	}

	/*
	 * Splitting is not permitted if a domain could have been mapped with
	 * huge pages.
	 */
	if (area->storage_domain && !iopt->disable_large_pages) {
		rc = -EINVAL;
		goto err_unlock;
	}

	interval_tree_remove(&area->node, &iopt->area_itree);
	rc = iopt_insert_area(iopt, lhs, area->pages, start_iova,
			      iopt_area_start_byte(area, start_iova),
			      (new_start - 1) - start_iova + 1,
			      area->iommu_prot);
	if (WARN_ON(rc))
		goto err_insert;

	rc = iopt_insert_area(iopt, rhs, area->pages, new_start,
			      iopt_area_start_byte(area, new_start),
			      last_iova - new_start + 1, area->iommu_prot);
	if (WARN_ON(rc))
		goto err_remove_lhs;

	/*
	 * If the original area has filled a domain, domains_itree has to be
	 * updated.
	 */
	if (area->storage_domain) {
		interval_tree_remove(&area->pages_node, &pages->domains_itree);
		interval_tree_insert(&lhs->pages_node, &pages->domains_itree);
		interval_tree_insert(&rhs->pages_node, &pages->domains_itree);
	}

	lhs->storage_domain = area->storage_domain;
	lhs->pages = area->pages;
	rhs->storage_domain = area->storage_domain;
	rhs->pages = area->pages;
	kref_get(&rhs->pages->kref);
	kfree(area);
	mutex_unlock(&pages->mutex);

	/*
	 * No change to domains or accesses because the pages haven't been
	 * changed.
	 */
	return 0;

err_remove_lhs:
	interval_tree_remove(&lhs->node, &iopt->area_itree);
err_insert:
	interval_tree_insert(&area->node, &iopt->area_itree);
err_unlock:
	mutex_unlock(&pages->mutex);
	kfree(rhs);
err_free_lhs:
	kfree(lhs);
	return rc;
}

int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
		  size_t num_iovas)
{
	int rc = 0;
	int i;

	down_write(&iopt->iova_rwsem);
	for (i = 0; i < num_iovas; i++) {
		struct iopt_area *area;

		area = iopt_area_iter_first(iopt, iovas[i], iovas[i]);
		if (!area)
			continue;
		rc = iopt_area_split(area, iovas[i]);
		if (rc)
			break;
	}
	up_write(&iopt->iova_rwsem);
	return rc;
}
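/*
 * Illustrative sketch (assumed usage pattern, not a definitive recipe): to
 * punch a hole [hole_start, hole_last] out of a larger mapping for VFIO
 * compatibility, the mapping is first cut so the hole lands on area
 * boundaries, then unmapped:
 *
 *	unsigned long cuts[] = { hole_start - 1, hole_last };
 *
 *	rc = iopt_cut_iova(iopt, cuts, ARRAY_SIZE(cuts));
 *	if (!rc)
 *		rc = iopt_unmap_iova(iopt, hole_start,
 *				     hole_last - hole_start + 1, NULL);
 *
 * hole_start and hole_last + 1 must respect iova_alignment or the split is
 * refused.
 */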
void iopt_enable_large_pages(struct io_pagetable *iopt)
{
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	WRITE_ONCE(iopt->disable_large_pages, false);
	rc = iopt_calculate_iova_alignment(iopt);
	WARN_ON(rc);
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}

int iopt_disable_large_pages(struct io_pagetable *iopt)
{
	int rc = 0;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	if (iopt->disable_large_pages)
		goto out_unlock;

	/* Won't do it if domains already have pages mapped in them */
	if (!xa_empty(&iopt->domains) &&
	    !RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) {
		rc = -EINVAL;
		goto out_unlock;
	}

	WRITE_ONCE(iopt->disable_large_pages, true);
	rc = iopt_calculate_iova_alignment(iopt);
	if (rc)
		WRITE_ONCE(iopt->disable_large_pages, false);
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}

int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access)
{
	u32 new_id;
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	rc = xa_alloc(&iopt->access_list, &new_id, access, xa_limit_16b,
		      GFP_KERNEL_ACCOUNT);

	if (rc)
		goto out_unlock;

	rc = iopt_calculate_iova_alignment(iopt);
	if (rc) {
		xa_erase(&iopt->access_list, new_id);
		goto out_unlock;
	}
	access->iopt_access_list_id = new_id;

out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}

void iopt_remove_access(struct io_pagetable *iopt,
			struct iommufd_access *access,
			u32 iopt_access_list_id)
{
	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	WARN_ON(xa_erase(&iopt->access_list, iopt_access_list_id) != access);
	WARN_ON(iopt_calculate_iova_alignment(iopt));
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}

/* Narrow the usable IOVA space to account for a device's reserved regions. */
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
					struct device *dev,
					phys_addr_t *sw_msi_start)
{
	struct iommu_resv_region *resv;
	LIST_HEAD(resv_regions);
	unsigned int num_hw_msi = 0;
	unsigned int num_sw_msi = 0;
	int rc;

	if (iommufd_should_fail())
		return -EINVAL;

	down_write(&iopt->iova_rwsem);
	/* FIXME: drivers allocate memory but there is no failure propagated */
	iommu_get_resv_regions(dev, &resv_regions);

	list_for_each_entry(resv, &resv_regions, list) {
		if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
			continue;

		if (sw_msi_start && resv->type == IOMMU_RESV_MSI)
			num_hw_msi++;
		if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) {
			*sw_msi_start = resv->start;
			num_sw_msi++;
		}

		rc = iopt_reserve_iova(iopt, resv->start,
				       resv->length - 1 + resv->start, dev);
		if (rc)
			goto out_reserved;
	}

	/* Drivers must offer sane combinations of regions */
	if (WARN_ON(num_sw_msi && num_hw_msi) || WARN_ON(num_sw_msi > 1)) {
		rc = -EINVAL;
		goto out_reserved;
	}

	rc = 0;
	goto out_free_resv;

out_reserved:
	__iopt_remove_reserved_iova(iopt, dev);
out_free_resv:
	iommu_put_resv_regions(dev, &resv_regions);
	up_write(&iopt->iova_rwsem);
	return rc;
}