1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. 3 * 4 * The io_pagetable is the top of datastructure that maps IOVA's to PFNs. The 5 * PFNs can be placed into an iommu_domain, or returned to the caller as a page 6 * list for access by an in-kernel user. 7 * 8 * The datastructure uses the iopt_pages to optimize the storage of the PFNs 9 * between the domains and xarray. 10 */ 11 #include <linux/iommufd.h> 12 #include <linux/lockdep.h> 13 #include <linux/iommu.h> 14 #include <linux/sched/mm.h> 15 #include <linux/err.h> 16 #include <linux/slab.h> 17 #include <linux/errno.h> 18 #include <uapi/linux/iommufd.h> 19 20 #include "io_pagetable.h" 21 #include "double_span.h" 22 23 struct iopt_pages_list { 24 struct iopt_pages *pages; 25 struct iopt_area *area; 26 struct list_head next; 27 unsigned long start_byte; 28 unsigned long length; 29 }; 30 31 struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter, 32 struct io_pagetable *iopt, 33 unsigned long iova, 34 unsigned long last_iova) 35 { 36 lockdep_assert_held(&iopt->iova_rwsem); 37 38 iter->cur_iova = iova; 39 iter->last_iova = last_iova; 40 iter->area = iopt_area_iter_first(iopt, iova, iova); 41 if (!iter->area) 42 return NULL; 43 if (!iter->area->pages) { 44 iter->area = NULL; 45 return NULL; 46 } 47 return iter->area; 48 } 49 50 struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter) 51 { 52 unsigned long last_iova; 53 54 if (!iter->area) 55 return NULL; 56 last_iova = iopt_area_last_iova(iter->area); 57 if (iter->last_iova <= last_iova) 58 return NULL; 59 60 iter->cur_iova = last_iova + 1; 61 iter->area = iopt_area_iter_next(iter->area, iter->cur_iova, 62 iter->last_iova); 63 if (!iter->area) 64 return NULL; 65 if (iter->cur_iova != iopt_area_iova(iter->area) || 66 !iter->area->pages) { 67 iter->area = NULL; 68 return NULL; 69 } 70 return iter->area; 71 } 72 73 static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span, 74 unsigned long length, 75 unsigned long iova_alignment, 76 unsigned long page_offset) 77 { 78 if (span->is_used || span->last_hole - span->start_hole < length - 1) 79 return false; 80 81 span->start_hole = ALIGN(span->start_hole, iova_alignment) | 82 page_offset; 83 if (span->start_hole > span->last_hole || 84 span->last_hole - span->start_hole < length - 1) 85 return false; 86 return true; 87 } 88 89 static bool __alloc_iova_check_used(struct interval_tree_span_iter *span, 90 unsigned long length, 91 unsigned long iova_alignment, 92 unsigned long page_offset) 93 { 94 if (span->is_hole || span->last_used - span->start_used < length - 1) 95 return false; 96 97 span->start_used = ALIGN(span->start_used, iova_alignment) | 98 page_offset; 99 if (span->start_used > span->last_used || 100 span->last_used - span->start_used < length - 1) 101 return false; 102 return true; 103 } 104 105 /* 106 * Automatically find a block of IOVA that is not being used and not reserved. 107 * Does not return a 0 IOVA even if it is valid. 108 */ 109 static int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova, 110 unsigned long uptr, unsigned long length) 111 { 112 unsigned long page_offset = uptr % PAGE_SIZE; 113 struct interval_tree_double_span_iter used_span; 114 struct interval_tree_span_iter allowed_span; 115 unsigned long iova_alignment; 116 117 lockdep_assert_held(&iopt->iova_rwsem); 118 119 /* Protect roundup_pow-of_two() from overflow */ 120 if (length == 0 || length >= ULONG_MAX / 2) 121 return -EOVERFLOW; 122 123 /* 124 * Keep alignment present in the uptr when building the IOVA, this 125 * increases the chance we can map a THP. 126 */ 127 if (!uptr) 128 iova_alignment = roundup_pow_of_two(length); 129 else 130 iova_alignment = min_t(unsigned long, 131 roundup_pow_of_two(length), 132 1UL << __ffs64(uptr)); 133 134 if (iova_alignment < iopt->iova_alignment) 135 return -EINVAL; 136 137 interval_tree_for_each_span(&allowed_span, &iopt->allowed_itree, 138 PAGE_SIZE, ULONG_MAX - PAGE_SIZE) { 139 if (RB_EMPTY_ROOT(&iopt->allowed_itree.rb_root)) { 140 allowed_span.start_used = PAGE_SIZE; 141 allowed_span.last_used = ULONG_MAX - PAGE_SIZE; 142 allowed_span.is_hole = false; 143 } 144 145 if (!__alloc_iova_check_used(&allowed_span, length, 146 iova_alignment, page_offset)) 147 continue; 148 149 interval_tree_for_each_double_span( 150 &used_span, &iopt->reserved_itree, &iopt->area_itree, 151 allowed_span.start_used, allowed_span.last_used) { 152 if (!__alloc_iova_check_hole(&used_span, length, 153 iova_alignment, 154 page_offset)) 155 continue; 156 157 *iova = used_span.start_hole; 158 return 0; 159 } 160 } 161 return -ENOSPC; 162 } 163 164 static int iopt_check_iova(struct io_pagetable *iopt, unsigned long iova, 165 unsigned long length) 166 { 167 unsigned long last; 168 169 lockdep_assert_held(&iopt->iova_rwsem); 170 171 if ((iova & (iopt->iova_alignment - 1))) 172 return -EINVAL; 173 174 if (check_add_overflow(iova, length - 1, &last)) 175 return -EOVERFLOW; 176 177 /* No reserved IOVA intersects the range */ 178 if (iopt_reserved_iter_first(iopt, iova, last)) 179 return -EINVAL; 180 181 /* Check that there is not already a mapping in the range */ 182 if (iopt_area_iter_first(iopt, iova, last)) 183 return -EEXIST; 184 return 0; 185 } 186 187 /* 188 * The area takes a slice of the pages from start_bytes to start_byte + length 189 */ 190 static int iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area, 191 struct iopt_pages *pages, unsigned long iova, 192 unsigned long start_byte, unsigned long length, 193 int iommu_prot) 194 { 195 lockdep_assert_held_write(&iopt->iova_rwsem); 196 197 if ((iommu_prot & IOMMU_WRITE) && !pages->writable) 198 return -EPERM; 199 200 area->iommu_prot = iommu_prot; 201 area->page_offset = start_byte % PAGE_SIZE; 202 if (area->page_offset & (iopt->iova_alignment - 1)) 203 return -EINVAL; 204 205 area->node.start = iova; 206 if (check_add_overflow(iova, length - 1, &area->node.last)) 207 return -EOVERFLOW; 208 209 area->pages_node.start = start_byte / PAGE_SIZE; 210 if (check_add_overflow(start_byte, length - 1, &area->pages_node.last)) 211 return -EOVERFLOW; 212 area->pages_node.last = area->pages_node.last / PAGE_SIZE; 213 if (WARN_ON(area->pages_node.last >= pages->npages)) 214 return -EOVERFLOW; 215 216 /* 217 * The area is inserted with a NULL pages indicating it is not fully 218 * initialized yet. 219 */ 220 area->iopt = iopt; 221 interval_tree_insert(&area->node, &iopt->area_itree); 222 return 0; 223 } 224 225 static struct iopt_area *iopt_area_alloc(void) 226 { 227 struct iopt_area *area; 228 229 area = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT); 230 if (!area) 231 return NULL; 232 RB_CLEAR_NODE(&area->node.rb); 233 RB_CLEAR_NODE(&area->pages_node.rb); 234 return area; 235 } 236 237 static int iopt_alloc_area_pages(struct io_pagetable *iopt, 238 struct list_head *pages_list, 239 unsigned long length, unsigned long *dst_iova, 240 int iommu_prot, unsigned int flags) 241 { 242 struct iopt_pages_list *elm; 243 unsigned long iova; 244 int rc = 0; 245 246 list_for_each_entry(elm, pages_list, next) { 247 elm->area = iopt_area_alloc(); 248 if (!elm->area) 249 return -ENOMEM; 250 } 251 252 down_write(&iopt->iova_rwsem); 253 if ((length & (iopt->iova_alignment - 1)) || !length) { 254 rc = -EINVAL; 255 goto out_unlock; 256 } 257 258 if (flags & IOPT_ALLOC_IOVA) { 259 /* Use the first entry to guess the ideal IOVA alignment */ 260 elm = list_first_entry(pages_list, struct iopt_pages_list, 261 next); 262 rc = iopt_alloc_iova( 263 iopt, dst_iova, 264 (uintptr_t)elm->pages->uptr + elm->start_byte, length); 265 if (rc) 266 goto out_unlock; 267 if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && 268 WARN_ON(iopt_check_iova(iopt, *dst_iova, length))) { 269 rc = -EINVAL; 270 goto out_unlock; 271 } 272 } else { 273 rc = iopt_check_iova(iopt, *dst_iova, length); 274 if (rc) 275 goto out_unlock; 276 } 277 278 /* 279 * Areas are created with a NULL pages so that the IOVA space is 280 * reserved and we can unlock the iova_rwsem. 281 */ 282 iova = *dst_iova; 283 list_for_each_entry(elm, pages_list, next) { 284 rc = iopt_insert_area(iopt, elm->area, elm->pages, iova, 285 elm->start_byte, elm->length, iommu_prot); 286 if (rc) 287 goto out_unlock; 288 iova += elm->length; 289 } 290 291 out_unlock: 292 up_write(&iopt->iova_rwsem); 293 return rc; 294 } 295 296 static void iopt_abort_area(struct iopt_area *area) 297 { 298 if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) 299 WARN_ON(area->pages); 300 if (area->iopt) { 301 down_write(&area->iopt->iova_rwsem); 302 interval_tree_remove(&area->node, &area->iopt->area_itree); 303 up_write(&area->iopt->iova_rwsem); 304 } 305 kfree(area); 306 } 307 308 void iopt_free_pages_list(struct list_head *pages_list) 309 { 310 struct iopt_pages_list *elm; 311 312 while ((elm = list_first_entry_or_null(pages_list, 313 struct iopt_pages_list, next))) { 314 if (elm->area) 315 iopt_abort_area(elm->area); 316 if (elm->pages) 317 iopt_put_pages(elm->pages); 318 list_del(&elm->next); 319 kfree(elm); 320 } 321 } 322 323 static int iopt_fill_domains_pages(struct list_head *pages_list) 324 { 325 struct iopt_pages_list *undo_elm; 326 struct iopt_pages_list *elm; 327 int rc; 328 329 list_for_each_entry(elm, pages_list, next) { 330 rc = iopt_area_fill_domains(elm->area, elm->pages); 331 if (rc) 332 goto err_undo; 333 } 334 return 0; 335 336 err_undo: 337 list_for_each_entry(undo_elm, pages_list, next) { 338 if (undo_elm == elm) 339 break; 340 iopt_area_unfill_domains(undo_elm->area, undo_elm->pages); 341 } 342 return rc; 343 } 344 345 int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list, 346 unsigned long length, unsigned long *dst_iova, 347 int iommu_prot, unsigned int flags) 348 { 349 struct iopt_pages_list *elm; 350 int rc; 351 352 rc = iopt_alloc_area_pages(iopt, pages_list, length, dst_iova, 353 iommu_prot, flags); 354 if (rc) 355 return rc; 356 357 down_read(&iopt->domains_rwsem); 358 rc = iopt_fill_domains_pages(pages_list); 359 if (rc) 360 goto out_unlock_domains; 361 362 down_write(&iopt->iova_rwsem); 363 list_for_each_entry(elm, pages_list, next) { 364 /* 365 * area->pages must be set inside the domains_rwsem to ensure 366 * any newly added domains will get filled. Moves the reference 367 * in from the list. 368 */ 369 elm->area->pages = elm->pages; 370 elm->pages = NULL; 371 elm->area = NULL; 372 } 373 up_write(&iopt->iova_rwsem); 374 out_unlock_domains: 375 up_read(&iopt->domains_rwsem); 376 return rc; 377 } 378 379 /** 380 * iopt_map_user_pages() - Map a user VA to an iova in the io page table 381 * @ictx: iommufd_ctx the iopt is part of 382 * @iopt: io_pagetable to act on 383 * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains 384 * the chosen iova on output. Otherwise is the iova to map to on input 385 * @uptr: User VA to map 386 * @length: Number of bytes to map 387 * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping 388 * @flags: IOPT_ALLOC_IOVA or zero 389 * 390 * iova, uptr, and length must be aligned to iova_alignment. For domain backed 391 * page tables this will pin the pages and load them into the domain at iova. 392 * For non-domain page tables this will only setup a lazy reference and the 393 * caller must use iopt_access_pages() to touch them. 394 * 395 * iopt_unmap_iova() must be called to undo this before the io_pagetable can be 396 * destroyed. 397 */ 398 int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, 399 unsigned long *iova, void __user *uptr, 400 unsigned long length, int iommu_prot, 401 unsigned int flags) 402 { 403 struct iopt_pages_list elm = {}; 404 LIST_HEAD(pages_list); 405 int rc; 406 407 elm.pages = iopt_alloc_pages(uptr, length, iommu_prot & IOMMU_WRITE); 408 if (IS_ERR(elm.pages)) 409 return PTR_ERR(elm.pages); 410 if (ictx->account_mode == IOPT_PAGES_ACCOUNT_MM && 411 elm.pages->account_mode == IOPT_PAGES_ACCOUNT_USER) 412 elm.pages->account_mode = IOPT_PAGES_ACCOUNT_MM; 413 elm.start_byte = uptr - elm.pages->uptr; 414 elm.length = length; 415 list_add(&elm.next, &pages_list); 416 417 rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags); 418 if (rc) { 419 if (elm.area) 420 iopt_abort_area(elm.area); 421 if (elm.pages) 422 iopt_put_pages(elm.pages); 423 return rc; 424 } 425 return 0; 426 } 427 428 struct iova_bitmap_fn_arg { 429 unsigned long flags; 430 struct io_pagetable *iopt; 431 struct iommu_domain *domain; 432 struct iommu_dirty_bitmap *dirty; 433 }; 434 435 static int __iommu_read_and_clear_dirty(struct iova_bitmap *bitmap, 436 unsigned long iova, size_t length, 437 void *opaque) 438 { 439 struct iopt_area *area; 440 struct iopt_area_contig_iter iter; 441 struct iova_bitmap_fn_arg *arg = opaque; 442 struct iommu_domain *domain = arg->domain; 443 struct iommu_dirty_bitmap *dirty = arg->dirty; 444 const struct iommu_dirty_ops *ops = domain->dirty_ops; 445 unsigned long last_iova = iova + length - 1; 446 unsigned long flags = arg->flags; 447 int ret; 448 449 iopt_for_each_contig_area(&iter, area, arg->iopt, iova, last_iova) { 450 unsigned long last = min(last_iova, iopt_area_last_iova(area)); 451 452 ret = ops->read_and_clear_dirty(domain, iter.cur_iova, 453 last - iter.cur_iova + 1, flags, 454 dirty); 455 if (ret) 456 return ret; 457 } 458 459 if (!iopt_area_contig_done(&iter)) 460 return -EINVAL; 461 return 0; 462 } 463 464 static int 465 iommu_read_and_clear_dirty(struct iommu_domain *domain, 466 struct io_pagetable *iopt, unsigned long flags, 467 struct iommu_hwpt_get_dirty_bitmap *bitmap) 468 { 469 const struct iommu_dirty_ops *ops = domain->dirty_ops; 470 struct iommu_iotlb_gather gather; 471 struct iommu_dirty_bitmap dirty; 472 struct iova_bitmap_fn_arg arg; 473 struct iova_bitmap *iter; 474 int ret = 0; 475 476 if (!ops || !ops->read_and_clear_dirty) 477 return -EOPNOTSUPP; 478 479 iter = iova_bitmap_alloc(bitmap->iova, bitmap->length, 480 bitmap->page_size, 481 u64_to_user_ptr(bitmap->data)); 482 if (IS_ERR(iter)) 483 return -ENOMEM; 484 485 iommu_dirty_bitmap_init(&dirty, iter, &gather); 486 487 arg.flags = flags; 488 arg.iopt = iopt; 489 arg.domain = domain; 490 arg.dirty = &dirty; 491 iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty); 492 493 if (!(flags & IOMMU_DIRTY_NO_CLEAR)) 494 iommu_iotlb_sync(domain, &gather); 495 496 iova_bitmap_free(iter); 497 498 return ret; 499 } 500 501 int iommufd_check_iova_range(struct io_pagetable *iopt, 502 struct iommu_hwpt_get_dirty_bitmap *bitmap) 503 { 504 size_t iommu_pgsize = iopt->iova_alignment; 505 u64 last_iova; 506 507 if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova)) 508 return -EOVERFLOW; 509 510 if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX) 511 return -EOVERFLOW; 512 513 if ((bitmap->iova & (iommu_pgsize - 1)) || 514 ((last_iova + 1) & (iommu_pgsize - 1))) 515 return -EINVAL; 516 517 if (!bitmap->page_size) 518 return -EINVAL; 519 520 if ((bitmap->iova & (bitmap->page_size - 1)) || 521 ((last_iova + 1) & (bitmap->page_size - 1))) 522 return -EINVAL; 523 524 return 0; 525 } 526 527 int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt, 528 struct iommu_domain *domain, 529 unsigned long flags, 530 struct iommu_hwpt_get_dirty_bitmap *bitmap) 531 { 532 int ret; 533 534 ret = iommufd_check_iova_range(iopt, bitmap); 535 if (ret) 536 return ret; 537 538 down_read(&iopt->iova_rwsem); 539 ret = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap); 540 up_read(&iopt->iova_rwsem); 541 542 return ret; 543 } 544 545 static int iopt_clear_dirty_data(struct io_pagetable *iopt, 546 struct iommu_domain *domain) 547 { 548 const struct iommu_dirty_ops *ops = domain->dirty_ops; 549 struct iommu_iotlb_gather gather; 550 struct iommu_dirty_bitmap dirty; 551 struct iopt_area *area; 552 int ret = 0; 553 554 lockdep_assert_held_read(&iopt->iova_rwsem); 555 556 iommu_dirty_bitmap_init(&dirty, NULL, &gather); 557 558 for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; 559 area = iopt_area_iter_next(area, 0, ULONG_MAX)) { 560 if (!area->pages) 561 continue; 562 563 ret = ops->read_and_clear_dirty(domain, iopt_area_iova(area), 564 iopt_area_length(area), 0, 565 &dirty); 566 if (ret) 567 break; 568 } 569 570 iommu_iotlb_sync(domain, &gather); 571 return ret; 572 } 573 574 int iopt_set_dirty_tracking(struct io_pagetable *iopt, 575 struct iommu_domain *domain, bool enable) 576 { 577 const struct iommu_dirty_ops *ops = domain->dirty_ops; 578 int ret = 0; 579 580 if (!ops) 581 return -EOPNOTSUPP; 582 583 down_read(&iopt->iova_rwsem); 584 585 /* Clear dirty bits from PTEs to ensure a clean snapshot */ 586 if (enable) { 587 ret = iopt_clear_dirty_data(iopt, domain); 588 if (ret) 589 goto out_unlock; 590 } 591 592 ret = ops->set_dirty_tracking(domain, enable); 593 594 out_unlock: 595 up_read(&iopt->iova_rwsem); 596 return ret; 597 } 598 599 int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova, 600 unsigned long length, struct list_head *pages_list) 601 { 602 struct iopt_area_contig_iter iter; 603 unsigned long last_iova; 604 struct iopt_area *area; 605 int rc; 606 607 if (!length) 608 return -EINVAL; 609 if (check_add_overflow(iova, length - 1, &last_iova)) 610 return -EOVERFLOW; 611 612 down_read(&iopt->iova_rwsem); 613 iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { 614 struct iopt_pages_list *elm; 615 unsigned long last = min(last_iova, iopt_area_last_iova(area)); 616 617 elm = kzalloc(sizeof(*elm), GFP_KERNEL_ACCOUNT); 618 if (!elm) { 619 rc = -ENOMEM; 620 goto err_free; 621 } 622 elm->start_byte = iopt_area_start_byte(area, iter.cur_iova); 623 elm->pages = area->pages; 624 elm->length = (last - iter.cur_iova) + 1; 625 kref_get(&elm->pages->kref); 626 list_add_tail(&elm->next, pages_list); 627 } 628 if (!iopt_area_contig_done(&iter)) { 629 rc = -ENOENT; 630 goto err_free; 631 } 632 up_read(&iopt->iova_rwsem); 633 return 0; 634 err_free: 635 up_read(&iopt->iova_rwsem); 636 iopt_free_pages_list(pages_list); 637 return rc; 638 } 639 640 static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start, 641 unsigned long last, unsigned long *unmapped) 642 { 643 struct iopt_area *area; 644 unsigned long unmapped_bytes = 0; 645 unsigned int tries = 0; 646 int rc = -ENOENT; 647 648 /* 649 * The domains_rwsem must be held in read mode any time any area->pages 650 * is NULL. This prevents domain attach/detatch from running 651 * concurrently with cleaning up the area. 652 */ 653 again: 654 down_read(&iopt->domains_rwsem); 655 down_write(&iopt->iova_rwsem); 656 while ((area = iopt_area_iter_first(iopt, start, last))) { 657 unsigned long area_last = iopt_area_last_iova(area); 658 unsigned long area_first = iopt_area_iova(area); 659 struct iopt_pages *pages; 660 661 /* Userspace should not race map/unmap's of the same area */ 662 if (!area->pages) { 663 rc = -EBUSY; 664 goto out_unlock_iova; 665 } 666 667 if (area_first < start || area_last > last) { 668 rc = -ENOENT; 669 goto out_unlock_iova; 670 } 671 672 if (area_first != start) 673 tries = 0; 674 675 /* 676 * num_accesses writers must hold the iova_rwsem too, so we can 677 * safely read it under the write side of the iovam_rwsem 678 * without the pages->mutex. 679 */ 680 if (area->num_accesses) { 681 size_t length = iopt_area_length(area); 682 683 start = area_first; 684 area->prevent_access = true; 685 up_write(&iopt->iova_rwsem); 686 up_read(&iopt->domains_rwsem); 687 688 iommufd_access_notify_unmap(iopt, area_first, length); 689 /* Something is not responding to unmap requests. */ 690 tries++; 691 if (WARN_ON(tries > 100)) 692 return -EDEADLOCK; 693 goto again; 694 } 695 696 pages = area->pages; 697 area->pages = NULL; 698 up_write(&iopt->iova_rwsem); 699 700 iopt_area_unfill_domains(area, pages); 701 iopt_abort_area(area); 702 iopt_put_pages(pages); 703 704 unmapped_bytes += area_last - area_first + 1; 705 706 down_write(&iopt->iova_rwsem); 707 } 708 if (unmapped_bytes) 709 rc = 0; 710 711 out_unlock_iova: 712 up_write(&iopt->iova_rwsem); 713 up_read(&iopt->domains_rwsem); 714 if (unmapped) 715 *unmapped = unmapped_bytes; 716 return rc; 717 } 718 719 /** 720 * iopt_unmap_iova() - Remove a range of iova 721 * @iopt: io_pagetable to act on 722 * @iova: Starting iova to unmap 723 * @length: Number of bytes to unmap 724 * @unmapped: Return number of bytes unmapped 725 * 726 * The requested range must be a superset of existing ranges. 727 * Splitting/truncating IOVA mappings is not allowed. 728 */ 729 int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, 730 unsigned long length, unsigned long *unmapped) 731 { 732 unsigned long iova_last; 733 734 if (!length) 735 return -EINVAL; 736 737 if (check_add_overflow(iova, length - 1, &iova_last)) 738 return -EOVERFLOW; 739 740 return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped); 741 } 742 743 int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped) 744 { 745 int rc; 746 747 rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped); 748 /* If the IOVAs are empty then unmap all succeeds */ 749 if (rc == -ENOENT) 750 return 0; 751 return rc; 752 } 753 754 /* The caller must always free all the nodes in the allowed_iova rb_root. */ 755 int iopt_set_allow_iova(struct io_pagetable *iopt, 756 struct rb_root_cached *allowed_iova) 757 { 758 struct iopt_allowed *allowed; 759 760 down_write(&iopt->iova_rwsem); 761 swap(*allowed_iova, iopt->allowed_itree); 762 763 for (allowed = iopt_allowed_iter_first(iopt, 0, ULONG_MAX); allowed; 764 allowed = iopt_allowed_iter_next(allowed, 0, ULONG_MAX)) { 765 if (iopt_reserved_iter_first(iopt, allowed->node.start, 766 allowed->node.last)) { 767 swap(*allowed_iova, iopt->allowed_itree); 768 up_write(&iopt->iova_rwsem); 769 return -EADDRINUSE; 770 } 771 } 772 up_write(&iopt->iova_rwsem); 773 return 0; 774 } 775 776 int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start, 777 unsigned long last, void *owner) 778 { 779 struct iopt_reserved *reserved; 780 781 lockdep_assert_held_write(&iopt->iova_rwsem); 782 783 if (iopt_area_iter_first(iopt, start, last) || 784 iopt_allowed_iter_first(iopt, start, last)) 785 return -EADDRINUSE; 786 787 reserved = kzalloc(sizeof(*reserved), GFP_KERNEL_ACCOUNT); 788 if (!reserved) 789 return -ENOMEM; 790 reserved->node.start = start; 791 reserved->node.last = last; 792 reserved->owner = owner; 793 interval_tree_insert(&reserved->node, &iopt->reserved_itree); 794 return 0; 795 } 796 797 static void __iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner) 798 { 799 struct iopt_reserved *reserved, *next; 800 801 lockdep_assert_held_write(&iopt->iova_rwsem); 802 803 for (reserved = iopt_reserved_iter_first(iopt, 0, ULONG_MAX); reserved; 804 reserved = next) { 805 next = iopt_reserved_iter_next(reserved, 0, ULONG_MAX); 806 807 if (reserved->owner == owner) { 808 interval_tree_remove(&reserved->node, 809 &iopt->reserved_itree); 810 kfree(reserved); 811 } 812 } 813 } 814 815 void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner) 816 { 817 down_write(&iopt->iova_rwsem); 818 __iopt_remove_reserved_iova(iopt, owner); 819 up_write(&iopt->iova_rwsem); 820 } 821 822 void iopt_init_table(struct io_pagetable *iopt) 823 { 824 init_rwsem(&iopt->iova_rwsem); 825 init_rwsem(&iopt->domains_rwsem); 826 iopt->area_itree = RB_ROOT_CACHED; 827 iopt->allowed_itree = RB_ROOT_CACHED; 828 iopt->reserved_itree = RB_ROOT_CACHED; 829 xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT); 830 xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC); 831 832 /* 833 * iopt's start as SW tables that can use the entire size_t IOVA space 834 * due to the use of size_t in the APIs. They have no alignment 835 * restriction. 836 */ 837 iopt->iova_alignment = 1; 838 } 839 840 void iopt_destroy_table(struct io_pagetable *iopt) 841 { 842 struct interval_tree_node *node; 843 844 if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) 845 iopt_remove_reserved_iova(iopt, NULL); 846 847 while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0, 848 ULONG_MAX))) { 849 interval_tree_remove(node, &iopt->allowed_itree); 850 kfree(container_of(node, struct iopt_allowed, node)); 851 } 852 853 WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root)); 854 WARN_ON(!xa_empty(&iopt->domains)); 855 WARN_ON(!xa_empty(&iopt->access_list)); 856 WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root)); 857 } 858 859 /** 860 * iopt_unfill_domain() - Unfill a domain with PFNs 861 * @iopt: io_pagetable to act on 862 * @domain: domain to unfill 863 * 864 * This is used when removing a domain from the iopt. Every area in the iopt 865 * will be unmapped from the domain. The domain must already be removed from the 866 * domains xarray. 867 */ 868 static void iopt_unfill_domain(struct io_pagetable *iopt, 869 struct iommu_domain *domain) 870 { 871 struct iopt_area *area; 872 873 lockdep_assert_held(&iopt->iova_rwsem); 874 lockdep_assert_held_write(&iopt->domains_rwsem); 875 876 /* 877 * Some other domain is holding all the pfns still, rapidly unmap this 878 * domain. 879 */ 880 if (iopt->next_domain_id != 0) { 881 /* Pick an arbitrary remaining domain to act as storage */ 882 struct iommu_domain *storage_domain = 883 xa_load(&iopt->domains, 0); 884 885 for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; 886 area = iopt_area_iter_next(area, 0, ULONG_MAX)) { 887 struct iopt_pages *pages = area->pages; 888 889 if (!pages) 890 continue; 891 892 mutex_lock(&pages->mutex); 893 if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) 894 WARN_ON(!area->storage_domain); 895 if (area->storage_domain == domain) 896 area->storage_domain = storage_domain; 897 mutex_unlock(&pages->mutex); 898 899 iopt_area_unmap_domain(area, domain); 900 } 901 return; 902 } 903 904 for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; 905 area = iopt_area_iter_next(area, 0, ULONG_MAX)) { 906 struct iopt_pages *pages = area->pages; 907 908 if (!pages) 909 continue; 910 911 mutex_lock(&pages->mutex); 912 interval_tree_remove(&area->pages_node, &pages->domains_itree); 913 WARN_ON(area->storage_domain != domain); 914 area->storage_domain = NULL; 915 iopt_area_unfill_domain(area, pages, domain); 916 mutex_unlock(&pages->mutex); 917 } 918 } 919 920 /** 921 * iopt_fill_domain() - Fill a domain with PFNs 922 * @iopt: io_pagetable to act on 923 * @domain: domain to fill 924 * 925 * Fill the domain with PFNs from every area in the iopt. On failure the domain 926 * is left unchanged. 927 */ 928 static int iopt_fill_domain(struct io_pagetable *iopt, 929 struct iommu_domain *domain) 930 { 931 struct iopt_area *end_area; 932 struct iopt_area *area; 933 int rc; 934 935 lockdep_assert_held(&iopt->iova_rwsem); 936 lockdep_assert_held_write(&iopt->domains_rwsem); 937 938 for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; 939 area = iopt_area_iter_next(area, 0, ULONG_MAX)) { 940 struct iopt_pages *pages = area->pages; 941 942 if (!pages) 943 continue; 944 945 mutex_lock(&pages->mutex); 946 rc = iopt_area_fill_domain(area, domain); 947 if (rc) { 948 mutex_unlock(&pages->mutex); 949 goto out_unfill; 950 } 951 if (!area->storage_domain) { 952 WARN_ON(iopt->next_domain_id != 0); 953 area->storage_domain = domain; 954 interval_tree_insert(&area->pages_node, 955 &pages->domains_itree); 956 } 957 mutex_unlock(&pages->mutex); 958 } 959 return 0; 960 961 out_unfill: 962 end_area = area; 963 for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; 964 area = iopt_area_iter_next(area, 0, ULONG_MAX)) { 965 struct iopt_pages *pages = area->pages; 966 967 if (area == end_area) 968 break; 969 if (!pages) 970 continue; 971 mutex_lock(&pages->mutex); 972 if (iopt->next_domain_id == 0) { 973 interval_tree_remove(&area->pages_node, 974 &pages->domains_itree); 975 area->storage_domain = NULL; 976 } 977 iopt_area_unfill_domain(area, pages, domain); 978 mutex_unlock(&pages->mutex); 979 } 980 return rc; 981 } 982 983 /* All existing area's conform to an increased page size */ 984 static int iopt_check_iova_alignment(struct io_pagetable *iopt, 985 unsigned long new_iova_alignment) 986 { 987 unsigned long align_mask = new_iova_alignment - 1; 988 struct iopt_area *area; 989 990 lockdep_assert_held(&iopt->iova_rwsem); 991 lockdep_assert_held(&iopt->domains_rwsem); 992 993 for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; 994 area = iopt_area_iter_next(area, 0, ULONG_MAX)) 995 if ((iopt_area_iova(area) & align_mask) || 996 (iopt_area_length(area) & align_mask) || 997 (area->page_offset & align_mask)) 998 return -EADDRINUSE; 999 1000 if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) { 1001 struct iommufd_access *access; 1002 unsigned long index; 1003 1004 xa_for_each(&iopt->access_list, index, access) 1005 if (WARN_ON(access->iova_alignment > 1006 new_iova_alignment)) 1007 return -EADDRINUSE; 1008 } 1009 return 0; 1010 } 1011 1012 int iopt_table_add_domain(struct io_pagetable *iopt, 1013 struct iommu_domain *domain) 1014 { 1015 const struct iommu_domain_geometry *geometry = &domain->geometry; 1016 struct iommu_domain *iter_domain; 1017 unsigned int new_iova_alignment; 1018 unsigned long index; 1019 int rc; 1020 1021 down_write(&iopt->domains_rwsem); 1022 down_write(&iopt->iova_rwsem); 1023 1024 xa_for_each(&iopt->domains, index, iter_domain) { 1025 if (WARN_ON(iter_domain == domain)) { 1026 rc = -EEXIST; 1027 goto out_unlock; 1028 } 1029 } 1030 1031 /* 1032 * The io page size drives the iova_alignment. Internally the iopt_pages 1033 * works in PAGE_SIZE units and we adjust when mapping sub-PAGE_SIZE 1034 * objects into the iommu_domain. 1035 * 1036 * A iommu_domain must always be able to accept PAGE_SIZE to be 1037 * compatible as we can't guarantee higher contiguity. 1038 */ 1039 new_iova_alignment = max_t(unsigned long, 1040 1UL << __ffs(domain->pgsize_bitmap), 1041 iopt->iova_alignment); 1042 if (new_iova_alignment > PAGE_SIZE) { 1043 rc = -EINVAL; 1044 goto out_unlock; 1045 } 1046 if (new_iova_alignment != iopt->iova_alignment) { 1047 rc = iopt_check_iova_alignment(iopt, new_iova_alignment); 1048 if (rc) 1049 goto out_unlock; 1050 } 1051 1052 /* No area exists that is outside the allowed domain aperture */ 1053 if (geometry->aperture_start != 0) { 1054 rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1, 1055 domain); 1056 if (rc) 1057 goto out_reserved; 1058 } 1059 if (geometry->aperture_end != ULONG_MAX) { 1060 rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1, 1061 ULONG_MAX, domain); 1062 if (rc) 1063 goto out_reserved; 1064 } 1065 1066 rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL); 1067 if (rc) 1068 goto out_reserved; 1069 1070 rc = iopt_fill_domain(iopt, domain); 1071 if (rc) 1072 goto out_release; 1073 1074 iopt->iova_alignment = new_iova_alignment; 1075 xa_store(&iopt->domains, iopt->next_domain_id, domain, GFP_KERNEL); 1076 iopt->next_domain_id++; 1077 up_write(&iopt->iova_rwsem); 1078 up_write(&iopt->domains_rwsem); 1079 return 0; 1080 out_release: 1081 xa_release(&iopt->domains, iopt->next_domain_id); 1082 out_reserved: 1083 __iopt_remove_reserved_iova(iopt, domain); 1084 out_unlock: 1085 up_write(&iopt->iova_rwsem); 1086 up_write(&iopt->domains_rwsem); 1087 return rc; 1088 } 1089 1090 static int iopt_calculate_iova_alignment(struct io_pagetable *iopt) 1091 { 1092 unsigned long new_iova_alignment; 1093 struct iommufd_access *access; 1094 struct iommu_domain *domain; 1095 unsigned long index; 1096 1097 lockdep_assert_held_write(&iopt->iova_rwsem); 1098 lockdep_assert_held(&iopt->domains_rwsem); 1099 1100 /* See batch_iommu_map_small() */ 1101 if (iopt->disable_large_pages) 1102 new_iova_alignment = PAGE_SIZE; 1103 else 1104 new_iova_alignment = 1; 1105 1106 xa_for_each(&iopt->domains, index, domain) 1107 new_iova_alignment = max_t(unsigned long, 1108 1UL << __ffs(domain->pgsize_bitmap), 1109 new_iova_alignment); 1110 xa_for_each(&iopt->access_list, index, access) 1111 new_iova_alignment = max_t(unsigned long, 1112 access->iova_alignment, 1113 new_iova_alignment); 1114 1115 if (new_iova_alignment > iopt->iova_alignment) { 1116 int rc; 1117 1118 rc = iopt_check_iova_alignment(iopt, new_iova_alignment); 1119 if (rc) 1120 return rc; 1121 } 1122 iopt->iova_alignment = new_iova_alignment; 1123 return 0; 1124 } 1125 1126 void iopt_table_remove_domain(struct io_pagetable *iopt, 1127 struct iommu_domain *domain) 1128 { 1129 struct iommu_domain *iter_domain = NULL; 1130 unsigned long index; 1131 1132 down_write(&iopt->domains_rwsem); 1133 down_write(&iopt->iova_rwsem); 1134 1135 xa_for_each(&iopt->domains, index, iter_domain) 1136 if (iter_domain == domain) 1137 break; 1138 if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id) 1139 goto out_unlock; 1140 1141 /* 1142 * Compress the xarray to keep it linear by swapping the entry to erase 1143 * with the tail entry and shrinking the tail. 1144 */ 1145 iopt->next_domain_id--; 1146 iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id); 1147 if (index != iopt->next_domain_id) 1148 xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL); 1149 1150 iopt_unfill_domain(iopt, domain); 1151 __iopt_remove_reserved_iova(iopt, domain); 1152 1153 WARN_ON(iopt_calculate_iova_alignment(iopt)); 1154 out_unlock: 1155 up_write(&iopt->iova_rwsem); 1156 up_write(&iopt->domains_rwsem); 1157 } 1158 1159 /** 1160 * iopt_area_split - Split an area into two parts at iova 1161 * @area: The area to split 1162 * @iova: Becomes the last of a new area 1163 * 1164 * This splits an area into two. It is part of the VFIO compatibility to allow 1165 * poking a hole in the mapping. The two areas continue to point at the same 1166 * iopt_pages, just with different starting bytes. 1167 */ 1168 static int iopt_area_split(struct iopt_area *area, unsigned long iova) 1169 { 1170 unsigned long alignment = area->iopt->iova_alignment; 1171 unsigned long last_iova = iopt_area_last_iova(area); 1172 unsigned long start_iova = iopt_area_iova(area); 1173 unsigned long new_start = iova + 1; 1174 struct io_pagetable *iopt = area->iopt; 1175 struct iopt_pages *pages = area->pages; 1176 struct iopt_area *lhs; 1177 struct iopt_area *rhs; 1178 int rc; 1179 1180 lockdep_assert_held_write(&iopt->iova_rwsem); 1181 1182 if (iova == start_iova || iova == last_iova) 1183 return 0; 1184 1185 if (!pages || area->prevent_access) 1186 return -EBUSY; 1187 1188 if (new_start & (alignment - 1) || 1189 iopt_area_start_byte(area, new_start) & (alignment - 1)) 1190 return -EINVAL; 1191 1192 lhs = iopt_area_alloc(); 1193 if (!lhs) 1194 return -ENOMEM; 1195 1196 rhs = iopt_area_alloc(); 1197 if (!rhs) { 1198 rc = -ENOMEM; 1199 goto err_free_lhs; 1200 } 1201 1202 mutex_lock(&pages->mutex); 1203 /* 1204 * Splitting is not permitted if an access exists, we don't track enough 1205 * information to split existing accesses. 1206 */ 1207 if (area->num_accesses) { 1208 rc = -EINVAL; 1209 goto err_unlock; 1210 } 1211 1212 /* 1213 * Splitting is not permitted if a domain could have been mapped with 1214 * huge pages. 1215 */ 1216 if (area->storage_domain && !iopt->disable_large_pages) { 1217 rc = -EINVAL; 1218 goto err_unlock; 1219 } 1220 1221 interval_tree_remove(&area->node, &iopt->area_itree); 1222 rc = iopt_insert_area(iopt, lhs, area->pages, start_iova, 1223 iopt_area_start_byte(area, start_iova), 1224 (new_start - 1) - start_iova + 1, 1225 area->iommu_prot); 1226 if (WARN_ON(rc)) 1227 goto err_insert; 1228 1229 rc = iopt_insert_area(iopt, rhs, area->pages, new_start, 1230 iopt_area_start_byte(area, new_start), 1231 last_iova - new_start + 1, area->iommu_prot); 1232 if (WARN_ON(rc)) 1233 goto err_remove_lhs; 1234 1235 /* 1236 * If the original area has filled a domain, domains_itree has to be 1237 * updated. 1238 */ 1239 if (area->storage_domain) { 1240 interval_tree_remove(&area->pages_node, &pages->domains_itree); 1241 interval_tree_insert(&lhs->pages_node, &pages->domains_itree); 1242 interval_tree_insert(&rhs->pages_node, &pages->domains_itree); 1243 } 1244 1245 lhs->storage_domain = area->storage_domain; 1246 lhs->pages = area->pages; 1247 rhs->storage_domain = area->storage_domain; 1248 rhs->pages = area->pages; 1249 kref_get(&rhs->pages->kref); 1250 kfree(area); 1251 mutex_unlock(&pages->mutex); 1252 1253 /* 1254 * No change to domains or accesses because the pages hasn't been 1255 * changed 1256 */ 1257 return 0; 1258 1259 err_remove_lhs: 1260 interval_tree_remove(&lhs->node, &iopt->area_itree); 1261 err_insert: 1262 interval_tree_insert(&area->node, &iopt->area_itree); 1263 err_unlock: 1264 mutex_unlock(&pages->mutex); 1265 kfree(rhs); 1266 err_free_lhs: 1267 kfree(lhs); 1268 return rc; 1269 } 1270 1271 int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas, 1272 size_t num_iovas) 1273 { 1274 int rc = 0; 1275 int i; 1276 1277 down_write(&iopt->iova_rwsem); 1278 for (i = 0; i < num_iovas; i++) { 1279 struct iopt_area *area; 1280 1281 area = iopt_area_iter_first(iopt, iovas[i], iovas[i]); 1282 if (!area) 1283 continue; 1284 rc = iopt_area_split(area, iovas[i]); 1285 if (rc) 1286 break; 1287 } 1288 up_write(&iopt->iova_rwsem); 1289 return rc; 1290 } 1291 1292 void iopt_enable_large_pages(struct io_pagetable *iopt) 1293 { 1294 int rc; 1295 1296 down_write(&iopt->domains_rwsem); 1297 down_write(&iopt->iova_rwsem); 1298 WRITE_ONCE(iopt->disable_large_pages, false); 1299 rc = iopt_calculate_iova_alignment(iopt); 1300 WARN_ON(rc); 1301 up_write(&iopt->iova_rwsem); 1302 up_write(&iopt->domains_rwsem); 1303 } 1304 1305 int iopt_disable_large_pages(struct io_pagetable *iopt) 1306 { 1307 int rc = 0; 1308 1309 down_write(&iopt->domains_rwsem); 1310 down_write(&iopt->iova_rwsem); 1311 if (iopt->disable_large_pages) 1312 goto out_unlock; 1313 1314 /* Won't do it if domains already have pages mapped in them */ 1315 if (!xa_empty(&iopt->domains) && 1316 !RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) { 1317 rc = -EINVAL; 1318 goto out_unlock; 1319 } 1320 1321 WRITE_ONCE(iopt->disable_large_pages, true); 1322 rc = iopt_calculate_iova_alignment(iopt); 1323 if (rc) 1324 WRITE_ONCE(iopt->disable_large_pages, false); 1325 out_unlock: 1326 up_write(&iopt->iova_rwsem); 1327 up_write(&iopt->domains_rwsem); 1328 return rc; 1329 } 1330 1331 int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access) 1332 { 1333 int rc; 1334 1335 down_write(&iopt->domains_rwsem); 1336 down_write(&iopt->iova_rwsem); 1337 rc = xa_alloc(&iopt->access_list, &access->iopt_access_list_id, access, 1338 xa_limit_16b, GFP_KERNEL_ACCOUNT); 1339 if (rc) 1340 goto out_unlock; 1341 1342 rc = iopt_calculate_iova_alignment(iopt); 1343 if (rc) { 1344 xa_erase(&iopt->access_list, access->iopt_access_list_id); 1345 goto out_unlock; 1346 } 1347 1348 out_unlock: 1349 up_write(&iopt->iova_rwsem); 1350 up_write(&iopt->domains_rwsem); 1351 return rc; 1352 } 1353 1354 void iopt_remove_access(struct io_pagetable *iopt, 1355 struct iommufd_access *access, 1356 u32 iopt_access_list_id) 1357 { 1358 down_write(&iopt->domains_rwsem); 1359 down_write(&iopt->iova_rwsem); 1360 WARN_ON(xa_erase(&iopt->access_list, iopt_access_list_id) != access); 1361 WARN_ON(iopt_calculate_iova_alignment(iopt)); 1362 up_write(&iopt->iova_rwsem); 1363 up_write(&iopt->domains_rwsem); 1364 } 1365 1366 /* Narrow the valid_iova_itree to include reserved ranges from a device. */ 1367 int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt, 1368 struct device *dev, 1369 phys_addr_t *sw_msi_start) 1370 { 1371 struct iommu_resv_region *resv; 1372 LIST_HEAD(resv_regions); 1373 unsigned int num_hw_msi = 0; 1374 unsigned int num_sw_msi = 0; 1375 int rc; 1376 1377 if (iommufd_should_fail()) 1378 return -EINVAL; 1379 1380 down_write(&iopt->iova_rwsem); 1381 /* FIXME: drivers allocate memory but there is no failure propogated */ 1382 iommu_get_resv_regions(dev, &resv_regions); 1383 1384 list_for_each_entry(resv, &resv_regions, list) { 1385 if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE) 1386 continue; 1387 1388 if (sw_msi_start && resv->type == IOMMU_RESV_MSI) 1389 num_hw_msi++; 1390 if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) { 1391 *sw_msi_start = resv->start; 1392 num_sw_msi++; 1393 } 1394 1395 rc = iopt_reserve_iova(iopt, resv->start, 1396 resv->length - 1 + resv->start, dev); 1397 if (rc) 1398 goto out_reserved; 1399 } 1400 1401 /* Drivers must offer sane combinations of regions */ 1402 if (WARN_ON(num_sw_msi && num_hw_msi) || WARN_ON(num_sw_msi > 1)) { 1403 rc = -EINVAL; 1404 goto out_reserved; 1405 } 1406 1407 rc = 0; 1408 goto out_free_resv; 1409 1410 out_reserved: 1411 __iopt_remove_reserved_iova(iopt, dev); 1412 out_free_resv: 1413 iommu_put_resv_regions(dev, &resv_regions); 1414 up_write(&iopt->iova_rwsem); 1415 return rc; 1416 } 1417