1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES 4 */ 5 #include <linux/file.h> 6 #include <linux/interval_tree.h> 7 #include <linux/iommu.h> 8 #include <linux/iommufd.h> 9 #include <uapi/linux/iommufd.h> 10 11 #include "io_pagetable.h" 12 13 void iommufd_ioas_destroy(struct iommufd_object *obj) 14 { 15 struct iommufd_ioas *ioas = container_of(obj, struct iommufd_ioas, obj); 16 int rc; 17 18 rc = iopt_unmap_all(&ioas->iopt, NULL); 19 WARN_ON(rc && rc != -ENOENT); 20 iopt_destroy_table(&ioas->iopt); 21 mutex_destroy(&ioas->mutex); 22 } 23 24 struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx) 25 { 26 struct iommufd_ioas *ioas; 27 28 ioas = iommufd_object_alloc(ictx, ioas, IOMMUFD_OBJ_IOAS); 29 if (IS_ERR(ioas)) 30 return ioas; 31 32 iopt_init_table(&ioas->iopt); 33 INIT_LIST_HEAD(&ioas->hwpt_list); 34 mutex_init(&ioas->mutex); 35 return ioas; 36 } 37 38 int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd) 39 { 40 struct iommu_ioas_alloc *cmd = ucmd->cmd; 41 struct iommufd_ioas *ioas; 42 int rc; 43 44 if (cmd->flags) 45 return -EOPNOTSUPP; 46 47 ioas = iommufd_ioas_alloc(ucmd->ictx); 48 if (IS_ERR(ioas)) 49 return PTR_ERR(ioas); 50 51 cmd->out_ioas_id = ioas->obj.id; 52 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); 53 if (rc) 54 goto out_table; 55 56 down_read(&ucmd->ictx->ioas_creation_lock); 57 iommufd_object_finalize(ucmd->ictx, &ioas->obj); 58 up_read(&ucmd->ictx->ioas_creation_lock); 59 return 0; 60 61 out_table: 62 iommufd_object_abort_and_destroy(ucmd->ictx, &ioas->obj); 63 return rc; 64 } 65 66 int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd) 67 { 68 struct iommu_iova_range __user *ranges; 69 struct iommu_ioas_iova_ranges *cmd = ucmd->cmd; 70 struct iommufd_ioas *ioas; 71 struct interval_tree_span_iter span; 72 u32 max_iovas; 73 int rc; 74 75 if (cmd->__reserved) 76 return -EOPNOTSUPP; 77 78 ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id); 79 if (IS_ERR(ioas)) 80 return PTR_ERR(ioas); 81 82 down_read(&ioas->iopt.iova_rwsem); 83 max_iovas = cmd->num_iovas; 84 ranges = u64_to_user_ptr(cmd->allowed_iovas); 85 cmd->num_iovas = 0; 86 cmd->out_iova_alignment = ioas->iopt.iova_alignment; 87 interval_tree_for_each_span(&span, &ioas->iopt.reserved_itree, 0, 88 ULONG_MAX) { 89 if (!span.is_hole) 90 continue; 91 if (cmd->num_iovas < max_iovas) { 92 struct iommu_iova_range elm = { 93 .start = span.start_hole, 94 .last = span.last_hole, 95 }; 96 97 if (copy_to_user(&ranges[cmd->num_iovas], &elm, 98 sizeof(elm))) { 99 rc = -EFAULT; 100 goto out_put; 101 } 102 } 103 cmd->num_iovas++; 104 } 105 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); 106 if (rc) 107 goto out_put; 108 if (cmd->num_iovas > max_iovas) 109 rc = -EMSGSIZE; 110 out_put: 111 up_read(&ioas->iopt.iova_rwsem); 112 iommufd_put_object(ucmd->ictx, &ioas->obj); 113 return rc; 114 } 115 116 static int iommufd_ioas_load_iovas(struct rb_root_cached *itree, 117 struct iommu_iova_range __user *ranges, 118 u32 num) 119 { 120 u32 i; 121 122 for (i = 0; i != num; i++) { 123 struct iommu_iova_range range; 124 struct iopt_allowed *allowed; 125 126 if (copy_from_user(&range, ranges + i, sizeof(range))) 127 return -EFAULT; 128 129 if (range.start >= range.last) 130 return -EINVAL; 131 132 if (interval_tree_iter_first(itree, range.start, range.last)) 133 return -EINVAL; 134 135 allowed = kzalloc(sizeof(*allowed), GFP_KERNEL_ACCOUNT); 136 if (!allowed) 137 return -ENOMEM; 138 allowed->node.start = range.start; 139 allowed->node.last = range.last; 140 141 interval_tree_insert(&allowed->node, itree); 142 } 143 return 0; 144 } 145 146 int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd) 147 { 148 struct iommu_ioas_allow_iovas *cmd = ucmd->cmd; 149 struct rb_root_cached allowed_iova = RB_ROOT_CACHED; 150 struct interval_tree_node *node; 151 struct iommufd_ioas *ioas; 152 struct io_pagetable *iopt; 153 int rc = 0; 154 155 if (cmd->__reserved) 156 return -EOPNOTSUPP; 157 158 ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id); 159 if (IS_ERR(ioas)) 160 return PTR_ERR(ioas); 161 iopt = &ioas->iopt; 162 163 rc = iommufd_ioas_load_iovas(&allowed_iova, 164 u64_to_user_ptr(cmd->allowed_iovas), 165 cmd->num_iovas); 166 if (rc) 167 goto out_free; 168 169 /* 170 * We want the allowed tree update to be atomic, so we have to keep the 171 * original nodes around, and keep track of the new nodes as we allocate 172 * memory for them. The simplest solution is to have a new/old tree and 173 * then swap new for old. On success we free the old tree, on failure we 174 * free the new tree. 175 */ 176 rc = iopt_set_allow_iova(iopt, &allowed_iova); 177 out_free: 178 while ((node = interval_tree_iter_first(&allowed_iova, 0, ULONG_MAX))) { 179 interval_tree_remove(node, &allowed_iova); 180 kfree(container_of(node, struct iopt_allowed, node)); 181 } 182 iommufd_put_object(ucmd->ictx, &ioas->obj); 183 return rc; 184 } 185 186 static int conv_iommu_prot(u32 map_flags) 187 { 188 /* 189 * We provide no manual cache coherency ioctls to userspace and most 190 * architectures make the CPU ops for cache flushing privileged. 191 * Therefore we require the underlying IOMMU to support CPU coherent 192 * operation. Support for IOMMU_CACHE is enforced by the 193 * IOMMU_CAP_CACHE_COHERENCY test during bind. 194 */ 195 int iommu_prot = IOMMU_CACHE; 196 197 if (map_flags & IOMMU_IOAS_MAP_WRITEABLE) 198 iommu_prot |= IOMMU_WRITE; 199 if (map_flags & IOMMU_IOAS_MAP_READABLE) 200 iommu_prot |= IOMMU_READ; 201 return iommu_prot; 202 } 203 204 int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd) 205 { 206 struct iommu_ioas_map_file *cmd = ucmd->cmd; 207 unsigned long iova = cmd->iova; 208 struct iommufd_ioas *ioas; 209 unsigned int flags = 0; 210 struct file *file; 211 int rc; 212 213 if (cmd->flags & 214 ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE | 215 IOMMU_IOAS_MAP_READABLE)) 216 return -EOPNOTSUPP; 217 218 if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX) 219 return -EOVERFLOW; 220 221 if (!(cmd->flags & 222 (IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE))) 223 return -EINVAL; 224 225 ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id); 226 if (IS_ERR(ioas)) 227 return PTR_ERR(ioas); 228 229 if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA)) 230 flags = IOPT_ALLOC_IOVA; 231 232 file = fget(cmd->fd); 233 if (!file) 234 return -EBADF; 235 236 rc = iopt_map_file_pages(ucmd->ictx, &ioas->iopt, &iova, file, 237 cmd->start, cmd->length, 238 conv_iommu_prot(cmd->flags), flags); 239 if (rc) 240 goto out_put; 241 242 cmd->iova = iova; 243 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); 244 out_put: 245 iommufd_put_object(ucmd->ictx, &ioas->obj); 246 fput(file); 247 return rc; 248 } 249 250 int iommufd_ioas_map(struct iommufd_ucmd *ucmd) 251 { 252 struct iommu_ioas_map *cmd = ucmd->cmd; 253 unsigned long iova = cmd->iova; 254 struct iommufd_ioas *ioas; 255 unsigned int flags = 0; 256 int rc; 257 258 if ((cmd->flags & 259 ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE | 260 IOMMU_IOAS_MAP_READABLE)) || 261 cmd->__reserved) 262 return -EOPNOTSUPP; 263 if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX) 264 return -EOVERFLOW; 265 266 if (!(cmd->flags & 267 (IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE))) 268 return -EINVAL; 269 270 ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id); 271 if (IS_ERR(ioas)) 272 return PTR_ERR(ioas); 273 274 if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA)) 275 flags = IOPT_ALLOC_IOVA; 276 rc = iopt_map_user_pages(ucmd->ictx, &ioas->iopt, &iova, 277 u64_to_user_ptr(cmd->user_va), cmd->length, 278 conv_iommu_prot(cmd->flags), flags); 279 if (rc) 280 goto out_put; 281 282 cmd->iova = iova; 283 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); 284 out_put: 285 iommufd_put_object(ucmd->ictx, &ioas->obj); 286 return rc; 287 } 288 289 int iommufd_ioas_copy(struct iommufd_ucmd *ucmd) 290 { 291 struct iommu_ioas_copy *cmd = ucmd->cmd; 292 struct iommufd_ioas *src_ioas; 293 struct iommufd_ioas *dst_ioas; 294 unsigned int flags = 0; 295 LIST_HEAD(pages_list); 296 unsigned long iova; 297 int rc; 298 299 iommufd_test_syz_conv_iova_id(ucmd, cmd->src_ioas_id, &cmd->src_iova, 300 &cmd->flags); 301 302 if ((cmd->flags & 303 ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE | 304 IOMMU_IOAS_MAP_READABLE))) 305 return -EOPNOTSUPP; 306 if (cmd->length >= ULONG_MAX || cmd->src_iova >= ULONG_MAX || 307 cmd->dst_iova >= ULONG_MAX) 308 return -EOVERFLOW; 309 310 if (!(cmd->flags & 311 (IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE))) 312 return -EINVAL; 313 314 src_ioas = iommufd_get_ioas(ucmd->ictx, cmd->src_ioas_id); 315 if (IS_ERR(src_ioas)) 316 return PTR_ERR(src_ioas); 317 rc = iopt_get_pages(&src_ioas->iopt, cmd->src_iova, cmd->length, 318 &pages_list); 319 iommufd_put_object(ucmd->ictx, &src_ioas->obj); 320 if (rc) 321 return rc; 322 323 dst_ioas = iommufd_get_ioas(ucmd->ictx, cmd->dst_ioas_id); 324 if (IS_ERR(dst_ioas)) { 325 rc = PTR_ERR(dst_ioas); 326 goto out_pages; 327 } 328 329 if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA)) 330 flags = IOPT_ALLOC_IOVA; 331 iova = cmd->dst_iova; 332 rc = iopt_map_pages(&dst_ioas->iopt, &pages_list, cmd->length, &iova, 333 conv_iommu_prot(cmd->flags), flags); 334 if (rc) 335 goto out_put_dst; 336 337 cmd->dst_iova = iova; 338 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); 339 out_put_dst: 340 iommufd_put_object(ucmd->ictx, &dst_ioas->obj); 341 out_pages: 342 iopt_free_pages_list(&pages_list); 343 return rc; 344 } 345 346 int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd) 347 { 348 struct iommu_ioas_unmap *cmd = ucmd->cmd; 349 struct iommufd_ioas *ioas; 350 unsigned long unmapped = 0; 351 int rc; 352 353 ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id); 354 if (IS_ERR(ioas)) 355 return PTR_ERR(ioas); 356 357 if (cmd->iova == 0 && cmd->length == U64_MAX) { 358 rc = iopt_unmap_all(&ioas->iopt, &unmapped); 359 if (rc) 360 goto out_put; 361 } else { 362 if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX) { 363 rc = -EOVERFLOW; 364 goto out_put; 365 } 366 rc = iopt_unmap_iova(&ioas->iopt, cmd->iova, cmd->length, 367 &unmapped); 368 if (rc) 369 goto out_put; 370 if (!unmapped) { 371 rc = -ENOENT; 372 goto out_put; 373 } 374 } 375 376 cmd->length = unmapped; 377 rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); 378 379 out_put: 380 iommufd_put_object(ucmd->ictx, &ioas->obj); 381 return rc; 382 } 383 384 static void iommufd_release_all_iova_rwsem(struct iommufd_ctx *ictx, 385 struct xarray *ioas_list) 386 { 387 struct iommufd_ioas *ioas; 388 unsigned long index; 389 390 xa_for_each(ioas_list, index, ioas) { 391 up_write(&ioas->iopt.iova_rwsem); 392 refcount_dec(&ioas->obj.users); 393 } 394 up_write(&ictx->ioas_creation_lock); 395 xa_destroy(ioas_list); 396 } 397 398 static int iommufd_take_all_iova_rwsem(struct iommufd_ctx *ictx, 399 struct xarray *ioas_list) 400 { 401 struct iommufd_object *obj; 402 unsigned long index; 403 int rc; 404 405 /* 406 * This is very ugly, it is done instead of adding a lock around 407 * pages->source_mm, which is a performance path for mdev, we just 408 * obtain the write side of all the iova_rwsems which also protects the 409 * pages->source_*. Due to copies we can't know which IOAS could read 410 * from the pages, so we just lock everything. This is the only place 411 * locks are nested and they are uniformly taken in ID order. 412 * 413 * ioas_creation_lock prevents new IOAS from being installed in the 414 * xarray while we do this, and also prevents more than one thread from 415 * holding nested locks. 416 */ 417 down_write(&ictx->ioas_creation_lock); 418 xa_lock(&ictx->objects); 419 xa_for_each(&ictx->objects, index, obj) { 420 struct iommufd_ioas *ioas; 421 422 if (!obj || obj->type != IOMMUFD_OBJ_IOAS) 423 continue; 424 425 if (!refcount_inc_not_zero(&obj->users)) 426 continue; 427 428 xa_unlock(&ictx->objects); 429 430 ioas = container_of(obj, struct iommufd_ioas, obj); 431 down_write_nest_lock(&ioas->iopt.iova_rwsem, 432 &ictx->ioas_creation_lock); 433 434 rc = xa_err(xa_store(ioas_list, index, ioas, GFP_KERNEL)); 435 if (rc) { 436 iommufd_release_all_iova_rwsem(ictx, ioas_list); 437 return rc; 438 } 439 440 xa_lock(&ictx->objects); 441 } 442 xa_unlock(&ictx->objects); 443 return 0; 444 } 445 446 static bool need_charge_update(struct iopt_pages *pages) 447 { 448 switch (pages->account_mode) { 449 case IOPT_PAGES_ACCOUNT_NONE: 450 return false; 451 case IOPT_PAGES_ACCOUNT_MM: 452 return pages->source_mm != current->mm; 453 case IOPT_PAGES_ACCOUNT_USER: 454 /* 455 * Update when mm changes because it also accounts 456 * in mm->pinned_vm. 457 */ 458 return (pages->source_user != current_user()) || 459 (pages->source_mm != current->mm); 460 } 461 return true; 462 } 463 464 static int charge_current(unsigned long *npinned) 465 { 466 struct iopt_pages tmp = { 467 .source_mm = current->mm, 468 .source_task = current->group_leader, 469 .source_user = current_user(), 470 }; 471 unsigned int account_mode; 472 int rc; 473 474 for (account_mode = 0; account_mode != IOPT_PAGES_ACCOUNT_MODE_NUM; 475 account_mode++) { 476 if (!npinned[account_mode]) 477 continue; 478 479 tmp.account_mode = account_mode; 480 rc = iopt_pages_update_pinned(&tmp, npinned[account_mode], true, 481 NULL); 482 if (rc) 483 goto err_undo; 484 } 485 return 0; 486 487 err_undo: 488 while (account_mode != 0) { 489 account_mode--; 490 if (!npinned[account_mode]) 491 continue; 492 tmp.account_mode = account_mode; 493 iopt_pages_update_pinned(&tmp, npinned[account_mode], false, 494 NULL); 495 } 496 return rc; 497 } 498 499 static void change_mm(struct iopt_pages *pages) 500 { 501 struct task_struct *old_task = pages->source_task; 502 struct user_struct *old_user = pages->source_user; 503 struct mm_struct *old_mm = pages->source_mm; 504 505 pages->source_mm = current->mm; 506 mmgrab(pages->source_mm); 507 mmdrop(old_mm); 508 509 pages->source_task = current->group_leader; 510 get_task_struct(pages->source_task); 511 put_task_struct(old_task); 512 513 pages->source_user = get_uid(current_user()); 514 free_uid(old_user); 515 } 516 517 #define for_each_ioas_area(_xa, _index, _ioas, _area) \ 518 xa_for_each((_xa), (_index), (_ioas)) \ 519 for (_area = iopt_area_iter_first(&_ioas->iopt, 0, ULONG_MAX); \ 520 _area; \ 521 _area = iopt_area_iter_next(_area, 0, ULONG_MAX)) 522 523 int iommufd_ioas_change_process(struct iommufd_ucmd *ucmd) 524 { 525 struct iommu_ioas_change_process *cmd = ucmd->cmd; 526 struct iommufd_ctx *ictx = ucmd->ictx; 527 unsigned long all_npinned[IOPT_PAGES_ACCOUNT_MODE_NUM] = {}; 528 struct iommufd_ioas *ioas; 529 struct iopt_area *area; 530 struct iopt_pages *pages; 531 struct xarray ioas_list; 532 unsigned long index; 533 int rc; 534 535 if (cmd->__reserved) 536 return -EOPNOTSUPP; 537 538 xa_init(&ioas_list); 539 rc = iommufd_take_all_iova_rwsem(ictx, &ioas_list); 540 if (rc) 541 return rc; 542 543 for_each_ioas_area(&ioas_list, index, ioas, area) { 544 if (area->pages->type != IOPT_ADDRESS_FILE) { 545 rc = -EINVAL; 546 goto out; 547 } 548 } 549 550 /* 551 * Count last_pinned pages, then clear it to avoid double counting 552 * if the same iopt_pages is visited multiple times in this loop. 553 * Since we are under all the locks, npinned == last_npinned, so we 554 * can easily restore last_npinned before we return. 555 */ 556 for_each_ioas_area(&ioas_list, index, ioas, area) { 557 pages = area->pages; 558 559 if (need_charge_update(pages)) { 560 all_npinned[pages->account_mode] += pages->last_npinned; 561 pages->last_npinned = 0; 562 } 563 } 564 565 rc = charge_current(all_npinned); 566 567 if (rc) { 568 /* Charge failed. Fix last_npinned and bail. */ 569 for_each_ioas_area(&ioas_list, index, ioas, area) 570 area->pages->last_npinned = area->pages->npinned; 571 goto out; 572 } 573 574 for_each_ioas_area(&ioas_list, index, ioas, area) { 575 pages = area->pages; 576 577 /* Uncharge the old one (which also restores last_npinned) */ 578 if (need_charge_update(pages)) { 579 int r = iopt_pages_update_pinned(pages, pages->npinned, 580 false, NULL); 581 582 if (WARN_ON(r)) 583 rc = r; 584 } 585 change_mm(pages); 586 } 587 588 out: 589 iommufd_release_all_iova_rwsem(ictx, &ioas_list); 590 return rc; 591 } 592 593 int iommufd_option_rlimit_mode(struct iommu_option *cmd, 594 struct iommufd_ctx *ictx) 595 { 596 if (cmd->object_id) 597 return -EOPNOTSUPP; 598 599 if (cmd->op == IOMMU_OPTION_OP_GET) { 600 cmd->val64 = ictx->account_mode == IOPT_PAGES_ACCOUNT_MM; 601 return 0; 602 } 603 if (cmd->op == IOMMU_OPTION_OP_SET) { 604 int rc = 0; 605 606 if (!capable(CAP_SYS_RESOURCE)) 607 return -EPERM; 608 609 xa_lock(&ictx->objects); 610 if (!xa_empty(&ictx->objects)) { 611 rc = -EBUSY; 612 } else { 613 if (cmd->val64 == 0) 614 ictx->account_mode = IOPT_PAGES_ACCOUNT_USER; 615 else if (cmd->val64 == 1) 616 ictx->account_mode = IOPT_PAGES_ACCOUNT_MM; 617 else 618 rc = -EINVAL; 619 } 620 xa_unlock(&ictx->objects); 621 622 return rc; 623 } 624 return -EOPNOTSUPP; 625 } 626 627 static int iommufd_ioas_option_huge_pages(struct iommu_option *cmd, 628 struct iommufd_ioas *ioas) 629 { 630 if (cmd->op == IOMMU_OPTION_OP_GET) { 631 cmd->val64 = !ioas->iopt.disable_large_pages; 632 return 0; 633 } 634 if (cmd->op == IOMMU_OPTION_OP_SET) { 635 if (cmd->val64 == 0) 636 return iopt_disable_large_pages(&ioas->iopt); 637 if (cmd->val64 == 1) { 638 iopt_enable_large_pages(&ioas->iopt); 639 return 0; 640 } 641 return -EINVAL; 642 } 643 return -EOPNOTSUPP; 644 } 645 646 int iommufd_ioas_option(struct iommufd_ucmd *ucmd) 647 { 648 struct iommu_option *cmd = ucmd->cmd; 649 struct iommufd_ioas *ioas; 650 int rc = 0; 651 652 if (cmd->__reserved) 653 return -EOPNOTSUPP; 654 655 ioas = iommufd_get_ioas(ucmd->ictx, cmd->object_id); 656 if (IS_ERR(ioas)) 657 return PTR_ERR(ioas); 658 659 switch (cmd->option_id) { 660 case IOMMU_OPTION_HUGE_PAGES: 661 rc = iommufd_ioas_option_huge_pages(cmd, ioas); 662 break; 663 default: 664 rc = -EOPNOTSUPP; 665 } 666 667 iommufd_put_object(ucmd->ictx, &ioas->obj); 668 return rc; 669 } 670