// SPDX-License-Identifier: GPL-2.0-only
/*
 * MMU-based software IOTLB.
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/slab.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/vdpa.h>

#include "iova_domain.h"

static int vduse_iotlb_add_range(struct vduse_iova_domain *domain,
				 u64 start, u64 last,
				 u64 addr, unsigned int perm,
				 struct file *file, u64 offset)
{
	struct vdpa_map_file *map_file;
	int ret;

	map_file = kmalloc(sizeof(*map_file), GFP_ATOMIC);
	if (!map_file)
		return -ENOMEM;

	map_file->file = get_file(file);
	map_file->offset = offset;

	ret = vhost_iotlb_add_range_ctx(domain->iotlb, start, last,
					addr, perm, map_file);
	if (ret) {
		fput(map_file->file);
		kfree(map_file);
		return ret;
	}
	return 0;
}

static void vduse_iotlb_del_range(struct vduse_iova_domain *domain,
				  u64 start, u64 last)
{
	struct vdpa_map_file *map_file;
	struct vhost_iotlb_map *map;

	while ((map = vhost_iotlb_itree_first(domain->iotlb, start, last))) {
		map_file = (struct vdpa_map_file *)map->opaque;
		fput(map_file->file);
		kfree(map_file);
		vhost_iotlb_map_free(domain->iotlb, map);
	}
}

int vduse_domain_set_map(struct vduse_iova_domain *domain,
			 struct vhost_iotlb *iotlb)
{
	struct vdpa_map_file *map_file;
	struct vhost_iotlb_map *map;
	u64 start = 0ULL, last = ULLONG_MAX;
	int ret;

	spin_lock(&domain->iotlb_lock);
	vduse_iotlb_del_range(domain, start, last);

	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		map_file = (struct vdpa_map_file *)map->opaque;
		ret = vduse_iotlb_add_range(domain, map->start, map->last,
					    map->addr, map->perm,
					    map_file->file,
					    map_file->offset);
		if (ret)
			goto err;
	}
	spin_unlock(&domain->iotlb_lock);

	return 0;
err:
	vduse_iotlb_del_range(domain, start, last);
	spin_unlock(&domain->iotlb_lock);
	return ret;
}

void vduse_domain_clear_map(struct vduse_iova_domain *domain,
			    struct vhost_iotlb *iotlb)
{
	struct vhost_iotlb_map *map;
	u64 start = 0ULL, last = ULLONG_MAX;

	spin_lock(&domain->iotlb_lock);
	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		vduse_iotlb_del_range(domain, map->start, map->last);
	}
	spin_unlock(&domain->iotlb_lock);
}

static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
					u64 iova, u64 size, u64 paddr)
{
	struct vduse_bounce_map *map;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		if (!map->bounce_page) {
			map->bounce_page = alloc_page(GFP_ATOMIC);
			if (!map->bounce_page)
				return -ENOMEM;
		}
		map->orig_phys = paddr;
		paddr += PAGE_SIZE;
		iova += PAGE_SIZE;
	}
	return 0;
}

static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
					   u64 iova, u64 size)
{
	struct vduse_bounce_map *map;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		map->orig_phys = INVALID_PHYS_ADDR;
		iova += PAGE_SIZE;
	}
}
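/*
 * Copy data between the pages backing the original buffer and a bounce
 * page. For DMA_TO_DEVICE the original pages are copied into the bounce
 * buffer; for all other directions the bounce buffer is copied back to
 * the original pages.
 */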
static void do_bounce(phys_addr_t orig, void *addr, size_t size,
		      enum dma_data_direction dir)
{
	unsigned long pfn = PFN_DOWN(orig);
	unsigned int offset = offset_in_page(orig);
	struct page *page;
	unsigned int sz = 0;

	while (size) {
		sz = min_t(size_t, PAGE_SIZE - offset, size);

		page = pfn_to_page(pfn);
		if (dir == DMA_TO_DEVICE)
			memcpy_from_page(addr, page, offset, sz);
		else
			memcpy_to_page(page, offset, addr, sz);

		size -= sz;
		pfn++;
		addr += sz;
		offset = 0;
	}
}

static void vduse_domain_bounce(struct vduse_iova_domain *domain,
				dma_addr_t iova, size_t size,
				enum dma_data_direction dir)
{
	struct vduse_bounce_map *map;
	struct page *page;
	unsigned int offset;
	void *addr;
	size_t sz;

	if (iova >= domain->bounce_size)
		return;

	while (size) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		offset = offset_in_page(iova);
		sz = min_t(size_t, PAGE_SIZE - offset, size);

		if (WARN_ON(!map->bounce_page ||
			    map->orig_phys == INVALID_PHYS_ADDR))
			return;

		page = domain->user_bounce_pages ?
		       map->user_bounce_page : map->bounce_page;

		addr = kmap_local_page(page);
		do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
		kunmap_local(addr);
		size -= sz;
		iova += sz;
	}
}

static struct page *
vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova)
{
	u64 start = iova & PAGE_MASK;
	u64 last = start + PAGE_SIZE - 1;
	struct vhost_iotlb_map *map;
	struct page *page = NULL;

	spin_lock(&domain->iotlb_lock);
	map = vhost_iotlb_itree_first(domain->iotlb, start, last);
	if (!map)
		goto out;

	page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT);
	get_page(page);
out:
	spin_unlock(&domain->iotlb_lock);

	return page;
}

static struct page *
vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
{
	struct vduse_bounce_map *map;
	struct page *page = NULL;

	read_lock(&domain->bounce_lock);
	map = &domain->bounce_maps[iova >> PAGE_SHIFT];
	if (domain->user_bounce_pages || !map->bounce_page)
		goto out;

	page = map->bounce_page;
	get_page(page);
out:
	read_unlock(&domain->bounce_lock);

	return page;
}

static void
vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
{
	struct vduse_bounce_map *map;
	unsigned long pfn, bounce_pfns;

	bounce_pfns = domain->bounce_size >> PAGE_SHIFT;

	for (pfn = 0; pfn < bounce_pfns; pfn++) {
		map = &domain->bounce_maps[pfn];
		if (WARN_ON(map->orig_phys != INVALID_PHYS_ADDR))
			continue;

		if (!map->bounce_page)
			continue;

		__free_page(map->bounce_page);
		map->bounce_page = NULL;
	}
}
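/*
 * Switch the bounce buffer over to pages supplied by userspace. Bounce
 * pages that currently back an in-flight mapping (orig_phys !=
 * INVALID_PHYS_ADDR) have their contents copied into the corresponding
 * user page first, so no data is lost during the switch.
 */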
int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
				       struct page **pages, int count)
{
	struct vduse_bounce_map *map;
	int i, ret;

	/* Now we don't support partial mapping */
	if (count != (domain->bounce_size >> PAGE_SHIFT))
		return -EINVAL;

	write_lock(&domain->bounce_lock);
	ret = -EEXIST;
	if (domain->user_bounce_pages)
		goto out;

	for (i = 0; i < count; i++) {
		map = &domain->bounce_maps[i];
		if (map->bounce_page) {
			/* Copy kernel page to user page if it's in use */
			if (map->orig_phys != INVALID_PHYS_ADDR)
				memcpy_to_page(pages[i], 0,
					       page_address(map->bounce_page),
					       PAGE_SIZE);
		}
		map->user_bounce_page = pages[i];
		get_page(pages[i]);
	}
	domain->user_bounce_pages = true;
	ret = 0;
out:
	write_unlock(&domain->bounce_lock);

	return ret;
}

void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
{
	struct vduse_bounce_map *map;
	unsigned long i, count;

	write_lock(&domain->bounce_lock);
	if (!domain->user_bounce_pages)
		goto out;

	count = domain->bounce_size >> PAGE_SHIFT;
	for (i = 0; i < count; i++) {
		struct page *page = NULL;

		map = &domain->bounce_maps[i];
		if (WARN_ON(!map->user_bounce_page))
			continue;

		/* Copy user page to kernel page if it's in use */
		if (map->orig_phys != INVALID_PHYS_ADDR) {
			page = map->bounce_page;
			memcpy_from_page(page_address(page),
					 map->user_bounce_page, 0, PAGE_SIZE);
		}
		put_page(map->user_bounce_page);
		map->user_bounce_page = NULL;
	}
	domain->user_bounce_pages = false;
out:
	write_unlock(&domain->bounce_lock);
}

void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
{
	if (!domain->bounce_map)
		return;

	spin_lock(&domain->iotlb_lock);
	if (!domain->bounce_map)
		goto unlock;

	vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1);
	domain->bounce_map = 0;
unlock:
	spin_unlock(&domain->iotlb_lock);
}

static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain)
{
	int ret = 0;

	if (domain->bounce_map)
		return 0;

	spin_lock(&domain->iotlb_lock);
	if (domain->bounce_map)
		goto unlock;

	ret = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1,
				    0, VHOST_MAP_RW, domain->file, 0);
	if (ret)
		goto unlock;

	domain->bounce_map = 1;
unlock:
	spin_unlock(&domain->iotlb_lock);
	return ret;
}

static dma_addr_t
vduse_domain_alloc_iova(struct iova_domain *iovad,
			unsigned long size, unsigned long limit)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long iova_len = iova_align(iovad, size) >> shift;
	unsigned long iova_pfn;

	iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);

	return (dma_addr_t)iova_pfn << shift;
}

static void vduse_domain_free_iova(struct iova_domain *iovad,
				   dma_addr_t iova, size_t size)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long iova_len = iova_align(iovad, size) >> shift;

	free_iova_fast(iovad, iova >> shift, iova_len);
}

void vduse_domain_sync_single_for_device(struct vduse_iova_domain *domain,
					 dma_addr_t dma_addr, size_t size,
					 enum dma_data_direction dir)
{
	read_lock(&domain->bounce_lock);
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
		vduse_domain_bounce(domain, dma_addr, size, DMA_TO_DEVICE);
	read_unlock(&domain->bounce_lock);
}

void vduse_domain_sync_single_for_cpu(struct vduse_iova_domain *domain,
				      dma_addr_t dma_addr, size_t size,
				      enum dma_data_direction dir)
{
	read_lock(&domain->bounce_lock);
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
	read_unlock(&domain->bounce_lock);
}
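/*
 * Streaming DMA mapping: allocate an IOVA from the stream_iovad, back
 * it with bounce pages, and (unless DMA_ATTR_SKIP_CPU_SYNC is set) sync
 * the original buffer into the bounce buffer for device-bound transfers.
 */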
dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
				 struct page *page, unsigned long offset,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct iova_domain *iovad = &domain->stream_iovad;
	unsigned long limit = domain->bounce_size - 1;
	phys_addr_t pa = page_to_phys(page) + offset;
	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);

	if (!iova)
		return DMA_MAPPING_ERROR;

	if (vduse_domain_init_bounce_map(domain))
		goto err;

	read_lock(&domain->bounce_lock);
	if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
		goto err_unlock;

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
		vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);

	read_unlock(&domain->bounce_lock);

	return iova;
err_unlock:
	read_unlock(&domain->bounce_lock);
err:
	vduse_domain_free_iova(iovad, iova, size);
	return DMA_MAPPING_ERROR;
}

void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
			     dma_addr_t dma_addr, size_t size,
			     enum dma_data_direction dir, unsigned long attrs)
{
	struct iova_domain *iovad = &domain->stream_iovad;

	read_lock(&domain->bounce_lock);
	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
	    (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);

	vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
	read_unlock(&domain->bounce_lock);
	vduse_domain_free_iova(iovad, dma_addr, size);
}

void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
				  size_t size, dma_addr_t *dma_addr,
				  gfp_t flag, unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	unsigned long limit = domain->iova_limit;
	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
	void *orig = alloc_pages_exact(size, flag);

	if (!iova || !orig)
		goto err;

	spin_lock(&domain->iotlb_lock);
	if (vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1,
				  virt_to_phys(orig), VHOST_MAP_RW,
				  domain->file, (u64)iova)) {
		spin_unlock(&domain->iotlb_lock);
		goto err;
	}
	spin_unlock(&domain->iotlb_lock);

	*dma_addr = iova;

	return orig;
err:
	*dma_addr = DMA_MAPPING_ERROR;
	if (orig)
		free_pages_exact(orig, size);
	if (iova)
		vduse_domain_free_iova(iovad, iova, size);

	return NULL;
}

void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
				void *vaddr, dma_addr_t dma_addr,
				unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	struct vhost_iotlb_map *map;
	struct vdpa_map_file *map_file;
	phys_addr_t pa;

	spin_lock(&domain->iotlb_lock);
	map = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr,
				      (u64)dma_addr + size - 1);
	if (WARN_ON(!map)) {
		spin_unlock(&domain->iotlb_lock);
		return;
	}
	map_file = (struct vdpa_map_file *)map->opaque;
	fput(map_file->file);
	kfree(map_file);
	pa = map->addr;
	vhost_iotlb_map_free(domain->iotlb, map);
	spin_unlock(&domain->iotlb_lock);

	vduse_domain_free_iova(iovad, dma_addr, size);
	free_pages_exact(phys_to_virt(pa), size);
}
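/*
 * Fault handler for userspace mappings of the domain file: IOVAs below
 * bounce_size are served from the bounce pages, anything above from the
 * pages backing coherent allocations in the IOTLB.
 */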
static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf)
{
	struct vduse_iova_domain *domain = vmf->vma->vm_private_data;
	unsigned long iova = vmf->pgoff << PAGE_SHIFT;
	struct page *page;

	if (!domain)
		return VM_FAULT_SIGBUS;

	if (iova < domain->bounce_size)
		page = vduse_domain_get_bounce_page(domain, iova);
	else
		page = vduse_domain_get_coherent_page(domain, iova);

	if (!page)
		return VM_FAULT_SIGBUS;

	vmf->page = page;

	return 0;
}

static const struct vm_operations_struct vduse_domain_mmap_ops = {
	.fault = vduse_domain_mmap_fault,
};

static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vduse_iova_domain *domain = file->private_data;

	vm_flags_set(vma, VM_DONTDUMP | VM_DONTEXPAND);
	vma->vm_private_data = domain;
	vma->vm_ops = &vduse_domain_mmap_ops;

	return 0;
}

static int vduse_domain_release(struct inode *inode, struct file *file)
{
	struct vduse_iova_domain *domain = file->private_data;

	spin_lock(&domain->iotlb_lock);
	vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
	vduse_domain_remove_user_bounce_pages(domain);
	vduse_domain_free_kernel_bounce_pages(domain);
	spin_unlock(&domain->iotlb_lock);
	put_iova_domain(&domain->stream_iovad);
	put_iova_domain(&domain->consistent_iovad);
	vhost_iotlb_free(domain->iotlb);
	vfree(domain->bounce_maps);
	kfree(domain);

	return 0;
}

static const struct file_operations vduse_domain_fops = {
	.owner = THIS_MODULE,
	.mmap = vduse_domain_mmap,
	.release = vduse_domain_release,
};

void vduse_domain_destroy(struct vduse_iova_domain *domain)
{
	fput(domain->file);
}

struct vduse_iova_domain *
vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
{
	struct vduse_iova_domain *domain;
	struct file *file;
	struct vduse_bounce_map *map;
	unsigned long pfn, bounce_pfns;
	int ret;

	bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
	if (iova_limit <= bounce_size)
		return NULL;

	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	if (!domain)
		return NULL;

	domain->iotlb = vhost_iotlb_alloc(0, 0);
	if (!domain->iotlb)
		goto err_iotlb;

	domain->iova_limit = iova_limit;
	domain->bounce_size = PAGE_ALIGN(bounce_size);
	domain->bounce_maps = vzalloc(bounce_pfns *
				      sizeof(struct vduse_bounce_map));
	if (!domain->bounce_maps)
		goto err_map;

	for (pfn = 0; pfn < bounce_pfns; pfn++) {
		map = &domain->bounce_maps[pfn];
		map->orig_phys = INVALID_PHYS_ADDR;
	}
	file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops,
				  domain, O_RDWR);
	if (IS_ERR(file))
		goto err_file;

	domain->file = file;
	rwlock_init(&domain->bounce_lock);
	spin_lock_init(&domain->iotlb_lock);
	init_iova_domain(&domain->stream_iovad,
			 PAGE_SIZE, IOVA_START_PFN);
	ret = iova_domain_init_rcaches(&domain->stream_iovad);
	if (ret)
		goto err_iovad_stream;
	init_iova_domain(&domain->consistent_iovad,
			 PAGE_SIZE, bounce_pfns);
	ret = iova_domain_init_rcaches(&domain->consistent_iovad);
	if (ret)
		goto err_iovad_consistent;

	return domain;
err_iovad_consistent:
	put_iova_domain(&domain->stream_iovad);
err_iovad_stream:
	fput(file);
err_file:
	vfree(domain->bounce_maps);
err_map:
	vhost_iotlb_free(domain->iotlb);
err_iotlb:
	kfree(domain);
	return NULL;
}

int vduse_domain_init(void)
{
	return iova_cache_get();
}

void vduse_domain_exit(void)
{
	iova_cache_put();
}