1 /* 2 * Copyright 2018 Red Hat Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 #include "nouveau_dmem.h" 23 #include "nouveau_drv.h" 24 #include "nouveau_chan.h" 25 #include "nouveau_dma.h" 26 #include "nouveau_mem.h" 27 #include "nouveau_bo.h" 28 #include "nouveau_svm.h" 29 30 #include <nvif/class.h> 31 #include <nvif/object.h> 32 #include <nvif/push906f.h> 33 #include <nvif/if000c.h> 34 #include <nvif/if500b.h> 35 #include <nvif/if900b.h> 36 37 #include <nvhw/class/cla0b5.h> 38 39 #include <linux/sched/mm.h> 40 #include <linux/hmm.h> 41 #include <linux/memremap.h> 42 #include <linux/migrate.h> 43 44 /* 45 * FIXME: this is ugly right now we are using TTM to allocate vram and we pin 46 * it in vram while in use. We likely want to overhaul memory management for 47 * nouveau to be more page like (not necessarily with system page size but a 48 * bigger page size) at lowest level and have some shim layer on top that would 49 * provide the same functionality as TTM. 50 */ 51 #define DMEM_CHUNK_SIZE (2UL << 20) 52 #define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT) 53 #define NR_CHUNKS (128) 54 55 enum nouveau_aper { 56 NOUVEAU_APER_VIRT, 57 NOUVEAU_APER_VRAM, 58 NOUVEAU_APER_HOST, 59 }; 60 61 typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages, 62 enum nouveau_aper, u64 dst_addr, 63 enum nouveau_aper, u64 src_addr); 64 typedef int (*nouveau_clear_page_t)(struct nouveau_drm *drm, u32 length, 65 enum nouveau_aper, u64 dst_addr); 66 67 struct nouveau_dmem_chunk { 68 struct list_head list; 69 struct nouveau_bo *bo; 70 struct nouveau_drm *drm; 71 unsigned long callocated; 72 struct dev_pagemap pagemap; 73 }; 74 75 struct nouveau_dmem_migrate { 76 nouveau_migrate_copy_t copy_func; 77 nouveau_clear_page_t clear_func; 78 struct nouveau_channel *chan; 79 }; 80 81 struct nouveau_dmem { 82 struct nouveau_drm *drm; 83 struct nouveau_dmem_migrate migrate; 84 struct list_head chunks; 85 struct mutex mutex; 86 struct page *free_pages; 87 struct folio *free_folios; 88 spinlock_t lock; 89 }; 90 91 struct nouveau_dmem_dma_info { 92 dma_addr_t dma_addr; 93 size_t size; 94 }; 95 96 static struct nouveau_dmem_chunk *nouveau_page_to_chunk(struct page *page) 97 { 98 return container_of(page_pgmap(page), struct nouveau_dmem_chunk, 99 pagemap); 100 } 101 102 static struct nouveau_drm *page_to_drm(struct page *page) 103 { 104 struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page); 105 106 return chunk->drm; 107 } 108 109 unsigned long nouveau_dmem_page_addr(struct page *page) 110 { 111 struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page); 112 unsigned long off = (page_to_pfn(page) << PAGE_SHIFT) - 113 chunk->pagemap.range.start; 114 115 return chunk->bo->offset + off; 116 } 117 118 static void nouveau_dmem_folio_free(struct folio *folio) 119 { 120 struct page *page = &folio->page; 121 struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page); 122 struct nouveau_dmem *dmem = chunk->drm->dmem; 123 124 spin_lock(&dmem->lock); 125 if (folio_order(folio)) { 126 page->zone_device_data = dmem->free_folios; 127 dmem->free_folios = folio; 128 } else { 129 page->zone_device_data = dmem->free_pages; 130 dmem->free_pages = page; 131 } 132 133 WARN_ON(!chunk->callocated); 134 chunk->callocated--; 135 /* 136 * FIXME when chunk->callocated reach 0 we should add the chunk to 137 * a reclaim list so that it can be freed in case of memory pressure. 138 */ 139 spin_unlock(&dmem->lock); 140 } 141 142 static void nouveau_dmem_fence_done(struct nouveau_fence **fence) 143 { 144 if (fence) { 145 nouveau_fence_wait(*fence, true, false); 146 nouveau_fence_unref(fence); 147 } else { 148 /* 149 * FIXME wait for channel to be IDLE before calling finalizing 150 * the hmem object. 151 */ 152 } 153 } 154 155 static int nouveau_dmem_copy_folio(struct nouveau_drm *drm, 156 struct folio *sfolio, struct folio *dfolio, 157 struct nouveau_dmem_dma_info *dma_info) 158 { 159 struct device *dev = drm->dev->dev; 160 struct page *dpage = folio_page(dfolio, 0); 161 struct page *spage = folio_page(sfolio, 0); 162 163 folio_lock(dfolio); 164 165 dma_info->dma_addr = dma_map_page(dev, dpage, 0, page_size(dpage), 166 DMA_BIDIRECTIONAL); 167 dma_info->size = page_size(dpage); 168 if (dma_mapping_error(dev, dma_info->dma_addr)) 169 return -EIO; 170 171 if (drm->dmem->migrate.copy_func(drm, folio_nr_pages(sfolio), 172 NOUVEAU_APER_HOST, dma_info->dma_addr, 173 NOUVEAU_APER_VRAM, 174 nouveau_dmem_page_addr(spage))) { 175 dma_unmap_page(dev, dma_info->dma_addr, page_size(dpage), 176 DMA_BIDIRECTIONAL); 177 return -EIO; 178 } 179 180 return 0; 181 } 182 183 static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) 184 { 185 struct nouveau_drm *drm = page_to_drm(vmf->page); 186 struct nouveau_dmem *dmem = drm->dmem; 187 struct nouveau_fence *fence; 188 struct nouveau_svmm *svmm; 189 struct page *dpage; 190 vm_fault_t ret = 0; 191 int err; 192 struct migrate_vma args = { 193 .vma = vmf->vma, 194 .pgmap_owner = drm->dev, 195 .fault_page = vmf->page, 196 .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE | 197 MIGRATE_VMA_SELECT_COMPOUND, 198 .src = NULL, 199 .dst = NULL, 200 }; 201 unsigned int order, nr; 202 struct folio *sfolio, *dfolio; 203 struct nouveau_dmem_dma_info dma_info; 204 205 sfolio = page_folio(vmf->page); 206 order = folio_order(sfolio); 207 nr = 1 << order; 208 209 /* 210 * Handle partial unmap faults, where the folio is large, but 211 * the pmd is split. 212 */ 213 if (vmf->pte) { 214 order = 0; 215 nr = 1; 216 } 217 218 if (order) 219 args.flags |= MIGRATE_VMA_SELECT_COMPOUND; 220 221 args.start = ALIGN_DOWN(vmf->address, (PAGE_SIZE << order)); 222 args.vma = vmf->vma; 223 args.end = args.start + (PAGE_SIZE << order); 224 args.src = kcalloc(nr, sizeof(*args.src), GFP_KERNEL); 225 args.dst = kcalloc(nr, sizeof(*args.dst), GFP_KERNEL); 226 227 if (!args.src || !args.dst) { 228 ret = VM_FAULT_OOM; 229 goto err; 230 } 231 /* 232 * FIXME what we really want is to find some heuristic to migrate more 233 * than just one page on CPU fault. When such fault happens it is very 234 * likely that more surrounding page will CPU fault too. 235 */ 236 if (migrate_vma_setup(&args) < 0) 237 return VM_FAULT_SIGBUS; 238 if (!args.cpages) 239 return 0; 240 241 if (order) 242 dpage = folio_page(vma_alloc_folio(GFP_HIGHUSER | __GFP_ZERO, 243 order, vmf->vma, vmf->address), 0); 244 else 245 dpage = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vmf->vma, 246 vmf->address); 247 if (!dpage) { 248 ret = VM_FAULT_OOM; 249 goto done; 250 } 251 252 args.dst[0] = migrate_pfn(page_to_pfn(dpage)); 253 if (order) 254 args.dst[0] |= MIGRATE_PFN_COMPOUND; 255 dfolio = page_folio(dpage); 256 257 svmm = folio_zone_device_data(sfolio); 258 mutex_lock(&svmm->mutex); 259 nouveau_svmm_invalidate(svmm, args.start, args.end); 260 err = nouveau_dmem_copy_folio(drm, sfolio, dfolio, &dma_info); 261 mutex_unlock(&svmm->mutex); 262 if (err) { 263 ret = VM_FAULT_SIGBUS; 264 goto done; 265 } 266 267 nouveau_fence_new(&fence, dmem->migrate.chan); 268 migrate_vma_pages(&args); 269 nouveau_dmem_fence_done(&fence); 270 dma_unmap_page(drm->dev->dev, dma_info.dma_addr, PAGE_SIZE, 271 DMA_BIDIRECTIONAL); 272 done: 273 migrate_vma_finalize(&args); 274 err: 275 kfree(args.src); 276 kfree(args.dst); 277 return ret; 278 } 279 280 static void nouveau_dmem_folio_split(struct folio *head, struct folio *tail) 281 { 282 if (tail == NULL) 283 return; 284 tail->pgmap = head->pgmap; 285 tail->mapping = head->mapping; 286 folio_set_zone_device_data(tail, folio_zone_device_data(head)); 287 } 288 289 static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = { 290 .folio_free = nouveau_dmem_folio_free, 291 .migrate_to_ram = nouveau_dmem_migrate_to_ram, 292 .folio_split = nouveau_dmem_folio_split, 293 }; 294 295 static int 296 nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage, 297 bool is_large) 298 { 299 struct nouveau_dmem_chunk *chunk; 300 struct resource *res; 301 struct page *page; 302 void *ptr; 303 unsigned long i, pfn_first, pfn; 304 int ret; 305 306 chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); 307 if (chunk == NULL) { 308 ret = -ENOMEM; 309 goto out; 310 } 311 312 /* Allocate unused physical address space for device private pages. */ 313 res = request_free_mem_region(&iomem_resource, DMEM_CHUNK_SIZE * NR_CHUNKS, 314 "nouveau_dmem"); 315 if (IS_ERR(res)) { 316 ret = PTR_ERR(res); 317 goto out_free; 318 } 319 320 chunk->drm = drm; 321 chunk->pagemap.type = MEMORY_DEVICE_PRIVATE; 322 chunk->pagemap.range.start = res->start; 323 chunk->pagemap.range.end = res->end; 324 chunk->pagemap.nr_range = 1; 325 chunk->pagemap.ops = &nouveau_dmem_pagemap_ops; 326 chunk->pagemap.owner = drm->dev; 327 328 ret = nouveau_bo_new_pin(&drm->client, NOUVEAU_GEM_DOMAIN_VRAM, DMEM_CHUNK_SIZE, 329 &chunk->bo); 330 if (ret) 331 goto out_release; 332 333 ptr = memremap_pages(&chunk->pagemap, numa_node_id()); 334 if (IS_ERR(ptr)) { 335 ret = PTR_ERR(ptr); 336 goto out_bo_free; 337 } 338 339 mutex_lock(&drm->dmem->mutex); 340 list_add(&chunk->list, &drm->dmem->chunks); 341 mutex_unlock(&drm->dmem->mutex); 342 343 pfn_first = chunk->pagemap.range.start >> PAGE_SHIFT; 344 page = pfn_to_page(pfn_first); 345 spin_lock(&drm->dmem->lock); 346 347 pfn = pfn_first; 348 for (i = 0; i < NR_CHUNKS; i++) { 349 int j; 350 351 if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) || !is_large) { 352 for (j = 0; j < DMEM_CHUNK_NPAGES - 1; j++, pfn++) { 353 page = pfn_to_page(pfn); 354 page->zone_device_data = drm->dmem->free_pages; 355 drm->dmem->free_pages = page; 356 } 357 } else { 358 page = pfn_to_page(pfn); 359 page->zone_device_data = drm->dmem->free_folios; 360 drm->dmem->free_folios = page_folio(page); 361 pfn += DMEM_CHUNK_NPAGES; 362 } 363 } 364 365 /* Move to next page */ 366 if (is_large) { 367 *ppage = &drm->dmem->free_folios->page; 368 drm->dmem->free_folios = (*ppage)->zone_device_data; 369 } else { 370 *ppage = drm->dmem->free_pages; 371 drm->dmem->free_pages = (*ppage)->zone_device_data; 372 } 373 374 chunk->callocated++; 375 spin_unlock(&drm->dmem->lock); 376 377 NV_INFO(drm, "DMEM: registered %ldMB of %sdevice memory %lx %lx\n", 378 NR_CHUNKS * DMEM_CHUNK_SIZE >> 20, is_large ? "THP " : "", pfn_first, 379 nouveau_dmem_page_addr(page)); 380 381 return 0; 382 383 out_bo_free: 384 nouveau_bo_unpin_del(&chunk->bo); 385 out_release: 386 release_mem_region(chunk->pagemap.range.start, range_len(&chunk->pagemap.range)); 387 out_free: 388 kfree(chunk); 389 out: 390 return ret; 391 } 392 393 static struct page * 394 nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm, bool is_large) 395 { 396 struct nouveau_dmem_chunk *chunk; 397 struct page *page = NULL; 398 struct folio *folio = NULL; 399 int ret; 400 unsigned int order = 0; 401 402 spin_lock(&drm->dmem->lock); 403 if (is_large && drm->dmem->free_folios) { 404 folio = drm->dmem->free_folios; 405 page = &folio->page; 406 drm->dmem->free_folios = page->zone_device_data; 407 chunk = nouveau_page_to_chunk(&folio->page); 408 chunk->callocated++; 409 spin_unlock(&drm->dmem->lock); 410 order = ilog2(DMEM_CHUNK_NPAGES); 411 } else if (!is_large && drm->dmem->free_pages) { 412 page = drm->dmem->free_pages; 413 drm->dmem->free_pages = page->zone_device_data; 414 chunk = nouveau_page_to_chunk(page); 415 chunk->callocated++; 416 spin_unlock(&drm->dmem->lock); 417 folio = page_folio(page); 418 } else { 419 spin_unlock(&drm->dmem->lock); 420 ret = nouveau_dmem_chunk_alloc(drm, &page, is_large); 421 if (ret) 422 return NULL; 423 folio = page_folio(page); 424 if (is_large) 425 order = ilog2(DMEM_CHUNK_NPAGES); 426 } 427 428 zone_device_folio_init(folio, page_pgmap(folio_page(folio, 0)), order); 429 return page; 430 } 431 432 static void 433 nouveau_dmem_page_free_locked(struct nouveau_drm *drm, struct page *page) 434 { 435 unlock_page(page); 436 put_page(page); 437 } 438 439 void 440 nouveau_dmem_resume(struct nouveau_drm *drm) 441 { 442 struct nouveau_dmem_chunk *chunk; 443 int ret; 444 445 if (drm->dmem == NULL) 446 return; 447 448 mutex_lock(&drm->dmem->mutex); 449 list_for_each_entry(chunk, &drm->dmem->chunks, list) { 450 ret = nouveau_bo_pin(chunk->bo, NOUVEAU_GEM_DOMAIN_VRAM, false); 451 /* FIXME handle pin failure */ 452 WARN_ON(ret); 453 } 454 mutex_unlock(&drm->dmem->mutex); 455 } 456 457 void 458 nouveau_dmem_suspend(struct nouveau_drm *drm) 459 { 460 struct nouveau_dmem_chunk *chunk; 461 462 if (drm->dmem == NULL) 463 return; 464 465 mutex_lock(&drm->dmem->mutex); 466 list_for_each_entry(chunk, &drm->dmem->chunks, list) 467 nouveau_bo_unpin(chunk->bo); 468 mutex_unlock(&drm->dmem->mutex); 469 } 470 471 /* 472 * Evict all pages mapping a chunk. 473 */ 474 static void 475 nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) 476 { 477 unsigned long i, npages = range_len(&chunk->pagemap.range) >> PAGE_SHIFT; 478 unsigned long *src_pfns, *dst_pfns; 479 struct nouveau_dmem_dma_info *dma_info; 480 struct nouveau_fence *fence; 481 482 src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL); 483 dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL); 484 dma_info = kvcalloc(npages, sizeof(*dma_info), GFP_KERNEL | __GFP_NOFAIL); 485 486 migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT, 487 npages); 488 489 for (i = 0; i < npages; i++) { 490 if (src_pfns[i] & MIGRATE_PFN_MIGRATE) { 491 struct page *dpage; 492 struct folio *folio = page_folio( 493 migrate_pfn_to_page(src_pfns[i])); 494 unsigned int order = folio_order(folio); 495 496 if (src_pfns[i] & MIGRATE_PFN_COMPOUND) { 497 dpage = folio_page( 498 folio_alloc( 499 GFP_HIGHUSER_MOVABLE, order), 0); 500 } else { 501 /* 502 * _GFP_NOFAIL because the GPU is going away and there 503 * is nothing sensible we can do if we can't copy the 504 * data back. 505 */ 506 dpage = alloc_page(GFP_HIGHUSER | __GFP_NOFAIL); 507 } 508 509 dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)); 510 nouveau_dmem_copy_folio(chunk->drm, 511 page_folio(migrate_pfn_to_page(src_pfns[i])), 512 page_folio(dpage), 513 &dma_info[i]); 514 } 515 } 516 517 nouveau_fence_new(&fence, chunk->drm->dmem->migrate.chan); 518 migrate_device_pages(src_pfns, dst_pfns, npages); 519 nouveau_dmem_fence_done(&fence); 520 migrate_device_finalize(src_pfns, dst_pfns, npages); 521 kvfree(src_pfns); 522 kvfree(dst_pfns); 523 for (i = 0; i < npages; i++) 524 dma_unmap_page(chunk->drm->dev->dev, dma_info[i].dma_addr, 525 dma_info[i].size, DMA_BIDIRECTIONAL); 526 kvfree(dma_info); 527 } 528 529 void 530 nouveau_dmem_fini(struct nouveau_drm *drm) 531 { 532 struct nouveau_dmem_chunk *chunk, *tmp; 533 534 if (drm->dmem == NULL) 535 return; 536 537 mutex_lock(&drm->dmem->mutex); 538 539 list_for_each_entry_safe(chunk, tmp, &drm->dmem->chunks, list) { 540 nouveau_dmem_evict_chunk(chunk); 541 nouveau_bo_unpin_del(&chunk->bo); 542 WARN_ON(chunk->callocated); 543 list_del(&chunk->list); 544 memunmap_pages(&chunk->pagemap); 545 release_mem_region(chunk->pagemap.range.start, 546 range_len(&chunk->pagemap.range)); 547 kfree(chunk); 548 } 549 550 mutex_unlock(&drm->dmem->mutex); 551 } 552 553 static int 554 nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages, 555 enum nouveau_aper dst_aper, u64 dst_addr, 556 enum nouveau_aper src_aper, u64 src_addr) 557 { 558 struct nvif_push *push = &drm->dmem->migrate.chan->chan.push; 559 u32 launch_dma = 0; 560 int ret; 561 562 ret = PUSH_WAIT(push, 13); 563 if (ret) 564 return ret; 565 566 if (src_aper != NOUVEAU_APER_VIRT) { 567 switch (src_aper) { 568 case NOUVEAU_APER_VRAM: 569 PUSH_IMMD(push, NVA0B5, SET_SRC_PHYS_MODE, 570 NVDEF(NVA0B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB)); 571 break; 572 case NOUVEAU_APER_HOST: 573 PUSH_IMMD(push, NVA0B5, SET_SRC_PHYS_MODE, 574 NVDEF(NVA0B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM)); 575 break; 576 default: 577 return -EINVAL; 578 } 579 580 launch_dma |= NVDEF(NVA0B5, LAUNCH_DMA, SRC_TYPE, PHYSICAL); 581 } 582 583 if (dst_aper != NOUVEAU_APER_VIRT) { 584 switch (dst_aper) { 585 case NOUVEAU_APER_VRAM: 586 PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE, 587 NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB)); 588 break; 589 case NOUVEAU_APER_HOST: 590 PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE, 591 NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM)); 592 break; 593 default: 594 return -EINVAL; 595 } 596 597 launch_dma |= NVDEF(NVA0B5, LAUNCH_DMA, DST_TYPE, PHYSICAL); 598 } 599 600 PUSH_MTHD(push, NVA0B5, OFFSET_IN_UPPER, 601 NVVAL(NVA0B5, OFFSET_IN_UPPER, UPPER, upper_32_bits(src_addr)), 602 603 OFFSET_IN_LOWER, lower_32_bits(src_addr), 604 605 OFFSET_OUT_UPPER, 606 NVVAL(NVA0B5, OFFSET_OUT_UPPER, UPPER, upper_32_bits(dst_addr)), 607 608 OFFSET_OUT_LOWER, lower_32_bits(dst_addr), 609 PITCH_IN, PAGE_SIZE, 610 PITCH_OUT, PAGE_SIZE, 611 LINE_LENGTH_IN, PAGE_SIZE, 612 LINE_COUNT, npages); 613 614 PUSH_MTHD(push, NVA0B5, LAUNCH_DMA, launch_dma | 615 NVDEF(NVA0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED) | 616 NVDEF(NVA0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE) | 617 NVDEF(NVA0B5, LAUNCH_DMA, SEMAPHORE_TYPE, NONE) | 618 NVDEF(NVA0B5, LAUNCH_DMA, INTERRUPT_TYPE, NONE) | 619 NVDEF(NVA0B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) | 620 NVDEF(NVA0B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) | 621 NVDEF(NVA0B5, LAUNCH_DMA, MULTI_LINE_ENABLE, TRUE) | 622 NVDEF(NVA0B5, LAUNCH_DMA, REMAP_ENABLE, FALSE) | 623 NVDEF(NVA0B5, LAUNCH_DMA, BYPASS_L2, USE_PTE_SETTING)); 624 return 0; 625 } 626 627 static int 628 nvc0b5_migrate_clear(struct nouveau_drm *drm, u32 length, 629 enum nouveau_aper dst_aper, u64 dst_addr) 630 { 631 struct nvif_push *push = &drm->dmem->migrate.chan->chan.push; 632 u32 launch_dma = 0; 633 int ret; 634 635 ret = PUSH_WAIT(push, 12); 636 if (ret) 637 return ret; 638 639 switch (dst_aper) { 640 case NOUVEAU_APER_VRAM: 641 PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE, 642 NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB)); 643 break; 644 case NOUVEAU_APER_HOST: 645 PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE, 646 NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM)); 647 break; 648 default: 649 return -EINVAL; 650 } 651 652 launch_dma |= NVDEF(NVA0B5, LAUNCH_DMA, DST_TYPE, PHYSICAL); 653 654 PUSH_MTHD(push, NVA0B5, SET_REMAP_CONST_A, 0, 655 SET_REMAP_CONST_B, 0, 656 657 SET_REMAP_COMPONENTS, 658 NVDEF(NVA0B5, SET_REMAP_COMPONENTS, DST_X, CONST_A) | 659 NVDEF(NVA0B5, SET_REMAP_COMPONENTS, DST_Y, CONST_B) | 660 NVDEF(NVA0B5, SET_REMAP_COMPONENTS, COMPONENT_SIZE, FOUR) | 661 NVDEF(NVA0B5, SET_REMAP_COMPONENTS, NUM_DST_COMPONENTS, TWO)); 662 663 PUSH_MTHD(push, NVA0B5, OFFSET_OUT_UPPER, 664 NVVAL(NVA0B5, OFFSET_OUT_UPPER, UPPER, upper_32_bits(dst_addr)), 665 666 OFFSET_OUT_LOWER, lower_32_bits(dst_addr)); 667 668 PUSH_MTHD(push, NVA0B5, LINE_LENGTH_IN, length >> 3); 669 670 PUSH_MTHD(push, NVA0B5, LAUNCH_DMA, launch_dma | 671 NVDEF(NVA0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED) | 672 NVDEF(NVA0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE) | 673 NVDEF(NVA0B5, LAUNCH_DMA, SEMAPHORE_TYPE, NONE) | 674 NVDEF(NVA0B5, LAUNCH_DMA, INTERRUPT_TYPE, NONE) | 675 NVDEF(NVA0B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) | 676 NVDEF(NVA0B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) | 677 NVDEF(NVA0B5, LAUNCH_DMA, MULTI_LINE_ENABLE, FALSE) | 678 NVDEF(NVA0B5, LAUNCH_DMA, REMAP_ENABLE, TRUE) | 679 NVDEF(NVA0B5, LAUNCH_DMA, BYPASS_L2, USE_PTE_SETTING)); 680 return 0; 681 } 682 683 static int 684 nouveau_dmem_migrate_init(struct nouveau_drm *drm) 685 { 686 switch (drm->ttm.copy.oclass) { 687 case PASCAL_DMA_COPY_A: 688 case PASCAL_DMA_COPY_B: 689 case VOLTA_DMA_COPY_A: 690 case TURING_DMA_COPY_A: 691 drm->dmem->migrate.copy_func = nvc0b5_migrate_copy; 692 drm->dmem->migrate.clear_func = nvc0b5_migrate_clear; 693 drm->dmem->migrate.chan = drm->ttm.chan; 694 return 0; 695 default: 696 break; 697 } 698 return -ENODEV; 699 } 700 701 void 702 nouveau_dmem_init(struct nouveau_drm *drm) 703 { 704 int ret; 705 706 /* This only make sense on PASCAL or newer */ 707 if (drm->client.device.info.family < NV_DEVICE_INFO_V0_PASCAL) 708 return; 709 710 if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL))) 711 return; 712 713 drm->dmem->drm = drm; 714 mutex_init(&drm->dmem->mutex); 715 INIT_LIST_HEAD(&drm->dmem->chunks); 716 mutex_init(&drm->dmem->mutex); 717 spin_lock_init(&drm->dmem->lock); 718 719 /* Initialize migration dma helpers before registering memory */ 720 ret = nouveau_dmem_migrate_init(drm); 721 if (ret) { 722 kfree(drm->dmem); 723 drm->dmem = NULL; 724 } 725 } 726 727 static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm, 728 struct nouveau_svmm *svmm, unsigned long src, 729 struct nouveau_dmem_dma_info *dma_info, u64 *pfn) 730 { 731 struct device *dev = drm->dev->dev; 732 struct page *dpage, *spage; 733 unsigned long paddr; 734 bool is_large = false; 735 unsigned long mpfn; 736 737 spage = migrate_pfn_to_page(src); 738 if (!(src & MIGRATE_PFN_MIGRATE)) 739 goto out; 740 741 is_large = src & MIGRATE_PFN_COMPOUND; 742 dpage = nouveau_dmem_page_alloc_locked(drm, is_large); 743 if (!dpage) 744 goto out; 745 746 paddr = nouveau_dmem_page_addr(dpage); 747 if (spage) { 748 dma_info->dma_addr = dma_map_page(dev, spage, 0, page_size(spage), 749 DMA_BIDIRECTIONAL); 750 dma_info->size = page_size(spage); 751 if (dma_mapping_error(dev, dma_info->dma_addr)) 752 goto out_free_page; 753 if (drm->dmem->migrate.copy_func(drm, folio_nr_pages(page_folio(spage)), 754 NOUVEAU_APER_VRAM, paddr, NOUVEAU_APER_HOST, 755 dma_info->dma_addr)) 756 goto out_dma_unmap; 757 } else { 758 dma_info->dma_addr = DMA_MAPPING_ERROR; 759 if (drm->dmem->migrate.clear_func(drm, page_size(dpage), 760 NOUVEAU_APER_VRAM, paddr)) 761 goto out_free_page; 762 } 763 764 dpage->zone_device_data = svmm; 765 *pfn = NVIF_VMM_PFNMAP_V0_V | NVIF_VMM_PFNMAP_V0_VRAM | 766 ((paddr >> PAGE_SHIFT) << NVIF_VMM_PFNMAP_V0_ADDR_SHIFT); 767 if (src & MIGRATE_PFN_WRITE) 768 *pfn |= NVIF_VMM_PFNMAP_V0_W; 769 mpfn = migrate_pfn(page_to_pfn(dpage)); 770 if (folio_order(page_folio(dpage))) 771 mpfn |= MIGRATE_PFN_COMPOUND; 772 return mpfn; 773 774 out_dma_unmap: 775 dma_unmap_page(dev, dma_info->dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); 776 out_free_page: 777 nouveau_dmem_page_free_locked(drm, dpage); 778 out: 779 *pfn = NVIF_VMM_PFNMAP_V0_NONE; 780 return 0; 781 } 782 783 static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm, 784 struct nouveau_svmm *svmm, struct migrate_vma *args, 785 struct nouveau_dmem_dma_info *dma_info, u64 *pfns) 786 { 787 struct nouveau_fence *fence; 788 unsigned long addr = args->start, nr_dma = 0, i; 789 unsigned long order = 0; 790 791 for (i = 0; addr < args->end; ) { 792 struct folio *folio; 793 794 args->dst[i] = nouveau_dmem_migrate_copy_one(drm, svmm, 795 args->src[i], dma_info + nr_dma, pfns + i); 796 if (!args->dst[i]) { 797 i++; 798 addr += PAGE_SIZE; 799 continue; 800 } 801 if (!dma_mapping_error(drm->dev->dev, dma_info[nr_dma].dma_addr)) 802 nr_dma++; 803 folio = page_folio(migrate_pfn_to_page(args->dst[i])); 804 order = folio_order(folio); 805 i += 1 << order; 806 addr += (1 << order) * PAGE_SIZE; 807 } 808 809 nouveau_fence_new(&fence, drm->dmem->migrate.chan); 810 migrate_vma_pages(args); 811 nouveau_dmem_fence_done(&fence); 812 nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i, order); 813 814 while (nr_dma--) { 815 dma_unmap_page(drm->dev->dev, dma_info[nr_dma].dma_addr, 816 dma_info[nr_dma].size, DMA_BIDIRECTIONAL); 817 } 818 migrate_vma_finalize(args); 819 } 820 821 int 822 nouveau_dmem_migrate_vma(struct nouveau_drm *drm, 823 struct nouveau_svmm *svmm, 824 struct vm_area_struct *vma, 825 unsigned long start, 826 unsigned long end) 827 { 828 unsigned long npages = (end - start) >> PAGE_SHIFT; 829 unsigned long max = npages; 830 struct migrate_vma args = { 831 .vma = vma, 832 .start = start, 833 .pgmap_owner = drm->dev, 834 .flags = MIGRATE_VMA_SELECT_SYSTEM 835 | MIGRATE_VMA_SELECT_COMPOUND, 836 }; 837 unsigned long i; 838 u64 *pfns; 839 int ret = -ENOMEM; 840 struct nouveau_dmem_dma_info *dma_info; 841 842 if (drm->dmem == NULL) { 843 ret = -ENODEV; 844 goto out; 845 } 846 847 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 848 if (max > (unsigned long)HPAGE_PMD_NR) 849 max = (unsigned long)HPAGE_PMD_NR; 850 851 args.src = kcalloc(max, sizeof(*args.src), GFP_KERNEL); 852 if (!args.src) 853 goto out; 854 args.dst = kcalloc(max, sizeof(*args.dst), GFP_KERNEL); 855 if (!args.dst) 856 goto out_free_src; 857 858 dma_info = kmalloc_array(max, sizeof(*dma_info), GFP_KERNEL); 859 if (!dma_info) 860 goto out_free_dst; 861 862 pfns = nouveau_pfns_alloc(max); 863 if (!pfns) 864 goto out_free_dma; 865 866 for (i = 0; i < npages; i += max) { 867 if (args.start + (max << PAGE_SHIFT) > end) 868 args.end = end; 869 else 870 args.end = args.start + (max << PAGE_SHIFT); 871 872 ret = migrate_vma_setup(&args); 873 if (ret) 874 goto out_free_pfns; 875 876 if (args.cpages) 877 nouveau_dmem_migrate_chunk(drm, svmm, &args, dma_info, 878 pfns); 879 args.start = args.end; 880 } 881 882 ret = 0; 883 out_free_pfns: 884 nouveau_pfns_free(pfns); 885 out_free_dma: 886 kfree(dma_info); 887 out_free_dst: 888 kfree(args.dst); 889 out_free_src: 890 kfree(args.src); 891 out: 892 return ret; 893 } 894