/*
 * Copyright 2018 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "nouveau_dmem.h"
#include "nouveau_drv.h"
#include "nouveau_chan.h"
#include "nouveau_dma.h"
#include "nouveau_mem.h"
#include "nouveau_bo.h"
#include "nouveau_svm.h"

#include <nvif/class.h>
#include <nvif/object.h>
#include <nvif/push906f.h>
#include <nvif/if000c.h>
#include <nvif/if500b.h>
#include <nvif/if900b.h>

#include <nvhw/class/cla0b5.h>

#include <linux/sched/mm.h>
#include <linux/hmm.h>
#include <linux/memremap.h>
#include <linux/migrate.h>

/*
 * FIXME: this is ugly. Right now we are using TTM to allocate VRAM and we pin
 * it in VRAM while in use. We likely want to overhaul memory management for
 * nouveau to be more page like (not necessarily with system page size but a
 * bigger page size) at the lowest level, and have some shim layer on top that
 * would provide the same functionality as TTM.
 */
#define DMEM_CHUNK_SIZE (2UL << 20)
#define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT)
#define NR_CHUNKS (128)

enum nouveau_aper {
	NOUVEAU_APER_VIRT,
	NOUVEAU_APER_VRAM,
	NOUVEAU_APER_HOST,
};

typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages,
				      enum nouveau_aper, u64 dst_addr,
				      enum nouveau_aper, u64 src_addr);
typedef int (*nouveau_clear_page_t)(struct nouveau_drm *drm, u32 length,
				    enum nouveau_aper, u64 dst_addr);

struct nouveau_dmem_chunk {
	struct list_head list;
	struct nouveau_bo *bo;
	struct nouveau_drm *drm;
	unsigned long callocated;
	struct dev_pagemap pagemap;
};

struct nouveau_dmem_migrate {
	nouveau_migrate_copy_t copy_func;
	nouveau_clear_page_t clear_func;
	struct nouveau_channel *chan;
};

struct nouveau_dmem {
	struct nouveau_drm *drm;
	struct nouveau_dmem_migrate migrate;
	struct list_head chunks;
	struct mutex mutex;
	struct page *free_pages;
	struct folio *free_folios;
	spinlock_t lock;
};

struct nouveau_dmem_dma_info {
	dma_addr_t dma_addr;
	size_t size;
};

static struct nouveau_dmem_chunk *nouveau_page_to_chunk(struct page *page)
{
	return container_of(page_pgmap(page), struct nouveau_dmem_chunk,
			    pagemap);
}

static struct nouveau_drm *page_to_drm(struct page *page)
{
	struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);

	return chunk->drm;
}

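/*
 * Return the VRAM address backing a device-private page: the chunk BO's
 * offset plus the page's offset within the chunk's pagemap range.
 */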
unsigned long nouveau_dmem_page_addr(struct page *page)
{
	struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);
	unsigned long off = (page_to_pfn(page) << PAGE_SHIFT) -
				chunk->pagemap.range.start;

	return chunk->bo->offset + off;
}

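/*
 * Return a freed device-private page/folio to the per-device free lists and
 * drop the owning chunk's allocation count.
 */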
static void nouveau_dmem_folio_free(struct folio *folio)
{
	struct page *page = &folio->page;
	struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);
	struct nouveau_dmem *dmem = chunk->drm->dmem;

	spin_lock(&dmem->lock);
	if (folio_order(folio)) {
		page->zone_device_data = dmem->free_folios;
		dmem->free_folios = folio;
	} else {
		page->zone_device_data = dmem->free_pages;
		dmem->free_pages = page;
	}

	WARN_ON(!chunk->callocated);
	chunk->callocated--;
	/*
	 * FIXME when chunk->callocated reaches 0 we should add the chunk to
	 * a reclaim list so that it can be freed in case of memory pressure.
	 */
	spin_unlock(&dmem->lock);
}

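/* Wait for a migration copy fence to signal, then drop our reference to it. */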
static void nouveau_dmem_fence_done(struct nouveau_fence **fence)
{
	if (fence) {
		nouveau_fence_wait(*fence, true, false);
		nouveau_fence_unref(fence);
	} else {
		/*
		 * FIXME wait for the channel to be IDLE before finalizing
		 * the hmem object.
		 */
	}
}

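/*
 * DMA-map the destination folio and copy the source folio's contents out of
 * VRAM into it using the device's copy engine.
 */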
static int nouveau_dmem_copy_folio(struct nouveau_drm *drm,
				   struct folio *sfolio, struct folio *dfolio,
				   struct nouveau_dmem_dma_info *dma_info)
{
	struct device *dev = drm->dev->dev;
	struct page *dpage = folio_page(dfolio, 0);
	struct page *spage = folio_page(sfolio, 0);

	folio_lock(dfolio);

	dma_info->dma_addr = dma_map_page(dev, dpage, 0, page_size(dpage),
					  DMA_BIDIRECTIONAL);
	dma_info->size = page_size(dpage);
	if (dma_mapping_error(dev, dma_info->dma_addr))
		return -EIO;

	if (drm->dmem->migrate.copy_func(drm, folio_nr_pages(sfolio),
					 NOUVEAU_APER_HOST, dma_info->dma_addr,
					 NOUVEAU_APER_VRAM,
					 nouveau_dmem_page_addr(spage))) {
		dma_unmap_page(dev, dma_info->dma_addr, page_size(dpage),
			       DMA_BIDIRECTIONAL);
		return -EIO;
	}

	return 0;
}

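/*
 * CPU fault handler for device-private memory: migrate the faulting page
 * (or folio) back to system RAM, copying its contents out of VRAM.
 */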
static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
{
	struct nouveau_drm *drm = page_to_drm(vmf->page);
	struct nouveau_dmem *dmem = drm->dmem;
	struct nouveau_fence *fence;
	struct nouveau_svmm *svmm;
	struct page *dpage;
	vm_fault_t ret = 0;
	int err;
	struct migrate_vma args = {
		.vma = vmf->vma,
		.pgmap_owner = drm->dev,
		.fault_page = vmf->page,
		.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE |
			 MIGRATE_VMA_SELECT_COMPOUND,
		.src = NULL,
		.dst = NULL,
	};
	unsigned int order, nr;
	struct folio *sfolio, *dfolio;
	struct nouveau_dmem_dma_info dma_info;

	sfolio = page_folio(vmf->page);
	order = folio_order(sfolio);
	nr = 1 << order;

	/*
	 * Handle partial unmap faults, where the folio is large, but
	 * the pmd is split.
	 */
	if (vmf->pte) {
		order = 0;
		nr = 1;
	}

	if (order)
		args.flags |= MIGRATE_VMA_SELECT_COMPOUND;

	args.start = ALIGN_DOWN(vmf->address, (PAGE_SIZE << order));
	args.vma = vmf->vma;
	args.end = args.start + (PAGE_SIZE << order);
	args.src = kcalloc(nr, sizeof(*args.src), GFP_KERNEL);
	args.dst = kcalloc(nr, sizeof(*args.dst), GFP_KERNEL);

	if (!args.src || !args.dst) {
		ret = VM_FAULT_OOM;
		goto err;
	}
	/*
	 * FIXME what we really want is to find some heuristic to migrate more
	 * than just one page on CPU fault. When such a fault happens it is very
	 * likely that more surrounding pages will CPU fault too.
	 */
	if (migrate_vma_setup(&args) < 0)
		return VM_FAULT_SIGBUS;
	if (!args.cpages)
		return 0;

	if (order)
		dpage = folio_page(vma_alloc_folio(GFP_HIGHUSER | __GFP_ZERO,
						   order, vmf->vma, vmf->address), 0);
	else
		dpage = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vmf->vma,
				       vmf->address);
	if (!dpage) {
		ret = VM_FAULT_OOM;
		goto done;
	}

	args.dst[0] = migrate_pfn(page_to_pfn(dpage));
	if (order)
		args.dst[0] |= MIGRATE_PFN_COMPOUND;
	dfolio = page_folio(dpage);

	svmm = folio_zone_device_data(sfolio);
	mutex_lock(&svmm->mutex);
	nouveau_svmm_invalidate(svmm, args.start, args.end);
	err = nouveau_dmem_copy_folio(drm, sfolio, dfolio, &dma_info);
	mutex_unlock(&svmm->mutex);
	if (err) {
		ret = VM_FAULT_SIGBUS;
		goto done;
	}

	nouveau_fence_new(&fence, dmem->migrate.chan);
	migrate_vma_pages(&args);
	nouveau_dmem_fence_done(&fence);
	dma_unmap_page(drm->dev->dev, dma_info.dma_addr, PAGE_SIZE,
		       DMA_BIDIRECTIONAL);
done:
	migrate_vma_finalize(&args);
err:
	kfree(args.src);
	kfree(args.dst);
	return ret;
}

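/*
 * Folio-split callback: propagate pgmap, mapping and zone device data from
 * the head folio to a newly created tail folio.
 */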
static void nouveau_dmem_folio_split(struct folio *head, struct folio *tail)
{
	if (tail == NULL)
		return;
	tail->pgmap = head->pgmap;
	tail->mapping = head->mapping;
	folio_set_zone_device_data(tail, folio_zone_device_data(head));
}

static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = {
	.folio_free = nouveau_dmem_folio_free,
	.migrate_to_ram = nouveau_dmem_migrate_to_ram,
	.folio_split = nouveau_dmem_folio_split,
};

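/*
 * Allocate a new chunk of VRAM backed by device-private struct pages, add it
 * to the device's chunk list and hand the first free page (or folio) back to
 * the caller.
 */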
static int
nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage,
			 bool is_large)
{
	struct nouveau_dmem_chunk *chunk;
	struct resource *res;
	struct page *page;
	void *ptr;
	unsigned long i, pfn_first, pfn;
	int ret;

	chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
	if (chunk == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	/* Allocate unused physical address space for device private pages. */
	res = request_free_mem_region(&iomem_resource, DMEM_CHUNK_SIZE * NR_CHUNKS,
				      "nouveau_dmem");
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		goto out_free;
	}

	chunk->drm = drm;
	chunk->pagemap.type = MEMORY_DEVICE_PRIVATE;
	chunk->pagemap.range.start = res->start;
	chunk->pagemap.range.end = res->end;
	chunk->pagemap.nr_range = 1;
	chunk->pagemap.ops = &nouveau_dmem_pagemap_ops;
	chunk->pagemap.owner = drm->dev;

	ret = nouveau_bo_new_pin(&drm->client, NOUVEAU_GEM_DOMAIN_VRAM, DMEM_CHUNK_SIZE,
				 &chunk->bo);
	if (ret)
		goto out_release;

	ptr = memremap_pages(&chunk->pagemap, numa_node_id());
	if (IS_ERR(ptr)) {
		ret = PTR_ERR(ptr);
		goto out_bo_free;
	}

	mutex_lock(&drm->dmem->mutex);
	list_add(&chunk->list, &drm->dmem->chunks);
	mutex_unlock(&drm->dmem->mutex);

	pfn_first = chunk->pagemap.range.start >> PAGE_SHIFT;
	page = pfn_to_page(pfn_first);
	spin_lock(&drm->dmem->lock);

	pfn = pfn_first;
	for (i = 0; i < NR_CHUNKS; i++) {
		int j;

		if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) || !is_large) {
			for (j = 0; j < DMEM_CHUNK_NPAGES - 1; j++, pfn++) {
				page = pfn_to_page(pfn);
				page->zone_device_data = drm->dmem->free_pages;
				drm->dmem->free_pages = page;
			}
		} else {
			page = pfn_to_page(pfn);
			page->zone_device_data = drm->dmem->free_folios;
			drm->dmem->free_folios = page_folio(page);
			pfn += DMEM_CHUNK_NPAGES;
		}
	}

	/* Hand the first free page (or folio) to the caller. */
	if (is_large) {
		*ppage = &drm->dmem->free_folios->page;
		drm->dmem->free_folios = (*ppage)->zone_device_data;
	} else {
		*ppage = drm->dmem->free_pages;
		drm->dmem->free_pages = (*ppage)->zone_device_data;
	}

	chunk->callocated++;
	spin_unlock(&drm->dmem->lock);

	NV_INFO(drm, "DMEM: registered %ldMB of %sdevice memory %lx %lx\n",
		NR_CHUNKS * DMEM_CHUNK_SIZE >> 20, is_large ? "THP " : "", pfn_first,
		nouveau_dmem_page_addr(page));

	return 0;

out_bo_free:
	nouveau_bo_unpin_del(&chunk->bo);
out_release:
	release_mem_region(chunk->pagemap.range.start, range_len(&chunk->pagemap.range));
out_free:
	kfree(chunk);
out:
	return ret;
}

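/*
 * Pop a free device-private page (or large folio) off the free lists,
 * allocating a new chunk if none are available, and initialise it as a
 * ZONE_DEVICE folio of the requested order.
 */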
static struct page *
nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm, bool is_large)
{
	struct nouveau_dmem_chunk *chunk;
	struct page *page = NULL;
	struct folio *folio = NULL;
	int ret;
	unsigned int order = 0;

	spin_lock(&drm->dmem->lock);
	if (is_large && drm->dmem->free_folios) {
		folio = drm->dmem->free_folios;
		page = &folio->page;
		drm->dmem->free_folios = page->zone_device_data;
		chunk = nouveau_page_to_chunk(&folio->page);
		chunk->callocated++;
		spin_unlock(&drm->dmem->lock);
		order = ilog2(DMEM_CHUNK_NPAGES);
	} else if (!is_large && drm->dmem->free_pages) {
		page = drm->dmem->free_pages;
		drm->dmem->free_pages = page->zone_device_data;
		chunk = nouveau_page_to_chunk(page);
		chunk->callocated++;
		spin_unlock(&drm->dmem->lock);
		folio = page_folio(page);
	} else {
		spin_unlock(&drm->dmem->lock);
		ret = nouveau_dmem_chunk_alloc(drm, &page, is_large);
		if (ret)
			return NULL;
		folio = page_folio(page);
		if (is_large)
			order = ilog2(DMEM_CHUNK_NPAGES);
	}

	zone_device_folio_init(folio, order);
	return page;
}

static void
nouveau_dmem_page_free_locked(struct nouveau_drm *drm, struct page *page)
{
	unlock_page(page);
	put_page(page);
}

void
nouveau_dmem_resume(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk;
	int ret;

	if (drm->dmem == NULL)
		return;

	mutex_lock(&drm->dmem->mutex);
	list_for_each_entry(chunk, &drm->dmem->chunks, list) {
		ret = nouveau_bo_pin(chunk->bo, NOUVEAU_GEM_DOMAIN_VRAM, false);
		/* FIXME handle pin failure */
		WARN_ON(ret);
	}
	mutex_unlock(&drm->dmem->mutex);
}

void
nouveau_dmem_suspend(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk;

	if (drm->dmem == NULL)
		return;

	mutex_lock(&drm->dmem->mutex);
	list_for_each_entry(chunk, &drm->dmem->chunks, list)
		nouveau_bo_unpin(chunk->bo);
	mutex_unlock(&drm->dmem->mutex);
}

/*
 * Evict all pages mapping a chunk.
 */
static void
nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk)
{
	unsigned long i, npages = range_len(&chunk->pagemap.range) >> PAGE_SHIFT;
	unsigned long *src_pfns, *dst_pfns;
	struct nouveau_dmem_dma_info *dma_info;
	struct nouveau_fence *fence;

	src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL);
	dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL);
	dma_info = kvcalloc(npages, sizeof(*dma_info), GFP_KERNEL | __GFP_NOFAIL);

	migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT,
			     npages);

	for (i = 0; i < npages; i++) {
		if (src_pfns[i] & MIGRATE_PFN_MIGRATE) {
			struct page *dpage;
			struct folio *folio = page_folio(
				migrate_pfn_to_page(src_pfns[i]));
			unsigned int order = folio_order(folio);

			if (src_pfns[i] & MIGRATE_PFN_COMPOUND) {
				dpage = folio_page(
					folio_alloc(
						GFP_HIGHUSER_MOVABLE, order), 0);
			} else {
				/*
				 * __GFP_NOFAIL because the GPU is going away and there
				 * is nothing sensible we can do if we can't copy the
				 * data back.
				 */
				dpage = alloc_page(GFP_HIGHUSER | __GFP_NOFAIL);
			}

			dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
			nouveau_dmem_copy_folio(chunk->drm,
				page_folio(migrate_pfn_to_page(src_pfns[i])),
				page_folio(dpage),
				&dma_info[i]);
		}
	}

	nouveau_fence_new(&fence, chunk->drm->dmem->migrate.chan);
	migrate_device_pages(src_pfns, dst_pfns, npages);
	nouveau_dmem_fence_done(&fence);
	migrate_device_finalize(src_pfns, dst_pfns, npages);
	kvfree(src_pfns);
	kvfree(dst_pfns);
	for (i = 0; i < npages; i++)
		dma_unmap_page(chunk->drm->dev->dev, dma_info[i].dma_addr,
			       dma_info[i].size, DMA_BIDIRECTIONAL);
	kvfree(dma_info);
}

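/*
 * Tear down device memory: evict every chunk back to system RAM, then release
 * the backing BOs, pagemaps and physical address ranges.
 */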
void
nouveau_dmem_fini(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk, *tmp;

	if (drm->dmem == NULL)
		return;

	mutex_lock(&drm->dmem->mutex);

	list_for_each_entry_safe(chunk, tmp, &drm->dmem->chunks, list) {
		nouveau_dmem_evict_chunk(chunk);
		nouveau_bo_unpin_del(&chunk->bo);
		WARN_ON(chunk->callocated);
		list_del(&chunk->list);
		memunmap_pages(&chunk->pagemap);
		release_mem_region(chunk->pagemap.range.start,
				   range_len(&chunk->pagemap.range));
		kfree(chunk);
	}

	mutex_unlock(&drm->dmem->mutex);
}

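/*
 * Emit a pitch-linear copy of @npages PAGE_SIZE lines between the given
 * apertures on the A0B5-class copy engine.
 */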
static int
nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages,
		    enum nouveau_aper dst_aper, u64 dst_addr,
		    enum nouveau_aper src_aper, u64 src_addr)
{
	struct nvif_push *push = &drm->dmem->migrate.chan->chan.push;
	u32 launch_dma = 0;
	int ret;

	ret = PUSH_WAIT(push, 13);
	if (ret)
		return ret;

	if (src_aper != NOUVEAU_APER_VIRT) {
		switch (src_aper) {
		case NOUVEAU_APER_VRAM:
			PUSH_IMMD(push, NVA0B5, SET_SRC_PHYS_MODE,
				  NVDEF(NVA0B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB));
			break;
		case NOUVEAU_APER_HOST:
			PUSH_IMMD(push, NVA0B5, SET_SRC_PHYS_MODE,
				  NVDEF(NVA0B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM));
			break;
		default:
			return -EINVAL;
		}

		launch_dma |= NVDEF(NVA0B5, LAUNCH_DMA, SRC_TYPE, PHYSICAL);
	}

	if (dst_aper != NOUVEAU_APER_VIRT) {
		switch (dst_aper) {
		case NOUVEAU_APER_VRAM:
			PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE,
				  NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB));
			break;
		case NOUVEAU_APER_HOST:
			PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE,
				  NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM));
			break;
		default:
			return -EINVAL;
		}

		launch_dma |= NVDEF(NVA0B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);
	}

	PUSH_MTHD(push, NVA0B5, OFFSET_IN_UPPER,
		  NVVAL(NVA0B5, OFFSET_IN_UPPER, UPPER, upper_32_bits(src_addr)),

				OFFSET_IN_LOWER, lower_32_bits(src_addr),

				OFFSET_OUT_UPPER,
		  NVVAL(NVA0B5, OFFSET_OUT_UPPER, UPPER, upper_32_bits(dst_addr)),

				OFFSET_OUT_LOWER, lower_32_bits(dst_addr),
				PITCH_IN, PAGE_SIZE,
				PITCH_OUT, PAGE_SIZE,
				LINE_LENGTH_IN, PAGE_SIZE,
				LINE_COUNT, npages);

	PUSH_MTHD(push, NVA0B5, LAUNCH_DMA, launch_dma |
		  NVDEF(NVA0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED) |
		  NVDEF(NVA0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, SEMAPHORE_TYPE, NONE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, INTERRUPT_TYPE, NONE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) |
		  NVDEF(NVA0B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
		  NVDEF(NVA0B5, LAUNCH_DMA, MULTI_LINE_ENABLE, TRUE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, REMAP_ENABLE, FALSE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, BYPASS_L2, USE_PTE_SETTING));
	return 0;
}

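/*
 * Zero-fill @length bytes at @dst_addr via the copy engine's remap-to-constant
 * path, used for destination VRAM pages that have no source to copy from.
 */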
static int
nvc0b5_migrate_clear(struct nouveau_drm *drm, u32 length,
		     enum nouveau_aper dst_aper, u64 dst_addr)
{
	struct nvif_push *push = &drm->dmem->migrate.chan->chan.push;
	u32 launch_dma = 0;
	int ret;

	ret = PUSH_WAIT(push, 12);
	if (ret)
		return ret;

	switch (dst_aper) {
	case NOUVEAU_APER_VRAM:
		PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE,
			  NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB));
		break;
	case NOUVEAU_APER_HOST:
		PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE,
			  NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM));
		break;
	default:
		return -EINVAL;
	}

	launch_dma |= NVDEF(NVA0B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);

	PUSH_MTHD(push, NVA0B5, SET_REMAP_CONST_A, 0,
				SET_REMAP_CONST_B, 0,

				SET_REMAP_COMPONENTS,
		  NVDEF(NVA0B5, SET_REMAP_COMPONENTS, DST_X, CONST_A) |
		  NVDEF(NVA0B5, SET_REMAP_COMPONENTS, DST_Y, CONST_B) |
		  NVDEF(NVA0B5, SET_REMAP_COMPONENTS, COMPONENT_SIZE, FOUR) |
		  NVDEF(NVA0B5, SET_REMAP_COMPONENTS, NUM_DST_COMPONENTS, TWO));

	PUSH_MTHD(push, NVA0B5, OFFSET_OUT_UPPER,
		  NVVAL(NVA0B5, OFFSET_OUT_UPPER, UPPER, upper_32_bits(dst_addr)),

				OFFSET_OUT_LOWER, lower_32_bits(dst_addr));

	PUSH_MTHD(push, NVA0B5, LINE_LENGTH_IN, length >> 3);

	PUSH_MTHD(push, NVA0B5, LAUNCH_DMA, launch_dma |
		  NVDEF(NVA0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED) |
		  NVDEF(NVA0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, SEMAPHORE_TYPE, NONE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, INTERRUPT_TYPE, NONE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) |
		  NVDEF(NVA0B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
		  NVDEF(NVA0B5, LAUNCH_DMA, MULTI_LINE_ENABLE, FALSE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, REMAP_ENABLE, TRUE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, BYPASS_L2, USE_PTE_SETTING));
	return 0;
}

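/*
 * Select the copy/clear functions and migration channel based on the
 * available copy-engine class.
 */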
static int
nouveau_dmem_migrate_init(struct nouveau_drm *drm)
{
	switch (drm->ttm.copy.oclass) {
	case PASCAL_DMA_COPY_A:
	case PASCAL_DMA_COPY_B:
	case VOLTA_DMA_COPY_A:
	case TURING_DMA_COPY_A:
		drm->dmem->migrate.copy_func = nvc0b5_migrate_copy;
		drm->dmem->migrate.clear_func = nvc0b5_migrate_clear;
		drm->dmem->migrate.chan = drm->ttm.chan;
		return 0;
	default:
		break;
	}
	return -ENODEV;
}

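/*
 * Set up the per-device dmem state; quietly left disabled on pre-Pascal
 * hardware or when no suitable copy engine is available.
 */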
void
nouveau_dmem_init(struct nouveau_drm *drm)
{
	int ret;

	/* This only makes sense on PASCAL or newer */
	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_PASCAL)
		return;

	if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL)))
		return;

	drm->dmem->drm = drm;
	mutex_init(&drm->dmem->mutex);
	INIT_LIST_HEAD(&drm->dmem->chunks);
	spin_lock_init(&drm->dmem->lock);

	/* Initialize migration dma helpers before registering memory */
	ret = nouveau_dmem_migrate_init(drm);
	if (ret) {
		kfree(drm->dmem);
		drm->dmem = NULL;
	}
}

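/*
 * Allocate a device-private destination page (or folio) and either copy the
 * source system page into VRAM or clear the new VRAM page when there is no
 * source. Returns the encoded destination migrate pfn (0 on failure) and
 * fills @pfn with the NVIF pfnmap entry used to map it into the GPU's VMM.
 */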
static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
		struct nouveau_svmm *svmm, unsigned long src,
		struct nouveau_dmem_dma_info *dma_info, u64 *pfn)
{
	struct device *dev = drm->dev->dev;
	struct page *dpage, *spage;
	unsigned long paddr;
	bool is_large = false;
	unsigned long mpfn;

	spage = migrate_pfn_to_page(src);
	if (!(src & MIGRATE_PFN_MIGRATE))
		goto out;

	is_large = src & MIGRATE_PFN_COMPOUND;
	dpage = nouveau_dmem_page_alloc_locked(drm, is_large);
	if (!dpage)
		goto out;

	paddr = nouveau_dmem_page_addr(dpage);
	if (spage) {
		dma_info->dma_addr = dma_map_page(dev, spage, 0, page_size(spage),
						  DMA_BIDIRECTIONAL);
		dma_info->size = page_size(spage);
		if (dma_mapping_error(dev, dma_info->dma_addr))
			goto out_free_page;
		if (drm->dmem->migrate.copy_func(drm, folio_nr_pages(page_folio(spage)),
						 NOUVEAU_APER_VRAM, paddr, NOUVEAU_APER_HOST,
						 dma_info->dma_addr))
			goto out_dma_unmap;
	} else {
		dma_info->dma_addr = DMA_MAPPING_ERROR;
		if (drm->dmem->migrate.clear_func(drm, page_size(dpage),
						  NOUVEAU_APER_VRAM, paddr))
			goto out_free_page;
	}

	dpage->zone_device_data = svmm;
	*pfn = NVIF_VMM_PFNMAP_V0_V | NVIF_VMM_PFNMAP_V0_VRAM |
	       ((paddr >> PAGE_SHIFT) << NVIF_VMM_PFNMAP_V0_ADDR_SHIFT);
	if (src & MIGRATE_PFN_WRITE)
		*pfn |= NVIF_VMM_PFNMAP_V0_W;
	mpfn = migrate_pfn(page_to_pfn(dpage));
	if (folio_order(page_folio(dpage)))
		mpfn |= MIGRATE_PFN_COMPOUND;
	return mpfn;

out_dma_unmap:
	dma_unmap_page(dev, dma_info->dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
out_free_page:
	nouveau_dmem_page_free_locked(drm, dpage);
out:
	*pfn = NVIF_VMM_PFNMAP_V0_NONE;
	return 0;
}

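/*
 * Migrate one batch of pages from system RAM to VRAM: populate the dst array,
 * fence the copies, update the GPU page tables and unmap the DMA addresses.
 */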
static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm,
		struct nouveau_svmm *svmm, struct migrate_vma *args,
		struct nouveau_dmem_dma_info *dma_info, u64 *pfns)
{
	struct nouveau_fence *fence;
	unsigned long addr = args->start, nr_dma = 0, i;
	unsigned long order = 0;

	for (i = 0; addr < args->end; ) {
		struct folio *folio;

		args->dst[i] = nouveau_dmem_migrate_copy_one(drm, svmm,
				args->src[i], dma_info + nr_dma, pfns + i);
		if (!args->dst[i]) {
			i++;
			addr += PAGE_SIZE;
			continue;
		}
		if (!dma_mapping_error(drm->dev->dev, dma_info[nr_dma].dma_addr))
			nr_dma++;
		folio = page_folio(migrate_pfn_to_page(args->dst[i]));
		order = folio_order(folio);
		i += 1 << order;
		addr += (1 << order) * PAGE_SIZE;
	}

	nouveau_fence_new(&fence, drm->dmem->migrate.chan);
	migrate_vma_pages(args);
	nouveau_dmem_fence_done(&fence);
	nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i, order);

	while (nr_dma--) {
		dma_unmap_page(drm->dev->dev, dma_info[nr_dma].dma_addr,
			       dma_info[nr_dma].size, DMA_BIDIRECTIONAL);
	}
	migrate_vma_finalize(args);
}

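/*
 * Migrate the given user address range into VRAM, capping each
 * migrate_vma_setup() batch at HPAGE_PMD_NR pages when THP is enabled.
 */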
int
nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
			 struct nouveau_svmm *svmm,
			 struct vm_area_struct *vma,
			 unsigned long start,
			 unsigned long end)
{
	unsigned long npages = (end - start) >> PAGE_SHIFT;
	unsigned long max = npages;
	struct migrate_vma args = {
		.vma = vma,
		.start = start,
		.pgmap_owner = drm->dev,
		.flags = MIGRATE_VMA_SELECT_SYSTEM
			| MIGRATE_VMA_SELECT_COMPOUND,
	};
	unsigned long i;
	u64 *pfns;
	int ret = -ENOMEM;
	struct nouveau_dmem_dma_info *dma_info;

	if (drm->dmem == NULL) {
		ret = -ENODEV;
		goto out;
	}

	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
		if (max > (unsigned long)HPAGE_PMD_NR)
			max = (unsigned long)HPAGE_PMD_NR;

	args.src = kcalloc(max, sizeof(*args.src), GFP_KERNEL);
	if (!args.src)
		goto out;
	args.dst = kcalloc(max, sizeof(*args.dst), GFP_KERNEL);
	if (!args.dst)
		goto out_free_src;

	dma_info = kmalloc_array(max, sizeof(*dma_info), GFP_KERNEL);
	if (!dma_info)
		goto out_free_dst;

	pfns = nouveau_pfns_alloc(max);
	if (!pfns)
		goto out_free_dma;

	for (i = 0; i < npages; i += max) {
		if (args.start + (max << PAGE_SHIFT) > end)
			args.end = end;
		else
			args.end = args.start + (max << PAGE_SHIFT);

		ret = migrate_vma_setup(&args);
		if (ret)
			goto out_free_pfns;

		if (args.cpages)
			nouveau_dmem_migrate_chunk(drm, svmm, &args, dma_info,
						   pfns);
		args.start = args.end;
	}

	ret = 0;
out_free_pfns:
	nouveau_pfns_free(pfns);
out_free_dma:
	kfree(dma_info);
out_free_dst:
	kfree(args.dst);
out_free_src:
	kfree(args.src);
out:
	return ret;
}