/*
 * Copyright 2018 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "nouveau_dmem.h"
#include "nouveau_drv.h"
#include "nouveau_chan.h"
#include "nouveau_dma.h"
#include "nouveau_mem.h"
#include "nouveau_bo.h"
#include "nouveau_svm.h"

#include <nvif/class.h>
#include <nvif/object.h>
#include <nvif/push906f.h>
#include <nvif/if000c.h>
#include <nvif/if500b.h>
#include <nvif/if900b.h>

#include <nvhw/class/cla0b5.h>

#include <linux/sched/mm.h>
#include <linux/hmm.h>
#include <linux/memremap.h>
#include <linux/migrate.h>

/*
 * FIXME: this is ugly. Right now we are using TTM to allocate vram and we pin
 * it in vram while it is in use. We likely want to overhaul memory management
 * for nouveau to be more page-like (not necessarily with the system page size,
 * but a bigger page size) at the lowest level, and have some shim layer on top
 * that would provide the same functionality as TTM.
 */
#define DMEM_CHUNK_SIZE (2UL << 20)
#define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT)
#define NR_CHUNKS (128)

enum nouveau_aper {
	NOUVEAU_APER_VIRT,
	NOUVEAU_APER_VRAM,
	NOUVEAU_APER_HOST,
};

typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages,
				      enum nouveau_aper, u64 dst_addr,
				      enum nouveau_aper, u64 src_addr);
typedef int (*nouveau_clear_page_t)(struct nouveau_drm *drm, u32 length,
				      enum nouveau_aper, u64 dst_addr);

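/*
 * A chunk pairs a pinned VRAM buffer object with a MEMORY_DEVICE_PRIVATE
 * pagemap. Device-private struct pages from the pagemap are handed out via
 * the free_pages/free_folios lists in struct nouveau_dmem below, and
 * callocated tracks how many of them are currently in use.
 */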
struct nouveau_dmem_chunk {
	struct list_head list;
	struct nouveau_bo *bo;
	struct nouveau_drm *drm;
	unsigned long callocated;
	struct dev_pagemap pagemap;
};

struct nouveau_dmem_migrate {
	nouveau_migrate_copy_t copy_func;
	nouveau_clear_page_t clear_func;
	struct nouveau_channel *chan;
};

struct nouveau_dmem {
	struct nouveau_drm *drm;
	struct nouveau_dmem_migrate migrate;
	struct list_head chunks;
	struct mutex mutex;
	struct page *free_pages;
	struct folio *free_folios;
	spinlock_t lock;
};

struct nouveau_dmem_dma_info {
	dma_addr_t dma_addr;
	size_t size;
};

static struct nouveau_dmem_chunk *nouveau_page_to_chunk(struct page *page)
{
	return container_of(page_pgmap(page), struct nouveau_dmem_chunk,
			    pagemap);
}

static struct nouveau_drm *page_to_drm(struct page *page)
{
	struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);

	return chunk->drm;
}

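/*
 * Translate a device-private page into the VRAM address backing it: the
 * page's offset within the chunk's pagemap range added to the offset of the
 * chunk's buffer object.
 */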
unsigned long nouveau_dmem_page_addr(struct page *page)
{
	struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);
	unsigned long off = (page_to_pfn(page) << PAGE_SHIFT) -
				chunk->pagemap.range.start;

	return chunk->bo->offset + off;
}

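/*
 * Called when the last reference to a device-private folio is dropped: put
 * the page (or large folio) back on the device's free list and drop the
 * owning chunk's allocation count.
 */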
static void nouveau_dmem_folio_free(struct folio *folio)
{
	struct page *page = &folio->page;
	struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);
	struct nouveau_dmem *dmem = chunk->drm->dmem;

	spin_lock(&dmem->lock);
	if (folio_order(folio)) {
		page->zone_device_data = dmem->free_folios;
		dmem->free_folios = folio;
	} else {
		page->zone_device_data = dmem->free_pages;
		dmem->free_pages = page;
	}

	WARN_ON(!chunk->callocated);
	chunk->callocated--;
	/*
	 * FIXME when chunk->callocated reaches 0 we should add the chunk to
	 * a reclaim list so that it can be freed in case of memory pressure.
	 */
	spin_unlock(&dmem->lock);
}

static void nouveau_dmem_fence_done(struct nouveau_fence **fence)
{
	if (fence) {
		nouveau_fence_wait(*fence, true, false);
		nouveau_fence_unref(fence);
	} else {
		/*
		 * FIXME wait for channel to be IDLE before finalizing
		 * the hmem object.
		 */
	}
}

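/*
 * Copy the contents of a device-private folio in VRAM (sfolio) to a system
 * memory folio (dfolio) using the copy engine. On success the destination
 * page is left DMA-mapped (described by dma_info) and the destination folio
 * locked; the caller unmaps once the copy has been fenced.
 */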
static int nouveau_dmem_copy_folio(struct nouveau_drm *drm,
				   struct folio *sfolio, struct folio *dfolio,
				   struct nouveau_dmem_dma_info *dma_info)
{
	struct device *dev = drm->dev->dev;
	struct page *dpage = folio_page(dfolio, 0);
	struct page *spage = folio_page(sfolio, 0);

	folio_lock(dfolio);

	dma_info->dma_addr = dma_map_page(dev, dpage, 0, page_size(dpage),
					DMA_BIDIRECTIONAL);
	dma_info->size = page_size(dpage);
	if (dma_mapping_error(dev, dma_info->dma_addr))
		return -EIO;

	if (drm->dmem->migrate.copy_func(drm, folio_nr_pages(sfolio),
					 NOUVEAU_APER_HOST, dma_info->dma_addr,
					 NOUVEAU_APER_VRAM,
					 nouveau_dmem_page_addr(spage))) {
		dma_unmap_page(dev, dma_info->dma_addr, page_size(dpage),
					DMA_BIDIRECTIONAL);
		return -EIO;
	}

	return 0;
}

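/*
 * CPU fault handler for device-private pages: allocate a system memory page
 * (or large folio), copy the data back from VRAM, invalidate the GPU mapping
 * and let the migration core replace the device-private PTEs.
 */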
static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
{
	struct nouveau_drm *drm = page_to_drm(vmf->page);
	struct nouveau_dmem *dmem = drm->dmem;
	struct nouveau_fence *fence;
	struct nouveau_svmm *svmm;
	struct page *dpage;
	vm_fault_t ret = 0;
	int err;
	struct migrate_vma args = {
		.vma		= vmf->vma,
		.pgmap_owner	= drm->dev,
		.fault_page	= vmf->page,
		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE |
				  MIGRATE_VMA_SELECT_COMPOUND,
		.src = NULL,
		.dst = NULL,
	};
	unsigned int order, nr;
	struct folio *sfolio, *dfolio;
	struct nouveau_dmem_dma_info dma_info;

	sfolio = page_folio(vmf->page);
	order = folio_order(sfolio);
	nr = 1 << order;

	/*
	 * Handle partial unmap faults, where the folio is large, but
	 * the pmd is split.
	 */
	if (vmf->pte) {
		order = 0;
		nr = 1;
	}

	if (order)
		args.flags |= MIGRATE_VMA_SELECT_COMPOUND;

	args.start = ALIGN_DOWN(vmf->address, (PAGE_SIZE << order));
	args.vma = vmf->vma;
	args.end = args.start + (PAGE_SIZE << order);
	args.src = kcalloc(nr, sizeof(*args.src), GFP_KERNEL);
	args.dst = kcalloc(nr, sizeof(*args.dst), GFP_KERNEL);

	if (!args.src || !args.dst) {
		ret = VM_FAULT_OOM;
		goto err;
	}
	/*
	 * FIXME what we really want is to find some heuristic to migrate more
	 * than just one page on CPU fault. When such a fault happens it is very
	 * likely that more surrounding pages will CPU fault too.
	 */
	if (migrate_vma_setup(&args) < 0) {
		ret = VM_FAULT_SIGBUS;
		goto err;
	}
	if (!args.cpages)
		goto err;

	if (order)
		dpage = folio_page(vma_alloc_folio(GFP_HIGHUSER | __GFP_ZERO,
					order, vmf->vma, vmf->address), 0);
	else
		dpage = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vmf->vma,
					vmf->address);
	if (!dpage) {
		ret = VM_FAULT_OOM;
		goto done;
	}

	args.dst[0] = migrate_pfn(page_to_pfn(dpage));
	if (order)
		args.dst[0] |= MIGRATE_PFN_COMPOUND;
	dfolio = page_folio(dpage);

	svmm = folio_zone_device_data(sfolio);
	mutex_lock(&svmm->mutex);
	nouveau_svmm_invalidate(svmm, args.start, args.end);
	err = nouveau_dmem_copy_folio(drm, sfolio, dfolio, &dma_info);
	mutex_unlock(&svmm->mutex);
	if (err) {
		ret = VM_FAULT_SIGBUS;
		goto done;
	}

	nouveau_fence_new(&fence, dmem->migrate.chan);
	migrate_vma_pages(&args);
	nouveau_dmem_fence_done(&fence);
	dma_unmap_page(drm->dev->dev, dma_info.dma_addr, dma_info.size,
				DMA_BIDIRECTIONAL);
done:
	migrate_vma_finalize(&args);
err:
	kfree(args.src);
	kfree(args.dst);
	return ret;
}

static void nouveau_dmem_folio_split(struct folio *head, struct folio *tail)
{
	if (tail == NULL)
		return;
	tail->pgmap = head->pgmap;
	tail->mapping = head->mapping;
	folio_set_zone_device_data(tail, folio_zone_device_data(head));
}

static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = {
	.folio_free		= nouveau_dmem_folio_free,
	.migrate_to_ram		= nouveau_dmem_migrate_to_ram,
	.folio_split		= nouveau_dmem_folio_split,
};

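/*
 * Allocate a new chunk of device memory: reserve unused physical address
 * space for the device-private pagemap, pin a VRAM buffer object, register
 * the pagemap and seed the free page/folio lists. One free page (or folio)
 * is handed straight back to the caller through ppage.
 */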
static int
nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage,
			 bool is_large)
{
	struct nouveau_dmem_chunk *chunk;
	struct resource *res;
	struct page *page;
	void *ptr;
	unsigned long i, pfn_first, pfn;
	int ret;

	chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
	if (chunk == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	/* Allocate unused physical address space for device private pages. */
	res = request_free_mem_region(&iomem_resource, DMEM_CHUNK_SIZE * NR_CHUNKS,
				      "nouveau_dmem");
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		goto out_free;
	}

	chunk->drm = drm;
	chunk->pagemap.type = MEMORY_DEVICE_PRIVATE;
	chunk->pagemap.range.start = res->start;
	chunk->pagemap.range.end = res->end;
	chunk->pagemap.nr_range = 1;
	chunk->pagemap.ops = &nouveau_dmem_pagemap_ops;
	chunk->pagemap.owner = drm->dev;

	ret = nouveau_bo_new_pin(&drm->client, NOUVEAU_GEM_DOMAIN_VRAM, DMEM_CHUNK_SIZE,
				 &chunk->bo);
	if (ret)
		goto out_release;

	ptr = memremap_pages(&chunk->pagemap, numa_node_id());
	if (IS_ERR(ptr)) {
		ret = PTR_ERR(ptr);
		goto out_bo_free;
	}

	mutex_lock(&drm->dmem->mutex);
	list_add(&chunk->list, &drm->dmem->chunks);
	mutex_unlock(&drm->dmem->mutex);

	pfn_first = chunk->pagemap.range.start >> PAGE_SHIFT;
	page = pfn_to_page(pfn_first);
	spin_lock(&drm->dmem->lock);

	pfn = pfn_first;
	for (i = 0; i < NR_CHUNKS; i++) {
		int j;

		if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) || !is_large) {
			for (j = 0; j < DMEM_CHUNK_NPAGES - 1; j++, pfn++) {
				page = pfn_to_page(pfn);
				page->zone_device_data = drm->dmem->free_pages;
				drm->dmem->free_pages = page;
			}
		} else {
			page = pfn_to_page(pfn);
			page->zone_device_data = drm->dmem->free_folios;
			drm->dmem->free_folios = page_folio(page);
			pfn += DMEM_CHUNK_NPAGES;
		}
	}

	/* Hand the first free page (or folio) back to the caller. */
	if (is_large) {
		*ppage = &drm->dmem->free_folios->page;
		drm->dmem->free_folios = (*ppage)->zone_device_data;
	} else {
		*ppage = drm->dmem->free_pages;
		drm->dmem->free_pages = (*ppage)->zone_device_data;
	}

	chunk->callocated++;
	spin_unlock(&drm->dmem->lock);

	NV_INFO(drm, "DMEM: registered %ldMB of %sdevice memory %lx %lx\n",
		NR_CHUNKS * DMEM_CHUNK_SIZE >> 20, is_large ? "THP " : "", pfn_first,
		nouveau_dmem_page_addr(page));

	return 0;

out_bo_free:
	nouveau_bo_unpin_del(&chunk->bo);
out_release:
	release_mem_region(chunk->pagemap.range.start, range_len(&chunk->pagemap.range));
out_free:
	kfree(chunk);
out:
	return ret;
}

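/*
 * Pop a free device-private page (or, for is_large, a free large folio) off
 * the per-device lists, falling back to allocating a fresh chunk when the
 * lists are empty, and initialise it as a zone-device folio of the matching
 * order.
 */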
static struct page *
nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm, bool is_large)
{
	struct nouveau_dmem_chunk *chunk;
	struct page *page = NULL;
	struct folio *folio = NULL;
	int ret;
	unsigned int order = 0;

	spin_lock(&drm->dmem->lock);
	if (is_large && drm->dmem->free_folios) {
		folio = drm->dmem->free_folios;
		page = &folio->page;
		drm->dmem->free_folios = page->zone_device_data;
		chunk = nouveau_page_to_chunk(&folio->page);
		chunk->callocated++;
		spin_unlock(&drm->dmem->lock);
		order = ilog2(DMEM_CHUNK_NPAGES);
	} else if (!is_large && drm->dmem->free_pages) {
		page = drm->dmem->free_pages;
		drm->dmem->free_pages = page->zone_device_data;
		chunk = nouveau_page_to_chunk(page);
		chunk->callocated++;
		spin_unlock(&drm->dmem->lock);
		folio = page_folio(page);
	} else {
		spin_unlock(&drm->dmem->lock);
		ret = nouveau_dmem_chunk_alloc(drm, &page, is_large);
		if (ret)
			return NULL;
		folio = page_folio(page);
		if (is_large)
			order = ilog2(DMEM_CHUNK_NPAGES);
	}

	zone_device_folio_init(folio, order);
	return page;
}

static void
nouveau_dmem_page_free_locked(struct nouveau_drm *drm, struct page *page)
{
	unlock_page(page);
	put_page(page);
}

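/*
 * Suspend/resume support: resume re-pins the VRAM buffer objects backing
 * every chunk, suspend drops those pins.
 */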
void
nouveau_dmem_resume(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk;
	int ret;

	if (drm->dmem == NULL)
		return;

	mutex_lock(&drm->dmem->mutex);
	list_for_each_entry(chunk, &drm->dmem->chunks, list) {
		ret = nouveau_bo_pin(chunk->bo, NOUVEAU_GEM_DOMAIN_VRAM, false);
		/* FIXME handle pin failure */
		WARN_ON(ret);
	}
	mutex_unlock(&drm->dmem->mutex);
}

void
nouveau_dmem_suspend(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk;

	if (drm->dmem == NULL)
		return;

	mutex_lock(&drm->dmem->mutex);
	list_for_each_entry(chunk, &drm->dmem->chunks, list)
		nouveau_bo_unpin(chunk->bo);
	mutex_unlock(&drm->dmem->mutex);
}

/*
 * Evict all pages mapping a chunk.
 */
static void
nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk)
{
	unsigned long i, npages = range_len(&chunk->pagemap.range) >> PAGE_SHIFT;
	unsigned long *src_pfns, *dst_pfns;
	struct nouveau_dmem_dma_info *dma_info;
	struct nouveau_fence *fence;

	src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL);
	dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL);
	dma_info = kvcalloc(npages, sizeof(*dma_info), GFP_KERNEL | __GFP_NOFAIL);

	migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT,
			npages);

	for (i = 0; i < npages; i++) {
		if (src_pfns[i] & MIGRATE_PFN_MIGRATE) {
			struct page *dpage;
			struct folio *folio = page_folio(
				migrate_pfn_to_page(src_pfns[i]));
			unsigned int order = folio_order(folio);

			if (src_pfns[i] & MIGRATE_PFN_COMPOUND) {
				dpage = folio_page(
						folio_alloc(
						GFP_HIGHUSER_MOVABLE, order), 0);
			} else {
				/*
				 * __GFP_NOFAIL because the GPU is going away and there
				 * is nothing sensible we can do if we can't copy the
				 * data back.
				 */
				dpage = alloc_page(GFP_HIGHUSER | __GFP_NOFAIL);
			}

			dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
			nouveau_dmem_copy_folio(chunk->drm,
				page_folio(migrate_pfn_to_page(src_pfns[i])),
				page_folio(dpage),
				&dma_info[i]);
		}
	}

	nouveau_fence_new(&fence, chunk->drm->dmem->migrate.chan);
	migrate_device_pages(src_pfns, dst_pfns, npages);
	nouveau_dmem_fence_done(&fence);
	migrate_device_finalize(src_pfns, dst_pfns, npages);
	kvfree(src_pfns);
	kvfree(dst_pfns);
	for (i = 0; i < npages; i++)
		dma_unmap_page(chunk->drm->dev->dev, dma_info[i].dma_addr,
				dma_info[i].size, DMA_BIDIRECTIONAL);
	kvfree(dma_info);
}

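/*
 * Tear down all device memory on driver unload: evict any data still
 * resident in VRAM back to system memory, then unpin and delete the backing
 * buffer objects and release the device-private pagemaps.
 */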
void
nouveau_dmem_fini(struct nouveau_drm *drm)
{
	struct nouveau_dmem_chunk *chunk, *tmp;

	if (drm->dmem == NULL)
		return;

	mutex_lock(&drm->dmem->mutex);

	list_for_each_entry_safe(chunk, tmp, &drm->dmem->chunks, list) {
		nouveau_dmem_evict_chunk(chunk);
		nouveau_bo_unpin_del(&chunk->bo);
		WARN_ON(chunk->callocated);
		list_del(&chunk->list);
		memunmap_pages(&chunk->pagemap);
		release_mem_region(chunk->pagemap.range.start,
				   range_len(&chunk->pagemap.range));
		kfree(chunk);
	}

	mutex_unlock(&drm->dmem->mutex);
}

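/*
 * Push a copy of npages pages between the given apertures to the migration
 * channel using the NVA0B5-style copy engine methods. Completion is not
 * waited for here; callers fence the channel afterwards.
 */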
static int
nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages,
		    enum nouveau_aper dst_aper, u64 dst_addr,
		    enum nouveau_aper src_aper, u64 src_addr)
{
	struct nvif_push *push = &drm->dmem->migrate.chan->chan.push;
	u32 launch_dma = 0;
	int ret;

	ret = PUSH_WAIT(push, 13);
	if (ret)
		return ret;

	if (src_aper != NOUVEAU_APER_VIRT) {
		switch (src_aper) {
		case NOUVEAU_APER_VRAM:
			PUSH_IMMD(push, NVA0B5, SET_SRC_PHYS_MODE,
				  NVDEF(NVA0B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB));
			break;
		case NOUVEAU_APER_HOST:
			PUSH_IMMD(push, NVA0B5, SET_SRC_PHYS_MODE,
				  NVDEF(NVA0B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM));
			break;
		default:
			return -EINVAL;
		}

		launch_dma |= NVDEF(NVA0B5, LAUNCH_DMA, SRC_TYPE, PHYSICAL);
	}

	if (dst_aper != NOUVEAU_APER_VIRT) {
		switch (dst_aper) {
		case NOUVEAU_APER_VRAM:
			PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE,
				  NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB));
			break;
		case NOUVEAU_APER_HOST:
			PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE,
				  NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM));
			break;
		default:
			return -EINVAL;
		}

		launch_dma |= NVDEF(NVA0B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);
	}

	PUSH_MTHD(push, NVA0B5, OFFSET_IN_UPPER,
		  NVVAL(NVA0B5, OFFSET_IN_UPPER, UPPER, upper_32_bits(src_addr)),

				OFFSET_IN_LOWER, lower_32_bits(src_addr),

				OFFSET_OUT_UPPER,
		  NVVAL(NVA0B5, OFFSET_OUT_UPPER, UPPER, upper_32_bits(dst_addr)),

				OFFSET_OUT_LOWER, lower_32_bits(dst_addr),
				PITCH_IN, PAGE_SIZE,
				PITCH_OUT, PAGE_SIZE,
				LINE_LENGTH_IN, PAGE_SIZE,
				LINE_COUNT, npages);

	PUSH_MTHD(push, NVA0B5, LAUNCH_DMA, launch_dma |
		  NVDEF(NVA0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED) |
		  NVDEF(NVA0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, SEMAPHORE_TYPE, NONE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, INTERRUPT_TYPE, NONE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) |
		  NVDEF(NVA0B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
		  NVDEF(NVA0B5, LAUNCH_DMA, MULTI_LINE_ENABLE, TRUE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, REMAP_ENABLE, FALSE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, BYPASS_L2, USE_PTE_SETTING));
	return 0;
}

static int
nvc0b5_migrate_clear(struct nouveau_drm *drm, u32 length,
		     enum nouveau_aper dst_aper, u64 dst_addr)
{
	struct nvif_push *push = &drm->dmem->migrate.chan->chan.push;
	u32 launch_dma = 0;
	int ret;

	ret = PUSH_WAIT(push, 12);
	if (ret)
		return ret;

	switch (dst_aper) {
	case NOUVEAU_APER_VRAM:
		PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE,
			  NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB));
		break;
	case NOUVEAU_APER_HOST:
		PUSH_IMMD(push, NVA0B5, SET_DST_PHYS_MODE,
			  NVDEF(NVA0B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM));
		break;
	default:
		return -EINVAL;
	}

	launch_dma |= NVDEF(NVA0B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);

	PUSH_MTHD(push, NVA0B5, SET_REMAP_CONST_A, 0,
				SET_REMAP_CONST_B, 0,

				SET_REMAP_COMPONENTS,
		  NVDEF(NVA0B5, SET_REMAP_COMPONENTS, DST_X, CONST_A) |
		  NVDEF(NVA0B5, SET_REMAP_COMPONENTS, DST_Y, CONST_B) |
		  NVDEF(NVA0B5, SET_REMAP_COMPONENTS, COMPONENT_SIZE, FOUR) |
		  NVDEF(NVA0B5, SET_REMAP_COMPONENTS, NUM_DST_COMPONENTS, TWO));

	PUSH_MTHD(push, NVA0B5, OFFSET_OUT_UPPER,
		  NVVAL(NVA0B5, OFFSET_OUT_UPPER, UPPER, upper_32_bits(dst_addr)),

				OFFSET_OUT_LOWER, lower_32_bits(dst_addr));

	PUSH_MTHD(push, NVA0B5, LINE_LENGTH_IN, length >> 3);

	PUSH_MTHD(push, NVA0B5, LAUNCH_DMA, launch_dma |
		  NVDEF(NVA0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED) |
		  NVDEF(NVA0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, SEMAPHORE_TYPE, NONE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, INTERRUPT_TYPE, NONE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) |
		  NVDEF(NVA0B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
		  NVDEF(NVA0B5, LAUNCH_DMA, MULTI_LINE_ENABLE, FALSE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, REMAP_ENABLE, TRUE) |
		  NVDEF(NVA0B5, LAUNCH_DMA, BYPASS_L2, USE_PTE_SETTING));
	return 0;
}

static int
nouveau_dmem_migrate_init(struct nouveau_drm *drm)
{
	switch (drm->ttm.copy.oclass) {
	case PASCAL_DMA_COPY_A:
	case PASCAL_DMA_COPY_B:
	case  VOLTA_DMA_COPY_A:
	case TURING_DMA_COPY_A:
		drm->dmem->migrate.copy_func = nvc0b5_migrate_copy;
		drm->dmem->migrate.clear_func = nvc0b5_migrate_clear;
		drm->dmem->migrate.chan = drm->ttm.chan;
		return 0;
	default:
		break;
	}
	return -ENODEV;
}

void
nouveau_dmem_init(struct nouveau_drm *drm)
{
	int ret;

	/* This only makes sense on PASCAL or newer */
	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_PASCAL)
		return;

	if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL)))
		return;

	drm->dmem->drm = drm;
	mutex_init(&drm->dmem->mutex);
	INIT_LIST_HEAD(&drm->dmem->chunks);
	spin_lock_init(&drm->dmem->lock);

	/* Initialize migration dma helpers before registering memory */
	ret = nouveau_dmem_migrate_init(drm);
	if (ret) {
		kfree(drm->dmem);
		drm->dmem = NULL;
	}
}

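/*
 * Set up the migration of a single source entry into VRAM: allocate a
 * device-private page (or large folio), copy or clear its contents on the
 * copy engine, record the GPU pfn value used for the SVM mapping and return
 * the destination migrate pfn (0 if the entry is skipped).
 */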
static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
		struct nouveau_svmm *svmm, unsigned long src,
		struct nouveau_dmem_dma_info *dma_info, u64 *pfn)
{
	struct device *dev = drm->dev->dev;
	struct page *dpage, *spage;
	unsigned long paddr;
	bool is_large = false;
	unsigned long mpfn;

	spage = migrate_pfn_to_page(src);
	if (!(src & MIGRATE_PFN_MIGRATE))
		goto out;

	is_large = src & MIGRATE_PFN_COMPOUND;
	dpage = nouveau_dmem_page_alloc_locked(drm, is_large);
	if (!dpage)
		goto out;

	paddr = nouveau_dmem_page_addr(dpage);
	if (spage) {
		dma_info->dma_addr = dma_map_page(dev, spage, 0, page_size(spage),
					 DMA_BIDIRECTIONAL);
		dma_info->size = page_size(spage);
		if (dma_mapping_error(dev, dma_info->dma_addr))
			goto out_free_page;
		if (drm->dmem->migrate.copy_func(drm, folio_nr_pages(page_folio(spage)),
			NOUVEAU_APER_VRAM, paddr, NOUVEAU_APER_HOST,
			dma_info->dma_addr))
			goto out_dma_unmap;
	} else {
		dma_info->dma_addr = DMA_MAPPING_ERROR;
		if (drm->dmem->migrate.clear_func(drm, page_size(dpage),
			NOUVEAU_APER_VRAM, paddr))
			goto out_free_page;
	}

	dpage->zone_device_data = svmm;
	*pfn = NVIF_VMM_PFNMAP_V0_V | NVIF_VMM_PFNMAP_V0_VRAM |
		((paddr >> PAGE_SHIFT) << NVIF_VMM_PFNMAP_V0_ADDR_SHIFT);
	if (src & MIGRATE_PFN_WRITE)
		*pfn |= NVIF_VMM_PFNMAP_V0_W;
	mpfn = migrate_pfn(page_to_pfn(dpage));
	if (folio_order(page_folio(dpage)))
		mpfn |= MIGRATE_PFN_COMPOUND;
	return mpfn;

out_dma_unmap:
	dma_unmap_page(dev, dma_info->dma_addr, dma_info->size, DMA_BIDIRECTIONAL);
out_free_page:
	nouveau_dmem_page_free_locked(drm, dpage);
out:
	*pfn = NVIF_VMM_PFNMAP_V0_NONE;
	return 0;
}

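/*
 * Migrate one batch of pages collected by migrate_vma_setup() into VRAM:
 * fill the destination array, fence the copy-engine work, update the GPU
 * page tables via nouveau_pfns_map() and finally tear down the temporary
 * DMA mappings.
 */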
static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm,
		struct nouveau_svmm *svmm, struct migrate_vma *args,
		struct nouveau_dmem_dma_info *dma_info, u64 *pfns)
{
	struct nouveau_fence *fence;
	unsigned long addr = args->start, nr_dma = 0, i;
	unsigned long order = 0;

	for (i = 0; addr < args->end; ) {
		struct folio *folio;

		args->dst[i] = nouveau_dmem_migrate_copy_one(drm, svmm,
				args->src[i], dma_info + nr_dma, pfns + i);
		if (!args->dst[i]) {
			i++;
			addr += PAGE_SIZE;
			continue;
		}
		if (!dma_mapping_error(drm->dev->dev, dma_info[nr_dma].dma_addr))
			nr_dma++;
		folio = page_folio(migrate_pfn_to_page(args->dst[i]));
		order = folio_order(folio);
		i += 1 << order;
		addr += (1 << order) * PAGE_SIZE;
	}

	nouveau_fence_new(&fence, drm->dmem->migrate.chan);
	migrate_vma_pages(args);
	nouveau_dmem_fence_done(&fence);
	nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i, order);

	while (nr_dma--) {
		dma_unmap_page(drm->dev->dev, dma_info[nr_dma].dma_addr,
				dma_info[nr_dma].size, DMA_BIDIRECTIONAL);
	}
	migrate_vma_finalize(args);
}

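/*
 * Migrate the range [start, end) of vma into device memory, in batches of at
 * most HPAGE_PMD_NR pages when THP is enabled. This is the entry point used
 * by the SVM code to make a range resident in VRAM.
 */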
int
nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
			 struct nouveau_svmm *svmm,
			 struct vm_area_struct *vma,
			 unsigned long start,
			 unsigned long end)
{
	unsigned long npages = (end - start) >> PAGE_SHIFT;
	unsigned long max = npages;
	struct migrate_vma args = {
		.vma		= vma,
		.start		= start,
		.pgmap_owner	= drm->dev,
		.flags		= MIGRATE_VMA_SELECT_SYSTEM
				  | MIGRATE_VMA_SELECT_COMPOUND,
	};
	unsigned long i;
	u64 *pfns;
	int ret = -ENOMEM;
	struct nouveau_dmem_dma_info *dma_info;

	if (drm->dmem == NULL) {
		ret = -ENODEV;
		goto out;
	}

	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
		if (max > (unsigned long)HPAGE_PMD_NR)
			max = (unsigned long)HPAGE_PMD_NR;

	args.src = kcalloc(max, sizeof(*args.src), GFP_KERNEL);
	if (!args.src)
		goto out;
	args.dst = kcalloc(max, sizeof(*args.dst), GFP_KERNEL);
	if (!args.dst)
		goto out_free_src;

	dma_info = kmalloc_array(max, sizeof(*dma_info), GFP_KERNEL);
	if (!dma_info)
		goto out_free_dst;

	pfns = nouveau_pfns_alloc(max);
	if (!pfns)
		goto out_free_dma;

	for (i = 0; i < npages; i += max) {
		if (args.start + (max << PAGE_SHIFT) > end)
			args.end = end;
		else
			args.end = args.start + (max << PAGE_SHIFT);

		ret = migrate_vma_setup(&args);
		if (ret)
			goto out_free_pfns;

		if (args.cpages)
			nouveau_dmem_migrate_chunk(drm, svmm, &args, dma_info,
						   pfns);
		args.start = args.end;
	}

	ret = 0;
out_free_pfns:
	nouveau_pfns_free(pfns);
out_free_dma:
	kfree(dma_info);
out_free_dst:
	kfree(args.dst);
out_free_src:
	kfree(args.src);
out:
	return ret;
}