xref: /linux/drivers/gpu/drm/drm_pagemap.c (revision 9175f49330d199c03eb970f799526c9d479b5f1a)
1 // SPDX-License-Identifier: GPL-2.0-only OR MIT
2 /*
3  * Copyright © 2024-2025 Intel Corporation
4  */
5 
6 #include <linux/dma-fence.h>
7 #include <linux/dma-mapping.h>
8 #include <linux/migrate.h>
9 #include <linux/pagemap.h>
10 #include <drm/drm_drv.h>
11 #include <drm/drm_pagemap.h>
12 #include <drm/drm_pagemap_util.h>
13 #include <drm/drm_print.h>
14 
/**
 * DOC: Overview
 *
 * The DRM pagemap layer is intended to augment the dev_pagemap functionality by
 * providing a way to populate a struct mm_struct virtual range with device
 * private pages and to provide helpers to abstract device memory allocations,
 * to migrate memory back and forth between device memory and system RAM and
 * to handle access (and in the future migration) between devices implementing
 * a fast interconnect that is not necessarily visible to the rest of the
 * system.
 *
 * Typically the DRM pagemap receives requests from one or more DRM GPU SVM
 * instances to populate struct mm_struct virtual ranges with memory, and the
 * migration is best effort only and may thus fail. The implementation should
 * also handle device unbinding by blocking new population requests (returning
 * an -ENODEV error) and after that migrating all device pages to system RAM.
 */
32 
/**
 * DOC: Migration
 *
 * Migration granularity typically follows the GPU SVM range requests, but
 * if there are clashes, due to races or due to the fact that multiple GPU
 * SVM instances have different views of the ranges used, and because of that
 * parts of a requested range are already present in the requested device
 * memory, the implementation has a variety of options. It can fail, it can
 * choose to populate only the part of the range that isn't already in device
 * memory, or it can evict the range to system memory before trying to migrate.
 * Ideally an implementation would just try to migrate the missing part of the
 * range and allocate just enough memory to do so.
 *
 * When migrating to system memory as a response to a cpu fault or a device
 * memory eviction request, currently a full device memory allocation is
 * migrated back to system. Moving forward this might need improvement for
 * situations where a single page needs bouncing between system memory and
 * device memory due to, for example, atomic operations.
 *
 * Key DRM pagemap components:
 *
 * - Device Memory Allocations:
 *      Embedded structure containing enough information for the drm_pagemap to
 *      migrate to / from device memory.
 *
 * - Device Memory Operations:
 *      Define the interface for driver-specific device memory operations:
 *      releasing memory, populating pfns, and copying to / from device memory.
 */
62 
/**
 * struct drm_pagemap_zdd - GPU SVM zone device data
 *
 * @refcount: Reference count for the zdd. Dropping the last reference runs
 *            drm_pagemap_zdd_destroy().
 * @devmem_allocation: Device memory allocation associated with this zdd.
 *                     NULL right after drm_pagemap_zdd_alloc(); the destroy
 *                     path tolerates NULL.
 * @dpagemap: Refcounted pointer to the underlying struct drm_pagemap. The
 *            reference is taken at allocation and dropped on destruction.
 *
 * This structure serves as a generic wrapper installed as the zone device
 * data of device memory folios (see drm_pagemap_get_devmem_page(), which takes
 * one zdd reference per folio it initializes). It provides infrastructure for
 * looking up a device memory allocation from a page, and for releasing the
 * device memory once the last zdd reference is gone.
 *
 * NOTE(review): drm_pagemap_zdd_destroy() invokes the devmem_release() op
 * directly from whatever context drops the final reference. If page
 * references can be dropped from IRQ context and the release op needs
 * sleeping locks, the op itself presumably has to defer the work - confirm
 * against the drivers implementing it.
 */
struct drm_pagemap_zdd {
	struct kref refcount;
	struct drm_pagemap_devmem *devmem_allocation;
	struct drm_pagemap *dpagemap;
};
82 
83 /**
84  * drm_pagemap_zdd_alloc() - Allocate a zdd structure.
85  * @dpagemap: Pointer to the underlying struct drm_pagemap.
86  *
87  * This function allocates and initializes a new zdd structure. It sets up the
88  * reference count and initializes the destroy work.
89  *
90  * Return: Pointer to the allocated zdd on success, ERR_PTR() on failure.
91  */
92 static struct drm_pagemap_zdd *
93 drm_pagemap_zdd_alloc(struct drm_pagemap *dpagemap)
94 {
95 	struct drm_pagemap_zdd *zdd;
96 
97 	zdd = kmalloc_obj(*zdd);
98 	if (!zdd)
99 		return NULL;
100 
101 	kref_init(&zdd->refcount);
102 	zdd->devmem_allocation = NULL;
103 	zdd->dpagemap = drm_pagemap_get(dpagemap);
104 
105 	return zdd;
106 }
107 
108 /**
109  * drm_pagemap_zdd_get() - Get a reference to a zdd structure.
110  * @zdd: Pointer to the zdd structure.
111  *
112  * This function increments the reference count of the provided zdd structure.
113  *
114  * Return: Pointer to the zdd structure.
115  */
116 static struct drm_pagemap_zdd *drm_pagemap_zdd_get(struct drm_pagemap_zdd *zdd)
117 {
118 	kref_get(&zdd->refcount);
119 	return zdd;
120 }
121 
122 /**
123  * drm_pagemap_zdd_destroy() - Destroy a zdd structure.
124  * @ref: Pointer to the reference count structure.
125  *
126  * This function queues the destroy_work of the zdd for asynchronous destruction.
127  */
128 static void drm_pagemap_zdd_destroy(struct kref *ref)
129 {
130 	struct drm_pagemap_zdd *zdd =
131 		container_of(ref, struct drm_pagemap_zdd, refcount);
132 	struct drm_pagemap_devmem *devmem = zdd->devmem_allocation;
133 	struct drm_pagemap *dpagemap = zdd->dpagemap;
134 
135 	if (devmem) {
136 		complete_all(&devmem->detached);
137 		if (devmem->ops->devmem_release)
138 			devmem->ops->devmem_release(devmem);
139 	}
140 	kfree(zdd);
141 	drm_pagemap_put(dpagemap);
142 }
143 
144 /**
145  * drm_pagemap_zdd_put() - Put a zdd reference.
146  * @zdd: Pointer to the zdd structure.
147  *
148  * This function decrements the reference count of the provided zdd structure
149  * and schedules its destruction if the count drops to zero.
150  */
151 static void drm_pagemap_zdd_put(struct drm_pagemap_zdd *zdd)
152 {
153 	kref_put(&zdd->refcount, drm_pagemap_zdd_destroy);
154 }
155 
/**
 * drm_pagemap_migration_unlock_put_folio() - Put a migration folio
 * @folio: Pointer to the folio to put
 *
 * This function unlocks @folio and then drops one reference on it. The folio
 * must not be touched by the caller afterwards unless it holds another
 * reference.
 */
static void drm_pagemap_migration_unlock_put_folio(struct folio *folio)
{
	/* Unlock first: after the put the folio may no longer be ours. */
	folio_unlock(folio);
	folio_put(folio);
}
167 
/**
 * drm_pagemap_migration_unlock_put_pages() - Put migration pages
 * @npages: Number of entries in @migrate_pfn
 * @migrate_pfn: Array of migrate page frame numbers
 *
 * Walks @migrate_pfn and, for every non-zero entry, unlocks and puts the
 * folio it refers to and clears the entry. After a folio has been handled the
 * walk skips ahead by the number of pages in that folio; zero entries advance
 * the walk by a single page.
 */
static void drm_pagemap_migration_unlock_put_pages(unsigned long npages,
						   unsigned long *migrate_pfn)
{
	unsigned long idx = 0;

	while (idx < npages) {
		unsigned int order = 0;

		if (migrate_pfn[idx]) {
			struct folio *folio =
				page_folio(migrate_pfn_to_page(migrate_pfn[idx]));

			/* Sample the order while our reference is still held. */
			order = folio_order(folio);

			drm_pagemap_migration_unlock_put_folio(folio);
			migrate_pfn[idx] = 0;
		}

		idx += NR_PAGES(order);
	}
}
199 
200 /**
201  * drm_pagemap_get_devmem_page() - Get a reference to a device memory page
202  * @page: Pointer to the page
203  * @order: Order
204  * @zdd: Pointer to the GPU SVM zone device data
205  *
206  * This function associates the given page with the specified GPU SVM zone
207  * device data and initializes it for zone device usage.
208  */
209 static void drm_pagemap_get_devmem_page(struct page *page,
210 					unsigned int order,
211 					struct drm_pagemap_zdd *zdd)
212 {
213 	zone_device_folio_init((struct folio *)page, zdd->dpagemap->pagemap,
214 			       order);
215 	folio_set_zone_device_data(page_folio(page), drm_pagemap_zdd_get(zdd));
216 }
217 
218 /**
219  * drm_pagemap_migrate_map_device_private_pages() - Map device private migration
220  * pages for GPU SVM migration
221  * @dev: The device performing the migration.
222  * @local_dpagemap: The drm_pagemap local to the migrating device.
223  * @pagemap_addr: Array to store DMA information corresponding to mapped pages.
224  * @migrate_pfn: Array of page frame numbers of system pages or peer pages to map.
225  * @npages: Number of system pages or peer pages to map.
226  * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
227  * @mdetails: Details governing the migration behaviour.
228  *
229  * This function maps pages of memory for migration usage in GPU SVM. It
230  * iterates over each page frame number provided in @migrate_pfn, maps the
231  * corresponding page, and stores the DMA address in the provided @dma_addr
232  * array.
233  *
234  * Returns: 0 on success, -EFAULT if an error occurs during mapping.
235  */
236 static int
237 drm_pagemap_migrate_map_device_private_pages(struct device *dev,
238 					     struct drm_pagemap *local_dpagemap,
239 					     struct drm_pagemap_addr *pagemap_addr,
240 					     unsigned long *migrate_pfn,
241 					     unsigned long npages,
242 					     enum dma_data_direction dir,
243 					     const struct drm_pagemap_migrate_details *mdetails)
244 {
245 	unsigned long num_peer_pages = 0, num_local_pages = 0, i;
246 
247 	for (i = 0; i < npages;) {
248 		struct page *page = migrate_pfn_to_page(migrate_pfn[i]);
249 		struct drm_pagemap_zdd *zdd;
250 		struct drm_pagemap *dpagemap;
251 		struct drm_pagemap_addr addr;
252 		struct folio *folio;
253 		unsigned int order = 0;
254 
255 		if (!page)
256 			goto next;
257 
258 		WARN_ON_ONCE(!is_device_private_page(page));
259 		folio = page_folio(page);
260 		order = folio_order(folio);
261 
262 		zdd = drm_pagemap_page_zone_device_data(page);
263 		dpagemap = zdd->dpagemap;
264 
265 		if (dpagemap == local_dpagemap) {
266 			if (!mdetails->can_migrate_same_pagemap)
267 				goto next;
268 
269 			num_local_pages += NR_PAGES(order);
270 		} else {
271 			num_peer_pages += NR_PAGES(order);
272 		}
273 
274 		addr = dpagemap->ops->device_map(dpagemap, dev, page, order, dir);
275 		if (dma_mapping_error(dev, addr.addr))
276 			return -EFAULT;
277 
278 		pagemap_addr[i] = addr;
279 
280 next:
281 		i += NR_PAGES(order);
282 	}
283 
284 	if (num_peer_pages)
285 		drm_dbg(local_dpagemap->drm, "Migrating %lu peer pages over interconnect.\n",
286 			num_peer_pages);
287 	if (num_local_pages)
288 		drm_dbg(local_dpagemap->drm, "Migrating %lu local pages over interconnect.\n",
289 			num_local_pages);
290 
291 	return 0;
292 }
293 
/**
 * struct drm_pagemap_iova_state - DRM pagemap IOVA state
 * @dma_state: DMA IOVA state.
 * @offset: Current offset in IOVA, in bytes. Advanced by the size of each
 *          page linked into the IOVA range, so it is also the total number
 *          of bytes linked so far.
 *
 * This structure acts as an iterator for packing all IOVA addresses within a
 * contiguous range. A zero-initialized instance is a valid starting state.
 */
struct drm_pagemap_iova_state {
	struct dma_iova_state dma_state;
	unsigned long offset;
};
306 
307 /**
308  * drm_pagemap_migrate_map_system_pages() - Map system or device coherent
309  * migration pages for GPU SVM migration
310  * @dev: The device performing the migration.
311  * @pagemap_addr: Array to store DMA information corresponding to mapped pages.
312  * @migrate_pfn: Array of page frame numbers of system pages or peer pages to map.
313  * @npages: Number of system or device coherent pages to map.
314  * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
315  * @state: DMA IOVA state for mapping.
316  *
317  * This function maps pages of memory for migration usage in GPU SVM. It
318  * iterates over each page frame number provided in @migrate_pfn, maps the
319  * corresponding page, and stores the DMA address in the provided @dma_addr
320  * array.
321  *
322  * Returns: 0 on success, negative error code on failure.
323  */
324 static int
325 drm_pagemap_migrate_map_system_pages(struct device *dev,
326 				     struct drm_pagemap_addr *pagemap_addr,
327 				     unsigned long *migrate_pfn,
328 				     unsigned long npages,
329 				     enum dma_data_direction dir,
330 				     struct drm_pagemap_iova_state *state)
331 {
332 	unsigned long i;
333 	bool try_alloc = false;
334 
335 	for (i = 0; i < npages;) {
336 		struct page *page = migrate_pfn_to_page(migrate_pfn[i]);
337 		dma_addr_t dma_addr;
338 		struct folio *folio;
339 		unsigned int order = 0;
340 
341 		if (!page)
342 			goto next;
343 
344 		WARN_ON_ONCE(is_device_private_page(page));
345 		folio = page_folio(page);
346 		order = folio_order(folio);
347 
348 		if (!try_alloc) {
349 			dma_iova_try_alloc(dev, &state->dma_state,
350 					   0, npages * PAGE_SIZE);
351 			try_alloc = true;
352 		}
353 
354 		if (dma_use_iova(&state->dma_state)) {
355 			int err = dma_iova_link(dev, &state->dma_state,
356 						page_to_phys(page),
357 						state->offset, page_size(page),
358 						dir, 0);
359 			if (err)
360 				return err;
361 
362 			dma_addr = state->dma_state.addr + state->offset;
363 			state->offset += page_size(page);
364 		} else {
365 			dma_addr = dma_map_page(dev, page, 0, page_size(page),
366 						dir);
367 			if (dma_mapping_error(dev, dma_addr))
368 				return -EFAULT;
369 		}
370 
371 		pagemap_addr[i] =
372 			drm_pagemap_addr_encode(dma_addr,
373 						DRM_INTERCONNECT_SYSTEM,
374 						order, dir);
375 
376 next:
377 		i += NR_PAGES(order);
378 	}
379 
380 	if (dma_use_iova(&state->dma_state))
381 		return dma_iova_sync(dev, &state->dma_state, 0, state->offset);
382 
383 	return 0;
384 }
385 
386 /**
387  * drm_pagemap_migrate_unmap_pages() - Unmap pages previously mapped for GPU SVM migration
388  * @dev: The device for which the pages were mapped
389  * @migrate_pfn: Array of migrate pfns set up for the mapped pages. Used to
390  * determine the drm_pagemap of a peer device private page.
391  * @pagemap_addr: Array of DMA information corresponding to mapped pages
392  * @npages: Number of pages to unmap
393  * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
394  * @state: DMA IOVA state for mapping.
395  *
396  * This function unmaps previously mapped pages of memory for GPU Shared Virtual
397  * Memory (SVM). It iterates over each DMA address provided in @pagemap_addr,
398  * checks if it's valid and not already unmapped, and unmaps the corresponding
399  * page.
400  */
401 static void drm_pagemap_migrate_unmap_pages(struct device *dev,
402 					    struct drm_pagemap_addr *pagemap_addr,
403 					    unsigned long *migrate_pfn,
404 					    unsigned long npages,
405 					    enum dma_data_direction dir,
406 					    struct drm_pagemap_iova_state *state)
407 {
408 	unsigned long i;
409 
410 	if (state && dma_use_iova(&state->dma_state)) {
411 		dma_iova_destroy(dev, &state->dma_state, state->offset, dir, 0);
412 		return;
413 	}
414 
415 	for (i = 0; i < npages;) {
416 		struct page *page = migrate_pfn_to_page(migrate_pfn[i]);
417 
418 		if (!page || !pagemap_addr[i].addr || dma_mapping_error(dev, pagemap_addr[i].addr))
419 			goto next;
420 
421 		if (is_zone_device_page(page)) {
422 			struct drm_pagemap_zdd *zdd = drm_pagemap_page_zone_device_data(page);
423 			struct drm_pagemap *dpagemap = zdd->dpagemap;
424 
425 			dpagemap->ops->device_unmap(dpagemap, dev, &pagemap_addr[i]);
426 		} else {
427 			dma_unmap_page(dev, pagemap_addr[i].addr,
428 				       PAGE_SIZE << pagemap_addr[i].order, dir);
429 		}
430 
431 next:
432 		i += NR_PAGES(pagemap_addr[i].order);
433 	}
434 }
435 
436 static unsigned long
437 npages_in_range(unsigned long start, unsigned long end)
438 {
439 	return (end - start) >> PAGE_SHIFT;
440 }
441 
442 static int
443 drm_pagemap_migrate_remote_to_local(struct drm_pagemap_devmem *devmem,
444 				    struct device *remote_device,
445 				    struct drm_pagemap *remote_dpagemap,
446 				    unsigned long local_pfns[],
447 				    struct page *remote_pages[],
448 				    struct drm_pagemap_addr pagemap_addr[],
449 				    unsigned long npages,
450 				    const struct drm_pagemap_devmem_ops *ops,
451 				    const struct drm_pagemap_migrate_details *mdetails)
452 
453 {
454 	int err = drm_pagemap_migrate_map_device_private_pages(remote_device,
455 							       remote_dpagemap,
456 							       pagemap_addr,
457 							       local_pfns,
458 							       npages,
459 							       DMA_FROM_DEVICE,
460 							       mdetails);
461 
462 	if (err)
463 		goto out;
464 
465 	err = ops->copy_to_ram(remote_pages, pagemap_addr, npages,
466 			       devmem->pre_migrate_fence);
467 out:
468 	drm_pagemap_migrate_unmap_pages(remote_device, pagemap_addr, local_pfns,
469 					npages, DMA_FROM_DEVICE, NULL);
470 	return err;
471 }
472 
473 static int
474 drm_pagemap_migrate_sys_to_dev(struct drm_pagemap_devmem *devmem,
475 			       unsigned long sys_pfns[],
476 			       struct page *local_pages[],
477 			       struct drm_pagemap_addr pagemap_addr[],
478 			       unsigned long npages,
479 			       const struct drm_pagemap_devmem_ops *ops,
480 			       struct drm_pagemap_iova_state *state)
481 {
482 	int err = drm_pagemap_migrate_map_system_pages(devmem->dev,
483 						       pagemap_addr, sys_pfns,
484 						       npages, DMA_TO_DEVICE,
485 						       state);
486 
487 	if (err)
488 		goto out;
489 
490 	err = ops->copy_to_devmem(local_pages, pagemap_addr, npages,
491 				  devmem->pre_migrate_fence);
492 out:
493 	drm_pagemap_migrate_unmap_pages(devmem->dev, pagemap_addr, sys_pfns, npages,
494 					DMA_TO_DEVICE, state);
495 	return err;
496 }
497 
/**
 * struct migrate_range_loc - Cursor into the loop over migrate_pfns for migrating to
 * device.
 * @start: The current loop index.
 * @device: migrating device.
 * @dpagemap: Pointer to struct drm_pagemap used by the migrating device.
 * NULL when the source pages are not device private pages, in which case
 * the run is copied with the system-to-device path.
 * @ops: The copy ops to be used for the migrating device.
 *
 * Two instances are used together by drm_pagemap_migrate_range(): one
 * describing the start of the run of pages not yet copied, and one
 * describing the page currently being looked at.
 */
struct migrate_range_loc {
	unsigned long start;
	struct device *device;
	struct drm_pagemap *dpagemap;
	const struct drm_pagemap_devmem_ops *ops;
};
512 
513 static int drm_pagemap_migrate_range(struct drm_pagemap_devmem *devmem,
514 				     unsigned long src_pfns[],
515 				     unsigned long dst_pfns[],
516 				     struct page *pages[],
517 				     struct drm_pagemap_addr pagemap_addr[],
518 				     struct migrate_range_loc *last,
519 				     const struct migrate_range_loc *cur,
520 				     const struct drm_pagemap_migrate_details *mdetails)
521 {
522 	struct drm_pagemap_iova_state state = {};
523 	int ret = 0;
524 
525 	if (cur->start == 0)
526 		goto out;
527 
528 	if (cur->start <= last->start)
529 		return 0;
530 
531 	if (cur->dpagemap == last->dpagemap && cur->ops == last->ops)
532 		return 0;
533 
534 	if (last->dpagemap)
535 		ret = drm_pagemap_migrate_remote_to_local(devmem,
536 							  last->device,
537 							  last->dpagemap,
538 							  &dst_pfns[last->start],
539 							  &pages[last->start],
540 							  &pagemap_addr[last->start],
541 							  cur->start - last->start,
542 							  last->ops, mdetails);
543 
544 	else
545 		ret = drm_pagemap_migrate_sys_to_dev(devmem,
546 						     &src_pfns[last->start],
547 						     &pages[last->start],
548 						     &pagemap_addr[last->start],
549 						     cur->start - last->start,
550 						     last->ops, &state);
551 
552 out:
553 	*last = *cur;
554 	return ret;
555 }
556 
557 /**
558  * drm_pagemap_cpages() - Count collected pages
559  * @migrate_pfn: Array of migrate_pfn entries to account
560  * @npages: Number of entries in @migrate_pfn
561  *
562  * Compute the total number of minimum-sized pages represented by the
563  * collected entries in @migrate_pfn. The total is derived from the
564  * order encoded in each entry.
565  *
566  * Return: Total number of minimum-sized pages.
567  */
568 static int drm_pagemap_cpages(unsigned long *migrate_pfn, unsigned long npages)
569 {
570 	unsigned long i, cpages = 0;
571 
572 	for (i = 0; i < npages;) {
573 		struct page *page = migrate_pfn_to_page(migrate_pfn[i]);
574 		struct folio *folio;
575 		unsigned int order = 0;
576 
577 		if (page) {
578 			folio = page_folio(page);
579 			order = folio_order(folio);
580 			cpages += NR_PAGES(order);
581 		} else if (migrate_pfn[i] & MIGRATE_PFN_COMPOUND) {
582 			order = HPAGE_PMD_ORDER;
583 			cpages += NR_PAGES(order);
584 		}
585 
586 		i += NR_PAGES(order);
587 	}
588 
589 	return cpages;
590 }
591 
/**
 * drm_pagemap_migrate_to_devmem() - Migrate a struct mm_struct range to device memory
 * @devmem_allocation: The device memory allocation to migrate to.
 * The caller should hold a reference to the device memory allocation,
 * and the reference is consumed by this function even if it returns with
 * an error.
 * @mm: Pointer to the struct mm_struct.
 * @start: Start of the virtual address range to migrate.
 * @end: End of the virtual address range to migrate.
 * @mdetails: Details to govern the migration.
 *
 * This function migrates the specified virtual address range to device memory.
 * It performs the necessary setup and invokes the driver-specific operations for
 * migration to device memory. Expected to be called while holding the mmap lock in
 * at least read mode.
 *
 * The range must lie within a single anonymous VMA. Migration is all or
 * nothing with respect to the pages collected by migrate_vma_setup(): if not
 * every page in the range could be collected, the migration is aborted with
 * -EBUSY. If every page already resides in this pagemap, 0 is returned
 * without copying anything.
 *
 * Note: The timeslice_ms member of @mdetails can typically be used to force
 * data to remain in pagemap pages long enough for a GPU to perform a task and
 * to prevent a migration livelock. One alternative would be for the GPU driver
 * to block in a mmu_notifier for the specified amount of time, but adding the
 * functionality to the pagemap is likely nicer to the system as a whole.
 *
 * NOTE(review): the -EOPNOTSUPP early return below does not release
 * @devmem_allocation, which seems at odds with the "reference is consumed
 * even on error" contract above - confirm against callers.
 *
 * Return: %0 on success, negative error code on failure.
 */
int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
				  struct mm_struct *mm,
				  unsigned long start, unsigned long end,
				  const struct drm_pagemap_migrate_details *mdetails)
{
	const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops;
	struct drm_pagemap *dpagemap = devmem_allocation->dpagemap;
	struct dev_pagemap *pagemap = dpagemap->pagemap;
	struct migrate_vma migrate = {
		.start		= start,
		.end		= end,
		.pgmap_owner	= pagemap->owner,
		.flags		= MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT |
		MIGRATE_VMA_SELECT_DEVICE_PRIVATE | MIGRATE_VMA_SELECT_COMPOUND,
	};
	unsigned long i, npages = npages_in_range(start, end);
	unsigned long own_pages = 0, migrated_pages = 0;
	/* @last starts out describing a system-to-device run by this device. */
	struct migrate_range_loc cur, last = {.device = dpagemap->drm->dev, .ops = ops};
	struct vm_area_struct *vas;
	struct drm_pagemap_zdd *zdd = NULL;
	struct page **pages;
	struct drm_pagemap_addr *pagemap_addr;
	void *buf;
	int err;

	mmap_assert_locked(mm);

	if (!ops->populate_devmem_pfn || !ops->copy_to_devmem ||
	    !ops->copy_to_ram)
		return -EOPNOTSUPP;

	vas = vma_lookup(mm, start);
	if (!vas) {
		err = -ENOENT;
		goto err_out;
	}

	if (end > vas->vm_end || start < vas->vm_start) {
		err = -EINVAL;
		goto err_out;
	}

	if (!vma_is_anonymous(vas)) {
		err = -EBUSY;
		goto err_out;
	}

	/*
	 * One zeroed allocation carved into four arrays of npages entries
	 * each, in this order: migrate.src, migrate.dst, pagemap_addr, pages.
	 */
	buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*pagemap_addr) +
		       sizeof(*pages), GFP_KERNEL);
	if (!buf) {
		err = -ENOMEM;
		goto err_out;
	}
	pagemap_addr = buf + (2 * sizeof(*migrate.src) * npages);
	pages = buf + (2 * sizeof(*migrate.src) + sizeof(*pagemap_addr)) * npages;

	zdd = drm_pagemap_zdd_alloc(dpagemap);
	if (!zdd) {
		err = -ENOMEM;
		kvfree(buf);
		goto err_out;
	}
	/*
	 * From here on the allocation is released through the zdd, i.e. by
	 * the final drm_pagemap_zdd_put(), not through err_out.
	 */
	zdd->devmem_allocation = devmem_allocation;	/* Owns ref */

	migrate.vma = vas;
	migrate.src = buf;
	migrate.dst = migrate.src + npages;

	err = migrate_vma_setup(&migrate);
	if (err)
		goto err_free;

	if (!migrate.cpages) {
		/* No pages to migrate. Raced or unknown device pages. */
		err = -EBUSY;
		goto err_free;
	}

	/*
	 * migrate.cpages may differ from npages even when everything was
	 * collected, so recount from the src array before giving up.
	 */
	if (migrate.cpages != npages &&
	    drm_pagemap_cpages(migrate.src, npages) != npages) {
		/*
		 * Some pages to migrate. But we want to migrate all or
		 * nothing. Raced or unknown device pages.
		 */
		err = -EBUSY;
		goto err_aborted_migration;
	}

	/* Count device-private pages to migrate */
	for (i = 0; i < npages;) {
		struct page *src_page = migrate_pfn_to_page(migrate.src[i]);
		unsigned long nr_pages = src_page ? NR_PAGES(folio_order(page_folio(src_page))) : 1;

		if (src_page && is_zone_device_page(src_page)) {
			if (page_pgmap(src_page) == pagemap)
				own_pages += nr_pages;
		}

		i += nr_pages;
	}

	drm_dbg(dpagemap->drm, "Total pages %lu; Own pages: %lu.\n",
		npages, own_pages);
	if (own_pages == npages) {
		err = 0;
		drm_dbg(dpagemap->drm, "Migration wasn't necessary.\n");
		goto err_aborted_migration;
	} else if (own_pages && !mdetails->can_migrate_same_pagemap) {
		err = -EBUSY;
		drm_dbg(dpagemap->drm, "Migration aborted due to fragmentation.\n");
		goto err_aborted_migration;
	}

	err = ops->populate_devmem_pfn(devmem_allocation, npages, migrate.dst);
	if (err)
		goto err_aborted_migration;

	/* Reused below to count own pages that are skipped in the copy loop. */
	own_pages = 0;

	/*
	 * Walk the range, classify the source of each entry in @cur and let
	 * drm_pagemap_migrate_range() copy the pending run whenever the
	 * source changes.
	 */
	for (i = 0; i < npages;) {
		unsigned long j;
		struct page *page = pfn_to_page(migrate.dst[i]);
		struct page *src_page = migrate_pfn_to_page(migrate.src[i]);
		unsigned int order = 0;

		cur.start = i;
		pages[i] = NULL;
		if (src_page && is_device_private_page(src_page)) {
			struct drm_pagemap_zdd *src_zdd =
				drm_pagemap_page_zone_device_data(src_page);

			if (page_pgmap(src_page) == pagemap &&
			    !mdetails->can_migrate_same_pagemap) {
				migrate.dst[i] = 0;
				own_pages++;
				goto next;
			}
			/* Device private source: copy with the source's ops. */
			cur.dpagemap = src_zdd->dpagemap;
			cur.ops = src_zdd->devmem_allocation->ops;
			cur.device = cur.dpagemap->drm->dev;
			pages[i] = src_page;
		}
		if (!pages[i]) {
			/* Any other source: copy with our own ops. */
			cur.dpagemap = NULL;
			cur.ops = ops;
			cur.device = dpagemap->drm->dev;
			pages[i] = page;
		}
		migrate.dst[i] = migrate_pfn(migrate.dst[i]);

		if (migrate.src[i] & MIGRATE_PFN_COMPOUND) {
			drm_WARN_ONCE(dpagemap->drm, src_page &&
				      folio_order(page_folio(src_page)) != HPAGE_PMD_ORDER,
				      "Unexpected folio order\n");

			order = HPAGE_PMD_ORDER;
			migrate.dst[i] |= MIGRATE_PFN_COMPOUND;

			/* Only the head entry describes a compound destination. */
			for (j = 1; j < NR_PAGES(order) && i + j < npages; j++)
				migrate.dst[i + j] = 0;
		}

		drm_pagemap_get_devmem_page(page, order, zdd);

		/* If we switched the migrating drm_pagemap, migrate previous pages now */
		err = drm_pagemap_migrate_range(devmem_allocation, migrate.src, migrate.dst,
						pages, pagemap_addr, &last, &cur,
						mdetails);
		if (err) {
			/* Limit the unwind to the entries set up so far. */
			npages = i + 1;
			goto err_finalize;
		}

next:
		i += NR_PAGES(order);
	}

	/*
	 * Flush the final run: a NULL ops never matches last.ops, so the
	 * "same source" shortcut in drm_pagemap_migrate_range() is not taken.
	 */
	cur.start = npages;
	cur.ops = NULL; /* Force migration */
	err = drm_pagemap_migrate_range(devmem_allocation, migrate.src, migrate.dst,
					pages, pagemap_addr, &last, &cur, mdetails);
	if (err)
		goto err_finalize;

	drm_WARN_ON(dpagemap->drm, !!own_pages);

	dma_fence_put(devmem_allocation->pre_migrate_fence);
	devmem_allocation->pre_migrate_fence = NULL;

	/* Upon success bind devmem allocation to range and zdd */
	devmem_allocation->timeslice_expiration = get_jiffies_64() +
		msecs_to_jiffies(mdetails->timeslice_ms);

err_finalize:
	/* On failure, unlock and put the destination pages and clear dst. */
	if (err)
		drm_pagemap_migration_unlock_put_pages(npages, migrate.dst);
err_aborted_migration:
	migrate_vma_pages(&migrate);

	/* On success, count how many pages are still flagged for migration. */
	for (i = 0; !err && i < npages;) {
		struct page *page = migrate_pfn_to_page(migrate.src[i]);
		unsigned long nr_pages = page ? NR_PAGES(folio_order(page_folio(page))) : 1;

		if (migrate.src[i] & MIGRATE_PFN_MIGRATE)
			migrated_pages += nr_pages;

		i += nr_pages;
	}

	if (!err && migrated_pages < npages - own_pages) {
		drm_dbg(dpagemap->drm, "Raced while finalizing migration.\n");
		err = -EBUSY;
	}

	migrate_vma_finalize(&migrate);
err_free:
	/* Drops the initial zdd reference taken by drm_pagemap_zdd_alloc(). */
	drm_pagemap_zdd_put(zdd);
	kvfree(buf);
	return err;

err_out:
	/*
	 * No zdd owns the allocation yet, so release it directly.
	 * NOTE(review): unlike drm_pagemap_zdd_destroy(), this does not check
	 * that the devmem_release op is non-NULL.
	 */
	devmem_allocation->ops->devmem_release(devmem_allocation);
	return err;
}
EXPORT_SYMBOL_GPL(drm_pagemap_migrate_to_devmem);
841 
842 /**
843  * drm_pagemap_migrate_populate_ram_pfn() - Populate RAM PFNs for a VM area
844  * @vas: Pointer to the VM area structure, can be NULL
845  * @fault_page: Fault page
846  * @npages: Number of pages to populate
847  * @mpages: Number of pages to migrate
848  * @src_mpfn: Source array of migrate PFNs
849  * @mpfn: Array of migrate PFNs to populate
850  * @addr: Start address for PFN allocation
851  *
852  * This function populates the RAM migrate page frame numbers (PFNs) for the
853  * specified VM area structure. It allocates and locks pages in the VM area for
854  * RAM usage. If vas is non-NULL use alloc_page_vma for allocation, if NULL use
855  * alloc_page for allocation.
856  *
857  * Return: 0 on success, negative error code on failure.
858  */
859 static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
860 						struct page *fault_page,
861 						unsigned long npages,
862 						unsigned long *mpages,
863 						unsigned long *src_mpfn,
864 						unsigned long *mpfn,
865 						unsigned long addr)
866 {
867 	unsigned long i;
868 
869 	for (i = 0; i < npages;) {
870 		struct page *page = NULL, *src_page;
871 		struct folio *folio;
872 		unsigned int order = 0;
873 
874 		if (!(src_mpfn[i] & MIGRATE_PFN_MIGRATE))
875 			goto next;
876 
877 		src_page = migrate_pfn_to_page(src_mpfn[i]);
878 		if (!src_page)
879 			goto next;
880 
881 		if (fault_page) {
882 			if (drm_pagemap_page_zone_device_data(src_page) !=
883 			    drm_pagemap_page_zone_device_data(fault_page))
884 				goto next;
885 		}
886 
887 		order = folio_order(page_folio(src_page));
888 
889 		/* TODO: Support fallback to single pages if THP allocation fails */
890 		if (vas)
891 			folio = vma_alloc_folio(GFP_HIGHUSER, order, vas, addr);
892 		else
893 			folio = folio_alloc(GFP_HIGHUSER, order);
894 
895 		if (!folio)
896 			goto free_pages;
897 
898 		page = folio_page(folio, 0);
899 		mpfn[i] = migrate_pfn(page_to_pfn(page));
900 
901 		if (order)
902 			mpfn[i] |= MIGRATE_PFN_COMPOUND;
903 next:
904 		if (page)
905 			addr += page_size(page);
906 		else
907 			addr += PAGE_SIZE;
908 
909 		i += NR_PAGES(order);
910 	}
911 
912 	for (i = 0; i < npages;) {
913 		struct page *page = migrate_pfn_to_page(mpfn[i]);
914 		unsigned int order = 0;
915 
916 		if (!page)
917 			goto next_lock;
918 
919 		WARN_ON_ONCE(!folio_trylock(page_folio(page)));
920 
921 		order = folio_order(page_folio(page));
922 		*mpages += NR_PAGES(order);
923 
924 next_lock:
925 		i += NR_PAGES(order);
926 	}
927 
928 	return 0;
929 
930 free_pages:
931 	for (i = 0; i < npages;) {
932 		struct page *page = migrate_pfn_to_page(mpfn[i]);
933 		unsigned int order = 0;
934 
935 		if (!page)
936 			goto next_put;
937 
938 		put_page(page);
939 		mpfn[i] = 0;
940 
941 		order = folio_order(page_folio(page));
942 
943 next_put:
944 		i += NR_PAGES(order);
945 	}
946 	return -ENOMEM;
947 }
948 
/*
 * Deferred release machinery: drm_pagemap_release() pushes
 * struct drm_pagemap_dev_hold items onto drm_pagemap_unhold_list and
 * schedules drm_pagemap_work, whose handler drm_pagemap_dev_unhold_work()
 * takes everything off the list and processes it.
 */
static void drm_pagemap_dev_unhold_work(struct work_struct *work);
static LLIST_HEAD(drm_pagemap_unhold_list);
static DECLARE_WORK(drm_pagemap_work, drm_pagemap_dev_unhold_work);
952 
/**
 * struct drm_pagemap_dev_hold - Struct to aid in drm_device release.
 * @link: Link into drm_pagemap_unhold_list for deferred reference releases.
 * @drm: drm device to put.
 *
 * When a struct drm_pagemap is released, we also need to release the
 * reference it holds on the drm device. However, typically that needs
 * to be done separately, from a system-wide workqueue.
 * Therefore, each time a struct drm_pagemap is initialized
 * (or re-initialized if cached), a separate drm_pagemap_dev_hold item is
 * allocated, from which we put the drm device and associated module.
 */
struct drm_pagemap_dev_hold {
	struct llist_node link;
	struct drm_device *drm;
};
970 
971 static void drm_pagemap_release(struct kref *ref)
972 {
973 	struct drm_pagemap *dpagemap = container_of(ref, typeof(*dpagemap), ref);
974 	struct drm_pagemap_dev_hold *dev_hold = dpagemap->dev_hold;
975 
976 	/*
977 	 * We know the pagemap provider is alive at this point, since
978 	 * the struct drm_pagemap_dev_hold holds a reference to the
979 	 * pagemap provider drm_device and its module.
980 	 */
981 	dpagemap->dev_hold = NULL;
982 	drm_pagemap_shrinker_add(dpagemap);
983 	llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
984 	schedule_work(&drm_pagemap_work);
985 	/*
986 	 * Here, either the provider device is still alive, since if called from
987 	 * page_free(), the caller is holding a reference on the dev_pagemap,
988 	 * or if called from drm_pagemap_put(), the direct caller is still alive.
989 	 * This ensures we can't race with THIS module unload.
990 	 */
991 }
992 
993 static void drm_pagemap_dev_unhold_work(struct work_struct *work)
994 {
995 	struct llist_node *node = llist_del_all(&drm_pagemap_unhold_list);
996 	struct drm_pagemap_dev_hold *dev_hold, *next;
997 
998 	/*
999 	 * Deferred release of drm_pagemap provider device and module.
1000 	 * THIS module is kept alive during the release by the
1001 	 * flush_work() in the drm_pagemap_exit() function.
1002 	 */
1003 	llist_for_each_entry_safe(dev_hold, next, node, link) {
1004 		struct drm_device *drm = dev_hold->drm;
1005 		struct module *module = drm->driver->fops->owner;
1006 
1007 		drm_dbg(drm, "Releasing reference on provider device and module.\n");
1008 		drm_dev_put(drm);
1009 		module_put(module);
1010 		kfree(dev_hold);
1011 	}
1012 }
1013 
1014 static struct drm_pagemap_dev_hold *
1015 drm_pagemap_dev_hold(struct drm_pagemap *dpagemap)
1016 {
1017 	struct drm_pagemap_dev_hold *dev_hold;
1018 	struct drm_device *drm = dpagemap->drm;
1019 
1020 	dev_hold = kzalloc_obj(*dev_hold);
1021 	if (!dev_hold)
1022 		return ERR_PTR(-ENOMEM);
1023 
1024 	init_llist_node(&dev_hold->link);
1025 	dev_hold->drm = drm;
1026 	(void)try_module_get(drm->driver->fops->owner);
1027 	drm_dev_get(drm);
1028 
1029 	return dev_hold;
1030 }
1031 
1032 /**
1033  * drm_pagemap_reinit() - Reinitialize a drm_pagemap
1034  * @dpagemap: The drm_pagemap to reinitialize
1035  *
1036  * Reinitialize a drm_pagemap, for which drm_pagemap_release
1037  * has already been called. This interface is intended for the
1038  * situation where the driver caches a destroyed drm_pagemap.
1039  *
1040  * Return: 0 on success, negative error code on failure.
1041  */
1042 int drm_pagemap_reinit(struct drm_pagemap *dpagemap)
1043 {
1044 	dpagemap->dev_hold = drm_pagemap_dev_hold(dpagemap);
1045 	if (IS_ERR(dpagemap->dev_hold))
1046 		return PTR_ERR(dpagemap->dev_hold);
1047 
1048 	kref_init(&dpagemap->ref);
1049 	return 0;
1050 }
1051 EXPORT_SYMBOL(drm_pagemap_reinit);
1052 
1053 /**
1054  * drm_pagemap_init() - Initialize a pre-allocated drm_pagemap
1055  * @dpagemap: The drm_pagemap to initialize.
1056  * @pagemap: The associated dev_pagemap providing the device
1057  * private pages.
1058  * @drm: The drm device. The drm_pagemap holds a reference on the
1059  * drm_device and the module owning the drm_device until
1060  * drm_pagemap_release(). This facilitates drm_pagemap exporting.
1061  * @ops: The drm_pagemap ops.
1062  *
1063  * Initialize and take an initial reference on a drm_pagemap.
1064  * After successful return, use drm_pagemap_put() to destroy.
1065  *
1066  ** Return: 0 on success, negative error code on error.
1067  */
1068 int drm_pagemap_init(struct drm_pagemap *dpagemap,
1069 		     struct dev_pagemap *pagemap,
1070 		     struct drm_device *drm,
1071 		     const struct drm_pagemap_ops *ops)
1072 {
1073 	kref_init(&dpagemap->ref);
1074 	dpagemap->ops = ops;
1075 	dpagemap->pagemap = pagemap;
1076 	dpagemap->drm = drm;
1077 	dpagemap->cache = NULL;
1078 	INIT_LIST_HEAD(&dpagemap->shrink_link);
1079 
1080 	return drm_pagemap_reinit(dpagemap);
1081 }
1082 EXPORT_SYMBOL(drm_pagemap_init);
1083 
1084 /**
1085  * drm_pagemap_put() - Put a struct drm_pagemap reference
1086  * @dpagemap: Pointer to a struct drm_pagemap object.
1087  *
1088  * Puts a struct drm_pagemap reference and frees the drm_pagemap object
1089  * if the refount reaches zero.
1090  */
1091 void drm_pagemap_put(struct drm_pagemap *dpagemap)
1092 {
1093 	if (likely(dpagemap)) {
1094 		drm_pagemap_shrinker_might_lock(dpagemap);
1095 		kref_put(&dpagemap->ref, drm_pagemap_release);
1096 	}
1097 }
1098 EXPORT_SYMBOL(drm_pagemap_put);
1099 
1100 /**
1101  * drm_pagemap_evict_to_ram() - Evict GPU SVM range to RAM
1102  * @devmem_allocation: Pointer to the device memory allocation
1103  *
1104  * Similar to __drm_pagemap_migrate_to_ram but does not require mmap lock and
1105  * migration done via migrate_device_* functions.
1106  *
1107  * Return: 0 on success, negative error code on failure.
1108  */
1109 int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation)
1110 {
1111 	const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops;
1112 	struct drm_pagemap_iova_state state = {};
1113 	unsigned long npages, mpages = 0;
1114 	struct page **pages;
1115 	unsigned long *src, *dst;
1116 	struct drm_pagemap_addr *pagemap_addr;
1117 	void *buf;
1118 	int i, err = 0;
1119 	unsigned int retry_count = 2;
1120 
1121 	npages = devmem_allocation->size >> PAGE_SHIFT;
1122 
1123 retry:
1124 	if (!mmget_not_zero(devmem_allocation->mm))
1125 		return -EFAULT;
1126 
1127 	buf = kvcalloc(npages, 2 * sizeof(*src) + sizeof(*pagemap_addr) +
1128 		       sizeof(*pages), GFP_KERNEL);
1129 	if (!buf) {
1130 		err = -ENOMEM;
1131 		goto err_out;
1132 	}
1133 	src = buf;
1134 	dst = buf + (sizeof(*src) * npages);
1135 	pagemap_addr = buf + (2 * sizeof(*src) * npages);
1136 	pages = buf + (2 * sizeof(*src) + sizeof(*pagemap_addr)) * npages;
1137 
1138 	err = ops->populate_devmem_pfn(devmem_allocation, npages, src);
1139 	if (err)
1140 		goto err_free;
1141 
1142 	err = migrate_device_pfns(src, npages);
1143 	if (err)
1144 		goto err_free;
1145 
1146 	err = drm_pagemap_migrate_populate_ram_pfn(NULL, NULL, npages, &mpages,
1147 						   src, dst, 0);
1148 	if (err || !mpages)
1149 		goto err_finalize;
1150 
1151 	err = drm_pagemap_migrate_map_system_pages(devmem_allocation->dev,
1152 						   pagemap_addr,
1153 						   dst, npages,
1154 						   DMA_FROM_DEVICE, &state);
1155 	if (err)
1156 		goto err_finalize;
1157 
1158 	for (i = 0; i < npages;) {
1159 		unsigned int order = 0;
1160 
1161 		pages[i] = migrate_pfn_to_page(src[i]);
1162 		if (pages[i])
1163 			order = folio_order(page_folio(pages[i]));
1164 
1165 		i += NR_PAGES(order);
1166 	}
1167 
1168 	err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL);
1169 	if (err)
1170 		goto err_finalize;
1171 
1172 err_finalize:
1173 	if (err)
1174 		drm_pagemap_migration_unlock_put_pages(npages, dst);
1175 	migrate_device_pages(src, dst, npages);
1176 	migrate_device_finalize(src, dst, npages);
1177 	drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, pagemap_addr, dst, npages,
1178 					DMA_FROM_DEVICE, &state);
1179 
1180 err_free:
1181 	kvfree(buf);
1182 err_out:
1183 	mmput_async(devmem_allocation->mm);
1184 
1185 	if (completion_done(&devmem_allocation->detached))
1186 		return 0;
1187 
1188 	if (retry_count--) {
1189 		cond_resched();
1190 		state = (struct drm_pagemap_iova_state){};
1191 		goto retry;
1192 	}
1193 
1194 	return err ?: -EBUSY;
1195 }
1196 EXPORT_SYMBOL_GPL(drm_pagemap_evict_to_ram);
1197 
1198 /**
1199  * __drm_pagemap_migrate_to_ram() - Migrate GPU SVM range to RAM (internal)
1200  * @vas: Pointer to the VM area structure
1201  * @page: Pointer to the page for fault handling.
1202  * @fault_addr: Fault address
1203  * @size: Size of migration
1204  *
1205  * This internal function performs the migration of the specified GPU SVM range
1206  * to RAM. It sets up the migration, populates + dma maps RAM PFNs, and
1207  * invokes the driver-specific operations for migration to RAM.
1208  *
1209  * Return: 0 on success, negative error code on failure.
1210  */
1211 static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
1212 					struct page *page,
1213 					unsigned long fault_addr,
1214 					unsigned long size)
1215 {
1216 	struct migrate_vma migrate = {
1217 		.vma		= vas,
1218 		.pgmap_owner	= page_pgmap(page)->owner,
1219 		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE |
1220 				  MIGRATE_VMA_SELECT_DEVICE_COHERENT |
1221 				  MIGRATE_VMA_SELECT_COMPOUND,
1222 		.fault_page	= page,
1223 	};
1224 	struct drm_pagemap_iova_state state = {};
1225 	struct drm_pagemap_zdd *zdd;
1226 	const struct drm_pagemap_devmem_ops *ops;
1227 	struct device *dev = NULL;
1228 	unsigned long npages, mpages = 0;
1229 	struct page **pages;
1230 	struct drm_pagemap_addr *pagemap_addr;
1231 	unsigned long start, end;
1232 	void *buf;
1233 	int i, err = 0;
1234 
1235 	zdd = drm_pagemap_page_zone_device_data(page);
1236 	if (time_before64(get_jiffies_64(), zdd->devmem_allocation->timeslice_expiration))
1237 		return 0;
1238 
1239 	start = ALIGN_DOWN(fault_addr, size);
1240 	end = ALIGN(fault_addr + 1, size);
1241 
1242 	/* Corner where VMA area struct has been partially unmapped */
1243 	if (start < vas->vm_start)
1244 		start = vas->vm_start;
1245 	if (end > vas->vm_end)
1246 		end = vas->vm_end;
1247 
1248 	migrate.start = start;
1249 	migrate.end = end;
1250 	npages = npages_in_range(start, end);
1251 
1252 	buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*pagemap_addr) +
1253 		       sizeof(*pages), GFP_KERNEL);
1254 	if (!buf) {
1255 		err = -ENOMEM;
1256 		goto err_out;
1257 	}
1258 	pagemap_addr = buf + (2 * sizeof(*migrate.src) * npages);
1259 	pages = buf + (2 * sizeof(*migrate.src) + sizeof(*pagemap_addr)) * npages;
1260 
1261 	migrate.vma = vas;
1262 	migrate.src = buf;
1263 	migrate.dst = migrate.src + npages;
1264 
1265 	err = migrate_vma_setup(&migrate);
1266 	if (err)
1267 		goto err_free;
1268 
1269 	/* Raced with another CPU fault, nothing to do */
1270 	if (!migrate.cpages)
1271 		goto err_free;
1272 
1273 	ops = zdd->devmem_allocation->ops;
1274 	dev = zdd->devmem_allocation->dev;
1275 
1276 	err = drm_pagemap_migrate_populate_ram_pfn(vas, page, npages, &mpages,
1277 						   migrate.src, migrate.dst,
1278 						   start);
1279 	if (err)
1280 		goto err_finalize;
1281 
1282 	err = drm_pagemap_migrate_map_system_pages(dev, pagemap_addr,
1283 						   migrate.dst, npages,
1284 						   DMA_FROM_DEVICE, &state);
1285 	if (err)
1286 		goto err_finalize;
1287 
1288 	for (i = 0; i < npages;) {
1289 		unsigned int order = 0;
1290 
1291 		pages[i] = migrate_pfn_to_page(migrate.src[i]);
1292 		if (pages[i])
1293 			order = folio_order(page_folio(pages[i]));
1294 
1295 		i += NR_PAGES(order);
1296 	}
1297 
1298 	err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL);
1299 	if (err)
1300 		goto err_finalize;
1301 
1302 err_finalize:
1303 	if (err)
1304 		drm_pagemap_migration_unlock_put_pages(npages, migrate.dst);
1305 	migrate_vma_pages(&migrate);
1306 	migrate_vma_finalize(&migrate);
1307 	if (dev)
1308 		drm_pagemap_migrate_unmap_pages(dev, pagemap_addr, migrate.dst,
1309 						npages, DMA_FROM_DEVICE,
1310 						&state);
1311 err_free:
1312 	kvfree(buf);
1313 err_out:
1314 
1315 	return err;
1316 }
1317 
/**
 * drm_pagemap_folio_free() - Put GPU SVM zone device data associated with a folio
 * @folio: Pointer to the folio
 *
 * dev_pagemap callback that drops a reference on the GPU SVM zone device
 * data attached to the first page of @folio.
 */
static void drm_pagemap_folio_free(struct folio *folio)
{
	struct drm_pagemap_zdd *zdd;

	zdd = drm_pagemap_page_zone_device_data(folio_page(folio, 0));
	drm_pagemap_zdd_put(zdd);
}
1331 
1332 /**
1333  * drm_pagemap_migrate_to_ram() - Migrate a virtual range to RAM (page fault handler)
1334  * @vmf: Pointer to the fault information structure
1335  *
1336  * This function is a page fault handler used to migrate a virtual range
1337  * to ram. The device memory allocation in which the device page is found is
1338  * migrated in its entirety.
1339  *
1340  * Returns:
1341  * VM_FAULT_SIGBUS on failure, 0 on success.
1342  */
1343 static vm_fault_t drm_pagemap_migrate_to_ram(struct vm_fault *vmf)
1344 {
1345 	struct drm_pagemap_zdd *zdd = drm_pagemap_page_zone_device_data(vmf->page);
1346 	int err;
1347 
1348 	err = __drm_pagemap_migrate_to_ram(vmf->vma,
1349 					   vmf->page, vmf->address,
1350 					   zdd->devmem_allocation->size);
1351 
1352 	return err ? VM_FAULT_SIGBUS : 0;
1353 }
1354 
1355 static void drm_pagemap_folio_split(struct folio *orig_folio, struct folio *new_folio)
1356 {
1357 	struct drm_pagemap_zdd *zdd;
1358 
1359 	if (!new_folio)
1360 		return;
1361 
1362 	new_folio->pgmap = orig_folio->pgmap;
1363 	zdd = folio_zone_device_data(orig_folio);
1364 	folio_set_zone_device_data(new_folio, drm_pagemap_zdd_get(zdd));
1365 }
1366 
1367 static const struct dev_pagemap_ops drm_pagemap_pagemap_ops = {
1368 	.folio_free = drm_pagemap_folio_free,
1369 	.migrate_to_ram = drm_pagemap_migrate_to_ram,
1370 	.folio_split = drm_pagemap_folio_split,
1371 };
1372 
1373 /**
1374  * drm_pagemap_pagemap_ops_get() - Retrieve GPU SVM device page map operations
1375  *
1376  * Returns:
1377  * Pointer to the GPU SVM device page map operations structure.
1378  */
1379 const struct dev_pagemap_ops *drm_pagemap_pagemap_ops_get(void)
1380 {
1381 	return &drm_pagemap_pagemap_ops;
1382 }
1383 EXPORT_SYMBOL_GPL(drm_pagemap_pagemap_ops_get);
1384 
1385 /**
1386  * drm_pagemap_devmem_init() - Initialize a drm_pagemap device memory allocation
1387  *
1388  * @devmem_allocation: The struct drm_pagemap_devmem to initialize.
1389  * @dev: Pointer to the device structure which device memory allocation belongs to
1390  * @mm: Pointer to the mm_struct for the address space
1391  * @ops: Pointer to the operations structure for GPU SVM device memory
1392  * @dpagemap: The struct drm_pagemap we're allocating from.
1393  * @size: Size of device memory allocation
1394  * @pre_migrate_fence: Fence to wait for or pipeline behind before migration starts.
1395  * (May be NULL).
1396  */
1397 void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation,
1398 			     struct device *dev, struct mm_struct *mm,
1399 			     const struct drm_pagemap_devmem_ops *ops,
1400 			     struct drm_pagemap *dpagemap, size_t size,
1401 			     struct dma_fence *pre_migrate_fence)
1402 {
1403 	init_completion(&devmem_allocation->detached);
1404 	devmem_allocation->dev = dev;
1405 	devmem_allocation->mm = mm;
1406 	devmem_allocation->ops = ops;
1407 	devmem_allocation->dpagemap = dpagemap;
1408 	devmem_allocation->size = size;
1409 	devmem_allocation->pre_migrate_fence = pre_migrate_fence;
1410 }
1411 EXPORT_SYMBOL_GPL(drm_pagemap_devmem_init);
1412 
1413 /**
1414  * drm_pagemap_page_to_dpagemap() - Return a pointer the drm_pagemap of a page
1415  * @page: The struct page.
1416  *
1417  * Return: A pointer to the struct drm_pagemap of a device private page that
1418  * was populated from the struct drm_pagemap. If the page was *not* populated
1419  * from a struct drm_pagemap, the result is undefined and the function call
1420  * may result in dereferencing and invalid address.
1421  */
1422 struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page)
1423 {
1424 	struct drm_pagemap_zdd *zdd = drm_pagemap_page_zone_device_data(page);
1425 
1426 	return zdd->devmem_allocation->dpagemap;
1427 }
1428 EXPORT_SYMBOL_GPL(drm_pagemap_page_to_dpagemap);
1429 
1430 /**
1431  * drm_pagemap_populate_mm() - Populate a virtual range with device memory pages
1432  * @dpagemap: Pointer to the drm_pagemap managing the device memory
1433  * @start: Start of the virtual range to populate.
1434  * @end: End of the virtual range to populate.
1435  * @mm: Pointer to the virtual address space.
1436  * @timeslice_ms: The time requested for the migrated pagemap pages to
1437  * be present in @mm before being allowed to be migrated back.
1438  *
1439  * Attempt to populate a virtual range with device memory pages,
1440  * clearing them or migrating data from the existing pages if necessary.
1441  * The function is best effort only, and implementations may vary
1442  * in how hard they try to satisfy the request.
1443  *
1444  * Return: %0 on success, negative error code on error. If the hardware
1445  * device was removed / unbound the function will return %-ENODEV.
1446  */
1447 int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
1448 			    unsigned long start, unsigned long end,
1449 			    struct mm_struct *mm,
1450 			    unsigned long timeslice_ms)
1451 {
1452 	int err;
1453 
1454 	if (!mmget_not_zero(mm))
1455 		return -EFAULT;
1456 	mmap_read_lock(mm);
1457 	err = dpagemap->ops->populate_mm(dpagemap, start, end, mm,
1458 					 timeslice_ms);
1459 	mmap_read_unlock(mm);
1460 	mmput(mm);
1461 
1462 	return err;
1463 }
1464 EXPORT_SYMBOL(drm_pagemap_populate_mm);
1465 
1466 void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool is_atomic_or_reclaim)
1467 {
1468 	if (dpagemap->ops->destroy)
1469 		dpagemap->ops->destroy(dpagemap, is_atomic_or_reclaim);
1470 	else
1471 		kfree(dpagemap);
1472 }
1473 
1474 static void drm_pagemap_exit(void)
1475 {
1476 	flush_work(&drm_pagemap_work);
1477 	if (WARN_ON(!llist_empty(&drm_pagemap_unhold_list)))
1478 		disable_work_sync(&drm_pagemap_work);
1479 }
1480 module_exit(drm_pagemap_exit);
1481