xref: /linux/drivers/gpu/drm/drm_pagemap.c (revision c3fb1fb9e65fa6a108b4d19c61bdcb47fd4fe180)
1 // SPDX-License-Identifier: GPL-2.0-only OR MIT
2 /*
3  * Copyright © 2024-2025 Intel Corporation
4  */
5 
6 #include <linux/dma-fence.h>
7 #include <linux/dma-mapping.h>
8 #include <linux/migrate.h>
9 #include <linux/pagemap.h>
10 #include <drm/drm_drv.h>
11 #include <drm/drm_pagemap.h>
12 #include <drm/drm_pagemap_util.h>
13 #include <drm/drm_print.h>
14 
15 /**
16  * DOC: Overview
17  *
18  * The DRM pagemap layer is intended to augment the dev_pagemap functionality by
19  * providing a way to populate a struct mm_struct virtual range with device
20  * private pages and to provide helpers to abstract device memory allocations,
21  * to migrate memory back and forth between device memory and system RAM and
22  * to handle access (and in the future migration) between devices implementing
23  * a fast interconnect that is not necessarily visible to the rest of the
24  * system.
25  *
26  * Typically the DRM pagemap receives requests from one or more DRM GPU SVM
27  * instances to populate struct mm_struct virtual ranges with memory, and the
28  * migration is best effort only and may thus fail. The implementation should
 * also handle device unbinding by blocking new population requests (returning
 * an -ENODEV error) and after that migrating all device pages to system RAM.
31  */
32 
33 /**
34  * DOC: Migration
35  *
36  * Migration granularity typically follows the GPU SVM range requests, but
37  * if there are clashes, due to races or due to the fact that multiple GPU
38  * SVM instances have different views of the ranges used, and because of that
 * parts of a requested range are already present in the requested device memory,
40  * the implementation has a variety of options. It can fail and it can choose
41  * to populate only the part of the range that isn't already in device memory,
42  * and it can evict the range to system before trying to migrate. Ideally an
43  * implementation would just try to migrate the missing part of the range and
44  * allocate just enough memory to do so.
45  *
46  * When migrating to system memory as a response to a cpu fault or a device
47  * memory eviction request, currently a full device memory allocation is
48  * migrated back to system. Moving forward this might need improvement for
49  * situations where a single page needs bouncing between system memory and
50  * device memory due to, for example, atomic operations.
51  *
52  * Key DRM pagemap components:
53  *
54  * - Device Memory Allocations:
55  *      Embedded structure containing enough information for the drm_pagemap to
56  *      migrate to / from device memory.
57  *
58  * - Device Memory Operations:
 *      Define the interface for driver-specific device memory operations:
 *      release memory, populate pfns, and copy to / from device memory.
61  */
62 
63 /**
64  * struct drm_pagemap_zdd - GPU SVM zone device data
65  *
66  * @refcount: Reference count for the zdd
67  * @devmem_allocation: device memory allocation
68  * @dpagemap: Refcounted pointer to the underlying struct drm_pagemap.
69  *
70  * This structure serves as a generic wrapper installed in
71  * page->zone_device_data. It provides infrastructure for looking up a device
72  * memory allocation upon CPU page fault and asynchronously releasing device
73  * memory once the CPU has no page references. Asynchronous release is useful
74  * because CPU page references can be dropped in IRQ contexts, while releasing
75  * device memory likely requires sleeping locks.
76  */
77 struct drm_pagemap_zdd {
78 	struct kref refcount;
79 	struct drm_pagemap_devmem *devmem_allocation;
80 	struct drm_pagemap *dpagemap;
81 };
82 
83 /**
84  * drm_pagemap_zdd_alloc() - Allocate a zdd structure.
85  * @dpagemap: Pointer to the underlying struct drm_pagemap.
86  *
87  * This function allocates and initializes a new zdd structure. It sets up the
88  * reference count and initializes the destroy work.
89  *
90  * Return: Pointer to the allocated zdd on success, ERR_PTR() on failure.
91  */
92 static struct drm_pagemap_zdd *
93 drm_pagemap_zdd_alloc(struct drm_pagemap *dpagemap)
94 {
95 	struct drm_pagemap_zdd *zdd;
96 
97 	zdd = kmalloc_obj(*zdd);
98 	if (!zdd)
99 		return NULL;
100 
101 	kref_init(&zdd->refcount);
102 	zdd->devmem_allocation = NULL;
103 	zdd->dpagemap = drm_pagemap_get(dpagemap);
104 
105 	return zdd;
106 }
107 
108 /**
109  * drm_pagemap_zdd_get() - Get a reference to a zdd structure.
110  * @zdd: Pointer to the zdd structure.
111  *
112  * This function increments the reference count of the provided zdd structure.
113  *
114  * Return: Pointer to the zdd structure.
115  */
116 static struct drm_pagemap_zdd *drm_pagemap_zdd_get(struct drm_pagemap_zdd *zdd)
117 {
118 	kref_get(&zdd->refcount);
119 	return zdd;
120 }
121 
122 /**
123  * drm_pagemap_zdd_destroy() - Destroy a zdd structure.
124  * @ref: Pointer to the reference count structure.
125  *
126  * This function queues the destroy_work of the zdd for asynchronous destruction.
127  */
128 static void drm_pagemap_zdd_destroy(struct kref *ref)
129 {
130 	struct drm_pagemap_zdd *zdd =
131 		container_of(ref, struct drm_pagemap_zdd, refcount);
132 	struct drm_pagemap_devmem *devmem = zdd->devmem_allocation;
133 	struct drm_pagemap *dpagemap = zdd->dpagemap;
134 
135 	if (devmem) {
136 		complete_all(&devmem->detached);
137 		if (devmem->ops->devmem_release)
138 			devmem->ops->devmem_release(devmem);
139 	}
140 	kfree(zdd);
141 	drm_pagemap_put(dpagemap);
142 }
143 
144 /**
145  * drm_pagemap_zdd_put() - Put a zdd reference.
146  * @zdd: Pointer to the zdd structure.
147  *
148  * This function decrements the reference count of the provided zdd structure
149  * and schedules its destruction if the count drops to zero.
150  */
151 static void drm_pagemap_zdd_put(struct drm_pagemap_zdd *zdd)
152 {
153 	kref_put(&zdd->refcount, drm_pagemap_zdd_destroy);
154 }
155 
/**
 * drm_pagemap_migration_unlock_put_folio() - Release a migration folio
 * @folio: The folio to release
 *
 * Unlocks @folio first and then drops one reference on it.
 */
static void drm_pagemap_migration_unlock_put_folio(struct folio *folio)
{
	/* Unlock while the reference is still held. */
	folio_unlock(folio);
	folio_put(folio);
}
167 
/**
 * drm_pagemap_migration_unlock_put_pages() - Release an array of migration pages
 * @npages: Number of entries in @migrate_pfn
 * @migrate_pfn: Array of migrate page frame numbers
 *
 * Walks @migrate_pfn folio by folio. Every non-zero entry has its folio
 * unlocked and put, and the entry is cleared. The walk advances by the
 * number of pages in the folio just released, or by one entry when the
 * current entry is empty.
 */
static void drm_pagemap_migration_unlock_put_pages(unsigned long npages,
						   unsigned long *migrate_pfn)
{
	unsigned long idx = 0;

	while (idx < npages) {
		unsigned int order = 0;

		if (migrate_pfn[idx]) {
			struct page *page = migrate_pfn_to_page(migrate_pfn[idx]);
			struct folio *folio = page_folio(page);

			/* Read the order before our reference goes away. */
			order = folio_order(folio);

			drm_pagemap_migration_unlock_put_folio(folio);
			migrate_pfn[idx] = 0;
		}

		idx += NR_PAGES(order);
	}
}
199 
200 /**
201  * drm_pagemap_get_devmem_page() - Get a reference to a device memory page
202  * @page: Pointer to the page
203  * @order: Order
204  * @zdd: Pointer to the GPU SVM zone device data
205  *
206  * This function associates the given page with the specified GPU SVM zone
207  * device data and initializes it for zone device usage.
208  */
209 static void drm_pagemap_get_devmem_page(struct page *page,
210 					unsigned int order,
211 					struct drm_pagemap_zdd *zdd)
212 {
213 	zone_device_folio_init((struct folio *)page, zdd->dpagemap->pagemap,
214 			       order);
215 	folio_set_zone_device_data(page_folio(page), drm_pagemap_zdd_get(zdd));
216 }
217 
218 /**
219  * drm_pagemap_migrate_map_device_private_pages() - Map device private migration
220  * pages for GPU SVM migration
221  * @dev: The device performing the migration.
222  * @local_dpagemap: The drm_pagemap local to the migrating device.
223  * @pagemap_addr: Array to store DMA information corresponding to mapped pages.
224  * @migrate_pfn: Array of page frame numbers of system pages or peer pages to map.
225  * @npages: Number of system pages or peer pages to map.
226  * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
227  * @mdetails: Details governing the migration behaviour.
228  *
229  * This function maps pages of memory for migration usage in GPU SVM. It
230  * iterates over each page frame number provided in @migrate_pfn, maps the
231  * corresponding page, and stores the DMA address in the provided @dma_addr
232  * array.
233  *
234  * Returns: 0 on success, -EFAULT if an error occurs during mapping.
235  */
236 static int
237 drm_pagemap_migrate_map_device_private_pages(struct device *dev,
238 					     struct drm_pagemap *local_dpagemap,
239 					     struct drm_pagemap_addr *pagemap_addr,
240 					     unsigned long *migrate_pfn,
241 					     unsigned long npages,
242 					     enum dma_data_direction dir,
243 					     const struct drm_pagemap_migrate_details *mdetails)
244 {
245 	unsigned long num_peer_pages = 0, num_local_pages = 0, i;
246 
247 	for (i = 0; i < npages;) {
248 		struct page *page = migrate_pfn_to_page(migrate_pfn[i]);
249 		struct drm_pagemap_zdd *zdd;
250 		struct drm_pagemap *dpagemap;
251 		struct drm_pagemap_addr addr;
252 		struct folio *folio;
253 		unsigned int order = 0;
254 
255 		if (!page)
256 			goto next;
257 
258 		WARN_ON_ONCE(!is_device_private_page(page));
259 		folio = page_folio(page);
260 		order = folio_order(folio);
261 
262 		zdd = drm_pagemap_page_zone_device_data(page);
263 		dpagemap = zdd->dpagemap;
264 
265 		if (dpagemap == local_dpagemap) {
266 			if (!mdetails->can_migrate_same_pagemap)
267 				goto next;
268 
269 			num_local_pages += NR_PAGES(order);
270 		} else {
271 			num_peer_pages += NR_PAGES(order);
272 		}
273 
274 		addr = dpagemap->ops->device_map(dpagemap, dev, page, order, dir);
275 		if (dma_mapping_error(dev, addr.addr))
276 			return -EFAULT;
277 
278 		pagemap_addr[i] = addr;
279 
280 next:
281 		i += NR_PAGES(order);
282 	}
283 
284 	if (num_peer_pages)
285 		drm_dbg(local_dpagemap->drm, "Migrating %lu peer pages over interconnect.\n",
286 			num_peer_pages);
287 	if (num_local_pages)
288 		drm_dbg(local_dpagemap->drm, "Migrating %lu local pages over interconnect.\n",
289 			num_local_pages);
290 
291 	return 0;
292 }
293 
294 /**
295  * struct drm_pagemap_iova_state - DRM pagemap IOVA state
296  * @dma_state: DMA IOVA state.
297  * @offset: Current offset in IOVA.
298  *
299  * This structure acts as an iterator for packing all IOVA addresses within a
300  * contiguous range.
301  */
302 struct drm_pagemap_iova_state {
303 	struct dma_iova_state dma_state;
304 	unsigned long offset;
305 };
306 
307 /**
308  * drm_pagemap_migrate_map_system_pages() - Map system or device coherent
309  * migration pages for GPU SVM migration
310  * @dev: The device performing the migration.
311  * @pagemap_addr: Array to store DMA information corresponding to mapped pages.
312  * @migrate_pfn: Array of page frame numbers of system pages or peer pages to map.
313  * @npages: Number of system or device coherent pages to map.
314  * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
315  * @state: DMA IOVA state for mapping.
316  *
317  * This function maps pages of memory for migration usage in GPU SVM. It
318  * iterates over each page frame number provided in @migrate_pfn, maps the
319  * corresponding page, and stores the DMA address in the provided @dma_addr
320  * array.
321  *
322  * Returns: 0 on success, negative error code on failure.
323  */
324 static int
325 drm_pagemap_migrate_map_system_pages(struct device *dev,
326 				     struct drm_pagemap_addr *pagemap_addr,
327 				     unsigned long *migrate_pfn,
328 				     unsigned long npages,
329 				     enum dma_data_direction dir,
330 				     struct drm_pagemap_iova_state *state)
331 {
332 	unsigned long i;
333 	bool try_alloc = false;
334 
335 	for (i = 0; i < npages;) {
336 		struct page *page = migrate_pfn_to_page(migrate_pfn[i]);
337 		dma_addr_t dma_addr;
338 		struct folio *folio;
339 		unsigned int order = 0;
340 
341 		if (!page)
342 			goto next;
343 
344 		WARN_ON_ONCE(is_device_private_page(page));
345 		folio = page_folio(page);
346 		order = folio_order(folio);
347 
348 		if (!try_alloc) {
349 			dma_iova_try_alloc(dev, &state->dma_state,
350 					   (npages - i) * PAGE_SIZE >=
351 					   HPAGE_PMD_SIZE ?
352 					   HPAGE_PMD_SIZE : 0,
353 					   npages * PAGE_SIZE);
354 			try_alloc = true;
355 		}
356 
357 		if (dma_use_iova(&state->dma_state)) {
358 			int err = dma_iova_link(dev, &state->dma_state,
359 						page_to_phys(page),
360 						state->offset, page_size(page),
361 						dir, 0);
362 			if (err)
363 				return err;
364 
365 			dma_addr = state->dma_state.addr + state->offset;
366 			state->offset += page_size(page);
367 		} else {
368 			dma_addr = dma_map_page(dev, page, 0, page_size(page),
369 						dir);
370 			if (dma_mapping_error(dev, dma_addr))
371 				return -EFAULT;
372 		}
373 
374 		pagemap_addr[i] =
375 			drm_pagemap_addr_encode(dma_addr,
376 						DRM_INTERCONNECT_SYSTEM,
377 						order, dir);
378 
379 next:
380 		i += NR_PAGES(order);
381 	}
382 
383 	if (dma_use_iova(&state->dma_state))
384 		return dma_iova_sync(dev, &state->dma_state, 0, state->offset);
385 
386 	return 0;
387 }
388 
389 /**
390  * drm_pagemap_migrate_unmap_pages() - Unmap pages previously mapped for GPU SVM migration
391  * @dev: The device for which the pages were mapped
392  * @migrate_pfn: Array of migrate pfns set up for the mapped pages. Used to
393  * determine the drm_pagemap of a peer device private page.
394  * @pagemap_addr: Array of DMA information corresponding to mapped pages
395  * @npages: Number of pages to unmap
396  * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
397  * @state: DMA IOVA state for mapping.
398  *
399  * This function unmaps previously mapped pages of memory for GPU Shared Virtual
400  * Memory (SVM). It iterates over each DMA address provided in @pagemap_addr,
401  * checks if it's valid and not already unmapped, and unmaps the corresponding
402  * page.
403  */
404 static void drm_pagemap_migrate_unmap_pages(struct device *dev,
405 					    struct drm_pagemap_addr *pagemap_addr,
406 					    unsigned long *migrate_pfn,
407 					    unsigned long npages,
408 					    enum dma_data_direction dir,
409 					    struct drm_pagemap_iova_state *state)
410 {
411 	unsigned long i;
412 
413 	if (state && dma_use_iova(&state->dma_state)) {
414 		dma_iova_destroy(dev, &state->dma_state, state->offset, dir, 0);
415 		return;
416 	}
417 
418 	for (i = 0; i < npages;) {
419 		struct page *page = migrate_pfn_to_page(migrate_pfn[i]);
420 
421 		if (!page || !pagemap_addr[i].addr || dma_mapping_error(dev, pagemap_addr[i].addr))
422 			goto next;
423 
424 		if (is_zone_device_page(page)) {
425 			struct drm_pagemap_zdd *zdd = drm_pagemap_page_zone_device_data(page);
426 			struct drm_pagemap *dpagemap = zdd->dpagemap;
427 
428 			dpagemap->ops->device_unmap(dpagemap, dev, &pagemap_addr[i]);
429 		} else {
430 			dma_unmap_page(dev, pagemap_addr[i].addr,
431 				       PAGE_SIZE << pagemap_addr[i].order, dir);
432 		}
433 
434 next:
435 		i += NR_PAGES(pagemap_addr[i].order);
436 	}
437 }
438 
/* Number of whole pages covered by the address range [start, end). */
static unsigned long
npages_in_range(unsigned long start, unsigned long end)
{
	unsigned long bytes = end - start;

	return bytes >> PAGE_SHIFT;
}
444 
445 static int
446 drm_pagemap_migrate_remote_to_local(struct drm_pagemap_devmem *devmem,
447 				    struct device *remote_device,
448 				    struct drm_pagemap *remote_dpagemap,
449 				    unsigned long local_pfns[],
450 				    struct page *remote_pages[],
451 				    struct drm_pagemap_addr pagemap_addr[],
452 				    unsigned long npages,
453 				    const struct drm_pagemap_devmem_ops *ops,
454 				    const struct drm_pagemap_migrate_details *mdetails)
455 
456 {
457 	int err = drm_pagemap_migrate_map_device_private_pages(remote_device,
458 							       remote_dpagemap,
459 							       pagemap_addr,
460 							       local_pfns,
461 							       npages,
462 							       DMA_FROM_DEVICE,
463 							       mdetails);
464 
465 	if (err)
466 		goto out;
467 
468 	err = ops->copy_to_ram(remote_pages, pagemap_addr, npages,
469 			       devmem->pre_migrate_fence);
470 out:
471 	drm_pagemap_migrate_unmap_pages(remote_device, pagemap_addr, local_pfns,
472 					npages, DMA_FROM_DEVICE, NULL);
473 	return err;
474 }
475 
476 static int
477 drm_pagemap_migrate_sys_to_dev(struct drm_pagemap_devmem *devmem,
478 			       unsigned long sys_pfns[],
479 			       struct page *local_pages[],
480 			       struct drm_pagemap_addr pagemap_addr[],
481 			       unsigned long npages,
482 			       const struct drm_pagemap_devmem_ops *ops,
483 			       struct drm_pagemap_iova_state *state)
484 {
485 	int err = drm_pagemap_migrate_map_system_pages(devmem->dev,
486 						       pagemap_addr, sys_pfns,
487 						       npages, DMA_TO_DEVICE,
488 						       state);
489 
490 	if (err)
491 		goto out;
492 
493 	err = ops->copy_to_devmem(local_pages, pagemap_addr, npages,
494 				  devmem->pre_migrate_fence);
495 out:
496 	drm_pagemap_migrate_unmap_pages(devmem->dev, pagemap_addr, sys_pfns, npages,
497 					DMA_TO_DEVICE, state);
498 	return err;
499 }
500 
/**
 * struct migrate_range_loc - Cursor into the loop over migrate_pfns for migrating to
 * device.
 * @start: Index into the migrate pfn arrays at which this run starts.
 * @device: Device performing the copy for this run.
 * @dpagemap: The struct drm_pagemap the source pages of this run belong to,
 * or NULL when the run is copied from system pages.
 * @ops: The copy ops to be used for this run.
 */
struct migrate_range_loc {
	unsigned long start;
	struct device *device;
	struct drm_pagemap *dpagemap;
	const struct drm_pagemap_devmem_ops *ops;
};
515 
516 static int drm_pagemap_migrate_range(struct drm_pagemap_devmem *devmem,
517 				     unsigned long src_pfns[],
518 				     unsigned long dst_pfns[],
519 				     struct page *pages[],
520 				     struct drm_pagemap_addr pagemap_addr[],
521 				     struct migrate_range_loc *last,
522 				     const struct migrate_range_loc *cur,
523 				     const struct drm_pagemap_migrate_details *mdetails)
524 {
525 	struct drm_pagemap_iova_state state = {};
526 	int ret = 0;
527 
528 	if (cur->start == 0)
529 		goto out;
530 
531 	if (cur->start <= last->start)
532 		return 0;
533 
534 	if (cur->dpagemap == last->dpagemap && cur->ops == last->ops)
535 		return 0;
536 
537 	if (last->dpagemap)
538 		ret = drm_pagemap_migrate_remote_to_local(devmem,
539 							  last->device,
540 							  last->dpagemap,
541 							  &dst_pfns[last->start],
542 							  &pages[last->start],
543 							  &pagemap_addr[last->start],
544 							  cur->start - last->start,
545 							  last->ops, mdetails);
546 
547 	else
548 		ret = drm_pagemap_migrate_sys_to_dev(devmem,
549 						     &src_pfns[last->start],
550 						     &pages[last->start],
551 						     &pagemap_addr[last->start],
552 						     cur->start - last->start,
553 						     last->ops, &state);
554 
555 out:
556 	*last = *cur;
557 	return ret;
558 }
559 
560 /**
561  * drm_pagemap_cpages() - Count collected pages
562  * @migrate_pfn: Array of migrate_pfn entries to account
563  * @npages: Number of entries in @migrate_pfn
564  *
565  * Compute the total number of minimum-sized pages represented by the
566  * collected entries in @migrate_pfn. The total is derived from the
567  * order encoded in each entry.
568  *
569  * Return: Total number of minimum-sized pages.
570  */
571 static int drm_pagemap_cpages(unsigned long *migrate_pfn, unsigned long npages)
572 {
573 	unsigned long i, cpages = 0;
574 
575 	for (i = 0; i < npages;) {
576 		struct page *page = migrate_pfn_to_page(migrate_pfn[i]);
577 		struct folio *folio;
578 		unsigned int order = 0;
579 
580 		if (page) {
581 			folio = page_folio(page);
582 			order = folio_order(folio);
583 			cpages += NR_PAGES(order);
584 		} else if (migrate_pfn[i] & MIGRATE_PFN_COMPOUND) {
585 			order = HPAGE_PMD_ORDER;
586 			cpages += NR_PAGES(order);
587 		}
588 
589 		i += NR_PAGES(order);
590 	}
591 
592 	return cpages;
593 }
594 
595 /**
596  * drm_pagemap_migrate_to_devmem() - Migrate a struct mm_struct range to device memory
597  * @devmem_allocation: The device memory allocation to migrate to.
598  * The caller should hold a reference to the device memory allocation,
599  * and the reference is consumed by this function even if it returns with
600  * an error.
601  * @mm: Pointer to the struct mm_struct.
602  * @start: Start of the virtual address range to migrate.
603  * @end: End of the virtual address range to migrate.
604  * @mdetails: Details to govern the migration.
605  *
606  * This function migrates the specified virtual address range to device memory.
607  * It performs the necessary setup and invokes the driver-specific operations for
608  * migration to device memory. Expected to be called while holding the mmap lock in
609  * at least read mode.
610  *
611  * Note: The @timeslice_ms parameter can typically be used to force data to
612  * remain in pagemap pages long enough for a GPU to perform a task and to prevent
613  * a migration livelock. One alternative would be for the GPU driver to block
614  * in a mmu_notifier for the specified amount of time, but adding the
615  * functionality to the pagemap is likely nicer to the system as a whole.
616  *
617  * Return: %0 on success, negative error code on failure.
618  */
619 int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
620 				  struct mm_struct *mm,
621 				  unsigned long start, unsigned long end,
622 				  const struct drm_pagemap_migrate_details *mdetails)
623 {
624 	const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops;
625 	struct drm_pagemap *dpagemap = devmem_allocation->dpagemap;
626 	struct dev_pagemap *pagemap = dpagemap->pagemap;
627 	struct migrate_vma migrate = {
628 		.start		= start,
629 		.end		= end,
630 		.pgmap_owner	= pagemap->owner,
631 		.flags		= MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT |
632 		MIGRATE_VMA_SELECT_DEVICE_PRIVATE | MIGRATE_VMA_SELECT_COMPOUND,
633 	};
634 	unsigned long i, npages = npages_in_range(start, end);
635 	unsigned long own_pages = 0, migrated_pages = 0;
636 	struct migrate_range_loc cur, last = {.device = dpagemap->drm->dev, .ops = ops};
637 	struct vm_area_struct *vas;
638 	struct drm_pagemap_zdd *zdd = NULL;
639 	struct page **pages;
640 	struct drm_pagemap_addr *pagemap_addr;
641 	void *buf;
642 	int err;
643 
644 	mmap_assert_locked(mm);
645 
646 	if (!ops->populate_devmem_pfn || !ops->copy_to_devmem ||
647 	    !ops->copy_to_ram)
648 		return -EOPNOTSUPP;
649 
650 	vas = vma_lookup(mm, start);
651 	if (!vas) {
652 		err = -ENOENT;
653 		goto err_out;
654 	}
655 
656 	if (end > vas->vm_end || start < vas->vm_start) {
657 		err = -EINVAL;
658 		goto err_out;
659 	}
660 
661 	if (!vma_is_anonymous(vas)) {
662 		err = -EBUSY;
663 		goto err_out;
664 	}
665 
666 	buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*pagemap_addr) +
667 		       sizeof(*pages), GFP_KERNEL);
668 	if (!buf) {
669 		err = -ENOMEM;
670 		goto err_out;
671 	}
672 	pagemap_addr = buf + (2 * sizeof(*migrate.src) * npages);
673 	pages = buf + (2 * sizeof(*migrate.src) + sizeof(*pagemap_addr)) * npages;
674 
675 	zdd = drm_pagemap_zdd_alloc(dpagemap);
676 	if (!zdd) {
677 		err = -ENOMEM;
678 		kvfree(buf);
679 		goto err_out;
680 	}
681 	zdd->devmem_allocation = devmem_allocation;	/* Owns ref */
682 
683 	migrate.vma = vas;
684 	migrate.src = buf;
685 	migrate.dst = migrate.src + npages;
686 
687 	err = migrate_vma_setup(&migrate);
688 	if (err)
689 		goto err_free;
690 
691 	if (!migrate.cpages) {
692 		/* No pages to migrate. Raced or unknown device pages. */
693 		err = -EBUSY;
694 		goto err_free;
695 	}
696 
697 	if (migrate.cpages != npages &&
698 	    drm_pagemap_cpages(migrate.src, npages) != npages) {
699 		/*
700 		 * Some pages to migrate. But we want to migrate all or
701 		 * nothing. Raced or unknown device pages.
702 		 */
703 		err = -EBUSY;
704 		goto err_aborted_migration;
705 	}
706 
707 	/* Count device-private pages to migrate */
708 	for (i = 0; i < npages;) {
709 		struct page *src_page = migrate_pfn_to_page(migrate.src[i]);
710 		unsigned long nr_pages = src_page ? NR_PAGES(folio_order(page_folio(src_page))) : 1;
711 
712 		if (src_page && is_zone_device_page(src_page)) {
713 			if (page_pgmap(src_page) == pagemap)
714 				own_pages += nr_pages;
715 		}
716 
717 		i += nr_pages;
718 	}
719 
720 	drm_dbg(dpagemap->drm, "Total pages %lu; Own pages: %lu.\n",
721 		npages, own_pages);
722 	if (own_pages == npages) {
723 		err = 0;
724 		drm_dbg(dpagemap->drm, "Migration wasn't necessary.\n");
725 		goto err_aborted_migration;
726 	} else if (own_pages && !mdetails->can_migrate_same_pagemap) {
727 		err = -EBUSY;
728 		drm_dbg(dpagemap->drm, "Migration aborted due to fragmentation.\n");
729 		goto err_aborted_migration;
730 	}
731 
732 	err = ops->populate_devmem_pfn(devmem_allocation, npages, migrate.dst);
733 	if (err)
734 		goto err_aborted_migration;
735 
736 	own_pages = 0;
737 
738 	for (i = 0; i < npages;) {
739 		unsigned long j;
740 		struct page *page = pfn_to_page(migrate.dst[i]);
741 		struct page *src_page = migrate_pfn_to_page(migrate.src[i]);
742 		unsigned int order = 0;
743 
744 		cur.start = i;
745 		pages[i] = NULL;
746 		if (src_page && is_device_private_page(src_page)) {
747 			struct drm_pagemap_zdd *src_zdd =
748 				drm_pagemap_page_zone_device_data(src_page);
749 
750 			if (page_pgmap(src_page) == pagemap &&
751 			    !mdetails->can_migrate_same_pagemap) {
752 				migrate.dst[i] = 0;
753 				own_pages++;
754 				goto next;
755 			}
756 			cur.dpagemap = src_zdd->dpagemap;
757 			cur.ops = src_zdd->devmem_allocation->ops;
758 			cur.device = cur.dpagemap->drm->dev;
759 			pages[i] = src_page;
760 		}
761 		if (!pages[i]) {
762 			cur.dpagemap = NULL;
763 			cur.ops = ops;
764 			cur.device = dpagemap->drm->dev;
765 			pages[i] = page;
766 		}
767 		migrate.dst[i] = migrate_pfn(migrate.dst[i]);
768 
769 		if (migrate.src[i] & MIGRATE_PFN_COMPOUND) {
770 			drm_WARN_ONCE(dpagemap->drm, src_page &&
771 				      folio_order(page_folio(src_page)) != HPAGE_PMD_ORDER,
772 				      "Unexpected folio order\n");
773 
774 			order = HPAGE_PMD_ORDER;
775 			migrate.dst[i] |= MIGRATE_PFN_COMPOUND;
776 
777 			for (j = 1; j < NR_PAGES(order) && i + j < npages; j++)
778 				migrate.dst[i + j] = 0;
779 		}
780 
781 		drm_pagemap_get_devmem_page(page, order, zdd);
782 
783 		/* If we switched the migrating drm_pagemap, migrate previous pages now */
784 		err = drm_pagemap_migrate_range(devmem_allocation, migrate.src, migrate.dst,
785 						pages, pagemap_addr, &last, &cur,
786 						mdetails);
787 		if (err) {
788 			npages = i + 1;
789 			goto err_finalize;
790 		}
791 
792 next:
793 		i += NR_PAGES(order);
794 	}
795 
796 	cur.start = npages;
797 	cur.ops = NULL; /* Force migration */
798 	err = drm_pagemap_migrate_range(devmem_allocation, migrate.src, migrate.dst,
799 					pages, pagemap_addr, &last, &cur, mdetails);
800 	if (err)
801 		goto err_finalize;
802 
803 	drm_WARN_ON(dpagemap->drm, !!own_pages);
804 
805 	dma_fence_put(devmem_allocation->pre_migrate_fence);
806 	devmem_allocation->pre_migrate_fence = NULL;
807 
808 	/* Upon success bind devmem allocation to range and zdd */
809 	devmem_allocation->timeslice_expiration = get_jiffies_64() +
810 		msecs_to_jiffies(mdetails->timeslice_ms);
811 
812 err_finalize:
813 	if (err)
814 		drm_pagemap_migration_unlock_put_pages(npages, migrate.dst);
815 err_aborted_migration:
816 	migrate_vma_pages(&migrate);
817 
818 	for (i = 0; !err && i < npages;) {
819 		struct page *page = migrate_pfn_to_page(migrate.src[i]);
820 		unsigned long nr_pages = page ? NR_PAGES(folio_order(page_folio(page))) : 1;
821 
822 		if (migrate.src[i] & MIGRATE_PFN_MIGRATE)
823 			migrated_pages += nr_pages;
824 
825 		i += nr_pages;
826 	}
827 
828 	if (!err && migrated_pages < npages - own_pages) {
829 		drm_dbg(dpagemap->drm, "Raced while finalizing migration.\n");
830 		err = -EBUSY;
831 	}
832 
833 	migrate_vma_finalize(&migrate);
834 err_free:
835 	drm_pagemap_zdd_put(zdd);
836 	kvfree(buf);
837 	return err;
838 
839 err_out:
840 	devmem_allocation->ops->devmem_release(devmem_allocation);
841 	return err;
842 }
843 EXPORT_SYMBOL_GPL(drm_pagemap_migrate_to_devmem);
844 
845 /**
846  * drm_pagemap_migrate_populate_ram_pfn() - Populate RAM PFNs for a VM area
847  * @vas: Pointer to the VM area structure, can be NULL
848  * @fault_page: Fault page
849  * @npages: Number of pages to populate
850  * @mpages: Number of pages to migrate
851  * @src_mpfn: Source array of migrate PFNs
852  * @mpfn: Array of migrate PFNs to populate
853  * @addr: Start address for PFN allocation
854  *
855  * This function populates the RAM migrate page frame numbers (PFNs) for the
856  * specified VM area structure. It allocates and locks pages in the VM area for
857  * RAM usage. If vas is non-NULL use alloc_page_vma for allocation, if NULL use
858  * alloc_page for allocation.
859  *
860  * Return: 0 on success, negative error code on failure.
861  */
862 static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
863 						struct page *fault_page,
864 						unsigned long npages,
865 						unsigned long *mpages,
866 						unsigned long *src_mpfn,
867 						unsigned long *mpfn,
868 						unsigned long addr)
869 {
870 	unsigned long i;
871 
872 	for (i = 0; i < npages;) {
873 		struct page *page = NULL, *src_page;
874 		struct folio *folio;
875 		unsigned int order = 0;
876 
877 		if (!(src_mpfn[i] & MIGRATE_PFN_MIGRATE))
878 			goto next;
879 
880 		src_page = migrate_pfn_to_page(src_mpfn[i]);
881 		if (!src_page)
882 			goto next;
883 
884 		if (fault_page) {
885 			if (drm_pagemap_page_zone_device_data(src_page) !=
886 			    drm_pagemap_page_zone_device_data(fault_page))
887 				goto next;
888 		}
889 
890 		order = folio_order(page_folio(src_page));
891 
892 		/* TODO: Support fallback to single pages if THP allocation fails */
893 		if (vas)
894 			folio = vma_alloc_folio(GFP_HIGHUSER, order, vas, addr);
895 		else
896 			folio = folio_alloc(GFP_HIGHUSER, order);
897 
898 		if (!folio)
899 			goto free_pages;
900 
901 		page = folio_page(folio, 0);
902 		mpfn[i] = migrate_pfn(page_to_pfn(page));
903 
904 		if (order)
905 			mpfn[i] |= MIGRATE_PFN_COMPOUND;
906 next:
907 		if (page)
908 			addr += page_size(page);
909 		else
910 			addr += PAGE_SIZE;
911 
912 		i += NR_PAGES(order);
913 	}
914 
915 	for (i = 0; i < npages;) {
916 		struct page *page = migrate_pfn_to_page(mpfn[i]);
917 		unsigned int order = 0;
918 
919 		if (!page)
920 			goto next_lock;
921 
922 		WARN_ON_ONCE(!folio_trylock(page_folio(page)));
923 
924 		order = folio_order(page_folio(page));
925 		*mpages += NR_PAGES(order);
926 
927 next_lock:
928 		i += NR_PAGES(order);
929 	}
930 
931 	return 0;
932 
933 free_pages:
934 	for (i = 0; i < npages;) {
935 		struct page *page = migrate_pfn_to_page(mpfn[i]);
936 		unsigned int order = 0;
937 
938 		if (!page)
939 			goto next_put;
940 
941 		put_page(page);
942 		mpfn[i] = 0;
943 
944 		order = folio_order(page_folio(page));
945 
946 next_put:
947 		i += NR_PAGES(order);
948 	}
949 	return -ENOMEM;
950 }
951 
/* Device holds queued by drm_pagemap_release() and drained by the work item. */
static LLIST_HEAD(drm_pagemap_unhold_list);

/* Work item that runs drm_pagemap_dev_unhold_work() on the queued holds. */
static void drm_pagemap_dev_unhold_work(struct work_struct *work);
static DECLARE_WORK(drm_pagemap_work, drm_pagemap_dev_unhold_work);
955 
956 /**
957  * struct drm_pagemap_dev_hold - Struct to aid in drm_device release.
958  * @link: Link into drm_pagemap_unhold_list for deferred reference releases.
959  * @drm: drm device to put.
960  *
961  * When a struct drm_pagemap is released, we also need to release the
962  * reference it holds on the drm device. However, typically that needs
963  * to be done separately from a system-wide workqueue.
964  * Each time a struct drm_pagemap is initialized
965  * (or re-initialized if cached) therefore allocate a separate
966  * drm_pagemap_dev_hold item, from which we put the drm device and
967  * associated module.
968  */
969 struct drm_pagemap_dev_hold {
970 	struct llist_node link;
971 	struct drm_device *drm;
972 };
973 
974 static void drm_pagemap_release(struct kref *ref)
975 {
976 	struct drm_pagemap *dpagemap = container_of(ref, typeof(*dpagemap), ref);
977 	struct drm_pagemap_dev_hold *dev_hold = dpagemap->dev_hold;
978 
979 	/*
980 	 * We know the pagemap provider is alive at this point, since
981 	 * the struct drm_pagemap_dev_hold holds a reference to the
982 	 * pagemap provider drm_device and its module.
983 	 */
984 	dpagemap->dev_hold = NULL;
985 	drm_pagemap_shrinker_add(dpagemap);
986 	llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
987 	schedule_work(&drm_pagemap_work);
988 	/*
989 	 * Here, either the provider device is still alive, since if called from
990 	 * page_free(), the caller is holding a reference on the dev_pagemap,
991 	 * or if called from drm_pagemap_put(), the direct caller is still alive.
992 	 * This ensures we can't race with THIS module unload.
993 	 */
994 }
995 
996 static void drm_pagemap_dev_unhold_work(struct work_struct *work)
997 {
998 	struct llist_node *node = llist_del_all(&drm_pagemap_unhold_list);
999 	struct drm_pagemap_dev_hold *dev_hold, *next;
1000 
1001 	/*
1002 	 * Deferred release of drm_pagemap provider device and module.
1003 	 * THIS module is kept alive during the release by the
1004 	 * flush_work() in the drm_pagemap_exit() function.
1005 	 */
1006 	llist_for_each_entry_safe(dev_hold, next, node, link) {
1007 		struct drm_device *drm = dev_hold->drm;
1008 		struct module *module = drm->driver->fops->owner;
1009 
1010 		drm_dbg(drm, "Releasing reference on provider device and module.\n");
1011 		drm_dev_put(drm);
1012 		module_put(module);
1013 		kfree(dev_hold);
1014 	}
1015 }
1016 
1017 static struct drm_pagemap_dev_hold *
1018 drm_pagemap_dev_hold(struct drm_pagemap *dpagemap)
1019 {
1020 	struct drm_pagemap_dev_hold *dev_hold;
1021 	struct drm_device *drm = dpagemap->drm;
1022 
1023 	dev_hold = kzalloc_obj(*dev_hold);
1024 	if (!dev_hold)
1025 		return ERR_PTR(-ENOMEM);
1026 
1027 	init_llist_node(&dev_hold->link);
1028 	dev_hold->drm = drm;
1029 	(void)try_module_get(drm->driver->fops->owner);
1030 	drm_dev_get(drm);
1031 
1032 	return dev_hold;
1033 }
1034 
1035 /**
1036  * drm_pagemap_reinit() - Reinitialize a drm_pagemap
1037  * @dpagemap: The drm_pagemap to reinitialize
1038  *
1039  * Reinitialize a drm_pagemap, for which drm_pagemap_release
1040  * has already been called. This interface is intended for the
1041  * situation where the driver caches a destroyed drm_pagemap.
1042  *
1043  * Return: 0 on success, negative error code on failure.
1044  */
1045 int drm_pagemap_reinit(struct drm_pagemap *dpagemap)
1046 {
1047 	dpagemap->dev_hold = drm_pagemap_dev_hold(dpagemap);
1048 	if (IS_ERR(dpagemap->dev_hold))
1049 		return PTR_ERR(dpagemap->dev_hold);
1050 
1051 	kref_init(&dpagemap->ref);
1052 	return 0;
1053 }
1054 EXPORT_SYMBOL(drm_pagemap_reinit);
1055 
1056 /**
1057  * drm_pagemap_init() - Initialize a pre-allocated drm_pagemap
1058  * @dpagemap: The drm_pagemap to initialize.
1059  * @pagemap: The associated dev_pagemap providing the device
1060  * private pages.
1061  * @drm: The drm device. The drm_pagemap holds a reference on the
1062  * drm_device and the module owning the drm_device until
1063  * drm_pagemap_release(). This facilitates drm_pagemap exporting.
1064  * @ops: The drm_pagemap ops.
1065  *
1066  * Initialize and take an initial reference on a drm_pagemap.
1067  * After successful return, use drm_pagemap_put() to destroy.
1068  *
1069  ** Return: 0 on success, negative error code on error.
1070  */
1071 int drm_pagemap_init(struct drm_pagemap *dpagemap,
1072 		     struct dev_pagemap *pagemap,
1073 		     struct drm_device *drm,
1074 		     const struct drm_pagemap_ops *ops)
1075 {
1076 	kref_init(&dpagemap->ref);
1077 	dpagemap->ops = ops;
1078 	dpagemap->pagemap = pagemap;
1079 	dpagemap->drm = drm;
1080 	dpagemap->cache = NULL;
1081 	INIT_LIST_HEAD(&dpagemap->shrink_link);
1082 
1083 	return drm_pagemap_reinit(dpagemap);
1084 }
1085 EXPORT_SYMBOL(drm_pagemap_init);
1086 
1087 /**
1088  * drm_pagemap_put() - Put a struct drm_pagemap reference
1089  * @dpagemap: Pointer to a struct drm_pagemap object.
1090  *
1091  * Puts a struct drm_pagemap reference and frees the drm_pagemap object
1092  * if the refount reaches zero.
1093  */
1094 void drm_pagemap_put(struct drm_pagemap *dpagemap)
1095 {
1096 	if (likely(dpagemap)) {
1097 		drm_pagemap_shrinker_might_lock(dpagemap);
1098 		kref_put(&dpagemap->ref, drm_pagemap_release);
1099 	}
1100 }
1101 EXPORT_SYMBOL(drm_pagemap_put);
1102 
1103 /**
1104  * drm_pagemap_evict_to_ram() - Evict GPU SVM range to RAM
1105  * @devmem_allocation: Pointer to the device memory allocation
1106  *
1107  * Similar to __drm_pagemap_migrate_to_ram but does not require mmap lock and
1108  * migration done via migrate_device_* functions.
1109  *
1110  * Return: 0 on success, negative error code on failure.
1111  */
1112 int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation)
1113 {
1114 	const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops;
1115 	struct drm_pagemap_iova_state state = {};
1116 	unsigned long npages, mpages = 0;
1117 	struct page **pages;
1118 	unsigned long *src, *dst;
1119 	struct drm_pagemap_addr *pagemap_addr;
1120 	void *buf;
1121 	int i, err = 0;
1122 	unsigned int retry_count = 2;
1123 
1124 	npages = devmem_allocation->size >> PAGE_SHIFT;
1125 
1126 retry:
1127 	if (!mmget_not_zero(devmem_allocation->mm))
1128 		return -EFAULT;
1129 
1130 	buf = kvcalloc(npages, 2 * sizeof(*src) + sizeof(*pagemap_addr) +
1131 		       sizeof(*pages), GFP_KERNEL);
1132 	if (!buf) {
1133 		err = -ENOMEM;
1134 		goto err_out;
1135 	}
1136 	src = buf;
1137 	dst = buf + (sizeof(*src) * npages);
1138 	pagemap_addr = buf + (2 * sizeof(*src) * npages);
1139 	pages = buf + (2 * sizeof(*src) + sizeof(*pagemap_addr)) * npages;
1140 
1141 	err = ops->populate_devmem_pfn(devmem_allocation, npages, src);
1142 	if (err)
1143 		goto err_free;
1144 
1145 	err = migrate_device_pfns(src, npages);
1146 	if (err)
1147 		goto err_free;
1148 
1149 	err = drm_pagemap_migrate_populate_ram_pfn(NULL, NULL, npages, &mpages,
1150 						   src, dst, 0);
1151 	if (err || !mpages)
1152 		goto err_finalize;
1153 
1154 	err = drm_pagemap_migrate_map_system_pages(devmem_allocation->dev,
1155 						   pagemap_addr,
1156 						   dst, npages,
1157 						   DMA_FROM_DEVICE, &state);
1158 	if (err)
1159 		goto err_finalize;
1160 
1161 	for (i = 0; i < npages;) {
1162 		unsigned int order = 0;
1163 
1164 		pages[i] = migrate_pfn_to_page(src[i]);
1165 		if (pages[i])
1166 			order = folio_order(page_folio(pages[i]));
1167 
1168 		i += NR_PAGES(order);
1169 	}
1170 
1171 	err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL);
1172 	if (err)
1173 		goto err_finalize;
1174 
1175 err_finalize:
1176 	if (err)
1177 		drm_pagemap_migration_unlock_put_pages(npages, dst);
1178 	migrate_device_pages(src, dst, npages);
1179 	migrate_device_finalize(src, dst, npages);
1180 	drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, pagemap_addr, dst, npages,
1181 					DMA_FROM_DEVICE, &state);
1182 
1183 err_free:
1184 	kvfree(buf);
1185 err_out:
1186 	mmput_async(devmem_allocation->mm);
1187 
1188 	if (completion_done(&devmem_allocation->detached))
1189 		return 0;
1190 
1191 	if (retry_count--) {
1192 		cond_resched();
1193 		state = (struct drm_pagemap_iova_state){};
1194 		goto retry;
1195 	}
1196 
1197 	return err ?: -EBUSY;
1198 }
1199 EXPORT_SYMBOL_GPL(drm_pagemap_evict_to_ram);
1200 
1201 /**
1202  * __drm_pagemap_migrate_to_ram() - Migrate GPU SVM range to RAM (internal)
1203  * @vas: Pointer to the VM area structure
1204  * @page: Pointer to the page for fault handling.
1205  * @fault_addr: Fault address
1206  * @size: Size of migration
1207  *
1208  * This internal function performs the migration of the specified GPU SVM range
1209  * to RAM. It sets up the migration, populates + dma maps RAM PFNs, and
1210  * invokes the driver-specific operations for migration to RAM.
1211  *
1212  * Return: 0 on success, negative error code on failure.
1213  */
1214 static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
1215 					struct page *page,
1216 					unsigned long fault_addr,
1217 					unsigned long size)
1218 {
1219 	struct migrate_vma migrate = {
1220 		.vma		= vas,
1221 		.pgmap_owner	= page_pgmap(page)->owner,
1222 		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE |
1223 				  MIGRATE_VMA_SELECT_DEVICE_COHERENT |
1224 				  MIGRATE_VMA_SELECT_COMPOUND,
1225 		.fault_page	= page,
1226 	};
1227 	struct drm_pagemap_iova_state state = {};
1228 	struct drm_pagemap_zdd *zdd;
1229 	const struct drm_pagemap_devmem_ops *ops;
1230 	struct device *dev = NULL;
1231 	unsigned long npages, mpages = 0;
1232 	struct page **pages;
1233 	struct drm_pagemap_addr *pagemap_addr;
1234 	unsigned long start, end;
1235 	void *buf;
1236 	int i, err = 0;
1237 
1238 	zdd = drm_pagemap_page_zone_device_data(page);
1239 	if (time_before64(get_jiffies_64(), zdd->devmem_allocation->timeslice_expiration))
1240 		return 0;
1241 
1242 	start = ALIGN_DOWN(fault_addr, size);
1243 	end = ALIGN(fault_addr + 1, size);
1244 
1245 	/* Corner where VMA area struct has been partially unmapped */
1246 	if (start < vas->vm_start)
1247 		start = vas->vm_start;
1248 	if (end > vas->vm_end)
1249 		end = vas->vm_end;
1250 
1251 	migrate.start = start;
1252 	migrate.end = end;
1253 	npages = npages_in_range(start, end);
1254 
1255 	buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*pagemap_addr) +
1256 		       sizeof(*pages), GFP_KERNEL);
1257 	if (!buf) {
1258 		err = -ENOMEM;
1259 		goto err_out;
1260 	}
1261 	pagemap_addr = buf + (2 * sizeof(*migrate.src) * npages);
1262 	pages = buf + (2 * sizeof(*migrate.src) + sizeof(*pagemap_addr)) * npages;
1263 
1264 	migrate.vma = vas;
1265 	migrate.src = buf;
1266 	migrate.dst = migrate.src + npages;
1267 
1268 	err = migrate_vma_setup(&migrate);
1269 	if (err)
1270 		goto err_free;
1271 
1272 	/* Raced with another CPU fault, nothing to do */
1273 	if (!migrate.cpages)
1274 		goto err_free;
1275 
1276 	ops = zdd->devmem_allocation->ops;
1277 	dev = zdd->devmem_allocation->dev;
1278 
1279 	err = drm_pagemap_migrate_populate_ram_pfn(vas, page, npages, &mpages,
1280 						   migrate.src, migrate.dst,
1281 						   start);
1282 	if (err)
1283 		goto err_finalize;
1284 
1285 	err = drm_pagemap_migrate_map_system_pages(dev, pagemap_addr,
1286 						   migrate.dst, npages,
1287 						   DMA_FROM_DEVICE, &state);
1288 	if (err)
1289 		goto err_finalize;
1290 
1291 	for (i = 0; i < npages;) {
1292 		unsigned int order = 0;
1293 
1294 		pages[i] = migrate_pfn_to_page(migrate.src[i]);
1295 		if (pages[i])
1296 			order = folio_order(page_folio(pages[i]));
1297 
1298 		i += NR_PAGES(order);
1299 	}
1300 
1301 	err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL);
1302 	if (err)
1303 		goto err_finalize;
1304 
1305 err_finalize:
1306 	if (err)
1307 		drm_pagemap_migration_unlock_put_pages(npages, migrate.dst);
1308 	migrate_vma_pages(&migrate);
1309 	migrate_vma_finalize(&migrate);
1310 	if (dev)
1311 		drm_pagemap_migrate_unmap_pages(dev, pagemap_addr, migrate.dst,
1312 						npages, DMA_FROM_DEVICE,
1313 						&state);
1314 err_free:
1315 	kvfree(buf);
1316 err_out:
1317 
1318 	return err;
1319 }
1320 
/**
 * drm_pagemap_folio_free() - Put GPU SVM zone device data associated with a folio
 * @folio: Pointer to the folio
 *
 * dev_pagemap callback invoked when a folio is being released. It drops a
 * reference on the zone device data found on the folio's first page.
 */
static void drm_pagemap_folio_free(struct folio *folio)
{
	struct drm_pagemap_zdd *zdd;

	zdd = drm_pagemap_page_zone_device_data(folio_page(folio, 0));
	drm_pagemap_zdd_put(zdd);
}
1334 
1335 /**
1336  * drm_pagemap_migrate_to_ram() - Migrate a virtual range to RAM (page fault handler)
1337  * @vmf: Pointer to the fault information structure
1338  *
1339  * This function is a page fault handler used to migrate a virtual range
1340  * to ram. The device memory allocation in which the device page is found is
1341  * migrated in its entirety.
1342  *
1343  * Returns:
1344  * VM_FAULT_SIGBUS on failure, 0 on success.
1345  */
1346 static vm_fault_t drm_pagemap_migrate_to_ram(struct vm_fault *vmf)
1347 {
1348 	struct drm_pagemap_zdd *zdd = drm_pagemap_page_zone_device_data(vmf->page);
1349 	int err;
1350 
1351 	err = __drm_pagemap_migrate_to_ram(vmf->vma,
1352 					   vmf->page, vmf->address,
1353 					   zdd->devmem_allocation->size);
1354 
1355 	return err ? VM_FAULT_SIGBUS : 0;
1356 }
1357 
1358 static void drm_pagemap_folio_split(struct folio *orig_folio, struct folio *new_folio)
1359 {
1360 	struct drm_pagemap_zdd *zdd;
1361 
1362 	if (!new_folio)
1363 		return;
1364 
1365 	new_folio->pgmap = orig_folio->pgmap;
1366 	zdd = folio_zone_device_data(orig_folio);
1367 	folio_set_zone_device_data(new_folio, drm_pagemap_zdd_get(zdd));
1368 }
1369 
1370 static const struct dev_pagemap_ops drm_pagemap_pagemap_ops = {
1371 	.folio_free = drm_pagemap_folio_free,
1372 	.migrate_to_ram = drm_pagemap_migrate_to_ram,
1373 	.folio_split = drm_pagemap_folio_split,
1374 };
1375 
1376 /**
1377  * drm_pagemap_pagemap_ops_get() - Retrieve GPU SVM device page map operations
1378  *
1379  * Returns:
1380  * Pointer to the GPU SVM device page map operations structure.
1381  */
1382 const struct dev_pagemap_ops *drm_pagemap_pagemap_ops_get(void)
1383 {
1384 	return &drm_pagemap_pagemap_ops;
1385 }
1386 EXPORT_SYMBOL_GPL(drm_pagemap_pagemap_ops_get);
1387 
1388 /**
1389  * drm_pagemap_devmem_init() - Initialize a drm_pagemap device memory allocation
1390  *
1391  * @devmem_allocation: The struct drm_pagemap_devmem to initialize.
1392  * @dev: Pointer to the device structure which device memory allocation belongs to
1393  * @mm: Pointer to the mm_struct for the address space
1394  * @ops: Pointer to the operations structure for GPU SVM device memory
1395  * @dpagemap: The struct drm_pagemap we're allocating from.
1396  * @size: Size of device memory allocation
1397  * @pre_migrate_fence: Fence to wait for or pipeline behind before migration starts.
1398  * (May be NULL).
1399  */
1400 void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation,
1401 			     struct device *dev, struct mm_struct *mm,
1402 			     const struct drm_pagemap_devmem_ops *ops,
1403 			     struct drm_pagemap *dpagemap, size_t size,
1404 			     struct dma_fence *pre_migrate_fence)
1405 {
1406 	init_completion(&devmem_allocation->detached);
1407 	devmem_allocation->dev = dev;
1408 	devmem_allocation->mm = mm;
1409 	devmem_allocation->ops = ops;
1410 	devmem_allocation->dpagemap = dpagemap;
1411 	devmem_allocation->size = size;
1412 	devmem_allocation->pre_migrate_fence = pre_migrate_fence;
1413 }
1414 EXPORT_SYMBOL_GPL(drm_pagemap_devmem_init);
1415 
1416 /**
1417  * drm_pagemap_page_to_dpagemap() - Return a pointer the drm_pagemap of a page
1418  * @page: The struct page.
1419  *
1420  * Return: A pointer to the struct drm_pagemap of a device private page that
1421  * was populated from the struct drm_pagemap. If the page was *not* populated
1422  * from a struct drm_pagemap, the result is undefined and the function call
1423  * may result in dereferencing and invalid address.
1424  */
1425 struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page)
1426 {
1427 	struct drm_pagemap_zdd *zdd = drm_pagemap_page_zone_device_data(page);
1428 
1429 	return zdd->devmem_allocation->dpagemap;
1430 }
1431 EXPORT_SYMBOL_GPL(drm_pagemap_page_to_dpagemap);
1432 
1433 /**
1434  * drm_pagemap_populate_mm() - Populate a virtual range with device memory pages
1435  * @dpagemap: Pointer to the drm_pagemap managing the device memory
1436  * @start: Start of the virtual range to populate.
1437  * @end: End of the virtual range to populate.
1438  * @mm: Pointer to the virtual address space.
1439  * @timeslice_ms: The time requested for the migrated pagemap pages to
1440  * be present in @mm before being allowed to be migrated back.
1441  *
1442  * Attempt to populate a virtual range with device memory pages,
1443  * clearing them or migrating data from the existing pages if necessary.
1444  * The function is best effort only, and implementations may vary
1445  * in how hard they try to satisfy the request.
1446  *
1447  * Return: %0 on success, negative error code on error. If the hardware
1448  * device was removed / unbound the function will return %-ENODEV.
1449  */
1450 int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
1451 			    unsigned long start, unsigned long end,
1452 			    struct mm_struct *mm,
1453 			    unsigned long timeslice_ms)
1454 {
1455 	int err;
1456 
1457 	if (!mmget_not_zero(mm))
1458 		return -EFAULT;
1459 	mmap_read_lock(mm);
1460 	err = dpagemap->ops->populate_mm(dpagemap, start, end, mm,
1461 					 timeslice_ms);
1462 	mmap_read_unlock(mm);
1463 	mmput(mm);
1464 
1465 	return err;
1466 }
1467 EXPORT_SYMBOL(drm_pagemap_populate_mm);
1468 
1469 void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool is_atomic_or_reclaim)
1470 {
1471 	if (dpagemap->ops->destroy)
1472 		dpagemap->ops->destroy(dpagemap, is_atomic_or_reclaim);
1473 	else
1474 		kfree(dpagemap);
1475 }
1476 
1477 static void drm_pagemap_exit(void)
1478 {
1479 	flush_work(&drm_pagemap_work);
1480 	if (WARN_ON(!llist_empty(&drm_pagemap_unhold_list)))
1481 		disable_work_sync(&drm_pagemap_work);
1482 }
1483 module_exit(drm_pagemap_exit);
1484