// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
 * Copyright © 2024 Intel Corporation
 *
 * Authors:
 *     Matthew Brost <matthew.brost@intel.com>
 */

#include <linux/dma-mapping.h>
#include <linux/hmm.h>
#include <linux/memremap.h>
#include <linux/migrate.h>
#include <linux/mm_types.h>
#include <linux/pagemap.h>
#include <linux/slab.h>

#include <drm/drm_device.h>
#include <drm/drm_gpusvm.h>
#include <drm/drm_pagemap.h>
#include <drm/drm_print.h>

/**
 * DOC: Overview
 *
 * The GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering Manager (DRM)
 * is a component of the DRM framework designed to manage shared virtual memory
 * between the CPU and GPU. It enables efficient data exchange and processing
 * for GPU-accelerated applications by allowing memory sharing and
 * synchronization between the CPU's and GPU's virtual address spaces.
 *
 * Key GPU SVM Components:
 *
 * - Notifiers:
 *	Used for tracking memory intervals and notifying the GPU of changes,
 *	notifiers are sized based on a GPU SVM initialization parameter, with a
 *	recommendation of 512M or larger. They maintain a Red-Black tree and a
 *	list of ranges that fall within the notifier interval.  Notifiers are
 *	tracked within a GPU SVM Red-Black tree and list and are dynamically
 *	inserted or removed as ranges within the interval are created or
 *	destroyed.
 * - Ranges:
 *	Represent memory ranges mapped in a DRM device and managed by GPU SVM.
 *	They are sized based on an array of chunk sizes, which is a GPU SVM
 *	initialization parameter, and the CPU address space.  Upon GPU fault,
 *	the largest aligned chunk that fits within the faulting CPU address
 *	space is chosen for the range size. Ranges are expected to be
 *	dynamically allocated on GPU fault and removed on an MMU notifier UNMAP
 *	event. As mentioned above, ranges are tracked in a notifier's Red-Black
 *	tree.
 *
 * - Operations:
 *	Define the interface for driver-specific GPU SVM operations such as
 *	range allocation, notifier allocation, and invalidations.
 *
 * - Device Memory Allocations:
 *	Embedded structure containing enough information for GPU SVM to migrate
 *	to / from device memory.
 *
 * - Device Memory Operations:
 *	Define the interface for driver-specific device memory operations, such
 *	as releasing memory, populating pfns, and copying to / from device
 *	memory.
 *
 * This layer provides interfaces for allocating, mapping, migrating, and
 * releasing memory ranges between the CPU and GPU. It handles all core memory
 * management interactions (DMA mapping, HMM, and migration) and provides
 * driver-specific virtual functions (vfuncs). This infrastructure is sufficient
 * to build the expected driver components for an SVM implementation as detailed
 * below.
 *
 * Expected Driver Components:
 *
 * - GPU page fault handler:
 *	Used to create ranges and notifiers based on the fault address,
 *	optionally migrate the range to device memory, and create GPU bindings.
 *
 * - Garbage collector:
 *	Used to unmap and destroy GPU bindings for ranges.  Ranges are expected
 *	to be added to the garbage collector upon an MMU_NOTIFY_UNMAP event in
 *	the notifier callback.
 *
 * - Notifier callback:
 *	Used to invalidate and DMA unmap GPU bindings for ranges.
 */

/**
 * DOC: Locking
 *
 * GPU SVM handles locking for core MM interactions, i.e., it locks/unlocks the
 * mmap lock as needed.
 *
 * GPU SVM introduces a global notifier lock, which safeguards the notifier's
 * range RB tree and list, as well as the range's DMA mappings and sequence
 * number. GPU SVM manages all necessary locking and unlocking operations,
 * except for rechecking that a range's pages are still valid
 * (drm_gpusvm_range_pages_valid) when the driver is committing GPU bindings.
 * This lock corresponds to the ``driver->update`` lock mentioned in
 * Documentation/mm/hmm.rst. Future revisions may transition from a GPU SVM
 * global lock to a per-notifier lock if finer-grained locking is deemed
 * necessary.
 *
 * In addition to the locking mentioned above, the driver should implement a
 * lock to safeguard core GPU SVM function calls that modify state, such as
 * drm_gpusvm_range_find_or_insert and drm_gpusvm_range_remove. This lock is
 * denoted as 'driver_svm_lock' in code examples. Finer-grained driver-side
 * locking should also be possible for concurrent GPU fault processing within a
 * single GPU SVM. The 'driver_svm_lock' can be registered via
 * drm_gpusvm_driver_set_lock to add annotations to GPU SVM.
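 *
 * A minimal sketch of the driver-side lock registration (the driver_svm
 * structure and its svm_lock member are illustrative assumptions, not the
 * fields of an existing driver):
 *
 * .. code-block:: c
 *
 *	// At driver SVM init time, after drm_gpusvm_init():
 *	mutex_init(&driver_svm->svm_lock);
 *	drm_gpusvm_driver_set_lock(&driver_svm->gpusvm, &driver_svm->svm_lock);
 *
 *	// Around state-modifying GPU SVM calls:
 *	mutex_lock(&driver_svm->svm_lock);
 *	range = drm_gpusvm_range_find_or_insert(&driver_svm->gpusvm, fault_addr,
 *						gpuva_start, gpuva_end, &ctx);
 *	mutex_unlock(&driver_svm->svm_lock);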
 */

/**
 * DOC: Migration
 *
 * The migration support is quite simple, allowing migration between RAM and
 * device memory at the range granularity. For example, GPU SVM currently does
 * not support mixing RAM and device memory pages within a range. This means
 * that upon GPU fault, the entire range can be migrated to device memory, and
 * upon CPU fault, the entire range is migrated to RAM. Mixed RAM and device
 * memory storage within a range could be added in the future if required.
 *
 * The reasoning for only supporting range granularity is as follows: it
 * simplifies the implementation, and range sizes are driver-defined and should
 * be relatively small.
 */

/**
 * DOC: Partial Unmapping of Ranges
 *
 * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by CPU resulting
 * in MMU_NOTIFY_UNMAP event) presents several challenges, with the main one
 * being that a subset of the range still has CPU and GPU mappings. If the
 * backing store for the range is in device memory, a subset of the backing
 * store has references. One option would be to split the range and device
 * memory backing store, but the implementation for this would be quite
 * complicated. Given that partial unmappings are rare and driver-defined range
 * sizes are relatively small, GPU SVM does not support splitting of ranges.
 *
 * With no support for range splitting, upon partial unmapping of a range, the
 * driver is expected to invalidate and destroy the entire range. If the range
 * has device memory as its backing, the driver is also expected to migrate any
 * remaining pages back to RAM.
 */

/**
 * DOC: Examples
 *
 * This section provides three examples of how to build the expected driver
 * components: the GPU page fault handler, the garbage collector, and the
 * notifier callback.
 *
 * The generic code provided does not include logic for complex migration
 * policies, optimized invalidations, fine-grained driver locking, or other
 * potentially required driver locking (e.g., DMA-resv locks).
 *
 * 1) GPU page fault handler
 *
 * .. code-block:: c
 *
 *	int driver_bind_range(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range)
 *	{
 *		int err = 0;
 *
 *		driver_alloc_and_setup_memory_for_bind(gpusvm, range);
 *
 *		drm_gpusvm_notifier_lock(gpusvm);
 *		if (drm_gpusvm_range_pages_valid(range))
 *			driver_commit_bind(gpusvm, range);
 *		else
 *			err = -EAGAIN;
 *		drm_gpusvm_notifier_unlock(gpusvm);
 *
 *		return err;
 *	}
 *
 *	int driver_gpu_fault(struct drm_gpusvm *gpusvm, unsigned long fault_addr,
 *			     unsigned long gpuva_start, unsigned long gpuva_end)
 *	{
 *		struct drm_gpusvm_ctx ctx = {};
 *		int err;
 *
 *		driver_svm_lock();
 *	retry:
 *		// Always process UNMAPs first so view of GPU SVM ranges is current
 *		driver_garbage_collector(gpusvm);
 *
 *		range = drm_gpusvm_range_find_or_insert(gpusvm, fault_addr,
 *							gpuva_start, gpuva_end,
 *							&ctx);
 *		if (IS_ERR(range)) {
 *			err = PTR_ERR(range);
 *			goto unlock;
 *		}
 *
 *		if (driver_migration_policy(range)) {
 *			mmap_read_lock(mm);
 *			devmem = driver_alloc_devmem();
 *			err = drm_gpusvm_migrate_to_devmem(gpusvm, range,
 *							   devmem,
 *							   &ctx);
 *			mmap_read_unlock(mm);
 *			if (err)	// CPU mappings may have changed
 *				goto retry;
 *		}
 *
 *		err = drm_gpusvm_range_get_pages(gpusvm, range, &ctx);
 *		if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {	// CPU mappings changed
 *			if (err == -EOPNOTSUPP)
 *				drm_gpusvm_range_evict(gpusvm, range);
 *			goto retry;
 *		} else if (err) {
 *			goto unlock;
 *		}
 *
 *		err = driver_bind_range(gpusvm, range);
 *		if (err == -EAGAIN)	// CPU mappings changed
 *			goto retry;
 *
 *	unlock:
 *		driver_svm_unlock();
 *		return err;
 *	}
 *
 * 2) Garbage Collector
 *
 * .. code-block:: c
 *
 *	void __driver_garbage_collector(struct drm_gpusvm *gpusvm,
 *					struct drm_gpusvm_range *range)
 *	{
 *		assert_driver_svm_locked(gpusvm);
 *
 *		// Partial unmap, migrate any remaining device memory pages back to RAM
 *		if (range->flags.partial_unmap)
 *			drm_gpusvm_range_evict(gpusvm, range);
 *
 *		driver_unbind_range(range);
 *		drm_gpusvm_range_remove(gpusvm, range);
 *	}
 *
 *	void driver_garbage_collector(struct drm_gpusvm *gpusvm)
 *	{
 *		assert_driver_svm_locked(gpusvm);
 *
 *		for_each_range_in_garbage_collector(gpusvm, range)
 *			__driver_garbage_collector(gpusvm, range);
 *	}
 *
 * 3) Notifier callback
 *
 * .. code-block:: c
 *
 *	void driver_invalidation(struct drm_gpusvm *gpusvm,
 *				 struct drm_gpusvm_notifier *notifier,
 *				 const struct mmu_notifier_range *mmu_range)
 *	{
 *		struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
 *		struct drm_gpusvm_range *range = NULL;
 *
 *		driver_invalidate_device_pages(gpusvm, mmu_range->start, mmu_range->end);
 *
 *		drm_gpusvm_for_each_range(range, notifier, mmu_range->start,
 *					  mmu_range->end) {
 *			drm_gpusvm_range_unmap_pages(gpusvm, range, &ctx);
 *
 *			if (mmu_range->event != MMU_NOTIFY_UNMAP)
 *				continue;
 *
 *			drm_gpusvm_range_set_unmapped(range, mmu_range);
 *			driver_garbage_collector_add(gpusvm, range);
 *		}
 *	}
 */

/**
 * npages_in_range() - Calculate the number of pages in a given range
 * @start: The start address of the range
 * @end: The end address of the range
 *
 * This function calculates the number of pages in a given memory range,
 * specified by the start and end addresses. It divides the difference
 * between the end and start addresses by the page size (PAGE_SIZE) to
 * determine the number of pages in the range.
 *
 * Return: The number of pages in the specified range.
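 *
 * For example, with 4K pages, npages_in_range(0x1000, 0x5000) returns 4.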
 */
static unsigned long
npages_in_range(unsigned long start, unsigned long end)
{
	return (end - start) >> PAGE_SHIFT;
}

/**
 * struct drm_gpusvm_zdd - GPU SVM zone device data
 *
 * @refcount: Reference count for the zdd
 * @devmem_allocation: device memory allocation
 * @device_private_page_owner: Device private pages owner
 *
 * This structure serves as a generic wrapper installed in
 * page->zone_device_data. It provides infrastructure for looking up a device
 * memory allocation upon CPU page fault and asynchronously releasing device
 * memory once the CPU has no page references. Asynchronous release is useful
 * because CPU page references can be dropped in IRQ contexts, while releasing
 * device memory likely requires sleeping locks.
 */
struct drm_gpusvm_zdd {
	struct kref refcount;
	struct drm_gpusvm_devmem *devmem_allocation;
	void *device_private_page_owner;
};

/**
 * drm_gpusvm_zdd_alloc() - Allocate a zdd structure.
 * @device_private_page_owner: Device private pages owner
 *
 * This function allocates and initializes a new zdd structure. It sets up the
 * reference count and stores the device private pages owner.
 *
 * Return: Pointer to the allocated zdd on success, NULL on failure.
 */
static struct drm_gpusvm_zdd *
drm_gpusvm_zdd_alloc(void *device_private_page_owner)
{
	struct drm_gpusvm_zdd *zdd;

	zdd = kmalloc(sizeof(*zdd), GFP_KERNEL);
	if (!zdd)
		return NULL;

	kref_init(&zdd->refcount);
	zdd->devmem_allocation = NULL;
	zdd->device_private_page_owner = device_private_page_owner;

	return zdd;
}

/**
 * drm_gpusvm_zdd_get() - Get a reference to a zdd structure.
 * @zdd: Pointer to the zdd structure.
 *
 * This function increments the reference count of the provided zdd structure.
 *
 * Return: Pointer to the zdd structure.
 */
static struct drm_gpusvm_zdd *drm_gpusvm_zdd_get(struct drm_gpusvm_zdd *zdd)
{
	kref_get(&zdd->refcount);
	return zdd;
}

/**
 * drm_gpusvm_zdd_destroy() - Destroy a zdd structure.
 * @ref: Pointer to the reference count structure.
 *
 * This function signals detach completion on any device memory allocation,
 * calls the driver's devmem_release operation if provided, and frees the zdd.
 */
static void drm_gpusvm_zdd_destroy(struct kref *ref)
{
	struct drm_gpusvm_zdd *zdd =
		container_of(ref, struct drm_gpusvm_zdd, refcount);
	struct drm_gpusvm_devmem *devmem = zdd->devmem_allocation;

	if (devmem) {
		complete_all(&devmem->detached);
		if (devmem->ops->devmem_release)
			devmem->ops->devmem_release(devmem);
	}
	kfree(zdd);
}

/**
 * drm_gpusvm_zdd_put() - Put a zdd reference.
 * @zdd: Pointer to the zdd structure.
 *
 * This function decrements the reference count of the provided zdd structure
 * and destroys it if the count drops to zero.
 */
static void drm_gpusvm_zdd_put(struct drm_gpusvm_zdd *zdd)
{
	kref_put(&zdd->refcount, drm_gpusvm_zdd_destroy);
}

/**
 * drm_gpusvm_range_find() - Find GPU SVM range from GPU SVM notifier
 * @notifier: Pointer to the GPU SVM notifier structure.
 * @start: Start address of the range
 * @end: End address of the range
 *
 * Return: A pointer to the drm_gpusvm_range if found or NULL
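 *
 * A hedged usage sketch (assuming the caller serializes against range
 * insertion and removal, e.g. via the driver-side SVM lock described in the
 * Locking section; the driver_* name is illustrative):
 *
 * .. code-block:: c
 *
 *	range = drm_gpusvm_range_find(notifier, addr, addr + SZ_4K);
 *	if (range)
 *		driver_handle_existing_range(range);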
 */
struct drm_gpusvm_range *
drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start,
		      unsigned long end)
{
	struct interval_tree_node *itree;

	itree = interval_tree_iter_first(&notifier->root, start, end - 1);

	if (itree)
		return container_of(itree, struct drm_gpusvm_range, itree);
	else
		return NULL;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_find);

/**
 * drm_gpusvm_for_each_range_safe() - Safely iterate over GPU SVM ranges in a notifier
 * @range__: Iterator variable for the ranges
 * @next__: Iterator variable for the ranges temporary storage
 * @notifier__: Pointer to the GPU SVM notifier
 * @start__: Start address of the range
 * @end__: End address of the range
 *
 * This macro is used to iterate over GPU SVM ranges in a notifier while
 * removing ranges from it.
 */
#define drm_gpusvm_for_each_range_safe(range__, next__, notifier__, start__, end__)	\
	for ((range__) = drm_gpusvm_range_find((notifier__), (start__), (end__)),	\
	     (next__) = __drm_gpusvm_range_next(range__);				\
	     (range__) && (drm_gpusvm_range_start(range__) < (end__));			\
	     (range__) = (next__), (next__) = __drm_gpusvm_range_next(range__))

/**
 * __drm_gpusvm_notifier_next() - get the next drm_gpusvm_notifier in the list
 * @notifier: a pointer to the current drm_gpusvm_notifier
 *
 * Return: A pointer to the next drm_gpusvm_notifier if available, or NULL if
 *         the current notifier is the last one or if the input notifier is
 *         NULL.
 */
static struct drm_gpusvm_notifier *
__drm_gpusvm_notifier_next(struct drm_gpusvm_notifier *notifier)
{
	if (notifier && !list_is_last(&notifier->entry,
				      &notifier->gpusvm->notifier_list))
		return list_next_entry(notifier, entry);

	return NULL;
}

static struct drm_gpusvm_notifier *
notifier_iter_first(struct rb_root_cached *root, unsigned long start,
		    unsigned long last)
{
	struct interval_tree_node *itree;

	itree = interval_tree_iter_first(root, start, last);

	if (itree)
		return container_of(itree, struct drm_gpusvm_notifier, itree);
	else
		return NULL;
}

/**
 * drm_gpusvm_for_each_notifier() - Iterate over GPU SVM notifiers in a gpusvm
 * @notifier__: Iterator variable for the notifiers
 * @gpusvm__: Pointer to the GPU SVM structure
 * @start__: Start address of the notifier
 * @end__: End address of the notifier
 *
 * This macro is used to iterate over GPU SVM notifiers in a gpusvm.
 */
#define drm_gpusvm_for_each_notifier(notifier__, gpusvm__, start__, end__)		\
	for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1);	\
	     (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__));		\
	     (notifier__) = __drm_gpusvm_notifier_next(notifier__))

/**
 * drm_gpusvm_for_each_notifier_safe() - Safely iterate over GPU SVM notifiers in a gpusvm
 * @notifier__: Iterator variable for the notifiers
 * @next__: Iterator variable for the notifiers temporary storage
 * @gpusvm__: Pointer to the GPU SVM structure
 * @start__: Start address of the notifier
 * @end__: End address of the notifier
 *
 * This macro is used to iterate over GPU SVM notifiers in a gpusvm while
 * removing notifiers from it.
 */
#define drm_gpusvm_for_each_notifier_safe(notifier__, next__, gpusvm__, start__, end__)	\
	for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1),	\
	     (next__) = __drm_gpusvm_notifier_next(notifier__);				\
	     (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__));		\
	     (notifier__) = (next__), (next__) = __drm_gpusvm_notifier_next(notifier__))

/**
 * drm_gpusvm_notifier_invalidate() - Invalidate a GPU SVM notifier.
 * @mni: Pointer to the mmu_interval_notifier structure.
 * @mmu_range: Pointer to the mmu_notifier_range structure.
 * @cur_seq: Current sequence number.
 *
 * This function serves as a generic MMU notifier for GPU SVM. It sets the MMU
 * notifier sequence number and calls the driver invalidate vfunc under
 * gpusvm->notifier_lock.
 *
 * Return: true if the operation succeeds, false otherwise.
 */
static bool
drm_gpusvm_notifier_invalidate(struct mmu_interval_notifier *mni,
			       const struct mmu_notifier_range *mmu_range,
			       unsigned long cur_seq)
{
	struct drm_gpusvm_notifier *notifier =
		container_of(mni, typeof(*notifier), notifier);
	struct drm_gpusvm *gpusvm = notifier->gpusvm;

	if (!mmu_notifier_range_blockable(mmu_range))
		return false;

	down_write(&gpusvm->notifier_lock);
	mmu_interval_set_seq(mni, cur_seq);
	gpusvm->ops->invalidate(gpusvm, notifier, mmu_range);
	up_write(&gpusvm->notifier_lock);

	return true;
}

/*
 * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM
 */
static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = {
	.invalidate = drm_gpusvm_notifier_invalidate,
};

/**
 * drm_gpusvm_init() - Initialize the GPU SVM.
 * @gpusvm: Pointer to the GPU SVM structure.
 * @name: Name of the GPU SVM.
 * @drm: Pointer to the DRM device structure.
 * @mm: Pointer to the mm_struct for the address space.
 * @device_private_page_owner: Device private pages owner.
 * @mm_start: Start address of GPU SVM.
 * @mm_range: Range of the GPU SVM.
 * @notifier_size: Size of individual notifiers.
 * @ops: Pointer to the operations structure for GPU SVM.
 * @chunk_sizes: Pointer to the array of chunk sizes used in range allocation.
 *               Entries should be powers of 2 in descending order with last
 *               entry being SZ_4K.
 * @num_chunks: Number of chunks.
 *
 * This function initializes the GPU SVM.
 *
 * Return: 0 on success, a negative error code on failure.
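 *
 * A minimal initialization sketch (the driver structure, the driver_ops
 * instance, the chunk sizes, and the 512M notifier size are illustrative
 * assumptions, not requirements):
 *
 * .. code-block:: c
 *
 *	static const unsigned long driver_chunk_sizes[] = { SZ_2M, SZ_64K, SZ_4K };
 *
 *	err = drm_gpusvm_init(&driver->gpusvm, "driver-svm", &driver->drm,
 *			      current->mm, NULL, 0, TASK_SIZE, SZ_512M,
 *			      &driver_ops, driver_chunk_sizes,
 *			      ARRAY_SIZE(driver_chunk_sizes));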
 */
int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
		    const char *name, struct drm_device *drm,
		    struct mm_struct *mm, void *device_private_page_owner,
		    unsigned long mm_start, unsigned long mm_range,
		    unsigned long notifier_size,
		    const struct drm_gpusvm_ops *ops,
		    const unsigned long *chunk_sizes, int num_chunks)
{
	if (!ops->invalidate || !num_chunks)
		return -EINVAL;

	gpusvm->name = name;
	gpusvm->drm = drm;
	gpusvm->mm = mm;
	gpusvm->device_private_page_owner = device_private_page_owner;
	gpusvm->mm_start = mm_start;
	gpusvm->mm_range = mm_range;
	gpusvm->notifier_size = notifier_size;
	gpusvm->ops = ops;
	gpusvm->chunk_sizes = chunk_sizes;
	gpusvm->num_chunks = num_chunks;

	mmgrab(mm);
	gpusvm->root = RB_ROOT_CACHED;
	INIT_LIST_HEAD(&gpusvm->notifier_list);

	init_rwsem(&gpusvm->notifier_lock);

	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&gpusvm->notifier_lock);
	fs_reclaim_release(GFP_KERNEL);

#ifdef CONFIG_LOCKDEP
	gpusvm->lock_dep_map = NULL;
#endif

	return 0;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_init);

/**
 * drm_gpusvm_notifier_find() - Find GPU SVM notifier
 * @gpusvm: Pointer to the GPU SVM structure
 * @fault_addr: Fault address
 *
 * This function finds the GPU SVM notifier associated with the fault address.
 *
 * Return: Pointer to the GPU SVM notifier on success, NULL otherwise.
 */
static struct drm_gpusvm_notifier *
drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm,
			 unsigned long fault_addr)
{
	return notifier_iter_first(&gpusvm->root, fault_addr, fault_addr + 1);
}

/**
 * to_drm_gpusvm_notifier() - retrieve the container struct for a given rbtree node
 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_notifier struct
 *
 * Return: A pointer to the containing drm_gpusvm_notifier structure.
 */
static struct drm_gpusvm_notifier *to_drm_gpusvm_notifier(struct rb_node *node)
{
	return container_of(node, struct drm_gpusvm_notifier, itree.rb);
}

/**
 * drm_gpusvm_notifier_insert() - Insert GPU SVM notifier
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 *
 * This function inserts the GPU SVM notifier into the GPU SVM RB tree and list.
 */
static void drm_gpusvm_notifier_insert(struct drm_gpusvm *gpusvm,
				       struct drm_gpusvm_notifier *notifier)
{
	struct rb_node *node;
	struct list_head *head;

	interval_tree_insert(&notifier->itree, &gpusvm->root);

	node = rb_prev(&notifier->itree.rb);
	if (node)
		head = &(to_drm_gpusvm_notifier(node))->entry;
	else
		head = &gpusvm->notifier_list;

	list_add(&notifier->entry, head);
}

/**
 * drm_gpusvm_notifier_remove() - Remove GPU SVM notifier
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 *
 * This function removes the GPU SVM notifier from the GPU SVM RB tree and list.
 */
static void drm_gpusvm_notifier_remove(struct drm_gpusvm *gpusvm,
				       struct drm_gpusvm_notifier *notifier)
{
	interval_tree_remove(&notifier->itree, &gpusvm->root);
	list_del(&notifier->entry);
}

/**
 * drm_gpusvm_fini() - Finalize the GPU SVM.
 * @gpusvm: Pointer to the GPU SVM structure.
 *
 * This function finalizes the GPU SVM by cleaning up any remaining ranges and
 * notifiers, and dropping a reference to struct MM.
 */
void drm_gpusvm_fini(struct drm_gpusvm *gpusvm)
{
	struct drm_gpusvm_notifier *notifier, *next;

	drm_gpusvm_for_each_notifier_safe(notifier, next, gpusvm, 0, LONG_MAX) {
		struct drm_gpusvm_range *range, *__next;

		/*
		 * Remove notifier first to avoid racing with any invalidation
		 */
		mmu_interval_notifier_remove(&notifier->notifier);
		notifier->flags.removed = true;

		drm_gpusvm_for_each_range_safe(range, __next, notifier, 0,
					       LONG_MAX)
			drm_gpusvm_range_remove(gpusvm, range);
	}

	mmdrop(gpusvm->mm);
	WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root));
}
EXPORT_SYMBOL_GPL(drm_gpusvm_fini);

/**
 * drm_gpusvm_notifier_alloc() - Allocate GPU SVM notifier
 * @gpusvm: Pointer to the GPU SVM structure
 * @fault_addr: Fault address
 *
 * This function allocates and initializes the GPU SVM notifier structure.
 *
 * Return: Pointer to the allocated GPU SVM notifier on success, ERR_PTR() on failure.
 */
static struct drm_gpusvm_notifier *
drm_gpusvm_notifier_alloc(struct drm_gpusvm *gpusvm, unsigned long fault_addr)
{
	struct drm_gpusvm_notifier *notifier;

	if (gpusvm->ops->notifier_alloc)
		notifier = gpusvm->ops->notifier_alloc();
	else
		notifier = kzalloc(sizeof(*notifier), GFP_KERNEL);

	if (!notifier)
		return ERR_PTR(-ENOMEM);

	notifier->gpusvm = gpusvm;
	notifier->itree.start = ALIGN_DOWN(fault_addr, gpusvm->notifier_size);
	notifier->itree.last = ALIGN(fault_addr + 1, gpusvm->notifier_size) - 1;
	INIT_LIST_HEAD(&notifier->entry);
	notifier->root = RB_ROOT_CACHED;
	INIT_LIST_HEAD(&notifier->range_list);

	return notifier;
}

/**
 * drm_gpusvm_notifier_free() - Free GPU SVM notifier
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 *
 * This function frees the GPU SVM notifier structure.
 */
static void drm_gpusvm_notifier_free(struct drm_gpusvm *gpusvm,
				     struct drm_gpusvm_notifier *notifier)
{
	WARN_ON(!RB_EMPTY_ROOT(&notifier->root.rb_root));

	if (gpusvm->ops->notifier_free)
		gpusvm->ops->notifier_free(notifier);
	else
		kfree(notifier);
}

/**
 * to_drm_gpusvm_range() - retrieve the container struct for a given rbtree node
 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_range struct
 *
 * Return: A pointer to the containing drm_gpusvm_range structure.
 */
static struct drm_gpusvm_range *to_drm_gpusvm_range(struct rb_node *node)
{
	return container_of(node, struct drm_gpusvm_range, itree.rb);
}

/**
 * drm_gpusvm_range_insert() - Insert GPU SVM range
 * @notifier: Pointer to the GPU SVM notifier structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function inserts the GPU SVM range into the notifier RB tree and list.
 */
static void drm_gpusvm_range_insert(struct drm_gpusvm_notifier *notifier,
				    struct drm_gpusvm_range *range)
{
	struct rb_node *node;
	struct list_head *head;

	drm_gpusvm_notifier_lock(notifier->gpusvm);
	interval_tree_insert(&range->itree, &notifier->root);

	node = rb_prev(&range->itree.rb);
	if (node)
		head = &(to_drm_gpusvm_range(node))->entry;
	else
		head = &notifier->range_list;

	list_add(&range->entry, head);
	drm_gpusvm_notifier_unlock(notifier->gpusvm);
}

/**
 * __drm_gpusvm_range_remove() - Remove GPU SVM range
 * @notifier: Pointer to the GPU SVM notifier structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function removes the GPU SVM range from the notifier RB tree and list.
 */
static void __drm_gpusvm_range_remove(struct drm_gpusvm_notifier *notifier,
				      struct drm_gpusvm_range *range)
{
	interval_tree_remove(&range->itree, &notifier->root);
	list_del(&range->entry);
}

/**
 * drm_gpusvm_range_alloc() - Allocate GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 * @fault_addr: Fault address
 * @chunk_size: Chunk size
 * @migrate_devmem: Flag indicating whether to migrate device memory
 *
 * This function allocates and initializes the GPU SVM range structure.
 *
 * Return: Pointer to the allocated GPU SVM range on success, ERR_PTR() on failure.
 */
static struct drm_gpusvm_range *
drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm,
		       struct drm_gpusvm_notifier *notifier,
		       unsigned long fault_addr, unsigned long chunk_size,
		       bool migrate_devmem)
{
	struct drm_gpusvm_range *range;

	if (gpusvm->ops->range_alloc)
		range = gpusvm->ops->range_alloc(gpusvm);
	else
		range = kzalloc(sizeof(*range), GFP_KERNEL);

	if (!range)
		return ERR_PTR(-ENOMEM);

	kref_init(&range->refcount);
	range->gpusvm = gpusvm;
	range->notifier = notifier;
	range->itree.start = ALIGN_DOWN(fault_addr, chunk_size);
	range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1;
	INIT_LIST_HEAD(&range->entry);
	range->notifier_seq = LONG_MAX;
	range->flags.migrate_devmem = migrate_devmem ? 1 : 0;

	return range;
}

/**
 * drm_gpusvm_check_pages() - Check pages
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 * @start: Start address
 * @end: End address
 *
 * Check if pages between start and end have been faulted in on the CPU. Use to
 * prevent migration of pages without CPU backing store.
 *
 * Return: True if pages have been faulted into CPU, False otherwise
 */
static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm,
				   struct drm_gpusvm_notifier *notifier,
				   unsigned long start, unsigned long end)
{
	struct hmm_range hmm_range = {
		.default_flags = 0,
		.notifier = &notifier->notifier,
		.start = start,
		.end = end,
		.dev_private_owner = gpusvm->device_private_page_owner,
	};
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	unsigned long *pfns;
	unsigned long npages = npages_in_range(start, end);
	int err, i;

	mmap_assert_locked(gpusvm->mm);

	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
	if (!pfns)
		return false;

	hmm_range.notifier_seq = mmu_interval_read_begin(&notifier->notifier);
	hmm_range.hmm_pfns = pfns;

	while (true) {
		err = hmm_range_fault(&hmm_range);
		if (err == -EBUSY) {
			if (time_after(jiffies, timeout))
				break;

			hmm_range.notifier_seq =
				mmu_interval_read_begin(&notifier->notifier);
			continue;
		}
		break;
	}
	if (err)
		goto err_free;

	for (i = 0; i < npages;) {
		if (!(pfns[i] & HMM_PFN_VALID)) {
			err = -EFAULT;
			goto err_free;
		}
		i += 0x1 << hmm_pfn_to_map_order(pfns[i]);
	}

err_free:
	kvfree(pfns);
	return err ? false : true;
}

/**
 * drm_gpusvm_range_chunk_size() - Determine chunk size for GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 * @vas: Pointer to the virtual memory area structure
 * @fault_addr: Fault address
 * @gpuva_start: Start address of GPUVA which mirrors CPU
 * @gpuva_end: End address of GPUVA which mirrors CPU
 * @check_pages_threshold: Check CPU pages for present threshold
 *
 * This function determines the chunk size for the GPU SVM range based on the
 * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual
 * memory area boundaries.
 *
 * Return: Chunk size on success, LONG_MAX on failure.
 */
static unsigned long
drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm,
			    struct drm_gpusvm_notifier *notifier,
			    struct vm_area_struct *vas,
			    unsigned long fault_addr,
			    unsigned long gpuva_start,
			    unsigned long gpuva_end,
			    unsigned long check_pages_threshold)
{
	unsigned long start, end;
	int i = 0;

retry:
	for (; i < gpusvm->num_chunks; ++i) {
		start = ALIGN_DOWN(fault_addr, gpusvm->chunk_sizes[i]);
		end = ALIGN(fault_addr + 1, gpusvm->chunk_sizes[i]);

		if (start >= vas->vm_start && end <= vas->vm_end &&
		    start >= drm_gpusvm_notifier_start(notifier) &&
		    end <= drm_gpusvm_notifier_end(notifier) &&
		    start >= gpuva_start && end <= gpuva_end)
			break;
	}

	if (i == gpusvm->num_chunks)
		return LONG_MAX;

	/*
	 * If allocating more than a page, ensure the allocation does not
	 * overlap with existing ranges.
	 */
	if (end - start != SZ_4K) {
		struct drm_gpusvm_range *range;

		range = drm_gpusvm_range_find(notifier, start, end);
		if (range) {
			++i;
			goto retry;
		}

94299624bdfSMatthew Brost 		/*
94399624bdfSMatthew Brost 		 * XXX: Only create range on pages CPU has faulted in. Without
94499624bdfSMatthew Brost 		 * this check, or prefault, on BMG 'xe_exec_system_allocator --r
94599624bdfSMatthew Brost 		 * process-many-malloc' fails. In the failure case, each process
94699624bdfSMatthew Brost 		 * mallocs 16k but the CPU VMA is ~128k which results in 64k SVM
94799624bdfSMatthew Brost 		 * ranges. When migrating the SVM ranges, some processes fail in
94899624bdfSMatthew Brost 		 * drm_gpusvm_migrate_to_devmem with 'migrate.cpages != npages'
94999624bdfSMatthew Brost 		 * and then upon drm_gpusvm_range_get_pages device pages from
95099624bdfSMatthew Brost 		 * other processes are collected + faulted in which creates all
95199624bdfSMatthew Brost 		 * sorts of problems. Unsure exactly how this is happening; the
95299624bdfSMatthew Brost 		 * problem also goes away if 'xe_exec_system_allocator --r
95399624bdfSMatthew Brost 		 * process-many-malloc' mallocs at least 64k at a time.
95499624bdfSMatthew Brost 		 */
95599624bdfSMatthew Brost 		if (end - start <= check_pages_threshold &&
95699624bdfSMatthew Brost 		    !drm_gpusvm_check_pages(gpusvm, notifier, start, end)) {
95799624bdfSMatthew Brost 			++i;
95899624bdfSMatthew Brost 			goto retry;
95999624bdfSMatthew Brost 		}
96099624bdfSMatthew Brost 	}
96199624bdfSMatthew Brost 
96299624bdfSMatthew Brost 	return end - start;
96399624bdfSMatthew Brost }
96499624bdfSMatthew Brost 
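/*
 * Worked example (illustrative; the chunk sizes and addresses below are
 * hypothetical, as a driver might pass at GPU SVM initialization): with a
 * chunk table of { SZ_2M, SZ_64K, SZ_4K } and a fault at 0x201000 inside a
 * VMA spanning [0x200000, 0x240000), the 2M-aligned span [0x200000, 0x400000)
 * exceeds the VMA end, while the 64K-aligned span [0x200000, 0x210000) fits,
 * so SZ_64K is selected (assuming the notifier and GPUVA bounds also cover it
 * and no existing range overlaps the span).
 */
static const unsigned long example_chunk_sizes[] = {
	SZ_2M,	/* largest aligned chunk is tried first */
	SZ_64K,
	SZ_4K,	/* single page always fits; terminal fallback */
};
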
96599624bdfSMatthew Brost #ifdef CONFIG_LOCKDEP
96699624bdfSMatthew Brost /**
96799624bdfSMatthew Brost  * drm_gpusvm_driver_lock_held() - Assert GPU SVM driver lock is held
96899624bdfSMatthew Brost  * @gpusvm: Pointer to the GPU SVM structure.
96999624bdfSMatthew Brost  *
97099624bdfSMatthew Brost  * Ensure driver lock is held.
97199624bdfSMatthew Brost  */
97299624bdfSMatthew Brost static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm)
97399624bdfSMatthew Brost {
97499624bdfSMatthew Brost 	if ((gpusvm)->lock_dep_map)
97599624bdfSMatthew Brost 		lockdep_assert(lock_is_held_type((gpusvm)->lock_dep_map, 0));
97699624bdfSMatthew Brost }
97799624bdfSMatthew Brost #else
97899624bdfSMatthew Brost static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm)
97999624bdfSMatthew Brost {
98099624bdfSMatthew Brost }
98199624bdfSMatthew Brost #endif
98299624bdfSMatthew Brost 
98399624bdfSMatthew Brost /**
98499624bdfSMatthew Brost  * drm_gpusvm_range_find_or_insert() - Find or insert GPU SVM range
98599624bdfSMatthew Brost  * @gpusvm: Pointer to the GPU SVM structure
98699624bdfSMatthew Brost  * @fault_addr: Fault address
98799624bdfSMatthew Brost  * @gpuva_start: Start address of GPUVA which mirrors CPU
98899624bdfSMatthew Brost  * @gpuva_end: End address of GPUVA which mirrors CPU
98999624bdfSMatthew Brost  * @ctx: GPU SVM context
99099624bdfSMatthew Brost  *
99199624bdfSMatthew Brost  * This function finds or inserts a newly allocated GPU SVM range based on the
99299624bdfSMatthew Brost  * fault address. Caller must hold a lock to protect range lookup and insertion.
99399624bdfSMatthew Brost  *
99499624bdfSMatthew Brost  * Return: Pointer to the GPU SVM range on success, ERR_PTR() on failure.
99599624bdfSMatthew Brost  */
99699624bdfSMatthew Brost struct drm_gpusvm_range *
99799624bdfSMatthew Brost drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm,
99899624bdfSMatthew Brost 				unsigned long fault_addr,
99999624bdfSMatthew Brost 				unsigned long gpuva_start,
100099624bdfSMatthew Brost 				unsigned long gpuva_end,
100199624bdfSMatthew Brost 				const struct drm_gpusvm_ctx *ctx)
100299624bdfSMatthew Brost {
100399624bdfSMatthew Brost 	struct drm_gpusvm_notifier *notifier;
100499624bdfSMatthew Brost 	struct drm_gpusvm_range *range;
100599624bdfSMatthew Brost 	struct mm_struct *mm = gpusvm->mm;
100699624bdfSMatthew Brost 	struct vm_area_struct *vas;
100799624bdfSMatthew Brost 	bool notifier_alloc = false;
100899624bdfSMatthew Brost 	unsigned long chunk_size;
100999624bdfSMatthew Brost 	int err;
101099624bdfSMatthew Brost 	bool migrate_devmem;
101199624bdfSMatthew Brost 
101299624bdfSMatthew Brost 	drm_gpusvm_driver_lock_held(gpusvm);
101399624bdfSMatthew Brost 
101499624bdfSMatthew Brost 	if (fault_addr < gpusvm->mm_start ||
101599624bdfSMatthew Brost 	    fault_addr > gpusvm->mm_start + gpusvm->mm_range)
101699624bdfSMatthew Brost 		return ERR_PTR(-EINVAL);
101799624bdfSMatthew Brost 
101899624bdfSMatthew Brost 	if (!mmget_not_zero(mm))
101999624bdfSMatthew Brost 		return ERR_PTR(-EFAULT);
102099624bdfSMatthew Brost 
102199624bdfSMatthew Brost 	notifier = drm_gpusvm_notifier_find(gpusvm, fault_addr);
102299624bdfSMatthew Brost 	if (!notifier) {
102399624bdfSMatthew Brost 		notifier = drm_gpusvm_notifier_alloc(gpusvm, fault_addr);
102499624bdfSMatthew Brost 		if (IS_ERR(notifier)) {
102599624bdfSMatthew Brost 			err = PTR_ERR(notifier);
102699624bdfSMatthew Brost 			goto err_mmunlock;
102799624bdfSMatthew Brost 		}
102899624bdfSMatthew Brost 		notifier_alloc = true;
102999624bdfSMatthew Brost 		err = mmu_interval_notifier_insert(&notifier->notifier,
103099624bdfSMatthew Brost 						   mm,
103199624bdfSMatthew Brost 						   drm_gpusvm_notifier_start(notifier),
103299624bdfSMatthew Brost 						   drm_gpusvm_notifier_size(notifier),
103399624bdfSMatthew Brost 						   &drm_gpusvm_notifier_ops);
103499624bdfSMatthew Brost 		if (err)
103599624bdfSMatthew Brost 			goto err_notifier;
103699624bdfSMatthew Brost 	}
103799624bdfSMatthew Brost 
103899624bdfSMatthew Brost 	mmap_read_lock(mm);
103999624bdfSMatthew Brost 
104099624bdfSMatthew Brost 	vas = vma_lookup(mm, fault_addr);
104199624bdfSMatthew Brost 	if (!vas) {
104299624bdfSMatthew Brost 		err = -ENOENT;
104399624bdfSMatthew Brost 		goto err_notifier_remove;
104499624bdfSMatthew Brost 	}
104599624bdfSMatthew Brost 
104699624bdfSMatthew Brost 	if (!ctx->read_only && !(vas->vm_flags & VM_WRITE)) {
104799624bdfSMatthew Brost 		err = -EPERM;
104899624bdfSMatthew Brost 		goto err_notifier_remove;
104999624bdfSMatthew Brost 	}
105099624bdfSMatthew Brost 
105199624bdfSMatthew Brost 	range = drm_gpusvm_range_find(notifier, fault_addr, fault_addr + 1);
105299624bdfSMatthew Brost 	if (range)
105399624bdfSMatthew Brost 		goto out_mmunlock;
105499624bdfSMatthew Brost 	/*
105599624bdfSMatthew Brost 	 * XXX: Short-circuiting migration based on migrate_vma_* current
105699624bdfSMatthew Brost 	 * limitations. If/when migrate_vma_* add more support, this logic will
105799624bdfSMatthew Brost 	 * have to change.
105899624bdfSMatthew Brost 	 */
105999624bdfSMatthew Brost 	migrate_devmem = ctx->devmem_possible &&
106099624bdfSMatthew Brost 		vma_is_anonymous(vas) && !is_vm_hugetlb_page(vas);
106199624bdfSMatthew Brost 
106299624bdfSMatthew Brost 	chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas,
106399624bdfSMatthew Brost 						 fault_addr, gpuva_start,
106499624bdfSMatthew Brost 						 gpuva_end,
106599624bdfSMatthew Brost 						 ctx->check_pages_threshold);
106699624bdfSMatthew Brost 	if (chunk_size == LONG_MAX) {
106799624bdfSMatthew Brost 		err = -EINVAL;
106899624bdfSMatthew Brost 		goto err_notifier_remove;
106999624bdfSMatthew Brost 	}
107099624bdfSMatthew Brost 
107199624bdfSMatthew Brost 	range = drm_gpusvm_range_alloc(gpusvm, notifier, fault_addr, chunk_size,
107299624bdfSMatthew Brost 				       migrate_devmem);
107399624bdfSMatthew Brost 	if (IS_ERR(range)) {
107499624bdfSMatthew Brost 		err = PTR_ERR(range);
107599624bdfSMatthew Brost 		goto err_notifier_remove;
107699624bdfSMatthew Brost 	}
107799624bdfSMatthew Brost 
107899624bdfSMatthew Brost 	drm_gpusvm_range_insert(notifier, range);
107999624bdfSMatthew Brost 	if (notifier_alloc)
108099624bdfSMatthew Brost 		drm_gpusvm_notifier_insert(gpusvm, notifier);
108199624bdfSMatthew Brost 
108299624bdfSMatthew Brost out_mmunlock:
108399624bdfSMatthew Brost 	mmap_read_unlock(mm);
108499624bdfSMatthew Brost 	mmput(mm);
108599624bdfSMatthew Brost 
108699624bdfSMatthew Brost 	return range;
108799624bdfSMatthew Brost 
108899624bdfSMatthew Brost err_notifier_remove:
108999624bdfSMatthew Brost 	mmap_read_unlock(mm);
109099624bdfSMatthew Brost 	if (notifier_alloc)
109199624bdfSMatthew Brost 		mmu_interval_notifier_remove(&notifier->notifier);
109299624bdfSMatthew Brost err_notifier:
109399624bdfSMatthew Brost 	if (notifier_alloc)
109499624bdfSMatthew Brost 		drm_gpusvm_notifier_free(gpusvm, notifier);
109599624bdfSMatthew Brost err_mmunlock:
109699624bdfSMatthew Brost 	mmput(mm);
109799624bdfSMatthew Brost 	return ERR_PTR(err);
109899624bdfSMatthew Brost }
109999624bdfSMatthew Brost EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert);
110099624bdfSMatthew Brost 
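/*
 * Illustrative sketch (hedged): how a driver's GPU page-fault handler might
 * look up or create a range for the faulting address. The GPUVA bounds and
 * the ctx values are hypothetical, and the driver-defined SVM lock that
 * protects range lookup/insertion is assumed to be held by the caller.
 */
static struct drm_gpusvm_range *
example_fault_lookup(struct drm_gpusvm *gpusvm, unsigned long fault_addr,
		     unsigned long gpuva_start, unsigned long gpuva_end)
{
	struct drm_gpusvm_ctx ctx = {
		.devmem_possible = true,
		.check_pages_threshold = SZ_64K,
	};

	return drm_gpusvm_range_find_or_insert(gpusvm, fault_addr,
					       gpuva_start, gpuva_end, &ctx);
}
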
110199624bdfSMatthew Brost /**
110299624bdfSMatthew Brost  * __drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range (internal)
110399624bdfSMatthew Brost  * @gpusvm: Pointer to the GPU SVM structure
110499624bdfSMatthew Brost  * @range: Pointer to the GPU SVM range structure
110599624bdfSMatthew Brost  * @npages: Number of pages to unmap
110699624bdfSMatthew Brost  *
110799624bdfSMatthew Brost  * This function unmaps pages associated with a GPU SVM range. Assumes and
110899624bdfSMatthew Brost  * asserts correct locking is in place when called.
110999624bdfSMatthew Brost  */
111099624bdfSMatthew Brost static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
111199624bdfSMatthew Brost 					   struct drm_gpusvm_range *range,
111299624bdfSMatthew Brost 					   unsigned long npages)
111399624bdfSMatthew Brost {
111499624bdfSMatthew Brost 	unsigned long i, j;
111599624bdfSMatthew Brost 	struct drm_pagemap *dpagemap = range->dpagemap;
111699624bdfSMatthew Brost 	struct device *dev = gpusvm->drm->dev;
111799624bdfSMatthew Brost 
111899624bdfSMatthew Brost 	lockdep_assert_held(&gpusvm->notifier_lock);
111999624bdfSMatthew Brost 
112099624bdfSMatthew Brost 	if (range->flags.has_dma_mapping) {
1121794f5493SMatthew Brost 		struct drm_gpusvm_range_flags flags = {
1122794f5493SMatthew Brost 			.__flags = range->flags.__flags,
1123794f5493SMatthew Brost 		};
1124794f5493SMatthew Brost 
112599624bdfSMatthew Brost 		for (i = 0, j = 0; i < npages; j++) {
112699624bdfSMatthew Brost 			struct drm_pagemap_device_addr *addr = &range->dma_addr[j];
112799624bdfSMatthew Brost 
112899624bdfSMatthew Brost 			if (addr->proto == DRM_INTERCONNECT_SYSTEM)
112999624bdfSMatthew Brost 				dma_unmap_page(dev,
113099624bdfSMatthew Brost 					       addr->addr,
113199624bdfSMatthew Brost 					       PAGE_SIZE << addr->order,
113299624bdfSMatthew Brost 					       addr->dir);
113399624bdfSMatthew Brost 			else if (dpagemap && dpagemap->ops->device_unmap)
113499624bdfSMatthew Brost 				dpagemap->ops->device_unmap(dpagemap,
113599624bdfSMatthew Brost 							    dev, *addr);
113699624bdfSMatthew Brost 			i += 1 << addr->order;
113799624bdfSMatthew Brost 		}
1138794f5493SMatthew Brost 
1139794f5493SMatthew Brost 		/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
1140794f5493SMatthew Brost 		flags.has_devmem_pages = false;
1141794f5493SMatthew Brost 		flags.has_dma_mapping = false;
1142794f5493SMatthew Brost 		WRITE_ONCE(range->flags.__flags, flags.__flags);
1143794f5493SMatthew Brost 
114499624bdfSMatthew Brost 		range->dpagemap = NULL;
114599624bdfSMatthew Brost 	}
114699624bdfSMatthew Brost }
114799624bdfSMatthew Brost 
114899624bdfSMatthew Brost /**
114999624bdfSMatthew Brost  * drm_gpusvm_range_free_pages() - Free pages associated with a GPU SVM range
115099624bdfSMatthew Brost  * @gpusvm: Pointer to the GPU SVM structure
115199624bdfSMatthew Brost  * @range: Pointer to the GPU SVM range structure
115299624bdfSMatthew Brost  *
115399624bdfSMatthew Brost  * This function frees the dma address array associated with a GPU SVM range.
115499624bdfSMatthew Brost  */
115599624bdfSMatthew Brost static void drm_gpusvm_range_free_pages(struct drm_gpusvm *gpusvm,
115699624bdfSMatthew Brost 					struct drm_gpusvm_range *range)
115799624bdfSMatthew Brost {
115899624bdfSMatthew Brost 	lockdep_assert_held(&gpusvm->notifier_lock);
115999624bdfSMatthew Brost 
116099624bdfSMatthew Brost 	if (range->dma_addr) {
116199624bdfSMatthew Brost 		kvfree(range->dma_addr);
116299624bdfSMatthew Brost 		range->dma_addr = NULL;
116399624bdfSMatthew Brost 	}
116499624bdfSMatthew Brost }
116599624bdfSMatthew Brost 
116699624bdfSMatthew Brost /**
116799624bdfSMatthew Brost  * drm_gpusvm_range_remove() - Remove GPU SVM range
116899624bdfSMatthew Brost  * @gpusvm: Pointer to the GPU SVM structure
116999624bdfSMatthew Brost  * @range: Pointer to the GPU SVM range to be removed
117099624bdfSMatthew Brost  *
117199624bdfSMatthew Brost  * This function removes the specified GPU SVM range and also removes the parent
117299624bdfSMatthew Brost  * GPU SVM notifier if no more ranges remain in the notifier. The caller must
117399624bdfSMatthew Brost  * hold a lock to protect range and notifier removal.
117499624bdfSMatthew Brost  */
117599624bdfSMatthew Brost void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm,
117699624bdfSMatthew Brost 			     struct drm_gpusvm_range *range)
117799624bdfSMatthew Brost {
117899624bdfSMatthew Brost 	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
117999624bdfSMatthew Brost 					       drm_gpusvm_range_end(range));
118099624bdfSMatthew Brost 	struct drm_gpusvm_notifier *notifier;
118199624bdfSMatthew Brost 
118299624bdfSMatthew Brost 	drm_gpusvm_driver_lock_held(gpusvm);
118399624bdfSMatthew Brost 
118499624bdfSMatthew Brost 	notifier = drm_gpusvm_notifier_find(gpusvm,
118599624bdfSMatthew Brost 					    drm_gpusvm_range_start(range));
118699624bdfSMatthew Brost 	if (WARN_ON_ONCE(!notifier))
118799624bdfSMatthew Brost 		return;
118899624bdfSMatthew Brost 
118999624bdfSMatthew Brost 	drm_gpusvm_notifier_lock(gpusvm);
119099624bdfSMatthew Brost 	__drm_gpusvm_range_unmap_pages(gpusvm, range, npages);
119199624bdfSMatthew Brost 	drm_gpusvm_range_free_pages(gpusvm, range);
119299624bdfSMatthew Brost 	__drm_gpusvm_range_remove(notifier, range);
119399624bdfSMatthew Brost 	drm_gpusvm_notifier_unlock(gpusvm);
119499624bdfSMatthew Brost 
119599624bdfSMatthew Brost 	drm_gpusvm_range_put(range);
119699624bdfSMatthew Brost 
119799624bdfSMatthew Brost 	if (RB_EMPTY_ROOT(&notifier->root.rb_root)) {
119899624bdfSMatthew Brost 		if (!notifier->flags.removed)
119999624bdfSMatthew Brost 			mmu_interval_notifier_remove(&notifier->notifier);
120099624bdfSMatthew Brost 		drm_gpusvm_notifier_remove(gpusvm, notifier);
120199624bdfSMatthew Brost 		drm_gpusvm_notifier_free(gpusvm, notifier);
120299624bdfSMatthew Brost 	}
120399624bdfSMatthew Brost }
120499624bdfSMatthew Brost EXPORT_SYMBOL_GPL(drm_gpusvm_range_remove);
120599624bdfSMatthew Brost 
120699624bdfSMatthew Brost /**
120799624bdfSMatthew Brost  * drm_gpusvm_range_get() - Get a reference to GPU SVM range
120899624bdfSMatthew Brost  * @range: Pointer to the GPU SVM range
120999624bdfSMatthew Brost  *
121099624bdfSMatthew Brost  * This function increments the reference count of the specified GPU SVM range.
121199624bdfSMatthew Brost  *
121299624bdfSMatthew Brost  * Return: Pointer to the GPU SVM range.
121399624bdfSMatthew Brost  */
121499624bdfSMatthew Brost struct drm_gpusvm_range *
121599624bdfSMatthew Brost drm_gpusvm_range_get(struct drm_gpusvm_range *range)
121699624bdfSMatthew Brost {
121799624bdfSMatthew Brost 	kref_get(&range->refcount);
121899624bdfSMatthew Brost 
121999624bdfSMatthew Brost 	return range;
122099624bdfSMatthew Brost }
122199624bdfSMatthew Brost EXPORT_SYMBOL_GPL(drm_gpusvm_range_get);
122299624bdfSMatthew Brost 
122399624bdfSMatthew Brost /**
122499624bdfSMatthew Brost  * drm_gpusvm_range_destroy() - Destroy GPU SVM range
122599624bdfSMatthew Brost  * @refcount: Pointer to the reference counter embedded in the GPU SVM range
122699624bdfSMatthew Brost  *
122799624bdfSMatthew Brost  * This function destroys the specified GPU SVM range when its reference count
122899624bdfSMatthew Brost  * reaches zero. If a custom range-free function is provided, it is invoked to
122999624bdfSMatthew Brost  * free the range; otherwise, the range is deallocated using kfree().
123099624bdfSMatthew Brost  */
123199624bdfSMatthew Brost static void drm_gpusvm_range_destroy(struct kref *refcount)
123299624bdfSMatthew Brost {
123399624bdfSMatthew Brost 	struct drm_gpusvm_range *range =
123499624bdfSMatthew Brost 		container_of(refcount, struct drm_gpusvm_range, refcount);
123599624bdfSMatthew Brost 	struct drm_gpusvm *gpusvm = range->gpusvm;
123699624bdfSMatthew Brost 
123799624bdfSMatthew Brost 	if (gpusvm->ops->range_free)
123899624bdfSMatthew Brost 		gpusvm->ops->range_free(range);
123999624bdfSMatthew Brost 	else
124099624bdfSMatthew Brost 		kfree(range);
124199624bdfSMatthew Brost }
124299624bdfSMatthew Brost 
124399624bdfSMatthew Brost /**
124499624bdfSMatthew Brost  * drm_gpusvm_range_put() - Put a reference to GPU SVM range
124599624bdfSMatthew Brost  * @range: Pointer to the GPU SVM range
124699624bdfSMatthew Brost  *
124799624bdfSMatthew Brost  * This function decrements the reference count of the specified GPU SVM range
124899624bdfSMatthew Brost  * and frees it when the count reaches zero.
124999624bdfSMatthew Brost  */
125099624bdfSMatthew Brost void drm_gpusvm_range_put(struct drm_gpusvm_range *range)
125199624bdfSMatthew Brost {
125299624bdfSMatthew Brost 	kref_put(&range->refcount, drm_gpusvm_range_destroy);
125399624bdfSMatthew Brost }
125499624bdfSMatthew Brost EXPORT_SYMBOL_GPL(drm_gpusvm_range_put);
125599624bdfSMatthew Brost 
125699624bdfSMatthew Brost /**
125799624bdfSMatthew Brost  * drm_gpusvm_range_pages_valid() - GPU SVM range pages valid
125899624bdfSMatthew Brost  * @gpusvm: Pointer to the GPU SVM structure
125999624bdfSMatthew Brost  * @range: Pointer to the GPU SVM range structure
126099624bdfSMatthew Brost  *
126199624bdfSMatthew Brost  * This function determines whether a GPU SVM range's pages are valid. Expected
126299624bdfSMatthew Brost  * to be called holding gpusvm->notifier_lock and as the last step before
126399624bdfSMatthew Brost  * committing a GPU binding. This is akin to a notifier seqno check in the HMM
126499624bdfSMatthew Brost  * documentation but, due to wider notifiers (i.e., notifiers which span
126599624bdfSMatthew Brost  * multiple ranges), this function is required for finer grained checking
126699624bdfSMatthew Brost  * (i.e., per range) of whether pages are valid.
126799624bdfSMatthew Brost  *
126899624bdfSMatthew Brost  * Return: True if GPU SVM range has valid pages, False otherwise
126999624bdfSMatthew Brost  */
127099624bdfSMatthew Brost bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm,
127199624bdfSMatthew Brost 				  struct drm_gpusvm_range *range)
127299624bdfSMatthew Brost {
127399624bdfSMatthew Brost 	lockdep_assert_held(&gpusvm->notifier_lock);
127499624bdfSMatthew Brost 
127599624bdfSMatthew Brost 	return range->flags.has_devmem_pages || range->flags.has_dma_mapping;
127699624bdfSMatthew Brost }
127799624bdfSMatthew Brost EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid);
127899624bdfSMatthew Brost 
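/*
 * Illustrative sketch (hedged): validity check as the last step before
 * committing a GPU binding. A real driver would program its page tables
 * while still holding the notifier lock; that step is elided here.
 */
static bool example_bind_is_safe(struct drm_gpusvm *gpusvm,
				 struct drm_gpusvm_range *range)
{
	bool valid;

	drm_gpusvm_notifier_lock(gpusvm);
	valid = drm_gpusvm_range_pages_valid(gpusvm, range);
	/* ...issue the GPU page-table update here if valid... */
	drm_gpusvm_notifier_unlock(gpusvm);

	return valid;
}
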
127999624bdfSMatthew Brost /**
128099624bdfSMatthew Brost  * drm_gpusvm_range_pages_valid_unlocked() - GPU SVM range pages valid unlocked
128199624bdfSMatthew Brost  * @gpusvm: Pointer to the GPU SVM structure
128299624bdfSMatthew Brost  * @range: Pointer to the GPU SVM range structure
128399624bdfSMatthew Brost  *
128499624bdfSMatthew Brost  * This function determines whether a GPU SVM range's pages are valid. Expected
128599624bdfSMatthew Brost  * to be called without holding gpusvm->notifier_lock.
128699624bdfSMatthew Brost  *
128799624bdfSMatthew Brost  * Return: True if GPU SVM range has valid pages, False otherwise
128899624bdfSMatthew Brost  */
128999624bdfSMatthew Brost static bool
129099624bdfSMatthew Brost drm_gpusvm_range_pages_valid_unlocked(struct drm_gpusvm *gpusvm,
129199624bdfSMatthew Brost 				      struct drm_gpusvm_range *range)
129299624bdfSMatthew Brost {
129399624bdfSMatthew Brost 	bool pages_valid;
129499624bdfSMatthew Brost 
129599624bdfSMatthew Brost 	if (!range->dma_addr)
129699624bdfSMatthew Brost 		return false;
129799624bdfSMatthew Brost 
129899624bdfSMatthew Brost 	drm_gpusvm_notifier_lock(gpusvm);
129999624bdfSMatthew Brost 	pages_valid = drm_gpusvm_range_pages_valid(gpusvm, range);
130099624bdfSMatthew Brost 	if (!pages_valid)
130199624bdfSMatthew Brost 		drm_gpusvm_range_free_pages(gpusvm, range);
130299624bdfSMatthew Brost 	drm_gpusvm_notifier_unlock(gpusvm);
130399624bdfSMatthew Brost 
130499624bdfSMatthew Brost 	return pages_valid;
130599624bdfSMatthew Brost }
130699624bdfSMatthew Brost 
130799624bdfSMatthew Brost /**
130899624bdfSMatthew Brost  * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range
130999624bdfSMatthew Brost  * @gpusvm: Pointer to the GPU SVM structure
131099624bdfSMatthew Brost  * @range: Pointer to the GPU SVM range structure
131199624bdfSMatthew Brost  * @ctx: GPU SVM context
131299624bdfSMatthew Brost  *
131399624bdfSMatthew Brost  * This function gets pages for a GPU SVM range and ensures they are mapped for
131499624bdfSMatthew Brost  * DMA access.
131599624bdfSMatthew Brost  *
131699624bdfSMatthew Brost  * Return: 0 on success, negative error code on failure.
131799624bdfSMatthew Brost  */
131899624bdfSMatthew Brost int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm,
131999624bdfSMatthew Brost 			       struct drm_gpusvm_range *range,
132099624bdfSMatthew Brost 			       const struct drm_gpusvm_ctx *ctx)
132199624bdfSMatthew Brost {
132299624bdfSMatthew Brost 	struct mmu_interval_notifier *notifier = &range->notifier->notifier;
132399624bdfSMatthew Brost 	struct hmm_range hmm_range = {
132499624bdfSMatthew Brost 		.default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ? 0 :
132599624bdfSMatthew Brost 			HMM_PFN_REQ_WRITE),
132699624bdfSMatthew Brost 		.notifier = notifier,
132799624bdfSMatthew Brost 		.start = drm_gpusvm_range_start(range),
132899624bdfSMatthew Brost 		.end = drm_gpusvm_range_end(range),
132999624bdfSMatthew Brost 		.dev_private_owner = gpusvm->device_private_page_owner,
133099624bdfSMatthew Brost 	};
133199624bdfSMatthew Brost 	struct mm_struct *mm = gpusvm->mm;
133299624bdfSMatthew Brost 	struct drm_gpusvm_zdd *zdd;
133399624bdfSMatthew Brost 	unsigned long timeout =
133499624bdfSMatthew Brost 		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
133599624bdfSMatthew Brost 	unsigned long i, j;
133699624bdfSMatthew Brost 	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
133799624bdfSMatthew Brost 					       drm_gpusvm_range_end(range));
133899624bdfSMatthew Brost 	unsigned long num_dma_mapped;
133999624bdfSMatthew Brost 	unsigned int order = 0;
134099624bdfSMatthew Brost 	unsigned long *pfns;
134199624bdfSMatthew Brost 	int err = 0;
134299624bdfSMatthew Brost 	struct dev_pagemap *pagemap;
134399624bdfSMatthew Brost 	struct drm_pagemap *dpagemap;
134499624bdfSMatthew Brost 	struct drm_gpusvm_range_flags flags;
1345794f5493SMatthew Brost 
134699624bdfSMatthew Brost retry:
134799624bdfSMatthew Brost 	hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
134899624bdfSMatthew Brost 	if (drm_gpusvm_range_pages_valid_unlocked(gpusvm, range))
134999624bdfSMatthew Brost 		goto set_seqno;
135099624bdfSMatthew Brost 
135199624bdfSMatthew Brost 	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
135299624bdfSMatthew Brost 	if (!pfns)
135399624bdfSMatthew Brost 		return -ENOMEM;
135499624bdfSMatthew Brost 
135599624bdfSMatthew Brost 	if (!mmget_not_zero(mm)) {
135699624bdfSMatthew Brost 		err = -EFAULT;
135799624bdfSMatthew Brost 		goto err_free;
135899624bdfSMatthew Brost 	}
135999624bdfSMatthew Brost 
136099624bdfSMatthew Brost 	hmm_range.hmm_pfns = pfns;
136199624bdfSMatthew Brost 	while (true) {
136299624bdfSMatthew Brost 		mmap_read_lock(mm);
136399624bdfSMatthew Brost 		err = hmm_range_fault(&hmm_range);
136499624bdfSMatthew Brost 		mmap_read_unlock(mm);
136599624bdfSMatthew Brost 
136699624bdfSMatthew Brost 		if (err == -EBUSY) {
136799624bdfSMatthew Brost 			if (time_after(jiffies, timeout))
136899624bdfSMatthew Brost 				break;
136999624bdfSMatthew Brost 
137099624bdfSMatthew Brost 			hmm_range.notifier_seq =
137199624bdfSMatthew Brost 				mmu_interval_read_begin(notifier);
137299624bdfSMatthew Brost 			continue;
137399624bdfSMatthew Brost 		}
137499624bdfSMatthew Brost 		break;
137599624bdfSMatthew Brost 	}
137699624bdfSMatthew Brost 	mmput(mm);
137799624bdfSMatthew Brost 	if (err)
137899624bdfSMatthew Brost 		goto err_free;
137999624bdfSMatthew Brost 
138099624bdfSMatthew Brost map_pages:
138199624bdfSMatthew Brost 	/*
138299624bdfSMatthew Brost 	 * Perform all DMA mappings under the notifier lock so that freed
138399624bdfSMatthew Brost 	 * pages are not accessed. A concurrent notifier will either block
138499624bdfSMatthew Brost 	 * on the notifier lock or unmap the DMA mappings.
138599624bdfSMatthew Brost 	 */
138699624bdfSMatthew Brost 	drm_gpusvm_notifier_lock(gpusvm);
138799624bdfSMatthew Brost 
138899624bdfSMatthew Brost 	flags.__flags = range->flags.__flags;
138999624bdfSMatthew Brost 	if (flags.unmapped) {
1390794f5493SMatthew Brost 		drm_gpusvm_notifier_unlock(gpusvm);
1391794f5493SMatthew Brost 		err = -EFAULT;
139299624bdfSMatthew Brost 		goto err_free;
139399624bdfSMatthew Brost 	}
139499624bdfSMatthew Brost 
139599624bdfSMatthew Brost 	if (mmu_interval_read_retry(notifier, hmm_range.notifier_seq)) {
139699624bdfSMatthew Brost 		drm_gpusvm_notifier_unlock(gpusvm);
139799624bdfSMatthew Brost 		kvfree(pfns);
139899624bdfSMatthew Brost 		goto retry;
139999624bdfSMatthew Brost 	}
140099624bdfSMatthew Brost 
140199624bdfSMatthew Brost 	if (!range->dma_addr) {
140299624bdfSMatthew Brost 		/* Unlock and restart mapping to allocate memory. */
140399624bdfSMatthew Brost 		drm_gpusvm_notifier_unlock(gpusvm);
140499624bdfSMatthew Brost 		range->dma_addr = kvmalloc_array(npages,
140599624bdfSMatthew Brost 						 sizeof(*range->dma_addr),
140699624bdfSMatthew Brost 						 GFP_KERNEL);
140799624bdfSMatthew Brost 		if (!range->dma_addr) {
140899624bdfSMatthew Brost 			err = -ENOMEM;
140999624bdfSMatthew Brost 			goto err_free;
141099624bdfSMatthew Brost 		}
141199624bdfSMatthew Brost 		goto map_pages;
141299624bdfSMatthew Brost 	}
141399624bdfSMatthew Brost 
141499624bdfSMatthew Brost 	zdd = NULL;
141599624bdfSMatthew Brost 	num_dma_mapped = 0;
141699624bdfSMatthew Brost 	for (i = 0, j = 0; i < npages; ++j) {
141799624bdfSMatthew Brost 		struct page *page = hmm_pfn_to_page(pfns[i]);
141899624bdfSMatthew Brost 
141999624bdfSMatthew Brost 		order = hmm_pfn_to_map_order(pfns[i]);
142099624bdfSMatthew Brost 		if (is_device_private_page(page) ||
142199624bdfSMatthew Brost 		    is_device_coherent_page(page)) {
142299624bdfSMatthew Brost 			if (zdd != page->zone_device_data && i > 0) {
142399624bdfSMatthew Brost 				err = -EOPNOTSUPP;
142499624bdfSMatthew Brost 				goto err_unmap;
142599624bdfSMatthew Brost 			}
142699624bdfSMatthew Brost 			zdd = page->zone_device_data;
142799624bdfSMatthew Brost 			if (pagemap != page_pgmap(page)) {
142899624bdfSMatthew Brost 				if (i > 0) {
1429eb0ece16SLinus Torvalds 					err = -EOPNOTSUPP;
143099624bdfSMatthew Brost 					goto err_unmap;
143199624bdfSMatthew Brost 				}
143299624bdfSMatthew Brost 
143399624bdfSMatthew Brost 				pagemap = page_pgmap(page);
143499624bdfSMatthew Brost 				dpagemap = zdd->devmem_allocation->dpagemap;
1435eb0ece16SLinus Torvalds 				if (drm_WARN_ON(gpusvm->drm, !dpagemap)) {
143699624bdfSMatthew Brost 					/*
143799624bdfSMatthew Brost 					 * Raced. This is not supposed to happen
143899624bdfSMatthew Brost 					 * since hmm_range_fault() should've migrated
143999624bdfSMatthew Brost 					 * this page to system.
144099624bdfSMatthew Brost 					 */
144199624bdfSMatthew Brost 					err = -EAGAIN;
144299624bdfSMatthew Brost 					goto err_unmap;
144399624bdfSMatthew Brost 				}
144499624bdfSMatthew Brost 			}
144599624bdfSMatthew Brost 			range->dma_addr[j] =
144699624bdfSMatthew Brost 				dpagemap->ops->device_map(dpagemap,
144799624bdfSMatthew Brost 							  gpusvm->drm->dev,
144899624bdfSMatthew Brost 							  page, order,
144999624bdfSMatthew Brost 							  DMA_BIDIRECTIONAL);
145099624bdfSMatthew Brost 			if (dma_mapping_error(gpusvm->drm->dev,
145199624bdfSMatthew Brost 					      range->dma_addr[j].addr)) {
145299624bdfSMatthew Brost 				err = -EFAULT;
145399624bdfSMatthew Brost 				goto err_unmap;
145499624bdfSMatthew Brost 			}
145599624bdfSMatthew Brost 		} else {
145699624bdfSMatthew Brost 			dma_addr_t addr;
145799624bdfSMatthew Brost 
145899624bdfSMatthew Brost 			if (is_zone_device_page(page) || zdd) {
145999624bdfSMatthew Brost 				err = -EOPNOTSUPP;
146099624bdfSMatthew Brost 				goto err_unmap;
146199624bdfSMatthew Brost 			}
146299624bdfSMatthew Brost 
146399624bdfSMatthew Brost 			if (ctx->devmem_only) {
146499624bdfSMatthew Brost 				err = -EFAULT;
146599624bdfSMatthew Brost 				goto err_unmap;
146699624bdfSMatthew Brost 			}
14677bd68ce2SHimal Prasad Ghimiray 
14687bd68ce2SHimal Prasad Ghimiray 			addr = dma_map_page(gpusvm->drm->dev,
14697bd68ce2SHimal Prasad Ghimiray 					    page, 0,
14707bd68ce2SHimal Prasad Ghimiray 					    PAGE_SIZE << order,
14717bd68ce2SHimal Prasad Ghimiray 					    DMA_BIDIRECTIONAL);
147299624bdfSMatthew Brost 			if (dma_mapping_error(gpusvm->drm->dev, addr)) {
147399624bdfSMatthew Brost 				err = -EFAULT;
147499624bdfSMatthew Brost 				goto err_unmap;
147599624bdfSMatthew Brost 			}
147699624bdfSMatthew Brost 
147799624bdfSMatthew Brost 			range->dma_addr[j] = drm_pagemap_device_addr_encode
147899624bdfSMatthew Brost 				(addr, DRM_INTERCONNECT_SYSTEM, order,
147999624bdfSMatthew Brost 				 DMA_BIDIRECTIONAL);
148099624bdfSMatthew Brost 		}
148199624bdfSMatthew Brost 		i += 1 << order;
148299624bdfSMatthew Brost 		num_dma_mapped = i;
148399624bdfSMatthew Brost 		flags.has_dma_mapping = true;
148499624bdfSMatthew Brost 	}
148599624bdfSMatthew Brost 
148699624bdfSMatthew Brost 	if (zdd) {
1487794f5493SMatthew Brost 		flags.has_devmem_pages = true;
148899624bdfSMatthew Brost 		range->dpagemap = dpagemap;
148999624bdfSMatthew Brost 	}
149099624bdfSMatthew Brost 
1491794f5493SMatthew Brost 	/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
149299624bdfSMatthew Brost 	WRITE_ONCE(range->flags.__flags, flags.__flags);
149399624bdfSMatthew Brost 
149499624bdfSMatthew Brost 	drm_gpusvm_notifier_unlock(gpusvm);
1495794f5493SMatthew Brost 	kvfree(pfns);
1496794f5493SMatthew Brost set_seqno:
1497794f5493SMatthew Brost 	range->notifier_seq = hmm_range.notifier_seq;
149899624bdfSMatthew Brost 
149999624bdfSMatthew Brost 	return 0;
150099624bdfSMatthew Brost 
150199624bdfSMatthew Brost err_unmap:
150299624bdfSMatthew Brost 	__drm_gpusvm_range_unmap_pages(gpusvm, range, num_dma_mapped);
150399624bdfSMatthew Brost 	drm_gpusvm_notifier_unlock(gpusvm);
150499624bdfSMatthew Brost err_free:
150599624bdfSMatthew Brost 	kvfree(pfns);
150699624bdfSMatthew Brost 	if (err == -EAGAIN)
150799624bdfSMatthew Brost 		goto retry;
150899624bdfSMatthew Brost 	return err;
150999624bdfSMatthew Brost }
151099624bdfSMatthew Brost EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages);
151199624bdfSMatthew Brost 
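/*
 * Illustrative sketch (hedged): driver-side retry loop around page collection
 * and binding. example_bind_is_safe() is the hypothetical helper sketched
 * above; if the range is invalidated between drm_gpusvm_range_get_pages() and
 * the bind, the pages are collected again.
 */
static int example_get_pages_and_bind(struct drm_gpusvm *gpusvm,
				      struct drm_gpusvm_range *range,
				      const struct drm_gpusvm_ctx *ctx)
{
	int err;

	do {
		err = drm_gpusvm_range_get_pages(gpusvm, range, ctx);
		if (err)
			return err;
	} while (!example_bind_is_safe(gpusvm, range));

	return 0;
}
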
151299624bdfSMatthew Brost /**
151399624bdfSMatthew Brost  * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range
151499624bdfSMatthew Brost  * @gpusvm: Pointer to the GPU SVM structure
151599624bdfSMatthew Brost  * @range: Pointer to the GPU SVM range structure
151699624bdfSMatthew Brost  * @ctx: GPU SVM context
151799624bdfSMatthew Brost  *
151899624bdfSMatthew Brost  * This function unmaps pages associated with a GPU SVM range. If @ctx->in_notifier
151999624bdfSMatthew Brost  * is set, it is assumed that gpusvm->notifier_lock is held in write mode; if it
152099624bdfSMatthew Brost  * is clear, it acquires gpusvm->notifier_lock in read mode. Must be called on
152199624bdfSMatthew Brost  * each GPU SVM range attached to the notifier in gpusvm->ops->invalidate for the
152299624bdfSMatthew Brost  * IOMMU security model.
152399624bdfSMatthew Brost  */
152499624bdfSMatthew Brost void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
152599624bdfSMatthew Brost 				  struct drm_gpusvm_range *range,
152699624bdfSMatthew Brost 				  const struct drm_gpusvm_ctx *ctx)
152799624bdfSMatthew Brost {
152899624bdfSMatthew Brost 	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
152999624bdfSMatthew Brost 					       drm_gpusvm_range_end(range));
153099624bdfSMatthew Brost 
153199624bdfSMatthew Brost 	if (ctx->in_notifier)
153299624bdfSMatthew Brost 		lockdep_assert_held_write(&gpusvm->notifier_lock);
153399624bdfSMatthew Brost 	else
153499624bdfSMatthew Brost 		drm_gpusvm_notifier_lock(gpusvm);
153599624bdfSMatthew Brost 
153699624bdfSMatthew Brost 	__drm_gpusvm_range_unmap_pages(gpusvm, range, npages);
153799624bdfSMatthew Brost 
153899624bdfSMatthew Brost 	if (!ctx->in_notifier)
153999624bdfSMatthew Brost 		drm_gpusvm_notifier_unlock(gpusvm);
154099624bdfSMatthew Brost }
154199624bdfSMatthew Brost EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages);
154299624bdfSMatthew Brost 
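/*
 * Illustrative sketch (hedged): the unmap step of a driver's
 * gpusvm->ops->invalidate() callback. Walking the notifier's ranges and
 * zapping GPU page tables are driver specific and elided; in_notifier tells
 * GPU SVM that gpusvm->notifier_lock is already held in write mode.
 */
static void example_invalidate_one_range(struct drm_gpusvm *gpusvm,
					 struct drm_gpusvm_range *range)
{
	struct drm_gpusvm_ctx ctx = { .in_notifier = true, };

	/* ...zap the GPU page tables covering the range first... */
	drm_gpusvm_range_unmap_pages(gpusvm, range, &ctx);
}
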
154399624bdfSMatthew Brost /**
154499624bdfSMatthew Brost  * drm_gpusvm_migration_unlock_put_page() - Put a migration page
154599624bdfSMatthew Brost  * @page: Pointer to the page to put
154699624bdfSMatthew Brost  *
154799624bdfSMatthew Brost  * This function unlocks and puts a page.
154899624bdfSMatthew Brost  */
154999624bdfSMatthew Brost static void drm_gpusvm_migration_unlock_put_page(struct page *page)
155099624bdfSMatthew Brost {
155199624bdfSMatthew Brost 	unlock_page(page);
155299624bdfSMatthew Brost 	put_page(page);
155399624bdfSMatthew Brost }
155499624bdfSMatthew Brost 
155599624bdfSMatthew Brost /**
155699624bdfSMatthew Brost  * drm_gpusvm_migration_unlock_put_pages() - Put migration pages
155799624bdfSMatthew Brost  * @npages: Number of pages
155899624bdfSMatthew Brost  * @migrate_pfn: Array of migrate page frame numbers
155999624bdfSMatthew Brost  *
156099624bdfSMatthew Brost  * This function unlocks and puts an array of pages.
156199624bdfSMatthew Brost  */
156299624bdfSMatthew Brost static void drm_gpusvm_migration_unlock_put_pages(unsigned long npages,
156399624bdfSMatthew Brost 						  unsigned long *migrate_pfn)
156499624bdfSMatthew Brost {
156599624bdfSMatthew Brost 	unsigned long i;
156699624bdfSMatthew Brost 
156799624bdfSMatthew Brost 	for (i = 0; i < npages; ++i) {
156899624bdfSMatthew Brost 		struct page *page;
156999624bdfSMatthew Brost 
157099624bdfSMatthew Brost 		if (!migrate_pfn[i])
157199624bdfSMatthew Brost 			continue;
157299624bdfSMatthew Brost 
157399624bdfSMatthew Brost 		page = migrate_pfn_to_page(migrate_pfn[i]);
157499624bdfSMatthew Brost 		drm_gpusvm_migration_unlock_put_page(page);
157599624bdfSMatthew Brost 		migrate_pfn[i] = 0;
157699624bdfSMatthew Brost 	}
157799624bdfSMatthew Brost }
157899624bdfSMatthew Brost 
157999624bdfSMatthew Brost /**
158099624bdfSMatthew Brost  * drm_gpusvm_get_devmem_page() - Get a reference to a device memory page
158199624bdfSMatthew Brost  * @page: Pointer to the page
158299624bdfSMatthew Brost  * @zdd: Pointer to the GPU SVM zone device data
158399624bdfSMatthew Brost  *
158499624bdfSMatthew Brost  * This function associates the given page with the specified GPU SVM zone
158599624bdfSMatthew Brost  * device data and initializes it for zone device usage.
158699624bdfSMatthew Brost  */
158799624bdfSMatthew Brost static void drm_gpusvm_get_devmem_page(struct page *page,
158899624bdfSMatthew Brost 				       struct drm_gpusvm_zdd *zdd)
158999624bdfSMatthew Brost {
159099624bdfSMatthew Brost 	page->zone_device_data = drm_gpusvm_zdd_get(zdd);
159199624bdfSMatthew Brost 	zone_device_page_init(page);
159299624bdfSMatthew Brost }
159399624bdfSMatthew Brost 
159499624bdfSMatthew Brost /**
159599624bdfSMatthew Brost  * drm_gpusvm_migrate_map_pages() - Map migration pages for GPU SVM migration
159699624bdfSMatthew Brost  * @dev: The device for which the pages are being mapped
159799624bdfSMatthew Brost  * @dma_addr: Array to store DMA addresses corresponding to mapped pages
159899624bdfSMatthew Brost  * @migrate_pfn: Array of migrate page frame numbers to map
159999624bdfSMatthew Brost  * @npages: Number of pages to map
160099624bdfSMatthew Brost  * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
160199624bdfSMatthew Brost  *
160299624bdfSMatthew Brost  * This function maps pages of memory for migration usage in GPU SVM. It
160399624bdfSMatthew Brost  * iterates over each page frame number provided in @migrate_pfn, maps the
160499624bdfSMatthew Brost  * corresponding page, and stores the DMA address in the provided @dma_addr
160599624bdfSMatthew Brost  * array.
160699624bdfSMatthew Brost  *
160799624bdfSMatthew Brost  * Return: 0 on success, -EFAULT if an error occurs during mapping.
160899624bdfSMatthew Brost  */
160999624bdfSMatthew Brost static int drm_gpusvm_migrate_map_pages(struct device *dev,
161099624bdfSMatthew Brost 					dma_addr_t *dma_addr,
161199624bdfSMatthew Brost 					unsigned long *migrate_pfn,
161299624bdfSMatthew Brost 					unsigned long npages,
161399624bdfSMatthew Brost 					enum dma_data_direction dir)
161499624bdfSMatthew Brost {
161599624bdfSMatthew Brost 	unsigned long i;
161699624bdfSMatthew Brost 
161799624bdfSMatthew Brost 	for (i = 0; i < npages; ++i) {
161899624bdfSMatthew Brost 		struct page *page = migrate_pfn_to_page(migrate_pfn[i]);
161999624bdfSMatthew Brost 
162099624bdfSMatthew Brost 		if (!page)
162199624bdfSMatthew Brost 			continue;
162299624bdfSMatthew Brost 
162399624bdfSMatthew Brost 		if (WARN_ON_ONCE(is_zone_device_page(page)))
162499624bdfSMatthew Brost 			return -EFAULT;
162599624bdfSMatthew Brost 
162699624bdfSMatthew Brost 		dma_addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
162799624bdfSMatthew Brost 		if (dma_mapping_error(dev, dma_addr[i]))
162899624bdfSMatthew Brost 			return -EFAULT;
162999624bdfSMatthew Brost 	}
163099624bdfSMatthew Brost 
163199624bdfSMatthew Brost 	return 0;
163299624bdfSMatthew Brost }
163399624bdfSMatthew Brost 
163499624bdfSMatthew Brost /**
163599624bdfSMatthew Brost  * drm_gpusvm_migrate_unmap_pages() - Unmap pages previously mapped for GPU SVM migration
163699624bdfSMatthew Brost  * @dev: The device for which the pages were mapped
163799624bdfSMatthew Brost  * @dma_addr: Array of DMA addresses corresponding to mapped pages
163899624bdfSMatthew Brost  * @npages: Number of pages to unmap
163999624bdfSMatthew Brost  * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
164099624bdfSMatthew Brost  *
164199624bdfSMatthew Brost  * This function unmaps previously mapped pages of memory for GPU Shared Virtual
164299624bdfSMatthew Brost  * Memory (SVM). It iterates over each DMA address provided in @dma_addr, checks
164399624bdfSMatthew Brost  * if it's valid and not already unmapped, and unmaps the corresponding page.
164499624bdfSMatthew Brost  */
164599624bdfSMatthew Brost static void drm_gpusvm_migrate_unmap_pages(struct device *dev,
164699624bdfSMatthew Brost 					   dma_addr_t *dma_addr,
164799624bdfSMatthew Brost 					   unsigned long npages,
164899624bdfSMatthew Brost 					   enum dma_data_direction dir)
164999624bdfSMatthew Brost {
165099624bdfSMatthew Brost 	unsigned long i;
165199624bdfSMatthew Brost 
165299624bdfSMatthew Brost 	for (i = 0; i < npages; ++i) {
165399624bdfSMatthew Brost 		if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i]))
165499624bdfSMatthew Brost 			continue;
165599624bdfSMatthew Brost 
165699624bdfSMatthew Brost 		dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir);
165799624bdfSMatthew Brost 	}
165899624bdfSMatthew Brost }
165999624bdfSMatthew Brost 
166099624bdfSMatthew Brost /**
166199624bdfSMatthew Brost  * drm_gpusvm_migrate_to_devmem() - Migrate GPU SVM range to device memory
166299624bdfSMatthew Brost  * @gpusvm: Pointer to the GPU SVM structure
166399624bdfSMatthew Brost  * @range: Pointer to the GPU SVM range structure
166499624bdfSMatthew Brost  * @devmem_allocation: Pointer to the device memory allocation. The caller
166599624bdfSMatthew Brost  *                     should hold a reference to the device memory allocation,
166699624bdfSMatthew Brost  *                     which should be dropped via ops->devmem_release or upon
166799624bdfSMatthew Brost  *                     the failure of this function.
166899624bdfSMatthew Brost  * @ctx: GPU SVM context
166999624bdfSMatthew Brost  *
167099624bdfSMatthew Brost  * This function migrates the specified GPU SVM range to device memory. It
167199624bdfSMatthew Brost  * performs the necessary setup and invokes the driver-specific operations for
167299624bdfSMatthew Brost  * migration to device memory. Upon successful return, @devmem_allocation can
167399624bdfSMatthew Brost  * safely reference @range until ops->devmem_release is called, which only
167499624bdfSMatthew Brost  * happens upon successful return. Expected to be called while holding the mmap
167599624bdfSMatthew Brost  * lock in read mode.
167699624bdfSMatthew Brost  *
167799624bdfSMatthew Brost  * Return: 0 on success, negative error code on failure.
167899624bdfSMatthew Brost  */
167999624bdfSMatthew Brost int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm,
168099624bdfSMatthew Brost 				 struct drm_gpusvm_range *range,
168199624bdfSMatthew Brost 				 struct drm_gpusvm_devmem *devmem_allocation,
168299624bdfSMatthew Brost 				 const struct drm_gpusvm_ctx *ctx)
168399624bdfSMatthew Brost {
168499624bdfSMatthew Brost 	const struct drm_gpusvm_devmem_ops *ops = devmem_allocation->ops;
168599624bdfSMatthew Brost 	unsigned long start = drm_gpusvm_range_start(range),
168699624bdfSMatthew Brost 		      end = drm_gpusvm_range_end(range);
168799624bdfSMatthew Brost 	struct migrate_vma migrate = {
168899624bdfSMatthew Brost 		.start		= start,
168999624bdfSMatthew Brost 		.end		= end,
169099624bdfSMatthew Brost 		.pgmap_owner	= gpusvm->device_private_page_owner,
169199624bdfSMatthew Brost 		.flags		= MIGRATE_VMA_SELECT_SYSTEM,
169299624bdfSMatthew Brost 	};
169399624bdfSMatthew Brost 	struct mm_struct *mm = gpusvm->mm;
169499624bdfSMatthew Brost 	unsigned long i, npages = npages_in_range(start, end);
169599624bdfSMatthew Brost 	struct vm_area_struct *vas;
169699624bdfSMatthew Brost 	struct drm_gpusvm_zdd *zdd = NULL;
169799624bdfSMatthew Brost 	struct page **pages;
169899624bdfSMatthew Brost 	dma_addr_t *dma_addr;
169999624bdfSMatthew Brost 	void *buf;
170099624bdfSMatthew Brost 	int err;
170199624bdfSMatthew Brost 
170299624bdfSMatthew Brost 	mmap_assert_locked(gpusvm->mm);
170399624bdfSMatthew Brost 
170499624bdfSMatthew Brost 	if (!range->flags.migrate_devmem)
170599624bdfSMatthew Brost 		return -EINVAL;
170699624bdfSMatthew Brost 
170799624bdfSMatthew Brost 	if (!ops->populate_devmem_pfn || !ops->copy_to_devmem ||
170899624bdfSMatthew Brost 	    !ops->copy_to_ram)
170999624bdfSMatthew Brost 		return -EOPNOTSUPP;
171099624bdfSMatthew Brost 
171199624bdfSMatthew Brost 	vas = vma_lookup(mm, start);
171299624bdfSMatthew Brost 	if (!vas) {
171399624bdfSMatthew Brost 		err = -ENOENT;
171499624bdfSMatthew Brost 		goto err_out;
171599624bdfSMatthew Brost 	}
171699624bdfSMatthew Brost 
171799624bdfSMatthew Brost 	if (end > vas->vm_end || start < vas->vm_start) {
171899624bdfSMatthew Brost 		err = -EINVAL;
171999624bdfSMatthew Brost 		goto err_out;
172099624bdfSMatthew Brost 	}
172199624bdfSMatthew Brost 
172299624bdfSMatthew Brost 	if (!vma_is_anonymous(vas)) {
172399624bdfSMatthew Brost 		err = -EBUSY;
172499624bdfSMatthew Brost 		goto err_out;
172599624bdfSMatthew Brost 	}
172699624bdfSMatthew Brost 
172799624bdfSMatthew Brost 	buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) +
172899624bdfSMatthew Brost 		       sizeof(*pages), GFP_KERNEL);
172999624bdfSMatthew Brost 	if (!buf) {
173099624bdfSMatthew Brost 		err = -ENOMEM;
173199624bdfSMatthew Brost 		goto err_out;
173299624bdfSMatthew Brost 	}
173399624bdfSMatthew Brost 	dma_addr = buf + (2 * sizeof(*migrate.src) * npages);
173499624bdfSMatthew Brost 	pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages;
173599624bdfSMatthew Brost 
173699624bdfSMatthew Brost 	zdd = drm_gpusvm_zdd_alloc(gpusvm->device_private_page_owner);
173799624bdfSMatthew Brost 	if (!zdd) {
173899624bdfSMatthew Brost 		err = -ENOMEM;
173999624bdfSMatthew Brost 		goto err_free;
174099624bdfSMatthew Brost 	}
174199624bdfSMatthew Brost 
174299624bdfSMatthew Brost 	migrate.vma = vas;
174399624bdfSMatthew Brost 	migrate.src = buf;
174499624bdfSMatthew Brost 	migrate.dst = migrate.src + npages;
174599624bdfSMatthew Brost 
174699624bdfSMatthew Brost 	err = migrate_vma_setup(&migrate);
174799624bdfSMatthew Brost 	if (err)
174899624bdfSMatthew Brost 		goto err_free;
174999624bdfSMatthew Brost 
175099624bdfSMatthew Brost 	if (!migrate.cpages) {
175199624bdfSMatthew Brost 		err = -EFAULT;
175299624bdfSMatthew Brost 		goto err_free;
175399624bdfSMatthew Brost 	}
175499624bdfSMatthew Brost 
175599624bdfSMatthew Brost 	if (migrate.cpages != npages) {
175699624bdfSMatthew Brost 		err = -EBUSY;
175799624bdfSMatthew Brost 		goto err_finalize;
175899624bdfSMatthew Brost 	}
175999624bdfSMatthew Brost 
176099624bdfSMatthew Brost 	err = ops->populate_devmem_pfn(devmem_allocation, npages, migrate.dst);
176199624bdfSMatthew Brost 	if (err)
176299624bdfSMatthew Brost 		goto err_finalize;
176399624bdfSMatthew Brost 
176499624bdfSMatthew Brost 	err = drm_gpusvm_migrate_map_pages(devmem_allocation->dev, dma_addr,
176599624bdfSMatthew Brost 					   migrate.src, npages, DMA_TO_DEVICE);
176699624bdfSMatthew Brost 	if (err)
176799624bdfSMatthew Brost 		goto err_finalize;
176899624bdfSMatthew Brost 
176999624bdfSMatthew Brost 	for (i = 0; i < npages; ++i) {
177099624bdfSMatthew Brost 		struct page *page = pfn_to_page(migrate.dst[i]);
177199624bdfSMatthew Brost 
177299624bdfSMatthew Brost 		pages[i] = page;
177399624bdfSMatthew Brost 		migrate.dst[i] = migrate_pfn(migrate.dst[i]);
177499624bdfSMatthew Brost 		drm_gpusvm_get_devmem_page(page, zdd);
177599624bdfSMatthew Brost 	}
177699624bdfSMatthew Brost 
177799624bdfSMatthew Brost 	err = ops->copy_to_devmem(pages, dma_addr, npages);
177899624bdfSMatthew Brost 	if (err)
177999624bdfSMatthew Brost 		goto err_finalize;
178099624bdfSMatthew Brost 
178199624bdfSMatthew Brost 	/* Upon success bind devmem allocation to range and zdd */
178299624bdfSMatthew Brost 	devmem_allocation->timeslice_expiration = get_jiffies_64() +
178399624bdfSMatthew Brost 		msecs_to_jiffies(ctx->timeslice_ms);
178499624bdfSMatthew Brost 	zdd->devmem_allocation = devmem_allocation;	/* Owns ref */
178599624bdfSMatthew Brost 
1786*df8c3781SMatthew Brost err_finalize:
1787*df8c3781SMatthew Brost 	if (err)
178899624bdfSMatthew Brost 		drm_gpusvm_migration_unlock_put_pages(npages, migrate.dst);
178999624bdfSMatthew Brost 	migrate_vma_pages(&migrate);
179099624bdfSMatthew Brost 	migrate_vma_finalize(&migrate);
179199624bdfSMatthew Brost 	drm_gpusvm_migrate_unmap_pages(devmem_allocation->dev, dma_addr, npages,
179299624bdfSMatthew Brost 				       DMA_TO_DEVICE);
179399624bdfSMatthew Brost err_free:
179499624bdfSMatthew Brost 	if (zdd)
179599624bdfSMatthew Brost 		drm_gpusvm_zdd_put(zdd);
179699624bdfSMatthew Brost 	kvfree(buf);
179799624bdfSMatthew Brost err_out:
179899624bdfSMatthew Brost 	return err;
179999624bdfSMatthew Brost }
180099624bdfSMatthew Brost EXPORT_SYMBOL_GPL(drm_gpusvm_migrate_to_devmem);
180199624bdfSMatthew Brost 
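/*
 * Illustrative sketch (hedged): attempting migration to device memory from a
 * fault handler, treating partial-migration failures as non-fatal so the
 * caller can fall back to system RAM pages via drm_gpusvm_range_get_pages().
 * The devmem allocation is assumed to have been created by the driver with
 * its ops set and a reference held, as the kernel-doc above requires.
 */
static int example_fault_migrate(struct drm_gpusvm *gpusvm,
				 struct drm_gpusvm_range *range,
				 struct drm_gpusvm_devmem *devmem_allocation,
				 const struct drm_gpusvm_ctx *ctx)
{
	int err;

	/* Called with the mmap lock held in read mode, as required. */
	err = drm_gpusvm_migrate_to_devmem(gpusvm, range, devmem_allocation, ctx);
	if (err == -EBUSY || err == -EFAULT)
		return 0;	/* non-fatal here: fall back to system RAM */

	return err;
}
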
180299624bdfSMatthew Brost /**
180399624bdfSMatthew Brost  * drm_gpusvm_migrate_populate_ram_pfn() - Populate RAM PFNs for a VM area
180499624bdfSMatthew Brost  * @vas: Pointer to the VM area structure, can be NULL
180599624bdfSMatthew Brost  * @fault_page: Fault page
180699624bdfSMatthew Brost  * @npages: Number of pages to populate
180799624bdfSMatthew Brost  * @mpages: Number of pages to migrate
180899624bdfSMatthew Brost  * @src_mpfn: Source array of migrate PFNs
180999624bdfSMatthew Brost  * @mpfn: Array of migrate PFNs to populate
181099624bdfSMatthew Brost  * @addr: Start address for PFN allocation
181199624bdfSMatthew Brost  *
181299624bdfSMatthew Brost  * This function populates the RAM migrate page frame numbers (PFNs) for the
181399624bdfSMatthew Brost  * specified VM area structure. It allocates and locks pages in the VM area for
181499624bdfSMatthew Brost  * RAM usage. If @vas is non-NULL, alloc_page_vma() is used for allocation;
181599624bdfSMatthew Brost  * otherwise alloc_page() is used.
181699624bdfSMatthew Brost  *
181799624bdfSMatthew Brost  * Return: 0 on success, negative error code on failure.
181899624bdfSMatthew Brost  */
181999624bdfSMatthew Brost static int drm_gpusvm_migrate_populate_ram_pfn(struct vm_area_struct *vas,
182099624bdfSMatthew Brost 					       struct page *fault_page,
182199624bdfSMatthew Brost 					       unsigned long npages,
182299624bdfSMatthew Brost 					       unsigned long *mpages,
182399624bdfSMatthew Brost 					       unsigned long *src_mpfn,
182499624bdfSMatthew Brost 					       unsigned long *mpfn,
182599624bdfSMatthew Brost 					       unsigned long addr)
182699624bdfSMatthew Brost {
182799624bdfSMatthew Brost 	unsigned long i;
182899624bdfSMatthew Brost 
182999624bdfSMatthew Brost 	for (i = 0; i < npages; ++i, addr += PAGE_SIZE) {
183099624bdfSMatthew Brost 		struct page *page, *src_page;
183199624bdfSMatthew Brost 
183299624bdfSMatthew Brost 		if (!(src_mpfn[i] & MIGRATE_PFN_MIGRATE))
183399624bdfSMatthew Brost 			continue;
183499624bdfSMatthew Brost 
183599624bdfSMatthew Brost 		src_page = migrate_pfn_to_page(src_mpfn[i]);
183699624bdfSMatthew Brost 		if (!src_page)
183799624bdfSMatthew Brost 			continue;
183899624bdfSMatthew Brost 
183999624bdfSMatthew Brost 		if (fault_page) {
184099624bdfSMatthew Brost 			if (src_page->zone_device_data !=
184199624bdfSMatthew Brost 			    fault_page->zone_device_data)
184299624bdfSMatthew Brost 				continue;
184399624bdfSMatthew Brost 		}
184499624bdfSMatthew Brost 
184599624bdfSMatthew Brost 		if (vas)
184699624bdfSMatthew Brost 			page = alloc_page_vma(GFP_HIGHUSER, vas, addr);
184799624bdfSMatthew Brost 		else
184899624bdfSMatthew Brost 			page = alloc_page(GFP_HIGHUSER);
184999624bdfSMatthew Brost 
185099624bdfSMatthew Brost 		if (!page)
185199624bdfSMatthew Brost 			goto free_pages;
185299624bdfSMatthew Brost 
185399624bdfSMatthew Brost 		mpfn[i] = migrate_pfn(page_to_pfn(page));
185499624bdfSMatthew Brost 	}
185599624bdfSMatthew Brost 
185699624bdfSMatthew Brost 	for (i = 0; i < npages; ++i) {
185799624bdfSMatthew Brost 		struct page *page = migrate_pfn_to_page(mpfn[i]);
185899624bdfSMatthew Brost 
185999624bdfSMatthew Brost 		if (!page)
186099624bdfSMatthew Brost 			continue;
186199624bdfSMatthew Brost 
186299624bdfSMatthew Brost 		WARN_ON_ONCE(!trylock_page(page));
186399624bdfSMatthew Brost 		++*mpages;
186499624bdfSMatthew Brost 	}
186599624bdfSMatthew Brost 
186699624bdfSMatthew Brost 	return 0;
186799624bdfSMatthew Brost 
186899624bdfSMatthew Brost free_pages:
186999624bdfSMatthew Brost 	for (i = 0; i < npages; ++i) {
187099624bdfSMatthew Brost 		struct page *page = migrate_pfn_to_page(mpfn[i]);
187199624bdfSMatthew Brost 
187299624bdfSMatthew Brost 		if (!page)
187399624bdfSMatthew Brost 			continue;
187499624bdfSMatthew Brost 
187599624bdfSMatthew Brost 		put_page(page);
187699624bdfSMatthew Brost 		mpfn[i] = 0;
187799624bdfSMatthew Brost 	}
187899624bdfSMatthew Brost 	return -ENOMEM;
187999624bdfSMatthew Brost }
188099624bdfSMatthew Brost 
188199624bdfSMatthew Brost /**
188299624bdfSMatthew Brost  * drm_gpusvm_evict_to_ram() - Evict GPU SVM range to RAM
188399624bdfSMatthew Brost  * @devmem_allocation: Pointer to the device memory allocation
188499624bdfSMatthew Brost  *
188599624bdfSMatthew Brost  * Similar to __drm_gpusvm_migrate_to_ram() but does not require the mmap lock;
188699624bdfSMatthew Brost  * migration is done via the migrate_device_* functions.
188799624bdfSMatthew Brost  *
188899624bdfSMatthew Brost  * Return: 0 on success, negative error code on failure.
188999624bdfSMatthew Brost  */
189099624bdfSMatthew Brost int drm_gpusvm_evict_to_ram(struct drm_gpusvm_devmem *devmem_allocation)
189199624bdfSMatthew Brost {
189299624bdfSMatthew Brost 	const struct drm_gpusvm_devmem_ops *ops = devmem_allocation->ops;
189399624bdfSMatthew Brost 	unsigned long npages, mpages = 0;
189499624bdfSMatthew Brost 	struct page **pages;
189599624bdfSMatthew Brost 	unsigned long *src, *dst;
189699624bdfSMatthew Brost 	dma_addr_t *dma_addr;
189799624bdfSMatthew Brost 	void *buf;
189899624bdfSMatthew Brost 	int i, err = 0;
189999624bdfSMatthew Brost 	unsigned int retry_count = 2;
190099624bdfSMatthew Brost 
190199624bdfSMatthew Brost 	npages = devmem_allocation->size >> PAGE_SHIFT;
190299624bdfSMatthew Brost 
190399624bdfSMatthew Brost retry:
190499624bdfSMatthew Brost 	if (!mmget_not_zero(devmem_allocation->mm))
190599624bdfSMatthew Brost 		return -EFAULT;
190699624bdfSMatthew Brost 
190799624bdfSMatthew Brost 	buf = kvcalloc(npages, 2 * sizeof(*src) + sizeof(*dma_addr) +
190899624bdfSMatthew Brost 		       sizeof(*pages), GFP_KERNEL);
190999624bdfSMatthew Brost 	if (!buf) {
191099624bdfSMatthew Brost 		err = -ENOMEM;
191199624bdfSMatthew Brost 		goto err_out;
191299624bdfSMatthew Brost 	}
191399624bdfSMatthew Brost 	src = buf;
191499624bdfSMatthew Brost 	dst = buf + (sizeof(*src) * npages);
191599624bdfSMatthew Brost 	dma_addr = buf + (2 * sizeof(*src) * npages);
191699624bdfSMatthew Brost 	pages = buf + (2 * sizeof(*src) + sizeof(*dma_addr)) * npages;
191799624bdfSMatthew Brost 
191899624bdfSMatthew Brost 	err = ops->populate_devmem_pfn(devmem_allocation, npages, src);
191999624bdfSMatthew Brost 	if (err)
192099624bdfSMatthew Brost 		goto err_free;
192199624bdfSMatthew Brost 
192299624bdfSMatthew Brost 	err = migrate_device_pfns(src, npages);
192399624bdfSMatthew Brost 	if (err)
192499624bdfSMatthew Brost 		goto err_free;
192599624bdfSMatthew Brost 
192699624bdfSMatthew Brost 	err = drm_gpusvm_migrate_populate_ram_pfn(NULL, NULL, npages, &mpages,
192799624bdfSMatthew Brost 						  src, dst, 0);
192899624bdfSMatthew Brost 	if (err || !mpages)
192999624bdfSMatthew Brost 		goto err_finalize;
193099624bdfSMatthew Brost 
193199624bdfSMatthew Brost 	err = drm_gpusvm_migrate_map_pages(devmem_allocation->dev, dma_addr,
193299624bdfSMatthew Brost 					   dst, npages, DMA_FROM_DEVICE);
193399624bdfSMatthew Brost 	if (err)
193499624bdfSMatthew Brost 		goto err_finalize;
193599624bdfSMatthew Brost 
193699624bdfSMatthew Brost 	for (i = 0; i < npages; ++i)
193799624bdfSMatthew Brost 		pages[i] = migrate_pfn_to_page(src[i]);
193899624bdfSMatthew Brost 
193999624bdfSMatthew Brost 	err = ops->copy_to_ram(pages, dma_addr, npages);
194099624bdfSMatthew Brost 	if (err)
194199624bdfSMatthew Brost 		goto err_finalize;
194299624bdfSMatthew Brost 
194399624bdfSMatthew Brost err_finalize:
194499624bdfSMatthew Brost 	if (err)
194599624bdfSMatthew Brost 		drm_gpusvm_migration_unlock_put_pages(npages, dst);
194699624bdfSMatthew Brost 	migrate_device_pages(src, dst, npages);
194799624bdfSMatthew Brost 	migrate_device_finalize(src, dst, npages);
194899624bdfSMatthew Brost 	drm_gpusvm_migrate_unmap_pages(devmem_allocation->dev, dma_addr, npages,
194999624bdfSMatthew Brost 				       DMA_FROM_DEVICE);
195099624bdfSMatthew Brost err_free:
195199624bdfSMatthew Brost 	kvfree(buf);
195299624bdfSMatthew Brost err_out:
195399624bdfSMatthew Brost 	mmput_async(devmem_allocation->mm);
195499624bdfSMatthew Brost 
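	/*
	 * Eviction only counts as complete once the devmem allocation's
	 * "detached" completion has signalled, not when err is zero;
	 * otherwise retry the whole migration a couple of times before
	 * giving up with -EBUSY.
	 */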
195599624bdfSMatthew Brost 	if (completion_done(&devmem_allocation->detached))
195699624bdfSMatthew Brost 		return 0;
195799624bdfSMatthew Brost 
195899624bdfSMatthew Brost 	if (retry_count--) {
195999624bdfSMatthew Brost 		cond_resched();
196099624bdfSMatthew Brost 		goto retry;
196199624bdfSMatthew Brost 	}
196299624bdfSMatthew Brost 
196399624bdfSMatthew Brost 	return err ?: -EBUSY;
196499624bdfSMatthew Brost }
196599624bdfSMatthew Brost EXPORT_SYMBOL_GPL(drm_gpusvm_evict_to_ram);
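
/*
 * Illustrative only: a driver that needs to reclaim a device memory block,
 * e.g. under pressure from its VRAM allocator, might call the helper above
 * on the drm_gpusvm_devmem embedded in its own allocation object. The
 * my_vram_block type and its fields are hypothetical, not part of this API.
 *
 *	static int my_vram_block_evict(struct my_vram_block *block)
 *	{
 *		// Blocks until the allocation is detached or gives up with -EBUSY.
 *		return drm_gpusvm_evict_to_ram(&block->devmem);
 *	}
 */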
196699624bdfSMatthew Brost 
196799624bdfSMatthew Brost /**
196899624bdfSMatthew Brost  * __drm_gpusvm_migrate_to_ram() - Migrate GPU SVM range to RAM (internal)
196999624bdfSMatthew Brost  * @vas: Pointer to the VM area structure
197099624bdfSMatthew Brost  * @device_private_page_owner: Device private pages owner
197199624bdfSMatthew Brost  * @page: Pointer to the page for fault handling (can be NULL)
197299624bdfSMatthew Brost  * @fault_addr: Fault address
197399624bdfSMatthew Brost  * @size: Size of migration
197499624bdfSMatthew Brost  *
197599624bdfSMatthew Brost  * This internal function performs the migration of the specified GPU SVM range
197699624bdfSMatthew Brost  * to RAM. It sets up the migration, populates and DMA-maps the RAM PFNs, and
197799624bdfSMatthew Brost  * invokes the driver-specific operations for migration to RAM.
197899624bdfSMatthew Brost  *
197999624bdfSMatthew Brost  * Return: 0 on success, negative error code on failure.
198099624bdfSMatthew Brost  */
198199624bdfSMatthew Brost static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas,
198299624bdfSMatthew Brost 				       void *device_private_page_owner,
198399624bdfSMatthew Brost 				       struct page *page,
198499624bdfSMatthew Brost 				       unsigned long fault_addr,
198599624bdfSMatthew Brost 				       unsigned long size)
198699624bdfSMatthew Brost {
198799624bdfSMatthew Brost 	struct migrate_vma migrate = {
198899624bdfSMatthew Brost 		.vma		= vas,
198999624bdfSMatthew Brost 		.pgmap_owner	= device_private_page_owner,
199099624bdfSMatthew Brost 		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE |
199199624bdfSMatthew Brost 			MIGRATE_VMA_SELECT_DEVICE_COHERENT,
199299624bdfSMatthew Brost 		.fault_page	= page,
199399624bdfSMatthew Brost 	};
199499624bdfSMatthew Brost 	struct drm_gpusvm_zdd *zdd;
199599624bdfSMatthew Brost 	const struct drm_gpusvm_devmem_ops *ops;
199699624bdfSMatthew Brost 	struct device *dev = NULL;
199799624bdfSMatthew Brost 	unsigned long npages, mpages = 0;
199899624bdfSMatthew Brost 	struct page **pages;
199999624bdfSMatthew Brost 	dma_addr_t *dma_addr;
200099624bdfSMatthew Brost 	unsigned long start, end;
200199624bdfSMatthew Brost 	void *buf;
200299624bdfSMatthew Brost 	int i, err = 0;
200399624bdfSMatthew Brost 
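	/*
	 * Honor the devmem allocation's migration timeslice: if it has not
	 * yet expired, skip the migration so the pages stay in device memory
	 * for now and the CPU fault is simply retried, avoiding CPU<->GPU
	 * migration ping-pong.
	 */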
200499624bdfSMatthew Brost 	if (page) {
200599624bdfSMatthew Brost 		zdd = page->zone_device_data;
200699624bdfSMatthew Brost 		if (time_before64(get_jiffies_64(),
200799624bdfSMatthew Brost 				  zdd->devmem_allocation->timeslice_expiration))
2008*df8c3781SMatthew Brost 			return 0;
2009*df8c3781SMatthew Brost 	}
2010*df8c3781SMatthew Brost 
2011*df8c3781SMatthew Brost 	start = ALIGN_DOWN(fault_addr, size);
2012*df8c3781SMatthew Brost 	end = ALIGN(fault_addr + 1, size);
2013*df8c3781SMatthew Brost 
2014*df8c3781SMatthew Brost 	/* Corner case where the VM area struct has been partially unmapped */
201599624bdfSMatthew Brost 	if (start < vas->vm_start)
201699624bdfSMatthew Brost 		start = vas->vm_start;
201799624bdfSMatthew Brost 	if (end > vas->vm_end)
201899624bdfSMatthew Brost 		end = vas->vm_end;
201999624bdfSMatthew Brost 
202099624bdfSMatthew Brost 	migrate.start = start;
202199624bdfSMatthew Brost 	migrate.end = end;
202299624bdfSMatthew Brost 	npages = npages_in_range(start, end);
202399624bdfSMatthew Brost 
202499624bdfSMatthew Brost 	buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) +
202599624bdfSMatthew Brost 		       sizeof(*pages), GFP_KERNEL);
202699624bdfSMatthew Brost 	if (!buf) {
202799624bdfSMatthew Brost 		err = -ENOMEM;
202899624bdfSMatthew Brost 		goto err_out;
202999624bdfSMatthew Brost 	}
203099624bdfSMatthew Brost 	dma_addr = buf + (2 * sizeof(*migrate.src) * npages);
203199624bdfSMatthew Brost 	pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages;
203299624bdfSMatthew Brost 
203399624bdfSMatthew Brost 	migrate.vma = vas;
203499624bdfSMatthew Brost 	migrate.src = buf;
203599624bdfSMatthew Brost 	migrate.dst = migrate.src + npages;
203699624bdfSMatthew Brost 
203799624bdfSMatthew Brost 	err = migrate_vma_setup(&migrate);
203899624bdfSMatthew Brost 	if (err)
203999624bdfSMatthew Brost 		goto err_free;
204099624bdfSMatthew Brost 
204199624bdfSMatthew Brost 	/* Raced with another CPU fault, nothing to do */
204299624bdfSMatthew Brost 	if (!migrate.cpages)
204399624bdfSMatthew Brost 		goto err_free;
204499624bdfSMatthew Brost 
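		/*
		 * No faulting page was supplied: pick any device page that is
		 * marked for migration so the devmem allocation's ops and
		 * device can be looked up below.
		 */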
204599624bdfSMatthew Brost 	if (!page) {
204699624bdfSMatthew Brost 		for (i = 0; i < npages; ++i) {
204799624bdfSMatthew Brost 			if (!(migrate.src[i] & MIGRATE_PFN_MIGRATE))
204899624bdfSMatthew Brost 				continue;
204999624bdfSMatthew Brost 
205099624bdfSMatthew Brost 			page = migrate_pfn_to_page(migrate.src[i]);
205199624bdfSMatthew Brost 			break;
205299624bdfSMatthew Brost 		}
205399624bdfSMatthew Brost 
205499624bdfSMatthew Brost 		if (!page)
205599624bdfSMatthew Brost 			goto err_finalize;
205699624bdfSMatthew Brost 	}
205799624bdfSMatthew Brost 	zdd = page->zone_device_data;
205899624bdfSMatthew Brost 	ops = zdd->devmem_allocation->ops;
205999624bdfSMatthew Brost 	dev = zdd->devmem_allocation->dev;
206099624bdfSMatthew Brost 
206199624bdfSMatthew Brost 	err = drm_gpusvm_migrate_populate_ram_pfn(vas, page, npages, &mpages,
206299624bdfSMatthew Brost 						  migrate.src, migrate.dst,
206399624bdfSMatthew Brost 						  start);
206499624bdfSMatthew Brost 	if (err)
206599624bdfSMatthew Brost 		goto err_finalize;
206699624bdfSMatthew Brost 
206799624bdfSMatthew Brost 	err = drm_gpusvm_migrate_map_pages(dev, dma_addr, migrate.dst, npages,
206899624bdfSMatthew Brost 					   DMA_FROM_DEVICE);
206999624bdfSMatthew Brost 	if (err)
207099624bdfSMatthew Brost 		goto err_finalize;
207199624bdfSMatthew Brost 
207299624bdfSMatthew Brost 	for (i = 0; i < npages; ++i)
207399624bdfSMatthew Brost 		pages[i] = migrate_pfn_to_page(migrate.src[i]);
207499624bdfSMatthew Brost 
207599624bdfSMatthew Brost 	err = ops->copy_to_ram(pages, dma_addr, npages);
207699624bdfSMatthew Brost 	if (err)
207799624bdfSMatthew Brost 		goto err_finalize;
207899624bdfSMatthew Brost 
207999624bdfSMatthew Brost err_finalize:
208099624bdfSMatthew Brost 	if (err)
208199624bdfSMatthew Brost 		drm_gpusvm_migration_unlock_put_pages(npages, migrate.dst);
208299624bdfSMatthew Brost 	migrate_vma_pages(&migrate);
208399624bdfSMatthew Brost 	migrate_vma_finalize(&migrate);
208499624bdfSMatthew Brost 	if (dev)
208599624bdfSMatthew Brost 		drm_gpusvm_migrate_unmap_pages(dev, dma_addr, npages,
208699624bdfSMatthew Brost 					       DMA_FROM_DEVICE);
208799624bdfSMatthew Brost err_free:
208899624bdfSMatthew Brost 	kvfree(buf);
208999624bdfSMatthew Brost err_out:
209099624bdfSMatthew Brost 
209199624bdfSMatthew Brost 	return err;
209299624bdfSMatthew Brost }
209399624bdfSMatthew Brost 
209499624bdfSMatthew Brost /**
209599624bdfSMatthew Brost  * drm_gpusvm_range_evict() - Evict GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
209699624bdfSMatthew Brost  * @range: Pointer to the GPU SVM range to be removed
209799624bdfSMatthew Brost  *
209899624bdfSMatthew Brost  * This function evicts the specified GPU SVM range; it will not
209999624bdfSMatthew Brost  * evict coherent pages.
210099624bdfSMatthew Brost  *
210199624bdfSMatthew Brost  * Return: 0 on success, a negative error code on failure.
210299624bdfSMatthew Brost  */
210399624bdfSMatthew Brost int drm_gpusvm_range_evict(struct drm_gpusvm *gpusvm,
210499624bdfSMatthew Brost 			   struct drm_gpusvm_range *range)
210599624bdfSMatthew Brost {
210699624bdfSMatthew Brost 	struct mmu_interval_notifier *notifier = &range->notifier->notifier;
210799624bdfSMatthew Brost 	struct hmm_range hmm_range = {
210899624bdfSMatthew Brost 		.default_flags = HMM_PFN_REQ_FAULT,
210999624bdfSMatthew Brost 		.notifier = notifier,
211099624bdfSMatthew Brost 		.start = drm_gpusvm_range_start(range),
211199624bdfSMatthew Brost 		.end = drm_gpusvm_range_end(range),
211299624bdfSMatthew Brost 		.dev_private_owner = NULL,
211399624bdfSMatthew Brost 	};
211499624bdfSMatthew Brost 	unsigned long timeout =
211599624bdfSMatthew Brost 		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
211699624bdfSMatthew Brost 	unsigned long *pfns;
211799624bdfSMatthew Brost 	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
211899624bdfSMatthew Brost 					       drm_gpusvm_range_end(range));
211999624bdfSMatthew Brost 	int err = 0;
212099624bdfSMatthew Brost 	struct mm_struct *mm = gpusvm->mm;
212199624bdfSMatthew Brost 
212299624bdfSMatthew Brost 	if (!mmget_not_zero(mm))
212399624bdfSMatthew Brost 		return -EFAULT;
212499624bdfSMatthew Brost 
212599624bdfSMatthew Brost 	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
212699624bdfSMatthew Brost 	if (!pfns) {
		mmput(mm);
212799624bdfSMatthew Brost 		return -ENOMEM;
	}
212899624bdfSMatthew Brost 
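	/*
	 * Fault the whole range with dev_private_owner == NULL: device-private
	 * pages are then not treated as our own, so hmm_range_fault() triggers
	 * their migrate_to_ram() handler, which is what actually performs the
	 * eviction back to RAM.
	 */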
212999624bdfSMatthew Brost 	hmm_range.hmm_pfns = pfns;
213099624bdfSMatthew Brost 	while (!time_after(jiffies, timeout)) {
213199624bdfSMatthew Brost 		hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
213299624bdfSMatthew Brost 		if (time_after(jiffies, timeout)) {
213399624bdfSMatthew Brost 			err = -ETIME;
213499624bdfSMatthew Brost 			break;
213599624bdfSMatthew Brost 		}
213699624bdfSMatthew Brost 
213799624bdfSMatthew Brost 		mmap_read_lock(mm);
213899624bdfSMatthew Brost 		err = hmm_range_fault(&hmm_range);
213999624bdfSMatthew Brost 		mmap_read_unlock(mm);
214099624bdfSMatthew Brost 		if (err != -EBUSY)
214199624bdfSMatthew Brost 			break;
214299624bdfSMatthew Brost 	}
214399624bdfSMatthew Brost 
214499624bdfSMatthew Brost 	kvfree(pfns);
214599624bdfSMatthew Brost 	mmput(mm);
214699624bdfSMatthew Brost 
214799624bdfSMatthew Brost 	return err;
214899624bdfSMatthew Brost }
214999624bdfSMatthew Brost EXPORT_SYMBOL_GPL(drm_gpusvm_range_evict);
215099624bdfSMatthew Brost 
215199624bdfSMatthew Brost /**
215299624bdfSMatthew Brost  * drm_gpusvm_page_free() - Put GPU SVM zone device data associated with a page
215399624bdfSMatthew Brost  * @page: Pointer to the page
215499624bdfSMatthew Brost  *
215599624bdfSMatthew Brost  * This function is a callback used to put the GPU SVM zone device data
215699624bdfSMatthew Brost  * associated with a page when it is being released.
215799624bdfSMatthew Brost  */
215899624bdfSMatthew Brost static void drm_gpusvm_page_free(struct page *page)
215999624bdfSMatthew Brost {
216099624bdfSMatthew Brost 	drm_gpusvm_zdd_put(page->zone_device_data);
216199624bdfSMatthew Brost }
216299624bdfSMatthew Brost 
216399624bdfSMatthew Brost /**
216499624bdfSMatthew Brost  * drm_gpusvm_migrate_to_ram() - Migrate GPU SVM range to RAM (page fault handler)
216599624bdfSMatthew Brost  * @vmf: Pointer to the fault information structure
216699624bdfSMatthew Brost  *
216799624bdfSMatthew Brost  * This function is a page fault handler used to migrate a GPU SVM range to RAM.
216899624bdfSMatthew Brost  * It retrieves the GPU SVM range information from the faulting page and invokes
216999624bdfSMatthew Brost  * the internal migration function to migrate the range back to RAM.
217099624bdfSMatthew Brost  *
217199624bdfSMatthew Brost  * Return: VM_FAULT_SIGBUS on failure, 0 on success.
217299624bdfSMatthew Brost  */
217399624bdfSMatthew Brost static vm_fault_t drm_gpusvm_migrate_to_ram(struct vm_fault *vmf)
217499624bdfSMatthew Brost {
217599624bdfSMatthew Brost 	struct drm_gpusvm_zdd *zdd = vmf->page->zone_device_data;
217699624bdfSMatthew Brost 	int err;
217799624bdfSMatthew Brost 
217899624bdfSMatthew Brost 	err = __drm_gpusvm_migrate_to_ram(vmf->vma,
217999624bdfSMatthew Brost 					  zdd->device_private_page_owner,
218099624bdfSMatthew Brost 					  vmf->page, vmf->address,
218199624bdfSMatthew Brost 					  zdd->devmem_allocation->size);
218299624bdfSMatthew Brost 
218399624bdfSMatthew Brost 	return err ? VM_FAULT_SIGBUS : 0;
218499624bdfSMatthew Brost }
218599624bdfSMatthew Brost 
218699624bdfSMatthew Brost /*
218799624bdfSMatthew Brost  * drm_gpusvm_pagemap_ops - Device page map operations for GPU SVM
218899624bdfSMatthew Brost  */
218999624bdfSMatthew Brost static const struct dev_pagemap_ops drm_gpusvm_pagemap_ops = {
2190fd6c10e6SLucas De Marchi 	.page_free = drm_gpusvm_page_free,
2191fd6c10e6SLucas De Marchi 	.migrate_to_ram = drm_gpusvm_migrate_to_ram,
219299624bdfSMatthew Brost };
219399624bdfSMatthew Brost 
219499624bdfSMatthew Brost /**
219599624bdfSMatthew Brost  * drm_gpusvm_pagemap_ops_get() - Retrieve GPU SVM device page map operations
219699624bdfSMatthew Brost  *
219799624bdfSMatthew Brost  * Return: Pointer to the GPU SVM device page map operations structure.
219899624bdfSMatthew Brost  */
219999624bdfSMatthew Brost const struct dev_pagemap_ops *drm_gpusvm_pagemap_ops_get(void)
220099624bdfSMatthew Brost {
220199624bdfSMatthew Brost 	return &drm_gpusvm_pagemap_ops;
220299624bdfSMatthew Brost }
220399624bdfSMatthew Brost EXPORT_SYMBOL_GPL(drm_gpusvm_pagemap_ops_get);
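
/*
 * Illustrative only: a driver is expected to plug these ops into the struct
 * dev_pagemap it registers for its device memory region; the pagemap owner
 * would typically be the same pointer the driver uses as the GPU SVM
 * device_private_page_owner. Everything prefixed my_ below is hypothetical;
 * the dev_pagemap fields, MEMORY_DEVICE_PRIVATE and devm_memremap_pages()
 * are existing kernel API.
 *
 *	static int my_devmem_pagemap_register(struct device *dev,
 *					      struct dev_pagemap *pagemap,
 *					      struct resource *res, void *owner)
 *	{
 *		void *addr;
 *
 *		pagemap->type = MEMORY_DEVICE_PRIVATE;
 *		pagemap->range.start = res->start;
 *		pagemap->range.end = res->end;
 *		pagemap->nr_range = 1;
 *		pagemap->ops = drm_gpusvm_pagemap_ops_get();
 *		pagemap->owner = owner;
 *
 *		addr = devm_memremap_pages(dev, pagemap);
 *		return PTR_ERR_OR_ZERO(addr);
 *	}
 */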
220499624bdfSMatthew Brost 
220599624bdfSMatthew Brost /**
220699624bdfSMatthew Brost  * drm_gpusvm_has_mapping() - Check if GPU SVM has mapping for the given address range
220799624bdfSMatthew Brost  * @gpusvm: Pointer to the GPU SVM structure.
220899624bdfSMatthew Brost  * @start: Start address
220999624bdfSMatthew Brost  * @end: End address
221099624bdfSMatthew Brost  *
221199624bdfSMatthew Brost  * Return: True if GPU SVM has mapping, False otherwise
221299624bdfSMatthew Brost  */
221399624bdfSMatthew Brost bool drm_gpusvm_has_mapping(struct drm_gpusvm *gpusvm, unsigned long start,
221499624bdfSMatthew Brost 			    unsigned long end)
221599624bdfSMatthew Brost {
221699624bdfSMatthew Brost 	struct drm_gpusvm_notifier *notifier;
221799624bdfSMatthew Brost 
221899624bdfSMatthew Brost 	drm_gpusvm_for_each_notifier(notifier, gpusvm, start, end) {
221999624bdfSMatthew Brost 		struct drm_gpusvm_range *range = NULL;
222099624bdfSMatthew Brost 
222199624bdfSMatthew Brost 		drm_gpusvm_for_each_range(range, notifier, start, end)
222299624bdfSMatthew Brost 			return true;
222399624bdfSMatthew Brost 	}
222499624bdfSMatthew Brost 
222599624bdfSMatthew Brost 	return false;
222699624bdfSMatthew Brost }
222799624bdfSMatthew Brost EXPORT_SYMBOL_GPL(drm_gpusvm_has_mapping);
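
/*
 * Illustrative only: a driver might use drm_gpusvm_has_mapping() to refuse
 * operations that would overlap live SVM ranges, for instance when validating
 * a new GPU VM bind. The my_vm structure and the policy shown are
 * hypothetical, e.g.:
 *
 *	if (drm_gpusvm_has_mapping(&my_vm->svm, start, end))
 *		return -EBUSY;
 */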
222899624bdfSMatthew Brost 
222999624bdfSMatthew Brost /**
223099624bdfSMatthew Brost  * drm_gpusvm_range_set_unmapped() - Mark a GPU SVM range as unmapped
223199624bdfSMatthew Brost  * @range: Pointer to the GPU SVM range structure.
223299624bdfSMatthew Brost  * @mmu_range: Pointer to the MMU notifier range structure.
223399624bdfSMatthew Brost  *
223499624bdfSMatthew Brost  * This function marks a GPU SVM range as unmapped and sets the partial_unmap flag
223599624bdfSMatthew Brost  * if the range is only partially covered by the provided MMU notifier range.
223699624bdfSMatthew Brost  */
223799624bdfSMatthew Brost void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range,
223899624bdfSMatthew Brost 				   const struct mmu_notifier_range *mmu_range)
223999624bdfSMatthew Brost {
224099624bdfSMatthew Brost 	lockdep_assert_held_write(&range->gpusvm->notifier_lock);
224199624bdfSMatthew Brost 
224299624bdfSMatthew Brost 	range->flags.unmapped = true;
224399624bdfSMatthew Brost 	if (drm_gpusvm_range_start(range) < mmu_range->start ||
224499624bdfSMatthew Brost 	    drm_gpusvm_range_end(range) > mmu_range->end)
224599624bdfSMatthew Brost 		range->flags.partial_unmap = true;
224699624bdfSMatthew Brost }
224799624bdfSMatthew Brost EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped);
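
/*
 * Illustrative only: this is typically called from a driver's MMU interval
 * notifier invalidation callback, with the GPU SVM notifier lock held for
 * write, and only for events that actually remove the CPU mapping, e.g.:
 *
 *	if (mmu_range->event == MMU_NOTIFY_UNMAP)
 *		drm_gpusvm_range_set_unmapped(range, mmu_range);
 *
 * The surrounding callback and any per-range iteration are driver-specific.
 */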
224899624bdfSMatthew Brost 
224999624bdfSMatthew Brost /**
225099624bdfSMatthew Brost  * drm_gpusvm_devmem_init() - Initialize a GPU SVM device memory allocation
225199624bdfSMatthew Brost  * @devmem_allocation: Pointer to the device memory allocation to initialize
225299624bdfSMatthew Brost  * @dev: Pointer to the device structure to which the device memory allocation belongs
225399624bdfSMatthew Brost  * @mm: Pointer to the mm_struct for the address space
225499624bdfSMatthew Brost  * @ops: Pointer to the operations structure for GPU SVM device memory
225599624bdfSMatthew Brost  * @dpagemap: The struct drm_pagemap we're allocating from.
225699624bdfSMatthew Brost  * @size: Size of device memory allocation
225799624bdfSMatthew Brost  */
225899624bdfSMatthew Brost void drm_gpusvm_devmem_init(struct drm_gpusvm_devmem *devmem_allocation,
225999624bdfSMatthew Brost 			    struct device *dev, struct mm_struct *mm,
226099624bdfSMatthew Brost 			    const struct drm_gpusvm_devmem_ops *ops,
226199624bdfSMatthew Brost 			    struct drm_pagemap *dpagemap, size_t size)
226299624bdfSMatthew Brost {
226399624bdfSMatthew Brost 	init_completion(&devmem_allocation->detached);
226499624bdfSMatthew Brost 	devmem_allocation->dev = dev;
226599624bdfSMatthew Brost 	devmem_allocation->mm = mm;
226699624bdfSMatthew Brost 	devmem_allocation->ops = ops;
226799624bdfSMatthew Brost 	devmem_allocation->dpagemap = dpagemap;
226899624bdfSMatthew Brost 	devmem_allocation->size = size;
226999624bdfSMatthew Brost }
227099624bdfSMatthew Brost EXPORT_SYMBOL_GPL(drm_gpusvm_devmem_init);
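
/*
 * Illustrative only: a driver would typically embed struct drm_gpusvm_devmem
 * in its own device memory object and initialize it right after allocating
 * the backing memory. The my_vram_alloc type and my_devmem_ops below are
 * hypothetical.
 *
 *	static void my_vram_alloc_init(struct my_vram_alloc *alloc,
 *				       struct drm_device *drm,
 *				       struct mm_struct *mm,
 *				       struct drm_pagemap *dpagemap,
 *				       size_t size)
 *	{
 *		drm_gpusvm_devmem_init(&alloc->devmem, drm->dev, mm,
 *				       &my_devmem_ops, dpagemap, size);
 *	}
 */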
227199624bdfSMatthew Brost 
227299624bdfSMatthew Brost MODULE_DESCRIPTION("DRM GPUSVM");
227399624bdfSMatthew Brost MODULE_LICENSE("GPL");
227499624bdfSMatthew Brost