1 // SPDX-License-Identifier: GPL-2.0-only OR MIT
2 /*
3  * Copyright © 2024 Intel Corporation
4  *
5  * Authors:
6  *     Matthew Brost <matthew.brost@intel.com>
7  */
8 
9 #include <linux/dma-mapping.h>
10 #include <linux/export.h>
11 #include <linux/hmm.h>
12 #include <linux/hugetlb_inline.h>
13 #include <linux/memremap.h>
14 #include <linux/mm_types.h>
15 #include <linux/slab.h>
16 
17 #include <drm/drm_device.h>
18 #include <drm/drm_gpusvm.h>
19 #include <drm/drm_pagemap.h>
20 #include <drm/drm_print.h>
21 
22 /**
23  * DOC: Overview
24  *
25  * GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering Manager (DRM)
26  * is a component of the DRM framework designed to manage shared virtual memory
27  * between the CPU and GPU. It enables efficient data exchange and processing
28  * for GPU-accelerated applications by allowing memory sharing and
29  * synchronization between the CPU's and GPU's virtual address spaces.
30  *
31  * Key GPU SVM Components:
32  *
33  * - Notifiers:
34  *	Used for tracking memory intervals and notifying the GPU of changes,
35  *	notifiers are sized based on a GPU SVM initialization parameter, with a
36  *	recommendation of 512M or larger. They maintain a Red-Black tree and a
37  *	list of ranges that fall within the notifier interval.  Notifiers are
38  *	tracked within a GPU SVM Red-Black tree and list and are dynamically
39  *	inserted or removed as ranges within the interval are created or
40  *	destroyed.
41  * - Ranges:
42  *	Represent memory ranges mapped in a DRM device and managed by GPU SVM.
43  *	They are sized based on an array of chunk sizes, which is a GPU SVM
44  *	initialization parameter, and the CPU address space.  Upon GPU fault,
45  *	the largest aligned chunk that fits within the faulting CPU address
46  *	space is chosen for the range size. Ranges are expected to be
47  *	dynamically allocated on GPU fault and removed on an MMU notifier UNMAP
48  *	event. As mentioned above, ranges are tracked in a notifier's Red-Black
49  *	tree.
50  *
51  * - Operations:
52  *	Define the interface for driver-specific GPU SVM operations such as
53  *	range allocation, notifier allocation, and invalidations.
54  *
55  * - Device Memory Allocations:
56  *	Embedded structure containing enough information for GPU SVM to migrate
57  *	to / from device memory.
58  *
59  * - Device Memory Operations:
60  *	Define the interface for driver-specific device memory operations:
61  *	release memory, populate pfns, and copy to / from device memory.
62  *
63  * This layer provides interfaces for allocating, mapping, migrating, and
64  * releasing memory ranges between the CPU and GPU. It handles all core memory
65  * management interactions (DMA mapping, HMM, and migration) and provides
66  * driver-specific virtual functions (vfuncs). This infrastructure is sufficient
67  * to build the expected driver components for an SVM implementation as detailed
68  * below.
69  *
70  * Expected Driver Components:
71  *
72  * - GPU page fault handler:
73  *	Used to create ranges and notifiers based on the fault address,
74  *	optionally migrate the range to device memory, and create GPU bindings.
75  *
76  * - Garbage collector:
77  *	Used to unmap and destroy GPU bindings for ranges.  Ranges are expected
78  *	to be added to the garbage collector upon a MMU_NOTIFY_UNMAP event in
79  *	notifier callback.
80  *
81  * - Notifier callback:
82  *	Used to invalidate and DMA unmap GPU bindings for ranges.
83  */
84 
85 /**
86  * DOC: Locking
87  *
88  * GPU SVM handles locking for core MM interactions, i.e., it locks/unlocks the
89  * mmap lock as needed.
90  *
91  * GPU SVM introduces a global notifier lock, which safeguards the notifier's
92  * range RB tree and list, as well as the range's DMA mappings and sequence
93  * number. GPU SVM manages all necessary locking and unlocking operations,
94  * except for rechecking that a range's pages are still valid
95  * (drm_gpusvm_range_pages_valid) when the driver is committing GPU bindings.
96  * This lock corresponds to the ``driver->update`` lock mentioned in
97  * Documentation/mm/hmm.rst. Future revisions may transition from a GPU SVM
98  * global lock to a per-notifier lock if finer-grained locking is deemed
99  * necessary.
100  *
101  * In addition to the locking mentioned above, the driver should implement a
102  * lock to safeguard core GPU SVM function calls that modify state, such as
103  * drm_gpusvm_range_find_or_insert and drm_gpusvm_range_remove. This lock is
104  * denoted as 'driver_svm_lock' in code examples. Finer-grained driver-side
105  * locking should also be possible for concurrent GPU fault processing within a
106  * single GPU SVM. The 'driver_svm_lock' can be set via drm_gpusvm_driver_set_lock
107  * to add locking annotations to GPU SVM.
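 *
 * A minimal sketch of this driver-side lock is shown below. The structure and
 * lock names are illustrative assumptions, and the sketch assumes
 * drm_gpusvm_driver_set_lock() accepts the GPU SVM and the driver lock used to
 * serialize state-modifying GPU SVM calls.
 *
 * .. code-block:: c
 *
 *	// Hypothetical driver object embedding a GPU SVM instance
 *	struct driver_svm {
 *		struct drm_gpusvm gpusvm;
 *		struct mutex driver_svm_lock;
 *	};
 *
 *	void driver_svm_lock_init(struct driver_svm *svm)
 *	{
 *		mutex_init(&svm->driver_svm_lock);
 *		// Register the lock so GPU SVM can assert it is held
 *		drm_gpusvm_driver_set_lock(&svm->gpusvm, &svm->driver_svm_lock);
 *	}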
108  */
109 
110 /**
111  * DOC: Partial Unmapping of Ranges
112  *
113  * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by the CPU,
114  * resulting in an MMU_NOTIFY_UNMAP event) presents several challenges, with the main one
115  * being that a subset of the range still has CPU and GPU mappings. If the
116  * backing store for the range is in device memory, a subset of the backing
117  * store has references. One option would be to split the range and device
118  * memory backing store, but the implementation for this would be quite
119  * complicated. Given that partial unmappings are rare and driver-defined range
120  * sizes are relatively small, GPU SVM does not support splitting of ranges.
121  *
122  * With no support for range splitting, upon partial unmapping of a range, the
123  * driver is expected to invalidate and destroy the entire range. If the range
124  * has device memory as its backing, the driver is also expected to migrate any
125  * remaining pages back to RAM.
126  */
127 
128 /**
129  * DOC: Examples
130  *
131  * This section provides three examples of how to build the expected driver
132  * components: the GPU page fault handler, the garbage collector, and the
133  * notifier callback.
134  *
135  * The generic code provided does not include logic for complex migration
136  * policies, optimized invalidations, fine-grained driver locking, or other
137  * potentially required driver locking (e.g., DMA-resv locks).
138  *
139  * 1) GPU page fault handler
140  *
141  * .. code-block:: c
142  *
143  *	int driver_bind_range(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range)
144  *	{
145  *		int err = 0;
146  *
147  *		driver_alloc_and_setup_memory_for_bind(gpusvm, range);
148  *
149  *		drm_gpusvm_notifier_lock(gpusvm);
150  *		if (drm_gpusvm_range_pages_valid(range))
151  *			driver_commit_bind(gpusvm, range);
152  *		else
153  *			err = -EAGAIN;
154  *		drm_gpusvm_notifier_unlock(gpusvm);
155  *
156  *		return err;
157  *	}
158  *
159  *	int driver_gpu_fault(struct drm_gpusvm *gpusvm, unsigned long fault_addr,
160  *			     unsigned long gpuva_start, unsigned long gpuva_end)
161  *	{
162  *		struct drm_gpusvm_ctx ctx = {};
 *		struct drm_gpusvm_range *range;
163  *		int err;
164  *
165  *		driver_svm_lock();
166  *	retry:
167  *		// Always process UNMAPs first so view of GPU SVM ranges is current
168  *		driver_garbage_collector(gpusvm);
169  *
170  *		range = drm_gpusvm_range_find_or_insert(gpusvm, fault_addr,
171  *							gpuva_start, gpuva_end,
172  *						        &ctx);
173  *		if (IS_ERR(range)) {
174  *			err = PTR_ERR(range);
175  *			goto unlock;
176  *		}
177  *
178  *		if (driver_migration_policy(range)) {
179  *			err = drm_pagemap_populate_mm(driver_choose_drm_pagemap(),
180  *						      gpuva_start, gpuva_end, gpusvm->mm,
181  *						      ctx.timeslice_ms);
182  *			if (err)	// CPU mappings may have changed
183  *				goto retry;
184  *		}
185  *
186  *		err = drm_gpusvm_range_get_pages(gpusvm, range, &ctx);
187  *		if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {	// CPU mappings changed
188  *			if (err == -EOPNOTSUPP)
189  *				drm_gpusvm_range_evict(gpusvm, range);
190  *			goto retry;
191  *		} else if (err) {
192  *			goto unlock;
193  *		}
194  *
195  *		err = driver_bind_range(gpusvm, range);
196  *		if (err == -EAGAIN)	// CPU mappings changed
197  *			goto retry;
198  *
199  *	unlock:
200  *		driver_svm_unlock();
201  *		return err;
202  *	}
203  *
204  * 2) Garbage Collector
205  *
206  * .. code-block:: c
207  *
208  *	void __driver_garbage_collector(struct drm_gpusvm *gpusvm,
209  *					struct drm_gpusvm_range *range)
210  *	{
211  *		assert_driver_svm_locked(gpusvm);
212  *
213  *		// Partial unmap, migrate any remaining device memory pages back to RAM
214  *		if (range->flags.partial_unmap)
215  *			drm_gpusvm_range_evict(gpusvm, range);
216  *
217  *		driver_unbind_range(range);
218  *		drm_gpusvm_range_remove(gpusvm, range);
219  *	}
220  *
221  *	void driver_garbage_collector(struct drm_gpusvm *gpusvm)
222  *	{
223  *		assert_driver_svm_locked(gpusvm);
224  *
225  *		for_each_range_in_garbage_collector(gpusvm, range)
226  *			__driver_garbage_collector(gpusvm, range);
227  *	}
228  *
229  * 3) Notifier callback
230  *
231  * .. code-block:: c
232  *
233  *	void driver_invalidation(struct drm_gpusvm *gpusvm,
234  *				 struct drm_gpusvm_notifier *notifier,
235  *				 const struct mmu_notifier_range *mmu_range)
236  *	{
237  *		struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
238  *		struct drm_gpusvm_range *range = NULL;
239  *
240  *		driver_invalidate_device_pages(gpusvm, mmu_range->start, mmu_range->end);
241  *
242  *		drm_gpusvm_for_each_range(range, notifier, mmu_range->start,
243  *					  mmu_range->end) {
244  *			drm_gpusvm_range_unmap_pages(gpusvm, range, &ctx);
245  *
246  *			if (mmu_range->event != MMU_NOTIFY_UNMAP)
247  *				continue;
248  *
249  *			drm_gpusvm_range_set_unmapped(range, mmu_range);
250  *			driver_garbage_collector_add(gpusvm, range);
251  *		}
252  *	}
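 *
 * As a supplement to the three examples above, the sketch below shows one
 * possible way to tie the pieces together at driver initialization time. The
 * driver structure, chunk sizes, notifier size, and use of current->mm are
 * assumptions for illustration; the drm_gpusvm_init() arguments follow the
 * function documented later in this file, and only ops->invalidate is
 * mandatory.
 *
 * .. code-block:: c
 *
 *	static const struct drm_gpusvm_ops driver_gpusvm_ops = {
 *		.invalidate = driver_invalidation,
 *	};
 *
 *	int driver_svm_init(struct driver_svm *svm, struct drm_device *drm)
 *	{
 *		// Powers of 2 in descending order, last entry SZ_4K
 *		static const unsigned long chunk_sizes[] = { SZ_2M, SZ_64K, SZ_4K };
 *
 *		// NULL device_private_page_owner: no device memory migration here
 *		return drm_gpusvm_init(&svm->gpusvm, "driver-svm", drm,
 *				       current->mm, NULL, 0, TASK_SIZE, SZ_512M,
 *				       &driver_gpusvm_ops, chunk_sizes,
 *				       ARRAY_SIZE(chunk_sizes));
 *	}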
253  */
254 
255 /**
256  * npages_in_range() - Calculate the number of pages in a given range
257  * @start: The start address of the range
258  * @end: The end address of the range
259  *
260  * This function calculates the number of pages in a given memory range,
261  * specified by the start and end addresses. It divides the difference
262  * between the end and start addresses by the page size (PAGE_SIZE) to
263  * determine the number of pages in the range.
264  *
265  * Return: The number of pages in the specified range.
266  */
267 static unsigned long
268 npages_in_range(unsigned long start, unsigned long end)
269 {
270 	return (end - start) >> PAGE_SHIFT;
271 }
272 
273 /**
274  * drm_gpusvm_range_find() - Find GPU SVM range from GPU SVM notifier
275  * @notifier: Pointer to the GPU SVM notifier structure.
276  * @start: Start address of the range
277  * @end: End address of the range
278  *
279  * Return: A pointer to the drm_gpusvm_range if found or NULL
280  */
281 struct drm_gpusvm_range *
282 drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start,
283 		      unsigned long end)
284 {
285 	struct interval_tree_node *itree;
286 
287 	itree = interval_tree_iter_first(&notifier->root, start, end - 1);
288 
289 	if (itree)
290 		return container_of(itree, struct drm_gpusvm_range, itree);
291 	else
292 		return NULL;
293 }
294 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find);
295 
296 /**
297  * drm_gpusvm_for_each_range_safe() - Safely iterate over GPU SVM ranges in a notifier
298  * @range__: Iterator variable for the ranges
299  * @next__: Iterator variable for the ranges temporary storage
300  * @notifier__: Pointer to the GPU SVM notifier
301  * @start__: Start address of the range
302  * @end__: End address of the range
303  *
304  * This macro is used to iterate over GPU SVM ranges in a notifier while
305  * removing ranges from it.
306  */
307 #define drm_gpusvm_for_each_range_safe(range__, next__, notifier__, start__, end__)	\
308 	for ((range__) = drm_gpusvm_range_find((notifier__), (start__), (end__)),	\
309 	     (next__) = __drm_gpusvm_range_next(range__);				\
310 	     (range__) && (drm_gpusvm_range_start(range__) < (end__));			\
311 	     (range__) = (next__), (next__) = __drm_gpusvm_range_next(range__))
312 
313 /**
314  * __drm_gpusvm_notifier_next() - get the next drm_gpusvm_notifier in the list
315  * @notifier: a pointer to the current drm_gpusvm_notifier
316  *
317  * Return: A pointer to the next drm_gpusvm_notifier if available, or NULL if
318  *         the current notifier is the last one or if the input notifier is
319  *         NULL.
320  */
321 static struct drm_gpusvm_notifier *
322 __drm_gpusvm_notifier_next(struct drm_gpusvm_notifier *notifier)
323 {
324 	if (notifier && !list_is_last(&notifier->entry,
325 				      &notifier->gpusvm->notifier_list))
326 		return list_next_entry(notifier, entry);
327 
328 	return NULL;
329 }
330 
331 static struct drm_gpusvm_notifier *
332 notifier_iter_first(struct rb_root_cached *root, unsigned long start,
333 		    unsigned long last)
334 {
335 	struct interval_tree_node *itree;
336 
337 	itree = interval_tree_iter_first(root, start, last);
338 
339 	if (itree)
340 		return container_of(itree, struct drm_gpusvm_notifier, itree);
341 	else
342 		return NULL;
343 }
344 
345 /**
346  * drm_gpusvm_for_each_notifier() - Iterate over GPU SVM notifiers in a gpusvm
347  * @notifier__: Iterator variable for the notifiers
348  * @gpusvm__: Pointer to the GPU SVM structure
349  * @start__: Start address of the notifier
350  * @end__: End address of the notifier
351  *
352  * This macro is used to iterate over GPU SVM notifiers in a gpusvm.
353  */
354 #define drm_gpusvm_for_each_notifier(notifier__, gpusvm__, start__, end__)		\
355 	for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1);	\
356 	     (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__));		\
357 	     (notifier__) = __drm_gpusvm_notifier_next(notifier__))
358 
359 /**
360  * drm_gpusvm_for_each_notifier_safe() - Safely iterate over GPU SVM notifiers in a gpusvm
361  * @notifier__: Iterator variable for the notifiers
362  * @next__: Iterator variable for the notifiers temporary storage
363  * @gpusvm__: Pointer to the GPU SVM structure
364  * @start__: Start address of the notifier
365  * @end__: End address of the notifier
366  *
367  * This macro is used to iterate over GPU SVM notifiers in a gpusvm while
368  * removing notifiers from it.
369  */
370 #define drm_gpusvm_for_each_notifier_safe(notifier__, next__, gpusvm__, start__, end__)	\
371 	for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1),	\
372 	     (next__) = __drm_gpusvm_notifier_next(notifier__);				\
373 	     (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__));		\
374 	     (notifier__) = (next__), (next__) = __drm_gpusvm_notifier_next(notifier__))
375 
376 /**
377  * drm_gpusvm_notifier_invalidate() - Invalidate a GPU SVM notifier.
378  * @mni: Pointer to the mmu_interval_notifier structure.
379  * @mmu_range: Pointer to the mmu_notifier_range structure.
380  * @cur_seq: Current sequence number.
381  *
382  * This function serves as a generic MMU notifier for GPU SVM. It sets the MMU
383  * notifier sequence number and calls the driver invalidate vfunc under
384  * gpusvm->notifier_lock.
385  *
386  * Return: true if the operation succeeds, false otherwise.
387  */
388 static bool
389 drm_gpusvm_notifier_invalidate(struct mmu_interval_notifier *mni,
390 			       const struct mmu_notifier_range *mmu_range,
391 			       unsigned long cur_seq)
392 {
393 	struct drm_gpusvm_notifier *notifier =
394 		container_of(mni, typeof(*notifier), notifier);
395 	struct drm_gpusvm *gpusvm = notifier->gpusvm;
396 
397 	if (!mmu_notifier_range_blockable(mmu_range))
398 		return false;
399 
400 	down_write(&gpusvm->notifier_lock);
401 	mmu_interval_set_seq(mni, cur_seq);
402 	gpusvm->ops->invalidate(gpusvm, notifier, mmu_range);
403 	up_write(&gpusvm->notifier_lock);
404 
405 	return true;
406 }
407 
408 /*
409  * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM
410  */
411 static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = {
412 	.invalidate = drm_gpusvm_notifier_invalidate,
413 };
414 
415 /**
416  * drm_gpusvm_init() - Initialize the GPU SVM.
417  * @gpusvm: Pointer to the GPU SVM structure.
418  * @name: Name of the GPU SVM.
419  * @drm: Pointer to the DRM device structure.
420  * @mm: Pointer to the mm_struct for the address space.
421  * @device_private_page_owner: Device private pages owner.
422  * @mm_start: Start address of GPU SVM.
423  * @mm_range: Range of the GPU SVM.
424  * @notifier_size: Size of individual notifiers.
425  * @ops: Pointer to the operations structure for GPU SVM.
426  * @chunk_sizes: Pointer to the array of chunk sizes used in range allocation.
427  *               Entries should be powers of 2 in descending order with last
428  *               entry being SZ_4K.
429  * @num_chunks: Number of chunks.
430  *
431  * This function initializes the GPU SVM.
432  *
433  * Return: 0 on success, a negative error code on failure.
434  */
435 int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
436 		    const char *name, struct drm_device *drm,
437 		    struct mm_struct *mm, void *device_private_page_owner,
438 		    unsigned long mm_start, unsigned long mm_range,
439 		    unsigned long notifier_size,
440 		    const struct drm_gpusvm_ops *ops,
441 		    const unsigned long *chunk_sizes, int num_chunks)
442 {
443 	if (!ops->invalidate || !num_chunks)
444 		return -EINVAL;
445 
446 	gpusvm->name = name;
447 	gpusvm->drm = drm;
448 	gpusvm->mm = mm;
449 	gpusvm->device_private_page_owner = device_private_page_owner;
450 	gpusvm->mm_start = mm_start;
451 	gpusvm->mm_range = mm_range;
452 	gpusvm->notifier_size = notifier_size;
453 	gpusvm->ops = ops;
454 	gpusvm->chunk_sizes = chunk_sizes;
455 	gpusvm->num_chunks = num_chunks;
456 
457 	mmgrab(mm);
458 	gpusvm->root = RB_ROOT_CACHED;
459 	INIT_LIST_HEAD(&gpusvm->notifier_list);
460 
461 	init_rwsem(&gpusvm->notifier_lock);
462 
463 	fs_reclaim_acquire(GFP_KERNEL);
464 	might_lock(&gpusvm->notifier_lock);
465 	fs_reclaim_release(GFP_KERNEL);
466 
467 #ifdef CONFIG_LOCKDEP
468 	gpusvm->lock_dep_map = NULL;
469 #endif
470 
471 	return 0;
472 }
473 EXPORT_SYMBOL_GPL(drm_gpusvm_init);
474 
475 /**
476  * drm_gpusvm_notifier_find() - Find GPU SVM notifier
477  * @gpusvm: Pointer to the GPU SVM structure
478  * @fault_addr: Fault address
479  *
480  * This function finds the GPU SVM notifier associated with the fault address.
481  *
482  * Return: Pointer to the GPU SVM notifier on success, NULL otherwise.
483  */
484 static struct drm_gpusvm_notifier *
485 drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm,
486 			 unsigned long fault_addr)
487 {
488 	return notifier_iter_first(&gpusvm->root, fault_addr, fault_addr + 1);
489 }
490 
491 /**
492  * to_drm_gpusvm_notifier() - retrieve the container struct for a given rbtree node
493  * @node: a pointer to the rbtree node embedded within a drm_gpusvm_notifier struct
494  *
495  * Return: A pointer to the containing drm_gpusvm_notifier structure.
496  */
497 static struct drm_gpusvm_notifier *to_drm_gpusvm_notifier(struct rb_node *node)
498 {
499 	return container_of(node, struct drm_gpusvm_notifier, itree.rb);
500 }
501 
502 /**
503  * drm_gpusvm_notifier_insert() - Insert GPU SVM notifier
504  * @gpusvm: Pointer to the GPU SVM structure
505  * @notifier: Pointer to the GPU SVM notifier structure
506  *
507  * This function inserts the GPU SVM notifier into the GPU SVM RB tree and list.
508  */
509 static void drm_gpusvm_notifier_insert(struct drm_gpusvm *gpusvm,
510 				       struct drm_gpusvm_notifier *notifier)
511 {
512 	struct rb_node *node;
513 	struct list_head *head;
514 
515 	interval_tree_insert(&notifier->itree, &gpusvm->root);
516 
517 	node = rb_prev(&notifier->itree.rb);
518 	if (node)
519 		head = &(to_drm_gpusvm_notifier(node))->entry;
520 	else
521 		head = &gpusvm->notifier_list;
522 
523 	list_add(&notifier->entry, head);
524 }
525 
526 /**
527  * drm_gpusvm_notifier_remove() - Remove GPU SVM notifier
528  * @gpusvm: Pointer to the GPU SVM structure
529  * @notifier: Pointer to the GPU SVM notifier structure
530  *
531  * This function removes the GPU SVM notifier from the GPU SVM RB tree and list.
532  */
533 static void drm_gpusvm_notifier_remove(struct drm_gpusvm *gpusvm,
534 				       struct drm_gpusvm_notifier *notifier)
535 {
536 	interval_tree_remove(&notifier->itree, &gpusvm->root);
537 	list_del(&notifier->entry);
538 }
539 
540 /**
541  * drm_gpusvm_fini() - Finalize the GPU SVM.
542  * @gpusvm: Pointer to the GPU SVM structure.
543  *
544  * This function finalizes the GPU SVM by cleaning up any remaining ranges and
545  * notifiers, and dropping a reference to struct MM.
546  */
547 void drm_gpusvm_fini(struct drm_gpusvm *gpusvm)
548 {
549 	struct drm_gpusvm_notifier *notifier, *next;
550 
551 	drm_gpusvm_for_each_notifier_safe(notifier, next, gpusvm, 0, LONG_MAX) {
552 		struct drm_gpusvm_range *range, *__next;
553 
554 		/*
555 		 * Remove notifier first to avoid racing with any invalidation
556 		 */
557 		mmu_interval_notifier_remove(&notifier->notifier);
558 		notifier->flags.removed = true;
559 
560 		drm_gpusvm_for_each_range_safe(range, __next, notifier, 0,
561 					       LONG_MAX)
562 			drm_gpusvm_range_remove(gpusvm, range);
563 	}
564 
565 	mmdrop(gpusvm->mm);
566 	WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root));
567 }
568 EXPORT_SYMBOL_GPL(drm_gpusvm_fini);
569 
570 /**
571  * drm_gpusvm_notifier_alloc() - Allocate GPU SVM notifier
572  * @gpusvm: Pointer to the GPU SVM structure
573  * @fault_addr: Fault address
574  *
575  * This function allocates and initializes the GPU SVM notifier structure.
576  *
577  * Return: Pointer to the allocated GPU SVM notifier on success, ERR_PTR() on failure.
578  */
579 static struct drm_gpusvm_notifier *
580 drm_gpusvm_notifier_alloc(struct drm_gpusvm *gpusvm, unsigned long fault_addr)
581 {
582 	struct drm_gpusvm_notifier *notifier;
583 
584 	if (gpusvm->ops->notifier_alloc)
585 		notifier = gpusvm->ops->notifier_alloc();
586 	else
587 		notifier = kzalloc(sizeof(*notifier), GFP_KERNEL);
588 
589 	if (!notifier)
590 		return ERR_PTR(-ENOMEM);
591 
592 	notifier->gpusvm = gpusvm;
593 	notifier->itree.start = ALIGN_DOWN(fault_addr, gpusvm->notifier_size);
594 	notifier->itree.last = ALIGN(fault_addr + 1, gpusvm->notifier_size) - 1;
595 	INIT_LIST_HEAD(&notifier->entry);
596 	notifier->root = RB_ROOT_CACHED;
597 	INIT_LIST_HEAD(&notifier->range_list);
598 
599 	return notifier;
600 }
601 
602 /**
603  * drm_gpusvm_notifier_free() - Free GPU SVM notifier
604  * @gpusvm: Pointer to the GPU SVM structure
605  * @notifier: Pointer to the GPU SVM notifier structure
606  *
607  * This function frees the GPU SVM notifier structure.
608  */
609 static void drm_gpusvm_notifier_free(struct drm_gpusvm *gpusvm,
610 				     struct drm_gpusvm_notifier *notifier)
611 {
612 	WARN_ON(!RB_EMPTY_ROOT(&notifier->root.rb_root));
613 
614 	if (gpusvm->ops->notifier_free)
615 		gpusvm->ops->notifier_free(notifier);
616 	else
617 		kfree(notifier);
618 }
619 
620 /**
621  * to_drm_gpusvm_range() - retrieve the container struct for a given rbtree node
622  * @node: a pointer to the rbtree node embedded within a drm_gpusvm_range struct
623  *
624  * Return: A pointer to the containing drm_gpusvm_range structure.
625  */
626 static struct drm_gpusvm_range *to_drm_gpusvm_range(struct rb_node *node)
627 {
628 	return container_of(node, struct drm_gpusvm_range, itree.rb);
629 }
630 
631 /**
632  * drm_gpusvm_range_insert() - Insert GPU SVM range
633  * @notifier: Pointer to the GPU SVM notifier structure
634  * @range: Pointer to the GPU SVM range structure
635  *
636  * This function inserts the GPU SVM range into the notifier RB tree and list.
637  */
638 static void drm_gpusvm_range_insert(struct drm_gpusvm_notifier *notifier,
639 				    struct drm_gpusvm_range *range)
640 {
641 	struct rb_node *node;
642 	struct list_head *head;
643 
644 	drm_gpusvm_notifier_lock(notifier->gpusvm);
645 	interval_tree_insert(&range->itree, &notifier->root);
646 
647 	node = rb_prev(&range->itree.rb);
648 	if (node)
649 		head = &(to_drm_gpusvm_range(node))->entry;
650 	else
651 		head = &notifier->range_list;
652 
653 	list_add(&range->entry, head);
654 	drm_gpusvm_notifier_unlock(notifier->gpusvm);
655 }
656 
657 /**
658  * __drm_gpusvm_range_remove() - Remove GPU SVM range
659  * @notifier: Pointer to the GPU SVM notifier structure
660  * @range: Pointer to the GPU SVM range structure
661  *
662  * This macro removes the GPU SVM range from the notifier RB tree and list.
663  */
664 static void __drm_gpusvm_range_remove(struct drm_gpusvm_notifier *notifier,
665 				      struct drm_gpusvm_range *range)
666 {
667 	interval_tree_remove(&range->itree, &notifier->root);
668 	list_del(&range->entry);
669 }
670 
671 /**
672  * drm_gpusvm_range_alloc() - Allocate GPU SVM range
673  * @gpusvm: Pointer to the GPU SVM structure
674  * @notifier: Pointer to the GPU SVM notifier structure
675  * @fault_addr: Fault address
676  * @chunk_size: Chunk size
677  * @migrate_devmem: Flag indicating whether to migrate device memory
678  *
679  * This function allocates and initializes the GPU SVM range structure.
680  *
681  * Return: Pointer to the allocated GPU SVM range on success, ERR_PTR() on failure.
682  */
683 static struct drm_gpusvm_range *
684 drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm,
685 		       struct drm_gpusvm_notifier *notifier,
686 		       unsigned long fault_addr, unsigned long chunk_size,
687 		       bool migrate_devmem)
688 {
689 	struct drm_gpusvm_range *range;
690 
691 	if (gpusvm->ops->range_alloc)
692 		range = gpusvm->ops->range_alloc(gpusvm);
693 	else
694 		range = kzalloc(sizeof(*range), GFP_KERNEL);
695 
696 	if (!range)
697 		return ERR_PTR(-ENOMEM);
698 
699 	kref_init(&range->refcount);
700 	range->gpusvm = gpusvm;
701 	range->notifier = notifier;
702 	range->itree.start = ALIGN_DOWN(fault_addr, chunk_size);
703 	range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1;
704 	INIT_LIST_HEAD(&range->entry);
705 	range->notifier_seq = LONG_MAX;
706 	range->flags.migrate_devmem = migrate_devmem ? 1 : 0;
707 
708 	return range;
709 }
710 
711 /**
712  * drm_gpusvm_check_pages() - Check pages
713  * @gpusvm: Pointer to the GPU SVM structure
714  * @notifier: Pointer to the GPU SVM notifier structure
715  * @start: Start address
716  * @end: End address
717  *
718  * Check if pages between start and end have been faulted in on the CPU. Used to
719  * prevent migration of pages without CPU backing store.
720  *
721  * Return: True if pages have been faulted into CPU, False otherwise
722  */
723 static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm,
724 				   struct drm_gpusvm_notifier *notifier,
725 				   unsigned long start, unsigned long end)
726 {
727 	struct hmm_range hmm_range = {
728 		.default_flags = 0,
729 		.notifier = &notifier->notifier,
730 		.start = start,
731 		.end = end,
732 		.dev_private_owner = gpusvm->device_private_page_owner,
733 	};
734 	unsigned long timeout =
735 		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
736 	unsigned long *pfns;
737 	unsigned long npages = npages_in_range(start, end);
738 	int err, i;
739 
740 	mmap_assert_locked(gpusvm->mm);
741 
742 	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
743 	if (!pfns)
744 		return false;
745 
746 	hmm_range.notifier_seq = mmu_interval_read_begin(&notifier->notifier);
747 	hmm_range.hmm_pfns = pfns;
748 
749 	while (true) {
750 		err = hmm_range_fault(&hmm_range);
751 		if (err == -EBUSY) {
752 			if (time_after(jiffies, timeout))
753 				break;
754 
755 			hmm_range.notifier_seq =
756 				mmu_interval_read_begin(&notifier->notifier);
757 			continue;
758 		}
759 		break;
760 	}
761 	if (err)
762 		goto err_free;
763 
764 	for (i = 0; i < npages;) {
765 		if (!(pfns[i] & HMM_PFN_VALID)) {
766 			err = -EFAULT;
767 			goto err_free;
768 		}
769 		i += 0x1 << hmm_pfn_to_map_order(pfns[i]);
770 	}
771 
772 err_free:
773 	kvfree(pfns);
774 	return err ? false : true;
775 }
776 
777 /**
778  * drm_gpusvm_range_chunk_size() - Determine chunk size for GPU SVM range
779  * @gpusvm: Pointer to the GPU SVM structure
780  * @notifier: Pointer to the GPU SVM notifier structure
781  * @vas: Pointer to the virtual memory area structure
782  * @fault_addr: Fault address
783  * @gpuva_start: Start address of GPUVA which mirrors CPU
784  * @gpuva_end: End address of GPUVA which mirrors CPU
785  * @check_pages_threshold: Check CPU pages for present threshold
786  *
787  * This function determines the chunk size for the GPU SVM range based on the
788  * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual
789  * memory area boundaries.
790  *
791  * Return: Chunk size on success, LONG_MAX on failure.
792  */
793 static unsigned long
794 drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm,
795 			    struct drm_gpusvm_notifier *notifier,
796 			    struct vm_area_struct *vas,
797 			    unsigned long fault_addr,
798 			    unsigned long gpuva_start,
799 			    unsigned long gpuva_end,
800 			    unsigned long check_pages_threshold)
801 {
802 	unsigned long start, end;
803 	int i = 0;
804 
805 retry:
806 	for (; i < gpusvm->num_chunks; ++i) {
807 		start = ALIGN_DOWN(fault_addr, gpusvm->chunk_sizes[i]);
808 		end = ALIGN(fault_addr + 1, gpusvm->chunk_sizes[i]);
809 
810 		if (start >= vas->vm_start && end <= vas->vm_end &&
811 		    start >= drm_gpusvm_notifier_start(notifier) &&
812 		    end <= drm_gpusvm_notifier_end(notifier) &&
813 		    start >= gpuva_start && end <= gpuva_end)
814 			break;
815 	}
816 
817 	if (i == gpusvm->num_chunks)
818 		return LONG_MAX;
819 
820 	/*
821 	 * If the allocation is larger than a page, ensure it does not overlap with
822 	 * existing ranges.
823 	 */
824 	if (end - start != SZ_4K) {
825 		struct drm_gpusvm_range *range;
826 
827 		range = drm_gpusvm_range_find(notifier, start, end);
828 		if (range) {
829 			++i;
830 			goto retry;
831 		}
832 
833 		/*
834 		 * XXX: Only create range on pages CPU has faulted in. Without
835 		 * this check, or prefault, on BMG 'xe_exec_system_allocator --r
836 		 * process-many-malloc' fails. In the failure case, each process
837 		 * mallocs 16k but the CPU VMA is ~128k which results in 64k SVM
838 		 * ranges. When migrating the SVM ranges, some processes fail in
839 		 * drm_pagemap_migrate_to_devmem with 'migrate.cpages != npages'
840 		 * and then upon drm_gpusvm_range_get_pages device pages from
841 		 * other processes are collected + faulted in which creates all
842 		 * sorts of problems. Unsure exactly how this is happening; the
843 		 * problem also goes away if 'xe_exec_system_allocator --r
844 		 * process-many-malloc' mallocs at least 64k at a time.
845 		 */
846 		if (end - start <= check_pages_threshold &&
847 		    !drm_gpusvm_check_pages(gpusvm, notifier, start, end)) {
848 			++i;
849 			goto retry;
850 		}
851 	}
852 
853 	return end - start;
854 }
855 
856 #ifdef CONFIG_LOCKDEP
857 /**
858  * drm_gpusvm_driver_lock_held() - Assert GPU SVM driver lock is held
859  * @gpusvm: Pointer to the GPU SVM structure.
860  *
861  * Ensure driver lock is held.
862  */
863 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm)
864 {
865 	if ((gpusvm)->lock_dep_map)
866 		lockdep_assert(lock_is_held_type((gpusvm)->lock_dep_map, 0));
867 }
868 #else
869 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm)
870 {
871 }
872 #endif
873 
874 /**
875  * drm_gpusvm_find_vma_start() - Find start address for first VMA in range
876  * @gpusvm: Pointer to the GPU SVM structure
877  * @start: The inclusive start user address.
878  * @end: The exclusive end user address.
879  *
880  * Return: The start address of the first VMA within the provided range,
881  * ULONG_MAX otherwise. Assumes start < end.
882  */
883 unsigned long
884 drm_gpusvm_find_vma_start(struct drm_gpusvm *gpusvm,
885 			  unsigned long start,
886 			  unsigned long end)
887 {
888 	struct mm_struct *mm = gpusvm->mm;
889 	struct vm_area_struct *vma;
890 	unsigned long addr = ULONG_MAX;
891 
892 	if (!mmget_not_zero(mm))
893 		return addr;
894 
895 	mmap_read_lock(mm);
896 
897 	vma = find_vma_intersection(mm, start, end);
898 	if (vma)
899 		addr = vma->vm_start;
900 
901 	mmap_read_unlock(mm);
902 	mmput(mm);
903 
904 	return addr;
905 }
906 EXPORT_SYMBOL_GPL(drm_gpusvm_find_vma_start);
907 
908 /**
909  * drm_gpusvm_range_find_or_insert() - Find or insert GPU SVM range
910  * @gpusvm: Pointer to the GPU SVM structure
911  * @fault_addr: Fault address
912  * @gpuva_start: Start address of GPUVA which mirrors CPU
913  * @gpuva_end: End address of GPUVA which mirrors CPU
914  * @ctx: GPU SVM context
915  *
916  * This function finds or inserts a newly allocated GPU SVM range based on the
917  * fault address. Caller must hold a lock to protect range lookup and insertion.
918  *
919  * Return: Pointer to the GPU SVM range on success, ERR_PTR() on failure.
920  */
921 struct drm_gpusvm_range *
922 drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm,
923 				unsigned long fault_addr,
924 				unsigned long gpuva_start,
925 				unsigned long gpuva_end,
926 				const struct drm_gpusvm_ctx *ctx)
927 {
928 	struct drm_gpusvm_notifier *notifier;
929 	struct drm_gpusvm_range *range;
930 	struct mm_struct *mm = gpusvm->mm;
931 	struct vm_area_struct *vas;
932 	bool notifier_alloc = false;
933 	unsigned long chunk_size;
934 	int err;
935 	bool migrate_devmem;
936 
937 	drm_gpusvm_driver_lock_held(gpusvm);
938 
939 	if (fault_addr < gpusvm->mm_start ||
940 	    fault_addr > gpusvm->mm_start + gpusvm->mm_range)
941 		return ERR_PTR(-EINVAL);
942 
943 	if (!mmget_not_zero(mm))
944 		return ERR_PTR(-EFAULT);
945 
946 	notifier = drm_gpusvm_notifier_find(gpusvm, fault_addr);
947 	if (!notifier) {
948 		notifier = drm_gpusvm_notifier_alloc(gpusvm, fault_addr);
949 		if (IS_ERR(notifier)) {
950 			err = PTR_ERR(notifier);
951 			goto err_mmunlock;
952 		}
953 		notifier_alloc = true;
954 		err = mmu_interval_notifier_insert(&notifier->notifier,
955 						   mm,
956 						   drm_gpusvm_notifier_start(notifier),
957 						   drm_gpusvm_notifier_size(notifier),
958 						   &drm_gpusvm_notifier_ops);
959 		if (err)
960 			goto err_notifier;
961 	}
962 
963 	mmap_read_lock(mm);
964 
965 	vas = vma_lookup(mm, fault_addr);
966 	if (!vas) {
967 		err = -ENOENT;
968 		goto err_notifier_remove;
969 	}
970 
971 	if (!ctx->read_only && !(vas->vm_flags & VM_WRITE)) {
972 		err = -EPERM;
973 		goto err_notifier_remove;
974 	}
975 
976 	range = drm_gpusvm_range_find(notifier, fault_addr, fault_addr + 1);
977 	if (range)
978 		goto out_mmunlock;
979 	/*
980 	 * XXX: Short-circuiting migration based on migrate_vma_* current
981 	 * limitations. If/when migrate_vma_* add more support, this logic will
982 	 * have to change.
983 	 */
984 	migrate_devmem = ctx->devmem_possible &&
985 		vma_is_anonymous(vas) && !is_vm_hugetlb_page(vas);
986 
987 	chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas,
988 						 fault_addr, gpuva_start,
989 						 gpuva_end,
990 						 ctx->check_pages_threshold);
991 	if (chunk_size == LONG_MAX) {
992 		err = -EINVAL;
993 		goto err_notifier_remove;
994 	}
995 
996 	range = drm_gpusvm_range_alloc(gpusvm, notifier, fault_addr, chunk_size,
997 				       migrate_devmem);
998 	if (IS_ERR(range)) {
999 		err = PTR_ERR(range);
1000 		goto err_notifier_remove;
1001 	}
1002 
1003 	drm_gpusvm_range_insert(notifier, range);
1004 	if (notifier_alloc)
1005 		drm_gpusvm_notifier_insert(gpusvm, notifier);
1006 
1007 out_mmunlock:
1008 	mmap_read_unlock(mm);
1009 	mmput(mm);
1010 
1011 	return range;
1012 
1013 err_notifier_remove:
1014 	mmap_read_unlock(mm);
1015 	if (notifier_alloc)
1016 		mmu_interval_notifier_remove(&notifier->notifier);
1017 err_notifier:
1018 	if (notifier_alloc)
1019 		drm_gpusvm_notifier_free(gpusvm, notifier);
1020 err_mmunlock:
1021 	mmput(mm);
1022 	return ERR_PTR(err);
1023 }
1024 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert);
1025 
1026 /**
1027  * __drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range (internal)
1028  * @gpusvm: Pointer to the GPU SVM structure
1029  * @range: Pointer to the GPU SVM range structure
1030  * @npages: Number of pages to unmap
1031  *
1032  * This function unmaps pages associated with a GPU SVM range. Assumes and
1033  * asserts correct locking is in place when called.
1034  */
1035 static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
1036 					   struct drm_gpusvm_range *range,
1037 					   unsigned long npages)
1038 {
1039 	unsigned long i, j;
1040 	struct drm_pagemap *dpagemap = range->dpagemap;
1041 	struct device *dev = gpusvm->drm->dev;
1042 
1043 	lockdep_assert_held(&gpusvm->notifier_lock);
1044 
1045 	if (range->flags.has_dma_mapping) {
1046 		struct drm_gpusvm_range_flags flags = {
1047 			.__flags = range->flags.__flags,
1048 		};
1049 
1050 		for (i = 0, j = 0; i < npages; j++) {
1051 			struct drm_pagemap_device_addr *addr = &range->dma_addr[j];
1052 
1053 			if (addr->proto == DRM_INTERCONNECT_SYSTEM)
1054 				dma_unmap_page(dev,
1055 					       addr->addr,
1056 					       PAGE_SIZE << addr->order,
1057 					       addr->dir);
1058 			else if (dpagemap && dpagemap->ops->device_unmap)
1059 				dpagemap->ops->device_unmap(dpagemap,
1060 							    dev, *addr);
1061 			i += 1 << addr->order;
1062 		}
1063 
1064 		/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
1065 		flags.has_devmem_pages = false;
1066 		flags.has_dma_mapping = false;
1067 		WRITE_ONCE(range->flags.__flags, flags.__flags);
1068 
1069 		range->dpagemap = NULL;
1070 	}
1071 }
1072 
1073 /**
1074  * drm_gpusvm_range_free_pages() - Free pages associated with a GPU SVM range
1075  * @gpusvm: Pointer to the GPU SVM structure
1076  * @range: Pointer to the GPU SVM range structure
1077  *
1078  * This function frees the dma address array associated with a GPU SVM range.
1079  */
1080 static void drm_gpusvm_range_free_pages(struct drm_gpusvm *gpusvm,
1081 					struct drm_gpusvm_range *range)
1082 {
1083 	lockdep_assert_held(&gpusvm->notifier_lock);
1084 
1085 	if (range->dma_addr) {
1086 		kvfree(range->dma_addr);
1087 		range->dma_addr = NULL;
1088 	}
1089 }
1090 
1091 /**
1092  * drm_gpusvm_range_remove() - Remove GPU SVM range
1093  * @gpusvm: Pointer to the GPU SVM structure
1094  * @range: Pointer to the GPU SVM range to be removed
1095  *
1096  * This function removes the specified GPU SVM range and also removes the parent
1097  * GPU SVM notifier if no more ranges remain in the notifier. The caller must
1098  * hold a lock to protect range and notifier removal.
1099  */
1100 void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm,
1101 			     struct drm_gpusvm_range *range)
1102 {
1103 	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
1104 					       drm_gpusvm_range_end(range));
1105 	struct drm_gpusvm_notifier *notifier;
1106 
1107 	drm_gpusvm_driver_lock_held(gpusvm);
1108 
1109 	notifier = drm_gpusvm_notifier_find(gpusvm,
1110 					    drm_gpusvm_range_start(range));
1111 	if (WARN_ON_ONCE(!notifier))
1112 		return;
1113 
1114 	drm_gpusvm_notifier_lock(gpusvm);
1115 	__drm_gpusvm_range_unmap_pages(gpusvm, range, npages);
1116 	drm_gpusvm_range_free_pages(gpusvm, range);
1117 	__drm_gpusvm_range_remove(notifier, range);
1118 	drm_gpusvm_notifier_unlock(gpusvm);
1119 
1120 	drm_gpusvm_range_put(range);
1121 
1122 	if (RB_EMPTY_ROOT(&notifier->root.rb_root)) {
1123 		if (!notifier->flags.removed)
1124 			mmu_interval_notifier_remove(&notifier->notifier);
1125 		drm_gpusvm_notifier_remove(gpusvm, notifier);
1126 		drm_gpusvm_notifier_free(gpusvm, notifier);
1127 	}
1128 }
1129 EXPORT_SYMBOL_GPL(drm_gpusvm_range_remove);
1130 
1131 /**
1132  * drm_gpusvm_range_get() - Get a reference to GPU SVM range
1133  * @range: Pointer to the GPU SVM range
1134  *
1135  * This function increments the reference count of the specified GPU SVM range.
1136  *
1137  * Return: Pointer to the GPU SVM range.
1138  */
1139 struct drm_gpusvm_range *
1140 drm_gpusvm_range_get(struct drm_gpusvm_range *range)
1141 {
1142 	kref_get(&range->refcount);
1143 
1144 	return range;
1145 }
1146 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get);
1147 
1148 /**
1149  * drm_gpusvm_range_destroy() - Destroy GPU SVM range
1150  * @refcount: Pointer to the reference counter embedded in the GPU SVM range
1151  *
1152  * This function destroys the specified GPU SVM range when its reference count
1153  * reaches zero. If a custom range-free function is provided, it is invoked to
1154  * free the range; otherwise, the range is deallocated using kfree().
1155  */
1156 static void drm_gpusvm_range_destroy(struct kref *refcount)
1157 {
1158 	struct drm_gpusvm_range *range =
1159 		container_of(refcount, struct drm_gpusvm_range, refcount);
1160 	struct drm_gpusvm *gpusvm = range->gpusvm;
1161 
1162 	if (gpusvm->ops->range_free)
1163 		gpusvm->ops->range_free(range);
1164 	else
1165 		kfree(range);
1166 }
1167 
1168 /**
1169  * drm_gpusvm_range_put() - Put a reference to GPU SVM range
1170  * @range: Pointer to the GPU SVM range
1171  *
1172  * This function decrements the reference count of the specified GPU SVM range
1173  * and frees it when the count reaches zero.
1174  */
1175 void drm_gpusvm_range_put(struct drm_gpusvm_range *range)
1176 {
1177 	kref_put(&range->refcount, drm_gpusvm_range_destroy);
1178 }
1179 EXPORT_SYMBOL_GPL(drm_gpusvm_range_put);
1180 
1181 /**
1182  * drm_gpusvm_range_pages_valid() - GPU SVM range pages valid
1183  * @gpusvm: Pointer to the GPU SVM structure
1184  * @range: Pointer to the GPU SVM range structure
1185  *
1186  * This function determines if a GPU SVM range's pages are valid. Expected to be
1187  * called holding gpusvm->notifier_lock and as the last step before committing a
1188  * GPU binding. This is akin to a notifier seqno check in the HMM documentation
1189  * but due to wider notifiers (i.e., notifiers which span multiple ranges) this
1190  * function is required for finer grained checking (i.e., per range) if pages
1191  * are valid.
1192  *
1193  * Return: True if GPU SVM range has valid pages, False otherwise
1194  */
1195 bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm,
1196 				  struct drm_gpusvm_range *range)
1197 {
1198 	lockdep_assert_held(&gpusvm->notifier_lock);
1199 
1200 	return range->flags.has_devmem_pages || range->flags.has_dma_mapping;
1201 }
1202 EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid);
1203 
1204 /**
1205  * drm_gpusvm_range_pages_valid_unlocked() - GPU SVM range pages valid unlocked
1206  * @gpusvm: Pointer to the GPU SVM structure
1207  * @range: Pointer to the GPU SVM range structure
1208  *
1209  * This function determines if a GPU SVM range's pages are valid. Expected to be
1210  * called without holding gpusvm->notifier_lock.
1211  *
1212  * Return: True if GPU SVM range has valid pages, False otherwise
1213  */
1214 static bool
1215 drm_gpusvm_range_pages_valid_unlocked(struct drm_gpusvm *gpusvm,
1216 				      struct drm_gpusvm_range *range)
1217 {
1218 	bool pages_valid;
1219 
1220 	if (!range->dma_addr)
1221 		return false;
1222 
1223 	drm_gpusvm_notifier_lock(gpusvm);
1224 	pages_valid = drm_gpusvm_range_pages_valid(gpusvm, range);
1225 	if (!pages_valid)
1226 		drm_gpusvm_range_free_pages(gpusvm, range);
1227 	drm_gpusvm_notifier_unlock(gpusvm);
1228 
1229 	return pages_valid;
1230 }
1231 
1232 /**
1233  * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range
1234  * @gpusvm: Pointer to the GPU SVM structure
1235  * @range: Pointer to the GPU SVM range structure
1236  * @ctx: GPU SVM context
1237  *
1238  * This function gets pages for a GPU SVM range and ensures they are mapped for
1239  * DMA access.
1240  *
1241  * Return: 0 on success, negative error code on failure.
1242  */
1243 int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm,
1244 			       struct drm_gpusvm_range *range,
1245 			       const struct drm_gpusvm_ctx *ctx)
1246 {
1247 	struct mmu_interval_notifier *notifier = &range->notifier->notifier;
1248 	struct hmm_range hmm_range = {
1249 		.default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ? 0 :
1250 			HMM_PFN_REQ_WRITE),
1251 		.notifier = notifier,
1252 		.start = drm_gpusvm_range_start(range),
1253 		.end = drm_gpusvm_range_end(range),
1254 		.dev_private_owner = gpusvm->device_private_page_owner,
1255 	};
1256 	struct mm_struct *mm = gpusvm->mm;
1257 	void *zdd;
1258 	unsigned long timeout =
1259 		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
1260 	unsigned long i, j;
1261 	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
1262 					       drm_gpusvm_range_end(range));
1263 	unsigned long num_dma_mapped;
1264 	unsigned int order = 0;
1265 	unsigned long *pfns;
1266 	int err = 0;
1267 	struct dev_pagemap *pagemap;
1268 	struct drm_pagemap *dpagemap;
1269 	struct drm_gpusvm_range_flags flags;
1270 
1271 retry:
1272 	hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
1273 	if (drm_gpusvm_range_pages_valid_unlocked(gpusvm, range))
1274 		goto set_seqno;
1275 
1276 	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
1277 	if (!pfns)
1278 		return -ENOMEM;
1279 
1280 	if (!mmget_not_zero(mm)) {
1281 		err = -EFAULT;
1282 		goto err_free;
1283 	}
1284 
1285 	hmm_range.hmm_pfns = pfns;
1286 	while (true) {
1287 		mmap_read_lock(mm);
1288 		err = hmm_range_fault(&hmm_range);
1289 		mmap_read_unlock(mm);
1290 
1291 		if (err == -EBUSY) {
1292 			if (time_after(jiffies, timeout))
1293 				break;
1294 
1295 			hmm_range.notifier_seq =
1296 				mmu_interval_read_begin(notifier);
1297 			continue;
1298 		}
1299 		break;
1300 	}
1301 	mmput(mm);
1302 	if (err)
1303 		goto err_free;
1304 
1305 map_pages:
1306 	/*
1307 	 * Perform all dma mappings under the notifier lock to not
1308 	 * access freed pages. A notifier will either block on
1309 	 * the notifier lock or unmap dma.
1310 	 */
1311 	drm_gpusvm_notifier_lock(gpusvm);
1312 
1313 	flags.__flags = range->flags.__flags;
1314 	if (flags.unmapped) {
1315 		drm_gpusvm_notifier_unlock(gpusvm);
1316 		err = -EFAULT;
1317 		goto err_free;
1318 	}
1319 
1320 	if (mmu_interval_read_retry(notifier, hmm_range.notifier_seq)) {
1321 		drm_gpusvm_notifier_unlock(gpusvm);
1322 		kvfree(pfns);
1323 		goto retry;
1324 	}
1325 
1326 	if (!range->dma_addr) {
1327 		/* Unlock and restart mapping to allocate memory. */
1328 		drm_gpusvm_notifier_unlock(gpusvm);
1329 		range->dma_addr = kvmalloc_array(npages,
1330 						 sizeof(*range->dma_addr),
1331 						 GFP_KERNEL);
1332 		if (!range->dma_addr) {
1333 			err = -ENOMEM;
1334 			goto err_free;
1335 		}
1336 		goto map_pages;
1337 	}
1338 
1339 	zdd = NULL;
1340 	pagemap = NULL;
1341 	num_dma_mapped = 0;
1342 	for (i = 0, j = 0; i < npages; ++j) {
1343 		struct page *page = hmm_pfn_to_page(pfns[i]);
1344 
1345 		order = hmm_pfn_to_map_order(pfns[i]);
1346 		if (is_device_private_page(page) ||
1347 		    is_device_coherent_page(page)) {
1348 			if (zdd != page->zone_device_data && i > 0) {
1349 				err = -EOPNOTSUPP;
1350 				goto err_unmap;
1351 			}
1352 			zdd = page->zone_device_data;
1353 			if (pagemap != page_pgmap(page)) {
1354 				if (i > 0) {
1355 					err = -EOPNOTSUPP;
1356 					goto err_unmap;
1357 				}
1358 
1359 				pagemap = page_pgmap(page);
1360 				dpagemap = drm_pagemap_page_to_dpagemap(page);
1361 				if (drm_WARN_ON(gpusvm->drm, !dpagemap)) {
1362 					/*
1363 					 * Raced. This is not supposed to happen
1364 					 * since hmm_range_fault() should've migrated
1365 					 * this page to system.
1366 					 */
1367 					err = -EAGAIN;
1368 					goto err_unmap;
1369 				}
1370 			}
1371 			range->dma_addr[j] =
1372 				dpagemap->ops->device_map(dpagemap,
1373 							  gpusvm->drm->dev,
1374 							  page, order,
1375 							  DMA_BIDIRECTIONAL);
1376 			if (dma_mapping_error(gpusvm->drm->dev,
1377 					      range->dma_addr[j].addr)) {
1378 				err = -EFAULT;
1379 				goto err_unmap;
1380 			}
1381 		} else {
1382 			dma_addr_t addr;
1383 
1384 			if (is_zone_device_page(page) || pagemap) {
1385 				err = -EOPNOTSUPP;
1386 				goto err_unmap;
1387 			}
1388 
1389 			if (ctx->devmem_only) {
1390 				err = -EFAULT;
1391 				goto err_unmap;
1392 			}
1393 
1394 			addr = dma_map_page(gpusvm->drm->dev,
1395 					    page, 0,
1396 					    PAGE_SIZE << order,
1397 					    DMA_BIDIRECTIONAL);
1398 			if (dma_mapping_error(gpusvm->drm->dev, addr)) {
1399 				err = -EFAULT;
1400 				goto err_unmap;
1401 			}
1402 
1403 			range->dma_addr[j] = drm_pagemap_device_addr_encode
1404 				(addr, DRM_INTERCONNECT_SYSTEM, order,
1405 				 DMA_BIDIRECTIONAL);
1406 		}
1407 		i += 1 << order;
1408 		num_dma_mapped = i;
1409 		flags.has_dma_mapping = true;
1410 	}
1411 
1412 	if (pagemap) {
1413 		flags.has_devmem_pages = true;
1414 		range->dpagemap = dpagemap;
1415 	}
1416 
1417 	/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
1418 	WRITE_ONCE(range->flags.__flags, flags.__flags);
1419 
1420 	drm_gpusvm_notifier_unlock(gpusvm);
1421 	kvfree(pfns);
1422 set_seqno:
1423 	range->notifier_seq = hmm_range.notifier_seq;
1424 
1425 	return 0;
1426 
1427 err_unmap:
1428 	__drm_gpusvm_range_unmap_pages(gpusvm, range, num_dma_mapped);
1429 	drm_gpusvm_notifier_unlock(gpusvm);
1430 err_free:
1431 	kvfree(pfns);
1432 	if (err == -EAGAIN)
1433 		goto retry;
1434 	return err;
1435 }
1436 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages);
1437 
1438 /**
1439  * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range
1441  * @gpusvm: Pointer to the GPU SVM structure
1442  * @range: Pointer to the GPU SVM range structure
1443  * @ctx: GPU SVM context
1444  *
1445  * This function unmaps pages associated with a GPU SVM range. If ctx->in_notifier
1446  * is set, it is assumed that gpusvm->notifier_lock is held in write mode; if it
1447  * is clear, it acquires gpusvm->notifier_lock in read mode. Must be called on
1448  * each GPU SVM range attached to notifier in gpusvm->ops->invalidate for IOMMU
1449  * security model.
1450  */
1451 void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
1452 				  struct drm_gpusvm_range *range,
1453 				  const struct drm_gpusvm_ctx *ctx)
1454 {
1455 	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
1456 					       drm_gpusvm_range_end(range));
1457 
1458 	if (ctx->in_notifier)
1459 		lockdep_assert_held_write(&gpusvm->notifier_lock);
1460 	else
1461 		drm_gpusvm_notifier_lock(gpusvm);
1462 
1463 	__drm_gpusvm_range_unmap_pages(gpusvm, range, npages);
1464 
1465 	if (!ctx->in_notifier)
1466 		drm_gpusvm_notifier_unlock(gpusvm);
1467 }
1468 EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages);
1469 
1470 /**
1471  * drm_gpusvm_range_evict() - Evict GPU SVM range
1472  * @gpusvm: Pointer to the GPU SVM structure
1473  * @range: Pointer to the GPU SVM range to be removed
1474  *
1475  * This function evicts the specified GPU SVM range.
1476  *
1477  * Return: 0 on success, a negative error code on failure.
1478  */
1479 int drm_gpusvm_range_evict(struct drm_gpusvm *gpusvm,
1480 			   struct drm_gpusvm_range *range)
1481 {
1482 	struct mmu_interval_notifier *notifier = &range->notifier->notifier;
1483 	struct hmm_range hmm_range = {
1484 		.default_flags = HMM_PFN_REQ_FAULT,
1485 		.notifier = notifier,
1486 		.start = drm_gpusvm_range_start(range),
1487 		.end = drm_gpusvm_range_end(range),
1488 		.dev_private_owner = NULL,
1489 	};
1490 	unsigned long timeout =
1491 		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
1492 	unsigned long *pfns;
1493 	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
1494 					       drm_gpusvm_range_end(range));
1495 	int err = 0;
1496 	struct mm_struct *mm = gpusvm->mm;
1497 
1498 	if (!mmget_not_zero(mm))
1499 		return -EFAULT;
1500 
1501 	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
1502 	if (!pfns)
1503 		return -ENOMEM;
1504 
1505 	hmm_range.hmm_pfns = pfns;
1506 	while (!time_after(jiffies, timeout)) {
1507 		hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
1508 		if (time_after(jiffies, timeout)) {
1509 			err = -ETIME;
1510 			break;
1511 		}
1512 
1513 		mmap_read_lock(mm);
1514 		err = hmm_range_fault(&hmm_range);
1515 		mmap_read_unlock(mm);
1516 		if (err != -EBUSY)
1517 			break;
1518 	}
1519 
1520 	kvfree(pfns);
1521 	mmput(mm);
1522 
1523 	return err;
1524 }
1525 EXPORT_SYMBOL_GPL(drm_gpusvm_range_evict);
1526 
1527 /**
1528  * drm_gpusvm_has_mapping() - Check if GPU SVM has mapping for the given address range
1529  * @gpusvm: Pointer to the GPU SVM structure.
1530  * @start: Start address
1531  * @end: End address
1532  *
1533  * Return: True if GPU SVM has mapping, False otherwise
1534  */
1535 bool drm_gpusvm_has_mapping(struct drm_gpusvm *gpusvm, unsigned long start,
1536 			    unsigned long end)
1537 {
1538 	struct drm_gpusvm_notifier *notifier;
1539 
1540 	drm_gpusvm_for_each_notifier(notifier, gpusvm, start, end) {
1541 		struct drm_gpusvm_range *range = NULL;
1542 
1543 		drm_gpusvm_for_each_range(range, notifier, start, end)
1544 			return true;
1545 	}
1546 
1547 	return false;
1548 }
1549 EXPORT_SYMBOL_GPL(drm_gpusvm_has_mapping);
1550 
1551 /**
1552  * drm_gpusvm_range_set_unmapped() - Mark a GPU SVM range as unmapped
1553  * @range: Pointer to the GPU SVM range structure.
1554  * @mmu_range: Pointer to the MMU notifier range structure.
1555  *
1556  * This function marks a GPU SVM range as unmapped and sets the partial_unmap flag
1557  * if the range partially falls within the provided MMU notifier range.
1558  */
1559 void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range,
1560 				   const struct mmu_notifier_range *mmu_range)
1561 {
1562 	lockdep_assert_held_write(&range->gpusvm->notifier_lock);
1563 
1564 	range->flags.unmapped = true;
1565 	if (drm_gpusvm_range_start(range) < mmu_range->start ||
1566 	    drm_gpusvm_range_end(range) > mmu_range->end)
1567 		range->flags.partial_unmap = true;
1568 }
1569 EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped);
1570 
1571 MODULE_DESCRIPTION("DRM GPUSVM");
1572 MODULE_LICENSE("GPL");
1573