1 // SPDX-License-Identifier: GPL-2.0-only OR MIT
2 /*
3  * Copyright © 2024 Intel Corporation
4  *
5  * Authors:
6  *     Matthew Brost <matthew.brost@intel.com>
7  */
8 
9 #include <linux/dma-mapping.h>
10 #include <linux/export.h>
11 #include <linux/hmm.h>
12 #include <linux/hugetlb_inline.h>
13 #include <linux/memremap.h>
14 #include <linux/mm_types.h>
15 #include <linux/slab.h>
16 
17 #include <drm/drm_device.h>
18 #include <drm/drm_gpusvm.h>
19 #include <drm/drm_pagemap.h>
20 #include <drm/drm_print.h>
21 
22 /**
23  * DOC: Overview
24  *
25  * The GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering Manager (DRM)
26  * is a component of the DRM framework designed to manage shared virtual memory
27  * between the CPU and GPU. It enables efficient data exchange and processing
28  * for GPU-accelerated applications by allowing memory sharing and
29  * synchronization between the CPU's and GPU's virtual address spaces.
30  *
31  * Key GPU SVM Components:
32  *
33  * - Notifiers:
34  *	Used for tracking memory intervals and notifying the GPU of changes,
35  *	notifiers are sized based on a GPU SVM initialization parameter, with a
36  *	recommendation of 512M or larger. They maintain a Red-Black tree and a
37  *	list of ranges that fall within the notifier interval.  Notifiers are
38  *	tracked within a GPU SVM Red-Black tree and list and are dynamically
39  *	inserted or removed as ranges within the interval are created or
40  *	destroyed.
41  * - Ranges:
42  *	Represent memory ranges mapped in a DRM device and managed by GPU SVM.
43  *	They are sized based on an array of chunk sizes, which is a GPU SVM
44  *	initialization parameter, and the CPU address space.  Upon GPU fault,
45  *	the largest aligned chunk that fits within the faulting CPU address
46  *	space is chosen for the range size. Ranges are expected to be
47  *	dynamically allocated on GPU fault and removed on an MMU notifier UNMAP
48  *	event. As mentioned above, ranges are tracked in a notifier's Red-Black
49  *	tree.
50  *
51  * - Operations:
52  *	Define the interface for driver-specific GPU SVM operations such as
53  *	range allocation, notifier allocation, and invalidations.
54  *
55  * - Device Memory Allocations:
56  *	Embedded structure containing enough information for GPU SVM to migrate
57  *	to / from device memory.
58  *
59  * - Device Memory Operations:
60  *	Define the interface for driver-specific device memory operations, such
61  *	as releasing memory, populating pfns, and copying to / from device memory.
62  *
63  * This layer provides interfaces for allocating, mapping, migrating, and
64  * releasing memory ranges between the CPU and GPU. It handles all core memory
65  * management interactions (DMA mapping, HMM, and migration) and provides
66  * driver-specific virtual functions (vfuncs). This infrastructure is sufficient
67  * to build the expected driver components for an SVM implementation as detailed
68  * below.
69  *
70  * Expected Driver Components:
71  *
72  * - GPU page fault handler:
73  *	Used to create ranges and notifiers based on the fault address,
74  *	optionally migrate the range to device memory, and create GPU bindings.
75  *
76  * - Garbage collector:
77  *	Used to unmap and destroy GPU bindings for ranges.  Ranges are expected
78  *	to be added to the garbage collector upon an MMU_NOTIFY_UNMAP event in
79  *	the notifier callback.
80  *
81  * - Notifier callback:
82  *	Used to invalidate and DMA unmap GPU bindings for ranges.
83  */
84 
85 /**
86  * DOC: Locking
87  *
88  * GPU SVM handles locking for core MM interactions, i.e., it locks/unlocks the
89  * mmap lock as needed.
90  *
91  * GPU SVM introduces a global notifier lock, which safeguards the notifier's
92  * range RB tree and list, as well as the range's DMA mappings and sequence
93  * number. GPU SVM manages all necessary locking and unlocking operations,
94  * except for rechecking that a range's pages are still valid
95  * (drm_gpusvm_range_pages_valid) when the driver is committing GPU bindings.
96  * This lock corresponds to the ``driver->update`` lock mentioned in
97  * Documentation/mm/hmm.rst. Future revisions may transition from a GPU SVM
98  * global lock to a per-notifier lock if finer-grained locking is deemed
99  * necessary.
100  *
101  * In addition to the locking mentioned above, the driver should implement a
102  * lock to safeguard core GPU SVM function calls that modify state, such as
103  * drm_gpusvm_range_find_or_insert and drm_gpusvm_range_remove. This lock is
104  * denoted as 'driver_svm_lock' in code examples. Finer-grained driver-side
105  * locking should also be possible for concurrent GPU fault processing within a
106  * single GPU SVM. The 'driver_svm_lock' can be passed to
107  * drm_gpusvm_driver_set_lock to add lockdep annotations to GPU SVM.
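 *
 * As a sketch (assuming the driver uses a plain mutex as its
 * 'driver_svm_lock'), the annotation could be wired up as follows, with
 * driver_svm_lock()/driver_svm_unlock() in the examples below mapping to
 * mutex_lock()/mutex_unlock() of this mutex:
 *
 * .. code-block:: c
 *
 *	struct mutex driver_svm_lock;
 *
 *	void driver_svm_lock_init(struct drm_gpusvm *gpusvm)
 *	{
 *		mutex_init(&driver_svm_lock);
 *		drm_gpusvm_driver_set_lock(gpusvm, &driver_svm_lock);
 *	}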
108  */
109 
110 /**
111  * DOC: Partial Unmapping of Ranges
112  *
113  * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by CPU resulting
114  * in MMU_NOTIFY_UNMAP event) presents several challenges, with the main one
115  * being that a subset of the range still has CPU and GPU mappings. If the
116  * backing store for the range is in device memory, a subset of the backing
117  * store has references. One option would be to split the range and device
118  * memory backing store, but the implementation for this would be quite
119  * complicated. Given that partial unmappings are rare and driver-defined range
120  * sizes are relatively small, GPU SVM does not support splitting of ranges.
121  *
122  * With no support for range splitting, upon partial unmapping of a range, the
123  * driver is expected to invalidate and destroy the entire range. If the range
124  * has device memory as its backing, the driver is also expected to migrate any
125  * remaining pages back to RAM.
126  */
127 
128 /**
129  * DOC: Examples
130  *
131  * This section provides three examples of how to build the expected driver
132  * components: the GPU page fault handler, the garbage collector, and the
133  * notifier callback.
134  *
135  * The generic code provided does not include logic for complex migration
136  * policies, optimized invalidations, fine-grained driver locking, or other
137  * potentially required driver locking (e.g., DMA-resv locks).
138  *
139  * 1) GPU page fault handler
140  *
141  * .. code-block:: c
142  *
143  *	int driver_bind_range(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range)
144  *	{
145  *		int err = 0;
146  *
147  *		driver_alloc_and_setup_memory_for_bind(gpusvm, range);
148  *
149  *		drm_gpusvm_notifier_lock(gpusvm);
150  *		if (drm_gpusvm_range_pages_valid(range))
151  *			driver_commit_bind(gpusvm, range);
152  *		else
153  *			err = -EAGAIN;
154  *		drm_gpusvm_notifier_unlock(gpusvm);
155  *
156  *		return err;
157  *	}
158  *
159  *	int driver_gpu_fault(struct drm_gpusvm *gpusvm, unsigned long fault_addr,
160  *			     unsigned long gpuva_start, unsigned long gpuva_end)
161  *	{
162  *		struct drm_gpusvm_range *range;
 *		struct drm_gpusvm_ctx ctx = {};
163  *		int err;
164  *
165  *		driver_svm_lock();
166  *	retry:
167  *		// Always process UNMAPs first so view of GPU SVM ranges is current
168  *		driver_garbage_collector(gpusvm);
169  *
170  *		range = drm_gpusvm_range_find_or_insert(gpusvm, fault_addr,
171  *							gpuva_start, gpuva_end,
172  *						        &ctx);
173  *		if (IS_ERR(range)) {
174  *			err = PTR_ERR(range);
175  *			goto unlock;
176  *		}
177  *
178  *		if (driver_migration_policy(range)) {
179  *			err = drm_pagemap_populate_mm(driver_choose_drm_pagemap(),
180  *						      gpuva_start, gpuva_end, gpusvm->mm,
181  *						      ctx.timeslice_ms);
182  *			if (err)	// CPU mappings may have changed
183  *				goto retry;
184  *		}
185  *
186  *		err = drm_gpusvm_range_get_pages(gpusvm, range, &ctx);
187  *		if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {	// CPU mappings changed
188  *			if (err == -EOPNOTSUPP)
189  *				drm_gpusvm_range_evict(gpusvm, range);
190  *			goto retry;
191  *		} else if (err) {
192  *			goto unlock;
193  *		}
194  *
195  *		err = driver_bind_range(gpusvm, range);
196  *		if (err == -EAGAIN)	// CPU mappings changed
197  *			goto retry;
198  *
199  *	unlock:
200  *		driver_svm_unlock();
201  *		return err;
202  *	}
203  *
204  * 2) Garbage Collector
205  *
206  * .. code-block:: c
207  *
208  *	void __driver_garbage_collector(struct drm_gpusvm *gpusvm,
209  *					struct drm_gpusvm_range *range)
210  *	{
211  *		assert_driver_svm_locked(gpusvm);
212  *
213  *		// Partial unmap, migrate any remaining device memory pages back to RAM
214  *		if (range->flags.partial_unmap)
215  *			drm_gpusvm_range_evict(gpusvm, range);
216  *
217  *		driver_unbind_range(range);
218  *		drm_gpusvm_range_remove(gpusvm, range);
219  *	}
220  *
221  *	void driver_garbage_collector(struct drm_gpusvm *gpusvm)
222  *	{
223  *		assert_driver_svm_locked(gpusvm);
224  *
225  *		for_each_range_in_garbage_collector(gpusvm, range)
226  *			__driver_garbage_collector(gpusvm, range);
227  *	}
228  *
229  * 3) Notifier callback
230  *
231  * .. code-block:: c
232  *
233  *	void driver_invalidation(struct drm_gpusvm *gpusvm,
234  *				 struct drm_gpusvm_notifier *notifier,
235  *				 const struct mmu_notifier_range *mmu_range)
236  *	{
237  *		struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
238  *		struct drm_gpusvm_range *range = NULL;
239  *
240  *		driver_invalidate_device_pages(gpusvm, mmu_range->start, mmu_range->end);
241  *
242  *		drm_gpusvm_for_each_range(range, notifier, mmu_range->start,
243  *					  mmu_range->end) {
244  *			drm_gpusvm_range_unmap_pages(gpusvm, range, &ctx);
245  *
246  *			if (mmu_range->event != MMU_NOTIFY_UNMAP)
247  *				continue;
248  *
249  *			drm_gpusvm_range_set_unmapped(range, mmu_range);
250  *			driver_garbage_collector_add(gpusvm, range);
251  *		}
252  *	}
253  */
254 
255 /**
256  * npages_in_range() - Calculate the number of pages in a given range
257  * @start: The start address of the range
258  * @end: The end address of the range
259  *
260  * This function calculates the number of pages in a given memory range,
261  * specified by the start and end addresses. It divides the difference
262  * between the end and start addresses by the page size (PAGE_SIZE) to
263  * determine the number of pages in the range.
264  *
265  * Return: The number of pages in the specified range.
266  */
267 static unsigned long
268 npages_in_range(unsigned long start, unsigned long end)
269 {
270 	return (end - start) >> PAGE_SHIFT;
271 }
272 
273 /**
274  * drm_gpusvm_notifier_find() - Find GPU SVM notifier from GPU SVM
275  * @gpusvm: Pointer to the GPU SVM structure.
276  * @start: Start address of the notifier
277  * @end: End address of the notifier
278  *
279  * Return: A pointer to the drm_gpusvm_notifier if found or NULL
280  */
281 struct drm_gpusvm_notifier *
282 drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm, unsigned long start,
283 			 unsigned long end)
284 {
285 	struct interval_tree_node *itree;
286 
287 	itree = interval_tree_iter_first(&gpusvm->root, start, end - 1);
288 
289 	if (itree)
290 		return container_of(itree, struct drm_gpusvm_notifier, itree);
291 	else
292 		return NULL;
293 }
294 EXPORT_SYMBOL_GPL(drm_gpusvm_notifier_find);
295 
296 /**
297  * drm_gpusvm_range_find() - Find GPU SVM range from GPU SVM notifier
298  * @notifier: Pointer to the GPU SVM notifier structure.
299  * @start: Start address of the range
300  * @end: End address of the range
301  *
302  * Return: A pointer to the drm_gpusvm_range if found or NULL
303  */
304 struct drm_gpusvm_range *
305 drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start,
306 		      unsigned long end)
307 {
308 	struct interval_tree_node *itree;
309 
310 	itree = interval_tree_iter_first(&notifier->root, start, end - 1);
311 
312 	if (itree)
313 		return container_of(itree, struct drm_gpusvm_range, itree);
314 	else
315 		return NULL;
316 }
317 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find);
318 
319 /**
320  * drm_gpusvm_notifier_invalidate() - Invalidate a GPU SVM notifier.
321  * @mni: Pointer to the mmu_interval_notifier structure.
322  * @mmu_range: Pointer to the mmu_notifier_range structure.
323  * @cur_seq: Current sequence number.
324  *
325  * This function serves as a generic MMU notifier for GPU SVM. It sets the MMU
326  * notifier sequence number and calls the driver invalidate vfunc under
327  * gpusvm->notifier_lock.
328  *
329  * Return: true if the operation succeeds, false otherwise.
330  */
331 static bool
332 drm_gpusvm_notifier_invalidate(struct mmu_interval_notifier *mni,
333 			       const struct mmu_notifier_range *mmu_range,
334 			       unsigned long cur_seq)
335 {
336 	struct drm_gpusvm_notifier *notifier =
337 		container_of(mni, typeof(*notifier), notifier);
338 	struct drm_gpusvm *gpusvm = notifier->gpusvm;
339 
340 	if (!mmu_notifier_range_blockable(mmu_range))
341 		return false;
342 
343 	down_write(&gpusvm->notifier_lock);
344 	mmu_interval_set_seq(mni, cur_seq);
345 	gpusvm->ops->invalidate(gpusvm, notifier, mmu_range);
346 	up_write(&gpusvm->notifier_lock);
347 
348 	return true;
349 }
350 
351 /*
352  * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM
353  */
354 static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = {
355 	.invalidate = drm_gpusvm_notifier_invalidate,
356 };
357 
358 /**
359  * drm_gpusvm_init() - Initialize the GPU SVM.
360  * @gpusvm: Pointer to the GPU SVM structure.
361  * @name: Name of the GPU SVM.
362  * @drm: Pointer to the DRM device structure.
363  * @mm: Pointer to the mm_struct for the address space.
364  * @mm_start: Start address of GPU SVM.
365  * @mm_range: Range of the GPU SVM.
366  * @notifier_size: Size of individual notifiers.
367  * @ops: Pointer to the operations structure for GPU SVM.
368  * @chunk_sizes: Pointer to the array of chunk sizes used in range allocation.
369  *               Entries should be powers of 2 in descending order with last
370  *               entry being SZ_4K.
371  * @num_chunks: Number of chunks.
372  *
373  * This function initializes the GPU SVM.
374  *
375  * Note: If only using the simple drm_gpusvm_pages API (get/unmap/free),
376  * then only @gpusvm, @name, and @drm are expected. However, the same base
377  * @gpusvm can also be used with both modes together in which case the full
378  * setup is needed, where the core drm_gpusvm_pages API will simply never use
379  * the other fields.
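 *
 * A minimal initialization sketch for full SVM mode (the driver_* names,
 * chunk sizes, and notifier size below are illustrative, not required values):
 *
 * .. code-block:: c
 *
 *	static const unsigned long driver_chunk_sizes[] = { SZ_2M, SZ_64K, SZ_4K };
 *
 *	int driver_svm_init(struct driver_vm *vm)
 *	{
 *		return drm_gpusvm_init(&vm->gpusvm, "Driver SVM", vm->drm,
 *				       current->mm, 0, vm->size, SZ_512M,
 *				       &driver_gpusvm_ops, driver_chunk_sizes,
 *				       ARRAY_SIZE(driver_chunk_sizes));
 *	}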
380  *
381  * Return: 0 on success, a negative error code on failure.
382  */
383 int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
384 		    const char *name, struct drm_device *drm,
385 		    struct mm_struct *mm,
386 		    unsigned long mm_start, unsigned long mm_range,
387 		    unsigned long notifier_size,
388 		    const struct drm_gpusvm_ops *ops,
389 		    const unsigned long *chunk_sizes, int num_chunks)
390 {
391 	if (mm) {
392 		if (!ops->invalidate || !num_chunks)
393 			return -EINVAL;
394 		mmgrab(mm);
395 	} else {
396 		/* No full SVM mode, only core drm_gpusvm_pages API. */
397 		if (ops || num_chunks || mm_range || notifier_size)
398 			return -EINVAL;
399 	}
400 
401 	gpusvm->name = name;
402 	gpusvm->drm = drm;
403 	gpusvm->mm = mm;
404 	gpusvm->mm_start = mm_start;
405 	gpusvm->mm_range = mm_range;
406 	gpusvm->notifier_size = notifier_size;
407 	gpusvm->ops = ops;
408 	gpusvm->chunk_sizes = chunk_sizes;
409 	gpusvm->num_chunks = num_chunks;
410 
411 	gpusvm->root = RB_ROOT_CACHED;
412 	INIT_LIST_HEAD(&gpusvm->notifier_list);
413 
414 	init_rwsem(&gpusvm->notifier_lock);
415 
416 	fs_reclaim_acquire(GFP_KERNEL);
417 	might_lock(&gpusvm->notifier_lock);
418 	fs_reclaim_release(GFP_KERNEL);
419 
420 #ifdef CONFIG_LOCKDEP
421 	gpusvm->lock_dep_map = NULL;
422 #endif
423 
424 	return 0;
425 }
426 EXPORT_SYMBOL_GPL(drm_gpusvm_init);
427 
428 /**
429  * to_drm_gpusvm_notifier() - retrieve the container struct for a given rbtree node
430  * @node: a pointer to the rbtree node embedded within a drm_gpusvm_notifier struct
431  *
432  * Return: A pointer to the containing drm_gpusvm_notifier structure.
433  */
434 static struct drm_gpusvm_notifier *to_drm_gpusvm_notifier(struct rb_node *node)
435 {
436 	return container_of(node, struct drm_gpusvm_notifier, itree.rb);
437 }
438 
439 /**
440  * drm_gpusvm_notifier_insert() - Insert GPU SVM notifier
441  * @gpusvm: Pointer to the GPU SVM structure
442  * @notifier: Pointer to the GPU SVM notifier structure
443  *
444  * This function inserts the GPU SVM notifier into the GPU SVM RB tree and list.
445  */
446 static void drm_gpusvm_notifier_insert(struct drm_gpusvm *gpusvm,
447 				       struct drm_gpusvm_notifier *notifier)
448 {
449 	struct rb_node *node;
450 	struct list_head *head;
451 
452 	interval_tree_insert(&notifier->itree, &gpusvm->root);
453 
454 	node = rb_prev(&notifier->itree.rb);
455 	if (node)
456 		head = &(to_drm_gpusvm_notifier(node))->entry;
457 	else
458 		head = &gpusvm->notifier_list;
459 
460 	list_add(&notifier->entry, head);
461 }
462 
463 /**
464  * drm_gpusvm_notifier_remove() - Remove GPU SVM notifier
465  * @gpusvm: Pointer to the GPU SVM structure
466  * @notifier: Pointer to the GPU SVM notifier structure
467  *
468  * This function removes the GPU SVM notifier from the GPU SVM RB tree and list.
469  */
470 static void drm_gpusvm_notifier_remove(struct drm_gpusvm *gpusvm,
471 				       struct drm_gpusvm_notifier *notifier)
472 {
473 	interval_tree_remove(&notifier->itree, &gpusvm->root);
474 	list_del(&notifier->entry);
475 }
476 
477 /**
478  * drm_gpusvm_fini() - Finalize the GPU SVM.
479  * @gpusvm: Pointer to the GPU SVM structure.
480  *
481  * This function finalizes the GPU SVM by cleaning up any remaining ranges and
482  * notifiers, and dropping a reference to struct MM.
483  */
484 void drm_gpusvm_fini(struct drm_gpusvm *gpusvm)
485 {
486 	struct drm_gpusvm_notifier *notifier, *next;
487 
488 	drm_gpusvm_for_each_notifier_safe(notifier, next, gpusvm, 0, LONG_MAX) {
489 		struct drm_gpusvm_range *range, *__next;
490 
491 		/*
492 		 * Remove notifier first to avoid racing with any invalidation
493 		 */
494 		mmu_interval_notifier_remove(&notifier->notifier);
495 		notifier->flags.removed = true;
496 
497 		drm_gpusvm_for_each_range_safe(range, __next, notifier, 0,
498 					       LONG_MAX)
499 			drm_gpusvm_range_remove(gpusvm, range);
500 	}
501 
502 	if (gpusvm->mm)
503 		mmdrop(gpusvm->mm);
504 	WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root));
505 }
506 EXPORT_SYMBOL_GPL(drm_gpusvm_fini);
507 
508 /**
509  * drm_gpusvm_notifier_alloc() - Allocate GPU SVM notifier
510  * @gpusvm: Pointer to the GPU SVM structure
511  * @fault_addr: Fault address
512  *
513  * This function allocates and initializes the GPU SVM notifier structure.
514  *
515  * Return: Pointer to the allocated GPU SVM notifier on success, ERR_PTR() on failure.
516  */
517 static struct drm_gpusvm_notifier *
518 drm_gpusvm_notifier_alloc(struct drm_gpusvm *gpusvm, unsigned long fault_addr)
519 {
520 	struct drm_gpusvm_notifier *notifier;
521 
522 	if (gpusvm->ops->notifier_alloc)
523 		notifier = gpusvm->ops->notifier_alloc();
524 	else
525 		notifier = kzalloc(sizeof(*notifier), GFP_KERNEL);
526 
527 	if (!notifier)
528 		return ERR_PTR(-ENOMEM);
529 
530 	notifier->gpusvm = gpusvm;
531 	notifier->itree.start = ALIGN_DOWN(fault_addr, gpusvm->notifier_size);
532 	notifier->itree.last = ALIGN(fault_addr + 1, gpusvm->notifier_size) - 1;
533 	INIT_LIST_HEAD(&notifier->entry);
534 	notifier->root = RB_ROOT_CACHED;
535 	INIT_LIST_HEAD(&notifier->range_list);
536 
537 	return notifier;
538 }
539 
540 /**
541  * drm_gpusvm_notifier_free() - Free GPU SVM notifier
542  * @gpusvm: Pointer to the GPU SVM structure
543  * @notifier: Pointer to the GPU SVM notifier structure
544  *
545  * This function frees the GPU SVM notifier structure.
546  */
547 static void drm_gpusvm_notifier_free(struct drm_gpusvm *gpusvm,
548 				     struct drm_gpusvm_notifier *notifier)
549 {
550 	WARN_ON(!RB_EMPTY_ROOT(&notifier->root.rb_root));
551 
552 	if (gpusvm->ops->notifier_free)
553 		gpusvm->ops->notifier_free(notifier);
554 	else
555 		kfree(notifier);
556 }
557 
558 /**
559  * to_drm_gpusvm_range() - retrieve the container struct for a given rbtree node
560  * @node: a pointer to the rbtree node embedded within a drm_gpusvm_range struct
561  *
562  * Return: A pointer to the containing drm_gpusvm_range structure.
563  */
564 static struct drm_gpusvm_range *to_drm_gpusvm_range(struct rb_node *node)
565 {
566 	return container_of(node, struct drm_gpusvm_range, itree.rb);
567 }
568 
569 /**
570  * drm_gpusvm_range_insert() - Insert GPU SVM range
571  * @notifier: Pointer to the GPU SVM notifier structure
572  * @range: Pointer to the GPU SVM range structure
573  *
574  * This function inserts the GPU SVM range into the notifier RB tree and list.
575  */
576 static void drm_gpusvm_range_insert(struct drm_gpusvm_notifier *notifier,
577 				    struct drm_gpusvm_range *range)
578 {
579 	struct rb_node *node;
580 	struct list_head *head;
581 
582 	drm_gpusvm_notifier_lock(notifier->gpusvm);
583 	interval_tree_insert(&range->itree, &notifier->root);
584 
585 	node = rb_prev(&range->itree.rb);
586 	if (node)
587 		head = &(to_drm_gpusvm_range(node))->entry;
588 	else
589 		head = &notifier->range_list;
590 
591 	list_add(&range->entry, head);
592 	drm_gpusvm_notifier_unlock(notifier->gpusvm);
593 }
594 
595 /**
596  * __drm_gpusvm_range_remove() - Remove GPU SVM range
597  * @notifier: Pointer to the GPU SVM notifier structure
598  * @range: Pointer to the GPU SVM range structure
599  *
600  * This macro removes the GPU SVM range from the notifier RB tree and list.
601  */
602 static void __drm_gpusvm_range_remove(struct drm_gpusvm_notifier *notifier,
603 				      struct drm_gpusvm_range *range)
604 {
605 	interval_tree_remove(&range->itree, &notifier->root);
606 	list_del(&range->entry);
607 }
608 
609 /**
610  * drm_gpusvm_range_alloc() - Allocate GPU SVM range
611  * @gpusvm: Pointer to the GPU SVM structure
612  * @notifier: Pointer to the GPU SVM notifier structure
613  * @fault_addr: Fault address
614  * @chunk_size: Chunk size
615  * @migrate_devmem: Flag indicating whether to migrate device memory
616  *
617  * This function allocates and initializes the GPU SVM range structure.
618  *
619  * Return: Pointer to the allocated GPU SVM range on success, ERR_PTR() on failure.
620  */
621 static struct drm_gpusvm_range *
622 drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm,
623 		       struct drm_gpusvm_notifier *notifier,
624 		       unsigned long fault_addr, unsigned long chunk_size,
625 		       bool migrate_devmem)
626 {
627 	struct drm_gpusvm_range *range;
628 
629 	if (gpusvm->ops->range_alloc)
630 		range = gpusvm->ops->range_alloc(gpusvm);
631 	else
632 		range = kzalloc(sizeof(*range), GFP_KERNEL);
633 
634 	if (!range)
635 		return ERR_PTR(-ENOMEM);
636 
637 	kref_init(&range->refcount);
638 	range->gpusvm = gpusvm;
639 	range->notifier = notifier;
640 	range->itree.start = ALIGN_DOWN(fault_addr, chunk_size);
641 	range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1;
642 	INIT_LIST_HEAD(&range->entry);
643 	range->pages.notifier_seq = LONG_MAX;
644 	range->pages.flags.migrate_devmem = migrate_devmem ? 1 : 0;
645 
646 	return range;
647 }
648 
649 /**
650  * drm_gpusvm_hmm_pfn_to_order() - Get the largest CPU mapping order.
651  * @hmm_pfn: The current hmm_pfn.
652  * @hmm_pfn_index: Index of the @hmm_pfn within the pfn array.
653  * @npages: Number of pages within the pfn array, i.e. the hmm range size.
654  *
655  * To allow skipping PFNs with the same flags (like when they belong to
656  * the same huge PTE) when looping over the pfn array, take a given hmm_pfn,
657  * and return the largest order that will fit inside the CPU PTE, while also
658  * accounting for the original hmm range boundaries.
659  *
660  * Return: The largest order that will safely fit within the size of the hmm_pfn
661  * CPU PTE.
662  */
663 static unsigned int drm_gpusvm_hmm_pfn_to_order(unsigned long hmm_pfn,
664 						unsigned long hmm_pfn_index,
665 						unsigned long npages)
666 {
667 	unsigned long size;
668 
669 	size = 1UL << hmm_pfn_to_map_order(hmm_pfn);
670 	size -= (hmm_pfn & ~HMM_PFN_FLAGS) & (size - 1);
671 	hmm_pfn_index += size;
672 	if (hmm_pfn_index > npages)
673 		size -= (hmm_pfn_index - npages);
674 
675 	return ilog2(size);
676 }
677 
678 /**
679  * drm_gpusvm_check_pages() - Check pages
680  * @gpusvm: Pointer to the GPU SVM structure
681  * @notifier: Pointer to the GPU SVM notifier structure
682  * @start: Start address
683  * @end: End address
684  * @dev_private_owner: The device private page owner
685  *
686  * Check if pages between start and end have been faulted in on the CPU. Used
687  * to prevent migration of pages without a CPU backing store.
688  *
689  * Return: True if pages have been faulted into CPU, False otherwise
690  */
691 static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm,
692 				   struct drm_gpusvm_notifier *notifier,
693 				   unsigned long start, unsigned long end,
694 				   void *dev_private_owner)
695 {
696 	struct hmm_range hmm_range = {
697 		.default_flags = 0,
698 		.notifier = &notifier->notifier,
699 		.start = start,
700 		.end = end,
701 		.dev_private_owner = dev_private_owner,
702 	};
703 	unsigned long timeout =
704 		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
705 	unsigned long *pfns;
706 	unsigned long npages = npages_in_range(start, end);
707 	int err, i;
708 
709 	mmap_assert_locked(gpusvm->mm);
710 
711 	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
712 	if (!pfns)
713 		return false;
714 
715 	hmm_range.notifier_seq = mmu_interval_read_begin(&notifier->notifier);
716 	hmm_range.hmm_pfns = pfns;
717 
718 	while (true) {
719 		err = hmm_range_fault(&hmm_range);
720 		if (err == -EBUSY) {
721 			if (time_after(jiffies, timeout))
722 				break;
723 
724 			hmm_range.notifier_seq =
725 				mmu_interval_read_begin(&notifier->notifier);
726 			continue;
727 		}
728 		break;
729 	}
730 	if (err)
731 		goto err_free;
732 
733 	for (i = 0; i < npages;) {
734 		if (!(pfns[i] & HMM_PFN_VALID)) {
735 			err = -EFAULT;
736 			goto err_free;
737 		}
738 		i += 0x1 << drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages);
739 	}
740 
741 err_free:
742 	kvfree(pfns);
743 	return err ? false : true;
744 }
745 
746 /**
747  * drm_gpusvm_range_chunk_size() - Determine chunk size for GPU SVM range
748  * @gpusvm: Pointer to the GPU SVM structure
749  * @notifier: Pointer to the GPU SVM notifier structure
750  * @vas: Pointer to the virtual memory area structure
751  * @fault_addr: Fault address
752  * @gpuva_start: Start address of GPUVA which mirrors CPU
753  * @gpuva_end: End address of GPUVA which mirrors CPU
754  * @check_pages_threshold: Check CPU pages for present threshold
755  * @dev_private_owner: The device private page owner
756  *
757  * This function determines the chunk size for the GPU SVM range based on the
758  * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual
759  * memory area boundaries.
760  *
761  * Return: Chunk size on success, LONG_MAX on failure.
762  */
763 static unsigned long
764 drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm,
765 			    struct drm_gpusvm_notifier *notifier,
766 			    struct vm_area_struct *vas,
767 			    unsigned long fault_addr,
768 			    unsigned long gpuva_start,
769 			    unsigned long gpuva_end,
770 			    unsigned long check_pages_threshold,
771 			    void *dev_private_owner)
772 {
773 	unsigned long start, end;
774 	int i = 0;
775 
776 retry:
777 	for (; i < gpusvm->num_chunks; ++i) {
778 		start = ALIGN_DOWN(fault_addr, gpusvm->chunk_sizes[i]);
779 		end = ALIGN(fault_addr + 1, gpusvm->chunk_sizes[i]);
780 
781 		if (start >= vas->vm_start && end <= vas->vm_end &&
782 		    start >= drm_gpusvm_notifier_start(notifier) &&
783 		    end <= drm_gpusvm_notifier_end(notifier) &&
784 		    start >= gpuva_start && end <= gpuva_end)
785 			break;
786 	}
787 
788 	if (i == gpusvm->num_chunks)
789 		return LONG_MAX;
790 
791 	/*
792 	 * If allocating more than a page, ensure the chunk does not overlap with
793 	 * existing ranges.
794 	 */
795 	if (end - start != SZ_4K) {
796 		struct drm_gpusvm_range *range;
797 
798 		range = drm_gpusvm_range_find(notifier, start, end);
799 		if (range) {
800 			++i;
801 			goto retry;
802 		}
803 
804 		/*
805 		 * XXX: Only create range on pages CPU has faulted in. Without
806 		 * this check, or prefault, on BMG 'xe_exec_system_allocator --r
807 		 * process-many-malloc' fails. In the failure case, each process
808 		 * mallocs 16k but the CPU VMA is ~128k which results in 64k SVM
809 		 * ranges. When migrating the SVM ranges, some processes fail in
810 		 * drm_pagemap_migrate_to_devmem with 'migrate.cpages != npages'
811 		 * and then upon drm_gpusvm_range_get_pages device pages from
812 		 * other processes are collected + faulted in which creates all
813 		 * sorts of problems. Unsure exactly how this is happening; the
814 		 * problem also goes away if 'xe_exec_system_allocator --r
815 		 * process-many-malloc' mallocs at least 64k at a time.
816 		 */
817 		if (end - start <= check_pages_threshold &&
818 		    !drm_gpusvm_check_pages(gpusvm, notifier, start, end, dev_private_owner)) {
819 			++i;
820 			goto retry;
821 		}
822 	}
823 
824 	return end - start;
825 }
826 
827 #ifdef CONFIG_LOCKDEP
828 /**
829  * drm_gpusvm_driver_lock_held() - Assert GPU SVM driver lock is held
830  * @gpusvm: Pointer to the GPU SVM structure.
831  *
832  * Ensure driver lock is held.
833  */
834 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm)
835 {
836 	if ((gpusvm)->lock_dep_map)
837 		lockdep_assert(lock_is_held_type((gpusvm)->lock_dep_map, 0));
838 }
839 #else
840 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm)
841 {
842 }
843 #endif
844 
845 /**
846  * drm_gpusvm_find_vma_start() - Find start address for first VMA in range
847  * @gpusvm: Pointer to the GPU SVM structure
848  * @start: The inclusive start user address.
849  * @end: The exclusive end user address.
850  *
851  * Return: The start address of the first VMA within the provided range,
852  * ULONG_MAX otherwise. Assumes @start < @end.
853  */
854 unsigned long
855 drm_gpusvm_find_vma_start(struct drm_gpusvm *gpusvm,
856 			  unsigned long start,
857 			  unsigned long end)
858 {
859 	struct mm_struct *mm = gpusvm->mm;
860 	struct vm_area_struct *vma;
861 	unsigned long addr = ULONG_MAX;
862 
863 	if (!mmget_not_zero(mm))
864 		return addr;
865 
866 	mmap_read_lock(mm);
867 
868 	vma = find_vma_intersection(mm, start, end);
869 	if (vma)
870 		addr = vma->vm_start;
871 
872 	mmap_read_unlock(mm);
873 	mmput(mm);
874 
875 	return addr;
876 }
877 EXPORT_SYMBOL_GPL(drm_gpusvm_find_vma_start);
878 
879 /**
880  * drm_gpusvm_range_find_or_insert() - Find or insert GPU SVM range
881  * @gpusvm: Pointer to the GPU SVM structure
882  * @fault_addr: Fault address
883  * @gpuva_start: Start address of GPUVA which mirrors CPU
884  * @gpuva_end: End address of GPUVA which mirrors CPU
885  * @ctx: GPU SVM context
886  *
887  * This function finds or inserts a newly allocated GPU SVM range based on the
888  * fault address. Caller must hold a lock to protect range lookup and insertion.
889  *
890  * Return: Pointer to the GPU SVM range on success, ERR_PTR() on failure.
891  */
892 struct drm_gpusvm_range *
893 drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm,
894 				unsigned long fault_addr,
895 				unsigned long gpuva_start,
896 				unsigned long gpuva_end,
897 				const struct drm_gpusvm_ctx *ctx)
898 {
899 	struct drm_gpusvm_notifier *notifier;
900 	struct drm_gpusvm_range *range;
901 	struct mm_struct *mm = gpusvm->mm;
902 	struct vm_area_struct *vas;
903 	bool notifier_alloc = false;
904 	unsigned long chunk_size;
905 	int err;
906 	bool migrate_devmem;
907 
908 	drm_gpusvm_driver_lock_held(gpusvm);
909 
910 	if (fault_addr < gpusvm->mm_start ||
911 	    fault_addr > gpusvm->mm_start + gpusvm->mm_range)
912 		return ERR_PTR(-EINVAL);
913 
914 	if (!mmget_not_zero(mm))
915 		return ERR_PTR(-EFAULT);
916 
917 	notifier = drm_gpusvm_notifier_find(gpusvm, fault_addr, fault_addr + 1);
918 	if (!notifier) {
919 		notifier = drm_gpusvm_notifier_alloc(gpusvm, fault_addr);
920 		if (IS_ERR(notifier)) {
921 			err = PTR_ERR(notifier);
922 			goto err_mmunlock;
923 		}
924 		notifier_alloc = true;
925 		err = mmu_interval_notifier_insert(&notifier->notifier,
926 						   mm,
927 						   drm_gpusvm_notifier_start(notifier),
928 						   drm_gpusvm_notifier_size(notifier),
929 						   &drm_gpusvm_notifier_ops);
930 		if (err)
931 			goto err_notifier;
932 	}
933 
934 	mmap_read_lock(mm);
935 
936 	vas = vma_lookup(mm, fault_addr);
937 	if (!vas) {
938 		err = -ENOENT;
939 		goto err_notifier_remove;
940 	}
941 
942 	if (!ctx->read_only && !(vas->vm_flags & VM_WRITE)) {
943 		err = -EPERM;
944 		goto err_notifier_remove;
945 	}
946 
947 	range = drm_gpusvm_range_find(notifier, fault_addr, fault_addr + 1);
948 	if (range)
949 		goto out_mmunlock;
950 	/*
951 	 * XXX: Short-circuiting migration based on migrate_vma_* current
952 	 * limitations. If/when migrate_vma_* add more support, this logic will
953 	 * have to change.
954 	 */
955 	migrate_devmem = ctx->devmem_possible &&
956 		vma_is_anonymous(vas) && !is_vm_hugetlb_page(vas);
957 
958 	chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas,
959 						 fault_addr, gpuva_start,
960 						 gpuva_end,
961 						 ctx->check_pages_threshold,
962 						 ctx->device_private_page_owner);
963 	if (chunk_size == LONG_MAX) {
964 		err = -EINVAL;
965 		goto err_notifier_remove;
966 	}
967 
968 	range = drm_gpusvm_range_alloc(gpusvm, notifier, fault_addr, chunk_size,
969 				       migrate_devmem);
970 	if (IS_ERR(range)) {
971 		err = PTR_ERR(range);
972 		goto err_notifier_remove;
973 	}
974 
975 	drm_gpusvm_range_insert(notifier, range);
976 	if (notifier_alloc)
977 		drm_gpusvm_notifier_insert(gpusvm, notifier);
978 
979 out_mmunlock:
980 	mmap_read_unlock(mm);
981 	mmput(mm);
982 
983 	return range;
984 
985 err_notifier_remove:
986 	mmap_read_unlock(mm);
987 	if (notifier_alloc)
988 		mmu_interval_notifier_remove(&notifier->notifier);
989 err_notifier:
990 	if (notifier_alloc)
991 		drm_gpusvm_notifier_free(gpusvm, notifier);
992 err_mmunlock:
993 	mmput(mm);
994 	return ERR_PTR(err);
995 }
996 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert);
997 
998 /**
999  * __drm_gpusvm_unmap_pages() - Unmap pages associated with GPU SVM pages (internal)
1000  * @gpusvm: Pointer to the GPU SVM structure
1001  * @svm_pages: Pointer to the GPU SVM pages structure
1002  * @npages: Number of pages to unmap
1003  *
1004  * This function unmaps pages associated with a GPU SVM pages struct. Assumes and
1005  * asserts correct locking is in place when called.
1006  */
1007 static void __drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm,
1008 				     struct drm_gpusvm_pages *svm_pages,
1009 				     unsigned long npages)
1010 {
1011 	struct drm_pagemap *dpagemap = svm_pages->dpagemap;
1012 	struct device *dev = gpusvm->drm->dev;
1013 	unsigned long i, j;
1014 
1015 	lockdep_assert_held(&gpusvm->notifier_lock);
1016 
1017 	if (svm_pages->flags.has_dma_mapping) {
1018 		struct drm_gpusvm_pages_flags flags = {
1019 			.__flags = svm_pages->flags.__flags,
1020 		};
1021 
1022 		for (i = 0, j = 0; i < npages; j++) {
1023 			struct drm_pagemap_addr *addr = &svm_pages->dma_addr[j];
1024 
1025 			if (addr->proto == DRM_INTERCONNECT_SYSTEM)
1026 				dma_unmap_page(dev,
1027 					       addr->addr,
1028 					       PAGE_SIZE << addr->order,
1029 					       addr->dir);
1030 			else if (dpagemap && dpagemap->ops->device_unmap)
1031 				dpagemap->ops->device_unmap(dpagemap,
1032 							    dev, *addr);
1033 			i += 1 << addr->order;
1034 		}
1035 
1036 		/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
1037 		flags.has_devmem_pages = false;
1038 		flags.has_dma_mapping = false;
1039 		WRITE_ONCE(svm_pages->flags.__flags, flags.__flags);
1040 
1041 		svm_pages->dpagemap = NULL;
1042 	}
1043 }
1044 
1045 /**
1046  * __drm_gpusvm_free_pages() - Free dma array associated with GPU SVM pages
1047  * @gpusvm: Pointer to the GPU SVM structure
1048  * @svm_pages: Pointer to the GPU SVM pages structure
1049  *
1050  * This function frees the dma address array associated with a GPU SVM pages struct.
1051  */
1052 static void __drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm,
1053 				    struct drm_gpusvm_pages *svm_pages)
1054 {
1055 	lockdep_assert_held(&gpusvm->notifier_lock);
1056 
1057 	if (svm_pages->dma_addr) {
1058 		kvfree(svm_pages->dma_addr);
1059 		svm_pages->dma_addr = NULL;
1060 	}
1061 }
1062 
1063 /**
1064  * drm_gpusvm_free_pages() - Free dma-mapping associated with GPU SVM pages
1065  * struct
1066  * @gpusvm: Pointer to the GPU SVM structure
1067  * @svm_pages: Pointer to the GPU SVM pages structure
1068  * @npages: Number of mapped pages
1069  *
1070  * This function unmaps and frees the dma address array associated with a GPU
1071  * SVM pages struct.
1072  */
1073 void drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm,
1074 			   struct drm_gpusvm_pages *svm_pages,
1075 			   unsigned long npages)
1076 {
1077 	drm_gpusvm_notifier_lock(gpusvm);
1078 	__drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages);
1079 	__drm_gpusvm_free_pages(gpusvm, svm_pages);
1080 	drm_gpusvm_notifier_unlock(gpusvm);
1081 }
1082 EXPORT_SYMBOL_GPL(drm_gpusvm_free_pages);
1083 
1084 /**
1085  * drm_gpusvm_range_remove() - Remove GPU SVM range
1086  * @gpusvm: Pointer to the GPU SVM structure
1087  * @range: Pointer to the GPU SVM range to be removed
1088  *
1089  * This function removes the specified GPU SVM range and also removes the parent
1090  * GPU SVM notifier if no more ranges remain in the notifier. The caller must
1091  * hold a lock to protect range and notifier removal.
1092  */
1093 void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm,
1094 			     struct drm_gpusvm_range *range)
1095 {
1096 	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
1097 					       drm_gpusvm_range_end(range));
1098 	struct drm_gpusvm_notifier *notifier;
1099 
1100 	drm_gpusvm_driver_lock_held(gpusvm);
1101 
1102 	notifier = drm_gpusvm_notifier_find(gpusvm,
1103 					    drm_gpusvm_range_start(range),
1104 					    drm_gpusvm_range_start(range) + 1);
1105 	if (WARN_ON_ONCE(!notifier))
1106 		return;
1107 
1108 	drm_gpusvm_notifier_lock(gpusvm);
1109 	__drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages);
1110 	__drm_gpusvm_free_pages(gpusvm, &range->pages);
1111 	__drm_gpusvm_range_remove(notifier, range);
1112 	drm_gpusvm_notifier_unlock(gpusvm);
1113 
1114 	drm_gpusvm_range_put(range);
1115 
1116 	if (RB_EMPTY_ROOT(&notifier->root.rb_root)) {
1117 		if (!notifier->flags.removed)
1118 			mmu_interval_notifier_remove(&notifier->notifier);
1119 		drm_gpusvm_notifier_remove(gpusvm, notifier);
1120 		drm_gpusvm_notifier_free(gpusvm, notifier);
1121 	}
1122 }
1123 EXPORT_SYMBOL_GPL(drm_gpusvm_range_remove);
1124 
1125 /**
1126  * drm_gpusvm_range_get() - Get a reference to GPU SVM range
1127  * @range: Pointer to the GPU SVM range
1128  *
1129  * This function increments the reference count of the specified GPU SVM range.
1130  *
1131  * Return: Pointer to the GPU SVM range.
1132  */
1133 struct drm_gpusvm_range *
1134 drm_gpusvm_range_get(struct drm_gpusvm_range *range)
1135 {
1136 	kref_get(&range->refcount);
1137 
1138 	return range;
1139 }
1140 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get);
1141 
1142 /**
1143  * drm_gpusvm_range_destroy() - Destroy GPU SVM range
1144  * @refcount: Pointer to the reference counter embedded in the GPU SVM range
1145  *
1146  * This function destroys the specified GPU SVM range when its reference count
1147  * reaches zero. If a custom range-free function is provided, it is invoked to
1148  * free the range; otherwise, the range is deallocated using kfree().
1149  */
1150 static void drm_gpusvm_range_destroy(struct kref *refcount)
1151 {
1152 	struct drm_gpusvm_range *range =
1153 		container_of(refcount, struct drm_gpusvm_range, refcount);
1154 	struct drm_gpusvm *gpusvm = range->gpusvm;
1155 
1156 	if (gpusvm->ops->range_free)
1157 		gpusvm->ops->range_free(range);
1158 	else
1159 		kfree(range);
1160 }
1161 
1162 /**
1163  * drm_gpusvm_range_put() - Put a reference to GPU SVM range
1164  * @range: Pointer to the GPU SVM range
1165  *
1166  * This function decrements the reference count of the specified GPU SVM range
1167  * and frees it when the count reaches zero.
1168  */
1169 void drm_gpusvm_range_put(struct drm_gpusvm_range *range)
1170 {
1171 	kref_put(&range->refcount, drm_gpusvm_range_destroy);
1172 }
1173 EXPORT_SYMBOL_GPL(drm_gpusvm_range_put);
1174 
1175 /**
1176  * drm_gpusvm_pages_valid() - GPU SVM range pages valid
1177  * @gpusvm: Pointer to the GPU SVM structure
1178  * @svm_pages: Pointer to the GPU SVM pages structure
1179  *
1180  * This function determines if a GPU SVM range's pages are valid. Expected to be
1181  * called holding gpusvm->notifier_lock and as the last step before committing a
1182  * GPU binding. This is akin to a notifier seqno check in the HMM documentation
1183  * but due to wider notifiers (i.e., notifiers which span multiple ranges) this
1184  * function is required for finer grained checking (i.e., per range) if pages
1185  * are valid.
1186  *
1187  * Return: True if GPU SVM range has valid pages, False otherwise
1188  */
1189 static bool drm_gpusvm_pages_valid(struct drm_gpusvm *gpusvm,
1190 				   struct drm_gpusvm_pages *svm_pages)
1191 {
1192 	lockdep_assert_held(&gpusvm->notifier_lock);
1193 
1194 	return svm_pages->flags.has_devmem_pages || svm_pages->flags.has_dma_mapping;
1195 }
1196 
1197 /**
1198  * drm_gpusvm_range_pages_valid() - GPU SVM range pages valid
1199  * @gpusvm: Pointer to the GPU SVM structure
1200  * @range: Pointer to the GPU SVM range structure
1201  *
1202  * This function determines if a GPU SVM range's pages are valid. Expected to be
1203  * called holding gpusvm->notifier_lock and as the last step before committing a
1204  * GPU binding. This is akin to a notifier seqno check in the HMM documentation
1205  * but due to wider notifiers (i.e., notifiers which span multiple ranges) this
1206  * function is required for finer grained checking (i.e., per range) if pages
1207  * are valid.
1208  *
1209  * Return: True if GPU SVM range has valid pages, False otherwise
1210  */
1211 bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm,
1212 				  struct drm_gpusvm_range *range)
1213 {
1214 	return drm_gpusvm_pages_valid(gpusvm, &range->pages);
1215 }
1216 EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid);
1217 
1218 /**
1219  * drm_gpusvm_pages_valid_unlocked() - GPU SVM pages valid unlocked
1220  * @gpusvm: Pointer to the GPU SVM structure
1221  * @svm_pages: Pointer to the GPU SVM pages structure
1222  *
1223  * This function determines if a GPU SVM range's pages are valid. Expected to be
1224  * called without holding gpusvm->notifier_lock.
1225  *
1226  * Return: True if GPU SVM range has valid pages, False otherwise
1227  */
1228 static bool drm_gpusvm_pages_valid_unlocked(struct drm_gpusvm *gpusvm,
1229 					    struct drm_gpusvm_pages *svm_pages)
1230 {
1231 	bool pages_valid;
1232 
1233 	if (!svm_pages->dma_addr)
1234 		return false;
1235 
1236 	drm_gpusvm_notifier_lock(gpusvm);
1237 	pages_valid = drm_gpusvm_pages_valid(gpusvm, svm_pages);
1238 	if (!pages_valid)
1239 		__drm_gpusvm_free_pages(gpusvm, svm_pages);
1240 	drm_gpusvm_notifier_unlock(gpusvm);
1241 
1242 	return pages_valid;
1243 }
1244 
1245 /**
1246  * drm_gpusvm_get_pages() - Get pages and populate GPU SVM pages struct
1247  * @gpusvm: Pointer to the GPU SVM structure
1248  * @svm_pages: The SVM pages to populate. This will contain the dma-addresses
1249  * @mm: The mm corresponding to the CPU range
1250  * @notifier: The corresponding notifier for the given CPU range
1251  * @pages_start: Start CPU address for the pages
1252  * @pages_end: End CPU address for the pages (exclusive)
1253  * @ctx: GPU SVM context
1254  *
1255  * This function gets pages for the given CPU range and ensures they are
1256  * mapped for DMA access.
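 *
 * A usage sketch for the pages-only mode (no full SVM setup; the driver_bo
 * structure and helpers below are illustrative, and the driver is assumed to
 * have already registered @notifier with mmu_interval_notifier_insert()
 * against the same mm):
 *
 * .. code-block:: c
 *
 *	int driver_bo_get_pages(struct drm_gpusvm *gpusvm, struct driver_bo *bo,
 *				unsigned long start, unsigned long end)
 *	{
 *		struct drm_gpusvm_ctx ctx = {};
 *		int err;
 *
 *		err = drm_gpusvm_get_pages(gpusvm, &bo->svm_pages, current->mm,
 *					   &bo->notifier, start, end, &ctx);
 *		if (err)
 *			return err;
 *
 *		// Program GPU page tables from bo->svm_pages.dma_addr here,
 *		// rechecking validity under the notifier lock as described in
 *		// the Locking section.
 *
 *		return 0;
 *	}
 *
 *	// Teardown unmaps and frees the DMA mappings again:
 *	//	drm_gpusvm_free_pages(gpusvm, &bo->svm_pages,
 *	//			      (end - start) >> PAGE_SHIFT);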
1257  *
1258  * Return: 0 on success, negative error code on failure.
1259  */
1260 int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
1261 			 struct drm_gpusvm_pages *svm_pages,
1262 			 struct mm_struct *mm,
1263 			 struct mmu_interval_notifier *notifier,
1264 			 unsigned long pages_start, unsigned long pages_end,
1265 			 const struct drm_gpusvm_ctx *ctx)
1266 {
1267 	struct hmm_range hmm_range = {
1268 		.default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ? 0 :
1269 			HMM_PFN_REQ_WRITE),
1270 		.notifier = notifier,
1271 		.start = pages_start,
1272 		.end = pages_end,
1273 		.dev_private_owner = ctx->device_private_page_owner,
1274 	};
1275 	void *zdd;
1276 	unsigned long timeout =
1277 		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
1278 	unsigned long i, j;
1279 	unsigned long npages = npages_in_range(pages_start, pages_end);
1280 	unsigned long num_dma_mapped;
1281 	unsigned int order = 0;
1282 	unsigned long *pfns;
1283 	int err = 0;
1284 	struct dev_pagemap *pagemap;
1285 	struct drm_pagemap *dpagemap;
1286 	struct drm_gpusvm_pages_flags flags;
1287 	enum dma_data_direction dma_dir = ctx->read_only ? DMA_TO_DEVICE :
1288 							   DMA_BIDIRECTIONAL;
1289 
1290 retry:
1291 	hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
1292 	if (drm_gpusvm_pages_valid_unlocked(gpusvm, svm_pages))
1293 		goto set_seqno;
1294 
1295 	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
1296 	if (!pfns)
1297 		return -ENOMEM;
1298 
1299 	if (!mmget_not_zero(mm)) {
1300 		err = -EFAULT;
1301 		goto err_free;
1302 	}
1303 
1304 	hmm_range.hmm_pfns = pfns;
1305 	while (true) {
1306 		mmap_read_lock(mm);
1307 		err = hmm_range_fault(&hmm_range);
1308 		mmap_read_unlock(mm);
1309 
1310 		if (err == -EBUSY) {
1311 			if (time_after(jiffies, timeout))
1312 				break;
1313 
1314 			hmm_range.notifier_seq =
1315 				mmu_interval_read_begin(notifier);
1316 			continue;
1317 		}
1318 		break;
1319 	}
1320 	mmput(mm);
1321 	if (err)
1322 		goto err_free;
1323 
1324 map_pages:
1325 	/*
1326 	 * Perform all dma mappings under the notifier lock to not
1327 	 * access freed pages. A notifier will either block on
1328 	 * the notifier lock or unmap dma.
1329 	 */
1330 	drm_gpusvm_notifier_lock(gpusvm);
1331 
1332 	flags.__flags = svm_pages->flags.__flags;
1333 	if (flags.unmapped) {
1334 		drm_gpusvm_notifier_unlock(gpusvm);
1335 		err = -EFAULT;
1336 		goto err_free;
1337 	}
1338 
1339 	if (mmu_interval_read_retry(notifier, hmm_range.notifier_seq)) {
1340 		drm_gpusvm_notifier_unlock(gpusvm);
1341 		kvfree(pfns);
1342 		goto retry;
1343 	}
1344 
1345 	if (!svm_pages->dma_addr) {
1346 		/* Unlock and restart mapping to allocate memory. */
1347 		drm_gpusvm_notifier_unlock(gpusvm);
1348 		svm_pages->dma_addr =
1349 			kvmalloc_array(npages, sizeof(*svm_pages->dma_addr), GFP_KERNEL);
1350 		if (!svm_pages->dma_addr) {
1351 			err = -ENOMEM;
1352 			goto err_free;
1353 		}
1354 		goto map_pages;
1355 	}
1356 
1357 	zdd = NULL;
1358 	pagemap = NULL;
1359 	num_dma_mapped = 0;
1360 	for (i = 0, j = 0; i < npages; ++j) {
1361 		struct page *page = hmm_pfn_to_page(pfns[i]);
1362 
1363 		order = drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages);
1364 		if (is_device_private_page(page) ||
1365 		    is_device_coherent_page(page)) {
1366 			if (zdd != page->zone_device_data && i > 0) {
1367 				err = -EOPNOTSUPP;
1368 				goto err_unmap;
1369 			}
1370 			zdd = page->zone_device_data;
1371 			if (pagemap != page_pgmap(page)) {
1372 				if (i > 0) {
1373 					err = -EOPNOTSUPP;
1374 					goto err_unmap;
1375 				}
1376 
1377 				pagemap = page_pgmap(page);
1378 				dpagemap = drm_pagemap_page_to_dpagemap(page);
1379 				if (drm_WARN_ON(gpusvm->drm, !dpagemap)) {
1380 					/*
1381 					 * Raced. This is not supposed to happen
1382 					 * since hmm_range_fault() should've migrated
1383 					 * this page to system.
1384 					 */
1385 					err = -EAGAIN;
1386 					goto err_unmap;
1387 				}
1388 			}
1389 			svm_pages->dma_addr[j] =
1390 				dpagemap->ops->device_map(dpagemap,
1391 							  gpusvm->drm->dev,
1392 							  page, order,
1393 							  dma_dir);
1394 			if (dma_mapping_error(gpusvm->drm->dev,
1395 					      svm_pages->dma_addr[j].addr)) {
1396 				err = -EFAULT;
1397 				goto err_unmap;
1398 			}
1399 		} else {
1400 			dma_addr_t addr;
1401 
1402 			if (is_zone_device_page(page) || pagemap) {
1403 				err = -EOPNOTSUPP;
1404 				goto err_unmap;
1405 			}
1406 
1407 			if (ctx->devmem_only) {
1408 				err = -EFAULT;
1409 				goto err_unmap;
1410 			}
1411 
1412 			addr = dma_map_page(gpusvm->drm->dev,
1413 					    page, 0,
1414 					    PAGE_SIZE << order,
1415 					    dma_dir);
1416 			if (dma_mapping_error(gpusvm->drm->dev, addr)) {
1417 				err = -EFAULT;
1418 				goto err_unmap;
1419 			}
1420 
1421 			svm_pages->dma_addr[j] = drm_pagemap_addr_encode
1422 				(addr, DRM_INTERCONNECT_SYSTEM, order,
1423 				 dma_dir);
1424 		}
1425 		i += 1 << order;
1426 		num_dma_mapped = i;
1427 		flags.has_dma_mapping = true;
1428 	}
1429 
1430 	if (pagemap) {
1431 		flags.has_devmem_pages = true;
1432 		svm_pages->dpagemap = dpagemap;
1433 	}
1434 
1435 	/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
1436 	WRITE_ONCE(svm_pages->flags.__flags, flags.__flags);
1437 
1438 	drm_gpusvm_notifier_unlock(gpusvm);
1439 	kvfree(pfns);
1440 set_seqno:
1441 	svm_pages->notifier_seq = hmm_range.notifier_seq;
1442 
1443 	return 0;
1444 
1445 err_unmap:
1446 	__drm_gpusvm_unmap_pages(gpusvm, svm_pages, num_dma_mapped);
1447 	drm_gpusvm_notifier_unlock(gpusvm);
1448 err_free:
1449 	kvfree(pfns);
1450 	if (err == -EAGAIN)
1451 		goto retry;
1452 	return err;
1453 }
1454 EXPORT_SYMBOL_GPL(drm_gpusvm_get_pages);
1455 
1456 /**
1457  * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range
1458  * @gpusvm: Pointer to the GPU SVM structure
1459  * @range: Pointer to the GPU SVM range structure
1460  * @ctx: GPU SVM context
1461  *
1462  * This function gets pages for a GPU SVM range and ensures they are mapped for
1463  * DMA access.
1464  *
1465  * Return: 0 on success, negative error code on failure.
1466  */
1467 int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm,
1468 			       struct drm_gpusvm_range *range,
1469 			       const struct drm_gpusvm_ctx *ctx)
1470 {
1471 	return drm_gpusvm_get_pages(gpusvm, &range->pages, gpusvm->mm,
1472 				    &range->notifier->notifier,
1473 				    drm_gpusvm_range_start(range),
1474 				    drm_gpusvm_range_end(range), ctx);
1475 }
1476 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages);
1477 
1478 /**
1479  * drm_gpusvm_unmap_pages() - Unmap GPU svm pages
1480  * @gpusvm: Pointer to the GPU SVM structure
1481  * @svm_pages: Pointer to the GPU SVM pages structure
1482  * @npages: Number of pages in @svm_pages.
1483  * @ctx: GPU SVM context
1484  *
1485  * This function unmaps pages associated with a GPU SVM pages struct. If
1486  * @in_notifier is set, it is assumed that gpusvm->notifier_lock is held in
1487  * write mode; if it is clear, it acquires gpusvm->notifier_lock in read mode.
1488  * Must be called in the invalidate() callback of the corresponding notifier for
1489  * IOMMU security model.
1490  */
1491 void drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm,
1492 			    struct drm_gpusvm_pages *svm_pages,
1493 			    unsigned long npages,
1494 			    const struct drm_gpusvm_ctx *ctx)
1495 {
1496 	if (ctx->in_notifier)
1497 		lockdep_assert_held_write(&gpusvm->notifier_lock);
1498 	else
1499 		drm_gpusvm_notifier_lock(gpusvm);
1500 
1501 	__drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages);
1502 
1503 	if (!ctx->in_notifier)
1504 		drm_gpusvm_notifier_unlock(gpusvm);
1505 }
1506 EXPORT_SYMBOL_GPL(drm_gpusvm_unmap_pages);
1507 
1508 /**
1509  * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range
1510  * @gpusvm: Pointer to the GPU SVM structure
1511  * @range: Pointer to the GPU SVM range structure
1512  * @ctx: GPU SVM context
1513  *
1514  * This function unmaps pages associated with a GPU SVM range. If @in_notifier
1515  * is set, it is assumed that gpusvm->notifier_lock is held in write mode; if it
1516  * is clear, it acquires gpusvm->notifier_lock in read mode. Must be called on
1517  * each GPU SVM range attached to notifier in gpusvm->ops->invalidate for IOMMU
1518  * security model.
1519  */
1520 void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
1521 				  struct drm_gpusvm_range *range,
1522 				  const struct drm_gpusvm_ctx *ctx)
1523 {
1524 	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
1525 					       drm_gpusvm_range_end(range));
1526 
1527 	return drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages, ctx);
1528 }
1529 EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages);
1530 
1531 /**
1532  * drm_gpusvm_range_evict() - Evict GPU SVM range
1533  * @gpusvm: Pointer to the GPU SVM structure
1534  * @range: Pointer to the GPU SVM range to be removed
1535  *
1536  * This function evicts the specified GPU SVM range.
1537  *
1538  * Return: 0 on success, a negative error code on failure.
1539  */
1540 int drm_gpusvm_range_evict(struct drm_gpusvm *gpusvm,
1541 			   struct drm_gpusvm_range *range)
1542 {
1543 	struct mmu_interval_notifier *notifier = &range->notifier->notifier;
1544 	struct hmm_range hmm_range = {
1545 		.default_flags = HMM_PFN_REQ_FAULT,
1546 		.notifier = notifier,
1547 		.start = drm_gpusvm_range_start(range),
1548 		.end = drm_gpusvm_range_end(range),
1549 		.dev_private_owner = NULL,
1550 	};
1551 	unsigned long timeout =
1552 		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
1553 	unsigned long *pfns;
1554 	unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
1555 					       drm_gpusvm_range_end(range));
1556 	int err = 0;
1557 	struct mm_struct *mm = gpusvm->mm;
1558 
1559 	if (!mmget_not_zero(mm))
1560 		return -EFAULT;
1561 
1562 	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
1563 	if (!pfns)
1564 		return -ENOMEM;
1565 
1566 	hmm_range.hmm_pfns = pfns;
1567 	while (!time_after(jiffies, timeout)) {
1568 		hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
1569 		if (time_after(jiffies, timeout)) {
1570 			err = -ETIME;
1571 			break;
1572 		}
1573 
1574 		mmap_read_lock(mm);
1575 		err = hmm_range_fault(&hmm_range);
1576 		mmap_read_unlock(mm);
1577 		if (err != -EBUSY)
1578 			break;
1579 	}
1580 
1581 	kvfree(pfns);
1582 	mmput(mm);
1583 
1584 	return err;
1585 }
1586 EXPORT_SYMBOL_GPL(drm_gpusvm_range_evict);
1587 
1588 /**
1589  * drm_gpusvm_has_mapping() - Check if GPU SVM has mapping for the given address range
1590  * @gpusvm: Pointer to the GPU SVM structure.
1591  * @start: Start address
1592  * @end: End address
1593  *
1594  * Return: True if GPU SVM has mapping, False otherwise
1595  */
1596 bool drm_gpusvm_has_mapping(struct drm_gpusvm *gpusvm, unsigned long start,
1597 			    unsigned long end)
1598 {
1599 	struct drm_gpusvm_notifier *notifier;
1600 
1601 	drm_gpusvm_for_each_notifier(notifier, gpusvm, start, end) {
1602 		struct drm_gpusvm_range *range = NULL;
1603 
1604 		drm_gpusvm_for_each_range(range, notifier, start, end)
1605 			return true;
1606 	}
1607 
1608 	return false;
1609 }
1610 EXPORT_SYMBOL_GPL(drm_gpusvm_has_mapping);
1611 
1612 /**
1613  * drm_gpusvm_range_set_unmapped() - Mark a GPU SVM range as unmapped
1614  * @range: Pointer to the GPU SVM range structure.
1615  * @mmu_range: Pointer to the MMU notifier range structure.
1616  *
1617  * This function marks a GPU SVM range as unmapped and sets the partial_unmap flag
1618  * if the range partially falls within the provided MMU notifier range.
1619  */
1620 void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range,
1621 				   const struct mmu_notifier_range *mmu_range)
1622 {
1623 	lockdep_assert_held_write(&range->gpusvm->notifier_lock);
1624 
1625 	range->pages.flags.unmapped = true;
1626 	if (drm_gpusvm_range_start(range) < mmu_range->start ||
1627 	    drm_gpusvm_range_end(range) > mmu_range->end)
1628 		range->pages.flags.partial_unmap = true;
1629 }
1630 EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped);
1631 
1632 MODULE_DESCRIPTION("DRM GPUSVM");
1633 MODULE_LICENSE("GPL");
1634