1 // SPDX-License-Identifier: GPL-2.0-only OR MIT
2 /*
3 * Copyright © 2024 Intel Corporation
4 *
5 * Authors:
6 * Matthew Brost <matthew.brost@intel.com>
7 */
8
9 #include <linux/dma-mapping.h>
10 #include <linux/export.h>
11 #include <linux/hmm.h>
12 #include <linux/hugetlb_inline.h>
13 #include <linux/memremap.h>
14 #include <linux/mm_types.h>
15 #include <linux/slab.h>
16
17 #include <drm/drm_device.h>
18 #include <drm/drm_gpusvm.h>
19 #include <drm/drm_pagemap.h>
20 #include <drm/drm_print.h>
21
22 /**
23 * DOC: Overview
24 *
25 * GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering Manager (DRM)
26 * is a component of the DRM framework designed to manage shared virtual memory
27 * between the CPU and GPU. It enables efficient data exchange and processing
28 * for GPU-accelerated applications by allowing memory sharing and
29 * synchronization between the CPU's and GPU's virtual address spaces.
30 *
31 * Key GPU SVM Components:
32 *
33 * - Notifiers:
34 * Used for tracking memory intervals and notifying the GPU of changes,
35 * notifiers are sized based on a GPU SVM initialization parameter, with a
36 * recommendation of 512M or larger. They maintain a Red-Black tree and a
37 * list of ranges that fall within the notifier interval. Notifiers are
38 * tracked within a GPU SVM Red-Black tree and list and are dynamically
39 * inserted or removed as ranges within the interval are created or
40 * destroyed.
41 * - Ranges:
42 * Represent memory ranges mapped in a DRM device and managed by GPU SVM.
43 * They are sized based on an array of chunk sizes, which is a GPU SVM
44 * initialization parameter, and the CPU address space. Upon GPU fault,
45 * the largest aligned chunk that fits within the faulting CPU address
46 * space is chosen for the range size. Ranges are expected to be
47 * dynamically allocated on GPU fault and removed on an MMU notifier UNMAP
48 * event. As mentioned above, ranges are tracked in a notifier's Red-Black
49 * tree.
50 *
51 * - Operations:
52 * Define the interface for driver-specific GPU SVM operations such as
53 * range allocation, notifier allocation, and invalidations.
54 *
55 * - Device Memory Allocations:
56 * Embedded structure containing enough information for GPU SVM to migrate
57 * to / from device memory.
58 *
59 * - Device Memory Operations:
60 * Define the interface for driver-specific device memory operations to
61 * release memory, populate pfns, and copy to / from device memory.
62 *
63 * This layer provides interfaces for allocating, mapping, migrating, and
64 * releasing memory ranges between the CPU and GPU. It handles all core memory
65 * management interactions (DMA mapping, HMM, and migration) and provides
66 * driver-specific virtual functions (vfuncs). This infrastructure is sufficient
67 * to build the expected driver components for an SVM implementation as detailed
68 * below.
69 *
70 * Expected Driver Components:
71 *
72 * - GPU page fault handler:
73 * Used to create ranges and notifiers based on the fault address,
74 * optionally migrate the range to device memory, and create GPU bindings.
75 *
76 * - Garbage collector:
77 * Used to unmap and destroy GPU bindings for ranges. Ranges are expected
78 * to be added to the garbage collector upon a MMU_NOTIFY_UNMAP event in
79 * notifier callback.
80 *
81 * - Notifier callback:
82 * Used to invalidate and DMA unmap GPU bindings for ranges.
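 *
 * A minimal sketch of how a driver might wire these components together. The
 * driver_* names are illustrative, the ops layout is only an assumption based
 * on the callbacks used elsewhere in this file, and driver_invalidation()
 * refers to the notifier callback shown in the Examples section below.
 *
 * .. code-block:: c
 *
 *	// Driver-specific range embedding the GPU SVM range (hypothetical)
 *	struct driver_range {
 *		struct drm_gpusvm_range base;
 *		// driver-specific members, e.g. GPU binding state
 *	};
 *
 *	static void driver_invalidation(struct drm_gpusvm *gpusvm,
 *					struct drm_gpusvm_notifier *notifier,
 *					const struct mmu_notifier_range *mmu_range);
 *
 *	static struct drm_gpusvm_range *driver_range_alloc(struct drm_gpusvm *gpusvm)
 *	{
 *		struct driver_range *range = kzalloc(sizeof(*range), GFP_KERNEL);
 *
 *		// Returning NULL is handled as -ENOMEM by GPU SVM
 *		return range ? &range->base : NULL;
 *	}
 *
 *	static void driver_range_free(struct drm_gpusvm_range *range)
 *	{
 *		kfree(container_of(range, struct driver_range, base));
 *	}
 *
 *	static const struct drm_gpusvm_ops driver_gpusvm_ops = {
 *		.range_alloc	= driver_range_alloc,
 *		.range_free	= driver_range_free,
 *		.invalidate	= driver_invalidation,
 *	};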
83 */
84
85 /**
86 * DOC: Locking
87 *
88 * GPU SVM handles locking for core MM interactions, i.e., it locks/unlocks the
89 * mmap lock as needed.
90 *
91 * GPU SVM introduces a global notifier lock, which safeguards the notifier's
92 * range RB tree and list, as well as the range's DMA mappings and sequence
93 * number. GPU SVM manages all necessary locking and unlocking operations,
94 * except for the recheck that a range's pages are still valid
95 * (drm_gpusvm_range_pages_valid()) when the driver is committing GPU bindings.
96 * This lock corresponds to the ``driver->update`` lock mentioned in
97 * Documentation/mm/hmm.rst. Future revisions may transition from a GPU SVM
98 * global lock to a per-notifier lock if finer-grained locking is deemed
99 * necessary.
100 *
101 * In addition to the locking mentioned above, the driver should implement a
102 * lock to safeguard core GPU SVM function calls that modify state, such as
103 * drm_gpusvm_range_find_or_insert and drm_gpusvm_range_remove. This lock is
104 * denoted as 'driver_svm_lock' in code examples. Finer-grained driver-side
105 * locking should also be possible for concurrent GPU fault processing within a
106 * single GPU SVM. The 'driver_svm_lock' can be passed to
107 * drm_gpusvm_driver_set_lock() to add lockdep annotations to GPU SVM.
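 *
 * A minimal sketch of the driver-side lock described above, assuming
 * drm_gpusvm_driver_set_lock() takes the GPU SVM and the driver lock; the
 * driver structure and helper names are illustrative. The
 * driver_svm_lock()/driver_svm_unlock() calls in the Examples section below
 * correspond to taking and releasing this mutex.
 *
 * .. code-block:: c
 *
 *	struct driver_svm {
 *		struct drm_gpusvm gpusvm;
 *		struct mutex driver_svm_lock;
 *	};
 *
 *	void driver_svm_lock_init(struct driver_svm *svm)
 *	{
 *		mutex_init(&svm->driver_svm_lock);
 *		// Let lockdep verify the lock is held around state-modifying
 *		// calls such as drm_gpusvm_range_find_or_insert() and
 *		// drm_gpusvm_range_remove().
 *		drm_gpusvm_driver_set_lock(&svm->gpusvm, &svm->driver_svm_lock);
 *	}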
108 */
109
110 /**
111 * DOC: Partial Unmapping of Ranges
112 *
113 * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by CPU resulting
114 * in MMU_NOTIFY_UNMAP event) presents several challenges, with the main one
115 * being that a subset of the range still has CPU and GPU mappings. If the
116 * backing store for the range is in device memory, a subset of the backing
117 * store has references. One option would be to split the range and device
118 * memory backing store, but the implementation for this would be quite
119 * complicated. Given that partial unmappings are rare and driver-defined range
120 * sizes are relatively small, GPU SVM does not support splitting of ranges.
121 *
122 * With no support for range splitting, upon partial unmapping of a range, the
123 * driver is expected to invalidate and destroy the entire range. If the range
124 * has device memory as its backing, the driver is also expected to migrate any
125 * remaining pages back to RAM.
126 */
127
128 /**
129 * DOC: Examples
130 *
131 * This section provides three examples of how to build the expected driver
132 * components: the GPU page fault handler, the garbage collector, and the
133 * notifier callback.
134 *
135 * The generic code provided does not include logic for complex migration
136 * policies, optimized invalidations, fine-grained driver locking, or other
137 * potentially required driver locking (e.g., DMA-resv locks).
138 *
139 * 1) GPU page fault handler
140 *
141 * .. code-block:: c
142 *
143 * int driver_bind_range(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range)
144 * {
145 * int err = 0;
146 *
147 * driver_alloc_and_setup_memory_for_bind(gpusvm, range);
148 *
149 * drm_gpusvm_notifier_lock(gpusvm);
150 * if (drm_gpusvm_range_pages_valid(range))
151 * driver_commit_bind(gpusvm, range);
152 * else
153 * err = -EAGAIN;
154 * drm_gpusvm_notifier_unlock(gpusvm);
155 *
156 * return err;
157 * }
158 *
159 * int driver_gpu_fault(struct drm_gpusvm *gpusvm, unsigned long fault_addr,
160 * unsigned long gpuva_start, unsigned long gpuva_end)
161 * {
162 * struct drm_gpusvm_ctx ctx = {};
 * struct drm_gpusvm_range *range;
163 * int err;
164 *
165 * driver_svm_lock();
166 * retry:
167 * // Always process UNMAPs first so view of GPU SVM ranges is current
168 * driver_garbage_collector(gpusvm);
169 *
170 * range = drm_gpusvm_range_find_or_insert(gpusvm, fault_addr,
171 * gpuva_start, gpuva_end,
172 * &ctx);
173 * if (IS_ERR(range)) {
174 * err = PTR_ERR(range);
175 * goto unlock;
176 * }
177 *
178 * if (driver_migration_policy(range)) {
179 * err = drm_pagemap_populate_mm(driver_choose_drm_pagemap(),
180 * gpuva_start, gpuva_end, gpusvm->mm,
181 * ctx.timeslice_ms);
182 * if (err) // CPU mappings may have changed
183 * goto retry;
184 * }
185 *
186 * err = drm_gpusvm_range_get_pages(gpusvm, range, &ctx);
187 * if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) { // CPU mappings changed
188 * if (err == -EOPNOTSUPP)
189 * drm_gpusvm_range_evict(gpusvm, range);
190 * goto retry;
191 * } else if (err) {
192 * goto unlock;
193 * }
194 *
195 * err = driver_bind_range(gpusvm, range);
196 * if (err == -EAGAIN) // CPU mappings changed
197 * goto retry;
198 *
199 * unlock:
200 * driver_svm_unlock();
201 * return err;
202 * }
203 *
204 * 2) Garbage Collector
205 *
206 * .. code-block:: c
207 *
208 * void __driver_garbage_collector(struct drm_gpusvm *gpusvm,
209 * struct drm_gpusvm_range *range)
210 * {
211 * assert_driver_svm_locked(gpusvm);
212 *
213 * // Partial unmap, migrate any remaining device memory pages back to RAM
214 * if (range->flags.partial_unmap)
215 * drm_gpusvm_range_evict(gpusvm, range);
216 *
217 * driver_unbind_range(range);
218 * drm_gpusvm_range_remove(gpusvm, range);
219 * }
220 *
221 * void driver_garbage_collector(struct drm_gpusvm *gpusvm)
222 * {
223 * assert_driver_svm_locked(gpusvm);
224 *
225 * for_each_range_in_garbage_collector(gpusvm, range)
226 * __driver_garbage_collector(gpusvm, range);
227 * }
228 *
229 * 3) Notifier callback
230 *
231 * .. code-block:: c
232 *
233 * void driver_invalidation(struct drm_gpusvm *gpusvm,
234 * struct drm_gpusvm_notifier *notifier,
235 * const struct mmu_notifier_range *mmu_range)
236 * {
237 * struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
238 * struct drm_gpusvm_range *range = NULL;
239 *
240 * driver_invalidate_device_pages(gpusvm, mmu_range->start, mmu_range->end);
241 *
242 * drm_gpusvm_for_each_range(range, notifier, mmu_range->start,
243 * mmu_range->end) {
244 * drm_gpusvm_range_unmap_pages(gpusvm, range, &ctx);
245 *
246 * if (mmu_range->event != MMU_NOTIFY_UNMAP)
247 * continue;
248 *
249 * drm_gpusvm_range_set_unmapped(range, mmu_range);
250 * driver_garbage_collector_add(gpusvm, range);
251 * }
252 * }
253 */
254
255 /**
256 * npages_in_range() - Calculate the number of pages in a given range
257 * @start: The start address of the range
258 * @end: The end address of the range
259 *
260 * This function calculates the number of pages in a given memory range,
261 * specified by the start and end addresses. It divides the difference
262 * between the end and start addresses by the page size (PAGE_SIZE) to
263 * determine the number of pages in the range.
264 *
265 * Return: The number of pages in the specified range.
266 */
267 static unsigned long
268 npages_in_range(unsigned long start, unsigned long end)
269 {
270 return (end - start) >> PAGE_SHIFT;
271 }
272
273 /**
274 * drm_gpusvm_range_find() - Find GPU SVM range from GPU SVM notifier
275 * @notifier: Pointer to the GPU SVM notifier structure.
276 * @start: Start address of the range
277 * @end: End address of the range
278 *
279 * Return: A pointer to the drm_gpusvm_range if found or NULL
280 */
281 struct drm_gpusvm_range *
282 drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start,
283 unsigned long end)
284 {
285 struct interval_tree_node *itree;
286
287 itree = interval_tree_iter_first(&notifier->root, start, end - 1);
288
289 if (itree)
290 return container_of(itree, struct drm_gpusvm_range, itree);
291 else
292 return NULL;
293 }
294 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find);
295
296 /**
297 * drm_gpusvm_for_each_range_safe() - Safely iterate over GPU SVM ranges in a notifier
298 * @range__: Iterator variable for the ranges
299 * @next__: Iterator variable for the ranges temporary storage
300 * @notifier__: Pointer to the GPU SVM notifier
301 * @start__: Start address of the range
302 * @end__: End address of the range
303 *
304 * This macro is used to iterate over GPU SVM ranges in a notifier while
305 * removing ranges from it.
306 */
307 #define drm_gpusvm_for_each_range_safe(range__, next__, notifier__, start__, end__) \
308 for ((range__) = drm_gpusvm_range_find((notifier__), (start__), (end__)), \
309 (next__) = __drm_gpusvm_range_next(range__); \
310 (range__) && (drm_gpusvm_range_start(range__) < (end__)); \
311 (range__) = (next__), (next__) = __drm_gpusvm_range_next(range__))
312
313 /**
314 * __drm_gpusvm_notifier_next() - get the next drm_gpusvm_notifier in the list
315 * @notifier: a pointer to the current drm_gpusvm_notifier
316 *
317 * Return: A pointer to the next drm_gpusvm_notifier if available, or NULL if
318 * the current notifier is the last one or if the input notifier is
319 * NULL.
320 */
321 static struct drm_gpusvm_notifier *
322 __drm_gpusvm_notifier_next(struct drm_gpusvm_notifier *notifier)
323 {
324 if (notifier && !list_is_last(&notifier->entry,
325 &notifier->gpusvm->notifier_list))
326 return list_next_entry(notifier, entry);
327
328 return NULL;
329 }
330
331 static struct drm_gpusvm_notifier *
332 notifier_iter_first(struct rb_root_cached *root, unsigned long start,
333 unsigned long last)
334 {
335 struct interval_tree_node *itree;
336
337 itree = interval_tree_iter_first(root, start, last);
338
339 if (itree)
340 return container_of(itree, struct drm_gpusvm_notifier, itree);
341 else
342 return NULL;
343 }
344
345 /**
346 * drm_gpusvm_for_each_notifier() - Iterate over GPU SVM notifiers in a gpusvm
347 * @notifier__: Iterator variable for the notifiers
348 * @gpusvm__: Pointer to the GPU SVM structure
349 * @start__: Start address of the notifier
350 * @end__: End address of the notifier
351 *
352 * This macro is used to iterate over GPU SVM notifiers in a gpusvm.
353 */
354 #define drm_gpusvm_for_each_notifier(notifier__, gpusvm__, start__, end__) \
355 for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1); \
356 (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__)); \
357 (notifier__) = __drm_gpusvm_notifier_next(notifier__))
358
359 /**
360 * drm_gpusvm_for_each_notifier_safe() - Safely iterate over GPU SVM notifiers in a gpusvm
361 * @notifier__: Iterator variable for the notifiers
362 * @next__: Iterator variable for the notifiers temporary storage
363 * @gpusvm__: Pointer to the GPU SVM structure
364 * @start__: Start address of the notifier
365 * @end__: End address of the notifier
366 *
367 * This macro is used to iterate over GPU SVM notifiers in a gpusvm while
368 * removing notifiers from it.
369 */
370 #define drm_gpusvm_for_each_notifier_safe(notifier__, next__, gpusvm__, start__, end__) \
371 for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1), \
372 (next__) = __drm_gpusvm_notifier_next(notifier__); \
373 (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__)); \
374 (notifier__) = (next__), (next__) = __drm_gpusvm_notifier_next(notifier__))
375
376 /**
377 * drm_gpusvm_notifier_invalidate() - Invalidate a GPU SVM notifier.
378 * @mni: Pointer to the mmu_interval_notifier structure.
379 * @mmu_range: Pointer to the mmu_notifier_range structure.
380 * @cur_seq: Current sequence number.
381 *
382 * This function serves as a generic MMU notifier for GPU SVM. It sets the MMU
383 * notifier sequence number and calls the driver invalidate vfunc under
384 * gpusvm->notifier_lock.
385 *
386 * Return: true if the operation succeeds, false otherwise.
387 */
388 static bool
389 drm_gpusvm_notifier_invalidate(struct mmu_interval_notifier *mni,
390 const struct mmu_notifier_range *mmu_range,
391 unsigned long cur_seq)
392 {
393 struct drm_gpusvm_notifier *notifier =
394 container_of(mni, typeof(*notifier), notifier);
395 struct drm_gpusvm *gpusvm = notifier->gpusvm;
396
397 if (!mmu_notifier_range_blockable(mmu_range))
398 return false;
399
400 down_write(&gpusvm->notifier_lock);
401 mmu_interval_set_seq(mni, cur_seq);
402 gpusvm->ops->invalidate(gpusvm, notifier, mmu_range);
403 up_write(&gpusvm->notifier_lock);
404
405 return true;
406 }
407
408 /*
409 * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM
410 */
411 static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = {
412 .invalidate = drm_gpusvm_notifier_invalidate,
413 };
414
415 /**
416 * drm_gpusvm_init() - Initialize the GPU SVM.
417 * @gpusvm: Pointer to the GPU SVM structure.
418 * @name: Name of the GPU SVM.
419 * @drm: Pointer to the DRM device structure.
420 * @mm: Pointer to the mm_struct for the address space.
421 * @device_private_page_owner: Device private pages owner.
422 * @mm_start: Start address of GPU SVM.
423 * @mm_range: Range of the GPU SVM.
424 * @notifier_size: Size of individual notifiers.
425 * @ops: Pointer to the operations structure for GPU SVM.
426 * @chunk_sizes: Pointer to the array of chunk sizes used in range allocation.
427 * Entries should be powers of 2 in descending order with last
428 * entry being SZ_4K.
429 * @num_chunks: Number of chunks.
430 *
431 * This function initializes the GPU SVM.
432 *
433 * Return: 0 on success, a negative error code on failure.
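 *
 * A minimal usage sketch; the chunk sizes, notifier size, address range and
 * the svm / driver_gpusvm_ops names below are illustrative only, with
 * driver_gpusvm_ops assumed to be a driver-provided struct drm_gpusvm_ops
 * with at least .invalidate set:
 *
 * .. code-block:: c
 *
 *	static const unsigned long chunk_sizes[] = { SZ_2M, SZ_64K, SZ_4K };
 *
 *	err = drm_gpusvm_init(&svm->gpusvm, "driver-svm", drm, current->mm,
 *			      NULL, 0, TASK_SIZE, SZ_512M, &driver_gpusvm_ops,
 *			      chunk_sizes, ARRAY_SIZE(chunk_sizes));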
434 */
435 int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
436 const char *name, struct drm_device *drm,
437 struct mm_struct *mm, void *device_private_page_owner,
438 unsigned long mm_start, unsigned long mm_range,
439 unsigned long notifier_size,
440 const struct drm_gpusvm_ops *ops,
441 const unsigned long *chunk_sizes, int num_chunks)
442 {
443 if (!ops->invalidate || !num_chunks)
444 return -EINVAL;
445
446 gpusvm->name = name;
447 gpusvm->drm = drm;
448 gpusvm->mm = mm;
449 gpusvm->device_private_page_owner = device_private_page_owner;
450 gpusvm->mm_start = mm_start;
451 gpusvm->mm_range = mm_range;
452 gpusvm->notifier_size = notifier_size;
453 gpusvm->ops = ops;
454 gpusvm->chunk_sizes = chunk_sizes;
455 gpusvm->num_chunks = num_chunks;
456
457 mmgrab(mm);
458 gpusvm->root = RB_ROOT_CACHED;
459 INIT_LIST_HEAD(&gpusvm->notifier_list);
460
461 init_rwsem(&gpusvm->notifier_lock);
462
463 fs_reclaim_acquire(GFP_KERNEL);
464 might_lock(&gpusvm->notifier_lock);
465 fs_reclaim_release(GFP_KERNEL);
466
467 #ifdef CONFIG_LOCKDEP
468 gpusvm->lock_dep_map = NULL;
469 #endif
470
471 return 0;
472 }
473 EXPORT_SYMBOL_GPL(drm_gpusvm_init);
474
475 /**
476 * drm_gpusvm_notifier_find() - Find GPU SVM notifier
477 * @gpusvm: Pointer to the GPU SVM structure
478 * @fault_addr: Fault address
479 *
480 * This function finds the GPU SVM notifier associated with the fault address.
481 *
482 * Return: Pointer to the GPU SVM notifier on success, NULL otherwise.
483 */
484 static struct drm_gpusvm_notifier *
485 drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm,
486 unsigned long fault_addr)
487 {
488 return notifier_iter_first(&gpusvm->root, fault_addr, fault_addr + 1);
489 }
490
491 /**
492 * to_drm_gpusvm_notifier() - retrieve the container struct for a given rbtree node
493 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_notifier struct
494 *
495 * Return: A pointer to the containing drm_gpusvm_notifier structure.
496 */
497 static struct drm_gpusvm_notifier *to_drm_gpusvm_notifier(struct rb_node *node)
498 {
499 return container_of(node, struct drm_gpusvm_notifier, itree.rb);
500 }
501
502 /**
503 * drm_gpusvm_notifier_insert() - Insert GPU SVM notifier
504 * @gpusvm: Pointer to the GPU SVM structure
505 * @notifier: Pointer to the GPU SVM notifier structure
506 *
507 * This function inserts the GPU SVM notifier into the GPU SVM RB tree and list.
508 */
509 static void drm_gpusvm_notifier_insert(struct drm_gpusvm *gpusvm,
510 struct drm_gpusvm_notifier *notifier)
511 {
512 struct rb_node *node;
513 struct list_head *head;
514
515 interval_tree_insert(&notifier->itree, &gpusvm->root);
516
517 node = rb_prev(&notifier->itree.rb);
518 if (node)
519 head = &(to_drm_gpusvm_notifier(node))->entry;
520 else
521 head = &gpusvm->notifier_list;
522
523 list_add(&notifier->entry, head);
524 }
525
526 /**
527 * drm_gpusvm_notifier_remove() - Remove GPU SVM notifier
528 * @gpusvm: Pointer to the GPU SVM structure
529 * @notifier: Pointer to the GPU SVM notifier structure
530 *
531 * This function removes the GPU SVM notifier from the GPU SVM RB tree and list.
532 */
533 static void drm_gpusvm_notifier_remove(struct drm_gpusvm *gpusvm,
534 struct drm_gpusvm_notifier *notifier)
535 {
536 interval_tree_remove(&notifier->itree, &gpusvm->root);
537 list_del(&notifier->entry);
538 }
539
540 /**
541 * drm_gpusvm_fini() - Finalize the GPU SVM.
542 * @gpusvm: Pointer to the GPU SVM structure.
543 *
544 * This function finalizes the GPU SVM by cleaning up any remaining ranges and
545 * notifiers, and dropping a reference to struct MM.
546 */
547 void drm_gpusvm_fini(struct drm_gpusvm *gpusvm)
548 {
549 struct drm_gpusvm_notifier *notifier, *next;
550
551 drm_gpusvm_for_each_notifier_safe(notifier, next, gpusvm, 0, LONG_MAX) {
552 struct drm_gpusvm_range *range, *__next;
553
554 /*
555 * Remove notifier first to avoid racing with any invalidation
556 */
557 mmu_interval_notifier_remove(&notifier->notifier);
558 notifier->flags.removed = true;
559
560 drm_gpusvm_for_each_range_safe(range, __next, notifier, 0,
561 LONG_MAX)
562 drm_gpusvm_range_remove(gpusvm, range);
563 }
564
565 mmdrop(gpusvm->mm);
566 WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root));
567 }
568 EXPORT_SYMBOL_GPL(drm_gpusvm_fini);
569
570 /**
571 * drm_gpusvm_notifier_alloc() - Allocate GPU SVM notifier
572 * @gpusvm: Pointer to the GPU SVM structure
573 * @fault_addr: Fault address
574 *
575 * This function allocates and initializes the GPU SVM notifier structure.
576 *
577 * Return: Pointer to the allocated GPU SVM notifier on success, ERR_PTR() on failure.
578 */
579 static struct drm_gpusvm_notifier *
580 drm_gpusvm_notifier_alloc(struct drm_gpusvm *gpusvm, unsigned long fault_addr)
581 {
582 struct drm_gpusvm_notifier *notifier;
583
584 if (gpusvm->ops->notifier_alloc)
585 notifier = gpusvm->ops->notifier_alloc();
586 else
587 notifier = kzalloc(sizeof(*notifier), GFP_KERNEL);
588
589 if (!notifier)
590 return ERR_PTR(-ENOMEM);
591
592 notifier->gpusvm = gpusvm;
593 notifier->itree.start = ALIGN_DOWN(fault_addr, gpusvm->notifier_size);
594 notifier->itree.last = ALIGN(fault_addr + 1, gpusvm->notifier_size) - 1;
595 INIT_LIST_HEAD(&notifier->entry);
596 notifier->root = RB_ROOT_CACHED;
597 INIT_LIST_HEAD(&notifier->range_list);
598
599 return notifier;
600 }
601
602 /**
603 * drm_gpusvm_notifier_free() - Free GPU SVM notifier
604 * @gpusvm: Pointer to the GPU SVM structure
605 * @notifier: Pointer to the GPU SVM notifier structure
606 *
607 * This function frees the GPU SVM notifier structure.
608 */
609 static void drm_gpusvm_notifier_free(struct drm_gpusvm *gpusvm,
610 struct drm_gpusvm_notifier *notifier)
611 {
612 WARN_ON(!RB_EMPTY_ROOT(&notifier->root.rb_root));
613
614 if (gpusvm->ops->notifier_free)
615 gpusvm->ops->notifier_free(notifier);
616 else
617 kfree(notifier);
618 }
619
620 /**
621 * to_drm_gpusvm_range() - retrieve the container struct for a given rbtree node
622 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_range struct
623 *
624 * Return: A pointer to the containing drm_gpusvm_range structure.
625 */
626 static struct drm_gpusvm_range *to_drm_gpusvm_range(struct rb_node *node)
627 {
628 return container_of(node, struct drm_gpusvm_range, itree.rb);
629 }
630
631 /**
632 * drm_gpusvm_range_insert() - Insert GPU SVM range
633 * @notifier: Pointer to the GPU SVM notifier structure
634 * @range: Pointer to the GPU SVM range structure
635 *
636 * This function inserts the GPU SVM range into the notifier RB tree and list.
637 */
638 static void drm_gpusvm_range_insert(struct drm_gpusvm_notifier *notifier,
639 struct drm_gpusvm_range *range)
640 {
641 struct rb_node *node;
642 struct list_head *head;
643
644 drm_gpusvm_notifier_lock(notifier->gpusvm);
645 interval_tree_insert(&range->itree, &notifier->root);
646
647 node = rb_prev(&range->itree.rb);
648 if (node)
649 head = &(to_drm_gpusvm_range(node))->entry;
650 else
651 head = &notifier->range_list;
652
653 list_add(&range->entry, head);
654 drm_gpusvm_notifier_unlock(notifier->gpusvm);
655 }
656
657 /**
658 * __drm_gpusvm_range_remove() - Remove GPU SVM range
659 * @notifier: Pointer to the GPU SVM notifier structure
660 * @range: Pointer to the GPU SVM range structure
661 *
662 * This function removes the GPU SVM range from the notifier RB tree and list.
663 */
664 static void __drm_gpusvm_range_remove(struct drm_gpusvm_notifier *notifier,
665 struct drm_gpusvm_range *range)
666 {
667 interval_tree_remove(&range->itree, &notifier->root);
668 list_del(&range->entry);
669 }
670
671 /**
672 * drm_gpusvm_range_alloc() - Allocate GPU SVM range
673 * @gpusvm: Pointer to the GPU SVM structure
674 * @notifier: Pointer to the GPU SVM notifier structure
675 * @fault_addr: Fault address
676 * @chunk_size: Chunk size
677 * @migrate_devmem: Flag indicating whether to migrate device memory
678 *
679 * This function allocates and initializes the GPU SVM range structure.
680 *
681 * Return: Pointer to the allocated GPU SVM range on success, ERR_PTR() on failure.
682 */
683 static struct drm_gpusvm_range *
684 drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm,
685 struct drm_gpusvm_notifier *notifier,
686 unsigned long fault_addr, unsigned long chunk_size,
687 bool migrate_devmem)
688 {
689 struct drm_gpusvm_range *range;
690
691 if (gpusvm->ops->range_alloc)
692 range = gpusvm->ops->range_alloc(gpusvm);
693 else
694 range = kzalloc(sizeof(*range), GFP_KERNEL);
695
696 if (!range)
697 return ERR_PTR(-ENOMEM);
698
699 kref_init(&range->refcount);
700 range->gpusvm = gpusvm;
701 range->notifier = notifier;
702 range->itree.start = ALIGN_DOWN(fault_addr, chunk_size);
703 range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1;
704 INIT_LIST_HEAD(&range->entry);
705 range->notifier_seq = LONG_MAX;
706 range->flags.migrate_devmem = migrate_devmem ? 1 : 0;
707
708 return range;
709 }
710
711 /**
712 * drm_gpusvm_check_pages() - Check pages
713 * @gpusvm: Pointer to the GPU SVM structure
714 * @notifier: Pointer to the GPU SVM notifier structure
715 * @start: Start address
716 * @end: End address
717 *
718 * Check if pages between start and end have been faulted in on the CPU. Used
719 * to prevent migration of pages without a CPU backing store.
720 *
721 * Return: True if pages have been faulted into CPU, False otherwise
722 */
723 static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm,
724 struct drm_gpusvm_notifier *notifier,
725 unsigned long start, unsigned long end)
726 {
727 struct hmm_range hmm_range = {
728 .default_flags = 0,
729 .notifier = &notifier->notifier,
730 .start = start,
731 .end = end,
732 .dev_private_owner = gpusvm->device_private_page_owner,
733 };
734 unsigned long timeout =
735 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
736 unsigned long *pfns;
737 unsigned long npages = npages_in_range(start, end);
738 int err, i;
739
740 mmap_assert_locked(gpusvm->mm);
741
742 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
743 if (!pfns)
744 return false;
745
746 hmm_range.notifier_seq = mmu_interval_read_begin(&notifier->notifier);
747 hmm_range.hmm_pfns = pfns;
748
749 while (true) {
750 err = hmm_range_fault(&hmm_range);
751 if (err == -EBUSY) {
752 if (time_after(jiffies, timeout))
753 break;
754
755 hmm_range.notifier_seq =
756 mmu_interval_read_begin(&notifier->notifier);
757 continue;
758 }
759 break;
760 }
761 if (err)
762 goto err_free;
763
764 for (i = 0; i < npages;) {
765 if (!(pfns[i] & HMM_PFN_VALID)) {
766 err = -EFAULT;
767 goto err_free;
768 }
769 i += 0x1 << hmm_pfn_to_map_order(pfns[i]);
770 }
771
772 err_free:
773 kvfree(pfns);
774 return err ? false : true;
775 }
776
777 /**
778 * drm_gpusvm_range_chunk_size() - Determine chunk size for GPU SVM range
779 * @gpusvm: Pointer to the GPU SVM structure
780 * @notifier: Pointer to the GPU SVM notifier structure
781 * @vas: Pointer to the virtual memory area structure
782 * @fault_addr: Fault address
783 * @gpuva_start: Start address of GPUVA which mirrors CPU
784 * @gpuva_end: End address of GPUVA which mirrors CPU
785 * @check_pages_threshold: Check CPU pages for present threshold
786 *
787 * This function determines the chunk size for the GPU SVM range based on the
788 * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual
789 * memory area boundaries.
790 *
791 * Return: Chunk size on success, LONG_MAX on failure.
792 */
793 static unsigned long
794 drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm,
795 struct drm_gpusvm_notifier *notifier,
796 struct vm_area_struct *vas,
797 unsigned long fault_addr,
798 unsigned long gpuva_start,
799 unsigned long gpuva_end,
800 unsigned long check_pages_threshold)
801 {
802 unsigned long start, end;
803 int i = 0;
804
805 retry:
806 for (; i < gpusvm->num_chunks; ++i) {
807 start = ALIGN_DOWN(fault_addr, gpusvm->chunk_sizes[i]);
808 end = ALIGN(fault_addr + 1, gpusvm->chunk_sizes[i]);
809
810 if (start >= vas->vm_start && end <= vas->vm_end &&
811 start >= drm_gpusvm_notifier_start(notifier) &&
812 end <= drm_gpusvm_notifier_end(notifier) &&
813 start >= gpuva_start && end <= gpuva_end)
814 break;
815 }
816
817 if (i == gpusvm->num_chunks)
818 return LONG_MAX;
819
820 /*
821 * If allocating more than a page, ensure the range does not overlap with
822 * existing ranges.
823 */
824 if (end - start != SZ_4K) {
825 struct drm_gpusvm_range *range;
826
827 range = drm_gpusvm_range_find(notifier, start, end);
828 if (range) {
829 ++i;
830 goto retry;
831 }
832
833 /*
834 * XXX: Only create range on pages CPU has faulted in. Without
835 * this check, or prefault, on BMG 'xe_exec_system_allocator --r
836 * process-many-malloc' fails. In the failure case, each process
837 * mallocs 16k but the CPU VMA is ~128k which results in 64k SVM
838 * ranges. When migrating the SVM ranges, some processes fail in
839 * drm_pagemap_migrate_to_devmem with 'migrate.cpages != npages'
840 * and then upon drm_gpusvm_range_get_pages device pages from
841 * other processes are collected + faulted in which creates all
842 * sorts of problems. Unsure exactly how this happening, also
843 * problem goes away if 'xe_exec_system_allocator --r
844 * process-many-malloc' mallocs at least 64k at a time.
845 */
846 if (end - start <= check_pages_threshold &&
847 !drm_gpusvm_check_pages(gpusvm, notifier, start, end)) {
848 ++i;
849 goto retry;
850 }
851 }
852
853 return end - start;
854 }
855
856 #ifdef CONFIG_LOCKDEP
857 /**
858 * drm_gpusvm_driver_lock_held() - Assert GPU SVM driver lock is held
859 * @gpusvm: Pointer to the GPU SVM structure.
860 *
861 * Ensure driver lock is held.
862 */
863 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm)
864 {
865 if ((gpusvm)->lock_dep_map)
866 lockdep_assert(lock_is_held_type((gpusvm)->lock_dep_map, 0));
867 }
868 #else
869 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm)
870 {
871 }
872 #endif
873
874 /**
875 * drm_gpusvm_find_vma_start() - Find start address for first VMA in range
876 * @gpusvm: Pointer to the GPU SVM structure
877 * @start: The inclusive start user address.
878 * @end: The exclusive end user address.
879 *
880 * Returns: The start address of the first VMA within the provided range,
881 * ULONG_MAX otherwise. Assumes start < end.
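 *
 * A minimal usage sketch, e.g. for skipping holes in the CPU address space
 * when walking a user-specified region (the surrounding driver code is
 * illustrative):
 *
 * .. code-block:: c
 *
 *	unsigned long addr = drm_gpusvm_find_vma_start(gpusvm, start, end);
 *
 *	if (addr == ULONG_MAX)
 *		return 0;	// no VMA in the region, nothing to do
 *	// continue processing from addr rather than start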
882 */
883 unsigned long
884 drm_gpusvm_find_vma_start(struct drm_gpusvm *gpusvm,
885 unsigned long start,
886 unsigned long end)
887 {
888 struct mm_struct *mm = gpusvm->mm;
889 struct vm_area_struct *vma;
890 unsigned long addr = ULONG_MAX;
891
892 if (!mmget_not_zero(mm))
893 return addr;
894
895 mmap_read_lock(mm);
896
897 vma = find_vma_intersection(mm, start, end);
898 if (vma)
899 addr = vma->vm_start;
900
901 mmap_read_unlock(mm);
902 mmput(mm);
903
904 return addr;
905 }
906 EXPORT_SYMBOL_GPL(drm_gpusvm_find_vma_start);
907
908 /**
909 * drm_gpusvm_range_find_or_insert() - Find or insert GPU SVM range
910 * @gpusvm: Pointer to the GPU SVM structure
911 * @fault_addr: Fault address
912 * @gpuva_start: Start address of GPUVA which mirrors CPU
913 * @gpuva_end: End address of GPUVA which mirrors CPU
914 * @ctx: GPU SVM context
915 *
916 * This function finds or inserts a newly allocated GPU SVM range based on the
917 * fault address. Caller must hold a lock to protect range lookup and insertion.
918 *
919 * Return: Pointer to the GPU SVM range on success, ERR_PTR() on failure.
920 */
921 struct drm_gpusvm_range *
922 drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm,
923 unsigned long fault_addr,
924 unsigned long gpuva_start,
925 unsigned long gpuva_end,
926 const struct drm_gpusvm_ctx *ctx)
927 {
928 struct drm_gpusvm_notifier *notifier;
929 struct drm_gpusvm_range *range;
930 struct mm_struct *mm = gpusvm->mm;
931 struct vm_area_struct *vas;
932 bool notifier_alloc = false;
933 unsigned long chunk_size;
934 int err;
935 bool migrate_devmem;
936
937 drm_gpusvm_driver_lock_held(gpusvm);
938
939 if (fault_addr < gpusvm->mm_start ||
940 fault_addr > gpusvm->mm_start + gpusvm->mm_range)
941 return ERR_PTR(-EINVAL);
942
943 if (!mmget_not_zero(mm))
944 return ERR_PTR(-EFAULT);
945
946 notifier = drm_gpusvm_notifier_find(gpusvm, fault_addr);
947 if (!notifier) {
948 notifier = drm_gpusvm_notifier_alloc(gpusvm, fault_addr);
949 if (IS_ERR(notifier)) {
950 err = PTR_ERR(notifier);
951 goto err_mmunlock;
952 }
953 notifier_alloc = true;
954 err = mmu_interval_notifier_insert(&notifier->notifier,
955 mm,
956 drm_gpusvm_notifier_start(notifier),
957 drm_gpusvm_notifier_size(notifier),
958 &drm_gpusvm_notifier_ops);
959 if (err)
960 goto err_notifier;
961 }
962
963 mmap_read_lock(mm);
964
965 vas = vma_lookup(mm, fault_addr);
966 if (!vas) {
967 err = -ENOENT;
968 goto err_notifier_remove;
969 }
970
971 if (!ctx->read_only && !(vas->vm_flags & VM_WRITE)) {
972 err = -EPERM;
973 goto err_notifier_remove;
974 }
975
976 range = drm_gpusvm_range_find(notifier, fault_addr, fault_addr + 1);
977 if (range)
978 goto out_mmunlock;
979 /*
980 * XXX: Short-circuiting migration based on migrate_vma_* current
981 * limitations. If/when migrate_vma_* add more support, this logic will
982 * have to change.
983 */
984 migrate_devmem = ctx->devmem_possible &&
985 vma_is_anonymous(vas) && !is_vm_hugetlb_page(vas);
986
987 chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas,
988 fault_addr, gpuva_start,
989 gpuva_end,
990 ctx->check_pages_threshold);
991 if (chunk_size == LONG_MAX) {
992 err = -EINVAL;
993 goto err_notifier_remove;
994 }
995
996 range = drm_gpusvm_range_alloc(gpusvm, notifier, fault_addr, chunk_size,
997 migrate_devmem);
998 if (IS_ERR(range)) {
999 err = PTR_ERR(range);
1000 goto err_notifier_remove;
1001 }
1002
1003 drm_gpusvm_range_insert(notifier, range);
1004 if (notifier_alloc)
1005 drm_gpusvm_notifier_insert(gpusvm, notifier);
1006
1007 out_mmunlock:
1008 mmap_read_unlock(mm);
1009 mmput(mm);
1010
1011 return range;
1012
1013 err_notifier_remove:
1014 mmap_read_unlock(mm);
1015 if (notifier_alloc)
1016 mmu_interval_notifier_remove(&notifier->notifier);
1017 err_notifier:
1018 if (notifier_alloc)
1019 drm_gpusvm_notifier_free(gpusvm, notifier);
1020 err_mmunlock:
1021 mmput(mm);
1022 return ERR_PTR(err);
1023 }
1024 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert);
1025
1026 /**
1027 * __drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range (internal)
1028 * @gpusvm: Pointer to the GPU SVM structure
1029 * @range: Pointer to the GPU SVM range structure
1030 * @npages: Number of pages to unmap
1031 *
1032 * This function unmaps pages associated with a GPU SVM range. Assumes and
1033 * asserts correct locking is in place when called.
1034 */
1035 static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
1036 struct drm_gpusvm_range *range,
1037 unsigned long npages)
1038 {
1039 unsigned long i, j;
1040 struct drm_pagemap *dpagemap = range->dpagemap;
1041 struct device *dev = gpusvm->drm->dev;
1042
1043 lockdep_assert_held(&gpusvm->notifier_lock);
1044
1045 if (range->flags.has_dma_mapping) {
1046 struct drm_gpusvm_range_flags flags = {
1047 .__flags = range->flags.__flags,
1048 };
1049
1050 for (i = 0, j = 0; i < npages; j++) {
1051 struct drm_pagemap_device_addr *addr = &range->dma_addr[j];
1052
1053 if (addr->proto == DRM_INTERCONNECT_SYSTEM)
1054 dma_unmap_page(dev,
1055 addr->addr,
1056 PAGE_SIZE << addr->order,
1057 addr->dir);
1058 else if (dpagemap && dpagemap->ops->device_unmap)
1059 dpagemap->ops->device_unmap(dpagemap,
1060 dev, *addr);
1061 i += 1 << addr->order;
1062 }
1063
1064 /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
1065 flags.has_devmem_pages = false;
1066 flags.has_dma_mapping = false;
1067 WRITE_ONCE(range->flags.__flags, flags.__flags);
1068
1069 range->dpagemap = NULL;
1070 }
1071 }
1072
1073 /**
1074 * drm_gpusvm_range_free_pages() - Free pages associated with a GPU SVM range
1075 * @gpusvm: Pointer to the GPU SVM structure
1076 * @range: Pointer to the GPU SVM range structure
1077 *
1078 * This function frees the dma address array associated with a GPU SVM range.
1079 */
1080 static void drm_gpusvm_range_free_pages(struct drm_gpusvm *gpusvm,
1081 struct drm_gpusvm_range *range)
1082 {
1083 lockdep_assert_held(&gpusvm->notifier_lock);
1084
1085 if (range->dma_addr) {
1086 kvfree(range->dma_addr);
1087 range->dma_addr = NULL;
1088 }
1089 }
1090
1091 /**
1092 * drm_gpusvm_range_remove() - Remove GPU SVM range
1093 * @gpusvm: Pointer to the GPU SVM structure
1094 * @range: Pointer to the GPU SVM range to be removed
1095 *
1096 * This function removes the specified GPU SVM range and also removes the parent
1097 * GPU SVM notifier if no more ranges remain in the notifier. The caller must
1098 * hold a lock to protect range and notifier removal.
1099 */
1100 void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm,
1101 struct drm_gpusvm_range *range)
1102 {
1103 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
1104 drm_gpusvm_range_end(range));
1105 struct drm_gpusvm_notifier *notifier;
1106
1107 drm_gpusvm_driver_lock_held(gpusvm);
1108
1109 notifier = drm_gpusvm_notifier_find(gpusvm,
1110 drm_gpusvm_range_start(range));
1111 if (WARN_ON_ONCE(!notifier))
1112 return;
1113
1114 drm_gpusvm_notifier_lock(gpusvm);
1115 __drm_gpusvm_range_unmap_pages(gpusvm, range, npages);
1116 drm_gpusvm_range_free_pages(gpusvm, range);
1117 __drm_gpusvm_range_remove(notifier, range);
1118 drm_gpusvm_notifier_unlock(gpusvm);
1119
1120 drm_gpusvm_range_put(range);
1121
1122 if (RB_EMPTY_ROOT(&notifier->root.rb_root)) {
1123 if (!notifier->flags.removed)
1124 mmu_interval_notifier_remove(&notifier->notifier);
1125 drm_gpusvm_notifier_remove(gpusvm, notifier);
1126 drm_gpusvm_notifier_free(gpusvm, notifier);
1127 }
1128 }
1129 EXPORT_SYMBOL_GPL(drm_gpusvm_range_remove);
1130
1131 /**
1132 * drm_gpusvm_range_get() - Get a reference to GPU SVM range
1133 * @range: Pointer to the GPU SVM range
1134 *
1135 * This function increments the reference count of the specified GPU SVM range.
1136 *
1137 * Return: Pointer to the GPU SVM range.
1138 */
1139 struct drm_gpusvm_range *
1140 drm_gpusvm_range_get(struct drm_gpusvm_range *range)
1141 {
1142 kref_get(&range->refcount);
1143
1144 return range;
1145 }
1146 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get);
1147
1148 /**
1149 * drm_gpusvm_range_destroy() - Destroy GPU SVM range
1150 * @refcount: Pointer to the reference counter embedded in the GPU SVM range
1151 *
1152 * This function destroys the specified GPU SVM range when its reference count
1153 * reaches zero. If a custom range-free function is provided, it is invoked to
1154 * free the range; otherwise, the range is deallocated using kfree().
1155 */
1156 static void drm_gpusvm_range_destroy(struct kref *refcount)
1157 {
1158 struct drm_gpusvm_range *range =
1159 container_of(refcount, struct drm_gpusvm_range, refcount);
1160 struct drm_gpusvm *gpusvm = range->gpusvm;
1161
1162 if (gpusvm->ops->range_free)
1163 gpusvm->ops->range_free(range);
1164 else
1165 kfree(range);
1166 }
1167
1168 /**
1169 * drm_gpusvm_range_put() - Put a reference to GPU SVM range
1170 * @range: Pointer to the GPU SVM range
1171 *
1172 * This function decrements the reference count of the specified GPU SVM range
1173 * and frees it when the count reaches zero.
1174 */
1175 void drm_gpusvm_range_put(struct drm_gpusvm_range *range)
1176 {
1177 kref_put(&range->refcount, drm_gpusvm_range_destroy);
1178 }
1179 EXPORT_SYMBOL_GPL(drm_gpusvm_range_put);
1180
1181 /**
1182 * drm_gpusvm_range_pages_valid() - GPU SVM range pages valid
1183 * @gpusvm: Pointer to the GPU SVM structure
1184 * @range: Pointer to the GPU SVM range structure
1185 *
1186 * This function determines if a GPU SVM range's pages are valid. It is expected
1187 * to be called holding gpusvm->notifier_lock and as the last step before committing a
1188 * GPU binding. This is akin to a notifier seqno check in the HMM documentation
1189 * but due to wider notifiers (i.e., notifiers which span multiple ranges) this
1190 * function is required for finer grained checking (i.e., per range) if pages
1191 * are valid.
1192 *
1193 * Return: True if GPU SVM range has valid pages, False otherwise
1194 */
1195 bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm,
1196 struct drm_gpusvm_range *range)
1197 {
1198 lockdep_assert_held(&gpusvm->notifier_lock);
1199
1200 return range->flags.has_devmem_pages || range->flags.has_dma_mapping;
1201 }
1202 EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid);
1203
1204 /**
1205 * drm_gpusvm_range_pages_valid_unlocked() - GPU SVM range pages valid unlocked
1206 * @gpusvm: Pointer to the GPU SVM structure
1207 * @range: Pointer to the GPU SVM range structure
1208 *
1209 * This function determines if a GPU SVM range's pages are valid. It is expected
1210 * to be called without holding gpusvm->notifier_lock.
1211 *
1212 * Return: True if GPU SVM range has valid pages, False otherwise
1213 */
1214 static bool
1215 drm_gpusvm_range_pages_valid_unlocked(struct drm_gpusvm *gpusvm,
1216 struct drm_gpusvm_range *range)
1217 {
1218 bool pages_valid;
1219
1220 if (!range->dma_addr)
1221 return false;
1222
1223 drm_gpusvm_notifier_lock(gpusvm);
1224 pages_valid = drm_gpusvm_range_pages_valid(gpusvm, range);
1225 if (!pages_valid)
1226 drm_gpusvm_range_free_pages(gpusvm, range);
1227 drm_gpusvm_notifier_unlock(gpusvm);
1228
1229 return pages_valid;
1230 }
1231
1232 /**
1233 * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range
1234 * @gpusvm: Pointer to the GPU SVM structure
1235 * @range: Pointer to the GPU SVM range structure
1236 * @ctx: GPU SVM context
1237 *
1238 * This function gets pages for a GPU SVM range and ensures they are mapped for
1239 * DMA access.
1240 *
1241 * Return: 0 on success, negative error code on failure.
1242 */
1243 int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm,
1244 struct drm_gpusvm_range *range,
1245 const struct drm_gpusvm_ctx *ctx)
1246 {
1247 struct mmu_interval_notifier *notifier = &range->notifier->notifier;
1248 struct hmm_range hmm_range = {
1249 .default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ? 0 :
1250 HMM_PFN_REQ_WRITE),
1251 .notifier = notifier,
1252 .start = drm_gpusvm_range_start(range),
1253 .end = drm_gpusvm_range_end(range),
1254 .dev_private_owner = gpusvm->device_private_page_owner,
1255 };
1256 struct mm_struct *mm = gpusvm->mm;
1257 void *zdd;
1258 unsigned long timeout =
1259 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
1260 unsigned long i, j;
1261 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
1262 drm_gpusvm_range_end(range));
1263 unsigned long num_dma_mapped;
1264 unsigned int order = 0;
1265 unsigned long *pfns;
1266 int err = 0;
1267 struct dev_pagemap *pagemap;
1268 struct drm_pagemap *dpagemap;
1269 struct drm_gpusvm_range_flags flags;
1270
1271 retry:
1272 hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
1273 if (drm_gpusvm_range_pages_valid_unlocked(gpusvm, range))
1274 goto set_seqno;
1275
1276 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
1277 if (!pfns)
1278 return -ENOMEM;
1279
1280 if (!mmget_not_zero(mm)) {
1281 err = -EFAULT;
1282 goto err_free;
1283 }
1284
1285 hmm_range.hmm_pfns = pfns;
1286 while (true) {
1287 mmap_read_lock(mm);
1288 err = hmm_range_fault(&hmm_range);
1289 mmap_read_unlock(mm);
1290
1291 if (err == -EBUSY) {
1292 if (time_after(jiffies, timeout))
1293 break;
1294
1295 hmm_range.notifier_seq =
1296 mmu_interval_read_begin(notifier);
1297 continue;
1298 }
1299 break;
1300 }
1301 mmput(mm);
1302 if (err)
1303 goto err_free;
1304
1305 map_pages:
1306 /*
1307 * Perform all dma mappings under the notifier lock to not
1308 * access freed pages. A notifier will either block on
1309 * the notifier lock or unmap dma.
1310 */
1311 drm_gpusvm_notifier_lock(gpusvm);
1312
1313 flags.__flags = range->flags.__flags;
1314 if (flags.unmapped) {
1315 drm_gpusvm_notifier_unlock(gpusvm);
1316 err = -EFAULT;
1317 goto err_free;
1318 }
1319
1320 if (mmu_interval_read_retry(notifier, hmm_range.notifier_seq)) {
1321 drm_gpusvm_notifier_unlock(gpusvm);
1322 kvfree(pfns);
1323 goto retry;
1324 }
1325
1326 if (!range->dma_addr) {
1327 /* Unlock and restart mapping to allocate memory. */
1328 drm_gpusvm_notifier_unlock(gpusvm);
1329 range->dma_addr = kvmalloc_array(npages,
1330 sizeof(*range->dma_addr),
1331 GFP_KERNEL);
1332 if (!range->dma_addr) {
1333 err = -ENOMEM;
1334 goto err_free;
1335 }
1336 goto map_pages;
1337 }
1338
1339 zdd = NULL;
1340 pagemap = NULL;
1341 num_dma_mapped = 0;
1342 for (i = 0, j = 0; i < npages; ++j) {
1343 struct page *page = hmm_pfn_to_page(pfns[i]);
1344
1345 order = hmm_pfn_to_map_order(pfns[i]);
1346 if (is_device_private_page(page) ||
1347 is_device_coherent_page(page)) {
1348 if (zdd != page->zone_device_data && i > 0) {
1349 err = -EOPNOTSUPP;
1350 goto err_unmap;
1351 }
1352 zdd = page->zone_device_data;
1353 if (pagemap != page_pgmap(page)) {
1354 if (i > 0) {
1355 err = -EOPNOTSUPP;
1356 goto err_unmap;
1357 }
1358
1359 pagemap = page_pgmap(page);
1360 dpagemap = drm_pagemap_page_to_dpagemap(page);
1361 if (drm_WARN_ON(gpusvm->drm, !dpagemap)) {
1362 /*
1363 * Raced. This is not supposed to happen
1364 * since hmm_range_fault() should've migrated
1365 * this page to system.
1366 */
1367 err = -EAGAIN;
1368 goto err_unmap;
1369 }
1370 }
1371 range->dma_addr[j] =
1372 dpagemap->ops->device_map(dpagemap,
1373 gpusvm->drm->dev,
1374 page, order,
1375 DMA_BIDIRECTIONAL);
1376 if (dma_mapping_error(gpusvm->drm->dev,
1377 range->dma_addr[j].addr)) {
1378 err = -EFAULT;
1379 goto err_unmap;
1380 }
1381 } else {
1382 dma_addr_t addr;
1383
1384 if (is_zone_device_page(page) || pagemap) {
1385 err = -EOPNOTSUPP;
1386 goto err_unmap;
1387 }
1388
1389 if (ctx->devmem_only) {
1390 err = -EFAULT;
1391 goto err_unmap;
1392 }
1393
1394 addr = dma_map_page(gpusvm->drm->dev,
1395 page, 0,
1396 PAGE_SIZE << order,
1397 DMA_BIDIRECTIONAL);
1398 if (dma_mapping_error(gpusvm->drm->dev, addr)) {
1399 err = -EFAULT;
1400 goto err_unmap;
1401 }
1402
1403 range->dma_addr[j] = drm_pagemap_device_addr_encode
1404 (addr, DRM_INTERCONNECT_SYSTEM, order,
1405 DMA_BIDIRECTIONAL);
1406 }
1407 i += 1 << order;
1408 num_dma_mapped = i;
1409 flags.has_dma_mapping = true;
1410 }
1411
1412 if (pagemap) {
1413 flags.has_devmem_pages = true;
1414 range->dpagemap = dpagemap;
1415 }
1416
1417 /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
1418 WRITE_ONCE(range->flags.__flags, flags.__flags);
1419
1420 drm_gpusvm_notifier_unlock(gpusvm);
1421 kvfree(pfns);
1422 set_seqno:
1423 range->notifier_seq = hmm_range.notifier_seq;
1424
1425 return 0;
1426
1427 err_unmap:
1428 __drm_gpusvm_range_unmap_pages(gpusvm, range, num_dma_mapped);
1429 drm_gpusvm_notifier_unlock(gpusvm);
1430 err_free:
1431 kvfree(pfns);
1432 if (err == -EAGAIN)
1433 goto retry;
1434 return err;
1435 }
1436 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages);
1437
1438 /**
1439 * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range
1441 * @gpusvm: Pointer to the GPU SVM structure
1442 * @range: Pointer to the GPU SVM range structure
1443 * @ctx: GPU SVM context
1444 *
1445 * This function unmaps pages associated with a GPU SVM range. If @in_notifier
1446 * is set, it is assumed that gpusvm->notifier_lock is held in write mode; if it
1447 * is clear, it acquires gpusvm->notifier_lock in read mode. Must be called on
1448 * each GPU SVM range attached to the notifier in gpusvm->ops->invalidate() for the
1449 * IOMMU security model.
1450 */
1451 void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
1452 struct drm_gpusvm_range *range,
1453 const struct drm_gpusvm_ctx *ctx)
1454 {
1455 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
1456 drm_gpusvm_range_end(range));
1457
1458 if (ctx->in_notifier)
1459 lockdep_assert_held_write(&gpusvm->notifier_lock);
1460 else
1461 drm_gpusvm_notifier_lock(gpusvm);
1462
1463 __drm_gpusvm_range_unmap_pages(gpusvm, range, npages);
1464
1465 if (!ctx->in_notifier)
1466 drm_gpusvm_notifier_unlock(gpusvm);
1467 }
1468 EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages);
1469
1470 /**
1471 * drm_gpusvm_range_evict() - Evict GPU SVM range
1472 * @gpusvm: Pointer to the GPU SVM structure
1473 * @range: Pointer to the GPU SVM range to be removed
1474 *
1475 * This function evicts the specified GPU SVM range.
1476 *
1477 * Return: 0 on success, a negative error code on failure.
1478 */
1479 int drm_gpusvm_range_evict(struct drm_gpusvm *gpusvm,
1480 struct drm_gpusvm_range *range)
1481 {
1482 struct mmu_interval_notifier *notifier = &range->notifier->notifier;
1483 struct hmm_range hmm_range = {
1484 .default_flags = HMM_PFN_REQ_FAULT,
1485 .notifier = notifier,
1486 .start = drm_gpusvm_range_start(range),
1487 .end = drm_gpusvm_range_end(range),
1488 .dev_private_owner = NULL,
1489 };
1490 unsigned long timeout =
1491 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
1492 unsigned long *pfns;
1493 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
1494 drm_gpusvm_range_end(range));
1495 int err = 0;
1496 struct mm_struct *mm = gpusvm->mm;
1497
1498 if (!mmget_not_zero(mm))
1499 return -EFAULT;
1500
1501 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
1502 if (!pfns)
1503 return -ENOMEM;
1504
1505 hmm_range.hmm_pfns = pfns;
1506 while (!time_after(jiffies, timeout)) {
1507 hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
1508 if (time_after(jiffies, timeout)) {
1509 err = -ETIME;
1510 break;
1511 }
1512
1513 mmap_read_lock(mm);
1514 err = hmm_range_fault(&hmm_range);
1515 mmap_read_unlock(mm);
1516 if (err != -EBUSY)
1517 break;
1518 }
1519
1520 kvfree(pfns);
1521 mmput(mm);
1522
1523 return err;
1524 }
1525 EXPORT_SYMBOL_GPL(drm_gpusvm_range_evict);
1526
1527 /**
1528 * drm_gpusvm_has_mapping() - Check if GPU SVM has mapping for the given address range
1529 * @gpusvm: Pointer to the GPU SVM structure.
1530 * @start: Start address
1531 * @end: End address
1532 *
1533 * Return: True if GPU SVM has mapping, False otherwise
1534 */
1535 bool drm_gpusvm_has_mapping(struct drm_gpusvm *gpusvm, unsigned long start,
1536 unsigned long end)
1537 {
1538 struct drm_gpusvm_notifier *notifier;
1539
1540 drm_gpusvm_for_each_notifier(notifier, gpusvm, start, end) {
1541 struct drm_gpusvm_range *range = NULL;
1542
1543 drm_gpusvm_for_each_range(range, notifier, start, end)
1544 return true;
1545 }
1546
1547 return false;
1548 }
1549 EXPORT_SYMBOL_GPL(drm_gpusvm_has_mapping);
1550
1551 /**
1552 * drm_gpusvm_range_set_unmapped() - Mark a GPU SVM range as unmapped
1553 * @range: Pointer to the GPU SVM range structure.
1554 * @mmu_range: Pointer to the MMU notifier range structure.
1555 *
1556 * This function marks a GPU SVM range as unmapped and sets the partial_unmap flag
1557 * if the range partially falls within the provided MMU notifier range.
1558 */
1559 void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range,
1560 const struct mmu_notifier_range *mmu_range)
1561 {
1562 lockdep_assert_held_write(&range->gpusvm->notifier_lock);
1563
1564 range->flags.unmapped = true;
1565 if (drm_gpusvm_range_start(range) < mmu_range->start ||
1566 drm_gpusvm_range_end(range) > mmu_range->end)
1567 range->flags.partial_unmap = true;
1568 }
1569 EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped);
1570
1571 MODULE_DESCRIPTION("DRM GPUSVM");
1572 MODULE_LICENSE("GPL");
1573