1 // SPDX-License-Identifier: GPL-2.0-only OR MIT
2 /*
3 * Copyright © 2024 Intel Corporation
4 *
5 * Authors:
6 * Matthew Brost <matthew.brost@intel.com>
7 */
8
9 #include <linux/dma-mapping.h>
10 #include <linux/export.h>
11 #include <linux/hmm.h>
12 #include <linux/hugetlb_inline.h>
13 #include <linux/memremap.h>
14 #include <linux/mm_types.h>
15 #include <linux/slab.h>
16
17 #include <drm/drm_device.h>
18 #include <drm/drm_gpusvm.h>
19 #include <drm/drm_pagemap.h>
20 #include <drm/drm_print.h>
21
22 /**
23 * DOC: Overview
24 *
25 * The GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering Manager (DRM)
26 * is a component of the DRM framework designed to manage shared virtual memory
27 * between the CPU and GPU. It enables efficient data exchange and processing
28 * for GPU-accelerated applications by allowing memory sharing and
29 * synchronization between the CPU's and GPU's virtual address spaces.
30 *
31 * Key GPU SVM Components:
32 *
33 * - Notifiers:
34 * Used for tracking memory intervals and notifying the GPU of changes,
35 * notifiers are sized based on a GPU SVM initialization parameter, with a
36 * recommendation of 512M or larger. They maintain a Red-Black tree and a
37 * list of ranges that fall within the notifier interval. Notifiers are
38 * tracked within a GPU SVM Red-Black tree and list and are dynamically
39 * inserted or removed as ranges within the interval are created or
40 * destroyed.
41 * - Ranges:
42 * Represent memory ranges mapped in a DRM device and managed by GPU SVM.
43 * They are sized based on an array of chunk sizes, which is a GPU SVM
44 * initialization parameter, and the CPU address space. Upon GPU fault,
45 * the largest aligned chunk that fits within the faulting CPU address
46 * space is chosen for the range size. Ranges are expected to be
47 * dynamically allocated on GPU fault and removed on an MMU notifier UNMAP
48 * event. As mentioned above, ranges are tracked in a notifier's Red-Black
49 * tree.
50 *
51 * - Operations:
52 * Define the interface for driver-specific GPU SVM operations such as
53 * range allocation, notifier allocation, and invalidations.
54 *
55 * - Device Memory Allocations:
56 * Embedded structure containing enough information for GPU SVM to migrate
57 * to / from device memory.
58 *
59 * - Device Memory Operations:
60 * Define the interface for driver-specific device memory operations,
61 * such as releasing memory, populating pfns, and copying to / from device memory.
62 *
63 * This layer provides interfaces for allocating, mapping, migrating, and
64 * releasing memory ranges between the CPU and GPU. It handles all core memory
65 * management interactions (DMA mapping, HMM, and migration) and provides
66 * driver-specific virtual functions (vfuncs). This infrastructure is sufficient
67 * to build the expected driver components for an SVM implementation as detailed
68 * below.
69 *
70 * Expected Driver Components:
71 *
72 * - GPU page fault handler:
73 * Used to create ranges and notifiers based on the fault address,
74 * optionally migrate the range to device memory, and create GPU bindings.
75 *
76 * - Garbage collector:
77 * Used to unmap and destroy GPU bindings for ranges. Ranges are expected
78 * to be added to the garbage collector upon a MMU_NOTIFY_UNMAP event in
79 * notifier callback.
80 *
81 * - Notifier callback:
82 * Used to invalidate and DMA unmap GPU bindings for ranges.
83 */
84
85 /**
86 * DOC: Locking
87 *
88 * GPU SVM handles locking for core MM interactions, i.e., it locks/unlocks the
89 * mmap lock as needed.
90 *
91 * GPU SVM introduces a global notifier lock, which safeguards the notifier's
92 * range RB tree and list, as well as the range's DMA mappings and sequence
93 * number. GPU SVM manages all necessary locking and unlocking operations,
94 * except for the recheck of the range's pages being valid
95 * (drm_gpusvm_range_pages_valid) when the driver is committing GPU bindings.
96 * This lock corresponds to the ``driver->update`` lock mentioned in
97 * Documentation/mm/hmm.rst. Future revisions may transition from a GPU SVM
98 * global lock to a per-notifier lock if finer-grained locking is deemed
99 * necessary.
100 *
101 * In addition to the locking mentioned above, the driver should implement a
102 * lock to safeguard core GPU SVM function calls that modify state, such as
103 * drm_gpusvm_range_find_or_insert and drm_gpusvm_range_remove. This lock is
104 * denoted as 'driver_svm_lock' in code examples. Finer-grained driver-side
105 * locking should also be possible for concurrent GPU fault processing within a
106 * single GPU SVM. The 'driver_svm_lock' can be passed to
107 * drm_gpusvm_driver_set_lock() to add lock annotations to GPU SVM.
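 *
 * A minimal sketch of the driver-side lock setup described above (illustrative
 * only; the 'driver' structure and its 'svm_lock' mutex are hypothetical):
 *
 * .. code-block:: c
 *
 *	// At driver init, after drm_gpusvm_init():
 *	mutex_init(&driver->svm_lock);
 *	drm_gpusvm_driver_set_lock(&driver->gpusvm, &driver->svm_lock);
 *
 *	// Around core GPU SVM calls that modify state:
 *	mutex_lock(&driver->svm_lock);
 *	range = drm_gpusvm_range_find_or_insert(&driver->gpusvm, fault_addr,
 *						gpuva_start, gpuva_end, &ctx);
 *	...
 *	mutex_unlock(&driver->svm_lock);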
108 */
109
110 /**
111 * DOC: Partial Unmapping of Ranges
112 *
113 * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by CPU resulting
114 * in MMU_NOTIFY_UNMAP event) presents several challenges, with the main one
115 * being that a subset of the range still has CPU and GPU mappings. If the
116 * backing store for the range is in device memory, a subset of the backing
117 * store has references. One option would be to split the range and device
118 * memory backing store, but the implementation for this would be quite
119 * complicated. Given that partial unmappings are rare and driver-defined range
120 * sizes are relatively small, GPU SVM does not support splitting of ranges.
121 *
122 * With no support for range splitting, upon partial unmapping of a range, the
123 * driver is expected to invalidate and destroy the entire range. If the range
124 * has device memory as its backing, the driver is also expected to migrate any
125 * remaining pages back to RAM.
126 */
127
128 /**
129 * DOC: Examples
130 *
131 * This section provides three examples of how to build the expected driver
132 * components: the GPU page fault handler, the garbage collector, and the
133 * notifier callback.
134 *
135 * The generic code provided does not include logic for complex migration
136 * policies, optimized invalidations, fine-grained driver locking, or other
137 * potentially required driver locking (e.g., DMA-resv locks).
138 *
139 * 1) GPU page fault handler
140 *
141 * .. code-block:: c
142 *
143 * int driver_bind_range(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range)
144 * {
145 * int err = 0;
146 *
147 * driver_alloc_and_setup_memory_for_bind(gpusvm, range);
148 *
149 * drm_gpusvm_notifier_lock(gpusvm);
150 * if (drm_gpusvm_range_pages_valid(range))
151 * driver_commit_bind(gpusvm, range);
152 * else
153 * err = -EAGAIN;
154 * drm_gpusvm_notifier_unlock(gpusvm);
155 *
156 * return err;
157 * }
158 *
159 * int driver_gpu_fault(struct drm_gpusvm *gpusvm, unsigned long fault_addr,
160 * unsigned long gpuva_start, unsigned long gpuva_end)
161 * {
162 * struct drm_gpusvm_ctx ctx = {};
 * struct drm_gpusvm_range *range;
163 * int err;
164 *
165 * driver_svm_lock();
166 * retry:
167 * // Always process UNMAPs first so view of GPU SVM ranges is current
168 * driver_garbage_collector(gpusvm);
169 *
170 * range = drm_gpusvm_range_find_or_insert(gpusvm, fault_addr,
171 * gpuva_start, gpuva_end,
172 * &ctx);
173 * if (IS_ERR(range)) {
174 * err = PTR_ERR(range);
175 * goto unlock;
176 * }
177 *
178 * if (driver_migration_policy(range)) {
179 * err = drm_pagemap_populate_mm(driver_choose_drm_pagemap(),
180 * gpuva_start, gpuva_end, gpusvm->mm,
181 * ctx.timeslice_ms);
182 * if (err) // CPU mappings may have changed
183 * goto retry;
184 * }
185 *
186 * err = drm_gpusvm_range_get_pages(gpusvm, range, &ctx);
187 * if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) { // CPU mappings changed
188 * if (err == -EOPNOTSUPP)
189 * drm_gpusvm_range_evict(gpusvm, range);
190 * goto retry;
191 * } else if (err) {
192 * goto unlock;
193 * }
194 *
195 * err = driver_bind_range(gpusvm, range);
196 * if (err == -EAGAIN) // CPU mappings changed
197 * goto retry;
198 *
199 * unlock:
200 * driver_svm_unlock();
201 * return err;
202 * }
203 *
204 * 2) Garbage Collector
205 *
206 * .. code-block:: c
207 *
208 * void __driver_garbage_collector(struct drm_gpusvm *gpusvm,
209 * struct drm_gpusvm_range *range)
210 * {
211 * assert_driver_svm_locked(gpusvm);
212 *
213 * // Partial unmap, migrate any remaining device memory pages back to RAM
214 * if (range->flags.partial_unmap)
215 * drm_gpusvm_range_evict(gpusvm, range);
216 *
217 * driver_unbind_range(range);
218 * drm_gpusvm_range_remove(gpusvm, range);
219 * }
220 *
221 * void driver_garbage_collector(struct drm_gpusvm *gpusvm)
222 * {
223 * assert_driver_svm_locked(gpusvm);
224 *
225 * for_each_range_in_garbage_collector(gpusvm, range)
226 * __driver_garbage_collector(gpusvm, range);
227 * }
228 *
229 * 3) Notifier callback
230 *
231 * .. code-block:: c
232 *
233 * void driver_invalidation(struct drm_gpusvm *gpusvm,
234 * struct drm_gpusvm_notifier *notifier,
235 * const struct mmu_notifier_range *mmu_range)
236 * {
237 * struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
238 * struct drm_gpusvm_range *range = NULL;
239 *
240 * driver_invalidate_device_pages(gpusvm, mmu_range->start, mmu_range->end);
241 *
242 * drm_gpusvm_for_each_range(range, notifier, mmu_range->start,
243 * mmu_range->end) {
244 * drm_gpusvm_range_unmap_pages(gpusvm, range, &ctx);
245 *
246 * if (mmu_range->event != MMU_NOTIFY_UNMAP)
247 * continue;
248 *
249 * drm_gpusvm_range_set_unmapped(range, mmu_range);
250 * driver_garbage_collector_add(gpusvm, range);
251 * }
252 * }
253 */
254
255 /**
256 * npages_in_range() - Calculate the number of pages in a given range
257 * @start: The start address of the range
258 * @end: The end address of the range
259 *
260 * This function calculates the number of pages in a given memory range,
261 * specified by the start and end addresses. It divides the difference
262 * between the end and start addresses by the page size (PAGE_SIZE) to
263 * determine the number of pages in the range.
264 *
265 * Return: The number of pages in the specified range.
266 */
267 static unsigned long
268 npages_in_range(unsigned long start, unsigned long end)
269 {
270 return (end - start) >> PAGE_SHIFT;
271 }
272
273 /**
274 * drm_gpusvm_notifier_find() - Find GPU SVM notifier from GPU SVM
275 * @gpusvm: Pointer to the GPU SVM structure.
276 * @start: Start address of the notifier
277 * @end: End address of the notifier
278 *
279 * Return: A pointer to the drm_gpusvm_notifier if found or NULL
280 */
281 struct drm_gpusvm_notifier *
282 drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm, unsigned long start,
283 unsigned long end)
284 {
285 struct interval_tree_node *itree;
286
287 itree = interval_tree_iter_first(&gpusvm->root, start, end - 1);
288
289 if (itree)
290 return container_of(itree, struct drm_gpusvm_notifier, itree);
291 else
292 return NULL;
293 }
294 EXPORT_SYMBOL_GPL(drm_gpusvm_notifier_find);
295
296 /**
297 * drm_gpusvm_range_find() - Find GPU SVM range from GPU SVM notifier
298 * @notifier: Pointer to the GPU SVM notifier structure.
299 * @start: Start address of the range
300 * @end: End address of the range
301 *
302 * Return: A pointer to the drm_gpusvm_range if found or NULL
303 */
304 struct drm_gpusvm_range *
305 drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start,
306 unsigned long end)
307 {
308 struct interval_tree_node *itree;
309
310 itree = interval_tree_iter_first(&notifier->root, start, end - 1);
311
312 if (itree)
313 return container_of(itree, struct drm_gpusvm_range, itree);
314 else
315 return NULL;
316 }
317 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find);
318
319 /**
320 * drm_gpusvm_notifier_invalidate() - Invalidate a GPU SVM notifier.
321 * @mni: Pointer to the mmu_interval_notifier structure.
322 * @mmu_range: Pointer to the mmu_notifier_range structure.
323 * @cur_seq: Current sequence number.
324 *
325 * This function serves as a generic MMU notifier for GPU SVM. It sets the MMU
326 * notifier sequence number and calls the driver invalidate vfunc under
327 * gpusvm->notifier_lock.
328 *
329 * Return: true if the operation succeeds, false otherwise.
330 */
331 static bool
332 drm_gpusvm_notifier_invalidate(struct mmu_interval_notifier *mni,
333 const struct mmu_notifier_range *mmu_range,
334 unsigned long cur_seq)
335 {
336 struct drm_gpusvm_notifier *notifier =
337 container_of(mni, typeof(*notifier), notifier);
338 struct drm_gpusvm *gpusvm = notifier->gpusvm;
339
340 if (!mmu_notifier_range_blockable(mmu_range))
341 return false;
342
343 down_write(&gpusvm->notifier_lock);
344 mmu_interval_set_seq(mni, cur_seq);
345 gpusvm->ops->invalidate(gpusvm, notifier, mmu_range);
346 up_write(&gpusvm->notifier_lock);
347
348 return true;
349 }
350
351 /*
352 * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM
353 */
354 static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = {
355 .invalidate = drm_gpusvm_notifier_invalidate,
356 };
357
358 /**
359 * drm_gpusvm_init() - Initialize the GPU SVM.
360 * @gpusvm: Pointer to the GPU SVM structure.
361 * @name: Name of the GPU SVM.
362 * @drm: Pointer to the DRM device structure.
363 * @mm: Pointer to the mm_struct for the address space.
364 * @mm_start: Start address of GPU SVM.
365 * @mm_range: Range of the GPU SVM.
366 * @notifier_size: Size of individual notifiers.
367 * @ops: Pointer to the operations structure for GPU SVM.
368 * @chunk_sizes: Pointer to the array of chunk sizes used in range allocation.
369 * Entries should be powers of 2 in descending order with last
370 * entry being SZ_4K.
371 * @num_chunks: Number of chunks.
372 *
373 * This function initializes the GPU SVM.
374 *
375 * Note: If only using the simple drm_gpusvm_pages API (get/unmap/free),
376 * then only @gpusvm, @name, and @drm are expected. However, the same base
377 * @gpusvm can also be used with both modes together, in which case the full
378 * setup is needed and the core drm_gpusvm_pages API will simply never use
379 * the other fields.
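 *
 * A minimal sketch of both setup modes (illustrative only; the driver_* names
 * and the chunk sizes are hypothetical):
 *
 * .. code-block:: c
 *
 *	static const unsigned long driver_chunk_sizes[] = { SZ_2M, SZ_64K, SZ_4K };
 *
 *	// Full SVM mode: notifiers, ranges and migration are available.
 *	err = drm_gpusvm_init(&driver->gpusvm, "driver-svm", drm, current->mm,
 *			      0, TASK_SIZE, SZ_512M, &driver_gpusvm_ops,
 *			      driver_chunk_sizes, ARRAY_SIZE(driver_chunk_sizes));
 *
 *	// Pages-only mode: only the drm_gpusvm_pages API is used.
 *	err = drm_gpusvm_init(&driver->pages_gpusvm, "driver-pages", drm, NULL,
 *			      0, 0, 0, NULL, NULL, 0);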
380 *
381 * Return: 0 on success, a negative error code on failure.
382 */
383 int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
384 const char *name, struct drm_device *drm,
385 struct mm_struct *mm,
386 unsigned long mm_start, unsigned long mm_range,
387 unsigned long notifier_size,
388 const struct drm_gpusvm_ops *ops,
389 const unsigned long *chunk_sizes, int num_chunks)
390 {
391 if (mm) {
392 if (!ops->invalidate || !num_chunks)
393 return -EINVAL;
394 mmgrab(mm);
395 } else {
396 /* No full SVM mode, only core drm_gpusvm_pages API. */
397 if (ops || num_chunks || mm_range || notifier_size)
398 return -EINVAL;
399 }
400
401 gpusvm->name = name;
402 gpusvm->drm = drm;
403 gpusvm->mm = mm;
404 gpusvm->mm_start = mm_start;
405 gpusvm->mm_range = mm_range;
406 gpusvm->notifier_size = notifier_size;
407 gpusvm->ops = ops;
408 gpusvm->chunk_sizes = chunk_sizes;
409 gpusvm->num_chunks = num_chunks;
410
411 gpusvm->root = RB_ROOT_CACHED;
412 INIT_LIST_HEAD(&gpusvm->notifier_list);
413
414 init_rwsem(&gpusvm->notifier_lock);
415
416 fs_reclaim_acquire(GFP_KERNEL);
417 might_lock(&gpusvm->notifier_lock);
418 fs_reclaim_release(GFP_KERNEL);
419
420 #ifdef CONFIG_LOCKDEP
421 gpusvm->lock_dep_map = NULL;
422 #endif
423
424 return 0;
425 }
426 EXPORT_SYMBOL_GPL(drm_gpusvm_init);
427
428 /**
429 * to_drm_gpusvm_notifier() - retrieve the container struct for a given rbtree node
430 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_notifier struct
431 *
432 * Return: A pointer to the containing drm_gpusvm_notifier structure.
433 */
434 static struct drm_gpusvm_notifier *to_drm_gpusvm_notifier(struct rb_node *node)
435 {
436 return container_of(node, struct drm_gpusvm_notifier, itree.rb);
437 }
438
439 /**
440 * drm_gpusvm_notifier_insert() - Insert GPU SVM notifier
441 * @gpusvm: Pointer to the GPU SVM structure
442 * @notifier: Pointer to the GPU SVM notifier structure
443 *
444 * This function inserts the GPU SVM notifier into the GPU SVM RB tree and list.
445 */
446 static void drm_gpusvm_notifier_insert(struct drm_gpusvm *gpusvm,
447 struct drm_gpusvm_notifier *notifier)
448 {
449 struct rb_node *node;
450 struct list_head *head;
451
452 interval_tree_insert(&notifier->itree, &gpusvm->root);
453
454 node = rb_prev(&notifier->itree.rb);
455 if (node)
456 head = &(to_drm_gpusvm_notifier(node))->entry;
457 else
458 head = &gpusvm->notifier_list;
459
460 list_add(&notifier->entry, head);
461 }
462
463 /**
464 * drm_gpusvm_notifier_remove() - Remove GPU SVM notifier
465 * @gpusvm: Pointer to the GPU SVM structure
466 * @notifier: Pointer to the GPU SVM notifier structure
467 *
468 * This function removes the GPU SVM notifier from the GPU SVM RB tree and list.
469 */
470 static void drm_gpusvm_notifier_remove(struct drm_gpusvm *gpusvm,
471 struct drm_gpusvm_notifier *notifier)
472 {
473 interval_tree_remove(&notifier->itree, &gpusvm->root);
474 list_del(&notifier->entry);
475 }
476
477 /**
478 * drm_gpusvm_fini() - Finalize the GPU SVM.
479 * @gpusvm: Pointer to the GPU SVM structure.
480 *
481 * This function finalizes the GPU SVM by cleaning up any remaining ranges and
482 * notifiers, and dropping a reference to struct MM.
483 */
484 void drm_gpusvm_fini(struct drm_gpusvm *gpusvm)
485 {
486 struct drm_gpusvm_notifier *notifier, *next;
487
488 drm_gpusvm_for_each_notifier_safe(notifier, next, gpusvm, 0, LONG_MAX) {
489 struct drm_gpusvm_range *range, *__next;
490
491 /*
492 * Remove notifier first to avoid racing with any invalidation
493 */
494 mmu_interval_notifier_remove(&notifier->notifier);
495 notifier->flags.removed = true;
496
497 drm_gpusvm_for_each_range_safe(range, __next, notifier, 0,
498 LONG_MAX)
499 drm_gpusvm_range_remove(gpusvm, range);
500 }
501
502 if (gpusvm->mm)
503 mmdrop(gpusvm->mm);
504 WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root));
505 }
506 EXPORT_SYMBOL_GPL(drm_gpusvm_fini);
507
508 /**
509 * drm_gpusvm_notifier_alloc() - Allocate GPU SVM notifier
510 * @gpusvm: Pointer to the GPU SVM structure
511 * @fault_addr: Fault address
512 *
513 * This function allocates and initializes the GPU SVM notifier structure.
514 *
515 * Return: Pointer to the allocated GPU SVM notifier on success, ERR_PTR() on failure.
516 */
517 static struct drm_gpusvm_notifier *
518 drm_gpusvm_notifier_alloc(struct drm_gpusvm *gpusvm, unsigned long fault_addr)
519 {
520 struct drm_gpusvm_notifier *notifier;
521
522 if (gpusvm->ops->notifier_alloc)
523 notifier = gpusvm->ops->notifier_alloc();
524 else
525 notifier = kzalloc(sizeof(*notifier), GFP_KERNEL);
526
527 if (!notifier)
528 return ERR_PTR(-ENOMEM);
529
530 notifier->gpusvm = gpusvm;
531 notifier->itree.start = ALIGN_DOWN(fault_addr, gpusvm->notifier_size);
532 notifier->itree.last = ALIGN(fault_addr + 1, gpusvm->notifier_size) - 1;
533 INIT_LIST_HEAD(&notifier->entry);
534 notifier->root = RB_ROOT_CACHED;
535 INIT_LIST_HEAD(&notifier->range_list);
536
537 return notifier;
538 }
539
540 /**
541 * drm_gpusvm_notifier_free() - Free GPU SVM notifier
542 * @gpusvm: Pointer to the GPU SVM structure
543 * @notifier: Pointer to the GPU SVM notifier structure
544 *
545 * This function frees the GPU SVM notifier structure.
546 */
547 static void drm_gpusvm_notifier_free(struct drm_gpusvm *gpusvm,
548 struct drm_gpusvm_notifier *notifier)
549 {
550 WARN_ON(!RB_EMPTY_ROOT(&notifier->root.rb_root));
551
552 if (gpusvm->ops->notifier_free)
553 gpusvm->ops->notifier_free(notifier);
554 else
555 kfree(notifier);
556 }
557
558 /**
559 * to_drm_gpusvm_range() - retrieve the container struct for a given rbtree node
560 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_range struct
561 *
562 * Return: A pointer to the containing drm_gpusvm_range structure.
563 */
564 static struct drm_gpusvm_range *to_drm_gpusvm_range(struct rb_node *node)
565 {
566 return container_of(node, struct drm_gpusvm_range, itree.rb);
567 }
568
569 /**
570 * drm_gpusvm_range_insert() - Insert GPU SVM range
571 * @notifier: Pointer to the GPU SVM notifier structure
572 * @range: Pointer to the GPU SVM range structure
573 *
574 * This function inserts the GPU SVM range into the notifier RB tree and list.
575 */
576 static void drm_gpusvm_range_insert(struct drm_gpusvm_notifier *notifier,
577 struct drm_gpusvm_range *range)
578 {
579 struct rb_node *node;
580 struct list_head *head;
581
582 drm_gpusvm_notifier_lock(notifier->gpusvm);
583 interval_tree_insert(&range->itree, &notifier->root);
584
585 node = rb_prev(&range->itree.rb);
586 if (node)
587 head = &(to_drm_gpusvm_range(node))->entry;
588 else
589 head = &notifier->range_list;
590
591 list_add(&range->entry, head);
592 drm_gpusvm_notifier_unlock(notifier->gpusvm);
593 }
594
595 /**
596 * __drm_gpusvm_range_remove() - Remove GPU SVM range
597 * @notifier: Pointer to the GPU SVM notifier structure
598 * @range: Pointer to the GPU SVM range structure
599 *
600 * This function removes the GPU SVM range from the notifier RB tree and list.
601 */
602 static void __drm_gpusvm_range_remove(struct drm_gpusvm_notifier *notifier,
603 struct drm_gpusvm_range *range)
604 {
605 interval_tree_remove(&range->itree, &notifier->root);
606 list_del(&range->entry);
607 }
608
609 /**
610 * drm_gpusvm_range_alloc() - Allocate GPU SVM range
611 * @gpusvm: Pointer to the GPU SVM structure
612 * @notifier: Pointer to the GPU SVM notifier structure
613 * @fault_addr: Fault address
614 * @chunk_size: Chunk size
615 * @migrate_devmem: Flag indicating whether to migrate device memory
616 *
617 * This function allocates and initializes the GPU SVM range structure.
618 *
619 * Return: Pointer to the allocated GPU SVM range on success, ERR_PTR() on failure.
620 */
621 static struct drm_gpusvm_range *
622 drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm,
623 struct drm_gpusvm_notifier *notifier,
624 unsigned long fault_addr, unsigned long chunk_size,
625 bool migrate_devmem)
626 {
627 struct drm_gpusvm_range *range;
628
629 if (gpusvm->ops->range_alloc)
630 range = gpusvm->ops->range_alloc(gpusvm);
631 else
632 range = kzalloc(sizeof(*range), GFP_KERNEL);
633
634 if (!range)
635 return ERR_PTR(-ENOMEM);
636
637 kref_init(&range->refcount);
638 range->gpusvm = gpusvm;
639 range->notifier = notifier;
640 range->itree.start = ALIGN_DOWN(fault_addr, chunk_size);
641 range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1;
642 INIT_LIST_HEAD(&range->entry);
643 range->pages.notifier_seq = LONG_MAX;
644 range->pages.flags.migrate_devmem = migrate_devmem ? 1 : 0;
645
646 return range;
647 }
648
649 /**
650 * drm_gpusvm_hmm_pfn_to_order() - Get the largest CPU mapping order.
651 * @hmm_pfn: The current hmm_pfn.
652 * @hmm_pfn_index: Index of the @hmm_pfn within the pfn array.
653 * @npages: Number of pages within the pfn array, i.e., the hmm range size.
654 *
655 * To allow skipping PFNs with the same flags (like when they belong to
656 * the same huge PTE) when looping over the pfn array, take a given hmm_pfn
657 * and return the largest order that will fit inside the CPU PTE, while also
658 * crucially accounting for the original hmm range boundaries.
659 *
660 * Return: The largest order that will safely fit within the size of the hmm_pfn
661 * CPU PTE.
662 */
663 static unsigned int drm_gpusvm_hmm_pfn_to_order(unsigned long hmm_pfn,
664 unsigned long hmm_pfn_index,
665 unsigned long npages)
666 {
667 unsigned long size;
668
669 size = 1UL << hmm_pfn_to_map_order(hmm_pfn);
670 size -= (hmm_pfn & ~HMM_PFN_FLAGS) & (size - 1);
671 hmm_pfn_index += size;
672 if (hmm_pfn_index > npages)
673 size -= (hmm_pfn_index - npages);
674
675 return ilog2(size);
676 }
677
678 /**
679 * drm_gpusvm_check_pages() - Check pages
680 * @gpusvm: Pointer to the GPU SVM structure
681 * @notifier: Pointer to the GPU SVM notifier structure
682 * @start: Start address
683 * @end: End address
684 * @dev_private_owner: The device private page owner
685 *
686 * Check if pages between start and end have been faulted in on the CPU. Used to
687 * prevent migration of pages without CPU backing store.
688 *
689 * Return: True if pages have been faulted into CPU, False otherwise
690 */
691 static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm,
692 struct drm_gpusvm_notifier *notifier,
693 unsigned long start, unsigned long end,
694 void *dev_private_owner)
695 {
696 struct hmm_range hmm_range = {
697 .default_flags = 0,
698 .notifier = &notifier->notifier,
699 .start = start,
700 .end = end,
701 .dev_private_owner = dev_private_owner,
702 };
703 unsigned long timeout =
704 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
705 unsigned long *pfns;
706 unsigned long npages = npages_in_range(start, end);
707 int err, i;
708
709 mmap_assert_locked(gpusvm->mm);
710
711 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
712 if (!pfns)
713 return false;
714
715 hmm_range.notifier_seq = mmu_interval_read_begin(&notifier->notifier);
716 hmm_range.hmm_pfns = pfns;
717
718 while (true) {
719 err = hmm_range_fault(&hmm_range);
720 if (err == -EBUSY) {
721 if (time_after(jiffies, timeout))
722 break;
723
724 hmm_range.notifier_seq =
725 mmu_interval_read_begin(&notifier->notifier);
726 continue;
727 }
728 break;
729 }
730 if (err)
731 goto err_free;
732
733 for (i = 0; i < npages;) {
734 if (!(pfns[i] & HMM_PFN_VALID)) {
735 err = -EFAULT;
736 goto err_free;
737 }
738 i += 0x1 << drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages);
739 }
740
741 err_free:
742 kvfree(pfns);
743 return err ? false : true;
744 }
745
746 /**
747 * drm_gpusvm_range_chunk_size() - Determine chunk size for GPU SVM range
748 * @gpusvm: Pointer to the GPU SVM structure
749 * @notifier: Pointer to the GPU SVM notifier structure
750 * @vas: Pointer to the virtual memory area structure
751 * @fault_addr: Fault address
752 * @gpuva_start: Start address of GPUVA which mirrors CPU
753 * @gpuva_end: End address of GPUVA which mirrors CPU
754 * @check_pages_threshold: Check CPU pages for present threshold
755 * @dev_private_owner: The device private page owner
756 *
757 * This function determines the chunk size for the GPU SVM range based on the
758 * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual
759 * memory area boundaries.
760 *
761 * Return: Chunk size on success, LONG_MAX on failure.
762 */
763 static unsigned long
764 drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm,
765 struct drm_gpusvm_notifier *notifier,
766 struct vm_area_struct *vas,
767 unsigned long fault_addr,
768 unsigned long gpuva_start,
769 unsigned long gpuva_end,
770 unsigned long check_pages_threshold,
771 void *dev_private_owner)
772 {
773 unsigned long start, end;
774 int i = 0;
775
776 retry:
777 for (; i < gpusvm->num_chunks; ++i) {
778 start = ALIGN_DOWN(fault_addr, gpusvm->chunk_sizes[i]);
779 end = ALIGN(fault_addr + 1, gpusvm->chunk_sizes[i]);
780
781 if (start >= vas->vm_start && end <= vas->vm_end &&
782 start >= drm_gpusvm_notifier_start(notifier) &&
783 end <= drm_gpusvm_notifier_end(notifier) &&
784 start >= gpuva_start && end <= gpuva_end)
785 break;
786 }
787
788 if (i == gpusvm->num_chunks)
789 return LONG_MAX;
790
791 /*
792 * If allocating more than a page, ensure the allocation does not overlap with existing
793 * ranges.
794 */
795 if (end - start != SZ_4K) {
796 struct drm_gpusvm_range *range;
797
798 range = drm_gpusvm_range_find(notifier, start, end);
799 if (range) {
800 ++i;
801 goto retry;
802 }
803
804 /*
805 * XXX: Only create range on pages CPU has faulted in. Without
806 * this check, or prefault, on BMG 'xe_exec_system_allocator --r
807 * process-many-malloc' fails. In the failure case, each process
808 * mallocs 16k but the CPU VMA is ~128k which results in 64k SVM
809 * ranges. When migrating the SVM ranges, some processes fail in
810 * drm_pagemap_migrate_to_devmem with 'migrate.cpages != npages'
811 * and then upon drm_gpusvm_range_get_pages device pages from
812 * other processes are collected + faulted in which creates all
813 * sorts of problems. Unsure exactly how this is happening; the
814 * problem also goes away if 'xe_exec_system_allocator --r
815 * process-many-malloc' mallocs at least 64k at a time.
816 */
817 if (end - start <= check_pages_threshold &&
818 !drm_gpusvm_check_pages(gpusvm, notifier, start, end, dev_private_owner)) {
819 ++i;
820 goto retry;
821 }
822 }
823
824 return end - start;
825 }
826
827 #ifdef CONFIG_LOCKDEP
828 /**
829 * drm_gpusvm_driver_lock_held() - Assert GPU SVM driver lock is held
830 * @gpusvm: Pointer to the GPU SVM structure.
831 *
832 * Ensure driver lock is held.
833 */
834 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm)
835 {
836 if ((gpusvm)->lock_dep_map)
837 lockdep_assert(lock_is_held_type((gpusvm)->lock_dep_map, 0));
838 }
839 #else
840 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm)
841 {
842 }
843 #endif
844
845 /**
846 * drm_gpusvm_find_vma_start() - Find start address for first VMA in range
847 * @gpusvm: Pointer to the GPU SVM structure
848 * @start: The inclusive start user address.
849 * @end: The exclusive end user address.
850 *
851 * Return: The start address of the first VMA within the provided range,
852 * ULONG_MAX otherwise. Assumes @start < @end.
853 */
854 unsigned long
855 drm_gpusvm_find_vma_start(struct drm_gpusvm *gpusvm,
856 unsigned long start,
857 unsigned long end)
858 {
859 struct mm_struct *mm = gpusvm->mm;
860 struct vm_area_struct *vma;
861 unsigned long addr = ULONG_MAX;
862
863 if (!mmget_not_zero(mm))
864 return addr;
865
866 mmap_read_lock(mm);
867
868 vma = find_vma_intersection(mm, start, end);
869 if (vma)
870 addr = vma->vm_start;
871
872 mmap_read_unlock(mm);
873 mmput(mm);
874
875 return addr;
876 }
877 EXPORT_SYMBOL_GPL(drm_gpusvm_find_vma_start);
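
/*
 * Example (illustrative only): a driver prefetch path might use
 * drm_gpusvm_find_vma_start() to skip holes in the CPU address space. The
 * driver_prefetch_vma() helper is hypothetical and is assumed to return the
 * address at which it stopped processing.
 *
 *	unsigned long addr = start;
 *
 *	while (addr < end) {
 *		addr = drm_gpusvm_find_vma_start(gpusvm, addr, end);
 *		if (addr == ULONG_MAX || addr >= end)
 *			break;
 *		addr = driver_prefetch_vma(gpusvm, addr, end);
 *	}
 */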
878
879 /**
880 * drm_gpusvm_range_find_or_insert() - Find or insert GPU SVM range
881 * @gpusvm: Pointer to the GPU SVM structure
882 * @fault_addr: Fault address
883 * @gpuva_start: Start address of GPUVA which mirrors CPU
884 * @gpuva_end: End address of GPUVA which mirrors CPU
885 * @ctx: GPU SVM context
886 *
887 * This function finds or inserts a newly allocated GPU SVM range based on the
888 * fault address. Caller must hold a lock to protect range lookup and insertion.
889 *
890 * Return: Pointer to the GPU SVM range on success, ERR_PTR() on failure.
891 */
892 struct drm_gpusvm_range *
893 drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm,
894 unsigned long fault_addr,
895 unsigned long gpuva_start,
896 unsigned long gpuva_end,
897 const struct drm_gpusvm_ctx *ctx)
898 {
899 struct drm_gpusvm_notifier *notifier;
900 struct drm_gpusvm_range *range;
901 struct mm_struct *mm = gpusvm->mm;
902 struct vm_area_struct *vas;
903 bool notifier_alloc = false;
904 unsigned long chunk_size;
905 int err;
906 bool migrate_devmem;
907
908 drm_gpusvm_driver_lock_held(gpusvm);
909
910 if (fault_addr < gpusvm->mm_start ||
911 fault_addr > gpusvm->mm_start + gpusvm->mm_range)
912 return ERR_PTR(-EINVAL);
913
914 if (!mmget_not_zero(mm))
915 return ERR_PTR(-EFAULT);
916
917 notifier = drm_gpusvm_notifier_find(gpusvm, fault_addr, fault_addr + 1);
918 if (!notifier) {
919 notifier = drm_gpusvm_notifier_alloc(gpusvm, fault_addr);
920 if (IS_ERR(notifier)) {
921 err = PTR_ERR(notifier);
922 goto err_mmunlock;
923 }
924 notifier_alloc = true;
925 err = mmu_interval_notifier_insert(&notifier->notifier,
926 mm,
927 drm_gpusvm_notifier_start(notifier),
928 drm_gpusvm_notifier_size(notifier),
929 &drm_gpusvm_notifier_ops);
930 if (err)
931 goto err_notifier;
932 }
933
934 mmap_read_lock(mm);
935
936 vas = vma_lookup(mm, fault_addr);
937 if (!vas) {
938 err = -ENOENT;
939 goto err_notifier_remove;
940 }
941
942 if (!ctx->read_only && !(vas->vm_flags & VM_WRITE)) {
943 err = -EPERM;
944 goto err_notifier_remove;
945 }
946
947 range = drm_gpusvm_range_find(notifier, fault_addr, fault_addr + 1);
948 if (range)
949 goto out_mmunlock;
950 /*
951 * XXX: Short-circuiting migration based on migrate_vma_* current
952 * limitations. If/when migrate_vma_* add more support, this logic will
953 * have to change.
954 */
955 migrate_devmem = ctx->devmem_possible &&
956 vma_is_anonymous(vas) && !is_vm_hugetlb_page(vas);
957
958 chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas,
959 fault_addr, gpuva_start,
960 gpuva_end,
961 ctx->check_pages_threshold,
962 ctx->device_private_page_owner);
963 if (chunk_size == LONG_MAX) {
964 err = -EINVAL;
965 goto err_notifier_remove;
966 }
967
968 range = drm_gpusvm_range_alloc(gpusvm, notifier, fault_addr, chunk_size,
969 migrate_devmem);
970 if (IS_ERR(range)) {
971 err = PTR_ERR(range);
972 goto err_notifier_remove;
973 }
974
975 drm_gpusvm_range_insert(notifier, range);
976 if (notifier_alloc)
977 drm_gpusvm_notifier_insert(gpusvm, notifier);
978
979 out_mmunlock:
980 mmap_read_unlock(mm);
981 mmput(mm);
982
983 return range;
984
985 err_notifier_remove:
986 mmap_read_unlock(mm);
987 if (notifier_alloc)
988 mmu_interval_notifier_remove(&notifier->notifier);
989 err_notifier:
990 if (notifier_alloc)
991 drm_gpusvm_notifier_free(gpusvm, notifier);
992 err_mmunlock:
993 mmput(mm);
994 return ERR_PTR(err);
995 }
996 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert);
997
998 /**
999 * __drm_gpusvm_unmap_pages() - Unmap pages associated with GPU SVM pages (internal)
1000 * @gpusvm: Pointer to the GPU SVM structure
1001 * @svm_pages: Pointer to the GPU SVM pages structure
1002 * @npages: Number of pages to unmap
1003 *
1004 * This function unmaps pages associated with a GPU SVM pages struct. Assumes and
1005 * asserts correct locking is in place when called.
1006 */
1007 static void __drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm,
1008 struct drm_gpusvm_pages *svm_pages,
1009 unsigned long npages)
1010 {
1011 struct drm_pagemap *dpagemap = svm_pages->dpagemap;
1012 struct device *dev = gpusvm->drm->dev;
1013 unsigned long i, j;
1014
1015 lockdep_assert_held(&gpusvm->notifier_lock);
1016
1017 if (svm_pages->flags.has_dma_mapping) {
1018 struct drm_gpusvm_pages_flags flags = {
1019 .__flags = svm_pages->flags.__flags,
1020 };
1021
1022 for (i = 0, j = 0; i < npages; j++) {
1023 struct drm_pagemap_addr *addr = &svm_pages->dma_addr[j];
1024
1025 if (addr->proto == DRM_INTERCONNECT_SYSTEM)
1026 dma_unmap_page(dev,
1027 addr->addr,
1028 PAGE_SIZE << addr->order,
1029 addr->dir);
1030 else if (dpagemap && dpagemap->ops->device_unmap)
1031 dpagemap->ops->device_unmap(dpagemap,
1032 dev, *addr);
1033 i += 1 << addr->order;
1034 }
1035
1036 /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
1037 flags.has_devmem_pages = false;
1038 flags.has_dma_mapping = false;
1039 WRITE_ONCE(svm_pages->flags.__flags, flags.__flags);
1040
1041 svm_pages->dpagemap = NULL;
1042 }
1043 }
1044
1045 /**
1046 * __drm_gpusvm_free_pages() - Free dma array associated with GPU SVM pages
1047 * @gpusvm: Pointer to the GPU SVM structure
1048 * @svm_pages: Pointer to the GPU SVM pages structure
1049 *
1050 * This function frees the dma address array associated with a GPU SVM pages struct.
1051 */
1052 static void __drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm,
1053 struct drm_gpusvm_pages *svm_pages)
1054 {
1055 lockdep_assert_held(&gpusvm->notifier_lock);
1056
1057 if (svm_pages->dma_addr) {
1058 kvfree(svm_pages->dma_addr);
1059 svm_pages->dma_addr = NULL;
1060 }
1061 }
1062
1063 /**
1064 * drm_gpusvm_free_pages() - Free dma-mapping associated with GPU SVM pages
1065 * struct
1066 * @gpusvm: Pointer to the GPU SVM structure
1067 * @svm_pages: Pointer to the GPU SVM pages structure
1068 * @npages: Number of mapped pages
1069 *
1070 * This function unmaps and frees the dma address array associated with a GPU
1071 * SVM pages struct.
1072 */
1073 void drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm,
1074 struct drm_gpusvm_pages *svm_pages,
1075 unsigned long npages)
1076 {
1077 drm_gpusvm_notifier_lock(gpusvm);
1078 __drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages);
1079 __drm_gpusvm_free_pages(gpusvm, svm_pages);
1080 drm_gpusvm_notifier_unlock(gpusvm);
1081 }
1082 EXPORT_SYMBOL_GPL(drm_gpusvm_free_pages);
1083
1084 /**
1085 * drm_gpusvm_range_remove() - Remove GPU SVM range
1086 * @gpusvm: Pointer to the GPU SVM structure
1087 * @range: Pointer to the GPU SVM range to be removed
1088 *
1089 * This function removes the specified GPU SVM range and also removes the parent
1090 * GPU SVM notifier if no more ranges remain in the notifier. The caller must
1091 * hold a lock to protect range and notifier removal.
1092 */
1093 void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm,
1094 struct drm_gpusvm_range *range)
1095 {
1096 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
1097 drm_gpusvm_range_end(range));
1098 struct drm_gpusvm_notifier *notifier;
1099
1100 drm_gpusvm_driver_lock_held(gpusvm);
1101
1102 notifier = drm_gpusvm_notifier_find(gpusvm,
1103 drm_gpusvm_range_start(range),
1104 drm_gpusvm_range_start(range) + 1);
1105 if (WARN_ON_ONCE(!notifier))
1106 return;
1107
1108 drm_gpusvm_notifier_lock(gpusvm);
1109 __drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages);
1110 __drm_gpusvm_free_pages(gpusvm, &range->pages);
1111 __drm_gpusvm_range_remove(notifier, range);
1112 drm_gpusvm_notifier_unlock(gpusvm);
1113
1114 drm_gpusvm_range_put(range);
1115
1116 if (RB_EMPTY_ROOT(&notifier->root.rb_root)) {
1117 if (!notifier->flags.removed)
1118 mmu_interval_notifier_remove(&notifier->notifier);
1119 drm_gpusvm_notifier_remove(gpusvm, notifier);
1120 drm_gpusvm_notifier_free(gpusvm, notifier);
1121 }
1122 }
1123 EXPORT_SYMBOL_GPL(drm_gpusvm_range_remove);
1124
1125 /**
1126 * drm_gpusvm_range_get() - Get a reference to GPU SVM range
1127 * @range: Pointer to the GPU SVM range
1128 *
1129 * This function increments the reference count of the specified GPU SVM range.
1130 *
1131 * Return: Pointer to the GPU SVM range.
1132 */
1133 struct drm_gpusvm_range *
1134 drm_gpusvm_range_get(struct drm_gpusvm_range *range)
1135 {
1136 kref_get(&range->refcount);
1137
1138 return range;
1139 }
1140 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get);
1141
1142 /**
1143 * drm_gpusvm_range_destroy() - Destroy GPU SVM range
1144 * @refcount: Pointer to the reference counter embedded in the GPU SVM range
1145 *
1146 * This function destroys the specified GPU SVM range when its reference count
1147 * reaches zero. If a custom range-free function is provided, it is invoked to
1148 * free the range; otherwise, the range is deallocated using kfree().
1149 */
1150 static void drm_gpusvm_range_destroy(struct kref *refcount)
1151 {
1152 struct drm_gpusvm_range *range =
1153 container_of(refcount, struct drm_gpusvm_range, refcount);
1154 struct drm_gpusvm *gpusvm = range->gpusvm;
1155
1156 if (gpusvm->ops->range_free)
1157 gpusvm->ops->range_free(range);
1158 else
1159 kfree(range);
1160 }
1161
1162 /**
1163 * drm_gpusvm_range_put() - Put a reference to GPU SVM range
1164 * @range: Pointer to the GPU SVM range
1165 *
1166 * This function decrements the reference count of the specified GPU SVM range
1167 * and frees it when the count reaches zero.
1168 */
1169 void drm_gpusvm_range_put(struct drm_gpusvm_range *range)
1170 {
1171 kref_put(&range->refcount, drm_gpusvm_range_destroy);
1172 }
1173 EXPORT_SYMBOL_GPL(drm_gpusvm_range_put);
1174
1175 /**
1176 * drm_gpusvm_pages_valid() - GPU SVM range pages valid
1177 * @gpusvm: Pointer to the GPU SVM structure
1178 * @svm_pages: Pointer to the GPU SVM pages structure
1179 *
1180 * This function determines if the GPU SVM pages are valid. It is expected to be
1181 * called holding gpusvm->notifier_lock and as the last step before committing a
1182 * GPU binding. This is akin to a notifier seqno check in the HMM documentation
1183 * but due to wider notifiers (i.e., notifiers which span multiple ranges) this
1184 * function is required for finer grained checking (i.e., per range) if pages
1185 * are valid.
1186 *
1187 * Return: True if GPU SVM range has valid pages, False otherwise
1188 */
1189 static bool drm_gpusvm_pages_valid(struct drm_gpusvm *gpusvm,
1190 struct drm_gpusvm_pages *svm_pages)
1191 {
1192 lockdep_assert_held(&gpusvm->notifier_lock);
1193
1194 return svm_pages->flags.has_devmem_pages || svm_pages->flags.has_dma_mapping;
1195 }
1196
1197 /**
1198 * drm_gpusvm_range_pages_valid() - GPU SVM range pages valid
1199 * @gpusvm: Pointer to the GPU SVM structure
1200 * @range: Pointer to the GPU SVM range structure
1201 *
1202 * This function determines if a GPU SVM range's pages are valid. It is expected to be
1203 * called holding gpusvm->notifier_lock and as the last step before committing a
1204 * GPU binding. This is akin to a notifier seqno check in the HMM documentation
1205 * but due to wider notifiers (i.e., notifiers which span multiple ranges) this
1206 * function is required for finer grained checking (i.e., per range) if pages
1207 * are valid.
1208 *
1209 * Return: True if GPU SVM range has valid pages, False otherwise
1210 */
1211 bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm,
1212 struct drm_gpusvm_range *range)
1213 {
1214 return drm_gpusvm_pages_valid(gpusvm, &range->pages);
1215 }
1216 EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid);
1217
1218 /**
1219 * drm_gpusvm_pages_valid_unlocked() - GPU SVM pages valid unlocked
1220 * @gpusvm: Pointer to the GPU SVM structure
1221 * @svm_pages: Pointer to the GPU SVM pages structure
1222 *
1223 * This function determines if the GPU SVM pages are valid. It is expected to be
1224 * called without holding gpusvm->notifier_lock.
1225 *
1226 * Return: True if GPU SVM range has valid pages, False otherwise
1227 */
1228 static bool drm_gpusvm_pages_valid_unlocked(struct drm_gpusvm *gpusvm,
1229 struct drm_gpusvm_pages *svm_pages)
1230 {
1231 bool pages_valid;
1232
1233 if (!svm_pages->dma_addr)
1234 return false;
1235
1236 drm_gpusvm_notifier_lock(gpusvm);
1237 pages_valid = drm_gpusvm_pages_valid(gpusvm, svm_pages);
1238 if (!pages_valid)
1239 __drm_gpusvm_free_pages(gpusvm, svm_pages);
1240 drm_gpusvm_notifier_unlock(gpusvm);
1241
1242 return pages_valid;
1243 }
1244
1245 /**
1246 * drm_gpusvm_get_pages() - Get pages and populate GPU SVM pages struct
1247 * @gpusvm: Pointer to the GPU SVM structure
1248 * @svm_pages: The SVM pages to populate. This will contain the dma-addresses
1249 * @mm: The mm corresponding to the CPU range
1250 * @notifier: The corresponding notifier for the given CPU range
1251 * @pages_start: Start CPU address for the pages
1252 * @pages_end: End CPU address for the pages (exclusive)
1253 * @ctx: GPU SVM context
1254 *
1255 * This function gets and maps pages for CPU range and ensures they are
1256 * mapped for DMA access.
1257 *
1258 * Return: 0 on success, negative error code on failure.
1259 */
1260 int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
1261 struct drm_gpusvm_pages *svm_pages,
1262 struct mm_struct *mm,
1263 struct mmu_interval_notifier *notifier,
1264 unsigned long pages_start, unsigned long pages_end,
1265 const struct drm_gpusvm_ctx *ctx)
1266 {
1267 struct hmm_range hmm_range = {
1268 .default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ? 0 :
1269 HMM_PFN_REQ_WRITE),
1270 .notifier = notifier,
1271 .start = pages_start,
1272 .end = pages_end,
1273 .dev_private_owner = ctx->device_private_page_owner,
1274 };
1275 void *zdd;
1276 unsigned long timeout =
1277 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
1278 unsigned long i, j;
1279 unsigned long npages = npages_in_range(pages_start, pages_end);
1280 unsigned long num_dma_mapped;
1281 unsigned int order = 0;
1282 unsigned long *pfns;
1283 int err = 0;
1284 struct dev_pagemap *pagemap;
1285 struct drm_pagemap *dpagemap;
1286 struct drm_gpusvm_pages_flags flags;
1287 enum dma_data_direction dma_dir = ctx->read_only ? DMA_TO_DEVICE :
1288 DMA_BIDIRECTIONAL;
1289
1290 retry:
1291 hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
1292 if (drm_gpusvm_pages_valid_unlocked(gpusvm, svm_pages))
1293 goto set_seqno;
1294
1295 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
1296 if (!pfns)
1297 return -ENOMEM;
1298
1299 if (!mmget_not_zero(mm)) {
1300 err = -EFAULT;
1301 goto err_free;
1302 }
1303
1304 hmm_range.hmm_pfns = pfns;
1305 while (true) {
1306 mmap_read_lock(mm);
1307 err = hmm_range_fault(&hmm_range);
1308 mmap_read_unlock(mm);
1309
1310 if (err == -EBUSY) {
1311 if (time_after(jiffies, timeout))
1312 break;
1313
1314 hmm_range.notifier_seq =
1315 mmu_interval_read_begin(notifier);
1316 continue;
1317 }
1318 break;
1319 }
1320 mmput(mm);
1321 if (err)
1322 goto err_free;
1323
1324 map_pages:
1325 /*
1326 * Perform all dma mappings under the notifier lock to not
1327 * access freed pages. A notifier will either block on
1328 * the notifier lock or unmap dma.
1329 */
1330 drm_gpusvm_notifier_lock(gpusvm);
1331
1332 flags.__flags = svm_pages->flags.__flags;
1333 if (flags.unmapped) {
1334 drm_gpusvm_notifier_unlock(gpusvm);
1335 err = -EFAULT;
1336 goto err_free;
1337 }
1338
1339 if (mmu_interval_read_retry(notifier, hmm_range.notifier_seq)) {
1340 drm_gpusvm_notifier_unlock(gpusvm);
1341 kvfree(pfns);
1342 goto retry;
1343 }
1344
1345 if (!svm_pages->dma_addr) {
1346 /* Unlock and restart mapping to allocate memory. */
1347 drm_gpusvm_notifier_unlock(gpusvm);
1348 svm_pages->dma_addr =
1349 kvmalloc_array(npages, sizeof(*svm_pages->dma_addr), GFP_KERNEL);
1350 if (!svm_pages->dma_addr) {
1351 err = -ENOMEM;
1352 goto err_free;
1353 }
1354 goto map_pages;
1355 }
1356
1357 zdd = NULL;
1358 pagemap = NULL;
1359 num_dma_mapped = 0;
1360 for (i = 0, j = 0; i < npages; ++j) {
1361 struct page *page = hmm_pfn_to_page(pfns[i]);
1362
1363 order = drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages);
1364 if (is_device_private_page(page) ||
1365 is_device_coherent_page(page)) {
1366 if (zdd != page->zone_device_data && i > 0) {
1367 err = -EOPNOTSUPP;
1368 goto err_unmap;
1369 }
1370 zdd = page->zone_device_data;
1371 if (pagemap != page_pgmap(page)) {
1372 if (i > 0) {
1373 err = -EOPNOTSUPP;
1374 goto err_unmap;
1375 }
1376
1377 pagemap = page_pgmap(page);
1378 dpagemap = drm_pagemap_page_to_dpagemap(page);
1379 if (drm_WARN_ON(gpusvm->drm, !dpagemap)) {
1380 /*
1381 * Raced. This is not supposed to happen
1382 * since hmm_range_fault() should've migrated
1383 * this page to system.
1384 */
1385 err = -EAGAIN;
1386 goto err_unmap;
1387 }
1388 }
1389 svm_pages->dma_addr[j] =
1390 dpagemap->ops->device_map(dpagemap,
1391 gpusvm->drm->dev,
1392 page, order,
1393 dma_dir);
1394 if (dma_mapping_error(gpusvm->drm->dev,
1395 svm_pages->dma_addr[j].addr)) {
1396 err = -EFAULT;
1397 goto err_unmap;
1398 }
1399 } else {
1400 dma_addr_t addr;
1401
1402 if (is_zone_device_page(page) || pagemap) {
1403 err = -EOPNOTSUPP;
1404 goto err_unmap;
1405 }
1406
1407 if (ctx->devmem_only) {
1408 err = -EFAULT;
1409 goto err_unmap;
1410 }
1411
1412 addr = dma_map_page(gpusvm->drm->dev,
1413 page, 0,
1414 PAGE_SIZE << order,
1415 dma_dir);
1416 if (dma_mapping_error(gpusvm->drm->dev, addr)) {
1417 err = -EFAULT;
1418 goto err_unmap;
1419 }
1420
1421 svm_pages->dma_addr[j] = drm_pagemap_addr_encode
1422 (addr, DRM_INTERCONNECT_SYSTEM, order,
1423 dma_dir);
1424 }
1425 i += 1 << order;
1426 num_dma_mapped = i;
1427 flags.has_dma_mapping = true;
1428 }
1429
1430 if (pagemap) {
1431 flags.has_devmem_pages = true;
1432 svm_pages->dpagemap = dpagemap;
1433 }
1434
1435 /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
1436 WRITE_ONCE(svm_pages->flags.__flags, flags.__flags);
1437
1438 drm_gpusvm_notifier_unlock(gpusvm);
1439 kvfree(pfns);
1440 set_seqno:
1441 svm_pages->notifier_seq = hmm_range.notifier_seq;
1442
1443 return 0;
1444
1445 err_unmap:
1446 __drm_gpusvm_unmap_pages(gpusvm, svm_pages, num_dma_mapped);
1447 drm_gpusvm_notifier_unlock(gpusvm);
1448 err_free:
1449 kvfree(pfns);
1450 if (err == -EAGAIN)
1451 goto retry;
1452 return err;
1453 }
1454 EXPORT_SYMBOL_GPL(drm_gpusvm_get_pages);
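
/*
 * Example (illustrative only): using the pages-only API with a driver-owned
 * struct drm_gpusvm_pages and mmu_interval_notifier. The 'userptr' object,
 * its fields and driver_program_pages() are hypothetical.
 *
 *	struct drm_gpusvm_ctx ctx = {};
 *	unsigned long npages = (end - start) >> PAGE_SHIFT;
 *	int err;
 *
 *	err = drm_gpusvm_get_pages(gpusvm, &userptr->svm_pages, userptr->mm,
 *				   &userptr->notifier, start, end, &ctx);
 *	if (err)
 *		return err;
 *
 *	driver_program_pages(userptr);	// consumes userptr->svm_pages.dma_addr
 *
 *	// In the notifier invalidate() callback, with in_notifier set in the ctx:
 *	//	drm_gpusvm_unmap_pages(gpusvm, &userptr->svm_pages, npages, &ctx);
 *
 *	// On final teardown:
 *	drm_gpusvm_free_pages(gpusvm, &userptr->svm_pages, npages);
 */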
1455
1456 /**
1457 * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range
1458 * @gpusvm: Pointer to the GPU SVM structure
1459 * @range: Pointer to the GPU SVM range structure
1460 * @ctx: GPU SVM context
1461 *
1462 * This function gets pages for a GPU SVM range and ensures they are mapped for
1463 * DMA access.
1464 *
1465 * Return: 0 on success, negative error code on failure.
1466 */
1467 int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm,
1468 struct drm_gpusvm_range *range,
1469 const struct drm_gpusvm_ctx *ctx)
1470 {
1471 return drm_gpusvm_get_pages(gpusvm, &range->pages, gpusvm->mm,
1472 &range->notifier->notifier,
1473 drm_gpusvm_range_start(range),
1474 drm_gpusvm_range_end(range), ctx);
1475 }
1476 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages);
1477
1478 /**
1479 * drm_gpusvm_unmap_pages() - Unmap GPU svm pages
1480 * @gpusvm: Pointer to the GPU SVM structure
1481 * @svm_pages: Pointer to the GPU SVM pages structure
1482 * @npages: Number of pages in @svm_pages.
1483 * @ctx: GPU SVM context
1484 *
1485 * This function unmaps pages associated with a GPU SVM pages struct. If
1486 * @ctx->in_notifier is set, it is assumed that gpusvm->notifier_lock is held in
1487 * write mode; if it is clear, it acquires gpusvm->notifier_lock in read mode.
1488 * Must be called in the invalidate() callback of the corresponding notifier for
1489 * IOMMU security model.
1490 */
1491 void drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm,
1492 struct drm_gpusvm_pages *svm_pages,
1493 unsigned long npages,
1494 const struct drm_gpusvm_ctx *ctx)
1495 {
1496 if (ctx->in_notifier)
1497 lockdep_assert_held_write(&gpusvm->notifier_lock);
1498 else
1499 drm_gpusvm_notifier_lock(gpusvm);
1500
1501 __drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages);
1502
1503 if (!ctx->in_notifier)
1504 drm_gpusvm_notifier_unlock(gpusvm);
1505 }
1506 EXPORT_SYMBOL_GPL(drm_gpusvm_unmap_pages);
1507
1508 /**
1509 * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range
1510 * @gpusvm: Pointer to the GPU SVM structure
1511 * @range: Pointer to the GPU SVM range structure
1512 * @ctx: GPU SVM context
1513 *
1514 * This function unmaps pages associated with a GPU SVM range. If @ctx->in_notifier
1515 * is set, it is assumed that gpusvm->notifier_lock is held in write mode; if it
1516 * is clear, it acquires gpusvm->notifier_lock in read mode. Must be called on
1517 * each GPU SVM range attached to notifier in gpusvm->ops->invalidate for IOMMU
1518 * security model.
1519 */
1520 void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
1521 struct drm_gpusvm_range *range,
1522 const struct drm_gpusvm_ctx *ctx)
1523 {
1524 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
1525 drm_gpusvm_range_end(range));
1526
1527 return drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages, ctx);
1528 }
1529 EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages);
1530
1531 /**
1532 * drm_gpusvm_range_evict() - Evict GPU SVM range
1533 * @gpusvm: Pointer to the GPU SVM structure
1534 * @range: Pointer to the GPU SVM range to be evicted
1535 *
1536 * This function evicts the specified GPU SVM range.
1537 *
1538 * Return: 0 on success, a negative error code on failure.
1539 */
1540 int drm_gpusvm_range_evict(struct drm_gpusvm *gpusvm,
1541 struct drm_gpusvm_range *range)
1542 {
1543 struct mmu_interval_notifier *notifier = &range->notifier->notifier;
1544 struct hmm_range hmm_range = {
1545 .default_flags = HMM_PFN_REQ_FAULT,
1546 .notifier = notifier,
1547 .start = drm_gpusvm_range_start(range),
1548 .end = drm_gpusvm_range_end(range),
1549 .dev_private_owner = NULL,
1550 };
1551 unsigned long timeout =
1552 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
1553 unsigned long *pfns;
1554 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
1555 drm_gpusvm_range_end(range));
1556 int err = 0;
1557 struct mm_struct *mm = gpusvm->mm;
1558
1559 if (!mmget_not_zero(mm))
1560 return -EFAULT;
1561
1562 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
1563 if (!pfns)
1564 return -ENOMEM;
1565
1566 hmm_range.hmm_pfns = pfns;
1567 while (!time_after(jiffies, timeout)) {
1568 hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
1569 if (time_after(jiffies, timeout)) {
1570 err = -ETIME;
1571 break;
1572 }
1573
1574 mmap_read_lock(mm);
1575 err = hmm_range_fault(&hmm_range);
1576 mmap_read_unlock(mm);
1577 if (err != -EBUSY)
1578 break;
1579 }
1580
1581 kvfree(pfns);
1582 mmput(mm);
1583
1584 return err;
1585 }
1586 EXPORT_SYMBOL_GPL(drm_gpusvm_range_evict);
1587
1588 /**
1589 * drm_gpusvm_has_mapping() - Check if GPU SVM has mapping for the given address range
1590 * @gpusvm: Pointer to the GPU SVM structure.
1591 * @start: Start address
1592 * @end: End address
1593 *
1594 * Return: True if GPU SVM has mapping, False otherwise
1595 */
1596 bool drm_gpusvm_has_mapping(struct drm_gpusvm *gpusvm, unsigned long start,
1597 unsigned long end)
1598 {
1599 struct drm_gpusvm_notifier *notifier;
1600
1601 drm_gpusvm_for_each_notifier(notifier, gpusvm, start, end) {
1602 struct drm_gpusvm_range *range = NULL;
1603
1604 drm_gpusvm_for_each_range(range, notifier, start, end)
1605 return true;
1606 }
1607
1608 return false;
1609 }
1610 EXPORT_SYMBOL_GPL(drm_gpusvm_has_mapping);
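
/*
 * Example (illustrative only): a driver might refuse to create a conflicting
 * GPU VA binding while SVM mappings are still live in the region:
 *
 *	if (drm_gpusvm_has_mapping(gpusvm, va_start, va_end))
 *		return -EBUSY;
 */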
1611
1612 /**
1613 * drm_gpusvm_range_set_unmapped() - Mark a GPU SVM range as unmapped
1614 * @range: Pointer to the GPU SVM range structure.
1615 * @mmu_range: Pointer to the MMU notifier range structure.
1616 *
1617 * This function marks a GPU SVM range as unmapped and sets the partial_unmap flag
1618 * if the range partially falls within the provided MMU notifier range.
1619 */
1620 void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range,
1621 const struct mmu_notifier_range *mmu_range)
1622 {
1623 lockdep_assert_held_write(&range->gpusvm->notifier_lock);
1624
1625 range->pages.flags.unmapped = true;
1626 if (drm_gpusvm_range_start(range) < mmu_range->start ||
1627 drm_gpusvm_range_end(range) > mmu_range->end)
1628 range->pages.flags.partial_unmap = true;
1629 }
1630 EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped);
1631
1632 MODULE_DESCRIPTION("DRM GPUSVM");
1633 MODULE_LICENSE("GPL");
1634