// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
 * Copyright © 2024 Intel Corporation
 *
 * Authors:
 *     Matthew Brost <matthew.brost@intel.com>
 */

#include <linux/dma-mapping.h>
#include <linux/export.h>
#include <linux/hmm.h>
#include <linux/hugetlb_inline.h>
#include <linux/memremap.h>
#include <linux/mm_types.h>
#include <linux/slab.h>

#include <drm/drm_device.h>
#include <drm/drm_gpusvm.h>
#include <drm/drm_pagemap.h>
#include <drm/drm_print.h>

/**
 * DOC: Overview
 *
 * GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering Manager (DRM)
 * is a component of the DRM framework designed to manage shared virtual memory
 * between the CPU and GPU. It enables efficient data exchange and processing
 * for GPU-accelerated applications by allowing memory sharing and
 * synchronization between the CPU's and GPU's virtual address spaces.
 *
 * Key GPU SVM Components:
 *
 * - Notifiers:
 *	Used for tracking memory intervals and notifying the GPU of changes,
 *	notifiers are sized based on a GPU SVM initialization parameter, with a
 *	recommendation of 512M or larger. They maintain a Red-Black tree and a
 *	list of ranges that fall within the notifier interval. Notifiers are
 *	tracked within a GPU SVM Red-Black tree and list and are dynamically
 *	inserted or removed as ranges within the interval are created or
 *	destroyed.
 * - Ranges:
 *	Represent memory ranges mapped in a DRM device and managed by GPU SVM.
 *	They are sized based on an array of chunk sizes, which is a GPU SVM
 *	initialization parameter, and the CPU address space. Upon GPU fault,
 *	the largest aligned chunk that fits within the faulting CPU address
 *	space is chosen for the range size. Ranges are expected to be
 *	dynamically allocated on GPU fault and removed on an MMU notifier UNMAP
 *	event. As mentioned above, ranges are tracked in a notifier's Red-Black
 *	tree.
 *
 * - Operations:
 *	Define the interface for driver-specific GPU SVM operations such as
 *	range allocation, notifier allocation, and invalidations.
 *
 * - Device Memory Allocations:
 *	Embedded structure containing enough information for GPU SVM to migrate
 *	to / from device memory.
 *
 * - Device Memory Operations:
 *	Define the interface for driver-specific device memory operations, such
 *	as releasing memory, populating pfns, and copying to / from device
 *	memory.
 *
 * This layer provides interfaces for allocating, mapping, migrating, and
 * releasing memory ranges between the CPU and GPU. It handles all core memory
 * management interactions (DMA mapping, HMM, and migration) and provides
 * driver-specific virtual functions (vfuncs). This infrastructure is sufficient
 * to build the expected driver components for an SVM implementation as detailed
 * below.
 *
 * Expected Driver Components:
 *
 * - GPU page fault handler:
 *	Used to create ranges and notifiers based on the fault address,
 *	optionally migrate the range to device memory, and create GPU bindings.
 *
 * - Garbage collector:
 *	Used to unmap and destroy GPU bindings for ranges. Ranges are expected
 *	to be added to the garbage collector upon a MMU_NOTIFY_UNMAP event in
 *	notifier callback.
 *
 * - Notifier callback:
 *	Used to invalidate and DMA unmap GPU bindings for ranges.
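 *
 * As a rough sketch, these components hang off a driver-provided
 * &struct drm_gpusvm_ops; only the invalidate() hook is mandatory for full SVM
 * mode. The driver_* names and the driver object below are hypothetical and
 * only illustrate the expected wiring:
 *
 * .. code-block:: c
 *
 *	static const struct drm_gpusvm_ops driver_gpusvm_ops = {
 *		// Notifier callback, see the example in DOC: Examples
 *		.invalidate = driver_invalidation,
 *	};
 *
 *	// Powers of 2 in descending order, ending with SZ_4K
 *	static const unsigned long driver_chunk_sizes[] = { SZ_2M, SZ_64K, SZ_4K };
 *
 *	static int driver_svm_init(struct driver_device *driver, struct drm_device *drm)
 *	{
 *		// Hypothetical setup: 512M notifiers spanning the user address space
 *		return drm_gpusvm_init(&driver->gpusvm, "driver-svm", drm, current->mm,
 *				       0, TASK_SIZE, SZ_512M, &driver_gpusvm_ops,
 *				       driver_chunk_sizes, ARRAY_SIZE(driver_chunk_sizes));
 *	}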
 */

/**
 * DOC: Locking
 *
 * GPU SVM handles locking for core MM interactions, i.e., it locks/unlocks the
 * mmap lock as needed.
 *
 * GPU SVM introduces a global notifier lock, which safeguards the notifier's
 * range RB tree and list, as well as the range's DMA mappings and sequence
 * number. GPU SVM manages all necessary locking and unlocking operations,
 * except for the recheck of the range's pages being valid
 * (drm_gpusvm_range_pages_valid) when the driver is committing GPU bindings.
 * This lock corresponds to the ``driver->update`` lock mentioned in
 * Documentation/mm/hmm.rst. Future revisions may transition from a GPU SVM
 * global lock to a per-notifier lock if finer-grained locking is deemed
 * necessary.
 *
 * In addition to the locking mentioned above, the driver should implement a
 * lock to safeguard core GPU SVM function calls that modify state, such as
 * drm_gpusvm_range_find_or_insert and drm_gpusvm_range_remove. This lock is
 * denoted as 'driver_svm_lock' in code examples. Finer-grained driver-side
 * locking should also be possible for concurrent GPU fault processing within a
 * single GPU SVM. The 'driver_svm_lock' can be passed to
 * drm_gpusvm_driver_set_lock() to add annotations to GPU SVM.
 */

/**
 * DOC: Partial Unmapping of Ranges
 *
 * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by CPU resulting
 * in MMU_NOTIFY_UNMAP event) presents several challenges, with the main one
 * being that a subset of the range still has CPU and GPU mappings. If the
 * backing store for the range is in device memory, a subset of the backing
 * store has references. One option would be to split the range and device
 * memory backing store, but the implementation for this would be quite
 * complicated. Given that partial unmappings are rare and driver-defined range
 * sizes are relatively small, GPU SVM does not support splitting of ranges.
 *
 * With no support for range splitting, upon partial unmapping of a range, the
 * driver is expected to invalidate and destroy the entire range. If the range
 * has device memory as its backing, the driver is also expected to migrate any
 * remaining pages back to RAM.
 */

/**
 * DOC: Examples
 *
 * This section provides three examples of how to build the expected driver
 * components: the GPU page fault handler, the garbage collector, and the
 * notifier callback.
 *
 * The generic code provided does not include logic for complex migration
 * policies, optimized invalidations, fine-grained driver locking, or other
 * potentially required driver locking (e.g., DMA-resv locks).
 *
 * 1) GPU page fault handler
 *
 * .. code-block:: c
 *
 *	int driver_bind_range(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range)
 *	{
 *		int err = 0;
 *
 *		driver_alloc_and_setup_memory_for_bind(gpusvm, range);
 *
 *		drm_gpusvm_notifier_lock(gpusvm);
 *		if (drm_gpusvm_range_pages_valid(gpusvm, range))
 *			driver_commit_bind(gpusvm, range);
 *		else
 *			err = -EAGAIN;
 *		drm_gpusvm_notifier_unlock(gpusvm);
 *
 *		return err;
 *	}
 *
 *	int driver_gpu_fault(struct drm_gpusvm *gpusvm, unsigned long fault_addr,
 *			     unsigned long gpuva_start, unsigned long gpuva_end)
 *	{
 *		struct drm_gpusvm_ctx ctx = {};
 *		struct drm_gpusvm_range *range;
 *		int err;
 *
 *		driver_svm_lock();
 *	retry:
 *		// Always process UNMAPs first so view of GPU SVM ranges is current
 *		driver_garbage_collector(gpusvm);
 *
 *		range = drm_gpusvm_range_find_or_insert(gpusvm, fault_addr,
 *							gpuva_start, gpuva_end,
 *							&ctx);
 *		if (IS_ERR(range)) {
 *			err = PTR_ERR(range);
 *			goto unlock;
 *		}
 *
 *		if (driver_migration_policy(range)) {
 *			err = drm_pagemap_populate_mm(driver_choose_drm_pagemap(),
 *						      gpuva_start, gpuva_end, gpusvm->mm,
 *						      ctx.timeslice_ms);
 *			if (err)	// CPU mappings may have changed
 *				goto retry;
 *		}
 *
 *		err = drm_gpusvm_range_get_pages(gpusvm, range, &ctx);
 *		if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {	// CPU mappings changed
 *			if (err == -EOPNOTSUPP)
 *				drm_gpusvm_range_evict(gpusvm, range);
 *			goto retry;
 *		} else if (err) {
 *			goto unlock;
 *		}
 *
 *		err = driver_bind_range(gpusvm, range);
 *		if (err == -EAGAIN)	// CPU mappings changed
 *			goto retry;
 *
 *	unlock:
 *		driver_svm_unlock();
 *		return err;
 *	}
 *
 * 2) Garbage Collector
 *
 * .. code-block:: c
 *
 *	void __driver_garbage_collector(struct drm_gpusvm *gpusvm,
 *					struct drm_gpusvm_range *range)
 *	{
 *		assert_driver_svm_locked(gpusvm);
 *
 *		// Partial unmap, migrate any remaining device memory pages back to RAM
 *		if (range->flags.partial_unmap)
 *			drm_gpusvm_range_evict(gpusvm, range);
 *
 *		driver_unbind_range(range);
 *		drm_gpusvm_range_remove(gpusvm, range);
 *	}
 *
 *	void driver_garbage_collector(struct drm_gpusvm *gpusvm)
 *	{
 *		assert_driver_svm_locked(gpusvm);
 *
 *		for_each_range_in_garbage_collector(gpusvm, range)
 *			__driver_garbage_collector(gpusvm, range);
 *	}
 *
 * 3) Notifier callback
 *
 * .. code-block:: c
 *
 *	void driver_invalidation(struct drm_gpusvm *gpusvm,
 *				 struct drm_gpusvm_notifier *notifier,
 *				 const struct mmu_notifier_range *mmu_range)
 *	{
 *		struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
 *		struct drm_gpusvm_range *range = NULL;
 *
 *		driver_invalidate_device_pages(gpusvm, mmu_range->start, mmu_range->end);
 *
 *		drm_gpusvm_for_each_range(range, notifier, mmu_range->start,
 *					  mmu_range->end) {
 *			drm_gpusvm_range_unmap_pages(gpusvm, range, &ctx);
 *
 *			if (mmu_range->event != MMU_NOTIFY_UNMAP)
 *				continue;
 *
 *			drm_gpusvm_range_set_unmapped(range, mmu_range);
 *			driver_garbage_collector_add(gpusvm, range);
 *		}
 *	}
 */

/**
 * npages_in_range() - Calculate the number of pages in a given range
 * @start: The start address of the range
 * @end: The end address of the range
 *
 * This function calculates the number of pages in a given memory range,
 * specified by the start and end addresses.
It divides the difference 262 * between the end and start addresses by the page size (PAGE_SIZE) to 263 * determine the number of pages in the range. 264 * 265 * Return: The number of pages in the specified range. 266 */ 267 static unsigned long 268 npages_in_range(unsigned long start, unsigned long end) 269 { 270 return (end - start) >> PAGE_SHIFT; 271 } 272 273 /** 274 * drm_gpusvm_notifier_find() - Find GPU SVM notifier from GPU SVM 275 * @gpusvm: Pointer to the GPU SVM structure. 276 * @start: Start address of the notifier 277 * @end: End address of the notifier 278 * 279 * Return: A pointer to the drm_gpusvm_notifier if found or NULL 280 */ 281 struct drm_gpusvm_notifier * 282 drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm, unsigned long start, 283 unsigned long end) 284 { 285 struct interval_tree_node *itree; 286 287 itree = interval_tree_iter_first(&gpusvm->root, start, end - 1); 288 289 if (itree) 290 return container_of(itree, struct drm_gpusvm_notifier, itree); 291 else 292 return NULL; 293 } 294 EXPORT_SYMBOL_GPL(drm_gpusvm_notifier_find); 295 296 /** 297 * drm_gpusvm_range_find() - Find GPU SVM range from GPU SVM notifier 298 * @notifier: Pointer to the GPU SVM notifier structure. 299 * @start: Start address of the range 300 * @end: End address of the range 301 * 302 * Return: A pointer to the drm_gpusvm_range if found or NULL 303 */ 304 struct drm_gpusvm_range * 305 drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start, 306 unsigned long end) 307 { 308 struct interval_tree_node *itree; 309 310 itree = interval_tree_iter_first(¬ifier->root, start, end - 1); 311 312 if (itree) 313 return container_of(itree, struct drm_gpusvm_range, itree); 314 else 315 return NULL; 316 } 317 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find); 318 319 /** 320 * drm_gpusvm_notifier_invalidate() - Invalidate a GPU SVM notifier. 321 * @mni: Pointer to the mmu_interval_notifier structure. 322 * @mmu_range: Pointer to the mmu_notifier_range structure. 323 * @cur_seq: Current sequence number. 324 * 325 * This function serves as a generic MMU notifier for GPU SVM. It sets the MMU 326 * notifier sequence number and calls the driver invalidate vfunc under 327 * gpusvm->notifier_lock. 328 * 329 * Return: true if the operation succeeds, false otherwise. 330 */ 331 static bool 332 drm_gpusvm_notifier_invalidate(struct mmu_interval_notifier *mni, 333 const struct mmu_notifier_range *mmu_range, 334 unsigned long cur_seq) 335 { 336 struct drm_gpusvm_notifier *notifier = 337 container_of(mni, typeof(*notifier), notifier); 338 struct drm_gpusvm *gpusvm = notifier->gpusvm; 339 340 if (!mmu_notifier_range_blockable(mmu_range)) 341 return false; 342 343 down_write(&gpusvm->notifier_lock); 344 mmu_interval_set_seq(mni, cur_seq); 345 gpusvm->ops->invalidate(gpusvm, notifier, mmu_range); 346 up_write(&gpusvm->notifier_lock); 347 348 return true; 349 } 350 351 /* 352 * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM 353 */ 354 static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = { 355 .invalidate = drm_gpusvm_notifier_invalidate, 356 }; 357 358 /** 359 * drm_gpusvm_init() - Initialize the GPU SVM. 360 * @gpusvm: Pointer to the GPU SVM structure. 361 * @name: Name of the GPU SVM. 362 * @drm: Pointer to the DRM device structure. 363 * @mm: Pointer to the mm_struct for the address space. 364 * @mm_start: Start address of GPU SVM. 365 * @mm_range: Range of the GPU SVM. 366 * @notifier_size: Size of individual notifiers. 
367 * @ops: Pointer to the operations structure for GPU SVM. 368 * @chunk_sizes: Pointer to the array of chunk sizes used in range allocation. 369 * Entries should be powers of 2 in descending order with last 370 * entry being SZ_4K. 371 * @num_chunks: Number of chunks. 372 * 373 * This function initializes the GPU SVM. 374 * 375 * Note: If only using the simple drm_gpusvm_pages API (get/unmap/free), 376 * then only @gpusvm, @name, and @drm are expected. However, the same base 377 * @gpusvm can also be used with both modes together in which case the full 378 * setup is needed, where the core drm_gpusvm_pages API will simply never use 379 * the other fields. 380 * 381 * Return: 0 on success, a negative error code on failure. 382 */ 383 int drm_gpusvm_init(struct drm_gpusvm *gpusvm, 384 const char *name, struct drm_device *drm, 385 struct mm_struct *mm, 386 unsigned long mm_start, unsigned long mm_range, 387 unsigned long notifier_size, 388 const struct drm_gpusvm_ops *ops, 389 const unsigned long *chunk_sizes, int num_chunks) 390 { 391 if (mm) { 392 if (!ops->invalidate || !num_chunks) 393 return -EINVAL; 394 mmgrab(mm); 395 } else { 396 /* No full SVM mode, only core drm_gpusvm_pages API. */ 397 if (ops || num_chunks || mm_range || notifier_size) 398 return -EINVAL; 399 } 400 401 gpusvm->name = name; 402 gpusvm->drm = drm; 403 gpusvm->mm = mm; 404 gpusvm->mm_start = mm_start; 405 gpusvm->mm_range = mm_range; 406 gpusvm->notifier_size = notifier_size; 407 gpusvm->ops = ops; 408 gpusvm->chunk_sizes = chunk_sizes; 409 gpusvm->num_chunks = num_chunks; 410 411 gpusvm->root = RB_ROOT_CACHED; 412 INIT_LIST_HEAD(&gpusvm->notifier_list); 413 414 init_rwsem(&gpusvm->notifier_lock); 415 416 fs_reclaim_acquire(GFP_KERNEL); 417 might_lock(&gpusvm->notifier_lock); 418 fs_reclaim_release(GFP_KERNEL); 419 420 #ifdef CONFIG_LOCKDEP 421 gpusvm->lock_dep_map = NULL; 422 #endif 423 424 return 0; 425 } 426 EXPORT_SYMBOL_GPL(drm_gpusvm_init); 427 428 /** 429 * to_drm_gpusvm_notifier() - retrieve the container struct for a given rbtree node 430 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_notifier struct 431 * 432 * Return: A pointer to the containing drm_gpusvm_notifier structure. 433 */ 434 static struct drm_gpusvm_notifier *to_drm_gpusvm_notifier(struct rb_node *node) 435 { 436 return container_of(node, struct drm_gpusvm_notifier, itree.rb); 437 } 438 439 /** 440 * drm_gpusvm_notifier_insert() - Insert GPU SVM notifier 441 * @gpusvm: Pointer to the GPU SVM structure 442 * @notifier: Pointer to the GPU SVM notifier structure 443 * 444 * This function inserts the GPU SVM notifier into the GPU SVM RB tree and list. 445 */ 446 static void drm_gpusvm_notifier_insert(struct drm_gpusvm *gpusvm, 447 struct drm_gpusvm_notifier *notifier) 448 { 449 struct rb_node *node; 450 struct list_head *head; 451 452 interval_tree_insert(¬ifier->itree, &gpusvm->root); 453 454 node = rb_prev(¬ifier->itree.rb); 455 if (node) 456 head = &(to_drm_gpusvm_notifier(node))->entry; 457 else 458 head = &gpusvm->notifier_list; 459 460 list_add(¬ifier->entry, head); 461 } 462 463 /** 464 * drm_gpusvm_notifier_remove() - Remove GPU SVM notifier 465 * @gpusvm: Pointer to the GPU SVM tructure 466 * @notifier: Pointer to the GPU SVM notifier structure 467 * 468 * This function removes the GPU SVM notifier from the GPU SVM RB tree and list. 
469 */ 470 static void drm_gpusvm_notifier_remove(struct drm_gpusvm *gpusvm, 471 struct drm_gpusvm_notifier *notifier) 472 { 473 interval_tree_remove(¬ifier->itree, &gpusvm->root); 474 list_del(¬ifier->entry); 475 } 476 477 /** 478 * drm_gpusvm_fini() - Finalize the GPU SVM. 479 * @gpusvm: Pointer to the GPU SVM structure. 480 * 481 * This function finalizes the GPU SVM by cleaning up any remaining ranges and 482 * notifiers, and dropping a reference to struct MM. 483 */ 484 void drm_gpusvm_fini(struct drm_gpusvm *gpusvm) 485 { 486 struct drm_gpusvm_notifier *notifier, *next; 487 488 drm_gpusvm_for_each_notifier_safe(notifier, next, gpusvm, 0, LONG_MAX) { 489 struct drm_gpusvm_range *range, *__next; 490 491 /* 492 * Remove notifier first to avoid racing with any invalidation 493 */ 494 mmu_interval_notifier_remove(¬ifier->notifier); 495 notifier->flags.removed = true; 496 497 drm_gpusvm_for_each_range_safe(range, __next, notifier, 0, 498 LONG_MAX) 499 drm_gpusvm_range_remove(gpusvm, range); 500 } 501 502 if (gpusvm->mm) 503 mmdrop(gpusvm->mm); 504 WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root)); 505 } 506 EXPORT_SYMBOL_GPL(drm_gpusvm_fini); 507 508 /** 509 * drm_gpusvm_notifier_alloc() - Allocate GPU SVM notifier 510 * @gpusvm: Pointer to the GPU SVM structure 511 * @fault_addr: Fault address 512 * 513 * This function allocates and initializes the GPU SVM notifier structure. 514 * 515 * Return: Pointer to the allocated GPU SVM notifier on success, ERR_PTR() on failure. 516 */ 517 static struct drm_gpusvm_notifier * 518 drm_gpusvm_notifier_alloc(struct drm_gpusvm *gpusvm, unsigned long fault_addr) 519 { 520 struct drm_gpusvm_notifier *notifier; 521 522 if (gpusvm->ops->notifier_alloc) 523 notifier = gpusvm->ops->notifier_alloc(); 524 else 525 notifier = kzalloc(sizeof(*notifier), GFP_KERNEL); 526 527 if (!notifier) 528 return ERR_PTR(-ENOMEM); 529 530 notifier->gpusvm = gpusvm; 531 notifier->itree.start = ALIGN_DOWN(fault_addr, gpusvm->notifier_size); 532 notifier->itree.last = ALIGN(fault_addr + 1, gpusvm->notifier_size) - 1; 533 INIT_LIST_HEAD(¬ifier->entry); 534 notifier->root = RB_ROOT_CACHED; 535 INIT_LIST_HEAD(¬ifier->range_list); 536 537 return notifier; 538 } 539 540 /** 541 * drm_gpusvm_notifier_free() - Free GPU SVM notifier 542 * @gpusvm: Pointer to the GPU SVM structure 543 * @notifier: Pointer to the GPU SVM notifier structure 544 * 545 * This function frees the GPU SVM notifier structure. 546 */ 547 static void drm_gpusvm_notifier_free(struct drm_gpusvm *gpusvm, 548 struct drm_gpusvm_notifier *notifier) 549 { 550 WARN_ON(!RB_EMPTY_ROOT(¬ifier->root.rb_root)); 551 552 if (gpusvm->ops->notifier_free) 553 gpusvm->ops->notifier_free(notifier); 554 else 555 kfree(notifier); 556 } 557 558 /** 559 * to_drm_gpusvm_range() - retrieve the container struct for a given rbtree node 560 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_range struct 561 * 562 * Return: A pointer to the containing drm_gpusvm_range structure. 563 */ 564 static struct drm_gpusvm_range *to_drm_gpusvm_range(struct rb_node *node) 565 { 566 return container_of(node, struct drm_gpusvm_range, itree.rb); 567 } 568 569 /** 570 * drm_gpusvm_range_insert() - Insert GPU SVM range 571 * @notifier: Pointer to the GPU SVM notifier structure 572 * @range: Pointer to the GPU SVM range structure 573 * 574 * This function inserts the GPU SVM range into the notifier RB tree and list. 
 */
static void drm_gpusvm_range_insert(struct drm_gpusvm_notifier *notifier,
				    struct drm_gpusvm_range *range)
{
	struct rb_node *node;
	struct list_head *head;

	drm_gpusvm_notifier_lock(notifier->gpusvm);
	interval_tree_insert(&range->itree, &notifier->root);

	node = rb_prev(&range->itree.rb);
	if (node)
		head = &(to_drm_gpusvm_range(node))->entry;
	else
		head = &notifier->range_list;

	list_add(&range->entry, head);
	drm_gpusvm_notifier_unlock(notifier->gpusvm);
}

/**
 * __drm_gpusvm_range_remove() - Remove GPU SVM range
 * @notifier: Pointer to the GPU SVM notifier structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function removes the GPU SVM range from the notifier RB tree and list.
 */
static void __drm_gpusvm_range_remove(struct drm_gpusvm_notifier *notifier,
				      struct drm_gpusvm_range *range)
{
	interval_tree_remove(&range->itree, &notifier->root);
	list_del(&range->entry);
}

/**
 * drm_gpusvm_range_alloc() - Allocate GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 * @fault_addr: Fault address
 * @chunk_size: Chunk size
 * @migrate_devmem: Flag indicating whether to migrate device memory
 *
 * This function allocates and initializes the GPU SVM range structure.
 *
 * Return: Pointer to the allocated GPU SVM range on success, ERR_PTR() on failure.
 */
static struct drm_gpusvm_range *
drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm,
		       struct drm_gpusvm_notifier *notifier,
		       unsigned long fault_addr, unsigned long chunk_size,
		       bool migrate_devmem)
{
	struct drm_gpusvm_range *range;

	if (gpusvm->ops->range_alloc)
		range = gpusvm->ops->range_alloc(gpusvm);
	else
		range = kzalloc(sizeof(*range), GFP_KERNEL);

	if (!range)
		return ERR_PTR(-ENOMEM);

	kref_init(&range->refcount);
	range->gpusvm = gpusvm;
	range->notifier = notifier;
	range->itree.start = ALIGN_DOWN(fault_addr, chunk_size);
	range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1;
	INIT_LIST_HEAD(&range->entry);
	range->pages.notifier_seq = LONG_MAX;
	range->pages.flags.migrate_devmem = migrate_devmem ? 1 : 0;

	return range;
}

/**
 * drm_gpusvm_hmm_pfn_to_order() - Get the largest CPU mapping order.
 * @hmm_pfn: The current hmm_pfn.
 * @hmm_pfn_index: Index of the @hmm_pfn within the pfn array.
 * @npages: Number of pages within the pfn array, i.e. the hmm range size.
 *
 * To allow skipping PFNs with the same flags (like when they belong to
 * the same huge PTE) when looping over the pfn array, take a given hmm_pfn,
 * and return the largest order that will fit inside the CPU PTE, but also
 * crucially accounting for the original hmm range boundaries.
 *
 * Return: The largest order that will safely fit within the size of the hmm_pfn
 * CPU PTE.
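 *
 * A typical caller uses the returned order as the stride when walking the pfn
 * array, mirroring how this file consumes it:
 *
 * .. code-block:: c
 *
 *	for (i = 0; i < npages;) {
 *		unsigned int order = drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages);
 *
 *		// pfns[i] .. pfns[i + (1 << order) - 1] share one CPU mapping
 *		i += 1UL << order;
 *	}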
662 */ 663 static unsigned int drm_gpusvm_hmm_pfn_to_order(unsigned long hmm_pfn, 664 unsigned long hmm_pfn_index, 665 unsigned long npages) 666 { 667 unsigned long size; 668 669 size = 1UL << hmm_pfn_to_map_order(hmm_pfn); 670 size -= (hmm_pfn & ~HMM_PFN_FLAGS) & (size - 1); 671 hmm_pfn_index += size; 672 if (hmm_pfn_index > npages) 673 size -= (hmm_pfn_index - npages); 674 675 return ilog2(size); 676 } 677 678 /** 679 * drm_gpusvm_check_pages() - Check pages 680 * @gpusvm: Pointer to the GPU SVM structure 681 * @notifier: Pointer to the GPU SVM notifier structure 682 * @start: Start address 683 * @end: End address 684 * @dev_private_owner: The device private page owner 685 * 686 * Check if pages between start and end have been faulted in on the CPU. Use to 687 * prevent migration of pages without CPU backing store. 688 * 689 * Return: True if pages have been faulted into CPU, False otherwise 690 */ 691 static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm, 692 struct drm_gpusvm_notifier *notifier, 693 unsigned long start, unsigned long end, 694 void *dev_private_owner) 695 { 696 struct hmm_range hmm_range = { 697 .default_flags = 0, 698 .notifier = ¬ifier->notifier, 699 .start = start, 700 .end = end, 701 .dev_private_owner = dev_private_owner, 702 }; 703 unsigned long timeout = 704 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 705 unsigned long *pfns; 706 unsigned long npages = npages_in_range(start, end); 707 int err, i; 708 709 mmap_assert_locked(gpusvm->mm); 710 711 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 712 if (!pfns) 713 return false; 714 715 hmm_range.notifier_seq = mmu_interval_read_begin(¬ifier->notifier); 716 hmm_range.hmm_pfns = pfns; 717 718 while (true) { 719 err = hmm_range_fault(&hmm_range); 720 if (err == -EBUSY) { 721 if (time_after(jiffies, timeout)) 722 break; 723 724 hmm_range.notifier_seq = 725 mmu_interval_read_begin(¬ifier->notifier); 726 continue; 727 } 728 break; 729 } 730 if (err) 731 goto err_free; 732 733 for (i = 0; i < npages;) { 734 if (!(pfns[i] & HMM_PFN_VALID)) { 735 err = -EFAULT; 736 goto err_free; 737 } 738 i += 0x1 << drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages); 739 } 740 741 err_free: 742 kvfree(pfns); 743 return err ? false : true; 744 } 745 746 /** 747 * drm_gpusvm_range_chunk_size() - Determine chunk size for GPU SVM range 748 * @gpusvm: Pointer to the GPU SVM structure 749 * @notifier: Pointer to the GPU SVM notifier structure 750 * @vas: Pointer to the virtual memory area structure 751 * @fault_addr: Fault address 752 * @gpuva_start: Start address of GPUVA which mirrors CPU 753 * @gpuva_end: End address of GPUVA which mirrors CPU 754 * @check_pages_threshold: Check CPU pages for present threshold 755 * @dev_private_owner: The device private page owner 756 * 757 * This function determines the chunk size for the GPU SVM range based on the 758 * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual 759 * memory area boundaries. 760 * 761 * Return: Chunk size on success, LONG_MAX on failure. 
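 *
 * As a worked example, assume a hypothetical chunk array of { SZ_2M, SZ_64K,
 * SZ_4K } and a fault at 0x201000 inside a VMA covering [0x200000, 0x240000),
 * with the notifier and GPUVA bounds covering at least the same region. The
 * candidate chunks are evaluated largest first:
 *
 * .. code-block:: c
 *
 *	start = ALIGN_DOWN(0x201000, SZ_2M);	// 0x200000
 *	end   = ALIGN(0x201000 + 1, SZ_2M);	// 0x400000 -> overshoots the VMA, rejected
 *
 *	start = ALIGN_DOWN(0x201000, SZ_64K);	// 0x200000
 *	end   = ALIGN(0x201000 + 1, SZ_64K);	// 0x210000 -> fits, SZ_64K is chosen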
762 */ 763 static unsigned long 764 drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm, 765 struct drm_gpusvm_notifier *notifier, 766 struct vm_area_struct *vas, 767 unsigned long fault_addr, 768 unsigned long gpuva_start, 769 unsigned long gpuva_end, 770 unsigned long check_pages_threshold, 771 void *dev_private_owner) 772 { 773 unsigned long start, end; 774 int i = 0; 775 776 retry: 777 for (; i < gpusvm->num_chunks; ++i) { 778 start = ALIGN_DOWN(fault_addr, gpusvm->chunk_sizes[i]); 779 end = ALIGN(fault_addr + 1, gpusvm->chunk_sizes[i]); 780 781 if (start >= vas->vm_start && end <= vas->vm_end && 782 start >= drm_gpusvm_notifier_start(notifier) && 783 end <= drm_gpusvm_notifier_end(notifier) && 784 start >= gpuva_start && end <= gpuva_end) 785 break; 786 } 787 788 if (i == gpusvm->num_chunks) 789 return LONG_MAX; 790 791 /* 792 * If allocation more than page, ensure not to overlap with existing 793 * ranges. 794 */ 795 if (end - start != SZ_4K) { 796 struct drm_gpusvm_range *range; 797 798 range = drm_gpusvm_range_find(notifier, start, end); 799 if (range) { 800 ++i; 801 goto retry; 802 } 803 804 /* 805 * XXX: Only create range on pages CPU has faulted in. Without 806 * this check, or prefault, on BMG 'xe_exec_system_allocator --r 807 * process-many-malloc' fails. In the failure case, each process 808 * mallocs 16k but the CPU VMA is ~128k which results in 64k SVM 809 * ranges. When migrating the SVM ranges, some processes fail in 810 * drm_pagemap_migrate_to_devmem with 'migrate.cpages != npages' 811 * and then upon drm_gpusvm_range_get_pages device pages from 812 * other processes are collected + faulted in which creates all 813 * sorts of problems. Unsure exactly how this happening, also 814 * problem goes away if 'xe_exec_system_allocator --r 815 * process-many-malloc' mallocs at least 64k at a time. 816 */ 817 if (end - start <= check_pages_threshold && 818 !drm_gpusvm_check_pages(gpusvm, notifier, start, end, dev_private_owner)) { 819 ++i; 820 goto retry; 821 } 822 } 823 824 return end - start; 825 } 826 827 #ifdef CONFIG_LOCKDEP 828 /** 829 * drm_gpusvm_driver_lock_held() - Assert GPU SVM driver lock is held 830 * @gpusvm: Pointer to the GPU SVM structure. 831 * 832 * Ensure driver lock is held. 833 */ 834 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm) 835 { 836 if ((gpusvm)->lock_dep_map) 837 lockdep_assert(lock_is_held_type((gpusvm)->lock_dep_map, 0)); 838 } 839 #else 840 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm) 841 { 842 } 843 #endif 844 845 /** 846 * drm_gpusvm_find_vma_start() - Find start address for first VMA in range 847 * @gpusvm: Pointer to the GPU SVM structure 848 * @start: The inclusive start user address. 849 * @end: The exclusive end user address. 850 * 851 * Returns: The start address of first VMA within the provided range, 852 * ULONG_MAX otherwise. Assumes start_addr < end_addr. 
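 *
 * A minimal usage sketch, assuming a driver that only wants to act on the
 * populated part of a user-supplied region (driver_prefetch_range() is
 * hypothetical):
 *
 * .. code-block:: c
 *
 *	unsigned long addr = drm_gpusvm_find_vma_start(gpusvm, start, end);
 *
 *	if (addr == ULONG_MAX)
 *		return -ENOENT;	// no VMA intersects [start, end)
 *
 *	return driver_prefetch_range(gpusvm, max(addr, start), end);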
853 */ 854 unsigned long 855 drm_gpusvm_find_vma_start(struct drm_gpusvm *gpusvm, 856 unsigned long start, 857 unsigned long end) 858 { 859 struct mm_struct *mm = gpusvm->mm; 860 struct vm_area_struct *vma; 861 unsigned long addr = ULONG_MAX; 862 863 if (!mmget_not_zero(mm)) 864 return addr; 865 866 mmap_read_lock(mm); 867 868 vma = find_vma_intersection(mm, start, end); 869 if (vma) 870 addr = vma->vm_start; 871 872 mmap_read_unlock(mm); 873 mmput(mm); 874 875 return addr; 876 } 877 EXPORT_SYMBOL_GPL(drm_gpusvm_find_vma_start); 878 879 /** 880 * drm_gpusvm_range_find_or_insert() - Find or insert GPU SVM range 881 * @gpusvm: Pointer to the GPU SVM structure 882 * @fault_addr: Fault address 883 * @gpuva_start: Start address of GPUVA which mirrors CPU 884 * @gpuva_end: End address of GPUVA which mirrors CPU 885 * @ctx: GPU SVM context 886 * 887 * This function finds or inserts a newly allocated a GPU SVM range based on the 888 * fault address. Caller must hold a lock to protect range lookup and insertion. 889 * 890 * Return: Pointer to the GPU SVM range on success, ERR_PTR() on failure. 891 */ 892 struct drm_gpusvm_range * 893 drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm, 894 unsigned long fault_addr, 895 unsigned long gpuva_start, 896 unsigned long gpuva_end, 897 const struct drm_gpusvm_ctx *ctx) 898 { 899 struct drm_gpusvm_notifier *notifier; 900 struct drm_gpusvm_range *range; 901 struct mm_struct *mm = gpusvm->mm; 902 struct vm_area_struct *vas; 903 bool notifier_alloc = false; 904 unsigned long chunk_size; 905 int err; 906 bool migrate_devmem; 907 908 drm_gpusvm_driver_lock_held(gpusvm); 909 910 if (fault_addr < gpusvm->mm_start || 911 fault_addr > gpusvm->mm_start + gpusvm->mm_range) 912 return ERR_PTR(-EINVAL); 913 914 if (!mmget_not_zero(mm)) 915 return ERR_PTR(-EFAULT); 916 917 notifier = drm_gpusvm_notifier_find(gpusvm, fault_addr, fault_addr + 1); 918 if (!notifier) { 919 notifier = drm_gpusvm_notifier_alloc(gpusvm, fault_addr); 920 if (IS_ERR(notifier)) { 921 err = PTR_ERR(notifier); 922 goto err_mmunlock; 923 } 924 notifier_alloc = true; 925 err = mmu_interval_notifier_insert(¬ifier->notifier, 926 mm, 927 drm_gpusvm_notifier_start(notifier), 928 drm_gpusvm_notifier_size(notifier), 929 &drm_gpusvm_notifier_ops); 930 if (err) 931 goto err_notifier; 932 } 933 934 mmap_read_lock(mm); 935 936 vas = vma_lookup(mm, fault_addr); 937 if (!vas) { 938 err = -ENOENT; 939 goto err_notifier_remove; 940 } 941 942 if (!ctx->read_only && !(vas->vm_flags & VM_WRITE)) { 943 err = -EPERM; 944 goto err_notifier_remove; 945 } 946 947 range = drm_gpusvm_range_find(notifier, fault_addr, fault_addr + 1); 948 if (range) 949 goto out_mmunlock; 950 /* 951 * XXX: Short-circuiting migration based on migrate_vma_* current 952 * limitations. If/when migrate_vma_* add more support, this logic will 953 * have to change. 
954 */ 955 migrate_devmem = ctx->devmem_possible && 956 vma_is_anonymous(vas) && !is_vm_hugetlb_page(vas); 957 958 chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas, 959 fault_addr, gpuva_start, 960 gpuva_end, 961 ctx->check_pages_threshold, 962 ctx->device_private_page_owner); 963 if (chunk_size == LONG_MAX) { 964 err = -EINVAL; 965 goto err_notifier_remove; 966 } 967 968 range = drm_gpusvm_range_alloc(gpusvm, notifier, fault_addr, chunk_size, 969 migrate_devmem); 970 if (IS_ERR(range)) { 971 err = PTR_ERR(range); 972 goto err_notifier_remove; 973 } 974 975 drm_gpusvm_range_insert(notifier, range); 976 if (notifier_alloc) 977 drm_gpusvm_notifier_insert(gpusvm, notifier); 978 979 out_mmunlock: 980 mmap_read_unlock(mm); 981 mmput(mm); 982 983 return range; 984 985 err_notifier_remove: 986 mmap_read_unlock(mm); 987 if (notifier_alloc) 988 mmu_interval_notifier_remove(¬ifier->notifier); 989 err_notifier: 990 if (notifier_alloc) 991 drm_gpusvm_notifier_free(gpusvm, notifier); 992 err_mmunlock: 993 mmput(mm); 994 return ERR_PTR(err); 995 } 996 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert); 997 998 /** 999 * __drm_gpusvm_unmap_pages() - Unmap pages associated with GPU SVM pages (internal) 1000 * @gpusvm: Pointer to the GPU SVM structure 1001 * @svm_pages: Pointer to the GPU SVM pages structure 1002 * @npages: Number of pages to unmap 1003 * 1004 * This function unmap pages associated with a GPU SVM pages struct. Assumes and 1005 * asserts correct locking is in place when called. 1006 */ 1007 static void __drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm, 1008 struct drm_gpusvm_pages *svm_pages, 1009 unsigned long npages) 1010 { 1011 struct drm_pagemap *dpagemap = svm_pages->dpagemap; 1012 struct device *dev = gpusvm->drm->dev; 1013 unsigned long i, j; 1014 1015 lockdep_assert_held(&gpusvm->notifier_lock); 1016 1017 if (svm_pages->flags.has_dma_mapping) { 1018 struct drm_gpusvm_pages_flags flags = { 1019 .__flags = svm_pages->flags.__flags, 1020 }; 1021 1022 for (i = 0, j = 0; i < npages; j++) { 1023 struct drm_pagemap_addr *addr = &svm_pages->dma_addr[j]; 1024 1025 if (addr->proto == DRM_INTERCONNECT_SYSTEM) 1026 dma_unmap_page(dev, 1027 addr->addr, 1028 PAGE_SIZE << addr->order, 1029 addr->dir); 1030 else if (dpagemap && dpagemap->ops->device_unmap) 1031 dpagemap->ops->device_unmap(dpagemap, 1032 dev, *addr); 1033 i += 1 << addr->order; 1034 } 1035 1036 /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ 1037 flags.has_devmem_pages = false; 1038 flags.has_dma_mapping = false; 1039 WRITE_ONCE(svm_pages->flags.__flags, flags.__flags); 1040 1041 svm_pages->dpagemap = NULL; 1042 } 1043 } 1044 1045 /** 1046 * __drm_gpusvm_free_pages() - Free dma array associated with GPU SVM pages 1047 * @gpusvm: Pointer to the GPU SVM structure 1048 * @svm_pages: Pointer to the GPU SVM pages structure 1049 * 1050 * This function frees the dma address array associated with a GPU SVM range. 
1051 */ 1052 static void __drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm, 1053 struct drm_gpusvm_pages *svm_pages) 1054 { 1055 lockdep_assert_held(&gpusvm->notifier_lock); 1056 1057 if (svm_pages->dma_addr) { 1058 kvfree(svm_pages->dma_addr); 1059 svm_pages->dma_addr = NULL; 1060 } 1061 } 1062 1063 /** 1064 * drm_gpusvm_free_pages() - Free dma-mapping associated with GPU SVM pages 1065 * struct 1066 * @gpusvm: Pointer to the GPU SVM structure 1067 * @svm_pages: Pointer to the GPU SVM pages structure 1068 * @npages: Number of mapped pages 1069 * 1070 * This function unmaps and frees the dma address array associated with a GPU 1071 * SVM pages struct. 1072 */ 1073 void drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm, 1074 struct drm_gpusvm_pages *svm_pages, 1075 unsigned long npages) 1076 { 1077 drm_gpusvm_notifier_lock(gpusvm); 1078 __drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages); 1079 __drm_gpusvm_free_pages(gpusvm, svm_pages); 1080 drm_gpusvm_notifier_unlock(gpusvm); 1081 } 1082 EXPORT_SYMBOL_GPL(drm_gpusvm_free_pages); 1083 1084 /** 1085 * drm_gpusvm_range_remove() - Remove GPU SVM range 1086 * @gpusvm: Pointer to the GPU SVM structure 1087 * @range: Pointer to the GPU SVM range to be removed 1088 * 1089 * This function removes the specified GPU SVM range and also removes the parent 1090 * GPU SVM notifier if no more ranges remain in the notifier. The caller must 1091 * hold a lock to protect range and notifier removal. 1092 */ 1093 void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm, 1094 struct drm_gpusvm_range *range) 1095 { 1096 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1097 drm_gpusvm_range_end(range)); 1098 struct drm_gpusvm_notifier *notifier; 1099 1100 drm_gpusvm_driver_lock_held(gpusvm); 1101 1102 notifier = drm_gpusvm_notifier_find(gpusvm, 1103 drm_gpusvm_range_start(range), 1104 drm_gpusvm_range_start(range) + 1); 1105 if (WARN_ON_ONCE(!notifier)) 1106 return; 1107 1108 drm_gpusvm_notifier_lock(gpusvm); 1109 __drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages); 1110 __drm_gpusvm_free_pages(gpusvm, &range->pages); 1111 __drm_gpusvm_range_remove(notifier, range); 1112 drm_gpusvm_notifier_unlock(gpusvm); 1113 1114 drm_gpusvm_range_put(range); 1115 1116 if (RB_EMPTY_ROOT(¬ifier->root.rb_root)) { 1117 if (!notifier->flags.removed) 1118 mmu_interval_notifier_remove(¬ifier->notifier); 1119 drm_gpusvm_notifier_remove(gpusvm, notifier); 1120 drm_gpusvm_notifier_free(gpusvm, notifier); 1121 } 1122 } 1123 EXPORT_SYMBOL_GPL(drm_gpusvm_range_remove); 1124 1125 /** 1126 * drm_gpusvm_range_get() - Get a reference to GPU SVM range 1127 * @range: Pointer to the GPU SVM range 1128 * 1129 * This function increments the reference count of the specified GPU SVM range. 1130 * 1131 * Return: Pointer to the GPU SVM range. 1132 */ 1133 struct drm_gpusvm_range * 1134 drm_gpusvm_range_get(struct drm_gpusvm_range *range) 1135 { 1136 kref_get(&range->refcount); 1137 1138 return range; 1139 } 1140 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get); 1141 1142 /** 1143 * drm_gpusvm_range_destroy() - Destroy GPU SVM range 1144 * @refcount: Pointer to the reference counter embedded in the GPU SVM range 1145 * 1146 * This function destroys the specified GPU SVM range when its reference count 1147 * reaches zero. If a custom range-free function is provided, it is invoked to 1148 * free the range; otherwise, the range is deallocated using kfree(). 
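 *
 * A driver that embeds the range in a larger structure would pair this with
 * range_alloc()/range_free() ops along these lines (struct driver_range is
 * hypothetical):
 *
 * .. code-block:: c
 *
 *	struct driver_range {
 *		struct drm_gpusvm_range base;
 *		// driver-specific state, e.g. GPU binding bookkeeping
 *	};
 *
 *	static struct drm_gpusvm_range *driver_range_alloc(struct drm_gpusvm *gpusvm)
 *	{
 *		struct driver_range *range = kzalloc(sizeof(*range), GFP_KERNEL);
 *
 *		return range ? &range->base : NULL;
 *	}
 *
 *	static void driver_range_free(struct drm_gpusvm_range *range)
 *	{
 *		kfree(container_of(range, struct driver_range, base));
 *	}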
 */
static void drm_gpusvm_range_destroy(struct kref *refcount)
{
	struct drm_gpusvm_range *range =
		container_of(refcount, struct drm_gpusvm_range, refcount);
	struct drm_gpusvm *gpusvm = range->gpusvm;

	if (gpusvm->ops->range_free)
		gpusvm->ops->range_free(range);
	else
		kfree(range);
}

/**
 * drm_gpusvm_range_put() - Put a reference to GPU SVM range
 * @range: Pointer to the GPU SVM range
 *
 * This function decrements the reference count of the specified GPU SVM range
 * and frees it when the count reaches zero.
 */
void drm_gpusvm_range_put(struct drm_gpusvm_range *range)
{
	kref_put(&range->refcount, drm_gpusvm_range_destroy);
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_put);

/**
 * drm_gpusvm_pages_valid() - GPU SVM range pages valid
 * @gpusvm: Pointer to the GPU SVM structure
 * @svm_pages: Pointer to the GPU SVM pages structure
 *
 * This function determines if a GPU SVM range's pages are valid. It is expected
 * to be called while holding gpusvm->notifier_lock and as the last step before
 * committing a GPU binding. This is akin to a notifier seqno check in the HMM
 * documentation, but due to wider notifiers (i.e., notifiers which span multiple
 * ranges) this function is required for finer-grained checking (i.e., per range)
 * of whether pages are valid.
 *
 * Return: True if GPU SVM range has valid pages, False otherwise
 */
static bool drm_gpusvm_pages_valid(struct drm_gpusvm *gpusvm,
				   struct drm_gpusvm_pages *svm_pages)
{
	lockdep_assert_held(&gpusvm->notifier_lock);

	return svm_pages->flags.has_devmem_pages || svm_pages->flags.has_dma_mapping;
}

/**
 * drm_gpusvm_range_pages_valid() - GPU SVM range pages valid
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function determines if a GPU SVM range's pages are valid. It is expected
 * to be called while holding gpusvm->notifier_lock and as the last step before
 * committing a GPU binding. This is akin to a notifier seqno check in the HMM
 * documentation, but due to wider notifiers (i.e., notifiers which span multiple
 * ranges) this function is required for finer-grained checking (i.e., per range)
 * of whether pages are valid.
 *
 * Return: True if GPU SVM range has valid pages, False otherwise
 */
bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm,
				  struct drm_gpusvm_range *range)
{
	return drm_gpusvm_pages_valid(gpusvm, &range->pages);
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid);

/**
 * drm_gpusvm_pages_valid_unlocked() - GPU SVM pages valid unlocked
 * @gpusvm: Pointer to the GPU SVM structure
 * @svm_pages: Pointer to the GPU SVM pages structure
 *
 * This function determines if a GPU SVM range's pages are valid. It is expected
 * to be called without holding gpusvm->notifier_lock.
1225 * 1226 * Return: True if GPU SVM range has valid pages, False otherwise 1227 */ 1228 static bool drm_gpusvm_pages_valid_unlocked(struct drm_gpusvm *gpusvm, 1229 struct drm_gpusvm_pages *svm_pages) 1230 { 1231 bool pages_valid; 1232 1233 if (!svm_pages->dma_addr) 1234 return false; 1235 1236 drm_gpusvm_notifier_lock(gpusvm); 1237 pages_valid = drm_gpusvm_pages_valid(gpusvm, svm_pages); 1238 if (!pages_valid) 1239 __drm_gpusvm_free_pages(gpusvm, svm_pages); 1240 drm_gpusvm_notifier_unlock(gpusvm); 1241 1242 return pages_valid; 1243 } 1244 1245 /** 1246 * drm_gpusvm_get_pages() - Get pages and populate GPU SVM pages struct 1247 * @gpusvm: Pointer to the GPU SVM structure 1248 * @svm_pages: The SVM pages to populate. This will contain the dma-addresses 1249 * @mm: The mm corresponding to the CPU range 1250 * @notifier: The corresponding notifier for the given CPU range 1251 * @pages_start: Start CPU address for the pages 1252 * @pages_end: End CPU address for the pages (exclusive) 1253 * @ctx: GPU SVM context 1254 * 1255 * This function gets and maps pages for CPU range and ensures they are 1256 * mapped for DMA access. 1257 * 1258 * Return: 0 on success, negative error code on failure. 1259 */ 1260 int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm, 1261 struct drm_gpusvm_pages *svm_pages, 1262 struct mm_struct *mm, 1263 struct mmu_interval_notifier *notifier, 1264 unsigned long pages_start, unsigned long pages_end, 1265 const struct drm_gpusvm_ctx *ctx) 1266 { 1267 struct hmm_range hmm_range = { 1268 .default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ? 0 : 1269 HMM_PFN_REQ_WRITE), 1270 .notifier = notifier, 1271 .start = pages_start, 1272 .end = pages_end, 1273 .dev_private_owner = ctx->device_private_page_owner, 1274 }; 1275 void *zdd; 1276 unsigned long timeout = 1277 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 1278 unsigned long i, j; 1279 unsigned long npages = npages_in_range(pages_start, pages_end); 1280 unsigned long num_dma_mapped; 1281 unsigned int order = 0; 1282 unsigned long *pfns; 1283 int err = 0; 1284 struct dev_pagemap *pagemap; 1285 struct drm_pagemap *dpagemap; 1286 struct drm_gpusvm_pages_flags flags; 1287 enum dma_data_direction dma_dir = ctx->read_only ? DMA_TO_DEVICE : 1288 DMA_BIDIRECTIONAL; 1289 1290 retry: 1291 hmm_range.notifier_seq = mmu_interval_read_begin(notifier); 1292 if (drm_gpusvm_pages_valid_unlocked(gpusvm, svm_pages)) 1293 goto set_seqno; 1294 1295 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 1296 if (!pfns) 1297 return -ENOMEM; 1298 1299 if (!mmget_not_zero(mm)) { 1300 err = -EFAULT; 1301 goto err_free; 1302 } 1303 1304 hmm_range.hmm_pfns = pfns; 1305 while (true) { 1306 mmap_read_lock(mm); 1307 err = hmm_range_fault(&hmm_range); 1308 mmap_read_unlock(mm); 1309 1310 if (err == -EBUSY) { 1311 if (time_after(jiffies, timeout)) 1312 break; 1313 1314 hmm_range.notifier_seq = 1315 mmu_interval_read_begin(notifier); 1316 continue; 1317 } 1318 break; 1319 } 1320 mmput(mm); 1321 if (err) 1322 goto err_free; 1323 1324 map_pages: 1325 /* 1326 * Perform all dma mappings under the notifier lock to not 1327 * access freed pages. A notifier will either block on 1328 * the notifier lock or unmap dma. 
1329 */ 1330 drm_gpusvm_notifier_lock(gpusvm); 1331 1332 flags.__flags = svm_pages->flags.__flags; 1333 if (flags.unmapped) { 1334 drm_gpusvm_notifier_unlock(gpusvm); 1335 err = -EFAULT; 1336 goto err_free; 1337 } 1338 1339 if (mmu_interval_read_retry(notifier, hmm_range.notifier_seq)) { 1340 drm_gpusvm_notifier_unlock(gpusvm); 1341 kvfree(pfns); 1342 goto retry; 1343 } 1344 1345 if (!svm_pages->dma_addr) { 1346 /* Unlock and restart mapping to allocate memory. */ 1347 drm_gpusvm_notifier_unlock(gpusvm); 1348 svm_pages->dma_addr = 1349 kvmalloc_array(npages, sizeof(*svm_pages->dma_addr), GFP_KERNEL); 1350 if (!svm_pages->dma_addr) { 1351 err = -ENOMEM; 1352 goto err_free; 1353 } 1354 goto map_pages; 1355 } 1356 1357 zdd = NULL; 1358 pagemap = NULL; 1359 num_dma_mapped = 0; 1360 for (i = 0, j = 0; i < npages; ++j) { 1361 struct page *page = hmm_pfn_to_page(pfns[i]); 1362 1363 order = drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages); 1364 if (is_device_private_page(page) || 1365 is_device_coherent_page(page)) { 1366 if (zdd != page->zone_device_data && i > 0) { 1367 err = -EOPNOTSUPP; 1368 goto err_unmap; 1369 } 1370 zdd = page->zone_device_data; 1371 if (pagemap != page_pgmap(page)) { 1372 if (i > 0) { 1373 err = -EOPNOTSUPP; 1374 goto err_unmap; 1375 } 1376 1377 pagemap = page_pgmap(page); 1378 dpagemap = drm_pagemap_page_to_dpagemap(page); 1379 if (drm_WARN_ON(gpusvm->drm, !dpagemap)) { 1380 /* 1381 * Raced. This is not supposed to happen 1382 * since hmm_range_fault() should've migrated 1383 * this page to system. 1384 */ 1385 err = -EAGAIN; 1386 goto err_unmap; 1387 } 1388 } 1389 svm_pages->dma_addr[j] = 1390 dpagemap->ops->device_map(dpagemap, 1391 gpusvm->drm->dev, 1392 page, order, 1393 dma_dir); 1394 if (dma_mapping_error(gpusvm->drm->dev, 1395 svm_pages->dma_addr[j].addr)) { 1396 err = -EFAULT; 1397 goto err_unmap; 1398 } 1399 } else { 1400 dma_addr_t addr; 1401 1402 if (is_zone_device_page(page) || pagemap) { 1403 err = -EOPNOTSUPP; 1404 goto err_unmap; 1405 } 1406 1407 if (ctx->devmem_only) { 1408 err = -EFAULT; 1409 goto err_unmap; 1410 } 1411 1412 addr = dma_map_page(gpusvm->drm->dev, 1413 page, 0, 1414 PAGE_SIZE << order, 1415 dma_dir); 1416 if (dma_mapping_error(gpusvm->drm->dev, addr)) { 1417 err = -EFAULT; 1418 goto err_unmap; 1419 } 1420 1421 svm_pages->dma_addr[j] = drm_pagemap_addr_encode 1422 (addr, DRM_INTERCONNECT_SYSTEM, order, 1423 dma_dir); 1424 } 1425 i += 1 << order; 1426 num_dma_mapped = i; 1427 flags.has_dma_mapping = true; 1428 } 1429 1430 if (pagemap) { 1431 flags.has_devmem_pages = true; 1432 svm_pages->dpagemap = dpagemap; 1433 } 1434 1435 /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ 1436 WRITE_ONCE(svm_pages->flags.__flags, flags.__flags); 1437 1438 drm_gpusvm_notifier_unlock(gpusvm); 1439 kvfree(pfns); 1440 set_seqno: 1441 svm_pages->notifier_seq = hmm_range.notifier_seq; 1442 1443 return 0; 1444 1445 err_unmap: 1446 __drm_gpusvm_unmap_pages(gpusvm, svm_pages, num_dma_mapped); 1447 drm_gpusvm_notifier_unlock(gpusvm); 1448 err_free: 1449 kvfree(pfns); 1450 if (err == -EAGAIN) 1451 goto retry; 1452 return err; 1453 } 1454 EXPORT_SYMBOL_GPL(drm_gpusvm_get_pages); 1455 1456 /** 1457 * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range 1458 * @gpusvm: Pointer to the GPU SVM structure 1459 * @range: Pointer to the GPU SVM range structure 1460 * @ctx: GPU SVM context 1461 * 1462 * This function gets pages for a GPU SVM range and ensures they are mapped for 1463 * DMA access. 
1464 * 1465 * Return: 0 on success, negative error code on failure. 1466 */ 1467 int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, 1468 struct drm_gpusvm_range *range, 1469 const struct drm_gpusvm_ctx *ctx) 1470 { 1471 return drm_gpusvm_get_pages(gpusvm, &range->pages, gpusvm->mm, 1472 &range->notifier->notifier, 1473 drm_gpusvm_range_start(range), 1474 drm_gpusvm_range_end(range), ctx); 1475 } 1476 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages); 1477 1478 /** 1479 * drm_gpusvm_unmap_pages() - Unmap GPU svm pages 1480 * @gpusvm: Pointer to the GPU SVM structure 1481 * @svm_pages: Pointer to the GPU SVM pages structure 1482 * @npages: Number of pages in @svm_pages. 1483 * @ctx: GPU SVM context 1484 * 1485 * This function unmaps pages associated with a GPU SVM pages struct. If 1486 * @in_notifier is set, it is assumed that gpusvm->notifier_lock is held in 1487 * write mode; if it is clear, it acquires gpusvm->notifier_lock in read mode. 1488 * Must be called in the invalidate() callback of the corresponding notifier for 1489 * IOMMU security model. 1490 */ 1491 void drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm, 1492 struct drm_gpusvm_pages *svm_pages, 1493 unsigned long npages, 1494 const struct drm_gpusvm_ctx *ctx) 1495 { 1496 if (ctx->in_notifier) 1497 lockdep_assert_held_write(&gpusvm->notifier_lock); 1498 else 1499 drm_gpusvm_notifier_lock(gpusvm); 1500 1501 __drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages); 1502 1503 if (!ctx->in_notifier) 1504 drm_gpusvm_notifier_unlock(gpusvm); 1505 } 1506 EXPORT_SYMBOL_GPL(drm_gpusvm_unmap_pages); 1507 1508 /** 1509 * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range 1510 * @gpusvm: Pointer to the GPU SVM structure 1511 * @range: Pointer to the GPU SVM range structure 1512 * @ctx: GPU SVM context 1513 * 1514 * This function unmaps pages associated with a GPU SVM range. If @in_notifier 1515 * is set, it is assumed that gpusvm->notifier_lock is held in write mode; if it 1516 * is clear, it acquires gpusvm->notifier_lock in read mode. Must be called on 1517 * each GPU SVM range attached to notifier in gpusvm->ops->invalidate for IOMMU 1518 * security model. 1519 */ 1520 void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, 1521 struct drm_gpusvm_range *range, 1522 const struct drm_gpusvm_ctx *ctx) 1523 { 1524 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1525 drm_gpusvm_range_end(range)); 1526 1527 return drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages, ctx); 1528 } 1529 EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages); 1530 1531 /** 1532 * drm_gpusvm_range_evict() - Evict GPU SVM range 1533 * @gpusvm: Pointer to the GPU SVM structure 1534 * @range: Pointer to the GPU SVM range to be removed 1535 * 1536 * This function evicts the specified GPU SVM range. 1537 * 1538 * Return: 0 on success, a negative error code on failure. 
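 *
 * A minimal usage sketch, mirroring the garbage collector in DOC: Examples,
 * where any remaining device memory pages of a partially unmapped range are
 * migrated back to RAM before the range is torn down (driver_unbind_range()
 * is hypothetical):
 *
 * .. code-block:: c
 *
 *	if (range->flags.partial_unmap)
 *		drm_gpusvm_range_evict(gpusvm, range);
 *
 *	driver_unbind_range(range);
 *	drm_gpusvm_range_remove(gpusvm, range);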
1539 */ 1540 int drm_gpusvm_range_evict(struct drm_gpusvm *gpusvm, 1541 struct drm_gpusvm_range *range) 1542 { 1543 struct mmu_interval_notifier *notifier = &range->notifier->notifier; 1544 struct hmm_range hmm_range = { 1545 .default_flags = HMM_PFN_REQ_FAULT, 1546 .notifier = notifier, 1547 .start = drm_gpusvm_range_start(range), 1548 .end = drm_gpusvm_range_end(range), 1549 .dev_private_owner = NULL, 1550 }; 1551 unsigned long timeout = 1552 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 1553 unsigned long *pfns; 1554 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1555 drm_gpusvm_range_end(range)); 1556 int err = 0; 1557 struct mm_struct *mm = gpusvm->mm; 1558 1559 if (!mmget_not_zero(mm)) 1560 return -EFAULT; 1561 1562 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 1563 if (!pfns) 1564 return -ENOMEM; 1565 1566 hmm_range.hmm_pfns = pfns; 1567 while (!time_after(jiffies, timeout)) { 1568 hmm_range.notifier_seq = mmu_interval_read_begin(notifier); 1569 if (time_after(jiffies, timeout)) { 1570 err = -ETIME; 1571 break; 1572 } 1573 1574 mmap_read_lock(mm); 1575 err = hmm_range_fault(&hmm_range); 1576 mmap_read_unlock(mm); 1577 if (err != -EBUSY) 1578 break; 1579 } 1580 1581 kvfree(pfns); 1582 mmput(mm); 1583 1584 return err; 1585 } 1586 EXPORT_SYMBOL_GPL(drm_gpusvm_range_evict); 1587 1588 /** 1589 * drm_gpusvm_has_mapping() - Check if GPU SVM has mapping for the given address range 1590 * @gpusvm: Pointer to the GPU SVM structure. 1591 * @start: Start address 1592 * @end: End address 1593 * 1594 * Return: True if GPU SVM has mapping, False otherwise 1595 */ 1596 bool drm_gpusvm_has_mapping(struct drm_gpusvm *gpusvm, unsigned long start, 1597 unsigned long end) 1598 { 1599 struct drm_gpusvm_notifier *notifier; 1600 1601 drm_gpusvm_for_each_notifier(notifier, gpusvm, start, end) { 1602 struct drm_gpusvm_range *range = NULL; 1603 1604 drm_gpusvm_for_each_range(range, notifier, start, end) 1605 return true; 1606 } 1607 1608 return false; 1609 } 1610 EXPORT_SYMBOL_GPL(drm_gpusvm_has_mapping); 1611 1612 /** 1613 * drm_gpusvm_range_set_unmapped() - Mark a GPU SVM range as unmapped 1614 * @range: Pointer to the GPU SVM range structure. 1615 * @mmu_range: Pointer to the MMU notifier range structure. 1616 * 1617 * This function marks a GPU SVM range as unmapped and sets the partial_unmap flag 1618 * if the range partially falls within the provided MMU notifier range. 1619 */ 1620 void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range, 1621 const struct mmu_notifier_range *mmu_range) 1622 { 1623 lockdep_assert_held_write(&range->gpusvm->notifier_lock); 1624 1625 range->pages.flags.unmapped = true; 1626 if (drm_gpusvm_range_start(range) < mmu_range->start || 1627 drm_gpusvm_range_end(range) > mmu_range->end) 1628 range->pages.flags.partial_unmap = true; 1629 } 1630 EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped); 1631 1632 MODULE_DESCRIPTION("DRM GPUSVM"); 1633 MODULE_LICENSE("GPL"); 1634