// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
 * Copyright © 2024 Intel Corporation
 *
 * Authors:
 *     Matthew Brost <matthew.brost@intel.com>
 */

#include <linux/dma-mapping.h>
#include <linux/export.h>
#include <linux/hmm.h>
#include <linux/hugetlb_inline.h>
#include <linux/memremap.h>
#include <linux/mm_types.h>
#include <linux/slab.h>

#include <drm/drm_device.h>
#include <drm/drm_gpusvm.h>
#include <drm/drm_pagemap.h>
#include <drm/drm_print.h>

/**
 * DOC: Overview
 *
 * The GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering
 * Manager (DRM) is a component of the DRM framework designed to manage shared
 * virtual memory between the CPU and GPU. It enables efficient data exchange
 * and processing for GPU-accelerated applications by allowing memory sharing
 * and synchronization between the CPU's and GPU's virtual address spaces.
 *
 * Key GPU SVM Components:
 *
 * - Notifiers:
 *	Used for tracking memory intervals and notifying the GPU of changes,
 *	notifiers are sized based on a GPU SVM initialization parameter, with a
 *	recommendation of 512M or larger. They maintain a red-black tree and a
 *	list of ranges that fall within the notifier interval. Notifiers are
 *	tracked within a GPU SVM red-black tree and list and are dynamically
 *	inserted or removed as ranges within the interval are created or
 *	destroyed.
 *
 * - Ranges:
 *	Represent memory ranges mapped in a DRM device and managed by GPU SVM.
 *	They are sized based on an array of chunk sizes, which is a GPU SVM
 *	initialization parameter, and the CPU address space. Upon GPU fault,
 *	the largest aligned chunk that fits within the faulting CPU address
 *	space is chosen for the range size. Ranges are expected to be
 *	dynamically allocated on GPU fault and removed on an MMU notifier UNMAP
 *	event. As mentioned above, ranges are tracked in a notifier's red-black
 *	tree.
 *
 * - Operations:
 *	Define the interface for driver-specific GPU SVM operations such as
 *	range allocation, notifier allocation, and invalidations.
 *
 * - Device Memory Allocations:
 *	Embedded structure containing enough information for GPU SVM to migrate
 *	to / from device memory.
 *
 * - Device Memory Operations:
 *	Define the interface for driver-specific device memory operations, such
 *	as releasing memory, populating pfns, and copying to / from device
 *	memory.
 *
 * This layer provides interfaces for allocating, mapping, migrating, and
 * releasing memory ranges between the CPU and GPU. It handles all core memory
 * management interactions (DMA mapping, HMM, and migration) and provides
 * driver-specific virtual functions (vfuncs). This infrastructure is sufficient
 * to build the expected driver components for an SVM implementation as detailed
 * below. A minimal initialization sketch follows the component list.
 *
 * Expected Driver Components:
 *
 * - GPU page fault handler:
 *	Used to create ranges and notifiers based on the fault address,
 *	optionally migrate the range to device memory, and create GPU bindings.
 *
 * - Garbage collector:
 *	Used to unmap and destroy GPU bindings for ranges. Ranges are expected
 *	to be added to the garbage collector upon an MMU_NOTIFY_UNMAP event in
 *	the notifier callback.
 *
 * - Notifier callback:
 *	Used to invalidate and DMA unmap GPU bindings for ranges.
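 *
 * A minimal initialization sketch follows. It is hypothetical driver code:
 * struct driver_vm, driver_vm_svm_init()/driver_vm_svm_fini() and the chosen
 * chunk and notifier sizes are illustrative assumptions, not requirements.
 *
 * .. code-block:: c
 *
 *	struct driver_vm {
 *		struct drm_gpusvm gpusvm;
 *		struct mutex svm_lock;	// 'driver_svm_lock', see DOC: Locking
 *		// page tables, garbage collector list, ...
 *	};
 *
 *	int driver_vm_svm_init(struct driver_vm *vm, struct drm_device *drm,
 *			       const struct drm_gpusvm_ops *ops)
 *	{
 *		// Chunk sizes: powers of 2 in descending order, last entry SZ_4K
 *		static const unsigned long chunk_sizes[] = { SZ_2M, SZ_64K, SZ_4K };
 *
 *		return drm_gpusvm_init(&vm->gpusvm, "driver-svm", drm,
 *				       current->mm,
 *				       NULL,		// device-private page owner, if any
 *				       0, TASK_SIZE,	// mirrored CPU address range
 *				       SZ_512M,		// recommended notifier size
 *				       ops, chunk_sizes, ARRAY_SIZE(chunk_sizes));
 *	}
 *
 *	void driver_vm_svm_fini(struct driver_vm *vm)
 *	{
 *		drm_gpusvm_fini(&vm->gpusvm);
 *	}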
 */

/**
 * DOC: Locking
 *
 * GPU SVM handles locking for core MM interactions, i.e., it locks/unlocks the
 * mmap lock as needed.
 *
 * GPU SVM introduces a global notifier lock, which safeguards the notifier's
 * range RB tree and list, as well as the range's DMA mappings and sequence
 * number. GPU SVM manages all necessary locking and unlocking operations,
 * except for the final recheck that a range's pages are still valid
 * (drm_gpusvm_range_pages_valid()) when the driver is committing GPU bindings.
 * This lock corresponds to the ``driver->update`` lock mentioned in
 * Documentation/mm/hmm.rst. Future revisions may transition from a GPU SVM
 * global lock to a per-notifier lock if finer-grained locking is deemed
 * necessary.
 *
 * In addition to the locking mentioned above, the driver should implement a
 * lock to safeguard core GPU SVM function calls that modify state, such as
 * drm_gpusvm_range_find_or_insert() and drm_gpusvm_range_remove(). This lock
 * is denoted as 'driver_svm_lock' in code examples. Finer-grained driver-side
 * locking should also be possible for concurrent GPU fault processing within a
 * single GPU SVM. The 'driver_svm_lock' can be passed to GPU SVM via
 * drm_gpusvm_driver_set_lock() to add lockdep annotations, as sketched below.
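 *
 * A minimal sketch of such a lock, assuming the hypothetical struct driver_vm
 * from the overview sketch (the driver_svm_lock()/driver_svm_unlock() helpers
 * used in the examples below would simply wrap this mutex):
 *
 * .. code-block:: c
 *
 *	void driver_vm_svm_lock_init(struct driver_vm *vm)
 *	{
 *		mutex_init(&vm->svm_lock);
 *		// Let GPU SVM assert the driver lock is held where required
 *		drm_gpusvm_driver_set_lock(&vm->gpusvm, &vm->svm_lock);
 *	}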
 */

/**
 * DOC: Partial Unmapping of Ranges
 *
 * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by the CPU,
 * resulting in an MMU_NOTIFY_UNMAP event) presents several challenges, with
 * the main one being that a subset of the range still has CPU and GPU
 * mappings. If the backing store for the range is in device memory, a subset
 * of the backing store has references. One option would be to split the range
 * and device memory backing store, but the implementation for this would be
 * quite complicated. Given that partial unmappings are rare and driver-defined
 * range sizes are relatively small, GPU SVM does not support splitting of
 * ranges.
 *
 * With no support for range splitting, upon partial unmapping of a range, the
 * driver is expected to invalidate and destroy the entire range. If the range
 * has device memory as its backing, the driver is also expected to migrate any
 * remaining pages back to RAM.
 */

/**
 * DOC: Examples
 *
 * This section provides three examples of how to build the expected driver
 * components: the GPU page fault handler, the garbage collector, and the
 * notifier callback.
 *
 * The generic code provided does not include logic for complex migration
 * policies, optimized invalidations, fine-grained driver locking, or other
 * potentially required driver locking (e.g., DMA-resv locks).
 *
 * 1) GPU page fault handler
 *
 * .. code-block:: c
 *
 *	int driver_bind_range(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range)
 *	{
 *		int err = 0;
 *
 *		driver_alloc_and_setup_memory_for_bind(gpusvm, range);
 *
 *		drm_gpusvm_notifier_lock(gpusvm);
 *		if (drm_gpusvm_range_pages_valid(gpusvm, range))
 *			driver_commit_bind(gpusvm, range);
 *		else
 *			err = -EAGAIN;
 *		drm_gpusvm_notifier_unlock(gpusvm);
 *
 *		return err;
 *	}
 *
 *	int driver_gpu_fault(struct drm_gpusvm *gpusvm, unsigned long fault_addr,
 *			     unsigned long gpuva_start, unsigned long gpuva_end)
 *	{
 *		struct drm_gpusvm_range *range;
 *		struct drm_gpusvm_ctx ctx = {};
 *		int err;
 *
 *		driver_svm_lock();
 *	retry:
 *		// Always process UNMAPs first so view of GPU SVM ranges is current
 *		driver_garbage_collector(gpusvm);
 *
 *		range = drm_gpusvm_range_find_or_insert(gpusvm, fault_addr,
 *							gpuva_start, gpuva_end,
 *							&ctx);
 *		if (IS_ERR(range)) {
 *			err = PTR_ERR(range);
 *			goto unlock;
 *		}
 *
 *		if (driver_migration_policy(range)) {
 *			err = drm_pagemap_populate_mm(driver_choose_drm_pagemap(),
 *						      gpuva_start, gpuva_end, gpusvm->mm,
 *						      ctx.timeslice_ms);
 *			if (err)	// CPU mappings may have changed
 *				goto retry;
 *		}
 *
 *		err = drm_gpusvm_range_get_pages(gpusvm, range, &ctx);
 *		if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {	// CPU mappings changed
 *			if (err == -EOPNOTSUPP)
 *				drm_gpusvm_range_evict(gpusvm, range);
 *			goto retry;
 *		} else if (err) {
 *			goto unlock;
 *		}
 *
 *		err = driver_bind_range(gpusvm, range);
 *		if (err == -EAGAIN)	// CPU mappings changed
 *			goto retry;
 *
 *	unlock:
 *		driver_svm_unlock();
 *		return err;
 *	}
 *
 * 2) Garbage Collector
 *
 * .. code-block:: c
 *
 *	void __driver_garbage_collector(struct drm_gpusvm *gpusvm,
 *					struct drm_gpusvm_range *range)
 *	{
 *		assert_driver_svm_locked(gpusvm);
 *
 *		// Partial unmap, migrate any remaining device memory pages back to RAM
 *		if (range->pages.flags.partial_unmap)
 *			drm_gpusvm_range_evict(gpusvm, range);
 *
 *		driver_unbind_range(range);
 *		drm_gpusvm_range_remove(gpusvm, range);
 *	}
 *
 *	void driver_garbage_collector(struct drm_gpusvm *gpusvm)
 *	{
 *		struct drm_gpusvm_range *range;
 *
 *		assert_driver_svm_locked(gpusvm);
 *
 *		for_each_range_in_garbage_collector(gpusvm, range)
 *			__driver_garbage_collector(gpusvm, range);
 *	}
 *
 * 3) Notifier callback
 *
 * .. code-block:: c
 *
 *	void driver_invalidation(struct drm_gpusvm *gpusvm,
 *				 struct drm_gpusvm_notifier *notifier,
 *				 const struct mmu_notifier_range *mmu_range)
 *	{
 *		struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
 *		struct drm_gpusvm_range *range = NULL;
 *
 *		driver_invalidate_device_pages(gpusvm, mmu_range->start, mmu_range->end);
 *
 *		drm_gpusvm_for_each_range(range, notifier, mmu_range->start,
 *					  mmu_range->end) {
 *			drm_gpusvm_range_unmap_pages(gpusvm, range, &ctx);
 *
 *			if (mmu_range->event != MMU_NOTIFY_UNMAP)
 *				continue;
 *
 *			drm_gpusvm_range_set_unmapped(range, mmu_range);
 *			driver_garbage_collector_add(gpusvm, range);
 *		}
 *	}
 */

/**
 * npages_in_range() - Calculate the number of pages in a given range
 * @start: The start address of the range
 * @end: The end address of the range
 *
 * This function calculates the number of pages in a given memory range,
 * specified by the start and end addresses.
It divides the difference 262 * between the end and start addresses by the page size (PAGE_SIZE) to 263 * determine the number of pages in the range. 264 * 265 * Return: The number of pages in the specified range. 266 */ 267 static unsigned long 268 npages_in_range(unsigned long start, unsigned long end) 269 { 270 return (end - start) >> PAGE_SHIFT; 271 } 272 273 /** 274 * drm_gpusvm_notifier_find() - Find GPU SVM notifier from GPU SVM 275 * @gpusvm: Pointer to the GPU SVM structure. 276 * @start: Start address of the notifier 277 * @end: End address of the notifier 278 * 279 * Return: A pointer to the drm_gpusvm_notifier if found or NULL 280 */ 281 struct drm_gpusvm_notifier * 282 drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm, unsigned long start, 283 unsigned long end) 284 { 285 struct interval_tree_node *itree; 286 287 itree = interval_tree_iter_first(&gpusvm->root, start, end - 1); 288 289 if (itree) 290 return container_of(itree, struct drm_gpusvm_notifier, itree); 291 else 292 return NULL; 293 } 294 EXPORT_SYMBOL_GPL(drm_gpusvm_notifier_find); 295 296 /** 297 * drm_gpusvm_range_find() - Find GPU SVM range from GPU SVM notifier 298 * @notifier: Pointer to the GPU SVM notifier structure. 299 * @start: Start address of the range 300 * @end: End address of the range 301 * 302 * Return: A pointer to the drm_gpusvm_range if found or NULL 303 */ 304 struct drm_gpusvm_range * 305 drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start, 306 unsigned long end) 307 { 308 struct interval_tree_node *itree; 309 310 itree = interval_tree_iter_first(¬ifier->root, start, end - 1); 311 312 if (itree) 313 return container_of(itree, struct drm_gpusvm_range, itree); 314 else 315 return NULL; 316 } 317 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find); 318 319 /** 320 * drm_gpusvm_notifier_invalidate() - Invalidate a GPU SVM notifier. 321 * @mni: Pointer to the mmu_interval_notifier structure. 322 * @mmu_range: Pointer to the mmu_notifier_range structure. 323 * @cur_seq: Current sequence number. 324 * 325 * This function serves as a generic MMU notifier for GPU SVM. It sets the MMU 326 * notifier sequence number and calls the driver invalidate vfunc under 327 * gpusvm->notifier_lock. 328 * 329 * Return: true if the operation succeeds, false otherwise. 330 */ 331 static bool 332 drm_gpusvm_notifier_invalidate(struct mmu_interval_notifier *mni, 333 const struct mmu_notifier_range *mmu_range, 334 unsigned long cur_seq) 335 { 336 struct drm_gpusvm_notifier *notifier = 337 container_of(mni, typeof(*notifier), notifier); 338 struct drm_gpusvm *gpusvm = notifier->gpusvm; 339 340 if (!mmu_notifier_range_blockable(mmu_range)) 341 return false; 342 343 down_write(&gpusvm->notifier_lock); 344 mmu_interval_set_seq(mni, cur_seq); 345 gpusvm->ops->invalidate(gpusvm, notifier, mmu_range); 346 up_write(&gpusvm->notifier_lock); 347 348 return true; 349 } 350 351 /* 352 * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM 353 */ 354 static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = { 355 .invalidate = drm_gpusvm_notifier_invalidate, 356 }; 357 358 /** 359 * drm_gpusvm_init() - Initialize the GPU SVM. 360 * @gpusvm: Pointer to the GPU SVM structure. 361 * @name: Name of the GPU SVM. 362 * @drm: Pointer to the DRM device structure. 363 * @mm: Pointer to the mm_struct for the address space. 364 * @device_private_page_owner: Device private pages owner. 365 * @mm_start: Start address of GPU SVM. 366 * @mm_range: Range of the GPU SVM. 
367 * @notifier_size: Size of individual notifiers. 368 * @ops: Pointer to the operations structure for GPU SVM. 369 * @chunk_sizes: Pointer to the array of chunk sizes used in range allocation. 370 * Entries should be powers of 2 in descending order with last 371 * entry being SZ_4K. 372 * @num_chunks: Number of chunks. 373 * 374 * This function initializes the GPU SVM. 375 * 376 * Note: If only using the simple drm_gpusvm_pages API (get/unmap/free), 377 * then only @gpusvm, @name, and @drm are expected. However, the same base 378 * @gpusvm can also be used with both modes together in which case the full 379 * setup is needed, where the core drm_gpusvm_pages API will simply never use 380 * the other fields. 381 * 382 * Return: 0 on success, a negative error code on failure. 383 */ 384 int drm_gpusvm_init(struct drm_gpusvm *gpusvm, 385 const char *name, struct drm_device *drm, 386 struct mm_struct *mm, void *device_private_page_owner, 387 unsigned long mm_start, unsigned long mm_range, 388 unsigned long notifier_size, 389 const struct drm_gpusvm_ops *ops, 390 const unsigned long *chunk_sizes, int num_chunks) 391 { 392 if (mm) { 393 if (!ops->invalidate || !num_chunks) 394 return -EINVAL; 395 mmgrab(mm); 396 } else { 397 /* No full SVM mode, only core drm_gpusvm_pages API. */ 398 if (ops || num_chunks || mm_range || notifier_size || 399 device_private_page_owner) 400 return -EINVAL; 401 } 402 403 gpusvm->name = name; 404 gpusvm->drm = drm; 405 gpusvm->mm = mm; 406 gpusvm->device_private_page_owner = device_private_page_owner; 407 gpusvm->mm_start = mm_start; 408 gpusvm->mm_range = mm_range; 409 gpusvm->notifier_size = notifier_size; 410 gpusvm->ops = ops; 411 gpusvm->chunk_sizes = chunk_sizes; 412 gpusvm->num_chunks = num_chunks; 413 414 gpusvm->root = RB_ROOT_CACHED; 415 INIT_LIST_HEAD(&gpusvm->notifier_list); 416 417 init_rwsem(&gpusvm->notifier_lock); 418 419 fs_reclaim_acquire(GFP_KERNEL); 420 might_lock(&gpusvm->notifier_lock); 421 fs_reclaim_release(GFP_KERNEL); 422 423 #ifdef CONFIG_LOCKDEP 424 gpusvm->lock_dep_map = NULL; 425 #endif 426 427 return 0; 428 } 429 EXPORT_SYMBOL_GPL(drm_gpusvm_init); 430 431 /** 432 * to_drm_gpusvm_notifier() - retrieve the container struct for a given rbtree node 433 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_notifier struct 434 * 435 * Return: A pointer to the containing drm_gpusvm_notifier structure. 436 */ 437 static struct drm_gpusvm_notifier *to_drm_gpusvm_notifier(struct rb_node *node) 438 { 439 return container_of(node, struct drm_gpusvm_notifier, itree.rb); 440 } 441 442 /** 443 * drm_gpusvm_notifier_insert() - Insert GPU SVM notifier 444 * @gpusvm: Pointer to the GPU SVM structure 445 * @notifier: Pointer to the GPU SVM notifier structure 446 * 447 * This function inserts the GPU SVM notifier into the GPU SVM RB tree and list. 448 */ 449 static void drm_gpusvm_notifier_insert(struct drm_gpusvm *gpusvm, 450 struct drm_gpusvm_notifier *notifier) 451 { 452 struct rb_node *node; 453 struct list_head *head; 454 455 interval_tree_insert(¬ifier->itree, &gpusvm->root); 456 457 node = rb_prev(¬ifier->itree.rb); 458 if (node) 459 head = &(to_drm_gpusvm_notifier(node))->entry; 460 else 461 head = &gpusvm->notifier_list; 462 463 list_add(¬ifier->entry, head); 464 } 465 466 /** 467 * drm_gpusvm_notifier_remove() - Remove GPU SVM notifier 468 * @gpusvm: Pointer to the GPU SVM tructure 469 * @notifier: Pointer to the GPU SVM notifier structure 470 * 471 * This function removes the GPU SVM notifier from the GPU SVM RB tree and list. 
472 */ 473 static void drm_gpusvm_notifier_remove(struct drm_gpusvm *gpusvm, 474 struct drm_gpusvm_notifier *notifier) 475 { 476 interval_tree_remove(¬ifier->itree, &gpusvm->root); 477 list_del(¬ifier->entry); 478 } 479 480 /** 481 * drm_gpusvm_fini() - Finalize the GPU SVM. 482 * @gpusvm: Pointer to the GPU SVM structure. 483 * 484 * This function finalizes the GPU SVM by cleaning up any remaining ranges and 485 * notifiers, and dropping a reference to struct MM. 486 */ 487 void drm_gpusvm_fini(struct drm_gpusvm *gpusvm) 488 { 489 struct drm_gpusvm_notifier *notifier, *next; 490 491 drm_gpusvm_for_each_notifier_safe(notifier, next, gpusvm, 0, LONG_MAX) { 492 struct drm_gpusvm_range *range, *__next; 493 494 /* 495 * Remove notifier first to avoid racing with any invalidation 496 */ 497 mmu_interval_notifier_remove(¬ifier->notifier); 498 notifier->flags.removed = true; 499 500 drm_gpusvm_for_each_range_safe(range, __next, notifier, 0, 501 LONG_MAX) 502 drm_gpusvm_range_remove(gpusvm, range); 503 } 504 505 if (gpusvm->mm) 506 mmdrop(gpusvm->mm); 507 WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root)); 508 } 509 EXPORT_SYMBOL_GPL(drm_gpusvm_fini); 510 511 /** 512 * drm_gpusvm_notifier_alloc() - Allocate GPU SVM notifier 513 * @gpusvm: Pointer to the GPU SVM structure 514 * @fault_addr: Fault address 515 * 516 * This function allocates and initializes the GPU SVM notifier structure. 517 * 518 * Return: Pointer to the allocated GPU SVM notifier on success, ERR_PTR() on failure. 519 */ 520 static struct drm_gpusvm_notifier * 521 drm_gpusvm_notifier_alloc(struct drm_gpusvm *gpusvm, unsigned long fault_addr) 522 { 523 struct drm_gpusvm_notifier *notifier; 524 525 if (gpusvm->ops->notifier_alloc) 526 notifier = gpusvm->ops->notifier_alloc(); 527 else 528 notifier = kzalloc(sizeof(*notifier), GFP_KERNEL); 529 530 if (!notifier) 531 return ERR_PTR(-ENOMEM); 532 533 notifier->gpusvm = gpusvm; 534 notifier->itree.start = ALIGN_DOWN(fault_addr, gpusvm->notifier_size); 535 notifier->itree.last = ALIGN(fault_addr + 1, gpusvm->notifier_size) - 1; 536 INIT_LIST_HEAD(¬ifier->entry); 537 notifier->root = RB_ROOT_CACHED; 538 INIT_LIST_HEAD(¬ifier->range_list); 539 540 return notifier; 541 } 542 543 /** 544 * drm_gpusvm_notifier_free() - Free GPU SVM notifier 545 * @gpusvm: Pointer to the GPU SVM structure 546 * @notifier: Pointer to the GPU SVM notifier structure 547 * 548 * This function frees the GPU SVM notifier structure. 549 */ 550 static void drm_gpusvm_notifier_free(struct drm_gpusvm *gpusvm, 551 struct drm_gpusvm_notifier *notifier) 552 { 553 WARN_ON(!RB_EMPTY_ROOT(¬ifier->root.rb_root)); 554 555 if (gpusvm->ops->notifier_free) 556 gpusvm->ops->notifier_free(notifier); 557 else 558 kfree(notifier); 559 } 560 561 /** 562 * to_drm_gpusvm_range() - retrieve the container struct for a given rbtree node 563 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_range struct 564 * 565 * Return: A pointer to the containing drm_gpusvm_range structure. 566 */ 567 static struct drm_gpusvm_range *to_drm_gpusvm_range(struct rb_node *node) 568 { 569 return container_of(node, struct drm_gpusvm_range, itree.rb); 570 } 571 572 /** 573 * drm_gpusvm_range_insert() - Insert GPU SVM range 574 * @notifier: Pointer to the GPU SVM notifier structure 575 * @range: Pointer to the GPU SVM range structure 576 * 577 * This function inserts the GPU SVM range into the notifier RB tree and list. 
578 */ 579 static void drm_gpusvm_range_insert(struct drm_gpusvm_notifier *notifier, 580 struct drm_gpusvm_range *range) 581 { 582 struct rb_node *node; 583 struct list_head *head; 584 585 drm_gpusvm_notifier_lock(notifier->gpusvm); 586 interval_tree_insert(&range->itree, ¬ifier->root); 587 588 node = rb_prev(&range->itree.rb); 589 if (node) 590 head = &(to_drm_gpusvm_range(node))->entry; 591 else 592 head = ¬ifier->range_list; 593 594 list_add(&range->entry, head); 595 drm_gpusvm_notifier_unlock(notifier->gpusvm); 596 } 597 598 /** 599 * __drm_gpusvm_range_remove() - Remove GPU SVM range 600 * @notifier: Pointer to the GPU SVM notifier structure 601 * @range: Pointer to the GPU SVM range structure 602 * 603 * This macro removes the GPU SVM range from the notifier RB tree and list. 604 */ 605 static void __drm_gpusvm_range_remove(struct drm_gpusvm_notifier *notifier, 606 struct drm_gpusvm_range *range) 607 { 608 interval_tree_remove(&range->itree, ¬ifier->root); 609 list_del(&range->entry); 610 } 611 612 /** 613 * drm_gpusvm_range_alloc() - Allocate GPU SVM range 614 * @gpusvm: Pointer to the GPU SVM structure 615 * @notifier: Pointer to the GPU SVM notifier structure 616 * @fault_addr: Fault address 617 * @chunk_size: Chunk size 618 * @migrate_devmem: Flag indicating whether to migrate device memory 619 * 620 * This function allocates and initializes the GPU SVM range structure. 621 * 622 * Return: Pointer to the allocated GPU SVM range on success, ERR_PTR() on failure. 623 */ 624 static struct drm_gpusvm_range * 625 drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm, 626 struct drm_gpusvm_notifier *notifier, 627 unsigned long fault_addr, unsigned long chunk_size, 628 bool migrate_devmem) 629 { 630 struct drm_gpusvm_range *range; 631 632 if (gpusvm->ops->range_alloc) 633 range = gpusvm->ops->range_alloc(gpusvm); 634 else 635 range = kzalloc(sizeof(*range), GFP_KERNEL); 636 637 if (!range) 638 return ERR_PTR(-ENOMEM); 639 640 kref_init(&range->refcount); 641 range->gpusvm = gpusvm; 642 range->notifier = notifier; 643 range->itree.start = ALIGN_DOWN(fault_addr, chunk_size); 644 range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1; 645 INIT_LIST_HEAD(&range->entry); 646 range->pages.notifier_seq = LONG_MAX; 647 range->pages.flags.migrate_devmem = migrate_devmem ? 1 : 0; 648 649 return range; 650 } 651 652 /** 653 * drm_gpusvm_hmm_pfn_to_order() - Get the largest CPU mapping order. 654 * @hmm_pfn: The current hmm_pfn. 655 * @hmm_pfn_index: Index of the @hmm_pfn within the pfn array. 656 * @npages: Number of pages within the pfn array i.e the hmm range size. 657 * 658 * To allow skipping PFNs with the same flags (like when they belong to 659 * the same huge PTE) when looping over the pfn array, take a given a hmm_pfn, 660 * and return the largest order that will fit inside the CPU PTE, but also 661 * crucially accounting for the original hmm range boundaries. 662 * 663 * Return: The largest order that will safely fit within the size of the hmm_pfn 664 * CPU PTE. 
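 *
 * For example, a caller iterating over a pfn array can use the returned order
 * to skip the pages covered by the same CPU mapping (a minimal sketch of the
 * pattern used by the helpers in this file):
 *
 * .. code-block:: c
 *
 *	for (i = 0; i < npages;) {
 *		unsigned int order;
 *
 *		order = drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages);
 *		// handle the 1 << order pages described by pfns[i] ...
 *		i += 1UL << order;
 *	}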
665 */ 666 static unsigned int drm_gpusvm_hmm_pfn_to_order(unsigned long hmm_pfn, 667 unsigned long hmm_pfn_index, 668 unsigned long npages) 669 { 670 unsigned long size; 671 672 size = 1UL << hmm_pfn_to_map_order(hmm_pfn); 673 size -= (hmm_pfn & ~HMM_PFN_FLAGS) & (size - 1); 674 hmm_pfn_index += size; 675 if (hmm_pfn_index > npages) 676 size -= (hmm_pfn_index - npages); 677 678 return ilog2(size); 679 } 680 681 /** 682 * drm_gpusvm_check_pages() - Check pages 683 * @gpusvm: Pointer to the GPU SVM structure 684 * @notifier: Pointer to the GPU SVM notifier structure 685 * @start: Start address 686 * @end: End address 687 * 688 * Check if pages between start and end have been faulted in on the CPU. Use to 689 * prevent migration of pages without CPU backing store. 690 * 691 * Return: True if pages have been faulted into CPU, False otherwise 692 */ 693 static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm, 694 struct drm_gpusvm_notifier *notifier, 695 unsigned long start, unsigned long end) 696 { 697 struct hmm_range hmm_range = { 698 .default_flags = 0, 699 .notifier = ¬ifier->notifier, 700 .start = start, 701 .end = end, 702 .dev_private_owner = gpusvm->device_private_page_owner, 703 }; 704 unsigned long timeout = 705 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 706 unsigned long *pfns; 707 unsigned long npages = npages_in_range(start, end); 708 int err, i; 709 710 mmap_assert_locked(gpusvm->mm); 711 712 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 713 if (!pfns) 714 return false; 715 716 hmm_range.notifier_seq = mmu_interval_read_begin(¬ifier->notifier); 717 hmm_range.hmm_pfns = pfns; 718 719 while (true) { 720 err = hmm_range_fault(&hmm_range); 721 if (err == -EBUSY) { 722 if (time_after(jiffies, timeout)) 723 break; 724 725 hmm_range.notifier_seq = 726 mmu_interval_read_begin(¬ifier->notifier); 727 continue; 728 } 729 break; 730 } 731 if (err) 732 goto err_free; 733 734 for (i = 0; i < npages;) { 735 if (!(pfns[i] & HMM_PFN_VALID)) { 736 err = -EFAULT; 737 goto err_free; 738 } 739 i += 0x1 << drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages); 740 } 741 742 err_free: 743 kvfree(pfns); 744 return err ? false : true; 745 } 746 747 /** 748 * drm_gpusvm_range_chunk_size() - Determine chunk size for GPU SVM range 749 * @gpusvm: Pointer to the GPU SVM structure 750 * @notifier: Pointer to the GPU SVM notifier structure 751 * @vas: Pointer to the virtual memory area structure 752 * @fault_addr: Fault address 753 * @gpuva_start: Start address of GPUVA which mirrors CPU 754 * @gpuva_end: End address of GPUVA which mirrors CPU 755 * @check_pages_threshold: Check CPU pages for present threshold 756 * 757 * This function determines the chunk size for the GPU SVM range based on the 758 * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual 759 * memory area boundaries. 760 * 761 * Return: Chunk size on success, LONG_MAX on failure. 
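 *
 * For example, with chunk sizes of { SZ_2M, SZ_64K, SZ_4K } (hypothetical
 * values), a fault at 0x2ff000 inside a VMA and GPUVA mirror that both span
 * 0x200000-0x300000 (and a much larger notifier) selects SZ_64K: the aligned
 * 2M chunk (0x200000-0x400000) extends past the end of the mappings, while the
 * aligned 64K chunk (0x2f0000-0x300000) fits entirely within them, subject to
 * the additional overlap and check_pages checks below.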
762 */ 763 static unsigned long 764 drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm, 765 struct drm_gpusvm_notifier *notifier, 766 struct vm_area_struct *vas, 767 unsigned long fault_addr, 768 unsigned long gpuva_start, 769 unsigned long gpuva_end, 770 unsigned long check_pages_threshold) 771 { 772 unsigned long start, end; 773 int i = 0; 774 775 retry: 776 for (; i < gpusvm->num_chunks; ++i) { 777 start = ALIGN_DOWN(fault_addr, gpusvm->chunk_sizes[i]); 778 end = ALIGN(fault_addr + 1, gpusvm->chunk_sizes[i]); 779 780 if (start >= vas->vm_start && end <= vas->vm_end && 781 start >= drm_gpusvm_notifier_start(notifier) && 782 end <= drm_gpusvm_notifier_end(notifier) && 783 start >= gpuva_start && end <= gpuva_end) 784 break; 785 } 786 787 if (i == gpusvm->num_chunks) 788 return LONG_MAX; 789 790 /* 791 * If allocation more than page, ensure not to overlap with existing 792 * ranges. 793 */ 794 if (end - start != SZ_4K) { 795 struct drm_gpusvm_range *range; 796 797 range = drm_gpusvm_range_find(notifier, start, end); 798 if (range) { 799 ++i; 800 goto retry; 801 } 802 803 /* 804 * XXX: Only create range on pages CPU has faulted in. Without 805 * this check, or prefault, on BMG 'xe_exec_system_allocator --r 806 * process-many-malloc' fails. In the failure case, each process 807 * mallocs 16k but the CPU VMA is ~128k which results in 64k SVM 808 * ranges. When migrating the SVM ranges, some processes fail in 809 * drm_pagemap_migrate_to_devmem with 'migrate.cpages != npages' 810 * and then upon drm_gpusvm_range_get_pages device pages from 811 * other processes are collected + faulted in which creates all 812 * sorts of problems. Unsure exactly how this happening, also 813 * problem goes away if 'xe_exec_system_allocator --r 814 * process-many-malloc' mallocs at least 64k at a time. 815 */ 816 if (end - start <= check_pages_threshold && 817 !drm_gpusvm_check_pages(gpusvm, notifier, start, end)) { 818 ++i; 819 goto retry; 820 } 821 } 822 823 return end - start; 824 } 825 826 #ifdef CONFIG_LOCKDEP 827 /** 828 * drm_gpusvm_driver_lock_held() - Assert GPU SVM driver lock is held 829 * @gpusvm: Pointer to the GPU SVM structure. 830 * 831 * Ensure driver lock is held. 832 */ 833 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm) 834 { 835 if ((gpusvm)->lock_dep_map) 836 lockdep_assert(lock_is_held_type((gpusvm)->lock_dep_map, 0)); 837 } 838 #else 839 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm) 840 { 841 } 842 #endif 843 844 /** 845 * drm_gpusvm_find_vma_start() - Find start address for first VMA in range 846 * @gpusvm: Pointer to the GPU SVM structure 847 * @start: The inclusive start user address. 848 * @end: The exclusive end user address. 849 * 850 * Returns: The start address of first VMA within the provided range, 851 * ULONG_MAX otherwise. Assumes start_addr < end_addr. 
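 *
 * A typical use (hypothetical driver code) is clamping the start of a
 * speculative operation, such as a prefetch, to memory actually backed by a
 * CPU mapping:
 *
 * .. code-block:: c
 *
 *	start = drm_gpusvm_find_vma_start(gpusvm, start, end);
 *	if (start == ULONG_MAX)
 *		return -ENOENT;	// no VMA intersects [start, end)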
852 */ 853 unsigned long 854 drm_gpusvm_find_vma_start(struct drm_gpusvm *gpusvm, 855 unsigned long start, 856 unsigned long end) 857 { 858 struct mm_struct *mm = gpusvm->mm; 859 struct vm_area_struct *vma; 860 unsigned long addr = ULONG_MAX; 861 862 if (!mmget_not_zero(mm)) 863 return addr; 864 865 mmap_read_lock(mm); 866 867 vma = find_vma_intersection(mm, start, end); 868 if (vma) 869 addr = vma->vm_start; 870 871 mmap_read_unlock(mm); 872 mmput(mm); 873 874 return addr; 875 } 876 EXPORT_SYMBOL_GPL(drm_gpusvm_find_vma_start); 877 878 /** 879 * drm_gpusvm_range_find_or_insert() - Find or insert GPU SVM range 880 * @gpusvm: Pointer to the GPU SVM structure 881 * @fault_addr: Fault address 882 * @gpuva_start: Start address of GPUVA which mirrors CPU 883 * @gpuva_end: End address of GPUVA which mirrors CPU 884 * @ctx: GPU SVM context 885 * 886 * This function finds or inserts a newly allocated a GPU SVM range based on the 887 * fault address. Caller must hold a lock to protect range lookup and insertion. 888 * 889 * Return: Pointer to the GPU SVM range on success, ERR_PTR() on failure. 890 */ 891 struct drm_gpusvm_range * 892 drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm, 893 unsigned long fault_addr, 894 unsigned long gpuva_start, 895 unsigned long gpuva_end, 896 const struct drm_gpusvm_ctx *ctx) 897 { 898 struct drm_gpusvm_notifier *notifier; 899 struct drm_gpusvm_range *range; 900 struct mm_struct *mm = gpusvm->mm; 901 struct vm_area_struct *vas; 902 bool notifier_alloc = false; 903 unsigned long chunk_size; 904 int err; 905 bool migrate_devmem; 906 907 drm_gpusvm_driver_lock_held(gpusvm); 908 909 if (fault_addr < gpusvm->mm_start || 910 fault_addr > gpusvm->mm_start + gpusvm->mm_range) 911 return ERR_PTR(-EINVAL); 912 913 if (!mmget_not_zero(mm)) 914 return ERR_PTR(-EFAULT); 915 916 notifier = drm_gpusvm_notifier_find(gpusvm, fault_addr, fault_addr + 1); 917 if (!notifier) { 918 notifier = drm_gpusvm_notifier_alloc(gpusvm, fault_addr); 919 if (IS_ERR(notifier)) { 920 err = PTR_ERR(notifier); 921 goto err_mmunlock; 922 } 923 notifier_alloc = true; 924 err = mmu_interval_notifier_insert(¬ifier->notifier, 925 mm, 926 drm_gpusvm_notifier_start(notifier), 927 drm_gpusvm_notifier_size(notifier), 928 &drm_gpusvm_notifier_ops); 929 if (err) 930 goto err_notifier; 931 } 932 933 mmap_read_lock(mm); 934 935 vas = vma_lookup(mm, fault_addr); 936 if (!vas) { 937 err = -ENOENT; 938 goto err_notifier_remove; 939 } 940 941 if (!ctx->read_only && !(vas->vm_flags & VM_WRITE)) { 942 err = -EPERM; 943 goto err_notifier_remove; 944 } 945 946 range = drm_gpusvm_range_find(notifier, fault_addr, fault_addr + 1); 947 if (range) 948 goto out_mmunlock; 949 /* 950 * XXX: Short-circuiting migration based on migrate_vma_* current 951 * limitations. If/when migrate_vma_* add more support, this logic will 952 * have to change. 
953 */ 954 migrate_devmem = ctx->devmem_possible && 955 vma_is_anonymous(vas) && !is_vm_hugetlb_page(vas); 956 957 chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas, 958 fault_addr, gpuva_start, 959 gpuva_end, 960 ctx->check_pages_threshold); 961 if (chunk_size == LONG_MAX) { 962 err = -EINVAL; 963 goto err_notifier_remove; 964 } 965 966 range = drm_gpusvm_range_alloc(gpusvm, notifier, fault_addr, chunk_size, 967 migrate_devmem); 968 if (IS_ERR(range)) { 969 err = PTR_ERR(range); 970 goto err_notifier_remove; 971 } 972 973 drm_gpusvm_range_insert(notifier, range); 974 if (notifier_alloc) 975 drm_gpusvm_notifier_insert(gpusvm, notifier); 976 977 out_mmunlock: 978 mmap_read_unlock(mm); 979 mmput(mm); 980 981 return range; 982 983 err_notifier_remove: 984 mmap_read_unlock(mm); 985 if (notifier_alloc) 986 mmu_interval_notifier_remove(¬ifier->notifier); 987 err_notifier: 988 if (notifier_alloc) 989 drm_gpusvm_notifier_free(gpusvm, notifier); 990 err_mmunlock: 991 mmput(mm); 992 return ERR_PTR(err); 993 } 994 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert); 995 996 /** 997 * __drm_gpusvm_unmap_pages() - Unmap pages associated with GPU SVM pages (internal) 998 * @gpusvm: Pointer to the GPU SVM structure 999 * @svm_pages: Pointer to the GPU SVM pages structure 1000 * @npages: Number of pages to unmap 1001 * 1002 * This function unmap pages associated with a GPU SVM pages struct. Assumes and 1003 * asserts correct locking is in place when called. 1004 */ 1005 static void __drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm, 1006 struct drm_gpusvm_pages *svm_pages, 1007 unsigned long npages) 1008 { 1009 struct drm_pagemap *dpagemap = svm_pages->dpagemap; 1010 struct device *dev = gpusvm->drm->dev; 1011 unsigned long i, j; 1012 1013 lockdep_assert_held(&gpusvm->notifier_lock); 1014 1015 if (svm_pages->flags.has_dma_mapping) { 1016 struct drm_gpusvm_pages_flags flags = { 1017 .__flags = svm_pages->flags.__flags, 1018 }; 1019 1020 for (i = 0, j = 0; i < npages; j++) { 1021 struct drm_pagemap_addr *addr = &svm_pages->dma_addr[j]; 1022 1023 if (addr->proto == DRM_INTERCONNECT_SYSTEM) 1024 dma_unmap_page(dev, 1025 addr->addr, 1026 PAGE_SIZE << addr->order, 1027 addr->dir); 1028 else if (dpagemap && dpagemap->ops->device_unmap) 1029 dpagemap->ops->device_unmap(dpagemap, 1030 dev, *addr); 1031 i += 1 << addr->order; 1032 } 1033 1034 /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ 1035 flags.has_devmem_pages = false; 1036 flags.has_dma_mapping = false; 1037 WRITE_ONCE(svm_pages->flags.__flags, flags.__flags); 1038 1039 svm_pages->dpagemap = NULL; 1040 } 1041 } 1042 1043 /** 1044 * __drm_gpusvm_free_pages() - Free dma array associated with GPU SVM pages 1045 * @gpusvm: Pointer to the GPU SVM structure 1046 * @svm_pages: Pointer to the GPU SVM pages structure 1047 * 1048 * This function frees the dma address array associated with a GPU SVM range. 
1049 */ 1050 static void __drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm, 1051 struct drm_gpusvm_pages *svm_pages) 1052 { 1053 lockdep_assert_held(&gpusvm->notifier_lock); 1054 1055 if (svm_pages->dma_addr) { 1056 kvfree(svm_pages->dma_addr); 1057 svm_pages->dma_addr = NULL; 1058 } 1059 } 1060 1061 /** 1062 * drm_gpusvm_free_pages() - Free dma-mapping associated with GPU SVM pages 1063 * struct 1064 * @gpusvm: Pointer to the GPU SVM structure 1065 * @svm_pages: Pointer to the GPU SVM pages structure 1066 * @npages: Number of mapped pages 1067 * 1068 * This function unmaps and frees the dma address array associated with a GPU 1069 * SVM pages struct. 1070 */ 1071 void drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm, 1072 struct drm_gpusvm_pages *svm_pages, 1073 unsigned long npages) 1074 { 1075 drm_gpusvm_notifier_lock(gpusvm); 1076 __drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages); 1077 __drm_gpusvm_free_pages(gpusvm, svm_pages); 1078 drm_gpusvm_notifier_unlock(gpusvm); 1079 } 1080 EXPORT_SYMBOL_GPL(drm_gpusvm_free_pages); 1081 1082 /** 1083 * drm_gpusvm_range_remove() - Remove GPU SVM range 1084 * @gpusvm: Pointer to the GPU SVM structure 1085 * @range: Pointer to the GPU SVM range to be removed 1086 * 1087 * This function removes the specified GPU SVM range and also removes the parent 1088 * GPU SVM notifier if no more ranges remain in the notifier. The caller must 1089 * hold a lock to protect range and notifier removal. 1090 */ 1091 void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm, 1092 struct drm_gpusvm_range *range) 1093 { 1094 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1095 drm_gpusvm_range_end(range)); 1096 struct drm_gpusvm_notifier *notifier; 1097 1098 drm_gpusvm_driver_lock_held(gpusvm); 1099 1100 notifier = drm_gpusvm_notifier_find(gpusvm, 1101 drm_gpusvm_range_start(range), 1102 drm_gpusvm_range_start(range) + 1); 1103 if (WARN_ON_ONCE(!notifier)) 1104 return; 1105 1106 drm_gpusvm_notifier_lock(gpusvm); 1107 __drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages); 1108 __drm_gpusvm_free_pages(gpusvm, &range->pages); 1109 __drm_gpusvm_range_remove(notifier, range); 1110 drm_gpusvm_notifier_unlock(gpusvm); 1111 1112 drm_gpusvm_range_put(range); 1113 1114 if (RB_EMPTY_ROOT(¬ifier->root.rb_root)) { 1115 if (!notifier->flags.removed) 1116 mmu_interval_notifier_remove(¬ifier->notifier); 1117 drm_gpusvm_notifier_remove(gpusvm, notifier); 1118 drm_gpusvm_notifier_free(gpusvm, notifier); 1119 } 1120 } 1121 EXPORT_SYMBOL_GPL(drm_gpusvm_range_remove); 1122 1123 /** 1124 * drm_gpusvm_range_get() - Get a reference to GPU SVM range 1125 * @range: Pointer to the GPU SVM range 1126 * 1127 * This function increments the reference count of the specified GPU SVM range. 1128 * 1129 * Return: Pointer to the GPU SVM range. 1130 */ 1131 struct drm_gpusvm_range * 1132 drm_gpusvm_range_get(struct drm_gpusvm_range *range) 1133 { 1134 kref_get(&range->refcount); 1135 1136 return range; 1137 } 1138 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get); 1139 1140 /** 1141 * drm_gpusvm_range_destroy() - Destroy GPU SVM range 1142 * @refcount: Pointer to the reference counter embedded in the GPU SVM range 1143 * 1144 * This function destroys the specified GPU SVM range when its reference count 1145 * reaches zero. If a custom range-free function is provided, it is invoked to 1146 * free the range; otherwise, the range is deallocated using kfree(). 
 */
static void drm_gpusvm_range_destroy(struct kref *refcount)
{
	struct drm_gpusvm_range *range =
		container_of(refcount, struct drm_gpusvm_range, refcount);
	struct drm_gpusvm *gpusvm = range->gpusvm;

	if (gpusvm->ops->range_free)
		gpusvm->ops->range_free(range);
	else
		kfree(range);
}

/**
 * drm_gpusvm_range_put() - Put a reference to GPU SVM range
 * @range: Pointer to the GPU SVM range
 *
 * This function decrements the reference count of the specified GPU SVM range
 * and frees it when the count reaches zero.
 */
void drm_gpusvm_range_put(struct drm_gpusvm_range *range)
{
	kref_put(&range->refcount, drm_gpusvm_range_destroy);
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_put);

/**
 * drm_gpusvm_pages_valid() - GPU SVM pages valid
 * @gpusvm: Pointer to the GPU SVM structure
 * @svm_pages: Pointer to the GPU SVM pages structure
 *
 * This function determines if the GPU SVM pages are valid. Expected to be
 * called holding gpusvm->notifier_lock and as the last step before committing
 * a GPU binding. This is akin to a notifier seqno check in the HMM
 * documentation, but due to wider notifiers (i.e., notifiers which span
 * multiple ranges) this function is required for finer-grained (i.e.,
 * per-range) checking of whether pages are valid.
 *
 * Return: True if the GPU SVM pages are valid, False otherwise
 */
static bool drm_gpusvm_pages_valid(struct drm_gpusvm *gpusvm,
				   struct drm_gpusvm_pages *svm_pages)
{
	lockdep_assert_held(&gpusvm->notifier_lock);

	return svm_pages->flags.has_devmem_pages || svm_pages->flags.has_dma_mapping;
}

/**
 * drm_gpusvm_range_pages_valid() - GPU SVM range pages valid
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function determines if the pages of a GPU SVM range are valid. Expected
 * to be called holding gpusvm->notifier_lock and as the last step before
 * committing a GPU binding. This is akin to a notifier seqno check in the HMM
 * documentation, but due to wider notifiers (i.e., notifiers which span
 * multiple ranges) this function is required for finer-grained (i.e.,
 * per-range) checking of whether pages are valid.
 *
 * Return: True if GPU SVM range has valid pages, False otherwise
 */
bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm,
				  struct drm_gpusvm_range *range)
{
	return drm_gpusvm_pages_valid(gpusvm, &range->pages);
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid);

/**
 * drm_gpusvm_pages_valid_unlocked() - GPU SVM pages valid unlocked
 * @gpusvm: Pointer to the GPU SVM structure
 * @svm_pages: Pointer to the GPU SVM pages structure
 *
 * This function determines if the GPU SVM pages are valid. Expected to be
 * called without holding gpusvm->notifier_lock.
1223 * 1224 * Return: True if GPU SVM range has valid pages, False otherwise 1225 */ 1226 static bool drm_gpusvm_pages_valid_unlocked(struct drm_gpusvm *gpusvm, 1227 struct drm_gpusvm_pages *svm_pages) 1228 { 1229 bool pages_valid; 1230 1231 if (!svm_pages->dma_addr) 1232 return false; 1233 1234 drm_gpusvm_notifier_lock(gpusvm); 1235 pages_valid = drm_gpusvm_pages_valid(gpusvm, svm_pages); 1236 if (!pages_valid) 1237 __drm_gpusvm_free_pages(gpusvm, svm_pages); 1238 drm_gpusvm_notifier_unlock(gpusvm); 1239 1240 return pages_valid; 1241 } 1242 1243 /** 1244 * drm_gpusvm_get_pages() - Get pages and populate GPU SVM pages struct 1245 * @gpusvm: Pointer to the GPU SVM structure 1246 * @svm_pages: The SVM pages to populate. This will contain the dma-addresses 1247 * @mm: The mm corresponding to the CPU range 1248 * @notifier: The corresponding notifier for the given CPU range 1249 * @pages_start: Start CPU address for the pages 1250 * @pages_end: End CPU address for the pages (exclusive) 1251 * @ctx: GPU SVM context 1252 * 1253 * This function gets and maps pages for CPU range and ensures they are 1254 * mapped for DMA access. 1255 * 1256 * Return: 0 on success, negative error code on failure. 1257 */ 1258 int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm, 1259 struct drm_gpusvm_pages *svm_pages, 1260 struct mm_struct *mm, 1261 struct mmu_interval_notifier *notifier, 1262 unsigned long pages_start, unsigned long pages_end, 1263 const struct drm_gpusvm_ctx *ctx) 1264 { 1265 struct hmm_range hmm_range = { 1266 .default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ? 0 : 1267 HMM_PFN_REQ_WRITE), 1268 .notifier = notifier, 1269 .start = pages_start, 1270 .end = pages_end, 1271 .dev_private_owner = gpusvm->device_private_page_owner, 1272 }; 1273 void *zdd; 1274 unsigned long timeout = 1275 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 1276 unsigned long i, j; 1277 unsigned long npages = npages_in_range(pages_start, pages_end); 1278 unsigned long num_dma_mapped; 1279 unsigned int order = 0; 1280 unsigned long *pfns; 1281 int err = 0; 1282 struct dev_pagemap *pagemap; 1283 struct drm_pagemap *dpagemap; 1284 struct drm_gpusvm_pages_flags flags; 1285 enum dma_data_direction dma_dir = ctx->read_only ? DMA_TO_DEVICE : 1286 DMA_BIDIRECTIONAL; 1287 1288 retry: 1289 hmm_range.notifier_seq = mmu_interval_read_begin(notifier); 1290 if (drm_gpusvm_pages_valid_unlocked(gpusvm, svm_pages)) 1291 goto set_seqno; 1292 1293 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 1294 if (!pfns) 1295 return -ENOMEM; 1296 1297 if (!mmget_not_zero(mm)) { 1298 err = -EFAULT; 1299 goto err_free; 1300 } 1301 1302 hmm_range.hmm_pfns = pfns; 1303 while (true) { 1304 mmap_read_lock(mm); 1305 err = hmm_range_fault(&hmm_range); 1306 mmap_read_unlock(mm); 1307 1308 if (err == -EBUSY) { 1309 if (time_after(jiffies, timeout)) 1310 break; 1311 1312 hmm_range.notifier_seq = 1313 mmu_interval_read_begin(notifier); 1314 continue; 1315 } 1316 break; 1317 } 1318 mmput(mm); 1319 if (err) 1320 goto err_free; 1321 1322 map_pages: 1323 /* 1324 * Perform all dma mappings under the notifier lock to not 1325 * access freed pages. A notifier will either block on 1326 * the notifier lock or unmap dma. 
1327 */ 1328 drm_gpusvm_notifier_lock(gpusvm); 1329 1330 flags.__flags = svm_pages->flags.__flags; 1331 if (flags.unmapped) { 1332 drm_gpusvm_notifier_unlock(gpusvm); 1333 err = -EFAULT; 1334 goto err_free; 1335 } 1336 1337 if (mmu_interval_read_retry(notifier, hmm_range.notifier_seq)) { 1338 drm_gpusvm_notifier_unlock(gpusvm); 1339 kvfree(pfns); 1340 goto retry; 1341 } 1342 1343 if (!svm_pages->dma_addr) { 1344 /* Unlock and restart mapping to allocate memory. */ 1345 drm_gpusvm_notifier_unlock(gpusvm); 1346 svm_pages->dma_addr = 1347 kvmalloc_array(npages, sizeof(*svm_pages->dma_addr), GFP_KERNEL); 1348 if (!svm_pages->dma_addr) { 1349 err = -ENOMEM; 1350 goto err_free; 1351 } 1352 goto map_pages; 1353 } 1354 1355 zdd = NULL; 1356 pagemap = NULL; 1357 num_dma_mapped = 0; 1358 for (i = 0, j = 0; i < npages; ++j) { 1359 struct page *page = hmm_pfn_to_page(pfns[i]); 1360 1361 order = drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages); 1362 if (is_device_private_page(page) || 1363 is_device_coherent_page(page)) { 1364 if (zdd != page->zone_device_data && i > 0) { 1365 err = -EOPNOTSUPP; 1366 goto err_unmap; 1367 } 1368 zdd = page->zone_device_data; 1369 if (pagemap != page_pgmap(page)) { 1370 if (i > 0) { 1371 err = -EOPNOTSUPP; 1372 goto err_unmap; 1373 } 1374 1375 pagemap = page_pgmap(page); 1376 dpagemap = drm_pagemap_page_to_dpagemap(page); 1377 if (drm_WARN_ON(gpusvm->drm, !dpagemap)) { 1378 /* 1379 * Raced. This is not supposed to happen 1380 * since hmm_range_fault() should've migrated 1381 * this page to system. 1382 */ 1383 err = -EAGAIN; 1384 goto err_unmap; 1385 } 1386 } 1387 svm_pages->dma_addr[j] = 1388 dpagemap->ops->device_map(dpagemap, 1389 gpusvm->drm->dev, 1390 page, order, 1391 dma_dir); 1392 if (dma_mapping_error(gpusvm->drm->dev, 1393 svm_pages->dma_addr[j].addr)) { 1394 err = -EFAULT; 1395 goto err_unmap; 1396 } 1397 } else { 1398 dma_addr_t addr; 1399 1400 if (is_zone_device_page(page) || pagemap) { 1401 err = -EOPNOTSUPP; 1402 goto err_unmap; 1403 } 1404 1405 if (ctx->devmem_only) { 1406 err = -EFAULT; 1407 goto err_unmap; 1408 } 1409 1410 addr = dma_map_page(gpusvm->drm->dev, 1411 page, 0, 1412 PAGE_SIZE << order, 1413 dma_dir); 1414 if (dma_mapping_error(gpusvm->drm->dev, addr)) { 1415 err = -EFAULT; 1416 goto err_unmap; 1417 } 1418 1419 svm_pages->dma_addr[j] = drm_pagemap_addr_encode 1420 (addr, DRM_INTERCONNECT_SYSTEM, order, 1421 dma_dir); 1422 } 1423 i += 1 << order; 1424 num_dma_mapped = i; 1425 flags.has_dma_mapping = true; 1426 } 1427 1428 if (pagemap) { 1429 flags.has_devmem_pages = true; 1430 svm_pages->dpagemap = dpagemap; 1431 } 1432 1433 /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ 1434 WRITE_ONCE(svm_pages->flags.__flags, flags.__flags); 1435 1436 drm_gpusvm_notifier_unlock(gpusvm); 1437 kvfree(pfns); 1438 set_seqno: 1439 svm_pages->notifier_seq = hmm_range.notifier_seq; 1440 1441 return 0; 1442 1443 err_unmap: 1444 __drm_gpusvm_unmap_pages(gpusvm, svm_pages, num_dma_mapped); 1445 drm_gpusvm_notifier_unlock(gpusvm); 1446 err_free: 1447 kvfree(pfns); 1448 if (err == -EAGAIN) 1449 goto retry; 1450 return err; 1451 } 1452 EXPORT_SYMBOL_GPL(drm_gpusvm_get_pages); 1453 1454 /** 1455 * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range 1456 * @gpusvm: Pointer to the GPU SVM structure 1457 * @range: Pointer to the GPU SVM range structure 1458 * @ctx: GPU SVM context 1459 * 1460 * This function gets pages for a GPU SVM range and ensures they are mapped for 1461 * DMA access. 
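 *
 * A minimal fault-handler fragment (hypothetical driver code; see DOC:
 * Examples for the full flow, including the retry handling):
 *
 * .. code-block:: c
 *
 *	err = drm_gpusvm_range_get_pages(gpusvm, range, &ctx);
 *	if (err)
 *		return err;
 *
 *	drm_gpusvm_notifier_lock(gpusvm);
 *	if (drm_gpusvm_range_pages_valid(gpusvm, range))
 *		driver_commit_bind(gpusvm, range);
 *	drm_gpusvm_notifier_unlock(gpusvm);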
1462 * 1463 * Return: 0 on success, negative error code on failure. 1464 */ 1465 int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, 1466 struct drm_gpusvm_range *range, 1467 const struct drm_gpusvm_ctx *ctx) 1468 { 1469 return drm_gpusvm_get_pages(gpusvm, &range->pages, gpusvm->mm, 1470 &range->notifier->notifier, 1471 drm_gpusvm_range_start(range), 1472 drm_gpusvm_range_end(range), ctx); 1473 } 1474 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages); 1475 1476 /** 1477 * drm_gpusvm_unmap_pages() - Unmap GPU svm pages 1478 * @gpusvm: Pointer to the GPU SVM structure 1479 * @svm_pages: Pointer to the GPU SVM pages structure 1480 * @npages: Number of pages in @svm_pages. 1481 * @ctx: GPU SVM context 1482 * 1483 * This function unmaps pages associated with a GPU SVM pages struct. If 1484 * @in_notifier is set, it is assumed that gpusvm->notifier_lock is held in 1485 * write mode; if it is clear, it acquires gpusvm->notifier_lock in read mode. 1486 * Must be called in the invalidate() callback of the corresponding notifier for 1487 * IOMMU security model. 1488 */ 1489 void drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm, 1490 struct drm_gpusvm_pages *svm_pages, 1491 unsigned long npages, 1492 const struct drm_gpusvm_ctx *ctx) 1493 { 1494 if (ctx->in_notifier) 1495 lockdep_assert_held_write(&gpusvm->notifier_lock); 1496 else 1497 drm_gpusvm_notifier_lock(gpusvm); 1498 1499 __drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages); 1500 1501 if (!ctx->in_notifier) 1502 drm_gpusvm_notifier_unlock(gpusvm); 1503 } 1504 EXPORT_SYMBOL_GPL(drm_gpusvm_unmap_pages); 1505 1506 /** 1507 * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range 1508 * @gpusvm: Pointer to the GPU SVM structure 1509 * @range: Pointer to the GPU SVM range structure 1510 * @ctx: GPU SVM context 1511 * 1512 * This function unmaps pages associated with a GPU SVM range. If @in_notifier 1513 * is set, it is assumed that gpusvm->notifier_lock is held in write mode; if it 1514 * is clear, it acquires gpusvm->notifier_lock in read mode. Must be called on 1515 * each GPU SVM range attached to notifier in gpusvm->ops->invalidate for IOMMU 1516 * security model. 1517 */ 1518 void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, 1519 struct drm_gpusvm_range *range, 1520 const struct drm_gpusvm_ctx *ctx) 1521 { 1522 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1523 drm_gpusvm_range_end(range)); 1524 1525 return drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages, ctx); 1526 } 1527 EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages); 1528 1529 /** 1530 * drm_gpusvm_range_evict() - Evict GPU SVM range 1531 * @gpusvm: Pointer to the GPU SVM structure 1532 * @range: Pointer to the GPU SVM range to be removed 1533 * 1534 * This function evicts the specified GPU SVM range. 1535 * 1536 * Return: 0 on success, a negative error code on failure. 
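 *
 * Eviction faults the range back in on the CPU side with no device-private
 * page owner set, which forces any pages still resident in device memory to
 * be migrated back to system RAM. Drivers are expected to call this for
 * ranges with a partial unmap (see the garbage collector in DOC: Examples)
 * and when drm_gpusvm_range_get_pages() returns -EOPNOTSUPP.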
1537 */ 1538 int drm_gpusvm_range_evict(struct drm_gpusvm *gpusvm, 1539 struct drm_gpusvm_range *range) 1540 { 1541 struct mmu_interval_notifier *notifier = &range->notifier->notifier; 1542 struct hmm_range hmm_range = { 1543 .default_flags = HMM_PFN_REQ_FAULT, 1544 .notifier = notifier, 1545 .start = drm_gpusvm_range_start(range), 1546 .end = drm_gpusvm_range_end(range), 1547 .dev_private_owner = NULL, 1548 }; 1549 unsigned long timeout = 1550 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 1551 unsigned long *pfns; 1552 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1553 drm_gpusvm_range_end(range)); 1554 int err = 0; 1555 struct mm_struct *mm = gpusvm->mm; 1556 1557 if (!mmget_not_zero(mm)) 1558 return -EFAULT; 1559 1560 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 1561 if (!pfns) 1562 return -ENOMEM; 1563 1564 hmm_range.hmm_pfns = pfns; 1565 while (!time_after(jiffies, timeout)) { 1566 hmm_range.notifier_seq = mmu_interval_read_begin(notifier); 1567 if (time_after(jiffies, timeout)) { 1568 err = -ETIME; 1569 break; 1570 } 1571 1572 mmap_read_lock(mm); 1573 err = hmm_range_fault(&hmm_range); 1574 mmap_read_unlock(mm); 1575 if (err != -EBUSY) 1576 break; 1577 } 1578 1579 kvfree(pfns); 1580 mmput(mm); 1581 1582 return err; 1583 } 1584 EXPORT_SYMBOL_GPL(drm_gpusvm_range_evict); 1585 1586 /** 1587 * drm_gpusvm_has_mapping() - Check if GPU SVM has mapping for the given address range 1588 * @gpusvm: Pointer to the GPU SVM structure. 1589 * @start: Start address 1590 * @end: End address 1591 * 1592 * Return: True if GPU SVM has mapping, False otherwise 1593 */ 1594 bool drm_gpusvm_has_mapping(struct drm_gpusvm *gpusvm, unsigned long start, 1595 unsigned long end) 1596 { 1597 struct drm_gpusvm_notifier *notifier; 1598 1599 drm_gpusvm_for_each_notifier(notifier, gpusvm, start, end) { 1600 struct drm_gpusvm_range *range = NULL; 1601 1602 drm_gpusvm_for_each_range(range, notifier, start, end) 1603 return true; 1604 } 1605 1606 return false; 1607 } 1608 EXPORT_SYMBOL_GPL(drm_gpusvm_has_mapping); 1609 1610 /** 1611 * drm_gpusvm_range_set_unmapped() - Mark a GPU SVM range as unmapped 1612 * @range: Pointer to the GPU SVM range structure. 1613 * @mmu_range: Pointer to the MMU notifier range structure. 1614 * 1615 * This function marks a GPU SVM range as unmapped and sets the partial_unmap flag 1616 * if the range partially falls within the provided MMU notifier range. 1617 */ 1618 void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range, 1619 const struct mmu_notifier_range *mmu_range) 1620 { 1621 lockdep_assert_held_write(&range->gpusvm->notifier_lock); 1622 1623 range->pages.flags.unmapped = true; 1624 if (drm_gpusvm_range_start(range) < mmu_range->start || 1625 drm_gpusvm_range_end(range) > mmu_range->end) 1626 range->pages.flags.partial_unmap = true; 1627 } 1628 EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped); 1629 1630 MODULE_DESCRIPTION("DRM GPUSVM"); 1631 MODULE_LICENSE("GPL"); 1632