// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
 * Copyright © 2024 Intel Corporation
 *
 * Authors:
 *     Matthew Brost <matthew.brost@intel.com>
 */

#include <linux/dma-mapping.h>
#include <linux/export.h>
#include <linux/hmm.h>
#include <linux/hugetlb_inline.h>
#include <linux/memremap.h>
#include <linux/mm_types.h>
#include <linux/slab.h>

#include <drm/drm_device.h>
#include <drm/drm_gpusvm.h>
#include <drm/drm_pagemap.h>
#include <drm/drm_print.h>

/**
 * DOC: Overview
 *
 * GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering Manager (DRM)
 * is a component of the DRM framework designed to manage shared virtual memory
 * between the CPU and GPU. It enables efficient data exchange and processing
 * for GPU-accelerated applications by allowing memory sharing and
 * synchronization between the CPU's and GPU's virtual address spaces.
 *
 * Key GPU SVM Components:
 *
 * - Notifiers:
 *	Used for tracking memory intervals and notifying the GPU of changes,
 *	notifiers are sized based on a GPU SVM initialization parameter, with a
 *	recommendation of 512M or larger. They maintain a Red-Black tree and a
 *	list of ranges that fall within the notifier interval. Notifiers are
 *	tracked within a GPU SVM Red-Black tree and list and are dynamically
 *	inserted or removed as ranges within the interval are created or
 *	destroyed.
 *
 * - Ranges:
 *	Represent memory ranges mapped in a DRM device and managed by GPU SVM.
 *	They are sized based on an array of chunk sizes, which is a GPU SVM
 *	initialization parameter, and the CPU address space. Upon GPU fault,
 *	the largest aligned chunk that fits within the faulting CPU address
 *	space is chosen for the range size. Ranges are expected to be
 *	dynamically allocated on GPU fault and removed on an MMU notifier UNMAP
 *	event. As mentioned above, ranges are tracked in a notifier's Red-Black
 *	tree.
 *
 * - Operations:
 *	Define the interface for driver-specific GPU SVM operations such as
 *	range allocation, notifier allocation, and invalidations.
 *
 * - Device Memory Allocations:
 *	Embedded structure containing enough information for GPU SVM to migrate
 *	to / from device memory.
 *
 * - Device Memory Operations:
 *	Define the interface for driver-specific device memory operations, such
 *	as release memory, populate pfns, and copy to / from device memory.
 *
 * This layer provides interfaces for allocating, mapping, migrating, and
 * releasing memory ranges between the CPU and GPU. It handles all core memory
 * management interactions (DMA mapping, HMM, and migration) and provides
 * driver-specific virtual functions (vfuncs). This infrastructure is sufficient
 * to build the expected driver components for an SVM implementation as detailed
 * below.
 *
 * Expected Driver Components:
 *
 * - GPU page fault handler:
 *	Used to create ranges and notifiers based on the fault address,
 *	optionally migrate the range to device memory, and create GPU bindings.
 *
 * - Garbage collector:
 *	Used to unmap and destroy GPU bindings for ranges. Ranges are expected
 *	to be added to the garbage collector upon an MMU_NOTIFY_UNMAP event in
 *	the notifier callback.
 *
 * - Notifier callback:
 *	Used to invalidate and DMA unmap GPU bindings for ranges.
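 *
 * The following is a minimal initialization sketch, not taken from a real
 * driver: struct driver_vm, its members, driver_gpusvm_ops, and the chosen
 * sizes are illustrative assumptions. It shows a full SVM setup using
 * drm_gpusvm_init() with the faulting process' mm and a driver-provided
 * operations table.
 *
 * .. code-block:: c
 *
 *	static const unsigned long driver_chunk_sizes[] = { SZ_2M, SZ_64K, SZ_4K };
 *
 *	int driver_svm_init(struct driver_vm *vm)
 *	{
 *		// Hypothetical driver structure; ops must provide at least an
 *		// invalidate() callback when an mm is passed.
 *		return drm_gpusvm_init(&vm->gpusvm, "driver-svm", vm->drm,
 *				       current->mm, 0, 1ull << 47, SZ_512M,
 *				       &driver_gpusvm_ops, driver_chunk_sizes,
 *				       ARRAY_SIZE(driver_chunk_sizes));
 *	}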
 */

/**
 * DOC: Locking
 *
 * GPU SVM handles locking for core MM interactions, i.e., it locks/unlocks the
 * mmap lock as needed.
 *
 * GPU SVM introduces a global notifier lock, which safeguards the notifier's
 * range RB tree and list, as well as the range's DMA mappings and sequence
 * number. GPU SVM manages all necessary locking and unlocking operations,
 * except for rechecking that the range's pages are still valid
 * (drm_gpusvm_range_pages_valid) when the driver is committing GPU bindings.
 * This lock corresponds to the ``driver->update`` lock mentioned in
 * Documentation/mm/hmm.rst. Future revisions may transition from a GPU SVM
 * global lock to a per-notifier lock if finer-grained locking is deemed
 * necessary.
 *
 * In addition to the locking mentioned above, the driver should implement a
 * lock to safeguard core GPU SVM function calls that modify state, such as
 * drm_gpusvm_range_find_or_insert and drm_gpusvm_range_remove. This lock is
 * denoted as 'driver_svm_lock' in code examples. Finer-grained driver-side
 * locking should also be possible for concurrent GPU fault processing within a
 * single GPU SVM. The 'driver_svm_lock' can be set via
 * drm_gpusvm_driver_set_lock to add annotations to GPU SVM.
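 *
 * A minimal sketch of the driver-side lock described above, assuming a
 * hypothetical struct driver_vm that embeds both the GPU SVM instance and the
 * lock, and assuming drm_gpusvm_driver_set_lock() takes the gpusvm and the
 * lock:
 *
 * .. code-block:: c
 *
 *	void driver_svm_lock_init(struct driver_vm *vm)
 *	{
 *		mutex_init(&vm->svm_lock);
 *		// Optional: lets GPU SVM assert that 'driver_svm_lock' is held
 *		// in calls which modify GPU SVM state.
 *		drm_gpusvm_driver_set_lock(&vm->gpusvm, &vm->svm_lock);
 *	}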
 */

/**
 * DOC: Partial Unmapping of Ranges
 *
 * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by CPU resulting
 * in MMU_NOTIFY_UNMAP event) presents several challenges, with the main one
 * being that a subset of the range still has CPU and GPU mappings. If the
 * backing store for the range is in device memory, a subset of the backing
 * store has references. One option would be to split the range and device
 * memory backing store, but the implementation for this would be quite
 * complicated. Given that partial unmappings are rare and driver-defined range
 * sizes are relatively small, GPU SVM does not support splitting of ranges.
 *
 * With no support for range splitting, upon partial unmapping of a range, the
 * driver is expected to invalidate and destroy the entire range. If the range
 * has device memory as its backing, the driver is also expected to migrate any
 * remaining pages back to RAM.
 */

/**
 * DOC: Examples
 *
 * This section provides three examples of how to build the expected driver
 * components: the GPU page fault handler, the garbage collector, and the
 * notifier callback.
 *
 * The generic code provided does not include logic for complex migration
 * policies, optimized invalidations, fine-grained driver locking, or other
 * potentially required driver locking (e.g., DMA-resv locks).
 *
 * 1) GPU page fault handler
 *
 * .. code-block:: c
 *
 *	int driver_bind_range(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range)
 *	{
 *		int err = 0;
 *
 *		driver_alloc_and_setup_memory_for_bind(gpusvm, range);
 *
 *		drm_gpusvm_notifier_lock(gpusvm);
 *		if (drm_gpusvm_range_pages_valid(range))
 *			driver_commit_bind(gpusvm, range);
 *		else
 *			err = -EAGAIN;
 *		drm_gpusvm_notifier_unlock(gpusvm);
 *
 *		return err;
 *	}
 *
 *	int driver_gpu_fault(struct drm_gpusvm *gpusvm, unsigned long fault_addr,
 *			     unsigned long gpuva_start, unsigned long gpuva_end)
 *	{
 *		struct drm_gpusvm_ctx ctx = {};
 *		struct drm_gpusvm_range *range;
 *		int err;
 *
 *		driver_svm_lock();
 *	retry:
 *		// Always process UNMAPs first so view of GPU SVM ranges is current
 *		driver_garbage_collector(gpusvm);
 *
 *		range = drm_gpusvm_range_find_or_insert(gpusvm, fault_addr,
 *							gpuva_start, gpuva_end,
 *							&ctx);
 *		if (IS_ERR(range)) {
 *			err = PTR_ERR(range);
 *			goto unlock;
 *		}
 *
 *		if (driver_migration_policy(range)) {
 *			err = drm_pagemap_populate_mm(driver_choose_drm_pagemap(),
 *						      gpuva_start, gpuva_end, gpusvm->mm,
 *						      ctx.timeslice_ms);
 *			if (err)	// CPU mappings may have changed
 *				goto retry;
 *		}
 *
 *		err = drm_gpusvm_range_get_pages(gpusvm, range, &ctx);
 *		if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {	// CPU mappings changed
 *			if (err == -EOPNOTSUPP)
 *				drm_gpusvm_range_evict(gpusvm, range);
 *			goto retry;
 *		} else if (err) {
 *			goto unlock;
 *		}
 *
 *		err = driver_bind_range(gpusvm, range);
 *		if (err == -EAGAIN)	// CPU mappings changed
 *			goto retry;
 *
 *	unlock:
 *		driver_svm_unlock();
 *		return err;
 *	}
 *
 * 2) Garbage Collector
 *
 * .. code-block:: c
 *
 *	void __driver_garbage_collector(struct drm_gpusvm *gpusvm,
 *					struct drm_gpusvm_range *range)
 *	{
 *		assert_driver_svm_locked(gpusvm);
 *
 *		// Partial unmap, migrate any remaining device memory pages back to RAM
 *		if (range->flags.partial_unmap)
 *			drm_gpusvm_range_evict(gpusvm, range);
 *
 *		driver_unbind_range(range);
 *		drm_gpusvm_range_remove(gpusvm, range);
 *	}
 *
 *	void driver_garbage_collector(struct drm_gpusvm *gpusvm)
 *	{
 *		assert_driver_svm_locked(gpusvm);
 *
 *		for_each_range_in_garbage_collector(gpusvm, range)
 *			__driver_garbage_collector(gpusvm, range);
 *	}
 *
 * 3) Notifier callback
 *
 * .. code-block:: c
 *
 *	void driver_invalidation(struct drm_gpusvm *gpusvm,
 *				 struct drm_gpusvm_notifier *notifier,
 *				 const struct mmu_notifier_range *mmu_range)
 *	{
 *		struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
 *		struct drm_gpusvm_range *range = NULL;
 *
 *		driver_invalidate_device_pages(gpusvm, mmu_range->start, mmu_range->end);
 *
 *		drm_gpusvm_for_each_range(range, notifier, mmu_range->start,
 *					  mmu_range->end) {
 *			drm_gpusvm_range_unmap_pages(gpusvm, range, &ctx);
 *
 *			if (mmu_range->event != MMU_NOTIFY_UNMAP)
 *				continue;
 *
 *			drm_gpusvm_range_set_unmapped(range, mmu_range);
 *			driver_garbage_collector_add(gpusvm, range);
 *		}
 *	}
 */

/**
 * npages_in_range() - Calculate the number of pages in a given range
 * @start: The start address of the range
 * @end: The end address of the range
 *
 * This function calculates the number of pages in a given memory range,
 * specified by the start and end addresses.
It divides the difference 262 * between the end and start addresses by the page size (PAGE_SIZE) to 263 * determine the number of pages in the range. 264 * 265 * Return: The number of pages in the specified range. 266 */ 267 static unsigned long 268 npages_in_range(unsigned long start, unsigned long end) 269 { 270 return (end - start) >> PAGE_SHIFT; 271 } 272 273 /** 274 * drm_gpusvm_notifier_find() - Find GPU SVM notifier from GPU SVM 275 * @gpusvm: Pointer to the GPU SVM structure. 276 * @start: Start address of the notifier 277 * @end: End address of the notifier 278 * 279 * Return: A pointer to the drm_gpusvm_notifier if found or NULL 280 */ 281 struct drm_gpusvm_notifier * 282 drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm, unsigned long start, 283 unsigned long end) 284 { 285 struct interval_tree_node *itree; 286 287 itree = interval_tree_iter_first(&gpusvm->root, start, end - 1); 288 289 if (itree) 290 return container_of(itree, struct drm_gpusvm_notifier, itree); 291 else 292 return NULL; 293 } 294 EXPORT_SYMBOL_GPL(drm_gpusvm_notifier_find); 295 296 /** 297 * drm_gpusvm_range_find() - Find GPU SVM range from GPU SVM notifier 298 * @notifier: Pointer to the GPU SVM notifier structure. 299 * @start: Start address of the range 300 * @end: End address of the range 301 * 302 * Return: A pointer to the drm_gpusvm_range if found or NULL 303 */ 304 struct drm_gpusvm_range * 305 drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start, 306 unsigned long end) 307 { 308 struct interval_tree_node *itree; 309 310 itree = interval_tree_iter_first(¬ifier->root, start, end - 1); 311 312 if (itree) 313 return container_of(itree, struct drm_gpusvm_range, itree); 314 else 315 return NULL; 316 } 317 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find); 318 319 /** 320 * drm_gpusvm_notifier_invalidate() - Invalidate a GPU SVM notifier. 321 * @mni: Pointer to the mmu_interval_notifier structure. 322 * @mmu_range: Pointer to the mmu_notifier_range structure. 323 * @cur_seq: Current sequence number. 324 * 325 * This function serves as a generic MMU notifier for GPU SVM. It sets the MMU 326 * notifier sequence number and calls the driver invalidate vfunc under 327 * gpusvm->notifier_lock. 328 * 329 * Return: true if the operation succeeds, false otherwise. 330 */ 331 static bool 332 drm_gpusvm_notifier_invalidate(struct mmu_interval_notifier *mni, 333 const struct mmu_notifier_range *mmu_range, 334 unsigned long cur_seq) 335 { 336 struct drm_gpusvm_notifier *notifier = 337 container_of(mni, typeof(*notifier), notifier); 338 struct drm_gpusvm *gpusvm = notifier->gpusvm; 339 340 if (!mmu_notifier_range_blockable(mmu_range)) 341 return false; 342 343 down_write(&gpusvm->notifier_lock); 344 mmu_interval_set_seq(mni, cur_seq); 345 gpusvm->ops->invalidate(gpusvm, notifier, mmu_range); 346 up_write(&gpusvm->notifier_lock); 347 348 return true; 349 } 350 351 /* 352 * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM 353 */ 354 static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = { 355 .invalidate = drm_gpusvm_notifier_invalidate, 356 }; 357 358 /** 359 * drm_gpusvm_init() - Initialize the GPU SVM. 360 * @gpusvm: Pointer to the GPU SVM structure. 361 * @name: Name of the GPU SVM. 362 * @drm: Pointer to the DRM device structure. 363 * @mm: Pointer to the mm_struct for the address space. 364 * @mm_start: Start address of GPU SVM. 365 * @mm_range: Range of the GPU SVM. 366 * @notifier_size: Size of individual notifiers. 
367 * @ops: Pointer to the operations structure for GPU SVM. 368 * @chunk_sizes: Pointer to the array of chunk sizes used in range allocation. 369 * Entries should be powers of 2 in descending order with last 370 * entry being SZ_4K. 371 * @num_chunks: Number of chunks. 372 * 373 * This function initializes the GPU SVM. 374 * 375 * Note: If only using the simple drm_gpusvm_pages API (get/unmap/free), 376 * then only @gpusvm, @name, and @drm are expected. However, the same base 377 * @gpusvm can also be used with both modes together in which case the full 378 * setup is needed, where the core drm_gpusvm_pages API will simply never use 379 * the other fields. 380 * 381 * Return: 0 on success, a negative error code on failure. 382 */ 383 int drm_gpusvm_init(struct drm_gpusvm *gpusvm, 384 const char *name, struct drm_device *drm, 385 struct mm_struct *mm, 386 unsigned long mm_start, unsigned long mm_range, 387 unsigned long notifier_size, 388 const struct drm_gpusvm_ops *ops, 389 const unsigned long *chunk_sizes, int num_chunks) 390 { 391 if (mm) { 392 if (!ops->invalidate || !num_chunks) 393 return -EINVAL; 394 mmgrab(mm); 395 } else { 396 /* No full SVM mode, only core drm_gpusvm_pages API. */ 397 if (ops || num_chunks || mm_range || notifier_size) 398 return -EINVAL; 399 } 400 401 gpusvm->name = name; 402 gpusvm->drm = drm; 403 gpusvm->mm = mm; 404 gpusvm->mm_start = mm_start; 405 gpusvm->mm_range = mm_range; 406 gpusvm->notifier_size = notifier_size; 407 gpusvm->ops = ops; 408 gpusvm->chunk_sizes = chunk_sizes; 409 gpusvm->num_chunks = num_chunks; 410 411 gpusvm->root = RB_ROOT_CACHED; 412 INIT_LIST_HEAD(&gpusvm->notifier_list); 413 414 init_rwsem(&gpusvm->notifier_lock); 415 416 fs_reclaim_acquire(GFP_KERNEL); 417 might_lock(&gpusvm->notifier_lock); 418 fs_reclaim_release(GFP_KERNEL); 419 420 #ifdef CONFIG_LOCKDEP 421 gpusvm->lock_dep_map = NULL; 422 #endif 423 424 return 0; 425 } 426 EXPORT_SYMBOL_GPL(drm_gpusvm_init); 427 428 /** 429 * to_drm_gpusvm_notifier() - retrieve the container struct for a given rbtree node 430 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_notifier struct 431 * 432 * Return: A pointer to the containing drm_gpusvm_notifier structure. 433 */ 434 static struct drm_gpusvm_notifier *to_drm_gpusvm_notifier(struct rb_node *node) 435 { 436 return container_of(node, struct drm_gpusvm_notifier, itree.rb); 437 } 438 439 /** 440 * drm_gpusvm_notifier_insert() - Insert GPU SVM notifier 441 * @gpusvm: Pointer to the GPU SVM structure 442 * @notifier: Pointer to the GPU SVM notifier structure 443 * 444 * This function inserts the GPU SVM notifier into the GPU SVM RB tree and list. 445 */ 446 static void drm_gpusvm_notifier_insert(struct drm_gpusvm *gpusvm, 447 struct drm_gpusvm_notifier *notifier) 448 { 449 struct rb_node *node; 450 struct list_head *head; 451 452 interval_tree_insert(¬ifier->itree, &gpusvm->root); 453 454 node = rb_prev(¬ifier->itree.rb); 455 if (node) 456 head = &(to_drm_gpusvm_notifier(node))->entry; 457 else 458 head = &gpusvm->notifier_list; 459 460 list_add(¬ifier->entry, head); 461 } 462 463 /** 464 * drm_gpusvm_notifier_remove() - Remove GPU SVM notifier 465 * @gpusvm: Pointer to the GPU SVM tructure 466 * @notifier: Pointer to the GPU SVM notifier structure 467 * 468 * This function removes the GPU SVM notifier from the GPU SVM RB tree and list. 
469 */ 470 static void drm_gpusvm_notifier_remove(struct drm_gpusvm *gpusvm, 471 struct drm_gpusvm_notifier *notifier) 472 { 473 interval_tree_remove(¬ifier->itree, &gpusvm->root); 474 list_del(¬ifier->entry); 475 } 476 477 /** 478 * drm_gpusvm_fini() - Finalize the GPU SVM. 479 * @gpusvm: Pointer to the GPU SVM structure. 480 * 481 * This function finalizes the GPU SVM by cleaning up any remaining ranges and 482 * notifiers, and dropping a reference to struct MM. 483 */ 484 void drm_gpusvm_fini(struct drm_gpusvm *gpusvm) 485 { 486 struct drm_gpusvm_notifier *notifier, *next; 487 488 drm_gpusvm_for_each_notifier_safe(notifier, next, gpusvm, 0, LONG_MAX) { 489 struct drm_gpusvm_range *range, *__next; 490 491 /* 492 * Remove notifier first to avoid racing with any invalidation 493 */ 494 mmu_interval_notifier_remove(¬ifier->notifier); 495 notifier->flags.removed = true; 496 497 drm_gpusvm_for_each_range_safe(range, __next, notifier, 0, 498 LONG_MAX) 499 drm_gpusvm_range_remove(gpusvm, range); 500 } 501 502 if (gpusvm->mm) 503 mmdrop(gpusvm->mm); 504 WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root)); 505 } 506 EXPORT_SYMBOL_GPL(drm_gpusvm_fini); 507 508 /** 509 * drm_gpusvm_notifier_alloc() - Allocate GPU SVM notifier 510 * @gpusvm: Pointer to the GPU SVM structure 511 * @fault_addr: Fault address 512 * 513 * This function allocates and initializes the GPU SVM notifier structure. 514 * 515 * Return: Pointer to the allocated GPU SVM notifier on success, ERR_PTR() on failure. 516 */ 517 static struct drm_gpusvm_notifier * 518 drm_gpusvm_notifier_alloc(struct drm_gpusvm *gpusvm, unsigned long fault_addr) 519 { 520 struct drm_gpusvm_notifier *notifier; 521 522 if (gpusvm->ops->notifier_alloc) 523 notifier = gpusvm->ops->notifier_alloc(); 524 else 525 notifier = kzalloc(sizeof(*notifier), GFP_KERNEL); 526 527 if (!notifier) 528 return ERR_PTR(-ENOMEM); 529 530 notifier->gpusvm = gpusvm; 531 notifier->itree.start = ALIGN_DOWN(fault_addr, gpusvm->notifier_size); 532 notifier->itree.last = ALIGN(fault_addr + 1, gpusvm->notifier_size) - 1; 533 INIT_LIST_HEAD(¬ifier->entry); 534 notifier->root = RB_ROOT_CACHED; 535 INIT_LIST_HEAD(¬ifier->range_list); 536 537 return notifier; 538 } 539 540 /** 541 * drm_gpusvm_notifier_free() - Free GPU SVM notifier 542 * @gpusvm: Pointer to the GPU SVM structure 543 * @notifier: Pointer to the GPU SVM notifier structure 544 * 545 * This function frees the GPU SVM notifier structure. 546 */ 547 static void drm_gpusvm_notifier_free(struct drm_gpusvm *gpusvm, 548 struct drm_gpusvm_notifier *notifier) 549 { 550 WARN_ON(!RB_EMPTY_ROOT(¬ifier->root.rb_root)); 551 552 if (gpusvm->ops->notifier_free) 553 gpusvm->ops->notifier_free(notifier); 554 else 555 kfree(notifier); 556 } 557 558 /** 559 * to_drm_gpusvm_range() - retrieve the container struct for a given rbtree node 560 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_range struct 561 * 562 * Return: A pointer to the containing drm_gpusvm_range structure. 563 */ 564 static struct drm_gpusvm_range *to_drm_gpusvm_range(struct rb_node *node) 565 { 566 return container_of(node, struct drm_gpusvm_range, itree.rb); 567 } 568 569 /** 570 * drm_gpusvm_range_insert() - Insert GPU SVM range 571 * @notifier: Pointer to the GPU SVM notifier structure 572 * @range: Pointer to the GPU SVM range structure 573 * 574 * This function inserts the GPU SVM range into the notifier RB tree and list. 
 */
static void drm_gpusvm_range_insert(struct drm_gpusvm_notifier *notifier,
				    struct drm_gpusvm_range *range)
{
	struct rb_node *node;
	struct list_head *head;

	drm_gpusvm_notifier_lock(notifier->gpusvm);
	interval_tree_insert(&range->itree, &notifier->root);

	node = rb_prev(&range->itree.rb);
	if (node)
		head = &(to_drm_gpusvm_range(node))->entry;
	else
		head = &notifier->range_list;

	list_add(&range->entry, head);
	drm_gpusvm_notifier_unlock(notifier->gpusvm);
}

/**
 * __drm_gpusvm_range_remove() - Remove GPU SVM range
 * @notifier: Pointer to the GPU SVM notifier structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function removes the GPU SVM range from the notifier RB tree and list.
 */
static void __drm_gpusvm_range_remove(struct drm_gpusvm_notifier *notifier,
				      struct drm_gpusvm_range *range)
{
	interval_tree_remove(&range->itree, &notifier->root);
	list_del(&range->entry);
}

/**
 * drm_gpusvm_range_alloc() - Allocate GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @notifier: Pointer to the GPU SVM notifier structure
 * @fault_addr: Fault address
 * @chunk_size: Chunk size
 * @migrate_devmem: Flag indicating whether to migrate device memory
 *
 * This function allocates and initializes the GPU SVM range structure.
 *
 * Return: Pointer to the allocated GPU SVM range on success, ERR_PTR() on failure.
 */
static struct drm_gpusvm_range *
drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm,
		       struct drm_gpusvm_notifier *notifier,
		       unsigned long fault_addr, unsigned long chunk_size,
		       bool migrate_devmem)
{
	struct drm_gpusvm_range *range;

	if (gpusvm->ops->range_alloc)
		range = gpusvm->ops->range_alloc(gpusvm);
	else
		range = kzalloc(sizeof(*range), GFP_KERNEL);

	if (!range)
		return ERR_PTR(-ENOMEM);

	kref_init(&range->refcount);
	range->gpusvm = gpusvm;
	range->notifier = notifier;
	range->itree.start = ALIGN_DOWN(fault_addr, chunk_size);
	range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1;
	INIT_LIST_HEAD(&range->entry);
	range->pages.notifier_seq = LONG_MAX;
	range->pages.flags.migrate_devmem = migrate_devmem ? 1 : 0;

	return range;
}

/**
 * drm_gpusvm_hmm_pfn_to_order() - Get the largest CPU mapping order.
 * @hmm_pfn: The current hmm_pfn.
 * @hmm_pfn_index: Index of the @hmm_pfn within the pfn array.
 * @npages: Number of pages within the pfn array, i.e. the hmm range size.
 *
 * To allow skipping PFNs with the same flags (like when they belong to
 * the same huge PTE) when looping over the pfn array, take a given hmm_pfn,
 * and return the largest order that will fit inside the CPU PTE, but also
 * crucially accounting for the original hmm range boundaries.
 *
 * Return: The largest order that will safely fit within the size of the hmm_pfn
 * CPU PTE.
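 *
 * As a worked example (illustrative numbers, not from a real trace): for an
 * hmm_pfn at the start of a 2M THP, hmm_pfn_to_map_order() reports order 9
 * (512 pages). If only 16 pages remain before the end of the hmm range, the
 * size is clamped to those 16 pages, ilog2(16) = 4 is returned, and the caller
 * advances by 1 << 4 pfns.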
662 */ 663 static unsigned int drm_gpusvm_hmm_pfn_to_order(unsigned long hmm_pfn, 664 unsigned long hmm_pfn_index, 665 unsigned long npages) 666 { 667 unsigned long size; 668 669 size = 1UL << hmm_pfn_to_map_order(hmm_pfn); 670 size -= (hmm_pfn & ~HMM_PFN_FLAGS) & (size - 1); 671 hmm_pfn_index += size; 672 if (hmm_pfn_index > npages) 673 size -= (hmm_pfn_index - npages); 674 675 return ilog2(size); 676 } 677 678 /** 679 * drm_gpusvm_check_pages() - Check pages 680 * @gpusvm: Pointer to the GPU SVM structure 681 * @notifier: Pointer to the GPU SVM notifier structure 682 * @start: Start address 683 * @end: End address 684 * @dev_private_owner: The device private page owner 685 * 686 * Check if pages between start and end have been faulted in on the CPU. Use to 687 * prevent migration of pages without CPU backing store. 688 * 689 * Return: True if pages have been faulted into CPU, False otherwise 690 */ 691 static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm, 692 struct drm_gpusvm_notifier *notifier, 693 unsigned long start, unsigned long end, 694 void *dev_private_owner) 695 { 696 struct hmm_range hmm_range = { 697 .default_flags = 0, 698 .notifier = ¬ifier->notifier, 699 .start = start, 700 .end = end, 701 .dev_private_owner = dev_private_owner, 702 }; 703 unsigned long timeout = 704 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 705 unsigned long *pfns; 706 unsigned long npages = npages_in_range(start, end); 707 int err, i; 708 709 mmap_assert_locked(gpusvm->mm); 710 711 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 712 if (!pfns) 713 return false; 714 715 hmm_range.notifier_seq = mmu_interval_read_begin(¬ifier->notifier); 716 hmm_range.hmm_pfns = pfns; 717 718 while (true) { 719 err = hmm_range_fault(&hmm_range); 720 if (err == -EBUSY) { 721 if (time_after(jiffies, timeout)) 722 break; 723 724 hmm_range.notifier_seq = 725 mmu_interval_read_begin(¬ifier->notifier); 726 continue; 727 } 728 break; 729 } 730 if (err) 731 goto err_free; 732 733 for (i = 0; i < npages;) { 734 if (!(pfns[i] & HMM_PFN_VALID)) { 735 err = -EFAULT; 736 goto err_free; 737 } 738 i += 0x1 << drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages); 739 } 740 741 err_free: 742 kvfree(pfns); 743 return err ? false : true; 744 } 745 746 /** 747 * drm_gpusvm_scan_mm() - Check the migration state of a drm_gpusvm_range 748 * @range: Pointer to the struct drm_gpusvm_range to check. 749 * @dev_private_owner: The struct dev_private_owner to use to determine 750 * compatible device-private pages. 751 * @pagemap: The struct dev_pagemap pointer to use for pagemap-specific 752 * checks. 753 * 754 * Scan the CPU address space corresponding to @range and return the 755 * current migration state. Note that the result may be invalid as 756 * soon as the function returns. It's an advisory check. 757 * 758 * TODO: Bail early and call hmm_range_fault() for subranges. 759 * 760 * Return: See &enum drm_gpusvm_scan_result. 
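 *
 * A sketch of how a driver migration policy might consume the advisory result;
 * driver_migration_policy(), driver_owner, and driver_dev_pagemap() are
 * illustrative driver-side assumptions, not part of this API:
 *
 * .. code-block:: c
 *
 *	bool driver_migration_policy(struct drm_gpusvm_range *range)
 *	{
 *		// driver_owner and driver_dev_pagemap() are hypothetical
 *		// helpers tracking the device-private pagemap.
 *		enum drm_gpusvm_scan_result res =
 *			drm_gpusvm_scan_mm(range, driver_owner,
 *					   driver_dev_pagemap());
 *
 *		// Only migrate if the range is not already fully backed by the
 *		// chosen pagemap; the result is advisory and may be stale.
 *		return res != DRM_GPUSVM_SCAN_EQUAL;
 *	}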
761 */ 762 enum drm_gpusvm_scan_result drm_gpusvm_scan_mm(struct drm_gpusvm_range *range, 763 void *dev_private_owner, 764 const struct dev_pagemap *pagemap) 765 { 766 struct mmu_interval_notifier *notifier = &range->notifier->notifier; 767 unsigned long start = drm_gpusvm_range_start(range); 768 unsigned long end = drm_gpusvm_range_end(range); 769 struct hmm_range hmm_range = { 770 .default_flags = 0, 771 .notifier = notifier, 772 .start = start, 773 .end = end, 774 .dev_private_owner = dev_private_owner, 775 }; 776 unsigned long timeout = 777 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 778 enum drm_gpusvm_scan_result state = DRM_GPUSVM_SCAN_UNPOPULATED, new_state; 779 unsigned long *pfns; 780 unsigned long npages = npages_in_range(start, end); 781 const struct dev_pagemap *other = NULL; 782 int err, i; 783 784 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 785 if (!pfns) 786 return DRM_GPUSVM_SCAN_UNPOPULATED; 787 788 hmm_range.hmm_pfns = pfns; 789 790 retry: 791 hmm_range.notifier_seq = mmu_interval_read_begin(notifier); 792 mmap_read_lock(range->gpusvm->mm); 793 794 while (true) { 795 err = hmm_range_fault(&hmm_range); 796 if (err == -EBUSY) { 797 if (time_after(jiffies, timeout)) 798 break; 799 800 hmm_range.notifier_seq = 801 mmu_interval_read_begin(notifier); 802 continue; 803 } 804 break; 805 } 806 mmap_read_unlock(range->gpusvm->mm); 807 if (err) 808 goto err_free; 809 810 drm_gpusvm_notifier_lock(range->gpusvm); 811 if (mmu_interval_read_retry(notifier, hmm_range.notifier_seq)) { 812 drm_gpusvm_notifier_unlock(range->gpusvm); 813 goto retry; 814 } 815 816 for (i = 0; i < npages;) { 817 struct page *page; 818 const struct dev_pagemap *cur = NULL; 819 820 if (!(pfns[i] & HMM_PFN_VALID)) { 821 state = DRM_GPUSVM_SCAN_UNPOPULATED; 822 goto err_free; 823 } 824 825 page = hmm_pfn_to_page(pfns[i]); 826 if (is_device_private_page(page) || 827 is_device_coherent_page(page)) 828 cur = page_pgmap(page); 829 830 if (cur == pagemap) { 831 new_state = DRM_GPUSVM_SCAN_EQUAL; 832 } else if (cur && (cur == other || !other)) { 833 new_state = DRM_GPUSVM_SCAN_OTHER; 834 other = cur; 835 } else if (cur) { 836 new_state = DRM_GPUSVM_SCAN_MIXED_DEVICE; 837 } else { 838 new_state = DRM_GPUSVM_SCAN_SYSTEM; 839 } 840 841 /* 842 * TODO: Could use an array for state 843 * transitions, and caller might want it 844 * to bail early for some results. 
845 */ 846 if (state == DRM_GPUSVM_SCAN_UNPOPULATED) { 847 state = new_state; 848 } else if (state != new_state) { 849 if (new_state == DRM_GPUSVM_SCAN_SYSTEM || 850 state == DRM_GPUSVM_SCAN_SYSTEM) 851 state = DRM_GPUSVM_SCAN_MIXED; 852 else if (state != DRM_GPUSVM_SCAN_MIXED) 853 state = DRM_GPUSVM_SCAN_MIXED_DEVICE; 854 } 855 856 i += 1ul << drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages); 857 } 858 859 err_free: 860 drm_gpusvm_notifier_unlock(range->gpusvm); 861 862 kvfree(pfns); 863 return state; 864 } 865 EXPORT_SYMBOL(drm_gpusvm_scan_mm); 866 867 /** 868 * drm_gpusvm_range_chunk_size() - Determine chunk size for GPU SVM range 869 * @gpusvm: Pointer to the GPU SVM structure 870 * @notifier: Pointer to the GPU SVM notifier structure 871 * @vas: Pointer to the virtual memory area structure 872 * @fault_addr: Fault address 873 * @gpuva_start: Start address of GPUVA which mirrors CPU 874 * @gpuva_end: End address of GPUVA which mirrors CPU 875 * @check_pages_threshold: Check CPU pages for present threshold 876 * @dev_private_owner: The device private page owner 877 * 878 * This function determines the chunk size for the GPU SVM range based on the 879 * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual 880 * memory area boundaries. 881 * 882 * Return: Chunk size on success, LONG_MAX on failure. 883 */ 884 static unsigned long 885 drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm, 886 struct drm_gpusvm_notifier *notifier, 887 struct vm_area_struct *vas, 888 unsigned long fault_addr, 889 unsigned long gpuva_start, 890 unsigned long gpuva_end, 891 unsigned long check_pages_threshold, 892 void *dev_private_owner) 893 { 894 unsigned long start, end; 895 int i = 0; 896 897 retry: 898 for (; i < gpusvm->num_chunks; ++i) { 899 start = ALIGN_DOWN(fault_addr, gpusvm->chunk_sizes[i]); 900 end = ALIGN(fault_addr + 1, gpusvm->chunk_sizes[i]); 901 902 if (start >= vas->vm_start && end <= vas->vm_end && 903 start >= drm_gpusvm_notifier_start(notifier) && 904 end <= drm_gpusvm_notifier_end(notifier) && 905 start >= gpuva_start && end <= gpuva_end) 906 break; 907 } 908 909 if (i == gpusvm->num_chunks) 910 return LONG_MAX; 911 912 /* 913 * If allocation more than page, ensure not to overlap with existing 914 * ranges. 915 */ 916 if (end - start != SZ_4K) { 917 struct drm_gpusvm_range *range; 918 919 range = drm_gpusvm_range_find(notifier, start, end); 920 if (range) { 921 ++i; 922 goto retry; 923 } 924 925 /* 926 * XXX: Only create range on pages CPU has faulted in. Without 927 * this check, or prefault, on BMG 'xe_exec_system_allocator --r 928 * process-many-malloc' fails. In the failure case, each process 929 * mallocs 16k but the CPU VMA is ~128k which results in 64k SVM 930 * ranges. When migrating the SVM ranges, some processes fail in 931 * drm_pagemap_migrate_to_devmem with 'migrate.cpages != npages' 932 * and then upon drm_gpusvm_range_get_pages device pages from 933 * other processes are collected + faulted in which creates all 934 * sorts of problems. Unsure exactly how this happening, also 935 * problem goes away if 'xe_exec_system_allocator --r 936 * process-many-malloc' mallocs at least 64k at a time. 937 */ 938 if (end - start <= check_pages_threshold && 939 !drm_gpusvm_check_pages(gpusvm, notifier, start, end, dev_private_owner)) { 940 ++i; 941 goto retry; 942 } 943 } 944 945 return end - start; 946 } 947 948 #ifdef CONFIG_LOCKDEP 949 /** 950 * drm_gpusvm_driver_lock_held() - Assert GPU SVM driver lock is held 951 * @gpusvm: Pointer to the GPU SVM structure. 
 *
 * Ensure driver lock is held.
 */
static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm)
{
	if ((gpusvm)->lock_dep_map)
		lockdep_assert(lock_is_held_type((gpusvm)->lock_dep_map, 0));
}
#else
static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm)
{
}
#endif

/**
 * drm_gpusvm_find_vma_start() - Find start address for first VMA in range
 * @gpusvm: Pointer to the GPU SVM structure
 * @start: The inclusive start user address.
 * @end: The exclusive end user address.
 *
 * Return: The start address of the first VMA within the provided range,
 * ULONG_MAX otherwise. Assumes @start < @end.
 */
unsigned long
drm_gpusvm_find_vma_start(struct drm_gpusvm *gpusvm,
			  unsigned long start,
			  unsigned long end)
{
	struct mm_struct *mm = gpusvm->mm;
	struct vm_area_struct *vma;
	unsigned long addr = ULONG_MAX;

	if (!mmget_not_zero(mm))
		return addr;

	mmap_read_lock(mm);

	vma = find_vma_intersection(mm, start, end);
	if (vma)
		addr = vma->vm_start;

	mmap_read_unlock(mm);
	mmput(mm);

	return addr;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_find_vma_start);

/**
 * drm_gpusvm_range_find_or_insert() - Find or insert GPU SVM range
 * @gpusvm: Pointer to the GPU SVM structure
 * @fault_addr: Fault address
 * @gpuva_start: Start address of GPUVA which mirrors CPU
 * @gpuva_end: End address of GPUVA which mirrors CPU
 * @ctx: GPU SVM context
 *
 * This function finds, or inserts a newly allocated, GPU SVM range based on the
 * fault address. The caller must hold a lock to protect range lookup and
 * insertion.
 *
 * Return: Pointer to the GPU SVM range on success, ERR_PTR() on failure.
1012 */ 1013 struct drm_gpusvm_range * 1014 drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm, 1015 unsigned long fault_addr, 1016 unsigned long gpuva_start, 1017 unsigned long gpuva_end, 1018 const struct drm_gpusvm_ctx *ctx) 1019 { 1020 struct drm_gpusvm_notifier *notifier; 1021 struct drm_gpusvm_range *range; 1022 struct mm_struct *mm = gpusvm->mm; 1023 struct vm_area_struct *vas; 1024 bool notifier_alloc = false; 1025 unsigned long chunk_size; 1026 int err; 1027 bool migrate_devmem; 1028 1029 drm_gpusvm_driver_lock_held(gpusvm); 1030 1031 if (fault_addr < gpusvm->mm_start || 1032 fault_addr > gpusvm->mm_start + gpusvm->mm_range) 1033 return ERR_PTR(-EINVAL); 1034 1035 if (!mmget_not_zero(mm)) 1036 return ERR_PTR(-EFAULT); 1037 1038 notifier = drm_gpusvm_notifier_find(gpusvm, fault_addr, fault_addr + 1); 1039 if (!notifier) { 1040 notifier = drm_gpusvm_notifier_alloc(gpusvm, fault_addr); 1041 if (IS_ERR(notifier)) { 1042 err = PTR_ERR(notifier); 1043 goto err_mmunlock; 1044 } 1045 notifier_alloc = true; 1046 err = mmu_interval_notifier_insert(¬ifier->notifier, 1047 mm, 1048 drm_gpusvm_notifier_start(notifier), 1049 drm_gpusvm_notifier_size(notifier), 1050 &drm_gpusvm_notifier_ops); 1051 if (err) 1052 goto err_notifier; 1053 } 1054 1055 mmap_read_lock(mm); 1056 1057 vas = vma_lookup(mm, fault_addr); 1058 if (!vas) { 1059 err = -ENOENT; 1060 goto err_notifier_remove; 1061 } 1062 1063 if (!ctx->read_only && !(vas->vm_flags & VM_WRITE)) { 1064 err = -EPERM; 1065 goto err_notifier_remove; 1066 } 1067 1068 range = drm_gpusvm_range_find(notifier, fault_addr, fault_addr + 1); 1069 if (range) 1070 goto out_mmunlock; 1071 /* 1072 * XXX: Short-circuiting migration based on migrate_vma_* current 1073 * limitations. If/when migrate_vma_* add more support, this logic will 1074 * have to change. 1075 */ 1076 migrate_devmem = ctx->devmem_possible && 1077 vma_is_anonymous(vas) && !is_vm_hugetlb_page(vas); 1078 1079 chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas, 1080 fault_addr, gpuva_start, 1081 gpuva_end, 1082 ctx->check_pages_threshold, 1083 ctx->device_private_page_owner); 1084 if (chunk_size == LONG_MAX) { 1085 err = -EINVAL; 1086 goto err_notifier_remove; 1087 } 1088 1089 range = drm_gpusvm_range_alloc(gpusvm, notifier, fault_addr, chunk_size, 1090 migrate_devmem); 1091 if (IS_ERR(range)) { 1092 err = PTR_ERR(range); 1093 goto err_notifier_remove; 1094 } 1095 1096 drm_gpusvm_range_insert(notifier, range); 1097 if (notifier_alloc) 1098 drm_gpusvm_notifier_insert(gpusvm, notifier); 1099 1100 out_mmunlock: 1101 mmap_read_unlock(mm); 1102 mmput(mm); 1103 1104 return range; 1105 1106 err_notifier_remove: 1107 mmap_read_unlock(mm); 1108 if (notifier_alloc) 1109 mmu_interval_notifier_remove(¬ifier->notifier); 1110 err_notifier: 1111 if (notifier_alloc) 1112 drm_gpusvm_notifier_free(gpusvm, notifier); 1113 err_mmunlock: 1114 mmput(mm); 1115 return ERR_PTR(err); 1116 } 1117 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert); 1118 1119 /** 1120 * __drm_gpusvm_unmap_pages() - Unmap pages associated with GPU SVM pages (internal) 1121 * @gpusvm: Pointer to the GPU SVM structure 1122 * @svm_pages: Pointer to the GPU SVM pages structure 1123 * @npages: Number of pages to unmap 1124 * 1125 * This function unmap pages associated with a GPU SVM pages struct. Assumes and 1126 * asserts correct locking is in place when called. 
1127 */ 1128 static void __drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm, 1129 struct drm_gpusvm_pages *svm_pages, 1130 unsigned long npages) 1131 { 1132 struct drm_pagemap *dpagemap = svm_pages->dpagemap; 1133 struct device *dev = gpusvm->drm->dev; 1134 unsigned long i, j; 1135 1136 lockdep_assert_held(&gpusvm->notifier_lock); 1137 1138 if (svm_pages->flags.has_dma_mapping) { 1139 struct drm_gpusvm_pages_flags flags = { 1140 .__flags = svm_pages->flags.__flags, 1141 }; 1142 1143 for (i = 0, j = 0; i < npages; j++) { 1144 struct drm_pagemap_addr *addr = &svm_pages->dma_addr[j]; 1145 1146 if (addr->proto == DRM_INTERCONNECT_SYSTEM) 1147 dma_unmap_page(dev, 1148 addr->addr, 1149 PAGE_SIZE << addr->order, 1150 addr->dir); 1151 else if (dpagemap && dpagemap->ops->device_unmap) 1152 dpagemap->ops->device_unmap(dpagemap, 1153 dev, *addr); 1154 i += 1 << addr->order; 1155 } 1156 1157 /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ 1158 flags.has_devmem_pages = false; 1159 flags.has_dma_mapping = false; 1160 WRITE_ONCE(svm_pages->flags.__flags, flags.__flags); 1161 1162 drm_pagemap_put(svm_pages->dpagemap); 1163 svm_pages->dpagemap = NULL; 1164 } 1165 } 1166 1167 /** 1168 * __drm_gpusvm_free_pages() - Free dma array associated with GPU SVM pages 1169 * @gpusvm: Pointer to the GPU SVM structure 1170 * @svm_pages: Pointer to the GPU SVM pages structure 1171 * 1172 * This function frees the dma address array associated with a GPU SVM range. 1173 */ 1174 static void __drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm, 1175 struct drm_gpusvm_pages *svm_pages) 1176 { 1177 lockdep_assert_held(&gpusvm->notifier_lock); 1178 1179 if (svm_pages->dma_addr) { 1180 kvfree(svm_pages->dma_addr); 1181 svm_pages->dma_addr = NULL; 1182 } 1183 } 1184 1185 /** 1186 * drm_gpusvm_free_pages() - Free dma-mapping associated with GPU SVM pages 1187 * struct 1188 * @gpusvm: Pointer to the GPU SVM structure 1189 * @svm_pages: Pointer to the GPU SVM pages structure 1190 * @npages: Number of mapped pages 1191 * 1192 * This function unmaps and frees the dma address array associated with a GPU 1193 * SVM pages struct. 1194 */ 1195 void drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm, 1196 struct drm_gpusvm_pages *svm_pages, 1197 unsigned long npages) 1198 { 1199 drm_gpusvm_notifier_lock(gpusvm); 1200 __drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages); 1201 __drm_gpusvm_free_pages(gpusvm, svm_pages); 1202 drm_gpusvm_notifier_unlock(gpusvm); 1203 } 1204 EXPORT_SYMBOL_GPL(drm_gpusvm_free_pages); 1205 1206 /** 1207 * drm_gpusvm_range_remove() - Remove GPU SVM range 1208 * @gpusvm: Pointer to the GPU SVM structure 1209 * @range: Pointer to the GPU SVM range to be removed 1210 * 1211 * This function removes the specified GPU SVM range and also removes the parent 1212 * GPU SVM notifier if no more ranges remain in the notifier. The caller must 1213 * hold a lock to protect range and notifier removal. 
1214 */ 1215 void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm, 1216 struct drm_gpusvm_range *range) 1217 { 1218 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1219 drm_gpusvm_range_end(range)); 1220 struct drm_gpusvm_notifier *notifier; 1221 1222 drm_gpusvm_driver_lock_held(gpusvm); 1223 1224 notifier = drm_gpusvm_notifier_find(gpusvm, 1225 drm_gpusvm_range_start(range), 1226 drm_gpusvm_range_start(range) + 1); 1227 if (WARN_ON_ONCE(!notifier)) 1228 return; 1229 1230 drm_gpusvm_notifier_lock(gpusvm); 1231 __drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages); 1232 __drm_gpusvm_free_pages(gpusvm, &range->pages); 1233 __drm_gpusvm_range_remove(notifier, range); 1234 drm_gpusvm_notifier_unlock(gpusvm); 1235 1236 drm_gpusvm_range_put(range); 1237 1238 if (RB_EMPTY_ROOT(¬ifier->root.rb_root)) { 1239 if (!notifier->flags.removed) 1240 mmu_interval_notifier_remove(¬ifier->notifier); 1241 drm_gpusvm_notifier_remove(gpusvm, notifier); 1242 drm_gpusvm_notifier_free(gpusvm, notifier); 1243 } 1244 } 1245 EXPORT_SYMBOL_GPL(drm_gpusvm_range_remove); 1246 1247 /** 1248 * drm_gpusvm_range_get() - Get a reference to GPU SVM range 1249 * @range: Pointer to the GPU SVM range 1250 * 1251 * This function increments the reference count of the specified GPU SVM range. 1252 * 1253 * Return: Pointer to the GPU SVM range. 1254 */ 1255 struct drm_gpusvm_range * 1256 drm_gpusvm_range_get(struct drm_gpusvm_range *range) 1257 { 1258 kref_get(&range->refcount); 1259 1260 return range; 1261 } 1262 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get); 1263 1264 /** 1265 * drm_gpusvm_range_destroy() - Destroy GPU SVM range 1266 * @refcount: Pointer to the reference counter embedded in the GPU SVM range 1267 * 1268 * This function destroys the specified GPU SVM range when its reference count 1269 * reaches zero. If a custom range-free function is provided, it is invoked to 1270 * free the range; otherwise, the range is deallocated using kfree(). 1271 */ 1272 static void drm_gpusvm_range_destroy(struct kref *refcount) 1273 { 1274 struct drm_gpusvm_range *range = 1275 container_of(refcount, struct drm_gpusvm_range, refcount); 1276 struct drm_gpusvm *gpusvm = range->gpusvm; 1277 1278 if (gpusvm->ops->range_free) 1279 gpusvm->ops->range_free(range); 1280 else 1281 kfree(range); 1282 } 1283 1284 /** 1285 * drm_gpusvm_range_put() - Put a reference to GPU SVM range 1286 * @range: Pointer to the GPU SVM range 1287 * 1288 * This function decrements the reference count of the specified GPU SVM range 1289 * and frees it when the count reaches zero. 1290 */ 1291 void drm_gpusvm_range_put(struct drm_gpusvm_range *range) 1292 { 1293 kref_put(&range->refcount, drm_gpusvm_range_destroy); 1294 } 1295 EXPORT_SYMBOL_GPL(drm_gpusvm_range_put); 1296 1297 /** 1298 * drm_gpusvm_pages_valid() - GPU SVM range pages valid 1299 * @gpusvm: Pointer to the GPU SVM structure 1300 * @svm_pages: Pointer to the GPU SVM pages structure 1301 * 1302 * This function determines if a GPU SVM range pages are valid. Expected be 1303 * called holding gpusvm->notifier_lock and as the last step before committing a 1304 * GPU binding. This is akin to a notifier seqno check in the HMM documentation 1305 * but due to wider notifiers (i.e., notifiers which span multiple ranges) this 1306 * function is required for finer grained checking (i.e., per range) if pages 1307 * are valid. 
 *
 * Return: True if GPU SVM range has valid pages, False otherwise
 */
static bool drm_gpusvm_pages_valid(struct drm_gpusvm *gpusvm,
				   struct drm_gpusvm_pages *svm_pages)
{
	lockdep_assert_held(&gpusvm->notifier_lock);

	return svm_pages->flags.has_devmem_pages || svm_pages->flags.has_dma_mapping;
}

/**
 * drm_gpusvm_range_pages_valid() - GPU SVM range pages valid
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function determines if a GPU SVM range's pages are valid. It is expected
 * to be called holding gpusvm->notifier_lock and as the last step before
 * committing a GPU binding. This is akin to a notifier seqno check in the HMM
 * documentation but due to wider notifiers (i.e., notifiers which span multiple
 * ranges) this function is required for finer grained checking (i.e., per
 * range) if pages are valid.
 *
 * Return: True if GPU SVM range has valid pages, False otherwise
 */
bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm,
				  struct drm_gpusvm_range *range)
{
	return drm_gpusvm_pages_valid(gpusvm, &range->pages);
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid);

/**
 * drm_gpusvm_pages_valid_unlocked() - GPU SVM pages valid unlocked
 * @gpusvm: Pointer to the GPU SVM structure
 * @svm_pages: Pointer to the GPU SVM pages structure
 *
 * This function determines if the GPU SVM pages are valid. It is expected to be
 * called without holding gpusvm->notifier_lock.
 *
 * Return: True if the GPU SVM pages are valid, False otherwise
 */
static bool drm_gpusvm_pages_valid_unlocked(struct drm_gpusvm *gpusvm,
					    struct drm_gpusvm_pages *svm_pages)
{
	bool pages_valid;

	if (!svm_pages->dma_addr)
		return false;

	drm_gpusvm_notifier_lock(gpusvm);
	pages_valid = drm_gpusvm_pages_valid(gpusvm, svm_pages);
	if (!pages_valid)
		__drm_gpusvm_free_pages(gpusvm, svm_pages);
	drm_gpusvm_notifier_unlock(gpusvm);

	return pages_valid;
}

/**
 * drm_gpusvm_get_pages() - Get pages and populate GPU SVM pages struct
 * @gpusvm: Pointer to the GPU SVM structure
 * @svm_pages: The SVM pages to populate. This will contain the dma-addresses
 * @mm: The mm corresponding to the CPU range
 * @notifier: The corresponding notifier for the given CPU range
 * @pages_start: Start CPU address for the pages
 * @pages_end: End CPU address for the pages (exclusive)
 * @ctx: GPU SVM context
 *
 * This function gets pages for a CPU range and ensures they are mapped for DMA
 * access.
 *
 * Return: 0 on success, negative error code on failure.
 */
int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
			 struct drm_gpusvm_pages *svm_pages,
			 struct mm_struct *mm,
			 struct mmu_interval_notifier *notifier,
			 unsigned long pages_start, unsigned long pages_end,
			 const struct drm_gpusvm_ctx *ctx)
{
	struct hmm_range hmm_range = {
		.default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ?
0 : 1391 HMM_PFN_REQ_WRITE), 1392 .notifier = notifier, 1393 .start = pages_start, 1394 .end = pages_end, 1395 .dev_private_owner = ctx->device_private_page_owner, 1396 }; 1397 void *zdd; 1398 unsigned long timeout = 1399 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 1400 unsigned long i, j; 1401 unsigned long npages = npages_in_range(pages_start, pages_end); 1402 unsigned long num_dma_mapped; 1403 unsigned int order = 0; 1404 unsigned long *pfns; 1405 int err = 0; 1406 struct dev_pagemap *pagemap; 1407 struct drm_pagemap *dpagemap; 1408 struct drm_gpusvm_pages_flags flags; 1409 enum dma_data_direction dma_dir = ctx->read_only ? DMA_TO_DEVICE : 1410 DMA_BIDIRECTIONAL; 1411 1412 retry: 1413 if (time_after(jiffies, timeout)) 1414 return -EBUSY; 1415 1416 hmm_range.notifier_seq = mmu_interval_read_begin(notifier); 1417 if (drm_gpusvm_pages_valid_unlocked(gpusvm, svm_pages)) 1418 goto set_seqno; 1419 1420 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 1421 if (!pfns) 1422 return -ENOMEM; 1423 1424 if (!mmget_not_zero(mm)) { 1425 err = -EFAULT; 1426 goto err_free; 1427 } 1428 1429 hmm_range.hmm_pfns = pfns; 1430 while (true) { 1431 mmap_read_lock(mm); 1432 err = hmm_range_fault(&hmm_range); 1433 mmap_read_unlock(mm); 1434 1435 if (err == -EBUSY) { 1436 if (time_after(jiffies, timeout)) 1437 break; 1438 1439 hmm_range.notifier_seq = 1440 mmu_interval_read_begin(notifier); 1441 continue; 1442 } 1443 break; 1444 } 1445 mmput(mm); 1446 if (err) 1447 goto err_free; 1448 1449 map_pages: 1450 /* 1451 * Perform all dma mappings under the notifier lock to not 1452 * access freed pages. A notifier will either block on 1453 * the notifier lock or unmap dma. 1454 */ 1455 drm_gpusvm_notifier_lock(gpusvm); 1456 1457 flags.__flags = svm_pages->flags.__flags; 1458 if (flags.unmapped) { 1459 drm_gpusvm_notifier_unlock(gpusvm); 1460 err = -EFAULT; 1461 goto err_free; 1462 } 1463 1464 if (mmu_interval_read_retry(notifier, hmm_range.notifier_seq)) { 1465 drm_gpusvm_notifier_unlock(gpusvm); 1466 kvfree(pfns); 1467 goto retry; 1468 } 1469 1470 if (!svm_pages->dma_addr) { 1471 /* Unlock and restart mapping to allocate memory. */ 1472 drm_gpusvm_notifier_unlock(gpusvm); 1473 svm_pages->dma_addr = 1474 kvmalloc_array(npages, sizeof(*svm_pages->dma_addr), GFP_KERNEL); 1475 if (!svm_pages->dma_addr) { 1476 err = -ENOMEM; 1477 goto err_free; 1478 } 1479 goto map_pages; 1480 } 1481 1482 zdd = NULL; 1483 pagemap = NULL; 1484 num_dma_mapped = 0; 1485 for (i = 0, j = 0; i < npages; ++j) { 1486 struct page *page = hmm_pfn_to_page(pfns[i]); 1487 1488 order = drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages); 1489 if (is_device_private_page(page) || 1490 is_device_coherent_page(page)) { 1491 if (!ctx->allow_mixed && 1492 zdd != page->zone_device_data && i > 0) { 1493 err = -EOPNOTSUPP; 1494 goto err_unmap; 1495 } 1496 zdd = page->zone_device_data; 1497 if (pagemap != page_pgmap(page)) { 1498 if (i > 0) { 1499 err = -EOPNOTSUPP; 1500 goto err_unmap; 1501 } 1502 1503 pagemap = page_pgmap(page); 1504 dpagemap = drm_pagemap_page_to_dpagemap(page); 1505 if (drm_WARN_ON(gpusvm->drm, !dpagemap)) { 1506 /* 1507 * Raced. This is not supposed to happen 1508 * since hmm_range_fault() should've migrated 1509 * this page to system. 
1510 */ 1511 err = -EAGAIN; 1512 goto err_unmap; 1513 } 1514 } 1515 svm_pages->dma_addr[j] = 1516 dpagemap->ops->device_map(dpagemap, 1517 gpusvm->drm->dev, 1518 page, order, 1519 dma_dir); 1520 if (dma_mapping_error(gpusvm->drm->dev, 1521 svm_pages->dma_addr[j].addr)) { 1522 err = -EFAULT; 1523 goto err_unmap; 1524 } 1525 } else { 1526 dma_addr_t addr; 1527 1528 if (is_zone_device_page(page) || 1529 (pagemap && !ctx->allow_mixed)) { 1530 err = -EOPNOTSUPP; 1531 goto err_unmap; 1532 } 1533 1534 if (ctx->devmem_only) { 1535 err = -EFAULT; 1536 goto err_unmap; 1537 } 1538 1539 addr = dma_map_page(gpusvm->drm->dev, 1540 page, 0, 1541 PAGE_SIZE << order, 1542 dma_dir); 1543 if (dma_mapping_error(gpusvm->drm->dev, addr)) { 1544 err = -EFAULT; 1545 goto err_unmap; 1546 } 1547 1548 svm_pages->dma_addr[j] = drm_pagemap_addr_encode 1549 (addr, DRM_INTERCONNECT_SYSTEM, order, 1550 dma_dir); 1551 } 1552 i += 1 << order; 1553 num_dma_mapped = i; 1554 flags.has_dma_mapping = true; 1555 } 1556 1557 if (pagemap) { 1558 flags.has_devmem_pages = true; 1559 drm_pagemap_get(dpagemap); 1560 drm_pagemap_put(svm_pages->dpagemap); 1561 svm_pages->dpagemap = dpagemap; 1562 } 1563 1564 /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ 1565 WRITE_ONCE(svm_pages->flags.__flags, flags.__flags); 1566 1567 drm_gpusvm_notifier_unlock(gpusvm); 1568 kvfree(pfns); 1569 set_seqno: 1570 svm_pages->notifier_seq = hmm_range.notifier_seq; 1571 1572 return 0; 1573 1574 err_unmap: 1575 __drm_gpusvm_unmap_pages(gpusvm, svm_pages, num_dma_mapped); 1576 drm_gpusvm_notifier_unlock(gpusvm); 1577 err_free: 1578 kvfree(pfns); 1579 if (err == -EAGAIN) 1580 goto retry; 1581 return err; 1582 } 1583 EXPORT_SYMBOL_GPL(drm_gpusvm_get_pages); 1584 1585 /** 1586 * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range 1587 * @gpusvm: Pointer to the GPU SVM structure 1588 * @range: Pointer to the GPU SVM range structure 1589 * @ctx: GPU SVM context 1590 * 1591 * This function gets pages for a GPU SVM range and ensures they are mapped for 1592 * DMA access. 1593 * 1594 * Return: 0 on success, negative error code on failure. 1595 */ 1596 int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, 1597 struct drm_gpusvm_range *range, 1598 const struct drm_gpusvm_ctx *ctx) 1599 { 1600 return drm_gpusvm_get_pages(gpusvm, &range->pages, gpusvm->mm, 1601 &range->notifier->notifier, 1602 drm_gpusvm_range_start(range), 1603 drm_gpusvm_range_end(range), ctx); 1604 } 1605 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages); 1606 1607 /** 1608 * drm_gpusvm_unmap_pages() - Unmap GPU svm pages 1609 * @gpusvm: Pointer to the GPU SVM structure 1610 * @svm_pages: Pointer to the GPU SVM pages structure 1611 * @npages: Number of pages in @svm_pages. 1612 * @ctx: GPU SVM context 1613 * 1614 * This function unmaps pages associated with a GPU SVM pages struct. If 1615 * @in_notifier is set, it is assumed that gpusvm->notifier_lock is held in 1616 * write mode; if it is clear, it acquires gpusvm->notifier_lock in read mode. 1617 * Must be called in the invalidate() callback of the corresponding notifier for 1618 * IOMMU security model. 
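 *
 * A minimal sketch of calling this from a driver's own invalidation path for
 * the pages-only API; struct driver_pages, driver_svm_pages(), and the npages
 * bookkeeping are illustrative assumptions, and it is assumed the driver
 * already write-holds gpusvm->notifier_lock here, as required when setting
 * in_notifier:
 *
 * .. code-block:: c
 *
 *	void driver_invalidate_pages(struct drm_gpusvm *gpusvm,
 *				     struct mmu_interval_notifier *notifier,
 *				     const struct mmu_notifier_range *mmu_range)
 *	{
 *		struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
 *		struct driver_pages *p = driver_svm_pages(notifier);
 *
 *		// in_notifier skips taking notifier_lock again; the caller is
 *		// assumed to hold it in write mode in its invalidation path.
 *		drm_gpusvm_unmap_pages(gpusvm, &p->svm_pages, p->npages, &ctx);
 *	}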
1619 */ 1620 void drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm, 1621 struct drm_gpusvm_pages *svm_pages, 1622 unsigned long npages, 1623 const struct drm_gpusvm_ctx *ctx) 1624 { 1625 if (ctx->in_notifier) 1626 lockdep_assert_held_write(&gpusvm->notifier_lock); 1627 else 1628 drm_gpusvm_notifier_lock(gpusvm); 1629 1630 __drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages); 1631 1632 if (!ctx->in_notifier) 1633 drm_gpusvm_notifier_unlock(gpusvm); 1634 } 1635 EXPORT_SYMBOL_GPL(drm_gpusvm_unmap_pages); 1636 1637 /** 1638 * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range 1639 * @gpusvm: Pointer to the GPU SVM structure 1640 * @range: Pointer to the GPU SVM range structure 1641 * @ctx: GPU SVM context 1642 * 1643 * This function unmaps pages associated with a GPU SVM range. If @in_notifier 1644 * is set, it is assumed that gpusvm->notifier_lock is held in write mode; if it 1645 * is clear, it acquires gpusvm->notifier_lock in read mode. Must be called on 1646 * each GPU SVM range attached to notifier in gpusvm->ops->invalidate for IOMMU 1647 * security model. 1648 */ 1649 void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, 1650 struct drm_gpusvm_range *range, 1651 const struct drm_gpusvm_ctx *ctx) 1652 { 1653 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1654 drm_gpusvm_range_end(range)); 1655 1656 return drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages, ctx); 1657 } 1658 EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages); 1659 1660 /** 1661 * drm_gpusvm_range_evict() - Evict GPU SVM range 1662 * @gpusvm: Pointer to the GPU SVM structure 1663 * @range: Pointer to the GPU SVM range to be removed 1664 * 1665 * This function evicts the specified GPU SVM range. 1666 * 1667 * Return: 0 on success, a negative error code on failure. 1668 */ 1669 int drm_gpusvm_range_evict(struct drm_gpusvm *gpusvm, 1670 struct drm_gpusvm_range *range) 1671 { 1672 struct mmu_interval_notifier *notifier = &range->notifier->notifier; 1673 struct hmm_range hmm_range = { 1674 .default_flags = HMM_PFN_REQ_FAULT, 1675 .notifier = notifier, 1676 .start = drm_gpusvm_range_start(range), 1677 .end = drm_gpusvm_range_end(range), 1678 .dev_private_owner = NULL, 1679 }; 1680 unsigned long timeout = 1681 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 1682 unsigned long *pfns; 1683 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1684 drm_gpusvm_range_end(range)); 1685 int err = 0; 1686 struct mm_struct *mm = gpusvm->mm; 1687 1688 if (!mmget_not_zero(mm)) 1689 return -EFAULT; 1690 1691 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 1692 if (!pfns) 1693 return -ENOMEM; 1694 1695 hmm_range.hmm_pfns = pfns; 1696 while (!time_after(jiffies, timeout)) { 1697 hmm_range.notifier_seq = mmu_interval_read_begin(notifier); 1698 if (time_after(jiffies, timeout)) { 1699 err = -ETIME; 1700 break; 1701 } 1702 1703 mmap_read_lock(mm); 1704 err = hmm_range_fault(&hmm_range); 1705 mmap_read_unlock(mm); 1706 if (err != -EBUSY) 1707 break; 1708 } 1709 1710 kvfree(pfns); 1711 mmput(mm); 1712 1713 return err; 1714 } 1715 EXPORT_SYMBOL_GPL(drm_gpusvm_range_evict); 1716 1717 /** 1718 * drm_gpusvm_has_mapping() - Check if GPU SVM has mapping for the given address range 1719 * @gpusvm: Pointer to the GPU SVM structure. 
1720 * @start: Start address 1721 * @end: End address 1722 * 1723 * Return: True if GPU SVM has mapping, False otherwise 1724 */ 1725 bool drm_gpusvm_has_mapping(struct drm_gpusvm *gpusvm, unsigned long start, 1726 unsigned long end) 1727 { 1728 struct drm_gpusvm_notifier *notifier; 1729 1730 drm_gpusvm_for_each_notifier(notifier, gpusvm, start, end) { 1731 struct drm_gpusvm_range *range = NULL; 1732 1733 drm_gpusvm_for_each_range(range, notifier, start, end) 1734 return true; 1735 } 1736 1737 return false; 1738 } 1739 EXPORT_SYMBOL_GPL(drm_gpusvm_has_mapping); 1740 1741 /** 1742 * drm_gpusvm_range_set_unmapped() - Mark a GPU SVM range as unmapped 1743 * @range: Pointer to the GPU SVM range structure. 1744 * @mmu_range: Pointer to the MMU notifier range structure. 1745 * 1746 * This function marks a GPU SVM range as unmapped and sets the partial_unmap flag 1747 * if the range partially falls within the provided MMU notifier range. 1748 */ 1749 void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range, 1750 const struct mmu_notifier_range *mmu_range) 1751 { 1752 lockdep_assert_held_write(&range->gpusvm->notifier_lock); 1753 1754 range->pages.flags.unmapped = true; 1755 if (drm_gpusvm_range_start(range) < mmu_range->start || 1756 drm_gpusvm_range_end(range) > mmu_range->end) 1757 range->pages.flags.partial_unmap = true; 1758 } 1759 EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped); 1760 1761 MODULE_DESCRIPTION("DRM GPUSVM"); 1762 MODULE_LICENSE("GPL"); 1763