// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*
 * Copyright © 2024 Intel Corporation
 *
 * Authors:
 *     Matthew Brost <matthew.brost@intel.com>
 */

#include <linux/dma-mapping.h>
#include <linux/export.h>
#include <linux/hmm.h>
#include <linux/hugetlb_inline.h>
#include <linux/memremap.h>
#include <linux/mm_types.h>
#include <linux/slab.h>

#include <drm/drm_device.h>
#include <drm/drm_gpusvm.h>
#include <drm/drm_pagemap.h>
#include <drm/drm_print.h>

/**
 * DOC: Overview
 *
 * GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering Manager (DRM)
 * is a component of the DRM framework designed to manage shared virtual memory
 * between the CPU and GPU. It enables efficient data exchange and processing
 * for GPU-accelerated applications by allowing memory sharing and
 * synchronization between the CPU's and GPU's virtual address spaces.
 *
 * Key GPU SVM Components:
 *
 * - Notifiers:
 *	Used for tracking memory intervals and notifying the GPU of changes,
 *	notifiers are sized based on a GPU SVM initialization parameter, with a
 *	recommendation of 512M or larger. They maintain a Red-Black tree and a
 *	list of ranges that fall within the notifier interval. Notifiers are
 *	tracked within a GPU SVM Red-Black tree and list and are dynamically
 *	inserted or removed as ranges within the interval are created or
 *	destroyed.
 *
 * - Ranges:
 *	Represent memory ranges mapped in a DRM device and managed by GPU SVM.
 *	They are sized based on an array of chunk sizes, which is a GPU SVM
 *	initialization parameter, and the CPU address space. Upon GPU fault,
 *	the largest aligned chunk that fits within the faulting CPU address
 *	space is chosen for the range size. Ranges are expected to be
 *	dynamically allocated on GPU fault and removed on an MMU notifier UNMAP
 *	event. As mentioned above, ranges are tracked in a notifier's Red-Black
 *	tree.
 *
 * - Operations:
 *	Define the interface for driver-specific GPU SVM operations such as
 *	range allocation, notifier allocation, and invalidations.
 *
 * - Device Memory Allocations:
 *	Embedded structure containing enough information for GPU SVM to migrate
 *	to / from device memory.
 *
 * - Device Memory Operations:
 *	Define the interface for driver-specific device memory operations, such
 *	as releasing memory, populating pfns, and copying to / from device
 *	memory.
 *
 * This layer provides interfaces for allocating, mapping, migrating, and
 * releasing memory ranges between the CPU and GPU. It handles all core memory
 * management interactions (DMA mapping, HMM, and migration) and provides
 * driver-specific virtual functions (vfuncs). This infrastructure is sufficient
 * to build the expected driver components for an SVM implementation as detailed
 * below.
 *
 * Expected Driver Components:
 *
 * - GPU page fault handler:
 *	Used to create ranges and notifiers based on the fault address,
 *	optionally migrate the range to device memory, and create GPU bindings.
 *
 * - Garbage collector:
 *	Used to unmap and destroy GPU bindings for ranges. Ranges are expected
 *	to be added to the garbage collector upon a MMU_NOTIFY_UNMAP event in
 *	the notifier callback.
 *
 * - Notifier callback:
 *	Used to invalidate and DMA unmap GPU bindings for ranges.
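 *
 * As a rough wiring illustration (names and values below are illustrative, not
 * prescriptive), the driver typically provides a &struct drm_gpusvm_ops with at
 * least the invalidate vfunc (the allocation vfuncs are optional; GPU SVM falls
 * back to kzalloc()/kfree() when they are NULL) and then initializes GPU SVM
 * with its array of chunk sizes:
 *
 * .. code-block:: c
 *
 *	static const struct drm_gpusvm_ops driver_gpusvm_ops = {
 *		.invalidate = driver_invalidation,	// notifier callback, see examples below
 *	};
 *
 *	// Powers of 2, descending order, last entry SZ_4K
 *	static const unsigned long driver_chunk_sizes[] = { SZ_2M, SZ_64K, SZ_4K };
 *
 *	int driver_svm_init(struct drm_gpusvm *gpusvm, struct drm_device *drm,
 *			    struct mm_struct *mm)
 *	{
 *		// No device memory in this sketch, hence a NULL page owner;
 *		// the address window and 512M notifier size are illustrative.
 *		return drm_gpusvm_init(gpusvm, "driver-svm", drm, mm, NULL,
 *				       0, TASK_SIZE, SZ_512M,
 *				       &driver_gpusvm_ops, driver_chunk_sizes,
 *				       ARRAY_SIZE(driver_chunk_sizes));
 *	}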
 */

/**
 * DOC: Locking
 *
 * GPU SVM handles locking for core MM interactions, i.e., it locks/unlocks the
 * mmap lock as needed.
 *
 * GPU SVM introduces a global notifier lock, which safeguards the notifier's
 * range RB tree and list, as well as the range's DMA mappings and sequence
 * number. GPU SVM manages all necessary locking and unlocking operations,
 * except for the recheck of a range's pages being valid
 * (drm_gpusvm_range_pages_valid) when the driver is committing GPU bindings.
 * This lock corresponds to the ``driver->update`` lock mentioned in
 * Documentation/mm/hmm.rst. Future revisions may transition from a GPU SVM
 * global lock to a per-notifier lock if finer-grained locking is deemed
 * necessary.
 *
 * In addition to the locking mentioned above, the driver should implement a
 * lock to safeguard core GPU SVM function calls that modify state, such as
 * drm_gpusvm_range_find_or_insert and drm_gpusvm_range_remove. This lock is
 * denoted as 'driver_svm_lock' in code examples. Finer-grained driver-side
 * locking should also be possible for concurrent GPU fault processing within a
 * single GPU SVM. The 'driver_svm_lock' can be passed to
 * drm_gpusvm_driver_set_lock to add lockdep annotations to GPU SVM.
 */

/**
 * DOC: Partial Unmapping of Ranges
 *
 * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by CPU resulting
 * in MMU_NOTIFY_UNMAP event) presents several challenges, with the main one
 * being that a subset of the range still has CPU and GPU mappings. If the
 * backing store for the range is in device memory, a subset of the backing
 * store has references. One option would be to split the range and device
 * memory backing store, but the implementation for this would be quite
 * complicated. Given that partial unmappings are rare and driver-defined range
 * sizes are relatively small, GPU SVM does not support splitting of ranges.
 *
 * With no support for range splitting, upon partial unmapping of a range, the
 * driver is expected to invalidate and destroy the entire range. If the range
 * has device memory as its backing, the driver is also expected to migrate any
 * remaining pages back to RAM.
 */

/**
 * DOC: Examples
 *
 * This section provides three examples of how to build the expected driver
 * components: the GPU page fault handler, the garbage collector, and the
 * notifier callback.
 *
 * The generic code provided does not include logic for complex migration
 * policies, optimized invalidations, fine-grained driver locking, or other
 * potentially required driver locking (e.g., DMA-resv locks).
 *
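 * The examples assume a driver-wide 'driver_svm_lock'. A minimal sketch of how
 * such a lock might be implemented and registered with GPU SVM for lockdep
 * annotation follows; the single static mutex is purely illustrative, and real
 * drivers typically embed the lock in a driver or VM structure (the examples
 * below elide any argument to driver_svm_lock() for brevity):
 *
 * .. code-block:: c
 *
 *	static struct mutex driver_svm_mutex;	// backs 'driver_svm_lock'
 *
 *	void driver_svm_lock_init(struct drm_gpusvm *gpusvm)
 *	{
 *		mutex_init(&driver_svm_mutex);
 *		drm_gpusvm_driver_set_lock(gpusvm, &driver_svm_mutex);
 *	}
 *
 *	void driver_svm_lock(void)
 *	{
 *		mutex_lock(&driver_svm_mutex);
 *	}
 *
 *	void driver_svm_unlock(void)
 *	{
 *		mutex_unlock(&driver_svm_mutex);
 *	}
 *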
 * 1) GPU page fault handler
 *
 * .. code-block:: c
 *
 *	int driver_bind_range(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range)
 *	{
 *		int err = 0;
 *
 *		driver_alloc_and_setup_memory_for_bind(gpusvm, range);
 *
 *		drm_gpusvm_notifier_lock(gpusvm);
 *		if (drm_gpusvm_range_pages_valid(gpusvm, range))
 *			driver_commit_bind(gpusvm, range);
 *		else
 *			err = -EAGAIN;
 *		drm_gpusvm_notifier_unlock(gpusvm);
 *
 *		return err;
 *	}
 *
 *	int driver_gpu_fault(struct drm_gpusvm *gpusvm, unsigned long fault_addr,
 *			     unsigned long gpuva_start, unsigned long gpuva_end)
 *	{
 *		struct drm_gpusvm_ctx ctx = {};
 *		struct drm_gpusvm_range *range;
 *		int err;
 *
 *		driver_svm_lock();
 *	retry:
 *		// Always process UNMAPs first so view of GPU SVM ranges is current
 *		driver_garbage_collector(gpusvm);
 *
 *		range = drm_gpusvm_range_find_or_insert(gpusvm, fault_addr,
 *							gpuva_start, gpuva_end,
 *							&ctx);
 *		if (IS_ERR(range)) {
 *			err = PTR_ERR(range);
 *			goto unlock;
 *		}
 *
 *		if (driver_migration_policy(range)) {
 *			err = drm_pagemap_populate_mm(driver_choose_drm_pagemap(),
 *						      gpuva_start, gpuva_end, gpusvm->mm,
 *						      ctx.timeslice_ms);
 *			if (err)	// CPU mappings may have changed
 *				goto retry;
 *		}
 *
 *		err = drm_gpusvm_range_get_pages(gpusvm, range, &ctx);
 *		if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {	// CPU mappings changed
 *			if (err == -EOPNOTSUPP)
 *				drm_gpusvm_range_evict(gpusvm, range);
 *			goto retry;
 *		} else if (err) {
 *			goto unlock;
 *		}
 *
 *		err = driver_bind_range(gpusvm, range);
 *		if (err == -EAGAIN)	// CPU mappings changed
 *			goto retry;
 *
 *	unlock:
 *		driver_svm_unlock();
 *		return err;
 *	}
 *
 * 2) Garbage Collector
 *
 * .. code-block:: c
 *
 *	void __driver_garbage_collector(struct drm_gpusvm *gpusvm,
 *					struct drm_gpusvm_range *range)
 *	{
 *		assert_driver_svm_locked(gpusvm);
 *
 *		// Partial unmap, migrate any remaining device memory pages back to RAM
 *		if (range->flags.partial_unmap)
 *			drm_gpusvm_range_evict(gpusvm, range);
 *
 *		driver_unbind_range(range);
 *		drm_gpusvm_range_remove(gpusvm, range);
 *	}
 *
 *	void driver_garbage_collector(struct drm_gpusvm *gpusvm)
 *	{
 *		struct drm_gpusvm_range *range;
 *
 *		assert_driver_svm_locked(gpusvm);
 *
 *		for_each_range_in_garbage_collector(gpusvm, range)
 *			__driver_garbage_collector(gpusvm, range);
 *	}
 *
 * 3) Notifier callback
 *
 * .. code-block:: c
 *
 *	void driver_invalidation(struct drm_gpusvm *gpusvm,
 *				 struct drm_gpusvm_notifier *notifier,
 *				 const struct mmu_notifier_range *mmu_range)
 *	{
 *		struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
 *		struct drm_gpusvm_range *range = NULL;
 *
 *		driver_invalidate_device_pages(gpusvm, mmu_range->start, mmu_range->end);
 *
 *		drm_gpusvm_for_each_range(range, notifier, mmu_range->start,
 *					  mmu_range->end) {
 *			drm_gpusvm_range_unmap_pages(gpusvm, range, &ctx);
 *
 *			if (mmu_range->event != MMU_NOTIFY_UNMAP)
 *				continue;
 *
 *			drm_gpusvm_range_set_unmapped(range, mmu_range);
 *			driver_garbage_collector_add(gpusvm, range);
 *		}
 *	}
 */

/**
 * npages_in_range() - Calculate the number of pages in a given range
 * @start: The start address of the range
 * @end: The end address of the range
 *
 * This function calculates the number of pages in a given memory range,
 * specified by the start and end addresses.
It divides the difference 262 * between the end and start addresses by the page size (PAGE_SIZE) to 263 * determine the number of pages in the range. 264 * 265 * Return: The number of pages in the specified range. 266 */ 267 static unsigned long 268 npages_in_range(unsigned long start, unsigned long end) 269 { 270 return (end - start) >> PAGE_SHIFT; 271 } 272 273 /** 274 * drm_gpusvm_notifier_find() - Find GPU SVM notifier from GPU SVM 275 * @gpusvm: Pointer to the GPU SVM structure. 276 * @start: Start address of the notifier 277 * @end: End address of the notifier 278 * 279 * Return: A pointer to the drm_gpusvm_notifier if found or NULL 280 */ 281 struct drm_gpusvm_notifier * 282 drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm, unsigned long start, 283 unsigned long end) 284 { 285 struct interval_tree_node *itree; 286 287 itree = interval_tree_iter_first(&gpusvm->root, start, end - 1); 288 289 if (itree) 290 return container_of(itree, struct drm_gpusvm_notifier, itree); 291 else 292 return NULL; 293 } 294 EXPORT_SYMBOL_GPL(drm_gpusvm_notifier_find); 295 296 /** 297 * drm_gpusvm_range_find() - Find GPU SVM range from GPU SVM notifier 298 * @notifier: Pointer to the GPU SVM notifier structure. 299 * @start: Start address of the range 300 * @end: End address of the range 301 * 302 * Return: A pointer to the drm_gpusvm_range if found or NULL 303 */ 304 struct drm_gpusvm_range * 305 drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start, 306 unsigned long end) 307 { 308 struct interval_tree_node *itree; 309 310 itree = interval_tree_iter_first(¬ifier->root, start, end - 1); 311 312 if (itree) 313 return container_of(itree, struct drm_gpusvm_range, itree); 314 else 315 return NULL; 316 } 317 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find); 318 319 /** 320 * drm_gpusvm_notifier_invalidate() - Invalidate a GPU SVM notifier. 321 * @mni: Pointer to the mmu_interval_notifier structure. 322 * @mmu_range: Pointer to the mmu_notifier_range structure. 323 * @cur_seq: Current sequence number. 324 * 325 * This function serves as a generic MMU notifier for GPU SVM. It sets the MMU 326 * notifier sequence number and calls the driver invalidate vfunc under 327 * gpusvm->notifier_lock. 328 * 329 * Return: true if the operation succeeds, false otherwise. 330 */ 331 static bool 332 drm_gpusvm_notifier_invalidate(struct mmu_interval_notifier *mni, 333 const struct mmu_notifier_range *mmu_range, 334 unsigned long cur_seq) 335 { 336 struct drm_gpusvm_notifier *notifier = 337 container_of(mni, typeof(*notifier), notifier); 338 struct drm_gpusvm *gpusvm = notifier->gpusvm; 339 340 if (!mmu_notifier_range_blockable(mmu_range)) 341 return false; 342 343 down_write(&gpusvm->notifier_lock); 344 mmu_interval_set_seq(mni, cur_seq); 345 gpusvm->ops->invalidate(gpusvm, notifier, mmu_range); 346 up_write(&gpusvm->notifier_lock); 347 348 return true; 349 } 350 351 /* 352 * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM 353 */ 354 static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = { 355 .invalidate = drm_gpusvm_notifier_invalidate, 356 }; 357 358 /** 359 * drm_gpusvm_init() - Initialize the GPU SVM. 360 * @gpusvm: Pointer to the GPU SVM structure. 361 * @name: Name of the GPU SVM. 362 * @drm: Pointer to the DRM device structure. 363 * @mm: Pointer to the mm_struct for the address space. 364 * @device_private_page_owner: Device private pages owner. 365 * @mm_start: Start address of GPU SVM. 366 * @mm_range: Range of the GPU SVM. 
367 * @notifier_size: Size of individual notifiers. 368 * @ops: Pointer to the operations structure for GPU SVM. 369 * @chunk_sizes: Pointer to the array of chunk sizes used in range allocation. 370 * Entries should be powers of 2 in descending order with last 371 * entry being SZ_4K. 372 * @num_chunks: Number of chunks. 373 * 374 * This function initializes the GPU SVM. 375 * 376 * Return: 0 on success, a negative error code on failure. 377 */ 378 int drm_gpusvm_init(struct drm_gpusvm *gpusvm, 379 const char *name, struct drm_device *drm, 380 struct mm_struct *mm, void *device_private_page_owner, 381 unsigned long mm_start, unsigned long mm_range, 382 unsigned long notifier_size, 383 const struct drm_gpusvm_ops *ops, 384 const unsigned long *chunk_sizes, int num_chunks) 385 { 386 if (!ops->invalidate || !num_chunks) 387 return -EINVAL; 388 389 gpusvm->name = name; 390 gpusvm->drm = drm; 391 gpusvm->mm = mm; 392 gpusvm->device_private_page_owner = device_private_page_owner; 393 gpusvm->mm_start = mm_start; 394 gpusvm->mm_range = mm_range; 395 gpusvm->notifier_size = notifier_size; 396 gpusvm->ops = ops; 397 gpusvm->chunk_sizes = chunk_sizes; 398 gpusvm->num_chunks = num_chunks; 399 400 mmgrab(mm); 401 gpusvm->root = RB_ROOT_CACHED; 402 INIT_LIST_HEAD(&gpusvm->notifier_list); 403 404 init_rwsem(&gpusvm->notifier_lock); 405 406 fs_reclaim_acquire(GFP_KERNEL); 407 might_lock(&gpusvm->notifier_lock); 408 fs_reclaim_release(GFP_KERNEL); 409 410 #ifdef CONFIG_LOCKDEP 411 gpusvm->lock_dep_map = NULL; 412 #endif 413 414 return 0; 415 } 416 EXPORT_SYMBOL_GPL(drm_gpusvm_init); 417 418 /** 419 * to_drm_gpusvm_notifier() - retrieve the container struct for a given rbtree node 420 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_notifier struct 421 * 422 * Return: A pointer to the containing drm_gpusvm_notifier structure. 423 */ 424 static struct drm_gpusvm_notifier *to_drm_gpusvm_notifier(struct rb_node *node) 425 { 426 return container_of(node, struct drm_gpusvm_notifier, itree.rb); 427 } 428 429 /** 430 * drm_gpusvm_notifier_insert() - Insert GPU SVM notifier 431 * @gpusvm: Pointer to the GPU SVM structure 432 * @notifier: Pointer to the GPU SVM notifier structure 433 * 434 * This function inserts the GPU SVM notifier into the GPU SVM RB tree and list. 435 */ 436 static void drm_gpusvm_notifier_insert(struct drm_gpusvm *gpusvm, 437 struct drm_gpusvm_notifier *notifier) 438 { 439 struct rb_node *node; 440 struct list_head *head; 441 442 interval_tree_insert(¬ifier->itree, &gpusvm->root); 443 444 node = rb_prev(¬ifier->itree.rb); 445 if (node) 446 head = &(to_drm_gpusvm_notifier(node))->entry; 447 else 448 head = &gpusvm->notifier_list; 449 450 list_add(¬ifier->entry, head); 451 } 452 453 /** 454 * drm_gpusvm_notifier_remove() - Remove GPU SVM notifier 455 * @gpusvm: Pointer to the GPU SVM tructure 456 * @notifier: Pointer to the GPU SVM notifier structure 457 * 458 * This function removes the GPU SVM notifier from the GPU SVM RB tree and list. 459 */ 460 static void drm_gpusvm_notifier_remove(struct drm_gpusvm *gpusvm, 461 struct drm_gpusvm_notifier *notifier) 462 { 463 interval_tree_remove(¬ifier->itree, &gpusvm->root); 464 list_del(¬ifier->entry); 465 } 466 467 /** 468 * drm_gpusvm_fini() - Finalize the GPU SVM. 469 * @gpusvm: Pointer to the GPU SVM structure. 470 * 471 * This function finalizes the GPU SVM by cleaning up any remaining ranges and 472 * notifiers, and dropping a reference to struct MM. 
473 */ 474 void drm_gpusvm_fini(struct drm_gpusvm *gpusvm) 475 { 476 struct drm_gpusvm_notifier *notifier, *next; 477 478 drm_gpusvm_for_each_notifier_safe(notifier, next, gpusvm, 0, LONG_MAX) { 479 struct drm_gpusvm_range *range, *__next; 480 481 /* 482 * Remove notifier first to avoid racing with any invalidation 483 */ 484 mmu_interval_notifier_remove(¬ifier->notifier); 485 notifier->flags.removed = true; 486 487 drm_gpusvm_for_each_range_safe(range, __next, notifier, 0, 488 LONG_MAX) 489 drm_gpusvm_range_remove(gpusvm, range); 490 } 491 492 mmdrop(gpusvm->mm); 493 WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root)); 494 } 495 EXPORT_SYMBOL_GPL(drm_gpusvm_fini); 496 497 /** 498 * drm_gpusvm_notifier_alloc() - Allocate GPU SVM notifier 499 * @gpusvm: Pointer to the GPU SVM structure 500 * @fault_addr: Fault address 501 * 502 * This function allocates and initializes the GPU SVM notifier structure. 503 * 504 * Return: Pointer to the allocated GPU SVM notifier on success, ERR_PTR() on failure. 505 */ 506 static struct drm_gpusvm_notifier * 507 drm_gpusvm_notifier_alloc(struct drm_gpusvm *gpusvm, unsigned long fault_addr) 508 { 509 struct drm_gpusvm_notifier *notifier; 510 511 if (gpusvm->ops->notifier_alloc) 512 notifier = gpusvm->ops->notifier_alloc(); 513 else 514 notifier = kzalloc(sizeof(*notifier), GFP_KERNEL); 515 516 if (!notifier) 517 return ERR_PTR(-ENOMEM); 518 519 notifier->gpusvm = gpusvm; 520 notifier->itree.start = ALIGN_DOWN(fault_addr, gpusvm->notifier_size); 521 notifier->itree.last = ALIGN(fault_addr + 1, gpusvm->notifier_size) - 1; 522 INIT_LIST_HEAD(¬ifier->entry); 523 notifier->root = RB_ROOT_CACHED; 524 INIT_LIST_HEAD(¬ifier->range_list); 525 526 return notifier; 527 } 528 529 /** 530 * drm_gpusvm_notifier_free() - Free GPU SVM notifier 531 * @gpusvm: Pointer to the GPU SVM structure 532 * @notifier: Pointer to the GPU SVM notifier structure 533 * 534 * This function frees the GPU SVM notifier structure. 535 */ 536 static void drm_gpusvm_notifier_free(struct drm_gpusvm *gpusvm, 537 struct drm_gpusvm_notifier *notifier) 538 { 539 WARN_ON(!RB_EMPTY_ROOT(¬ifier->root.rb_root)); 540 541 if (gpusvm->ops->notifier_free) 542 gpusvm->ops->notifier_free(notifier); 543 else 544 kfree(notifier); 545 } 546 547 /** 548 * to_drm_gpusvm_range() - retrieve the container struct for a given rbtree node 549 * @node: a pointer to the rbtree node embedded within a drm_gpusvm_range struct 550 * 551 * Return: A pointer to the containing drm_gpusvm_range structure. 552 */ 553 static struct drm_gpusvm_range *to_drm_gpusvm_range(struct rb_node *node) 554 { 555 return container_of(node, struct drm_gpusvm_range, itree.rb); 556 } 557 558 /** 559 * drm_gpusvm_range_insert() - Insert GPU SVM range 560 * @notifier: Pointer to the GPU SVM notifier structure 561 * @range: Pointer to the GPU SVM range structure 562 * 563 * This function inserts the GPU SVM range into the notifier RB tree and list. 
564 */ 565 static void drm_gpusvm_range_insert(struct drm_gpusvm_notifier *notifier, 566 struct drm_gpusvm_range *range) 567 { 568 struct rb_node *node; 569 struct list_head *head; 570 571 drm_gpusvm_notifier_lock(notifier->gpusvm); 572 interval_tree_insert(&range->itree, ¬ifier->root); 573 574 node = rb_prev(&range->itree.rb); 575 if (node) 576 head = &(to_drm_gpusvm_range(node))->entry; 577 else 578 head = ¬ifier->range_list; 579 580 list_add(&range->entry, head); 581 drm_gpusvm_notifier_unlock(notifier->gpusvm); 582 } 583 584 /** 585 * __drm_gpusvm_range_remove() - Remove GPU SVM range 586 * @notifier: Pointer to the GPU SVM notifier structure 587 * @range: Pointer to the GPU SVM range structure 588 * 589 * This macro removes the GPU SVM range from the notifier RB tree and list. 590 */ 591 static void __drm_gpusvm_range_remove(struct drm_gpusvm_notifier *notifier, 592 struct drm_gpusvm_range *range) 593 { 594 interval_tree_remove(&range->itree, ¬ifier->root); 595 list_del(&range->entry); 596 } 597 598 /** 599 * drm_gpusvm_range_alloc() - Allocate GPU SVM range 600 * @gpusvm: Pointer to the GPU SVM structure 601 * @notifier: Pointer to the GPU SVM notifier structure 602 * @fault_addr: Fault address 603 * @chunk_size: Chunk size 604 * @migrate_devmem: Flag indicating whether to migrate device memory 605 * 606 * This function allocates and initializes the GPU SVM range structure. 607 * 608 * Return: Pointer to the allocated GPU SVM range on success, ERR_PTR() on failure. 609 */ 610 static struct drm_gpusvm_range * 611 drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm, 612 struct drm_gpusvm_notifier *notifier, 613 unsigned long fault_addr, unsigned long chunk_size, 614 bool migrate_devmem) 615 { 616 struct drm_gpusvm_range *range; 617 618 if (gpusvm->ops->range_alloc) 619 range = gpusvm->ops->range_alloc(gpusvm); 620 else 621 range = kzalloc(sizeof(*range), GFP_KERNEL); 622 623 if (!range) 624 return ERR_PTR(-ENOMEM); 625 626 kref_init(&range->refcount); 627 range->gpusvm = gpusvm; 628 range->notifier = notifier; 629 range->itree.start = ALIGN_DOWN(fault_addr, chunk_size); 630 range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1; 631 INIT_LIST_HEAD(&range->entry); 632 range->notifier_seq = LONG_MAX; 633 range->flags.migrate_devmem = migrate_devmem ? 1 : 0; 634 635 return range; 636 } 637 638 /** 639 * drm_gpusvm_check_pages() - Check pages 640 * @gpusvm: Pointer to the GPU SVM structure 641 * @notifier: Pointer to the GPU SVM notifier structure 642 * @start: Start address 643 * @end: End address 644 * 645 * Check if pages between start and end have been faulted in on the CPU. Use to 646 * prevent migration of pages without CPU backing store. 
647 * 648 * Return: True if pages have been faulted into CPU, False otherwise 649 */ 650 static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm, 651 struct drm_gpusvm_notifier *notifier, 652 unsigned long start, unsigned long end) 653 { 654 struct hmm_range hmm_range = { 655 .default_flags = 0, 656 .notifier = ¬ifier->notifier, 657 .start = start, 658 .end = end, 659 .dev_private_owner = gpusvm->device_private_page_owner, 660 }; 661 unsigned long timeout = 662 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 663 unsigned long *pfns; 664 unsigned long npages = npages_in_range(start, end); 665 int err, i; 666 667 mmap_assert_locked(gpusvm->mm); 668 669 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 670 if (!pfns) 671 return false; 672 673 hmm_range.notifier_seq = mmu_interval_read_begin(¬ifier->notifier); 674 hmm_range.hmm_pfns = pfns; 675 676 while (true) { 677 err = hmm_range_fault(&hmm_range); 678 if (err == -EBUSY) { 679 if (time_after(jiffies, timeout)) 680 break; 681 682 hmm_range.notifier_seq = 683 mmu_interval_read_begin(¬ifier->notifier); 684 continue; 685 } 686 break; 687 } 688 if (err) 689 goto err_free; 690 691 for (i = 0; i < npages;) { 692 if (!(pfns[i] & HMM_PFN_VALID)) { 693 err = -EFAULT; 694 goto err_free; 695 } 696 i += 0x1 << hmm_pfn_to_map_order(pfns[i]); 697 } 698 699 err_free: 700 kvfree(pfns); 701 return err ? false : true; 702 } 703 704 /** 705 * drm_gpusvm_range_chunk_size() - Determine chunk size for GPU SVM range 706 * @gpusvm: Pointer to the GPU SVM structure 707 * @notifier: Pointer to the GPU SVM notifier structure 708 * @vas: Pointer to the virtual memory area structure 709 * @fault_addr: Fault address 710 * @gpuva_start: Start address of GPUVA which mirrors CPU 711 * @gpuva_end: End address of GPUVA which mirrors CPU 712 * @check_pages_threshold: Check CPU pages for present threshold 713 * 714 * This function determines the chunk size for the GPU SVM range based on the 715 * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual 716 * memory area boundaries. 717 * 718 * Return: Chunk size on success, LONG_MAX on failure. 719 */ 720 static unsigned long 721 drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm, 722 struct drm_gpusvm_notifier *notifier, 723 struct vm_area_struct *vas, 724 unsigned long fault_addr, 725 unsigned long gpuva_start, 726 unsigned long gpuva_end, 727 unsigned long check_pages_threshold) 728 { 729 unsigned long start, end; 730 int i = 0; 731 732 retry: 733 for (; i < gpusvm->num_chunks; ++i) { 734 start = ALIGN_DOWN(fault_addr, gpusvm->chunk_sizes[i]); 735 end = ALIGN(fault_addr + 1, gpusvm->chunk_sizes[i]); 736 737 if (start >= vas->vm_start && end <= vas->vm_end && 738 start >= drm_gpusvm_notifier_start(notifier) && 739 end <= drm_gpusvm_notifier_end(notifier) && 740 start >= gpuva_start && end <= gpuva_end) 741 break; 742 } 743 744 if (i == gpusvm->num_chunks) 745 return LONG_MAX; 746 747 /* 748 * If allocation more than page, ensure not to overlap with existing 749 * ranges. 750 */ 751 if (end - start != SZ_4K) { 752 struct drm_gpusvm_range *range; 753 754 range = drm_gpusvm_range_find(notifier, start, end); 755 if (range) { 756 ++i; 757 goto retry; 758 } 759 760 /* 761 * XXX: Only create range on pages CPU has faulted in. Without 762 * this check, or prefault, on BMG 'xe_exec_system_allocator --r 763 * process-many-malloc' fails. In the failure case, each process 764 * mallocs 16k but the CPU VMA is ~128k which results in 64k SVM 765 * ranges. 
When migrating the SVM ranges, some processes fail in 766 * drm_pagemap_migrate_to_devmem with 'migrate.cpages != npages' 767 * and then upon drm_gpusvm_range_get_pages device pages from 768 * other processes are collected + faulted in which creates all 769 * sorts of problems. Unsure exactly how this happening, also 770 * problem goes away if 'xe_exec_system_allocator --r 771 * process-many-malloc' mallocs at least 64k at a time. 772 */ 773 if (end - start <= check_pages_threshold && 774 !drm_gpusvm_check_pages(gpusvm, notifier, start, end)) { 775 ++i; 776 goto retry; 777 } 778 } 779 780 return end - start; 781 } 782 783 #ifdef CONFIG_LOCKDEP 784 /** 785 * drm_gpusvm_driver_lock_held() - Assert GPU SVM driver lock is held 786 * @gpusvm: Pointer to the GPU SVM structure. 787 * 788 * Ensure driver lock is held. 789 */ 790 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm) 791 { 792 if ((gpusvm)->lock_dep_map) 793 lockdep_assert(lock_is_held_type((gpusvm)->lock_dep_map, 0)); 794 } 795 #else 796 static void drm_gpusvm_driver_lock_held(struct drm_gpusvm *gpusvm) 797 { 798 } 799 #endif 800 801 /** 802 * drm_gpusvm_find_vma_start() - Find start address for first VMA in range 803 * @gpusvm: Pointer to the GPU SVM structure 804 * @start: The inclusive start user address. 805 * @end: The exclusive end user address. 806 * 807 * Returns: The start address of first VMA within the provided range, 808 * ULONG_MAX otherwise. Assumes start_addr < end_addr. 809 */ 810 unsigned long 811 drm_gpusvm_find_vma_start(struct drm_gpusvm *gpusvm, 812 unsigned long start, 813 unsigned long end) 814 { 815 struct mm_struct *mm = gpusvm->mm; 816 struct vm_area_struct *vma; 817 unsigned long addr = ULONG_MAX; 818 819 if (!mmget_not_zero(mm)) 820 return addr; 821 822 mmap_read_lock(mm); 823 824 vma = find_vma_intersection(mm, start, end); 825 if (vma) 826 addr = vma->vm_start; 827 828 mmap_read_unlock(mm); 829 mmput(mm); 830 831 return addr; 832 } 833 EXPORT_SYMBOL_GPL(drm_gpusvm_find_vma_start); 834 835 /** 836 * drm_gpusvm_range_find_or_insert() - Find or insert GPU SVM range 837 * @gpusvm: Pointer to the GPU SVM structure 838 * @fault_addr: Fault address 839 * @gpuva_start: Start address of GPUVA which mirrors CPU 840 * @gpuva_end: End address of GPUVA which mirrors CPU 841 * @ctx: GPU SVM context 842 * 843 * This function finds or inserts a newly allocated a GPU SVM range based on the 844 * fault address. Caller must hold a lock to protect range lookup and insertion. 845 * 846 * Return: Pointer to the GPU SVM range on success, ERR_PTR() on failure. 
847 */ 848 struct drm_gpusvm_range * 849 drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm, 850 unsigned long fault_addr, 851 unsigned long gpuva_start, 852 unsigned long gpuva_end, 853 const struct drm_gpusvm_ctx *ctx) 854 { 855 struct drm_gpusvm_notifier *notifier; 856 struct drm_gpusvm_range *range; 857 struct mm_struct *mm = gpusvm->mm; 858 struct vm_area_struct *vas; 859 bool notifier_alloc = false; 860 unsigned long chunk_size; 861 int err; 862 bool migrate_devmem; 863 864 drm_gpusvm_driver_lock_held(gpusvm); 865 866 if (fault_addr < gpusvm->mm_start || 867 fault_addr > gpusvm->mm_start + gpusvm->mm_range) 868 return ERR_PTR(-EINVAL); 869 870 if (!mmget_not_zero(mm)) 871 return ERR_PTR(-EFAULT); 872 873 notifier = drm_gpusvm_notifier_find(gpusvm, fault_addr, fault_addr + 1); 874 if (!notifier) { 875 notifier = drm_gpusvm_notifier_alloc(gpusvm, fault_addr); 876 if (IS_ERR(notifier)) { 877 err = PTR_ERR(notifier); 878 goto err_mmunlock; 879 } 880 notifier_alloc = true; 881 err = mmu_interval_notifier_insert(¬ifier->notifier, 882 mm, 883 drm_gpusvm_notifier_start(notifier), 884 drm_gpusvm_notifier_size(notifier), 885 &drm_gpusvm_notifier_ops); 886 if (err) 887 goto err_notifier; 888 } 889 890 mmap_read_lock(mm); 891 892 vas = vma_lookup(mm, fault_addr); 893 if (!vas) { 894 err = -ENOENT; 895 goto err_notifier_remove; 896 } 897 898 if (!ctx->read_only && !(vas->vm_flags & VM_WRITE)) { 899 err = -EPERM; 900 goto err_notifier_remove; 901 } 902 903 range = drm_gpusvm_range_find(notifier, fault_addr, fault_addr + 1); 904 if (range) 905 goto out_mmunlock; 906 /* 907 * XXX: Short-circuiting migration based on migrate_vma_* current 908 * limitations. If/when migrate_vma_* add more support, this logic will 909 * have to change. 910 */ 911 migrate_devmem = ctx->devmem_possible && 912 vma_is_anonymous(vas) && !is_vm_hugetlb_page(vas); 913 914 chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas, 915 fault_addr, gpuva_start, 916 gpuva_end, 917 ctx->check_pages_threshold); 918 if (chunk_size == LONG_MAX) { 919 err = -EINVAL; 920 goto err_notifier_remove; 921 } 922 923 range = drm_gpusvm_range_alloc(gpusvm, notifier, fault_addr, chunk_size, 924 migrate_devmem); 925 if (IS_ERR(range)) { 926 err = PTR_ERR(range); 927 goto err_notifier_remove; 928 } 929 930 drm_gpusvm_range_insert(notifier, range); 931 if (notifier_alloc) 932 drm_gpusvm_notifier_insert(gpusvm, notifier); 933 934 out_mmunlock: 935 mmap_read_unlock(mm); 936 mmput(mm); 937 938 return range; 939 940 err_notifier_remove: 941 mmap_read_unlock(mm); 942 if (notifier_alloc) 943 mmu_interval_notifier_remove(¬ifier->notifier); 944 err_notifier: 945 if (notifier_alloc) 946 drm_gpusvm_notifier_free(gpusvm, notifier); 947 err_mmunlock: 948 mmput(mm); 949 return ERR_PTR(err); 950 } 951 EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert); 952 953 /** 954 * __drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range (internal) 955 * @gpusvm: Pointer to the GPU SVM structure 956 * @range: Pointer to the GPU SVM range structure 957 * @npages: Number of pages to unmap 958 * 959 * This function unmap pages associated with a GPU SVM range. Assumes and 960 * asserts correct locking is in place when called. 
961 */ 962 static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, 963 struct drm_gpusvm_range *range, 964 unsigned long npages) 965 { 966 unsigned long i, j; 967 struct drm_pagemap *dpagemap = range->dpagemap; 968 struct device *dev = gpusvm->drm->dev; 969 970 lockdep_assert_held(&gpusvm->notifier_lock); 971 972 if (range->flags.has_dma_mapping) { 973 struct drm_gpusvm_range_flags flags = { 974 .__flags = range->flags.__flags, 975 }; 976 977 for (i = 0, j = 0; i < npages; j++) { 978 struct drm_pagemap_addr *addr = &range->dma_addr[j]; 979 980 if (addr->proto == DRM_INTERCONNECT_SYSTEM) 981 dma_unmap_page(dev, 982 addr->addr, 983 PAGE_SIZE << addr->order, 984 addr->dir); 985 else if (dpagemap && dpagemap->ops->device_unmap) 986 dpagemap->ops->device_unmap(dpagemap, 987 dev, *addr); 988 i += 1 << addr->order; 989 } 990 991 /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ 992 flags.has_devmem_pages = false; 993 flags.has_dma_mapping = false; 994 WRITE_ONCE(range->flags.__flags, flags.__flags); 995 996 range->dpagemap = NULL; 997 } 998 } 999 1000 /** 1001 * drm_gpusvm_range_free_pages() - Free pages associated with a GPU SVM range 1002 * @gpusvm: Pointer to the GPU SVM structure 1003 * @range: Pointer to the GPU SVM range structure 1004 * 1005 * This function frees the dma address array associated with a GPU SVM range. 1006 */ 1007 static void drm_gpusvm_range_free_pages(struct drm_gpusvm *gpusvm, 1008 struct drm_gpusvm_range *range) 1009 { 1010 lockdep_assert_held(&gpusvm->notifier_lock); 1011 1012 if (range->dma_addr) { 1013 kvfree(range->dma_addr); 1014 range->dma_addr = NULL; 1015 } 1016 } 1017 1018 /** 1019 * drm_gpusvm_range_remove() - Remove GPU SVM range 1020 * @gpusvm: Pointer to the GPU SVM structure 1021 * @range: Pointer to the GPU SVM range to be removed 1022 * 1023 * This function removes the specified GPU SVM range and also removes the parent 1024 * GPU SVM notifier if no more ranges remain in the notifier. The caller must 1025 * hold a lock to protect range and notifier removal. 1026 */ 1027 void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm, 1028 struct drm_gpusvm_range *range) 1029 { 1030 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1031 drm_gpusvm_range_end(range)); 1032 struct drm_gpusvm_notifier *notifier; 1033 1034 drm_gpusvm_driver_lock_held(gpusvm); 1035 1036 notifier = drm_gpusvm_notifier_find(gpusvm, 1037 drm_gpusvm_range_start(range), 1038 drm_gpusvm_range_start(range) + 1); 1039 if (WARN_ON_ONCE(!notifier)) 1040 return; 1041 1042 drm_gpusvm_notifier_lock(gpusvm); 1043 __drm_gpusvm_range_unmap_pages(gpusvm, range, npages); 1044 drm_gpusvm_range_free_pages(gpusvm, range); 1045 __drm_gpusvm_range_remove(notifier, range); 1046 drm_gpusvm_notifier_unlock(gpusvm); 1047 1048 drm_gpusvm_range_put(range); 1049 1050 if (RB_EMPTY_ROOT(¬ifier->root.rb_root)) { 1051 if (!notifier->flags.removed) 1052 mmu_interval_notifier_remove(¬ifier->notifier); 1053 drm_gpusvm_notifier_remove(gpusvm, notifier); 1054 drm_gpusvm_notifier_free(gpusvm, notifier); 1055 } 1056 } 1057 EXPORT_SYMBOL_GPL(drm_gpusvm_range_remove); 1058 1059 /** 1060 * drm_gpusvm_range_get() - Get a reference to GPU SVM range 1061 * @range: Pointer to the GPU SVM range 1062 * 1063 * This function increments the reference count of the specified GPU SVM range. 1064 * 1065 * Return: Pointer to the GPU SVM range. 
 */
struct drm_gpusvm_range *
drm_gpusvm_range_get(struct drm_gpusvm_range *range)
{
	kref_get(&range->refcount);

	return range;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_get);

/**
 * drm_gpusvm_range_destroy() - Destroy GPU SVM range
 * @refcount: Pointer to the reference counter embedded in the GPU SVM range
 *
 * This function destroys the specified GPU SVM range when its reference count
 * reaches zero. If a custom range-free function is provided, it is invoked to
 * free the range; otherwise, the range is deallocated using kfree().
 */
static void drm_gpusvm_range_destroy(struct kref *refcount)
{
	struct drm_gpusvm_range *range =
		container_of(refcount, struct drm_gpusvm_range, refcount);
	struct drm_gpusvm *gpusvm = range->gpusvm;

	if (gpusvm->ops->range_free)
		gpusvm->ops->range_free(range);
	else
		kfree(range);
}

/**
 * drm_gpusvm_range_put() - Put a reference to GPU SVM range
 * @range: Pointer to the GPU SVM range
 *
 * This function decrements the reference count of the specified GPU SVM range
 * and frees it when the count reaches zero.
 */
void drm_gpusvm_range_put(struct drm_gpusvm_range *range)
{
	kref_put(&range->refcount, drm_gpusvm_range_destroy);
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_put);

/**
 * drm_gpusvm_range_pages_valid() - GPU SVM range pages valid
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function determines if a GPU SVM range's pages are valid. It is expected
 * to be called while holding gpusvm->notifier_lock and as the last step before
 * committing a GPU binding. This is akin to a notifier seqno check in the HMM
 * documentation, but due to wider notifiers (i.e., notifiers which span multiple
 * ranges) this function is required for finer-grained checking (i.e., per range)
 * of whether pages are valid.
 *
 * Return: True if GPU SVM range has valid pages, False otherwise
 */
bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm,
				  struct drm_gpusvm_range *range)
{
	lockdep_assert_held(&gpusvm->notifier_lock);

	return range->flags.has_devmem_pages || range->flags.has_dma_mapping;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid);

/**
 * drm_gpusvm_range_pages_valid_unlocked() - GPU SVM range pages valid unlocked
 * @gpusvm: Pointer to the GPU SVM structure
 * @range: Pointer to the GPU SVM range structure
 *
 * This function determines if a GPU SVM range's pages are valid. It is expected
 * to be called without holding gpusvm->notifier_lock.
1139 * 1140 * Return: True if GPU SVM range has valid pages, False otherwise 1141 */ 1142 static bool 1143 drm_gpusvm_range_pages_valid_unlocked(struct drm_gpusvm *gpusvm, 1144 struct drm_gpusvm_range *range) 1145 { 1146 bool pages_valid; 1147 1148 if (!range->dma_addr) 1149 return false; 1150 1151 drm_gpusvm_notifier_lock(gpusvm); 1152 pages_valid = drm_gpusvm_range_pages_valid(gpusvm, range); 1153 if (!pages_valid) 1154 drm_gpusvm_range_free_pages(gpusvm, range); 1155 drm_gpusvm_notifier_unlock(gpusvm); 1156 1157 return pages_valid; 1158 } 1159 1160 /** 1161 * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range 1162 * @gpusvm: Pointer to the GPU SVM structure 1163 * @range: Pointer to the GPU SVM range structure 1164 * @ctx: GPU SVM context 1165 * 1166 * This function gets pages for a GPU SVM range and ensures they are mapped for 1167 * DMA access. 1168 * 1169 * Return: 0 on success, negative error code on failure. 1170 */ 1171 int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, 1172 struct drm_gpusvm_range *range, 1173 const struct drm_gpusvm_ctx *ctx) 1174 { 1175 struct mmu_interval_notifier *notifier = &range->notifier->notifier; 1176 struct hmm_range hmm_range = { 1177 .default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ? 0 : 1178 HMM_PFN_REQ_WRITE), 1179 .notifier = notifier, 1180 .start = drm_gpusvm_range_start(range), 1181 .end = drm_gpusvm_range_end(range), 1182 .dev_private_owner = gpusvm->device_private_page_owner, 1183 }; 1184 struct mm_struct *mm = gpusvm->mm; 1185 void *zdd; 1186 unsigned long timeout = 1187 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 1188 unsigned long i, j; 1189 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1190 drm_gpusvm_range_end(range)); 1191 unsigned long num_dma_mapped; 1192 unsigned int order = 0; 1193 unsigned long *pfns; 1194 int err = 0; 1195 struct dev_pagemap *pagemap; 1196 struct drm_pagemap *dpagemap; 1197 struct drm_gpusvm_range_flags flags; 1198 1199 retry: 1200 hmm_range.notifier_seq = mmu_interval_read_begin(notifier); 1201 if (drm_gpusvm_range_pages_valid_unlocked(gpusvm, range)) 1202 goto set_seqno; 1203 1204 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 1205 if (!pfns) 1206 return -ENOMEM; 1207 1208 if (!mmget_not_zero(mm)) { 1209 err = -EFAULT; 1210 goto err_free; 1211 } 1212 1213 hmm_range.hmm_pfns = pfns; 1214 while (true) { 1215 mmap_read_lock(mm); 1216 err = hmm_range_fault(&hmm_range); 1217 mmap_read_unlock(mm); 1218 1219 if (err == -EBUSY) { 1220 if (time_after(jiffies, timeout)) 1221 break; 1222 1223 hmm_range.notifier_seq = 1224 mmu_interval_read_begin(notifier); 1225 continue; 1226 } 1227 break; 1228 } 1229 mmput(mm); 1230 if (err) 1231 goto err_free; 1232 1233 map_pages: 1234 /* 1235 * Perform all dma mappings under the notifier lock to not 1236 * access freed pages. A notifier will either block on 1237 * the notifier lock or unmap dma. 1238 */ 1239 drm_gpusvm_notifier_lock(gpusvm); 1240 1241 flags.__flags = range->flags.__flags; 1242 if (flags.unmapped) { 1243 drm_gpusvm_notifier_unlock(gpusvm); 1244 err = -EFAULT; 1245 goto err_free; 1246 } 1247 1248 if (mmu_interval_read_retry(notifier, hmm_range.notifier_seq)) { 1249 drm_gpusvm_notifier_unlock(gpusvm); 1250 kvfree(pfns); 1251 goto retry; 1252 } 1253 1254 if (!range->dma_addr) { 1255 /* Unlock and restart mapping to allocate memory. 
*/ 1256 drm_gpusvm_notifier_unlock(gpusvm); 1257 range->dma_addr = kvmalloc_array(npages, 1258 sizeof(*range->dma_addr), 1259 GFP_KERNEL); 1260 if (!range->dma_addr) { 1261 err = -ENOMEM; 1262 goto err_free; 1263 } 1264 goto map_pages; 1265 } 1266 1267 zdd = NULL; 1268 pagemap = NULL; 1269 num_dma_mapped = 0; 1270 for (i = 0, j = 0; i < npages; ++j) { 1271 struct page *page = hmm_pfn_to_page(pfns[i]); 1272 1273 order = hmm_pfn_to_map_order(pfns[i]); 1274 if (is_device_private_page(page) || 1275 is_device_coherent_page(page)) { 1276 if (zdd != page->zone_device_data && i > 0) { 1277 err = -EOPNOTSUPP; 1278 goto err_unmap; 1279 } 1280 zdd = page->zone_device_data; 1281 if (pagemap != page_pgmap(page)) { 1282 if (i > 0) { 1283 err = -EOPNOTSUPP; 1284 goto err_unmap; 1285 } 1286 1287 pagemap = page_pgmap(page); 1288 dpagemap = drm_pagemap_page_to_dpagemap(page); 1289 if (drm_WARN_ON(gpusvm->drm, !dpagemap)) { 1290 /* 1291 * Raced. This is not supposed to happen 1292 * since hmm_range_fault() should've migrated 1293 * this page to system. 1294 */ 1295 err = -EAGAIN; 1296 goto err_unmap; 1297 } 1298 } 1299 range->dma_addr[j] = 1300 dpagemap->ops->device_map(dpagemap, 1301 gpusvm->drm->dev, 1302 page, order, 1303 DMA_BIDIRECTIONAL); 1304 if (dma_mapping_error(gpusvm->drm->dev, 1305 range->dma_addr[j].addr)) { 1306 err = -EFAULT; 1307 goto err_unmap; 1308 } 1309 } else { 1310 dma_addr_t addr; 1311 1312 if (is_zone_device_page(page) || pagemap) { 1313 err = -EOPNOTSUPP; 1314 goto err_unmap; 1315 } 1316 1317 if (ctx->devmem_only) { 1318 err = -EFAULT; 1319 goto err_unmap; 1320 } 1321 1322 addr = dma_map_page(gpusvm->drm->dev, 1323 page, 0, 1324 PAGE_SIZE << order, 1325 DMA_BIDIRECTIONAL); 1326 if (dma_mapping_error(gpusvm->drm->dev, addr)) { 1327 err = -EFAULT; 1328 goto err_unmap; 1329 } 1330 1331 range->dma_addr[j] = drm_pagemap_addr_encode 1332 (addr, DRM_INTERCONNECT_SYSTEM, order, 1333 DMA_BIDIRECTIONAL); 1334 } 1335 i += 1 << order; 1336 num_dma_mapped = i; 1337 flags.has_dma_mapping = true; 1338 } 1339 1340 if (pagemap) { 1341 flags.has_devmem_pages = true; 1342 range->dpagemap = dpagemap; 1343 } 1344 1345 /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ 1346 WRITE_ONCE(range->flags.__flags, flags.__flags); 1347 1348 drm_gpusvm_notifier_unlock(gpusvm); 1349 kvfree(pfns); 1350 set_seqno: 1351 range->notifier_seq = hmm_range.notifier_seq; 1352 1353 return 0; 1354 1355 err_unmap: 1356 __drm_gpusvm_range_unmap_pages(gpusvm, range, num_dma_mapped); 1357 drm_gpusvm_notifier_unlock(gpusvm); 1358 err_free: 1359 kvfree(pfns); 1360 if (err == -EAGAIN) 1361 goto retry; 1362 return err; 1363 } 1364 EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages); 1365 1366 /** 1367 * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range 1368 * drm_gpusvm_range_evict() - Evict GPU SVM range 1369 * @gpusvm: Pointer to the GPU SVM structure 1370 * @range: Pointer to the GPU SVM range structure 1371 * @ctx: GPU SVM context 1372 * 1373 * This function unmaps pages associated with a GPU SVM range. If @in_notifier 1374 * is set, it is assumed that gpusvm->notifier_lock is held in write mode; if it 1375 * is clear, it acquires gpusvm->notifier_lock in read mode. Must be called on 1376 * each GPU SVM range attached to notifier in gpusvm->ops->invalidate for IOMMU 1377 * security model. 
1378 */ 1379 void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, 1380 struct drm_gpusvm_range *range, 1381 const struct drm_gpusvm_ctx *ctx) 1382 { 1383 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1384 drm_gpusvm_range_end(range)); 1385 1386 if (ctx->in_notifier) 1387 lockdep_assert_held_write(&gpusvm->notifier_lock); 1388 else 1389 drm_gpusvm_notifier_lock(gpusvm); 1390 1391 __drm_gpusvm_range_unmap_pages(gpusvm, range, npages); 1392 1393 if (!ctx->in_notifier) 1394 drm_gpusvm_notifier_unlock(gpusvm); 1395 } 1396 EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages); 1397 1398 /** 1399 * drm_gpusvm_range_evict() - Evict GPU SVM range 1400 * @gpusvm: Pointer to the GPU SVM structure 1401 * @range: Pointer to the GPU SVM range to be removed 1402 * 1403 * This function evicts the specified GPU SVM range. 1404 * 1405 * Return: 0 on success, a negative error code on failure. 1406 */ 1407 int drm_gpusvm_range_evict(struct drm_gpusvm *gpusvm, 1408 struct drm_gpusvm_range *range) 1409 { 1410 struct mmu_interval_notifier *notifier = &range->notifier->notifier; 1411 struct hmm_range hmm_range = { 1412 .default_flags = HMM_PFN_REQ_FAULT, 1413 .notifier = notifier, 1414 .start = drm_gpusvm_range_start(range), 1415 .end = drm_gpusvm_range_end(range), 1416 .dev_private_owner = NULL, 1417 }; 1418 unsigned long timeout = 1419 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 1420 unsigned long *pfns; 1421 unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), 1422 drm_gpusvm_range_end(range)); 1423 int err = 0; 1424 struct mm_struct *mm = gpusvm->mm; 1425 1426 if (!mmget_not_zero(mm)) 1427 return -EFAULT; 1428 1429 pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); 1430 if (!pfns) 1431 return -ENOMEM; 1432 1433 hmm_range.hmm_pfns = pfns; 1434 while (!time_after(jiffies, timeout)) { 1435 hmm_range.notifier_seq = mmu_interval_read_begin(notifier); 1436 if (time_after(jiffies, timeout)) { 1437 err = -ETIME; 1438 break; 1439 } 1440 1441 mmap_read_lock(mm); 1442 err = hmm_range_fault(&hmm_range); 1443 mmap_read_unlock(mm); 1444 if (err != -EBUSY) 1445 break; 1446 } 1447 1448 kvfree(pfns); 1449 mmput(mm); 1450 1451 return err; 1452 } 1453 EXPORT_SYMBOL_GPL(drm_gpusvm_range_evict); 1454 1455 /** 1456 * drm_gpusvm_has_mapping() - Check if GPU SVM has mapping for the given address range 1457 * @gpusvm: Pointer to the GPU SVM structure. 1458 * @start: Start address 1459 * @end: End address 1460 * 1461 * Return: True if GPU SVM has mapping, False otherwise 1462 */ 1463 bool drm_gpusvm_has_mapping(struct drm_gpusvm *gpusvm, unsigned long start, 1464 unsigned long end) 1465 { 1466 struct drm_gpusvm_notifier *notifier; 1467 1468 drm_gpusvm_for_each_notifier(notifier, gpusvm, start, end) { 1469 struct drm_gpusvm_range *range = NULL; 1470 1471 drm_gpusvm_for_each_range(range, notifier, start, end) 1472 return true; 1473 } 1474 1475 return false; 1476 } 1477 EXPORT_SYMBOL_GPL(drm_gpusvm_has_mapping); 1478 1479 /** 1480 * drm_gpusvm_range_set_unmapped() - Mark a GPU SVM range as unmapped 1481 * @range: Pointer to the GPU SVM range structure. 1482 * @mmu_range: Pointer to the MMU notifier range structure. 1483 * 1484 * This function marks a GPU SVM range as unmapped and sets the partial_unmap flag 1485 * if the range partially falls within the provided MMU notifier range. 
1486 */ 1487 void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range, 1488 const struct mmu_notifier_range *mmu_range) 1489 { 1490 lockdep_assert_held_write(&range->gpusvm->notifier_lock); 1491 1492 range->flags.unmapped = true; 1493 if (drm_gpusvm_range_start(range) < mmu_range->start || 1494 drm_gpusvm_range_end(range) > mmu_range->end) 1495 range->flags.partial_unmap = true; 1496 } 1497 EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped); 1498 1499 MODULE_DESCRIPTION("DRM GPUSVM"); 1500 MODULE_LICENSE("GPL"); 1501