// SPDX-License-Identifier: MIT
/*
 * Copyright © 2008-2015 Intel Corporation
 */

#include <linux/oom.h>
#include <linux/sched/mm.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/vmalloc.h>

#include <drm/drm_print.h>

#include "gt/intel_gt_requests.h"
#include "gt/intel_gt.h"

#include "i915_trace.h"

static bool swap_available(void)
{
	return get_nr_swap_pages() > 0;
}

static bool can_release_pages(struct drm_i915_gem_object *obj)
{
	/* Consider only shrinkable objects. */
	if (!i915_gem_object_is_shrinkable(obj))
		return false;

	/*
	 * We can only return physical pages to the system if we can either
	 * discard the contents (because the user has marked them as being
	 * purgeable) or if we can move their contents out to swap.
	 */
	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
}

static bool drop_pages(struct drm_i915_gem_object *obj,
		       unsigned long shrink, bool trylock_vm)
{
	unsigned long flags;

	flags = 0;
	if (shrink & I915_SHRINK_ACTIVE)
		flags |= I915_GEM_OBJECT_UNBIND_ACTIVE;
	if (!(shrink & I915_SHRINK_BOUND))
		flags |= I915_GEM_OBJECT_UNBIND_TEST;
	if (trylock_vm)
		flags |= I915_GEM_OBJECT_UNBIND_VM_TRYLOCK;

	if (i915_gem_object_unbind(obj, flags) == 0)
		return true;

	return false;
}

static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned int flags)
{
	if (obj->ops->shrink) {
		unsigned int shrink_flags = 0;

		if (!(flags & I915_SHRINK_ACTIVE))
			shrink_flags |= I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT;

		if (flags & I915_SHRINK_WRITEBACK)
			shrink_flags |= I915_GEM_OBJECT_SHRINK_WRITEBACK;

		return obj->ops->shrink(obj, shrink_flags);
	}

	return 0;
}
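
/*
 * Note on how the helpers above compose in the main shrink loop below:
 * drop_pages() first unbinds the object from any address spaces,
 * __i915_gem_object_put_pages() then releases its backing pages, and
 * try_to_writeback() finally asks the backend to discard or write back the
 * backing store. An object only counts towards the reclaim target when all
 * three steps succeed.
 */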

/**
 * i915_gem_shrink - Shrink buffer object caches
 * @ww: i915 gem ww acquire ctx, or NULL
 * @i915: i915 device
 * @target: amount of memory to make available, in pages
 * @nr_scanned: optional output for number of pages scanned (incremental)
 * @shrink: control flags for selecting cache types
 *
 * This function is the main interface to the shrinker. It will try to release
 * up to @target pages of main memory backing storage from buffer objects.
 * Selection of the specific caches can be done with @shrink. This is e.g.
 * useful when purgeable objects should be removed from caches preferentially.
 *
 * Note that it's not guaranteed that the released amount is actually available
 * as free system memory - the pages might still be in use due to other reasons
 * (like cpu mmaps) or the mm core has reused them before we could grab them.
 * Therefore code that needs to explicitly shrink buffer object caches (e.g. to
 * avoid deadlocks in memory reclaim) must fall back to i915_gem_shrink_all().
 *
 * Also note that any kind of pinning (both per-vma address space pins and
 * backing storage pins at the buffer object level) results in the shrinker
 * code having to skip the object.
 *
 * Returns:
 * The number of pages of backing storage actually released.
 */
unsigned long
i915_gem_shrink(struct i915_gem_ww_ctx *ww,
		struct drm_i915_private *i915,
		unsigned long target,
		unsigned long *nr_scanned,
		unsigned int shrink)
{
	const struct {
		struct list_head *list;
		unsigned int bit;
	} phases[] = {
		{ &i915->mm.purge_list, ~0u },
		{
			&i915->mm.shrink_list,
			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND
		},
		{ NULL, 0 },
	}, *phase;
	intel_wakeref_t wakeref = NULL;
	unsigned long count = 0;
	unsigned long scanned = 0;
	int err = 0, i = 0;
	struct intel_gt *gt;

	/* CHV + VTD workaround use stop_machine(); need to trylock vm->mutex */
	bool trylock_vm = !ww && intel_vm_no_concurrent_access_wa(i915);

	trace_i915_gem_shrink(i915, target, shrink);

	/*
	 * Unbinding of objects will require HW access; let us not wake the
	 * device just to recover a little memory. If absolutely necessary,
	 * we will force the wake during oom-notifier.
	 */
	if (shrink & I915_SHRINK_BOUND) {
		wakeref = intel_runtime_pm_get_if_in_use(&i915->runtime_pm);
		if (!wakeref)
			shrink &= ~I915_SHRINK_BOUND;
	}

	/*
	 * When shrinking the active list, we should also consider active
	 * contexts. Active contexts are pinned until they are retired, and
	 * so cannot be simply unbound to retire and unpin their pages. To
	 * shrink the contexts, we must wait until the gpu is idle and
	 * completed its switch to the kernel context. In short, we do
	 * not have a good mechanism for idling a specific context, but
	 * what we can do is give them a kick so that we do not keep idle
	 * contexts around longer than is necessary.
	 */
	if (shrink & I915_SHRINK_ACTIVE) {
		for_each_gt(gt, i915, i)
			/* Retire requests to unpin all idle contexts */
			intel_gt_retire_requests(gt);
	}

	/*
	 * As we may completely rewrite the (un)bound list whilst unbinding
	 * (due to retiring requests) we have to strictly process only
	 * one element of the list at a time, and recheck the list
	 * on every iteration.
	 *
	 * In particular, we must hold a reference whilst removing the
	 * object as we may end up waiting for and/or retiring the objects.
	 * This might release the final reference (held by the active list)
	 * and result in the object being freed from under us. This is
	 * similar to the precautions the eviction code must take whilst
	 * removing objects.
	 *
	 * Also note that although these lists do not hold a reference to
	 * the object we can safely grab one here: The final object
	 * unreferencing and the bound_list are both protected by the
	 * i915->mm.obj_lock and so we won't ever be able to observe an
	 * object on the bound_list with a reference count equal to 0.
	 */
	for (phase = phases; phase->list; phase++) {
		struct list_head still_in_list;
		struct drm_i915_gem_object *obj;
		unsigned long flags;

		if ((shrink & phase->bit) == 0)
			continue;

		INIT_LIST_HEAD(&still_in_list);

		/*
		 * We serialize our access to unreferenced objects through
		 * the use of the obj_lock. While the objects are not
		 * yet freed (due to RCU then a workqueue) we still want
		 * to be able to shrink their pages, so they remain on
		 * the unbound/bound list until actually freed.
		 */
		spin_lock_irqsave(&i915->mm.obj_lock, flags);
		while (count < target &&
		       (obj = list_first_entry_or_null(phase->list,
						       typeof(*obj),
						       mm.link))) {
			list_move_tail(&obj->mm.link, &still_in_list);

			if (shrink & I915_SHRINK_VMAPS &&
			    !is_vmalloc_addr(obj->mm.mapping))
				continue;

			if (!(shrink & I915_SHRINK_ACTIVE) &&
			    i915_gem_object_is_framebuffer(obj))
				continue;

			if (!can_release_pages(obj))
				continue;

			if (!kref_get_unless_zero(&obj->base.refcount))
				continue;

			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);

			/* May arrive from get_pages on another bo */
			if (!ww) {
				if (!i915_gem_object_trylock(obj, NULL))
					goto skip;
			} else {
				err = i915_gem_object_lock(obj, ww);
				if (err)
					goto skip;
			}

			if (drop_pages(obj, shrink, trylock_vm) &&
			    !__i915_gem_object_put_pages(obj) &&
			    !try_to_writeback(obj, shrink))
				count += obj->base.size >> PAGE_SHIFT;

			if (!ww)
				i915_gem_object_unlock(obj);

			scanned += obj->base.size >> PAGE_SHIFT;
skip:
			i915_gem_object_put(obj);

			spin_lock_irqsave(&i915->mm.obj_lock, flags);
			if (err)
				break;
		}
		list_splice_tail(&still_in_list, phase->list);
		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
		if (err)
			break;
	}

	if (shrink & I915_SHRINK_BOUND)
		intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	if (err)
		return err;

	if (nr_scanned)
		*nr_scanned += scanned;
	return count;
}

/**
 * i915_gem_shrink_all - Shrink buffer object caches completely
 * @i915: i915 device
 *
 * This is a simple wrapper around i915_gem_shrink() to aggressively shrink all
 * caches completely. It first waits for and retires all outstanding requests
 * so that backing storage for active objects can also be released.
 *
 * This should only be used in code that intentionally quiesces the gpu or as a
 * last-ditch effort when memory seems to have run out.
 *
 * Returns:
 * The number of pages of backing storage actually released.
 */
unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
{
	intel_wakeref_t wakeref;
	unsigned long freed = 0;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
		freed = i915_gem_shrink(NULL, i915, -1UL, NULL,
					I915_SHRINK_BOUND |
					I915_SHRINK_UNBOUND);
	}

	return freed;
}
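
/*
 * Illustrative sketch only, kept out of the build: a narrower variant of
 * i915_gem_shrink_all() that reclaims purgeable and unbound backing store
 * without waking the device (only the BOUND phase needs the runtime-PM
 * reference taken inside i915_gem_shrink()). The wrapper name below is
 * hypothetical and not part of the driver.
 */
#if 0
static unsigned long example_shrink_unbound(struct drm_i915_private *i915)
{
	return i915_gem_shrink(NULL, i915, -1UL, NULL,
			       I915_SHRINK_UNBOUND | I915_SHRINK_WRITEBACK);
}
#endif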

static unsigned long
i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *i915 = shrinker->private_data;
	unsigned long num_objects;
	unsigned long count;

	count = READ_ONCE(i915->mm.shrink_memory) >> PAGE_SHIFT;
	num_objects = READ_ONCE(i915->mm.shrink_count);

	/*
	 * Update our preferred vmscan batch size for the next pass.
	 * Our rough guess for an effective batch size is about two
	 * available GEM objects worth of pages. That is, we don't want
	 * the shrinker to fire until it is worth the cost of freeing an
	 * entire GEM object.
	 */
	if (num_objects) {
		unsigned long avg = 2 * count / num_objects;

		i915->mm.shrinker->batch =
			max((i915->mm.shrinker->batch + avg) >> 1,
			    128ul /* default SHRINK_BATCH */);
	}

	return count;
}

static unsigned long
i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *i915 = shrinker->private_data;
	unsigned long freed;

	sc->nr_scanned = 0;

	freed = i915_gem_shrink(NULL, i915,
				sc->nr_to_scan,
				&sc->nr_scanned,
				I915_SHRINK_BOUND |
				I915_SHRINK_UNBOUND);
	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
		intel_wakeref_t wakeref;

		with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
			freed += i915_gem_shrink(NULL, i915,
						 sc->nr_to_scan - sc->nr_scanned,
						 &sc->nr_scanned,
						 I915_SHRINK_ACTIVE |
						 I915_SHRINK_BOUND |
						 I915_SHRINK_UNBOUND |
						 I915_SHRINK_WRITEBACK);
		}
	}

	return sc->nr_scanned ? freed : SHRINK_STOP;
}

static int
i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct drm_i915_private *i915 =
		container_of(nb, struct drm_i915_private, mm.oom_notifier);
	struct drm_i915_gem_object *obj;
	unsigned long unevictable, available, freed_pages;
	intel_wakeref_t wakeref;
	unsigned long flags;

	freed_pages = 0;
	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		freed_pages += i915_gem_shrink(NULL, i915, -1UL, NULL,
					       I915_SHRINK_BOUND |
					       I915_SHRINK_UNBOUND |
					       I915_SHRINK_WRITEBACK);

	/*
	 * Because we may be allocating inside our own driver, we cannot
	 * assert that there are no objects with pinned pages that are not
	 * being pointed to by hardware.
	 */
	available = unevictable = 0;
	spin_lock_irqsave(&i915->mm.obj_lock, flags);
	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
		if (!can_release_pages(obj))
			unevictable += obj->base.size >> PAGE_SHIFT;
		else
			available += obj->base.size >> PAGE_SHIFT;
	}
	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);

	if (freed_pages || available)
		pr_info("Purging GPU memory, %lu pages freed, "
			"%lu pages still pinned, %lu pages left available.\n",
			freed_pages, unevictable, available);

	*(unsigned long *)ptr += freed_pages;
	return NOTIFY_DONE;
}

static int
i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct drm_i915_private *i915 =
		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
	struct i915_vma *vma, *next;
	unsigned long freed_pages = 0;
	intel_wakeref_t wakeref;
	struct intel_gt *gt;
	int i;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		freed_pages += i915_gem_shrink(NULL, i915, -1UL, NULL,
					       I915_SHRINK_BOUND |
					       I915_SHRINK_UNBOUND |
					       I915_SHRINK_VMAPS);

	/* We also want to clear any cached iomaps as they wrap vmap */
	for_each_gt(gt, i915, i) {
		mutex_lock(&gt->ggtt->vm.mutex);
		list_for_each_entry_safe(vma, next,
					 &gt->ggtt->vm.bound_list, vm_link) {
			unsigned long count = i915_vma_size(vma) >> PAGE_SHIFT;
			struct drm_i915_gem_object *obj = vma->obj;

			if (!vma->iomap || i915_vma_is_active(vma))
				continue;

			if (!i915_gem_object_trylock(obj, NULL))
				continue;

			if (__i915_vma_unbind(vma) == 0)
				freed_pages += count;

			i915_gem_object_unlock(obj);
		}
		mutex_unlock(&gt->ggtt->vm.mutex);
	}

	*(unsigned long *)ptr += freed_pages;
	return NOTIFY_DONE;
}

void i915_gem_driver_register__shrinker(struct drm_i915_private *i915)
{
	i915->mm.shrinker = shrinker_alloc(0, "drm-i915_gem");
	if (!i915->mm.shrinker) {
		drm_WARN_ON(&i915->drm, 1);
	} else {
		i915->mm.shrinker->scan_objects = i915_gem_shrinker_scan;
		i915->mm.shrinker->count_objects = i915_gem_shrinker_count;
		i915->mm.shrinker->batch = 4096;
		i915->mm.shrinker->private_data = i915;

		shrinker_register(i915->mm.shrinker);
	}

	i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
	drm_WARN_ON(&i915->drm, register_oom_notifier(&i915->mm.oom_notifier));

	i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap;
	drm_WARN_ON(&i915->drm,
		    register_vmap_purge_notifier(&i915->mm.vmap_notifier));
}

void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915)
{
	drm_WARN_ON(&i915->drm,
		    unregister_vmap_purge_notifier(&i915->mm.vmap_notifier));
	drm_WARN_ON(&i915->drm,
		    unregister_oom_notifier(&i915->mm.oom_notifier));
	shrinker_free(i915->mm.shrinker);
}

void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
				    struct mutex *mutex)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
	mutex_release(&mutex->dep_map, _RET_IP_);

	fs_reclaim_release(GFP_KERNEL);
}

/**
 * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
 * default all object types that support shrinking (see IS_SHRINKABLE) will
 * also make the object visible to the shrinker after allocating the system
 * memory pages.
 * @obj: The GEM object.
 *
 * This is typically used for special kernel internal objects that can't be
 * easily processed by the shrinker, like if they are perma-pinned.
 */
void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = obj_to_i915(obj);
	unsigned long flags;

	/*
	 * We can only be called while the pages are pinned or when
	 * the pages are released. If pinned, we should only be called
	 * from a single caller under controlled conditions; and on release
	 * only one caller may release us. Neither of the two paths may cross.
	 */
	if (atomic_add_unless(&obj->mm.shrink_pin, 1, 0))
		return;

	spin_lock_irqsave(&i915->mm.obj_lock, flags);
	if (!atomic_fetch_inc(&obj->mm.shrink_pin) &&
	    !list_empty(&obj->mm.link)) {
		list_del_init(&obj->mm.link);
		i915->mm.shrink_count--;
		i915->mm.shrink_memory -= obj->base.size;
	}
	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
}

static void ___i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
					       struct list_head *head)
{
	struct drm_i915_private *i915 = obj_to_i915(obj);
	unsigned long flags;

	if (!i915_gem_object_is_shrinkable(obj))
		return;

	if (atomic_add_unless(&obj->mm.shrink_pin, -1, 1))
		return;

	spin_lock_irqsave(&i915->mm.obj_lock, flags);
	GEM_BUG_ON(!kref_read(&obj->base.refcount));
	if (atomic_dec_and_test(&obj->mm.shrink_pin)) {
		GEM_BUG_ON(!list_empty(&obj->mm.link));

		list_add_tail(&obj->mm.link, head);
		i915->mm.shrink_count++;
		i915->mm.shrink_memory += obj->base.size;
	}
	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
}

/**
 * __i915_gem_object_make_shrinkable - Move the object to the tail of the
 * shrinkable list. Objects on this list might be swapped out. Used with
 * WILLNEED objects.
 * @obj: The GEM object.
 *
 * DO NOT USE. This is intended to be called on very special objects that don't
 * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
 * underneath.
 */
void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
{
	___i915_gem_object_make_shrinkable(obj,
					   &obj_to_i915(obj)->mm.shrink_list);
}

/**
 * __i915_gem_object_make_purgeable - Move the object to the tail of the
 * purgeable list. Objects on this list might be swapped out. Used with
 * DONTNEED objects.
 * @obj: The GEM object.
 *
 * DO NOT USE. This is intended to be called on very special objects that don't
 * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
 * underneath.
 */
void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
{
	___i915_gem_object_make_shrinkable(obj,
					   &obj_to_i915(obj)->mm.purge_list);
}

/**
 * i915_gem_object_make_shrinkable - Move the object to the tail of the
 * shrinkable list. Objects on this list might be swapped out. Used with
 * WILLNEED objects.
 * @obj: The GEM object.
 *
 * MUST only be called on objects which have backing pages.
 *
 * MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
 */
void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
	__i915_gem_object_make_shrinkable(obj);
}

/**
 * i915_gem_object_make_purgeable - Move the object to the tail of the purgeable
 * list. Used with DONTNEED objects. Unlike with shrinkable objects, the
 * shrinker will attempt to discard the backing pages instead of trying to swap
 * them out.
 * @obj: The GEM object.
 *
 * MUST only be called on objects which have backing pages.
 *
 * MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
 */
void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
	__i915_gem_object_make_purgeable(obj);
}
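
/*
 * Illustrative sketch only, kept out of the build: the expected pairing for a
 * kernel-internal object that is temporarily hidden from the shrinker while
 * its pages are in use and later returned to the shrinkable or purgeable
 * lists. The function below and its @keep_resident parameter are hypothetical;
 * only the make_(un)shrinkable/purgeable calls are part of this file.
 */
#if 0
static void example_shrink_pin_cycle(struct drm_i915_gem_object *obj,
				     bool keep_resident)
{
	/* Hide from the shrinker while we rely on the pages staying put */
	i915_gem_object_make_unshrinkable(obj);

	/* ... use the object's backing pages ... */

	/* Balance the call above once reclaim may touch the object again */
	if (keep_resident)
		i915_gem_object_make_shrinkable(obj);	/* WILLNEED */
	else
		i915_gem_object_make_purgeable(obj);	/* DONTNEED */
}
#endif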