// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>

#include "i915_drv.h"
#include "intel_memory_region.h"
#include "intel_region_ttm.h"

#include "gem/i915_gem_mman.h"
#include "gem/i915_gem_object.h"
#include "gem/i915_gem_region.h"
#include "gem/i915_gem_ttm.h"
#include "gem/i915_gem_ttm_pm.h"

#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_migrate.h"

#define I915_TTM_PRIO_PURGE     0
#define I915_TTM_PRIO_NO_PAGES  1
#define I915_TTM_PRIO_HAS_PAGES 2

/*
 * Size of struct ttm_place vector in on-stack struct ttm_placement allocs
 */
#define I915_TTM_MAX_PLACEMENTS INTEL_REGION_UNKNOWN

/**
 * struct i915_ttm_tt - TTM page vector with additional private information
 * @ttm: The base TTM page vector.
 * @dev: The struct device used for dma mapping and unmapping.
 * @cached_st: The cached scatter-gather table.
 *
 * Note that DMA may be going on right up to the point where the page-
 * vector is unpopulated in delayed destroy. Hence keep the
 * scatter-gather table mapped and cached up to that point. This is
 * different from the cached gem object io scatter-gather table which
 * doesn't have an associated dma mapping.
 */
struct i915_ttm_tt {
	struct ttm_tt ttm;
	struct device *dev;
	struct sg_table *cached_st;
};

static const struct ttm_place sys_placement_flags = {
	.fpfn = 0,
	.lpfn = 0,
	.mem_type = I915_PL_SYSTEM,
	.flags = 0,
};

static struct ttm_placement i915_sys_placement = {
	.num_placement = 1,
	.placement = &sys_placement_flags,
	.num_busy_placement = 1,
	.busy_placement = &sys_placement_flags,
};

/**
 * i915_ttm_sys_placement - Return the struct ttm_placement to be
 * used for an object in system memory.
 *
 * Rather than making the struct extern, use this
 * function.
 *
 * Return: A pointer to a static variable for sys placement.
 */
struct ttm_placement *i915_ttm_sys_placement(void)
{
	return &i915_sys_placement;
}

static int i915_ttm_err_to_gem(int err)
{
	/* Fastpath */
	if (likely(!err))
		return 0;

	switch (err) {
	case -EBUSY:
		/*
		 * TTM likes to convert -EDEADLK to -EBUSY, and wants us to
		 * restart the operation, since we don't record the contending
		 * lock. We use -EAGAIN to restart.
		 */
		return -EAGAIN;
	case -ENOSPC:
		/*
		 * Memory type / region is full, and we can't evict.
		 * Except possibly system, which returns -ENOMEM.
		 */
		return -ENXIO;
	default:
		break;
	}

	return err;
}

static bool gpu_binds_iomem(struct ttm_resource *mem)
{
	return mem->mem_type != TTM_PL_SYSTEM;
}

static bool cpu_maps_iomem(struct ttm_resource *mem)
{
	/* Once / if we support GGTT, this is also false for cached ttm_tts */
	return mem->mem_type != TTM_PL_SYSTEM;
}
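
/*
 * Map the TTM backing-store properties onto a GEM cache level: LLC
 * caching is only reported when the platform has an LLC or can snoop,
 * the resource is not GPU-bound iomem and the ttm_tt is CPU-cached.
 */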
static enum i915_cache_level
i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
		     struct ttm_tt *ttm)
{
	return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && !gpu_binds_iomem(res) &&
		ttm->caching == ttm_cached) ? I915_CACHE_LLC :
		I915_CACHE_NONE;
}

static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj);

static enum ttm_caching
i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj)
{
	/*
	 * Objects only allowed in system get cached cpu-mappings.
	 * Other objects get WC mapping for now. Even if in system.
	 */
	if (obj->mm.region->type == INTEL_MEMORY_SYSTEM &&
	    obj->mm.n_placements <= 1)
		return ttm_cached;

	return ttm_write_combined;
}

static void
i915_ttm_place_from_region(const struct intel_memory_region *mr,
			   struct ttm_place *place,
			   unsigned int flags)
{
	memset(place, 0, sizeof(*place));
	place->mem_type = intel_region_to_ttm_type(mr);

	if (flags & I915_BO_ALLOC_CONTIGUOUS)
		place->flags = TTM_PL_FLAG_CONTIGUOUS;
}

static void
i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj,
			    struct ttm_place *requested,
			    struct ttm_place *busy,
			    struct ttm_placement *placement)
{
	unsigned int num_allowed = obj->mm.n_placements;
	unsigned int flags = obj->flags;
	unsigned int i;

	placement->num_placement = 1;
	i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] :
				   obj->mm.region, requested, flags);

	/* Cache this on object? */
	placement->num_busy_placement = num_allowed;
	for (i = 0; i < placement->num_busy_placement; ++i)
		i915_ttm_place_from_region(obj->mm.placements[i], busy + i, flags);

	if (num_allowed == 0) {
		*busy = *requested;
		placement->num_busy_placement = 1;
	}

	placement->placement = requested;
	placement->busy_placement = busy;
}

static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
					 uint32_t page_flags)
{
	struct ttm_resource_manager *man =
		ttm_manager_type(bo->bdev, bo->resource->mem_type);
	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
	struct i915_ttm_tt *i915_tt;
	int ret;

	i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL);
	if (!i915_tt)
		return NULL;

	if (obj->flags & I915_BO_ALLOC_CPU_CLEAR &&
	    man->use_tt)
		page_flags |= TTM_TT_FLAG_ZERO_ALLOC;

	ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags,
			  i915_ttm_select_tt_caching(obj));
	if (ret) {
		kfree(i915_tt);
		return NULL;
	}

	i915_tt->dev = obj->base.dev->dev;

	return &i915_tt->ttm;
}

static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
{
	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);

	if (i915_tt->cached_st) {
		dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st,
				  DMA_BIDIRECTIONAL, 0);
		sg_free_table(i915_tt->cached_st);
		kfree(i915_tt->cached_st);
		i915_tt->cached_st = NULL;
	}
	ttm_pool_free(&bdev->pool, ttm);
}

static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm)
{
	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);

	ttm_tt_fini(ttm);
	kfree(i915_tt);
}
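
/*
 * TTM callback asking whether evicting @bo is worthwhile. Defer to the
 * GEM evictability check, which rules out pinned objects.
 */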
static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
				       const struct ttm_place *place)
{
	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);

	/* Will do for now. Our pinned objects are still on TTM's LRU lists */
	return i915_gem_object_evictable(obj);
}

static void i915_ttm_evict_flags(struct ttm_buffer_object *bo,
				 struct ttm_placement *placement)
{
	*placement = i915_sys_placement;
}

static int i915_ttm_move_notify(struct ttm_buffer_object *bo)
{
	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
	int ret;

	ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
	if (ret)
		return ret;

	ret = __i915_gem_object_put_pages(obj);
	if (ret)
		return ret;

	return 0;
}

static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj)
{
	struct radix_tree_iter iter;
	void __rcu **slot;

	if (!obj->ttm.cached_io_st)
		return;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, &obj->ttm.get_io_page.radix, &iter, 0)
		radix_tree_delete(&obj->ttm.get_io_page.radix, iter.index);
	rcu_read_unlock();

	sg_free_table(obj->ttm.cached_io_st);
	kfree(obj->ttm.cached_io_st);
	obj->ttm.cached_io_st = NULL;
}

static void
i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj)
{
	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);

	if (cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) {
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->read_domains = I915_GEM_DOMAIN_WC;
	} else {
		obj->write_domain = I915_GEM_DOMAIN_CPU;
		obj->read_domains = I915_GEM_DOMAIN_CPU;
	}
}

static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj)
{
	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
	unsigned int cache_level;
	unsigned int i;

	/*
	 * If the object was moved to an allowable region, update the object
	 * region to consider it migrated. Note that if it's currently not
	 * in an allowable region, it's evicted and we don't update the
	 * object region.
	 */
	if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) {
		for (i = 0; i < obj->mm.n_placements; ++i) {
			struct intel_memory_region *mr = obj->mm.placements[i];

			if (intel_region_to_ttm_type(mr) == bo->resource->mem_type &&
			    mr != obj->mm.region) {
				i915_gem_object_release_memory_region(obj);
				i915_gem_object_init_memory_region(obj, mr);
				break;
			}
		}
	}

	obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);

	obj->mem_flags |= cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM :
		I915_BO_FLAG_STRUCT_PAGE;

	cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
					   bo->ttm);
	i915_gem_object_set_cache_coherency(obj, cache_level);
}
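
/*
 * Discard the backing store by validating against an empty placement,
 * then mark the object as purged so its contents are not preserved.
 */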
static void i915_ttm_purge(struct drm_i915_gem_object *obj)
{
	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
	};
	struct ttm_placement place = {};
	int ret;

	if (obj->mm.madv == __I915_MADV_PURGED)
		return;

	/* TTM's purge interface. Note that we might be reentering. */
	ret = ttm_bo_validate(bo, &place, &ctx);
	if (!ret) {
		obj->write_domain = 0;
		obj->read_domains = 0;
		i915_ttm_adjust_gem_after_move(obj);
		i915_ttm_free_cached_io_st(obj);
		obj->mm.madv = __I915_MADV_PURGED;
	}
}

static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
{
	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
	int ret = i915_ttm_move_notify(bo);

	GEM_WARN_ON(ret);
	GEM_WARN_ON(obj->ttm.cached_io_st);
	if (!ret && obj->mm.madv != I915_MADV_WILLNEED)
		i915_ttm_purge(obj);
}

static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
{
	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);

	if (likely(obj)) {
		__i915_gem_object_pages_fini(obj);
		i915_ttm_free_cached_io_st(obj);
	}
}

static struct intel_memory_region *
i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
{
	struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);

	/* There's some room for optimization here... */
	GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
		   ttm_mem_type < I915_PL_LMEM0);
	if (ttm_mem_type == I915_PL_SYSTEM)
		return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
						  0);

	return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
					  ttm_mem_type - I915_PL_LMEM0);
}

static struct sg_table *i915_ttm_tt_get_st(struct ttm_tt *ttm)
{
	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
	struct sg_table *st;
	int ret;

	if (i915_tt->cached_st)
		return i915_tt->cached_st;

	st = kzalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		return ERR_PTR(-ENOMEM);

	ret = sg_alloc_table_from_pages_segment(st,
			ttm->pages, ttm->num_pages,
			0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
			i915_sg_segment_size(), GFP_KERNEL);
	if (ret) {
		kfree(st);
		return ERR_PTR(ret);
	}

	ret = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0);
	if (ret) {
		sg_free_table(st);
		kfree(st);
		return ERR_PTR(ret);
	}

	i915_tt->cached_st = st;
	return st;
}
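
/*
 * Return the scatter-gather table describing @res: the dma-mapped
 * ttm_tt pages for system memory, or the region's io memory otherwise.
 */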
static struct sg_table *
i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
			 struct ttm_resource *res)
{
	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);

	if (!gpu_binds_iomem(res))
		return i915_ttm_tt_get_st(bo->ttm);

	/*
	 * If CPU mapping differs, we need to add the ttm_tt pages to
	 * the resulting st. Might make sense for GGTT.
	 */
	GEM_WARN_ON(!cpu_maps_iomem(res));
	return intel_region_ttm_resource_to_st(obj->mm.region, res);
}

static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
			       bool clear,
			       struct ttm_resource *dst_mem,
			       struct ttm_tt *dst_ttm,
			       struct sg_table *dst_st)
{
	struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
						     bdev);
	struct ttm_resource_manager *src_man =
		ttm_manager_type(bo->bdev, bo->resource->mem_type);
	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
	struct sg_table *src_st;
	struct i915_request *rq;
	struct ttm_tt *src_ttm = bo->ttm;
	enum i915_cache_level src_level, dst_level;
	int ret;

	if (!i915->gt.migrate.context || intel_gt_is_wedged(&i915->gt))
		return -EINVAL;

	dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm);
	if (clear) {
		if (bo->type == ttm_bo_type_kernel)
			return -EINVAL;

		intel_engine_pm_get(i915->gt.migrate.context->engine);
		ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL,
						  dst_st->sgl, dst_level,
						  gpu_binds_iomem(dst_mem),
						  0, &rq);

		if (!ret && rq) {
			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
			i915_request_put(rq);
		}
		intel_engine_pm_put(i915->gt.migrate.context->engine);
	} else {
		src_st = src_man->use_tt ? i915_ttm_tt_get_st(src_ttm) :
			obj->ttm.cached_io_st;

		src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
		intel_engine_pm_get(i915->gt.migrate.context->engine);
		ret = intel_context_migrate_copy(i915->gt.migrate.context,
						 NULL, src_st->sgl, src_level,
						 gpu_binds_iomem(bo->resource),
						 dst_st->sgl, dst_level,
						 gpu_binds_iomem(dst_mem),
						 &rq);
		if (!ret && rq) {
			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
			i915_request_put(rq);
		}
		intel_engine_pm_put(i915->gt.migrate.context->engine);
	}

	return ret;
}

static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
			    struct ttm_resource *dst_mem,
			    struct ttm_tt *dst_ttm,
			    struct sg_table *dst_st,
			    bool allow_accel)
{
	int ret = -EINVAL;

	if (allow_accel)
		ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm, dst_st);
	if (ret) {
		struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
		struct intel_memory_region *dst_reg, *src_reg;
		union {
			struct ttm_kmap_iter_tt tt;
			struct ttm_kmap_iter_iomap io;
		} _dst_iter, _src_iter;
		struct ttm_kmap_iter *dst_iter, *src_iter;

		dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
		src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
		GEM_BUG_ON(!dst_reg || !src_reg);

		dst_iter = !cpu_maps_iomem(dst_mem) ?
			ttm_kmap_iter_tt_init(&_dst_iter.tt, dst_ttm) :
			ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap,
						 dst_st, dst_reg->region.start);

		src_iter = !cpu_maps_iomem(bo->resource) ?
			ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
			ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap,
						 obj->ttm.cached_io_st,
						 src_reg->region.start);

		ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter);
	}
}
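
/*
 * TTM move callback: migrate the backing store of @bo to @dst_mem,
 * preferring the blitter with a CPU memcpy fallback, and update the
 * GEM state to match the new placement.
 */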
static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
			 struct ttm_operation_ctx *ctx,
			 struct ttm_resource *dst_mem,
			 struct ttm_place *hop)
{
	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
	struct ttm_resource_manager *dst_man =
		ttm_manager_type(bo->bdev, dst_mem->mem_type);
	struct ttm_tt *ttm = bo->ttm;
	struct sg_table *dst_st;
	bool clear;
	int ret;

	/* Sync for now. We could do the actual copy async. */
	ret = ttm_bo_wait_ctx(bo, ctx);
	if (ret)
		return ret;

	ret = i915_ttm_move_notify(bo);
	if (ret)
		return ret;

	if (obj->mm.madv != I915_MADV_WILLNEED) {
		i915_ttm_purge(obj);
		ttm_resource_free(bo, &dst_mem);
		return 0;
	}

	/* Populate ttm with pages if needed. Typically system memory. */
	if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_TT_FLAG_SWAPPED))) {
		ret = ttm_tt_populate(bo->bdev, ttm, ctx);
		if (ret)
			return ret;
	}

	dst_st = i915_ttm_resource_get_st(obj, dst_mem);
	if (IS_ERR(dst_st))
		return PTR_ERR(dst_st);

	clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
	if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)))
		__i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_st, true);

	ttm_bo_move_sync_cleanup(bo, dst_mem);
	i915_ttm_adjust_domains_after_move(obj);
	i915_ttm_free_cached_io_st(obj);

	if (gpu_binds_iomem(dst_mem) || cpu_maps_iomem(dst_mem)) {
		obj->ttm.cached_io_st = dst_st;
		obj->ttm.get_io_page.sg_pos = dst_st->sgl;
		obj->ttm.get_io_page.sg_idx = 0;
	}

	i915_ttm_adjust_gem_after_move(obj);
	return 0;
}

static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem)
{
	if (!cpu_maps_iomem(mem))
		return 0;

	mem->bus.caching = ttm_write_combined;
	mem->bus.is_iomem = true;

	return 0;
}

static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
					 unsigned long page_offset)
{
	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
	unsigned long base = obj->mm.region->iomap.base - obj->mm.region->region.start;
	struct scatterlist *sg;
	unsigned int ofs;

	GEM_WARN_ON(bo->ttm);

	sg = __i915_gem_object_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs, true);

	return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs;
}

static struct ttm_device_funcs i915_ttm_bo_driver = {
	.ttm_tt_create = i915_ttm_tt_create,
	.ttm_tt_unpopulate = i915_ttm_tt_unpopulate,
	.ttm_tt_destroy = i915_ttm_tt_destroy,
	.eviction_valuable = i915_ttm_eviction_valuable,
	.evict_flags = i915_ttm_evict_flags,
	.move = i915_ttm_move,
	.swap_notify = i915_ttm_swap_notify,
	.delete_mem_notify = i915_ttm_delete_mem_notify,
	.io_mem_reserve = i915_ttm_io_mem_reserve,
	.io_mem_pfn = i915_ttm_io_mem_pfn,
};

/**
 * i915_ttm_driver - Return a pointer to the TTM device funcs
 *
 * Return: Pointer to statically allocated TTM device funcs.
 */
struct ttm_device_funcs *i915_ttm_driver(void)
{
	return &i915_ttm_bo_driver;
}
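
/*
 * Validate @bo into @placement: first try only the requested placement
 * without evicting, then fall back to all allowed placements with
 * eviction enabled, and finally publish the resulting pages to GEM.
 */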
static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
				struct ttm_placement *placement)
{
	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
	};
	struct sg_table *st;
	int real_num_busy;
	int ret;

	/* First try only the requested placement. No eviction. */
	real_num_busy = fetch_and_zero(&placement->num_busy_placement);
	ret = ttm_bo_validate(bo, placement, &ctx);
	if (ret) {
		ret = i915_ttm_err_to_gem(ret);
		/*
		 * Anything that wants to restart the operation gets to
		 * do that.
		 */
		if (ret == -EDEADLK || ret == -EINTR || ret == -ERESTARTSYS ||
		    ret == -EAGAIN)
			return ret;

		/*
		 * If the initial attempt fails, allow all accepted placements,
		 * evicting if necessary.
		 */
		placement->num_busy_placement = real_num_busy;
		ret = ttm_bo_validate(bo, placement, &ctx);
		if (ret)
			return i915_ttm_err_to_gem(ret);
	}

	i915_ttm_adjust_lru(obj);
	if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) {
		ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx);
		if (ret)
			return ret;

		i915_ttm_adjust_domains_after_move(obj);
		i915_ttm_adjust_gem_after_move(obj);
	}

	if (!i915_gem_object_has_pages(obj)) {
		/* Object either has a page vector or is an iomem object */
		st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st;
		if (IS_ERR(st))
			return PTR_ERR(st);

		__i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
	}

	return ret;
}

static int i915_ttm_get_pages(struct drm_i915_gem_object *obj)
{
	struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS];
	struct ttm_placement placement;

	GEM_BUG_ON(obj->mm.n_placements > I915_TTM_MAX_PLACEMENTS);

	/* Move to the requested placement. */
	i915_ttm_placement_from_obj(obj, &requested, busy, &placement);

	return __i915_ttm_get_pages(obj, &placement);
}

/**
 * DOC: Migration vs eviction
 *
 * GEM migration may not be the same as TTM migration / eviction. If
 * the TTM core decides to evict an object it may be evicted to a
 * TTM memory type that is not in the object's allowable GEM regions, or
 * in fact theoretically to a TTM memory type that doesn't correspond to
 * a GEM memory region. In that case the object's GEM region is not
 * updated, and the data is migrated back to the GEM region at
 * get_pages time. TTM may however set up CPU ptes to the object even
 * when it is evicted.
 * GEM forced migration using the i915_ttm_migrate() op is allowed even
 * to regions that are not in the object's list of allowable placements.
 */
static int i915_ttm_migrate(struct drm_i915_gem_object *obj,
			    struct intel_memory_region *mr)
{
	struct ttm_place requested;
	struct ttm_placement placement;
	int ret;

	i915_ttm_place_from_region(mr, &requested, obj->flags);
	placement.num_placement = 1;
	placement.num_busy_placement = 1;
	placement.placement = &requested;
	placement.busy_placement = &requested;

	ret = __i915_ttm_get_pages(obj, &placement);
	if (ret)
		return ret;

	/*
	 * Reinitialize the region bindings. This is primarily
	 * required for objects where the new region is not in
	 * its allowable placements.
	 */
	if (obj->mm.region != mr) {
		i915_gem_object_release_memory_region(obj);
		i915_gem_object_init_memory_region(obj, mr);
	}

	return 0;
}

static void i915_ttm_put_pages(struct drm_i915_gem_object *obj,
			       struct sg_table *st)
{
	/*
	 * We're currently not called from a shrinker, so put_pages()
	 * typically means the object is about to be destroyed, or called
	 * from move_notify(). So just avoid doing much for now.
	 * If the object is not destroyed next, the TTM eviction logic
	 * and shrinkers will move it out if needed.
	 */

	i915_ttm_adjust_lru(obj);
}

static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
{
	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);

	/*
	 * Don't manipulate the TTM LRUs while in TTM bo destruction.
	 * We're called through i915_ttm_delete_mem_notify().
	 */
	if (!kref_read(&bo->kref))
		return;

	/*
	 * Put on the correct LRU list depending on the MADV status
	 */
	spin_lock(&bo->bdev->lru_lock);
	if (obj->mm.madv != I915_MADV_WILLNEED) {
		bo->priority = I915_TTM_PRIO_PURGE;
	} else if (!i915_gem_object_has_pages(obj)) {
		if (bo->priority < I915_TTM_PRIO_HAS_PAGES)
			bo->priority = I915_TTM_PRIO_HAS_PAGES;
	} else {
		if (bo->priority > I915_TTM_PRIO_NO_PAGES)
			bo->priority = I915_TTM_PRIO_NO_PAGES;
	}

	ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
	spin_unlock(&bo->bdev->lru_lock);
}

/*
 * TTM-backed gem object destruction requires some clarification.
 * Basically we have two possibilities here. We can either rely on the
 * i915 delayed destruction and put the TTM object when the object
 * is idle. This would be detected by TTM which would bypass the
 * TTM delayed destroy handling. The other approach is to put the TTM
 * object early and rely on the TTM destroyed handling, and then free
 * the leftover parts of the GEM object once TTM's destroyed list handling is
 * complete. For now, we rely on the latter for two reasons:
 * a) TTM can evict an object even when it's on the delayed destroy list,
 *    which in theory allows for complete eviction.
 * b) There is work going on in TTM to allow freeing an object even when
 *    it's not idle, and using the TTM destroyed list handling could help us
 *    benefit from that.
 */
static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj)
{
	GEM_BUG_ON(!obj->ttm.created);

	ttm_bo_put(i915_gem_to_ttm(obj));
}

static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
{
	struct vm_area_struct *area = vmf->vma;
	struct drm_i915_gem_object *obj =
		i915_ttm_to_gem(area->vm_private_data);

	/* Sanity check that we allow writing into this object */
	if (unlikely(i915_gem_object_is_readonly(obj) &&
		     area->vm_flags & VM_WRITE))
		return VM_FAULT_SIGBUS;

	return ttm_bo_vm_fault(vmf);
}

static int
vm_access_ttm(struct vm_area_struct *area, unsigned long addr,
	      void *buf, int len, int write)
{
	struct drm_i915_gem_object *obj =
		i915_ttm_to_gem(area->vm_private_data);

	if (i915_gem_object_is_readonly(obj) && write)
		return -EACCES;

	return ttm_bo_vm_access(area, addr, buf, len, write);
}

static void ttm_vm_open(struct vm_area_struct *vma)
{
	struct drm_i915_gem_object *obj =
		i915_ttm_to_gem(vma->vm_private_data);

	GEM_BUG_ON(!obj);
	i915_gem_object_get(obj);
}

static void ttm_vm_close(struct vm_area_struct *vma)
{
	struct drm_i915_gem_object *obj =
		i915_ttm_to_gem(vma->vm_private_data);

	GEM_BUG_ON(!obj);
	i915_gem_object_put(obj);
}

static const struct vm_operations_struct vm_ops_ttm = {
	.fault = vm_fault_ttm,
	.access = vm_access_ttm,
	.open = ttm_vm_open,
	.close = ttm_vm_close,
};

static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj)
{
	/* The ttm_bo must be allocated with I915_BO_ALLOC_USER */
	GEM_BUG_ON(!drm_mm_node_allocated(&obj->base.vma_node.vm_node));

	return drm_vma_node_offset_addr(&obj->base.vma_node);
}

static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
	.name = "i915_gem_object_ttm",

	.get_pages = i915_ttm_get_pages,
	.put_pages = i915_ttm_put_pages,
	.truncate = i915_ttm_purge,
	.adjust_lru = i915_ttm_adjust_lru,
	.delayed_free = i915_ttm_delayed_free,
	.migrate = i915_ttm_migrate,
	.mmap_offset = i915_ttm_mmap_offset,
	.mmap_ops = &vm_ops_ttm,
};

void i915_ttm_bo_destroy(struct ttm_buffer_object *bo)
{
	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);

	i915_gem_object_release_memory_region(obj);
	mutex_destroy(&obj->ttm.get_io_page.lock);

	if (obj->ttm.created) {
		i915_ttm_backup_free(obj);

		/* This releases all gem object bindings to the backend. */
		__i915_gem_free_object(obj);

		call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
	} else {
		__i915_gem_object_fini(obj);
	}
}

/**
 * __i915_gem_ttm_object_init - Initialize a ttm-backed i915 gem object
 * @mem: The initial memory region for the object.
 * @obj: The gem object.
 * @size: Object size in bytes.
 * @page_size: The requested page size in bytes for the backing store, or 0 for the default.
 * @flags: gem object flags.
 *
 * Return: 0 on success, negative error code on failure.
 */
int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
			       struct drm_i915_gem_object *obj,
			       resource_size_t size,
			       resource_size_t page_size,
			       unsigned int flags)
{
	static struct lock_class_key lock_class;
	struct drm_i915_private *i915 = mem->i915;
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
	};
	enum ttm_bo_type bo_type;
	int ret;

	drm_gem_private_object_init(&i915->drm, &obj->base, size);
	i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags);

	/* Don't put on a region list until we're either locked or fully initialized. */
	obj->mm.region = intel_memory_region_get(mem);
	INIT_LIST_HEAD(&obj->mm.region_link);

	i915_gem_object_make_unshrinkable(obj);
	INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN);
	mutex_init(&obj->ttm.get_io_page.lock);
	bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device :
		ttm_bo_type_kernel;

	obj->base.vma_node.driver_private = i915_gem_to_ttm(obj);

	/* Forcing the page size is kernel internal only */
	GEM_BUG_ON(page_size && obj->mm.n_placements);

	/*
	 * If this function fails, it will call the destructor, but
	 * our caller still owns the object. So no freeing in the
	 * destructor until obj->ttm.created is true.
	 * Similarly, in delayed_destroy, we can't call ttm_bo_put()
	 * until successful initialization.
	 */
	ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size,
				   bo_type, &i915_sys_placement,
				   page_size >> PAGE_SHIFT,
				   &ctx, NULL, NULL, i915_ttm_bo_destroy);
	if (ret)
		return i915_ttm_err_to_gem(ret);

	obj->ttm.created = true;
	i915_gem_object_release_memory_region(obj);
	i915_gem_object_init_memory_region(obj, mem);
	i915_ttm_adjust_domains_after_move(obj);
	i915_ttm_adjust_gem_after_move(obj);
	i915_gem_object_unlock(obj);

	return 0;
}

static const struct intel_memory_region_ops ttm_system_region_ops = {
	.init_object = __i915_gem_ttm_object_init,
};

struct intel_memory_region *
i915_gem_ttm_system_setup(struct drm_i915_private *i915,
			  u16 type, u16 instance)
{
	struct intel_memory_region *mr;

	mr = intel_memory_region_create(i915, 0,
					totalram_pages() << PAGE_SHIFT,
					PAGE_SIZE, 0,
					type, instance,
					&ttm_system_region_ops);
	if (IS_ERR(mr))
		return mr;

	intel_memory_region_set_name(mr, "system-ttm");
	return mr;
}

/**
 * i915_gem_obj_copy_ttm - Copy the contents of one ttm-based gem object to
 * another
 * @dst: The destination object
 * @src: The source object
 * @allow_accel: Allow using the blitter. Otherwise TTM memcpy is used.
 * @intr: Whether to perform waits interruptibly.
 *
 * Note: The caller is responsible for ensuring that the underlying
 * TTM objects are populated if needed and locked.
 *
 * Return: Zero on success. Negative error code on error. If @intr == true,
 * then it may return -ERESTARTSYS or -EINTR.
 */
int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
			  struct drm_i915_gem_object *src,
			  bool allow_accel, bool intr)
{
	struct ttm_buffer_object *dst_bo = i915_gem_to_ttm(dst);
	struct ttm_buffer_object *src_bo = i915_gem_to_ttm(src);
	struct ttm_operation_ctx ctx = {
		.interruptible = intr,
	};
	struct sg_table *dst_st;
	int ret;

	assert_object_held(dst);
	assert_object_held(src);

	/*
	 * Sync for now. This will change with async moves.
	 */
	ret = ttm_bo_wait_ctx(dst_bo, &ctx);
	if (!ret)
		ret = ttm_bo_wait_ctx(src_bo, &ctx);
	if (ret)
		return ret;

	dst_st = gpu_binds_iomem(dst_bo->resource) ?
		dst->ttm.cached_io_st : i915_ttm_tt_get_st(dst_bo->ttm);

	__i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm,
			dst_st, allow_accel);

	return 0;
}