Lines Matching +full:iommu +full:- +full:ctx

15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
29 * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
33 #include <linux/dma-mapping.h>
34 #include <linux/iommu.h>
41 #include <linux/dma-buf.h>
78 return ttm_range_man_init(&adev->mman.bdev, type,
83 * amdgpu_evict_flags - Compute placement flags
93 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
103 if (bo->type == ttm_bo_type_sg) {
104 placement->num_placement = 0;
110 placement->placement = &placements;
111 placement->num_placement = 1;
116 if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) {
117 placement->num_placement = 0;
121 switch (bo->resource->mem_type) {
126 placement->num_placement = 0;
130 if (!adev->mman.buffer_funcs_enabled) {
134 } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
135 !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
136 amdgpu_res_cpu_visible(adev, bo->resource)) {
146 abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
147 abo->placements[0].lpfn = 0;
148 abo->placements[0].flags |= TTM_PL_FLAG_DESIRED;
161 *placement = abo->placement;
165 * amdgpu_ttm_map_buffer - Map memory into the GART windows
184 struct amdgpu_device *adev = ring->adev;
193 BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
196 if (WARN_ON(mem->mem_type == AMDGPU_PL_PREEMPT))
197 return -EINVAL;
200 if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
201 *addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
202 mm_cur->start;
211 offset = mm_cur->start & ~PAGE_MASK;
216 *size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);
218 *addr = adev->gmc.gart_start;
223 num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
226 r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
234 src_addr += job->ibs[0].gpu_addr;
236 dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
238 amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
241 amdgpu_ring_pad_ib(ring, &job->ibs[0]);
242 WARN_ON(job->ibs[0].length_dw > num_dw);
244 flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
248 cpu_addr = &job->ibs[0].ptr[num_dw];
250 if (mem->mem_type == TTM_PL_TT) {
253 dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT];
258 dma_address = mm_cur->start;
259 dma_address += adev->vm_manager.vram_base_offset;
273 * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
282 * The function copies @size bytes from {src->mem + src->offset} to
283 * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
294 struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
301 if (!adev->mman.buffer_funcs_enabled) {
302 dev_err(adev->dev,
304 return -EINVAL;
307 amdgpu_res_first(src->mem, src->offset, size, &src_mm);
308 amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
310 mutex_lock(&adev->mman.gtt_window_lock);
320 r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
325 r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
330 abo_src = ttm_to_amdgpu_bo(src->bo);
331 abo_dst = ttm_to_amdgpu_bo(dst->bo);
334 if ((abo_src->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
335 (abo_src->tbo.resource->mem_type == TTM_PL_VRAM))
337 if ((abo_dst->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
338 (dst->mem->mem_type == TTM_PL_VRAM)) {
365 mutex_unlock(&adev->mman.gtt_window_lock);
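The amdgpu_ttm_copy_mem_to_mem() lines above show the copy path serializing on mman.gtt_window_lock, walking source and destination with amdgpu_res_cursor, and mapping each chunk into one of the two GART windows before emitting an SDMA copy. A minimal sketch of that loop shape follows; map_chunk() and emit_copy() are hypothetical stand-ins for amdgpu_ttm_map_buffer() and amdgpu_copy_buffer(), whose real signatures take more parameters.

/*
 * Hedged sketch of the window-copy loop, assuming hypothetical helpers
 * map_chunk() and emit_copy(); the real driver uses amdgpu_ttm_map_buffer()
 * and amdgpu_copy_buffer() here.
 */
static int window_copy_sketch(struct amdgpu_device *adev,
			      struct amdgpu_res_cursor *src_mm,
			      struct amdgpu_res_cursor *dst_mm)
{
	int r = 0;

	mutex_lock(&adev->mman.gtt_window_lock);
	while (src_mm->remaining) {
		/* At most one GART window worth of data per iteration. */
		u64 cur = min3((u64)src_mm->size, (u64)dst_mm->size,
			       (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE * PAGE_SIZE);
		u64 from, to;

		r = map_chunk(adev, src_mm, 0, &cur, &from);	/* hypothetical */
		if (r)
			break;
		r = map_chunk(adev, dst_mm, 1, &cur, &to);	/* hypothetical */
		if (r)
			break;
		r = emit_copy(adev, from, to, cur);		/* hypothetical */
		if (r)
			break;

		amdgpu_res_next(src_mm, cur);
		amdgpu_res_next(dst_mm, cur);
	}
	mutex_unlock(&adev->mman.gtt_window_lock);
	return r;
}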
373 * amdgpu_move_blit - Copy an entire buffer to another buffer
383 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
397 new_mem->size,
399 bo->base.resv, &fence);
404 if (old_mem->mem_type == TTM_PL_VRAM &&
405 (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
413 amdgpu_vram_mgr_set_cleared(bo->resource);
420 if (bo->type == ttm_bo_type_kernel)
435 * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU
449 if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
450 res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL)
453 if (res->mem_type != TTM_PL_VRAM)
456 amdgpu_res_first(res, 0, res->size, &cursor);
458 if ((cursor.start + cursor.size) > adev->gmc.visible_vram_size)
467 * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
478 if (mem->mem_type == TTM_PL_VRAM &&
479 !(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
486 * amdgpu_bo_move - Move a buffer object to a new memory location
491 struct ttm_operation_ctx *ctx,
497 struct ttm_resource *old_mem = bo->resource;
500 if (new_mem->mem_type == TTM_PL_TT ||
501 new_mem->mem_type == AMDGPU_PL_PREEMPT) {
502 r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
508 adev = amdgpu_ttm_adev(bo->bdev);
510 if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
511 bo->ttm == NULL)) {
516 if (old_mem->mem_type == TTM_PL_SYSTEM &&
517 (new_mem->mem_type == TTM_PL_TT ||
518 new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
523 if ((old_mem->mem_type == TTM_PL_TT ||
524 old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
525 new_mem->mem_type == TTM_PL_SYSTEM) {
526 r = ttm_bo_wait_ctx(bo, ctx);
530 amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
532 ttm_resource_free(bo, &bo->resource);
537 if (old_mem->mem_type == AMDGPU_PL_GDS ||
538 old_mem->mem_type == AMDGPU_PL_GWS ||
539 old_mem->mem_type == AMDGPU_PL_OA ||
540 old_mem->mem_type == AMDGPU_PL_DOORBELL ||
541 new_mem->mem_type == AMDGPU_PL_GDS ||
542 new_mem->mem_type == AMDGPU_PL_GWS ||
543 new_mem->mem_type == AMDGPU_PL_OA ||
544 new_mem->mem_type == AMDGPU_PL_DOORBELL) {
551 if (bo->type == ttm_bo_type_device &&
552 new_mem->mem_type == TTM_PL_VRAM &&
553 old_mem->mem_type != TTM_PL_VRAM) {
554 /* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
557 abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
560 if (adev->mman.buffer_funcs_enabled &&
561 ((old_mem->mem_type == TTM_PL_SYSTEM &&
562 new_mem->mem_type == TTM_PL_VRAM) ||
563 (old_mem->mem_type == TTM_PL_VRAM &&
564 new_mem->mem_type == TTM_PL_SYSTEM))) {
565 hop->fpfn = 0;
566 hop->lpfn = 0;
567 hop->mem_type = TTM_PL_TT;
568 hop->flags = TTM_PL_FLAG_TEMPORARY;
569 return -EMULTIHOP;
573 if (adev->mman.buffer_funcs_enabled)
576 r = -ENODEV;
586 r = ttm_bo_move_memcpy(bo, ctx, new_mem);
593 atomic64_inc(&adev->num_evictions);
594 atomic64_add(bo->base.size, &adev->num_bytes_moved);
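The -EMULTIHOP lines above are TTM's multihop protocol: when a direct SYSTEM <-> VRAM blit is not possible in one step, amdgpu describes a temporary GTT placement in @hop and lets TTM restart the move in two hops. A minimal sketch of that pattern, assuming the caller passes a ttm_place to fill:

/*
 * Hedged sketch: fill @hop with a temporary GTT placement and return
 * -EMULTIHOP so TTM retries the move in two steps.
 */
static int request_gtt_hop_sketch(struct ttm_place *hop)
{
	hop->fpfn = 0;				/* no range restriction */
	hop->lpfn = 0;
	hop->mem_type = TTM_PL_TT;		/* bounce through GTT */
	hop->flags = TTM_PL_FLAG_TEMPORARY;	/* released again after the move */
	return -EMULTIHOP;
}

TTM then moves the buffer into the hop placement first and calls the driver's move callback again for the final destination.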
599 * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
608 switch (mem->mem_type) {
616 mem->bus.offset = mem->start << PAGE_SHIFT;
618 if (adev->mman.aper_base_kaddr &&
619 mem->placement & TTM_PL_FLAG_CONTIGUOUS)
620 mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
621 mem->bus.offset;
623 mem->bus.offset += adev->gmc.aper_base;
624 mem->bus.is_iomem = true;
627 mem->bus.offset = mem->start << PAGE_SHIFT;
628 mem->bus.offset += adev->doorbell.base;
629 mem->bus.is_iomem = true;
630 mem->bus.caching = ttm_uncached;
633 return -EINVAL;
641 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
644 amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0,
647 if (bo->resource->mem_type == AMDGPU_PL_DOORBELL)
648 return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT;
650 return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT;
654 * amdgpu_ttm_domain_start - Returns GPU start address
666 return adev->gmc.gart_start;
668 return adev->gmc.vram_start;
692 * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
701 struct ttm_tt *ttm = bo->tbo.ttm;
703 unsigned long start = gtt->userptr;
712 mm = bo->notifier.mm;
715 return -EFAULT;
719 return -ESRCH;
724 r = -EFAULT;
727 if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
728 vma->vm_file)) {
729 r = -EPERM;
734 r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
746 /* amdgpu_ttm_tt_discard_user_pages - Discard range and pfn array allocations
753 if (gtt && gtt->userptr && range)
758 * amdgpu_ttm_tt_get_user_pages_done - stop HMM from tracking CPU page table changes
768 if (!gtt || !gtt->userptr || !range)
772 gtt->userptr, ttm->num_pages);
774 WARN_ONCE(!range->hmm_pfns, "No user pages to check\n");
781 * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
791 for (i = 0; i < ttm->num_pages; ++i)
792 ttm->pages[i] = pages ? pages[i] : NULL;
796 * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages
805 int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
811 r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
812 (u64)ttm->num_pages << PAGE_SHIFT,
818 r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
823 drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
824 ttm->num_pages);
829 sg_free_table(ttm->sg);
831 kfree(ttm->sg);
832 ttm->sg = NULL;
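The amdgpu_ttm_tt_pin_userptr() lines above build an sg_table from the user pages, DMA-map it, and flatten it into the per-page dma_addr_t array later consumed by GART binding. A simplified, self-contained sketch of that sequence (error handling reduced, helper name hypothetical), assuming kernel context with <linux/scatterlist.h>, <linux/dma-mapping.h> and <drm/drm_prime.h>:

/* Hedged sketch of the pin sequence shown in the matched lines. */
static int pin_user_pages_sketch(struct device *dev, struct page **pages,
				 unsigned int npages, dma_addr_t *dma_address,
				 struct sg_table *sgt, bool write)
{
	enum dma_data_direction dir = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
	int r;

	/* Wrap the already-pinned user pages in a scatter/gather table. */
	r = sg_alloc_table_from_pages(sgt, pages, npages, 0,
				      (u64)npages << PAGE_SHIFT, GFP_KERNEL);
	if (r)
		return r;

	/* Map the table for device access (entries may get merged). */
	r = dma_map_sgtable(dev, sgt, dir, 0);
	if (r) {
		sg_free_table(sgt);
		return r;
	}

	/* One dma_addr_t per page, the layout GART binding expects. */
	drm_prime_sg_to_dma_addr_array(sgt, dma_address, npages);
	return 0;
}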
837 * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
844 int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
849 if (!ttm->sg || !ttm->sg->sgl)
853 dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
854 sg_free_table(ttm->sg);
868 uint64_t total_pages = ttm->num_pages;
869 int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
880 gtt->offset + (page_idx << PAGE_SHIFT),
881 1, &gtt->ttm.dma_address[page_idx], flags);
883 * Ctrl pages - modify the memory type to NC (ctrl_flags) from
887 gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
888 pages_per_xcc - 1,
889 &gtt->ttm.dma_address[page_idx + 1],
899 struct ttm_tt *ttm = tbo->ttm;
905 if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
908 amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
909 gtt->ttm.dma_address, flags);
911 gtt->bound = true;
915 * amdgpu_ttm_backend_bind - Bind GTT memory
930 return -EINVAL;
932 if (gtt->bound)
935 if (gtt->userptr) {
938 dev_err(adev->dev, "failed to pin userptr\n");
941 } else if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) {
942 if (!ttm->sg) {
946 attach = gtt->gobj->import_attach;
951 ttm->sg = sgt;
954 drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
955 ttm->num_pages);
958 if (!ttm->num_pages) {
960 ttm->num_pages, bo_mem, ttm);
963 if (bo_mem->mem_type != TTM_PL_TT ||
965 gtt->offset = AMDGPU_BO_INVALID_OFFSET;
973 gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
974 amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
975 gtt->ttm.dma_address, flags);
976 gtt->bound = true;
981 * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either
990 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
991 struct ttm_operation_ctx ctx = { false, false };
992 struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
999 if (bo->resource->start != AMDGPU_BO_INVALID_OFFSET)
1010 placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
1012 placements.flags = bo->resource->placement;
1014 r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
1019 flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, tmp);
1022 gtt->offset = (u64)tmp->start << PAGE_SHIFT;
1025 ttm_resource_free(bo, &bo->resource);
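The amdgpu_ttm_alloc_gart() lines above show the ttm_operation_ctx pattern this search also matched elsewhere: an on-stack context plus a placement restricted to the GART aperture is handed to ttm_bo_mem_space(), and the resulting resource later replaces bo->resource once the pages are bound. A minimal sketch of just the space-allocation step, with a hypothetical function name:

/*
 * Hedged sketch: find space inside the GART aperture using an on-stack
 * ttm_operation_ctx ({ false, false } = not interruptible, may wait on
 * the GPU).  The real function then binds the pages and swaps bo->resource.
 */
static int find_gart_space_sketch(struct ttm_buffer_object *bo,
				  unsigned long gart_pages,
				  struct ttm_resource **tmp)
{
	struct ttm_operation_ctx ctx = { false, false };
	struct ttm_place place = {
		.fpfn = 0,
		.lpfn = gart_pages,	/* stay below the GART size */
		.mem_type = TTM_PL_TT,
		.flags = 0,
	};
	struct ttm_placement placement = {
		.num_placement = 1,
		.placement = &place,
	};

	return ttm_bo_mem_space(bo, &placement, tmp, &ctx);
}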
1032 * amdgpu_ttm_recover_gart - Rebind GTT pages
1039 struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
1042 if (!tbo->ttm)
1045 flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, tbo->resource);
1050 * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
1062 if (gtt->userptr) {
1064 } else if (ttm->sg && drm_gem_is_imported(gtt->gobj)) {
1067 attach = gtt->gobj->import_attach;
1068 dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
1069 ttm->sg = NULL;
1072 if (!gtt->bound)
1075 if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
1079 amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
1080 gtt->bound = false;
1088 if (gtt->usertask)
1089 put_task_struct(gtt->usertask);
1091 ttm_tt_fini(&gtt->ttm);
1096 * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
1106 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
1115 gtt->gobj = &bo->base;
1116 if (adev->gmc.mem_partitions && abo->xcp_id >= 0)
1117 gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
1119 gtt->pool_id = abo->xcp_id;
1121 if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
1127 if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) {
1131 return &gtt->ttm;
1135 * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
1142 struct ttm_operation_ctx *ctx)
1151 if (gtt->userptr) {
1152 ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1153 if (!ttm->sg)
1154 return -ENOMEM;
1158 if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
1161 if (adev->mman.ttm_pools && gtt->pool_id >= 0)
1162 pool = &adev->mman.ttm_pools[gtt->pool_id];
1164 pool = &adev->mman.bdev.pool;
1165 ret = ttm_pool_alloc(pool, ttm, ctx);
1169 for (i = 0; i < ttm->num_pages; ++i)
1170 ttm->pages[i]->mapping = bdev->dev_mapping;
1176 * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
1191 if (gtt->userptr) {
1193 kfree(ttm->sg);
1194 ttm->sg = NULL;
1198 if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
1201 for (i = 0; i < ttm->num_pages; ++i)
1202 ttm->pages[i]->mapping = NULL;
1206 if (adev->mman.ttm_pools && gtt->pool_id >= 0)
1207 pool = &adev->mman.ttm_pools[gtt->pool_id];
1209 pool = &adev->mman.bdev.pool;
1215 * amdgpu_ttm_tt_get_userptr - Return the userptr GTT ttm_tt for the current
1226 if (!tbo->ttm)
1227 return -EINVAL;
1229 gtt = (void *)tbo->ttm;
1230 *user_addr = gtt->userptr;
1235 * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
1251 if (!bo->ttm) {
1253 bo->ttm = amdgpu_ttm_tt_create(bo, 0);
1254 if (bo->ttm == NULL)
1255 return -ENOMEM;
1259 bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL;
1261 gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
1262 gtt->userptr = addr;
1263 gtt->userflags = flags;
1265 if (gtt->usertask)
1266 put_task_struct(gtt->usertask);
1267 gtt->usertask = current->group_leader;
1268 get_task_struct(gtt->usertask);
1274 * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object
1283 if (gtt->usertask == NULL)
1286 return gtt->usertask->mm;
1290 * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lies inside an
1300 if (gtt == NULL || !gtt->userptr)
1306 size = (unsigned long)gtt->ttm.num_pages * PAGE_SIZE;
1307 if (gtt->userptr > end || gtt->userptr + size <= start)
1311 *userptr = gtt->userptr;
1316 * amdgpu_ttm_tt_is_userptr - Are the pages backed by a userptr?
1322 if (gtt == NULL || !gtt->userptr)
1329 * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
1338 return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
1342 * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
1353 if (mem && mem->mem_type != TTM_PL_SYSTEM)
1356 if (mem && (mem->mem_type == TTM_PL_TT ||
1357 mem->mem_type == AMDGPU_PL_DOORBELL ||
1358 mem->mem_type == AMDGPU_PL_PREEMPT)) {
1361 if (ttm->caching == ttm_cached)
1365 if (mem && mem->mem_type == TTM_PL_VRAM &&
1366 mem->bus.caching == ttm_cached)
1373 * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
1386 flags |= adev->gart.gart_pte_flags;
1396 * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
1414 if (bo->resource->mem_type == TTM_PL_SYSTEM)
1417 if (bo->type == ttm_bo_type_kernel &&
1425 dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
1427 if (amdkfd_fence_check_mm(f, current->mm) &&
1428 !(place->flags & TTM_PL_FLAG_CONTIGUOUS))
1439 if (bo->resource->mem_type == AMDGPU_PL_PREEMPT)
1442 if (bo->resource->mem_type == TTM_PL_TT &&
1454 uint64_t bytes = 4 - (pos & 0x3);
1460 mask &= 0xffffffff >> (bytes - size) * 8;
1480 size -= bytes;
1489 struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
1498 return -EINVAL;
1500 if (!adev->mman.sdma_access_ptr)
1501 return -EACCES;
1504 return -ENODEV;
1507 memcpy(adev->mman.sdma_access_ptr, buf, len);
1509 num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
1510 r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
1517 amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
1518 src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
1520 dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
1524 amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
1527 amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
1528 WARN_ON(job->ibs[0].length_dw > num_dw);
1532 if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
1533 r = -ETIMEDOUT;
1537 memcpy(buf, adev->mman.sdma_access_ptr, len);
1544 * amdgpu_ttm_access_memory - Read or write memory that backs a buffer object.
1560 struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
1564 if (bo->resource->mem_type != TTM_PL_VRAM)
1565 return -EIO;
1571 amdgpu_res_first(bo->resource, offset, len, &cursor);
1577 size -= count;
1579 /* use the MM path to access the rest of VRAM and handle unaligned addresses */
1618 * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram
1626 amdgpu_bo_free_kernel(&adev->mman.fw_vram_usage_reserved_bo,
1627 NULL, &adev->mman.fw_vram_usage_va);
1634 * amdgpu_ttm_drv_reserve_vram_fini - free drv reserved vram
1642 amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo,
1644 &adev->mman.drv_vram_usage_va);
1648 * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
1656 uint64_t vram_size = adev->gmc.visible_vram_size;
1658 adev->mman.fw_vram_usage_va = NULL;
1659 adev->mman.fw_vram_usage_reserved_bo = NULL;
1661 if (adev->mman.fw_vram_usage_size == 0 ||
1662 adev->mman.fw_vram_usage_size > vram_size)
1666 adev->mman.fw_vram_usage_start_offset,
1667 adev->mman.fw_vram_usage_size,
1668 &adev->mman.fw_vram_usage_reserved_bo,
1669 &adev->mman.fw_vram_usage_va);
1673 * amdgpu_ttm_drv_reserve_vram_init - create bo vram reservation from driver
1681 u64 vram_size = adev->gmc.visible_vram_size;
1683 adev->mman.drv_vram_usage_va = NULL;
1684 adev->mman.drv_vram_usage_reserved_bo = NULL;
1686 if (adev->mman.drv_vram_usage_size == 0 ||
1687 adev->mman.drv_vram_usage_size > vram_size)
1691 adev->mman.drv_vram_usage_start_offset,
1692 adev->mman.drv_vram_usage_size,
1693 &adev->mman.drv_vram_usage_reserved_bo,
1694 &adev->mman.drv_vram_usage_va);
1702 * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram
1710 struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1712 ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
1713 amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
1714 ctx->c2p_bo = NULL;
1722 struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1724 memset(ctx, 0, sizeof(*ctx));
1726 ctx->c2p_train_data_offset =
1727 ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M);
1728 ctx->p2c_train_data_offset =
1729 (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
1730 ctx->train_data_size =
1734 ctx->train_data_size,
1735 ctx->p2c_train_data_offset,
1736 ctx->c2p_train_data_offset);
1745 struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1750 if (adev->bios && !amdgpu_sriov_vf(adev)) {
1764 if (adev->bios)
1768 if (!adev->bios &&
1780 ctx->c2p_train_data_offset,
1781 ctx->train_data_size,
1782 &ctx->c2p_bo,
1785 dev_err(adev->dev, "alloc c2p_bo failed(%d)!\n", ret);
1789 ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
1792 if (!adev->gmc.is_app_apu) {
1794 adev, adev->gmc.real_vram_size - reserve_size,
1795 reserve_size, &adev->mman.fw_reserved_memory, NULL);
1797 dev_err(adev->dev, "alloc tmr failed(%d)!\n", ret);
1798 amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
1813 if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions)
1816 adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions,
1817 sizeof(*adev->mman.ttm_pools),
1819 if (!adev->mman.ttm_pools)
1820 return -ENOMEM;
1822 for (i = 0; i < adev->gmc.num_mem_partitions; i++) {
1823 ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
1824 adev->gmc.mem_partitions[i].numa.node,
1834 if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools)
1837 for (i = 0; i < adev->gmc.num_mem_partitions; i++)
1838 ttm_pool_fini(&adev->mman.ttm_pools[i]);
1840 kfree(adev->mman.ttm_pools);
1841 adev->mman.ttm_pools = NULL;
1845 * amdgpu_ttm_init - Init the memory management (ttm) as well as various
1850 * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which
1858 mutex_init(&adev->mman.gtt_window_lock);
1860 dma_set_max_seg_size(adev->dev, UINT_MAX);
1862 r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
1863 adev_to_drm(adev)->anon_inode->i_mapping,
1864 adev_to_drm(adev)->vma_offset_manager,
1865 adev->need_swiotlb,
1866 dma_addressing_limited(adev->dev));
1868 dev_err(adev->dev,
1875 dev_err(adev->dev, "failed to init ttm pools(%d).\n", r);
1878 adev->mman.initialized = true;
1883 dev_err(adev->dev, "Failed initializing VRAM heap.\n");
1891 if (adev->gmc.xgmi.connected_to_cpu)
1892 adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
1893 adev->gmc.visible_vram_size);
1895 else if (adev->gmc.is_app_apu)
1900 adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
1901 adev->gmc.visible_vram_size);
1925 if (adev->mman.discovery_bin) {
1932 * This is used for VGA emulation and pre-OS scanout buffers to
1933 * avoid display artifacts while transitioning between pre-OS
1936 if (!adev->gmc.is_app_apu) {
1938 adev->mman.stolen_vga_size,
1939 &adev->mman.stolen_vga_memory,
1944 r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
1945 adev->mman.stolen_extended_size,
1946 &adev->mman.stolen_extended_memory,
1953 adev->mman.stolen_reserved_offset,
1954 adev->mman.stolen_reserved_size,
1955 &adev->mman.stolen_reserved_memory,
1963 dev_info(adev->dev, "amdgpu: %uM of VRAM memory ready\n",
1964 (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024)));
1970 if (amdgpu_gtt_size != -1) {
1973 drm_warn(&adev->ddev,
1976 drm_warn(&adev->ddev,
1986 dev_err(adev->dev, "Failed initializing GTT heap.\n");
1989 dev_info(adev->dev, "amdgpu: %uM of GTT memory ready.\n",
1992 if (adev->flags & AMD_IS_APU) {
1993 if (adev->gmc.real_vram_size < gtt_size)
1994 adev->apu_prefer_gtt = true;
1998 r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE);
2000 dev_err(adev->dev, "Failed initializing doorbell heap.\n");
2007 dev_err(adev->dev, "Failed to initialize kernel doorbells.\n");
2014 dev_err(adev->dev, "Failed initializing PREEMPT heap.\n");
2018 /* Initialize various on-chip memory pools */
2019 r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
2021 dev_err(adev->dev, "Failed initializing GDS heap.\n");
2025 r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size);
2027 dev_err(adev->dev, "Failed initializing gws heap.\n");
2031 r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size);
2033 dev_err(adev->dev, "Failed initializing oa heap.\n");
2038 &adev->mman.sdma_access_bo, NULL,
2039 &adev->mman.sdma_access_ptr))
2046 * amdgpu_ttm_fini - De-initialize the TTM memory pools
2052 if (!adev->mman.initialized)
2059 if (!adev->gmc.is_app_apu) {
2060 amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
2061 amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
2063 amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
2065 amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory_extend, NULL,
2067 if (adev->mman.stolen_reserved_size)
2068 amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
2071 amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
2072 &adev->mman.sdma_access_ptr);
2078 if (adev->mman.aper_base_kaddr)
2079 iounmap(adev->mman.aper_base_kaddr);
2080 adev->mman.aper_base_kaddr = NULL;
2090 ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
2091 ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
2092 ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
2093 ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL);
2094 ttm_device_fini(&adev->mman.bdev);
2095 adev->mman.initialized = false;
2096 dev_info(adev->dev, "amdgpu: ttm finalized\n");
2100 * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions
2110 struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
2114 if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
2115 adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
2122 ring = adev->mman.buffer_funcs_ring;
2123 sched = &ring->sched;
2124 r = drm_sched_entity_init(&adev->mman.high_pr,
2128 dev_err(adev->dev,
2134 r = drm_sched_entity_init(&adev->mman.low_pr,
2138 dev_err(adev->dev,
2144 drm_sched_entity_destroy(&adev->mman.high_pr);
2145 drm_sched_entity_destroy(&adev->mman.low_pr);
2146 dma_fence_put(man->move);
2147 man->move = NULL;
2152 size = adev->gmc.real_vram_size;
2154 size = adev->gmc.visible_vram_size;
2155 man->size = size;
2156 adev->mman.buffer_funcs_enabled = enable;
2161 drm_sched_entity_destroy(&adev->mman.high_pr);
2176 struct drm_sched_entity *entity = delayed ? &adev->mman.low_pr :
2177 &adev->mman.high_pr;
2185 (*job)->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
2186 adev->gmc.pdb0_bo :
2187 adev->gart.bo);
2188 (*job)->vm_needs_flush = true;
2193 return drm_sched_job_add_resv_dependencies(&(*job)->base, resv,
2203 struct amdgpu_device *adev = ring->adev;
2210 if (!direct_submit && !ring->sched.ready) {
2211 dev_err(adev->dev,
2213 return -EINVAL;
2216 max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
2218 num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
2227 amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
2231 byte_count -= cur_size_in_bytes;
2234 amdgpu_ring_pad_ib(ring, &job->ibs[0]);
2235 WARN_ON(job->ibs[0].length_dw > num_dw);
2247 dev_err(adev->dev, "Error scheduling IBs (%d)\n", r);
2257 struct amdgpu_device *adev = ring->adev;
2264 max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
2266 num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
2275 amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr,
2279 byte_count -= cur_size;
2282 amdgpu_ring_pad_ib(ring, &job->ibs[0]);
2283 WARN_ON(job->ibs[0].length_dw > num_dw);
2289 * amdgpu_ttm_clear_buffer - clear memory buffers
2303 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
2304 struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
2309 if (!adev->mman.buffer_funcs_enabled)
2310 return -EINVAL;
2313 return -EINVAL;
2317 amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
2319 mutex_lock(&adev->mman.gtt_window_lock);
2332 r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
2348 mutex_unlock(&adev->mman.gtt_window_lock);
2359 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
2360 struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
2365 if (!adev->mman.buffer_funcs_enabled) {
2366 dev_err(adev->dev,
2368 return -EINVAL;
2371 amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);
2373 mutex_lock(&adev->mman.gtt_window_lock);
2381 r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
2397 mutex_unlock(&adev->mman.gtt_window_lock);
2405 * amdgpu_ttm_evict_resources - evict memory buffers
2424 man = ttm_manager_type(&adev->mman.bdev, mem_type);
2427 dev_err(adev->dev, "Trying to evict invalid memory type\n");
2428 return -EINVAL;
2431 return ttm_resource_manager_evict_all(&adev->mman.bdev, man);
2438 struct amdgpu_device *adev = m->private;
2440 return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
2446 * amdgpu_ttm_vram_read - Linear read access to VRAM
2453 struct amdgpu_device *adev = file_inode(f)->i_private;
2457 return -EINVAL;
2459 if (*pos >= adev->gmc.mc_vram_size)
2460 return -ENXIO;
2462 size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos));
2469 return -EFAULT;
2474 size -= bytes;
2481 * amdgpu_ttm_vram_write - Linear write access to VRAM
2488 struct amdgpu_device *adev = file_inode(f)->i_private;
2493 return -EINVAL;
2495 if (*pos >= adev->gmc.mc_vram_size)
2496 return -ENXIO;
2501 if (*pos >= adev->gmc.mc_vram_size)
2513 size -= 4;
2527 * amdgpu_iomem_read - Virtual read access to GPU mapped memory
2536 struct amdgpu_device *adev = file_inode(f)->i_private;
2541 /* retrieve the IOMMU domain if any for this device */
2542 dom = iommu_get_domain_for_dev(adev->dev);
2547 size_t bytes = PAGE_SIZE - off;
2555 * the domain is NULL it means there is no IOMMU active
2562 return -EPERM;
2565 if (p->mapping != adev->mman.bdev.dev_mapping)
2566 return -EPERM;
2572 return -EFAULT;
2574 size -= bytes;
2583 * amdgpu_iomem_write - Virtual write access to GPU mapped memory
2592 struct amdgpu_device *adev = file_inode(f)->i_private;
2597 dom = iommu_get_domain_for_dev(adev->dev);
2602 size_t bytes = PAGE_SIZE - off;
2613 return -EPERM;
2616 if (p->mapping != adev->mman.bdev.dev_mapping)
2617 return -EPERM;
2623 return -EFAULT;
2625 size -= bytes;
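The amdgpu_iomem_read()/amdgpu_iomem_write() lines above treat the debugfs file offset as a GPU DMA address: if the device sits behind an IOMMU, iommu_iova_to_phys() translates it to a physical address first, otherwise the offset already is physical. A short sketch of that translation step:

/*
 * Hedged sketch of the IOVA -> page translation used above; returns NULL
 * when the address is not mapped through the IOMMU.
 */
static struct page *iova_to_gpu_page_sketch(struct device *dev, u64 pos)
{
	struct iommu_domain *dom = iommu_get_domain_for_dev(dev);
	phys_addr_t addr = dom ? iommu_iova_to_phys(dom, pos) : pos;

	if (dom && !addr)
		return NULL;

	return pfn_to_page(addr >> PAGE_SHIFT);
}

The real functions additionally check that the page's mapping matches adev->mman.bdev.dev_mapping before copying, so only memory owned by the TTM device can be reached through this interface.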
2645 struct drm_minor *minor = adev_to_drm(adev)->primary;
2646 struct dentry *root = minor->debugfs_root;
2649 &amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size);
2654 ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
2657 ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
2660 ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
2663 ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
2666 ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,