Selected lines from the amdgpu command-submission path, drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c (each line is shown with its file line number and its enclosing function).

31 #include <linux/dma-buf.h>
50 struct amdgpu_fpriv *fpriv = filp->driver_priv; in amdgpu_cs_parser_init()
52 if (cs->in.num_chunks == 0) in amdgpu_cs_parser_init()
53 return -EINVAL; in amdgpu_cs_parser_init()
56 p->adev = adev; in amdgpu_cs_parser_init()
57 p->filp = filp; in amdgpu_cs_parser_init()
59 p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id); in amdgpu_cs_parser_init()
60 if (!p->ctx) in amdgpu_cs_parser_init()
61 return -EINVAL; in amdgpu_cs_parser_init()
63 if (atomic_read(&p->ctx->guilty)) { in amdgpu_cs_parser_init()
64 amdgpu_ctx_put(p->ctx); in amdgpu_cs_parser_init()
65 return -ECANCELED; in amdgpu_cs_parser_init()
68 amdgpu_sync_create(&p->sync); in amdgpu_cs_parser_init()
69 drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT | in amdgpu_cs_parser_init()
81 r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type, in amdgpu_cs_job_idx()
82 chunk_ib->ip_instance, in amdgpu_cs_job_idx()
83 chunk_ib->ring, &entity); in amdgpu_cs_job_idx()
91 if (entity->rq == NULL) in amdgpu_cs_job_idx()
92 return -EINVAL; in amdgpu_cs_job_idx()
95 for (i = 0; i < p->gang_size; ++i) in amdgpu_cs_job_idx()
96 if (p->entities[i] == entity) in amdgpu_cs_job_idx()
101 return -EINVAL; in amdgpu_cs_job_idx()
103 p->entities[i] = entity; in amdgpu_cs_job_idx()
104 p->gang_size = i + 1; in amdgpu_cs_job_idx()
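Note: the amdgpu_cs_job_idx() lines above reduce to a find-or-append over the fixed-size p->entities[] array: reuse the slot if the scheduler entity is already part of the gang, otherwise claim the next one and grow gang_size. A minimal standalone sketch of that indexing (the struct and the GANG_MAX bound are illustrative stand-ins, not the driver's own types):

    /* Minimal stand-in for the slot lookup in amdgpu_cs_job_idx(); "entity" is
     * an opaque pointer here and GANG_MAX mirrors the driver's fixed-size
     * p->entities[] array (illustrative value). */
    #define GANG_MAX 4

    struct gang {
        void *entities[GANG_MAX];
        unsigned int size;
    };

    /* Return the slot index for "entity", appending it if new; -1 if the gang
     * is full (the kernel returns -EINVAL in that case). */
    static int gang_slot(struct gang *g, void *entity)
    {
        unsigned int i;

        for (i = 0; i < g->size; ++i)
            if (g->entities[i] == entity)
                return i;           /* entity already part of the gang */

        if (i == GANG_MAX)
            return -1;

        g->entities[i] = entity;
        g->size = i + 1;
        return i;
    }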
118 if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type)) in amdgpu_cs_p1_ib()
119 return -EINVAL; in amdgpu_cs_p1_ib()
122 p->gang_leader_idx = r; in amdgpu_cs_p1_ib()
133 gobj = drm_gem_object_lookup(p->filp, data->handle); in amdgpu_cs_p1_user_fence()
135 return -EINVAL; in amdgpu_cs_p1_user_fence()
137 p->uf_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); in amdgpu_cs_p1_user_fence()
140 size = amdgpu_bo_size(p->uf_bo); in amdgpu_cs_p1_user_fence()
141 if (size != PAGE_SIZE || data->offset > (size - 8)) in amdgpu_cs_p1_user_fence()
142 return -EINVAL; in amdgpu_cs_p1_user_fence()
144 if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm)) in amdgpu_cs_p1_user_fence()
145 return -EINVAL; in amdgpu_cs_p1_user_fence()
147 *offset = data->offset; in amdgpu_cs_p1_user_fence()
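Note: amdgpu_cs_p1_user_fence() pins down the user-fence rules visible above: the fence BO must be exactly one page, the offset must leave room for the 8-byte fence value, and userptr-backed BOs are rejected. Restated as a standalone check (hypothetical helper, illustrative 4 KiB page size):

    #include <stdbool.h>
    #include <stdint.h>

    #define FENCE_PAGE_SIZE 4096u   /* illustrative; the kernel uses PAGE_SIZE */

    /* Mirrors the checks in amdgpu_cs_p1_user_fence(): a one-page BO, room for
     * a 64-bit fence value at "offset", and no userptr backing. */
    static bool user_fence_args_ok(uint64_t bo_size, uint64_t offset, bool is_userptr)
    {
        if (bo_size != FENCE_PAGE_SIZE || offset > bo_size - 8)
            return false;
        return !is_userptr;
    }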
161 r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number, in amdgpu_cs_p1_bo_handles()
162 &p->bo_list); in amdgpu_cs_p1_bo_handles()
179 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_pass1()
181 struct amdgpu_vm *vm = &fpriv->vm; in amdgpu_cs_pass1()
188 chunk_array = memdup_array_user(u64_to_user_ptr(cs->in.chunks), in amdgpu_cs_pass1()
189 cs->in.num_chunks, in amdgpu_cs_pass1()
194 p->nchunks = cs->in.num_chunks; in amdgpu_cs_pass1()
195 p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk), in amdgpu_cs_pass1()
197 if (!p->chunks) { in amdgpu_cs_pass1()
198 ret = -ENOMEM; in amdgpu_cs_pass1()
202 for (i = 0; i < p->nchunks; i++) { in amdgpu_cs_pass1()
209 ret = -EFAULT; in amdgpu_cs_pass1()
210 i--; in amdgpu_cs_pass1()
213 p->chunks[i].chunk_id = user_chunk.chunk_id; in amdgpu_cs_pass1()
214 p->chunks[i].length_dw = user_chunk.length_dw; in amdgpu_cs_pass1()
216 size = p->chunks[i].length_dw; in amdgpu_cs_pass1()
218 p->chunks[i].kdata = vmemdup_array_user(u64_to_user_ptr(user_chunk.chunk_data), in amdgpu_cs_pass1()
221 if (IS_ERR(p->chunks[i].kdata)) { in amdgpu_cs_pass1()
222 ret = PTR_ERR(p->chunks[i].kdata); in amdgpu_cs_pass1()
223 i--; in amdgpu_cs_pass1()
229 ret = -EINVAL; in amdgpu_cs_pass1()
230 switch (p->chunks[i].chunk_id) { in amdgpu_cs_pass1()
235 ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs); in amdgpu_cs_pass1()
244 ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata, in amdgpu_cs_pass1()
255 if (p->bo_list) in amdgpu_cs_pass1()
258 ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata); in amdgpu_cs_pass1()
277 if (!p->gang_size || (amdgpu_sriov_vf(p->adev) && p->gang_size > 1)) { in amdgpu_cs_pass1()
278 ret = -EINVAL; in amdgpu_cs_pass1()
282 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_pass1()
283 ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm, in amdgpu_cs_pass1()
284 num_ibs[i], &p->jobs[i], in amdgpu_cs_pass1()
285 p->filp->client_id); in amdgpu_cs_pass1()
288 switch (p->adev->enforce_isolation[fpriv->xcp_id]) { in amdgpu_cs_pass1()
291 p->jobs[i]->enforce_isolation = false; in amdgpu_cs_pass1()
292 p->jobs[i]->run_cleaner_shader = false; in amdgpu_cs_pass1()
295 p->jobs[i]->enforce_isolation = true; in amdgpu_cs_pass1()
296 p->jobs[i]->run_cleaner_shader = true; in amdgpu_cs_pass1()
299 p->jobs[i]->enforce_isolation = true; in amdgpu_cs_pass1()
300 p->jobs[i]->run_cleaner_shader = false; in amdgpu_cs_pass1()
303 p->jobs[i]->enforce_isolation = true; in amdgpu_cs_pass1()
304 p->jobs[i]->run_cleaner_shader = false; in amdgpu_cs_pass1()
308 p->gang_leader = p->jobs[p->gang_leader_idx]; in amdgpu_cs_pass1()
310 if (p->ctx->generation != p->gang_leader->generation) { in amdgpu_cs_pass1()
311 ret = -ECANCELED; in amdgpu_cs_pass1()
315 if (p->uf_bo) in amdgpu_cs_pass1()
316 p->gang_leader->uf_addr = uf_offset; in amdgpu_cs_pass1()
325 i = p->nchunks - 1; in amdgpu_cs_pass1()
327 for (; i >= 0; i--) in amdgpu_cs_pass1()
328 kvfree(p->chunks[i].kdata); in amdgpu_cs_pass1()
329 kvfree(p->chunks); in amdgpu_cs_pass1()
330 p->chunks = NULL; in amdgpu_cs_pass1()
331 p->nchunks = 0; in amdgpu_cs_pass1()
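Note: amdgpu_cs_pass1() copies the chunk metadata in two hops: cs->in.chunks points at an array of user addresses, each of which points at a struct drm_amdgpu_cs_chunk whose chunk_data in turn points at length_dw dwords of payload. Userspace therefore builds the mirror image of that layout; a hedged sketch for a single IB chunk (UAPI names as found in amdgpu_drm.h; the include path depends on how libdrm installs the header):

    #include <stdint.h>
    #include <string.h>
    #include <amdgpu_drm.h>     /* amdgpu UAPI header, shipped by libdrm */

    /* Build the two-level layout walked by amdgpu_cs_pass1(): cs.in.chunks is an
     * array of __u64 user addresses, each pointing at a drm_amdgpu_cs_chunk whose
     * chunk_data points at length_dw dwords of payload (here: one IB descriptor). */
    static void fill_one_ib_chunk(struct drm_amdgpu_cs_chunk *chunk,
                                  uint64_t *chunk_ptrs,
                                  struct drm_amdgpu_cs_chunk_ib *ib_info,
                                  union drm_amdgpu_cs *cs)
    {
        chunk->chunk_id = AMDGPU_CHUNK_ID_IB;
        chunk->length_dw = sizeof(*ib_info) / 4;    /* payload size in dwords */
        chunk->chunk_data = (uintptr_t)ib_info;

        chunk_ptrs[0] = (uintptr_t)chunk;

        memset(cs, 0, sizeof(*cs));
        cs->in.num_chunks = 1;
        cs->in.chunks = (uintptr_t)chunk_ptrs;
        /* ctx_id and bo_list_handle are still the caller's job. */
    }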
343 struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata; in amdgpu_cs_p2_ib()
344 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_p2_ib()
345 struct amdgpu_vm *vm = &fpriv->vm; in amdgpu_cs_p2_ib()
355 job = p->jobs[r]; in amdgpu_cs_p2_ib()
357 ib = &job->ibs[job->num_ibs++]; in amdgpu_cs_p2_ib()
360 if (ring->no_user_submission) in amdgpu_cs_p2_ib()
361 return -EINVAL; in amdgpu_cs_p2_ib()
364 if (p->uf_bo && ring->funcs->no_user_fence) in amdgpu_cs_p2_ib()
365 return -EINVAL; in amdgpu_cs_p2_ib()
367 if (!p->adev->debug_enable_ce_cs && in amdgpu_cs_p2_ib()
368 chunk_ib->flags & AMDGPU_IB_FLAG_CE) { in amdgpu_cs_p2_ib()
369 dev_err_ratelimited(p->adev->dev, "CE CS is blocked, use debug=0x400 to override\n"); in amdgpu_cs_p2_ib()
370 return -EINVAL; in amdgpu_cs_p2_ib()
373 if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && in amdgpu_cs_p2_ib()
374 chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { in amdgpu_cs_p2_ib()
375 if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) in amdgpu_cs_p2_ib()
383 return -EINVAL; in amdgpu_cs_p2_ib()
386 if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) in amdgpu_cs_p2_ib()
387 job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT; in amdgpu_cs_p2_ib()
389 r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ? in amdgpu_cs_p2_ib()
390 chunk_ib->ib_bytes : 0, in amdgpu_cs_p2_ib()
393 drm_err(adev_to_drm(p->adev), "Failed to get ib !\n"); in amdgpu_cs_p2_ib()
397 ib->gpu_addr = chunk_ib->va_start; in amdgpu_cs_p2_ib()
398 ib->length_dw = chunk_ib->ib_bytes / 4; in amdgpu_cs_p2_ib()
399 ib->flags = chunk_ib->flags; in amdgpu_cs_p2_ib()
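Note: amdgpu_cs_p2_ib() consumes one struct drm_amdgpu_cs_chunk_ib per IB: ip_type/ip_instance/ring pick the scheduler entity, va_start and ib_bytes locate the command buffer in the GPU VM, and flags carry the CE/PREAMBLE/PREEMPT bits checked above. A hedged example of filling one for the GFX ring from userspace (the VA and size are placeholders; field names as in amdgpu_drm.h):

    #include <stdint.h>
    #include <string.h>
    #include <amdgpu_drm.h>

    /* Describe one GFX IB living at GPU VA "ib_va" (mapped into the VM earlier)
     * with "ib_dw" dwords of packets; pass 2 derives length_dw = ib_bytes / 4. */
    static void fill_gfx_ib(struct drm_amdgpu_cs_chunk_ib *ib,
                            uint64_t ib_va, uint32_t ib_dw)
    {
        memset(ib, 0, sizeof(*ib));
        ib->ip_type = AMDGPU_HW_IP_GFX;
        ib->ip_instance = 0;
        ib->ring = 0;
        ib->va_start = ib_va;
        ib->ib_bytes = ib_dw * 4;
        ib->flags = 0;              /* e.g. AMDGPU_IB_FLAG_PREAMBLE if wanted */
    }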
406 struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata; in amdgpu_cs_p2_dependencies()
407 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_p2_dependencies()
411 num_deps = chunk->length_dw * 4 / in amdgpu_cs_p2_dependencies()
421 return -EINVAL; in amdgpu_cs_p2_dependencies()
439 if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { in amdgpu_cs_p2_dependencies()
444 fence = dma_fence_get(&s_fence->scheduled); in amdgpu_cs_p2_dependencies()
448 r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL); in amdgpu_cs_p2_dependencies()
463 r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); in amdgpu_syncobj_lookup_and_add()
465 drm_err(adev_to_drm(p->adev), "syncobj %u failed to find fence @ %llu (%d)!\n", in amdgpu_syncobj_lookup_and_add()
470 r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL); in amdgpu_syncobj_lookup_and_add()
478 struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata; in amdgpu_cs_p2_syncobj_in()
482 num_deps = chunk->length_dw * 4 / in amdgpu_cs_p2_syncobj_in()
496 struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata; in amdgpu_cs_p2_syncobj_timeline_wait()
500 num_deps = chunk->length_dw * 4 / in amdgpu_cs_p2_syncobj_timeline_wait()
516 struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata; in amdgpu_cs_p2_syncobj_out()
520 num_deps = chunk->length_dw * 4 / in amdgpu_cs_p2_syncobj_out()
523 if (p->post_deps) in amdgpu_cs_p2_syncobj_out()
524 return -EINVAL; in amdgpu_cs_p2_syncobj_out()
526 p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), in amdgpu_cs_p2_syncobj_out()
528 p->num_post_deps = 0; in amdgpu_cs_p2_syncobj_out()
530 if (!p->post_deps) in amdgpu_cs_p2_syncobj_out()
531 return -ENOMEM; in amdgpu_cs_p2_syncobj_out()
535 p->post_deps[i].syncobj = in amdgpu_cs_p2_syncobj_out()
536 drm_syncobj_find(p->filp, deps[i].handle); in amdgpu_cs_p2_syncobj_out()
537 if (!p->post_deps[i].syncobj) in amdgpu_cs_p2_syncobj_out()
538 return -EINVAL; in amdgpu_cs_p2_syncobj_out()
539 p->post_deps[i].chain = NULL; in amdgpu_cs_p2_syncobj_out()
540 p->post_deps[i].point = 0; in amdgpu_cs_p2_syncobj_out()
541 p->num_post_deps++; in amdgpu_cs_p2_syncobj_out()
550 struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata; in amdgpu_cs_p2_syncobj_timeline_signal()
554 num_deps = chunk->length_dw * 4 / in amdgpu_cs_p2_syncobj_timeline_signal()
557 if (p->post_deps) in amdgpu_cs_p2_syncobj_timeline_signal()
558 return -EINVAL; in amdgpu_cs_p2_syncobj_timeline_signal()
560 p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), in amdgpu_cs_p2_syncobj_timeline_signal()
562 p->num_post_deps = 0; in amdgpu_cs_p2_syncobj_timeline_signal()
564 if (!p->post_deps) in amdgpu_cs_p2_syncobj_timeline_signal()
565 return -ENOMEM; in amdgpu_cs_p2_syncobj_timeline_signal()
568 struct amdgpu_cs_post_dep *dep = &p->post_deps[i]; in amdgpu_cs_p2_syncobj_timeline_signal()
570 dep->chain = NULL; in amdgpu_cs_p2_syncobj_timeline_signal()
572 dep->chain = dma_fence_chain_alloc(); in amdgpu_cs_p2_syncobj_timeline_signal()
573 if (!dep->chain) in amdgpu_cs_p2_syncobj_timeline_signal()
574 return -ENOMEM; in amdgpu_cs_p2_syncobj_timeline_signal()
577 dep->syncobj = drm_syncobj_find(p->filp, in amdgpu_cs_p2_syncobj_timeline_signal()
579 if (!dep->syncobj) { in amdgpu_cs_p2_syncobj_timeline_signal()
580 dma_fence_chain_free(dep->chain); in amdgpu_cs_p2_syncobj_timeline_signal()
581 return -EINVAL; in amdgpu_cs_p2_syncobj_timeline_signal()
583 dep->point = syncobj_deps[i].point; in amdgpu_cs_p2_syncobj_timeline_signal()
584 p->num_post_deps++; in amdgpu_cs_p2_syncobj_timeline_signal()
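Note: each entry of a timeline-signal chunk names a syncobj handle and the point to attach the submission fence to; a zero point falls back to the plain replace-fence path (the dep->chain == NULL branch above). A hedged userspace-side sketch, assuming the drm_amdgpu_cs_chunk_syncobj layout (handle/flags/point) from the amdgpu UAPI header:

    #include <stdint.h>
    #include <string.h>
    #include <amdgpu_drm.h>

    /* One timeline-signal entry: attach the submission fence to "point" on the
     * timeline syncobj "handle".  point == 0 matches the dep->chain == NULL
     * path above and degrades to a plain fence replace. */
    static void fill_timeline_signal(struct drm_amdgpu_cs_chunk_syncobj *sig,
                                     uint32_t handle, uint64_t point)
    {
        memset(sig, 0, sizeof(*sig));
        sig->handle = handle;
        sig->point = point;
    }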
593 struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata; in amdgpu_cs_p2_shadow()
596 if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW) in amdgpu_cs_p2_shadow()
597 return -EINVAL; in amdgpu_cs_p2_shadow()
599 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_p2_shadow()
600 p->jobs[i]->shadow_va = shadow->shadow_va; in amdgpu_cs_p2_shadow()
601 p->jobs[i]->csa_va = shadow->csa_va; in amdgpu_cs_p2_shadow()
602 p->jobs[i]->gds_va = shadow->gds_va; in amdgpu_cs_p2_shadow()
603 p->jobs[i]->init_shadow = in amdgpu_cs_p2_shadow()
604 shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW; in amdgpu_cs_p2_shadow()
615 for (i = 0; i < p->nchunks; ++i) { in amdgpu_cs_pass2()
618 chunk = &p->chunks[i]; in amdgpu_cs_pass2()
620 switch (chunk->chunk_id) { in amdgpu_cs_pass2()
666 if (us <= 0 || !adev->mm_stats.log2_max_MBps) in us_to_bytes()
672 return us << adev->mm_stats.log2_max_MBps; in us_to_bytes()
677 if (!adev->mm_stats.log2_max_MBps) in bytes_to_us()
680 return bytes >> adev->mm_stats.log2_max_MBps; in bytes_to_us()
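Note: us_to_bytes() and bytes_to_us() are pure shifts: one byte per microsecond equals one MB/s, so a migration rate of 2^k MB/s turns the time-to-bytes conversion into a shift by k and avoids any divide in the hot path. A standalone illustration with an example rate (the value 8, i.e. 256 MB/s, is made up for the demo):

    #include <stdint.h>
    #include <stdio.h>

    /* Same arithmetic as us_to_bytes()/bytes_to_us(): with a migration rate of
     * 2^k MB/s, one microsecond of budget is worth 2^k bytes, so the conversion
     * is a shift in both directions. */
    static uint64_t budget_us_to_bytes(uint64_t us, unsigned int log2_max_MBps)
    {
        return us << log2_max_MBps;
    }

    static uint64_t budget_bytes_to_us(uint64_t bytes, unsigned int log2_max_MBps)
    {
        return bytes >> log2_max_MBps;
    }

    int main(void)
    {
        unsigned int k = 8;             /* example rate: 256 MB/s */
        uint64_t cap_us = 200000;       /* the 200 ms cap mentioned below */

        printf("200 ms of budget at 2^%u MB/s = %llu bytes\n",
               k, (unsigned long long)budget_us_to_bytes(cap_us, k));
        return 0;
    }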
684 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
702 /* Allow a maximum of 200 accumulated ms. This is basically per-IB in amdgpu_cs_get_threshold_for_moves()
711 if ((!adev->mm_stats.log2_max_MBps) || !ttm_resource_manager_used(&adev->mman.vram_mgr.manager)) { in amdgpu_cs_get_threshold_for_moves()
717 total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size); in amdgpu_cs_get_threshold_for_moves()
718 used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager); in amdgpu_cs_get_threshold_for_moves()
719 free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; in amdgpu_cs_get_threshold_for_moves()
721 spin_lock(&adev->mm_stats.lock); in amdgpu_cs_get_threshold_for_moves()
725 increment_us = time_us - adev->mm_stats.last_update_us; in amdgpu_cs_get_threshold_for_moves()
726 adev->mm_stats.last_update_us = time_us; in amdgpu_cs_get_threshold_for_moves()
727 adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us, in amdgpu_cs_get_threshold_for_moves()
735 * - a lot of VRAM is freed by userspace in amdgpu_cs_get_threshold_for_moves()
736 * - the presence of a big buffer causes a lot of evictions in amdgpu_cs_get_threshold_for_moves()
748 if (!(adev->flags & AMD_IS_APU)) in amdgpu_cs_get_threshold_for_moves()
753 adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us); in amdgpu_cs_get_threshold_for_moves()
759 *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); in amdgpu_cs_get_threshold_for_moves()
762 if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) { in amdgpu_cs_get_threshold_for_moves()
763 u64 total_vis_vram = adev->gmc.visible_vram_size; in amdgpu_cs_get_threshold_for_moves()
765 amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr); in amdgpu_cs_get_threshold_for_moves()
768 u64 free_vis_vram = total_vis_vram - used_vis_vram; in amdgpu_cs_get_threshold_for_moves()
770 adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis + in amdgpu_cs_get_threshold_for_moves()
774 adev->mm_stats.accum_us_vis = in amdgpu_cs_get_threshold_for_moves()
776 adev->mm_stats.accum_us_vis); in amdgpu_cs_get_threshold_for_moves()
779 *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis); in amdgpu_cs_get_threshold_for_moves()
784 spin_unlock(&adev->mm_stats.lock); in amdgpu_cs_get_threshold_for_moves()
794 spin_lock(&adev->mm_stats.lock); in amdgpu_cs_report_moved_bytes()
795 adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes); in amdgpu_cs_report_moved_bytes()
796 adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes); in amdgpu_cs_report_moved_bytes()
797 spin_unlock(&adev->mm_stats.lock); in amdgpu_cs_report_moved_bytes()
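Note: together, amdgpu_cs_get_threshold_for_moves() and amdgpu_cs_report_moved_bytes() behave like a token bucket: idle time between submissions refills a time budget (capped at the 200 accumulated ms mentioned in the comment above), and every byte actually migrated drains it through the same power-of-two rate. A simplified standalone sketch of that life cycle (no visible-VRAM split, no locking; the names are not the driver's):

    #include <stdint.h>

    /* Token-bucket view of amdgpu_cs_get_threshold_for_moves() plus
     * amdgpu_cs_report_moved_bytes(): elapsed time refills the budget (capped),
     * reported migrations drain it. */
    struct move_budget {
        int64_t accum_us;           /* accumulated time budget, microseconds */
        int64_t last_update_us;
        unsigned int log2_max_MBps; /* power-of-two migration rate */
    };

    static int64_t threshold_bytes(struct move_budget *b, int64_t now_us)
    {
        const int64_t cap_us = 200000;  /* at most 200 ms of accumulated budget */
        int64_t inc = now_us - b->last_update_us;

        b->last_update_us = now_us;
        b->accum_us += inc;
        if (b->accum_us > cap_us)
            b->accum_us = cap_us;

        return b->accum_us > 0 ? b->accum_us << b->log2_max_MBps : 0;
    }

    static void report_moved(struct move_budget *b, int64_t bytes)
    {
        b->accum_us -= bytes >> b->log2_max_MBps;
    }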
802 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); in amdgpu_cs_bo_validate()
807 .resv = bo->tbo.base.resv in amdgpu_cs_bo_validate()
812 if (bo->tbo.pin_count) in amdgpu_cs_bo_validate()
818 if (p->bytes_moved < p->bytes_moved_threshold && in amdgpu_cs_bo_validate()
819 (!bo->tbo.base.dma_buf || in amdgpu_cs_bo_validate()
820 list_empty(&bo->tbo.base.dma_buf->attachments))) { in amdgpu_cs_bo_validate()
821 if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && in amdgpu_cs_bo_validate()
822 (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { in amdgpu_cs_bo_validate()
827 if (p->bytes_moved_vis < p->bytes_moved_vis_threshold) in amdgpu_cs_bo_validate()
828 domain = bo->preferred_domains; in amdgpu_cs_bo_validate()
830 domain = bo->allowed_domains; in amdgpu_cs_bo_validate()
832 domain = bo->preferred_domains; in amdgpu_cs_bo_validate()
835 domain = bo->allowed_domains; in amdgpu_cs_bo_validate()
840 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); in amdgpu_cs_bo_validate()
842 p->bytes_moved += ctx.bytes_moved; in amdgpu_cs_bo_validate()
843 if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && in amdgpu_cs_bo_validate()
844 amdgpu_res_cpu_visible(adev, bo->tbo.resource)) in amdgpu_cs_bo_validate()
845 p->bytes_moved_vis += ctx.bytes_moved; in amdgpu_cs_bo_validate()
847 if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { in amdgpu_cs_bo_validate()
848 domain = bo->allowed_domains; in amdgpu_cs_bo_validate()
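Note: the tail of amdgpu_cs_bo_validate() widens the placement only when memory runs out: preferred domains first, and on -ENOMEM it retries with the BO's full allowed set. The same shape as a standalone helper, with try_place() a hypothetical stand-in for the placement-selection plus ttm_bo_validate() step:

    #include <errno.h>

    /* Shape of the placement fallback at the end of amdgpu_cs_bo_validate():
     * try the preferred domain first and only widen to the full allowed set
     * when that fails with -ENOMEM. */
    static int validate_with_fallback(void *bo,
                                      unsigned int preferred, unsigned int allowed,
                                      int (*try_place)(void *bo, unsigned int domain))
    {
        unsigned int domain = preferred;
        int r;

    retry:
        r = try_place(bo, domain);
        if (r == -ENOMEM && domain != allowed) {
            domain = allowed;
            goto retry;
        }
        return r;
    }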
858 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_parser_bos()
860 struct amdgpu_vm *vm = &fpriv->vm; in amdgpu_cs_parser_bos()
867 /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */ in amdgpu_cs_parser_bos()
868 if (cs->in.bo_list_handle) { in amdgpu_cs_parser_bos()
869 if (p->bo_list) in amdgpu_cs_parser_bos()
870 return -EINVAL; in amdgpu_cs_parser_bos()
872 r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle, in amdgpu_cs_parser_bos()
873 &p->bo_list); in amdgpu_cs_parser_bos()
876 } else if (!p->bo_list) { in amdgpu_cs_parser_bos()
878 r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0, in amdgpu_cs_parser_bos()
879 &p->bo_list); in amdgpu_cs_parser_bos()
884 mutex_lock(&p->bo_list->bo_list_mutex); in amdgpu_cs_parser_bos()
890 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { in amdgpu_cs_parser_bos()
892 struct amdgpu_bo *bo = e->bo; in amdgpu_cs_parser_bos()
894 r = amdgpu_ttm_tt_get_user_pages(bo, &e->range); in amdgpu_cs_parser_bos()
898 for (i = 0; i < bo->tbo.ttm->num_pages; i++) { in amdgpu_cs_parser_bos()
899 if (bo->tbo.ttm->pages[i] != hmm_pfn_to_page(e->range->hmm_pfns[i])) { in amdgpu_cs_parser_bos()
904 e->user_invalidated = userpage_invalidated; in amdgpu_cs_parser_bos()
907 drm_exec_until_all_locked(&p->exec) { in amdgpu_cs_parser_bos()
908 r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec, 1 + p->gang_size); in amdgpu_cs_parser_bos()
909 drm_exec_retry_on_contention(&p->exec); in amdgpu_cs_parser_bos()
913 amdgpu_bo_list_for_each_entry(e, p->bo_list) { in amdgpu_cs_parser_bos()
915 r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base, in amdgpu_cs_parser_bos()
916 1 + p->gang_size); in amdgpu_cs_parser_bos()
917 drm_exec_retry_on_contention(&p->exec); in amdgpu_cs_parser_bos()
921 e->bo_va = amdgpu_vm_bo_find(vm, e->bo); in amdgpu_cs_parser_bos()
924 if (p->uf_bo) { in amdgpu_cs_parser_bos()
925 r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base, in amdgpu_cs_parser_bos()
926 1 + p->gang_size); in amdgpu_cs_parser_bos()
927 drm_exec_retry_on_contention(&p->exec); in amdgpu_cs_parser_bos()
933 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { in amdgpu_cs_parser_bos()
936 usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm); in amdgpu_cs_parser_bos()
937 if (usermm && usermm != current->mm) { in amdgpu_cs_parser_bos()
938 r = -EPERM; in amdgpu_cs_parser_bos()
942 if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) && in amdgpu_cs_parser_bos()
943 e->user_invalidated) { in amdgpu_cs_parser_bos()
944 amdgpu_bo_placement_from_domain(e->bo, in amdgpu_cs_parser_bos()
946 r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement, in amdgpu_cs_parser_bos()
951 amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm, in amdgpu_cs_parser_bos()
952 e->range); in amdgpu_cs_parser_bos()
956 amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, in amdgpu_cs_parser_bos()
957 &p->bytes_moved_vis_threshold); in amdgpu_cs_parser_bos()
958 p->bytes_moved = 0; in amdgpu_cs_parser_bos()
959 p->bytes_moved_vis = 0; in amdgpu_cs_parser_bos()
961 r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL, in amdgpu_cs_parser_bos()
964 drm_err(adev_to_drm(p->adev), "amdgpu_vm_validate() failed.\n"); in amdgpu_cs_parser_bos()
968 drm_exec_for_each_locked_object(&p->exec, index, obj) { in amdgpu_cs_parser_bos()
974 if (p->uf_bo) { in amdgpu_cs_parser_bos()
975 r = amdgpu_ttm_alloc_gart(&p->uf_bo->tbo); in amdgpu_cs_parser_bos()
979 p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo); in amdgpu_cs_parser_bos()
982 amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, in amdgpu_cs_parser_bos()
983 p->bytes_moved_vis); in amdgpu_cs_parser_bos()
985 for (i = 0; i < p->gang_size; ++i) in amdgpu_cs_parser_bos()
986 amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj, in amdgpu_cs_parser_bos()
987 p->bo_list->gws_obj, in amdgpu_cs_parser_bos()
988 p->bo_list->oa_obj); in amdgpu_cs_parser_bos()
992 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { in amdgpu_cs_parser_bos()
993 struct amdgpu_bo *bo = e->bo; in amdgpu_cs_parser_bos()
995 amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range); in amdgpu_cs_parser_bos()
996 e->range = NULL; in amdgpu_cs_parser_bos()
998 mutex_unlock(&p->bo_list->bo_list_mutex); in amdgpu_cs_parser_bos()
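Note: amdgpu_cs_parser_bos() takes every BO reservation through the drm_exec helper: the drm_exec_until_all_locked() body re-runs whenever drm_exec_retry_on_contention() sees that a lock had to be dropped, so everything inside the loop must be safe to repeat. A rough, standalone analogue of that back-off-and-retry shape only (drm_exec itself uses ww_mutexes with an acquire ticket, which this toy version does not model):

    #include <pthread.h>
    #include <sched.h>

    /* Control-flow analogue of the drm_exec_until_all_locked() /
     * drm_exec_retry_on_contention() loop: try to lock every object and, on any
     * contention, drop what is already held and start over. */
    static void lock_all(pthread_mutex_t **locks, int n)
    {
        int i, got;

        for (;;) {
            got = 0;
            for (i = 0; i < n; i++) {
                if (pthread_mutex_trylock(locks[i]) != 0)
                    break;              /* contention: back everything off */
                got++;
            }
            if (got == n)
                return;                 /* every object locked */
            while (got--)
                pthread_mutex_unlock(locks[got]);
            sched_yield();              /* crude backoff before retrying */
        }
    }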
1009 for (i = 0; i < p->gang_size; ++i) { in trace_amdgpu_cs_ibs()
1010 struct amdgpu_job *job = p->jobs[i]; in trace_amdgpu_cs_ibs()
1012 for (j = 0; j < job->num_ibs; ++j) in trace_amdgpu_cs_ibs()
1013 trace_amdgpu_cs(p, job, &job->ibs[j]); in trace_amdgpu_cs_ibs()
1025 if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place) in amdgpu_cs_patch_ibs()
1028 for (i = 0; i < job->num_ibs; ++i) { in amdgpu_cs_patch_ibs()
1029 struct amdgpu_ib *ib = &job->ibs[i]; in amdgpu_cs_patch_ibs()
1035 va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK; in amdgpu_cs_patch_ibs()
1038 drm_err(adev_to_drm(p->adev), "IB va_start is invalid\n"); in amdgpu_cs_patch_ibs()
1042 if ((va_start + ib->length_dw * 4) > in amdgpu_cs_patch_ibs()
1043 (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { in amdgpu_cs_patch_ibs()
1044 drm_err(adev_to_drm(p->adev), "IB va_start+ib_bytes is invalid\n"); in amdgpu_cs_patch_ibs()
1045 return -EINVAL; in amdgpu_cs_patch_ibs()
1053 kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE); in amdgpu_cs_patch_ibs()
1055 if (ring->funcs->parse_cs) { in amdgpu_cs_patch_ibs()
1056 memcpy(ib->ptr, kptr, ib->length_dw * 4); in amdgpu_cs_patch_ibs()
1063 if (ib->sa_bo) in amdgpu_cs_patch_ibs()
1064 ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); in amdgpu_cs_patch_ibs()
1066 ib->ptr = (uint32_t *)kptr; in amdgpu_cs_patch_ibs()
1082 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_patch_jobs()
1083 r = amdgpu_cs_patch_ibs(p, p->jobs[i]); in amdgpu_cs_patch_jobs()
1092 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_vm_handling()
1093 struct amdgpu_job *job = p->gang_leader; in amdgpu_cs_vm_handling()
1094 struct amdgpu_device *adev = p->adev; in amdgpu_cs_vm_handling()
1095 struct amdgpu_vm *vm = &fpriv->vm; in amdgpu_cs_vm_handling()
1105 if (p->gang_size > 1 && !adev->vm_manager.concurrent_flush) { in amdgpu_cs_vm_handling()
1106 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_vm_handling()
1107 struct drm_sched_entity *entity = p->entities[i]; in amdgpu_cs_vm_handling()
1108 struct drm_gpu_scheduler *sched = entity->rq->sched; in amdgpu_cs_vm_handling()
1111 if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub)) in amdgpu_cs_vm_handling()
1112 return -EINVAL; in amdgpu_cs_vm_handling()
1117 return -EINVAL; in amdgpu_cs_vm_handling()
1123 r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false); in amdgpu_cs_vm_handling()
1127 r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update, in amdgpu_cs_vm_handling()
1132 if (fpriv->csa_va) { in amdgpu_cs_vm_handling()
1133 bo_va = fpriv->csa_va; in amdgpu_cs_vm_handling()
1139 r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update, in amdgpu_cs_vm_handling()
1147 * with p->ticket. But removing it caused test regressions, so I'm in amdgpu_cs_vm_handling()
1150 amdgpu_bo_list_for_each_entry(e, p->bo_list) { in amdgpu_cs_vm_handling()
1151 bo_va = e->bo_va; in amdgpu_cs_vm_handling()
1159 r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update, in amdgpu_cs_vm_handling()
1165 r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket); in amdgpu_cs_vm_handling()
1173 r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL); in amdgpu_cs_vm_handling()
1177 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_vm_handling()
1178 job = p->jobs[i]; in amdgpu_cs_vm_handling()
1180 if (!job->vm) in amdgpu_cs_vm_handling()
1183 job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo); in amdgpu_cs_vm_handling()
1186 if (adev->debug_vm) { in amdgpu_cs_vm_handling()
1188 amdgpu_bo_list_for_each_entry(e, p->bo_list) { in amdgpu_cs_vm_handling()
1189 struct amdgpu_bo *bo = e->bo; in amdgpu_cs_vm_handling()
1204 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_sync_rings()
1212 r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); in amdgpu_cs_sync_rings()
1214 if (r != -ERESTARTSYS) in amdgpu_cs_sync_rings()
1215 drm_err(adev_to_drm(p->adev), "amdgpu_ctx_wait_prev_fence failed.\n"); in amdgpu_cs_sync_rings()
1219 drm_exec_for_each_locked_object(&p->exec, index, obj) { in amdgpu_cs_sync_rings()
1222 struct dma_resv *resv = bo->tbo.base.resv; in amdgpu_cs_sync_rings()
1227 r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode, in amdgpu_cs_sync_rings()
1228 &fpriv->vm); in amdgpu_cs_sync_rings()
1233 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_sync_rings()
1234 r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]); in amdgpu_cs_sync_rings()
1239 sched = p->gang_leader->base.entity->rq->sched; in amdgpu_cs_sync_rings()
1240 while ((fence = amdgpu_sync_get_fence(&p->sync))) { in amdgpu_cs_sync_rings()
1249 if (!s_fence || s_fence->sched != sched) { in amdgpu_cs_sync_rings()
1254 r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence, in amdgpu_cs_sync_rings()
1267 for (i = 0; i < p->num_post_deps; ++i) { in amdgpu_cs_post_dependencies()
1268 if (p->post_deps[i].chain && p->post_deps[i].point) { in amdgpu_cs_post_dependencies()
1269 drm_syncobj_add_point(p->post_deps[i].syncobj, in amdgpu_cs_post_dependencies()
1270 p->post_deps[i].chain, in amdgpu_cs_post_dependencies()
1271 p->fence, p->post_deps[i].point); in amdgpu_cs_post_dependencies()
1272 p->post_deps[i].chain = NULL; in amdgpu_cs_post_dependencies()
1274 drm_syncobj_replace_fence(p->post_deps[i].syncobj, in amdgpu_cs_post_dependencies()
1275 p->fence); in amdgpu_cs_post_dependencies()
1283 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_submit()
1284 struct amdgpu_job *leader = p->gang_leader; in amdgpu_cs_submit()
1292 for (i = 0; i < p->gang_size; ++i) in amdgpu_cs_submit()
1293 drm_sched_job_arm(&p->jobs[i]->base); in amdgpu_cs_submit()
1295 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_submit()
1298 if (p->jobs[i] == leader) in amdgpu_cs_submit()
1301 fence = &p->jobs[i]->base.s_fence->scheduled; in amdgpu_cs_submit()
1303 r = drm_sched_job_add_dependency(&leader->base, fence); in amdgpu_cs_submit()
1310 if (p->gang_size > 1) { in amdgpu_cs_submit()
1311 for (i = 0; i < p->gang_size; ++i) in amdgpu_cs_submit()
1312 amdgpu_job_set_gang_leader(p->jobs[i], leader); in amdgpu_cs_submit()
1319 mutex_lock(&p->adev->notifier_lock); in amdgpu_cs_submit()
1322 * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. in amdgpu_cs_submit()
1325 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { in amdgpu_cs_submit()
1326 r |= !amdgpu_ttm_tt_get_user_pages_done(e->bo->tbo.ttm, in amdgpu_cs_submit()
1327 e->range); in amdgpu_cs_submit()
1328 e->range = NULL; in amdgpu_cs_submit()
1331 r = -EAGAIN; in amdgpu_cs_submit()
1332 mutex_unlock(&p->adev->notifier_lock); in amdgpu_cs_submit()
1336 p->fence = dma_fence_get(&leader->base.s_fence->finished); in amdgpu_cs_submit()
1337 drm_exec_for_each_locked_object(&p->exec, index, gobj) { in amdgpu_cs_submit()
1339 ttm_bo_move_to_lru_tail_unlocked(&gem_to_amdgpu_bo(gobj)->tbo); in amdgpu_cs_submit()
1342 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_submit()
1343 if (p->jobs[i] == leader) in amdgpu_cs_submit()
1346 dma_resv_add_fence(gobj->resv, in amdgpu_cs_submit()
1347 &p->jobs[i]->base.s_fence->finished, in amdgpu_cs_submit()
1352 dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE); in amdgpu_cs_submit()
1355 seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx], in amdgpu_cs_submit()
1356 p->fence); in amdgpu_cs_submit()
1359 if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) && in amdgpu_cs_submit()
1360 !p->ctx->preamble_presented) { in amdgpu_cs_submit()
1361 leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST; in amdgpu_cs_submit()
1362 p->ctx->preamble_presented = true; in amdgpu_cs_submit()
1365 cs->out.handle = seq; in amdgpu_cs_submit()
1366 leader->uf_sequence = seq; in amdgpu_cs_submit()
1368 amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->exec.ticket); in amdgpu_cs_submit()
1369 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_submit()
1370 amdgpu_job_free_resources(p->jobs[i]); in amdgpu_cs_submit()
1371 trace_amdgpu_cs_ioctl(p->jobs[i]); in amdgpu_cs_submit()
1372 drm_sched_entity_push_job(&p->jobs[i]->base); in amdgpu_cs_submit()
1373 p->jobs[i] = NULL; in amdgpu_cs_submit()
1376 amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); in amdgpu_cs_submit()
1378 mutex_unlock(&p->adev->notifier_lock); in amdgpu_cs_submit()
1379 mutex_unlock(&p->bo_list->bo_list_mutex); in amdgpu_cs_submit()
1388 amdgpu_sync_free(&parser->sync); in amdgpu_cs_parser_fini()
1389 drm_exec_fini(&parser->exec); in amdgpu_cs_parser_fini()
1391 for (i = 0; i < parser->num_post_deps; i++) { in amdgpu_cs_parser_fini()
1392 drm_syncobj_put(parser->post_deps[i].syncobj); in amdgpu_cs_parser_fini()
1393 kfree(parser->post_deps[i].chain); in amdgpu_cs_parser_fini()
1395 kfree(parser->post_deps); in amdgpu_cs_parser_fini()
1397 dma_fence_put(parser->fence); in amdgpu_cs_parser_fini()
1399 if (parser->ctx) in amdgpu_cs_parser_fini()
1400 amdgpu_ctx_put(parser->ctx); in amdgpu_cs_parser_fini()
1401 if (parser->bo_list) in amdgpu_cs_parser_fini()
1402 amdgpu_bo_list_put(parser->bo_list); in amdgpu_cs_parser_fini()
1404 for (i = 0; i < parser->nchunks; i++) in amdgpu_cs_parser_fini()
1405 kvfree(parser->chunks[i].kdata); in amdgpu_cs_parser_fini()
1406 kvfree(parser->chunks); in amdgpu_cs_parser_fini()
1407 for (i = 0; i < parser->gang_size; ++i) { in amdgpu_cs_parser_fini()
1408 if (parser->jobs[i]) in amdgpu_cs_parser_fini()
1409 amdgpu_job_free(parser->jobs[i]); in amdgpu_cs_parser_fini()
1411 amdgpu_bo_unref(&parser->uf_bo); in amdgpu_cs_parser_fini()
1421 return -EHWPOISON; in amdgpu_cs_ioctl()
1423 if (!adev->accel_working) in amdgpu_cs_ioctl()
1424 return -EBUSY; in amdgpu_cs_ioctl()
1442 if (r == -ENOMEM) in amdgpu_cs_ioctl()
1444 else if (r != -ERESTARTSYS && r != -EAGAIN) in amdgpu_cs_ioctl()
1471 mutex_unlock(&parser.bo_list->bo_list_mutex); in amdgpu_cs_ioctl()
1479 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
1491 unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); in amdgpu_cs_wait_ioctl()
1497 ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); in amdgpu_cs_wait_ioctl()
1499 return -EINVAL; in amdgpu_cs_wait_ioctl()
1501 r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance, in amdgpu_cs_wait_ioctl()
1502 wait->in.ring, &entity); in amdgpu_cs_wait_ioctl()
1508 fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle); in amdgpu_cs_wait_ioctl()
1513 if (r > 0 && fence->error) in amdgpu_cs_wait_ioctl()
1514 r = fence->error; in amdgpu_cs_wait_ioctl()
1524 wait->out.status = (r == 0); in amdgpu_cs_wait_ioctl()
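Note: the sequence number stored in cs->out.handle at submit time is what amdgpu_cs_wait_ioctl() looks up again, so waiting from userspace is a single DRM_IOCTL_AMDGPU_WAIT_CS call. A hedged sketch, assuming the union drm_amdgpu_wait_cs layout from the amdgpu UAPI header and drmIoctl() from libdrm:

    #include <stdint.h>
    #include <string.h>
    #include <xf86drm.h>        /* drmIoctl() from libdrm */
    #include <amdgpu_drm.h>

    /* Wait for the submission whose sequence number came back in cs->out.handle.
     * Returns the ioctl result; *busy is set when the timeout expired before the
     * fence signaled (wait.out.status, mirroring "(r == 0)" above). */
    static int wait_cs(int fd, uint32_t ctx_id, uint32_t ip_type, uint32_t ring,
                       uint64_t seq, uint64_t timeout_ns, int *busy)
    {
        union drm_amdgpu_wait_cs wait;
        int r;

        memset(&wait, 0, sizeof(wait));
        wait.in.handle = seq;
        wait.in.ip_type = ip_type;
        wait.in.ip_instance = 0;
        wait.in.ring = ring;
        wait.in.ctx_id = ctx_id;
        wait.in.timeout = timeout_ns;   /* interpreted by amdgpu_gem_timeout() */

        r = drmIoctl(fd, DRM_IOCTL_AMDGPU_WAIT_CS, &wait);
        if (r == 0)
            *busy = wait.out.status != 0;
        return r;
    }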
1530 * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
1545 ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id); in amdgpu_cs_get_fence()
1547 return ERR_PTR(-EINVAL); in amdgpu_cs_get_fence()
1549 r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance, in amdgpu_cs_get_fence()
1550 user->ring, &entity); in amdgpu_cs_get_fence()
1556 fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no); in amdgpu_cs_get_fence()
1572 fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence); in amdgpu_cs_fence_to_handle_ioctl()
1579 switch (info->in.what) { in amdgpu_cs_fence_to_handle_ioctl()
1585 r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle); in amdgpu_cs_fence_to_handle_ioctl()
1594 r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle); in amdgpu_cs_fence_to_handle_ioctl()
1609 return -ENOMEM; in amdgpu_cs_fence_to_handle_ioctl()
1612 fd_install(fd, sync_file->file); in amdgpu_cs_fence_to_handle_ioctl()
1613 info->out.handle = fd; in amdgpu_cs_fence_to_handle_ioctl()
1618 return -EINVAL; in amdgpu_cs_fence_to_handle_ioctl()
1623 * amdgpu_cs_wait_all_fences - wait on all fences to signal
1635 uint32_t fence_count = wait->in.fence_count; in amdgpu_cs_wait_all_fences()
1641 unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns); in amdgpu_cs_wait_all_fences()
1650 if (r > 0 && fence->error) in amdgpu_cs_wait_all_fences()
1651 r = fence->error; in amdgpu_cs_wait_all_fences()
1662 wait->out.status = (r > 0); in amdgpu_cs_wait_all_fences()
1668 * amdgpu_cs_wait_any_fence - wait on any fence to signal
1680 unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns); in amdgpu_cs_wait_any_fence()
1681 uint32_t fence_count = wait->in.fence_count; in amdgpu_cs_wait_any_fence()
1691 return -ENOMEM; in amdgpu_cs_wait_any_fence()
1716 wait->out.status = (r > 0); in amdgpu_cs_wait_any_fence()
1717 wait->out.first_signaled = first; in amdgpu_cs_wait_any_fence()
1720 r = array[first]->error; in amdgpu_cs_wait_any_fence()
1733 * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
1748 fences = memdup_array_user(u64_to_user_ptr(wait->in.fences), in amdgpu_cs_wait_fences_ioctl()
1749 wait->in.fence_count, in amdgpu_cs_wait_fences_ioctl()
1754 if (wait->in.wait_all) in amdgpu_cs_wait_fences_ioctl()
1765 * amdgpu_cs_find_mapping - find bo_va for VM address
1780 struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; in amdgpu_cs_find_mapping()
1782 struct amdgpu_vm *vm = &fpriv->vm; in amdgpu_cs_find_mapping()
1789 if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo) in amdgpu_cs_find_mapping()
1790 return -EINVAL; in amdgpu_cs_find_mapping()
1792 *bo = mapping->bo_va->base.bo; in amdgpu_cs_find_mapping()
1796 if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket) in amdgpu_cs_find_mapping()
1797 return -EINVAL; in amdgpu_cs_find_mapping()
1800 (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; in amdgpu_cs_find_mapping()
1801 if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM && in amdgpu_cs_find_mapping()
1802 !((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) { in amdgpu_cs_find_mapping()
1804 amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); in amdgpu_cs_find_mapping()
1805 for (i = 0; i < (*bo)->placement.num_placement; i++) in amdgpu_cs_find_mapping()
1806 (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; in amdgpu_cs_find_mapping()
1807 r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); in amdgpu_cs_find_mapping()
1812 return amdgpu_ttm_alloc_gart(&(*bo)->tbo); in amdgpu_cs_find_mapping()