Lines Matching +full:ctx +full:- +full:asid
1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2021 HabanaLabs, Ltd.
23 * enum hl_cs_wait_status - cs wait status
53 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
66 * push outcome - store a recent CS outcome in the store
67 * pop outcome - retrieve a SPECIFIC (by seq) CS outcome from the store
69 * It has a pre-allocated amount of nodes, each node stores
84 spin_lock_irqsave(&outcome_store->db_lock, flags);
86 if (list_empty(&outcome_store->free_list)) {
87 node = list_last_entry(&outcome_store->used_list,
89 hash_del(&node->map_link);
90 dev_dbg(hdev->dev, "CS %llu outcome was lost\n", node->seq);
92 node = list_last_entry(&outcome_store->free_list,
96 list_del_init(&node->list_link);
98 node->seq = seq;
99 node->ts = ts;
100 node->error = error;
102 list_add(&node->list_link, &outcome_store->used_list);
103 hash_add(outcome_store->outcome_map, &node->map_link, node->seq);
105 spin_unlock_irqrestore(&outcome_store->db_lock, flags);
114 spin_lock_irqsave(&outcome_store->db_lock, flags);
116 hash_for_each_possible(outcome_store->outcome_map, node, map_link, seq)
117 if (node->seq == seq) {
118 *ts = node->ts;
119 *error = node->error;
121 hash_del(&node->map_link);
122 list_del_init(&node->list_link);
123 list_add(&node->list_link, &outcome_store->free_list);
125 spin_unlock_irqrestore(&outcome_store->db_lock, flags);
130 spin_unlock_irqrestore(&outcome_store->db_lock, flags);
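/*
 * A minimal usage sketch (not a line of the file) of how the two helpers
 * above pair up in the fragments further below.  "seq", "ts", "err" and
 * "timestamp" are hypothetical locals; the calls themselves follow the
 * signatures used in cs_do_release() and hl_wait_for_fence().
 */
	/* push side: a CS submitted with HL_CS_FLAGS_TIMESTAMP has completed */
	hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence,
				cs->fence->timestamp, cs->fence->error);

	/* pop side: the waiter arrived after the fence was already released */
	if (hl_pop_cs_outcome(&ctx->outcome_store, seq, &ts, &err))
		timestamp = ktime_to_ns(ts);	/* CS completed; err holds its error code */
/*
 * When every pre-allocated node is in use, the oldest used node is recycled
 * and that CS outcome is reported as lost (the dev_dbg in the push path above).
 */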
139 struct hl_device *hdev = hw_sob->hdev;
141 dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id);
143 hdev->asic_funcs->reset_sob(hdev, hw_sob);
145 hw_sob->need_reset = false;
152 struct hl_device *hdev = hw_sob->hdev;
154 dev_crit(hdev->dev,
156 hw_sob->q_idx, hw_sob->sob_id);
162 kref_put(&hw_sob->kref, hl_sob_reset);
168 kref_put(&hw_sob->kref, hl_sob_reset_error);
174 kref_get(&hw_sob->kref);
178 * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
190 return -EINVAL;
196 for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
200 if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
201 return -EINVAL;
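/*
 * A minimal sketch of the whole check hinted at by the fragments above,
 * assuming a monitor observes the eight SOBs aligned at (sob_base & ~0x7);
 * the final complement of the user's mask is an assumption, as that line is
 * not among the fragments shown.
 */
	if (sob_mask == 0)
		return -EINVAL;

	/* find the most significant SOB requested by the caller */
	for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
		if (BIT(i) & sob_mask)
			break;

	/* reject masks that reach past the monitor's 8-SOB window */
	if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
		return -EINVAL;

	*mask = ~sob_mask;
	return 0;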
223 kref_put(&fence->refcount, hl_fence_release);
237 kref_get(&fence->refcount);
242 kref_init(&fence->refcount);
243 fence->cs_sequence = sequence;
244 fence->error = 0;
245 fence->timestamp = ktime_set(0, 0);
246 fence->mcs_handling_done = false;
247 init_completion(&fence->completion);
252 kref_get(&cs->refcount);
257 return kref_get_unless_zero(&cs->refcount);
262 kref_put(&cs->refcount, cs_do_release);
274 kref_put(&job->refcount, cs_job_do_release);
282 if (cs->staged_cs && !cs->staged_last)
293 if (cs->staged_cs && !cs->staged_first)
302 return (job->queue_type == QUEUE_TYPE_EXT);
306 * cs_parser - parse the user command submission
318 struct hl_device *hdev = hpriv->hdev;
322 parser.ctx_id = job->cs->ctx->asid;
323 parser.cs_sequence = job->cs->sequence;
324 parser.job_id = job->id;
326 parser.hw_queue_id = job->hw_queue_id;
327 parser.job_userptr_list = &job->userptr_list;
329 parser.user_cb = job->user_cb;
330 parser.user_cb_size = job->user_cb_size;
331 parser.queue_type = job->queue_type;
332 parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
333 job->patched_cb = NULL;
334 parser.completion = cs_needs_completion(job->cs);
336 rc = hdev->asic_funcs->cs_parser(hdev, &parser);
340 job->patched_cb = parser.patched_cb;
341 job->job_cb_size = parser.patched_cb_size;
342 job->contains_dma_pkt = parser.contains_dma_pkt;
343 atomic_inc(&job->patched_cb->cs_cnt);
351 atomic_dec(&job->user_cb->cs_cnt);
352 hl_cb_put(job->user_cb);
353 job->user_cb = NULL;
355 job->job_cb_size = job->user_cb_size;
363 struct hl_cs *cs = job->cs;
366 hl_userptr_delete_list(hdev, &job->userptr_list);
372 if (job->patched_cb) {
373 atomic_dec(&job->patched_cb->cs_cnt);
374 hl_cb_put(job->patched_cb);
383 if (job->is_kernel_allocated_cb &&
384 (job->queue_type == QUEUE_TYPE_HW || job->queue_type == QUEUE_TYPE_INT)) {
385 atomic_dec(&job->user_cb->cs_cnt);
386 hl_cb_put(job->user_cb);
393 spin_lock(&cs->job_lock);
394 list_del(&job->cs_node);
395 spin_unlock(&cs->job_lock);
412 (job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) {
417 if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_JOB)
418 cs->completion_timestamp = job->timestamp;
427 * hl_staged_cs_find_first - locate the first CS in this staged submission
432 * @note: This function must be called under 'hdev->cs_mirror_lock'
440 list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
441 if (cs->staged_cs && cs->staged_first &&
442 cs->sequence == cs_seq)
449 * is_staged_cs_last_exists - returns true if the last CS in sequence exists
459 last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
462 if (last_entry->staged_last)
469 * staged_cs_get - get CS reference if this CS is a part of a staged CS
485 if (!cs->staged_last)
490 * staged_cs_put - put a CS in case it is part of staged submission
513 spin_lock(&hdev->cs_mirror_lock);
523 if (cs->staged_cs && cs->staged_last) {
524 first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
529 spin_unlock(&hdev->cs_mirror_lock);
534 if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
537 if (cs->tdr_active)
538 cancel_delayed_work_sync(&cs->work_tdr);
540 spin_lock(&hdev->cs_mirror_lock);
543 list_for_each_entry(iter, &hdev->cs_mirror_list, mirror_node)
549 if (next && !next->tdr_active) {
550 next->tdr_active = true;
551 schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
554 spin_unlock(&hdev->cs_mirror_lock);
558 * force_complete_multi_cs - complete all contexts that wait on multi-CS
569 mcs_compl = &hdev->multi_cs_completion[i];
571 spin_lock(&mcs_compl->lock);
573 if (!mcs_compl->used) {
574 spin_unlock(&mcs_compl->lock);
 579 * multi-CS.
583 dev_err(hdev->dev,
584 "multi-CS completion context %d still waiting when calling force completion\n",
586 complete_all(&mcs_compl->completion);
587 spin_unlock(&mcs_compl->lock);
592 * complete_multi_cs - complete all waiting entities on multi-CS
599 * - a completed CS worked on stream master QID 4, multi CS completion
602 * - a completed CS worked on stream master QID 4, multi CS completion
608 struct hl_fence *fence = cs->fence;
612 if (cs->staged_cs && !cs->staged_first)
618 mcs_compl = &hdev->multi_cs_completion[i];
619 if (!mcs_compl->used)
622 spin_lock(&mcs_compl->lock);
630 if (mcs_compl->used &&
631 (fence->stream_master_qid_map &
632 mcs_compl->stream_master_qid_map)) {
634 if (!mcs_compl->timestamp)
635 mcs_compl->timestamp = ktime_to_ns(fence->timestamp);
637 complete_all(&mcs_compl->completion);
646 fence->mcs_handling_done = true;
649 spin_unlock(&mcs_compl->lock);
652 fence->mcs_handling_done = true;
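/*
 * Worked example for the QID-map intersection above (values hypothetical):
 * a CS that ran on stream master QIDs 4 and 6 completes with
 * fence->stream_master_qid_map = BIT(4) | BIT(6).  A waiting multi-CS
 * context whose stream_master_qid_map includes BIT(4) or BIT(6) sees a
 * non-zero AND and is released via complete_all(); a context waiting only
 * on QIDs 3 and 5 has no overlap and keeps waiting.
 */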
663 if (!hl_cs_cmpl->hw_sob || !cs->submitted)
666 spin_lock(&hl_cs_cmpl->lock);
673 if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
674 (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
675 (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) ||
676 (!!hl_cs_cmpl->encaps_signals)) {
677 dev_dbg(hdev->dev,
679 hl_cs_cmpl->cs_seq,
680 hl_cs_cmpl->type,
681 hl_cs_cmpl->hw_sob->sob_id,
682 hl_cs_cmpl->sob_val);
684 hw_sob_put(hl_cs_cmpl->hw_sob);
686 if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
687 hdev->asic_funcs->reset_sob_group(hdev,
688 hl_cs_cmpl->sob_group);
691 spin_unlock(&hl_cs_cmpl->lock);
697 struct hl_device *hdev = cs->ctx->hdev;
700 container_of(cs->fence, struct hl_cs_compl, base_fence);
702 cs->completed = true;
709 * potentially the CTX object) could be released, while the JOB
712 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
715 if (!cs->submitted) {
721 if (cs->type == CS_TYPE_WAIT ||
722 cs->type == CS_TYPE_COLLECTIVE_WAIT)
723 hl_fence_put(cs->signal_fence);
732 spin_lock(&hdev->cs_mirror_lock);
733 list_del_init(&cs->mirror_node);
734 spin_unlock(&hdev->cs_mirror_lock);
738 if (cs->staged_cs) {
742 if (cs->staged_last) {
746 &cs->staged_cs_node, staged_cs_node)
754 if (cs->submitted) {
755 spin_lock(&hdev->cs_mirror_lock);
756 list_del(&cs->staged_cs_node);
757 spin_unlock(&hdev->cs_mirror_lock);
763 if (hl_cs_cmpl->encaps_signals)
764 kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount,
768 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) && cs->encaps_signals)
769 kref_put(&cs->encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx);
772 /* Must be called before hl_ctx_put because inside we use ctx to get
777 hdev->shadow_cs_queue[cs->sequence & (hdev->asic_prop.max_pending_cs - 1)] = NULL;
783 if (cs->timedout)
784 cs->fence->error = -ETIMEDOUT;
785 else if (cs->aborted)
786 cs->fence->error = -EIO;
787 else if (!cs->submitted)
788 cs->fence->error = -EBUSY;
790 if (unlikely(cs->skip_reset_on_timeout)) {
791 dev_err(hdev->dev,
793 cs->sequence,
794 div_u64(jiffies - cs->submission_time_jiffies, HZ));
797 if (cs->timestamp) {
798 cs->fence->timestamp = cs->completion_timestamp;
799 hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence,
800 cs->fence->timestamp, cs->fence->error);
803 hl_ctx_put(cs->ctx);
805 complete_all(&cs->fence->completion);
810 hl_fence_put(cs->fence);
812 kfree(cs->jobs_in_queue_cnt);
825 skip_reset_on_timeout = cs->skip_reset_on_timeout;
831 if ((!cs->submitted) || (cs->completed)) {
836 hdev = cs->ctx->hdev;
839 if (hdev->reset_on_lockup)
842 hdev->reset_info.needs_reset = true;
845 cs->timedout = true;
849 rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0);
851 hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
852 hdev->captured_err_info.cs_timeout.seq = cs->sequence;
856 timeout_sec = jiffies_to_msecs(hdev->timeout_jiffies) / 1000;
858 switch (cs->type) {
860 dev_err(hdev->dev,
862 cs->sequence, timeout_sec);
866 dev_err(hdev->dev,
868 cs->sequence, timeout_sec);
872 dev_err(hdev->dev,
874 cs->sequence, timeout_sec);
878 dev_err(hdev->dev,
880 cs->sequence, timeout_sec);
886 dev_err(hdev->dev, "Error during system state dump %d\n", rc);
898 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
908 cntr = &hdev->aggregated_cs_counters;
915 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
916 atomic64_inc(&cntr->out_of_mem_drop_cnt);
917 return -ENOMEM;
921 hl_ctx_get(ctx);
923 cs->ctx = ctx;
924 cs->submitted = false;
925 cs->completed = false;
926 cs->type = cs_type;
927 cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
928 cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
929 cs->timeout_jiffies = timeout;
930 cs->skip_reset_on_timeout =
931 hdev->reset_info.skip_reset_on_timeout ||
933 cs->submission_time_jiffies = jiffies;
934 INIT_LIST_HEAD(&cs->job_list);
935 INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
936 kref_init(&cs->refcount);
937 spin_lock_init(&cs->job_lock);
944 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
945 atomic64_inc(&cntr->out_of_mem_drop_cnt);
946 rc = -ENOMEM;
950 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
951 sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
952 if (!cs->jobs_in_queue_cnt)
953 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
954 sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
956 if (!cs->jobs_in_queue_cnt) {
957 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
958 atomic64_inc(&cntr->out_of_mem_drop_cnt);
959 rc = -ENOMEM;
963 cs_cmpl->hdev = hdev;
964 cs_cmpl->type = cs->type;
965 spin_lock_init(&cs_cmpl->lock);
966 cs->fence = &cs_cmpl->base_fence;
968 spin_lock(&ctx->cs_lock);
970 cs_cmpl->cs_seq = ctx->cs_sequence;
971 other = ctx->cs_pending[cs_cmpl->cs_seq &
972 (hdev->asic_prop.max_pending_cs - 1)];
974 if (other && !completion_done(&other->completion)) {
982 if (other->cs_sequence == user_sequence)
983 dev_crit_ratelimited(hdev->dev,
987 dev_dbg_ratelimited(hdev->dev,
988 "Rejecting CS because of too many in-flights CS\n");
989 atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
990 atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
991 rc = -EAGAIN;
996 hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
998 cs->sequence = cs_cmpl->cs_seq;
1000 ctx->cs_pending[cs_cmpl->cs_seq &
1001 (hdev->asic_prop.max_pending_cs - 1)] =
1002 &cs_cmpl->base_fence;
1003 ctx->cs_sequence++;
1005 hl_fence_get(&cs_cmpl->base_fence);
1009 spin_unlock(&ctx->cs_lock);
1016 spin_unlock(&ctx->cs_lock);
1017 kfree(cs->jobs_in_queue_cnt);
1022 hl_ctx_put(ctx);
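/*
 * Worked example for the in-flight window enforced above (numbers
 * hypothetical): with asic_prop.max_pending_cs = 64, sequence N occupies
 * ctx->cs_pending[N & 63].  A new CS with sequence 65 maps onto slot 1; if
 * the fence of sequence 1 stored there has not completed yet, the context
 * already holds a full window of in-flight CSs and the submission fails
 * with -EAGAIN so the caller can retry after older CSs complete.
 */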
1032 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
1037 * release_reserved_encaps_signals() - release reserved encapsulated signals.
1040 * Release reserved encapsulated signals which weren't un-reserved, or for which a CS with
1041 * encapsulated signals wasn't submitted and thus weren't released as part of CS roll-back.
1047 struct hl_ctx *ctx = hl_get_compute_ctx(hdev);
1052 if (!ctx)
1055 mgr = &ctx->sig_mgr;
1057 idr_for_each_entry(&mgr->handles, handle, id)
1058 if (handle->cs_seq == ULLONG_MAX)
1059 kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob_ctx);
1061 hl_ctx_put(ctx);
1070 flush_workqueue(hdev->ts_free_obj_wq);
1075 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1076 flush_workqueue(hdev->cq_wq[i]);
1078 flush_workqueue(hdev->cs_cmplt_wq);
1082 list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
1084 cs->aborted = true;
1085 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
1086 cs->ctx->asid, cs->sequence);
1102 spin_lock_irqsave(&interrupt->wait_list_lock, flags);
1103 list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) {
1104 pend->fence.error = -EIO;
1105 complete_all(&pend->fence.completion);
1107 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
1109 spin_lock_irqsave(&interrupt->ts_list_lock, flags);
1110 list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) {
1111 list_del(&pend->list_node);
1112 hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
1113 hl_cb_put(pend->ts_reg_info.cq_cb);
1115 spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
1120 struct asic_fixed_properties *prop = &hdev->asic_prop;
1124 if (!prop->user_interrupt_count)
1134 for (i = 0 ; i < prop->user_interrupt_count ; i++) {
1135 interrupt = &hdev->user_interrupt[i];
1139 interrupt = &hdev->common_user_cq_interrupt;
1142 interrupt = &hdev->common_decoder_interrupt;
1150 spin_lock(&hdev->cs_mirror_lock);
1152 list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) {
1153 cs->fence->error = -EIO;
1154 complete_all(&cs->fence->completion);
1157 spin_unlock(&hdev->cs_mirror_lock);
1170 struct hl_cs *cs = job->cs;
1171 struct hl_device *hdev = cs->ctx->hdev;
1180 struct hl_device *hdev = cs->ctx->hdev;
1183 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
1192 spin_lock(&hdev->cs_mirror_lock);
1194 list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node)
1195 if (!cs->completed)
1198 spin_unlock(&hdev->cs_mirror_lock);
1208 struct asic_fixed_properties *asic = &hdev->asic_prop;
1211 /* This must be checked here to prevent out-of-bounds access to
1214 if (chunk->queue_index >= asic->max_queues) {
1215 dev_err(hdev->dev, "Queue index %d is invalid\n",
1216 chunk->queue_index);
1217 return -EINVAL;
1220 hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
1222 if (hw_queue_prop->type == QUEUE_TYPE_NA) {
1223 dev_err(hdev->dev, "Queue index %d is not applicable\n",
1224 chunk->queue_index);
1225 return -EINVAL;
1228 if (hw_queue_prop->binned) {
1229 dev_err(hdev->dev, "Queue index %d is binned out\n",
1230 chunk->queue_index);
1231 return -EINVAL;
1234 if (hw_queue_prop->driver_only) {
1235 dev_err(hdev->dev,
1237 chunk->queue_index);
1238 return -EINVAL;
1244 if (hw_queue_prop->type == QUEUE_TYPE_HW) {
1245 if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
1246 if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
1247 dev_err(hdev->dev,
1249 chunk->queue_index);
1250 return -EINVAL;
1255 if (!(hw_queue_prop->cb_alloc_flags &
1257 dev_err(hdev->dev,
1259 chunk->queue_index);
1260 return -EINVAL;
1266 *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
1270 *queue_type = hw_queue_prop->type;
1280 cb = hl_cb_get(mmg, chunk->cb_handle);
1282 dev_err(hdev->dev, "CB handle 0x%llx invalid\n", chunk->cb_handle);
1286 if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
1287 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
1291 atomic_inc(&cb->cs_cnt);
1312 kref_init(&job->refcount);
1313 job->queue_type = queue_type;
1314 job->is_kernel_allocated_cb = is_kernel_allocated_cb;
1317 INIT_LIST_HEAD(&job->userptr_list);
1319 if (job->queue_type == QUEUE_TYPE_EXT)
1320 INIT_WORK(&job->finish_work, job_wq_completion);
1349 struct hl_device *hdev = hpriv->hdev;
1350 struct hl_ctx *ctx = hpriv->ctx;
1357 for (i = 0 ; i < sizeof(args->in.pad) ; i++)
1358 if (args->in.pad[i]) {
1359 dev_dbg(hdev->dev, "Padding bytes must be 0\n");
1360 return -EINVAL;
1364 return -EBUSY;
1366 if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1367 !hdev->supports_staged_submission) {
1368 dev_err(hdev->dev, "staged submission not supported");
1369 return -EPERM;
1372 cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;
1375 dev_err(hdev->dev,
1377 ctx->asid);
1378 return -EINVAL;
1382 num_chunks = args->in.num_chunks_execute;
1387 if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) {
1388 dev_err(hdev->dev, "Sync stream CS is not supported\n");
1389 return -EINVAL;
1394 dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid);
1395 return -EINVAL;
1398 dev_err(hdev->dev,
1400 ctx->asid);
1401 return -EINVAL;
1410 struct hl_ctx *ctx)
1415 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1416 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1417 dev_err(hdev->dev,
1420 return -EINVAL;
1429 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1430 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1431 return -ENOMEM;
1436 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1437 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1438 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
1440 return -EFAULT;
1453 cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
1454 cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);
1456 if (cs->staged_first) {
1458 INIT_LIST_HEAD(&cs->staged_cs_node);
1459 cs->staged_sequence = cs->sequence;
1461 if (cs->encaps_signals)
1462 cs->encaps_sig_hdl_id = encaps_signal_handle;
1467 cs->staged_sequence = sequence;
1473 cs->staged_cs = true;
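/*
 * Example of the staged bookkeeping above (sequence numbers hypothetical):
 * a three-part staged submission sends part 1 with
 * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST, which becomes the anchor and records
 * staged_sequence = its own sequence (say 100); parts 2 and 3 pass 100 in
 * and get 100 reported back as their sequence (*cs_seq = cs->staged_sequence
 * below); part 3 also carries HL_CS_FLAGS_STAGED_SUBMISSION_LAST, and only
 * its completion releases the whole chain in cs_do_release().
 */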
1482 for (i = 0; i < hdev->stream_master_qid_arr_size; i++)
1483 if (qid == hdev->stream_master_qid_arr[i])
1495 struct hl_device *hdev = hpriv->hdev;
1498 struct hl_ctx *ctx = hpriv->ctx;
1506 cntr = &hdev->aggregated_cs_counters;
1511 hpriv->ctx);
1521 rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
1527 *cs_seq = cs->sequence;
1539 if (cs->staged_cs)
1540 *cs_seq = cs->staged_sequence;
1551 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1552 atomic64_inc(&cntr->validation_drop_cnt);
1557 cb = get_cb_from_cs_chunk(hdev, &hpriv->mem_mgr, chunk);
1560 &ctx->cs_counters.validation_drop_cnt);
1561 atomic64_inc(&cntr->validation_drop_cnt);
1562 rc = -EINVAL;
1566 cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
1577 if (hdev->supports_wait_for_multi_cs)
1580 chunk->queue_index);
1589 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1590 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1591 dev_err(hdev->dev, "Failed to allocate a new job\n");
1592 rc = -ENOMEM;
1599 job->id = i + 1;
1600 job->cs = cs;
1601 job->user_cb = cb;
1602 job->user_cb_size = chunk->cb_size;
1603 job->hw_queue_id = chunk->queue_index;
1605 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1606 cs->jobs_cnt++;
1608 list_add_tail(&job->cs_node, &cs->job_list);
1617 (job->queue_type == QUEUE_TYPE_EXT ||
1618 job->queue_type == QUEUE_TYPE_HW))
1625 atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
1626 atomic64_inc(&cntr->parsing_drop_cnt);
1627 dev_err(hdev->dev,
1629 cs->ctx->asid, cs->sequence, job->id, rc);
1638 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1639 atomic64_inc(&cntr->validation_drop_cnt);
1640 dev_err(hdev->dev,
1642 cs->ctx->asid, cs->sequence);
1643 rc = -EINVAL;
1648 INIT_WORK(&cs->finish_work, cs_completion);
1652 * fence object for multi-CS completion
1654 if (hdev->supports_wait_for_multi_cs)
1655 cs->fence->stream_master_qid_map = stream_master_qid_map;
1659 if (rc != -EAGAIN)
1660 dev_err(hdev->dev,
1662 cs->ctx->asid, cs->sequence, rc);
1666 *signal_initial_sob_count = cs->initial_sob_count;
1672 atomic_dec(&cb->cs_cnt);
1690 struct hl_device *hdev = hpriv->hdev;
1691 struct hl_ctx *ctx = hpriv->ctx;
1699 if (hdev->supports_ctx_switch)
1700 do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
1702 if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
1703 mutex_lock(&hpriv->restore_phase_mutex);
1706 rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
1708 dev_err_ratelimited(hdev->dev,
1710 ctx->asid, rc);
1713 * while we want to do context-switch (-EBUSY),
1714 * we need to soft-reset because QMAN is
1720 if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
1722 mutex_unlock(&hpriv->restore_phase_mutex);
1727 hdev->asic_funcs->restore_phase_topology(hdev);
1729 chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
1730 num_chunks = args->in.num_chunks_restore;
1733 dev_dbg(hdev->dev,
1738 cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count);
1741 mutex_unlock(&hpriv->restore_phase_mutex);
1744 dev_err(hdev->dev,
1746 ctx->asid, rc);
1754 ret = _hl_cs_wait_ioctl(hdev, ctx,
1755 jiffies_to_usecs(hdev->timeout_jiffies),
1758 dev_err(hdev->dev,
1760 ctx->asid, ret);
1761 rc = -ENOEXEC;
1766 if (hdev->supports_ctx_switch)
1767 ctx->thread_ctx_switch_wait_token = 1;
1769 } else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) {
1771 &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
1772 100, jiffies_to_usecs(hdev->timeout_jiffies), false);
1774 if (rc == -ETIMEDOUT) {
1775 dev_err(hdev->dev,
1782 if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
1808 prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1813 if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) {
1826 other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
1827 other_sob = &prop->hw_sob[other_sob_offset];
1829 if (kref_read(&other_sob->kref) != 1) {
1830 dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n",
1832 return -EINVAL;
1841 prop->next_sob_val = count + 1;
1843 prop->next_sob_val = count;
1846 prop->curr_sob_offset = other_sob_offset;
1862 if (other_sob->need_reset)
1867 sob->need_reset = true;
1871 dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
1872 prop->curr_sob_offset, q_idx);
1874 prop->next_sob_val += count;
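/*
 * Worked example for the wraparound handler above (numbers hypothetical,
 * assuming a 15-bit SOB counter, i.e. HL_MAX_SOB_VAL = 1 << 15): with
 * next_sob_val = 32760, a signal CS of count = 16 would overflow the current
 * SOB, so the handler switches to the queue's other reserved SOB,
 * (curr_sob_offset + 1) % HL_RSVD_SOBS, which must be idle
 * (kref_read() == 1), marks the outgoing SOB for reset once its last user
 * drops it, and restarts next_sob_val at count (count + 1 when reserving
 * encapsulated signals).  When no wraparound is needed, next_sob_val simply
 * advances by count.
 */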
1881 struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx,
1889 *signal_seq = chunk->encaps_signal_seq;
1893 signal_seq_arr_len = chunk->num_signal_seq_arr;
1897 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1898 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1899 dev_err(hdev->dev,
1901 return -EINVAL;
1912 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1913 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1914 return -ENOMEM;
1919 u64_to_user_ptr(chunk->signal_seq_arr),
1921 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1922 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1923 dev_err(hdev->dev,
1925 rc = -EFAULT;
1939 struct hl_ctx *ctx, struct hl_cs *cs,
1947 cntr = &hdev->aggregated_cs_counters;
1951 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1952 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1953 dev_err(hdev->dev, "Failed to allocate a new job\n");
1954 return -ENOMEM;
1957 if (cs->type == CS_TYPE_WAIT)
1958 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
1960 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
1964 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1965 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1967 return -EFAULT;
1970 job->id = 0;
1971 job->cs = cs;
1972 job->user_cb = cb;
1973 atomic_inc(&job->user_cb->cs_cnt);
1974 job->user_cb_size = cb_size;
1975 job->hw_queue_id = q_idx;
1977 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
1978 && cs->encaps_signals)
1979 job->encaps_sig_wait_offset = encaps_signal_offset;
1982 * We call hl_cb_destroy() out of two reasons - we don't need the CB in
1986 job->patched_cb = job->user_cb;
1987 job->job_cb_size = job->user_cb_size;
1988 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1993 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1994 cs->jobs_cnt++;
1996 list_add_tail(&job->cs_node, &cs->job_list);
2010 struct hl_device *hdev = hpriv->hdev;
2018 dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n",
2020 rc = -EINVAL;
2024 if (q_idx >= hdev->asic_prop.max_queues) {
2025 dev_err(hdev->dev, "Queue index %d is invalid\n",
2027 rc = -EINVAL;
2031 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
2033 if (!hw_queue_prop->supports_sync_stream) {
2034 dev_err(hdev->dev,
2037 rc = -EINVAL;
2041 prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
2045 rc = -ENOMEM;
2049 handle->count = count;
2051 hl_ctx_get(hpriv->ctx);
2052 handle->ctx = hpriv->ctx;
2053 mgr = &hpriv->ctx->sig_mgr;
2055 spin_lock(&mgr->lock);
2056 hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC);
2057 spin_unlock(&mgr->lock);
2060 dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n");
2061 rc = -EINVAL;
2065 handle->id = hdl_id;
2066 handle->q_idx = q_idx;
2067 handle->hdev = hdev;
2068 kref_init(&handle->refcount);
2070 hdev->asic_funcs->hw_queues_lock(hdev);
2072 hw_sob = &prop->hw_sob[prop->curr_sob_offset];
2083 dev_err(hdev->dev, "Failed to switch SOB\n");
2084 hdev->asic_funcs->hw_queues_unlock(hdev);
2085 rc = -EINVAL;
2091 handle->hw_sob = hw_sob;
2096 handle->pre_sob_val = prop->next_sob_val - handle->count;
2098 handle->cs_seq = ULLONG_MAX;
2100 *signals_count = prop->next_sob_val;
2101 hdev->asic_funcs->hw_queues_unlock(hdev);
2103 *sob_addr = handle->hw_sob->sob_addr;
2106 dev_dbg(hdev->dev,
2108 hw_sob->sob_id, handle->hw_sob->sob_addr,
2109 prop->next_sob_val - 1, q_idx, hdl_id);
2113 spin_lock(&mgr->lock);
2114 idr_remove(&mgr->handles, hdl_id);
2115 spin_unlock(&mgr->lock);
2118 hl_ctx_put(handle->ctx);
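/*
 * Net effect of the reservation path above: on success the caller receives
 * the new handle id (the IDR slot), the address of the SOB that will count
 * the signals, and the number of signals reserved; handle->pre_sob_val
 * remembers the SOB value the reservation starts from, and handle->cs_seq
 * stays ULLONG_MAX until a CS submitted with HL_CS_FLAGS_ENCAP_SIGNALS
 * adopts the handle.  Reservations that are never adopted or unreserved are
 * dropped later by release_reserved_encaps_signals().
 */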
2129 struct hl_device *hdev = hpriv->hdev;
2135 mgr = &hpriv->ctx->sig_mgr;
2137 spin_lock(&mgr->lock);
2138 encaps_sig_hdl = idr_find(&mgr->handles, handle_id);
2140 dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n",
2141 handle_id, encaps_sig_hdl->hw_sob->sob_addr,
2142 encaps_sig_hdl->count);
2144 hdev->asic_funcs->hw_queues_lock(hdev);
2146 q_idx = encaps_sig_hdl->q_idx;
2147 prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
2148 hw_sob = &prop->hw_sob[prop->curr_sob_offset];
2149 sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
2153 * between the reserve-unreserve calls or SOB switch
2156 if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count
2157 != prop->next_sob_val ||
2158 sob_addr != encaps_sig_hdl->hw_sob->sob_addr) {
2159 dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n",
2160 encaps_sig_hdl->pre_sob_val,
2161 (prop->next_sob_val - encaps_sig_hdl->count));
2163 hdev->asic_funcs->hw_queues_unlock(hdev);
2164 rc = -EINVAL;
2172 prop->next_sob_val -= encaps_sig_hdl->count;
2174 hdev->asic_funcs->hw_queues_unlock(hdev);
2179 idr_remove(&mgr->handles, handle_id);
2182 spin_unlock(&mgr->lock);
2183 hl_ctx_put(encaps_sig_hdl->ctx);
2187 rc = -EINVAL;
2188 dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n");
2192 spin_unlock(&mgr->lock);
2210 struct hl_device *hdev = hpriv->hdev;
2215 struct hl_ctx *ctx = hpriv->ctx;
2221 cntr = &hdev->aggregated_cs_counters;
2225 ctx);
2232 if (chunk->queue_index >= hdev->asic_prop.max_queues) {
2233 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2234 atomic64_inc(&cntr->validation_drop_cnt);
2235 dev_err(hdev->dev, "Queue index %d is invalid\n",
2236 chunk->queue_index);
2237 rc = -EINVAL;
2241 q_idx = chunk->queue_index;
2242 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
2243 q_type = hw_queue_prop->type;
2245 if (!hw_queue_prop->supports_sync_stream) {
2246 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2247 atomic64_inc(&cntr->validation_drop_cnt);
2248 dev_err(hdev->dev,
2251 rc = -EINVAL;
2256 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
2257 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2258 atomic64_inc(&cntr->validation_drop_cnt);
2259 dev_err(hdev->dev,
2261 rc = -EINVAL;
2265 if (!hdev->nic_ports_mask) {
2266 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2267 atomic64_inc(&cntr->validation_drop_cnt);
2268 dev_err(hdev->dev,
2270 rc = -EINVAL;
2274 collective_engine_id = chunk->collective_engine_id;
2284 ctx, cs_encaps_signals);
2295 spin_lock(&ctx->sig_mgr.lock);
2296 idp = &ctx->sig_mgr.handles;
2298 if (encaps_sig_hdl->cs_seq == signal_seq) {
2307 if (kref_get_unless_zero(&encaps_sig_hdl->refcount))
2312 spin_unlock(&ctx->sig_mgr.lock);
2316 dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
2323 if (chunk->encaps_signal_offset >
2324 encaps_sig_hdl->count) {
2325 dev_err(hdev->dev, "offset(%u) value exceed max reserved signals count(%u)!\n",
2326 chunk->encaps_signal_offset,
2327 encaps_sig_hdl->count);
2328 rc = -EINVAL;
2333 sig_fence = hl_ctx_get_fence(ctx, signal_seq);
2335 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2336 atomic64_inc(&cntr->validation_drop_cnt);
2337 dev_err(hdev->dev,
2354 (sig_waitcs_cmpl->type == CS_TYPE_DEFAULT &&
2357 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL &&
2359 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2360 atomic64_inc(&cntr->validation_drop_cnt);
2361 dev_err(hdev->dev,
2362 "CS seq 0x%llx is not of a signal/encaps-signal CS\n",
2365 rc = -EINVAL;
2369 if (completion_done(&sig_fence->completion)) {
2377 rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
2392 cs->signal_fence = sig_fence;
2397 if (cs->encaps_signals)
2398 cs->encaps_sig_hdl = encaps_sig_hdl;
2403 *cs_seq = cs->sequence;
2406 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
2407 q_idx, chunk->encaps_signal_offset);
2409 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
2411 chunk->encaps_signal_offset);
2413 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2414 atomic64_inc(&cntr->validation_drop_cnt);
2415 rc = -EINVAL;
2422 INIT_WORK(&cs->finish_work, cs_completion);
2432 else if (rc != -EAGAIN)
2433 dev_err(hdev->dev,
2435 ctx->asid, cs->sequence, rc);
2439 *signal_sob_addr_offset = cs->sob_addr_offset;
2440 *signal_initial_sob_count = cs->initial_sob_count;
2456 kref_put(&encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx);
2465 struct hl_device *hdev = hpriv->hdev;
2470 if (!hdev->asic_prop.supports_engine_modes)
2471 return -EPERM;
2473 if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
2474 dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
2475 return -EINVAL;
2479 dev_err(hdev->dev, "Engine core command is invalid\n");
2480 return -EINVAL;
2486 return -ENOMEM;
2489 dev_err(hdev->dev, "Failed to copy core-ids array from user\n");
2491 return -EFAULT;
2494 rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command);
2503 struct hl_device *hdev = hpriv->hdev;
2508 if (!hdev->asic_prop.supports_engine_modes)
2509 return -EPERM;
2512 dev_err(hdev->dev, "Engine command is invalid\n");
2513 return -EINVAL;
2516 max_num_of_engines = hdev->asic_prop.max_num_of_engines;
2518 max_num_of_engines = hdev->asic_prop.num_engine_cores;
2521 dev_err(hdev->dev, "Number of engines %d is invalid\n", num_engines);
2522 return -EINVAL;
2528 return -ENOMEM;
2531 dev_err(hdev->dev, "Failed to copy engine-ids array from user\n");
2533 return -EFAULT;
2536 rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command);
2544 struct hl_device *hdev = hpriv->hdev;
2545 struct asic_fixed_properties *prop = &hdev->asic_prop;
2547 if (!prop->hbw_flush_reg) {
2548 dev_dbg(hdev->dev, "HBW flush is not supported\n");
2549 return -EOPNOTSUPP;
2552 RREG32(prop->hbw_flush_reg);
2559 struct hl_fpriv *hpriv = file_priv->driver_priv;
2577 cs_type = hl_cs_get_cs_type(args->in.cs_flags &
2579 chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
2580 num_chunks = args->in.num_chunks_execute;
2581 flags = args->in.cs_flags;
2586 cs_seq = args->in.seq;
2589 ? secs_to_jiffies(args->in.timeout)
2590 : hpriv->hdev->timeout_jiffies;
2597 &cs_seq, args->in.cs_flags, timeout,
2602 args->in.encaps_signals_q_idx,
2603 args->in.encaps_signals_count,
2608 args->in.encaps_sig_handle_id);
2611 rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores,
2612 args->in.num_engine_cores, args->in.core_command);
2615 rc = cs_ioctl_engines(hpriv, args->in.engines,
2616 args->in.num_engines, args->in.engine_command);
2623 args->in.cs_flags,
2624 args->in.encaps_sig_handle_id,
2629 if (rc != -EAGAIN) {
2634 args->out.handle_id = handle_id;
2635 args->out.sob_base_addr_offset = sob_addr;
2636 args->out.count = signals_count;
2639 args->out.sob_base_addr_offset = sob_addr;
2640 args->out.sob_count_before_submission = sob_initial_count;
2641 args->out.seq = cs_seq;
2644 args->out.sob_count_before_submission = sob_initial_count;
2645 args->out.seq = cs_seq;
2648 args->out.seq = cs_seq;
2652 args->out.status = rc;
2658 static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence,
2661 struct hl_device *hdev = ctx->hdev;
2668 if (rc == -EINVAL)
2669 dev_notice_ratelimited(hdev->dev,
2671 seq, ctx->cs_sequence);
2676 if (!hl_pop_cs_outcome(&ctx->outcome_store, seq, ×tamp_kt, &error)) {
2677 dev_dbg(hdev->dev,
2679 seq, ctx->cs_sequence);
2689 completion_rc = completion_done(&fence->completion);
2697 &fence->completion, timeout);
2700 error = fence->error;
2701 timestamp_kt = fence->timestamp;
2712 if (completion_rc == -ERESTARTSYS)
2714 else if (error == -ETIMEDOUT || error == -EIO)
2721 * hl_cs_poll_fences - iterate CS fences to check for CS completion
2723 * @mcs_data: multi-CS internal data
2724 * @mcs_compl: multi-CS completion structure
2732 * completion to the multi-CS context.
2737 struct hl_fence **fence_ptr = mcs_data->fence_arr;
2738 struct hl_device *hdev = mcs_data->ctx->hdev;
2739 int i, rc, arr_len = mcs_data->arr_len;
2740 u64 *seq_arr = mcs_data->seq_arr;
2747 rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len);
2752 * re-initialize the completion here to handle 2 possible cases:
2753 * 1. CS will complete the multi-CS prior clearing the completion. in which
2755 * 2. the completion will occur after re-init of the completion.
2758 reinit_completion(&mcs_compl->completion);
 2762 * this value is maintained, no timestamp was updated
2772 * with the multi-CS actually completed we do things in the below order:
2773 * 1. for each fence set it's QID map in the multi-CS completion QID map. This way
2777 * 2. only after allowing multi-CS completion for the specific QID we check whether
2783 mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map;
2789 rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence, &status, 0, NULL);
2791 dev_err(hdev->dev,
2810 if (fence && !fence->mcs_handling_done) {
2814 * until time-out and the "multi-CS handling done" will have
2817 complete_all(&mcs_compl->completion);
2821 mcs_data->completion_bitmap |= BIT(i);
2827 if (fence && mcs_data->update_ts &&
2828 (ktime_compare(fence->timestamp, first_cs_time) < 0))
2829 first_cs_time = fence->timestamp;
2832 mcs_data->update_ts = false;
2833 mcs_data->gone_cs = true;
2840 mcs_data->completion_bitmap |= BIT(i);
2843 dev_err(hdev->dev, "Invalid fence status\n");
2844 rc = -EINVAL;
2850 hl_fences_put(mcs_data->fence_arr, arr_len);
2852 if (mcs_data->update_ts &&
2854 mcs_data->timestamp = ktime_to_ns(first_cs_time);
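/*
 * Example of the resulting bitmap (hypothetical): polling an 8-entry seq
 * array in which only entries 0 and 3 have completed leaves
 * mcs_data->completion_bitmap = BIT(0) | BIT(3); the multi-CS ioctl below
 * hands it back through args->out.cs_completion_map, and a CS that completed
 * but was already recycled ("gone") both sets its bit and raises
 * mcs_data->gone_cs.
 */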
2859 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
2868 hl_ctx_get(ctx);
2870 fence = hl_ctx_get_fence(ctx, seq);
2872 rc = hl_wait_for_fence(ctx, seq, fence, status, timeout_us, timestamp);
2874 hl_ctx_put(ctx);
2895 * hl_wait_multi_cs_completion_init - init completion structure
2914 mcs_compl = &hdev->multi_cs_completion[i];
2915 spin_lock(&mcs_compl->lock);
2916 if (!mcs_compl->used) {
2917 mcs_compl->used = 1;
2918 mcs_compl->timestamp = 0;
2921 * to multi-CS CSs will be set incrementally at a later stage
2923 mcs_compl->stream_master_qid_map = 0;
2924 spin_unlock(&mcs_compl->lock);
2927 spin_unlock(&mcs_compl->lock);
2931 dev_err(hdev->dev, "no available multi-CS completion structure\n");
2932 return ERR_PTR(-ENOMEM);
2938 * hl_wait_multi_cs_completion_fini - return completion structure and set as
2947 * free completion structure, do it under lock to be in-sync with the
2950 spin_lock(&mcs_compl->lock);
2951 mcs_compl->used = 0;
2952 spin_unlock(&mcs_compl->lock);
2956 * hl_wait_multi_cs_completion - wait for first CS to complete
2958 * @mcs_data: multi-CS internal data
2967 completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion,
2968 mcs_data->timeout_jiffies);
2972 mcs_data->timestamp = mcs_compl->timestamp;
2974 if (completion_rc == -ERESTARTSYS)
2977 mcs_data->wait_status = completion_rc;
2983 * hl_multi_cs_completion_init - init array of multi-CS completion structures
2993 mcs_cmpl = &hdev->multi_cs_completion[i];
2994 mcs_cmpl->used = 0;
2995 spin_lock_init(&mcs_cmpl->lock);
2996 init_completion(&mcs_cmpl->completion);
3001 * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl
3004 * @data: pointer to multi-CS wait ioctl in/out args
3010 struct hl_device *hdev = hpriv->hdev;
3013 struct hl_ctx *ctx = hpriv->ctx;
3021 for (i = 0 ; i < sizeof(args->in.pad) ; i++)
3022 if (args->in.pad[i]) {
3023 dev_dbg(hdev->dev, "Padding bytes must be 0\n");
3024 return -EINVAL;
3027 if (!hdev->supports_wait_for_multi_cs) {
3028 dev_err(hdev->dev, "Wait for multi CS is not supported\n");
3029 return -EPERM;
3032 seq_arr_len = args->in.seq_arr_len;
3035 dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n",
3037 return -EINVAL;
3044 return -ENOMEM;
3047 seq_arr = (void __user *) (uintptr_t) args->in.seq;
3050 dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n");
3051 rc = -EFAULT;
3058 rc = -ENOMEM;
3062 /* initialize the multi-CS internal data */
3063 mcs_data.ctx = ctx;
3068 hl_ctx_get(ctx);
3071 mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us);
3083 * - an error on the poll function
3084 * - one or more CS in the list completed
3085 * - the user called ioctl with timeout 0
3087 if (rc || mcs_data.completion_bitmap || !args->in.timeout_us)
3112 * multi-CS set a new, relevant, timestamp)
3115 mcs_compl->timestamp = 0;
3122 hl_ctx_put(ctx);
3128 if (rc == -ERESTARTSYS) {
3129 dev_err_ratelimited(hdev->dev,
3130 "user process got signal while waiting for Multi-CS\n");
3131 rc = -EINTR;
3141 args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
3142 args->out.cs_completion_map = mcs_data.completion_bitmap;
 3144 /* if timestamp is not 0, it's valid */
3146 args->out.timestamp_nsec = mcs_data.timestamp;
3147 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
3152 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
3154 args->out.status = HL_WAIT_CS_STATUS_BUSY;
3162 struct hl_device *hdev = hpriv->hdev;
3165 u64 seq = args->in.seq;
3169 rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq, &status, ×tamp);
3171 if (rc == -ERESTARTSYS) {
3172 dev_err_ratelimited(hdev->dev,
3175 return -EINTR;
3181 if (rc == -ETIMEDOUT) {
3182 dev_err_ratelimited(hdev->dev,
3183 "CS %llu has timed-out while user process is waiting for it\n",
3185 args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
3186 } else if (rc == -EIO) {
3187 dev_err_ratelimited(hdev->dev,
3190 args->out.status = HL_WAIT_CS_STATUS_ABORTED;
3196 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
3197 args->out.timestamp_nsec = timestamp;
3202 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
3205 args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
3209 args->out.status = HL_WAIT_CS_STATUS_BUSY;
3219 record->ts_reg_info.cq_cb = cq_cb;
3220 record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset;
3221 record->cq_target_value = target_value;
3230 *req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
3232 ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
3233 (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
3239 return -EINVAL;
3248 struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt;
3253 spin_lock_irqsave(&interrupt->ts_list_lock, flags);
3255 if (record->ts_reg_info.in_use) {
3256 record->ts_reg_info.in_use = false;
3257 list_del(&record->list_node);
3262 spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
3266 hl_mmap_mem_buf_put(record->ts_reg_info.buf);
3267 hl_cb_put(record->ts_reg_info.cq_cb);
3271 static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx,
3276 struct hl_ts_buff *ts_buff = data->buf->private;
3280 rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset,
3285 /* In case the node already registered, need to unregister first then re-use */
3286 if (req_offset_record->ts_reg_info.in_use) {
3292 if (data->interrupt->interrupt_id !=
3293 req_offset_record->ts_reg_info.interrupt->interrupt_id) {
3296 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, *flags);
3302 spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags);
3306 req_offset_record->ts_reg_info.in_use = true;
3307 req_offset_record->ts_reg_info.buf = data->buf;
3308 req_offset_record->ts_reg_info.timestamp_kernel_addr =
3309 (u64 *) ts_buff->user_buff_address + data->ts_offset;
3310 req_offset_record->ts_reg_info.interrupt = data->interrupt;
3311 set_record_cq_info(req_offset_record, data->cq_cb, data->cq_offset,
3312 data->target_value);
3319 static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
3327 hl_ctx_get(ctx);
3329 data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
3330 if (!data->cq_cb) {
3331 rc = -EINVAL;
3336 if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
3337 ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
3338 rc = -EINVAL;
3342 data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle);
3343 if (!data->buf) {
3344 rc = -EINVAL;
3348 spin_lock_irqsave(&data->interrupt->ts_list_lock, flags);
3351 rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &flags, &pend);
3353 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
3360 if (*pend->cq_kernel_addr >= data->target_value) {
3361 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
3363 pend->ts_reg_info.in_use = 0;
3365 *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
3370 list_add_tail(&pend->list_node, &data->interrupt->ts_list_head);
3371 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
3375 hl_ctx_put(ctx);
3380 hl_mmap_mem_buf_put(data->buf);
3382 hl_cb_put(data->cq_cb);
3384 hl_ctx_put(ctx);
3389 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
3398 timeout = hl_usecs64_to_jiffies(data->intr_timeout_us);
3400 hl_ctx_get(ctx);
3402 data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
3403 if (!data->cq_cb) {
3404 rc = -EINVAL;
3409 if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
3410 ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
3411 rc = -EINVAL;
3417 rc = -ENOMEM;
3421 hl_fence_init(&pend->fence, ULONG_MAX);
3422 pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset;
3423 pend->cq_target_value = data->target_value;
3424 spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
3430 if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) {
3431 spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
3433 if (*pend->cq_kernel_addr >= data->target_value)
3438 pend->fence.timestamp = ktime_get();
3448 list_add_tail(&pend->list_node, &data->interrupt->wait_list_head);
3449 spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
3452 completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
3455 if (pend->fence.error == -EIO) {
3456 dev_err_ratelimited(hdev->dev,
3458 pend->fence.error);
3459 rc = -EIO;
3465 if (completion_rc == -ERESTARTSYS) {
3466 dev_err_ratelimited(hdev->dev,
3468 data->interrupt->interrupt_id);
3469 rc = -EINTR;
3472 /* The wait has timed-out. We don't know anything beyond that
3488 spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
3489 list_del(&pend->list_node);
3490 spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
3493 *timestamp = ktime_to_ns(pend->fence.timestamp);
3495 hl_cb_put(data->cq_cb);
3496 hl_ctx_put(ctx);
3501 hl_cb_put(data->cq_cb);
3503 hl_ctx_put(ctx);
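/*
 * Flow summary for the CQ-counter wait above: the pending node carries the
 * kernel address of the CQ counter and the user's target value.  If the
 * counter already reached the target, or a zero timeout was given, the wait
 * returns immediately; otherwise the node sits on the interrupt's wait list
 * until the interrupt handler (not among these fragments) observes the
 * counter crossing the target and completes pend->fence, or until the
 * timeout expires or the process is signalled.
 */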
3508 static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx,
3522 hl_ctx_get(ctx);
3526 hl_ctx_put(ctx);
3527 return -ENOMEM;
3530 hl_fence_init(&pend->fence, ULONG_MAX);
3535 spin_lock_irqsave(&interrupt->wait_list_lock, flags);
3536 list_add_tail(&pend->list_node, &interrupt->wait_list_head);
3537 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
3543 dev_err(hdev->dev, "Failed to copy completion value from user\n");
3544 rc = -EFAULT;
3551 pend->fence.timestamp = ktime_get();
3561 completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
3568 spin_lock_irqsave(&interrupt->wait_list_lock, flags);
3574 reinit_completion(&pend->fence.completion);
3575 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
3578 dev_err(hdev->dev, "Failed to copy completion value from user\n");
3579 rc = -EFAULT;
3586 } else if (pend->fence.error) {
3587 dev_err_ratelimited(hdev->dev,
3589 pend->fence.error);
3596 } else if (completion_rc == -ERESTARTSYS) {
3597 dev_err_ratelimited(hdev->dev,
3599 interrupt->interrupt_id);
3600 rc = -EINTR;
3602 /* The wait has timed-out. We don't know anything beyond that
3612 spin_lock_irqsave(&interrupt->wait_list_lock, flags);
3613 list_del(&pend->list_node);
3614 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
3616 *timestamp = ktime_to_ns(pend->fence.timestamp);
3619 hl_ctx_put(ctx);
3627 struct hl_device *hdev = hpriv->hdev;
3635 prop = &hdev->asic_prop;
3637 if (!(prop->user_interrupt_count + prop->user_dec_intr_count)) {
3638 dev_err(hdev->dev, "no user interrupts allowed");
3639 return -EPERM;
3642 interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);
3644 first_interrupt = prop->first_available_user_interrupt;
3645 last_interrupt = prop->first_available_user_interrupt + prop->user_interrupt_count - 1;
3647 if (interrupt_id < prop->user_dec_intr_count) {
3650 if (!(prop->decoder_enabled_mask & BIT(interrupt_id))) {
3651 dev_err(hdev->dev, "interrupt on a disabled core(%u) not allowed",
3653 return -EINVAL;
3656 interrupt = &hdev->user_interrupt[interrupt_id];
3660 int_idx = interrupt_id - first_interrupt + prop->user_dec_intr_count;
3661 interrupt = &hdev->user_interrupt[int_idx];
3664 interrupt = &hdev->common_user_cq_interrupt;
3666 interrupt = &hdev->common_decoder_interrupt;
3668 dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id);
3669 return -EINVAL;
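/*
 * Worked example of the interrupt-id decoding above (counts hypothetical):
 * with user_dec_intr_count = 2 and first_available_user_interrupt = 10,
 * ids 0-1 pick decoder interrupts (subject to decoder_enabled_mask), ids
 * 10..(10 + user_interrupt_count - 1) pick regular user interrupts at
 * hdev->user_interrupt[id - 10 + 2], two special sentinel ids (their
 * definitions are not among these fragments) map to
 * common_user_cq_interrupt / common_decoder_interrupt, and anything else
 * is rejected with -EINVAL.
 */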
3672 if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) {
3676 wait_intr_data.mmg = &hpriv->mem_mgr;
3677 wait_intr_data.cq_handle = args->in.cq_counters_handle;
3678 wait_intr_data.cq_offset = args->in.cq_counters_offset;
3679 wait_intr_data.ts_handle = args->in.timestamp_handle;
3680 wait_intr_data.ts_offset = args->in.timestamp_offset;
3681 wait_intr_data.target_value = args->in.target;
3682 wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us;
3684 if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) {
3687 * issues while handling the flow of re-use of the same offset.
3689 * re-use flow might request to move ts node to another interrupt list,
3692 mutex_lock(&hpriv->ctx->ts_reg_lock);
3694 rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data,
3697 mutex_unlock(&hpriv->ctx->ts_reg_lock);
3699 rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data,
3702 rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
3703 args->in.interrupt_timeout_us, args->in.addr,
3704 args->in.target, interrupt, &status,
3712 args->out.status = status;
3715 args->out.timestamp_nsec = timestamp;
3716 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
3724 struct hl_fpriv *hpriv = file_priv->driver_priv;
3725 struct hl_device *hdev = hpriv->hdev;
3727 u32 flags = args->in.flags;
3733 if (!hl_device_operational(hpriv->hdev, NULL) || hdev->reset_info.watchdog_active)
3734 return -EBUSY;