xref: /linux/drivers/accel/habanalabs/common/command_submission.c (revision a1ff5a7d78a036d6c2178ee5acd6ba4946243800)
1  // SPDX-License-Identifier: GPL-2.0
2  
3  /*
4   * Copyright 2016-2021 HabanaLabs, Ltd.
5   * All Rights Reserved.
6   */
7  
8  #include <uapi/drm/habanalabs_accel.h>
9  #include "habanalabs.h"
10  
11  #include <linux/uaccess.h>
12  #include <linux/slab.h>
13  
14  #define HL_CS_FLAGS_TYPE_MASK	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
15  			HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
16  			HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND | \
17  			HL_CS_FLAGS_ENGINES_COMMAND | HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
18  
19  
20  #define MAX_TS_ITER_NUM 100
21  
22  /**
23   * enum hl_cs_wait_status - cs wait status
24   * @CS_WAIT_STATUS_BUSY: cs was not completed yet
25   * @CS_WAIT_STATUS_COMPLETED: cs completed
26   * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
27   */
28  enum hl_cs_wait_status {
29  	CS_WAIT_STATUS_BUSY,
30  	CS_WAIT_STATUS_COMPLETED,
31  	CS_WAIT_STATUS_GONE
32  };
33  
34  /*
35   * Data used while handling wait/timestamp nodes.
36   * The purpose of this struct is to store the needed data for both operations
37   * in one variable instead of passing a large number of arguments to functions.
38   */
39  struct wait_interrupt_data {
40  	struct hl_user_interrupt *interrupt;
41  	struct hl_mmap_mem_buf *buf;
42  	struct hl_mem_mgr *mmg;
43  	struct hl_cb *cq_cb;
44  	u64 ts_handle;
45  	u64 ts_offset;
46  	u64 cq_handle;
47  	u64 cq_offset;
48  	u64 target_value;
49  	u64 intr_timeout_us;
50  };
51  
52  static void job_wq_completion(struct work_struct *work);
53  static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
54  				enum hl_cs_wait_status *status, s64 *timestamp);
55  static void cs_do_release(struct kref *ref);
56  
57  static void hl_push_cs_outcome(struct hl_device *hdev,
58  			       struct hl_cs_outcome_store *outcome_store,
59  			       u64 seq, ktime_t ts, int error)
60  {
61  	struct hl_cs_outcome *node;
62  	unsigned long flags;
63  
64  	/*
65  	 * CS outcome store supports the following operations:
66  	 * push outcome - store a recent CS outcome in the store
67  	 * pop outcome - retrieve a SPECIFIC (by seq) CS outcome from the store
68  	 * It uses 2 lists: used list and free list.
69  	 * It has a pre-allocated amount of nodes, each node stores
70  	 * a single CS outcome.
71  	 * Initially, all the nodes are in the free list.
72  	 * On push outcome, a node (any) is taken from the free list, its
73  	 * information is filled in, and the node is moved to the used list.
74  	 * It is possible that there are no nodes left in the free list.
75  	 * In this case, we will lose some information about old outcomes. We
76  	 * will pop the OLDEST node from the used list, and make it free.
77  	 * On pop, the node is searched for in the used list (using a search
78  	 * index).
79  	 * If found, the node is then removed from the used list, and moved
80  	 * back to the free list. The outcome data that the node contained is
81  	 * returned back to the user.
82  	 */
83  
84  	spin_lock_irqsave(&outcome_store->db_lock, flags);
85  
86  	if (list_empty(&outcome_store->free_list)) {
87  		node = list_last_entry(&outcome_store->used_list,
88  				       struct hl_cs_outcome, list_link);
89  		hash_del(&node->map_link);
90  		dev_dbg(hdev->dev, "CS %llu outcome was lost\n", node->seq);
91  	} else {
92  		node = list_last_entry(&outcome_store->free_list,
93  				       struct hl_cs_outcome, list_link);
94  	}
95  
96  	list_del_init(&node->list_link);
97  
98  	node->seq = seq;
99  	node->ts = ts;
100  	node->error = error;
101  
102  	list_add(&node->list_link, &outcome_store->used_list);
103  	hash_add(outcome_store->outcome_map, &node->map_link, node->seq);
104  
105  	spin_unlock_irqrestore(&outcome_store->db_lock, flags);
106  }
107  
108  static bool hl_pop_cs_outcome(struct hl_cs_outcome_store *outcome_store,
109  			       u64 seq, ktime_t *ts, int *error)
110  {
111  	struct hl_cs_outcome *node;
112  	unsigned long flags;
113  
114  	spin_lock_irqsave(&outcome_store->db_lock, flags);
115  
116  	hash_for_each_possible(outcome_store->outcome_map, node, map_link, seq)
117  		if (node->seq == seq) {
118  			*ts = node->ts;
119  			*error = node->error;
120  
121  			hash_del(&node->map_link);
122  			list_del_init(&node->list_link);
123  			list_add(&node->list_link, &outcome_store->free_list);
124  
125  			spin_unlock_irqrestore(&outcome_store->db_lock, flags);
126  
127  			return true;
128  		}
129  
130  	spin_unlock_irqrestore(&outcome_store->db_lock, flags);
131  
132  	return false;
133  }
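
/*
 * Illustrative sketch (not part of this file's code): how the outcome store
 * above is meant to be used. The completion path pushes the result of a
 * finished CS, and the wait path pops it by sequence once the fence object is
 * already gone. The caller names here are assumptions for illustration only.
 *
 *	// completion path (see cs_do_release() below):
 *	hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence,
 *			   cs->fence->timestamp, cs->fence->error);
 *
 *	// wait path, when the fence for 'seq' no longer exists:
 *	ktime_t ts;
 *	int error;
 *
 *	if (hl_pop_cs_outcome(&ctx->outcome_store, seq, &ts, &error))
 *		// report 'ts' and 'error' for sequence 'seq'
 *	else
 *		// outcome was evicted (the store is bounded), treat CS as gone
 */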
134  
135  static void hl_sob_reset(struct kref *ref)
136  {
137  	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
138  							kref);
139  	struct hl_device *hdev = hw_sob->hdev;
140  
141  	dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id);
142  
143  	hdev->asic_funcs->reset_sob(hdev, hw_sob);
144  
145  	hw_sob->need_reset = false;
146  }
147  
148  void hl_sob_reset_error(struct kref *ref)
149  {
150  	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
151  							kref);
152  	struct hl_device *hdev = hw_sob->hdev;
153  
154  	dev_crit(hdev->dev,
155  		"SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
156  		hw_sob->q_idx, hw_sob->sob_id);
157  }
158  
159  void hw_sob_put(struct hl_hw_sob *hw_sob)
160  {
161  	if (hw_sob)
162  		kref_put(&hw_sob->kref, hl_sob_reset);
163  }
164  
165  static void hw_sob_put_err(struct hl_hw_sob *hw_sob)
166  {
167  	if (hw_sob)
168  		kref_put(&hw_sob->kref, hl_sob_reset_error);
169  }
170  
171  void hw_sob_get(struct hl_hw_sob *hw_sob)
172  {
173  	if (hw_sob)
174  		kref_get(&hw_sob->kref);
175  }
176  
177  /**
178   * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
179   * @sob_base: sob base id
180   * @sob_mask: sob user mask, each bit represents a sob offset from sob base
181   * @mask: generated mask
182   *
183   * Return: 0 if given parameters are valid
184   */
185  int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
186  {
187  	int i;
188  
189  	if (sob_mask == 0)
190  		return -EINVAL;
191  
192  	if (sob_mask == 0x1) {
193  		*mask = ~(1 << (sob_base & 0x7));
194  	} else {
195  		/* find msb in order to verify sob range is valid */
196  		for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
197  			if (BIT(i) & sob_mask)
198  				break;
199  
200  		if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
201  			return -EINVAL;
202  
203  		*mask = ~sob_mask;
204  	}
205  
206  	return 0;
207  }
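
/*
 * Worked example (illustrative only, assuming HL_MAX_SOBS_PER_MONITOR is 8 as
 * defined elsewhere in the driver): for sob_base = 8 and sob_mask = 0x7, the
 * msb of the mask is bit 2 and the sob offset within the monitor window is
 * 8 & 0x7 = 0, so the range check passes and *mask = ~0x7 = 0xf8.
 * For the single-sob case, sob_base = 10 and sob_mask = 0x1 give
 * *mask = ~(1 << (10 & 0x7)) = ~0x04 = 0xfb.
 * In both cases the mask is returned inverted, which is the form consumed by
 * the monitor arm packet mentioned in the kernel-doc above.
 */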
208  
209  static void hl_fence_release(struct kref *kref)
210  {
211  	struct hl_fence *fence =
212  		container_of(kref, struct hl_fence, refcount);
213  	struct hl_cs_compl *hl_cs_cmpl =
214  		container_of(fence, struct hl_cs_compl, base_fence);
215  
216  	kfree(hl_cs_cmpl);
217  }
218  
219  void hl_fence_put(struct hl_fence *fence)
220  {
221  	if (IS_ERR_OR_NULL(fence))
222  		return;
223  	kref_put(&fence->refcount, hl_fence_release);
224  }
225  
226  void hl_fences_put(struct hl_fence **fence, int len)
227  {
228  	int i;
229  
230  	for (i = 0; i < len; i++, fence++)
231  		hl_fence_put(*fence);
232  }
233  
234  void hl_fence_get(struct hl_fence *fence)
235  {
236  	if (fence)
237  		kref_get(&fence->refcount);
238  }
239  
240  static void hl_fence_init(struct hl_fence *fence, u64 sequence)
241  {
242  	kref_init(&fence->refcount);
243  	fence->cs_sequence = sequence;
244  	fence->error = 0;
245  	fence->timestamp = ktime_set(0, 0);
246  	fence->mcs_handling_done = false;
247  	init_completion(&fence->completion);
248  }
249  
250  void cs_get(struct hl_cs *cs)
251  {
252  	kref_get(&cs->refcount);
253  }
254  
255  static int cs_get_unless_zero(struct hl_cs *cs)
256  {
257  	return kref_get_unless_zero(&cs->refcount);
258  }
259  
260  static void cs_put(struct hl_cs *cs)
261  {
262  	kref_put(&cs->refcount, cs_do_release);
263  }
264  
265  static void cs_job_do_release(struct kref *ref)
266  {
267  	struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount);
268  
269  	kfree(job);
270  }
271  
272  static void hl_cs_job_put(struct hl_cs_job *job)
273  {
274  	kref_put(&job->refcount, cs_job_do_release);
275  }
276  
277  bool cs_needs_completion(struct hl_cs *cs)
278  {
279  	/* In case this is a staged CS, only the last CS in the sequence should
280  	 * get a completion; any non-staged CS will always get a completion
281  	 */
282  	if (cs->staged_cs && !cs->staged_last)
283  		return false;
284  
285  	return true;
286  }
287  
288  bool cs_needs_timeout(struct hl_cs *cs)
289  {
290  	/* In case this is a staged CS, only the first CS in the sequence should
291  	 * get a timeout; any non-staged CS will always get a timeout
292  	 */
293  	if (cs->staged_cs && !cs->staged_first)
294  		return false;
295  
296  	return true;
297  }
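
/*
 * Illustrative summary (not part of this file's code) of the two helpers
 * above, for a staged submission consisting of several CSs:
 *
 *	staged_first CS  -> gets a TDR timeout, no completion (unless it is
 *	                    also the last one)
 *	middle CSs       -> no timeout and no completion
 *	staged_last CS   -> gets the completion for the whole submission
 *
 * A regular, non-staged CS always gets both a timeout and a completion.
 */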
298  
299  static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
300  {
301  	/* Patched CB is created for external queue jobs */
302  	return (job->queue_type == QUEUE_TYPE_EXT);
303  }
304  
305  /*
306   * cs_parser - parse the user command submission
307   *
308   * @hpriv: pointer to the private data of the fd
309   * @job: pointer to the job that holds the command submission info
310   *
311   * The function parses the command submission of the user. It calls the
312   * ASIC specific parser, which returns a list of memory blocks to send
313   * to the device as different command buffers
314   *
315   */
316  static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
317  {
318  	struct hl_device *hdev = hpriv->hdev;
319  	struct hl_cs_parser parser;
320  	int rc;
321  
322  	parser.ctx_id = job->cs->ctx->asid;
323  	parser.cs_sequence = job->cs->sequence;
324  	parser.job_id = job->id;
325  
326  	parser.hw_queue_id = job->hw_queue_id;
327  	parser.job_userptr_list = &job->userptr_list;
328  	parser.patched_cb = NULL;
329  	parser.user_cb = job->user_cb;
330  	parser.user_cb_size = job->user_cb_size;
331  	parser.queue_type = job->queue_type;
332  	parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
333  	job->patched_cb = NULL;
334  	parser.completion = cs_needs_completion(job->cs);
335  
336  	rc = hdev->asic_funcs->cs_parser(hdev, &parser);
337  
338  	if (is_cb_patched(hdev, job)) {
339  		if (!rc) {
340  			job->patched_cb = parser.patched_cb;
341  			job->job_cb_size = parser.patched_cb_size;
342  			job->contains_dma_pkt = parser.contains_dma_pkt;
343  			atomic_inc(&job->patched_cb->cs_cnt);
344  		}
345  
346  		/*
347  		 * Whether the parsing worked or not, we don't need the
348  		 * original CB anymore because it was already parsed and
349  		 * won't be accessed again for this CS
350  		 */
351  		atomic_dec(&job->user_cb->cs_cnt);
352  		hl_cb_put(job->user_cb);
353  		job->user_cb = NULL;
354  	} else if (!rc) {
355  		job->job_cb_size = job->user_cb_size;
356  	}
357  
358  	return rc;
359  }
360  
361  static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
362  {
363  	struct hl_cs *cs = job->cs;
364  
365  	if (is_cb_patched(hdev, job)) {
366  		hl_userptr_delete_list(hdev, &job->userptr_list);
367  
368  		/*
369  		 * We might arrive here from rollback and patched CB wasn't
370  		 * created, so we need to check it's not NULL
371  		 */
372  		if (job->patched_cb) {
373  			atomic_dec(&job->patched_cb->cs_cnt);
374  			hl_cb_put(job->patched_cb);
375  		}
376  	}
377  
378  	/* For H/W queue jobs, if a user CB was allocated by the driver,
379  	 * the user CB isn't released in cs_parser() and thus should be
380  	 * released here. This is also true for INT queue jobs which were
381  	 * allocated by the driver.
382  	 */
383  	if (job->is_kernel_allocated_cb &&
384  			(job->queue_type == QUEUE_TYPE_HW || job->queue_type == QUEUE_TYPE_INT)) {
385  		atomic_dec(&job->user_cb->cs_cnt);
386  		hl_cb_put(job->user_cb);
387  	}
388  
389  	/*
390  	 * This is the only place where there can be multiple threads
391  	 * modifying the list at the same time
392  	 */
393  	spin_lock(&cs->job_lock);
394  	list_del(&job->cs_node);
395  	spin_unlock(&cs->job_lock);
396  
397  	hl_debugfs_remove_job(hdev, job);
398  
399  	/* We decrement reference only for a CS that gets completion
400  	 * because the reference was incremented only for this kind of CS
401  	 * right before it was scheduled.
402  	 *
403  	 * In staged submission, only the last CS marked as 'staged_last'
404  	 * gets completion, hence its release function will be called from here.
405  	 * As for the rest of the CS's in the staged submission, which do not get
406  	 * completion, their CS reference will be decremented by the
407  	 * 'staged_last' CS during the CS release flow.
408  	 * All relevant PQ CI counters will be incremented during the CS release
409  	 * flow by calling 'hl_hw_queue_update_ci'.
410  	 */
411  	if (cs_needs_completion(cs) &&
412  			(job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) {
413  
414  		/* In CS based completions, the timestamp is already available,
415  		 * so no need to extract it from job
416  		 */
417  		if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_JOB)
418  			cs->completion_timestamp = job->timestamp;
419  
420  		cs_put(cs);
421  	}
422  
423  	hl_cs_job_put(job);
424  }
425  
426  /*
427   * hl_staged_cs_find_first - locate the first CS in this staged submission
428   *
429   * @hdev: pointer to device structure
430   * @cs_seq: staged submission sequence number
431   *
432   * @note: This function must be called under 'hdev->cs_mirror_lock'
433   *
434   * Find and return a CS pointer with the given sequence
435   */
436  struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
437  {
438  	struct hl_cs *cs;
439  
440  	list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
441  		if (cs->staged_cs && cs->staged_first &&
442  				cs->sequence == cs_seq)
443  			return cs;
444  
445  	return NULL;
446  }
447  
448  /*
449   * is_staged_cs_last_exists - returns true if the last CS in sequence exists
450   *
451   * @hdev: pointer to device structure
452   * @cs: staged submission member
453   *
454   */
455  bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
456  {
457  	struct hl_cs *last_entry;
458  
459  	last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
460  								staged_cs_node);
461  
462  	if (last_entry->staged_last)
463  		return true;
464  
465  	return false;
466  }
467  
468  /*
469   * staged_cs_get - get CS reference if this CS is a part of a staged CS
470   *
471   * @hdev: pointer to device structure
472   * @cs: current CS
473   * @cs_seq: staged submission sequence number
474   *
475   * Increment CS reference for every CS in this staged submission except for
476   * the CS which gets completion.
477   */
478  static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
479  {
480  	/* Only the last CS in this staged submission will get a completion.
481  	 * We must increment the reference for all other CS's in this
482  	 * staged submission.
483  	 * Once we get a completion we will release the whole staged submission.
484  	 */
485  	if (!cs->staged_last)
486  		cs_get(cs);
487  }
488  
489  /*
490   * staged_cs_put - put a CS in case it is part of staged submission
491   *
492   * @hdev: pointer to device structure
493   * @cs: CS to put
494   *
495   * This function decrements a CS reference (for a non completion CS)
496   */
497  static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
498  {
499  	/* We release all CS's in a staged submission except the last
500  	 * CS, whose reference we never incremented.
501  	 */
502  	if (!cs_needs_completion(cs))
503  		cs_put(cs);
504  }
505  
506  static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
507  {
508  	struct hl_cs *next = NULL, *iter, *first_cs;
509  
510  	if (!cs_needs_timeout(cs))
511  		return;
512  
513  	spin_lock(&hdev->cs_mirror_lock);
514  
515  	/* We need to handle tdr only once for the complete staged submission.
516  	 * Hence, we choose the CS that reaches this function first, which is
517  	 * the CS marked as 'staged_last'.
518  	 * In case a single staged cs was submitted which has both first and last
519  	 * indications, then "cs_find_first" below will return NULL, since we
520  	 * removed the cs node from the list before getting here.
521  	 * In such cases just continue with the cs to cancel its TDR work.
522  	 */
523  	if (cs->staged_cs && cs->staged_last) {
524  		first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
525  		if (first_cs)
526  			cs = first_cs;
527  	}
528  
529  	spin_unlock(&hdev->cs_mirror_lock);
530  
531  	/* Don't cancel TDR in case this CS timed out, because we might be
532  	 * running from the TDR context
533  	 */
534  	if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
535  		return;
536  
537  	if (cs->tdr_active)
538  		cancel_delayed_work_sync(&cs->work_tdr);
539  
540  	spin_lock(&hdev->cs_mirror_lock);
541  
542  	/* queue TDR for next CS */
543  	list_for_each_entry(iter, &hdev->cs_mirror_list, mirror_node)
544  		if (cs_needs_timeout(iter)) {
545  			next = iter;
546  			break;
547  		}
548  
549  	if (next && !next->tdr_active) {
550  		next->tdr_active = true;
551  		schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
552  	}
553  
554  	spin_unlock(&hdev->cs_mirror_lock);
555  }
556  
557  /*
558   * force_complete_multi_cs - complete all contexts that wait on multi-CS
559   *
560   * @hdev: pointer to habanalabs device structure
561   */
562  static void force_complete_multi_cs(struct hl_device *hdev)
563  {
564  	int i;
565  
566  	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
567  		struct multi_cs_completion *mcs_compl;
568  
569  		mcs_compl = &hdev->multi_cs_completion[i];
570  
571  		spin_lock(&mcs_compl->lock);
572  
573  		if (!mcs_compl->used) {
574  			spin_unlock(&mcs_compl->lock);
575  			continue;
576  		}
577  
578  		/* when calling force complete, no context should be waiting on
579  		 * multi-CS.
580  		 * We are calling the function as a protection for such a case,
581  		 * to free any pending context and print an error message
582  		 */
583  		dev_err(hdev->dev,
584  				"multi-CS completion context %d still waiting when calling force completion\n",
585  				i);
586  		complete_all(&mcs_compl->completion);
587  		spin_unlock(&mcs_compl->lock);
588  	}
589  }
590  
591  /*
592   * complete_multi_cs - complete all waiting entities on multi-CS
593   *
594   * @hdev: pointer to habanalabs device structure
595   * @cs: CS structure
596   * The function signals a waiting entity that has overlapping stream masters
597   * with the completed CS.
598   * For example:
599   * - a completed CS worked on stream master QID 4, multi CS completion
600   *   is actively waiting on stream master QIDs 3, 5. Don't send a signal, as
601   *   there is no common stream master QID
602   * - a completed CS worked on stream master QID 4, multi CS completion
603   *   is actively waiting on stream master QIDs 3, 4. Send a signal, as stream
604   *   master QID 4 is common
605   */
606  static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
607  {
608  	struct hl_fence *fence = cs->fence;
609  	int i;
610  
611  	/* in case of multi CS check for completion only for the first CS */
612  	if (cs->staged_cs && !cs->staged_first)
613  		return;
614  
615  	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
616  		struct multi_cs_completion *mcs_compl;
617  
618  		mcs_compl = &hdev->multi_cs_completion[i];
619  		if (!mcs_compl->used)
620  			continue;
621  
622  		spin_lock(&mcs_compl->lock);
623  
624  		/*
625  		 * complete if:
626  		 * 1. still waiting for completion
627  		 * 2. the completed CS has at least one overlapping stream
628  		 *    master with the stream masters in the completion
629  		 */
630  		if (mcs_compl->used &&
631  				(fence->stream_master_qid_map &
632  					mcs_compl->stream_master_qid_map)) {
633  			/* extract the timestamp only of first completed CS */
634  			if (!mcs_compl->timestamp)
635  				mcs_compl->timestamp = ktime_to_ns(fence->timestamp);
636  
637  			complete_all(&mcs_compl->completion);
638  
639  			/*
640  			 * Setting mcs_handling_done inside the lock ensures
641  			 * at least one fence has mcs_handling_done set to
642  			 * true before the wait for mcs finishes. This ensures at
643  			 * least one CS will be set as completed when polling
644  			 * mcs fences.
645  			 */
646  			fence->mcs_handling_done = true;
647  		}
648  
649  		spin_unlock(&mcs_compl->lock);
650  	}
651  	/* In case CS completed without mcs completion initialized */
652  	fence->mcs_handling_done = true;
653  }
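
/*
 * Illustrative sketch (not part of this file's code): the overlap test in
 * complete_multi_cs() is a bitwise AND of per-stream-master bit maps. The
 * bit assignment below is an assumption for illustration; the real mapping
 * is per-ASIC (see get_stream_master_qid_mask() later in this file).
 *
 *	// QID 4 -> bit 2, QID 3 -> bit 1, QID 5 -> bit 3 (assumed)
 *	fence->stream_master_qid_map     = BIT(2);           // completed CS
 *	mcs_compl->stream_master_qid_map = BIT(1) | BIT(3);   // waiting on 3, 5
 *	// AND result is 0 -> no signal is sent
 *
 *	mcs_compl->stream_master_qid_map = BIT(1) | BIT(2);   // waiting on 3, 4
 *	// AND result is non-zero -> complete_all() is called
 */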
654  
655  static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
656  					struct hl_cs *cs,
657  					struct hl_cs_compl *hl_cs_cmpl)
658  {
659  	/* Skip this handler if the cs wasn't submitted, to avoid putting
660  	 * the hw_sob twice, since this case was already handled at this point.
661  	 * Also skip if the hw_sob pointer wasn't set.
662  	 */
663  	if (!hl_cs_cmpl->hw_sob || !cs->submitted)
664  		return;
665  
666  	spin_lock(&hl_cs_cmpl->lock);
667  
668  	/*
669  	 * we get refcount upon reservation of signals or signal/wait cs for the
670  	 * hw_sob object, and need to put it when the first staged cs
671  	 * (which contains the encaps signals) or cs signal/wait is completed.
672  	 */
673  	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
674  			(hl_cs_cmpl->type == CS_TYPE_WAIT) ||
675  			(hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) ||
676  			(!!hl_cs_cmpl->encaps_signals)) {
677  		dev_dbg(hdev->dev,
678  				"CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n",
679  				hl_cs_cmpl->cs_seq,
680  				hl_cs_cmpl->type,
681  				hl_cs_cmpl->hw_sob->sob_id,
682  				hl_cs_cmpl->sob_val);
683  
684  		hw_sob_put(hl_cs_cmpl->hw_sob);
685  
686  		if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
687  			hdev->asic_funcs->reset_sob_group(hdev,
688  					hl_cs_cmpl->sob_group);
689  	}
690  
691  	spin_unlock(&hl_cs_cmpl->lock);
692  }
693  
694  static void cs_do_release(struct kref *ref)
695  {
696  	struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
697  	struct hl_device *hdev = cs->ctx->hdev;
698  	struct hl_cs_job *job, *tmp;
699  	struct hl_cs_compl *hl_cs_cmpl =
700  			container_of(cs->fence, struct hl_cs_compl, base_fence);
701  
702  	cs->completed = true;
703  
704  	/*
705  	 * Although reaching here means that all external jobs have
706  	 * finished (because each one of them took a refcnt on the CS), we still
707  	 * need to go over the internal jobs and complete them. Otherwise, we
708  	 * will have leaked memory and, what's worse, the CS object (and
709  	 * potentially the CTX object) could be released, while the JOB
710  	 * still holds a pointer to them (but no reference).
711  	 */
712  	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
713  		hl_complete_job(hdev, job);
714  
715  	if (!cs->submitted) {
716  		/*
717  		 * In case the wait for signal CS was submitted, the fence put
718  		 * occurs in init_signal_wait_cs() or collective_wait_init_cs()
719  		 * right before hanging on the PQ.
720  		 */
721  		if (cs->type == CS_TYPE_WAIT ||
722  				cs->type == CS_TYPE_COLLECTIVE_WAIT)
723  			hl_fence_put(cs->signal_fence);
724  
725  		goto out;
726  	}
727  
728  	/* Need to update CI for all queue jobs that do not get completion */
729  	hl_hw_queue_update_ci(cs);
730  
731  	/* remove CS from CS mirror list */
732  	spin_lock(&hdev->cs_mirror_lock);
733  	list_del_init(&cs->mirror_node);
734  	spin_unlock(&hdev->cs_mirror_lock);
735  
736  	cs_handle_tdr(hdev, cs);
737  
738  	if (cs->staged_cs) {
739  		/* the completion CS decrements reference for the entire
740  		 * staged submission
741  		 */
742  		if (cs->staged_last) {
743  			struct hl_cs *staged_cs, *tmp_cs;
744  
745  			list_for_each_entry_safe(staged_cs, tmp_cs,
746  					&cs->staged_cs_node, staged_cs_node)
747  				staged_cs_put(hdev, staged_cs);
748  		}
749  
750  		/* A staged CS will be a member of the list only after it
751  		 * was submitted. We used 'cs_mirror_lock' when inserting
752  		 * it into the list, so we will use it again when removing it
753  		 */
754  		if (cs->submitted) {
755  			spin_lock(&hdev->cs_mirror_lock);
756  			list_del(&cs->staged_cs_node);
757  			spin_unlock(&hdev->cs_mirror_lock);
758  		}
759  
760  		/* decrement refcount to handle when first staged cs
761  		 * with encaps signals is completed.
762  		 */
763  		if (hl_cs_cmpl->encaps_signals)
764  			kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount,
765  					hl_encaps_release_handle_and_put_ctx);
766  	}
767  
768  	if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) && cs->encaps_signals)
769  		kref_put(&cs->encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx);
770  
771  out:
772  	/* Must be called before hl_ctx_put because inside we use ctx to get
773  	 * the device
774  	 */
775  	hl_debugfs_remove_cs(cs);
776  
777  	hdev->shadow_cs_queue[cs->sequence & (hdev->asic_prop.max_pending_cs - 1)] = NULL;
778  
779  	/* We need to mark an error for not submitted because in that case
780  	 * the hl fence release flow is different. Mainly, we don't need
781  	 * to handle hw_sob for signal/wait
782  	 */
783  	if (cs->timedout)
784  		cs->fence->error = -ETIMEDOUT;
785  	else if (cs->aborted)
786  		cs->fence->error = -EIO;
787  	else if (!cs->submitted)
788  		cs->fence->error = -EBUSY;
789  
790  	if (unlikely(cs->skip_reset_on_timeout)) {
791  		dev_err(hdev->dev,
792  			"Command submission %llu completed after %llu (s)\n",
793  			cs->sequence,
794  			div_u64(jiffies - cs->submission_time_jiffies, HZ));
795  	}
796  
797  	if (cs->timestamp) {
798  		cs->fence->timestamp = cs->completion_timestamp;
799  		hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence,
800  				   cs->fence->timestamp, cs->fence->error);
801  	}
802  
803  	hl_ctx_put(cs->ctx);
804  
805  	complete_all(&cs->fence->completion);
806  	complete_multi_cs(hdev, cs);
807  
808  	cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl);
809  
810  	hl_fence_put(cs->fence);
811  
812  	kfree(cs->jobs_in_queue_cnt);
813  	kfree(cs);
814  }
815  
816  static void cs_timedout(struct work_struct *work)
817  {
818  	struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work);
819  	bool skip_reset_on_timeout, device_reset = false;
820  	struct hl_device *hdev;
821  	u64 event_mask = 0x0;
822  	uint timeout_sec;
823  	int rc;
824  
825  	skip_reset_on_timeout = cs->skip_reset_on_timeout;
826  
827  	rc = cs_get_unless_zero(cs);
828  	if (!rc)
829  		return;
830  
831  	if ((!cs->submitted) || (cs->completed)) {
832  		cs_put(cs);
833  		return;
834  	}
835  
836  	hdev = cs->ctx->hdev;
837  
838  	if (likely(!skip_reset_on_timeout)) {
839  		if (hdev->reset_on_lockup)
840  			device_reset = true;
841  		else
842  			hdev->reset_info.needs_reset = true;
843  
844  		/* Mark that the CS timed out so we won't try to cancel its TDR */
845  		cs->timedout = true;
846  	}
847  
848  	/* Save only the first CS timeout parameters */
849  	rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0);
850  	if (rc) {
851  		hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
852  		hdev->captured_err_info.cs_timeout.seq = cs->sequence;
853  		event_mask |= HL_NOTIFIER_EVENT_CS_TIMEOUT;
854  	}
855  
856  	timeout_sec = jiffies_to_msecs(hdev->timeout_jiffies) / 1000;
857  
858  	switch (cs->type) {
859  	case CS_TYPE_SIGNAL:
860  		dev_err(hdev->dev,
861  			"Signal command submission %llu has not finished in %u seconds!\n",
862  			cs->sequence, timeout_sec);
863  		break;
864  
865  	case CS_TYPE_WAIT:
866  		dev_err(hdev->dev,
867  			"Wait command submission %llu has not finished in %u seconds!\n",
868  			cs->sequence, timeout_sec);
869  		break;
870  
871  	case CS_TYPE_COLLECTIVE_WAIT:
872  		dev_err(hdev->dev,
873  			"Collective Wait command submission %llu has not finished in %u seconds!\n",
874  			cs->sequence, timeout_sec);
875  		break;
876  
877  	default:
878  		dev_err(hdev->dev,
879  			"Command submission %llu has not finished in %u seconds!\n",
880  			cs->sequence, timeout_sec);
881  		break;
882  	}
883  
884  	rc = hl_state_dump(hdev);
885  	if (rc)
886  		dev_err(hdev->dev, "Error during system state dump %d\n", rc);
887  
888  	cs_put(cs);
889  
890  	if (device_reset) {
891  		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
892  		hl_device_cond_reset(hdev, HL_DRV_RESET_TDR, event_mask);
893  	} else if (event_mask) {
894  		hl_notifier_event_send_all(hdev, event_mask);
895  	}
896  }
897  
898  static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
899  			enum hl_cs_type cs_type, u64 user_sequence,
900  			struct hl_cs **cs_new, u32 flags, u32 timeout)
901  {
902  	struct hl_cs_counters_atomic *cntr;
903  	struct hl_fence *other = NULL;
904  	struct hl_cs_compl *cs_cmpl;
905  	struct hl_cs *cs;
906  	int rc;
907  
908  	cntr = &hdev->aggregated_cs_counters;
909  
910  	cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
911  	if (!cs)
912  		cs = kzalloc(sizeof(*cs), GFP_KERNEL);
913  
914  	if (!cs) {
915  		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
916  		atomic64_inc(&cntr->out_of_mem_drop_cnt);
917  		return -ENOMEM;
918  	}
919  
920  	/* increment refcnt for context */
921  	hl_ctx_get(ctx);
922  
923  	cs->ctx = ctx;
924  	cs->submitted = false;
925  	cs->completed = false;
926  	cs->type = cs_type;
927  	cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
928  	cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
929  	cs->timeout_jiffies = timeout;
930  	cs->skip_reset_on_timeout =
931  		hdev->reset_info.skip_reset_on_timeout ||
932  		!!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT);
933  	cs->submission_time_jiffies = jiffies;
934  	INIT_LIST_HEAD(&cs->job_list);
935  	INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
936  	kref_init(&cs->refcount);
937  	spin_lock_init(&cs->job_lock);
938  
939  	cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
940  	if (!cs_cmpl)
941  		cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_KERNEL);
942  
943  	if (!cs_cmpl) {
944  		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
945  		atomic64_inc(&cntr->out_of_mem_drop_cnt);
946  		rc = -ENOMEM;
947  		goto free_cs;
948  	}
949  
950  	cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
951  			sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
952  	if (!cs->jobs_in_queue_cnt)
953  		cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
954  				sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
955  
956  	if (!cs->jobs_in_queue_cnt) {
957  		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
958  		atomic64_inc(&cntr->out_of_mem_drop_cnt);
959  		rc = -ENOMEM;
960  		goto free_cs_cmpl;
961  	}
962  
963  	cs_cmpl->hdev = hdev;
964  	cs_cmpl->type = cs->type;
965  	spin_lock_init(&cs_cmpl->lock);
966  	cs->fence = &cs_cmpl->base_fence;
967  
968  	spin_lock(&ctx->cs_lock);
969  
970  	cs_cmpl->cs_seq = ctx->cs_sequence;
971  	other = ctx->cs_pending[cs_cmpl->cs_seq &
972  				(hdev->asic_prop.max_pending_cs - 1)];
973  
974  	if (other && !completion_done(&other->completion)) {
975  		/* If the following statement is true, it means we have reached
976  		 * a point in which only part of the staged submission was
977  		 * submitted and we don't have enough room in the 'cs_pending'
978  		 * array for the rest of the submission.
979  		 * This causes a deadlock because this CS will never be
980  		 * completed as it depends on future CS's for completion.
981  		 */
982  		if (other->cs_sequence == user_sequence)
983  			dev_crit_ratelimited(hdev->dev,
984  				"Staged CS %llu deadlock due to lack of resources",
985  				user_sequence);
986  
987  		dev_dbg_ratelimited(hdev->dev,
988  			"Rejecting CS because of too many in-flights CS\n");
989  		atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
990  		atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
991  		rc = -EAGAIN;
992  		goto free_fence;
993  	}
994  
995  	/* init hl_fence */
996  	hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
997  
998  	cs->sequence = cs_cmpl->cs_seq;
999  
1000  	ctx->cs_pending[cs_cmpl->cs_seq &
1001  			(hdev->asic_prop.max_pending_cs - 1)] =
1002  							&cs_cmpl->base_fence;
1003  	ctx->cs_sequence++;
1004  
1005  	hl_fence_get(&cs_cmpl->base_fence);
1006  
1007  	hl_fence_put(other);
1008  
1009  	spin_unlock(&ctx->cs_lock);
1010  
1011  	*cs_new = cs;
1012  
1013  	return 0;
1014  
1015  free_fence:
1016  	spin_unlock(&ctx->cs_lock);
1017  	kfree(cs->jobs_in_queue_cnt);
1018  free_cs_cmpl:
1019  	kfree(cs_cmpl);
1020  free_cs:
1021  	kfree(cs);
1022  	hl_ctx_put(ctx);
1023  	return rc;
1024  }
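
/*
 * Illustrative arithmetic (not part of this file's code): cs_pending is used
 * as a power-of-2 ring indexed by the low bits of the CS sequence. Assuming
 * max_pending_cs = 64 (the real value is an ASIC property), sequence 130 maps
 * to slot 130 & 63 = 2. If the fence previously stored in slot 2 has not
 * completed yet, allocate_cs() rejects the new CS with -EAGAIN, which bounds
 * the number of in-flight CSs per context.
 */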
1025  
1026  static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
1027  {
1028  	struct hl_cs_job *job, *tmp;
1029  
1030  	staged_cs_put(hdev, cs);
1031  
1032  	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
1033  		hl_complete_job(hdev, job);
1034  }
1035  
1036  /*
1037   * release_reserved_encaps_signals() - release reserved encapsulated signals.
1038   * @hdev: pointer to habanalabs device structure
1039   *
1040   * Release reserved encapsulated signals which weren't un-reserved, or for which a CS with
1041   * encapsulated signals wasn't submitted and thus weren't released as part of CS roll-back.
1042   * For these signals we also need to put the refcount of the H/W SOB which was taken at the
1043   * reservation.
1044   */
1045  static void release_reserved_encaps_signals(struct hl_device *hdev)
1046  {
1047  	struct hl_ctx *ctx = hl_get_compute_ctx(hdev);
1048  	struct hl_cs_encaps_sig_handle *handle;
1049  	struct hl_encaps_signals_mgr *mgr;
1050  	u32 id;
1051  
1052  	if (!ctx)
1053  		return;
1054  
1055  	mgr = &ctx->sig_mgr;
1056  
1057  	idr_for_each_entry(&mgr->handles, handle, id)
1058  		if (handle->cs_seq == ULLONG_MAX)
1059  			kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob_ctx);
1060  
1061  	hl_ctx_put(ctx);
1062  }
1063  
1064  void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush)
1065  {
1066  	int i;
1067  	struct hl_cs *cs, *tmp;
1068  
1069  	if (!skip_wq_flush) {
1070  		flush_workqueue(hdev->ts_free_obj_wq);
1071  
1072  		/* flush all completions before iterating over the CS mirror list in
1073  		 * order to avoid a race with the release functions
1074  		 */
1075  		for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1076  			flush_workqueue(hdev->cq_wq[i]);
1077  
1078  		flush_workqueue(hdev->cs_cmplt_wq);
1079  	}
1080  
1081  	/* Make sure we don't have leftovers in the CS mirror list */
1082  	list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
1083  		cs_get(cs);
1084  		cs->aborted = true;
1085  		dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
1086  					cs->ctx->asid, cs->sequence);
1087  		cs_rollback(hdev, cs);
1088  		cs_put(cs);
1089  	}
1090  
1091  	force_complete_multi_cs(hdev);
1092  
1093  	release_reserved_encaps_signals(hdev);
1094  }
1095  
1096  static void
1097  wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
1098  {
1099  	struct hl_user_pending_interrupt *pend, *temp;
1100  	unsigned long flags;
1101  
1102  	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
1103  	list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) {
1104  		pend->fence.error = -EIO;
1105  		complete_all(&pend->fence.completion);
1106  	}
1107  	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
1108  
1109  	spin_lock_irqsave(&interrupt->ts_list_lock, flags);
1110  	list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) {
1111  		list_del(&pend->list_node);
1112  		hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
1113  		hl_cb_put(pend->ts_reg_info.cq_cb);
1114  	}
1115  	spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
1116  }
1117  
1118  void hl_release_pending_user_interrupts(struct hl_device *hdev)
1119  {
1120  	struct asic_fixed_properties *prop = &hdev->asic_prop;
1121  	struct hl_user_interrupt *interrupt;
1122  	int i;
1123  
1124  	if (!prop->user_interrupt_count)
1125  		return;
1126  
1127  	/* We iterate through the user interrupt requests and wake up all
1128  	 * user threads waiting for interrupt completion. We iterate the
1129  	 * list under a lock; this is why all user threads, once awake,
1130  	 * will wait on the same lock and will release the waiting object upon
1131  	 * unlock.
1132  	 */
1133  
1134  	for (i = 0 ; i < prop->user_interrupt_count ; i++) {
1135  		interrupt = &hdev->user_interrupt[i];
1136  		wake_pending_user_interrupt_threads(interrupt);
1137  	}
1138  
1139  	interrupt = &hdev->common_user_cq_interrupt;
1140  	wake_pending_user_interrupt_threads(interrupt);
1141  
1142  	interrupt = &hdev->common_decoder_interrupt;
1143  	wake_pending_user_interrupt_threads(interrupt);
1144  }
1145  
1146  static void force_complete_cs(struct hl_device *hdev)
1147  {
1148  	struct hl_cs *cs;
1149  
1150  	spin_lock(&hdev->cs_mirror_lock);
1151  
1152  	list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) {
1153  		cs->fence->error = -EIO;
1154  		complete_all(&cs->fence->completion);
1155  	}
1156  
1157  	spin_unlock(&hdev->cs_mirror_lock);
1158  }
1159  
1160  void hl_abort_waiting_for_cs_completions(struct hl_device *hdev)
1161  {
1162  	force_complete_cs(hdev);
1163  	force_complete_multi_cs(hdev);
1164  }
1165  
1166  static void job_wq_completion(struct work_struct *work)
1167  {
1168  	struct hl_cs_job *job = container_of(work, struct hl_cs_job,
1169  						finish_work);
1170  	struct hl_cs *cs = job->cs;
1171  	struct hl_device *hdev = cs->ctx->hdev;
1172  
1173  	/* job is no longer needed */
1174  	hl_complete_job(hdev, job);
1175  }
1176  
1177  static void cs_completion(struct work_struct *work)
1178  {
1179  	struct hl_cs *cs = container_of(work, struct hl_cs, finish_work);
1180  	struct hl_device *hdev = cs->ctx->hdev;
1181  	struct hl_cs_job *job, *tmp;
1182  
1183  	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
1184  		hl_complete_job(hdev, job);
1185  }
1186  
1187  u32 hl_get_active_cs_num(struct hl_device *hdev)
1188  {
1189  	u32 active_cs_num = 0;
1190  	struct hl_cs *cs;
1191  
1192  	spin_lock(&hdev->cs_mirror_lock);
1193  
1194  	list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node)
1195  		if (!cs->completed)
1196  			active_cs_num++;
1197  
1198  	spin_unlock(&hdev->cs_mirror_lock);
1199  
1200  	return active_cs_num;
1201  }
1202  
1203  static int validate_queue_index(struct hl_device *hdev,
1204  				struct hl_cs_chunk *chunk,
1205  				enum hl_queue_type *queue_type,
1206  				bool *is_kernel_allocated_cb)
1207  {
1208  	struct asic_fixed_properties *asic = &hdev->asic_prop;
1209  	struct hw_queue_properties *hw_queue_prop;
1210  
1211  	/* This must be checked here to prevent out-of-bounds access to
1212  	 * hw_queues_props array
1213  	 */
1214  	if (chunk->queue_index >= asic->max_queues) {
1215  		dev_err(hdev->dev, "Queue index %d is invalid\n",
1216  			chunk->queue_index);
1217  		return -EINVAL;
1218  	}
1219  
1220  	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
1221  
1222  	if (hw_queue_prop->type == QUEUE_TYPE_NA) {
1223  		dev_err(hdev->dev, "Queue index %d is not applicable\n",
1224  			chunk->queue_index);
1225  		return -EINVAL;
1226  	}
1227  
1228  	if (hw_queue_prop->binned) {
1229  		dev_err(hdev->dev, "Queue index %d is binned out\n",
1230  			chunk->queue_index);
1231  		return -EINVAL;
1232  	}
1233  
1234  	if (hw_queue_prop->driver_only) {
1235  		dev_err(hdev->dev,
1236  			"Queue index %d is restricted for the kernel driver\n",
1237  			chunk->queue_index);
1238  		return -EINVAL;
1239  	}
1240  
1241  	/* When hw queue type isn't QUEUE_TYPE_HW,
1242  	 * USER_ALLOC_CB flag shall be treated as "don't care".
1243  	 */
1244  	if (hw_queue_prop->type == QUEUE_TYPE_HW) {
1245  		if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
1246  			if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
1247  				dev_err(hdev->dev,
1248  					"Queue index %d doesn't support user CB\n",
1249  					chunk->queue_index);
1250  				return -EINVAL;
1251  			}
1252  
1253  			*is_kernel_allocated_cb = false;
1254  		} else {
1255  			if (!(hw_queue_prop->cb_alloc_flags &
1256  					CB_ALLOC_KERNEL)) {
1257  				dev_err(hdev->dev,
1258  					"Queue index %d doesn't support kernel CB\n",
1259  					chunk->queue_index);
1260  				return -EINVAL;
1261  			}
1262  
1263  			*is_kernel_allocated_cb = true;
1264  		}
1265  	} else {
1266  		*is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
1267  						& CB_ALLOC_KERNEL);
1268  	}
1269  
1270  	*queue_type = hw_queue_prop->type;
1271  	return 0;
1272  }
1273  
1274  static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
1275  					struct hl_mem_mgr *mmg,
1276  					struct hl_cs_chunk *chunk)
1277  {
1278  	struct hl_cb *cb;
1279  
1280  	cb = hl_cb_get(mmg, chunk->cb_handle);
1281  	if (!cb) {
1282  		dev_err(hdev->dev, "CB handle 0x%llx invalid\n", chunk->cb_handle);
1283  		return NULL;
1284  	}
1285  
1286  	if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
1287  		dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
1288  		goto release_cb;
1289  	}
1290  
1291  	atomic_inc(&cb->cs_cnt);
1292  
1293  	return cb;
1294  
1295  release_cb:
1296  	hl_cb_put(cb);
1297  	return NULL;
1298  }
1299  
1300  struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
1301  		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
1302  {
1303  	struct hl_cs_job *job;
1304  
1305  	job = kzalloc(sizeof(*job), GFP_ATOMIC);
1306  	if (!job)
1307  		job = kzalloc(sizeof(*job), GFP_KERNEL);
1308  
1309  	if (!job)
1310  		return NULL;
1311  
1312  	kref_init(&job->refcount);
1313  	job->queue_type = queue_type;
1314  	job->is_kernel_allocated_cb = is_kernel_allocated_cb;
1315  
1316  	if (is_cb_patched(hdev, job))
1317  		INIT_LIST_HEAD(&job->userptr_list);
1318  
1319  	if (job->queue_type == QUEUE_TYPE_EXT)
1320  		INIT_WORK(&job->finish_work, job_wq_completion);
1321  
1322  	return job;
1323  }
1324  
1325  static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
1326  {
1327  	if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
1328  		return CS_TYPE_SIGNAL;
1329  	else if (cs_type_flags & HL_CS_FLAGS_WAIT)
1330  		return CS_TYPE_WAIT;
1331  	else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
1332  		return CS_TYPE_COLLECTIVE_WAIT;
1333  	else if (cs_type_flags & HL_CS_FLAGS_RESERVE_SIGNALS_ONLY)
1334  		return CS_RESERVE_SIGNALS;
1335  	else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
1336  		return CS_UNRESERVE_SIGNALS;
1337  	else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
1338  		return CS_TYPE_ENGINE_CORE;
1339  	else if (cs_type_flags & HL_CS_FLAGS_ENGINES_COMMAND)
1340  		return CS_TYPE_ENGINES;
1341  	else if (cs_type_flags & HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
1342  		return CS_TYPE_FLUSH_PCI_HBW_WRITES;
1343  	else
1344  		return CS_TYPE_DEFAULT;
1345  }
1346  
1347  static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
1348  {
1349  	struct hl_device *hdev = hpriv->hdev;
1350  	struct hl_ctx *ctx = hpriv->ctx;
1351  	u32 cs_type_flags, num_chunks;
1352  	enum hl_device_status status;
1353  	enum hl_cs_type cs_type;
1354  	bool is_sync_stream;
1355  	int i;
1356  
1357  	for (i = 0 ; i < sizeof(args->in.pad) ; i++)
1358  		if (args->in.pad[i]) {
1359  			dev_dbg(hdev->dev, "Padding bytes must be 0\n");
1360  			return -EINVAL;
1361  		}
1362  
1363  	if (!hl_device_operational(hdev, &status))
1364  		return -EBUSY;
1365  
1366  	if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1367  			!hdev->supports_staged_submission) {
1368  		dev_err(hdev->dev, "staged submission not supported");
1369  		return -EPERM;
1370  	}
1371  
1372  	cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;
1373  
1374  	if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
1375  		dev_err(hdev->dev,
1376  			"CS type flags are mutually exclusive, context %d\n",
1377  			ctx->asid);
1378  		return -EINVAL;
1379  	}
1380  
1381  	cs_type = hl_cs_get_cs_type(cs_type_flags);
1382  	num_chunks = args->in.num_chunks_execute;
1383  
1384  	is_sync_stream = (cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT ||
1385  			cs_type == CS_TYPE_COLLECTIVE_WAIT);
1386  
1387  	if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) {
1388  		dev_err(hdev->dev, "Sync stream CS is not supported\n");
1389  		return -EINVAL;
1390  	}
1391  
1392  	if (cs_type == CS_TYPE_DEFAULT) {
1393  		if (!num_chunks) {
1394  			dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid);
1395  			return -EINVAL;
1396  		}
1397  	} else if (is_sync_stream && num_chunks != 1) {
1398  		dev_err(hdev->dev,
1399  			"Sync stream CS mandates one chunk only, context %d\n",
1400  			ctx->asid);
1401  		return -EINVAL;
1402  	}
1403  
1404  	return 0;
1405  }
1406  
1407  static int hl_cs_copy_chunk_array(struct hl_device *hdev,
1408  					struct hl_cs_chunk **cs_chunk_array,
1409  					void __user *chunks, u32 num_chunks,
1410  					struct hl_ctx *ctx)
1411  {
1412  	u32 size_to_copy;
1413  
1414  	if (num_chunks > HL_MAX_JOBS_PER_CS) {
1415  		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1416  		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1417  		dev_err(hdev->dev,
1418  			"Number of chunks can NOT be larger than %d\n",
1419  			HL_MAX_JOBS_PER_CS);
1420  		return -EINVAL;
1421  	}
1422  
1423  	*cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
1424  					GFP_ATOMIC);
1425  	if (!*cs_chunk_array)
1426  		*cs_chunk_array = kmalloc_array(num_chunks,
1427  					sizeof(**cs_chunk_array), GFP_KERNEL);
1428  	if (!*cs_chunk_array) {
1429  		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1430  		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1431  		return -ENOMEM;
1432  	}
1433  
1434  	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
1435  	if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
1436  		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1437  		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1438  		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
1439  		kfree(*cs_chunk_array);
1440  		return -EFAULT;
1441  	}
1442  
1443  	return 0;
1444  }
1445  
1446  static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
1447  				u64 sequence, u32 flags,
1448  				u32 encaps_signal_handle)
1449  {
1450  	if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
1451  		return 0;
1452  
1453  	cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
1454  	cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);
1455  
1456  	if (cs->staged_first) {
1457  		/* Staged CS sequence is the first CS sequence */
1458  		INIT_LIST_HEAD(&cs->staged_cs_node);
1459  		cs->staged_sequence = cs->sequence;
1460  
1461  		if (cs->encaps_signals)
1462  			cs->encaps_sig_hdl_id = encaps_signal_handle;
1463  	} else {
1464  		/* User sequence will be validated in 'hl_hw_queue_schedule_cs'
1465  		 * under the cs_mirror_lock
1466  		 */
1467  		cs->staged_sequence = sequence;
1468  	}
1469  
1470  	/* Increment CS reference if needed */
1471  	staged_cs_get(hdev, cs);
1472  
1473  	cs->staged_cs = true;
1474  
1475  	return 0;
1476  }
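
/*
 * Illustrative flow (not part of this file's code): a staged submission from
 * user space is expected to look roughly as follows; the flag names come from
 * uapi/drm/habanalabs_accel.h, while the exact ioctl plumbing is an assumption
 * for illustration.
 *
 *	CS #1: HL_CS_FLAGS_STAGED_SUBMISSION | HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
 *	       -> the returned sequence S becomes the staged sequence
 *	CS #2: HL_CS_FLAGS_STAGED_SUBMISSION, user passes S as the sequence
 *	CS #3: HL_CS_FLAGS_STAGED_SUBMISSION | HL_CS_FLAGS_STAGED_SUBMISSION_LAST,
 *	       user passes S -> only this CS gets a completion for the whole
 *	       staged submission
 */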
1477  
1478  static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid)
1479  {
1480  	int i;
1481  
1482  	for (i = 0; i < hdev->stream_master_qid_arr_size; i++)
1483  		if (qid == hdev->stream_master_qid_arr[i])
1484  			return BIT(i);
1485  
1486  	return 0;
1487  }
1488  
1489  static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
1490  				u32 num_chunks, u64 *cs_seq, u32 flags,
1491  				u32 encaps_signals_handle, u32 timeout,
1492  				u16 *signal_initial_sob_count)
1493  {
1494  	bool staged_mid, int_queues_only = true, using_hw_queues = false;
1495  	struct hl_device *hdev = hpriv->hdev;
1496  	struct hl_cs_chunk *cs_chunk_array;
1497  	struct hl_cs_counters_atomic *cntr;
1498  	struct hl_ctx *ctx = hpriv->ctx;
1499  	struct hl_cs_job *job;
1500  	struct hl_cs *cs;
1501  	struct hl_cb *cb;
1502  	u64 user_sequence;
1503  	u8 stream_master_qid_map = 0;
1504  	int rc, i;
1505  
1506  	cntr = &hdev->aggregated_cs_counters;
1507  	user_sequence = *cs_seq;
1508  	*cs_seq = ULLONG_MAX;
1509  
1510  	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1511  			hpriv->ctx);
1512  	if (rc)
1513  		goto out;
1514  
1515  	if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1516  			!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
1517  		staged_mid = true;
1518  	else
1519  		staged_mid = false;
1520  
1521  	rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
1522  			staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
1523  			timeout);
1524  	if (rc)
1525  		goto free_cs_chunk_array;
1526  
1527  	*cs_seq = cs->sequence;
1528  
1529  	hl_debugfs_add_cs(cs);
1530  
1531  	rc = cs_staged_submission(hdev, cs, user_sequence, flags,
1532  						encaps_signals_handle);
1533  	if (rc)
1534  		goto free_cs_object;
1535  
1536  	/* If this is a staged submission we must return the staged sequence
1537  	 * rather than the internal CS sequence
1538  	 */
1539  	if (cs->staged_cs)
1540  		*cs_seq = cs->staged_sequence;
1541  
1542  	/* Validate ALL the CS chunks before submitting the CS */
1543  	for (i = 0 ; i < num_chunks ; i++) {
1544  		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
1545  		enum hl_queue_type queue_type;
1546  		bool is_kernel_allocated_cb;
1547  
1548  		rc = validate_queue_index(hdev, chunk, &queue_type,
1549  						&is_kernel_allocated_cb);
1550  		if (rc) {
1551  			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1552  			atomic64_inc(&cntr->validation_drop_cnt);
1553  			goto free_cs_object;
1554  		}
1555  
1556  		if (is_kernel_allocated_cb) {
1557  			cb = get_cb_from_cs_chunk(hdev, &hpriv->mem_mgr, chunk);
1558  			if (!cb) {
1559  				atomic64_inc(
1560  					&ctx->cs_counters.validation_drop_cnt);
1561  				atomic64_inc(&cntr->validation_drop_cnt);
1562  				rc = -EINVAL;
1563  				goto free_cs_object;
1564  			}
1565  		} else {
1566  			cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
1567  		}
1568  
1569  		if (queue_type == QUEUE_TYPE_EXT ||
1570  						queue_type == QUEUE_TYPE_HW) {
1571  			int_queues_only = false;
1572  
1573  			/*
1574  			 * store which streams are being used for external/HW
1575  			 * queues of this CS
1576  			 */
1577  			if (hdev->supports_wait_for_multi_cs)
1578  				stream_master_qid_map |=
1579  					get_stream_master_qid_mask(hdev,
1580  							chunk->queue_index);
1581  		}
1582  
1583  		if (queue_type == QUEUE_TYPE_HW)
1584  			using_hw_queues = true;
1585  
1586  		job = hl_cs_allocate_job(hdev, queue_type,
1587  						is_kernel_allocated_cb);
1588  		if (!job) {
1589  			atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1590  			atomic64_inc(&cntr->out_of_mem_drop_cnt);
1591  			dev_err(hdev->dev, "Failed to allocate a new job\n");
1592  			rc = -ENOMEM;
1593  			if (is_kernel_allocated_cb)
1594  				goto release_cb;
1595  
1596  			goto free_cs_object;
1597  		}
1598  
1599  		job->id = i + 1;
1600  		job->cs = cs;
1601  		job->user_cb = cb;
1602  		job->user_cb_size = chunk->cb_size;
1603  		job->hw_queue_id = chunk->queue_index;
1604  
1605  		cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1606  		cs->jobs_cnt++;
1607  
1608  		list_add_tail(&job->cs_node, &cs->job_list);
1609  
1610  		/*
1611  		 * Increment CS reference. When CS reference is 0, CS is
1612  		 * done and can be signaled to user and all its resources freed.
1613  		 * Only increment for JOBs on external or H/W queues, because
1614  		 * only for those JOBs do we get a completion
1615  		 */
1616  		if (cs_needs_completion(cs) &&
1617  			(job->queue_type == QUEUE_TYPE_EXT ||
1618  				job->queue_type == QUEUE_TYPE_HW))
1619  			cs_get(cs);
1620  
1621  		hl_debugfs_add_job(hdev, job);
1622  
1623  		rc = cs_parser(hpriv, job);
1624  		if (rc) {
1625  			atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
1626  			atomic64_inc(&cntr->parsing_drop_cnt);
1627  			dev_err(hdev->dev,
1628  				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
1629  				cs->ctx->asid, cs->sequence, job->id, rc);
1630  			goto free_cs_object;
1631  		}
1632  	}
1633  
1634  	/* We allow a CS with any queue type combination as long as it does
1635  	 * not get a completion
1636  	 */
1637  	if (int_queues_only && cs_needs_completion(cs)) {
1638  		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1639  		atomic64_inc(&cntr->validation_drop_cnt);
1640  		dev_err(hdev->dev,
1641  			"Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n",
1642  			cs->ctx->asid, cs->sequence);
1643  		rc = -EINVAL;
1644  		goto free_cs_object;
1645  	}
1646  
1647  	if (using_hw_queues)
1648  		INIT_WORK(&cs->finish_work, cs_completion);
1649  
1650  	/*
1651  	 * store the (external/HW queues) streams used by the CS in the
1652  	 * fence object for multi-CS completion
1653  	 */
1654  	if (hdev->supports_wait_for_multi_cs)
1655  		cs->fence->stream_master_qid_map = stream_master_qid_map;
1656  
1657  	rc = hl_hw_queue_schedule_cs(cs);
1658  	if (rc) {
1659  		if (rc != -EAGAIN)
1660  			dev_err(hdev->dev,
1661  				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
1662  				cs->ctx->asid, cs->sequence, rc);
1663  		goto free_cs_object;
1664  	}
1665  
1666  	*signal_initial_sob_count = cs->initial_sob_count;
1667  
1668  	rc = HL_CS_STATUS_SUCCESS;
1669  	goto put_cs;
1670  
1671  release_cb:
1672  	atomic_dec(&cb->cs_cnt);
1673  	hl_cb_put(cb);
1674  free_cs_object:
1675  	cs_rollback(hdev, cs);
1676  	*cs_seq = ULLONG_MAX;
1677  	/* The path below is both for good and erroneous exits */
1678  put_cs:
1679  	/* We finished with the CS in this function, so put the ref */
1680  	cs_put(cs);
1681  free_cs_chunk_array:
1682  	kfree(cs_chunk_array);
1683  out:
1684  	return rc;
1685  }
1686  
1687  static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
1688  				u64 *cs_seq)
1689  {
1690  	struct hl_device *hdev = hpriv->hdev;
1691  	struct hl_ctx *ctx = hpriv->ctx;
1692  	bool need_soft_reset = false;
1693  	int rc = 0, do_ctx_switch = 0;
1694  	void __user *chunks;
1695  	u32 num_chunks, tmp;
1696  	u16 sob_count;
1697  	int ret;
1698  
1699  	if (hdev->supports_ctx_switch)
1700  		do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
1701  
1702  	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
1703  		mutex_lock(&hpriv->restore_phase_mutex);
1704  
1705  		if (do_ctx_switch) {
1706  			rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
1707  			if (rc) {
1708  				dev_err_ratelimited(hdev->dev,
1709  					"Failed to switch to context %d, rejecting CS! %d\n",
1710  					ctx->asid, rc);
1711  				/*
1712  				 * If we timed out, or if the device is not
1713  				 * IDLE while we want to do a context switch
1714  				 * (-EBUSY), we need to soft-reset because the
1715  				 * QMAN is probably stuck. However, we can't
1716  				 * call reset here directly because of a
1717  				 * deadlock, so we need to do it at the very
1718  				 * end of this function.
1719  				 */
1720  				if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
1721  					need_soft_reset = true;
1722  				mutex_unlock(&hpriv->restore_phase_mutex);
1723  				goto out;
1724  			}
1725  		}
1726  
1727  		hdev->asic_funcs->restore_phase_topology(hdev);
1728  
1729  		chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
1730  		num_chunks = args->in.num_chunks_restore;
1731  
1732  		if (!num_chunks) {
1733  			dev_dbg(hdev->dev,
1734  				"Need to run restore phase but restore CS is empty\n");
1735  			rc = 0;
1736  		} else {
1737  			rc = cs_ioctl_default(hpriv, chunks, num_chunks,
1738  					cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count);
1739  		}
1740  
1741  		mutex_unlock(&hpriv->restore_phase_mutex);
1742  
1743  		if (rc) {
1744  			dev_err(hdev->dev,
1745  				"Failed to submit restore CS for context %d (%d)\n",
1746  				ctx->asid, rc);
1747  			goto out;
1748  		}
1749  
1750  		/* Need to wait for restore completion before execution phase */
1751  		if (num_chunks) {
1752  			enum hl_cs_wait_status status;
1753  
1754  			ret = _hl_cs_wait_ioctl(hdev, ctx,
1755  					jiffies_to_usecs(hdev->timeout_jiffies),
1756  					*cs_seq, &status, NULL);
1757  			if (ret) {
1758  				dev_err(hdev->dev,
1759  					"Restore CS for context %d failed to complete %d\n",
1760  					ctx->asid, ret);
1761  				rc = -ENOEXEC;
1762  				goto out;
1763  			}
1764  		}
1765  
1766  		if (hdev->supports_ctx_switch)
1767  			ctx->thread_ctx_switch_wait_token = 1;
1768  
1769  	} else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) {
1770  		rc = hl_poll_timeout_memory(hdev,
1771  			&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
1772  			100, jiffies_to_usecs(hdev->timeout_jiffies), false);
1773  
1774  		if (rc == -ETIMEDOUT) {
1775  			dev_err(hdev->dev,
1776  				"context switch phase timeout (%d)\n", tmp);
1777  			goto out;
1778  		}
1779  	}
1780  
1781  out:
1782  	if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
1783  		hl_device_reset(hdev, 0);
1784  
1785  	return rc;
1786  }
1787  
1788  /*
1789   * hl_cs_signal_sob_wraparound_handler: handle the SOB value wraparound case.
1790   * If the SOB value reaches the max value, move to the other SOB reserved
1791   * for the queue.
1792   * @hdev: pointer to device structure
1793   * @q_idx: stream queue index
1794   * @hw_sob: the H/W SOB used in this signal CS.
1795   * @count: signals count
1796   * @encaps_sig: tells whether it's a reservation for encaps signals or not.
1797   *
1798   * Note that this function must be called while hw_queues_lock is taken.
1799   */
 */
1800  int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
1801  			struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig)
1802  
1803  {
1804  	struct hl_sync_stream_properties *prop;
1805  	struct hl_hw_sob *sob = *hw_sob, *other_sob;
1806  	u8 other_sob_offset;
1807  
1808  	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1809  
1810  	hw_sob_get(sob);
1811  
1812  	/* check for wraparound */
1813  	if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) {
1814  		/*
1815  		 * Decrement as we reached the max value.
1816  		 * The release function won't be called here as we've
1817  		 * just incremented the refcount right before calling this
1818  		 * function.
1819  		 */
1820  		hw_sob_put_err(sob);
1821  
1822  		/*
1823  		 * check the other sob value; if it is still in use then fail,
1824  		 * otherwise make the switch
1825  		 */
1826  		other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
1827  		other_sob = &prop->hw_sob[other_sob_offset];
1828  
1829  		if (kref_read(&other_sob->kref) != 1) {
1830  			dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n",
1831  								q_idx);
1832  			return -EINVAL;
1833  		}
1834  
1835  		/*
1836  		 * next_sob_val always points to the next available signal
1837  		 * in the sob, so in encaps signals it will be the next one
1838  		 * after reserving the required amount.
1839  		 */
1840  		if (encaps_sig)
1841  			prop->next_sob_val = count + 1;
1842  		else
1843  			prop->next_sob_val = count;
1844  
1845  		/* only two SOBs are currently in use */
1846  		prop->curr_sob_offset = other_sob_offset;
1847  		*hw_sob = other_sob;
1848  
1849  		/*
1850  		 * Check if other_sob needs a reset, and if so do it before
1851  		 * using it for the reservation or for the next signal CS.
1852  		 * We do it here, for both the encaps and the regular signal CS
1853  		 * cases, in order to avoid a possible race of two kref_put
1854  		 * calls on the SOB, which could occur at the same time if we
1855  		 * moved the SOB reset (kref_put) to the cs_do_release function.
1856  		 * In addition, if we have a combination of signal CS and
1857  		 * encaps, and at the point where we need to reset the SOB there
1858  		 * are no more reservations and only signal CSs keep coming,
1859  		 * then we need the signal CS to put the refcount and reset
1860  		 * the SOB.
1861  		 */
1862  		if (other_sob->need_reset)
1863  			hw_sob_put(other_sob);
1864  
1865  		if (encaps_sig) {
1866  			/* set reset indication for the sob */
1867  			sob->need_reset = true;
1868  			hw_sob_get(other_sob);
1869  		}
1870  
1871  		dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
1872  				prop->curr_sob_offset, q_idx);
1873  	} else {
1874  		prop->next_sob_val += count;
1875  	}
1876  
1877  	return 0;
1878  }
1879  
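/*
 * cs_ioctl_extract_signal_seq - extract the signal CS sequence to wait on
 *
 * For encapsulated signals the sequence is taken directly from the chunk.
 * Otherwise it is copied from the user-supplied array, which currently must
 * contain exactly one entry.
 */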
1880  static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
1881  		struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx,
1882  		bool encaps_signals)
1883  {
1884  	u64 *signal_seq_arr = NULL;
1885  	u32 size_to_copy, signal_seq_arr_len;
1886  	int rc = 0;
1887  
1888  	if (encaps_signals) {
1889  		*signal_seq = chunk->encaps_signal_seq;
1890  		return 0;
1891  	}
1892  
1893  	signal_seq_arr_len = chunk->num_signal_seq_arr;
1894  
1895  	/* currently only one signal seq is supported */
1896  	if (signal_seq_arr_len != 1) {
1897  		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1898  		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1899  		dev_err(hdev->dev,
1900  			"Wait for signal CS supports only one signal CS seq\n");
1901  		return -EINVAL;
1902  	}
1903  
1904  	signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1905  					sizeof(*signal_seq_arr),
1906  					GFP_ATOMIC);
1907  	if (!signal_seq_arr)
1908  		signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1909  					sizeof(*signal_seq_arr),
1910  					GFP_KERNEL);
1911  	if (!signal_seq_arr) {
1912  		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1913  		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1914  		return -ENOMEM;
1915  	}
1916  
1917  	size_to_copy = signal_seq_arr_len * sizeof(*signal_seq_arr);
1918  	if (copy_from_user(signal_seq_arr,
1919  				u64_to_user_ptr(chunk->signal_seq_arr),
1920  				size_to_copy)) {
1921  		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1922  		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1923  		dev_err(hdev->dev,
1924  			"Failed to copy signal seq array from user\n");
1925  		rc = -EFAULT;
1926  		goto out;
1927  	}
1928  
1929  	/* currently it is guaranteed to have only one signal seq */
1930  	*signal_seq = signal_seq_arr[0];
1931  
1932  out:
1933  	kfree(signal_seq_arr);
1934  
1935  	return rc;
1936  }
1937  
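/*
 * cs_ioctl_signal_wait_create_jobs - create the single kernel job of a
 * signal/wait CS
 *
 * The job uses a kernel-allocated CB whose size depends on the CS type.
 * No parsing is needed, so the kernel CB is used directly as the patched CB.
 */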
1938  static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
1939  		struct hl_ctx *ctx, struct hl_cs *cs,
1940  		enum hl_queue_type q_type, u32 q_idx, u32 encaps_signal_offset)
1941  {
1942  	struct hl_cs_counters_atomic *cntr;
1943  	struct hl_cs_job *job;
1944  	struct hl_cb *cb;
1945  	u32 cb_size;
1946  
1947  	cntr = &hdev->aggregated_cs_counters;
1948  
1949  	job = hl_cs_allocate_job(hdev, q_type, true);
1950  	if (!job) {
1951  		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1952  		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1953  		dev_err(hdev->dev, "Failed to allocate a new job\n");
1954  		return -ENOMEM;
1955  	}
1956  
1957  	if (cs->type == CS_TYPE_WAIT)
1958  		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
1959  	else
1960  		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
1961  
1962  	cb = hl_cb_kernel_create(hdev, cb_size, q_type == QUEUE_TYPE_HW);
1963  	if (!cb) {
1964  		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1965  		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1966  		kfree(job);
1967  		return -EFAULT;
1968  	}
1969  
1970  	job->id = 0;
1971  	job->cs = cs;
1972  	job->user_cb = cb;
1973  	atomic_inc(&job->user_cb->cs_cnt);
1974  	job->user_cb_size = cb_size;
1975  	job->hw_queue_id = q_idx;
1976  
1977  	if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
1978  			&& cs->encaps_signals)
1979  		job->encaps_sig_wait_offset = encaps_signal_offset;
1980  	/*
1981  	 * No need for parsing, the user CB is the patched CB.
1982  	 * We call hl_cb_destroy() for two reasons - we don't need the CB in
1983  	 * the CB idr anymore, and to decrement its refcount as it was
1984  	 * incremented inside hl_cb_kernel_create().
1985  	 */
1986  	job->patched_cb = job->user_cb;
1987  	job->job_cb_size = job->user_cb_size;
1988  	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1989  
1990  	/* increment refcount as for external queues we get completion */
1991  	cs_get(cs);
1992  
1993  	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1994  	cs->jobs_cnt++;
1995  
1996  	list_add_tail(&job->cs_node, &cs->job_list);
1997  
1998  	hl_debugfs_add_job(hdev, job);
1999  
2000  	return 0;
2001  }
2002  
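/*
 * cs_ioctl_reserve_signals - reserve signals on a sync-stream queue
 *
 * Allocates an encapsulated-signals handle, advances the queue's SOB value
 * by 'count' (switching SOB on wraparound) and returns the handle id, the
 * SOB address and the resulting signals count to the caller.
 */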
2003  static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
2004  				u32 q_idx, u32 count,
2005  				u32 *handle_id, u32 *sob_addr,
2006  				u32 *signals_count)
2007  {
2008  	struct hw_queue_properties *hw_queue_prop;
2009  	struct hl_sync_stream_properties *prop;
2010  	struct hl_device *hdev = hpriv->hdev;
2011  	struct hl_cs_encaps_sig_handle *handle;
2012  	struct hl_encaps_signals_mgr *mgr;
2013  	struct hl_hw_sob *hw_sob;
2014  	int hdl_id;
2015  	int rc = 0;
2016  
2017  	if (count >= HL_MAX_SOB_VAL) {
2018  		dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n",
2019  						count);
2020  		rc = -EINVAL;
2021  		goto out;
2022  	}
2023  
2024  	if (q_idx >= hdev->asic_prop.max_queues) {
2025  		dev_err(hdev->dev, "Queue index %d is invalid\n",
2026  			q_idx);
2027  		rc = -EINVAL;
2028  		goto out;
2029  	}
2030  
2031  	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
2032  
2033  	if (!hw_queue_prop->supports_sync_stream) {
2034  		dev_err(hdev->dev,
2035  			"Queue index %d does not support sync stream operations\n",
2036  									q_idx);
2037  		rc = -EINVAL;
2038  		goto out;
2039  	}
2040  
2041  	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
2042  
2043  	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
2044  	if (!handle) {
2045  		rc = -ENOMEM;
2046  		goto out;
2047  	}
2048  
2049  	handle->count = count;
2050  
2051  	hl_ctx_get(hpriv->ctx);
2052  	handle->ctx = hpriv->ctx;
2053  	mgr = &hpriv->ctx->sig_mgr;
2054  
2055  	spin_lock(&mgr->lock);
2056  	hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC);
2057  	spin_unlock(&mgr->lock);
2058  
2059  	if (hdl_id < 0) {
2060  		dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n");
2061  		rc = -EINVAL;
2062  		goto put_ctx;
2063  	}
2064  
2065  	handle->id = hdl_id;
2066  	handle->q_idx = q_idx;
2067  	handle->hdev = hdev;
2068  	kref_init(&handle->refcount);
2069  
2070  	hdev->asic_funcs->hw_queues_lock(hdev);
2071  
2072  	hw_sob = &prop->hw_sob[prop->curr_sob_offset];
2073  
2074  	/*
2075  	 * Increment the SOB value by count, per the user request,
2076  	 * to reserve those signals.
2077  	 * Check that the amount of signals to reserve does not exceed the max
2078  	 * SOB value; if it does, switch to the other SOB.
2079  	 */
2080  	rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count,
2081  								true);
2082  	if (rc) {
2083  		dev_err(hdev->dev, "Failed to switch SOB\n");
2084  		hdev->asic_funcs->hw_queues_unlock(hdev);
2085  		rc = -EINVAL;
2086  		goto remove_idr;
2087  	}
2088  	/* set the hw_sob in the handle after calling the sob wraparound handler
2089  	 * since the sob could have changed.
2090  	 */
2091  	handle->hw_sob = hw_sob;
2092  
2093  	/* store the current sob value for unreserve validity check, and
2094  	 * signal offset support
2095  	 */
2096  	handle->pre_sob_val = prop->next_sob_val - handle->count;
2097  
2098  	handle->cs_seq = ULLONG_MAX;
2099  
2100  	*signals_count = prop->next_sob_val;
2101  	hdev->asic_funcs->hw_queues_unlock(hdev);
2102  
2103  	*sob_addr = handle->hw_sob->sob_addr;
2104  	*handle_id = hdl_id;
2105  
2106  	dev_dbg(hdev->dev,
2107  		"Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n",
2108  			hw_sob->sob_id, handle->hw_sob->sob_addr,
2109  			prop->next_sob_val - 1, q_idx, hdl_id);
2110  	goto out;
2111  
2112  remove_idr:
2113  	spin_lock(&mgr->lock);
2114  	idr_remove(&mgr->handles, hdl_id);
2115  	spin_unlock(&mgr->lock);
2116  
2117  put_ctx:
2118  	hl_ctx_put(handle->ctx);
2119  	kfree(handle);
2120  
2121  out:
2122  	return rc;
2123  }
2124  
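/*
 * cs_ioctl_unreserve_signals - release a previously reserved signals handle
 *
 * The reservation can be undone only if the SOB value did not move since the
 * reserve call (i.e. no signal submissions in between and no SOB switch),
 * otherwise -EINVAL is returned.
 */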
2125  static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
2126  {
2127  	struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
2128  	struct hl_sync_stream_properties *prop;
2129  	struct hl_device *hdev = hpriv->hdev;
2130  	struct hl_encaps_signals_mgr *mgr;
2131  	struct hl_hw_sob *hw_sob;
2132  	u32 q_idx, sob_addr;
2133  	int rc = 0;
2134  
2135  	mgr = &hpriv->ctx->sig_mgr;
2136  
2137  	spin_lock(&mgr->lock);
2138  	encaps_sig_hdl = idr_find(&mgr->handles, handle_id);
2139  	if (encaps_sig_hdl) {
2140  		dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n",
2141  				handle_id, encaps_sig_hdl->hw_sob->sob_addr,
2142  					encaps_sig_hdl->count);
2143  
2144  		hdev->asic_funcs->hw_queues_lock(hdev);
2145  
2146  		q_idx = encaps_sig_hdl->q_idx;
2147  		prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
2148  		hw_sob = &prop->hw_sob[prop->curr_sob_offset];
2149  		sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
2150  
2151  		/* Check if sob_val got out of sync due to other
2152  		 * signal submission requests which were handled
2153  		 * between the reserve-unreserve calls, or due to a SOB
2154  		 * switch upon reaching the SOB max value.
2155  		 */
2156  		if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count
2157  				!= prop->next_sob_val ||
2158  				sob_addr != encaps_sig_hdl->hw_sob->sob_addr) {
2159  			dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n",
2160  				encaps_sig_hdl->pre_sob_val,
2161  				(prop->next_sob_val - encaps_sig_hdl->count));
2162  
2163  			hdev->asic_funcs->hw_queues_unlock(hdev);
2164  			rc = -EINVAL;
2165  			goto out_unlock;
2166  		}
2167  
2168  		/*
2169  		 * Decrement the SOB value by count by user request
2170  		 * to unreserve those signals
2171  		 */
2172  		prop->next_sob_val -= encaps_sig_hdl->count;
2173  
2174  		hdev->asic_funcs->hw_queues_unlock(hdev);
2175  
2176  		hw_sob_put(hw_sob);
2177  
2178  		/* Release the id and free allocated memory of the handle */
2179  		idr_remove(&mgr->handles, handle_id);
2180  
2181  		/* unlock before calling ctx_put, where we might sleep */
2182  		spin_unlock(&mgr->lock);
2183  		hl_ctx_put(encaps_sig_hdl->ctx);
2184  		kfree(encaps_sig_hdl);
2185  		goto out;
2186  	} else {
2187  		rc = -EINVAL;
2188  		dev_err(hdev->dev, "failed to unreserve signals, cannot find handle\n");
2189  	}
2190  
2191  out_unlock:
2192  	spin_unlock(&mgr->lock);
2193  
2194  out:
2195  	return rc;
2196  }
2197  
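/*
 * cs_ioctl_signal_wait - submit a signal, wait or collective-wait CS
 *
 * For wait CSs, the signal fence (or the encapsulated signals handle) is
 * looked up and stored in the CS. A single kernel job is then created and
 * the CS is scheduled on the H/W queues.
 */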
2198  static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
2199  				void __user *chunks, u32 num_chunks,
2200  				u64 *cs_seq, u32 flags, u32 timeout,
2201  				u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count)
2202  {
2203  	struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL;
2204  	bool handle_found = false, is_wait_cs = false,
2205  			wait_cs_submitted = false,
2206  			cs_encaps_signals = false;
2207  	struct hl_cs_chunk *cs_chunk_array, *chunk;
2208  	bool staged_cs_with_encaps_signals = false;
2209  	struct hw_queue_properties *hw_queue_prop;
2210  	struct hl_device *hdev = hpriv->hdev;
2211  	struct hl_cs_compl *sig_waitcs_cmpl;
2212  	u32 q_idx, collective_engine_id = 0;
2213  	struct hl_cs_counters_atomic *cntr;
2214  	struct hl_fence *sig_fence = NULL;
2215  	struct hl_ctx *ctx = hpriv->ctx;
2216  	enum hl_queue_type q_type;
2217  	struct hl_cs *cs;
2218  	u64 signal_seq;
2219  	int rc;
2220  
2221  	cntr = &hdev->aggregated_cs_counters;
2222  	*cs_seq = ULLONG_MAX;
2223  
2224  	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
2225  			ctx);
2226  	if (rc)
2227  		goto out;
2228  
2229  	/* currently it is guaranteed to have only one chunk */
2230  	chunk = &cs_chunk_array[0];
2231  
2232  	if (chunk->queue_index >= hdev->asic_prop.max_queues) {
2233  		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2234  		atomic64_inc(&cntr->validation_drop_cnt);
2235  		dev_err(hdev->dev, "Queue index %d is invalid\n",
2236  			chunk->queue_index);
2237  		rc = -EINVAL;
2238  		goto free_cs_chunk_array;
2239  	}
2240  
2241  	q_idx = chunk->queue_index;
2242  	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
2243  	q_type = hw_queue_prop->type;
2244  
2245  	if (!hw_queue_prop->supports_sync_stream) {
2246  		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2247  		atomic64_inc(&cntr->validation_drop_cnt);
2248  		dev_err(hdev->dev,
2249  			"Queue index %d does not support sync stream operations\n",
2250  			q_idx);
2251  		rc = -EINVAL;
2252  		goto free_cs_chunk_array;
2253  	}
2254  
2255  	if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
2256  		if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
2257  			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2258  			atomic64_inc(&cntr->validation_drop_cnt);
2259  			dev_err(hdev->dev,
2260  				"Queue index %d is invalid\n", q_idx);
2261  			rc = -EINVAL;
2262  			goto free_cs_chunk_array;
2263  		}
2264  
2265  		if (!hdev->nic_ports_mask) {
2266  			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2267  			atomic64_inc(&cntr->validation_drop_cnt);
2268  			dev_err(hdev->dev,
2269  				"Collective operations not supported when NIC ports are disabled");
2270  			rc = -EINVAL;
2271  			goto free_cs_chunk_array;
2272  		}
2273  
2274  		collective_engine_id = chunk->collective_engine_id;
2275  	}
2276  
2277  	is_wait_cs = !!(cs_type == CS_TYPE_WAIT ||
2278  			cs_type == CS_TYPE_COLLECTIVE_WAIT);
2279  
2280  	cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
2281  
2282  	if (is_wait_cs) {
2283  		rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq,
2284  				ctx, cs_encaps_signals);
2285  		if (rc)
2286  			goto free_cs_chunk_array;
2287  
2288  		if (cs_encaps_signals) {
2289  			/* check if the cs sequence has an encapsulated
2290  			 * signals handle
2291  			 */
2292  			struct idr *idp;
2293  			u32 id;
2294  
2295  			spin_lock(&ctx->sig_mgr.lock);
2296  			idp = &ctx->sig_mgr.handles;
2297  			idr_for_each_entry(idp, encaps_sig_hdl, id) {
2298  				if (encaps_sig_hdl->cs_seq == signal_seq) {
2299  					/* Get a refcount to protect removing this handle from the
2300  					 * idr, needed when multiple wait CSs are used with an offset
2301  					 * to wait on reserved encaps signals.
2302  					 * Since kref_put of this handle is executed outside the
2303  					 * current lock, it is possible that the handle refcount
2304  					 * is 0 but it has yet to be removed from the list. In this
2305  					 * case we need to consider the handle as not valid.
2306  					 */
2307  					if (kref_get_unless_zero(&encaps_sig_hdl->refcount))
2308  						handle_found = true;
2309  					break;
2310  				}
2311  			}
2312  			spin_unlock(&ctx->sig_mgr.lock);
2313  
2314  			if (!handle_found) {
2315  				/* treat as signal CS already finished */
2316  				dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
2317  						signal_seq);
2318  				rc = 0;
2319  				goto free_cs_chunk_array;
2320  			}
2321  
2322  			/* validate also the signal offset value */
2323  			if (chunk->encaps_signal_offset >
2324  					encaps_sig_hdl->count) {
2325  				dev_err(hdev->dev, "offset(%u) value exceeds max reserved signals count(%u)!\n",
2326  						chunk->encaps_signal_offset,
2327  						encaps_sig_hdl->count);
2328  				rc = -EINVAL;
2329  				goto free_cs_chunk_array;
2330  			}
2331  		}
2332  
2333  		sig_fence = hl_ctx_get_fence(ctx, signal_seq);
2334  		if (IS_ERR(sig_fence)) {
2335  			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2336  			atomic64_inc(&cntr->validation_drop_cnt);
2337  			dev_err(hdev->dev,
2338  				"Failed to get signal CS with seq 0x%llx\n",
2339  				signal_seq);
2340  			rc = PTR_ERR(sig_fence);
2341  			goto free_cs_chunk_array;
2342  		}
2343  
2344  		if (!sig_fence) {
2345  			/* signal CS already finished */
2346  			rc = 0;
2347  			goto free_cs_chunk_array;
2348  		}
2349  
2350  		sig_waitcs_cmpl =
2351  			container_of(sig_fence, struct hl_cs_compl, base_fence);
2352  
2353  		staged_cs_with_encaps_signals = !!
2354  				(sig_waitcs_cmpl->type == CS_TYPE_DEFAULT &&
2355  				(flags & HL_CS_FLAGS_ENCAP_SIGNALS));
2356  
2357  		if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL &&
2358  				!staged_cs_with_encaps_signals) {
2359  			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2360  			atomic64_inc(&cntr->validation_drop_cnt);
2361  			dev_err(hdev->dev,
2362  				"CS seq 0x%llx is not of a signal/encaps-signal CS\n",
2363  				signal_seq);
2364  			hl_fence_put(sig_fence);
2365  			rc = -EINVAL;
2366  			goto free_cs_chunk_array;
2367  		}
2368  
2369  		if (completion_done(&sig_fence->completion)) {
2370  			/* signal CS already finished */
2371  			hl_fence_put(sig_fence);
2372  			rc = 0;
2373  			goto free_cs_chunk_array;
2374  		}
2375  	}
2376  
2377  	rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
2378  	if (rc) {
2379  		if (is_wait_cs)
2380  			hl_fence_put(sig_fence);
2381  
2382  		goto free_cs_chunk_array;
2383  	}
2384  
2385  	/*
2386  	 * Save the signal CS fence for later initialization right before
2387  	 * hanging the wait CS on the queue.
2388  	 * For the encaps signals case, we save the cs sequence and handle pointer
2389  	 * for later initialization.
2390  	 */
2391  	if (is_wait_cs) {
2392  		cs->signal_fence = sig_fence;
2393  		/* store the handle pointer, so we don't have to
2394  		 * look for it again later on in the flow,
2395  		 * when we need to set the SOB info in hw_queue.
2396  		 */
2397  		if (cs->encaps_signals)
2398  			cs->encaps_sig_hdl = encaps_sig_hdl;
2399  	}
2400  
2401  	hl_debugfs_add_cs(cs);
2402  
2403  	*cs_seq = cs->sequence;
2404  
2405  	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
2406  		rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
2407  				q_idx, chunk->encaps_signal_offset);
2408  	else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
2409  		rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
2410  				cs, q_idx, collective_engine_id,
2411  				chunk->encaps_signal_offset);
2412  	else {
2413  		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2414  		atomic64_inc(&cntr->validation_drop_cnt);
2415  		rc = -EINVAL;
2416  	}
2417  
2418  	if (rc)
2419  		goto free_cs_object;
2420  
2421  	if (q_type == QUEUE_TYPE_HW)
2422  		INIT_WORK(&cs->finish_work, cs_completion);
2423  
2424  	rc = hl_hw_queue_schedule_cs(cs);
2425  	if (rc) {
2426  		/* In case the wait CS failed here, it means the signal CS
2427  		 * already completed. We want to free all of its related objects,
2428  		 * but we don't want to fail the ioctl.
2429  		 */
2430  		if (is_wait_cs)
2431  			rc = 0;
2432  		else if (rc != -EAGAIN)
2433  			dev_err(hdev->dev,
2434  				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
2435  				ctx->asid, cs->sequence, rc);
2436  		goto free_cs_object;
2437  	}
2438  
2439  	*signal_sob_addr_offset = cs->sob_addr_offset;
2440  	*signal_initial_sob_count = cs->initial_sob_count;
2441  
2442  	rc = HL_CS_STATUS_SUCCESS;
2443  	if (is_wait_cs)
2444  		wait_cs_submitted = true;
2445  	goto put_cs;
2446  
2447  free_cs_object:
2448  	cs_rollback(hdev, cs);
2449  	*cs_seq = ULLONG_MAX;
2450  	/* The path below is both for good and erroneous exits */
2451  put_cs:
2452  	/* We finished with the CS in this function, so put the ref */
2453  	cs_put(cs);
2454  free_cs_chunk_array:
2455  	if (!wait_cs_submitted && cs_encaps_signals && handle_found && is_wait_cs)
2456  		kref_put(&encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx);
2457  	kfree(cs_chunk_array);
2458  out:
2459  	return rc;
2460  }
2461  
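/*
 * cs_ioctl_engine_cores - run/halt a user-supplied list of engine cores
 *
 * The core-id array is copied from user space and passed to the
 * ASIC-specific set_engine_cores() callback.
 */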
2462  static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
2463  						u32 num_engine_cores, u32 core_command)
2464  {
2465  	struct hl_device *hdev = hpriv->hdev;
2466  	void __user *engine_cores_arr;
2467  	u32 *cores;
2468  	int rc;
2469  
2470  	if (!hdev->asic_prop.supports_engine_modes)
2471  		return -EPERM;
2472  
2473  	if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
2474  		dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
2475  		return -EINVAL;
2476  	}
2477  
2478  	if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) {
2479  		dev_err(hdev->dev, "Engine core command is invalid\n");
2480  		return -EINVAL;
2481  	}
2482  
2483  	engine_cores_arr = (void __user *) (uintptr_t) engine_cores;
2484  	cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL);
2485  	if (!cores)
2486  		return -ENOMEM;
2487  
2488  	if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) {
2489  		dev_err(hdev->dev, "Failed to copy core-ids array from user\n");
2490  		kfree(cores);
2491  		return -EFAULT;
2492  	}
2493  
2494  	rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command);
2495  	kfree(cores);
2496  
2497  	return rc;
2498  }
2499  
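/*
 * cs_ioctl_engines - send a command to a user-supplied list of engines
 *
 * The engine-id array is copied from user space and passed to the
 * ASIC-specific set_engines() callback.
 */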
2500  static int cs_ioctl_engines(struct hl_fpriv *hpriv, u64 engines_arr_user_addr,
2501  						u32 num_engines, enum hl_engine_command command)
2502  {
2503  	struct hl_device *hdev = hpriv->hdev;
2504  	u32 *engines, max_num_of_engines;
2505  	void __user *engines_arr;
2506  	int rc;
2507  
2508  	if (!hdev->asic_prop.supports_engine_modes)
2509  		return -EPERM;
2510  
2511  	if (command >= HL_ENGINE_COMMAND_MAX) {
2512  		dev_err(hdev->dev, "Engine command is invalid\n");
2513  		return -EINVAL;
2514  	}
2515  
2516  	max_num_of_engines = hdev->asic_prop.max_num_of_engines;
2517  	if (command == HL_ENGINE_CORE_RUN || command == HL_ENGINE_CORE_HALT)
2518  		max_num_of_engines = hdev->asic_prop.num_engine_cores;
2519  
2520  	if (!num_engines || num_engines > max_num_of_engines) {
2521  		dev_err(hdev->dev, "Number of engines %d is invalid\n", num_engines);
2522  		return -EINVAL;
2523  	}
2524  
2525  	engines_arr = (void __user *) (uintptr_t) engines_arr_user_addr;
2526  	engines = kmalloc_array(num_engines, sizeof(u32), GFP_KERNEL);
2527  	if (!engines)
2528  		return -ENOMEM;
2529  
2530  	if (copy_from_user(engines, engines_arr, num_engines * sizeof(u32))) {
2531  		dev_err(hdev->dev, "Failed to copy engine-ids array from user\n");
2532  		kfree(engines);
2533  		return -EFAULT;
2534  	}
2535  
2536  	rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command);
2537  	kfree(engines);
2538  
2539  	return rc;
2540  }
2541  
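/*
 * cs_ioctl_flush_pci_hbw_writes - flush high-bandwidth writes towards the device
 *
 * Performs a read from the ASIC's designated HBW flush register. If no such
 * register is defined, the operation is not supported.
 */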
2542  static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
2543  {
2544  	struct hl_device *hdev = hpriv->hdev;
2545  	struct asic_fixed_properties *prop = &hdev->asic_prop;
2546  
2547  	if (!prop->hbw_flush_reg) {
2548  		dev_dbg(hdev->dev, "HBW flush is not supported\n");
2549  		return -EOPNOTSUPP;
2550  	}
2551  
2552  	RREG32(prop->hbw_flush_reg);
2553  
2554  	return 0;
2555  }
2556  
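/*
 * hl_cs_ioctl - main entry point of the CS IOCTL
 *
 * Performs sanity checks and a context switch if needed, then dispatches the
 * request according to the CS type derived from the flags. Unless -EAGAIN is
 * returned, the output arguments are filled per CS type.
 */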
2557  int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
2558  {
2559  	struct hl_fpriv *hpriv = file_priv->driver_priv;
2560  	union hl_cs_args *args = data;
2561  	enum hl_cs_type cs_type = 0;
2562  	u64 cs_seq = ULONG_MAX;
2563  	void __user *chunks;
2564  	u32 num_chunks, flags, timeout,
2565  		signals_count = 0, sob_addr = 0, handle_id = 0;
2566  	u16 sob_initial_count = 0;
2567  	int rc;
2568  
2569  	rc = hl_cs_sanity_checks(hpriv, args);
2570  	if (rc)
2571  		goto out;
2572  
2573  	rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
2574  	if (rc)
2575  		goto out;
2576  
2577  	cs_type = hl_cs_get_cs_type(args->in.cs_flags &
2578  					~HL_CS_FLAGS_FORCE_RESTORE);
2579  	chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
2580  	num_chunks = args->in.num_chunks_execute;
2581  	flags = args->in.cs_flags;
2582  
2583  	/* In case this is a staged CS, user should supply the CS sequence */
2584  	if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
2585  			!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
2586  		cs_seq = args->in.seq;
2587  
2588  	timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
2589  			? msecs_to_jiffies(args->in.timeout * 1000)
2590  			: hpriv->hdev->timeout_jiffies;
2591  
2592  	switch (cs_type) {
2593  	case CS_TYPE_SIGNAL:
2594  	case CS_TYPE_WAIT:
2595  	case CS_TYPE_COLLECTIVE_WAIT:
2596  		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
2597  					&cs_seq, args->in.cs_flags, timeout,
2598  					&sob_addr, &sob_initial_count);
2599  		break;
2600  	case CS_RESERVE_SIGNALS:
2601  		rc = cs_ioctl_reserve_signals(hpriv,
2602  					args->in.encaps_signals_q_idx,
2603  					args->in.encaps_signals_count,
2604  					&handle_id, &sob_addr, &signals_count);
2605  		break;
2606  	case CS_UNRESERVE_SIGNALS:
2607  		rc = cs_ioctl_unreserve_signals(hpriv,
2608  					args->in.encaps_sig_handle_id);
2609  		break;
2610  	case CS_TYPE_ENGINE_CORE:
2611  		rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores,
2612  				args->in.num_engine_cores, args->in.core_command);
2613  		break;
2614  	case CS_TYPE_ENGINES:
2615  		rc = cs_ioctl_engines(hpriv, args->in.engines,
2616  				args->in.num_engines, args->in.engine_command);
2617  		break;
2618  	case CS_TYPE_FLUSH_PCI_HBW_WRITES:
2619  		rc = cs_ioctl_flush_pci_hbw_writes(hpriv);
2620  		break;
2621  	default:
2622  		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
2623  						args->in.cs_flags,
2624  						args->in.encaps_sig_handle_id,
2625  						timeout, &sob_initial_count);
2626  		break;
2627  	}
2628  out:
2629  	if (rc != -EAGAIN) {
2630  		memset(args, 0, sizeof(*args));
2631  
2632  		switch (cs_type) {
2633  		case CS_RESERVE_SIGNALS:
2634  			args->out.handle_id = handle_id;
2635  			args->out.sob_base_addr_offset = sob_addr;
2636  			args->out.count = signals_count;
2637  			break;
2638  		case CS_TYPE_SIGNAL:
2639  			args->out.sob_base_addr_offset = sob_addr;
2640  			args->out.sob_count_before_submission = sob_initial_count;
2641  			args->out.seq = cs_seq;
2642  			break;
2643  		case CS_TYPE_DEFAULT:
2644  			args->out.sob_count_before_submission = sob_initial_count;
2645  			args->out.seq = cs_seq;
2646  			break;
2647  		default:
2648  			args->out.seq = cs_seq;
2649  			break;
2650  		}
2651  
2652  		args->out.status = rc;
2653  	}
2654  
2655  	return rc;
2656  }
2657  
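
/*
 * hl_wait_for_fence - wait for a CS fence and report its status
 *
 * With a zero timeout the fence is only polled. A NULL fence means the fence
 * is already gone, in which case the CS outcome store is consulted for the
 * result of the CS.
 */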
2658  static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence,
2659  				enum hl_cs_wait_status *status, u64 timeout_us, s64 *timestamp)
2660  {
2661  	struct hl_device *hdev = ctx->hdev;
2662  	ktime_t timestamp_kt;
2663  	long completion_rc;
2664  	int rc = 0, error;
2665  
2666  	if (IS_ERR(fence)) {
2667  		rc = PTR_ERR(fence);
2668  		if (rc == -EINVAL)
2669  			dev_notice_ratelimited(hdev->dev,
2670  				"Can't wait on CS %llu because current CS is at seq %llu\n",
2671  				seq, ctx->cs_sequence);
2672  		return rc;
2673  	}
2674  
2675  	if (!fence) {
2676  		if (!hl_pop_cs_outcome(&ctx->outcome_store, seq, &timestamp_kt, &error)) {
2677  			dev_dbg(hdev->dev,
2678  				"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
2679  				seq, ctx->cs_sequence);
2680  			*status = CS_WAIT_STATUS_GONE;
2681  			return 0;
2682  		}
2683  
2684  		completion_rc = 1;
2685  		goto report_results;
2686  	}
2687  
2688  	if (!timeout_us) {
2689  		completion_rc = completion_done(&fence->completion);
2690  	} else {
2691  		unsigned long timeout;
2692  
2693  		timeout = (timeout_us == MAX_SCHEDULE_TIMEOUT) ?
2694  				timeout_us : usecs_to_jiffies(timeout_us);
2695  		completion_rc =
2696  			wait_for_completion_interruptible_timeout(
2697  				&fence->completion, timeout);
2698  	}
2699  
2700  	error = fence->error;
2701  	timestamp_kt = fence->timestamp;
2702  
2703  report_results:
2704  	if (completion_rc > 0) {
2705  		*status = CS_WAIT_STATUS_COMPLETED;
2706  		if (timestamp)
2707  			*timestamp = ktime_to_ns(timestamp_kt);
2708  	} else {
2709  		*status = CS_WAIT_STATUS_BUSY;
2710  	}
2711  
2712  	if (completion_rc == -ERESTARTSYS)
2713  		rc = completion_rc;
2714  	else if (error == -ETIMEDOUT || error == -EIO)
2715  		rc = error;
2716  
2717  	return rc;
2718  }
2719  
2720  /*
2721   * hl_cs_poll_fences - iterate CS fences to check for CS completion
2722   *
2723   * @mcs_data: multi-CS internal data
2724   * @mcs_compl: multi-CS completion structure
2725   *
2726   * @return 0 on success, otherwise non 0 error code
2727   *
2728   * The function iterates over all CS sequences in the list and sets a bit in
2729   * completion_bitmap for each completed CS.
2730   * While iterating, the function adds the stream map of each fence in the
2731   * array to the multi-CS completion QID stream map, to be used by CSs to
2732   * complete the multi-CS context.
2733   * This function shall be called after taking context ref
2734   */
 */
2735  static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_completion *mcs_compl)
2736  {
2737  	struct hl_fence **fence_ptr = mcs_data->fence_arr;
2738  	struct hl_device *hdev = mcs_data->ctx->hdev;
2739  	int i, rc, arr_len = mcs_data->arr_len;
2740  	u64 *seq_arr = mcs_data->seq_arr;
2741  	ktime_t max_ktime, first_cs_time;
2742  	enum hl_cs_wait_status status;
2743  
2744  	memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *));
2745  
2746  	/* get all fences under the same lock */
2747  	rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len);
2748  	if (rc)
2749  		return rc;
2750  
2751  	/*
2752  	 * Re-initialize the completion here to handle 2 possible cases:
2753  	 * 1. A CS will complete the multi-CS prior to clearing the completion, in
2754  	 *    which case the fence iteration is guaranteed to catch the CS completion.
2755  	 * 2. The completion will occur after re-init of the completion,
2756  	 *    in which case we will wake up immediately in wait_for_completion.
2757  	 */
2758  	reinit_completion(&mcs_compl->completion);
2759  
2760  	/*
2761  	 * set to the maximum time to verify the timestamp is valid: if at the end
2762  	 * this value is maintained, no timestamp was updated
2763  	 */
2764  	max_ktime = ktime_set(KTIME_SEC_MAX, 0);
2765  	first_cs_time = max_ktime;
2766  
2767  	for (i = 0; i < arr_len; i++, fence_ptr++) {
2768  		struct hl_fence *fence = *fence_ptr;
2769  
2770  		/*
2771  		 * In order to prevent the case where we wait until timeout even though a CS
2772  		 * associated with the multi-CS actually completed, we do things in the below order:
2773  		 * 1. For each fence, set its QID map in the multi-CS completion QID map. This way
2774  		 *    any CS can, potentially, complete the multi-CS for the specific QID (note
2775  		 *    that once the completion is initialized, calling complete* and then waiting on
2776  		 *    the completion will cause it to return at once).
2777  		 * 2. Only after allowing multi-CS completion for the specific QID do we check whether
2778  		 *    the specific CS already completed (and thus the wait for completion part will
2779  		 *    be skipped). If the CS has not completed, it is guaranteed that the completing CS
2780  		 *    will wake up the completion.
2781  		 */
2782  		if (fence)
2783  			mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map;
2784  
2785  		/*
2786  		 * function won't sleep as it is called with timeout 0 (i.e.
2787  		 * poll the fence)
2788  		 */
2789  		rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence, &status, 0, NULL);
2790  		if (rc) {
2791  			dev_err(hdev->dev,
2792  				"wait_for_fence error :%d for CS seq %llu\n",
2793  								rc, seq_arr[i]);
2794  			break;
2795  		}
2796  
2797  		switch (status) {
2798  		case CS_WAIT_STATUS_BUSY:
2799  			/* CS did not finish, QID to wait on is already stored */
2800  			break;
2801  		case CS_WAIT_STATUS_COMPLETED:
2802  			/*
2803  			 * Using mcs_handling_done to avoid the possibility of mcs_data
2804  			 * returning to the user indicating the CS completed before it
2805  			 * finished all of its mcs handling, to avoid a race the next
2806  			 * time the user waits for mcs.
2807  			 * Note: when reaching this case the fence is definitely not NULL,
2808  			 *       but a NULL check was added to overcome static analysis
2809  			 */
2810  			if (fence && !fence->mcs_handling_done) {
2811  				/*
2812  				 * In case the multi-CS is completed but the MCS handling is not
2813  				 * done, we "complete" the multi-CS to prevent it from waiting
2814  				 * until time-out, and the "multi-CS handling done" will have
2815  				 * another chance at the next iteration
2816  				 */
2817  				complete_all(&mcs_compl->completion);
2818  				break;
2819  			}
2820  
2821  			mcs_data->completion_bitmap |= BIT(i);
2822  			/*
2823  			 * For all completed CSs we take the earliest timestamp.
2824  			 * For this we have to validate that the timestamp is
2825  			 * earliest of all timestamps so far.
2826  			 */
2827  			if (fence && mcs_data->update_ts &&
2828  					(ktime_compare(fence->timestamp, first_cs_time) < 0))
2829  				first_cs_time = fence->timestamp;
2830  			break;
2831  		case CS_WAIT_STATUS_GONE:
2832  			mcs_data->update_ts = false;
2833  			mcs_data->gone_cs = true;
2834  			/*
2835  			 * It is possible to get old sequence numbers from the user
2836  			 * which relate to already completed CSs whose fences are
2837  			 * already gone. In this case, the CS is set as completed but
2838  			 * there is no need to consider its QID for mcs completion.
2839  			 */
2840  			mcs_data->completion_bitmap |= BIT(i);
2841  			break;
2842  		default:
2843  			dev_err(hdev->dev, "Invalid fence status\n");
2844  			rc = -EINVAL;
2845  			break;
2846  		}
2847  
2848  	}
2849  
2850  	hl_fences_put(mcs_data->fence_arr, arr_len);
2851  
2852  	if (mcs_data->update_ts &&
2853  			(ktime_compare(first_cs_time, max_ktime) != 0))
2854  		mcs_data->timestamp = ktime_to_ns(first_cs_time);
2855  
2856  	return rc;
2857  }
2858  
2859  static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
2860  				enum hl_cs_wait_status *status, s64 *timestamp)
2861  {
2862  	struct hl_fence *fence;
2863  	int rc = 0;
2864  
2865  	if (timestamp)
2866  		*timestamp = 0;
2867  
2868  	hl_ctx_get(ctx);
2869  
2870  	fence = hl_ctx_get_fence(ctx, seq);
2871  
2872  	rc = hl_wait_for_fence(ctx, seq, fence, status, timeout_us, timestamp);
2873  	hl_fence_put(fence);
2874  	hl_ctx_put(ctx);
2875  
2876  	return rc;
2877  }
2878  
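/*
 * hl_usecs64_to_jiffies - convert a 64-bit microseconds value to jiffies
 *
 * Values up to U32_MAX go through usecs_to_jiffies() directly. Larger values
 * are converted via nanoseconds, clamped so that usecs * NSEC_PER_USEC does
 * not overflow 64 bits.
 */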
2879  static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
2880  {
2881  	if (usecs <= U32_MAX)
2882  		return usecs_to_jiffies(usecs);
2883  
2884  	/*
2885  	 * If the value in nanoseconds would overflow 64 bits, use the largest
2886  	 * 64 bit value.
2887  	 */
2888  	if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
2889  		return nsecs_to_jiffies(U64_MAX);
2890  
2891  	return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
2892  }
2893  
2894  /*
2895   * hl_wait_multi_cs_completion_init - init completion structure
2896   *
2897   * @hdev: pointer to habanalabs device structure
2900   *
2901   * @return valid completion struct pointer on success, otherwise error pointer
2902   *
2903   * up to MULTI_CS_MAX_USER_CTX calls can be done concurrently to the driver.
2904   * Up to MULTI_CS_MAX_USER_CTX calls can be done concurrently to the driver.
2905   * The function gets the first available completion (by marking it "used")
2906   * and initializes its values.
 */
2907  static struct multi_cs_completion *hl_wait_multi_cs_completion_init(struct hl_device *hdev)
2908  {
2909  	struct multi_cs_completion *mcs_compl;
2910  	int i;
2911  
2912  	/* find free multi_cs completion structure */
2913  	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
2914  		mcs_compl = &hdev->multi_cs_completion[i];
2915  		spin_lock(&mcs_compl->lock);
2916  		if (!mcs_compl->used) {
2917  			mcs_compl->used = 1;
2918  			mcs_compl->timestamp = 0;
2919  			/*
2920  			 * init QID map to 0 to avoid completion by CSs. the actual QID map
2921  			 * to multi-CS CSs will be set incrementally at a later stage
2922  			 */
2923  			mcs_compl->stream_master_qid_map = 0;
2924  			spin_unlock(&mcs_compl->lock);
2925  			break;
2926  		}
2927  		spin_unlock(&mcs_compl->lock);
2928  	}
2929  
2930  	if (i == MULTI_CS_MAX_USER_CTX) {
2931  		dev_err(hdev->dev, "no available multi-CS completion structure\n");
2932  		return ERR_PTR(-ENOMEM);
2933  	}
2934  	return mcs_compl;
2935  }
2936  
2937  /*
2938   * hl_wait_multi_cs_completion_fini - return completion structure and set as
2939   *                                    unused
2940   *
2941   * @mcs_compl: pointer to the completion structure
2942   */
 */
2943  static void hl_wait_multi_cs_completion_fini(
2944  					struct multi_cs_completion *mcs_compl)
2945  {
2946  	/*
2947  	 * free completion structure, do it under lock to be in-sync with the
2948  	 * thread that signals completion
2949  	 */
2950  	spin_lock(&mcs_compl->lock);
2951  	mcs_compl->used = 0;
2952  	spin_unlock(&mcs_compl->lock);
2953  }
2954  
2955  /*
2956   * hl_wait_multi_cs_completion - wait for first CS to complete
2957   *
2958   * @mcs_data: multi-CS internal data
2959   *
2960   * @return 0 on success, otherwise non 0 error code
2961   */
 */
2962  static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data,
2963  						struct multi_cs_completion *mcs_compl)
2964  {
2965  	long completion_rc;
2966  
2967  	completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion,
2968  									mcs_data->timeout_jiffies);
2969  
2970  	/* update timestamp */
2971  	if (completion_rc > 0)
2972  		mcs_data->timestamp = mcs_compl->timestamp;
2973  
2974  	if (completion_rc == -ERESTARTSYS)
2975  		return completion_rc;
2976  
2977  	mcs_data->wait_status = completion_rc;
2978  
2979  	return 0;
2980  }
2981  
2982  /*
2983   * hl_multi_cs_completion_init - init array of multi-CS completion structures
2984   *
2985   * @hdev: pointer to habanalabs device structure
2986   */
 */
2987  void hl_multi_cs_completion_init(struct hl_device *hdev)
2988  {
2989  	struct multi_cs_completion *mcs_cmpl;
2990  	int i;
2991  
2992  	for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
2993  		mcs_cmpl = &hdev->multi_cs_completion[i];
2994  		mcs_cmpl->used = 0;
2995  		spin_lock_init(&mcs_cmpl->lock);
2996  		init_completion(&mcs_cmpl->completion);
2997  	}
2998  }
2999  
3000  /*
3001   * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl
3002   *
3003   * @hpriv: pointer to the private data of the fd
3004   * @data: pointer to multi-CS wait ioctl in/out args
3005   *
3006   */
 */
3007  static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
3008  {
3009  	struct multi_cs_completion *mcs_compl;
3010  	struct hl_device *hdev = hpriv->hdev;
3011  	struct multi_cs_data mcs_data = {};
3012  	union hl_wait_cs_args *args = data;
3013  	struct hl_ctx *ctx = hpriv->ctx;
3014  	struct hl_fence **fence_arr;
3015  	void __user *seq_arr;
3016  	u32 size_to_copy;
3017  	u64 *cs_seq_arr;
3018  	u8 seq_arr_len;
3019  	int rc, i;
3020  
3021  	for (i = 0 ; i < sizeof(args->in.pad) ; i++)
3022  		if (args->in.pad[i]) {
3023  			dev_dbg(hdev->dev, "Padding bytes must be 0\n");
3024  			return -EINVAL;
3025  		}
3026  
3027  	if (!hdev->supports_wait_for_multi_cs) {
3028  		dev_err(hdev->dev, "Wait for multi CS is not supported\n");
3029  		return -EPERM;
3030  	}
3031  
3032  	seq_arr_len = args->in.seq_arr_len;
3033  
3034  	if (seq_arr_len > HL_WAIT_MULTI_CS_LIST_MAX_LEN) {
3035  		dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n",
3036  				HL_WAIT_MULTI_CS_LIST_MAX_LEN, seq_arr_len);
3037  		return -EINVAL;
3038  	}
3039  
3040  	/* allocate memory for sequence array */
3041  	cs_seq_arr =
3042  		kmalloc_array(seq_arr_len, sizeof(*cs_seq_arr), GFP_KERNEL);
3043  	if (!cs_seq_arr)
3044  		return -ENOMEM;
3045  
3046  	/* copy CS sequence array from user */
3047  	seq_arr = (void __user *) (uintptr_t) args->in.seq;
3048  	size_to_copy = seq_arr_len * sizeof(*cs_seq_arr);
3049  	if (copy_from_user(cs_seq_arr, seq_arr, size_to_copy)) {
3050  		dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n");
3051  		rc = -EFAULT;
3052  		goto free_seq_arr;
3053  	}
3054  
3055  	/* allocate array for the fences */
3056  	fence_arr = kmalloc_array(seq_arr_len, sizeof(struct hl_fence *), GFP_KERNEL);
3057  	if (!fence_arr) {
3058  		rc = -ENOMEM;
3059  		goto free_seq_arr;
3060  	}
3061  
3062  	/* initialize the multi-CS internal data */
3063  	mcs_data.ctx = ctx;
3064  	mcs_data.seq_arr = cs_seq_arr;
3065  	mcs_data.fence_arr = fence_arr;
3066  	mcs_data.arr_len = seq_arr_len;
3067  
3068  	hl_ctx_get(ctx);
3069  
3070  	/* wait (with timeout) for the first CS to be completed */
3071  	mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us);
3072  	mcs_compl = hl_wait_multi_cs_completion_init(hdev);
3073  	if (IS_ERR(mcs_compl)) {
3074  		rc = PTR_ERR(mcs_compl);
3075  		goto put_ctx;
3076  	}
3077  
3078  	/* poll all CS fences, extract timestamp */
3079  	mcs_data.update_ts = true;
3080  	rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
3081  	/*
3082  	 * skip wait for CS completion when one of the below is true:
3083  	 * - an error on the poll function
3084  	 * - one or more CS in the list completed
3085  	 * - the user called ioctl with timeout 0
3086  	 */
3087  	if (rc || mcs_data.completion_bitmap || !args->in.timeout_us)
3088  		goto completion_fini;
3089  
3090  	while (true) {
3091  		rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl);
3092  		if (rc || (mcs_data.wait_status == 0))
3093  			break;
3094  
3095  		/*
3096  		 * poll fences once again to update the CS map.
3097  		 * no timestamp should be updated this time.
3098  		 */
3099  		mcs_data.update_ts = false;
3100  		rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
3101  
3102  		if (rc || mcs_data.completion_bitmap)
3103  			break;
3104  
3105  		/*
3106  		 * If hl_wait_multi_cs_completion returned before the timeout (i.e.
3107  		 * it got a completion), it either got completed by a CS in the multi-CS list
3108  		 * (in which case the indication will be a non-empty completion_bitmap) or it
3109  		 * got completed by a CS submitted to one of the shared stream masters but
3110  		 * not in the multi-CS list (in which case we should wait again but modify
3111  		 * the timeout and set the timestamp to zero to let a CS related to the current
3112  		 * multi-CS set a new, relevant, timestamp)
3113  		 */
3114  		mcs_data.timeout_jiffies = mcs_data.wait_status;
3115  		mcs_compl->timestamp = 0;
3116  	}
3117  
3118  completion_fini:
3119  	hl_wait_multi_cs_completion_fini(mcs_compl);
3120  
3121  put_ctx:
3122  	hl_ctx_put(ctx);
3123  	kfree(fence_arr);
3124  
3125  free_seq_arr:
3126  	kfree(cs_seq_arr);
3127  
3128  	if (rc == -ERESTARTSYS) {
3129  		dev_err_ratelimited(hdev->dev,
3130  				"user process got signal while waiting for Multi-CS\n");
3131  		rc = -EINTR;
3132  	}
3133  
3134  	if (rc)
3135  		return rc;
3136  
3137  	/* update output args */
3138  	memset(args, 0, sizeof(*args));
3139  
3140  	if (mcs_data.completion_bitmap) {
3141  		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
3142  		args->out.cs_completion_map = mcs_data.completion_bitmap;
3143  
3144  		/* if the timestamp is not 0, it's valid */
3145  		if (mcs_data.timestamp) {
3146  			args->out.timestamp_nsec = mcs_data.timestamp;
3147  			args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
3148  		}
3149  
3150  		/* update if some CS was gone */
3151  		if (!mcs_data.timestamp)
3152  			args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
3153  	} else {
3154  		args->out.status = HL_WAIT_CS_STATUS_BUSY;
3155  	}
3156  
3157  	return 0;
3158  }
3159  
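/*
 * hl_cs_wait_ioctl - wait for a single CS to complete
 *
 * Translates the fence-wait result into the ioctl status codes
 * (completed/busy/timed-out/aborted/gone) and reports the completion
 * timestamp when one is available.
 */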
3160  static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
3161  {
3162  	struct hl_device *hdev = hpriv->hdev;
3163  	union hl_wait_cs_args *args = data;
3164  	enum hl_cs_wait_status status;
3165  	u64 seq = args->in.seq;
3166  	s64 timestamp;
3167  	int rc;
3168  
3169  	rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq, &status, &timestamp);
3170  
3171  	if (rc == -ERESTARTSYS) {
3172  		dev_err_ratelimited(hdev->dev,
3173  			"user process got signal while waiting for CS handle %llu\n",
3174  			seq);
3175  		return -EINTR;
3176  	}
3177  
3178  	memset(args, 0, sizeof(*args));
3179  
3180  	if (rc) {
3181  		if (rc == -ETIMEDOUT) {
3182  			dev_err_ratelimited(hdev->dev,
3183  				"CS %llu has timed-out while user process is waiting for it\n",
3184  				seq);
3185  			args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
3186  		} else if (rc == -EIO) {
3187  			dev_err_ratelimited(hdev->dev,
3188  				"CS %llu has been aborted while user process is waiting for it\n",
3189  				seq);
3190  			args->out.status = HL_WAIT_CS_STATUS_ABORTED;
3191  		}
3192  		return rc;
3193  	}
3194  
3195  	if (timestamp) {
3196  		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
3197  		args->out.timestamp_nsec = timestamp;
3198  	}
3199  
3200  	switch (status) {
3201  	case CS_WAIT_STATUS_GONE:
3202  		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
3203  		fallthrough;
3204  	case CS_WAIT_STATUS_COMPLETED:
3205  		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
3206  		break;
3207  	case CS_WAIT_STATUS_BUSY:
3208  	default:
3209  		args->out.status = HL_WAIT_CS_STATUS_BUSY;
3210  		break;
3211  	}
3212  
3213  	return 0;
3214  }
3215  
3216  static inline void set_record_cq_info(struct hl_user_pending_interrupt *record,
3217  					struct hl_cb *cq_cb, u32 cq_offset, u32 target_value)
3218  {
3219  	record->ts_reg_info.cq_cb = cq_cb;
3220  	record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset;
3221  	record->cq_target_value = target_value;
3222  }
3223  
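/*
 * validate_and_get_ts_record - get the timestamp record at ts_offset
 *
 * Verifies that the requested offset does not exceed the end of the
 * timestamp buffer before returning a pointer to the record.
 */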
3224  static int validate_and_get_ts_record(struct device *dev,
3225  					struct hl_ts_buff *ts_buff, u64 ts_offset,
3226  					struct hl_user_pending_interrupt **req_event_record)
3227  {
3228  	struct hl_user_pending_interrupt *ts_cb_last;
3229  
3230  	*req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
3231  						ts_offset;
3232  	ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
3233  			(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
3234  
3235  	/* Validate ts_offset not exceeding last max */
3236  	if (*req_event_record >= ts_cb_last) {
3237  		dev_err(dev, "Ts offset(%llu) exceeds max CB offset(0x%llx)\n",
3238  				ts_offset, (u64)(uintptr_t)ts_cb_last);
3239  		return -EINVAL;
3240  	}
3241  
3242  	return 0;
3243  }
3244  
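/*
 * unregister_timestamp_node - remove a timestamp record from its interrupt list
 *
 * If the record is in use, it is removed from the interrupt's timestamp list
 * and the buffer/CQ CB references taken at registration time are released.
 * The interrupt's ts_list_lock is taken here only when need_lock is set.
 */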
3245  static void unregister_timestamp_node(struct hl_device *hdev,
3246  			struct hl_user_pending_interrupt *record, bool need_lock)
3247  {
3248  	struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt;
3249  	bool ts_rec_found = false;
3250  	unsigned long flags;
3251  
3252  	if (need_lock)
3253  		spin_lock_irqsave(&interrupt->ts_list_lock, flags);
3254  
3255  	if (record->ts_reg_info.in_use) {
3256  		record->ts_reg_info.in_use = false;
3257  		list_del(&record->list_node);
3258  		ts_rec_found = true;
3259  	}
3260  
3261  	if (need_lock)
3262  		spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
3263  
3264  	/* Put refcounts that were taken when we registered the event */
3265  	if (ts_rec_found) {
3266  		hl_mmap_mem_buf_put(record->ts_reg_info.buf);
3267  		hl_cb_put(record->ts_reg_info.cq_cb);
3268  	}
3269  }
3270  
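/*
 * Resolve the timestamp record for the requested offset. If the record is already
 * registered (possibly on a different interrupt), unregister it first and then fill
 * it with the new registration data. Called with the target interrupt's ts_list_lock
 * held; the lock may be dropped temporarily while unregistering from another interrupt.
 */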
3271  static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx,
3272  					struct wait_interrupt_data *data, unsigned long *flags,
3273  					struct hl_user_pending_interrupt **pend)
3274  {
3275  	struct hl_user_pending_interrupt *req_offset_record;
3276  	struct hl_ts_buff *ts_buff = data->buf->private;
3277  	bool need_lock = false;
3278  	int rc;
3279  
3280  	rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset,
3281  									&req_offset_record);
3282  	if (rc)
3283  		return rc;
3284  
3285  	/* If the node is already registered, unregister it first and then re-use it */
3286  	if (req_offset_record->ts_reg_info.in_use) {
3287  		/*
3288  		 * The interrupt here can differ from the one the node is currently registered
3289  		 * on, and we don't want to hold two list locks while unregistering, so drop
3290  		 * the new interrupt's list lock here and re-acquire it after the unregister.
3291  		 */
3292  		if (data->interrupt->interrupt_id !=
3293  				req_offset_record->ts_reg_info.interrupt->interrupt_id) {
3294  
3295  			need_lock = true;
3296  			spin_unlock_irqrestore(&data->interrupt->ts_list_lock, *flags);
3297  		}
3298  
3299  		unregister_timestamp_node(hdev, req_offset_record, need_lock);
3300  
3301  		if (need_lock)
3302  			spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags);
3303  	}
3304  
3305  	/* Fill in the new registration node info and add it to the list */
3306  	req_offset_record->ts_reg_info.in_use = true;
3307  	req_offset_record->ts_reg_info.buf = data->buf;
3308  	req_offset_record->ts_reg_info.timestamp_kernel_addr =
3309  			(u64 *) ts_buff->user_buff_address + data->ts_offset;
3310  	req_offset_record->ts_reg_info.interrupt = data->interrupt;
3311  	set_record_cq_info(req_offset_record, data->cq_cb, data->cq_offset,
3312  						data->target_value);
3313  
3314  	*pend = req_offset_record;
3315  
3316  	return rc;
3317  }
3318  
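/*
 * Register a timestamp record: validate the CQ counters CB and the timestamp buffer,
 * then either complete immediately if the target value was already reached, or queue
 * the record on the interrupt's ts list for the interrupt handler to complete later.
 */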
3319  static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
3320  				struct wait_interrupt_data *data,
3321  				u32 *status, u64 *timestamp)
3322  {
3323  	struct hl_user_pending_interrupt *pend;
3324  	unsigned long flags;
3325  	int rc = 0;
3326  
3327  	hl_ctx_get(ctx);
3328  
3329  	data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
3330  	if (!data->cq_cb) {
3331  		rc = -EINVAL;
3332  		goto put_ctx;
3333  	}
3334  
3335  	/* Validate the cq offset */
3336  	if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
3337  			((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
3338  		rc = -EINVAL;
3339  		goto put_cq_cb;
3340  	}
3341  
3342  	data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle);
3343  	if (!data->buf) {
3344  		rc = -EINVAL;
3345  		goto put_cq_cb;
3346  	}
3347  
3348  	spin_lock_irqsave(&data->interrupt->ts_list_lock, flags);
3349  
3350  	/* get ts buffer record */
3351  	rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &flags, &pend);
3352  	if (rc) {
3353  		spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
3354  		goto put_ts_buff;
3355  	}
3356  
3357  	/* Check the completion value here, as the interrupt could have been received
3358  	 * before we add the timestamp node to the ts list.
3359  	 */
3360  	if (*pend->cq_kernel_addr >= data->target_value) {
3361  		spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
3362  
3363  		pend->ts_reg_info.in_use = false;
3364  		*status = HL_WAIT_CS_STATUS_COMPLETED;
3365  		*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
3366  
3367  		goto put_ts_buff;
3368  	}
3369  
3370  	list_add_tail(&pend->list_node, &data->interrupt->ts_list_head);
3371  	spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
3372  
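	/* Registration is done - from here on the interrupt handler is responsible
	 * for writing the timestamp to the user buffer once the CQ counter reaches
	 * the target value.
	 */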
3373  	rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
3374  
3375  	hl_ctx_put(ctx);
3376  
3377  	return rc;
3378  
3379  put_ts_buff:
3380  	hl_mmap_mem_buf_put(data->buf);
3381  put_cq_cb:
3382  	hl_cb_put(data->cq_cb);
3383  put_ctx:
3384  	hl_ctx_put(ctx);
3385  
3386  	return rc;
3387  }
3388  
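/*
 * Wait on a CQ counter that resides in a kernel-allocated CB: if the target value was
 * not yet reached, queue a pending node on the interrupt's wait list and block until
 * the interrupt handler signals completion, the timeout expires or a signal arrives.
 */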
3389  static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
3390  				struct wait_interrupt_data *data,
3391  				u32 *status, u64 *timestamp)
3392  {
3393  	struct hl_user_pending_interrupt *pend;
3394  	unsigned long timeout, flags;
3395  	long completion_rc;
3396  	int rc = 0;
3397  
3398  	timeout = hl_usecs64_to_jiffies(data->intr_timeout_us);
3399  
3400  	hl_ctx_get(ctx);
3401  
3402  	data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
3403  	if (!data->cq_cb) {
3404  		rc = -EINVAL;
3405  		goto put_ctx;
3406  	}
3407  
3408  	/* Validate the cq offset */
3409  	if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
3410  			((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
3411  		rc = -EINVAL;
3412  		goto put_cq_cb;
3413  	}
3414  
3415  	pend = kzalloc(sizeof(*pend), GFP_KERNEL);
3416  	if (!pend) {
3417  		rc = -ENOMEM;
3418  		goto put_cq_cb;
3419  	}
3420  
3421  	hl_fence_init(&pend->fence, ULONG_MAX);
3422  	pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset;
3423  	pend->cq_target_value = data->target_value;
3424  	spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
3425  
3427  	/* Check the completion value here, as the interrupt could have been received
3428  	 * before we add the wait node to the wait list.
3429  	 */
3430  	if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) {
3431  		spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
3432  
3433  		if (*pend->cq_kernel_addr >= data->target_value)
3434  			*status = HL_WAIT_CS_STATUS_COMPLETED;
3435  		else
3436  			*status = HL_WAIT_CS_STATUS_BUSY;
3437  
3438  		pend->fence.timestamp = ktime_get();
3439  		goto set_timestamp;
3440  	}
3441  
3442  	/* Add the pending user interrupt to the relevant list for the interrupt
3443  	 * handler to monitor.
3444  	 * Note that the list cannot be sorted by target value (which would shorten
3445  	 * the traversal loop), since the same list may hold nodes for different
3446  	 * CQ counter handles.
3447  	 */
3448  	list_add_tail(&pend->list_node, &data->interrupt->wait_list_head);
3449  	spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
3450  
3451  	/* Wait for interrupt handler to signal completion */
3452  	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
3453  								timeout);
3454  	if (completion_rc > 0) {
3455  		if (pend->fence.error == -EIO) {
3456  			dev_err_ratelimited(hdev->dev,
3457  					"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
3458  					pend->fence.error);
3459  			rc = -EIO;
3460  			*status = HL_WAIT_CS_STATUS_ABORTED;
3461  		} else {
3462  			*status = HL_WAIT_CS_STATUS_COMPLETED;
3463  		}
3464  	} else {
3465  		if (completion_rc == -ERESTARTSYS) {
3466  			dev_err_ratelimited(hdev->dev,
3467  					"user process got signal while waiting for interrupt ID %d\n",
3468  					data->interrupt->interrupt_id);
3469  			rc = -EINTR;
3470  			*status = HL_WAIT_CS_STATUS_ABORTED;
3471  		} else {
3472  			/* The wait has timed-out. We don't know anything beyond that
3473  			 * because the workload was not submitted through the driver.
3474  			 * Therefore, from driver's perspective, the workload is still
3475  			 * executing.
3476  			 */
3477  			rc = 0;
3478  			*status = HL_WAIT_CS_STATUS_BUSY;
3479  		}
3480  	}
3481  
3482  	/*
3483  	 * The node is removed from the list here, rather than in the irq handler,
3484  	 * to cover the completion-timeout case. If this were a ts record
3485  	 * registration, the node would instead be deleted in the irq handler once
3486  	 * the target value is reached.
3487  	 */
3488  	spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
3489  	list_del(&pend->list_node);
3490  	spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
3491  
3492  set_timestamp:
3493  	*timestamp = ktime_to_ns(pend->fence.timestamp);
3494  	kfree(pend);
3495  	hl_cb_put(data->cq_cb);
3496  	hl_ctx_put(ctx);
3497  
3498  	return rc;
3499  
3500  put_cq_cb:
3501  	hl_cb_put(data->cq_cb);
3502  put_ctx:
3503  	hl_ctx_put(ctx);
3504  
3505  	return rc;
3506  }
3507  
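/*
 * Wait flavor where the completion counter resides in user memory: the value is read
 * with copy_from_user() and re-checked after every wakeup, and the wait is re-armed
 * if an interrupt arrived before the target value was reached.
 */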
3508  static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx,
3509  				u64 timeout_us, u64 user_address,
3510  				u64 target_value, struct hl_user_interrupt *interrupt,
3511  				u32 *status,
3512  				u64 *timestamp)
3513  {
3514  	struct hl_user_pending_interrupt *pend;
3515  	unsigned long timeout, flags;
3516  	u64 completion_value;
3517  	long completion_rc;
3518  	int rc = 0;
3519  
3520  	timeout = hl_usecs64_to_jiffies(timeout_us);
3521  
3522  	hl_ctx_get(ctx);
3523  
3524  	pend = kzalloc(sizeof(*pend), GFP_KERNEL);
3525  	if (!pend) {
3526  		hl_ctx_put(ctx);
3527  		return -ENOMEM;
3528  	}
3529  
3530  	hl_fence_init(&pend->fence, ULONG_MAX);
3531  
3532  	/* Add pending user interrupt to relevant list for the interrupt
3533  	 * handler to monitor
3534  	 */
3535  	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
3536  	list_add_tail(&pend->list_node, &interrupt->wait_list_head);
3537  	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
3538  
3539  	/* Check the completion value here, as the interrupt could have been received
3540  	 * before we added the node to the wait list
3541  	 */
3542  	if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
3543  		dev_err(hdev->dev, "Failed to copy completion value from user\n");
3544  		rc = -EFAULT;
3545  		goto remove_pending_user_interrupt;
3546  	}
3547  
3548  	if (completion_value >= target_value) {
3549  		*status = HL_WAIT_CS_STATUS_COMPLETED;
3550  		/* No interrupt was received, so assume the completion happened just now. */
3551  		pend->fence.timestamp = ktime_get();
3552  	} else {
3553  		*status = HL_WAIT_CS_STATUS_BUSY;
3554  	}
3555  
3556  	if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED))
3557  		goto remove_pending_user_interrupt;
3558  
3559  wait_again:
3560  	/* Wait for interrupt handler to signal completion */
3561  	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
3562  										timeout);
3563  
3564  	/* If the timeout did not expire we need to perform the comparison.
3565  	 * If the comparison fails, keep waiting until the timeout expires.
3566  	 */
3567  	if (completion_rc > 0) {
3568  		spin_lock_irqsave(&interrupt->wait_list_lock, flags);
3569  		/* reinit_completion must be called before we check the user
3570  		 * completion value, otherwise, if an interrupt is received after
3571  		 * the comparison and before the next wait_for_completion,
3572  		 * we will reach the timeout and fail
3573  		 */
3574  		reinit_completion(&pend->fence.completion);
3575  		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
3576  
3577  		if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
3578  			dev_err(hdev->dev, "Failed to copy completion value from user\n");
3579  			rc = -EFAULT;
3580  
3581  			goto remove_pending_user_interrupt;
3582  		}
3583  
3584  		if (completion_value >= target_value) {
3585  			*status = HL_WAIT_CS_STATUS_COMPLETED;
3586  		} else if (pend->fence.error) {
3587  			dev_err_ratelimited(hdev->dev,
3588  				"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
3589  				pend->fence.error);
3590  			/* set the command completion status as ABORTED */
3591  			*status = HL_WAIT_CS_STATUS_ABORTED;
3592  		} else {
3593  			timeout = completion_rc;
3594  			goto wait_again;
3595  		}
3596  	} else if (completion_rc == -ERESTARTSYS) {
3597  		dev_err_ratelimited(hdev->dev,
3598  			"user process got signal while waiting for interrupt ID %d\n",
3599  			interrupt->interrupt_id);
3600  		rc = -EINTR;
3601  	} else {
3602  		/* The wait has timed-out. We don't know anything beyond that
3603  		 * because the workload wasn't submitted through the driver.
3604  		 * Therefore, from driver's perspective, the workload is still
3605  		 * executing.
3606  		 */
3607  		rc = 0;
3608  		*status = HL_WAIT_CS_STATUS_BUSY;
3609  	}
3610  
3611  remove_pending_user_interrupt:
3612  	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
3613  	list_del(&pend->list_node);
3614  	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
3615  
3616  	*timestamp = ktime_to_ns(pend->fence.timestamp);
3617  
3618  	kfree(pend);
3619  	hl_ctx_put(ctx);
3620  
3621  	return rc;
3622  }
3623  
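/*
 * Resolve the target user interrupt from the ioctl flags (decoder, generic user
 * interrupt or one of the common interrupts) and dispatch to the matching wait or
 * timestamp-registration flavor.
 */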
3624  static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
3625  {
3626  	u16 interrupt_id, first_interrupt, last_interrupt;
3627  	struct hl_device *hdev = hpriv->hdev;
3628  	struct asic_fixed_properties *prop;
3629  	struct hl_user_interrupt *interrupt;
3630  	union hl_wait_cs_args *args = data;
3631  	u32 status = HL_WAIT_CS_STATUS_BUSY;
3632  	u64 timestamp = 0;
3633  	int rc, int_idx;
3634  
3635  	prop = &hdev->asic_prop;
3636  
3637  	if (!(prop->user_interrupt_count + prop->user_dec_intr_count)) {
3638  		dev_err(hdev->dev, "no user interrupts allowed");
3639  		return -EPERM;
3640  	}
3641  
3642  	interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);
3643  
3644  	first_interrupt = prop->first_available_user_interrupt;
3645  	last_interrupt = prop->first_available_user_interrupt + prop->user_interrupt_count - 1;
3646  
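	/* The first user_dec_intr_count entries of hdev->user_interrupt are the decoder
	 * interrupts; the generic user interrupts follow them.
	 */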
3647  	if (interrupt_id < prop->user_dec_intr_count) {
3648  
3649  		/* Check if the requested core is enabled */
3650  		if (!(prop->decoder_enabled_mask & BIT(interrupt_id))) {
3651  			dev_err(hdev->dev, "interrupt on a disabled core(%u) not allowed",
3652  				interrupt_id);
3653  			return -EINVAL;
3654  		}
3655  
3656  		interrupt = &hdev->user_interrupt[interrupt_id];
3657  
3658  	} else if (interrupt_id >= first_interrupt && interrupt_id <= last_interrupt) {
3659  
3660  		int_idx = interrupt_id - first_interrupt + prop->user_dec_intr_count;
3661  		interrupt = &hdev->user_interrupt[int_idx];
3662  
3663  	} else if (interrupt_id == HL_COMMON_USER_CQ_INTERRUPT_ID) {
3664  		interrupt = &hdev->common_user_cq_interrupt;
3665  	} else if (interrupt_id == HL_COMMON_DEC_INTERRUPT_ID) {
3666  		interrupt = &hdev->common_decoder_interrupt;
3667  	} else {
3668  		dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id);
3669  		return -EINVAL;
3670  	}
3671  
3672  	if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) {
3673  		struct wait_interrupt_data wait_intr_data = {0};
3674  
3675  		wait_intr_data.interrupt = interrupt;
3676  		wait_intr_data.mmg = &hpriv->mem_mgr;
3677  		wait_intr_data.cq_handle = args->in.cq_counters_handle;
3678  		wait_intr_data.cq_offset = args->in.cq_counters_offset;
3679  		wait_intr_data.ts_handle = args->in.timestamp_handle;
3680  		wait_intr_data.ts_offset = args->in.timestamp_offset;
3681  		wait_intr_data.target_value = args->in.target;
3682  		wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us;
3683  
3684  		if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) {
3685  			/*
3686  			 * Allow only one registration at a time. This is needed to prevent
3687  			 * issues when the same offset is re-used: the registration flow is
3688  			 * protected only by the interrupt lock, but the re-use flow may need
3689  			 * to move a ts node to another interrupt's list, and that transition
3690  			 * is otherwise unprotected.
3691  			 */
3692  			mutex_lock(&hpriv->ctx->ts_reg_lock);
3693  
3694  			rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data,
3695  						&status, &timestamp);
3696  
3697  			mutex_unlock(&hpriv->ctx->ts_reg_lock);
3698  		} else
3699  			rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data,
3700  						&status, &timestamp);
3701  	} else {
3702  		rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
3703  				args->in.interrupt_timeout_us, args->in.addr,
3704  				args->in.target, interrupt, &status,
3705  				&timestamp);
3706  	}
3707  
3708  	if (rc)
3709  		return rc;
3710  
3711  	memset(args, 0, sizeof(*args));
3712  	args->out.status = status;
3713  
3714  	if (timestamp) {
3715  		args->out.timestamp_nsec = timestamp;
3716  		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
3717  	}
3718  
3719  	return 0;
3720  }
3721  
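/*
 * hl_wait_ioctl() - main entry point for the wait ioctl. Dispatches to the interrupt
 * wait, multi-CS wait or single-CS wait handler according to the ioctl flags.
 */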
3722  int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
3723  {
3724  	struct hl_fpriv *hpriv = file_priv->driver_priv;
3725  	struct hl_device *hdev = hpriv->hdev;
3726  	union hl_wait_cs_args *args = data;
3727  	u32 flags = args->in.flags;
3728  	int rc;
3729  
3730  	/* If the device is not operational, or if an error has happened and the user should
3731  	 * release the device, there is no point in waiting for any command submission or user interrupt.
3732  	 */
3733  	if (!hl_device_operational(hpriv->hdev, NULL) || hdev->reset_info.watchdog_active)
3734  		return -EBUSY;
3735  
3736  	if (flags & HL_WAIT_CS_FLAGS_INTERRUPT)
3737  		rc = hl_interrupt_wait_ioctl(hpriv, data);
3738  	else if (flags & HL_WAIT_CS_FLAGS_MULTI_CS)
3739  		rc = hl_multi_cs_wait_ioctl(hpriv, data);
3740  	else
3741  		rc = hl_cs_wait_ioctl(hpriv, data);
3742  
3743  	return rc;
3744  }
3745