// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */

#include "internal.h"
#include "lib/clock.h"

enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };

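/* Dependent (dep) WQEs are staged in a small ring inside the SQ and are
 * flushed to hardware later by mlx5hws_send_all_dep_wqe(). Reserving a
 * slot only clears the match tag; the caller fills in the rest and may
 * roll the reservation back with mlx5hws_send_abort_new_dep_wqe().
 */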
struct mlx5hws_send_ring_dep_wqe *
mlx5hws_send_add_new_dep_wqe(struct mlx5hws_send_engine *queue)
{
	struct mlx5hws_send_ring_sq *send_sq = &queue->send_ring.send_sq;
	unsigned int idx = send_sq->head_dep_idx++ & (queue->num_entries - 1);

	memset(&send_sq->dep_wqe[idx].wqe_data.tag, 0, MLX5HWS_MATCH_TAG_SZ);

	return &send_sq->dep_wqe[idx];
}

void mlx5hws_send_abort_new_dep_wqe(struct mlx5hws_send_engine *queue)
{
	queue->send_ring.send_sq.head_dep_idx--;
}

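/* Flush all staged dependent WQEs to the send queue. The first WQE is
 * posted with a small fence so it waits for previously posted work, and
 * only the last one asks hardware for a completion.
 */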
void mlx5hws_send_all_dep_wqe(struct mlx5hws_send_engine *queue)
{
	struct mlx5hws_send_ring_sq *send_sq = &queue->send_ring.send_sq;
	struct mlx5hws_send_ste_attr ste_attr = {0};
	struct mlx5hws_send_ring_dep_wqe *dep_wqe;

	ste_attr.send_attr.opmod = MLX5HWS_WQE_GTA_OPMOD_STE;
	ste_attr.send_attr.opcode = MLX5HWS_WQE_OPCODE_TBL_ACCESS;
	ste_attr.send_attr.len = MLX5HWS_WQE_SZ_GTA_CTRL + MLX5HWS_WQE_SZ_GTA_DATA;
	ste_attr.gta_opcode = MLX5HWS_WQE_GTA_OP_ACTIVATE;

	/* Fence the first WQE from previously posted dependent WQEs */
	ste_attr.send_attr.fence = 1;

	while (send_sq->head_dep_idx != send_sq->tail_dep_idx) {
		dep_wqe = &send_sq->dep_wqe[send_sq->tail_dep_idx++ & (queue->num_entries - 1)];

		/* Notify HW on the last WQE */
		ste_attr.send_attr.notify_hw = (send_sq->tail_dep_idx == send_sq->head_dep_idx);
		ste_attr.send_attr.user_data = dep_wqe->user_data;
		ste_attr.send_attr.rule = dep_wqe->rule;

		ste_attr.rtc_0 = dep_wqe->rtc_0;
		ste_attr.rtc_1 = dep_wqe->rtc_1;
		ste_attr.retry_rtc_0 = dep_wqe->retry_rtc_0;
		ste_attr.retry_rtc_1 = dep_wqe->retry_rtc_1;
		ste_attr.used_id_rtc_0 = &dep_wqe->rule->rtc_0;
		ste_attr.used_id_rtc_1 = &dep_wqe->rule->rtc_1;
		ste_attr.wqe_ctrl = &dep_wqe->wqe_ctrl;
		ste_attr.wqe_data = &dep_wqe->wqe_data;
		ste_attr.direct_index = dep_wqe->direct_index;

		mlx5hws_send_ste(queue, &ste_attr);

		/* Fencing is done only on the first WQE */
		ste_attr.send_attr.fence = 0;
	}
}

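/* Posting a WQE is a three step sequence: mlx5hws_send_engine_post_start()
 * snapshots the ring, mlx5hws_send_engine_post_req_wqe() hands out one
 * MLX5_SEND_WQE_BB sized building block at a time, and
 * mlx5hws_send_engine_post_end() fills the control segment and optionally
 * rings the doorbell. See hws_send_wqe() below for the typical usage.
 */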
struct mlx5hws_send_engine_post_ctrl
mlx5hws_send_engine_post_start(struct mlx5hws_send_engine *queue)
{
	struct mlx5hws_send_engine_post_ctrl ctrl;

	ctrl.queue = queue;
	/* Currently only one send ring is supported */
	ctrl.send_ring = &queue->send_ring;
	ctrl.num_wqebbs = 0;

	return ctrl;
}

void mlx5hws_send_engine_post_req_wqe(struct mlx5hws_send_engine_post_ctrl *ctrl,
				      char **buf, size_t *len)
{
	struct mlx5hws_send_ring_sq *send_sq = &ctrl->send_ring->send_sq;
	unsigned int idx;

	idx = (send_sq->cur_post + ctrl->num_wqebbs) & send_sq->buf_mask;

	/* Note that *buf is a single MLX5_SEND_WQE_BB. It cannot be used
	 * as buffer of more than one WQE_BB, since the two MLX5_SEND_WQE_BB
	 * can be on 2 different kernel memory pages.
	 */
	*buf = mlx5_wq_cyc_get_wqe(&send_sq->wq, idx);
	*len = MLX5_SEND_WQE_BB;

	if (!ctrl->num_wqebbs) {
		*buf += sizeof(struct mlx5hws_wqe_ctrl_seg);
		*len -= sizeof(struct mlx5hws_wqe_ctrl_seg);
	}

	ctrl->num_wqebbs++;
}

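/* Doorbell sequence: dma_wmb() makes the WQE visible before the doorbell
 * record is updated, and the full wmb() orders the doorbell record update
 * before the 64-bit UAR write that actually notifies hardware.
 */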
static void hws_send_engine_post_ring(struct mlx5hws_send_ring_sq *sq,
				      struct mlx5hws_wqe_ctrl_seg *doorbell_cseg)
{
	/* ensure wqe is visible to device before updating doorbell record */
	dma_wmb();

	*sq->wq.db = cpu_to_be32(sq->cur_post);

	/* ensure doorbell record is visible to device before ringing the
	 * doorbell
	 */
	wmb();

	mlx5_write64((__be32 *)doorbell_cseg, sq->uar_map);

	/* Ensure doorbell is written on uar_page before poll_cq */
	WRITE_ONCE(doorbell_cseg, NULL);
}

static void
hws_send_wqe_set_tag(struct mlx5hws_wqe_gta_data_seg_ste *wqe_data,
		     struct mlx5hws_rule_match_tag *tag,
		     bool is_jumbo)
{
	if (is_jumbo) {
		/* Clear previous possibly dirty control */
		memset(wqe_data, 0, MLX5HWS_STE_CTRL_SZ);
		memcpy(wqe_data->jumbo, tag->jumbo, MLX5HWS_JUMBO_TAG_SZ);
	} else {
		/* Clear previous possibly dirty control and actions */
		memset(wqe_data, 0, MLX5HWS_STE_CTRL_SZ + MLX5HWS_ACTIONS_SZ);
		memcpy(wqe_data->tag, tag->match, MLX5HWS_MATCH_TAG_SZ);
	}
}

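/* Finalize a posted request: build the control segment (opcode, WQE index,
 * data segment count, completion/fence flags), remember the per-WQE private
 * data used when the completion arrives, advance cur_post and ring the
 * doorbell if the caller asked to notify hardware.
 */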
void mlx5hws_send_engine_post_end(struct mlx5hws_send_engine_post_ctrl *ctrl,
				  struct mlx5hws_send_engine_post_attr *attr)
{
	struct mlx5hws_wqe_ctrl_seg *wqe_ctrl;
	struct mlx5hws_send_ring_sq *sq;
	unsigned int idx;
	u32 flags = 0;

	sq = &ctrl->send_ring->send_sq;
	idx = sq->cur_post & sq->buf_mask;
	sq->last_idx = idx;

	wqe_ctrl = mlx5_wq_cyc_get_wqe(&sq->wq, idx);

	wqe_ctrl->opmod_idx_opcode =
		cpu_to_be32((attr->opmod << 24) |
			    ((sq->cur_post & 0xffff) << 8) |
			    attr->opcode);
	wqe_ctrl->qpn_ds =
		cpu_to_be32((attr->len + sizeof(struct mlx5hws_wqe_ctrl_seg)) / 16 |
			    sq->sqn << 8);
	wqe_ctrl->imm = cpu_to_be32(attr->id);

	flags |= attr->notify_hw ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
	flags |= attr->fence ? MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE : 0;
	wqe_ctrl->flags = cpu_to_be32(flags);

	sq->wr_priv[idx].id = attr->id;
	sq->wr_priv[idx].retry_id = attr->retry_id;

	sq->wr_priv[idx].rule = attr->rule;
	sq->wr_priv[idx].user_data = attr->user_data;
	sq->wr_priv[idx].num_wqebbs = ctrl->num_wqebbs;

	if (attr->rule) {
		sq->wr_priv[idx].rule->pending_wqes++;
		sq->wr_priv[idx].used_id = attr->used_id;
	}

	sq->cur_post += ctrl->num_wqebbs;

	if (attr->notify_hw)
		hws_send_engine_post_ring(sq, wqe_ctrl);
}

static void hws_send_wqe(struct mlx5hws_send_engine *queue,
			 struct mlx5hws_send_engine_post_attr *send_attr,
			 struct mlx5hws_wqe_gta_ctrl_seg *send_wqe_ctrl,
			 void *send_wqe_data,
			 void *send_wqe_tag,
			 bool is_jumbo,
			 u8 gta_opcode,
			 u32 direct_index)
{
	struct mlx5hws_wqe_gta_data_seg_ste *wqe_data;
	struct mlx5hws_wqe_gta_ctrl_seg *wqe_ctrl;
	struct mlx5hws_send_engine_post_ctrl ctrl;
	size_t wqe_len;

	ctrl = mlx5hws_send_engine_post_start(queue);
	mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len);
	mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_data, &wqe_len);

	wqe_ctrl->op_dirix = cpu_to_be32(gta_opcode << 28 | direct_index);
	memcpy(wqe_ctrl->stc_ix, send_wqe_ctrl->stc_ix,
	       sizeof(send_wqe_ctrl->stc_ix));

	if (send_wqe_data)
		memcpy(wqe_data, send_wqe_data, sizeof(*wqe_data));
	else
		hws_send_wqe_set_tag(wqe_data, send_wqe_tag, is_jumbo);

	mlx5hws_send_engine_post_end(&ctrl, send_attr);
}

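/* Post the STE WQE to rtc_1 and/or rtc_0. notify_hw and fence are adjusted
 * so that only the first posted WQE carries the fence and only the last one
 * requests a hardware completion; the caller's original values are restored
 * before returning.
 */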
void mlx5hws_send_ste(struct mlx5hws_send_engine *queue,
		      struct mlx5hws_send_ste_attr *ste_attr)
{
	struct mlx5hws_send_engine_post_attr *send_attr = &ste_attr->send_attr;
	u8 notify_hw = send_attr->notify_hw;
	u8 fence = send_attr->fence;

	if (ste_attr->rtc_1) {
		send_attr->id = ste_attr->rtc_1;
		send_attr->used_id = ste_attr->used_id_rtc_1;
		send_attr->retry_id = ste_attr->retry_rtc_1;
		send_attr->fence = fence;
		send_attr->notify_hw = notify_hw && !ste_attr->rtc_0;
		hws_send_wqe(queue, send_attr,
			     ste_attr->wqe_ctrl,
			     ste_attr->wqe_data,
			     ste_attr->wqe_tag,
			     ste_attr->wqe_tag_is_jumbo,
			     ste_attr->gta_opcode,
			     ste_attr->direct_index);
	}

	if (ste_attr->rtc_0) {
		send_attr->id = ste_attr->rtc_0;
		send_attr->used_id = ste_attr->used_id_rtc_0;
		send_attr->retry_id = ste_attr->retry_rtc_0;
		send_attr->fence = fence && !ste_attr->rtc_1;
		send_attr->notify_hw = notify_hw;
		hws_send_wqe(queue, send_attr,
			     ste_attr->wqe_ctrl,
			     ste_attr->wqe_data,
			     ste_attr->wqe_tag,
			     ste_attr->wqe_tag_is_jumbo,
			     ste_attr->gta_opcode,
			     ste_attr->direct_index);
	}

	/* Restore to original requested values */
	send_attr->notify_hw = notify_hw;
	send_attr->fence = fence;
}

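/* Repost a previously failed rule WQE using its retry RTC id. The GTA
 * control and data segments are copied verbatim from the original WQE
 * still sitting in the SQ buffer at wqe_cnt, so no rule state needs to be
 * rebuilt; a completion is requested unconditionally.
 */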
static void hws_send_engine_retry_post_send(struct mlx5hws_send_engine *queue,
					    struct mlx5hws_send_ring_priv *priv,
					    u16 wqe_cnt)
{
	struct mlx5hws_send_engine_post_attr send_attr = {0};
	struct mlx5hws_wqe_gta_data_seg_ste *wqe_data;
	struct mlx5hws_wqe_gta_ctrl_seg *wqe_ctrl;
	struct mlx5hws_send_engine_post_ctrl ctrl;
	struct mlx5hws_send_ring_sq *send_sq;
	unsigned int idx;
	size_t wqe_len;
	char *p;

	send_attr.rule = priv->rule;
	send_attr.opcode = MLX5HWS_WQE_OPCODE_TBL_ACCESS;
	send_attr.opmod = MLX5HWS_WQE_GTA_OPMOD_STE;
	send_attr.len = MLX5_SEND_WQE_BB * 2 - sizeof(struct mlx5hws_wqe_ctrl_seg);
	send_attr.notify_hw = 1;
	send_attr.fence = 0;
	send_attr.user_data = priv->user_data;
	send_attr.id = priv->retry_id;
	send_attr.used_id = priv->used_id;

	ctrl = mlx5hws_send_engine_post_start(queue);
	mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len);
	mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_data, &wqe_len);

	send_sq = &ctrl.send_ring->send_sq;
	idx = wqe_cnt & send_sq->buf_mask;
	p = mlx5_wq_cyc_get_wqe(&send_sq->wq, idx);

	/* Copy old gta ctrl */
	memcpy(wqe_ctrl, p + sizeof(struct mlx5hws_wqe_ctrl_seg),
	       MLX5_SEND_WQE_BB - sizeof(struct mlx5hws_wqe_ctrl_seg));

	idx = (wqe_cnt + 1) & send_sq->buf_mask;
	p = mlx5_wq_cyc_get_wqe(&send_sq->wq, idx);

	/* Copy old gta data */
	memcpy(wqe_data, p, MLX5_SEND_WQE_BB);

	mlx5hws_send_engine_post_end(&ctrl, &send_attr);
}

void mlx5hws_send_engine_flush_queue(struct mlx5hws_send_engine *queue)
{
	struct mlx5hws_send_ring_sq *sq = &queue->send_ring.send_sq;
	struct mlx5hws_wqe_ctrl_seg *wqe_ctrl;

	wqe_ctrl = mlx5_wq_cyc_get_wqe(&sq->wq, sq->last_idx);
	wqe_ctrl->flags |= cpu_to_be32(MLX5_WQE_CTRL_CQ_UPDATE);

	hws_send_engine_post_ring(sq, wqe_ctrl);
}

static void
hws_send_engine_update_rule_resize(struct mlx5hws_send_engine *queue,
				   struct mlx5hws_send_ring_priv *priv,
				   enum mlx5hws_flow_op_status *status)
{
	switch (priv->rule->resize_info->state) {
	case MLX5HWS_RULE_RESIZE_STATE_WRITING:
		if (priv->rule->status == MLX5HWS_RULE_STATUS_FAILING) {
			/* Backup original RTCs */
			u32 orig_rtc_0 = priv->rule->resize_info->rtc_0;
			u32 orig_rtc_1 = priv->rule->resize_info->rtc_1;

			/* Delete partially failed move rule using resize_info */
			priv->rule->resize_info->rtc_0 = priv->rule->rtc_0;
			priv->rule->resize_info->rtc_1 = priv->rule->rtc_1;

			/* Move rule to original RTC for future delete */
			priv->rule->rtc_0 = orig_rtc_0;
			priv->rule->rtc_1 = orig_rtc_1;
		}
		/* Clean leftovers */
		mlx5hws_rule_move_hws_remove(priv->rule, queue, priv->user_data);
		break;

	case MLX5HWS_RULE_RESIZE_STATE_DELETING:
		if (priv->rule->status == MLX5HWS_RULE_STATUS_FAILING) {
			*status = MLX5HWS_FLOW_OP_ERROR;
		} else {
			*status = MLX5HWS_FLOW_OP_SUCCESS;
			priv->rule->matcher = priv->rule->matcher->resize_dst;
		}
		priv->rule->resize_info->state = MLX5HWS_RULE_RESIZE_STATE_IDLE;
		priv->rule->status = MLX5HWS_RULE_STATUS_CREATED;
		break;

	default:
		break;
	}
}

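/* Process one completed rule WQE: on error, first try the retry path; if
 * none is available, mark the rule as failing. On the last pending WQE the
 * rule state machine is advanced (creating -> created, updating -> updated,
 * deleting -> deleted), stale action STEs are released, and *status is set
 * for the caller.
 */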
static void hws_send_engine_update_rule(struct mlx5hws_send_engine *queue,
					struct mlx5hws_send_ring_priv *priv,
					u16 wqe_cnt,
					enum mlx5hws_flow_op_status *status)
{
	priv->rule->pending_wqes--;

	if (*status == MLX5HWS_FLOW_OP_ERROR) {
		if (priv->retry_id) {
			hws_send_engine_retry_post_send(queue, priv, wqe_cnt);
			return;
		}
		/* Some part of the rule failed */
		priv->rule->status = MLX5HWS_RULE_STATUS_FAILING;
		*priv->used_id = 0;
	} else {
		*priv->used_id = priv->id;
	}

	/* Update rule status for the last completion */
	if (!priv->rule->pending_wqes) {
		if (unlikely(mlx5hws_rule_move_in_progress(priv->rule))) {
			hws_send_engine_update_rule_resize(queue, priv, status);
			return;
		}

		if (unlikely(priv->rule->status == MLX5HWS_RULE_STATUS_FAILING)) {
			/* Rule completely failed and doesn't require cleanup */
			if (!priv->rule->rtc_0 && !priv->rule->rtc_1)
				priv->rule->status = MLX5HWS_RULE_STATUS_FAILED;

			*status = MLX5HWS_FLOW_OP_ERROR;
		} else {
			/* Increase the status, this only works on good flow as
			 * the enum is arranged this way:
			 *  - creating -> created
			 *  - updating -> updated
			 *  - deleting -> deleted
			 */
			priv->rule->status++;
			*status = MLX5HWS_FLOW_OP_SUCCESS;
			if (priv->rule->status == MLX5HWS_RULE_STATUS_DELETED) {
				/* Rule was deleted, now we can safely release
				 * action STEs and clear resize info
				 */
				mlx5hws_rule_free_action_ste(&priv->rule->action_ste);
				mlx5hws_rule_clear_resize_info(priv->rule);
			} else if (priv->rule->status == MLX5HWS_RULE_STATUS_UPDATED) {
				/* Rule was updated, free the old action STEs */
				mlx5hws_rule_free_action_ste(&priv->rule->old_action_ste);
				/* Update completed - move the rule back to "created" */
				priv->rule->status = MLX5HWS_RULE_STATUS_CREATED;
			}
		}
	}
}

static void hws_send_engine_update(struct mlx5hws_send_engine *queue,
				   struct mlx5_cqe64 *cqe,
				   struct mlx5hws_send_ring_priv *priv,
				   struct mlx5hws_flow_op_result res[],
				   s64 *i,
				   u32 res_nb,
				   u16 wqe_cnt)
{
	enum mlx5hws_flow_op_status status;

	if (!cqe || (likely(be32_to_cpu(cqe->byte_cnt) >> 31 == 0) &&
		     likely(get_cqe_opcode(cqe) == MLX5_CQE_REQ))) {
		status = MLX5HWS_FLOW_OP_SUCCESS;
	} else {
		status = MLX5HWS_FLOW_OP_ERROR;
	}

	if (priv->user_data) {
		if (priv->rule) {
			hws_send_engine_update_rule(queue, priv, wqe_cnt, &status);
			/* Completion is provided on the last rule WQE */
			if (priv->rule->pending_wqes)
				return;
		}

		if (*i < res_nb) {
			res[*i].user_data = priv->user_data;
			res[*i].status = status;
			(*i)++;
			mlx5hws_send_engine_dec_rule(queue);
		} else {
			mlx5hws_send_engine_gen_comp(queue, priv->user_data, status);
		}
	}
}

static int mlx5hws_parse_cqe(struct mlx5hws_send_ring_cq *cq,
			     struct mlx5_cqe64 *cqe64)
{
	if (unlikely(get_cqe_opcode(cqe64) != MLX5_CQE_REQ)) {
		struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe64;

		mlx5_core_err(cq->mdev, "Bad OP in HWS SQ CQE: 0x%x\n", get_cqe_opcode(cqe64));
		mlx5_core_err(cq->mdev, "vendor_err_synd=%x\n", err_cqe->vendor_err_synd);
		mlx5_core_err(cq->mdev, "syndrome=%x\n", err_cqe->syndrome);
		print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET,
			       16, 1, err_cqe,
			       sizeof(*err_cqe), false);
		return CQ_POLL_ERR;
	}

	return CQ_OK;
}

static int mlx5hws_cq_poll_one(struct mlx5hws_send_ring_cq *cq)
{
	struct mlx5_cqe64 *cqe64;
	int err;

	cqe64 = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe64) {
		if (unlikely(cq->mdev->state ==
			     MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
			mlx5_core_dbg_once(cq->mdev,
					   "Polling CQ while device is shutting down\n");
			return CQ_POLL_ERR;
		}
		return CQ_EMPTY;
	}

	mlx5_cqwq_pop(&cq->wq);
	err = mlx5hws_parse_cqe(cq, cqe64);
	mlx5_cqwq_update_db_record(&cq->wq);

	return err;
}

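/* Poll the send CQ. A single CQE may cover several posted WQEs (completions
 * are only requested on the last WQE of a batch), so every WQE between
 * cq->poll_wqe and the CQE's wqe_counter is completed implicitly as
 * successful, and only the final one is judged by the CQE itself.
 */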
static void hws_send_engine_poll_cq(struct mlx5hws_send_engine *queue,
				    struct mlx5hws_flow_op_result res[],
				    s64 *polled,
				    u32 res_nb)
{
	struct mlx5hws_send_ring *send_ring = &queue->send_ring;
	struct mlx5hws_send_ring_cq *cq = &send_ring->send_cq;
	struct mlx5hws_send_ring_sq *sq = &send_ring->send_sq;
	struct mlx5hws_send_ring_priv *priv;
	struct mlx5_cqe64 *cqe;
	u8 cqe_opcode;
	u16 wqe_cnt;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe)
		return;

	cqe_opcode = get_cqe_opcode(cqe);
	if (cqe_opcode == MLX5_CQE_INVALID)
		return;

	if (unlikely(cqe_opcode != MLX5_CQE_REQ))
		queue->err = true;

	wqe_cnt = be16_to_cpu(cqe->wqe_counter) & sq->buf_mask;

	while (cq->poll_wqe != wqe_cnt) {
		priv = &sq->wr_priv[cq->poll_wqe];
		hws_send_engine_update(queue, NULL, priv, res, polled, res_nb, 0);
		cq->poll_wqe = (cq->poll_wqe + priv->num_wqebbs) & sq->buf_mask;
	}

	priv = &sq->wr_priv[wqe_cnt];
	cq->poll_wqe = (wqe_cnt + priv->num_wqebbs) & sq->buf_mask;
	hws_send_engine_update(queue, cqe, priv, res, polled, res_nb, wqe_cnt);
	mlx5hws_cq_poll_one(cq);
}

static void hws_send_engine_poll_list(struct mlx5hws_send_engine *queue,
				      struct mlx5hws_flow_op_result res[],
				      s64 *polled,
				      u32 res_nb)
{
	struct mlx5hws_completed_poll *comp = &queue->completed;

	while (comp->ci != comp->pi) {
		if (*polled < res_nb) {
			res[*polled].status =
				comp->entries[comp->ci].status;
			res[*polled].user_data =
				comp->entries[comp->ci].user_data;
			(*polled)++;
			comp->ci = (comp->ci + 1) & comp->mask;
			mlx5hws_send_engine_dec_rule(queue);
		} else {
			return;
		}
	}
}

static int hws_send_engine_poll(struct mlx5hws_send_engine *queue,
				struct mlx5hws_flow_op_result res[],
				u32 res_nb)
{
	s64 polled = 0;

	hws_send_engine_poll_list(queue, res, &polled, res_nb);

	if (polled >= res_nb)
		return polled;

	hws_send_engine_poll_cq(queue, res, &polled, res_nb);

	return polled;
}

int mlx5hws_send_queue_poll(struct mlx5hws_context *ctx,
			    u16 queue_id,
			    struct mlx5hws_flow_op_result res[],
			    u32 res_nb)
{
	return hws_send_engine_poll(&ctx->send_queue[queue_id], res, res_nb);
}

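/* Allocate the SQ backing resources: the cyclic WQ buffer, the dependent
 * WQE staging ring (num_entries slots) and the per-WQE private array
 * (num_entries * MAX_WQES_PER_RULE entries, which also defines buf_mask).
 */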
static int hws_send_ring_alloc_sq(struct mlx5_core_dev *mdev,
				  int numa_node,
				  struct mlx5hws_send_engine *queue,
				  struct mlx5hws_send_ring_sq *sq,
				  void *sqc_data)
{
	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc_data, wq);
	struct mlx5_wq_cyc *wq = &sq->wq;
	struct mlx5_wq_param param;
	size_t buf_sz;
	int err;

	sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
	sq->mdev = mdev;

	param.db_numa_node = numa_node;
	param.buf_numa_node = numa_node;
	err = mlx5_wq_cyc_create(mdev, &param, sqc_wq, wq, &sq->wq_ctrl);
	if (err)
		return err;
	wq->db = &wq->db[MLX5_SND_DBR];

	buf_sz = queue->num_entries * MAX_WQES_PER_RULE;
	sq->dep_wqe = kcalloc(queue->num_entries, sizeof(*sq->dep_wqe), GFP_KERNEL);
	if (!sq->dep_wqe) {
		err = -ENOMEM;
		goto destroy_wq_cyc;
	}

	sq->wr_priv = kzalloc(sizeof(*sq->wr_priv) * buf_sz, GFP_KERNEL);
	if (!sq->wr_priv) {
		err = -ENOMEM;
		goto free_dep_wqe;
	}

	sq->buf_mask = (queue->num_entries * MAX_WQES_PER_RULE) - 1;

	return 0;

free_dep_wqe:
	kfree(sq->dep_wqe);
destroy_wq_cyc:
	mlx5_wq_destroy(&sq->wq_ctrl);
	return err;
}

static void hws_send_ring_free_sq(struct mlx5hws_send_ring_sq *sq)
{
	if (!sq)
		return;
	kfree(sq->wr_priv);
	kfree(sq->dep_wqe);
	mlx5_wq_destroy(&sq->wq_ctrl);
}

static int hws_send_ring_create_sq(struct mlx5_core_dev *mdev, u32 pdn,
				   void *sqc_data,
				   struct mlx5hws_send_engine *queue,
				   struct mlx5hws_send_ring_sq *sq,
				   struct mlx5hws_send_ring_cq *cq)
{
	void *in, *sqc, *wq;
	int inlen, err;
	u8 ts_format;

	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
		sizeof(u64) * sq->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
	wq = MLX5_ADDR_OF(sqc, sqc, wq);

	memcpy(sqc, sqc_data, MLX5_ST_SZ_BYTES(sqc));
	MLX5_SET(sqc, sqc, cqn, cq->mcq.cqn);

	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
	MLX5_SET(sqc, sqc, non_wire, 1);

	ts_format = mlx5_is_real_time_sq(mdev) ? MLX5_TIMESTAMP_FORMAT_REAL_TIME :
						 MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
	MLX5_SET(sqc, sqc, ts_format, ts_format);

	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
	MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index);
	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);

	mlx5_fill_page_frag_array(&sq->wq_ctrl.buf,
				  (__be64 *)MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn);

	kvfree(in);

	return err;
}

static void hws_send_ring_destroy_sq(struct mlx5_core_dev *mdev,
				     struct mlx5hws_send_ring_sq *sq)
{
	mlx5_core_destroy_sq(mdev, sq->sqn);
}

static int hws_send_ring_set_sq_rdy(struct mlx5_core_dev *mdev, u32 sqn)
{
	void *in, *sqc;
	int inlen, err;

	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RST);
	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY);

	err = mlx5_core_modify_sq(mdev, sqn, in);

	kvfree(in);

	return err;
}

static void hws_send_ring_close_sq(struct mlx5hws_send_ring_sq *sq)
{
	mlx5_core_destroy_sq(sq->mdev, sq->sqn);
	mlx5_wq_destroy(&sq->wq_ctrl);
	kfree(sq->wr_priv);
	kfree(sq->dep_wqe);
}

static int hws_send_ring_create_sq_rdy(struct mlx5_core_dev *mdev, u32 pdn,
				       void *sqc_data,
				       struct mlx5hws_send_engine *queue,
				       struct mlx5hws_send_ring_sq *sq,
				       struct mlx5hws_send_ring_cq *cq)
{
	int err;

	err = hws_send_ring_create_sq(mdev, pdn, sqc_data, queue, sq, cq);
	if (err)
		return err;

	err = hws_send_ring_set_sq_rdy(mdev, sq->sqn);
	if (err)
		hws_send_ring_destroy_sq(mdev, sq);

	return err;
}

static int hws_send_ring_open_sq(struct mlx5hws_context *ctx,
				 int numa_node,
				 struct mlx5hws_send_engine *queue,
				 struct mlx5hws_send_ring_sq *sq,
				 struct mlx5hws_send_ring_cq *cq)
{
	size_t buf_sz, sq_log_buf_sz;
	void *sqc_data, *wq;
	int err;

	sqc_data = kvzalloc(MLX5_ST_SZ_BYTES(sqc), GFP_KERNEL);
	if (!sqc_data)
		return -ENOMEM;

	buf_sz = queue->num_entries * MAX_WQES_PER_RULE;
	sq_log_buf_sz = ilog2(roundup_pow_of_two(buf_sz));

	wq = MLX5_ADDR_OF(sqc, sqc_data, wq);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
	MLX5_SET(wq, wq, pd, ctx->pd_num);
	MLX5_SET(wq, wq, log_wq_sz, sq_log_buf_sz);

	err = hws_send_ring_alloc_sq(ctx->mdev, numa_node, queue, sq, sqc_data);
	if (err)
		goto err_free_sqc;

	err = hws_send_ring_create_sq_rdy(ctx->mdev, ctx->pd_num, sqc_data,
					  queue, sq, cq);
	if (err)
		goto err_free_sq;

	kvfree(sqc_data);

	return 0;
err_free_sq:
	hws_send_ring_free_sq(sq);
err_free_sqc:
	kvfree(sqc_data);
	return err;
}

static void hws_cq_complete(struct mlx5_core_cq *mcq,
			    struct mlx5_eqe *eqe)
{
	pr_err("CQ completion CQ: #%u\n", mcq->cqn);
}

static int hws_send_ring_alloc_cq(struct mlx5_core_dev *mdev,
				  int numa_node,
				  struct mlx5hws_send_engine *queue,
				  void *cqc_data,
				  struct mlx5hws_send_ring_cq *cq)
{
	struct mlx5_core_cq *mcq = &cq->mcq;
	struct mlx5_wq_param param;
	struct mlx5_cqe64 *cqe;
	int err;
	u32 i;

	param.buf_numa_node = numa_node;
	param.db_numa_node = numa_node;

	err = mlx5_cqwq_create(mdev, &param, cqc_data, &cq->wq, &cq->wq_ctrl);
	if (err)
		return err;

	mcq->cqe_sz = 64;
	mcq->set_ci_db = cq->wq_ctrl.db.db;
	mcq->arm_db = cq->wq_ctrl.db.db + 1;
	mcq->comp = hws_cq_complete;

	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
		cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
		cqe->op_own = 0xf1;
	}

	cq->mdev = mdev;

	return 0;
}

static int hws_send_ring_create_cq(struct mlx5_core_dev *mdev,
				   struct mlx5hws_send_engine *queue,
				   void *cqc_data,
				   struct mlx5hws_send_ring_cq *cq)
{
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_core_cq *mcq = &cq->mcq;
	void *in, *cqc;
	int inlen, eqn;
	int err;

	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
	if (err)
		return err;

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		sizeof(u64) * cq->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	memcpy(cqc, cqc_data, MLX5_ST_SZ_BYTES(cqc));
	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
				  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));

	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

	err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));

	kvfree(in);

	return err;
}

static int hws_send_ring_open_cq(struct mlx5_core_dev *mdev,
				 struct mlx5hws_send_engine *queue,
				 int numa_node,
				 struct mlx5hws_send_ring_cq *cq)
{
	void *cqc_data;
	int err;

	cqc_data = kvzalloc(MLX5_ST_SZ_BYTES(cqc), GFP_KERNEL);
	if (!cqc_data)
		return -ENOMEM;

	MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.uar->index);
	MLX5_SET(cqc, cqc_data, cqe_sz, queue->num_entries);
	MLX5_SET(cqc, cqc_data, log_cq_size, ilog2(queue->num_entries));

	err = hws_send_ring_alloc_cq(mdev, numa_node, queue, cqc_data, cq);
	if (err)
		goto err_out;

	err = hws_send_ring_create_cq(mdev, queue, cqc_data, cq);
	if (err)
		goto err_free_cq;

	kvfree(cqc_data);

	return 0;

err_free_cq:
	mlx5_wq_destroy(&cq->wq_ctrl);
err_out:
	kvfree(cqc_data);
	return err;
}

static void hws_send_ring_close_cq(struct mlx5hws_send_ring_cq *cq)
{
	mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
	mlx5_wq_destroy(&cq->wq_ctrl);
}

static void hws_send_ring_close(struct mlx5hws_send_engine *queue)
{
	hws_send_ring_close_sq(&queue->send_ring.send_sq);
	hws_send_ring_close_cq(&queue->send_ring.send_cq);
}

static int mlx5hws_send_ring_open(struct mlx5hws_context *ctx,
				  struct mlx5hws_send_engine *queue)
{
	int numa_node = dev_to_node(mlx5_core_dma_dev(ctx->mdev));
	struct mlx5hws_send_ring *ring = &queue->send_ring;
	int err;

	err = hws_send_ring_open_cq(ctx->mdev, queue, numa_node, &ring->send_cq);
	if (err)
		return err;

	err = hws_send_ring_open_sq(ctx, numa_node, queue, &ring->send_sq,
				    &ring->send_cq);
	if (err)
		goto close_cq;

	return err;

close_cq:
	hws_send_ring_close_cq(&ring->send_cq);
	return err;
}

static void mlx5hws_send_queue_close(struct mlx5hws_send_engine *queue)
{
	if (!queue->num_entries)
		return; /* this queue wasn't initialized */

	hws_send_ring_close(queue);
	kfree(queue->completed.entries);
}

static int mlx5hws_send_queue_open(struct mlx5hws_context *ctx,
				   struct mlx5hws_send_engine *queue,
				   u16 queue_size)
{
	int err;

	mutex_init(&queue->lock);

	queue->num_entries = roundup_pow_of_two(queue_size);
	queue->used_entries = 0;

	queue->completed.entries = kcalloc(queue->num_entries,
					   sizeof(queue->completed.entries[0]),
					   GFP_KERNEL);
	if (!queue->completed.entries)
		return -ENOMEM;

	queue->completed.pi = 0;
	queue->completed.ci = 0;
	queue->completed.mask = queue->num_entries - 1;
	err = mlx5hws_send_ring_open(ctx, queue);
	if (err)
		goto free_completed_entries;

	return 0;

free_completed_entries:
	kfree(queue->completed.entries);
	return err;
}

static void __hws_send_queues_close(struct mlx5hws_context *ctx, u16 queues)
{
	while (queues--)
		mlx5hws_send_queue_close(&ctx->send_queue[queues]);
}

static void hws_send_queues_bwc_locks_destroy(struct mlx5hws_context *ctx)
{
	int bwc_queues = mlx5hws_bwc_queues(ctx);
	int i;

	if (!mlx5hws_context_bwc_supported(ctx))
		return;

	for (i = 0; i < bwc_queues; i++) {
		mutex_destroy(&ctx->bwc_send_queue_locks[i]);
		lockdep_unregister_key(ctx->bwc_lock_class_keys + i);
	}

	kfree(ctx->bwc_lock_class_keys);
	kfree(ctx->bwc_send_queue_locks);
}

void mlx5hws_send_queues_close(struct mlx5hws_context *ctx)
{
	hws_send_queues_bwc_locks_destroy(ctx);
	__hws_send_queues_close(ctx, ctx->queues);
	kfree(ctx->send_queue);
}

static int hws_bwc_send_queues_init(struct mlx5hws_context *ctx)
{
	/* Number of BWC queues is equal to number of the usual HWS queues */
	int bwc_queues = ctx->queues - 1;
	int i;

	if (!mlx5hws_context_bwc_supported(ctx))
		return 0;

	ctx->queues += bwc_queues;

	ctx->bwc_send_queue_locks = kcalloc(bwc_queues,
					    sizeof(*ctx->bwc_send_queue_locks),
					    GFP_KERNEL);
	if (!ctx->bwc_send_queue_locks)
		return -ENOMEM;

	ctx->bwc_lock_class_keys = kcalloc(bwc_queues,
					   sizeof(*ctx->bwc_lock_class_keys),
					   GFP_KERNEL);
	if (!ctx->bwc_lock_class_keys)
		goto err_lock_class_keys;

	for (i = 0; i < bwc_queues; i++) {
		mutex_init(&ctx->bwc_send_queue_locks[i]);
		lockdep_register_key(ctx->bwc_lock_class_keys + i);
		lockdep_set_class(ctx->bwc_send_queue_locks + i, ctx->bwc_lock_class_keys + i);
	}

	return 0;

err_lock_class_keys:
	kfree(ctx->bwc_send_queue_locks);
	return -ENOMEM;
}

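/* Open all send queues for a context: the user-requested queues, one extra
 * control-path queue, and (when supported) a set of BWC queues with their
 * own locks. Native queues are skipped when the native API is unsupported.
 */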
int mlx5hws_send_queues_open(struct mlx5hws_context *ctx,
			     u16 queues,
			     u16 queue_size)
{
	int err = 0;
	int i = 0;

	/* Open one extra queue for control path */
	ctx->queues = queues + 1;

	/* open a separate set of queues and locks for bwc API */
	err = hws_bwc_send_queues_init(ctx);
	if (err)
		return err;

	ctx->send_queue = kcalloc(ctx->queues, sizeof(*ctx->send_queue), GFP_KERNEL);
	if (!ctx->send_queue) {
		err = -ENOMEM;
		goto free_bwc_locks;
	}

	/* If native API isn't supported, skip the unused native queues:
	 * initialize BWC queues and control queue only.
	 */
	if (!mlx5hws_context_native_supported(ctx))
		i = mlx5hws_bwc_get_queue_id(ctx, 0);

	for (; i < ctx->queues; i++) {
		err = mlx5hws_send_queue_open(ctx, &ctx->send_queue[i], queue_size);
		if (err)
			goto close_send_queues;
	}

	return 0;

close_send_queues:
	__hws_send_queues_close(ctx, i);

	kfree(ctx->send_queue);

free_bwc_locks:
	hws_send_queues_bwc_locks_destroy(ctx);

	return err;
}

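/* Drain a send queue: flush any staged dependent WQEs (or just request a
 * completion on the last posted WQE), and for the synchronous variant keep
 * polling the CQ until the engine reports empty.
 */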
int mlx5hws_send_queue_action(struct mlx5hws_context *ctx,
			      u16 queue_id,
			      u32 actions)
{
	struct mlx5hws_send_ring_sq *send_sq;
	struct mlx5hws_send_engine *queue;
	bool wait_comp = false;
	s64 polled = 0;

	queue = &ctx->send_queue[queue_id];
	send_sq = &queue->send_ring.send_sq;

	switch (actions) {
	case MLX5HWS_SEND_QUEUE_ACTION_DRAIN_SYNC:
		wait_comp = true;
		fallthrough;
	case MLX5HWS_SEND_QUEUE_ACTION_DRAIN_ASYNC:
		if (send_sq->head_dep_idx != send_sq->tail_dep_idx)
			/* Send dependent WQEs to drain the queue */
			mlx5hws_send_all_dep_wqe(queue);
		else
			/* Signal on the last posted WQE */
			mlx5hws_send_engine_flush_queue(queue);

		/* Poll queue until empty */
		while (wait_comp && !mlx5hws_send_engine_empty(queue))
			hws_send_engine_poll_cq(queue, NULL, &polled, 0);

		break;
	default:
		return -EINVAL;
	}

	return 0;
}

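/* Post a single STE WQE through the FW GENERATE_WQE command instead of the
 * hardware send queue. The WQE control, GTA control and GTA data segments
 * are built on the stack, the resulting CQE is checked inline, and one
 * retry is attempted with the retry RTC id if the first write fails.
 */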
static int
hws_send_wqe_fw(struct mlx5_core_dev *mdev,
		u32 pd_num,
		struct mlx5hws_send_engine_post_attr *send_attr,
		struct mlx5hws_wqe_gta_ctrl_seg *send_wqe_ctrl,
		void *send_wqe_match_data,
		void *send_wqe_match_tag,
		void *send_wqe_range_data,
		void *send_wqe_range_tag,
		bool is_jumbo,
		u8 gta_opcode)
{
	bool has_range = send_wqe_range_data || send_wqe_range_tag;
	bool has_match = send_wqe_match_data || send_wqe_match_tag;
	struct mlx5hws_wqe_gta_data_seg_ste gta_wqe_data0 = {0};
	struct mlx5hws_wqe_gta_data_seg_ste gta_wqe_data1 = {0};
	struct mlx5hws_wqe_gta_ctrl_seg gta_wqe_ctrl = {0};
	struct mlx5hws_cmd_generate_wqe_attr attr = {0};
	struct mlx5hws_wqe_ctrl_seg wqe_ctrl = {0};
	struct mlx5_cqe64 cqe;
	u32 flags = 0;
	int ret;

	/* Set WQE control */
	wqe_ctrl.opmod_idx_opcode = cpu_to_be32((send_attr->opmod << 24) | send_attr->opcode);
	wqe_ctrl.qpn_ds = cpu_to_be32((send_attr->len + sizeof(struct mlx5hws_wqe_ctrl_seg)) / 16);
	flags |= send_attr->notify_hw ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
	wqe_ctrl.flags = cpu_to_be32(flags);
	wqe_ctrl.imm = cpu_to_be32(send_attr->id);

	/* Set GTA WQE CTRL */
	memcpy(gta_wqe_ctrl.stc_ix, send_wqe_ctrl->stc_ix, sizeof(send_wqe_ctrl->stc_ix));
	gta_wqe_ctrl.op_dirix = cpu_to_be32(gta_opcode << 28);

	/* Set GTA match WQE DATA */
	if (has_match) {
		if (send_wqe_match_data)
			memcpy(&gta_wqe_data0, send_wqe_match_data, sizeof(gta_wqe_data0));
		else
			hws_send_wqe_set_tag(&gta_wqe_data0, send_wqe_match_tag, is_jumbo);

		gta_wqe_data0.rsvd1_definer = cpu_to_be32(send_attr->match_definer_id << 8);
		attr.gta_data_0 = (u8 *)&gta_wqe_data0;
	}

	/* Set GTA range WQE DATA */
	if (has_range) {
		if (send_wqe_range_data)
			memcpy(&gta_wqe_data1, send_wqe_range_data, sizeof(gta_wqe_data1));
		else
			hws_send_wqe_set_tag(&gta_wqe_data1, send_wqe_range_tag, false);

		gta_wqe_data1.rsvd1_definer = cpu_to_be32(send_attr->range_definer_id << 8);
		attr.gta_data_1 = (u8 *)&gta_wqe_data1;
	}

	attr.pdn = pd_num;
	attr.wqe_ctrl = (u8 *)&wqe_ctrl;
	attr.gta_ctrl = (u8 *)&gta_wqe_ctrl;

send_wqe:
	ret = mlx5hws_cmd_generate_wqe(mdev, &attr, &cqe);
	if (ret) {
		mlx5_core_err(mdev, "Failed to write WQE using command");
		return ret;
	}

	if ((get_cqe_opcode(&cqe) == MLX5_CQE_REQ) &&
	    (be32_to_cpu(cqe.byte_cnt) >> 31 == 0)) {
		*send_attr->used_id = send_attr->id;
		return 0;
	}

	/* Retry if rule failed */
	if (send_attr->retry_id) {
		wqe_ctrl.imm = cpu_to_be32(send_attr->retry_id);
		send_attr->id = send_attr->retry_id;
		send_attr->retry_id = 0;
		goto send_wqe;
	}

	return -1;
}

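/* FW (GENERATE_WQE) variant of mlx5hws_send_ste(). Since the FW path cannot
 * rely on a hardware fence, a requested fence is emulated by synchronously
 * draining the queue first; completion is then reported through the
 * engine's completion list rather than a CQE.
 */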
void mlx5hws_send_stes_fw(struct mlx5hws_context *ctx,
			  struct mlx5hws_send_engine *queue,
			  struct mlx5hws_send_ste_attr *ste_attr)
{
	struct mlx5hws_send_engine_post_attr *send_attr = &ste_attr->send_attr;
	struct mlx5hws_rule *rule = send_attr->rule;
	struct mlx5_core_dev *mdev;
	u16 queue_id;
	u32 pdn;
	int ret;

	queue_id = queue - ctx->send_queue;
	mdev = ctx->mdev;
	pdn = ctx->pd_num;

	/* Writing through FW can't HW fence, therefore we drain the queue */
	if (send_attr->fence)
		mlx5hws_send_queue_action(ctx,
					  queue_id,
					  MLX5HWS_SEND_QUEUE_ACTION_DRAIN_SYNC);

	if (ste_attr->rtc_1) {
		send_attr->id = ste_attr->rtc_1;
		send_attr->used_id = ste_attr->used_id_rtc_1;
		send_attr->retry_id = ste_attr->retry_rtc_1;
		ret = hws_send_wqe_fw(mdev, pdn, send_attr,
				      ste_attr->wqe_ctrl,
				      ste_attr->wqe_data,
				      ste_attr->wqe_tag,
				      ste_attr->range_wqe_data,
				      ste_attr->range_wqe_tag,
				      ste_attr->wqe_tag_is_jumbo,
				      ste_attr->gta_opcode);
		if (ret)
			goto fail_rule;
	}

	if (ste_attr->rtc_0) {
		send_attr->id = ste_attr->rtc_0;
		send_attr->used_id = ste_attr->used_id_rtc_0;
		send_attr->retry_id = ste_attr->retry_rtc_0;
		ret = hws_send_wqe_fw(mdev, pdn, send_attr,
				      ste_attr->wqe_ctrl,
				      ste_attr->wqe_data,
				      ste_attr->wqe_tag,
				      ste_attr->range_wqe_data,
				      ste_attr->range_wqe_tag,
				      ste_attr->wqe_tag_is_jumbo,
				      ste_attr->gta_opcode);
		if (ret)
			goto fail_rule;
	}

	/* Increase the status, this only works on good flow as the enum
	 * is arranged this way: creating -> created -> deleting -> deleted
	 */
	if (likely(rule))
		rule->status++;

	mlx5hws_send_engine_gen_comp(queue, send_attr->user_data, MLX5HWS_FLOW_OP_SUCCESS);

	return;

fail_rule:
	if (likely(rule))
		rule->status = !rule->rtc_0 && !rule->rtc_1 ?
			MLX5HWS_RULE_STATUS_FAILED : MLX5HWS_RULE_STATUS_FAILING;

	mlx5hws_send_engine_gen_comp(queue, send_attr->user_data, MLX5HWS_FLOW_OP_ERROR);
}