/*-
 * Copyright (c) 2021 NVIDIA corporation & affiliates. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * The internal queue, IQ, code is more or less a stripped down copy
 * of the existing SQ managing code with the following exceptions:
 *
 * - an optional single segment memory buffer, which can be read or
 *   written as a whole by the hardware, may be provided
 *
 * - an optional completion callback for all transmit operations may
 *   be provided
 *
 * - mbufs are not supported
 */

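/*
 * Illustrative only: a minimal sketch of how a caller elsewhere in the
 * driver is expected to use this API.  The buffer, length, callback and
 * "ds_cnt" names below are placeholders and error handling is omitted:
 *
 *	mtx_lock(&iq->lock);
 *	pi = mlx5e_iq_get_producer_index(iq);
 *	if (pi < 0) {
 *		mtx_unlock(&iq->lock);
 *		return (ENOMEM);
 *	}
 *	wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);
 *	... build the work queue entry in "wqe" ...
 *	mlx5e_iq_load_memory_single(iq, pi, buf, len,
 *	    &dma_address, BUS_DMASYNC_PREWRITE);
 *	iq->data[pi].callback = my_callback;	// optional
 *	iq->data[pi].arg = my_arg;
 *	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
 *	iq->pc += iq->data[pi].num_wqebbs;
 *	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));
 *	mlx5e_iq_notify_hw(iq);
 *	mtx_unlock(&iq->lock);
 */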
#include <dev/mlx5/mlx5_en/en.h>

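/*
 * Reclaim completed work queue entries.
 *
 * Polls up to "budget" CQEs from the IQ's completion queue.  For each
 * completed WQE the associated DMA map, if any, is synced and unloaded,
 * the optional completion callback is invoked and the optional refcount
 * is dropped, after which the consumer counter is advanced.
 */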
static void
mlx5e_iq_poll(struct mlx5e_iq *iq, int budget)
{
	const struct mlx5_cqe64 *cqe;
	u16 ci;
	u16 iqcc;

	/*
	 * iq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	iqcc = iq->cc;

	while (budget-- > 0) {
		cqe = mlx5e_get_cqe(&iq->cq);
		if (!cqe)
			break;

		mlx5_cqwq_pop(&iq->cq.wq);

		ci = iqcc & iq->wq.sz_m1;

		if (likely(iq->data[ci].dma_sync != 0)) {
			/* make sure data written by hardware is visible to CPU */
			bus_dmamap_sync(iq->dma_tag, iq->data[ci].dma_map, iq->data[ci].dma_sync);
			bus_dmamap_unload(iq->dma_tag, iq->data[ci].dma_map);

			iq->data[ci].dma_sync = 0;
		}

		if (likely(iq->data[ci].callback != NULL)) {
			iq->data[ci].callback(iq->data[ci].arg);
			iq->data[ci].callback = NULL;
		}

		if (unlikely(iq->data[ci].p_refcount != NULL)) {
			atomic_add_int(iq->data[ci].p_refcount, -1);
			iq->data[ci].p_refcount = NULL;
		}
		iqcc += iq->data[ci].num_wqebbs;
	}

	mlx5_cqwq_update_db_record(&iq->cq.wq);

	/* Ensure cq space is freed before enabling more cqes */
	atomic_thread_fence_rel();

	iq->cc = iqcc;
}

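/*
 * Completion event handler for the IQ's CQ.  Polls the queue under the
 * completion lock and then re-arms the CQ so that further completion
 * events are generated.
 */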
static void
mlx5e_iq_completion(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
{
	struct mlx5e_iq *iq = container_of(mcq, struct mlx5e_iq, cq.mcq);

	mtx_lock(&iq->comp_lock);
	mlx5e_iq_poll(iq, MLX5E_BUDGET_MAX);
	mlx5e_cq_arm(&iq->cq, MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));
	mtx_unlock(&iq->comp_lock);
}

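/*
 * Post a NOP work request of "ds_cnt" data segments.  This is used to
 * pad the queue, for example to avoid having a WQE wrap around the end
 * of the ring.  The caller must hold the IQ lock; the doorbell is rung
 * later via mlx5e_iq_notify_hw().
 */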
void
mlx5e_iq_send_nop(struct mlx5e_iq *iq, u32 ds_cnt)
{
	u16 pi = iq->pc & iq->wq.sz_m1;
	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);

	mtx_assert(&iq->lock, MA_OWNED);

	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));

	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) | MLX5_OPCODE_NOP);
	wqe->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt);
	wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;

	/* Copy data for doorbell */
	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));

	iq->data[pi].callback = NULL;
	iq->data[pi].arg = NULL;
	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	iq->data[pi].dma_sync = 0;
	iq->pc += iq->data[pi].num_wqebbs;
}

static void
mlx5e_iq_free_db(struct mlx5e_iq *iq)
{
	int wq_sz = mlx5_wq_cyc_get_size(&iq->wq);
	int x;

	for (x = 0; x != wq_sz; x++) {
		if (likely(iq->data[x].dma_sync != 0)) {
			bus_dmamap_unload(iq->dma_tag, iq->data[x].dma_map);
			iq->data[x].dma_sync = 0;
		}
		if (likely(iq->data[x].callback != NULL)) {
			iq->data[x].callback(iq->data[x].arg);
			iq->data[x].callback = NULL;
		}
		bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map);
	}
	free(iq->data, M_MLX5EN);
}

static int
mlx5e_iq_alloc_db(struct mlx5e_iq *iq)
{
	int wq_sz = mlx5_wq_cyc_get_size(&iq->wq);
	int err;
	int x;

	iq->data = malloc_domainset(wq_sz * sizeof(iq->data[0]), M_MLX5EN,
	    mlx5_dev_domainset(iq->priv->mdev), M_WAITOK | M_ZERO);

	/* Create DMA descriptor maps */
	for (x = 0; x != wq_sz; x++) {
		err = -bus_dmamap_create(iq->dma_tag, 0, &iq->data[x].dma_map);
		if (err != 0) {
			while (x--)
				bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map);
			free(iq->data, M_MLX5EN);
			return (err);
		}
	}
	return (0);
}

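/*
 * Allocate the software resources of an IQ: a DMA tag limited to a
 * single segment of at most PAGE_SIZE bytes, the cyclic work queue
 * itself and the per-WQE bookkeeping array.
 */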
static int
mlx5e_iq_create(struct mlx5e_channel *c,
    struct mlx5e_sq_param *param,
    struct mlx5e_iq *iq)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	void *sqc = param->sqc;
	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
	int err;

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    PAGE_SIZE,			/* maxsize */
	    1,				/* nsegments */
	    PAGE_SIZE,			/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &iq->dma_tag)))
		goto done;

	iq->mkey_be = cpu_to_be32(priv->mr.key);
	iq->priv = priv;

	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq,
	    &iq->wq, &iq->wq_ctrl);
	if (err)
		goto err_free_dma_tag;

	iq->wq.db = &iq->wq.db[MLX5_SND_DBR];

	err = mlx5e_iq_alloc_db(iq);
	if (err)
		goto err_iq_wq_destroy;

	return (0);

err_iq_wq_destroy:
	mlx5_wq_destroy(&iq->wq_ctrl);

err_free_dma_tag:
	bus_dma_tag_destroy(iq->dma_tag);
done:
	return (err);
}

static void
mlx5e_iq_destroy(struct mlx5e_iq *iq)
{
	mlx5e_iq_free_db(iq);
	mlx5_wq_destroy(&iq->wq_ctrl);
	bus_dma_tag_destroy(iq->dma_tag);
}

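/*
 * Create the queue object in firmware.  An IQ is created as a regular
 * send queue: this builds a CREATE_SQ command from the queue
 * parameters, the CQ number and the UAR/doorbell information and
 * executes it, returning the new SQ number in iq->sqn.
 */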
static int
mlx5e_iq_enable(struct mlx5e_iq *iq, struct mlx5e_sq_param *param,
    const struct mlx5_sq_bfreg *bfreg, int tis_num)
{
	void *in;
	void *sqc;
	void *wq;
	int inlen;
	int err;
	u8 ts_format;

	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
	    sizeof(u64) * iq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	iq->uar_map = bfreg->map;

	ts_format = mlx5_get_sq_default_ts(iq->priv->mdev);
	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
	wq = MLX5_ADDR_OF(sqc, sqc, wq);

	memcpy(sqc, param->sqc, sizeof(param->sqc));

	MLX5_SET(sqc, sqc, tis_num_0, tis_num);
	MLX5_SET(sqc, sqc, cqn, iq->cq.mcq.cqn);
	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
	MLX5_SET(sqc, sqc, ts_format, ts_format);
	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
	MLX5_SET(sqc, sqc, allow_swp, 1);

	/* SQ remap support requires reg_umr privileges level */
	if (MLX5_CAP_QOS(iq->priv->mdev, qos_remap_pp)) {
		MLX5_SET(sqc, sqc, qos_remap_en, 1);
		if (MLX5_CAP_ETH(iq->priv->mdev, reg_umr_sq))
			MLX5_SET(sqc, sqc, reg_umr, 1);
		else
			mlx5_en_err(iq->priv->ifp,
			    "No reg umr SQ capability, SQ remap disabled\n");
	}

	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
	MLX5_SET(wq, wq, uar_page, bfreg->index);
	MLX5_SET(wq, wq, log_wq_pg_sz, iq->wq_ctrl.buf.page_shift -
	    PAGE_SHIFT);
	MLX5_SET64(wq, wq, dbr_addr, iq->wq_ctrl.db.dma);

	mlx5_fill_page_array(&iq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_sq(iq->priv->mdev, in, inlen, &iq->sqn);

	kvfree(in);

	return (err);
}

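/*
 * Move the firmware queue object from "curr_state" to "next_state",
 * for example from RST to RDY when opening the queue or from RDY to
 * ERR when draining it.
 */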
static int
mlx5e_iq_modify(struct mlx5e_iq *iq, int curr_state, int next_state)
{
	void *in;
	void *sqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

	MLX5_SET(modify_sq_in, in, sqn, iq->sqn);
	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
	MLX5_SET(sqc, sqc, state, next_state);

	err = mlx5_core_modify_sq(iq->priv->mdev, in, inlen);

	kvfree(in);

	return (err);
}

static void
mlx5e_iq_disable(struct mlx5e_iq *iq)
{
	mlx5_core_destroy_sq(iq->priv->mdev, iq->sqn);
}

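/*
 * Bring up an IQ: open its completion queue, allocate the software
 * state, create the firmware queue object and move it to the ready
 * state.  On success the queue is marked running; on failure
 * everything set up so far is torn down again.
 */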
int
mlx5e_iq_open(struct mlx5e_channel *c,
    struct mlx5e_sq_param *sq_param,
    struct mlx5e_cq_param *cq_param,
    struct mlx5e_iq *iq)
{
	int err;

	err = mlx5e_open_cq(c->priv, cq_param, &iq->cq,
	    &mlx5e_iq_completion, c->ix);
	if (err)
		return (err);

	err = mlx5e_iq_create(c, sq_param, iq);
	if (err)
		goto err_close_cq;

	err = mlx5e_iq_enable(iq, sq_param, &c->bfreg, c->priv->tisn[0]);
	if (err)
		goto err_destroy_sq;

	err = mlx5e_iq_modify(iq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
	if (err)
		goto err_disable_sq;

	WRITE_ONCE(iq->running, 1);

	return (0);

err_disable_sq:
	mlx5e_iq_disable(iq);
err_destroy_sq:
	mlx5e_iq_destroy(iq);
err_close_cq:
	mlx5e_close_cq(&iq->cq);

	return (err);
}

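/*
 * Stop the IQ and wait for all outstanding work requests to complete.
 * First wait for the queue to drain normally while the link is up,
 * then move the firmware object to the error state so that any
 * remaining requests are completed with an error, and wait again.
 */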
static void
mlx5e_iq_drain(struct mlx5e_iq *iq)
{
	struct mlx5_core_dev *mdev = iq->priv->mdev;

	/*
	 * Check if already stopped.
	 *
	 * NOTE: Serialization of this function is managed by the
	 * caller, either by holding the priv's state lock or, in the
	 * case of rate limit support, by having a single thread manage
	 * drain and resume of SQs. The "running" variable can
	 * therefore safely be read without any locks.
	 */
	if (READ_ONCE(iq->running) == 0)
		return;

	/* don't put more packets into the SQ */
	WRITE_ONCE(iq->running, 0);

	/* wait till SQ is empty or link is down */
	mtx_lock(&iq->lock);
	while (iq->cc != iq->pc &&
	    (iq->priv->media_status_last & IFM_ACTIVE) != 0 &&
	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
	    pci_channel_offline(mdev->pdev) == 0) {
		mtx_unlock(&iq->lock);
		msleep(1);
		iq->cq.mcq.comp(&iq->cq.mcq, NULL);
		mtx_lock(&iq->lock);
	}
	mtx_unlock(&iq->lock);

	/* error out remaining requests */
	(void) mlx5e_iq_modify(iq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);

	/* wait till SQ is empty */
	mtx_lock(&iq->lock);
	while (iq->cc != iq->pc &&
	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
	    pci_channel_offline(mdev->pdev) == 0) {
		mtx_unlock(&iq->lock);
		msleep(1);
		iq->cq.mcq.comp(&iq->cq.mcq, NULL);
		mtx_lock(&iq->lock);
	}
	mtx_unlock(&iq->lock);
}

void
mlx5e_iq_close(struct mlx5e_iq *iq)
{
	mlx5e_iq_drain(iq);
	mlx5e_iq_disable(iq);
	mlx5e_iq_destroy(iq);
	mlx5e_close_cq(&iq->cq);
}

void
mlx5e_iq_static_init(struct mlx5e_iq *iq)
{
	mtx_init(&iq->lock, "mlx5iq",
	    MTX_NETWORK_LOCK " IQ", MTX_DEF);
	mtx_init(&iq->comp_lock, "mlx5iq_comp",
	    MTX_NETWORK_LOCK " IQ COMP", MTX_DEF);
}

void
mlx5e_iq_static_destroy(struct mlx5e_iq *iq)
{
	mtx_destroy(&iq->lock);
	mtx_destroy(&iq->comp_lock);
}

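/*
 * Ring the hardware doorbell for any work requests posted since the
 * last call.  The doorbell record is updated with the current producer
 * counter first; memory barriers order the WQE writes, the doorbell
 * record update and the doorbell register write.  Nothing is done when
 * doorbells are inhibited or no work has been posted.
 */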
void
mlx5e_iq_notify_hw(struct mlx5e_iq *iq)
{
	mtx_assert(&iq->lock, MA_OWNED);

	/* Check if we need to write the doorbell */
	if (unlikely(iq->db_inhibit != 0 || iq->doorbell.d64 == 0))
		return;

	/* Ensure wqe is visible to device before updating doorbell record */
	wmb();

	*iq->wq.db = cpu_to_be32(iq->pc);

	/*
	 * Ensure the doorbell record is visible to device before ringing
	 * the doorbell:
	 */
	wmb();

	mlx5_write64(iq->doorbell.d32, iq->uar_map,
	    MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));

	iq->doorbell.d64 = 0;
}

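/*
 * Return true if at least "n" WQEBBs are free.  The ring holds
 * "sz_m1 + 1" entries, so "(cc - pc) & sz_m1" is the number of free
 * slots except when the queue is completely empty (cc == pc), which
 * the second test covers.
 */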
static inline bool
mlx5e_iq_has_room_for(struct mlx5e_iq *iq, u16 n)
{
	u16 cc = iq->cc;
	u16 pc = iq->pc;

	return ((iq->wq.sz_m1 & (cc - pc)) >= n || cc == pc);
}

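/*
 * Reserve the next producer slot.  Returns the producer index, or -1
 * if the queue is not running or does not have room for a maximum
 * sized WQE.  If the next WQE would wrap around the end of the ring,
 * the remaining WQEBBs are consumed by a single multi-segment NOP
 * first so that the caller's WQE is always contiguous.  The caller
 * must hold the IQ lock.
 */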
int
mlx5e_iq_get_producer_index(struct mlx5e_iq *iq)
{
	u16 pi;

	mtx_assert(&iq->lock, MA_OWNED);

	if (unlikely(iq->running == 0))
		return (-1);
	if (unlikely(!mlx5e_iq_has_room_for(iq, 2 * MLX5_SEND_WQE_MAX_WQEBBS)))
		return (-1);

	/* Align IQ edge with NOPs to avoid WQE wrap around */
	pi = ((~iq->pc) & iq->wq.sz_m1);
	if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1))) {
		/* Send one multi NOP message instead of many */
		mlx5e_iq_send_nop(iq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
		pi = ((~iq->pc) & iq->wq.sz_m1);
		if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)))
			return (-1);
	}
	return (iq->pc & iq->wq.sz_m1);
}

static void
mlx5e_iq_load_memory_cb(void *arg, bus_dma_segment_t *segs,
    int nseg, int error)
{
	u64 *pdma_address = arg;

	if (unlikely(error || nseg != 1))
		panic("mlx5e_iq_load_memory_cb: error=%d nseg=%d", error, nseg);

	*pdma_address = segs[0].ds_addr;
}

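/*
 * A dma_sync value of zero is used above to mean "no sync pending", so
 * the POSTREAD and POSTWRITE constants stored in that field must be
 * non-zero.  These assertions catch a change in that assumption at
 * compile time.
 */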
CTASSERT(BUS_DMASYNC_POSTREAD != 0);
CTASSERT(BUS_DMASYNC_POSTWRITE != 0);

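/*
 * Load the single segment buffer at "buffer"/"size" into the DMA map
 * of producer slot "pi" and record which post-DMA sync operation has
 * to be performed at completion time.  "dma_sync" must be either
 * BUS_DMASYNC_PREREAD or BUS_DMASYNC_PREWRITE; the mapped bus address
 * is returned through "pdma_address".  Failure to map the buffer is
 * treated as a fatal error.
 */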
void
mlx5e_iq_load_memory_single(struct mlx5e_iq *iq, u16 pi, void *buffer, size_t size,
    u64 *pdma_address, u32 dma_sync)
{
	int error;

	error = bus_dmamap_load(iq->dma_tag, iq->data[pi].dma_map, buffer, size,
	    &mlx5e_iq_load_memory_cb, pdma_address, BUS_DMA_NOWAIT);
	if (unlikely(error))
		panic("mlx5e_iq_load_memory: error=%d buffer=%p size=%zd", error, buffer, size);

	switch (dma_sync) {
	case BUS_DMASYNC_PREREAD:
		iq->data[pi].dma_sync = BUS_DMASYNC_POSTREAD;
		break;
	case BUS_DMASYNC_PREWRITE:
		iq->data[pi].dma_sync = BUS_DMASYNC_POSTWRITE;
		break;
	default:
		panic("mlx5e_iq_load_memory_single: Invalid DMA sync operation(%d)", dma_sync);
	}

	/* make sure data in buffer is visible to hardware */
	bus_dmamap_sync(iq->dma_tag, iq->data[pi].dma_map, dma_sync);
}
535