/*-
 * Copyright (c) 2021-2022 NVIDIA corporation & affiliates.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * The internal queue, IQ, code is more or less a stripped-down copy
 * of the existing SQ management code, with the following exceptions:
 *
 * - an optional single-segment memory buffer, which can be read or
 *   written as a whole by the hardware, may be provided.
 *
 * - an optional completion callback may be provided for all transmit
 *   operations.
 *
 * - mbufs are not supported.
 */

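/*
 * A minimal sketch of how a consumer is expected to drive the IQ,
 * based on the helpers in this file. The exact WQE layout, the
 * "ds_cnt" value, the buffer and the callback shown here depend on
 * the command being posted and are only placeholders:
 *
 *	mtx_lock(&iq->lock);
 *	pi = mlx5e_iq_get_producer_index(iq);
 *	if (pi < 0) {
 *		mtx_unlock(&iq->lock);
 *		return;
 *	}
 *	wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);
 *	(build the control and data segments of the WQE here)
 *	mlx5e_iq_load_memory_single(iq, pi, buffer, size,
 *	    &dma_address, BUS_DMASYNC_PREWRITE);
 *	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
 *	iq->data[pi].callback = my_callback;	(optional)
 *	iq->data[pi].arg = my_arg;
 *	iq->pc += iq->data[pi].num_wqebbs;
 *	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));
 *	mlx5e_iq_notify_hw(iq);
 *	mtx_unlock(&iq->lock);
 */
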
#include <dev/mlx5/mlx5_en/en.h>

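/*
 * Reap up to "budget" completions from the IQ's completion queue:
 * sync and unload any DMA mapping, invoke the optional completion
 * callback, drop the optional reference and advance the consumer
 * counter.
 */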
static void
mlx5e_iq_poll(struct mlx5e_iq *iq, int budget)
{
        const struct mlx5_cqe64 *cqe;
        u16 ci;
        u16 iqcc;

        /*
         * iq->cc must be updated only after mlx5_cqwq_update_db_record(),
         * otherwise a cq overrun may occur
         */
        iqcc = iq->cc;

        while (budget-- > 0) {

                cqe = mlx5e_get_cqe(&iq->cq);
                if (!cqe)
                        break;

                mlx5_cqwq_pop(&iq->cq.wq);

                ci = iqcc & iq->wq.sz_m1;

                if (likely(iq->data[ci].dma_sync != 0)) {
                        /* make sure data written by hardware is visible to CPU */
                        bus_dmamap_sync(iq->dma_tag, iq->data[ci].dma_map,
                            iq->data[ci].dma_sync);
                        bus_dmamap_unload(iq->dma_tag, iq->data[ci].dma_map);

                        iq->data[ci].dma_sync = 0;
                }

                if (likely(iq->data[ci].callback != NULL)) {
                        iq->data[ci].callback(iq->data[ci].arg);
                        iq->data[ci].callback = NULL;
                }

                if (unlikely(iq->data[ci].p_refcount != NULL)) {
                        atomic_add_int(iq->data[ci].p_refcount, -1);
                        iq->data[ci].p_refcount = NULL;
                }
                iqcc += iq->data[ci].num_wqebbs;
        }

        mlx5_cqwq_update_db_record(&iq->cq.wq);

        /* Ensure cq space is freed before enabling more cqes */
        atomic_thread_fence_rel();

        iq->cc = iqcc;
}

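/*
 * Completion event handler for the IQ's CQ; polls the IQ under the
 * completion lock and re-arms the CQ.
 */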
static void
mlx5e_iq_completion(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
{
        struct mlx5e_iq *iq = container_of(mcq, struct mlx5e_iq, cq.mcq);

        mtx_lock(&iq->comp_lock);
        mlx5e_iq_poll(iq, MLX5E_BUDGET_MAX);
        mlx5e_cq_arm(&iq->cq, MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));
        mtx_unlock(&iq->comp_lock);
}

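/*
 * Post a NOP work request covering "ds_cnt" data segments. This is
 * used to pad the work queue so that a real WQE never wraps around
 * the end of the queue.
 */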
void
mlx5e_iq_send_nop(struct mlx5e_iq *iq, u32 ds_cnt)
{
        u16 pi = iq->pc & iq->wq.sz_m1;
        struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);

        mtx_assert(&iq->lock, MA_OWNED);

        memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));

        wqe->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) | MLX5_OPCODE_NOP);
        wqe->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt);
        wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;

        /* Copy data for doorbell */
        memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));

        iq->data[pi].callback = NULL;
        iq->data[pi].arg = NULL;
        iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
        iq->data[pi].dma_sync = 0;
        iq->pc += iq->data[pi].num_wqebbs;
}

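/*
 * Tear down the per-WQE bookkeeping: unload pending DMA maps, fire
 * any outstanding completion callbacks, drop references and destroy
 * the DMA maps.
 */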
static void
mlx5e_iq_free_db(struct mlx5e_iq *iq)
{
        int wq_sz = mlx5_wq_cyc_get_size(&iq->wq);
        int x;

        for (x = 0; x != wq_sz; x++) {
                if (likely(iq->data[x].dma_sync != 0)) {
                        bus_dmamap_unload(iq->dma_tag, iq->data[x].dma_map);
                        iq->data[x].dma_sync = 0;
                }
                if (likely(iq->data[x].callback != NULL)) {
                        iq->data[x].callback(iq->data[x].arg);
                        iq->data[x].callback = NULL;
                }
                if (unlikely(iq->data[x].p_refcount != NULL)) {
                        atomic_add_int(iq->data[x].p_refcount, -1);
                        iq->data[x].p_refcount = NULL;
                }
                bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map);
        }
        free(iq->data, M_MLX5EN);
}

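/*
 * Allocate the per-WQE bookkeeping array and create one DMA map per
 * work queue entry.
 */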
static int
mlx5e_iq_alloc_db(struct mlx5e_iq *iq)
{
        int wq_sz = mlx5_wq_cyc_get_size(&iq->wq);
        int err;
        int x;

        iq->data = malloc_domainset(wq_sz * sizeof(iq->data[0]), M_MLX5EN,
            mlx5_dev_domainset(iq->priv->mdev), M_WAITOK | M_ZERO);

        /* Create DMA descriptor maps */
        for (x = 0; x != wq_sz; x++) {
                err = -bus_dmamap_create(iq->dma_tag, 0, &iq->data[x].dma_map);
                if (err != 0) {
                        while (x--)
                                bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map);
                        free(iq->data, M_MLX5EN);
                        return (err);
                }
        }
        return (0);
}

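/*
 * Allocate the software state of the IQ: the DMA tag, the cyclic
 * work queue and the per-WQE bookkeeping.
 */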
static int
mlx5e_iq_create(struct mlx5e_channel *c,
    struct mlx5e_sq_param *param,
    struct mlx5e_iq *iq)
{
        struct mlx5e_priv *priv = c->priv;
        struct mlx5_core_dev *mdev = priv->mdev;
        void *sqc = param->sqc;
        void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
        int err;

        /* Create DMA descriptor TAG */
        if ((err = -bus_dma_tag_create(
            bus_get_dma_tag(mdev->pdev->dev.bsddev),
            1,				/* any alignment */
            0,				/* no boundary */
            BUS_SPACE_MAXADDR,		/* lowaddr */
            BUS_SPACE_MAXADDR,		/* highaddr */
            NULL, NULL,			/* filter, filterarg */
            PAGE_SIZE,			/* maxsize */
            1,				/* nsegments */
            PAGE_SIZE,			/* maxsegsize */
            0,				/* flags */
            NULL, NULL,			/* lockfunc, lockfuncarg */
            &iq->dma_tag)))
                goto done;

        iq->mkey_be = cpu_to_be32(priv->mr.key);
        iq->priv = priv;

        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq,
            &iq->wq, &iq->wq_ctrl);
        if (err)
                goto err_free_dma_tag;

        iq->wq.db = &iq->wq.db[MLX5_SND_DBR];

        err = mlx5e_iq_alloc_db(iq);
        if (err)
                goto err_iq_wq_destroy;

        return (0);

err_iq_wq_destroy:
        mlx5_wq_destroy(&iq->wq_ctrl);

err_free_dma_tag:
        bus_dma_tag_destroy(iq->dma_tag);
done:
        return (err);
}

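/*
 * Free the software state allocated by mlx5e_iq_create().
 */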
static void
mlx5e_iq_destroy(struct mlx5e_iq *iq)
{
        mlx5e_iq_free_db(iq);
        mlx5_wq_destroy(&iq->wq_ctrl);
        bus_dma_tag_destroy(iq->dma_tag);
}

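/*
 * Create the hardware send queue backing the IQ by issuing the
 * CREATE_SQ firmware command.
 */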
static int
mlx5e_iq_enable(struct mlx5e_iq *iq, struct mlx5e_sq_param *param,
    const struct mlx5_sq_bfreg *bfreg, int tis_num)
{
        void *in;
        void *sqc;
        void *wq;
        int inlen;
        int err;
        u8 ts_format;

        inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
            sizeof(u64) * iq->wq_ctrl.buf.npages;
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        iq->uar_map = bfreg->map;

        ts_format = mlx5_get_sq_default_ts(iq->priv->mdev);
        sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
        wq = MLX5_ADDR_OF(sqc, sqc, wq);

        memcpy(sqc, param->sqc, sizeof(param->sqc));

        MLX5_SET(sqc, sqc, tis_num_0, tis_num);
        MLX5_SET(sqc, sqc, cqn, iq->cq.mcq.cqn);
        MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
        MLX5_SET(sqc, sqc, ts_format, ts_format);
        MLX5_SET(sqc, sqc, tis_lst_sz, 1);
        MLX5_SET(sqc, sqc, flush_in_error_en, 1);
        MLX5_SET(sqc, sqc, allow_swp, 1);

        /* SQ remap support requires the reg_umr privilege level */
        if (MLX5_CAP_QOS(iq->priv->mdev, qos_remap_pp)) {
                MLX5_SET(sqc, sqc, qos_remap_en, 1);
                if (MLX5_CAP_ETH(iq->priv->mdev, reg_umr_sq))
                        MLX5_SET(sqc, sqc, reg_umr, 1);
                else
                        mlx5_en_err(iq->priv->ifp,
                            "No reg umr SQ capability, SQ remap disabled\n");
        }

        MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
        MLX5_SET(wq, wq, uar_page, bfreg->index);
        MLX5_SET(wq, wq, log_wq_pg_sz, iq->wq_ctrl.buf.page_shift -
            MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET64(wq, wq, dbr_addr, iq->wq_ctrl.db.dma);

        mlx5_fill_page_array(&iq->wq_ctrl.buf,
            (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

        err = mlx5_core_create_sq(iq->priv->mdev, in, inlen, &iq->sqn);

        kvfree(in);

        return (err);
}

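/*
 * Move the hardware send queue from "curr_state" to "next_state" by
 * issuing the MODIFY_SQ firmware command.
 */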
static int
mlx5e_iq_modify(struct mlx5e_iq *iq, int curr_state, int next_state)
{
        void *in;
        void *sqc;
        int inlen;
        int err;

        inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
        in = mlx5_vzalloc(inlen);
        if (in == NULL)
                return (-ENOMEM);

        sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

        MLX5_SET(modify_sq_in, in, sqn, iq->sqn);
        MLX5_SET(modify_sq_in, in, sq_state, curr_state);
        MLX5_SET(sqc, sqc, state, next_state);

        err = mlx5_core_modify_sq(iq->priv->mdev, in, inlen);

        kvfree(in);

        return (err);
}

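/*
 * Destroy the hardware send queue backing the IQ.
 */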
static void
mlx5e_iq_disable(struct mlx5e_iq *iq)
{
        mlx5_core_destroy_sq(iq->priv->mdev, iq->sqn);
}

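/*
 * Open an IQ: create its completion queue, its software state and
 * its hardware queue, then move the hardware queue to the ready
 * state.
 */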
int
mlx5e_iq_open(struct mlx5e_channel *c,
    struct mlx5e_sq_param *sq_param,
    struct mlx5e_cq_param *cq_param,
    struct mlx5e_iq *iq)
{
        int err;

        err = mlx5e_open_cq(c->priv, cq_param, &iq->cq,
            &mlx5e_iq_completion, c->ix);
        if (err)
                return (err);

        err = mlx5e_iq_create(c, sq_param, iq);
        if (err)
                goto err_close_cq;

        err = mlx5e_iq_enable(iq, sq_param, &c->bfreg, c->priv->tisn[0]);
        if (err)
                goto err_destroy_sq;

        err = mlx5e_iq_modify(iq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
        if (err)
                goto err_disable_sq;

        WRITE_ONCE(iq->running, 1);

        return (0);

err_disable_sq:
        mlx5e_iq_disable(iq);
err_destroy_sq:
        mlx5e_iq_destroy(iq);
err_close_cq:
        mlx5e_close_cq(&iq->cq);

        return (err);
}

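/*
 * Stop accepting new work and wait for outstanding requests to
 * complete, moving the queue to the error state as a last resort to
 * flush whatever remains.
 */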
static void
mlx5e_iq_drain(struct mlx5e_iq *iq)
{
        struct mlx5_core_dev *mdev = iq->priv->mdev;

        /*
         * Check if already stopped.
         *
         * NOTE: Serialization of this function is managed by the
         * caller ensuring the priv's state lock is held or, in case
         * of rate limit support, by a single thread managing drain
         * and resume of SQs. The "running" variable can therefore
         * safely be read without any locks.
         */
        if (READ_ONCE(iq->running) == 0)
                return;

        /* don't put more packets into the IQ */
        WRITE_ONCE(iq->running, 0);

        /* wait till the IQ is empty or the link is down */
        mtx_lock(&iq->lock);
        while (iq->cc != iq->pc &&
            (iq->priv->media_status_last & IFM_ACTIVE) != 0 &&
            mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
            pci_channel_offline(mdev->pdev) == 0) {
                mtx_unlock(&iq->lock);
                msleep(1);
                iq->cq.mcq.comp(&iq->cq.mcq, NULL);
                mtx_lock(&iq->lock);
        }
        mtx_unlock(&iq->lock);

        /* error out remaining requests */
        (void) mlx5e_iq_modify(iq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);

        /* wait till the IQ is empty */
        mtx_lock(&iq->lock);
        while (iq->cc != iq->pc &&
            mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
            pci_channel_offline(mdev->pdev) == 0) {
                mtx_unlock(&iq->lock);
                msleep(1);
                iq->cq.mcq.comp(&iq->cq.mcq, NULL);
                mtx_lock(&iq->lock);
        }
        mtx_unlock(&iq->lock);
}

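/*
 * Counterpart of mlx5e_iq_open(): drain the IQ and release its
 * hardware and software resources.
 */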
void
mlx5e_iq_close(struct mlx5e_iq *iq)
{
        mlx5e_iq_drain(iq);
        mlx5e_iq_disable(iq);
        mlx5e_iq_destroy(iq);
        mlx5e_close_cq(&iq->cq);
}

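/*
 * Initialize the mutexes which live for the whole lifetime of the
 * channel.
 */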
void
mlx5e_iq_static_init(struct mlx5e_iq *iq)
{
        mtx_init(&iq->lock, "mlx5iq",
            MTX_NETWORK_LOCK " IQ", MTX_DEF);
        mtx_init(&iq->comp_lock, "mlx5iq_comp",
            MTX_NETWORK_LOCK " IQ COMP", MTX_DEF);
}

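/*
 * Counterpart of mlx5e_iq_static_init().
 */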
void
mlx5e_iq_static_destroy(struct mlx5e_iq *iq)
{
        mtx_destroy(&iq->lock);
        mtx_destroy(&iq->comp_lock);
}

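/*
 * Update the doorbell record and ring the hardware doorbell for any
 * pending WQEs. Must be called with the IQ lock held.
 */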
void
mlx5e_iq_notify_hw(struct mlx5e_iq *iq)
{
        mtx_assert(&iq->lock, MA_OWNED);

        /* Check if we need to write the doorbell */
        if (unlikely(iq->db_inhibit != 0 || iq->doorbell.d64 == 0))
                return;

        /* Ensure wqe is visible to device before updating doorbell record */
        wmb();

        *iq->wq.db = cpu_to_be32(iq->pc);

        /*
         * Ensure the doorbell record is visible to device before ringing
         * the doorbell:
         */
        wmb();

        mlx5_write64(iq->doorbell.d32, iq->uar_map,
            MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));

        iq->doorbell.d64 = 0;
}

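/*
 * Return true if at least "n" WQEBBs are free in the work queue.
 */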
static inline bool
mlx5e_iq_has_room_for(struct mlx5e_iq *iq, u16 n)
{
        u16 cc = iq->cc;
        u16 pc = iq->pc;

        return ((iq->wq.sz_m1 & (cc - pc)) >= n || cc == pc);
}

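/*
 * Return the next producer index, padding the end of the work queue
 * with a NOP when a maximum-sized WQE would otherwise wrap around.
 * Returns -1 if the IQ is not running or if there is no room.
 */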
int
mlx5e_iq_get_producer_index(struct mlx5e_iq *iq)
{
        u16 pi;

        mtx_assert(&iq->lock, MA_OWNED);

        if (unlikely(iq->running == 0))
                return (-1);
        if (unlikely(!mlx5e_iq_has_room_for(iq, 2 * MLX5_SEND_WQE_MAX_WQEBBS)))
                return (-1);

        /* Align IQ edge with NOPs to avoid WQE wrap around */
        pi = ((~iq->pc) & iq->wq.sz_m1);
        if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1))) {
                /* Send one multi NOP message instead of many */
                mlx5e_iq_send_nop(iq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
                pi = ((~iq->pc) & iq->wq.sz_m1);
                if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)))
                        return (-1);
        }
        return (iq->pc & iq->wq.sz_m1);
}

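/*
 * busdma callback storing the bus address of the single loaded
 * segment.
 */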
static void
mlx5e_iq_load_memory_cb(void *arg, bus_dma_segment_t *segs,
    int nseg, int error)
{
        u64 *pdma_address = arg;

        if (unlikely(error || nseg != 1))
                panic("mlx5e_iq_load_memory_cb: error=%d nseg=%d", error, nseg);

        *pdma_address = segs[0].ds_addr;
}

CTASSERT(BUS_DMASYNC_POSTREAD != 0);
CTASSERT(BUS_DMASYNC_POSTWRITE != 0);

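/*
 * Load a single buffer for DMA, store its bus address in
 * "*pdma_address" and remember which sync operation the completion
 * path must perform before the buffer is unloaded.
 */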
void
mlx5e_iq_load_memory_single(struct mlx5e_iq *iq, u16 pi, void *buffer, size_t size,
    u64 *pdma_address, u32 dma_sync)
{
        int error;

        error = bus_dmamap_load(iq->dma_tag, iq->data[pi].dma_map, buffer, size,
            &mlx5e_iq_load_memory_cb, pdma_address, BUS_DMA_NOWAIT);
        if (unlikely(error))
                panic("mlx5e_iq_load_memory: error=%d buffer=%p size=%zd",
                    error, buffer, size);

        switch (dma_sync) {
        case BUS_DMASYNC_PREREAD:
                iq->data[pi].dma_sync = BUS_DMASYNC_POSTREAD;
                break;
        case BUS_DMASYNC_PREWRITE:
                iq->data[pi].dma_sync = BUS_DMASYNC_POSTWRITE;
                break;
        default:
                panic("mlx5e_iq_load_memory_single: Invalid DMA sync operation(%d)", dma_sync);
        }

        /* make sure data in buffer is visible to hardware */
        bus_dmamap_sync(iq->dma_tag, iq->data[pi].dma_map, dma_sync);
}