1 /*- 2 * Copyright (c) 2021 NVIDIA corporation & affiliates. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD$ 26 */ 27 28 /* 29 * The internal queue, IQ, code is more or less a stripped down copy 30 * of the existing SQ managing code with exception of: 31 * 32 * - an optional single segment memory buffer which can be read or 33 * written as a whole by the hardware, may be provided. 34 * 35 * - an optional completion callback for all transmit operations, may 36 * be provided. 37 * 38 * - does not support mbufs. 39 */ 40 41 #include <dev/mlx5/mlx5_en/en.h> 42 43 static void 44 mlx5e_iq_poll(struct mlx5e_iq *iq, int budget) 45 { 46 const struct mlx5_cqe64 *cqe; 47 u16 ci; 48 u16 iqcc; 49 50 /* 51 * iq->cc must be updated only after mlx5_cqwq_update_db_record(), 52 * otherwise a cq overrun may occur 53 */ 54 iqcc = iq->cc; 55 56 while (budget-- > 0) { 57 58 cqe = mlx5e_get_cqe(&iq->cq); 59 if (!cqe) 60 break; 61 62 mlx5_cqwq_pop(&iq->cq.wq); 63 64 ci = iqcc & iq->wq.sz_m1; 65 66 if (likely(iq->data[ci].dma_sync != 0)) { 67 /* make sure data written by hardware is visible to CPU */ 68 bus_dmamap_sync(iq->dma_tag, iq->data[ci].dma_map, iq->data[ci].dma_sync); 69 bus_dmamap_unload(iq->dma_tag, iq->data[ci].dma_map); 70 71 iq->data[ci].dma_sync = 0; 72 } 73 74 if (likely(iq->data[ci].callback != NULL)) { 75 iq->data[ci].callback(iq->data[ci].arg); 76 iq->data[ci].callback = NULL; 77 } 78 79 if (unlikely(iq->data[ci].p_refcount != NULL)) { 80 atomic_add_int(iq->data[ci].p_refcount, -1); 81 iq->data[ci].p_refcount = NULL; 82 } 83 iqcc += iq->data[ci].num_wqebbs; 84 } 85 86 mlx5_cqwq_update_db_record(&iq->cq.wq); 87 88 /* Ensure cq space is freed before enabling more cqes */ 89 atomic_thread_fence_rel(); 90 91 iq->cc = iqcc; 92 } 93 94 static void 95 mlx5e_iq_completion(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused) 96 { 97 struct mlx5e_iq *iq = container_of(mcq, struct mlx5e_iq, cq.mcq); 98 99 mtx_lock(&iq->comp_lock); 100 mlx5e_iq_poll(iq, MLX5E_BUDGET_MAX); 101 mlx5e_cq_arm(&iq->cq, MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock)); 102 mtx_unlock(&iq->comp_lock); 103 } 104 105 void 106 mlx5e_iq_send_nop(struct mlx5e_iq *iq, u32 ds_cnt) 107 { 108 u16 pi = iq->pc & iq->wq.sz_m1; 109 struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi); 110 111 mtx_assert(&iq->lock, MA_OWNED); 112 113 memset(&wqe->ctrl, 0, sizeof(wqe->ctrl)); 114 115 wqe->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) | MLX5_OPCODE_NOP); 116 wqe->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt); 117 wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; 118 119 /* Copy data for doorbell */ 120 memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32)); 121 122 iq->data[pi].callback = NULL; 123 iq->data[pi].arg = NULL; 124 iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); 125 iq->data[pi].dma_sync = 0; 126 iq->pc += iq->data[pi].num_wqebbs; 127 } 128 129 static void 130 mlx5e_iq_free_db(struct mlx5e_iq *iq) 131 { 132 int wq_sz = mlx5_wq_cyc_get_size(&iq->wq); 133 int x; 134 135 for (x = 0; x != wq_sz; x++) { 136 if (likely(iq->data[x].dma_sync != 0)) { 137 bus_dmamap_unload(iq->dma_tag, iq->data[x].dma_map); 138 iq->data[x].dma_sync = 0; 139 } 140 if (likely(iq->data[x].callback != NULL)) { 141 iq->data[x].callback(iq->data[x].arg); 142 iq->data[x].callback = NULL; 143 } 144 bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map); 145 } 146 free(iq->data, M_MLX5EN); 147 } 148 149 static int 150 mlx5e_iq_alloc_db(struct mlx5e_iq *iq) 151 { 152 int wq_sz = mlx5_wq_cyc_get_size(&iq->wq); 153 int err; 154 int x; 155 156 iq->data = malloc_domainset(wq_sz * sizeof(iq->data[0]), M_MLX5EN, 157 mlx5_dev_domainset(iq->priv->mdev), M_WAITOK | M_ZERO); 158 159 /* Create DMA descriptor maps */ 160 for (x = 0; x != wq_sz; x++) { 161 err = -bus_dmamap_create(iq->dma_tag, 0, &iq->data[x].dma_map); 162 if (err != 0) { 163 while (x--) 164 bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map); 165 free(iq->data, M_MLX5EN); 166 return (err); 167 } 168 } 169 return (0); 170 } 171 172 static int 173 mlx5e_iq_create(struct mlx5e_channel *c, 174 struct mlx5e_sq_param *param, 175 struct mlx5e_iq *iq) 176 { 177 struct mlx5e_priv *priv = c->priv; 178 struct mlx5_core_dev *mdev = priv->mdev; 179 void *sqc = param->sqc; 180 void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); 181 int err; 182 183 /* Create DMA descriptor TAG */ 184 if ((err = -bus_dma_tag_create( 185 bus_get_dma_tag(mdev->pdev->dev.bsddev), 186 1, /* any alignment */ 187 0, /* no boundary */ 188 BUS_SPACE_MAXADDR, /* lowaddr */ 189 BUS_SPACE_MAXADDR, /* highaddr */ 190 NULL, NULL, /* filter, filterarg */ 191 PAGE_SIZE, /* maxsize */ 192 1, /* nsegments */ 193 PAGE_SIZE, /* maxsegsize */ 194 0, /* flags */ 195 NULL, NULL, /* lockfunc, lockfuncarg */ 196 &iq->dma_tag))) 197 goto done; 198 199 iq->mkey_be = cpu_to_be32(priv->mr.key); 200 iq->priv = priv; 201 202 err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, 203 &iq->wq, &iq->wq_ctrl); 204 if (err) 205 goto err_free_dma_tag; 206 207 iq->wq.db = &iq->wq.db[MLX5_SND_DBR]; 208 209 err = mlx5e_iq_alloc_db(iq); 210 if (err) 211 goto err_iq_wq_destroy; 212 213 return (0); 214 215 err_iq_wq_destroy: 216 mlx5_wq_destroy(&iq->wq_ctrl); 217 218 err_free_dma_tag: 219 bus_dma_tag_destroy(iq->dma_tag); 220 done: 221 return (err); 222 } 223 224 static void 225 mlx5e_iq_destroy(struct mlx5e_iq *iq) 226 { 227 mlx5e_iq_free_db(iq); 228 mlx5_wq_destroy(&iq->wq_ctrl); 229 bus_dma_tag_destroy(iq->dma_tag); 230 } 231 232 static int 233 mlx5e_iq_enable(struct mlx5e_iq *iq, struct mlx5e_sq_param *param, 234 const struct mlx5_sq_bfreg *bfreg, int tis_num) 235 { 236 void *in; 237 void *sqc; 238 void *wq; 239 int inlen; 240 int err; 241 u8 ts_format; 242 243 inlen = MLX5_ST_SZ_BYTES(create_sq_in) + 244 sizeof(u64) * iq->wq_ctrl.buf.npages; 245 in = mlx5_vzalloc(inlen); 246 if (in == NULL) 247 return (-ENOMEM); 248 249 iq->uar_map = bfreg->map; 250 251 ts_format = mlx5_get_sq_default_ts(iq->priv->mdev); 252 sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); 253 wq = MLX5_ADDR_OF(sqc, sqc, wq); 254 255 memcpy(sqc, param->sqc, sizeof(param->sqc)); 256 257 MLX5_SET(sqc, sqc, tis_num_0, tis_num); 258 MLX5_SET(sqc, sqc, cqn, iq->cq.mcq.cqn); 259 MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); 260 MLX5_SET(sqc, sqc, ts_format, ts_format); 261 MLX5_SET(sqc, sqc, tis_lst_sz, 1); 262 MLX5_SET(sqc, sqc, flush_in_error_en, 1); 263 MLX5_SET(sqc, sqc, allow_swp, 1); 264 265 /* SQ remap support requires reg_umr privileges level */ 266 if (MLX5_CAP_QOS(iq->priv->mdev, qos_remap_pp)) { 267 MLX5_SET(sqc, sqc, qos_remap_en, 1); 268 if (MLX5_CAP_ETH(iq->priv->mdev, reg_umr_sq)) 269 MLX5_SET(sqc, sqc, reg_umr, 1); 270 else 271 mlx5_en_err(iq->priv->ifp, 272 "No reg umr SQ capability, SQ remap disabled\n"); 273 } 274 275 MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); 276 MLX5_SET(wq, wq, uar_page, bfreg->index); 277 MLX5_SET(wq, wq, log_wq_pg_sz, iq->wq_ctrl.buf.page_shift - 278 PAGE_SHIFT); 279 MLX5_SET64(wq, wq, dbr_addr, iq->wq_ctrl.db.dma); 280 281 mlx5_fill_page_array(&iq->wq_ctrl.buf, 282 (__be64 *) MLX5_ADDR_OF(wq, wq, pas)); 283 284 err = mlx5_core_create_sq(iq->priv->mdev, in, inlen, &iq->sqn); 285 286 kvfree(in); 287 288 return (err); 289 } 290 291 static int 292 mlx5e_iq_modify(struct mlx5e_iq *iq, int curr_state, int next_state) 293 { 294 void *in; 295 void *sqc; 296 int inlen; 297 int err; 298 299 inlen = MLX5_ST_SZ_BYTES(modify_sq_in); 300 in = mlx5_vzalloc(inlen); 301 if (in == NULL) 302 return (-ENOMEM); 303 304 sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); 305 306 MLX5_SET(modify_sq_in, in, sqn, iq->sqn); 307 MLX5_SET(modify_sq_in, in, sq_state, curr_state); 308 MLX5_SET(sqc, sqc, state, next_state); 309 310 err = mlx5_core_modify_sq(iq->priv->mdev, in, inlen); 311 312 kvfree(in); 313 314 return (err); 315 } 316 317 static void 318 mlx5e_iq_disable(struct mlx5e_iq *iq) 319 { 320 mlx5_core_destroy_sq(iq->priv->mdev, iq->sqn); 321 } 322 323 int 324 mlx5e_iq_open(struct mlx5e_channel *c, 325 struct mlx5e_sq_param *sq_param, 326 struct mlx5e_cq_param *cq_param, 327 struct mlx5e_iq *iq) 328 { 329 int err; 330 331 err = mlx5e_open_cq(c->priv, cq_param, &iq->cq, 332 &mlx5e_iq_completion, c->ix); 333 if (err) 334 return (err); 335 336 err = mlx5e_iq_create(c, sq_param, iq); 337 if (err) 338 goto err_close_cq; 339 340 err = mlx5e_iq_enable(iq, sq_param, &c->bfreg, c->priv->tisn[0]); 341 if (err) 342 goto err_destroy_sq; 343 344 err = mlx5e_iq_modify(iq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); 345 if (err) 346 goto err_disable_sq; 347 348 WRITE_ONCE(iq->running, 1); 349 350 return (0); 351 352 err_disable_sq: 353 mlx5e_iq_disable(iq); 354 err_destroy_sq: 355 mlx5e_iq_destroy(iq); 356 err_close_cq: 357 mlx5e_close_cq(&iq->cq); 358 359 return (err); 360 } 361 362 static void 363 mlx5e_iq_drain(struct mlx5e_iq *iq) 364 { 365 struct mlx5_core_dev *mdev = iq->priv->mdev; 366 367 /* 368 * Check if already stopped. 369 * 370 * NOTE: Serialization of this function is managed by the 371 * caller ensuring the priv's state lock is locked or in case 372 * of rate limit support, a single thread manages drain and 373 * resume of SQs. The "running" variable can therefore safely 374 * be read without any locks. 375 */ 376 if (READ_ONCE(iq->running) == 0) 377 return; 378 379 /* don't put more packets into the SQ */ 380 WRITE_ONCE(iq->running, 0); 381 382 /* wait till SQ is empty or link is down */ 383 mtx_lock(&iq->lock); 384 while (iq->cc != iq->pc && 385 (iq->priv->media_status_last & IFM_ACTIVE) != 0 && 386 mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR && 387 pci_channel_offline(mdev->pdev) == 0) { 388 mtx_unlock(&iq->lock); 389 msleep(1); 390 iq->cq.mcq.comp(&iq->cq.mcq, NULL); 391 mtx_lock(&iq->lock); 392 } 393 mtx_unlock(&iq->lock); 394 395 /* error out remaining requests */ 396 (void) mlx5e_iq_modify(iq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); 397 398 /* wait till SQ is empty */ 399 mtx_lock(&iq->lock); 400 while (iq->cc != iq->pc && 401 mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR && 402 pci_channel_offline(mdev->pdev) == 0) { 403 mtx_unlock(&iq->lock); 404 msleep(1); 405 iq->cq.mcq.comp(&iq->cq.mcq, NULL); 406 mtx_lock(&iq->lock); 407 } 408 mtx_unlock(&iq->lock); 409 } 410 411 void 412 mlx5e_iq_close(struct mlx5e_iq *iq) 413 { 414 mlx5e_iq_drain(iq); 415 mlx5e_iq_disable(iq); 416 mlx5e_iq_destroy(iq); 417 mlx5e_close_cq(&iq->cq); 418 } 419 420 void 421 mlx5e_iq_static_init(struct mlx5e_iq *iq) 422 { 423 mtx_init(&iq->lock, "mlx5iq", 424 MTX_NETWORK_LOCK " IQ", MTX_DEF); 425 mtx_init(&iq->comp_lock, "mlx5iq_comp", 426 MTX_NETWORK_LOCK " IQ COMP", MTX_DEF); 427 } 428 429 void 430 mlx5e_iq_static_destroy(struct mlx5e_iq *iq) 431 { 432 mtx_destroy(&iq->lock); 433 mtx_destroy(&iq->comp_lock); 434 } 435 436 void 437 mlx5e_iq_notify_hw(struct mlx5e_iq *iq) 438 { 439 mtx_assert(&iq->lock, MA_OWNED); 440 441 /* Check if we need to write the doorbell */ 442 if (unlikely(iq->db_inhibit != 0 || iq->doorbell.d64 == 0)) 443 return; 444 445 /* Ensure wqe is visible to device before updating doorbell record */ 446 wmb(); 447 448 *iq->wq.db = cpu_to_be32(iq->pc); 449 450 /* 451 * Ensure the doorbell record is visible to device before ringing 452 * the doorbell: 453 */ 454 wmb(); 455 456 mlx5_write64(iq->doorbell.d32, iq->uar_map, 457 MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock)); 458 459 iq->doorbell.d64 = 0; 460 } 461 462 static inline bool 463 mlx5e_iq_has_room_for(struct mlx5e_iq *iq, u16 n) 464 { 465 u16 cc = iq->cc; 466 u16 pc = iq->pc; 467 468 return ((iq->wq.sz_m1 & (cc - pc)) >= n || cc == pc); 469 } 470 471 int 472 mlx5e_iq_get_producer_index(struct mlx5e_iq *iq) 473 { 474 u16 pi; 475 476 mtx_assert(&iq->lock, MA_OWNED); 477 478 if (unlikely(iq->running == 0)) 479 return (-1); 480 if (unlikely(!mlx5e_iq_has_room_for(iq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) 481 return (-1); 482 483 /* Align IQ edge with NOPs to avoid WQE wrap around */ 484 pi = ((~iq->pc) & iq->wq.sz_m1); 485 if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1))) { 486 /* Send one multi NOP message instead of many */ 487 mlx5e_iq_send_nop(iq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS); 488 pi = ((~iq->pc) & iq->wq.sz_m1); 489 if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1))) 490 return (-1); 491 } 492 return (iq->pc & iq->wq.sz_m1); 493 } 494 495 static void 496 mlx5e_iq_load_memory_cb(void *arg, bus_dma_segment_t *segs, 497 int nseg, int error) 498 { 499 u64 *pdma_address = arg; 500 501 if (unlikely(error || nseg != 1)) 502 panic("mlx5e_iq_load_memory_cb: error=%d nseg=%d", error, nseg); 503 504 *pdma_address = segs[0].ds_addr; 505 } 506 507 CTASSERT(BUS_DMASYNC_POSTREAD != 0); 508 CTASSERT(BUS_DMASYNC_POSTWRITE != 0); 509 510 void 511 mlx5e_iq_load_memory_single(struct mlx5e_iq *iq, u16 pi, void *buffer, size_t size, 512 u64 *pdma_address, u32 dma_sync) 513 { 514 int error; 515 516 error = bus_dmamap_load(iq->dma_tag, iq->data[pi].dma_map, buffer, size, 517 &mlx5e_iq_load_memory_cb, pdma_address, BUS_DMA_NOWAIT); 518 if (unlikely(error)) 519 panic("mlx5e_iq_load_memory: error=%d buffer=%p size=%zd", error, buffer, size); 520 521 switch (dma_sync) { 522 case BUS_DMASYNC_PREREAD: 523 iq->data[pi].dma_sync = BUS_DMASYNC_POSTREAD; 524 break; 525 case BUS_DMASYNC_PREWRITE: 526 iq->data[pi].dma_sync = BUS_DMASYNC_POSTWRITE; 527 break; 528 default: 529 panic("mlx5e_iq_load_memory_single: Invalid DMA sync operation(%d)", dma_sync); 530 } 531 532 /* make sure data in buffer is visible to hardware */ 533 bus_dmamap_sync(iq->dma_tag, iq->data[pi].dma_map, dma_sync); 534 } 535