/*-
 * Copyright (c) 2021-2022 NVIDIA corporation & affiliates.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * The internal queue, IQ, code is more or less a stripped down copy
 * of the existing SQ managing code, with the following exceptions:
 *
 * - an optional single-segment memory buffer, which can be read or
 *   written as a whole by the hardware, may be provided.
 *
 * - an optional completion callback for all transmit operations may
 *   be provided.
 *
 * - mbufs are not supported.
 */

#include <dev/mlx5/mlx5_en/en.h>

static void
mlx5e_iq_poll(struct mlx5e_iq *iq, int budget)
{
	const struct mlx5_cqe64 *cqe;
	u16 ci;
	u16 iqcc;

	/*
	 * iq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur.
	 */
	iqcc = iq->cc;

	while (budget-- > 0) {
		cqe = mlx5e_get_cqe(&iq->cq);
		if (!cqe)
			break;

		mlx5_cqwq_pop(&iq->cq.wq);

		ci = iqcc & iq->wq.sz_m1;

		if (likely(iq->data[ci].dma_sync != 0)) {
			/* make sure data written by hardware is visible to CPU */
			bus_dmamap_sync(iq->dma_tag, iq->data[ci].dma_map,
			    iq->data[ci].dma_sync);
			bus_dmamap_unload(iq->dma_tag, iq->data[ci].dma_map);

			iq->data[ci].dma_sync = 0;
		}

		if (likely(iq->data[ci].callback != NULL)) {
			iq->data[ci].callback(iq->data[ci].arg);
			iq->data[ci].callback = NULL;
		}

		if (unlikely(iq->data[ci].p_refcount != NULL)) {
			atomic_add_int(iq->data[ci].p_refcount, -1);
			iq->data[ci].p_refcount = NULL;
		}
		iqcc += iq->data[ci].num_wqebbs;
	}

	mlx5_cqwq_update_db_record(&iq->cq.wq);

	/* Ensure cq space is freed before enabling more cqes */
	atomic_thread_fence_rel();

	iq->cc = iqcc;
}

static void
mlx5e_iq_completion(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
{
	struct mlx5e_iq *iq = container_of(mcq, struct mlx5e_iq, cq.mcq);

	mtx_lock(&iq->comp_lock);
	mlx5e_iq_poll(iq, MLX5E_BUDGET_MAX);
	mlx5e_cq_arm(&iq->cq, MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));
	mtx_unlock(&iq->comp_lock);
}
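/*
 * Post a NOP control WQE covering "ds_cnt" data segments at the
 * current producer index, typically to pad the queue up to its edge.
 * The caller must hold the IQ lock; the doorbell copy is staged here
 * and rung later by mlx5e_iq_notify_hw().
 */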
void
mlx5e_iq_send_nop(struct mlx5e_iq *iq, u32 ds_cnt)
{
	u16 pi = iq->pc & iq->wq.sz_m1;
	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);

	mtx_assert(&iq->lock, MA_OWNED);

	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));

	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) | MLX5_OPCODE_NOP);
	wqe->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt);
	wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;

	/* Copy data for doorbell */
	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));

	iq->data[pi].callback = NULL;
	iq->data[pi].arg = NULL;
	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	iq->data[pi].dma_sync = 0;
	iq->pc += iq->data[pi].num_wqebbs;
}

static void
mlx5e_iq_free_db(struct mlx5e_iq *iq)
{
	int wq_sz = mlx5_wq_cyc_get_size(&iq->wq);
	int x;

	for (x = 0; x != wq_sz; x++) {
		if (likely(iq->data[x].dma_sync != 0)) {
			bus_dmamap_unload(iq->dma_tag, iq->data[x].dma_map);
			iq->data[x].dma_sync = 0;
		}
		if (likely(iq->data[x].callback != NULL)) {
			iq->data[x].callback(iq->data[x].arg);
			iq->data[x].callback = NULL;
		}
		if (unlikely(iq->data[x].p_refcount != NULL)) {
			atomic_add_int(iq->data[x].p_refcount, -1);
			iq->data[x].p_refcount = NULL;
		}
		bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map);
	}
	free(iq->data, M_MLX5EN);
}

static int
mlx5e_iq_alloc_db(struct mlx5e_iq *iq)
{
	int wq_sz = mlx5_wq_cyc_get_size(&iq->wq);
	int err;
	int x;

	iq->data = malloc_domainset(wq_sz * sizeof(iq->data[0]), M_MLX5EN,
	    mlx5_dev_domainset(iq->priv->mdev), M_WAITOK | M_ZERO);

	/* Create DMA descriptor maps */
	for (x = 0; x != wq_sz; x++) {
		err = -bus_dmamap_create(iq->dma_tag, 0, &iq->data[x].dma_map);
		if (err != 0) {
			while (x--)
				bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map);
			free(iq->data, M_MLX5EN);
			return (err);
		}
	}
	return (0);
}

static int
mlx5e_iq_create(struct mlx5e_channel *c,
    struct mlx5e_sq_param *param,
    struct mlx5e_iq *iq)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	void *sqc = param->sqc;
	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
	int err;

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    PAGE_SIZE,			/* maxsize */
	    1,				/* nsegments */
	    PAGE_SIZE,			/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &iq->dma_tag)))
		goto done;

	iq->mkey_be = cpu_to_be32(priv->mr.key);
	iq->priv = priv;

	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq,
	    &iq->wq, &iq->wq_ctrl);
	if (err)
		goto err_free_dma_tag;

	iq->wq.db = &iq->wq.db[MLX5_SND_DBR];

	err = mlx5e_iq_alloc_db(iq);
	if (err)
		goto err_iq_wq_destroy;

	return (0);

err_iq_wq_destroy:
	mlx5_wq_destroy(&iq->wq_ctrl);

err_free_dma_tag:
	bus_dma_tag_destroy(iq->dma_tag);
done:
	return (err);
}

static void
mlx5e_iq_destroy(struct mlx5e_iq *iq)
{
	mlx5e_iq_free_db(iq);
	mlx5_wq_destroy(&iq->wq_ctrl);
	bus_dma_tag_destroy(iq->dma_tag);
}
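/*
 * Create the hardware send queue object backing this IQ. The SQ is
 * created in the RST state and bound to the given TIS and to the IQ's
 * completion queue; mlx5e_iq_open() moves it to RDY afterwards.
 */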
static int
mlx5e_iq_enable(struct mlx5e_iq *iq, struct mlx5e_sq_param *param,
    const struct mlx5_sq_bfreg *bfreg, int tis_num)
{
	void *in;
	void *sqc;
	void *wq;
	int inlen;
	int err;
	u8 ts_format;

	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
	    sizeof(u64) * iq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	iq->uar_map = bfreg->map;

	ts_format = mlx5_get_sq_default_ts(iq->priv->mdev);
	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
	wq = MLX5_ADDR_OF(sqc, sqc, wq);

	memcpy(sqc, param->sqc, sizeof(param->sqc));

	MLX5_SET(sqc, sqc, tis_num_0, tis_num);
	MLX5_SET(sqc, sqc, cqn, iq->cq.mcq.cqn);
	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
	MLX5_SET(sqc, sqc, ts_format, ts_format);
	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
	MLX5_SET(sqc, sqc, allow_swp, 1);

	/* SQ remap support requires reg_umr privileges level */
	if (MLX5_CAP_QOS(iq->priv->mdev, qos_remap_pp)) {
		MLX5_SET(sqc, sqc, qos_remap_en, 1);
		if (MLX5_CAP_ETH(iq->priv->mdev, reg_umr_sq))
			MLX5_SET(sqc, sqc, reg_umr, 1);
		else
			mlx5_en_err(iq->priv->ifp,
			    "No reg umr SQ capability, SQ remap disabled\n");
	}

	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
	MLX5_SET(wq, wq, uar_page, bfreg->index);
	MLX5_SET(wq, wq, log_wq_pg_sz, iq->wq_ctrl.buf.page_shift -
	    MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(wq, wq, dbr_addr, iq->wq_ctrl.db.dma);

	mlx5_fill_page_array(&iq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_sq(iq->priv->mdev, in, inlen, &iq->sqn);

	kvfree(in);

	return (err);
}

static int
mlx5e_iq_modify(struct mlx5e_iq *iq, int curr_state, int next_state)
{
	void *in;
	void *sqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

	MLX5_SET(modify_sq_in, in, sqn, iq->sqn);
	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
	MLX5_SET(sqc, sqc, state, next_state);

	err = mlx5_core_modify_sq(iq->priv->mdev, in, inlen);

	kvfree(in);

	return (err);
}

static void
mlx5e_iq_disable(struct mlx5e_iq *iq)
{
	mlx5_core_destroy_sq(iq->priv->mdev, iq->sqn);
}

int
mlx5e_iq_open(struct mlx5e_channel *c,
    struct mlx5e_sq_param *sq_param,
    struct mlx5e_cq_param *cq_param,
    struct mlx5e_iq *iq)
{
	int err;

	err = mlx5e_open_cq(c->priv, cq_param, &iq->cq,
	    &mlx5e_iq_completion, c->ix);
	if (err)
		return (err);

	err = mlx5e_iq_create(c, sq_param, iq);
	if (err)
		goto err_close_cq;

	err = mlx5e_iq_enable(iq, sq_param, &c->bfreg, c->priv->tisn[0]);
	if (err)
		goto err_destroy_sq;

	err = mlx5e_iq_modify(iq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
	if (err)
		goto err_disable_sq;

	WRITE_ONCE(iq->running, 1);

	return (0);

err_disable_sq:
	mlx5e_iq_disable(iq);
err_destroy_sq:
	mlx5e_iq_destroy(iq);
err_close_cq:
	mlx5e_close_cq(&iq->cq);

	return (err);
}
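/*
 * A minimal sketch (not driver code) of the expected IQ lifecycle,
 * assuming a hypothetical caller that owns a channel "c" and has
 * prepared "sq_param"/"cq_param" elsewhere:
 *
 *	mlx5e_iq_static_init(iq);
 *	if (mlx5e_iq_open(c, &sq_param, &cq_param, iq) == 0) {
 *		... post work; see the example at the end of this file ...
 *		mlx5e_iq_close(iq);
 *	}
 *	mlx5e_iq_static_destroy(iq);
 */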
static void
mlx5e_iq_drain(struct mlx5e_iq *iq)
{
	struct mlx5_core_dev *mdev = iq->priv->mdev;

	/*
	 * Check if already stopped.
	 *
	 * NOTE: Serialization of this function is managed by the
	 * caller ensuring the priv's state lock is locked or in case
	 * of rate limit support, a single thread manages drain and
	 * resume of SQs. The "running" variable can therefore safely
	 * be read without any locks.
	 */
	if (READ_ONCE(iq->running) == 0)
		return;

	/* don't put more packets into the SQ */
	WRITE_ONCE(iq->running, 0);

	/* wait till SQ is empty or link is down */
	mtx_lock(&iq->lock);
	while (iq->cc != iq->pc &&
	    (iq->priv->media_status_last & IFM_ACTIVE) != 0 &&
	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
	    pci_channel_offline(mdev->pdev) == 0) {
		mtx_unlock(&iq->lock);
		msleep(1);
		iq->cq.mcq.comp(&iq->cq.mcq, NULL);
		mtx_lock(&iq->lock);
	}
	mtx_unlock(&iq->lock);

	/* error out remaining requests */
	(void) mlx5e_iq_modify(iq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);

	/* wait till SQ is empty */
	mtx_lock(&iq->lock);
	while (iq->cc != iq->pc &&
	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
	    pci_channel_offline(mdev->pdev) == 0) {
		mtx_unlock(&iq->lock);
		msleep(1);
		iq->cq.mcq.comp(&iq->cq.mcq, NULL);
		mtx_lock(&iq->lock);
	}
	mtx_unlock(&iq->lock);
}

void
mlx5e_iq_close(struct mlx5e_iq *iq)
{
	mlx5e_iq_drain(iq);
	mlx5e_iq_disable(iq);
	mlx5e_iq_destroy(iq);
	mlx5e_close_cq(&iq->cq);
}

void
mlx5e_iq_static_init(struct mlx5e_iq *iq)
{
	mtx_init(&iq->lock, "mlx5iq",
	    MTX_NETWORK_LOCK " IQ", MTX_DEF);
	mtx_init(&iq->comp_lock, "mlx5iq_comp",
	    MTX_NETWORK_LOCK " IQ COMP", MTX_DEF);
}

void
mlx5e_iq_static_destroy(struct mlx5e_iq *iq)
{
	mtx_destroy(&iq->lock);
	mtx_destroy(&iq->comp_lock);
}

void
mlx5e_iq_notify_hw(struct mlx5e_iq *iq)
{
	mtx_assert(&iq->lock, MA_OWNED);

	/* Check if we need to write the doorbell */
	if (unlikely(iq->db_inhibit != 0 || iq->doorbell.d64 == 0))
		return;

	/* Ensure wqe is visible to device before updating doorbell record */
	wmb();

	*iq->wq.db = cpu_to_be32(iq->pc);

	/*
	 * Ensure the doorbell record is visible to device before ringing
	 * the doorbell:
	 */
	wmb();

	mlx5_write64(iq->doorbell.d32, iq->uar_map,
	    MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));

	iq->doorbell.d64 = 0;
}

static inline bool
mlx5e_iq_has_room_for(struct mlx5e_iq *iq, u16 n)
{
	u16 cc = iq->cc;
	u16 pc = iq->pc;

	return ((iq->wq.sz_m1 & (cc - pc)) >= n || cc == pc);
}

int
mlx5e_iq_get_producer_index(struct mlx5e_iq *iq)
{
	u16 pi;

	mtx_assert(&iq->lock, MA_OWNED);

	if (unlikely(iq->running == 0))
		return (-1);
	if (unlikely(!mlx5e_iq_has_room_for(iq, 2 * MLX5_SEND_WQE_MAX_WQEBBS)))
		return (-1);

	/* Align IQ edge with NOPs to avoid WQE wrap around */
	pi = ((~iq->pc) & iq->wq.sz_m1);
	if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1))) {
		/* Send one multi NOP message instead of many */
		mlx5e_iq_send_nop(iq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
		pi = ((~iq->pc) & iq->wq.sz_m1);
		if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)))
			return (-1);
	}
	return (iq->pc & iq->wq.sz_m1);
}

static void
mlx5e_iq_load_memory_cb(void *arg, bus_dma_segment_t *segs,
    int nseg, int error)
{
	u64 *pdma_address = arg;

	if (unlikely(error || nseg != 1))
		panic("mlx5e_iq_load_memory_cb: error=%d nseg=%d", error, nseg);

	*pdma_address = segs[0].ds_addr;
}

CTASSERT(BUS_DMASYNC_POSTREAD != 0);
CTASSERT(BUS_DMASYNC_POSTWRITE != 0);
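/*
 * Map "buffer" as a single DMA segment for the WQE at producer index
 * "pi" and return its bus address through "pdma_address". The matching
 * POSTREAD/POSTWRITE operation is recorded in iq->data[pi].dma_sync so
 * that mlx5e_iq_poll() can sync and unload the map once the hardware
 * reports completion; the CTASSERTs above ensure a non-zero dma_sync
 * value is a reliable "map is loaded" marker.
 */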
void
mlx5e_iq_load_memory_single(struct mlx5e_iq *iq, u16 pi, void *buffer, size_t size,
    u64 *pdma_address, u32 dma_sync)
{
	int error;

	error = bus_dmamap_load(iq->dma_tag, iq->data[pi].dma_map, buffer, size,
	    &mlx5e_iq_load_memory_cb, pdma_address, BUS_DMA_NOWAIT);
	if (unlikely(error))
		panic("mlx5e_iq_load_memory: error=%d buffer=%p size=%zd",
		    error, buffer, size);

	switch (dma_sync) {
	case BUS_DMASYNC_PREREAD:
		iq->data[pi].dma_sync = BUS_DMASYNC_POSTREAD;
		break;
	case BUS_DMASYNC_PREWRITE:
		iq->data[pi].dma_sync = BUS_DMASYNC_POSTWRITE;
		break;
	default:
		panic("mlx5e_iq_load_memory_single: Invalid DMA sync operation(%d)",
		    dma_sync);
	}

	/* make sure data in buffer is visible to hardware */
	bus_dmamap_sync(iq->dma_tag, iq->data[pi].dma_map, dma_sync);
}
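/*
 * A hedged sketch of how a consumer is expected to post a WQE that
 * lets the hardware read one buffer. The WQE layout, the "ds_cnt"
 * value, the completion callback and the error handling are command
 * specific and hypothetical here; only the IQ helpers themselves come
 * from this file:
 *
 *	mtx_lock(&iq->lock);
 *	pi = mlx5e_iq_get_producer_index(iq);
 *	if (pi < 0) {
 *		mtx_unlock(&iq->lock);
 *		return (ENOMEM);	// queue full or IQ stopped
 *	}
 *	wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);
 *	mlx5e_iq_load_memory_single(iq, pi, buf, len,
 *	    &dma_address, BUS_DMASYNC_PREWRITE);
 *	... fill wqe->ctrl and the data segments using dma_address ...
 *	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));
 *	iq->data[pi].callback = my_completion;	// optional, hypothetical
 *	iq->data[pi].arg = my_arg;
 *	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
 *	iq->pc += iq->data[pi].num_wqebbs;
 *	mlx5e_iq_notify_hw(iq);
 *	mtx_unlock(&iq->lock);
 *
 * On completion, mlx5e_iq_poll() syncs and unloads the DMA map and
 * then invokes the callback, if any.
 */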