/*-
 * Copyright (c) 2021-2022 NVIDIA corporation & affiliates.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * The internal queue, IQ, code is more or less a stripped down copy
 * of the existing SQ managing code, with the following exceptions:
 *
 * - an optional single segment memory buffer, which can be read or
 *   written as a whole by the hardware, may be provided.
 *
 * - an optional completion callback for all transmit operations may
 *   be provided.
 *
 * - mbufs are not supported.
 */

#include <dev/mlx5/mlx5_en/en.h>
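
/*
 * Rough sketch of how a consumer is expected to drive the IQ transmit
 * path (an illustration only; pi, wqe, buffer, size, dma_address,
 * ds_cnt, my_callback and my_arg are placeholder names):
 *
 *	mtx_lock(&iq->lock);
 *	pi = mlx5e_iq_get_producer_index(iq);
 *	if (pi < 0) {
 *		mtx_unlock(&iq->lock);
 *		return (ENOMEM);	// queue is stopped or full
 *	}
 *	wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);
 *	// ... fill in wqe->ctrl: opcode, producer counter, SQN, ds_cnt ...
 *
 *	// optional: attach a single buffer the hardware will write into;
 *	// use BUS_DMASYNC_PREWRITE instead if the hardware only reads it
 *	mlx5e_iq_load_memory_single(iq, pi, buffer, size, &dma_address,
 *	    BUS_DMASYNC_PREREAD);
 *
 *	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));
 *	iq->data[pi].callback = my_callback;	// optional completion callback
 *	iq->data[pi].arg = my_arg;
 *	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
 *	iq->pc += iq->data[pi].num_wqebbs;
 *	mlx5e_iq_notify_hw(iq);
 *	mtx_unlock(&iq->lock);
 */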

static void
mlx5e_iq_poll(struct mlx5e_iq *iq, int budget)
{
	const struct mlx5_cqe64 *cqe;
	u16 ci;
	u16 iqcc;

	/*
	 * iq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	iqcc = iq->cc;

	while (budget-- > 0) {

		cqe = mlx5e_get_cqe(&iq->cq);
		if (!cqe)
			break;

		mlx5_cqwq_pop(&iq->cq.wq);

		ci = iqcc & iq->wq.sz_m1;

		if (likely(iq->data[ci].dma_sync != 0)) {
			/* make sure data written by hardware is visible to CPU */
			bus_dmamap_sync(iq->dma_tag, iq->data[ci].dma_map,
			    iq->data[ci].dma_sync);
			bus_dmamap_unload(iq->dma_tag, iq->data[ci].dma_map);

			iq->data[ci].dma_sync = 0;
		}

		if (likely(iq->data[ci].callback != NULL)) {
			iq->data[ci].callback(iq->data[ci].arg);
			iq->data[ci].callback = NULL;
		}

		if (unlikely(iq->data[ci].p_refcount != NULL)) {
			atomic_add_int(iq->data[ci].p_refcount, -1);
			iq->data[ci].p_refcount = NULL;
		}
		iqcc += iq->data[ci].num_wqebbs;
	}

	mlx5_cqwq_update_db_record(&iq->cq.wq);

	/* Ensure cq space is freed before enabling more cqes */
	atomic_thread_fence_rel();

	iq->cc = iqcc;
}

static void
mlx5e_iq_completion(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
{
	struct mlx5e_iq *iq = container_of(mcq, struct mlx5e_iq, cq.mcq);

	mtx_lock(&iq->comp_lock);
	mlx5e_iq_poll(iq, MLX5E_BUDGET_MAX);
	mlx5e_cq_arm(&iq->cq, MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));
	mtx_unlock(&iq->comp_lock);
}

void
mlx5e_iq_send_nop(struct mlx5e_iq *iq, u32 ds_cnt)
{
	u16 pi = iq->pc & iq->wq.sz_m1;
	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);

	mtx_assert(&iq->lock, MA_OWNED);

	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));

	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) | MLX5_OPCODE_NOP);
	wqe->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt);
	wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;

	/* Copy data for doorbell */
	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));

	iq->data[pi].callback = NULL;
	iq->data[pi].arg = NULL;
	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	iq->data[pi].dma_sync = 0;
	iq->pc += iq->data[pi].num_wqebbs;
}

static void
mlx5e_iq_free_db(struct mlx5e_iq *iq)
{
	int wq_sz = mlx5_wq_cyc_get_size(&iq->wq);
	int x;

	for (x = 0; x != wq_sz; x++) {
		if (likely(iq->data[x].dma_sync != 0)) {
			bus_dmamap_unload(iq->dma_tag, iq->data[x].dma_map);
			iq->data[x].dma_sync = 0;
		}
		if (likely(iq->data[x].callback != NULL)) {
			iq->data[x].callback(iq->data[x].arg);
			iq->data[x].callback = NULL;
		}
		if (unlikely(iq->data[x].p_refcount != NULL)) {
			atomic_add_int(iq->data[x].p_refcount, -1);
			iq->data[x].p_refcount = NULL;
		}
		bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map);
	}
	free(iq->data, M_MLX5EN);
}

static int
mlx5e_iq_alloc_db(struct mlx5e_iq *iq)
{
	int wq_sz = mlx5_wq_cyc_get_size(&iq->wq);
	int err;
	int x;

	iq->data = malloc_domainset(wq_sz * sizeof(iq->data[0]), M_MLX5EN,
	    mlx5_dev_domainset(iq->priv->mdev), M_WAITOK | M_ZERO);

	/* Create DMA descriptor maps */
	for (x = 0; x != wq_sz; x++) {
		err = -bus_dmamap_create(iq->dma_tag, 0, &iq->data[x].dma_map);
		if (err != 0) {
			while (x--)
				bus_dmamap_destroy(iq->dma_tag,
				    iq->data[x].dma_map);
			free(iq->data, M_MLX5EN);
			return (err);
		}
	}
	return (0);
}

static int
mlx5e_iq_create(struct mlx5e_channel *c,
    struct mlx5e_sq_param *param,
    struct mlx5e_iq *iq)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	void *sqc = param->sqc;
	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
	int err;

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    PAGE_SIZE,			/* maxsize */
	    1,				/* nsegments */
	    PAGE_SIZE,			/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &iq->dma_tag)))
		goto done;

	iq->mkey_be = cpu_to_be32(priv->mr.key);
	iq->priv = priv;

	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq,
	    &iq->wq, &iq->wq_ctrl);
	if (err)
		goto err_free_dma_tag;

	iq->wq.db = &iq->wq.db[MLX5_SND_DBR];

	err = mlx5e_iq_alloc_db(iq);
	if (err)
		goto err_iq_wq_destroy;

	return (0);

err_iq_wq_destroy:
	mlx5_wq_destroy(&iq->wq_ctrl);

err_free_dma_tag:
	bus_dma_tag_destroy(iq->dma_tag);
done:
	return (err);
}

static void
mlx5e_iq_destroy(struct mlx5e_iq *iq)
{
	mlx5e_iq_free_db(iq);
	mlx5_wq_destroy(&iq->wq_ctrl);
	bus_dma_tag_destroy(iq->dma_tag);
}

static int
mlx5e_iq_enable(struct mlx5e_iq *iq, struct mlx5e_sq_param *param,
    const struct mlx5_sq_bfreg *bfreg, int tis_num)
{
	void *in;
	void *sqc;
	void *wq;
	int inlen;
	int err;
	u8 ts_format;

	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
	    sizeof(u64) * iq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	iq->uar_map = bfreg->map;

	ts_format = mlx5_get_sq_default_ts(iq->priv->mdev);
	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
	wq = MLX5_ADDR_OF(sqc, sqc, wq);

	memcpy(sqc, param->sqc, sizeof(param->sqc));

	MLX5_SET(sqc, sqc, tis_num_0, tis_num);
	MLX5_SET(sqc, sqc, cqn, iq->cq.mcq.cqn);
	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
	MLX5_SET(sqc, sqc, ts_format, ts_format);
	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
	MLX5_SET(sqc, sqc, allow_swp, 1);

	/* SQ remap support requires reg_umr privileges level */
	if (MLX5_CAP_QOS(iq->priv->mdev, qos_remap_pp)) {
		MLX5_SET(sqc, sqc, qos_remap_en, 1);
		if (MLX5_CAP_ETH(iq->priv->mdev, reg_umr_sq))
			MLX5_SET(sqc, sqc, reg_umr, 1);
		else
			mlx5_en_err(iq->priv->ifp,
			    "No reg umr SQ capability, SQ remap disabled\n");
	}

	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
	MLX5_SET(wq, wq, uar_page, bfreg->index);
	MLX5_SET(wq, wq, log_wq_pg_sz, iq->wq_ctrl.buf.page_shift -
	    MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(wq, wq, dbr_addr, iq->wq_ctrl.db.dma);

	mlx5_fill_page_array(&iq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_sq(iq->priv->mdev, in, inlen, &iq->sqn);

	kvfree(in);

	return (err);
}

static int
mlx5e_iq_modify(struct mlx5e_iq *iq, int curr_state, int next_state)
{
	void *in;
	void *sqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

	MLX5_SET(modify_sq_in, in, sqn, iq->sqn);
	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
	MLX5_SET(sqc, sqc, state, next_state);

	err = mlx5_core_modify_sq(iq->priv->mdev, in, inlen);

	kvfree(in);

	return (err);
}

static void
mlx5e_iq_disable(struct mlx5e_iq *iq)
{
	mlx5_core_destroy_sq(iq->priv->mdev, iq->sqn);
}

int
mlx5e_iq_open(struct mlx5e_channel *c,
    struct mlx5e_sq_param *sq_param,
    struct mlx5e_cq_param *cq_param,
    struct mlx5e_iq *iq)
{
	int err;

	err = mlx5e_open_cq(c->priv, cq_param, &iq->cq,
	    &mlx5e_iq_completion, c->ix);
	if (err)
		return (err);

	err = mlx5e_iq_create(c, sq_param, iq);
	if (err)
		goto err_close_cq;

	err = mlx5e_iq_enable(iq, sq_param, &c->bfreg, c->priv->tisn[0]);
	if (err)
		goto err_destroy_sq;

	err = mlx5e_iq_modify(iq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
	if (err)
		goto err_disable_sq;

	WRITE_ONCE(iq->running, 1);

	return (0);

err_disable_sq:
	mlx5e_iq_disable(iq);
err_destroy_sq:
	mlx5e_iq_destroy(iq);
err_close_cq:
	mlx5e_close_cq(&iq->cq);

	return (err);
}

static void
mlx5e_iq_drain(struct mlx5e_iq *iq)
{
	struct mlx5_core_dev *mdev = iq->priv->mdev;

	/*
	 * Check if already stopped.
	 *
	 * NOTE: Serialization of this function is managed by the
	 * caller ensuring the priv's state lock is locked or in case
	 * of rate limit support, a single thread manages drain and
	 * resume of SQs. The "running" variable can therefore safely
	 * be read without any locks.
	 */
	if (READ_ONCE(iq->running) == 0)
		return;

	/* don't put more packets into the SQ */
	WRITE_ONCE(iq->running, 0);

	/* wait till SQ is empty or link is down */
	mtx_lock(&iq->lock);
	while (iq->cc != iq->pc &&
	    (iq->priv->media_status_last & IFM_ACTIVE) != 0 &&
	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
	    pci_channel_offline(mdev->pdev) == 0) {
		mtx_unlock(&iq->lock);
		msleep(1);
		iq->cq.mcq.comp(&iq->cq.mcq, NULL);
		mtx_lock(&iq->lock);
	}
	mtx_unlock(&iq->lock);

	/* error out remaining requests */
	(void) mlx5e_iq_modify(iq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);

	/* wait till SQ is empty */
	mtx_lock(&iq->lock);
	while (iq->cc != iq->pc &&
	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
	    pci_channel_offline(mdev->pdev) == 0) {
		mtx_unlock(&iq->lock);
		msleep(1);
		iq->cq.mcq.comp(&iq->cq.mcq, NULL);
		mtx_lock(&iq->lock);
	}
	mtx_unlock(&iq->lock);
}

void
mlx5e_iq_close(struct mlx5e_iq *iq)
{
	mlx5e_iq_drain(iq);
	mlx5e_iq_disable(iq);
	mlx5e_iq_destroy(iq);
	mlx5e_close_cq(&iq->cq);
}

void
mlx5e_iq_static_init(struct mlx5e_iq *iq)
{
	mtx_init(&iq->lock, "mlx5iq",
	    MTX_NETWORK_LOCK " IQ", MTX_DEF);
	mtx_init(&iq->comp_lock, "mlx5iq_comp",
	    MTX_NETWORK_LOCK " IQ COMP", MTX_DEF);
}

void
mlx5e_iq_static_destroy(struct mlx5e_iq *iq)
{
	mtx_destroy(&iq->lock);
	mtx_destroy(&iq->comp_lock);
}
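
/*
 * Rough lifecycle sketch; the surrounding channel code is assumed to
 * drive the IQ the same way it drives a regular SQ (variable names are
 * placeholders):
 *
 *	mlx5e_iq_static_init(iq);	// once, before the first open
 *	error = mlx5e_iq_open(c, &sq_param, &cq_param, iq);
 *	...				// post WQEs, see the sketch at the top
 *	mlx5e_iq_close(iq);		// drains, then destroys the SQ and CQ
 *	mlx5e_iq_static_destroy(iq);	// once, after the final close
 */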

void
mlx5e_iq_notify_hw(struct mlx5e_iq *iq)
{
	mtx_assert(&iq->lock, MA_OWNED);

	/* Check if we need to write the doorbell */
	if (unlikely(iq->db_inhibit != 0 || iq->doorbell.d64 == 0))
		return;

	/* Ensure wqe is visible to device before updating doorbell record */
	wmb();

	*iq->wq.db = cpu_to_be32(iq->pc);

	/*
	 * Ensure the doorbell record is visible to device before ringing
	 * the doorbell:
	 */
	wmb();

	mlx5_write64(iq->doorbell.d32, iq->uar_map,
	    MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));

	iq->doorbell.d64 = 0;
}

static inline bool
mlx5e_iq_has_room_for(struct mlx5e_iq *iq, u16 n)
{
	u16 cc = iq->cc;
	u16 pc = iq->pc;

	return ((iq->wq.sz_m1 & (cc - pc)) >= n || cc == pc);
}

int
mlx5e_iq_get_producer_index(struct mlx5e_iq *iq)
{
	u16 pi;

	mtx_assert(&iq->lock, MA_OWNED);

	if (unlikely(iq->running == 0))
		return (-1);
	if (unlikely(!mlx5e_iq_has_room_for(iq, 2 * MLX5_SEND_WQE_MAX_WQEBBS)))
		return (-1);

	/* Align IQ edge with NOPs to avoid WQE wrap around */
	pi = ((~iq->pc) & iq->wq.sz_m1);
	if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1))) {
		/* Send one multi NOP message instead of many */
		mlx5e_iq_send_nop(iq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
		pi = ((~iq->pc) & iq->wq.sz_m1);
		if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)))
			return (-1);
	}
	return (iq->pc & iq->wq.sz_m1);
}

static void
mlx5e_iq_load_memory_cb(void *arg, bus_dma_segment_t *segs,
    int nseg, int error)
{
	u64 *pdma_address = arg;

	if (unlikely(error || nseg != 1))
		panic("mlx5e_iq_load_memory_cb: error=%d nseg=%d", error, nseg);

	*pdma_address = segs[0].ds_addr;
}

CTASSERT(BUS_DMASYNC_POSTREAD != 0);
CTASSERT(BUS_DMASYNC_POSTWRITE != 0);

void
mlx5e_iq_load_memory_single(struct mlx5e_iq *iq, u16 pi, void *buffer, size_t size,
    u64 *pdma_address, u32 dma_sync)
{
	int error;

	error = bus_dmamap_load(iq->dma_tag, iq->data[pi].dma_map, buffer, size,
	    &mlx5e_iq_load_memory_cb, pdma_address, BUS_DMA_NOWAIT);
	if (unlikely(error))
		panic("mlx5e_iq_load_memory: error=%d buffer=%p size=%zd",
		    error, buffer, size);

	switch (dma_sync) {
	case BUS_DMASYNC_PREREAD:
		iq->data[pi].dma_sync = BUS_DMASYNC_POSTREAD;
		break;
	case BUS_DMASYNC_PREWRITE:
		iq->data[pi].dma_sync = BUS_DMASYNC_POSTWRITE;
		break;
	default:
		panic("mlx5e_iq_load_memory_single: Invalid DMA sync operation(%d)",
		    dma_sync);
	}

	/* make sure data in buffer is visible to hardware */
	bus_dmamap_sync(iq->dma_tag, iq->data[pi].dma_map, dma_sync);
}