1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2024 Oxide Computer Company 14 */ 15 #include "ena.h" 16 17 void 18 ena_free_tx_dma(ena_txq_t *txq) 19 { 20 if (txq->et_tcbs != NULL) { 21 for (uint_t i = 0; i < txq->et_sq_num_descs; i++) { 22 ena_tx_control_block_t *tcb = &txq->et_tcbs[i]; 23 ena_dma_free(&tcb->etcb_dma); 24 } 25 26 kmem_free(txq->et_tcbs, 27 sizeof (*txq->et_tcbs) * txq->et_sq_num_descs); 28 29 txq->et_tcbs = NULL; 30 31 } 32 33 ena_dma_free(&txq->et_cq_dma); 34 txq->et_cq_descs = NULL; 35 36 ena_dma_free(&txq->et_sq_dma); 37 txq->et_sq_descs = NULL; 38 39 txq->et_state &= ~ENA_TXQ_STATE_HOST_ALLOC; 40 } 41 42 static int 43 ena_alloc_tx_dma(ena_txq_t *txq) 44 { 45 ena_t *ena = txq->et_ena; 46 size_t cq_descs_sz; 47 size_t sq_descs_sz; 48 int err = 0; 49 ena_dma_conf_t conf; 50 51 ASSERT0(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC); 52 ASSERT3P(ena, !=, NULL); 53 54 cq_descs_sz = txq->et_cq_num_descs * sizeof (*txq->et_cq_descs); 55 sq_descs_sz = txq->et_sq_num_descs * sizeof (*txq->et_sq_descs); 56 57 /* BEGIN CSTYLED */ 58 conf = (ena_dma_conf_t) { 59 .edc_size = sq_descs_sz, 60 .edc_align = ENAHW_IO_SQ_DESC_BUF_ALIGNMENT, 61 .edc_sgl = 1, 62 .edc_endian = DDI_NEVERSWAP_ACC, 63 .edc_stream = B_FALSE, 64 }; 65 /* END CSTYLED */ 66 67 if (!ena_dma_alloc(ena, &txq->et_sq_dma, &conf, sq_descs_sz)) { 68 return (ENOMEM); 69 } 70 71 bzero(txq->et_sq_dma.edb_va, sq_descs_sz); 72 txq->et_sq_descs = (void *)txq->et_sq_dma.edb_va; 73 txq->et_tcbs = kmem_zalloc(sizeof (*txq->et_tcbs) * 74 txq->et_sq_num_descs, KM_SLEEP); 75 76 for (uint_t i = 0; i < txq->et_sq_num_descs; i++) { 77 ena_tx_control_block_t *tcb = &txq->et_tcbs[i]; 78 ena_dma_conf_t buf_conf = { 79 .edc_size = ena->ena_tx_buf_sz, 80 .edc_align = 1, 81 .edc_sgl = ena->ena_tx_sgl_max_sz, 82 .edc_endian = DDI_NEVERSWAP_ACC, 83 .edc_stream = B_TRUE, 84 }; 85 86 if (!ena_dma_alloc(ena, &tcb->etcb_dma, &buf_conf, 87 ena->ena_tx_buf_sz)) { 88 err = ENOMEM; 89 goto error; 90 } 91 } 92 93 /* BEGIN CSTYLED */ 94 conf = (ena_dma_conf_t) { 95 .edc_size = cq_descs_sz, 96 .edc_align = ENAHW_IO_CQ_DESC_BUF_ALIGNMENT, 97 .edc_sgl = 1, 98 .edc_endian = DDI_NEVERSWAP_ACC, 99 .edc_stream = B_FALSE, 100 }; 101 /* END CSTYLED */ 102 103 if (!ena_dma_alloc(ena, &txq->et_cq_dma, &conf, cq_descs_sz)) { 104 err = ENOMEM; 105 goto error; 106 } 107 108 bzero(txq->et_cq_dma.edb_va, cq_descs_sz); 109 txq->et_cq_descs = (void *)txq->et_cq_dma.edb_va; 110 txq->et_state |= ENA_TXQ_STATE_HOST_ALLOC; 111 return (0); 112 113 error: 114 ena_free_tx_dma(txq); 115 return (err); 116 } 117 118 boolean_t 119 ena_alloc_txq(ena_txq_t *txq) 120 { 121 int ret = 0; 122 ena_t *ena = txq->et_ena; 123 uint16_t cq_hw_idx, sq_hw_idx; 124 uint32_t *cq_unmask_addr, *cq_numanode; 125 uint32_t *sq_db_addr; 126 127 ASSERT3U(txq->et_cq_num_descs, >, 0); 128 129 /* 130 * First, allocate the Tx data buffers. 131 */ 132 if ((ret = ena_alloc_tx_dma(txq)) != 0) { 133 ena_err(ena, "failed to allocate Tx queue %u data buffers: %d", 134 txq->et_txqs_idx, ret); 135 return (B_FALSE); 136 } 137 138 ASSERT(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC); 139 140 /* 141 * Second, create the Completion Queue. 142 */ 143 ret = ena_create_cq(ena, txq->et_cq_num_descs, 144 txq->et_cq_dma.edb_cookie->dmac_laddress, B_TRUE, 145 txq->et_intr_vector, &cq_hw_idx, &cq_unmask_addr, &cq_numanode); 146 147 if (ret != 0) { 148 ena_err(ena, "failed to create Tx CQ %u: %d", txq->et_txqs_idx, 149 ret); 150 return (B_FALSE); 151 } 152 153 txq->et_cq_hw_idx = cq_hw_idx; 154 txq->et_cq_phase = 1; 155 txq->et_cq_unmask_addr = cq_unmask_addr; 156 txq->et_cq_numa_addr = cq_numanode; 157 txq->et_state |= ENA_TXQ_STATE_CQ_CREATED; 158 159 /* 160 * Third, create the Submission Queue to match with the above 161 * CQ. At this time we force the SQ and CQ to have the same 162 * number of descriptors as we only use a 1:1 completion 163 * policy. However, in the future, we could loosen this and 164 * use an on-demand completion policy and the two could have a 165 * different number of descriptors. 166 */ 167 ASSERT3U(txq->et_sq_num_descs, ==, txq->et_cq_num_descs); 168 169 ret = ena_create_sq(ena, txq->et_sq_num_descs, 170 txq->et_sq_dma.edb_cookie->dmac_laddress, B_TRUE, cq_hw_idx, 171 &sq_hw_idx, &sq_db_addr); 172 173 if (ret != 0) { 174 ena_err(ena, "failed to create Tx SQ %u: %d", txq->et_txqs_idx, 175 ret); 176 return (B_FALSE); 177 } 178 179 txq->et_sq_hw_idx = sq_hw_idx; 180 txq->et_sq_db_addr = sq_db_addr; 181 /* The phase must always start on 1. */ 182 txq->et_sq_phase = 1; 183 txq->et_sq_avail_descs = txq->et_sq_num_descs; 184 txq->et_blocked = B_FALSE; 185 txq->et_state |= ENA_TXQ_STATE_SQ_CREATED; 186 187 return (B_TRUE); 188 } 189 190 void 191 ena_cleanup_txq(ena_txq_t *txq) 192 { 193 int ret = 0; 194 ena_t *ena = txq->et_ena; 195 196 if ((txq->et_state & ENA_TXQ_STATE_SQ_CREATED) != 0) { 197 ret = ena_destroy_sq(ena, txq->et_sq_hw_idx, B_TRUE); 198 199 if (ret != 0) { 200 ena_err(ena, "failed to destroy Tx SQ %u: %d", 201 txq->et_txqs_idx, ret); 202 } 203 204 txq->et_sq_hw_idx = 0; 205 txq->et_sq_db_addr = NULL; 206 txq->et_sq_tail_idx = 0; 207 txq->et_sq_phase = 0; 208 txq->et_state &= ~ENA_TXQ_STATE_SQ_CREATED; 209 } 210 211 if ((txq->et_state & ENA_TXQ_STATE_CQ_CREATED) != 0) { 212 ret = ena_destroy_cq(ena, txq->et_cq_hw_idx); 213 214 if (ret != 0) { 215 ena_err(ena, "failed to destroy Tx CQ %u: %d", 216 txq->et_txqs_idx, ret); 217 } 218 219 txq->et_cq_hw_idx = 0; 220 txq->et_cq_head_idx = 0; 221 txq->et_cq_phase = 0; 222 txq->et_cq_unmask_addr = NULL; 223 txq->et_cq_numa_addr = NULL; 224 txq->et_state &= ~ENA_TXQ_STATE_CQ_CREATED; 225 } 226 227 ena_free_tx_dma(txq); 228 VERIFY3S(txq->et_state, ==, ENA_TXQ_STATE_NONE); 229 } 230 231 void 232 ena_ring_tx_stop(mac_ring_driver_t rh) 233 { 234 ena_txq_t *txq = (ena_txq_t *)rh; 235 uint32_t intr_ctrl; 236 237 intr_ctrl = ena_hw_abs_read32(txq->et_ena, txq->et_cq_unmask_addr); 238 ENAHW_REG_INTR_UNMASK(intr_ctrl); 239 ena_hw_abs_write32(txq->et_ena, txq->et_cq_unmask_addr, intr_ctrl); 240 241 txq->et_state &= ~ENA_TXQ_STATE_RUNNING; 242 txq->et_state &= ~ENA_TXQ_STATE_READY; 243 } 244 245 int 246 ena_ring_tx_start(mac_ring_driver_t rh, uint64_t gen_num) 247 { 248 ena_txq_t *txq = (ena_txq_t *)rh; 249 ena_t *ena = txq->et_ena; 250 uint32_t intr_ctrl; 251 252 mutex_enter(&txq->et_lock); 253 txq->et_m_gen_num = gen_num; 254 mutex_exit(&txq->et_lock); 255 256 txq->et_state |= ENA_TXQ_STATE_READY; 257 258 intr_ctrl = ena_hw_abs_read32(ena, txq->et_cq_unmask_addr); 259 ENAHW_REG_INTR_UNMASK(intr_ctrl); 260 ena_hw_abs_write32(ena, txq->et_cq_unmask_addr, intr_ctrl); 261 txq->et_state |= ENA_TXQ_STATE_RUNNING; 262 return (0); 263 } 264 265 static void 266 ena_tx_copy_fragment(ena_tx_control_block_t *tcb, const mblk_t *mp, 267 const size_t off, const size_t len) 268 { 269 const void *soff = mp->b_rptr + off; 270 void *doff = 271 (void *)(tcb->etcb_dma.edb_va + tcb->etcb_dma.edb_used_len); 272 273 VERIFY3U(len, >, 0); 274 VERIFY3P(soff, >=, mp->b_rptr); 275 VERIFY3P(soff, <=, mp->b_wptr); 276 VERIFY3U(len, <=, MBLKL(mp)); 277 VERIFY3U((uintptr_t)soff + len, <=, (uintptr_t)mp->b_wptr); 278 VERIFY3U(tcb->etcb_dma.edb_used_len + len, <, tcb->etcb_dma.edb_len); 279 280 bcopy(soff, doff, len); 281 tcb->etcb_type = ENA_TCB_COPY; 282 tcb->etcb_dma.edb_used_len += len; 283 } 284 285 ena_tx_control_block_t * 286 ena_pull_tcb(const ena_txq_t *txq, mblk_t *mp) 287 { 288 mblk_t *nmp = mp; 289 ena_t *ena = txq->et_ena; 290 ena_tx_control_block_t *tcb = NULL; 291 const uint16_t tail_mod = 292 txq->et_sq_tail_idx & (txq->et_sq_num_descs - 1); 293 294 ASSERT(MUTEX_HELD(&txq->et_lock)); 295 VERIFY3U(msgsize(mp), <, ena->ena_tx_buf_sz); 296 297 while (nmp != NULL) { 298 const size_t nmp_len = MBLKL(nmp); 299 300 if (nmp_len == 0) { 301 nmp = nmp->b_cont; 302 continue; 303 } 304 305 /* For now TCB is bound to SQ desc. */ 306 if (tcb == NULL) { 307 tcb = &txq->et_tcbs[tail_mod]; 308 } 309 310 ena_tx_copy_fragment(tcb, nmp, 0, nmp_len); 311 nmp = nmp->b_cont; 312 } 313 314 ENA_DMA_SYNC(tcb->etcb_dma, DDI_DMA_SYNC_FORDEV); 315 VERIFY3P(nmp, ==, NULL); 316 VERIFY3P(tcb, !=, NULL); 317 return (tcb); 318 } 319 320 static void 321 ena_fill_tx_data_desc(ena_txq_t *txq, ena_tx_control_block_t *tcb, 322 uint16_t tail, uint8_t phase, enahw_tx_data_desc_t *desc, 323 mac_ether_offload_info_t *meo, size_t mlen) 324 { 325 VERIFY3U(mlen, <=, ENAHW_TX_DESC_LENGTH_MASK); 326 327 #ifdef DEBUG 328 /* 329 * If there is no header for the specific layer it will be set 330 * to zero, thus we elide the meoi_flags check here. 331 */ 332 size_t hdr_len = meo->meoi_l2hlen + meo->meoi_l3hlen + meo->meoi_l4hlen; 333 ASSERT3U(hdr_len, <=, txq->et_ena->ena_tx_max_hdr_len); 334 #endif 335 336 bzero(desc, sizeof (*desc)); 337 ENAHW_TX_DESC_FIRST_ON(desc); 338 ENAHW_TX_DESC_LENGTH(desc, mlen); 339 ENAHW_TX_DESC_REQID_HI(desc, tail); 340 ENAHW_TX_DESC_REQID_LO(desc, tail); 341 ENAHW_TX_DESC_PHASE(desc, phase); 342 ENAHW_TX_DESC_DF_ON(desc); 343 ENAHW_TX_DESC_LAST_ON(desc); 344 ENAHW_TX_DESC_COMP_REQ_ON(desc); 345 ENAHW_TX_DESC_META_DESC_OFF(desc); 346 ENAHW_TX_DESC_ADDR_LO(desc, tcb->etcb_dma.edb_cookie->dmac_laddress); 347 ENAHW_TX_DESC_ADDR_HI(desc, tcb->etcb_dma.edb_cookie->dmac_laddress); 348 /* 349 * NOTE: Please see the block comment above 350 * etd_buff_addr_hi_hdr_sz to see why this is set to 0. 351 */ 352 ENAHW_TX_DESC_HEADER_LENGTH(desc, 0); 353 ENAHW_TX_DESC_TSO_OFF(desc); 354 ENAHW_TX_DESC_L3_CSUM_OFF(desc); 355 ENAHW_TX_DESC_L4_CSUM_OFF(desc); 356 /* 357 * Enabling this bit tells the device NOT to calculate the 358 * pseudo header checksum. 359 */ 360 ENAHW_TX_DESC_L4_CSUM_PARTIAL_ON(desc); 361 } 362 363 static void 364 ena_submit_tx(ena_txq_t *txq, uint16_t desc_idx) 365 { 366 ena_hw_abs_write32(txq->et_ena, txq->et_sq_db_addr, desc_idx); 367 } 368 369 /* 370 * For now we do the simplest thing possible. All Tx uses bcopy to 371 * pre-allocated buffers, no checksum, no TSO, etc. 372 */ 373 mblk_t * 374 ena_ring_tx(void *arg, mblk_t *mp) 375 { 376 ena_txq_t *txq = arg; 377 ena_t *ena = txq->et_ena; 378 mac_ether_offload_info_t meo; 379 enahw_tx_data_desc_t *desc; 380 ena_tx_control_block_t *tcb; 381 const uint16_t tail_mod = 382 txq->et_sq_tail_idx & (txq->et_sq_num_descs - 1); 383 384 VERIFY3P(mp->b_next, ==, NULL); 385 386 /* 387 * The ena_state value is written by atomic operations. The 388 * et_state value is currently Write Once, but if that changes 389 * it should also be written with atomics. 390 */ 391 if (!(ena->ena_state & ENA_STATE_RUNNING) || 392 !(txq->et_state & ENA_TXQ_STATE_RUNNING)) { 393 freemsg(mp); 394 return (NULL); 395 } 396 397 if (mac_ether_offload_info(mp, &meo) != 0) { 398 freemsg(mp); 399 mutex_enter(&txq->et_stat_lock); 400 txq->et_stat.ets_hck_meoifail.value.ui64++; 401 mutex_exit(&txq->et_stat_lock); 402 return (NULL); 403 } 404 405 mutex_enter(&txq->et_lock); 406 407 /* 408 * For the moment there is a 1:1 mapping between Tx descs and 409 * Tx contexts. Currently Tx is copy only, and each context 410 * buffer is guaranteed to be as large as MTU + frame header, 411 * see ena_update_buf_sizes(). 412 */ 413 if (txq->et_blocked || txq->et_sq_avail_descs == 0) { 414 txq->et_blocked = B_TRUE; 415 mutex_enter(&txq->et_stat_lock); 416 txq->et_stat.ets_blocked.value.ui64++; 417 mutex_exit(&txq->et_stat_lock); 418 mutex_exit(&txq->et_lock); 419 return (mp); 420 } 421 422 ASSERT3U(meo.meoi_len, <=, ena->ena_max_frame_total); 423 tcb = ena_pull_tcb(txq, mp); 424 ASSERT3P(tcb, !=, NULL); 425 tcb->etcb_mp = mp; 426 txq->et_sq_avail_descs--; 427 428 /* Fill in the Tx descriptor. */ 429 desc = &(txq->et_sq_descs[tail_mod].etd_data); 430 ena_fill_tx_data_desc(txq, tcb, tail_mod, txq->et_sq_phase, desc, &meo, 431 meo.meoi_len); 432 DTRACE_PROBE3(tx__submit, ena_tx_control_block_t *, tcb, uint16_t, 433 tail_mod, enahw_tx_data_desc_t *, desc); 434 435 /* 436 * Remember, we submit the raw tail value to the device, the 437 * hardware performs its own modulo (like we did to get 438 * tail_mod). 439 */ 440 txq->et_sq_tail_idx++; 441 ena_submit_tx(txq, txq->et_sq_tail_idx); 442 443 mutex_enter(&txq->et_stat_lock); 444 txq->et_stat.ets_packets.value.ui64++; 445 txq->et_stat.ets_bytes.value.ui64 += meo.meoi_len; 446 mutex_exit(&txq->et_stat_lock); 447 448 if ((txq->et_sq_tail_idx & (txq->et_sq_num_descs - 1)) == 0) { 449 txq->et_sq_phase ^= 1; 450 } 451 452 mutex_exit(&txq->et_lock); 453 return (NULL); 454 } 455 456 void 457 ena_tx_intr_work(ena_txq_t *txq) 458 { 459 uint16_t head_mod; 460 enahw_tx_cdesc_t *cdesc; 461 ena_tx_control_block_t *tcb; 462 uint16_t req_id; 463 uint64_t recycled = 0; 464 boolean_t unblocked = B_FALSE; 465 466 mutex_enter(&txq->et_lock); 467 head_mod = txq->et_cq_head_idx & (txq->et_cq_num_descs - 1); 468 ENA_DMA_SYNC(txq->et_cq_dma, DDI_DMA_SYNC_FORKERNEL); 469 cdesc = &txq->et_cq_descs[head_mod]; 470 471 /* Recycle any completed descriptors. */ 472 while (ENAHW_TX_CDESC_GET_PHASE(cdesc) == txq->et_cq_phase) { 473 mblk_t *mp; 474 475 /* Get the corresponding TCB. */ 476 req_id = cdesc->etc_req_id; 477 /* 478 * It would be nice to make this a device reset 479 * instead. 480 */ 481 VERIFY3U(req_id, <=, txq->et_sq_num_descs); 482 tcb = &txq->et_tcbs[req_id]; 483 DTRACE_PROBE2(tx__complete, uint16_t, req_id, 484 ena_tx_control_block_t *, tcb); 485 486 /* Free the associated mblk. */ 487 tcb->etcb_dma.edb_used_len = 0; 488 mp = tcb->etcb_mp; 489 /* Make this a device reset instead. */ 490 VERIFY3P(mp, !=, NULL); 491 freemsg(mp); 492 tcb->etcb_mp = NULL; 493 494 /* Add this descriptor back to the free list. */ 495 txq->et_sq_avail_descs++; 496 txq->et_cq_head_idx++; 497 498 /* Check for phase rollover. */ 499 head_mod = txq->et_cq_head_idx & (txq->et_cq_num_descs - 1); 500 501 if (head_mod == 0) { 502 txq->et_cq_phase ^= 1; 503 } 504 505 if (txq->et_blocked) { 506 txq->et_blocked = B_FALSE; 507 unblocked = B_TRUE; 508 mac_tx_ring_update(txq->et_ena->ena_mh, txq->et_mrh); 509 } 510 511 recycled++; 512 cdesc = &txq->et_cq_descs[head_mod]; 513 } 514 515 if (recycled == 0) { 516 mutex_exit(&txq->et_lock); 517 return; 518 } 519 520 mutex_exit(&txq->et_lock); 521 522 /* Update stats. */ 523 mutex_enter(&txq->et_stat_lock); 524 txq->et_stat.ets_recycled.value.ui64 += recycled; 525 if (unblocked) { 526 txq->et_stat.ets_unblocked.value.ui64++; 527 } 528 mutex_exit(&txq->et_stat_lock); 529 } 530