1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2024 Oxide Computer Company 14 */ 15 16 #include "ena.h" 17 18 void 19 ena_free_tx_dma(ena_txq_t *txq) 20 { 21 if (txq->et_tcbs != NULL) { 22 for (uint_t i = 0; i < txq->et_sq_num_descs; i++) { 23 ena_tx_control_block_t *tcb = &txq->et_tcbs[i]; 24 ena_dma_free(&tcb->etcb_dma); 25 if (tcb->etcb_mp != NULL) 26 freemsg(tcb->etcb_mp); 27 } 28 29 kmem_free(txq->et_tcbs, 30 sizeof (*txq->et_tcbs) * txq->et_sq_num_descs); 31 kmem_free(txq->et_tcbs_freelist, 32 sizeof (ena_tx_control_block_t *) * txq->et_sq_num_descs); 33 34 txq->et_tcbs = NULL; 35 txq->et_tcbs_freelist = NULL; 36 txq->et_tcbs_freelist_size = 0; 37 } 38 39 ena_dma_free(&txq->et_cq_dma); 40 txq->et_cq_descs = NULL; 41 42 ena_dma_free(&txq->et_sq_dma); 43 txq->et_sq_descs = NULL; 44 45 txq->et_state &= ~ENA_TXQ_STATE_HOST_ALLOC; 46 } 47 48 static int 49 ena_alloc_tx_dma(ena_txq_t *txq) 50 { 51 ena_t *ena = txq->et_ena; 52 size_t cq_descs_sz; 53 size_t sq_descs_sz; 54 int err = 0; 55 56 ASSERT0(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC); 57 ASSERT3P(ena, !=, NULL); 58 59 cq_descs_sz = txq->et_cq_num_descs * sizeof (*txq->et_cq_descs); 60 sq_descs_sz = txq->et_sq_num_descs * sizeof (*txq->et_sq_descs); 61 62 ena_dma_conf_t sq_conf = { 63 .edc_size = sq_descs_sz, 64 .edc_align = ENAHW_IO_SQ_DESC_BUF_ALIGNMENT, 65 .edc_sgl = 1, 66 .edc_endian = DDI_NEVERSWAP_ACC, 67 .edc_stream = false, 68 }; 69 70 if (!ena_dma_alloc(ena, &txq->et_sq_dma, &sq_conf, sq_descs_sz)) { 71 return (ENOMEM); 72 } 73 74 txq->et_sq_descs = (void *)txq->et_sq_dma.edb_va; 75 txq->et_tcbs = kmem_zalloc(sizeof (*txq->et_tcbs) * 76 txq->et_sq_num_descs, KM_SLEEP); 77 txq->et_tcbs_freelist = kmem_zalloc(sizeof (ena_tx_control_block_t *) * 78 txq->et_sq_num_descs, KM_SLEEP); 79 80 for (uint_t i = 0; i < txq->et_sq_num_descs; i++) { 81 ena_tx_control_block_t *tcb = &txq->et_tcbs[i]; 82 ena_dma_conf_t buf_conf = { 83 .edc_size = ena->ena_tx_buf_sz, 84 .edc_align = 1, 85 .edc_sgl = ena->ena_tx_sgl_max_sz, 86 .edc_endian = DDI_NEVERSWAP_ACC, 87 .edc_stream = true, 88 }; 89 90 if (!ena_dma_alloc(ena, &tcb->etcb_dma, &buf_conf, 91 ena->ena_tx_buf_sz)) { 92 err = ENOMEM; 93 goto error; 94 } 95 96 tcb->etcb_id = i; 97 txq->et_tcbs_freelist[i] = tcb; 98 } 99 txq->et_tcbs_freelist_size = txq->et_sq_num_descs; 100 101 ena_dma_conf_t cq_conf = { 102 .edc_size = cq_descs_sz, 103 .edc_align = ENAHW_IO_CQ_DESC_BUF_ALIGNMENT, 104 .edc_sgl = 1, 105 .edc_endian = DDI_NEVERSWAP_ACC, 106 .edc_stream = false, 107 }; 108 109 if (!ena_dma_alloc(ena, &txq->et_cq_dma, &cq_conf, cq_descs_sz)) { 110 err = ENOMEM; 111 goto error; 112 } 113 114 txq->et_cq_descs = (void *)txq->et_cq_dma.edb_va; 115 txq->et_state |= ENA_TXQ_STATE_HOST_ALLOC; 116 return (0); 117 118 error: 119 ena_free_tx_dma(txq); 120 return (err); 121 } 122 123 bool 124 ena_alloc_txq(ena_txq_t *txq) 125 { 126 int ret = 0; 127 ena_t *ena = txq->et_ena; 128 uint16_t cq_hw_idx, sq_hw_idx; 129 uint32_t *cq_unmask_addr, *cq_numanode; 130 uint32_t *sq_db_addr; 131 132 ASSERT3U(txq->et_cq_num_descs, >, 0); 133 134 /* 135 * First, allocate the Tx data buffers. 136 */ 137 if ((ret = ena_alloc_tx_dma(txq)) != 0) { 138 ena_err(ena, "failed to allocate Tx queue %u data buffers: %d", 139 txq->et_txqs_idx, ret); 140 return (false); 141 } 142 143 ASSERT(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC); 144 145 /* 146 * Second, create the Completion Queue. 147 */ 148 ret = ena_create_cq(ena, txq->et_cq_num_descs, 149 txq->et_cq_dma.edb_cookie->dmac_laddress, true, 150 txq->et_intr_vector, &cq_hw_idx, &cq_unmask_addr, &cq_numanode); 151 152 if (ret != 0) { 153 ena_err(ena, "failed to create Tx CQ %u: %d", txq->et_txqs_idx, 154 ret); 155 return (false); 156 } 157 158 txq->et_cq_hw_idx = cq_hw_idx; 159 txq->et_cq_phase = 1; 160 txq->et_cq_unmask_addr = cq_unmask_addr; 161 txq->et_cq_numa_addr = cq_numanode; 162 txq->et_state |= ENA_TXQ_STATE_CQ_CREATED; 163 164 /* 165 * Third, create the Submission Queue to match with the above 166 * CQ. At this time we force the SQ and CQ to have the same 167 * number of descriptors as we only use a 1:1 completion 168 * policy. However, in the future, we could loosen this and 169 * use an on-demand completion policy and the two could have a 170 * different number of descriptors. 171 */ 172 ASSERT3U(txq->et_sq_num_descs, ==, txq->et_cq_num_descs); 173 174 ret = ena_create_sq(ena, txq->et_sq_num_descs, 175 txq->et_sq_dma.edb_cookie->dmac_laddress, true, cq_hw_idx, 176 &sq_hw_idx, &sq_db_addr); 177 178 if (ret != 0) { 179 ena_err(ena, "failed to create Tx SQ %u: %d", txq->et_txqs_idx, 180 ret); 181 return (false); 182 } 183 184 txq->et_sq_hw_idx = sq_hw_idx; 185 txq->et_sq_db_addr = sq_db_addr; 186 /* The phase must always start on 1. */ 187 txq->et_sq_phase = 1; 188 txq->et_sq_avail_descs = txq->et_sq_num_descs; 189 txq->et_blocked = false; 190 txq->et_stall_watchdog = 0; 191 txq->et_state |= ENA_TXQ_STATE_SQ_CREATED; 192 193 return (true); 194 } 195 196 void 197 ena_cleanup_txq(ena_txq_t *txq, bool resetting) 198 { 199 int ret = 0; 200 ena_t *ena = txq->et_ena; 201 202 if ((txq->et_state & ENA_TXQ_STATE_SQ_CREATED) != 0) { 203 if (!resetting) { 204 ret = ena_destroy_sq(ena, txq->et_sq_hw_idx, true); 205 206 if (ret != 0) { 207 ena_err(ena, "failed to destroy Tx SQ %u: %d", 208 txq->et_txqs_idx, ret); 209 } 210 } 211 212 txq->et_sq_hw_idx = 0; 213 txq->et_sq_db_addr = NULL; 214 txq->et_sq_tail_idx = 0; 215 txq->et_sq_phase = 0; 216 txq->et_state &= ~ENA_TXQ_STATE_SQ_CREATED; 217 } 218 219 if ((txq->et_state & ENA_TXQ_STATE_CQ_CREATED) != 0) { 220 if (!resetting) { 221 ret = ena_destroy_cq(ena, txq->et_cq_hw_idx); 222 223 if (ret != 0) { 224 ena_err(ena, "failed to destroy Tx CQ %u: %d", 225 txq->et_txqs_idx, ret); 226 } 227 } 228 229 txq->et_cq_hw_idx = 0; 230 txq->et_cq_head_idx = 0; 231 txq->et_cq_phase = 0; 232 txq->et_cq_unmask_addr = NULL; 233 txq->et_cq_numa_addr = NULL; 234 txq->et_state &= ~ENA_TXQ_STATE_CQ_CREATED; 235 } 236 237 ena_free_tx_dma(txq); 238 VERIFY3S(txq->et_state, ==, ENA_TXQ_STATE_NONE); 239 } 240 241 void 242 ena_ring_tx_stop(mac_ring_driver_t rh) 243 { 244 ena_txq_t *txq = (ena_txq_t *)rh; 245 uint32_t intr_ctrl; 246 247 intr_ctrl = ena_hw_abs_read32(txq->et_ena, txq->et_cq_unmask_addr); 248 ENAHW_REG_INTR_UNMASK(intr_ctrl); 249 ena_hw_abs_write32(txq->et_ena, txq->et_cq_unmask_addr, intr_ctrl); 250 251 txq->et_state &= ~ENA_TXQ_STATE_RUNNING; 252 txq->et_state &= ~ENA_TXQ_STATE_READY; 253 } 254 255 int 256 ena_ring_tx_start(mac_ring_driver_t rh, uint64_t gen_num) 257 { 258 ena_txq_t *txq = (ena_txq_t *)rh; 259 ena_t *ena = txq->et_ena; 260 uint32_t intr_ctrl; 261 262 ena_dbg(ena, "ring_tx_start %p: state 0x%x", txq, txq->et_state); 263 264 mutex_enter(&txq->et_lock); 265 txq->et_m_gen_num = gen_num; 266 mutex_exit(&txq->et_lock); 267 268 txq->et_state |= ENA_TXQ_STATE_READY; 269 270 intr_ctrl = ena_hw_abs_read32(ena, txq->et_cq_unmask_addr); 271 ENAHW_REG_INTR_UNMASK(intr_ctrl); 272 ena_hw_abs_write32(ena, txq->et_cq_unmask_addr, intr_ctrl); 273 txq->et_state |= ENA_TXQ_STATE_RUNNING; 274 275 return (0); 276 } 277 278 static ena_tx_control_block_t * 279 ena_tcb_alloc(ena_txq_t *txq) 280 { 281 ena_tx_control_block_t *tcb; 282 283 ASSERT(MUTEX_HELD(&txq->et_lock)); 284 285 if (txq->et_tcbs_freelist_size == 0) 286 return (NULL); 287 txq->et_tcbs_freelist_size--; 288 tcb = txq->et_tcbs_freelist[txq->et_tcbs_freelist_size]; 289 txq->et_tcbs_freelist[txq->et_tcbs_freelist_size] = NULL; 290 291 return (tcb); 292 } 293 294 static void 295 ena_tcb_free(ena_txq_t *txq, ena_tx_control_block_t *tcb) 296 { 297 ASSERT3P(tcb, !=, NULL); 298 ASSERT(MUTEX_HELD(&txq->et_lock)); 299 ASSERT3U(txq->et_tcbs_freelist_size, <, txq->et_sq_num_descs); 300 txq->et_tcbs_freelist[txq->et_tcbs_freelist_size++] = tcb; 301 } 302 303 304 static void 305 ena_tx_copy_fragment(ena_tx_control_block_t *tcb, const mblk_t *mp, 306 const size_t off, const size_t len) 307 { 308 const void *soff = mp->b_rptr + off; 309 void *doff = 310 (void *)(tcb->etcb_dma.edb_va + tcb->etcb_dma.edb_used_len); 311 312 VERIFY3U(len, >, 0); 313 VERIFY3P(soff, >=, mp->b_rptr); 314 VERIFY3P(soff, <=, mp->b_wptr); 315 VERIFY3U(len, <=, MBLKL(mp)); 316 VERIFY3U((uintptr_t)soff + len, <=, (uintptr_t)mp->b_wptr); 317 VERIFY3U(tcb->etcb_dma.edb_used_len + len, <, tcb->etcb_dma.edb_len); 318 319 bcopy(soff, doff, len); 320 tcb->etcb_type = ENA_TCB_COPY; 321 tcb->etcb_dma.edb_used_len += len; 322 } 323 324 static void 325 ena_tcb_pull(const ena_txq_t *txq, ena_tx_control_block_t *tcb, mblk_t *mp) 326 { 327 mblk_t *nmp = mp; 328 ena_t *ena = txq->et_ena; 329 330 ASSERT(MUTEX_HELD(&txq->et_lock)); 331 VERIFY3U(msgsize(mp), <, ena->ena_tx_buf_sz); 332 ASSERT3P(tcb, !=, NULL); 333 VERIFY0(tcb->etcb_dma.edb_used_len); 334 335 while (nmp != NULL) { 336 const size_t nmp_len = MBLKL(nmp); 337 338 if (nmp_len == 0) { 339 nmp = nmp->b_cont; 340 continue; 341 } 342 343 ena_tx_copy_fragment(tcb, nmp, 0, nmp_len); 344 nmp = nmp->b_cont; 345 } 346 347 ENA_DMA_SYNC(tcb->etcb_dma, DDI_DMA_SYNC_FORDEV); 348 349 VERIFY3P(tcb->etcb_mp, ==, NULL); 350 tcb->etcb_mp = mp; 351 } 352 353 static void 354 ena_fill_tx_data_desc(ena_txq_t *txq, ena_tx_control_block_t *tcb, 355 uint16_t req_id, uint8_t phase, enahw_tx_data_desc_t *desc, 356 mac_ether_offload_info_t *meo, size_t mlen) 357 { 358 VERIFY3U(mlen, <=, ENAHW_TX_DESC_LENGTH_MASK); 359 360 #ifdef DEBUG 361 /* 362 * If there is no header for the specific layer it will be set 363 * to zero, thus we elide the meoi_flags check here. 364 */ 365 size_t hdr_len = meo->meoi_l2hlen + meo->meoi_l3hlen + meo->meoi_l4hlen; 366 ASSERT3U(hdr_len, <=, txq->et_ena->ena_tx_max_hdr_len); 367 #endif 368 369 bzero(desc, sizeof (*desc)); 370 ENAHW_TX_DESC_FIRST_ON(desc); 371 ENAHW_TX_DESC_LENGTH(desc, mlen); 372 ENAHW_TX_DESC_REQID_HI(desc, req_id); 373 ENAHW_TX_DESC_REQID_LO(desc, req_id); 374 ENAHW_TX_DESC_PHASE(desc, phase); 375 ENAHW_TX_DESC_DF_ON(desc); 376 ENAHW_TX_DESC_LAST_ON(desc); 377 ENAHW_TX_DESC_COMP_REQ_ON(desc); 378 ENAHW_TX_DESC_META_DESC_OFF(desc); 379 ENAHW_TX_DESC_ADDR_LO(desc, tcb->etcb_dma.edb_cookie->dmac_laddress); 380 ENAHW_TX_DESC_ADDR_HI(desc, tcb->etcb_dma.edb_cookie->dmac_laddress); 381 /* 382 * NOTE: Please see the block comment above 383 * etd_buff_addr_hi_hdr_sz to see why this is set to 0. 384 */ 385 ENAHW_TX_DESC_HEADER_LENGTH(desc, 0); 386 ENAHW_TX_DESC_TSO_OFF(desc); 387 ENAHW_TX_DESC_L3_CSUM_OFF(desc); 388 ENAHW_TX_DESC_L4_CSUM_OFF(desc); 389 /* 390 * Enabling this bit tells the device NOT to calculate the 391 * pseudo header checksum. 392 */ 393 ENAHW_TX_DESC_L4_CSUM_PARTIAL_ON(desc); 394 } 395 396 static void 397 ena_submit_tx(ena_txq_t *txq, uint16_t desc_idx) 398 { 399 ena_hw_abs_write32(txq->et_ena, txq->et_sq_db_addr, desc_idx); 400 } 401 402 /* 403 * For now we do the simplest thing possible. All Tx uses bcopy to 404 * pre-allocated buffers, no checksum, no TSO, etc. 405 */ 406 mblk_t * 407 ena_ring_tx(void *arg, mblk_t *mp) 408 { 409 ena_txq_t *txq = arg; 410 ena_t *ena = txq->et_ena; 411 mac_ether_offload_info_t meo; 412 enahw_tx_data_desc_t *desc; 413 ena_tx_control_block_t *tcb; 414 const uint16_t modulo_mask = txq->et_sq_num_descs - 1; 415 uint16_t tail_mod; 416 417 VERIFY3P(mp->b_next, ==, NULL); 418 419 /* 420 * The ena_state value is written by atomic operations. The 421 * et_state value is currently Write Once, but if that changes 422 * it should also be written with atomics. 423 */ 424 if (!(ena->ena_state & ENA_STATE_STARTED) || 425 !(txq->et_state & ENA_TXQ_STATE_RUNNING)) { 426 freemsg(mp); 427 return (NULL); 428 } 429 430 if (mac_ether_offload_info(mp, &meo) != 0) { 431 freemsg(mp); 432 mutex_enter(&txq->et_stat_lock); 433 txq->et_stat.ets_hck_meoifail.value.ui64++; 434 mutex_exit(&txq->et_stat_lock); 435 return (NULL); 436 } 437 438 mutex_enter(&txq->et_lock); 439 440 /* 441 * For the moment there are an equal number of Tx descs and Tx 442 * contexts. Currently Tx is copy only, and each context buffer is 443 * guaranteed to be as large as MTU + frame header, see 444 * ena_update_buf_sizes(). 445 */ 446 if (txq->et_blocked || txq->et_sq_avail_descs == 0) { 447 txq->et_blocked = true; 448 mutex_enter(&txq->et_stat_lock); 449 txq->et_stat.ets_blocked.value.ui64++; 450 mutex_exit(&txq->et_stat_lock); 451 mutex_exit(&txq->et_lock); 452 return (mp); 453 } 454 455 ASSERT3U(meo.meoi_len, <=, ena->ena_max_frame_total); 456 457 /* 458 * There are as many pre-allocated TCBs as there are Tx descs so we 459 * should never fail to get one. 460 */ 461 tcb = ena_tcb_alloc(txq); 462 ASSERT3P(tcb, !=, NULL); 463 ena_tcb_pull(txq, tcb, mp); 464 465 /* Fill in the Tx descriptor. */ 466 tail_mod = txq->et_sq_tail_idx & modulo_mask; 467 desc = &txq->et_sq_descs[tail_mod].etd_data; 468 ena_fill_tx_data_desc(txq, tcb, tcb->etcb_id, txq->et_sq_phase, desc, 469 &meo, meo.meoi_len); 470 DTRACE_PROBE3(tx__submit, ena_tx_control_block_t *, tcb, uint16_t, 471 tcb->etcb_id, enahw_tx_data_desc_t *, desc); 472 473 txq->et_sq_avail_descs--; 474 475 /* 476 * Remember, we submit the raw tail value to the device, the 477 * hardware performs its own modulo (like we did to get 478 * tail_mod). 479 */ 480 txq->et_sq_tail_idx++; 481 ena_submit_tx(txq, txq->et_sq_tail_idx); 482 483 mutex_enter(&txq->et_stat_lock); 484 txq->et_stat.ets_packets.value.ui64++; 485 txq->et_stat.ets_bytes.value.ui64 += meo.meoi_len; 486 mutex_exit(&txq->et_stat_lock); 487 488 if ((txq->et_sq_tail_idx & modulo_mask) == 0) 489 txq->et_sq_phase ^= 1; 490 491 mutex_exit(&txq->et_lock); 492 493 return (NULL); 494 } 495 496 void 497 ena_tx_intr_work(ena_txq_t *txq) 498 { 499 uint16_t head_mod; 500 enahw_tx_cdesc_t *cdesc; 501 ena_tx_control_block_t *tcb; 502 uint16_t req_id; 503 uint64_t recycled = 0; 504 bool unblocked = false; 505 const uint16_t modulo_mask = txq->et_cq_num_descs - 1; 506 ena_t *ena = txq->et_ena; 507 508 mutex_enter(&txq->et_lock); 509 head_mod = txq->et_cq_head_idx & modulo_mask; 510 ENA_DMA_SYNC(txq->et_cq_dma, DDI_DMA_SYNC_FORKERNEL); 511 cdesc = &txq->et_cq_descs[head_mod]; 512 513 /* Recycle any completed descriptors. */ 514 while (ENAHW_TX_CDESC_GET_PHASE(cdesc) == txq->et_cq_phase) { 515 mblk_t *mp; 516 517 /* Get the corresponding TCB. */ 518 req_id = cdesc->etc_req_id; 519 if (req_id > txq->et_sq_num_descs) { 520 ena_err(ena, "invalid Tx request ID: 0x%x", req_id); 521 ena_trigger_reset(ena, ENAHW_RESET_INV_TX_REQ_ID); 522 break; 523 } 524 tcb = &txq->et_tcbs[req_id]; 525 DTRACE_PROBE2(tx__complete, uint16_t, req_id, 526 ena_tx_control_block_t *, tcb); 527 528 /* Free the associated mblk. */ 529 tcb->etcb_dma.edb_used_len = 0; 530 mp = tcb->etcb_mp; 531 tcb->etcb_mp = NULL; 532 VERIFY3P(mp, !=, NULL); 533 freemsg(mp); 534 535 /* Add this descriptor back to the free list. */ 536 ena_tcb_free(txq, tcb); 537 txq->et_sq_avail_descs++; 538 539 /* Move on and check for phase rollover. */ 540 txq->et_cq_head_idx++; 541 head_mod = txq->et_cq_head_idx & modulo_mask; 542 if (head_mod == 0) 543 txq->et_cq_phase ^= 1; 544 545 if (txq->et_blocked) { 546 txq->et_blocked = false; 547 txq->et_stall_watchdog = 0; 548 unblocked = true; 549 mac_tx_ring_update(ena->ena_mh, txq->et_mrh); 550 } 551 552 recycled++; 553 cdesc = &txq->et_cq_descs[head_mod]; 554 } 555 556 mutex_exit(&txq->et_lock); 557 558 if (recycled == 0) 559 return; 560 561 /* Update stats. */ 562 mutex_enter(&txq->et_stat_lock); 563 txq->et_stat.ets_recycled.value.ui64 += recycled; 564 if (unblocked) { 565 txq->et_stat.ets_unblocked.value.ui64++; 566 } 567 mutex_exit(&txq->et_stat_lock); 568 } 569