1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2024 Oxide Computer Company 14 */ 15 16 #include "ena.h" 17 18 void 19 ena_free_tx_dma(ena_txq_t *txq) 20 { 21 if (txq->et_tcbs != NULL) { 22 for (uint_t i = 0; i < txq->et_sq_num_descs; i++) { 23 ena_tx_control_block_t *tcb = &txq->et_tcbs[i]; 24 ena_dma_free(&tcb->etcb_dma); 25 } 26 27 kmem_free(txq->et_tcbs, 28 sizeof (*txq->et_tcbs) * txq->et_sq_num_descs); 29 30 txq->et_tcbs = NULL; 31 } 32 33 ena_dma_free(&txq->et_cq_dma); 34 txq->et_cq_descs = NULL; 35 36 ena_dma_free(&txq->et_sq_dma); 37 txq->et_sq_descs = NULL; 38 39 txq->et_state &= ~ENA_TXQ_STATE_HOST_ALLOC; 40 } 41 42 static int 43 ena_alloc_tx_dma(ena_txq_t *txq) 44 { 45 ena_t *ena = txq->et_ena; 46 size_t cq_descs_sz; 47 size_t sq_descs_sz; 48 int err = 0; 49 50 ASSERT0(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC); 51 ASSERT3P(ena, !=, NULL); 52 53 cq_descs_sz = txq->et_cq_num_descs * sizeof (*txq->et_cq_descs); 54 sq_descs_sz = txq->et_sq_num_descs * sizeof (*txq->et_sq_descs); 55 56 ena_dma_conf_t sq_conf = { 57 .edc_size = sq_descs_sz, 58 .edc_align = ENAHW_IO_SQ_DESC_BUF_ALIGNMENT, 59 .edc_sgl = 1, 60 .edc_endian = DDI_NEVERSWAP_ACC, 61 .edc_stream = false, 62 }; 63 64 if (!ena_dma_alloc(ena, &txq->et_sq_dma, &sq_conf, sq_descs_sz)) { 65 return (ENOMEM); 66 } 67 68 txq->et_sq_descs = (void *)txq->et_sq_dma.edb_va; 69 txq->et_tcbs = kmem_zalloc(sizeof (*txq->et_tcbs) * 70 txq->et_sq_num_descs, KM_SLEEP); 71 72 for (uint_t i = 0; i < txq->et_sq_num_descs; i++) { 73 ena_tx_control_block_t *tcb = &txq->et_tcbs[i]; 74 ena_dma_conf_t buf_conf = { 75 .edc_size = ena->ena_tx_buf_sz, 76 .edc_align = 1, 77 .edc_sgl = ena->ena_tx_sgl_max_sz, 78 .edc_endian = DDI_NEVERSWAP_ACC, 79 .edc_stream = true, 80 }; 81 82 if (!ena_dma_alloc(ena, &tcb->etcb_dma, &buf_conf, 83 ena->ena_tx_buf_sz)) { 84 err = ENOMEM; 85 goto error; 86 } 87 } 88 89 ena_dma_conf_t cq_conf = { 90 .edc_size = cq_descs_sz, 91 .edc_align = ENAHW_IO_CQ_DESC_BUF_ALIGNMENT, 92 .edc_sgl = 1, 93 .edc_endian = DDI_NEVERSWAP_ACC, 94 .edc_stream = false, 95 }; 96 97 if (!ena_dma_alloc(ena, &txq->et_cq_dma, &cq_conf, cq_descs_sz)) { 98 err = ENOMEM; 99 goto error; 100 } 101 102 txq->et_cq_descs = (void *)txq->et_cq_dma.edb_va; 103 txq->et_state |= ENA_TXQ_STATE_HOST_ALLOC; 104 return (0); 105 106 error: 107 ena_free_tx_dma(txq); 108 return (err); 109 } 110 111 bool 112 ena_alloc_txq(ena_txq_t *txq) 113 { 114 int ret = 0; 115 ena_t *ena = txq->et_ena; 116 uint16_t cq_hw_idx, sq_hw_idx; 117 uint32_t *cq_unmask_addr, *cq_numanode; 118 uint32_t *sq_db_addr; 119 120 ASSERT3U(txq->et_cq_num_descs, >, 0); 121 122 /* 123 * First, allocate the Tx data buffers. 124 */ 125 if ((ret = ena_alloc_tx_dma(txq)) != 0) { 126 ena_err(ena, "failed to allocate Tx queue %u data buffers: %d", 127 txq->et_txqs_idx, ret); 128 return (false); 129 } 130 131 ASSERT(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC); 132 133 /* 134 * Second, create the Completion Queue. 135 */ 136 ret = ena_create_cq(ena, txq->et_cq_num_descs, 137 txq->et_cq_dma.edb_cookie->dmac_laddress, true, 138 txq->et_intr_vector, &cq_hw_idx, &cq_unmask_addr, &cq_numanode); 139 140 if (ret != 0) { 141 ena_err(ena, "failed to create Tx CQ %u: %d", txq->et_txqs_idx, 142 ret); 143 return (false); 144 } 145 146 txq->et_cq_hw_idx = cq_hw_idx; 147 txq->et_cq_phase = 1; 148 txq->et_cq_unmask_addr = cq_unmask_addr; 149 txq->et_cq_numa_addr = cq_numanode; 150 txq->et_state |= ENA_TXQ_STATE_CQ_CREATED; 151 152 /* 153 * Third, create the Submission Queue to match with the above 154 * CQ. At this time we force the SQ and CQ to have the same 155 * number of descriptors as we only use a 1:1 completion 156 * policy. However, in the future, we could loosen this and 157 * use an on-demand completion policy and the two could have a 158 * different number of descriptors. 159 */ 160 ASSERT3U(txq->et_sq_num_descs, ==, txq->et_cq_num_descs); 161 162 ret = ena_create_sq(ena, txq->et_sq_num_descs, 163 txq->et_sq_dma.edb_cookie->dmac_laddress, true, cq_hw_idx, 164 &sq_hw_idx, &sq_db_addr); 165 166 if (ret != 0) { 167 ena_err(ena, "failed to create Tx SQ %u: %d", txq->et_txqs_idx, 168 ret); 169 return (false); 170 } 171 172 txq->et_sq_hw_idx = sq_hw_idx; 173 txq->et_sq_db_addr = sq_db_addr; 174 /* The phase must always start on 1. */ 175 txq->et_sq_phase = 1; 176 txq->et_sq_avail_descs = txq->et_sq_num_descs; 177 txq->et_blocked = false; 178 txq->et_stall_watchdog = 0; 179 txq->et_state |= ENA_TXQ_STATE_SQ_CREATED; 180 181 return (true); 182 } 183 184 void 185 ena_cleanup_txq(ena_txq_t *txq, bool resetting) 186 { 187 int ret = 0; 188 ena_t *ena = txq->et_ena; 189 190 if ((txq->et_state & ENA_TXQ_STATE_SQ_CREATED) != 0) { 191 if (!resetting) { 192 ret = ena_destroy_sq(ena, txq->et_sq_hw_idx, true); 193 194 if (ret != 0) { 195 ena_err(ena, "failed to destroy Tx SQ %u: %d", 196 txq->et_txqs_idx, ret); 197 } 198 } 199 200 txq->et_sq_hw_idx = 0; 201 txq->et_sq_db_addr = NULL; 202 txq->et_sq_tail_idx = 0; 203 txq->et_sq_phase = 0; 204 txq->et_state &= ~ENA_TXQ_STATE_SQ_CREATED; 205 } 206 207 if ((txq->et_state & ENA_TXQ_STATE_CQ_CREATED) != 0) { 208 if (!resetting) { 209 ret = ena_destroy_cq(ena, txq->et_cq_hw_idx); 210 211 if (ret != 0) { 212 ena_err(ena, "failed to destroy Tx CQ %u: %d", 213 txq->et_txqs_idx, ret); 214 } 215 } 216 217 txq->et_cq_hw_idx = 0; 218 txq->et_cq_head_idx = 0; 219 txq->et_cq_phase = 0; 220 txq->et_cq_unmask_addr = NULL; 221 txq->et_cq_numa_addr = NULL; 222 txq->et_state &= ~ENA_TXQ_STATE_CQ_CREATED; 223 } 224 225 ena_free_tx_dma(txq); 226 VERIFY3S(txq->et_state, ==, ENA_TXQ_STATE_NONE); 227 } 228 229 void 230 ena_ring_tx_stop(mac_ring_driver_t rh) 231 { 232 ena_txq_t *txq = (ena_txq_t *)rh; 233 uint32_t intr_ctrl; 234 235 intr_ctrl = ena_hw_abs_read32(txq->et_ena, txq->et_cq_unmask_addr); 236 ENAHW_REG_INTR_UNMASK(intr_ctrl); 237 ena_hw_abs_write32(txq->et_ena, txq->et_cq_unmask_addr, intr_ctrl); 238 239 txq->et_state &= ~ENA_TXQ_STATE_RUNNING; 240 txq->et_state &= ~ENA_TXQ_STATE_READY; 241 } 242 243 int 244 ena_ring_tx_start(mac_ring_driver_t rh, uint64_t gen_num) 245 { 246 ena_txq_t *txq = (ena_txq_t *)rh; 247 ena_t *ena = txq->et_ena; 248 uint32_t intr_ctrl; 249 250 ena_dbg(ena, "ring_tx_start %p: state 0x%x", txq, txq->et_state); 251 252 mutex_enter(&txq->et_lock); 253 txq->et_m_gen_num = gen_num; 254 mutex_exit(&txq->et_lock); 255 256 txq->et_state |= ENA_TXQ_STATE_READY; 257 258 intr_ctrl = ena_hw_abs_read32(ena, txq->et_cq_unmask_addr); 259 ENAHW_REG_INTR_UNMASK(intr_ctrl); 260 ena_hw_abs_write32(ena, txq->et_cq_unmask_addr, intr_ctrl); 261 txq->et_state |= ENA_TXQ_STATE_RUNNING; 262 263 return (0); 264 } 265 266 static void 267 ena_tx_copy_fragment(ena_tx_control_block_t *tcb, const mblk_t *mp, 268 const size_t off, const size_t len) 269 { 270 const void *soff = mp->b_rptr + off; 271 void *doff = 272 (void *)(tcb->etcb_dma.edb_va + tcb->etcb_dma.edb_used_len); 273 274 VERIFY3U(len, >, 0); 275 VERIFY3P(soff, >=, mp->b_rptr); 276 VERIFY3P(soff, <=, mp->b_wptr); 277 VERIFY3U(len, <=, MBLKL(mp)); 278 VERIFY3U((uintptr_t)soff + len, <=, (uintptr_t)mp->b_wptr); 279 VERIFY3U(tcb->etcb_dma.edb_used_len + len, <, tcb->etcb_dma.edb_len); 280 281 bcopy(soff, doff, len); 282 tcb->etcb_type = ENA_TCB_COPY; 283 tcb->etcb_dma.edb_used_len += len; 284 } 285 286 ena_tx_control_block_t * 287 ena_pull_tcb(const ena_txq_t *txq, mblk_t *mp) 288 { 289 mblk_t *nmp = mp; 290 ena_t *ena = txq->et_ena; 291 ena_tx_control_block_t *tcb = NULL; 292 const uint16_t tail_mod = 293 txq->et_sq_tail_idx & (txq->et_sq_num_descs - 1); 294 295 ASSERT(MUTEX_HELD(&txq->et_lock)); 296 VERIFY3U(msgsize(mp), <, ena->ena_tx_buf_sz); 297 298 while (nmp != NULL) { 299 const size_t nmp_len = MBLKL(nmp); 300 301 if (nmp_len == 0) { 302 nmp = nmp->b_cont; 303 continue; 304 } 305 306 /* For now TCB is bound to SQ desc. */ 307 if (tcb == NULL) { 308 tcb = &txq->et_tcbs[tail_mod]; 309 } 310 311 ena_tx_copy_fragment(tcb, nmp, 0, nmp_len); 312 nmp = nmp->b_cont; 313 } 314 315 ENA_DMA_SYNC(tcb->etcb_dma, DDI_DMA_SYNC_FORDEV); 316 VERIFY3P(nmp, ==, NULL); 317 VERIFY3P(tcb, !=, NULL); 318 return (tcb); 319 } 320 321 static void 322 ena_fill_tx_data_desc(ena_txq_t *txq, ena_tx_control_block_t *tcb, 323 uint16_t tail, uint8_t phase, enahw_tx_data_desc_t *desc, 324 mac_ether_offload_info_t *meo, size_t mlen) 325 { 326 VERIFY3U(mlen, <=, ENAHW_TX_DESC_LENGTH_MASK); 327 328 #ifdef DEBUG 329 /* 330 * If there is no header for the specific layer it will be set 331 * to zero, thus we elide the meoi_flags check here. 332 */ 333 size_t hdr_len = meo->meoi_l2hlen + meo->meoi_l3hlen + meo->meoi_l4hlen; 334 ASSERT3U(hdr_len, <=, txq->et_ena->ena_tx_max_hdr_len); 335 #endif 336 337 bzero(desc, sizeof (*desc)); 338 ENAHW_TX_DESC_FIRST_ON(desc); 339 ENAHW_TX_DESC_LENGTH(desc, mlen); 340 ENAHW_TX_DESC_REQID_HI(desc, tail); 341 ENAHW_TX_DESC_REQID_LO(desc, tail); 342 ENAHW_TX_DESC_PHASE(desc, phase); 343 ENAHW_TX_DESC_DF_ON(desc); 344 ENAHW_TX_DESC_LAST_ON(desc); 345 ENAHW_TX_DESC_COMP_REQ_ON(desc); 346 ENAHW_TX_DESC_META_DESC_OFF(desc); 347 ENAHW_TX_DESC_ADDR_LO(desc, tcb->etcb_dma.edb_cookie->dmac_laddress); 348 ENAHW_TX_DESC_ADDR_HI(desc, tcb->etcb_dma.edb_cookie->dmac_laddress); 349 /* 350 * NOTE: Please see the block comment above 351 * etd_buff_addr_hi_hdr_sz to see why this is set to 0. 352 */ 353 ENAHW_TX_DESC_HEADER_LENGTH(desc, 0); 354 ENAHW_TX_DESC_TSO_OFF(desc); 355 ENAHW_TX_DESC_L3_CSUM_OFF(desc); 356 ENAHW_TX_DESC_L4_CSUM_OFF(desc); 357 /* 358 * Enabling this bit tells the device NOT to calculate the 359 * pseudo header checksum. 360 */ 361 ENAHW_TX_DESC_L4_CSUM_PARTIAL_ON(desc); 362 } 363 364 static void 365 ena_submit_tx(ena_txq_t *txq, uint16_t desc_idx) 366 { 367 ena_hw_abs_write32(txq->et_ena, txq->et_sq_db_addr, desc_idx); 368 } 369 370 /* 371 * For now we do the simplest thing possible. All Tx uses bcopy to 372 * pre-allocated buffers, no checksum, no TSO, etc. 373 */ 374 mblk_t * 375 ena_ring_tx(void *arg, mblk_t *mp) 376 { 377 ena_txq_t *txq = arg; 378 ena_t *ena = txq->et_ena; 379 mac_ether_offload_info_t meo; 380 enahw_tx_data_desc_t *desc; 381 ena_tx_control_block_t *tcb; 382 const uint16_t modulo_mask = txq->et_sq_num_descs - 1; 383 uint16_t tail_mod; 384 385 VERIFY3P(mp->b_next, ==, NULL); 386 387 /* 388 * The ena_state value is written by atomic operations. The 389 * et_state value is currently Write Once, but if that changes 390 * it should also be written with atomics. 391 */ 392 if (!(ena->ena_state & ENA_STATE_STARTED) || 393 !(txq->et_state & ENA_TXQ_STATE_RUNNING)) { 394 freemsg(mp); 395 return (NULL); 396 } 397 398 if (mac_ether_offload_info(mp, &meo) != 0) { 399 freemsg(mp); 400 mutex_enter(&txq->et_stat_lock); 401 txq->et_stat.ets_hck_meoifail.value.ui64++; 402 mutex_exit(&txq->et_stat_lock); 403 return (NULL); 404 } 405 406 mutex_enter(&txq->et_lock); 407 408 /* 409 * For the moment there is a 1:1 mapping between Tx descs and 410 * Tx contexts. Currently Tx is copy only, and each context 411 * buffer is guaranteed to be as large as MTU + frame header, 412 * see ena_update_buf_sizes(). 413 */ 414 if (txq->et_blocked || txq->et_sq_avail_descs == 0) { 415 txq->et_blocked = true; 416 mutex_enter(&txq->et_stat_lock); 417 txq->et_stat.ets_blocked.value.ui64++; 418 mutex_exit(&txq->et_stat_lock); 419 mutex_exit(&txq->et_lock); 420 return (mp); 421 } 422 423 ASSERT3U(meo.meoi_len, <=, ena->ena_max_frame_total); 424 tcb = ena_pull_tcb(txq, mp); 425 ASSERT3P(tcb, !=, NULL); 426 tcb->etcb_mp = mp; 427 txq->et_sq_avail_descs--; 428 429 /* Fill in the Tx descriptor. */ 430 tail_mod = txq->et_sq_tail_idx & modulo_mask; 431 desc = &txq->et_sq_descs[tail_mod].etd_data; 432 ena_fill_tx_data_desc(txq, tcb, tail_mod, txq->et_sq_phase, desc, &meo, 433 meo.meoi_len); 434 DTRACE_PROBE3(tx__submit, ena_tx_control_block_t *, tcb, uint16_t, 435 tail_mod, enahw_tx_data_desc_t *, desc); 436 437 /* 438 * Remember, we submit the raw tail value to the device, the 439 * hardware performs its own modulo (like we did to get 440 * tail_mod). 441 */ 442 txq->et_sq_tail_idx++; 443 ena_submit_tx(txq, txq->et_sq_tail_idx); 444 445 mutex_enter(&txq->et_stat_lock); 446 txq->et_stat.ets_packets.value.ui64++; 447 txq->et_stat.ets_bytes.value.ui64 += meo.meoi_len; 448 mutex_exit(&txq->et_stat_lock); 449 450 if ((txq->et_sq_tail_idx & modulo_mask) == 0) 451 txq->et_sq_phase ^= 1; 452 453 mutex_exit(&txq->et_lock); 454 455 return (NULL); 456 } 457 458 void 459 ena_tx_intr_work(ena_txq_t *txq) 460 { 461 uint16_t head_mod; 462 enahw_tx_cdesc_t *cdesc; 463 ena_tx_control_block_t *tcb; 464 uint16_t req_id; 465 uint64_t recycled = 0; 466 bool unblocked = false; 467 const uint16_t modulo_mask = txq->et_cq_num_descs - 1; 468 ena_t *ena = txq->et_ena; 469 470 mutex_enter(&txq->et_lock); 471 head_mod = txq->et_cq_head_idx & modulo_mask; 472 ENA_DMA_SYNC(txq->et_cq_dma, DDI_DMA_SYNC_FORKERNEL); 473 cdesc = &txq->et_cq_descs[head_mod]; 474 475 /* Recycle any completed descriptors. */ 476 while (ENAHW_TX_CDESC_GET_PHASE(cdesc) == txq->et_cq_phase) { 477 mblk_t *mp; 478 479 /* Get the corresponding TCB. */ 480 req_id = cdesc->etc_req_id; 481 if (req_id > txq->et_sq_num_descs) { 482 ena_err(ena, "invalid Tx request ID: 0x%x", req_id); 483 ena_trigger_reset(ena, ENAHW_RESET_INV_TX_REQ_ID); 484 break; 485 } 486 tcb = &txq->et_tcbs[req_id]; 487 DTRACE_PROBE2(tx__complete, uint16_t, req_id, 488 ena_tx_control_block_t *, tcb); 489 490 /* Free the associated mblk. */ 491 tcb->etcb_dma.edb_used_len = 0; 492 mp = tcb->etcb_mp; 493 VERIFY3P(mp, !=, NULL); 494 freemsg(mp); 495 tcb->etcb_mp = NULL; 496 497 /* Add this descriptor back to the free list. */ 498 txq->et_sq_avail_descs++; 499 txq->et_cq_head_idx++; 500 501 /* Check for phase rollover. */ 502 head_mod = txq->et_cq_head_idx & modulo_mask; 503 if (head_mod == 0) 504 txq->et_cq_phase ^= 1; 505 506 if (txq->et_blocked) { 507 txq->et_blocked = false; 508 txq->et_stall_watchdog = 0; 509 unblocked = true; 510 mac_tx_ring_update(ena->ena_mh, txq->et_mrh); 511 } 512 513 recycled++; 514 cdesc = &txq->et_cq_descs[head_mod]; 515 } 516 517 mutex_exit(&txq->et_lock); 518 519 if (recycled == 0) 520 return; 521 522 /* Update stats. */ 523 mutex_enter(&txq->et_stat_lock); 524 txq->et_stat.ets_recycled.value.ui64 += recycled; 525 if (unblocked) { 526 txq->et_stat.ets_unblocked.value.ui64++; 527 } 528 mutex_exit(&txq->et_stat_lock); 529 } 530