/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2021 Oxide Computer Company
 */

#include "ena.h"

void
ena_free_tx_dma(ena_txq_t *txq)
{
	if (txq->et_tcbs != NULL) {
		for (uint_t i = 0; i < txq->et_sq_num_descs; i++) {
			ena_tx_control_block_t *tcb = &txq->et_tcbs[i];
			ena_dma_free(&tcb->etcb_dma);
		}

		kmem_free(txq->et_tcbs,
		    sizeof (*txq->et_tcbs) * txq->et_sq_num_descs);

		txq->et_tcbs = NULL;
	}

	ena_dma_free(&txq->et_cq_dma);
	txq->et_cq_descs = NULL;

	ena_dma_free(&txq->et_sq_dma);
	txq->et_sq_descs = NULL;

	txq->et_state &= ~ENA_TXQ_STATE_HOST_ALLOC;
}

static int
ena_alloc_tx_dma(ena_txq_t *txq)
{
	ena_t *ena = txq->et_ena;
	size_t cq_descs_sz;
	size_t sq_descs_sz;
	int err = 0;
	ena_dma_conf_t conf;

	ASSERT0(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC);
	ASSERT3P(ena, !=, NULL);

	cq_descs_sz = txq->et_cq_num_descs * sizeof (*txq->et_cq_descs);
	sq_descs_sz = txq->et_sq_num_descs * sizeof (*txq->et_sq_descs);

	conf = (ena_dma_conf_t) {
		.edc_size = sq_descs_sz,
		.edc_align = ENAHW_IO_SQ_DESC_BUF_ALIGNMENT,
		.edc_sgl = 1,
		.edc_endian = DDI_NEVERSWAP_ACC,
		.edc_stream = B_FALSE,
	};

	if (!ena_dma_alloc(ena, &txq->et_sq_dma, &conf, sq_descs_sz)) {
		return (ENOMEM);
	}

	bzero(txq->et_sq_dma.edb_va, sq_descs_sz);
	txq->et_sq_descs = (void *)txq->et_sq_dma.edb_va;
	txq->et_tcbs = kmem_zalloc(sizeof (*txq->et_tcbs) *
	    txq->et_sq_num_descs, KM_SLEEP);

	for (uint_t i = 0; i < txq->et_sq_num_descs; i++) {
		ena_tx_control_block_t *tcb = &txq->et_tcbs[i];
		ena_dma_conf_t buf_conf = {
			.edc_size = ena->ena_tx_buf_sz,
			.edc_align = 1,
			.edc_sgl = ena->ena_tx_sgl_max_sz,
			.edc_endian = DDI_NEVERSWAP_ACC,
			.edc_stream = B_TRUE,
		};

		if (!ena_dma_alloc(ena, &tcb->etcb_dma, &buf_conf,
		    ena->ena_tx_buf_sz)) {
			err = ENOMEM;
			goto error;
		}
	}

	conf = (ena_dma_conf_t) {
		.edc_size = cq_descs_sz,
		.edc_align = ENAHW_IO_CQ_DESC_BUF_ALIGNMENT,
		.edc_sgl = 1,
		.edc_endian = DDI_NEVERSWAP_ACC,
		.edc_stream = B_FALSE,
	};

	if (!ena_dma_alloc(ena, &txq->et_cq_dma, &conf, cq_descs_sz)) {
		err = ENOMEM;
		goto error;
	}

	bzero(txq->et_cq_dma.edb_va, cq_descs_sz);
	txq->et_cq_descs = (void *)txq->et_cq_dma.edb_va;
	txq->et_state |= ENA_TXQ_STATE_HOST_ALLOC;
	return (0);

error:
	ena_free_tx_dma(txq);
	return (err);
}
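
/*
 * The two routines above are paired by the queue setup and teardown
 * paths below: ena_alloc_txq() performs the DMA allocation as its
 * first step and ena_cleanup_txq() performs the free as its last.
 * Because ena_free_tx_dma() checks for a NULL TCB array before
 * walking it, it also tolerates a partially completed allocation,
 * which is why the error path in ena_alloc_tx_dma() can simply call
 * it rather than unwinding each piece by hand. A rough sketch of the
 * pairing (error handling elided; the bare caller shown here is
 * illustrative only):
 *
 *	if (ena_alloc_tx_dma(txq) != 0)
 *		return (ENOMEM);
 *	...
 *	ena_free_tx_dma(txq);
 */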

boolean_t
ena_alloc_txq(ena_txq_t *txq)
{
	int ret = 0;
	ena_t *ena = txq->et_ena;
	uint16_t cq_hw_idx, sq_hw_idx;
	uint32_t *cq_unmask_addr, *cq_headdb, *cq_numanode;
	uint32_t *sq_db_addr;

	ASSERT3U(txq->et_cq_num_descs, >, 0);

	/*
	 * First, allocate the Tx data buffers.
	 */
	if ((ret = ena_alloc_tx_dma(txq)) != 0) {
		ena_err(ena, "failed to allocate Tx queue %u data buffers: %d",
		    txq->et_txqs_idx, ret);
		return (B_FALSE);
	}

	ASSERT(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC);

	/*
	 * Second, create the Completion Queue.
	 */
	ret = ena_create_cq(ena, txq->et_cq_num_descs,
	    txq->et_cq_dma.edb_cookie->dmac_laddress, B_TRUE,
	    txq->et_intr_vector, &cq_hw_idx, &cq_unmask_addr, &cq_headdb,
	    &cq_numanode);

	if (ret != 0) {
		ena_err(ena, "failed to create Tx CQ %u: %d", txq->et_txqs_idx,
		    ret);
		return (B_FALSE);
	}

	txq->et_cq_hw_idx = cq_hw_idx;
	txq->et_cq_phase = 1;
	txq->et_cq_unmask_addr = cq_unmask_addr;
	txq->et_cq_head_db_addr = cq_headdb;
	txq->et_cq_numa_addr = cq_numanode;
	txq->et_state |= ENA_TXQ_STATE_CQ_CREATED;

	/*
	 * Third, create the Submission Queue to match with the above
	 * CQ. At this time we force the SQ and CQ to have the same
	 * number of descriptors as we only use a 1:1 completion
	 * policy. However, in the future, we could loosen this and
	 * use an on-demand completion policy and the two could have a
	 * different number of descriptors.
	 */
	ASSERT3U(txq->et_sq_num_descs, ==, txq->et_cq_num_descs);

	ret = ena_create_sq(ena, txq->et_sq_num_descs,
	    txq->et_sq_dma.edb_cookie->dmac_laddress, B_TRUE, cq_hw_idx,
	    &sq_hw_idx, &sq_db_addr);

	if (ret != 0) {
		ena_err(ena, "failed to create Tx SQ %u: %d", txq->et_txqs_idx,
		    ret);
		return (B_FALSE);
	}

	txq->et_sq_hw_idx = sq_hw_idx;
	txq->et_sq_db_addr = sq_db_addr;
	/* The phase must always start on 1. */
	txq->et_sq_phase = 1;
	txq->et_sq_avail_descs = txq->et_sq_num_descs;
	txq->et_blocked = B_FALSE;
	txq->et_state |= ENA_TXQ_STATE_SQ_CREATED;

	return (B_TRUE);
}

void
ena_cleanup_txq(ena_txq_t *txq)
{
	int ret = 0;
	ena_t *ena = txq->et_ena;

	if ((txq->et_state & ENA_TXQ_STATE_SQ_CREATED) != 0) {
		ret = ena_destroy_sq(ena, txq->et_sq_hw_idx, B_TRUE);

		if (ret != 0) {
			ena_err(ena, "failed to destroy Tx SQ %u: %d",
			    txq->et_txqs_idx, ret);
		}

		txq->et_sq_hw_idx = 0;
		txq->et_sq_db_addr = NULL;
		txq->et_sq_tail_idx = 0;
		txq->et_sq_phase = 0;
		txq->et_state &= ~ENA_TXQ_STATE_SQ_CREATED;
	}

	if ((txq->et_state & ENA_TXQ_STATE_CQ_CREATED) != 0) {
		ret = ena_destroy_cq(ena, txq->et_cq_hw_idx);

		if (ret != 0) {
			ena_err(ena, "failed to destroy Tx CQ %u: %d",
			    txq->et_txqs_idx, ret);
		}

		txq->et_cq_hw_idx = 0;
		txq->et_cq_head_idx = 0;
		txq->et_cq_phase = 0;
		txq->et_cq_head_db_addr = NULL;
		txq->et_cq_unmask_addr = NULL;
		txq->et_cq_numa_addr = NULL;
		txq->et_state &= ~ENA_TXQ_STATE_CQ_CREATED;
	}

	ena_free_tx_dma(txq);
	VERIFY3S(txq->et_state, ==, ENA_TXQ_STATE_NONE);
}
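
/*
 * A rough summary of the et_state transitions driven by the routines
 * in this file:
 *
 *	ENA_TXQ_STATE_NONE
 *	  -> HOST_ALLOC				ena_alloc_tx_dma()
 *	  -> CQ_CREATED -> SQ_CREATED		ena_alloc_txq()
 *	  -> READY -> RUNNING			ena_ring_tx_start()
 *
 * ena_ring_tx_stop() clears RUNNING and READY, ena_cleanup_txq()
 * destroys the SQ and CQ and frees the DMA resources, and the queue
 * is verified to land back at ENA_TXQ_STATE_NONE.
 */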

void
ena_ring_tx_stop(mac_ring_driver_t rh)
{
	ena_txq_t *txq = (ena_txq_t *)rh;
	uint32_t intr_ctrl;

	/* Mask the queue's interrupt now that the ring is stopping. */
	intr_ctrl = ena_hw_abs_read32(txq->et_ena, txq->et_cq_unmask_addr);
	ENAHW_REG_INTR_MASK(intr_ctrl);
	ena_hw_abs_write32(txq->et_ena, txq->et_cq_unmask_addr, intr_ctrl);

	txq->et_state &= ~ENA_TXQ_STATE_RUNNING;
	txq->et_state &= ~ENA_TXQ_STATE_READY;
}

int
ena_ring_tx_start(mac_ring_driver_t rh, uint64_t gen_num)
{
	ena_txq_t *txq = (ena_txq_t *)rh;
	ena_t *ena = txq->et_ena;
	uint32_t intr_ctrl;

	mutex_enter(&txq->et_lock);
	txq->et_m_gen_num = gen_num;
	mutex_exit(&txq->et_lock);

	txq->et_state |= ENA_TXQ_STATE_READY;

	intr_ctrl = ena_hw_abs_read32(ena, txq->et_cq_unmask_addr);
	ENAHW_REG_INTR_UNMASK(intr_ctrl);
	ena_hw_abs_write32(ena, txq->et_cq_unmask_addr, intr_ctrl);

	txq->et_state |= ENA_TXQ_STATE_RUNNING;
	return (0);
}

static void
ena_tx_copy_fragment(ena_tx_control_block_t *tcb, const mblk_t *mp,
    const size_t off, const size_t len)
{
	const void *soff = mp->b_rptr + off;
	void *doff =
	    (void *)(tcb->etcb_dma.edb_va + tcb->etcb_dma.edb_used_len);

	VERIFY3U(len, >, 0);
	VERIFY3P(soff, >=, mp->b_rptr);
	VERIFY3P(soff, <=, mp->b_wptr);
	VERIFY3U(len, <=, MBLKL(mp));
	VERIFY3U((uintptr_t)soff + len, <=, (uintptr_t)mp->b_wptr);
	VERIFY3U(tcb->etcb_dma.edb_used_len + len, <, tcb->etcb_dma.edb_len);

	bcopy(soff, doff, len);
	tcb->etcb_type = ENA_TCB_COPY;
	tcb->etcb_dma.edb_used_len += len;
}

ena_tx_control_block_t *
ena_pull_tcb(const ena_txq_t *txq, mblk_t *mp)
{
	mblk_t *nmp = mp;
	ena_t *ena = txq->et_ena;
	ena_tx_control_block_t *tcb = NULL;
	const uint16_t tail_mod =
	    txq->et_sq_tail_idx & (txq->et_sq_num_descs - 1);

	ASSERT(MUTEX_HELD(&txq->et_lock));
	VERIFY3U(msgsize(mp), <, ena->ena_tx_buf_sz);

	while (nmp != NULL) {
		const size_t nmp_len = MBLKL(nmp);

		if (nmp_len == 0) {
			nmp = nmp->b_cont;
			continue;
		}

		/* For now TCB is bound to SQ desc. */
		if (tcb == NULL) {
			tcb = &txq->et_tcbs[tail_mod];
		}

		ena_tx_copy_fragment(tcb, nmp, 0, nmp_len);
		nmp = nmp->b_cont;
	}

	ENA_DMA_SYNC(tcb->etcb_dma, DDI_DMA_SYNC_FORDEV);
	VERIFY3P(nmp, ==, NULL);
	VERIFY3P(tcb, !=, NULL);
	return (tcb);
}

static void
ena_fill_tx_data_desc(ena_txq_t *txq, ena_tx_control_block_t *tcb,
    uint16_t tail, uint8_t phase, enahw_tx_data_desc_t *desc,
    mac_ether_offload_info_t *meo, size_t mlen)
{
	VERIFY3U(mlen, <=, ENAHW_TX_DESC_LENGTH_MASK);

#ifdef DEBUG
	/*
	 * If there is no header for the specific layer it will be set
	 * to zero, thus we elide the meoi_flags check here.
	 */
	size_t hdr_len = meo->meoi_l2hlen + meo->meoi_l3hlen + meo->meoi_l4hlen;
	ASSERT3U(hdr_len, <=, txq->et_ena->ena_tx_max_hdr_len);
#endif

	bzero(desc, sizeof (*desc));
	ENAHW_TX_DESC_FIRST_ON(desc);
	ENAHW_TX_DESC_LENGTH(desc, mlen);
	ENAHW_TX_DESC_REQID_HI(desc, tail);
	ENAHW_TX_DESC_REQID_LO(desc, tail);
	ENAHW_TX_DESC_PHASE(desc, phase);
	ENAHW_TX_DESC_DF_ON(desc);
	ENAHW_TX_DESC_LAST_ON(desc);
	ENAHW_TX_DESC_COMP_REQ_ON(desc);
	ENAHW_TX_DESC_META_DESC_OFF(desc);
	ENAHW_TX_DESC_ADDR_LO(desc, tcb->etcb_dma.edb_cookie->dmac_laddress);
	ENAHW_TX_DESC_ADDR_HI(desc, tcb->etcb_dma.edb_cookie->dmac_laddress);
	/*
	 * NOTE: Please see the block comment above
	 * etd_buff_addr_hi_hdr_sz to see why this is set to 0.
	 */
	ENAHW_TX_DESC_HEADER_LENGTH(desc, 0);
	ENAHW_TX_DESC_TSO_OFF(desc);
	ENAHW_TX_DESC_L3_CSUM_OFF(desc);
	ENAHW_TX_DESC_L4_CSUM_OFF(desc);
	/*
	 * Enabling this bit tells the device NOT to calculate the
	 * pseudo header checksum.
	 */
	ENAHW_TX_DESC_L4_CSUM_PARTIAL_ON(desc);
}

static void
ena_submit_tx(ena_txq_t *txq, uint16_t desc_idx)
{
	ena_hw_abs_write32(txq->et_ena, txq->et_sq_db_addr, desc_idx);
}
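
/*
 * A small worked example of the tail and phase arithmetic used by
 * ena_ring_tx() below, assuming a hypothetical 8-descriptor SQ (the
 * real count comes from et_sq_num_descs, which must be a power of
 * two for the mask to be valid):
 *
 *	et_sq_tail_idx		tail_mod (tail & 7)	et_sq_phase
 *	0 .. 7			0 .. 7			1
 *	8 .. 15			0 .. 7			0
 *	16 .. 23		0 .. 7			1
 *
 * The raw, free-running tail is what ena_submit_tx() writes to the
 * doorbell; only the host uses tail_mod to pick a descriptor slot.
 * The phase bit flips each time the tail wraps, which is how the
 * device distinguishes freshly written descriptors from stale ones.
 */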

/*
 * For now we do the simplest thing possible. All Tx uses bcopy to
 * pre-allocated buffers, no checksum, no TSO, etc.
 */
mblk_t *
ena_ring_tx(void *arg, mblk_t *mp)
{
	ena_txq_t *txq = arg;
	ena_t *ena = txq->et_ena;
	mac_ether_offload_info_t meo;
	enahw_tx_data_desc_t *desc;
	ena_tx_control_block_t *tcb;
	const uint16_t tail_mod =
	    txq->et_sq_tail_idx & (txq->et_sq_num_descs - 1);

	VERIFY3P(mp->b_next, ==, NULL);
	VERIFY(txq->et_blocked == B_FALSE);

	/*
	 * The ena_state value is written by atomic operations. The
	 * et_state value is currently write-once, but if that changes
	 * it should also be written with atomics.
	 */
	if (!(ena->ena_state & ENA_STATE_RUNNING) ||
	    !(txq->et_state & ENA_TXQ_STATE_RUNNING)) {
		freemsg(mp);
		return (NULL);
	}

	if (mac_ether_offload_info(mp, &meo) != 0) {
		freemsg(mp);
		mutex_enter(&txq->et_stat_lock);
		txq->et_stat.ets_hck_meoifail.value.ui64++;
		mutex_exit(&txq->et_stat_lock);
		return (NULL);
	}

	mutex_enter(&txq->et_lock);

	/*
	 * For the moment there is a 1:1 mapping between Tx descs and
	 * Tx contexts. Currently Tx is copy only, and each context
	 * buffer is guaranteed to be as large as MTU + frame header,
	 * see ena_update_buf_sizes().
	 */
	if (txq->et_sq_avail_descs == 0) {
		txq->et_blocked = B_TRUE;
		mutex_enter(&txq->et_stat_lock);
		txq->et_stat.ets_blocked.value.ui64++;
		mutex_exit(&txq->et_stat_lock);
		mutex_exit(&txq->et_lock);
		return (mp);
	}

	ASSERT3U(meo.meoi_len, <=, ena->ena_max_frame_total);
	tcb = ena_pull_tcb(txq, mp);
	ASSERT3P(tcb, !=, NULL);
	tcb->etcb_mp = mp;
	txq->et_sq_avail_descs--;

	/* Fill in the Tx descriptor. */
	desc = &(txq->et_sq_descs[tail_mod].etd_data);
	ena_fill_tx_data_desc(txq, tcb, tail_mod, txq->et_sq_phase, desc, &meo,
	    meo.meoi_len);
	DTRACE_PROBE3(tx__submit, ena_tx_control_block_t *, tcb, uint16_t,
	    tail_mod, enahw_tx_data_desc_t *, desc);

	/*
	 * Remember, we submit the raw tail value to the device; the
	 * hardware performs its own modulo (just as we did to get
	 * tail_mod).
	 */
	txq->et_sq_tail_idx++;
	ena_submit_tx(txq, txq->et_sq_tail_idx);

	mutex_enter(&txq->et_stat_lock);
	txq->et_stat.ets_packets.value.ui64++;
	txq->et_stat.ets_bytes.value.ui64 += meo.meoi_len;
	mutex_exit(&txq->et_stat_lock);

	if ((txq->et_sq_tail_idx & (txq->et_sq_num_descs - 1)) == 0) {
		txq->et_sq_phase = !txq->et_sq_phase;
	}

	mutex_exit(&txq->et_lock);
	return (NULL);
}
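
/*
 * Notes on the completion path implemented by ena_tx_intr_work()
 * below. Each Tx completion carries the req_id that
 * ena_fill_tx_data_desc() stashed in the data descriptor, which is
 * simply the SQ slot index (tail_mod); that is how the TCB, and thus
 * the mblk to free, is located. Ownership of a CQ entry is conveyed
 * by its phase bit: an entry whose phase matches et_cq_phase was
 * freshly written by the device, while a mismatch means the host has
 * caught up. The expected phase flips each time et_cq_head_idx
 * wraps, mirroring the SQ phase handling sketched above.
 */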

void
ena_tx_intr_work(ena_txq_t *txq)
{
	uint16_t head_mod;
	enahw_tx_cdesc_t *cdesc;
	ena_tx_control_block_t *tcb;
	uint16_t req_id;
	uint64_t recycled = 0;
	boolean_t unblocked = B_FALSE;

	mutex_enter(&txq->et_lock);
	head_mod = txq->et_cq_head_idx & (txq->et_cq_num_descs - 1);
	ENA_DMA_SYNC(txq->et_cq_dma, DDI_DMA_SYNC_FORKERNEL);
	cdesc = &txq->et_cq_descs[head_mod];

	/* Recycle any completed descriptors. */
	while (ENAHW_TX_CDESC_GET_PHASE(cdesc) == txq->et_cq_phase) {
		mblk_t *mp;

		/* Get the corresponding TCB. */
		req_id = cdesc->etc_req_id;
		/*
		 * It would be nice to make this a device reset
		 * instead.
		 */
		VERIFY3U(req_id, <, txq->et_sq_num_descs);
		tcb = &txq->et_tcbs[req_id];
		DTRACE_PROBE2(tx__complete, uint16_t, req_id,
		    ena_tx_control_block_t *, tcb);

		/* Free the associated mblk. */
		tcb->etcb_dma.edb_used_len = 0;
		mp = tcb->etcb_mp;
		/* It would be nice to make this a device reset instead. */
		VERIFY3P(mp, !=, NULL);
		freemsg(mp);
		tcb->etcb_mp = NULL;

		/* Add this descriptor back to the free list. */
		txq->et_sq_avail_descs++;
		txq->et_cq_head_idx++;

		/* Check for phase rollover. */
		head_mod = txq->et_cq_head_idx & (txq->et_cq_num_descs - 1);

		if (head_mod == 0) {
			txq->et_cq_phase = !txq->et_cq_phase;
		}

		if (txq->et_blocked) {
			txq->et_blocked = B_FALSE;
			unblocked = B_TRUE;
			mac_tx_ring_update(txq->et_ena->ena_mh, txq->et_mrh);
		}

		recycled++;
		cdesc = &txq->et_cq_descs[head_mod];
	}

	/*
	 * If the device provided a head doorbell register, then we
	 * need to update it to let the device know we are done
	 * reading these CQ entries.
	 */
	if (txq->et_cq_head_db_addr != NULL) {
		ena_hw_abs_write32(txq->et_ena, txq->et_cq_head_db_addr,
		    head_mod);
	}

	mutex_exit(&txq->et_lock);

	/* Update stats. */
	mutex_enter(&txq->et_stat_lock);
	txq->et_stat.ets_recycled.value.ui64 += recycled;
	if (unblocked) {
		txq->et_stat.ets_unblocked.value.ui64++;
	}
	mutex_exit(&txq->et_stat_lock);
}
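
/*
 * None of the ring entry points above are invoked from within this
 * file; ena_ring_tx_start(), ena_ring_tx_stop(), and ena_ring_tx()
 * are handed to the MAC framework as part of Tx ring registration,
 * which lives elsewhere in the driver. A rough sketch of that wiring,
 * for orientation only (the fill-ring context and its arguments are
 * illustrative, not the actual registration code):
 *
 *	mac_ring_info_t *infop = ...;
 *
 *	infop->mri_driver = (mac_ring_driver_t)txq;
 *	infop->mri_start = ena_ring_tx_start;
 *	infop->mri_stop = ena_ring_tx_stop;
 *	infop->mri_tx = ena_ring_tx;
 */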