1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_rss.h" 34 #include "ena.h" 35 #include "ena_datapath.h" 36 #ifdef DEV_NETMAP 37 #include "ena_netmap.h" 38 #endif /* DEV_NETMAP */ 39 40 /********************************************************************* 41 * Static functions prototypes 42 *********************************************************************/ 43 44 static int ena_tx_cleanup(struct ena_ring *); 45 static int ena_rx_cleanup(struct ena_ring *); 46 static inline int validate_tx_req_id(struct ena_ring *, uint16_t); 47 static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *, 48 struct mbuf *); 49 static struct mbuf* ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *, 50 struct ena_com_rx_ctx *, uint16_t *); 51 static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *, 52 struct mbuf *); 53 static void ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *, bool); 54 static int ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, 55 struct mbuf **mbuf); 56 static int ena_xmit_mbuf(struct ena_ring *, struct mbuf **); 57 static void ena_start_xmit(struct ena_ring *); 58 59 /********************************************************************* 60 * Global functions 61 *********************************************************************/ 62 63 void 64 ena_cleanup(void *arg, int pending) 65 { 66 struct ena_que *que = arg; 67 struct ena_adapter *adapter = que->adapter; 68 if_t ifp = adapter->ifp; 69 struct ena_ring *tx_ring; 70 struct ena_ring *rx_ring; 71 struct ena_com_io_cq* io_cq; 72 struct ena_eth_io_intr_reg intr_reg; 73 int qid, ena_qid; 74 int txc, rxc, i; 75 76 if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)) 77 return; 78 79 ena_trace(NULL, ENA_DBG, "MSI-X TX/RX routine\n"); 80 81 tx_ring = que->tx_ring; 82 rx_ring = que->rx_ring; 83 qid = que->id; 84 ena_qid = ENA_IO_TXQ_IDX(qid); 85 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; 86 87 tx_ring->first_interrupt = true; 88 rx_ring->first_interrupt = true; 89 90 for (i = 0; i < CLEAN_BUDGET; ++i) { 91 rxc = ena_rx_cleanup(rx_ring); 92 txc = ena_tx_cleanup(tx_ring); 93 94 if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)) 95 return; 96 97 if ((txc != TX_BUDGET) && (rxc != RX_BUDGET)) 98 break; 99 } 100 101 /* Signal that work is done and unmask interrupt */ 102 ena_com_update_intr_reg(&intr_reg, 103 RX_IRQ_INTERVAL, 104 TX_IRQ_INTERVAL, 105 true); 106 ena_com_unmask_intr(io_cq, &intr_reg); 107 } 108 109 void 110 ena_deferred_mq_start(void *arg, int pending) 111 { 112 struct ena_ring *tx_ring = (struct ena_ring *)arg; 113 struct ifnet *ifp = tx_ring->adapter->ifp; 114 115 while (!drbr_empty(ifp, tx_ring->br) && 116 tx_ring->running && 117 (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) { 118 ENA_RING_MTX_LOCK(tx_ring); 119 ena_start_xmit(tx_ring); 120 ENA_RING_MTX_UNLOCK(tx_ring); 121 } 122 } 123 124 int 125 ena_mq_start(if_t ifp, struct mbuf *m) 126 { 127 struct ena_adapter *adapter = ifp->if_softc; 128 struct ena_ring *tx_ring; 129 int ret, is_drbr_empty; 130 uint32_t i; 131 132 if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0)) 133 return (ENODEV); 134 135 /* Which queue to use */ 136 /* 137 * If everything is setup correctly, it should be the 138 * same bucket that the current CPU we're on is. 139 * It should improve performance. 140 */ 141 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 142 i = m->m_pkthdr.flowid % adapter->num_io_queues; 143 } else { 144 i = curcpu % adapter->num_io_queues; 145 } 146 tx_ring = &adapter->tx_ring[i]; 147 148 /* Check if drbr is empty before putting packet */ 149 is_drbr_empty = drbr_empty(ifp, tx_ring->br); 150 ret = drbr_enqueue(ifp, tx_ring->br, m); 151 if (unlikely(ret != 0)) { 152 taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task); 153 return (ret); 154 } 155 156 if (is_drbr_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) { 157 ena_start_xmit(tx_ring); 158 ENA_RING_MTX_UNLOCK(tx_ring); 159 } else { 160 taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task); 161 } 162 163 return (0); 164 } 165 166 void 167 ena_qflush(if_t ifp) 168 { 169 struct ena_adapter *adapter = ifp->if_softc; 170 struct ena_ring *tx_ring = adapter->tx_ring; 171 int i; 172 173 for(i = 0; i < adapter->num_io_queues; ++i, ++tx_ring) 174 if (!drbr_empty(ifp, tx_ring->br)) { 175 ENA_RING_MTX_LOCK(tx_ring); 176 drbr_flush(ifp, tx_ring->br); 177 ENA_RING_MTX_UNLOCK(tx_ring); 178 } 179 180 if_qflush(ifp); 181 } 182 183 /********************************************************************* 184 * Static functions 185 *********************************************************************/ 186 187 static inline int 188 validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id) 189 { 190 struct ena_adapter *adapter = tx_ring->adapter; 191 struct ena_tx_buffer *tx_info = NULL; 192 193 if (likely(req_id < tx_ring->ring_size)) { 194 tx_info = &tx_ring->tx_buffer_info[req_id]; 195 if (tx_info->mbuf != NULL) 196 return (0); 197 device_printf(adapter->pdev, 198 "tx_info doesn't have valid mbuf\n"); 199 } 200 201 device_printf(adapter->pdev, "Invalid req_id: %hu\n", req_id); 202 counter_u64_add(tx_ring->tx_stats.bad_req_id, 1); 203 204 /* Trigger device reset */ 205 ena_trigger_reset(adapter, ENA_REGS_RESET_INV_TX_REQ_ID); 206 207 return (EFAULT); 208 } 209 210 /** 211 * ena_tx_cleanup - clear sent packets and corresponding descriptors 212 * @tx_ring: ring for which we want to clean packets 213 * 214 * Once packets are sent, we ask the device in a loop for no longer used 215 * descriptors. We find the related mbuf chain in a map (index in an array) 216 * and free it, then update ring state. 217 * This is performed in "endless" loop, updating ring pointers every 218 * TX_COMMIT. The first check of free descriptor is performed before the actual 219 * loop, then repeated at the loop end. 220 **/ 221 static int 222 ena_tx_cleanup(struct ena_ring *tx_ring) 223 { 224 struct ena_adapter *adapter; 225 struct ena_com_io_cq* io_cq; 226 uint16_t next_to_clean; 227 uint16_t req_id; 228 uint16_t ena_qid; 229 unsigned int total_done = 0; 230 int rc; 231 int commit = TX_COMMIT; 232 int budget = TX_BUDGET; 233 int work_done; 234 bool above_thresh; 235 236 adapter = tx_ring->que->adapter; 237 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); 238 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; 239 next_to_clean = tx_ring->next_to_clean; 240 241 #ifdef DEV_NETMAP 242 if (netmap_tx_irq(adapter->ifp, tx_ring->qid) != NM_IRQ_PASS) 243 return (0); 244 #endif /* DEV_NETMAP */ 245 246 do { 247 struct ena_tx_buffer *tx_info; 248 struct mbuf *mbuf; 249 250 rc = ena_com_tx_comp_req_id_get(io_cq, &req_id); 251 if (unlikely(rc != 0)) 252 break; 253 254 rc = validate_tx_req_id(tx_ring, req_id); 255 if (unlikely(rc != 0)) 256 break; 257 258 tx_info = &tx_ring->tx_buffer_info[req_id]; 259 260 mbuf = tx_info->mbuf; 261 262 tx_info->mbuf = NULL; 263 bintime_clear(&tx_info->timestamp); 264 265 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap, 266 BUS_DMASYNC_POSTWRITE); 267 bus_dmamap_unload(adapter->tx_buf_tag, 268 tx_info->dmamap); 269 270 ena_trace(NULL, ENA_DBG | ENA_TXPTH, "tx: q %d mbuf %p completed\n", 271 tx_ring->qid, mbuf); 272 273 m_freem(mbuf); 274 275 total_done += tx_info->tx_descs; 276 277 tx_ring->free_tx_ids[next_to_clean] = req_id; 278 next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, 279 tx_ring->ring_size); 280 281 if (unlikely(--commit == 0)) { 282 commit = TX_COMMIT; 283 /* update ring state every TX_COMMIT descriptor */ 284 tx_ring->next_to_clean = next_to_clean; 285 ena_com_comp_ack( 286 &adapter->ena_dev->io_sq_queues[ena_qid], 287 total_done); 288 ena_com_update_dev_comp_head(io_cq); 289 total_done = 0; 290 } 291 } while (likely(--budget)); 292 293 work_done = TX_BUDGET - budget; 294 295 ena_trace(NULL, ENA_DBG | ENA_TXPTH, "tx: q %d done. total pkts: %d\n", 296 tx_ring->qid, work_done); 297 298 /* If there is still something to commit update ring state */ 299 if (likely(commit != TX_COMMIT)) { 300 tx_ring->next_to_clean = next_to_clean; 301 ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid], 302 total_done); 303 ena_com_update_dev_comp_head(io_cq); 304 } 305 306 /* 307 * Need to make the rings circular update visible to 308 * ena_xmit_mbuf() before checking for tx_ring->running. 309 */ 310 mb(); 311 312 above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 313 ENA_TX_RESUME_THRESH); 314 if (unlikely(!tx_ring->running && above_thresh)) { 315 ENA_RING_MTX_LOCK(tx_ring); 316 above_thresh = 317 ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 318 ENA_TX_RESUME_THRESH); 319 if (!tx_ring->running && above_thresh) { 320 tx_ring->running = true; 321 counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1); 322 taskqueue_enqueue(tx_ring->enqueue_tq, 323 &tx_ring->enqueue_task); 324 } 325 ENA_RING_MTX_UNLOCK(tx_ring); 326 } 327 328 return (work_done); 329 } 330 331 static void 332 ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx, 333 struct mbuf *mbuf) 334 { 335 struct ena_adapter *adapter = rx_ring->adapter; 336 337 if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 338 mbuf->m_pkthdr.flowid = ena_rx_ctx->hash; 339 340 #ifdef RSS 341 /* 342 * Hardware and software RSS are in agreement only when both are 343 * configured to Toeplitz algorithm. This driver configures 344 * that algorithm only when software RSS is enabled and uses it. 345 */ 346 if (adapter->ena_dev->rss.hash_func != ENA_ADMIN_TOEPLITZ && 347 ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN) { 348 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH); 349 return; 350 } 351 #endif 352 353 if (ena_rx_ctx->frag && 354 (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) { 355 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH); 356 return; 357 } 358 359 switch (ena_rx_ctx->l3_proto) { 360 case ENA_ETH_IO_L3_PROTO_IPV4: 361 switch (ena_rx_ctx->l4_proto) { 362 case ENA_ETH_IO_L4_PROTO_TCP: 363 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4); 364 break; 365 case ENA_ETH_IO_L4_PROTO_UDP: 366 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4); 367 break; 368 default: 369 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4); 370 } 371 break; 372 case ENA_ETH_IO_L3_PROTO_IPV6: 373 switch (ena_rx_ctx->l4_proto) { 374 case ENA_ETH_IO_L4_PROTO_TCP: 375 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6); 376 break; 377 case ENA_ETH_IO_L4_PROTO_UDP: 378 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6); 379 break; 380 default: 381 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6); 382 } 383 break; 384 case ENA_ETH_IO_L3_PROTO_UNKNOWN: 385 M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE); 386 break; 387 default: 388 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH); 389 } 390 } else { 391 mbuf->m_pkthdr.flowid = rx_ring->qid; 392 M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE); 393 } 394 } 395 396 /** 397 * ena_rx_mbuf - assemble mbuf from descriptors 398 * @rx_ring: ring for which we want to clean packets 399 * @ena_bufs: buffer info 400 * @ena_rx_ctx: metadata for this packet(s) 401 * @next_to_clean: ring pointer, will be updated only upon success 402 * 403 **/ 404 static struct mbuf* 405 ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, 406 struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean) 407 { 408 struct mbuf *mbuf; 409 struct ena_rx_buffer *rx_info; 410 struct ena_adapter *adapter; 411 unsigned int descs = ena_rx_ctx->descs; 412 uint16_t ntc, len, req_id, buf = 0; 413 414 ntc = *next_to_clean; 415 adapter = rx_ring->adapter; 416 417 len = ena_bufs[buf].len; 418 req_id = ena_bufs[buf].req_id; 419 rx_info = &rx_ring->rx_buffer_info[req_id]; 420 if (unlikely(rx_info->mbuf == NULL)) { 421 device_printf(adapter->pdev, "NULL mbuf in rx_info"); 422 return (NULL); 423 } 424 425 ena_trace(NULL, ENA_DBG | ENA_RXPTH, "rx_info %p, mbuf %p, paddr %jx\n", 426 rx_info, rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr); 427 428 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, 429 BUS_DMASYNC_POSTREAD); 430 mbuf = rx_info->mbuf; 431 mbuf->m_flags |= M_PKTHDR; 432 mbuf->m_pkthdr.len = len; 433 mbuf->m_len = len; 434 // Only for the first segment the data starts at specific offset 435 mbuf->m_data = mtodo(mbuf, ena_rx_ctx->pkt_offset); 436 ena_trace(NULL, ENA_DBG | ENA_RXPTH, 437 "Mbuf data offset=%u\n", ena_rx_ctx->pkt_offset); 438 mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp; 439 440 /* Fill mbuf with hash key and it's interpretation for optimization */ 441 ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf); 442 443 ena_trace(NULL, ENA_DBG | ENA_RXPTH, "rx mbuf 0x%p, flags=0x%x, len: %d\n", 444 mbuf, mbuf->m_flags, mbuf->m_pkthdr.len); 445 446 /* DMA address is not needed anymore, unmap it */ 447 bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map); 448 449 rx_info->mbuf = NULL; 450 rx_ring->free_rx_ids[ntc] = req_id; 451 ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size); 452 453 /* 454 * While we have more than 1 descriptors for one rcvd packet, append 455 * other mbufs to the main one 456 */ 457 while (--descs) { 458 ++buf; 459 len = ena_bufs[buf].len; 460 req_id = ena_bufs[buf].req_id; 461 rx_info = &rx_ring->rx_buffer_info[req_id]; 462 463 if (unlikely(rx_info->mbuf == NULL)) { 464 device_printf(adapter->pdev, "NULL mbuf in rx_info"); 465 /* 466 * If one of the required mbufs was not allocated yet, 467 * we can break there. 468 * All earlier used descriptors will be reallocated 469 * later and not used mbufs can be reused. 470 * The next_to_clean pointer will not be updated in case 471 * of an error, so caller should advance it manually 472 * in error handling routine to keep it up to date 473 * with hw ring. 474 */ 475 m_freem(mbuf); 476 return (NULL); 477 } 478 479 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, 480 BUS_DMASYNC_POSTREAD); 481 if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) { 482 counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1); 483 ena_trace(NULL, ENA_WARNING, "Failed to append Rx mbuf %p\n", 484 mbuf); 485 } 486 487 ena_trace(NULL, ENA_DBG | ENA_RXPTH, 488 "rx mbuf updated. len %d\n", mbuf->m_pkthdr.len); 489 490 /* Free already appended mbuf, it won't be useful anymore */ 491 bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map); 492 m_freem(rx_info->mbuf); 493 rx_info->mbuf = NULL; 494 495 rx_ring->free_rx_ids[ntc] = req_id; 496 ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size); 497 } 498 499 *next_to_clean = ntc; 500 501 return (mbuf); 502 } 503 504 /** 505 * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum 506 **/ 507 static inline void 508 ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx, 509 struct mbuf *mbuf) 510 { 511 512 /* if IP and error */ 513 if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) && 514 ena_rx_ctx->l3_csum_err)) { 515 /* ipv4 checksum error */ 516 mbuf->m_pkthdr.csum_flags = 0; 517 counter_u64_add(rx_ring->rx_stats.bad_csum, 1); 518 ena_trace(NULL, ENA_DBG, "RX IPv4 header checksum error\n"); 519 return; 520 } 521 522 /* if TCP/UDP */ 523 if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || 524 (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) { 525 if (ena_rx_ctx->l4_csum_err) { 526 /* TCP/UDP checksum error */ 527 mbuf->m_pkthdr.csum_flags = 0; 528 counter_u64_add(rx_ring->rx_stats.bad_csum, 1); 529 ena_trace(NULL, ENA_DBG, "RX L4 checksum error\n"); 530 } else { 531 mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED; 532 mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID; 533 } 534 } 535 } 536 537 /** 538 * ena_rx_cleanup - handle rx irq 539 * @arg: ring for which irq is being handled 540 **/ 541 static int 542 ena_rx_cleanup(struct ena_ring *rx_ring) 543 { 544 struct ena_adapter *adapter; 545 struct mbuf *mbuf; 546 struct ena_com_rx_ctx ena_rx_ctx; 547 struct ena_com_io_cq* io_cq; 548 struct ena_com_io_sq* io_sq; 549 enum ena_regs_reset_reason_types reset_reason; 550 if_t ifp; 551 uint16_t ena_qid; 552 uint16_t next_to_clean; 553 uint32_t refill_required; 554 uint32_t refill_threshold; 555 uint32_t do_if_input = 0; 556 unsigned int qid; 557 int rc, i; 558 int budget = RX_BUDGET; 559 #ifdef DEV_NETMAP 560 int done; 561 #endif /* DEV_NETMAP */ 562 563 adapter = rx_ring->que->adapter; 564 ifp = adapter->ifp; 565 qid = rx_ring->que->id; 566 ena_qid = ENA_IO_RXQ_IDX(qid); 567 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; 568 io_sq = &adapter->ena_dev->io_sq_queues[ena_qid]; 569 next_to_clean = rx_ring->next_to_clean; 570 571 #ifdef DEV_NETMAP 572 if (netmap_rx_irq(adapter->ifp, rx_ring->qid, &done) != NM_IRQ_PASS) 573 return (0); 574 #endif /* DEV_NETMAP */ 575 576 ena_trace(NULL, ENA_DBG, "rx: qid %d\n", qid); 577 578 do { 579 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; 580 ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size; 581 ena_rx_ctx.descs = 0; 582 ena_rx_ctx.pkt_offset = 0; 583 584 bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag, 585 io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD); 586 rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx); 587 if (unlikely(rc != 0)) { 588 if (rc == ENA_COM_NO_SPACE) { 589 counter_u64_add(rx_ring->rx_stats.bad_desc_num, 590 1); 591 reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS; 592 } else { 593 counter_u64_add(rx_ring->rx_stats.bad_req_id, 594 1); 595 reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID; 596 } 597 ena_trigger_reset(adapter, reset_reason); 598 return (0); 599 } 600 601 if (unlikely(ena_rx_ctx.descs == 0)) 602 break; 603 604 ena_trace(NULL, ENA_DBG | ENA_RXPTH, "rx: q %d got packet from ena. " 605 "descs #: %d l3 proto %d l4 proto %d hash: %x\n", 606 rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, 607 ena_rx_ctx.l4_proto, ena_rx_ctx.hash); 608 609 /* Receive mbuf from the ring */ 610 mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs, 611 &ena_rx_ctx, &next_to_clean); 612 bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag, 613 io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD); 614 /* Exit if we failed to retrieve a buffer */ 615 if (unlikely(mbuf == NULL)) { 616 for (i = 0; i < ena_rx_ctx.descs; ++i) { 617 rx_ring->free_rx_ids[next_to_clean] = 618 rx_ring->ena_bufs[i].req_id; 619 next_to_clean = 620 ENA_RX_RING_IDX_NEXT(next_to_clean, 621 rx_ring->ring_size); 622 623 } 624 break; 625 } 626 627 if (((ifp->if_capenable & IFCAP_RXCSUM) != 0) || 628 ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0)) { 629 ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf); 630 } 631 632 counter_enter(); 633 counter_u64_add_protected(rx_ring->rx_stats.bytes, 634 mbuf->m_pkthdr.len); 635 counter_u64_add_protected(adapter->hw_stats.rx_bytes, 636 mbuf->m_pkthdr.len); 637 counter_exit(); 638 /* 639 * LRO is only for IP/TCP packets and TCP checksum of the packet 640 * should be computed by hardware. 641 */ 642 do_if_input = 1; 643 if (((ifp->if_capenable & IFCAP_LRO) != 0) && 644 ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) && 645 (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) { 646 /* 647 * Send to the stack if: 648 * - LRO not enabled, or 649 * - no LRO resources, or 650 * - lro enqueue fails 651 */ 652 if ((rx_ring->lro.lro_cnt != 0) && 653 (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0)) 654 do_if_input = 0; 655 } 656 if (do_if_input != 0) { 657 ena_trace(NULL, ENA_DBG | ENA_RXPTH, 658 "calling if_input() with mbuf %p\n", mbuf); 659 (*ifp->if_input)(ifp, mbuf); 660 } 661 662 counter_enter(); 663 counter_u64_add_protected(rx_ring->rx_stats.cnt, 1); 664 counter_u64_add_protected(adapter->hw_stats.rx_packets, 1); 665 counter_exit(); 666 } while (--budget); 667 668 rx_ring->next_to_clean = next_to_clean; 669 670 refill_required = ena_com_free_q_entries(io_sq); 671 refill_threshold = min_t(int, 672 rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER, 673 ENA_RX_REFILL_THRESH_PACKET); 674 675 if (refill_required > refill_threshold) { 676 ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); 677 ena_refill_rx_bufs(rx_ring, refill_required); 678 } 679 680 tcp_lro_flush_all(&rx_ring->lro); 681 682 return (RX_BUDGET - budget); 683 } 684 685 static void 686 ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf, 687 bool disable_meta_caching) 688 { 689 struct ena_com_tx_meta *ena_meta; 690 struct ether_vlan_header *eh; 691 struct mbuf *mbuf_next; 692 u32 mss; 693 bool offload; 694 uint16_t etype; 695 int ehdrlen; 696 struct ip *ip; 697 int iphlen; 698 struct tcphdr *th; 699 int offset; 700 701 offload = false; 702 ena_meta = &ena_tx_ctx->ena_meta; 703 mss = mbuf->m_pkthdr.tso_segsz; 704 705 if (mss != 0) 706 offload = true; 707 708 if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) 709 offload = true; 710 711 if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0) 712 offload = true; 713 714 if (!offload) { 715 if (disable_meta_caching) { 716 memset(ena_meta, 0, sizeof(*ena_meta)); 717 ena_tx_ctx->meta_valid = 1; 718 } else { 719 ena_tx_ctx->meta_valid = 0; 720 } 721 return; 722 } 723 724 /* Determine where frame payload starts. */ 725 eh = mtod(mbuf, struct ether_vlan_header *); 726 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 727 etype = ntohs(eh->evl_proto); 728 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 729 } else { 730 etype = ntohs(eh->evl_encap_proto); 731 ehdrlen = ETHER_HDR_LEN; 732 } 733 734 mbuf_next = m_getptr(mbuf, ehdrlen, &offset); 735 ip = (struct ip *)(mtodo(mbuf_next, offset)); 736 iphlen = ip->ip_hl << 2; 737 738 mbuf_next = m_getptr(mbuf, iphlen + ehdrlen, &offset); 739 th = (struct tcphdr *)(mtodo(mbuf_next, offset)); 740 741 if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) { 742 ena_tx_ctx->l3_csum_enable = 1; 743 } 744 if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) { 745 ena_tx_ctx->tso_enable = 1; 746 ena_meta->l4_hdr_len = (th->th_off); 747 } 748 749 switch (etype) { 750 case ETHERTYPE_IP: 751 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; 752 if ((ip->ip_off & htons(IP_DF)) != 0) 753 ena_tx_ctx->df = 1; 754 break; 755 case ETHERTYPE_IPV6: 756 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; 757 758 default: 759 break; 760 } 761 762 if (ip->ip_p == IPPROTO_TCP) { 763 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; 764 if ((mbuf->m_pkthdr.csum_flags & 765 (CSUM_IP_TCP | CSUM_IP6_TCP)) != 0) 766 ena_tx_ctx->l4_csum_enable = 1; 767 else 768 ena_tx_ctx->l4_csum_enable = 0; 769 } else if (ip->ip_p == IPPROTO_UDP) { 770 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; 771 if ((mbuf->m_pkthdr.csum_flags & 772 (CSUM_IP_UDP | CSUM_IP6_UDP)) != 0) 773 ena_tx_ctx->l4_csum_enable = 1; 774 else 775 ena_tx_ctx->l4_csum_enable = 0; 776 } else { 777 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN; 778 ena_tx_ctx->l4_csum_enable = 0; 779 } 780 781 ena_meta->mss = mss; 782 ena_meta->l3_hdr_len = iphlen; 783 ena_meta->l3_hdr_offset = ehdrlen; 784 ena_tx_ctx->meta_valid = 1; 785 } 786 787 static int 788 ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf) 789 { 790 struct ena_adapter *adapter; 791 struct mbuf *collapsed_mbuf; 792 int num_frags; 793 794 adapter = tx_ring->adapter; 795 num_frags = ena_mbuf_count(*mbuf); 796 797 /* One segment must be reserved for configuration descriptor. */ 798 if (num_frags < adapter->max_tx_sgl_size) 799 return (0); 800 counter_u64_add(tx_ring->tx_stats.collapse, 1); 801 802 collapsed_mbuf = m_collapse(*mbuf, M_NOWAIT, 803 adapter->max_tx_sgl_size - 1); 804 if (unlikely(collapsed_mbuf == NULL)) { 805 counter_u64_add(tx_ring->tx_stats.collapse_err, 1); 806 return (ENOMEM); 807 } 808 809 /* If mbuf was collapsed succesfully, original mbuf is released. */ 810 *mbuf = collapsed_mbuf; 811 812 return (0); 813 } 814 815 static int 816 ena_tx_map_mbuf(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info, 817 struct mbuf *mbuf, void **push_hdr, u16 *header_len) 818 { 819 struct ena_adapter *adapter = tx_ring->adapter; 820 struct ena_com_buf *ena_buf; 821 bus_dma_segment_t segs[ENA_BUS_DMA_SEGS]; 822 size_t iseg = 0; 823 uint32_t mbuf_head_len; 824 uint16_t offset; 825 int rc, nsegs; 826 827 mbuf_head_len = mbuf->m_len; 828 tx_info->mbuf = mbuf; 829 ena_buf = tx_info->bufs; 830 831 /* 832 * For easier maintaining of the DMA map, map the whole mbuf even if 833 * the LLQ is used. The descriptors will be filled using the segments. 834 */ 835 rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag, tx_info->dmamap, mbuf, 836 segs, &nsegs, BUS_DMA_NOWAIT); 837 if (unlikely((rc != 0) || (nsegs == 0))) { 838 ena_trace(NULL, ENA_WARNING, 839 "dmamap load failed! err: %d nsegs: %d\n", rc, nsegs); 840 goto dma_error; 841 } 842 843 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 844 /* 845 * When the device is LLQ mode, the driver will copy 846 * the header into the device memory space. 847 * the ena_com layer assumes the header is in a linear 848 * memory space. 849 * This assumption might be wrong since part of the header 850 * can be in the fragmented buffers. 851 * First check if header fits in the mbuf. If not, copy it to 852 * separate buffer that will be holding linearized data. 853 */ 854 *header_len = min_t(uint32_t, mbuf->m_pkthdr.len, tx_ring->tx_max_header_size); 855 856 /* If header is in linear space, just point into mbuf's data. */ 857 if (likely(*header_len <= mbuf_head_len)) { 858 *push_hdr = mbuf->m_data; 859 /* 860 * Otherwise, copy whole portion of header from multiple mbufs 861 * to intermediate buffer. 862 */ 863 } else { 864 m_copydata(mbuf, 0, *header_len, tx_ring->push_buf_intermediate_buf); 865 *push_hdr = tx_ring->push_buf_intermediate_buf; 866 867 counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1); 868 } 869 870 ena_trace(NULL, ENA_DBG | ENA_TXPTH, 871 "mbuf: %p header_buf->vaddr: %p push_len: %d\n", 872 mbuf, *push_hdr, *header_len); 873 874 /* If packet is fitted in LLQ header, no need for DMA segments. */ 875 if (mbuf->m_pkthdr.len <= tx_ring->tx_max_header_size) { 876 return (0); 877 } else { 878 offset = tx_ring->tx_max_header_size; 879 /* 880 * As Header part is mapped to LLQ header, we can skip it and just 881 * map the residuum of the mbuf to DMA Segments. 882 */ 883 while (offset > 0) { 884 if (offset >= segs[iseg].ds_len) { 885 offset -= segs[iseg].ds_len; 886 } else { 887 ena_buf->paddr = segs[iseg].ds_addr + offset; 888 ena_buf->len = segs[iseg].ds_len - offset; 889 ena_buf++; 890 tx_info->num_of_bufs++; 891 offset = 0; 892 } 893 iseg++; 894 } 895 } 896 } else { 897 *push_hdr = NULL; 898 /* 899 * header_len is just a hint for the device. Because FreeBSD is not 900 * giving us information about packet header length and it is not 901 * guaranteed that all packet headers will be in the 1st mbuf, setting 902 * header_len to 0 is making the device ignore this value and resolve 903 * header on it's own. 904 */ 905 *header_len = 0; 906 } 907 908 /* Map rest of the mbuf */ 909 while (iseg < nsegs) { 910 ena_buf->paddr = segs[iseg].ds_addr; 911 ena_buf->len = segs[iseg].ds_len; 912 ena_buf++; 913 iseg++; 914 tx_info->num_of_bufs++; 915 } 916 917 return (0); 918 919 dma_error: 920 counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1); 921 tx_info->mbuf = NULL; 922 return (rc); 923 } 924 925 static int 926 ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf) 927 { 928 struct ena_adapter *adapter; 929 struct ena_tx_buffer *tx_info; 930 struct ena_com_tx_ctx ena_tx_ctx; 931 struct ena_com_dev *ena_dev; 932 struct ena_com_io_sq* io_sq; 933 void *push_hdr; 934 uint16_t next_to_use; 935 uint16_t req_id; 936 uint16_t ena_qid; 937 uint16_t header_len; 938 int rc; 939 int nb_hw_desc; 940 941 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); 942 adapter = tx_ring->que->adapter; 943 ena_dev = adapter->ena_dev; 944 io_sq = &ena_dev->io_sq_queues[ena_qid]; 945 946 rc = ena_check_and_collapse_mbuf(tx_ring, mbuf); 947 if (unlikely(rc != 0)) { 948 ena_trace(NULL, ENA_WARNING, 949 "Failed to collapse mbuf! err: %d\n", rc); 950 return (rc); 951 } 952 953 ena_trace(NULL, ENA_DBG | ENA_TXPTH, "Tx: %d bytes\n", (*mbuf)->m_pkthdr.len); 954 955 next_to_use = tx_ring->next_to_use; 956 req_id = tx_ring->free_tx_ids[next_to_use]; 957 tx_info = &tx_ring->tx_buffer_info[req_id]; 958 tx_info->num_of_bufs = 0; 959 960 rc = ena_tx_map_mbuf(tx_ring, tx_info, *mbuf, &push_hdr, &header_len); 961 if (unlikely(rc != 0)) { 962 ena_trace(NULL, ENA_WARNING, "Failed to map TX mbuf\n"); 963 return (rc); 964 } 965 memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx)); 966 ena_tx_ctx.ena_bufs = tx_info->bufs; 967 ena_tx_ctx.push_header = push_hdr; 968 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 969 ena_tx_ctx.req_id = req_id; 970 ena_tx_ctx.header_len = header_len; 971 972 /* Set flags and meta data */ 973 ena_tx_csum(&ena_tx_ctx, *mbuf, adapter->disable_meta_caching); 974 975 if (tx_ring->acum_pkts == DB_THRESHOLD || 976 ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx)) { 977 ena_trace(NULL, ENA_DBG | ENA_TXPTH, 978 "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", 979 tx_ring->que->id); 980 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 981 counter_u64_add(tx_ring->tx_stats.doorbells, 1); 982 tx_ring->acum_pkts = 0; 983 } 984 985 /* Prepare the packet's descriptors and send them to device */ 986 rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc); 987 if (unlikely(rc != 0)) { 988 if (likely(rc == ENA_COM_NO_MEM)) { 989 ena_trace(NULL, ENA_DBG | ENA_TXPTH, 990 "tx ring[%d] if out of space\n", tx_ring->que->id); 991 } else { 992 device_printf(adapter->pdev, 993 "failed to prepare tx bufs\n"); 994 } 995 counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1); 996 goto dma_error; 997 } 998 999 counter_enter(); 1000 counter_u64_add_protected(tx_ring->tx_stats.cnt, 1); 1001 counter_u64_add_protected(tx_ring->tx_stats.bytes, 1002 (*mbuf)->m_pkthdr.len); 1003 1004 counter_u64_add_protected(adapter->hw_stats.tx_packets, 1); 1005 counter_u64_add_protected(adapter->hw_stats.tx_bytes, 1006 (*mbuf)->m_pkthdr.len); 1007 counter_exit(); 1008 1009 tx_info->tx_descs = nb_hw_desc; 1010 getbinuptime(&tx_info->timestamp); 1011 tx_info->print_once = true; 1012 1013 tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, 1014 tx_ring->ring_size); 1015 1016 /* stop the queue when no more space available, the packet can have up 1017 * to sgl_size + 2. one for the meta descriptor and one for header 1018 * (if the header is larger than tx_max_header_size). 1019 */ 1020 if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 1021 adapter->max_tx_sgl_size + 2))) { 1022 ena_trace(NULL, ENA_DBG | ENA_TXPTH, "Stop queue %d\n", 1023 tx_ring->que->id); 1024 1025 tx_ring->running = false; 1026 counter_u64_add(tx_ring->tx_stats.queue_stop, 1); 1027 1028 /* There is a rare condition where this function decides to 1029 * stop the queue but meanwhile tx_cleanup() updates 1030 * next_to_completion and terminates. 1031 * The queue will remain stopped forever. 1032 * To solve this issue this function performs mb(), checks 1033 * the wakeup condition and wakes up the queue if needed. 1034 */ 1035 mb(); 1036 1037 if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 1038 ENA_TX_RESUME_THRESH)) { 1039 tx_ring->running = true; 1040 counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1); 1041 } 1042 } 1043 1044 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap, 1045 BUS_DMASYNC_PREWRITE); 1046 1047 return (0); 1048 1049 dma_error: 1050 tx_info->mbuf = NULL; 1051 bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap); 1052 1053 return (rc); 1054 } 1055 1056 static void 1057 ena_start_xmit(struct ena_ring *tx_ring) 1058 { 1059 struct mbuf *mbuf; 1060 struct ena_adapter *adapter = tx_ring->adapter; 1061 struct ena_com_io_sq* io_sq; 1062 int ena_qid; 1063 int ret = 0; 1064 1065 if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0)) 1066 return; 1067 1068 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))) 1069 return; 1070 1071 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); 1072 io_sq = &adapter->ena_dev->io_sq_queues[ena_qid]; 1073 1074 while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) { 1075 ena_trace(NULL, ENA_DBG | ENA_TXPTH, "\ndequeued mbuf %p with flags %#x and" 1076 " header csum flags %#jx\n", 1077 mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags); 1078 1079 if (unlikely(!tx_ring->running)) { 1080 drbr_putback(adapter->ifp, tx_ring->br, mbuf); 1081 break; 1082 } 1083 1084 if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) { 1085 if (ret == ENA_COM_NO_MEM) { 1086 drbr_putback(adapter->ifp, tx_ring->br, mbuf); 1087 } else if (ret == ENA_COM_NO_SPACE) { 1088 drbr_putback(adapter->ifp, tx_ring->br, mbuf); 1089 } else { 1090 m_freem(mbuf); 1091 drbr_advance(adapter->ifp, tx_ring->br); 1092 } 1093 1094 break; 1095 } 1096 1097 drbr_advance(adapter->ifp, tx_ring->br); 1098 1099 if (unlikely((if_getdrvflags(adapter->ifp) & 1100 IFF_DRV_RUNNING) == 0)) 1101 return; 1102 1103 tx_ring->acum_pkts++; 1104 1105 BPF_MTAP(adapter->ifp, mbuf); 1106 } 1107 1108 if (likely(tx_ring->acum_pkts != 0)) { 1109 /* Trigger the dma engine */ 1110 ena_com_write_sq_doorbell(io_sq); 1111 counter_u64_add(tx_ring->tx_stats.doorbells, 1); 1112 tx_ring->acum_pkts = 0; 1113 } 1114 1115 if (unlikely(!tx_ring->running)) 1116 taskqueue_enqueue(tx_ring->que->cleanup_tq, 1117 &tx_ring->que->cleanup_task); 1118 } 1119