1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) 2015-2019 Amazon.com, Inc. or its affiliates. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "ena.h" 34 #include "ena_datapath.h" 35 #ifdef DEV_NETMAP 36 #include "ena_netmap.h" 37 #endif /* DEV_NETMAP */ 38 39 /********************************************************************* 40 * Static functions prototypes 41 *********************************************************************/ 42 43 static int ena_tx_cleanup(struct ena_ring *); 44 static int ena_rx_cleanup(struct ena_ring *); 45 static inline int validate_tx_req_id(struct ena_ring *, uint16_t); 46 static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *, 47 struct mbuf *); 48 static struct mbuf* ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *, 49 struct ena_com_rx_ctx *, uint16_t *); 50 static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *, 51 struct mbuf *); 52 static void ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *); 53 static int ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, 54 struct mbuf **mbuf); 55 static int ena_xmit_mbuf(struct ena_ring *, struct mbuf **); 56 static void ena_start_xmit(struct ena_ring *); 57 58 /********************************************************************* 59 * Global functions 60 *********************************************************************/ 61 62 void 63 ena_cleanup(void *arg, int pending) 64 { 65 struct ena_que *que = arg; 66 struct ena_adapter *adapter = que->adapter; 67 if_t ifp = adapter->ifp; 68 struct ena_ring *tx_ring; 69 struct ena_ring *rx_ring; 70 struct ena_com_io_cq* io_cq; 71 struct ena_eth_io_intr_reg intr_reg; 72 int qid, ena_qid; 73 int txc, rxc, i; 74 75 if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)) 76 return; 77 78 ena_trace(ENA_DBG, "MSI-X TX/RX routine\n"); 79 80 tx_ring = que->tx_ring; 81 rx_ring = que->rx_ring; 82 qid = que->id; 83 ena_qid = ENA_IO_TXQ_IDX(qid); 84 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; 85 86 tx_ring->first_interrupt = true; 87 rx_ring->first_interrupt = true; 88 89 for (i = 0; i < CLEAN_BUDGET; ++i) { 90 rxc = ena_rx_cleanup(rx_ring); 91 txc = ena_tx_cleanup(tx_ring); 92 93 if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)) 94 return; 95 96 if ((txc != TX_BUDGET) && (rxc != RX_BUDGET)) 97 break; 98 } 99 100 /* Signal that work is done and unmask interrupt */ 101 ena_com_update_intr_reg(&intr_reg, 102 RX_IRQ_INTERVAL, 103 TX_IRQ_INTERVAL, 104 true); 105 ena_com_unmask_intr(io_cq, &intr_reg); 106 } 107 108 void 109 ena_deferred_mq_start(void *arg, int pending) 110 { 111 struct ena_ring *tx_ring = (struct ena_ring *)arg; 112 struct ifnet *ifp = tx_ring->adapter->ifp; 113 114 while (!drbr_empty(ifp, tx_ring->br) && 115 tx_ring->running && 116 (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) { 117 ENA_RING_MTX_LOCK(tx_ring); 118 ena_start_xmit(tx_ring); 119 ENA_RING_MTX_UNLOCK(tx_ring); 120 } 121 } 122 123 int 124 ena_mq_start(if_t ifp, struct mbuf *m) 125 { 126 struct ena_adapter *adapter = ifp->if_softc; 127 struct ena_ring *tx_ring; 128 int ret, is_drbr_empty; 129 uint32_t i; 130 131 if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0)) 132 return (ENODEV); 133 134 /* Which queue to use */ 135 /* 136 * If everything is setup correctly, it should be the 137 * same bucket that the current CPU we're on is. 138 * It should improve performance. 139 */ 140 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 141 i = m->m_pkthdr.flowid % adapter->num_queues; 142 } else { 143 i = curcpu % adapter->num_queues; 144 } 145 tx_ring = &adapter->tx_ring[i]; 146 147 /* Check if drbr is empty before putting packet */ 148 is_drbr_empty = drbr_empty(ifp, tx_ring->br); 149 ret = drbr_enqueue(ifp, tx_ring->br, m); 150 if (unlikely(ret != 0)) { 151 taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task); 152 return (ret); 153 } 154 155 if (is_drbr_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) { 156 ena_start_xmit(tx_ring); 157 ENA_RING_MTX_UNLOCK(tx_ring); 158 } else { 159 taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task); 160 } 161 162 return (0); 163 } 164 165 void 166 ena_qflush(if_t ifp) 167 { 168 struct ena_adapter *adapter = ifp->if_softc; 169 struct ena_ring *tx_ring = adapter->tx_ring; 170 int i; 171 172 for(i = 0; i < adapter->num_queues; ++i, ++tx_ring) 173 if (!drbr_empty(ifp, tx_ring->br)) { 174 ENA_RING_MTX_LOCK(tx_ring); 175 drbr_flush(ifp, tx_ring->br); 176 ENA_RING_MTX_UNLOCK(tx_ring); 177 } 178 179 if_qflush(ifp); 180 } 181 182 /********************************************************************* 183 * Static functions 184 *********************************************************************/ 185 186 static inline int 187 validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id) 188 { 189 struct ena_adapter *adapter = tx_ring->adapter; 190 struct ena_tx_buffer *tx_info = NULL; 191 192 if (likely(req_id < tx_ring->ring_size)) { 193 tx_info = &tx_ring->tx_buffer_info[req_id]; 194 if (tx_info->mbuf != NULL) 195 return (0); 196 device_printf(adapter->pdev, 197 "tx_info doesn't have valid mbuf\n"); 198 } 199 200 device_printf(adapter->pdev, "Invalid req_id: %hu\n", req_id); 201 counter_u64_add(tx_ring->tx_stats.bad_req_id, 1); 202 203 /* Trigger device reset */ 204 adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID; 205 ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter); 206 207 return (EFAULT); 208 } 209 210 /** 211 * ena_tx_cleanup - clear sent packets and corresponding descriptors 212 * @tx_ring: ring for which we want to clean packets 213 * 214 * Once packets are sent, we ask the device in a loop for no longer used 215 * descriptors. We find the related mbuf chain in a map (index in an array) 216 * and free it, then update ring state. 217 * This is performed in "endless" loop, updating ring pointers every 218 * TX_COMMIT. The first check of free descriptor is performed before the actual 219 * loop, then repeated at the loop end. 220 **/ 221 static int 222 ena_tx_cleanup(struct ena_ring *tx_ring) 223 { 224 struct ena_adapter *adapter; 225 struct ena_com_io_cq* io_cq; 226 uint16_t next_to_clean; 227 uint16_t req_id; 228 uint16_t ena_qid; 229 unsigned int total_done = 0; 230 int rc; 231 int commit = TX_COMMIT; 232 int budget = TX_BUDGET; 233 int work_done; 234 bool above_thresh; 235 236 adapter = tx_ring->que->adapter; 237 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); 238 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; 239 next_to_clean = tx_ring->next_to_clean; 240 241 #ifdef DEV_NETMAP 242 if (netmap_tx_irq(adapter->ifp, tx_ring->qid) != NM_IRQ_PASS) 243 return (0); 244 #endif /* DEV_NETMAP */ 245 246 do { 247 struct ena_tx_buffer *tx_info; 248 struct mbuf *mbuf; 249 250 rc = ena_com_tx_comp_req_id_get(io_cq, &req_id); 251 if (unlikely(rc != 0)) 252 break; 253 254 rc = validate_tx_req_id(tx_ring, req_id); 255 if (unlikely(rc != 0)) 256 break; 257 258 tx_info = &tx_ring->tx_buffer_info[req_id]; 259 260 mbuf = tx_info->mbuf; 261 262 tx_info->mbuf = NULL; 263 bintime_clear(&tx_info->timestamp); 264 265 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap, 266 BUS_DMASYNC_POSTWRITE); 267 bus_dmamap_unload(adapter->tx_buf_tag, 268 tx_info->dmamap); 269 270 ena_trace(ENA_DBG | ENA_TXPTH, "tx: q %d mbuf %p completed\n", 271 tx_ring->qid, mbuf); 272 273 m_freem(mbuf); 274 275 total_done += tx_info->tx_descs; 276 277 tx_ring->free_tx_ids[next_to_clean] = req_id; 278 next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, 279 tx_ring->ring_size); 280 281 if (unlikely(--commit == 0)) { 282 commit = TX_COMMIT; 283 /* update ring state every TX_COMMIT descriptor */ 284 tx_ring->next_to_clean = next_to_clean; 285 ena_com_comp_ack( 286 &adapter->ena_dev->io_sq_queues[ena_qid], 287 total_done); 288 ena_com_update_dev_comp_head(io_cq); 289 total_done = 0; 290 } 291 } while (likely(--budget)); 292 293 work_done = TX_BUDGET - budget; 294 295 ena_trace(ENA_DBG | ENA_TXPTH, "tx: q %d done. total pkts: %d\n", 296 tx_ring->qid, work_done); 297 298 /* If there is still something to commit update ring state */ 299 if (likely(commit != TX_COMMIT)) { 300 tx_ring->next_to_clean = next_to_clean; 301 ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid], 302 total_done); 303 ena_com_update_dev_comp_head(io_cq); 304 } 305 306 /* 307 * Need to make the rings circular update visible to 308 * ena_xmit_mbuf() before checking for tx_ring->running. 309 */ 310 mb(); 311 312 above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 313 ENA_TX_RESUME_THRESH); 314 if (unlikely(!tx_ring->running && above_thresh)) { 315 ENA_RING_MTX_LOCK(tx_ring); 316 above_thresh = 317 ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 318 ENA_TX_RESUME_THRESH); 319 if (!tx_ring->running && above_thresh) { 320 tx_ring->running = true; 321 counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1); 322 taskqueue_enqueue(tx_ring->enqueue_tq, 323 &tx_ring->enqueue_task); 324 } 325 ENA_RING_MTX_UNLOCK(tx_ring); 326 } 327 328 return (work_done); 329 } 330 331 static void 332 ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx, 333 struct mbuf *mbuf) 334 { 335 struct ena_adapter *adapter = rx_ring->adapter; 336 337 if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 338 mbuf->m_pkthdr.flowid = ena_rx_ctx->hash; 339 340 if (ena_rx_ctx->frag && 341 (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) { 342 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH); 343 return; 344 } 345 346 switch (ena_rx_ctx->l3_proto) { 347 case ENA_ETH_IO_L3_PROTO_IPV4: 348 switch (ena_rx_ctx->l4_proto) { 349 case ENA_ETH_IO_L4_PROTO_TCP: 350 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4); 351 break; 352 case ENA_ETH_IO_L4_PROTO_UDP: 353 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4); 354 break; 355 default: 356 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4); 357 } 358 break; 359 case ENA_ETH_IO_L3_PROTO_IPV6: 360 switch (ena_rx_ctx->l4_proto) { 361 case ENA_ETH_IO_L4_PROTO_TCP: 362 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6); 363 break; 364 case ENA_ETH_IO_L4_PROTO_UDP: 365 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6); 366 break; 367 default: 368 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6); 369 } 370 break; 371 case ENA_ETH_IO_L3_PROTO_UNKNOWN: 372 M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE); 373 break; 374 default: 375 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH); 376 } 377 } else { 378 mbuf->m_pkthdr.flowid = rx_ring->qid; 379 M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE); 380 } 381 } 382 383 /** 384 * ena_rx_mbuf - assemble mbuf from descriptors 385 * @rx_ring: ring for which we want to clean packets 386 * @ena_bufs: buffer info 387 * @ena_rx_ctx: metadata for this packet(s) 388 * @next_to_clean: ring pointer, will be updated only upon success 389 * 390 **/ 391 static struct mbuf* 392 ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, 393 struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean) 394 { 395 struct mbuf *mbuf; 396 struct ena_rx_buffer *rx_info; 397 struct ena_adapter *adapter; 398 unsigned int descs = ena_rx_ctx->descs; 399 int rc; 400 uint16_t ntc, len, req_id, buf = 0; 401 402 ntc = *next_to_clean; 403 adapter = rx_ring->adapter; 404 405 len = ena_bufs[buf].len; 406 req_id = ena_bufs[buf].req_id; 407 rc = validate_rx_req_id(rx_ring, req_id); 408 if (unlikely(rc != 0)) 409 return (NULL); 410 411 rx_info = &rx_ring->rx_buffer_info[req_id]; 412 if (unlikely(rx_info->mbuf == NULL)) { 413 device_printf(adapter->pdev, "NULL mbuf in rx_info"); 414 return (NULL); 415 } 416 417 ena_trace(ENA_DBG | ENA_RXPTH, "rx_info %p, mbuf %p, paddr %jx\n", 418 rx_info, rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr); 419 420 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, 421 BUS_DMASYNC_POSTREAD); 422 mbuf = rx_info->mbuf; 423 mbuf->m_flags |= M_PKTHDR; 424 mbuf->m_pkthdr.len = len; 425 mbuf->m_len = len; 426 mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp; 427 428 /* Fill mbuf with hash key and it's interpretation for optimization */ 429 ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf); 430 431 ena_trace(ENA_DBG | ENA_RXPTH, "rx mbuf 0x%p, flags=0x%x, len: %d\n", 432 mbuf, mbuf->m_flags, mbuf->m_pkthdr.len); 433 434 /* DMA address is not needed anymore, unmap it */ 435 bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map); 436 437 rx_info->mbuf = NULL; 438 rx_ring->free_rx_ids[ntc] = req_id; 439 ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size); 440 441 /* 442 * While we have more than 1 descriptors for one rcvd packet, append 443 * other mbufs to the main one 444 */ 445 while (--descs) { 446 ++buf; 447 len = ena_bufs[buf].len; 448 req_id = ena_bufs[buf].req_id; 449 rc = validate_rx_req_id(rx_ring, req_id); 450 if (unlikely(rc != 0)) { 451 /* 452 * If the req_id is invalid, then the device will be 453 * reset. In that case we must free all mbufs that 454 * were already gathered. 455 */ 456 m_freem(mbuf); 457 return (NULL); 458 } 459 rx_info = &rx_ring->rx_buffer_info[req_id]; 460 461 if (unlikely(rx_info->mbuf == NULL)) { 462 device_printf(adapter->pdev, "NULL mbuf in rx_info"); 463 /* 464 * If one of the required mbufs was not allocated yet, 465 * we can break there. 466 * All earlier used descriptors will be reallocated 467 * later and not used mbufs can be reused. 468 * The next_to_clean pointer will not be updated in case 469 * of an error, so caller should advance it manually 470 * in error handling routine to keep it up to date 471 * with hw ring. 472 */ 473 m_freem(mbuf); 474 return (NULL); 475 } 476 477 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, 478 BUS_DMASYNC_POSTREAD); 479 if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) { 480 counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1); 481 ena_trace(ENA_WARNING, "Failed to append Rx mbuf %p\n", 482 mbuf); 483 } 484 485 ena_trace(ENA_DBG | ENA_RXPTH, 486 "rx mbuf updated. len %d\n", mbuf->m_pkthdr.len); 487 488 /* Free already appended mbuf, it won't be useful anymore */ 489 bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map); 490 m_freem(rx_info->mbuf); 491 rx_info->mbuf = NULL; 492 493 rx_ring->free_rx_ids[ntc] = req_id; 494 ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size); 495 } 496 497 *next_to_clean = ntc; 498 499 return (mbuf); 500 } 501 502 /** 503 * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum 504 **/ 505 static inline void 506 ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx, 507 struct mbuf *mbuf) 508 { 509 510 /* if IP and error */ 511 if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) && 512 ena_rx_ctx->l3_csum_err)) { 513 /* ipv4 checksum error */ 514 mbuf->m_pkthdr.csum_flags = 0; 515 counter_u64_add(rx_ring->rx_stats.bad_csum, 1); 516 ena_trace(ENA_DBG, "RX IPv4 header checksum error\n"); 517 return; 518 } 519 520 /* if TCP/UDP */ 521 if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || 522 (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) { 523 if (ena_rx_ctx->l4_csum_err) { 524 /* TCP/UDP checksum error */ 525 mbuf->m_pkthdr.csum_flags = 0; 526 counter_u64_add(rx_ring->rx_stats.bad_csum, 1); 527 ena_trace(ENA_DBG, "RX L4 checksum error\n"); 528 } else { 529 mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED; 530 mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID; 531 } 532 } 533 } 534 535 /** 536 * ena_rx_cleanup - handle rx irq 537 * @arg: ring for which irq is being handled 538 **/ 539 static int 540 ena_rx_cleanup(struct ena_ring *rx_ring) 541 { 542 struct ena_adapter *adapter; 543 struct mbuf *mbuf; 544 struct ena_com_rx_ctx ena_rx_ctx; 545 struct ena_com_io_cq* io_cq; 546 struct ena_com_io_sq* io_sq; 547 if_t ifp; 548 uint16_t ena_qid; 549 uint16_t next_to_clean; 550 uint32_t refill_required; 551 uint32_t refill_threshold; 552 uint32_t do_if_input = 0; 553 unsigned int qid; 554 int rc, i; 555 int budget = RX_BUDGET; 556 #ifdef DEV_NETMAP 557 int done; 558 #endif /* DEV_NETMAP */ 559 560 adapter = rx_ring->que->adapter; 561 ifp = adapter->ifp; 562 qid = rx_ring->que->id; 563 ena_qid = ENA_IO_RXQ_IDX(qid); 564 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; 565 io_sq = &adapter->ena_dev->io_sq_queues[ena_qid]; 566 next_to_clean = rx_ring->next_to_clean; 567 568 #ifdef DEV_NETMAP 569 if (netmap_rx_irq(adapter->ifp, rx_ring->qid, &done) != NM_IRQ_PASS) 570 return (0); 571 #endif /* DEV_NETMAP */ 572 573 ena_trace(ENA_DBG, "rx: qid %d\n", qid); 574 575 do { 576 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; 577 ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size; 578 ena_rx_ctx.descs = 0; 579 bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag, 580 io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD); 581 rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx); 582 583 if (unlikely(rc != 0)) 584 goto error; 585 586 if (unlikely(ena_rx_ctx.descs == 0)) 587 break; 588 589 ena_trace(ENA_DBG | ENA_RXPTH, "rx: q %d got packet from ena. " 590 "descs #: %d l3 proto %d l4 proto %d hash: %x\n", 591 rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, 592 ena_rx_ctx.l4_proto, ena_rx_ctx.hash); 593 594 /* Receive mbuf from the ring */ 595 mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs, 596 &ena_rx_ctx, &next_to_clean); 597 bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag, 598 io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD); 599 /* Exit if we failed to retrieve a buffer */ 600 if (unlikely(mbuf == NULL)) { 601 for (i = 0; i < ena_rx_ctx.descs; ++i) { 602 rx_ring->free_rx_ids[next_to_clean] = 603 rx_ring->ena_bufs[i].req_id; 604 next_to_clean = 605 ENA_RX_RING_IDX_NEXT(next_to_clean, 606 rx_ring->ring_size); 607 608 } 609 break; 610 } 611 612 if (((ifp->if_capenable & IFCAP_RXCSUM) != 0) || 613 ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0)) { 614 ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf); 615 } 616 617 counter_enter(); 618 counter_u64_add_protected(rx_ring->rx_stats.bytes, 619 mbuf->m_pkthdr.len); 620 counter_u64_add_protected(adapter->hw_stats.rx_bytes, 621 mbuf->m_pkthdr.len); 622 counter_exit(); 623 /* 624 * LRO is only for IP/TCP packets and TCP checksum of the packet 625 * should be computed by hardware. 626 */ 627 do_if_input = 1; 628 if (((ifp->if_capenable & IFCAP_LRO) != 0) && 629 ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) && 630 (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) { 631 /* 632 * Send to the stack if: 633 * - LRO not enabled, or 634 * - no LRO resources, or 635 * - lro enqueue fails 636 */ 637 if ((rx_ring->lro.lro_cnt != 0) && 638 (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0)) 639 do_if_input = 0; 640 } 641 if (do_if_input != 0) { 642 ena_trace(ENA_DBG | ENA_RXPTH, 643 "calling if_input() with mbuf %p\n", mbuf); 644 (*ifp->if_input)(ifp, mbuf); 645 } 646 647 counter_enter(); 648 counter_u64_add_protected(rx_ring->rx_stats.cnt, 1); 649 counter_u64_add_protected(adapter->hw_stats.rx_packets, 1); 650 counter_exit(); 651 } while (--budget); 652 653 rx_ring->next_to_clean = next_to_clean; 654 655 refill_required = ena_com_free_desc(io_sq); 656 refill_threshold = min_t(int, 657 rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER, 658 ENA_RX_REFILL_THRESH_PACKET); 659 660 if (refill_required > refill_threshold) { 661 ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); 662 ena_refill_rx_bufs(rx_ring, refill_required); 663 } 664 665 tcp_lro_flush_all(&rx_ring->lro); 666 667 return (RX_BUDGET - budget); 668 669 error: 670 counter_u64_add(rx_ring->rx_stats.bad_desc_num, 1); 671 672 /* Too many desc from the device. Trigger reset */ 673 if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) { 674 adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS; 675 ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter); 676 } 677 678 return (0); 679 } 680 681 static void 682 ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf) 683 { 684 struct ena_com_tx_meta *ena_meta; 685 struct ether_vlan_header *eh; 686 struct mbuf *mbuf_next; 687 u32 mss; 688 bool offload; 689 uint16_t etype; 690 int ehdrlen; 691 struct ip *ip; 692 int iphlen; 693 struct tcphdr *th; 694 int offset; 695 696 offload = false; 697 ena_meta = &ena_tx_ctx->ena_meta; 698 mss = mbuf->m_pkthdr.tso_segsz; 699 700 if (mss != 0) 701 offload = true; 702 703 if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) 704 offload = true; 705 706 if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0) 707 offload = true; 708 709 if (!offload) { 710 ena_tx_ctx->meta_valid = 0; 711 return; 712 } 713 714 /* Determine where frame payload starts. */ 715 eh = mtod(mbuf, struct ether_vlan_header *); 716 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 717 etype = ntohs(eh->evl_proto); 718 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 719 } else { 720 etype = ntohs(eh->evl_encap_proto); 721 ehdrlen = ETHER_HDR_LEN; 722 } 723 724 mbuf_next = m_getptr(mbuf, ehdrlen, &offset); 725 ip = (struct ip *)(mtodo(mbuf_next, offset)); 726 iphlen = ip->ip_hl << 2; 727 728 mbuf_next = m_getptr(mbuf, iphlen + ehdrlen, &offset); 729 th = (struct tcphdr *)(mtodo(mbuf_next, offset)); 730 731 if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) { 732 ena_tx_ctx->l3_csum_enable = 1; 733 } 734 if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) { 735 ena_tx_ctx->tso_enable = 1; 736 ena_meta->l4_hdr_len = (th->th_off); 737 } 738 739 switch (etype) { 740 case ETHERTYPE_IP: 741 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; 742 if ((ip->ip_off & htons(IP_DF)) != 0) 743 ena_tx_ctx->df = 1; 744 break; 745 case ETHERTYPE_IPV6: 746 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; 747 748 default: 749 break; 750 } 751 752 if (ip->ip_p == IPPROTO_TCP) { 753 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; 754 if ((mbuf->m_pkthdr.csum_flags & 755 (CSUM_IP_TCP | CSUM_IP6_TCP)) != 0) 756 ena_tx_ctx->l4_csum_enable = 1; 757 else 758 ena_tx_ctx->l4_csum_enable = 0; 759 } else if (ip->ip_p == IPPROTO_UDP) { 760 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; 761 if ((mbuf->m_pkthdr.csum_flags & 762 (CSUM_IP_UDP | CSUM_IP6_UDP)) != 0) 763 ena_tx_ctx->l4_csum_enable = 1; 764 else 765 ena_tx_ctx->l4_csum_enable = 0; 766 } else { 767 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN; 768 ena_tx_ctx->l4_csum_enable = 0; 769 } 770 771 ena_meta->mss = mss; 772 ena_meta->l3_hdr_len = iphlen; 773 ena_meta->l3_hdr_offset = ehdrlen; 774 ena_tx_ctx->meta_valid = 1; 775 } 776 777 static int 778 ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf) 779 { 780 struct ena_adapter *adapter; 781 struct mbuf *collapsed_mbuf; 782 int num_frags; 783 784 adapter = tx_ring->adapter; 785 num_frags = ena_mbuf_count(*mbuf); 786 787 /* One segment must be reserved for configuration descriptor. */ 788 if (num_frags < adapter->max_tx_sgl_size) 789 return (0); 790 counter_u64_add(tx_ring->tx_stats.collapse, 1); 791 792 collapsed_mbuf = m_collapse(*mbuf, M_NOWAIT, 793 adapter->max_tx_sgl_size - 1); 794 if (unlikely(collapsed_mbuf == NULL)) { 795 counter_u64_add(tx_ring->tx_stats.collapse_err, 1); 796 return (ENOMEM); 797 } 798 799 /* If mbuf was collapsed succesfully, original mbuf is released. */ 800 *mbuf = collapsed_mbuf; 801 802 return (0); 803 } 804 805 static int 806 ena_tx_map_mbuf(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info, 807 struct mbuf *mbuf, void **push_hdr, u16 *header_len) 808 { 809 struct ena_adapter *adapter = tx_ring->adapter; 810 struct ena_com_buf *ena_buf; 811 bus_dma_segment_t segs[ENA_BUS_DMA_SEGS]; 812 size_t iseg = 0; 813 uint32_t mbuf_head_len, frag_len; 814 uint16_t push_len = 0; 815 uint16_t delta = 0; 816 int rc, nsegs; 817 818 mbuf_head_len = mbuf->m_len; 819 tx_info->mbuf = mbuf; 820 ena_buf = tx_info->bufs; 821 822 /* 823 * For easier maintaining of the DMA map, map the whole mbuf even if 824 * the LLQ is used. The descriptors will be filled using the segments. 825 */ 826 rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag, tx_info->dmamap, mbuf, 827 segs, &nsegs, BUS_DMA_NOWAIT); 828 if (unlikely((rc != 0) || (nsegs == 0))) { 829 ena_trace(ENA_WARNING, 830 "dmamap load failed! err: %d nsegs: %d\n", rc, nsegs); 831 goto dma_error; 832 } 833 834 835 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 836 /* 837 * When the device is LLQ mode, the driver will copy 838 * the header into the device memory space. 839 * the ena_com layer assumes the header is in a linear 840 * memory space. 841 * This assumption might be wrong since part of the header 842 * can be in the fragmented buffers. 843 * First check if header fits in the mbuf. If not, copy it to 844 * separate buffer that will be holding linearized data. 845 */ 846 push_len = min_t(uint32_t, mbuf->m_pkthdr.len, 847 tx_ring->tx_max_header_size); 848 *header_len = push_len; 849 /* If header is in linear space, just point into mbuf's data. */ 850 if (likely(push_len <= mbuf_head_len)) { 851 *push_hdr = mbuf->m_data; 852 /* 853 * Otherwise, copy whole portion of header from multiple mbufs 854 * to intermediate buffer. 855 */ 856 } else { 857 m_copydata(mbuf, 0, push_len, 858 tx_ring->push_buf_intermediate_buf); 859 *push_hdr = tx_ring->push_buf_intermediate_buf; 860 861 counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1); 862 delta = push_len - mbuf_head_len; 863 } 864 865 ena_trace(ENA_DBG | ENA_TXPTH, 866 "mbuf: %p header_buf->vaddr: %p push_len: %d\n", 867 mbuf, *push_hdr, push_len); 868 869 /* 870 * If header was in linear memory space, map for the dma rest of the data 871 * in the first mbuf of the mbuf chain. 872 */ 873 if (mbuf_head_len > push_len) { 874 ena_buf->paddr = segs[iseg].ds_addr + push_len; 875 ena_buf->len = segs[iseg].ds_len - push_len; 876 ena_buf++; 877 tx_info->num_of_bufs++; 878 } 879 /* 880 * Advance the seg index as either the 1st mbuf was mapped or is 881 * a part of push_hdr. 882 */ 883 iseg++; 884 } else { 885 *push_hdr = NULL; 886 /* 887 * header_len is just a hint for the device. Because FreeBSD is not 888 * giving us information about packet header length and it is not 889 * guaranteed that all packet headers will be in the 1st mbuf, setting 890 * header_len to 0 is making the device ignore this value and resolve 891 * header on it's own. 892 */ 893 *header_len = 0; 894 } 895 896 /* 897 * If header is in non linear space (delta > 0), then skip mbufs 898 * containing header and map the last one containing both header and the 899 * packet data. 900 * The first segment is already counted in. 901 * If LLQ is not supported, the loop will be skipped. 902 */ 903 while (delta > 0) { 904 frag_len = segs[iseg].ds_len; 905 906 /* 907 * If whole segment contains header just move to the 908 * next one and reduce delta. 909 */ 910 if (unlikely(delta >= frag_len)) { 911 delta -= frag_len; 912 } else { 913 /* 914 * Map rest of the packet data that was contained in 915 * the mbuf. 916 */ 917 ena_buf->paddr = segs[iseg].ds_addr + delta; 918 ena_buf->len = frag_len - delta; 919 ena_buf++; 920 tx_info->num_of_bufs++; 921 922 delta = 0; 923 } 924 iseg++; 925 } 926 927 if (mbuf == NULL) { 928 return (0); 929 } 930 931 /* Map rest of the mbuf */ 932 while (iseg < nsegs) { 933 ena_buf->paddr = segs[iseg].ds_addr; 934 ena_buf->len = segs[iseg].ds_len; 935 ena_buf++; 936 iseg++; 937 tx_info->num_of_bufs++; 938 } 939 940 return (0); 941 942 dma_error: 943 counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1); 944 tx_info->mbuf = NULL; 945 return (rc); 946 } 947 948 static int 949 ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf) 950 { 951 struct ena_adapter *adapter; 952 struct ena_tx_buffer *tx_info; 953 struct ena_com_tx_ctx ena_tx_ctx; 954 struct ena_com_dev *ena_dev; 955 struct ena_com_io_sq* io_sq; 956 void *push_hdr; 957 uint16_t next_to_use; 958 uint16_t req_id; 959 uint16_t ena_qid; 960 uint16_t header_len; 961 int rc; 962 int nb_hw_desc; 963 964 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); 965 adapter = tx_ring->que->adapter; 966 ena_dev = adapter->ena_dev; 967 io_sq = &ena_dev->io_sq_queues[ena_qid]; 968 969 rc = ena_check_and_collapse_mbuf(tx_ring, mbuf); 970 if (unlikely(rc != 0)) { 971 ena_trace(ENA_WARNING, 972 "Failed to collapse mbuf! err: %d\n", rc); 973 return (rc); 974 } 975 976 ena_trace(ENA_DBG | ENA_TXPTH, "Tx: %d bytes\n", (*mbuf)->m_pkthdr.len); 977 978 next_to_use = tx_ring->next_to_use; 979 req_id = tx_ring->free_tx_ids[next_to_use]; 980 tx_info = &tx_ring->tx_buffer_info[req_id]; 981 tx_info->num_of_bufs = 0; 982 983 rc = ena_tx_map_mbuf(tx_ring, tx_info, *mbuf, &push_hdr, &header_len); 984 if (unlikely(rc != 0)) { 985 ena_trace(ENA_WARNING, "Failed to map TX mbuf\n"); 986 return (rc); 987 } 988 memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx)); 989 ena_tx_ctx.ena_bufs = tx_info->bufs; 990 ena_tx_ctx.push_header = push_hdr; 991 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 992 ena_tx_ctx.req_id = req_id; 993 ena_tx_ctx.header_len = header_len; 994 995 /* Set flags and meta data */ 996 ena_tx_csum(&ena_tx_ctx, *mbuf); 997 998 if (tx_ring->acum_pkts == DB_THRESHOLD || 999 ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx)) { 1000 ena_trace(ENA_DBG | ENA_TXPTH, 1001 "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", 1002 tx_ring->que->id); 1003 wmb(); 1004 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 1005 counter_u64_add(tx_ring->tx_stats.doorbells, 1); 1006 tx_ring->acum_pkts = 0; 1007 } 1008 1009 /* Prepare the packet's descriptors and send them to device */ 1010 rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc); 1011 if (unlikely(rc != 0)) { 1012 if (likely(rc == ENA_COM_NO_MEM)) { 1013 ena_trace(ENA_DBG | ENA_TXPTH, 1014 "tx ring[%d] if out of space\n", tx_ring->que->id); 1015 } else { 1016 device_printf(adapter->pdev, 1017 "failed to prepare tx bufs\n"); 1018 } 1019 counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1); 1020 goto dma_error; 1021 } 1022 1023 counter_enter(); 1024 counter_u64_add_protected(tx_ring->tx_stats.cnt, 1); 1025 counter_u64_add_protected(tx_ring->tx_stats.bytes, 1026 (*mbuf)->m_pkthdr.len); 1027 1028 counter_u64_add_protected(adapter->hw_stats.tx_packets, 1); 1029 counter_u64_add_protected(adapter->hw_stats.tx_bytes, 1030 (*mbuf)->m_pkthdr.len); 1031 counter_exit(); 1032 1033 tx_info->tx_descs = nb_hw_desc; 1034 getbinuptime(&tx_info->timestamp); 1035 tx_info->print_once = true; 1036 1037 tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, 1038 tx_ring->ring_size); 1039 1040 /* stop the queue when no more space available, the packet can have up 1041 * to sgl_size + 2. one for the meta descriptor and one for header 1042 * (if the header is larger than tx_max_header_size). 1043 */ 1044 if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 1045 adapter->max_tx_sgl_size + 2))) { 1046 ena_trace(ENA_DBG | ENA_TXPTH, "Stop queue %d\n", 1047 tx_ring->que->id); 1048 1049 tx_ring->running = false; 1050 counter_u64_add(tx_ring->tx_stats.queue_stop, 1); 1051 1052 /* There is a rare condition where this function decides to 1053 * stop the queue but meanwhile tx_cleanup() updates 1054 * next_to_completion and terminates. 1055 * The queue will remain stopped forever. 1056 * To solve this issue this function performs mb(), checks 1057 * the wakeup condition and wakes up the queue if needed. 1058 */ 1059 mb(); 1060 1061 if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 1062 ENA_TX_RESUME_THRESH)) { 1063 tx_ring->running = true; 1064 counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1); 1065 } 1066 } 1067 1068 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap, 1069 BUS_DMASYNC_PREWRITE); 1070 1071 return (0); 1072 1073 dma_error: 1074 tx_info->mbuf = NULL; 1075 bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap); 1076 1077 return (rc); 1078 } 1079 1080 static void 1081 ena_start_xmit(struct ena_ring *tx_ring) 1082 { 1083 struct mbuf *mbuf; 1084 struct ena_adapter *adapter = tx_ring->adapter; 1085 struct ena_com_io_sq* io_sq; 1086 int ena_qid; 1087 int ret = 0; 1088 1089 if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0)) 1090 return; 1091 1092 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))) 1093 return; 1094 1095 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); 1096 io_sq = &adapter->ena_dev->io_sq_queues[ena_qid]; 1097 1098 while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) { 1099 ena_trace(ENA_DBG | ENA_TXPTH, "\ndequeued mbuf %p with flags %#x and" 1100 " header csum flags %#jx\n", 1101 mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags); 1102 1103 if (unlikely(!tx_ring->running)) { 1104 drbr_putback(adapter->ifp, tx_ring->br, mbuf); 1105 break; 1106 } 1107 1108 if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) { 1109 if (ret == ENA_COM_NO_MEM) { 1110 drbr_putback(adapter->ifp, tx_ring->br, mbuf); 1111 } else if (ret == ENA_COM_NO_SPACE) { 1112 drbr_putback(adapter->ifp, tx_ring->br, mbuf); 1113 } else { 1114 m_freem(mbuf); 1115 drbr_advance(adapter->ifp, tx_ring->br); 1116 } 1117 1118 break; 1119 } 1120 1121 drbr_advance(adapter->ifp, tx_ring->br); 1122 1123 if (unlikely((if_getdrvflags(adapter->ifp) & 1124 IFF_DRV_RUNNING) == 0)) 1125 return; 1126 1127 tx_ring->acum_pkts++; 1128 1129 BPF_MTAP(adapter->ifp, mbuf); 1130 } 1131 1132 if (likely(tx_ring->acum_pkts != 0)) { 1133 wmb(); 1134 /* Trigger the dma engine */ 1135 ena_com_write_sq_doorbell(io_sq); 1136 counter_u64_add(tx_ring->tx_stats.doorbells, 1); 1137 tx_ring->acum_pkts = 0; 1138 } 1139 1140 if (unlikely(!tx_ring->running)) 1141 taskqueue_enqueue(tx_ring->que->cleanup_tq, 1142 &tx_ring->que->cleanup_task); 1143 } 1144