1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_rss.h" 34 #include "ena.h" 35 #include "ena_datapath.h" 36 #ifdef DEV_NETMAP 37 #include "ena_netmap.h" 38 #endif /* DEV_NETMAP */ 39 #ifdef RSS 40 #include <net/rss_config.h> 41 #endif /* RSS */ 42 43 /********************************************************************* 44 * Static functions prototypes 45 *********************************************************************/ 46 47 static int ena_tx_cleanup(struct ena_ring *); 48 static int ena_rx_cleanup(struct ena_ring *); 49 static inline int validate_tx_req_id(struct ena_ring *, uint16_t); 50 static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *, 51 struct mbuf *); 52 static struct mbuf* ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *, 53 struct ena_com_rx_ctx *, uint16_t *); 54 static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *, 55 struct mbuf *); 56 static void ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *, bool); 57 static int ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, 58 struct mbuf **mbuf); 59 static int ena_xmit_mbuf(struct ena_ring *, struct mbuf **); 60 static void ena_start_xmit(struct ena_ring *); 61 62 /********************************************************************* 63 * Global functions 64 *********************************************************************/ 65 66 void 67 ena_cleanup(void *arg, int pending) 68 { 69 struct ena_que *que = arg; 70 struct ena_adapter *adapter = que->adapter; 71 if_t ifp = adapter->ifp; 72 struct ena_ring *tx_ring; 73 struct ena_ring *rx_ring; 74 struct ena_com_io_cq* io_cq; 75 struct ena_eth_io_intr_reg intr_reg; 76 int qid, ena_qid; 77 int txc, rxc, i; 78 79 if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)) 80 return; 81 82 ena_log_io(adapter->pdev, DBG, "MSI-X TX/RX routine\n"); 83 84 tx_ring = que->tx_ring; 85 rx_ring = que->rx_ring; 86 qid = que->id; 87 ena_qid = ENA_IO_TXQ_IDX(qid); 88 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; 89 90 tx_ring->first_interrupt = true; 91 rx_ring->first_interrupt = true; 92 93 for (i = 0; i < CLEAN_BUDGET; ++i) { 94 rxc = ena_rx_cleanup(rx_ring); 95 txc = ena_tx_cleanup(tx_ring); 96 97 if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)) 98 return; 99 100 if ((txc != TX_BUDGET) && (rxc != RX_BUDGET)) 101 break; 102 } 103 104 /* Signal that work is done and unmask interrupt */ 105 ena_com_update_intr_reg(&intr_reg, 106 RX_IRQ_INTERVAL, 107 TX_IRQ_INTERVAL, 108 true); 109 counter_u64_add(tx_ring->tx_stats.unmask_interrupt_num, 1); 110 ena_com_unmask_intr(io_cq, &intr_reg); 111 } 112 113 void 114 ena_deferred_mq_start(void *arg, int pending) 115 { 116 struct ena_ring *tx_ring = (struct ena_ring *)arg; 117 struct ifnet *ifp = tx_ring->adapter->ifp; 118 119 while (!drbr_empty(ifp, tx_ring->br) && 120 tx_ring->running && 121 (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) { 122 ENA_RING_MTX_LOCK(tx_ring); 123 ena_start_xmit(tx_ring); 124 ENA_RING_MTX_UNLOCK(tx_ring); 125 } 126 } 127 128 int 129 ena_mq_start(if_t ifp, struct mbuf *m) 130 { 131 struct ena_adapter *adapter = ifp->if_softc; 132 struct ena_ring *tx_ring; 133 int ret, is_drbr_empty; 134 uint32_t i; 135 #ifdef RSS 136 uint32_t bucket_id; 137 #endif 138 139 if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0)) 140 return (ENODEV); 141 142 /* Which queue to use */ 143 /* 144 * If everything is setup correctly, it should be the 145 * same bucket that the current CPU we're on is. 146 * It should improve performance. 147 */ 148 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 149 #ifdef RSS 150 if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), 151 &bucket_id) == 0) 152 i = bucket_id % adapter->num_io_queues; 153 else 154 #endif 155 i = m->m_pkthdr.flowid % adapter->num_io_queues; 156 } else { 157 i = curcpu % adapter->num_io_queues; 158 } 159 tx_ring = &adapter->tx_ring[i]; 160 161 /* Check if drbr is empty before putting packet */ 162 is_drbr_empty = drbr_empty(ifp, tx_ring->br); 163 ret = drbr_enqueue(ifp, tx_ring->br, m); 164 if (unlikely(ret != 0)) { 165 taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task); 166 return (ret); 167 } 168 169 if (is_drbr_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) { 170 ena_start_xmit(tx_ring); 171 ENA_RING_MTX_UNLOCK(tx_ring); 172 } else { 173 taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task); 174 } 175 176 return (0); 177 } 178 179 void 180 ena_qflush(if_t ifp) 181 { 182 struct ena_adapter *adapter = ifp->if_softc; 183 struct ena_ring *tx_ring = adapter->tx_ring; 184 int i; 185 186 for(i = 0; i < adapter->num_io_queues; ++i, ++tx_ring) 187 if (!drbr_empty(ifp, tx_ring->br)) { 188 ENA_RING_MTX_LOCK(tx_ring); 189 drbr_flush(ifp, tx_ring->br); 190 ENA_RING_MTX_UNLOCK(tx_ring); 191 } 192 193 if_qflush(ifp); 194 } 195 196 /********************************************************************* 197 * Static functions 198 *********************************************************************/ 199 200 static inline int 201 validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id) 202 { 203 struct ena_adapter *adapter = tx_ring->adapter; 204 struct ena_tx_buffer *tx_info = NULL; 205 206 if (likely(req_id < tx_ring->ring_size)) { 207 tx_info = &tx_ring->tx_buffer_info[req_id]; 208 if (tx_info->mbuf != NULL) 209 return (0); 210 ena_log(adapter->pdev, ERR, 211 "tx_info doesn't have valid mbuf\n"); 212 } 213 214 ena_log(adapter->pdev, ERR, "Invalid req_id: %hu\n", req_id); 215 counter_u64_add(tx_ring->tx_stats.bad_req_id, 1); 216 217 /* Trigger device reset */ 218 ena_trigger_reset(adapter, ENA_REGS_RESET_INV_TX_REQ_ID); 219 220 return (EFAULT); 221 } 222 223 /** 224 * ena_tx_cleanup - clear sent packets and corresponding descriptors 225 * @tx_ring: ring for which we want to clean packets 226 * 227 * Once packets are sent, we ask the device in a loop for no longer used 228 * descriptors. We find the related mbuf chain in a map (index in an array) 229 * and free it, then update ring state. 230 * This is performed in "endless" loop, updating ring pointers every 231 * TX_COMMIT. The first check of free descriptor is performed before the actual 232 * loop, then repeated at the loop end. 233 **/ 234 static int 235 ena_tx_cleanup(struct ena_ring *tx_ring) 236 { 237 struct ena_adapter *adapter; 238 struct ena_com_io_cq* io_cq; 239 uint16_t next_to_clean; 240 uint16_t req_id; 241 uint16_t ena_qid; 242 unsigned int total_done = 0; 243 int rc; 244 int commit = TX_COMMIT; 245 int budget = TX_BUDGET; 246 int work_done; 247 bool above_thresh; 248 249 adapter = tx_ring->que->adapter; 250 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); 251 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; 252 next_to_clean = tx_ring->next_to_clean; 253 254 #ifdef DEV_NETMAP 255 if (netmap_tx_irq(adapter->ifp, tx_ring->qid) != NM_IRQ_PASS) 256 return (0); 257 #endif /* DEV_NETMAP */ 258 259 do { 260 struct ena_tx_buffer *tx_info; 261 struct mbuf *mbuf; 262 263 rc = ena_com_tx_comp_req_id_get(io_cq, &req_id); 264 if (unlikely(rc != 0)) 265 break; 266 267 rc = validate_tx_req_id(tx_ring, req_id); 268 if (unlikely(rc != 0)) 269 break; 270 271 tx_info = &tx_ring->tx_buffer_info[req_id]; 272 273 mbuf = tx_info->mbuf; 274 275 tx_info->mbuf = NULL; 276 bintime_clear(&tx_info->timestamp); 277 278 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap, 279 BUS_DMASYNC_POSTWRITE); 280 bus_dmamap_unload(adapter->tx_buf_tag, 281 tx_info->dmamap); 282 283 ena_log_io(adapter->pdev, DBG, "tx: q %d mbuf %p completed\n", 284 tx_ring->qid, mbuf); 285 286 m_freem(mbuf); 287 288 total_done += tx_info->tx_descs; 289 290 tx_ring->free_tx_ids[next_to_clean] = req_id; 291 next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, 292 tx_ring->ring_size); 293 294 if (unlikely(--commit == 0)) { 295 commit = TX_COMMIT; 296 /* update ring state every TX_COMMIT descriptor */ 297 tx_ring->next_to_clean = next_to_clean; 298 ena_com_comp_ack( 299 &adapter->ena_dev->io_sq_queues[ena_qid], 300 total_done); 301 ena_com_update_dev_comp_head(io_cq); 302 total_done = 0; 303 } 304 } while (likely(--budget)); 305 306 work_done = TX_BUDGET - budget; 307 308 ena_log_io(adapter->pdev, DBG, "tx: q %d done. total pkts: %d\n", 309 tx_ring->qid, work_done); 310 311 /* If there is still something to commit update ring state */ 312 if (likely(commit != TX_COMMIT)) { 313 tx_ring->next_to_clean = next_to_clean; 314 ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid], 315 total_done); 316 ena_com_update_dev_comp_head(io_cq); 317 } 318 319 /* 320 * Need to make the rings circular update visible to 321 * ena_xmit_mbuf() before checking for tx_ring->running. 322 */ 323 mb(); 324 325 above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 326 ENA_TX_RESUME_THRESH); 327 if (unlikely(!tx_ring->running && above_thresh)) { 328 ENA_RING_MTX_LOCK(tx_ring); 329 above_thresh = 330 ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 331 ENA_TX_RESUME_THRESH); 332 if (!tx_ring->running && above_thresh) { 333 tx_ring->running = true; 334 counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1); 335 taskqueue_enqueue(tx_ring->enqueue_tq, 336 &tx_ring->enqueue_task); 337 } 338 ENA_RING_MTX_UNLOCK(tx_ring); 339 } 340 341 return (work_done); 342 } 343 344 static void 345 ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx, 346 struct mbuf *mbuf) 347 { 348 struct ena_adapter *adapter = rx_ring->adapter; 349 350 if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 351 mbuf->m_pkthdr.flowid = ena_rx_ctx->hash; 352 353 #ifdef RSS 354 /* 355 * Hardware and software RSS are in agreement only when both are 356 * configured to Toeplitz algorithm. This driver configures 357 * that algorithm only when software RSS is enabled and uses it. 358 */ 359 if (adapter->ena_dev->rss.hash_func != ENA_ADMIN_TOEPLITZ && 360 ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN) { 361 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH); 362 return; 363 } 364 #endif 365 366 if (ena_rx_ctx->frag && 367 (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) { 368 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH); 369 return; 370 } 371 372 switch (ena_rx_ctx->l3_proto) { 373 case ENA_ETH_IO_L3_PROTO_IPV4: 374 switch (ena_rx_ctx->l4_proto) { 375 case ENA_ETH_IO_L4_PROTO_TCP: 376 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4); 377 break; 378 case ENA_ETH_IO_L4_PROTO_UDP: 379 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4); 380 break; 381 default: 382 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4); 383 } 384 break; 385 case ENA_ETH_IO_L3_PROTO_IPV6: 386 switch (ena_rx_ctx->l4_proto) { 387 case ENA_ETH_IO_L4_PROTO_TCP: 388 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6); 389 break; 390 case ENA_ETH_IO_L4_PROTO_UDP: 391 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6); 392 break; 393 default: 394 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6); 395 } 396 break; 397 case ENA_ETH_IO_L3_PROTO_UNKNOWN: 398 M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE); 399 break; 400 default: 401 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH); 402 } 403 } else { 404 mbuf->m_pkthdr.flowid = rx_ring->qid; 405 M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE); 406 } 407 } 408 409 /** 410 * ena_rx_mbuf - assemble mbuf from descriptors 411 * @rx_ring: ring for which we want to clean packets 412 * @ena_bufs: buffer info 413 * @ena_rx_ctx: metadata for this packet(s) 414 * @next_to_clean: ring pointer, will be updated only upon success 415 * 416 **/ 417 static struct mbuf* 418 ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, 419 struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean) 420 { 421 struct mbuf *mbuf; 422 struct ena_rx_buffer *rx_info; 423 struct ena_adapter *adapter; 424 device_t pdev; 425 unsigned int descs = ena_rx_ctx->descs; 426 uint16_t ntc, len, req_id, buf = 0; 427 428 ntc = *next_to_clean; 429 adapter = rx_ring->adapter; 430 pdev = adapter->pdev; 431 432 len = ena_bufs[buf].len; 433 req_id = ena_bufs[buf].req_id; 434 rx_info = &rx_ring->rx_buffer_info[req_id]; 435 if (unlikely(rx_info->mbuf == NULL)) { 436 ena_log(pdev, ERR, "NULL mbuf in rx_info"); 437 return (NULL); 438 } 439 440 ena_log_io(pdev, DBG, "rx_info %p, mbuf %p, paddr %jx\n", rx_info, 441 rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr); 442 443 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, 444 BUS_DMASYNC_POSTREAD); 445 mbuf = rx_info->mbuf; 446 mbuf->m_flags |= M_PKTHDR; 447 mbuf->m_pkthdr.len = len; 448 mbuf->m_len = len; 449 /* Only for the first segment the data starts at specific offset */ 450 mbuf->m_data = mtodo(mbuf, ena_rx_ctx->pkt_offset); 451 ena_log_io(pdev, DBG, "Mbuf data offset=%u\n", ena_rx_ctx->pkt_offset); 452 mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp; 453 454 /* Fill mbuf with hash key and it's interpretation for optimization */ 455 ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf); 456 457 ena_log_io(pdev, DBG, "rx mbuf 0x%p, flags=0x%x, len: %d\n", mbuf, 458 mbuf->m_flags, mbuf->m_pkthdr.len); 459 460 /* DMA address is not needed anymore, unmap it */ 461 bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map); 462 463 rx_info->mbuf = NULL; 464 rx_ring->free_rx_ids[ntc] = req_id; 465 ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size); 466 467 /* 468 * While we have more than 1 descriptors for one rcvd packet, append 469 * other mbufs to the main one 470 */ 471 while (--descs) { 472 ++buf; 473 len = ena_bufs[buf].len; 474 req_id = ena_bufs[buf].req_id; 475 rx_info = &rx_ring->rx_buffer_info[req_id]; 476 477 if (unlikely(rx_info->mbuf == NULL)) { 478 ena_log(pdev, ERR, "NULL mbuf in rx_info"); 479 /* 480 * If one of the required mbufs was not allocated yet, 481 * we can break there. 482 * All earlier used descriptors will be reallocated 483 * later and not used mbufs can be reused. 484 * The next_to_clean pointer will not be updated in case 485 * of an error, so caller should advance it manually 486 * in error handling routine to keep it up to date 487 * with hw ring. 488 */ 489 m_freem(mbuf); 490 return (NULL); 491 } 492 493 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, 494 BUS_DMASYNC_POSTREAD); 495 if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) { 496 counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1); 497 ena_log_io(pdev, WARN, "Failed to append Rx mbuf %p\n", 498 mbuf); 499 } 500 501 ena_log_io(pdev, DBG, "rx mbuf updated. len %d\n", 502 mbuf->m_pkthdr.len); 503 504 /* Free already appended mbuf, it won't be useful anymore */ 505 bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map); 506 m_freem(rx_info->mbuf); 507 rx_info->mbuf = NULL; 508 509 rx_ring->free_rx_ids[ntc] = req_id; 510 ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size); 511 } 512 513 *next_to_clean = ntc; 514 515 return (mbuf); 516 } 517 518 /** 519 * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum 520 **/ 521 static inline void 522 ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx, 523 struct mbuf *mbuf) 524 { 525 device_t pdev = rx_ring->adapter->pdev; 526 527 /* if IP and error */ 528 if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) && 529 ena_rx_ctx->l3_csum_err)) { 530 /* ipv4 checksum error */ 531 mbuf->m_pkthdr.csum_flags = 0; 532 counter_u64_add(rx_ring->rx_stats.csum_bad, 1); 533 ena_log_io(pdev, DBG, "RX IPv4 header checksum error\n"); 534 return; 535 } 536 537 /* if TCP/UDP */ 538 if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || 539 (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) { 540 if (ena_rx_ctx->l4_csum_err) { 541 /* TCP/UDP checksum error */ 542 mbuf->m_pkthdr.csum_flags = 0; 543 counter_u64_add(rx_ring->rx_stats.csum_bad, 1); 544 ena_log_io(pdev, DBG, "RX L4 checksum error\n"); 545 } else { 546 mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED; 547 mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID; 548 counter_u64_add(rx_ring->rx_stats.csum_good, 1); 549 } 550 } 551 } 552 553 /** 554 * ena_rx_cleanup - handle rx irq 555 * @arg: ring for which irq is being handled 556 **/ 557 static int 558 ena_rx_cleanup(struct ena_ring *rx_ring) 559 { 560 struct ena_adapter *adapter; 561 device_t pdev; 562 struct mbuf *mbuf; 563 struct ena_com_rx_ctx ena_rx_ctx; 564 struct ena_com_io_cq* io_cq; 565 struct ena_com_io_sq* io_sq; 566 enum ena_regs_reset_reason_types reset_reason; 567 if_t ifp; 568 uint16_t ena_qid; 569 uint16_t next_to_clean; 570 uint32_t refill_required; 571 uint32_t refill_threshold; 572 uint32_t do_if_input = 0; 573 unsigned int qid; 574 int rc, i; 575 int budget = RX_BUDGET; 576 #ifdef DEV_NETMAP 577 int done; 578 #endif /* DEV_NETMAP */ 579 580 adapter = rx_ring->que->adapter; 581 pdev = adapter->pdev; 582 ifp = adapter->ifp; 583 qid = rx_ring->que->id; 584 ena_qid = ENA_IO_RXQ_IDX(qid); 585 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; 586 io_sq = &adapter->ena_dev->io_sq_queues[ena_qid]; 587 next_to_clean = rx_ring->next_to_clean; 588 589 #ifdef DEV_NETMAP 590 if (netmap_rx_irq(adapter->ifp, rx_ring->qid, &done) != NM_IRQ_PASS) 591 return (0); 592 #endif /* DEV_NETMAP */ 593 594 ena_log_io(pdev, DBG, "rx: qid %d\n", qid); 595 596 do { 597 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; 598 ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size; 599 ena_rx_ctx.descs = 0; 600 ena_rx_ctx.pkt_offset = 0; 601 602 bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag, 603 io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD); 604 rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx); 605 if (unlikely(rc != 0)) { 606 if (rc == ENA_COM_NO_SPACE) { 607 counter_u64_add(rx_ring->rx_stats.bad_desc_num, 608 1); 609 reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS; 610 } else { 611 counter_u64_add(rx_ring->rx_stats.bad_req_id, 612 1); 613 reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID; 614 } 615 ena_trigger_reset(adapter, reset_reason); 616 return (0); 617 } 618 619 if (unlikely(ena_rx_ctx.descs == 0)) 620 break; 621 622 ena_log_io(pdev, DBG, "rx: q %d got packet from ena. " 623 "descs #: %d l3 proto %d l4 proto %d hash: %x\n", 624 rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, 625 ena_rx_ctx.l4_proto, ena_rx_ctx.hash); 626 627 /* Receive mbuf from the ring */ 628 mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs, 629 &ena_rx_ctx, &next_to_clean); 630 bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag, 631 io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD); 632 /* Exit if we failed to retrieve a buffer */ 633 if (unlikely(mbuf == NULL)) { 634 for (i = 0; i < ena_rx_ctx.descs; ++i) { 635 rx_ring->free_rx_ids[next_to_clean] = 636 rx_ring->ena_bufs[i].req_id; 637 next_to_clean = 638 ENA_RX_RING_IDX_NEXT(next_to_clean, 639 rx_ring->ring_size); 640 641 } 642 break; 643 } 644 645 if (((ifp->if_capenable & IFCAP_RXCSUM) != 0) || 646 ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0)) { 647 ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf); 648 } 649 650 counter_enter(); 651 counter_u64_add_protected(rx_ring->rx_stats.bytes, 652 mbuf->m_pkthdr.len); 653 counter_u64_add_protected(adapter->hw_stats.rx_bytes, 654 mbuf->m_pkthdr.len); 655 counter_exit(); 656 /* 657 * LRO is only for IP/TCP packets and TCP checksum of the packet 658 * should be computed by hardware. 659 */ 660 do_if_input = 1; 661 if (((ifp->if_capenable & IFCAP_LRO) != 0) && 662 ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) && 663 (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) { 664 /* 665 * Send to the stack if: 666 * - LRO not enabled, or 667 * - no LRO resources, or 668 * - lro enqueue fails 669 */ 670 if ((rx_ring->lro.lro_cnt != 0) && 671 (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0)) 672 do_if_input = 0; 673 } 674 if (do_if_input != 0) { 675 ena_log_io(pdev, DBG, "calling if_input() with mbuf %p\n", 676 mbuf); 677 (*ifp->if_input)(ifp, mbuf); 678 } 679 680 counter_enter(); 681 counter_u64_add_protected(rx_ring->rx_stats.cnt, 1); 682 counter_u64_add_protected(adapter->hw_stats.rx_packets, 1); 683 counter_exit(); 684 } while (--budget); 685 686 rx_ring->next_to_clean = next_to_clean; 687 688 refill_required = ena_com_free_q_entries(io_sq); 689 refill_threshold = min_t(int, 690 rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER, 691 ENA_RX_REFILL_THRESH_PACKET); 692 693 if (refill_required > refill_threshold) { 694 ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); 695 ena_refill_rx_bufs(rx_ring, refill_required); 696 } 697 698 tcp_lro_flush_all(&rx_ring->lro); 699 700 return (RX_BUDGET - budget); 701 } 702 703 static void 704 ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf, 705 bool disable_meta_caching) 706 { 707 struct ena_com_tx_meta *ena_meta; 708 struct ether_vlan_header *eh; 709 struct mbuf *mbuf_next; 710 u32 mss; 711 bool offload; 712 uint16_t etype; 713 int ehdrlen; 714 struct ip *ip; 715 int iphlen; 716 struct tcphdr *th; 717 int offset; 718 719 offload = false; 720 ena_meta = &ena_tx_ctx->ena_meta; 721 mss = mbuf->m_pkthdr.tso_segsz; 722 723 if (mss != 0) 724 offload = true; 725 726 if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) 727 offload = true; 728 729 if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0) 730 offload = true; 731 732 if (!offload) { 733 if (disable_meta_caching) { 734 memset(ena_meta, 0, sizeof(*ena_meta)); 735 ena_tx_ctx->meta_valid = 1; 736 } else { 737 ena_tx_ctx->meta_valid = 0; 738 } 739 return; 740 } 741 742 /* Determine where frame payload starts. */ 743 eh = mtod(mbuf, struct ether_vlan_header *); 744 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 745 etype = ntohs(eh->evl_proto); 746 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 747 } else { 748 etype = ntohs(eh->evl_encap_proto); 749 ehdrlen = ETHER_HDR_LEN; 750 } 751 752 mbuf_next = m_getptr(mbuf, ehdrlen, &offset); 753 ip = (struct ip *)(mtodo(mbuf_next, offset)); 754 iphlen = ip->ip_hl << 2; 755 756 mbuf_next = m_getptr(mbuf, iphlen + ehdrlen, &offset); 757 th = (struct tcphdr *)(mtodo(mbuf_next, offset)); 758 759 if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) { 760 ena_tx_ctx->l3_csum_enable = 1; 761 } 762 if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) { 763 ena_tx_ctx->tso_enable = 1; 764 ena_meta->l4_hdr_len = (th->th_off); 765 } 766 767 switch (etype) { 768 case ETHERTYPE_IP: 769 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; 770 if ((ip->ip_off & htons(IP_DF)) != 0) 771 ena_tx_ctx->df = 1; 772 break; 773 case ETHERTYPE_IPV6: 774 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; 775 776 default: 777 break; 778 } 779 780 if (ip->ip_p == IPPROTO_TCP) { 781 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; 782 if ((mbuf->m_pkthdr.csum_flags & 783 (CSUM_IP_TCP | CSUM_IP6_TCP)) != 0) 784 ena_tx_ctx->l4_csum_enable = 1; 785 else 786 ena_tx_ctx->l4_csum_enable = 0; 787 } else if (ip->ip_p == IPPROTO_UDP) { 788 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; 789 if ((mbuf->m_pkthdr.csum_flags & 790 (CSUM_IP_UDP | CSUM_IP6_UDP)) != 0) 791 ena_tx_ctx->l4_csum_enable = 1; 792 else 793 ena_tx_ctx->l4_csum_enable = 0; 794 } else { 795 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN; 796 ena_tx_ctx->l4_csum_enable = 0; 797 } 798 799 ena_meta->mss = mss; 800 ena_meta->l3_hdr_len = iphlen; 801 ena_meta->l3_hdr_offset = ehdrlen; 802 ena_tx_ctx->meta_valid = 1; 803 } 804 805 static int 806 ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf) 807 { 808 struct ena_adapter *adapter; 809 struct mbuf *collapsed_mbuf; 810 int num_frags; 811 812 adapter = tx_ring->adapter; 813 num_frags = ena_mbuf_count(*mbuf); 814 815 /* One segment must be reserved for configuration descriptor. */ 816 if (num_frags < adapter->max_tx_sgl_size) 817 return (0); 818 819 if ((num_frags == adapter->max_tx_sgl_size) && 820 ((*mbuf)->m_pkthdr.len < tx_ring->tx_max_header_size)) 821 return (0); 822 823 counter_u64_add(tx_ring->tx_stats.collapse, 1); 824 825 collapsed_mbuf = m_collapse(*mbuf, M_NOWAIT, 826 adapter->max_tx_sgl_size - 1); 827 if (unlikely(collapsed_mbuf == NULL)) { 828 counter_u64_add(tx_ring->tx_stats.collapse_err, 1); 829 return (ENOMEM); 830 } 831 832 /* If mbuf was collapsed succesfully, original mbuf is released. */ 833 *mbuf = collapsed_mbuf; 834 835 return (0); 836 } 837 838 static int 839 ena_tx_map_mbuf(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info, 840 struct mbuf *mbuf, void **push_hdr, u16 *header_len) 841 { 842 struct ena_adapter *adapter = tx_ring->adapter; 843 struct ena_com_buf *ena_buf; 844 bus_dma_segment_t segs[ENA_BUS_DMA_SEGS]; 845 size_t iseg = 0; 846 uint32_t mbuf_head_len; 847 uint16_t offset; 848 int rc, nsegs; 849 850 mbuf_head_len = mbuf->m_len; 851 tx_info->mbuf = mbuf; 852 ena_buf = tx_info->bufs; 853 854 /* 855 * For easier maintaining of the DMA map, map the whole mbuf even if 856 * the LLQ is used. The descriptors will be filled using the segments. 857 */ 858 rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag, tx_info->dmamap, mbuf, 859 segs, &nsegs, BUS_DMA_NOWAIT); 860 if (unlikely((rc != 0) || (nsegs == 0))) { 861 ena_log_io(adapter->pdev, WARN, 862 "dmamap load failed! err: %d nsegs: %d\n", rc, nsegs); 863 goto dma_error; 864 } 865 866 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 867 /* 868 * When the device is LLQ mode, the driver will copy 869 * the header into the device memory space. 870 * the ena_com layer assumes the header is in a linear 871 * memory space. 872 * This assumption might be wrong since part of the header 873 * can be in the fragmented buffers. 874 * First check if header fits in the mbuf. If not, copy it to 875 * separate buffer that will be holding linearized data. 876 */ 877 *header_len = min_t(uint32_t, mbuf->m_pkthdr.len, tx_ring->tx_max_header_size); 878 879 /* If header is in linear space, just point into mbuf's data. */ 880 if (likely(*header_len <= mbuf_head_len)) { 881 *push_hdr = mbuf->m_data; 882 /* 883 * Otherwise, copy whole portion of header from multiple mbufs 884 * to intermediate buffer. 885 */ 886 } else { 887 m_copydata(mbuf, 0, *header_len, tx_ring->push_buf_intermediate_buf); 888 *push_hdr = tx_ring->push_buf_intermediate_buf; 889 890 counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1); 891 } 892 893 ena_log_io(adapter->pdev, DBG, "mbuf: %p ""header_buf->vaddr: %p " 894 "push_len: %d\n", mbuf, *push_hdr, *header_len); 895 896 /* If packet is fitted in LLQ header, no need for DMA segments. */ 897 if (mbuf->m_pkthdr.len <= tx_ring->tx_max_header_size) { 898 return (0); 899 } else { 900 offset = tx_ring->tx_max_header_size; 901 /* 902 * As Header part is mapped to LLQ header, we can skip it and just 903 * map the residuum of the mbuf to DMA Segments. 904 */ 905 while (offset > 0) { 906 if (offset >= segs[iseg].ds_len) { 907 offset -= segs[iseg].ds_len; 908 } else { 909 ena_buf->paddr = segs[iseg].ds_addr + offset; 910 ena_buf->len = segs[iseg].ds_len - offset; 911 ena_buf++; 912 tx_info->num_of_bufs++; 913 offset = 0; 914 } 915 iseg++; 916 } 917 } 918 } else { 919 *push_hdr = NULL; 920 /* 921 * header_len is just a hint for the device. Because FreeBSD is not 922 * giving us information about packet header length and it is not 923 * guaranteed that all packet headers will be in the 1st mbuf, setting 924 * header_len to 0 is making the device ignore this value and resolve 925 * header on it's own. 926 */ 927 *header_len = 0; 928 } 929 930 /* Map rest of the mbuf */ 931 while (iseg < nsegs) { 932 ena_buf->paddr = segs[iseg].ds_addr; 933 ena_buf->len = segs[iseg].ds_len; 934 ena_buf++; 935 iseg++; 936 tx_info->num_of_bufs++; 937 } 938 939 return (0); 940 941 dma_error: 942 counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1); 943 tx_info->mbuf = NULL; 944 return (rc); 945 } 946 947 static int 948 ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf) 949 { 950 struct ena_adapter *adapter; 951 device_t pdev; 952 struct ena_tx_buffer *tx_info; 953 struct ena_com_tx_ctx ena_tx_ctx; 954 struct ena_com_dev *ena_dev; 955 struct ena_com_io_sq* io_sq; 956 void *push_hdr; 957 uint16_t next_to_use; 958 uint16_t req_id; 959 uint16_t ena_qid; 960 uint16_t header_len; 961 int rc; 962 int nb_hw_desc; 963 964 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); 965 adapter = tx_ring->que->adapter; 966 pdev = adapter->pdev; 967 ena_dev = adapter->ena_dev; 968 io_sq = &ena_dev->io_sq_queues[ena_qid]; 969 970 rc = ena_check_and_collapse_mbuf(tx_ring, mbuf); 971 if (unlikely(rc != 0)) { 972 ena_log_io(pdev, WARN, "Failed to collapse mbuf! err: %d\n", 973 rc); 974 return (rc); 975 } 976 977 ena_log_io(pdev, DBG, "Tx: %d bytes\n", (*mbuf)->m_pkthdr.len); 978 979 next_to_use = tx_ring->next_to_use; 980 req_id = tx_ring->free_tx_ids[next_to_use]; 981 tx_info = &tx_ring->tx_buffer_info[req_id]; 982 tx_info->num_of_bufs = 0; 983 984 ENA_WARN(tx_info->mbuf != NULL, adapter->ena_dev, 985 "mbuf isn't NULL for req_id %d\n", req_id); 986 987 rc = ena_tx_map_mbuf(tx_ring, tx_info, *mbuf, &push_hdr, &header_len); 988 if (unlikely(rc != 0)) { 989 ena_log_io(pdev, WARN, "Failed to map TX mbuf\n"); 990 return (rc); 991 } 992 memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx)); 993 ena_tx_ctx.ena_bufs = tx_info->bufs; 994 ena_tx_ctx.push_header = push_hdr; 995 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 996 ena_tx_ctx.req_id = req_id; 997 ena_tx_ctx.header_len = header_len; 998 999 /* Set flags and meta data */ 1000 ena_tx_csum(&ena_tx_ctx, *mbuf, adapter->disable_meta_caching); 1001 1002 if (tx_ring->acum_pkts == DB_THRESHOLD || 1003 ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx)) { 1004 ena_log_io(pdev, DBG, 1005 "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", 1006 tx_ring->que->id); 1007 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 1008 counter_u64_add(tx_ring->tx_stats.doorbells, 1); 1009 tx_ring->acum_pkts = 0; 1010 } 1011 1012 /* Prepare the packet's descriptors and send them to device */ 1013 rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc); 1014 if (unlikely(rc != 0)) { 1015 if (likely(rc == ENA_COM_NO_MEM)) { 1016 ena_log_io(pdev, DBG, "tx ring[%d] is out of space\n", 1017 tx_ring->que->id); 1018 } else { 1019 ena_log(pdev, ERR, "failed to prepare tx bufs\n"); 1020 ena_trigger_reset(adapter, 1021 ENA_REGS_RESET_DRIVER_INVALID_STATE); 1022 } 1023 counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1); 1024 goto dma_error; 1025 } 1026 1027 counter_enter(); 1028 counter_u64_add_protected(tx_ring->tx_stats.cnt, 1); 1029 counter_u64_add_protected(tx_ring->tx_stats.bytes, 1030 (*mbuf)->m_pkthdr.len); 1031 1032 counter_u64_add_protected(adapter->hw_stats.tx_packets, 1); 1033 counter_u64_add_protected(adapter->hw_stats.tx_bytes, 1034 (*mbuf)->m_pkthdr.len); 1035 counter_exit(); 1036 1037 tx_info->tx_descs = nb_hw_desc; 1038 getbinuptime(&tx_info->timestamp); 1039 tx_info->print_once = true; 1040 1041 tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, 1042 tx_ring->ring_size); 1043 1044 /* stop the queue when no more space available, the packet can have up 1045 * to sgl_size + 2. one for the meta descriptor and one for header 1046 * (if the header is larger than tx_max_header_size). 1047 */ 1048 if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 1049 adapter->max_tx_sgl_size + 2))) { 1050 ena_log_io(pdev, DBG, "Stop queue %d\n", tx_ring->que->id); 1051 1052 tx_ring->running = false; 1053 counter_u64_add(tx_ring->tx_stats.queue_stop, 1); 1054 1055 /* There is a rare condition where this function decides to 1056 * stop the queue but meanwhile tx_cleanup() updates 1057 * next_to_completion and terminates. 1058 * The queue will remain stopped forever. 1059 * To solve this issue this function performs mb(), checks 1060 * the wakeup condition and wakes up the queue if needed. 1061 */ 1062 mb(); 1063 1064 if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 1065 ENA_TX_RESUME_THRESH)) { 1066 tx_ring->running = true; 1067 counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1); 1068 } 1069 } 1070 1071 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap, 1072 BUS_DMASYNC_PREWRITE); 1073 1074 return (0); 1075 1076 dma_error: 1077 tx_info->mbuf = NULL; 1078 bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap); 1079 1080 return (rc); 1081 } 1082 1083 static void 1084 ena_start_xmit(struct ena_ring *tx_ring) 1085 { 1086 struct mbuf *mbuf; 1087 struct ena_adapter *adapter = tx_ring->adapter; 1088 struct ena_com_io_sq* io_sq; 1089 int ena_qid; 1090 int ret = 0; 1091 1092 ENA_RING_MTX_ASSERT(tx_ring); 1093 1094 if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0)) 1095 return; 1096 1097 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))) 1098 return; 1099 1100 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); 1101 io_sq = &adapter->ena_dev->io_sq_queues[ena_qid]; 1102 1103 while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) { 1104 ena_log_io(adapter->pdev, DBG, 1105 "\ndequeued mbuf %p with flags %#x and header csum flags %#jx\n", 1106 mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags); 1107 1108 if (unlikely(!tx_ring->running)) { 1109 drbr_putback(adapter->ifp, tx_ring->br, mbuf); 1110 break; 1111 } 1112 1113 if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) { 1114 if (ret == ENA_COM_NO_MEM) { 1115 drbr_putback(adapter->ifp, tx_ring->br, mbuf); 1116 } else if (ret == ENA_COM_NO_SPACE) { 1117 drbr_putback(adapter->ifp, tx_ring->br, mbuf); 1118 } else { 1119 m_freem(mbuf); 1120 drbr_advance(adapter->ifp, tx_ring->br); 1121 } 1122 1123 break; 1124 } 1125 1126 drbr_advance(adapter->ifp, tx_ring->br); 1127 1128 if (unlikely((if_getdrvflags(adapter->ifp) & 1129 IFF_DRV_RUNNING) == 0)) 1130 return; 1131 1132 tx_ring->acum_pkts++; 1133 1134 BPF_MTAP(adapter->ifp, mbuf); 1135 } 1136 1137 if (likely(tx_ring->acum_pkts != 0)) { 1138 /* Trigger the dma engine */ 1139 ena_com_write_sq_doorbell(io_sq); 1140 counter_u64_add(tx_ring->tx_stats.doorbells, 1); 1141 tx_ring->acum_pkts = 0; 1142 } 1143 1144 if (unlikely(!tx_ring->running)) 1145 taskqueue_enqueue(tx_ring->que->cleanup_tq, 1146 &tx_ring->que->cleanup_task); 1147 } 1148