1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 #include <sys/cdefs.h> 31 #include "opt_rss.h" 32 #include "ena.h" 33 #include "ena_datapath.h" 34 #ifdef DEV_NETMAP 35 #include "ena_netmap.h" 36 #endif /* DEV_NETMAP */ 37 #ifdef RSS 38 #include <net/rss_config.h> 39 #endif /* RSS */ 40 41 #include <netinet6/ip6_var.h> 42 43 /********************************************************************* 44 * Static functions prototypes 45 *********************************************************************/ 46 47 static int ena_tx_cleanup(struct ena_ring *); 48 static int ena_rx_cleanup(struct ena_ring *); 49 static inline int ena_get_tx_req_id(struct ena_ring *tx_ring, 50 struct ena_com_io_cq *io_cq, uint16_t *req_id); 51 static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *, 52 struct mbuf *); 53 static struct mbuf *ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *, 54 struct ena_com_rx_ctx *, uint16_t *); 55 static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *, 56 struct mbuf *); 57 static void ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *, bool); 58 static int ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, 59 struct mbuf **mbuf); 60 static int ena_xmit_mbuf(struct ena_ring *, struct mbuf **); 61 static void ena_start_xmit(struct ena_ring *); 62 63 /********************************************************************* 64 * Global functions 65 *********************************************************************/ 66 67 void 68 ena_cleanup(void *arg, int pending) 69 { 70 struct ena_que *que = arg; 71 struct ena_adapter *adapter = que->adapter; 72 if_t ifp = adapter->ifp; 73 struct ena_ring *tx_ring; 74 struct ena_ring *rx_ring; 75 struct ena_com_io_cq *io_cq; 76 struct ena_eth_io_intr_reg intr_reg; 77 int qid, ena_qid; 78 int txc, rxc, i; 79 80 if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)) 81 return; 82 83 ena_log_io(adapter->pdev, DBG, "MSI-X TX/RX routine\n"); 84 85 tx_ring = que->tx_ring; 86 rx_ring = que->rx_ring; 87 qid = que->id; 88 ena_qid = ENA_IO_TXQ_IDX(qid); 89 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; 90 91 atomic_store_8(&tx_ring->first_interrupt, 1); 92 atomic_store_8(&rx_ring->first_interrupt, 1); 93 94 for (i = 0; i < ENA_CLEAN_BUDGET; ++i) { 95 rxc = ena_rx_cleanup(rx_ring); 96 txc = ena_tx_cleanup(tx_ring); 97 98 if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)) 99 return; 100 101 if ((txc != ENA_TX_BUDGET) && (rxc != ENA_RX_BUDGET)) 102 break; 103 } 104 105 /* Signal that work is done and unmask interrupt */ 106 ena_com_update_intr_reg(&intr_reg, ENA_RX_IRQ_INTERVAL, 107 ENA_TX_IRQ_INTERVAL, true); 108 counter_u64_add(tx_ring->tx_stats.unmask_interrupt_num, 1); 109 ena_com_unmask_intr(io_cq, &intr_reg); 110 } 111 112 void 113 ena_deferred_mq_start(void *arg, int pending) 114 { 115 struct ena_ring *tx_ring = (struct ena_ring *)arg; 116 if_t ifp = tx_ring->adapter->ifp; 117 118 while (!drbr_empty(ifp, tx_ring->br) && tx_ring->running && 119 (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) { 120 ENA_RING_MTX_LOCK(tx_ring); 121 ena_start_xmit(tx_ring); 122 ENA_RING_MTX_UNLOCK(tx_ring); 123 } 124 } 125 126 int 127 ena_mq_start(if_t ifp, struct mbuf *m) 128 { 129 struct ena_adapter *adapter = if_getsoftc(ifp); 130 struct ena_ring *tx_ring; 131 int ret, is_drbr_empty; 132 uint32_t i; 133 #ifdef RSS 134 uint32_t bucket_id; 135 #endif 136 137 if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0)) 138 return (ENODEV); 139 140 /* Which queue to use */ 141 /* 142 * If everything is setup correctly, it should be the 143 * same bucket that the current CPU we're on is. 144 * It should improve performance. 145 */ 146 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 147 #ifdef RSS 148 if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), 149 &bucket_id) == 0) 150 i = bucket_id % adapter->num_io_queues; 151 else 152 #endif 153 i = m->m_pkthdr.flowid % adapter->num_io_queues; 154 } else { 155 i = curcpu % adapter->num_io_queues; 156 } 157 tx_ring = &adapter->tx_ring[i]; 158 159 /* Check if drbr is empty before putting packet */ 160 is_drbr_empty = drbr_empty(ifp, tx_ring->br); 161 ret = drbr_enqueue(ifp, tx_ring->br, m); 162 if (unlikely(ret != 0)) { 163 taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task); 164 return (ret); 165 } 166 167 if (is_drbr_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) { 168 ena_start_xmit(tx_ring); 169 ENA_RING_MTX_UNLOCK(tx_ring); 170 } else { 171 taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task); 172 } 173 174 return (0); 175 } 176 177 void 178 ena_qflush(if_t ifp) 179 { 180 struct ena_adapter *adapter = if_getsoftc(ifp); 181 struct ena_ring *tx_ring = adapter->tx_ring; 182 int i; 183 184 for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring) 185 if (!drbr_empty(ifp, tx_ring->br)) { 186 ENA_RING_MTX_LOCK(tx_ring); 187 drbr_flush(ifp, tx_ring->br); 188 ENA_RING_MTX_UNLOCK(tx_ring); 189 } 190 191 if_qflush(ifp); 192 } 193 194 /********************************************************************* 195 * Static functions 196 *********************************************************************/ 197 198 static inline int 199 ena_get_tx_req_id(struct ena_ring *tx_ring, struct ena_com_io_cq *io_cq, 200 uint16_t *req_id) 201 { 202 struct ena_adapter *adapter = tx_ring->adapter; 203 int rc; 204 205 rc = ena_com_tx_comp_req_id_get(io_cq, req_id); 206 if (rc == ENA_COM_TRY_AGAIN) 207 return (EAGAIN); 208 209 if (unlikely(rc != 0)) { 210 ena_log(adapter->pdev, ERR, "Invalid req_id %hu in qid %hu\n", 211 *req_id, tx_ring->qid); 212 counter_u64_add(tx_ring->tx_stats.bad_req_id, 1); 213 goto err; 214 } 215 216 if (tx_ring->tx_buffer_info[*req_id].mbuf != NULL) 217 return (0); 218 219 ena_log(adapter->pdev, ERR, 220 "tx_info doesn't have valid mbuf. req_id %hu qid %hu\n", 221 *req_id, tx_ring->qid); 222 err: 223 ena_trigger_reset(adapter, ENA_REGS_RESET_INV_TX_REQ_ID); 224 225 return (EFAULT); 226 } 227 228 /** 229 * ena_tx_cleanup - clear sent packets and corresponding descriptors 230 * @tx_ring: ring for which we want to clean packets 231 * 232 * Once packets are sent, we ask the device in a loop for no longer used 233 * descriptors. We find the related mbuf chain in a map (index in an array) 234 * and free it, then update ring state. 235 * This is performed in "endless" loop, updating ring pointers every 236 * TX_COMMIT. The first check of free descriptor is performed before the actual 237 * loop, then repeated at the loop end. 238 **/ 239 static int 240 ena_tx_cleanup(struct ena_ring *tx_ring) 241 { 242 struct ena_adapter *adapter; 243 struct ena_com_io_cq *io_cq; 244 uint16_t next_to_clean; 245 uint16_t req_id; 246 uint16_t ena_qid; 247 unsigned int total_done = 0; 248 int rc; 249 int commit = ENA_TX_COMMIT; 250 int budget = ENA_TX_BUDGET; 251 int work_done; 252 bool above_thresh; 253 254 adapter = tx_ring->que->adapter; 255 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); 256 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; 257 next_to_clean = tx_ring->next_to_clean; 258 259 #ifdef DEV_NETMAP 260 if (netmap_tx_irq(adapter->ifp, tx_ring->qid) != NM_IRQ_PASS) 261 return (0); 262 #endif /* DEV_NETMAP */ 263 264 do { 265 struct ena_tx_buffer *tx_info; 266 struct mbuf *mbuf; 267 268 rc = ena_get_tx_req_id(tx_ring, io_cq, &req_id); 269 if (unlikely(rc != 0)) 270 break; 271 272 tx_info = &tx_ring->tx_buffer_info[req_id]; 273 274 mbuf = tx_info->mbuf; 275 276 tx_info->mbuf = NULL; 277 bintime_clear(&tx_info->timestamp); 278 279 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap, 280 BUS_DMASYNC_POSTWRITE); 281 bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap); 282 283 ena_log_io(adapter->pdev, DBG, "tx: q %d mbuf %p completed\n", 284 tx_ring->qid, mbuf); 285 286 m_freem(mbuf); 287 288 total_done += tx_info->tx_descs; 289 290 tx_ring->free_tx_ids[next_to_clean] = req_id; 291 next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, 292 tx_ring->ring_size); 293 294 if (unlikely(--commit == 0)) { 295 commit = ENA_TX_COMMIT; 296 /* update ring state every ENA_TX_COMMIT descriptor */ 297 tx_ring->next_to_clean = next_to_clean; 298 ena_com_comp_ack( 299 &adapter->ena_dev->io_sq_queues[ena_qid], 300 total_done); 301 ena_com_update_dev_comp_head(io_cq); 302 total_done = 0; 303 } 304 } while (likely(--budget)); 305 306 work_done = ENA_TX_BUDGET - budget; 307 308 ena_log_io(adapter->pdev, DBG, "tx: q %d done. total pkts: %d\n", 309 tx_ring->qid, work_done); 310 311 /* If there is still something to commit update ring state */ 312 if (likely(commit != ENA_TX_COMMIT)) { 313 tx_ring->next_to_clean = next_to_clean; 314 ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid], 315 total_done); 316 ena_com_update_dev_comp_head(io_cq); 317 } 318 319 /* 320 * Need to make the rings circular update visible to 321 * ena_xmit_mbuf() before checking for tx_ring->running. 322 */ 323 mb(); 324 325 above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 326 ENA_TX_RESUME_THRESH); 327 if (unlikely(!tx_ring->running && above_thresh)) { 328 ENA_RING_MTX_LOCK(tx_ring); 329 above_thresh = ena_com_sq_have_enough_space( 330 tx_ring->ena_com_io_sq, ENA_TX_RESUME_THRESH); 331 if (!tx_ring->running && above_thresh) { 332 tx_ring->running = true; 333 counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1); 334 taskqueue_enqueue(tx_ring->enqueue_tq, 335 &tx_ring->enqueue_task); 336 } 337 ENA_RING_MTX_UNLOCK(tx_ring); 338 } 339 340 tx_ring->tx_last_cleanup_ticks = ticks; 341 342 return (work_done); 343 } 344 345 static void 346 ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx, 347 struct mbuf *mbuf) 348 { 349 struct ena_adapter *adapter = rx_ring->adapter; 350 351 if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 352 mbuf->m_pkthdr.flowid = ena_rx_ctx->hash; 353 354 #ifdef RSS 355 /* 356 * Hardware and software RSS are in agreement only when both are 357 * configured to Toeplitz algorithm. This driver configures 358 * that algorithm only when software RSS is enabled and uses it. 359 */ 360 if (adapter->ena_dev->rss.hash_func != ENA_ADMIN_TOEPLITZ && 361 ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN) { 362 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH); 363 return; 364 } 365 #endif 366 367 if (ena_rx_ctx->frag && 368 (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) { 369 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH); 370 return; 371 } 372 373 switch (ena_rx_ctx->l3_proto) { 374 case ENA_ETH_IO_L3_PROTO_IPV4: 375 switch (ena_rx_ctx->l4_proto) { 376 case ENA_ETH_IO_L4_PROTO_TCP: 377 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4); 378 break; 379 case ENA_ETH_IO_L4_PROTO_UDP: 380 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4); 381 break; 382 default: 383 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4); 384 } 385 break; 386 case ENA_ETH_IO_L3_PROTO_IPV6: 387 switch (ena_rx_ctx->l4_proto) { 388 case ENA_ETH_IO_L4_PROTO_TCP: 389 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6); 390 break; 391 case ENA_ETH_IO_L4_PROTO_UDP: 392 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6); 393 break; 394 default: 395 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6); 396 } 397 break; 398 case ENA_ETH_IO_L3_PROTO_UNKNOWN: 399 M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE); 400 break; 401 default: 402 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH); 403 } 404 } else { 405 mbuf->m_pkthdr.flowid = rx_ring->qid; 406 M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE); 407 } 408 } 409 410 /** 411 * ena_rx_mbuf - assemble mbuf from descriptors 412 * @rx_ring: ring for which we want to clean packets 413 * @ena_bufs: buffer info 414 * @ena_rx_ctx: metadata for this packet(s) 415 * @next_to_clean: ring pointer, will be updated only upon success 416 * 417 **/ 418 static struct mbuf * 419 ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, 420 struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean) 421 { 422 struct mbuf *mbuf; 423 struct ena_rx_buffer *rx_info; 424 struct ena_adapter *adapter; 425 device_t pdev; 426 unsigned int descs = ena_rx_ctx->descs; 427 uint16_t ntc, len, req_id, buf = 0; 428 429 ntc = *next_to_clean; 430 adapter = rx_ring->adapter; 431 pdev = adapter->pdev; 432 433 len = ena_bufs[buf].len; 434 req_id = ena_bufs[buf].req_id; 435 rx_info = &rx_ring->rx_buffer_info[req_id]; 436 if (unlikely(rx_info->mbuf == NULL)) { 437 ena_log(pdev, ERR, "NULL mbuf in rx_info"); 438 return (NULL); 439 } 440 441 ena_log_io(pdev, DBG, "rx_info %p, mbuf %p, paddr %jx\n", rx_info, 442 rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr); 443 444 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, 445 BUS_DMASYNC_POSTREAD); 446 mbuf = rx_info->mbuf; 447 mbuf->m_flags |= M_PKTHDR; 448 mbuf->m_pkthdr.len = len; 449 mbuf->m_len = len; 450 /* Only for the first segment the data starts at specific offset */ 451 mbuf->m_data = mtodo(mbuf, ena_rx_ctx->pkt_offset); 452 ena_log_io(pdev, DBG, "Mbuf data offset=%u\n", ena_rx_ctx->pkt_offset); 453 mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp; 454 455 /* Fill mbuf with hash key and it's interpretation for optimization */ 456 ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf); 457 458 ena_log_io(pdev, DBG, "rx mbuf 0x%p, flags=0x%x, len: %d\n", mbuf, 459 mbuf->m_flags, mbuf->m_pkthdr.len); 460 461 /* DMA address is not needed anymore, unmap it */ 462 bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map); 463 464 rx_info->mbuf = NULL; 465 rx_ring->free_rx_ids[ntc] = req_id; 466 ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size); 467 468 /* 469 * While we have more than 1 descriptors for one rcvd packet, append 470 * other mbufs to the main one 471 */ 472 while (--descs) { 473 ++buf; 474 len = ena_bufs[buf].len; 475 req_id = ena_bufs[buf].req_id; 476 rx_info = &rx_ring->rx_buffer_info[req_id]; 477 478 if (unlikely(rx_info->mbuf == NULL)) { 479 ena_log(pdev, ERR, "NULL mbuf in rx_info"); 480 /* 481 * If one of the required mbufs was not allocated yet, 482 * we can break there. 483 * All earlier used descriptors will be reallocated 484 * later and not used mbufs can be reused. 485 * The next_to_clean pointer will not be updated in case 486 * of an error, so caller should advance it manually 487 * in error handling routine to keep it up to date 488 * with hw ring. 489 */ 490 m_freem(mbuf); 491 return (NULL); 492 } 493 494 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, 495 BUS_DMASYNC_POSTREAD); 496 if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) { 497 counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1); 498 ena_log_io(pdev, WARN, "Failed to append Rx mbuf %p\n", 499 mbuf); 500 } 501 502 ena_log_io(pdev, DBG, "rx mbuf updated. len %d\n", 503 mbuf->m_pkthdr.len); 504 505 /* Free already appended mbuf, it won't be useful anymore */ 506 bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map); 507 m_freem(rx_info->mbuf); 508 rx_info->mbuf = NULL; 509 510 rx_ring->free_rx_ids[ntc] = req_id; 511 ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size); 512 } 513 514 *next_to_clean = ntc; 515 516 return (mbuf); 517 } 518 519 /** 520 * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum 521 **/ 522 static inline void 523 ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx, 524 struct mbuf *mbuf) 525 { 526 device_t pdev = rx_ring->adapter->pdev; 527 528 /* if IP and error */ 529 if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) && 530 ena_rx_ctx->l3_csum_err)) { 531 /* ipv4 checksum error */ 532 mbuf->m_pkthdr.csum_flags = 0; 533 counter_u64_add(rx_ring->rx_stats.csum_bad, 1); 534 ena_log_io(pdev, DBG, "RX IPv4 header checksum error\n"); 535 return; 536 } 537 538 /* if TCP/UDP */ 539 if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || 540 (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) { 541 if (ena_rx_ctx->l4_csum_err) { 542 /* TCP/UDP checksum error */ 543 mbuf->m_pkthdr.csum_flags = 0; 544 counter_u64_add(rx_ring->rx_stats.csum_bad, 1); 545 ena_log_io(pdev, DBG, "RX L4 checksum error\n"); 546 } else { 547 mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED; 548 mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID; 549 counter_u64_add(rx_ring->rx_stats.csum_good, 1); 550 } 551 } 552 } 553 554 /** 555 * ena_rx_cleanup - handle rx irq 556 * @arg: ring for which irq is being handled 557 **/ 558 static int 559 ena_rx_cleanup(struct ena_ring *rx_ring) 560 { 561 struct ena_adapter *adapter; 562 device_t pdev; 563 struct mbuf *mbuf; 564 struct ena_com_rx_ctx ena_rx_ctx; 565 struct ena_com_io_cq *io_cq; 566 struct ena_com_io_sq *io_sq; 567 enum ena_regs_reset_reason_types reset_reason; 568 if_t ifp; 569 uint16_t ena_qid; 570 uint16_t next_to_clean; 571 uint32_t refill_required; 572 uint32_t refill_threshold; 573 uint32_t do_if_input = 0; 574 unsigned int qid; 575 int rc, i; 576 int budget = ENA_RX_BUDGET; 577 #ifdef DEV_NETMAP 578 int done; 579 #endif /* DEV_NETMAP */ 580 581 adapter = rx_ring->que->adapter; 582 pdev = adapter->pdev; 583 ifp = adapter->ifp; 584 qid = rx_ring->que->id; 585 ena_qid = ENA_IO_RXQ_IDX(qid); 586 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; 587 io_sq = &adapter->ena_dev->io_sq_queues[ena_qid]; 588 next_to_clean = rx_ring->next_to_clean; 589 590 #ifdef DEV_NETMAP 591 if (netmap_rx_irq(adapter->ifp, rx_ring->qid, &done) != NM_IRQ_PASS) 592 return (0); 593 #endif /* DEV_NETMAP */ 594 595 ena_log_io(pdev, DBG, "rx: qid %d\n", qid); 596 597 do { 598 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; 599 ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size; 600 ena_rx_ctx.descs = 0; 601 ena_rx_ctx.pkt_offset = 0; 602 603 bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag, 604 io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD); 605 rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx); 606 if (unlikely(rc != 0)) { 607 if (rc == ENA_COM_NO_SPACE) { 608 counter_u64_add(rx_ring->rx_stats.bad_desc_num, 609 1); 610 reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS; 611 } else { 612 counter_u64_add(rx_ring->rx_stats.bad_req_id, 613 1); 614 reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID; 615 } 616 ena_trigger_reset(adapter, reset_reason); 617 return (0); 618 } 619 620 if (unlikely(ena_rx_ctx.descs == 0)) 621 break; 622 623 ena_log_io(pdev, DBG, 624 "rx: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n", 625 rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, 626 ena_rx_ctx.l4_proto, ena_rx_ctx.hash); 627 628 /* Receive mbuf from the ring */ 629 mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs, &ena_rx_ctx, 630 &next_to_clean); 631 bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag, 632 io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD); 633 /* Exit if we failed to retrieve a buffer */ 634 if (unlikely(mbuf == NULL)) { 635 for (i = 0; i < ena_rx_ctx.descs; ++i) { 636 rx_ring->free_rx_ids[next_to_clean] = 637 rx_ring->ena_bufs[i].req_id; 638 next_to_clean = ENA_RX_RING_IDX_NEXT( 639 next_to_clean, rx_ring->ring_size); 640 } 641 break; 642 } 643 644 if (((if_getcapenable(ifp) & IFCAP_RXCSUM) != 0) || 645 ((if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) != 0)) { 646 ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf); 647 } 648 649 counter_enter(); 650 counter_u64_add_protected(rx_ring->rx_stats.bytes, 651 mbuf->m_pkthdr.len); 652 counter_u64_add_protected(adapter->hw_stats.rx_bytes, 653 mbuf->m_pkthdr.len); 654 counter_exit(); 655 /* 656 * LRO is only for IP/TCP packets and TCP checksum of the packet 657 * should be computed by hardware. 658 */ 659 do_if_input = 1; 660 if (((if_getcapenable(ifp) & IFCAP_LRO) != 0) && 661 ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) && 662 (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) { 663 /* 664 * Send to the stack if: 665 * - LRO not enabled, or 666 * - no LRO resources, or 667 * - lro enqueue fails 668 */ 669 if ((rx_ring->lro.lro_cnt != 0) && 670 (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0)) 671 do_if_input = 0; 672 } 673 if (do_if_input != 0) { 674 ena_log_io(pdev, DBG, 675 "calling if_input() with mbuf %p\n", mbuf); 676 if_input(ifp, mbuf); 677 } 678 679 counter_enter(); 680 counter_u64_add_protected(rx_ring->rx_stats.cnt, 1); 681 counter_u64_add_protected(adapter->hw_stats.rx_packets, 1); 682 counter_exit(); 683 } while (--budget); 684 685 rx_ring->next_to_clean = next_to_clean; 686 687 refill_required = ena_com_free_q_entries(io_sq); 688 refill_threshold = min_t(int, 689 rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER, 690 ENA_RX_REFILL_THRESH_PACKET); 691 692 if (refill_required > refill_threshold) { 693 ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); 694 ena_refill_rx_bufs(rx_ring, refill_required); 695 } 696 697 tcp_lro_flush_all(&rx_ring->lro); 698 699 return (ENA_RX_BUDGET - budget); 700 } 701 702 static void 703 ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf, 704 bool disable_meta_caching) 705 { 706 struct ena_com_tx_meta *ena_meta; 707 struct ether_vlan_header *eh; 708 struct mbuf *mbuf_next; 709 u32 mss; 710 bool offload; 711 uint16_t etype; 712 int ehdrlen; 713 struct ip *ip; 714 int ipproto; 715 int iphlen; 716 struct tcphdr *th; 717 int offset; 718 719 offload = false; 720 ena_meta = &ena_tx_ctx->ena_meta; 721 mss = mbuf->m_pkthdr.tso_segsz; 722 723 if (mss != 0) 724 offload = true; 725 726 if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) 727 offload = true; 728 729 if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0) 730 offload = true; 731 732 if ((mbuf->m_pkthdr.csum_flags & CSUM6_OFFLOAD) != 0) 733 offload = true; 734 735 if (!offload) { 736 if (disable_meta_caching) { 737 memset(ena_meta, 0, sizeof(*ena_meta)); 738 ena_tx_ctx->meta_valid = 1; 739 } else { 740 ena_tx_ctx->meta_valid = 0; 741 } 742 return; 743 } 744 745 /* Determine where frame payload starts. */ 746 eh = mtod(mbuf, struct ether_vlan_header *); 747 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 748 etype = ntohs(eh->evl_proto); 749 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 750 } else { 751 etype = ntohs(eh->evl_encap_proto); 752 ehdrlen = ETHER_HDR_LEN; 753 } 754 755 mbuf_next = m_getptr(mbuf, ehdrlen, &offset); 756 757 switch (etype) { 758 case ETHERTYPE_IP: 759 ip = (struct ip *)(mtodo(mbuf_next, offset)); 760 iphlen = ip->ip_hl << 2; 761 ipproto = ip->ip_p; 762 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; 763 if ((ip->ip_off & htons(IP_DF)) != 0) 764 ena_tx_ctx->df = 1; 765 break; 766 case ETHERTYPE_IPV6: 767 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; 768 iphlen = ip6_lasthdr(mbuf, ehdrlen, IPPROTO_IPV6, &ipproto); 769 iphlen -= ehdrlen; 770 ena_tx_ctx->df = 1; 771 break; 772 default: 773 iphlen = 0; 774 ipproto = 0; 775 break; 776 } 777 778 mbuf_next = m_getptr(mbuf, iphlen + ehdrlen, &offset); 779 th = (struct tcphdr *)(mtodo(mbuf_next, offset)); 780 781 if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) { 782 ena_tx_ctx->l3_csum_enable = 1; 783 } 784 if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) { 785 ena_tx_ctx->tso_enable = 1; 786 ena_meta->l4_hdr_len = (th->th_off); 787 } 788 789 if (ipproto == IPPROTO_TCP) { 790 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; 791 if ((mbuf->m_pkthdr.csum_flags & 792 (CSUM_IP_TCP | CSUM_IP6_TCP)) != 0) 793 ena_tx_ctx->l4_csum_enable = 1; 794 else 795 ena_tx_ctx->l4_csum_enable = 0; 796 } else if (ipproto == IPPROTO_UDP) { 797 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; 798 if ((mbuf->m_pkthdr.csum_flags & 799 (CSUM_IP_UDP | CSUM_IP6_UDP)) != 0) 800 ena_tx_ctx->l4_csum_enable = 1; 801 else 802 ena_tx_ctx->l4_csum_enable = 0; 803 } else { 804 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN; 805 ena_tx_ctx->l4_csum_enable = 0; 806 } 807 808 ena_meta->mss = mss; 809 ena_meta->l3_hdr_len = iphlen; 810 ena_meta->l3_hdr_offset = ehdrlen; 811 ena_tx_ctx->meta_valid = 1; 812 } 813 814 static int 815 ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf) 816 { 817 struct ena_adapter *adapter; 818 struct mbuf *collapsed_mbuf; 819 int num_frags; 820 821 adapter = tx_ring->adapter; 822 num_frags = ena_mbuf_count(*mbuf); 823 824 /* One segment must be reserved for configuration descriptor. */ 825 if (num_frags < adapter->max_tx_sgl_size) 826 return (0); 827 828 if ((num_frags == adapter->max_tx_sgl_size) && 829 ((*mbuf)->m_pkthdr.len < tx_ring->tx_max_header_size)) 830 return (0); 831 832 counter_u64_add(tx_ring->tx_stats.collapse, 1); 833 834 collapsed_mbuf = m_collapse(*mbuf, M_NOWAIT, 835 adapter->max_tx_sgl_size - 1); 836 if (unlikely(collapsed_mbuf == NULL)) { 837 counter_u64_add(tx_ring->tx_stats.collapse_err, 1); 838 return (ENOMEM); 839 } 840 841 /* If mbuf was collapsed succesfully, original mbuf is released. */ 842 *mbuf = collapsed_mbuf; 843 844 return (0); 845 } 846 847 static int 848 ena_tx_map_mbuf(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info, 849 struct mbuf *mbuf, void **push_hdr, u16 *header_len) 850 { 851 struct ena_adapter *adapter = tx_ring->adapter; 852 struct ena_com_buf *ena_buf; 853 bus_dma_segment_t segs[ENA_BUS_DMA_SEGS]; 854 size_t iseg = 0; 855 uint32_t mbuf_head_len; 856 uint16_t offset; 857 int rc, nsegs; 858 859 mbuf_head_len = mbuf->m_len; 860 tx_info->mbuf = mbuf; 861 ena_buf = tx_info->bufs; 862 863 /* 864 * For easier maintaining of the DMA map, map the whole mbuf even if 865 * the LLQ is used. The descriptors will be filled using the segments. 866 */ 867 rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag, 868 tx_info->dmamap, mbuf, segs, &nsegs, BUS_DMA_NOWAIT); 869 if (unlikely((rc != 0) || (nsegs == 0))) { 870 ena_log_io(adapter->pdev, WARN, 871 "dmamap load failed! err: %d nsegs: %d\n", rc, nsegs); 872 goto dma_error; 873 } 874 875 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 876 /* 877 * When the device is LLQ mode, the driver will copy 878 * the header into the device memory space. 879 * the ena_com layer assumes the header is in a linear 880 * memory space. 881 * This assumption might be wrong since part of the header 882 * can be in the fragmented buffers. 883 * First check if header fits in the mbuf. If not, copy it to 884 * separate buffer that will be holding linearized data. 885 */ 886 *header_len = min_t(uint32_t, mbuf->m_pkthdr.len, 887 tx_ring->tx_max_header_size); 888 889 /* If header is in linear space, just point into mbuf's data. */ 890 if (likely(*header_len <= mbuf_head_len)) { 891 *push_hdr = mbuf->m_data; 892 /* 893 * Otherwise, copy whole portion of header from multiple 894 * mbufs to intermediate buffer. 895 */ 896 } else { 897 m_copydata(mbuf, 0, *header_len, 898 tx_ring->push_buf_intermediate_buf); 899 *push_hdr = tx_ring->push_buf_intermediate_buf; 900 901 counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1); 902 } 903 904 ena_log_io(adapter->pdev, DBG, 905 "mbuf: %p header_buf->vaddr: %p push_len: %d\n", 906 mbuf, *push_hdr, *header_len); 907 908 /* If packet is fitted in LLQ header, no need for DMA segments. */ 909 if (mbuf->m_pkthdr.len <= tx_ring->tx_max_header_size) { 910 return (0); 911 } else { 912 offset = tx_ring->tx_max_header_size; 913 /* 914 * As Header part is mapped to LLQ header, we can skip 915 * it and just map the residuum of the mbuf to DMA 916 * Segments. 917 */ 918 while (offset > 0) { 919 if (offset >= segs[iseg].ds_len) { 920 offset -= segs[iseg].ds_len; 921 } else { 922 ena_buf->paddr = segs[iseg].ds_addr + 923 offset; 924 ena_buf->len = segs[iseg].ds_len - 925 offset; 926 ena_buf++; 927 tx_info->num_of_bufs++; 928 offset = 0; 929 } 930 iseg++; 931 } 932 } 933 } else { 934 *push_hdr = NULL; 935 /* 936 * header_len is just a hint for the device. Because FreeBSD is 937 * not giving us information about packet header length and it 938 * is not guaranteed that all packet headers will be in the 1st 939 * mbuf, setting header_len to 0 is making the device ignore 940 * this value and resolve header on it's own. 941 */ 942 *header_len = 0; 943 } 944 945 /* Map rest of the mbuf */ 946 while (iseg < nsegs) { 947 ena_buf->paddr = segs[iseg].ds_addr; 948 ena_buf->len = segs[iseg].ds_len; 949 ena_buf++; 950 iseg++; 951 tx_info->num_of_bufs++; 952 } 953 954 return (0); 955 956 dma_error: 957 counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1); 958 tx_info->mbuf = NULL; 959 return (rc); 960 } 961 962 static int 963 ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf) 964 { 965 struct ena_adapter *adapter; 966 device_t pdev; 967 struct ena_tx_buffer *tx_info; 968 struct ena_com_tx_ctx ena_tx_ctx; 969 struct ena_com_dev *ena_dev; 970 struct ena_com_io_sq *io_sq; 971 void *push_hdr; 972 uint16_t next_to_use; 973 uint16_t req_id; 974 uint16_t ena_qid; 975 uint16_t header_len; 976 int rc; 977 int nb_hw_desc; 978 979 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); 980 adapter = tx_ring->que->adapter; 981 pdev = adapter->pdev; 982 ena_dev = adapter->ena_dev; 983 io_sq = &ena_dev->io_sq_queues[ena_qid]; 984 985 rc = ena_check_and_collapse_mbuf(tx_ring, mbuf); 986 if (unlikely(rc != 0)) { 987 ena_log_io(pdev, WARN, "Failed to collapse mbuf! err: %d\n", 988 rc); 989 return (rc); 990 } 991 992 ena_log_io(pdev, DBG, "Tx: %d bytes\n", (*mbuf)->m_pkthdr.len); 993 994 next_to_use = tx_ring->next_to_use; 995 req_id = tx_ring->free_tx_ids[next_to_use]; 996 tx_info = &tx_ring->tx_buffer_info[req_id]; 997 tx_info->num_of_bufs = 0; 998 999 ENA_WARN(tx_info->mbuf != NULL, adapter->ena_dev, 1000 "mbuf isn't NULL for req_id %d\n", req_id); 1001 1002 rc = ena_tx_map_mbuf(tx_ring, tx_info, *mbuf, &push_hdr, &header_len); 1003 if (unlikely(rc != 0)) { 1004 ena_log_io(pdev, WARN, "Failed to map TX mbuf\n"); 1005 return (rc); 1006 } 1007 memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx)); 1008 ena_tx_ctx.ena_bufs = tx_info->bufs; 1009 ena_tx_ctx.push_header = push_hdr; 1010 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 1011 ena_tx_ctx.req_id = req_id; 1012 ena_tx_ctx.header_len = header_len; 1013 1014 /* Set flags and meta data */ 1015 ena_tx_csum(&ena_tx_ctx, *mbuf, adapter->disable_meta_caching); 1016 1017 if (tx_ring->acum_pkts == ENA_DB_THRESHOLD || 1018 ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx)) { 1019 ena_log_io(pdev, DBG, 1020 "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", 1021 tx_ring->que->id); 1022 ena_ring_tx_doorbell(tx_ring); 1023 } 1024 1025 /* Prepare the packet's descriptors and send them to device */ 1026 rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc); 1027 if (unlikely(rc != 0)) { 1028 if (likely(rc == ENA_COM_NO_MEM)) { 1029 ena_log_io(pdev, DBG, "tx ring[%d] is out of space\n", 1030 tx_ring->que->id); 1031 } else { 1032 ena_log(pdev, ERR, "failed to prepare tx bufs\n"); 1033 ena_trigger_reset(adapter, 1034 ENA_REGS_RESET_DRIVER_INVALID_STATE); 1035 } 1036 counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1); 1037 goto dma_error; 1038 } 1039 1040 counter_enter(); 1041 counter_u64_add_protected(tx_ring->tx_stats.cnt, 1); 1042 counter_u64_add_protected(tx_ring->tx_stats.bytes, 1043 (*mbuf)->m_pkthdr.len); 1044 1045 counter_u64_add_protected(adapter->hw_stats.tx_packets, 1); 1046 counter_u64_add_protected(adapter->hw_stats.tx_bytes, 1047 (*mbuf)->m_pkthdr.len); 1048 counter_exit(); 1049 1050 tx_info->tx_descs = nb_hw_desc; 1051 getbinuptime(&tx_info->timestamp); 1052 tx_info->print_once = true; 1053 1054 tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, 1055 tx_ring->ring_size); 1056 1057 /* stop the queue when no more space available, the packet can have up 1058 * to sgl_size + 2. one for the meta descriptor and one for header 1059 * (if the header is larger than tx_max_header_size). 1060 */ 1061 if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 1062 adapter->max_tx_sgl_size + 2))) { 1063 ena_log_io(pdev, DBG, "Stop queue %d\n", tx_ring->que->id); 1064 1065 tx_ring->running = false; 1066 counter_u64_add(tx_ring->tx_stats.queue_stop, 1); 1067 1068 /* There is a rare condition where this function decides to 1069 * stop the queue but meanwhile tx_cleanup() updates 1070 * next_to_completion and terminates. 1071 * The queue will remain stopped forever. 1072 * To solve this issue this function performs mb(), checks 1073 * the wakeup condition and wakes up the queue if needed. 1074 */ 1075 mb(); 1076 1077 if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 1078 ENA_TX_RESUME_THRESH)) { 1079 tx_ring->running = true; 1080 counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1); 1081 } 1082 } 1083 1084 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap, 1085 BUS_DMASYNC_PREWRITE); 1086 1087 return (0); 1088 1089 dma_error: 1090 tx_info->mbuf = NULL; 1091 bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap); 1092 1093 return (rc); 1094 } 1095 1096 static void 1097 ena_start_xmit(struct ena_ring *tx_ring) 1098 { 1099 struct mbuf *mbuf; 1100 struct ena_adapter *adapter = tx_ring->adapter; 1101 int ret = 0; 1102 1103 ENA_RING_MTX_ASSERT(tx_ring); 1104 1105 if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0)) 1106 return; 1107 1108 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))) 1109 return; 1110 1111 while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) { 1112 ena_log_io(adapter->pdev, DBG, 1113 "\ndequeued mbuf %p with flags %#x and header csum flags %#jx\n", 1114 mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags); 1115 1116 if (unlikely(!tx_ring->running)) { 1117 drbr_putback(adapter->ifp, tx_ring->br, mbuf); 1118 break; 1119 } 1120 1121 if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) { 1122 if (ret == ENA_COM_NO_MEM) { 1123 drbr_putback(adapter->ifp, tx_ring->br, mbuf); 1124 } else if (ret == ENA_COM_NO_SPACE) { 1125 drbr_putback(adapter->ifp, tx_ring->br, mbuf); 1126 } else { 1127 m_freem(mbuf); 1128 drbr_advance(adapter->ifp, tx_ring->br); 1129 } 1130 1131 break; 1132 } 1133 1134 drbr_advance(adapter->ifp, tx_ring->br); 1135 1136 if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0)) 1137 return; 1138 1139 tx_ring->acum_pkts++; 1140 1141 BPF_MTAP(adapter->ifp, mbuf); 1142 } 1143 1144 if (likely(tx_ring->acum_pkts != 0)) { 1145 /* Trigger the dma engine */ 1146 ena_ring_tx_doorbell(tx_ring); 1147 } 1148 1149 if (unlikely(!tx_ring->running)) 1150 taskqueue_enqueue(tx_ring->que->cleanup_tq, 1151 &tx_ring->que->cleanup_task); 1152 } 1153