1 /*- 2 * Copyright (c) 2014-2017, Matthew Macy <mmacy@nextbsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 11 * 2. Neither the name of Matthew Macy nor the names of its 12 * contributors may be used to endorse or promote products derived from 13 * this software without specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 * POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 #include "opt_acpi.h" 34 35 #include <sys/param.h> 36 #include <sys/types.h> 37 #include <sys/bus.h> 38 #include <sys/eventhandler.h> 39 #include <sys/sockio.h> 40 #include <sys/kernel.h> 41 #include <sys/lock.h> 42 #include <sys/mutex.h> 43 #include <sys/module.h> 44 #include <sys/kobj.h> 45 #include <sys/rman.h> 46 #include <sys/sbuf.h> 47 #include <sys/smp.h> 48 #include <sys/socket.h> 49 #include <sys/sysctl.h> 50 #include <sys/syslog.h> 51 #include <sys/taskqueue.h> 52 #include <sys/limits.h> 53 54 55 #include <net/if.h> 56 #include <net/if_var.h> 57 #include <net/if_types.h> 58 #include <net/if_media.h> 59 #include <net/bpf.h> 60 #include <net/ethernet.h> 61 #include <net/mp_ring.h> 62 63 #include <netinet/in.h> 64 #include <netinet/in_pcb.h> 65 #include <netinet/tcp_lro.h> 66 #include <netinet/in_systm.h> 67 #include <netinet/if_ether.h> 68 #include <netinet/ip.h> 69 #include <netinet/ip6.h> 70 #include <netinet/tcp.h> 71 72 #include <machine/bus.h> 73 #include <machine/in_cksum.h> 74 75 #include <vm/vm.h> 76 #include <vm/pmap.h> 77 78 #include <dev/led/led.h> 79 #include <dev/pci/pcireg.h> 80 #include <dev/pci/pcivar.h> 81 #include <dev/pci/pci_private.h> 82 83 #include <net/iflib.h> 84 85 #include "ifdi_if.h" 86 87 #if defined(__i386__) || defined(__amd64__) 88 #include <sys/memdesc.h> 89 #include <machine/bus.h> 90 #include <machine/md_var.h> 91 #include <machine/specialreg.h> 92 #include <x86/include/busdma_impl.h> 93 #include <x86/iommu/busdma_dmar.h> 94 #endif 95 96 #include <sys/bitstring.h> 97 /* 98 * enable accounting of every mbuf as it comes in to and goes out of 99 * iflib's software descriptor references 100 */ 101 #define MEMORY_LOGGING 0 102 /* 103 * Enable mbuf vectors for compressing long mbuf chains 104 */ 105 106 /* 107 * NB: 108 * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead 109 * we prefetch needs to be determined by the time spent in m_free vis a vis 110 * the cost of a prefetch. 
This will of course vary based on the workload: 111 * - NFLX's m_free path is dominated by vm-based M_EXT manipulation which 112 * is quite expensive, thus suggesting very little prefetch. 113 * - small packet forwarding which is just returning a single mbuf to 114 * UMA will typically be very fast vis a vis the cost of a memory 115 * access. 116 */ 117 118 119 /* 120 * File organization: 121 * - private structures 122 * - iflib private utility functions 123 * - ifnet functions 124 * - vlan registry and other exported functions 125 * - iflib public core functions 126 * 127 * 128 */ 129 static MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library"); 130 131 struct iflib_txq; 132 typedef struct iflib_txq *iflib_txq_t; 133 struct iflib_rxq; 134 typedef struct iflib_rxq *iflib_rxq_t; 135 struct iflib_fl; 136 typedef struct iflib_fl *iflib_fl_t; 137 138 struct iflib_ctx; 139 140 static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid); 141 142 typedef struct iflib_filter_info { 143 driver_filter_t *ifi_filter; 144 void *ifi_filter_arg; 145 struct grouptask *ifi_task; 146 void *ifi_ctx; 147 } *iflib_filter_info_t; 148 149 struct iflib_ctx { 150 KOBJ_FIELDS; 151 /* 152 * Pointer to hardware driver's softc 153 */ 154 void *ifc_softc; 155 device_t ifc_dev; 156 if_t ifc_ifp; 157 158 cpuset_t ifc_cpus; 159 if_shared_ctx_t ifc_sctx; 160 struct if_softc_ctx ifc_softc_ctx; 161 162 struct mtx ifc_mtx; 163 164 uint16_t ifc_nhwtxqs; 165 uint16_t ifc_nhwrxqs; 166 167 iflib_txq_t ifc_txqs; 168 iflib_rxq_t ifc_rxqs; 169 uint32_t ifc_if_flags; 170 uint32_t ifc_flags; 171 uint32_t ifc_max_fl_buf_size; 172 int ifc_in_detach; 173 174 int ifc_link_state; 175 int ifc_link_irq; 176 int ifc_watchdog_events; 177 struct cdev *ifc_led_dev; 178 struct resource *ifc_msix_mem; 179 180 struct if_irq ifc_legacy_irq; 181 struct grouptask ifc_admin_task; 182 struct grouptask ifc_vflr_task; 183 struct iflib_filter_info ifc_filter_info; 184 struct ifmedia ifc_media; 185 186 struct sysctl_oid *ifc_sysctl_node; 187 uint16_t ifc_sysctl_ntxqs; 188 uint16_t ifc_sysctl_nrxqs; 189 uint16_t ifc_sysctl_qs_eq_override; 190 uint16_t ifc_sysctl_rx_budget; 191 192 qidx_t ifc_sysctl_ntxds[8]; 193 qidx_t ifc_sysctl_nrxds[8]; 194 struct if_txrx ifc_txrx; 195 #define isc_txd_encap ifc_txrx.ift_txd_encap 196 #define isc_txd_flush ifc_txrx.ift_txd_flush 197 #define isc_txd_credits_update ifc_txrx.ift_txd_credits_update 198 #define isc_rxd_available ifc_txrx.ift_rxd_available 199 #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get 200 #define isc_rxd_refill ifc_txrx.ift_rxd_refill 201 #define isc_rxd_flush ifc_txrx.ift_rxd_flush 202 #define isc_rxd_refill ifc_txrx.ift_rxd_refill 203 #define isc_rxd_refill ifc_txrx.ift_rxd_refill 204 #define isc_legacy_intr ifc_txrx.ift_legacy_intr 205 eventhandler_tag ifc_vlan_attach_event; 206 eventhandler_tag ifc_vlan_detach_event; 207 uint8_t ifc_mac[ETHER_ADDR_LEN]; 208 char ifc_mtx_name[16]; 209 }; 210 211 212 void * 213 iflib_get_softc(if_ctx_t ctx) 214 { 215 216 return (ctx->ifc_softc); 217 } 218 219 device_t 220 iflib_get_dev(if_ctx_t ctx) 221 { 222 223 return (ctx->ifc_dev); 224 } 225 226 if_t 227 iflib_get_ifp(if_ctx_t ctx) 228 { 229 230 return (ctx->ifc_ifp); 231 } 232 233 struct ifmedia * 234 iflib_get_media(if_ctx_t ctx) 235 { 236 237 return (&ctx->ifc_media); 238 } 239 240 void 241 iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]) 242 { 243 244 bcopy(mac, ctx->ifc_mac, ETHER_ADDR_LEN); 245 } 246 247 if_softc_ctx_t 248 iflib_get_softc_ctx(if_ctx_t ctx) 249 { 250 251 return 
(&ctx->ifc_softc_ctx); 252 } 253 254 if_shared_ctx_t 255 iflib_get_sctx(if_ctx_t ctx) 256 { 257 258 return (ctx->ifc_sctx); 259 } 260 261 #define IP_ALIGNED(m) ((((uintptr_t)(m)->m_data) & 0x3) == 0x2) 262 #define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*)) 263 #define CACHE_PTR_NEXT(ptr) ((void *)(((uintptr_t)(ptr)+CACHE_LINE_SIZE-1) & (CACHE_LINE_SIZE-1))) 264 265 #define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP) 266 #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) 267 268 #define RX_SW_DESC_MAP_CREATED (1 << 0) 269 #define TX_SW_DESC_MAP_CREATED (1 << 1) 270 #define RX_SW_DESC_INUSE (1 << 3) 271 #define TX_SW_DESC_MAPPED (1 << 4) 272 273 #define M_TOOBIG M_PROTO1 274 275 typedef struct iflib_sw_rx_desc_array { 276 bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ 277 struct mbuf **ifsd_m; /* pkthdr mbufs */ 278 caddr_t *ifsd_cl; /* direct cluster pointer for rx */ 279 uint8_t *ifsd_flags; 280 } iflib_rxsd_array_t; 281 282 typedef struct iflib_sw_tx_desc_array { 283 bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ 284 struct mbuf **ifsd_m; /* pkthdr mbufs */ 285 uint8_t *ifsd_flags; 286 } if_txsd_vec_t; 287 288 289 /* magic number that should be high enough for any hardware */ 290 #define IFLIB_MAX_TX_SEGS 128 291 /* bnxt supports 64 with hardware LRO enabled */ 292 #define IFLIB_MAX_RX_SEGS 64 293 #define IFLIB_RX_COPY_THRESH 128 294 #define IFLIB_MAX_RX_REFRESH 32 295 /* The minimum descriptors per second before we start coalescing */ 296 #define IFLIB_MIN_DESC_SEC 16384 297 #define IFLIB_DEFAULT_TX_UPDATE_FREQ 16 298 #define IFLIB_QUEUE_IDLE 0 299 #define IFLIB_QUEUE_HUNG 1 300 #define IFLIB_QUEUE_WORKING 2 301 /* maximum number of txqs that can share an rx interrupt */ 302 #define IFLIB_MAX_TX_SHARED_INTR 4 303 304 /* this should really scale with ring size - this is a fairly arbitrary value */ 305 #define TX_BATCH_SIZE 32 306 307 #define IFLIB_RESTART_BUDGET 8 308 309 #define IFC_LEGACY 0x001 310 #define IFC_QFLUSH 0x002 311 #define IFC_MULTISEG 0x004 312 #define IFC_DMAR 0x008 313 #define IFC_SC_ALLOCATED 0x010 314 #define IFC_INIT_DONE 0x020 315 #define IFC_PREFETCH 0x040 316 #define IFC_DO_RESET 0x080 317 #define IFC_CHECK_HUNG 0x100 318 319 #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ 320 CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ 321 CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) 322 struct iflib_txq { 323 qidx_t ift_in_use; 324 qidx_t ift_cidx; 325 qidx_t ift_cidx_processed; 326 qidx_t ift_pidx; 327 uint8_t ift_gen; 328 uint8_t ift_br_offset; 329 uint16_t ift_npending; 330 uint16_t ift_db_pending; 331 uint16_t ift_rs_pending; 332 /* implicit pad */ 333 uint8_t ift_txd_size[8]; 334 uint64_t ift_processed; 335 uint64_t ift_cleaned; 336 uint64_t ift_cleaned_prev; 337 #if MEMORY_LOGGING 338 uint64_t ift_enqueued; 339 uint64_t ift_dequeued; 340 #endif 341 uint64_t ift_no_tx_dma_setup; 342 uint64_t ift_no_desc_avail; 343 uint64_t ift_mbuf_defrag_failed; 344 uint64_t ift_mbuf_defrag; 345 uint64_t ift_map_failed; 346 uint64_t ift_txd_encap_efbig; 347 uint64_t ift_pullups; 348 349 struct mtx ift_mtx; 350 struct mtx ift_db_mtx; 351 352 /* constant values */ 353 if_ctx_t ift_ctx; 354 struct ifmp_ring *ift_br; 355 struct grouptask ift_task; 356 qidx_t ift_size; 357 uint16_t ift_id; 358 struct callout ift_timer; 359 360 if_txsd_vec_t ift_sds; 361 uint8_t ift_qstatus; 362 uint8_t ift_closed; 363 uint8_t ift_update_freq; 364 struct iflib_filter_info ift_filter_info; 365 bus_dma_tag_t ift_desc_tag; 366 bus_dma_tag_t ift_tso_desc_tag; 367 
iflib_dma_info_t ift_ifdi; 368 #define MTX_NAME_LEN 16 369 char ift_mtx_name[MTX_NAME_LEN]; 370 char ift_db_mtx_name[MTX_NAME_LEN]; 371 bus_dma_segment_t ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE); 372 #ifdef IFLIB_DIAGNOSTICS 373 uint64_t ift_cpu_exec_count[256]; 374 #endif 375 } __aligned(CACHE_LINE_SIZE); 376 377 struct iflib_fl { 378 qidx_t ifl_cidx; 379 qidx_t ifl_pidx; 380 qidx_t ifl_credits; 381 uint8_t ifl_gen; 382 uint8_t ifl_rxd_size; 383 #if MEMORY_LOGGING 384 uint64_t ifl_m_enqueued; 385 uint64_t ifl_m_dequeued; 386 uint64_t ifl_cl_enqueued; 387 uint64_t ifl_cl_dequeued; 388 #endif 389 /* implicit pad */ 390 391 bitstr_t *ifl_rx_bitmap; 392 qidx_t ifl_fragidx; 393 /* constant */ 394 qidx_t ifl_size; 395 uint16_t ifl_buf_size; 396 uint16_t ifl_cltype; 397 uma_zone_t ifl_zone; 398 iflib_rxsd_array_t ifl_sds; 399 iflib_rxq_t ifl_rxq; 400 uint8_t ifl_id; 401 bus_dma_tag_t ifl_desc_tag; 402 iflib_dma_info_t ifl_ifdi; 403 uint64_t ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE); 404 caddr_t ifl_vm_addrs[IFLIB_MAX_RX_REFRESH]; 405 qidx_t ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH]; 406 } __aligned(CACHE_LINE_SIZE); 407 408 static inline qidx_t 409 get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen) 410 { 411 qidx_t used; 412 413 if (pidx > cidx) 414 used = pidx - cidx; 415 else if (pidx < cidx) 416 used = size - cidx + pidx; 417 else if (gen == 0 && pidx == cidx) 418 used = 0; 419 else if (gen == 1 && pidx == cidx) 420 used = size; 421 else 422 panic("bad state"); 423 424 return (used); 425 } 426 427 #define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen)) 428 429 #define IDXDIFF(head, tail, wrap) \ 430 ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head)) 431 432 struct iflib_rxq { 433 /* If there is a separate completion queue - 434 * these are the cq cidx and pidx. Otherwise 435 * these are unused. 
436 */ 437 qidx_t ifr_size; 438 qidx_t ifr_cq_cidx; 439 qidx_t ifr_cq_pidx; 440 uint8_t ifr_cq_gen; 441 uint8_t ifr_fl_offset; 442 443 if_ctx_t ifr_ctx; 444 iflib_fl_t ifr_fl; 445 uint64_t ifr_rx_irq; 446 uint16_t ifr_id; 447 uint8_t ifr_lro_enabled; 448 uint8_t ifr_nfl; 449 uint8_t ifr_ntxqirq; 450 uint8_t ifr_txqid[IFLIB_MAX_TX_SHARED_INTR]; 451 struct lro_ctrl ifr_lc; 452 struct grouptask ifr_task; 453 struct iflib_filter_info ifr_filter_info; 454 iflib_dma_info_t ifr_ifdi; 455 456 /* dynamically allocate if any drivers need a value substantially larger than this */ 457 struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); 458 #ifdef IFLIB_DIAGNOSTICS 459 uint64_t ifr_cpu_exec_count[256]; 460 #endif 461 } __aligned(CACHE_LINE_SIZE); 462 463 typedef struct if_rxsd { 464 caddr_t *ifsd_cl; 465 struct mbuf **ifsd_m; 466 iflib_fl_t ifsd_fl; 467 qidx_t ifsd_cidx; 468 } *if_rxsd_t; 469 470 /* multiple of word size */ 471 #ifdef __LP64__ 472 #define PKT_INFO_SIZE 6 473 #define RXD_INFO_SIZE 5 474 #define PKT_TYPE uint64_t 475 #else 476 #define PKT_INFO_SIZE 11 477 #define RXD_INFO_SIZE 8 478 #define PKT_TYPE uint32_t 479 #endif 480 #define PKT_LOOP_BOUND ((PKT_INFO_SIZE/3)*3) 481 #define RXD_LOOP_BOUND ((RXD_INFO_SIZE/4)*4) 482 483 typedef struct if_pkt_info_pad { 484 PKT_TYPE pkt_val[PKT_INFO_SIZE]; 485 } *if_pkt_info_pad_t; 486 typedef struct if_rxd_info_pad { 487 PKT_TYPE rxd_val[RXD_INFO_SIZE]; 488 } *if_rxd_info_pad_t; 489 490 CTASSERT(sizeof(struct if_pkt_info_pad) == sizeof(struct if_pkt_info)); 491 CTASSERT(sizeof(struct if_rxd_info_pad) == sizeof(struct if_rxd_info)); 492 493 494 static inline void 495 pkt_info_zero(if_pkt_info_t pi) 496 { 497 if_pkt_info_pad_t pi_pad; 498 499 pi_pad = (if_pkt_info_pad_t)pi; 500 pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0; 501 pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0; 502 #ifndef __LP64__ 503 pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; 504 pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0; 505 #endif 506 } 507 508 static inline void 509 rxd_info_zero(if_rxd_info_t ri) 510 { 511 if_rxd_info_pad_t ri_pad; 512 int i; 513 514 ri_pad = (if_rxd_info_pad_t)ri; 515 for (i = 0; i < RXD_LOOP_BOUND; i += 4) { 516 ri_pad->rxd_val[i] = 0; 517 ri_pad->rxd_val[i+1] = 0; 518 ri_pad->rxd_val[i+2] = 0; 519 ri_pad->rxd_val[i+3] = 0; 520 } 521 #ifdef __LP64__ 522 ri_pad->rxd_val[RXD_INFO_SIZE-1] = 0; 523 #endif 524 } 525 526 /* 527 * Only allow a single packet to take up most 1/nth of the tx ring 528 */ 529 #define MAX_SINGLE_PACKET_FRACTION 12 530 #define IF_BAD_DMA (bus_addr_t)-1 531 532 #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) 533 534 #define CTX_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_mtx, _name, "iflib ctx lock", MTX_DEF) 535 536 #define CTX_LOCK(ctx) mtx_lock(&(ctx)->ifc_mtx) 537 #define CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_mtx) 538 #define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_mtx) 539 540 541 #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) 542 #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) 543 544 545 /* Our boot-time initialization hook */ 546 static int iflib_module_event_handler(module_t, int, void *); 547 548 static moduledata_t iflib_moduledata = { 549 "iflib", 550 iflib_module_event_handler, 551 NULL 552 }; 553 554 DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY); 555 MODULE_VERSION(iflib, 1); 556 557 MODULE_DEPEND(iflib, pci, 1, 1, 1); 558 MODULE_DEPEND(iflib, ether, 1, 1, 1); 
559 560 TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1); 561 TASKQGROUP_DEFINE(if_config_tqg, 1, 1); 562 563 #ifndef IFLIB_DEBUG_COUNTERS 564 #ifdef INVARIANTS 565 #define IFLIB_DEBUG_COUNTERS 1 566 #else 567 #define IFLIB_DEBUG_COUNTERS 0 568 #endif /* !INVARIANTS */ 569 #endif 570 571 static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, 572 "iflib driver parameters"); 573 574 /* 575 * XXX need to ensure that this can't accidentally cause the head to be moved backwards 576 */ 577 static int iflib_min_tx_latency = 0; 578 SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW, 579 &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput"); 580 static int iflib_no_tx_batch = 0; 581 SYSCTL_INT(_net_iflib, OID_AUTO, no_tx_batch, CTLFLAG_RW, 582 &iflib_no_tx_batch, 0, "minimize transmit latency at the possible expense of throughput"); 583 584 585 #if IFLIB_DEBUG_COUNTERS 586 587 static int iflib_tx_seen; 588 static int iflib_tx_sent; 589 static int iflib_tx_encap; 590 static int iflib_rx_allocs; 591 static int iflib_fl_refills; 592 static int iflib_fl_refills_large; 593 static int iflib_tx_frees; 594 595 SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD, 596 &iflib_tx_seen, 0, "# tx mbufs seen"); 597 SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD, 598 &iflib_tx_sent, 0, "# tx mbufs sent"); 599 SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD, 600 &iflib_tx_encap, 0, "# tx mbufs encapped"); 601 SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD, 602 &iflib_tx_frees, 0, "# tx frees"); 603 SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD, 604 &iflib_rx_allocs, 0, "# rx allocations"); 605 SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD, 606 &iflib_fl_refills, 0, "# refills"); 607 SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD, 608 &iflib_fl_refills_large, 0, "# large refills"); 609 610 611 static int iflib_txq_drain_flushing; 612 static int iflib_txq_drain_oactive; 613 static int iflib_txq_drain_notready; 614 static int iflib_txq_drain_encapfail; 615 616 SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_flushing, CTLFLAG_RD, 617 &iflib_txq_drain_flushing, 0, "# drain flushes"); 618 SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD, 619 &iflib_txq_drain_oactive, 0, "# drain oactives"); 620 SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD, 621 &iflib_txq_drain_notready, 0, "# drain notready"); 622 SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_encapfail, CTLFLAG_RD, 623 &iflib_txq_drain_encapfail, 0, "# drain encap fails"); 624 625 626 static int iflib_encap_load_mbuf_fail; 627 static int iflib_encap_txq_avail_fail; 628 static int iflib_encap_txd_encap_fail; 629 630 SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD, 631 &iflib_encap_load_mbuf_fail, 0, "# busdma load failures"); 632 SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD, 633 &iflib_encap_txq_avail_fail, 0, "# txq avail failures"); 634 SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD, 635 &iflib_encap_txd_encap_fail, 0, "# driver encap failures"); 636 637 static int iflib_task_fn_rxs; 638 static int iflib_rx_intr_enables; 639 static int iflib_fast_intrs; 640 static int iflib_intr_link; 641 static int iflib_intr_msix; 642 static int iflib_rx_unavail; 643 static int iflib_rx_ctx_inactive; 644 static int iflib_rx_zero_len; 645 static int iflib_rx_if_input; 646 static int iflib_rx_mbuf_null; 647 static int iflib_rxd_flush; 648 649 static int iflib_verbose_debug; 650 651 SYSCTL_INT(_net_iflib, 
OID_AUTO, intr_link, CTLFLAG_RD, 652 &iflib_intr_link, 0, "# intr link calls"); 653 SYSCTL_INT(_net_iflib, OID_AUTO, intr_msix, CTLFLAG_RD, 654 &iflib_intr_msix, 0, "# intr msix calls"); 655 SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD, 656 &iflib_task_fn_rxs, 0, "# task_fn_rx calls"); 657 SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD, 658 &iflib_rx_intr_enables, 0, "# rx intr enables"); 659 SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD, 660 &iflib_fast_intrs, 0, "# fast_intr calls"); 661 SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD, 662 &iflib_rx_unavail, 0, "# times rxeof called with no available data"); 663 SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD, 664 &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context"); 665 SYSCTL_INT(_net_iflib, OID_AUTO, rx_zero_len, CTLFLAG_RD, 666 &iflib_rx_zero_len, 0, "# times rxeof saw zero len mbuf"); 667 SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD, 668 &iflib_rx_if_input, 0, "# times rxeof called if_input"); 669 SYSCTL_INT(_net_iflib, OID_AUTO, rx_mbuf_null, CTLFLAG_RD, 670 &iflib_rx_mbuf_null, 0, "# times rxeof got null mbuf"); 671 SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD, 672 &iflib_rxd_flush, 0, "# times rxd_flush called"); 673 SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW, 674 &iflib_verbose_debug, 0, "enable verbose debugging"); 675 676 #define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1) 677 static void 678 iflib_debug_reset(void) 679 { 680 iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs = 681 iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees = 682 iflib_txq_drain_flushing = iflib_txq_drain_oactive = 683 iflib_txq_drain_notready = iflib_txq_drain_encapfail = 684 iflib_encap_load_mbuf_fail = iflib_encap_txq_avail_fail = 685 iflib_encap_txd_encap_fail = iflib_task_fn_rxs = iflib_rx_intr_enables = 686 iflib_fast_intrs = iflib_intr_link = iflib_intr_msix = iflib_rx_unavail = 687 iflib_rx_ctx_inactive = iflib_rx_zero_len = iflib_rx_if_input = 688 iflib_rx_mbuf_null = iflib_rxd_flush = 0; 689 } 690 691 #else 692 #define DBG_COUNTER_INC(name) 693 static void iflib_debug_reset(void) {} 694 #endif 695 696 697 698 #define IFLIB_DEBUG 0 699 700 static void iflib_tx_structures_free(if_ctx_t ctx); 701 static void iflib_rx_structures_free(if_ctx_t ctx); 702 static int iflib_queues_alloc(if_ctx_t ctx); 703 static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq); 704 static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget); 705 static int iflib_qset_structures_setup(if_ctx_t ctx); 706 static int iflib_msix_init(if_ctx_t ctx); 707 static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, char *str); 708 static void iflib_txq_check_drain(iflib_txq_t txq, int budget); 709 static uint32_t iflib_txq_can_drain(struct ifmp_ring *); 710 static int iflib_register(if_ctx_t); 711 static void iflib_init_locked(if_ctx_t ctx); 712 static void iflib_add_device_sysctl_pre(if_ctx_t ctx); 713 static void iflib_add_device_sysctl_post(if_ctx_t ctx); 714 static void iflib_ifmp_purge(iflib_txq_t txq); 715 static void _iflib_pre_assert(if_softc_ctx_t scctx); 716 static void iflib_stop(if_ctx_t ctx); 717 static void iflib_if_init_locked(if_ctx_t ctx); 718 #ifndef __NO_STRICT_ALIGNMENT 719 static struct mbuf * iflib_fixup_rx(struct mbuf *m); 720 #endif 721 722 #ifdef DEV_NETMAP 723 #include <sys/selinfo.h> 724 #include <net/netmap.h> 725 #include 
<dev/netmap/netmap_kern.h> 726 727 MODULE_DEPEND(iflib, netmap, 1, 1, 1); 728 729 static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init); 730 731 /* 732 * device-specific sysctl variables: 733 * 734 * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it. 735 * During regular operations the CRC is stripped, but on some 736 * hardware reception of frames not multiple of 64 is slower, 737 * so using crcstrip=0 helps in benchmarks. 738 * 739 * iflib_rx_miss, iflib_rx_miss_bufs: 740 * count packets that might be missed due to lost interrupts. 741 */ 742 SYSCTL_DECL(_dev_netmap); 743 /* 744 * The xl driver by default strips CRCs and we do not override it. 745 */ 746 747 int iflib_crcstrip = 1; 748 SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip, 749 CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on rx frames"); 750 751 int iflib_rx_miss, iflib_rx_miss_bufs; 752 SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss, 753 CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed rx intr"); 754 SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs, 755 CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed rx intr bufs"); 756 757 /* 758 * Register/unregister. We are already under netmap lock. 759 * Only called on the first register or the last unregister. 760 */ 761 static int 762 iflib_netmap_register(struct netmap_adapter *na, int onoff) 763 { 764 struct ifnet *ifp = na->ifp; 765 if_ctx_t ctx = ifp->if_softc; 766 int status; 767 768 CTX_LOCK(ctx); 769 IFDI_INTR_DISABLE(ctx); 770 771 /* Tell the stack that the interface is no longer active */ 772 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); 773 774 if (!CTX_IS_VF(ctx)) 775 IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); 776 777 /* enable or disable flags and callbacks in na and ifp */ 778 if (onoff) { 779 nm_set_native_flags(na); 780 } else { 781 nm_clear_native_flags(na); 782 } 783 iflib_stop(ctx); 784 iflib_init_locked(ctx); 785 IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); // XXX why twice ? 786 status = ifp->if_drv_flags & IFF_DRV_RUNNING ? 
0 : 1; 787 if (status) 788 nm_clear_native_flags(na); 789 CTX_UNLOCK(ctx); 790 return (status); 791 } 792 793 static int 794 netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init) 795 { 796 struct netmap_adapter *na = kring->na; 797 u_int const lim = kring->nkr_num_slots - 1; 798 u_int head = kring->rhead; 799 struct netmap_ring *ring = kring->ring; 800 bus_dmamap_t *map; 801 struct if_rxd_update iru; 802 if_ctx_t ctx = rxq->ifr_ctx; 803 iflib_fl_t fl = &rxq->ifr_fl[0]; 804 uint32_t refill_pidx, nic_i; 805 806 if (nm_i == head && __predict_true(!init)) 807 return 0; 808 iru_init(&iru, rxq, 0 /* flid */); 809 map = fl->ifl_sds.ifsd_map; 810 refill_pidx = netmap_idx_k2n(kring, nm_i); 811 /* 812 * IMPORTANT: we must leave one free slot in the ring, 813 * so move head back by one unit 814 */ 815 head = nm_prev(head, lim); 816 while (nm_i != head) { 817 for (int tmp_pidx = 0; tmp_pidx < IFLIB_MAX_RX_REFRESH && nm_i != head; tmp_pidx++) { 818 struct netmap_slot *slot = &ring->slot[nm_i]; 819 void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[tmp_pidx]); 820 uint32_t nic_i_dma = refill_pidx; 821 nic_i = netmap_idx_k2n(kring, nm_i); 822 823 MPASS(tmp_pidx < IFLIB_MAX_RX_REFRESH); 824 825 if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ 826 return netmap_ring_reinit(kring); 827 828 fl->ifl_vm_addrs[tmp_pidx] = addr; 829 if (__predict_false(init) && map) { 830 netmap_load_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); 831 } else if (map && (slot->flags & NS_BUF_CHANGED)) { 832 /* buffer has changed, reload map */ 833 netmap_reload_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); 834 } 835 slot->flags &= ~NS_BUF_CHANGED; 836 837 nm_i = nm_next(nm_i, lim); 838 fl->ifl_rxd_idxs[tmp_pidx] = nic_i = nm_next(nic_i, lim); 839 if (nm_i != head && tmp_pidx < IFLIB_MAX_RX_REFRESH-1) 840 continue; 841 842 iru.iru_pidx = refill_pidx; 843 iru.iru_count = tmp_pidx+1; 844 ctx->isc_rxd_refill(ctx->ifc_softc, &iru); 845 846 refill_pidx = nic_i; 847 if (map == NULL) 848 continue; 849 850 for (int n = 0; n < iru.iru_count; n++) { 851 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, map[nic_i_dma], 852 BUS_DMASYNC_PREREAD); 853 /* XXX - change this to not use the netmap func*/ 854 nic_i_dma = nm_next(nic_i_dma, lim); 855 } 856 } 857 } 858 kring->nr_hwcur = head; 859 860 if (map) 861 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, 862 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 863 ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); 864 return (0); 865 } 866 867 /* 868 * Reconcile kernel and user view of the transmit ring. 869 * 870 * All information is in the kring. 871 * Userspace wants to send packets up to the one before kring->rhead, 872 * kernel knows kring->nr_hwcur is the first unsent packet. 873 * 874 * Here we push packets out (as many as possible), and possibly 875 * reclaim buffers from previously completed transmission. 876 * 877 * The caller (netmap) guarantees that there is only one instance 878 * running at any time. Any interference with other driver 879 * methods should be handled by the individual drivers. 
880 */ 881 static int 882 iflib_netmap_txsync(struct netmap_kring *kring, int flags) 883 { 884 struct netmap_adapter *na = kring->na; 885 struct ifnet *ifp = na->ifp; 886 struct netmap_ring *ring = kring->ring; 887 u_int nm_i; /* index into the netmap ring */ 888 u_int nic_i; /* index into the NIC ring */ 889 u_int n; 890 u_int const lim = kring->nkr_num_slots - 1; 891 u_int const head = kring->rhead; 892 struct if_pkt_info pi; 893 894 /* 895 * interrupts on every tx packet are expensive so request 896 * them every half ring, or where NS_REPORT is set 897 */ 898 u_int report_frequency = kring->nkr_num_slots >> 1; 899 /* device-specific */ 900 if_ctx_t ctx = ifp->if_softc; 901 iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id]; 902 903 if (txq->ift_sds.ifsd_map) 904 bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map, 905 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 906 907 908 /* 909 * First part: process new packets to send. 910 * nm_i is the current index in the netmap ring, 911 * nic_i is the corresponding index in the NIC ring. 912 * 913 * If we have packets to send (nm_i != head) 914 * iterate over the netmap ring, fetch length and update 915 * the corresponding slot in the NIC ring. Some drivers also 916 * need to update the buffer's physical address in the NIC slot 917 * even NS_BUF_CHANGED is not set (PNMB computes the addresses). 918 * 919 * The netmap_reload_map() calls is especially expensive, 920 * even when (as in this case) the tag is 0, so do only 921 * when the buffer has actually changed. 922 * 923 * If possible do not set the report/intr bit on all slots, 924 * but only a few times per ring or when NS_REPORT is set. 925 * 926 * Finally, on 10G and faster drivers, it might be useful 927 * to prefetch the next slot and txr entry. 928 */ 929 930 nm_i = netmap_idx_n2k(kring, kring->nr_hwcur); 931 pkt_info_zero(&pi); 932 pi.ipi_segs = txq->ift_segs; 933 pi.ipi_qsidx = kring->ring_id; 934 if (nm_i != head) { /* we have new packets to send */ 935 nic_i = netmap_idx_k2n(kring, nm_i); 936 937 __builtin_prefetch(&ring->slot[nm_i]); 938 __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]); 939 if (txq->ift_sds.ifsd_map) 940 __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]); 941 942 for (n = 0; nm_i != head; n++) { 943 struct netmap_slot *slot = &ring->slot[nm_i]; 944 u_int len = slot->len; 945 uint64_t paddr; 946 void *addr = PNMB(na, slot, &paddr); 947 int flags = (slot->flags & NS_REPORT || 948 nic_i == 0 || nic_i == report_frequency) ? 949 IPI_TX_INTR : 0; 950 951 /* device-specific */ 952 pi.ipi_len = len; 953 pi.ipi_segs[0].ds_addr = paddr; 954 pi.ipi_segs[0].ds_len = len; 955 pi.ipi_nsegs = 1; 956 pi.ipi_ndescs = 0; 957 pi.ipi_pidx = nic_i; 958 pi.ipi_flags = flags; 959 960 /* Fill the slot in the NIC ring. 
*/ 961 ctx->isc_txd_encap(ctx->ifc_softc, &pi); 962 963 /* prefetch for next round */ 964 __builtin_prefetch(&ring->slot[nm_i + 1]); 965 __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]); 966 if (txq->ift_sds.ifsd_map) { 967 __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]); 968 969 NM_CHECK_ADDR_LEN(na, addr, len); 970 971 if (slot->flags & NS_BUF_CHANGED) { 972 /* buffer has changed, reload map */ 973 netmap_reload_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[nic_i], addr); 974 } 975 /* make sure changes to the buffer are synced */ 976 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_sds.ifsd_map[nic_i], 977 BUS_DMASYNC_PREWRITE); 978 } 979 slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); 980 nm_i = nm_next(nm_i, lim); 981 nic_i = nm_next(nic_i, lim); 982 } 983 kring->nr_hwcur = head; 984 985 /* synchronize the NIC ring */ 986 if (txq->ift_sds.ifsd_map) 987 bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map, 988 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 989 990 /* (re)start the tx unit up to slot nic_i (excluded) */ 991 ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i); 992 } 993 994 /* 995 * Second part: reclaim buffers for completed transmissions. 996 */ 997 if (iflib_tx_credits_update(ctx, txq)) { 998 /* some tx completed, increment avail */ 999 nic_i = txq->ift_cidx_processed; 1000 kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); 1001 } 1002 return (0); 1003 } 1004 1005 /* 1006 * Reconcile kernel and user view of the receive ring. 1007 * Same as for the txsync, this routine must be efficient. 1008 * The caller guarantees a single invocations, but races against 1009 * the rest of the driver should be handled here. 1010 * 1011 * On call, kring->rhead is the first packet that userspace wants 1012 * to keep, and kring->rcur is the wakeup point. 1013 * The kernel has previously reported packets up to kring->rtail. 1014 * 1015 * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective 1016 * of whether or not we received an interrupt. 1017 */ 1018 static int 1019 iflib_netmap_rxsync(struct netmap_kring *kring, int flags) 1020 { 1021 struct netmap_adapter *na = kring->na; 1022 struct netmap_ring *ring = kring->ring; 1023 uint32_t nm_i; /* index into the netmap ring */ 1024 uint32_t nic_i; /* index into the NIC ring */ 1025 u_int i, n; 1026 u_int const lim = kring->nkr_num_slots - 1; 1027 u_int const head = netmap_idx_n2k(kring, kring->rhead); 1028 int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; 1029 struct if_rxd_info ri; 1030 1031 struct ifnet *ifp = na->ifp; 1032 if_ctx_t ctx = ifp->if_softc; 1033 iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; 1034 iflib_fl_t fl = rxq->ifr_fl; 1035 if (head > lim) 1036 return netmap_ring_reinit(kring); 1037 1038 /* XXX check sync modes */ 1039 for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++) { 1040 if (fl->ifl_sds.ifsd_map == NULL) 1041 continue; 1042 bus_dmamap_sync(rxq->ifr_fl[i].ifl_desc_tag, fl->ifl_ifdi->idi_map, 1043 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 1044 } 1045 /* 1046 * First part: import newly received packets. 1047 * 1048 * nm_i is the index of the next free slot in the netmap ring, 1049 * nic_i is the index of the next received packet in the NIC ring, 1050 * and they may differ in case if_init() has been called while 1051 * in netmap mode. 
For the receive ring we have 1052 * 1053 * nic_i = rxr->next_check; 1054 * nm_i = kring->nr_hwtail (previous) 1055 * and 1056 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size 1057 * 1058 * rxr->next_check is set to 0 on a ring reinit 1059 */ 1060 if (netmap_no_pendintr || force_update) { 1061 int crclen = iflib_crcstrip ? 0 : 4; 1062 int error, avail; 1063 uint16_t slot_flags = kring->nkr_slot_flags; 1064 1065 for (i = 0; i < rxq->ifr_nfl; i++) { 1066 fl = &rxq->ifr_fl[i]; 1067 nic_i = fl->ifl_cidx; 1068 nm_i = netmap_idx_n2k(kring, nic_i); 1069 avail = iflib_rxd_avail(ctx, rxq, nic_i, USHRT_MAX); 1070 for (n = 0; avail > 0; n++, avail--) { 1071 rxd_info_zero(&ri); 1072 ri.iri_frags = rxq->ifr_frags; 1073 ri.iri_qsidx = kring->ring_id; 1074 ri.iri_ifp = ctx->ifc_ifp; 1075 ri.iri_cidx = nic_i; 1076 1077 error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); 1078 ring->slot[nm_i].len = error ? 0 : ri.iri_len - crclen; 1079 ring->slot[nm_i].flags = slot_flags; 1080 if (fl->ifl_sds.ifsd_map) 1081 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, 1082 fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD); 1083 nm_i = nm_next(nm_i, lim); 1084 nic_i = nm_next(nic_i, lim); 1085 } 1086 if (n) { /* update the state variables */ 1087 if (netmap_no_pendintr && !force_update) { 1088 /* diagnostics */ 1089 iflib_rx_miss ++; 1090 iflib_rx_miss_bufs += n; 1091 } 1092 fl->ifl_cidx = nic_i; 1093 kring->nr_hwtail = netmap_idx_k2n(kring, nm_i); 1094 } 1095 kring->nr_kflags &= ~NKR_PENDINTR; 1096 } 1097 } 1098 /* 1099 * Second part: skip past packets that userspace has released. 1100 * (kring->nr_hwcur to head excluded), 1101 * and make the buffers available for reception. 1102 * As usual nm_i is the index in the netmap ring, 1103 * nic_i is the index in the NIC ring, and 1104 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size 1105 */ 1106 /* XXX not sure how this will work with multiple free lists */ 1107 nm_i = netmap_idx_n2k(kring, kring->nr_hwcur); 1108 1109 return (netmap_fl_refill(rxq, kring, nm_i, false)); 1110 } 1111 1112 static void 1113 iflib_netmap_intr(struct netmap_adapter *na, int onoff) 1114 { 1115 struct ifnet *ifp = na->ifp; 1116 if_ctx_t ctx = ifp->if_softc; 1117 1118 CTX_LOCK(ctx); 1119 if (onoff) { 1120 IFDI_INTR_ENABLE(ctx); 1121 } else { 1122 IFDI_INTR_DISABLE(ctx); 1123 } 1124 CTX_UNLOCK(ctx); 1125 } 1126 1127 1128 static int 1129 iflib_netmap_attach(if_ctx_t ctx) 1130 { 1131 struct netmap_adapter na; 1132 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 1133 1134 bzero(&na, sizeof(na)); 1135 1136 na.ifp = ctx->ifc_ifp; 1137 na.na_flags = NAF_BDG_MAYSLEEP; 1138 MPASS(ctx->ifc_softc_ctx.isc_ntxqsets); 1139 MPASS(ctx->ifc_softc_ctx.isc_nrxqsets); 1140 1141 na.num_tx_desc = scctx->isc_ntxd[0]; 1142 na.num_rx_desc = scctx->isc_nrxd[0]; 1143 na.nm_txsync = iflib_netmap_txsync; 1144 na.nm_rxsync = iflib_netmap_rxsync; 1145 na.nm_register = iflib_netmap_register; 1146 na.nm_intr = iflib_netmap_intr; 1147 na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets; 1148 na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets; 1149 return (netmap_attach(&na)); 1150 } 1151 1152 static void 1153 iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) 1154 { 1155 struct netmap_adapter *na = NA(ctx->ifc_ifp); 1156 struct netmap_slot *slot; 1157 1158 slot = netmap_reset(na, NR_TX, txq->ift_id, 0); 1159 if (slot == NULL) 1160 return; 1161 if (txq->ift_sds.ifsd_map == NULL) 1162 return; 1163 1164 for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { 1165 1166 /* 1167 * In netmap mode, set the map for the packet buffer. 
1168 * NOTE: Some drivers (not this one) also need to set 1169 * the physical buffer address in the NIC ring. 1170 * netmap_idx_n2k() maps a nic index, i, into the corresponding 1171 * netmap slot index, si 1172 */ 1173 int si = netmap_idx_n2k(&na->tx_rings[txq->ift_id], i); 1174 netmap_load_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], NMB(na, slot + si)); 1175 } 1176 } 1177 1178 static void 1179 iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) 1180 { 1181 struct netmap_adapter *na = NA(ctx->ifc_ifp); 1182 struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id]; 1183 struct netmap_slot *slot; 1184 uint32_t nm_i; 1185 1186 slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); 1187 if (slot == NULL) 1188 return; 1189 nm_i = netmap_idx_n2k(kring, 0); 1190 netmap_fl_refill(rxq, kring, nm_i, true); 1191 } 1192 1193 #define iflib_netmap_detach(ifp) netmap_detach(ifp) 1194 1195 #else 1196 #define iflib_netmap_txq_init(ctx, txq) 1197 #define iflib_netmap_rxq_init(ctx, rxq) 1198 #define iflib_netmap_detach(ifp) 1199 1200 #define iflib_netmap_attach(ctx) (0) 1201 #define netmap_rx_irq(ifp, qid, budget) (0) 1202 #define netmap_tx_irq(ifp, qid) do {} while (0) 1203 1204 #endif 1205 1206 #if defined(__i386__) || defined(__amd64__) 1207 static __inline void 1208 prefetch(void *x) 1209 { 1210 __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); 1211 } 1212 static __inline void 1213 prefetch2cachelines(void *x) 1214 { 1215 __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); 1216 #if (CACHE_LINE_SIZE < 128) 1217 __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long))))); 1218 #endif 1219 } 1220 #else 1221 #define prefetch(x) 1222 #define prefetch2cachelines(x) 1223 #endif 1224 1225 static void 1226 iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid) 1227 { 1228 iflib_fl_t fl; 1229 1230 fl = &rxq->ifr_fl[flid]; 1231 iru->iru_paddrs = fl->ifl_bus_addrs; 1232 iru->iru_vaddrs = &fl->ifl_vm_addrs[0]; 1233 iru->iru_idxs = fl->ifl_rxd_idxs; 1234 iru->iru_qsidx = rxq->ifr_id; 1235 iru->iru_buf_size = fl->ifl_buf_size; 1236 iru->iru_flidx = fl->ifl_id; 1237 } 1238 1239 static void 1240 _iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) 1241 { 1242 if (err) 1243 return; 1244 *(bus_addr_t *) arg = segs[0].ds_addr; 1245 } 1246 1247 int 1248 iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags) 1249 { 1250 int err; 1251 if_shared_ctx_t sctx = ctx->ifc_sctx; 1252 device_t dev = ctx->ifc_dev; 1253 1254 KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized")); 1255 1256 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1257 sctx->isc_q_align, 0, /* alignment, bounds */ 1258 BUS_SPACE_MAXADDR, /* lowaddr */ 1259 BUS_SPACE_MAXADDR, /* highaddr */ 1260 NULL, NULL, /* filter, filterarg */ 1261 size, /* maxsize */ 1262 1, /* nsegments */ 1263 size, /* maxsegsize */ 1264 BUS_DMA_ALLOCNOW, /* flags */ 1265 NULL, /* lockfunc */ 1266 NULL, /* lockarg */ 1267 &dma->idi_tag); 1268 if (err) { 1269 device_printf(dev, 1270 "%s: bus_dma_tag_create failed: %d\n", 1271 __func__, err); 1272 goto fail_0; 1273 } 1274 1275 err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr, 1276 BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map); 1277 if (err) { 1278 device_printf(dev, 1279 "%s: bus_dmamem_alloc(%ju) failed: %d\n", 1280 __func__, (uintmax_t)size, err); 1281 goto fail_1; 1282 } 1283 1284 dma->idi_paddr = IF_BAD_DMA; 1285 err = bus_dmamap_load(dma->idi_tag, dma->idi_map, 
dma->idi_vaddr, 1286 size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT); 1287 if (err || dma->idi_paddr == IF_BAD_DMA) { 1288 device_printf(dev, 1289 "%s: bus_dmamap_load failed: %d\n", 1290 __func__, err); 1291 goto fail_2; 1292 } 1293 1294 dma->idi_size = size; 1295 return (0); 1296 1297 fail_2: 1298 bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); 1299 fail_1: 1300 bus_dma_tag_destroy(dma->idi_tag); 1301 fail_0: 1302 dma->idi_tag = NULL; 1303 1304 return (err); 1305 } 1306 1307 int 1308 iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count) 1309 { 1310 int i, err; 1311 iflib_dma_info_t *dmaiter; 1312 1313 dmaiter = dmalist; 1314 for (i = 0; i < count; i++, dmaiter++) { 1315 if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, mapflags)) != 0) 1316 break; 1317 } 1318 if (err) 1319 iflib_dma_free_multi(dmalist, i); 1320 return (err); 1321 } 1322 1323 void 1324 iflib_dma_free(iflib_dma_info_t dma) 1325 { 1326 if (dma->idi_tag == NULL) 1327 return; 1328 if (dma->idi_paddr != IF_BAD_DMA) { 1329 bus_dmamap_sync(dma->idi_tag, dma->idi_map, 1330 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 1331 bus_dmamap_unload(dma->idi_tag, dma->idi_map); 1332 dma->idi_paddr = IF_BAD_DMA; 1333 } 1334 if (dma->idi_vaddr != NULL) { 1335 bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); 1336 dma->idi_vaddr = NULL; 1337 } 1338 bus_dma_tag_destroy(dma->idi_tag); 1339 dma->idi_tag = NULL; 1340 } 1341 1342 void 1343 iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count) 1344 { 1345 int i; 1346 iflib_dma_info_t *dmaiter = dmalist; 1347 1348 for (i = 0; i < count; i++, dmaiter++) 1349 iflib_dma_free(*dmaiter); 1350 } 1351 1352 #ifdef EARLY_AP_STARTUP 1353 static const int iflib_started = 1; 1354 #else 1355 /* 1356 * We used to abuse the smp_started flag to decide if the queues have been 1357 * fully initialized (by late taskqgroup_adjust() calls in a SYSINIT()). 1358 * That gave bad races, since the SYSINIT() runs strictly after smp_started 1359 * is set. Run a SYSINIT() strictly after that to just set a usable 1360 * completion flag. 
1361 */ 1362 1363 static int iflib_started; 1364 1365 static void 1366 iflib_record_started(void *arg) 1367 { 1368 iflib_started = 1; 1369 } 1370 1371 SYSINIT(iflib_record_started, SI_SUB_SMP + 1, SI_ORDER_FIRST, 1372 iflib_record_started, NULL); 1373 #endif 1374 1375 static int 1376 iflib_fast_intr(void *arg) 1377 { 1378 iflib_filter_info_t info = arg; 1379 struct grouptask *gtask = info->ifi_task; 1380 if (!iflib_started) 1381 return (FILTER_HANDLED); 1382 1383 DBG_COUNTER_INC(fast_intrs); 1384 if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) 1385 return (FILTER_HANDLED); 1386 1387 GROUPTASK_ENQUEUE(gtask); 1388 return (FILTER_HANDLED); 1389 } 1390 1391 static int 1392 iflib_fast_intr_rxtx(void *arg) 1393 { 1394 iflib_filter_info_t info = arg; 1395 struct grouptask *gtask = info->ifi_task; 1396 iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; 1397 if_ctx_t ctx; 1398 int i, cidx; 1399 1400 if (!iflib_started) 1401 return (FILTER_HANDLED); 1402 1403 DBG_COUNTER_INC(fast_intrs); 1404 if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) 1405 return (FILTER_HANDLED); 1406 1407 for (i = 0; i < rxq->ifr_ntxqirq; i++) { 1408 qidx_t txqid = rxq->ifr_txqid[i]; 1409 1410 ctx = rxq->ifr_ctx; 1411 1412 if (!ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) { 1413 IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); 1414 continue; 1415 } 1416 GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); 1417 } 1418 if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) 1419 cidx = rxq->ifr_cq_cidx; 1420 else 1421 cidx = rxq->ifr_fl[0].ifl_cidx; 1422 if (iflib_rxd_avail(ctx, rxq, cidx, 1)) 1423 GROUPTASK_ENQUEUE(gtask); 1424 else 1425 IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); 1426 return (FILTER_HANDLED); 1427 } 1428 1429 1430 static int 1431 iflib_fast_intr_ctx(void *arg) 1432 { 1433 iflib_filter_info_t info = arg; 1434 struct grouptask *gtask = info->ifi_task; 1435 1436 if (!iflib_started) 1437 return (FILTER_HANDLED); 1438 1439 DBG_COUNTER_INC(fast_intrs); 1440 if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) 1441 return (FILTER_HANDLED); 1442 1443 GROUPTASK_ENQUEUE(gtask); 1444 return (FILTER_HANDLED); 1445 } 1446 1447 static int 1448 _iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, 1449 driver_filter_t filter, driver_intr_t handler, void *arg, 1450 char *name) 1451 { 1452 int rc, flags; 1453 struct resource *res; 1454 void *tag = NULL; 1455 device_t dev = ctx->ifc_dev; 1456 1457 flags = RF_ACTIVE; 1458 if (ctx->ifc_flags & IFC_LEGACY) 1459 flags |= RF_SHAREABLE; 1460 MPASS(rid < 512); 1461 irq->ii_rid = rid; 1462 res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq->ii_rid, flags); 1463 if (res == NULL) { 1464 device_printf(dev, 1465 "failed to allocate IRQ for rid %d, name %s.\n", rid, name); 1466 return (ENOMEM); 1467 } 1468 irq->ii_res = res; 1469 KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL")); 1470 rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET, 1471 filter, handler, arg, &tag); 1472 if (rc != 0) { 1473 device_printf(dev, 1474 "failed to setup interrupt for rid %d, name %s: %d\n", 1475 rid, name ? name : "unknown", rc); 1476 return (rc); 1477 } else if (name) 1478 bus_describe_intr(dev, res, tag, "%s", name); 1479 1480 irq->ii_tag = tag; 1481 return (0); 1482 } 1483 1484 1485 /********************************************************************* 1486 * 1487 * Allocate memory for tx_buffer structures. 
The tx_buffer stores all 1488 * the information needed to transmit a packet on the wire. This is 1489 * called only once at attach, setup is done every reset. 1490 * 1491 **********************************************************************/ 1492 1493 static int 1494 iflib_txsd_alloc(iflib_txq_t txq) 1495 { 1496 if_ctx_t ctx = txq->ift_ctx; 1497 if_shared_ctx_t sctx = ctx->ifc_sctx; 1498 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 1499 device_t dev = ctx->ifc_dev; 1500 int err, nsegments, ntsosegments; 1501 1502 nsegments = scctx->isc_tx_nsegments; 1503 ntsosegments = scctx->isc_tx_tso_segments_max; 1504 MPASS(scctx->isc_ntxd[0] > 0); 1505 MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0); 1506 MPASS(nsegments > 0); 1507 MPASS(ntsosegments > 0); 1508 /* 1509 * Setup DMA descriptor areas. 1510 */ 1511 if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1512 1, 0, /* alignment, bounds */ 1513 BUS_SPACE_MAXADDR, /* lowaddr */ 1514 BUS_SPACE_MAXADDR, /* highaddr */ 1515 NULL, NULL, /* filter, filterarg */ 1516 sctx->isc_tx_maxsize, /* maxsize */ 1517 nsegments, /* nsegments */ 1518 sctx->isc_tx_maxsegsize, /* maxsegsize */ 1519 0, /* flags */ 1520 NULL, /* lockfunc */ 1521 NULL, /* lockfuncarg */ 1522 &txq->ift_desc_tag))) { 1523 device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err); 1524 device_printf(dev,"maxsize: %ju nsegments: %d maxsegsize: %ju\n", 1525 (uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize); 1526 goto fail; 1527 } 1528 if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1529 1, 0, /* alignment, bounds */ 1530 BUS_SPACE_MAXADDR, /* lowaddr */ 1531 BUS_SPACE_MAXADDR, /* highaddr */ 1532 NULL, NULL, /* filter, filterarg */ 1533 scctx->isc_tx_tso_size_max, /* maxsize */ 1534 ntsosegments, /* nsegments */ 1535 scctx->isc_tx_tso_segsize_max, /* maxsegsize */ 1536 0, /* flags */ 1537 NULL, /* lockfunc */ 1538 NULL, /* lockfuncarg */ 1539 &txq->ift_tso_desc_tag))) { 1540 device_printf(dev,"Unable to allocate TX TSO DMA tag: %d\n", err); 1541 1542 goto fail; 1543 } 1544 if (!(txq->ift_sds.ifsd_flags = 1545 (uint8_t *) malloc(sizeof(uint8_t) * 1546 scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 1547 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 1548 err = ENOMEM; 1549 goto fail; 1550 } 1551 if (!(txq->ift_sds.ifsd_m = 1552 (struct mbuf **) malloc(sizeof(struct mbuf *) * 1553 scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 1554 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 1555 err = ENOMEM; 1556 goto fail; 1557 } 1558 1559 /* Create the descriptor buffer dma maps */ 1560 #if defined(ACPI_DMAR) || (! 
(defined(__i386__) || defined(__amd64__))) 1561 if ((ctx->ifc_flags & IFC_DMAR) == 0) 1562 return (0); 1563 1564 if (!(txq->ift_sds.ifsd_map = 1565 (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 1566 device_printf(dev, "Unable to allocate tx_buffer map memory\n"); 1567 err = ENOMEM; 1568 goto fail; 1569 } 1570 1571 for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) { 1572 err = bus_dmamap_create(txq->ift_desc_tag, 0, &txq->ift_sds.ifsd_map[i]); 1573 if (err != 0) { 1574 device_printf(dev, "Unable to create TX DMA map\n"); 1575 goto fail; 1576 } 1577 } 1578 #endif 1579 return (0); 1580 fail: 1581 /* We free all, it handles case where we are in the middle */ 1582 iflib_tx_structures_free(ctx); 1583 return (err); 1584 } 1585 1586 static void 1587 iflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i) 1588 { 1589 bus_dmamap_t map; 1590 1591 map = NULL; 1592 if (txq->ift_sds.ifsd_map != NULL) 1593 map = txq->ift_sds.ifsd_map[i]; 1594 if (map != NULL) { 1595 bus_dmamap_unload(txq->ift_desc_tag, map); 1596 bus_dmamap_destroy(txq->ift_desc_tag, map); 1597 txq->ift_sds.ifsd_map[i] = NULL; 1598 } 1599 } 1600 1601 static void 1602 iflib_txq_destroy(iflib_txq_t txq) 1603 { 1604 if_ctx_t ctx = txq->ift_ctx; 1605 1606 for (int i = 0; i < txq->ift_size; i++) 1607 iflib_txsd_destroy(ctx, txq, i); 1608 if (txq->ift_sds.ifsd_map != NULL) { 1609 free(txq->ift_sds.ifsd_map, M_IFLIB); 1610 txq->ift_sds.ifsd_map = NULL; 1611 } 1612 if (txq->ift_sds.ifsd_m != NULL) { 1613 free(txq->ift_sds.ifsd_m, M_IFLIB); 1614 txq->ift_sds.ifsd_m = NULL; 1615 } 1616 if (txq->ift_sds.ifsd_flags != NULL) { 1617 free(txq->ift_sds.ifsd_flags, M_IFLIB); 1618 txq->ift_sds.ifsd_flags = NULL; 1619 } 1620 if (txq->ift_desc_tag != NULL) { 1621 bus_dma_tag_destroy(txq->ift_desc_tag); 1622 txq->ift_desc_tag = NULL; 1623 } 1624 if (txq->ift_tso_desc_tag != NULL) { 1625 bus_dma_tag_destroy(txq->ift_tso_desc_tag); 1626 txq->ift_tso_desc_tag = NULL; 1627 } 1628 } 1629 1630 static void 1631 iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i) 1632 { 1633 struct mbuf **mp; 1634 1635 mp = &txq->ift_sds.ifsd_m[i]; 1636 if (*mp == NULL) 1637 return; 1638 1639 if (txq->ift_sds.ifsd_map != NULL) { 1640 bus_dmamap_sync(txq->ift_desc_tag, 1641 txq->ift_sds.ifsd_map[i], 1642 BUS_DMASYNC_POSTWRITE); 1643 bus_dmamap_unload(txq->ift_desc_tag, 1644 txq->ift_sds.ifsd_map[i]); 1645 } 1646 m_free(*mp); 1647 DBG_COUNTER_INC(tx_frees); 1648 *mp = NULL; 1649 } 1650 1651 static int 1652 iflib_txq_setup(iflib_txq_t txq) 1653 { 1654 if_ctx_t ctx = txq->ift_ctx; 1655 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 1656 iflib_dma_info_t di; 1657 int i; 1658 1659 /* Set number of descriptors available */ 1660 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 1661 /* XXX make configurable */ 1662 txq->ift_update_freq = IFLIB_DEFAULT_TX_UPDATE_FREQ; 1663 1664 /* Reset indices */ 1665 txq->ift_cidx_processed = 0; 1666 txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0; 1667 txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset]; 1668 1669 for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++) 1670 bzero((void *)di->idi_vaddr, di->idi_size); 1671 1672 IFDI_TXQ_SETUP(ctx, txq->ift_id); 1673 for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++) 1674 bus_dmamap_sync(di->idi_tag, di->idi_map, 1675 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1676 return (0); 1677 } 1678 1679 /********************************************************************* 1680 * 1681 * Allocate memory for rx_buffer 
structures. Since we use one 1682 * rx_buffer per received packet, the maximum number of rx_buffer's 1683 * that we'll need is equal to the number of receive descriptors 1684 * that we've allocated. 1685 * 1686 **********************************************************************/ 1687 static int 1688 iflib_rxsd_alloc(iflib_rxq_t rxq) 1689 { 1690 if_ctx_t ctx = rxq->ifr_ctx; 1691 if_shared_ctx_t sctx = ctx->ifc_sctx; 1692 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 1693 device_t dev = ctx->ifc_dev; 1694 iflib_fl_t fl; 1695 int err; 1696 1697 MPASS(scctx->isc_nrxd[0] > 0); 1698 MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0); 1699 1700 fl = rxq->ifr_fl; 1701 for (int i = 0; i < rxq->ifr_nfl; i++, fl++) { 1702 fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */ 1703 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1704 1, 0, /* alignment, bounds */ 1705 BUS_SPACE_MAXADDR, /* lowaddr */ 1706 BUS_SPACE_MAXADDR, /* highaddr */ 1707 NULL, NULL, /* filter, filterarg */ 1708 sctx->isc_rx_maxsize, /* maxsize */ 1709 sctx->isc_rx_nsegments, /* nsegments */ 1710 sctx->isc_rx_maxsegsize, /* maxsegsize */ 1711 0, /* flags */ 1712 NULL, /* lockfunc */ 1713 NULL, /* lockarg */ 1714 &fl->ifl_desc_tag); 1715 if (err) { 1716 device_printf(dev, "%s: bus_dma_tag_create failed %d\n", 1717 __func__, err); 1718 goto fail; 1719 } 1720 if (!(fl->ifl_sds.ifsd_flags = 1721 (uint8_t *) malloc(sizeof(uint8_t) * 1722 scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 1723 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 1724 err = ENOMEM; 1725 goto fail; 1726 } 1727 if (!(fl->ifl_sds.ifsd_m = 1728 (struct mbuf **) malloc(sizeof(struct mbuf *) * 1729 scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 1730 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 1731 err = ENOMEM; 1732 goto fail; 1733 } 1734 if (!(fl->ifl_sds.ifsd_cl = 1735 (caddr_t *) malloc(sizeof(caddr_t) * 1736 scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 1737 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 1738 err = ENOMEM; 1739 goto fail; 1740 } 1741 1742 /* Create the descriptor buffer dma maps */ 1743 #if defined(ACPI_DMAR) || (! 
(defined(__i386__) || defined(__amd64__))) 1744 if ((ctx->ifc_flags & IFC_DMAR) == 0) 1745 continue; 1746 1747 if (!(fl->ifl_sds.ifsd_map = 1748 (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 1749 device_printf(dev, "Unable to allocate tx_buffer map memory\n"); 1750 err = ENOMEM; 1751 goto fail; 1752 } 1753 1754 for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) { 1755 err = bus_dmamap_create(fl->ifl_desc_tag, 0, &fl->ifl_sds.ifsd_map[i]); 1756 if (err != 0) { 1757 device_printf(dev, "Unable to create RX buffer DMA map\n"); 1758 goto fail; 1759 } 1760 } 1761 #endif 1762 } 1763 return (0); 1764 1765 fail: 1766 iflib_rx_structures_free(ctx); 1767 return (err); 1768 } 1769 1770 1771 /* 1772 * Internal service routines 1773 */ 1774 1775 struct rxq_refill_cb_arg { 1776 int error; 1777 bus_dma_segment_t seg; 1778 int nseg; 1779 }; 1780 1781 static void 1782 _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 1783 { 1784 struct rxq_refill_cb_arg *cb_arg = arg; 1785 1786 cb_arg->error = error; 1787 cb_arg->seg = segs[0]; 1788 cb_arg->nseg = nseg; 1789 } 1790 1791 1792 #ifdef ACPI_DMAR 1793 #define IS_DMAR(ctx) (ctx->ifc_flags & IFC_DMAR) 1794 #else 1795 #define IS_DMAR(ctx) (0) 1796 #endif 1797 1798 /** 1799 * rxq_refill - refill an rxq free-buffer list 1800 * @ctx: the iflib context 1801 * @rxq: the free-list to refill 1802 * @n: the number of new buffers to allocate 1803 * 1804 * (Re)populate an rxq free-buffer list with up to @n new packet buffers. 1805 * The caller must assure that @n does not exceed the queue's capacity. 1806 */ 1807 static void 1808 _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) 1809 { 1810 struct mbuf *m; 1811 int idx, frag_idx = fl->ifl_fragidx; 1812 int pidx = fl->ifl_pidx; 1813 caddr_t cl, *sd_cl; 1814 struct mbuf **sd_m; 1815 uint8_t *sd_flags; 1816 struct if_rxd_update iru; 1817 bus_dmamap_t *sd_map; 1818 int n, i = 0; 1819 uint64_t bus_addr; 1820 int err; 1821 qidx_t credits; 1822 1823 sd_m = fl->ifl_sds.ifsd_m; 1824 sd_map = fl->ifl_sds.ifsd_map; 1825 sd_cl = fl->ifl_sds.ifsd_cl; 1826 sd_flags = fl->ifl_sds.ifsd_flags; 1827 idx = pidx; 1828 credits = fl->ifl_credits; 1829 1830 n = count; 1831 MPASS(n > 0); 1832 MPASS(credits + n <= fl->ifl_size); 1833 1834 if (pidx < fl->ifl_cidx) 1835 MPASS(pidx + n <= fl->ifl_cidx); 1836 if (pidx == fl->ifl_cidx && (credits < fl->ifl_size)) 1837 MPASS(fl->ifl_gen == 0); 1838 if (pidx > fl->ifl_cidx) 1839 MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx); 1840 1841 DBG_COUNTER_INC(fl_refills); 1842 if (n > 8) 1843 DBG_COUNTER_INC(fl_refills_large); 1844 iru_init(&iru, fl->ifl_rxq, fl->ifl_id); 1845 while (n--) { 1846 /* 1847 * We allocate an uninitialized mbuf + cluster, mbuf is 1848 * initialized after rx. 
1849 * 1850 * If the cluster is still set then we know a minimum sized packet was received 1851 */ 1852 bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size, &frag_idx); 1853 if ((frag_idx < 0) || (frag_idx >= fl->ifl_size)) 1854 bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx); 1855 if ((cl = sd_cl[frag_idx]) == NULL) { 1856 if ((cl = sd_cl[frag_idx] = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL) 1857 break; 1858 #if MEMORY_LOGGING 1859 fl->ifl_cl_enqueued++; 1860 #endif 1861 } 1862 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { 1863 break; 1864 } 1865 #if MEMORY_LOGGING 1866 fl->ifl_m_enqueued++; 1867 #endif 1868 1869 DBG_COUNTER_INC(rx_allocs); 1870 #if defined(__i386__) || defined(__amd64__) 1871 if (!IS_DMAR(ctx)) { 1872 bus_addr = pmap_kextract((vm_offset_t)cl); 1873 } else 1874 #endif 1875 { 1876 struct rxq_refill_cb_arg cb_arg; 1877 iflib_rxq_t q; 1878 1879 cb_arg.error = 0; 1880 q = fl->ifl_rxq; 1881 MPASS(sd_map != NULL); 1882 MPASS(sd_map[frag_idx] != NULL); 1883 err = bus_dmamap_load(fl->ifl_desc_tag, sd_map[frag_idx], 1884 cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 0); 1885 bus_dmamap_sync(fl->ifl_desc_tag, sd_map[frag_idx], 1886 BUS_DMASYNC_PREREAD); 1887 1888 if (err != 0 || cb_arg.error) { 1889 /* 1890 * !zone_pack ? 1891 */ 1892 if (fl->ifl_zone == zone_pack) 1893 uma_zfree(fl->ifl_zone, cl); 1894 m_free(m); 1895 n = 0; 1896 goto done; 1897 } 1898 bus_addr = cb_arg.seg.ds_addr; 1899 } 1900 bit_set(fl->ifl_rx_bitmap, frag_idx); 1901 sd_flags[frag_idx] |= RX_SW_DESC_INUSE; 1902 1903 MPASS(sd_m[frag_idx] == NULL); 1904 sd_cl[frag_idx] = cl; 1905 sd_m[frag_idx] = m; 1906 fl->ifl_rxd_idxs[i] = frag_idx; 1907 fl->ifl_bus_addrs[i] = bus_addr; 1908 fl->ifl_vm_addrs[i] = cl; 1909 credits++; 1910 i++; 1911 MPASS(credits <= fl->ifl_size); 1912 if (++idx == fl->ifl_size) { 1913 fl->ifl_gen = 1; 1914 idx = 0; 1915 } 1916 if (n == 0 || i == IFLIB_MAX_RX_REFRESH) { 1917 iru.iru_pidx = pidx; 1918 iru.iru_count = i; 1919 ctx->isc_rxd_refill(ctx->ifc_softc, &iru); 1920 i = 0; 1921 pidx = idx; 1922 fl->ifl_pidx = idx; 1923 fl->ifl_credits = credits; 1924 } 1925 1926 } 1927 done: 1928 if (i) { 1929 iru.iru_pidx = pidx; 1930 iru.iru_count = i; 1931 ctx->isc_rxd_refill(ctx->ifc_softc, &iru); 1932 fl->ifl_pidx = idx; 1933 fl->ifl_credits = credits; 1934 } 1935 DBG_COUNTER_INC(rxd_flush); 1936 if (fl->ifl_pidx == 0) 1937 pidx = fl->ifl_size - 1; 1938 else 1939 pidx = fl->ifl_pidx - 1; 1940 1941 if (sd_map) 1942 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, 1943 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1944 ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx); 1945 fl->ifl_fragidx = frag_idx; 1946 } 1947 1948 static __inline void 1949 __iflib_fl_refill_lt(if_ctx_t ctx, iflib_fl_t fl, int max) 1950 { 1951 /* we avoid allowing pidx to catch up with cidx as it confuses ixl */ 1952 int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1; 1953 #ifdef INVARIANTS 1954 int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1; 1955 #endif 1956 1957 MPASS(fl->ifl_credits <= fl->ifl_size); 1958 MPASS(reclaimable == delta); 1959 1960 if (reclaimable > 0) 1961 _iflib_fl_refill(ctx, fl, min(max, reclaimable)); 1962 } 1963 1964 static void 1965 iflib_fl_bufs_free(iflib_fl_t fl) 1966 { 1967 iflib_dma_info_t idi = fl->ifl_ifdi; 1968 uint32_t i; 1969 1970 for (i = 0; i < fl->ifl_size; i++) { 1971 struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i]; 1972 uint8_t *sd_flags = &fl->ifl_sds.ifsd_flags[i]; 1973 caddr_t *sd_cl 
= &fl->ifl_sds.ifsd_cl[i]; 1974 1975 if (*sd_flags & RX_SW_DESC_INUSE) { 1976 if (fl->ifl_sds.ifsd_map != NULL) { 1977 bus_dmamap_t sd_map = fl->ifl_sds.ifsd_map[i]; 1978 bus_dmamap_unload(fl->ifl_desc_tag, sd_map); 1979 bus_dmamap_destroy(fl->ifl_desc_tag, sd_map); 1980 } 1981 if (*sd_m != NULL) { 1982 m_init(*sd_m, M_NOWAIT, MT_DATA, 0); 1983 uma_zfree(zone_mbuf, *sd_m); 1984 } 1985 if (*sd_cl != NULL) 1986 uma_zfree(fl->ifl_zone, *sd_cl); 1987 *sd_flags = 0; 1988 } else { 1989 MPASS(*sd_cl == NULL); 1990 MPASS(*sd_m == NULL); 1991 } 1992 #if MEMORY_LOGGING 1993 fl->ifl_m_dequeued++; 1994 fl->ifl_cl_dequeued++; 1995 #endif 1996 *sd_cl = NULL; 1997 *sd_m = NULL; 1998 } 1999 #ifdef INVARIANTS 2000 for (i = 0; i < fl->ifl_size; i++) { 2001 MPASS(fl->ifl_sds.ifsd_flags[i] == 0); 2002 MPASS(fl->ifl_sds.ifsd_cl[i] == NULL); 2003 MPASS(fl->ifl_sds.ifsd_m[i] == NULL); 2004 } 2005 #endif 2006 /* 2007 * Reset free list values 2008 */ 2009 fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0; 2010 bzero(idi->idi_vaddr, idi->idi_size); 2011 } 2012 2013 /********************************************************************* 2014 * 2015 * Initialize a receive ring and its buffers. 2016 * 2017 **********************************************************************/ 2018 static int 2019 iflib_fl_setup(iflib_fl_t fl) 2020 { 2021 iflib_rxq_t rxq = fl->ifl_rxq; 2022 if_ctx_t ctx = rxq->ifr_ctx; 2023 if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; 2024 2025 bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size); 2026 /* 2027 ** Free current RX buffer structs and their mbufs 2028 */ 2029 iflib_fl_bufs_free(fl); 2030 /* Now replenish the mbufs */ 2031 MPASS(fl->ifl_credits == 0); 2032 /* 2033 * XXX don't set the max_frame_size to larger 2034 * than the hardware can handle 2035 */ 2036 if (sctx->isc_max_frame_size <= 2048) 2037 fl->ifl_buf_size = MCLBYTES; 2038 #ifndef CONTIGMALLOC_WORKS 2039 else 2040 fl->ifl_buf_size = MJUMPAGESIZE; 2041 #else 2042 else if (sctx->isc_max_frame_size <= 4096) 2043 fl->ifl_buf_size = MJUMPAGESIZE; 2044 else if (sctx->isc_max_frame_size <= 9216) 2045 fl->ifl_buf_size = MJUM9BYTES; 2046 else 2047 fl->ifl_buf_size = MJUM16BYTES; 2048 #endif 2049 if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size) 2050 ctx->ifc_max_fl_buf_size = fl->ifl_buf_size; 2051 fl->ifl_cltype = m_gettype(fl->ifl_buf_size); 2052 fl->ifl_zone = m_getzone(fl->ifl_buf_size); 2053 2054 2055 /* avoid pre-allocating zillions of clusters to an idle card 2056 * potentially speeding up attach 2057 */ 2058 _iflib_fl_refill(ctx, fl, min(128, fl->ifl_size)); 2059 MPASS(min(128, fl->ifl_size) == fl->ifl_credits); 2060 if (min(128, fl->ifl_size) != fl->ifl_credits) 2061 return (ENOBUFS); 2062 /* 2063 * handle failure 2064 */ 2065 MPASS(rxq != NULL); 2066 MPASS(fl->ifl_ifdi != NULL); 2067 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, 2068 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 2069 return (0); 2070 } 2071 2072 /********************************************************************* 2073 * 2074 * Free receive ring data structures 2075 * 2076 **********************************************************************/ 2077 static void 2078 iflib_rx_sds_free(iflib_rxq_t rxq) 2079 { 2080 iflib_fl_t fl; 2081 int i; 2082 2083 if (rxq->ifr_fl != NULL) { 2084 for (i = 0; i < rxq->ifr_nfl; i++) { 2085 fl = &rxq->ifr_fl[i]; 2086 if (fl->ifl_desc_tag != NULL) { 2087 bus_dma_tag_destroy(fl->ifl_desc_tag); 2088 fl->ifl_desc_tag = NULL; 2089 } 2090 free(fl->ifl_sds.ifsd_m, M_IFLIB); 2091 
free(fl->ifl_sds.ifsd_cl, M_IFLIB); 2092 /* XXX destroy maps first */ 2093 free(fl->ifl_sds.ifsd_map, M_IFLIB); 2094 fl->ifl_sds.ifsd_m = NULL; 2095 fl->ifl_sds.ifsd_cl = NULL; 2096 fl->ifl_sds.ifsd_map = NULL; 2097 } 2098 free(rxq->ifr_fl, M_IFLIB); 2099 rxq->ifr_fl = NULL; 2100 rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; 2101 } 2102 } 2103 2104 /* 2105 * MI independent logic 2106 * 2107 */ 2108 static void 2109 iflib_timer(void *arg) 2110 { 2111 iflib_txq_t txq = arg; 2112 if_ctx_t ctx = txq->ift_ctx; 2113 if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; 2114 2115 if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) 2116 return; 2117 /* 2118 ** Check on the state of the TX queue(s), this 2119 ** can be done without the lock because its RO 2120 ** and the HUNG state will be static if set. 2121 */ 2122 IFDI_TIMER(ctx, txq->ift_id); 2123 if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && 2124 ((txq->ift_cleaned_prev == txq->ift_cleaned) || 2125 (sctx->isc_pause_frames == 0))) 2126 goto hung; 2127 2128 if (ifmp_ring_is_stalled(txq->ift_br)) 2129 txq->ift_qstatus = IFLIB_QUEUE_HUNG; 2130 txq->ift_cleaned_prev = txq->ift_cleaned; 2131 /* handle any laggards */ 2132 if (txq->ift_db_pending) 2133 GROUPTASK_ENQUEUE(&txq->ift_task); 2134 2135 sctx->isc_pause_frames = 0; 2136 if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) 2137 callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); 2138 return; 2139 hung: 2140 CTX_LOCK(ctx); 2141 if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2142 device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", 2143 txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); 2144 2145 IFDI_WATCHDOG_RESET(ctx); 2146 ctx->ifc_watchdog_events++; 2147 2148 ctx->ifc_flags |= IFC_DO_RESET; 2149 iflib_admin_intr_deferred(ctx); 2150 CTX_UNLOCK(ctx); 2151 } 2152 2153 static void 2154 iflib_init_locked(if_ctx_t ctx) 2155 { 2156 if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; 2157 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 2158 if_t ifp = ctx->ifc_ifp; 2159 iflib_fl_t fl; 2160 iflib_txq_t txq; 2161 iflib_rxq_t rxq; 2162 int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; 2163 2164 2165 if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2166 IFDI_INTR_DISABLE(ctx); 2167 2168 tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); 2169 tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP); 2170 /* Set hardware offload abilities */ 2171 if_clearhwassist(ifp); 2172 if (if_getcapenable(ifp) & IFCAP_TXCSUM) 2173 if_sethwassistbits(ifp, tx_ip_csum_flags, 0); 2174 if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) 2175 if_sethwassistbits(ifp, tx_ip6_csum_flags, 0); 2176 if (if_getcapenable(ifp) & IFCAP_TSO4) 2177 if_sethwassistbits(ifp, CSUM_IP_TSO, 0); 2178 if (if_getcapenable(ifp) & IFCAP_TSO6) 2179 if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); 2180 2181 for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) { 2182 CALLOUT_LOCK(txq); 2183 callout_stop(&txq->ift_timer); 2184 CALLOUT_UNLOCK(txq); 2185 iflib_netmap_txq_init(ctx, txq); 2186 } 2187 #ifdef INVARIANTS 2188 i = if_getdrvflags(ifp); 2189 #endif 2190 IFDI_INIT(ctx); 2191 MPASS(if_getdrvflags(ifp) == i); 2192 for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { 2193 /* XXX this should really be done on a per-queue basis */ 2194 if (if_getcapenable(ifp) & IFCAP_NETMAP) { 2195 MPASS(rxq->ifr_id == i); 2196 iflib_netmap_rxq_init(ctx, rxq); 2197 continue; 2198 } 2199 for (j = 0, fl = 
rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { 2200 if (iflib_fl_setup(fl)) { 2201 device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n"); 2202 goto done; 2203 } 2204 } 2205 } 2206 done: 2207 if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); 2208 IFDI_INTR_ENABLE(ctx); 2209 txq = ctx->ifc_txqs; 2210 for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) 2211 callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, 2212 txq->ift_timer.c_cpu); 2213 } 2214 2215 static int 2216 iflib_media_change(if_t ifp) 2217 { 2218 if_ctx_t ctx = if_getsoftc(ifp); 2219 int err; 2220 2221 CTX_LOCK(ctx); 2222 if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0) 2223 iflib_init_locked(ctx); 2224 CTX_UNLOCK(ctx); 2225 return (err); 2226 } 2227 2228 static void 2229 iflib_media_status(if_t ifp, struct ifmediareq *ifmr) 2230 { 2231 if_ctx_t ctx = if_getsoftc(ifp); 2232 2233 CTX_LOCK(ctx); 2234 IFDI_UPDATE_ADMIN_STATUS(ctx); 2235 IFDI_MEDIA_STATUS(ctx, ifmr); 2236 CTX_UNLOCK(ctx); 2237 } 2238 2239 static void 2240 iflib_stop(if_ctx_t ctx) 2241 { 2242 iflib_txq_t txq = ctx->ifc_txqs; 2243 iflib_rxq_t rxq = ctx->ifc_rxqs; 2244 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 2245 iflib_dma_info_t di; 2246 iflib_fl_t fl; 2247 int i, j; 2248 2249 /* Tell the stack that the interface is no longer active */ 2250 if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2251 2252 IFDI_INTR_DISABLE(ctx); 2253 DELAY(1000); 2254 IFDI_STOP(ctx); 2255 DELAY(1000); 2256 2257 iflib_debug_reset(); 2258 /* Wait for current tx queue users to exit to disarm watchdog timer. */ 2259 for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) { 2260 /* make sure all transmitters have completed before proceeding XXX */ 2261 2262 /* clean any enqueued buffers */ 2263 iflib_ifmp_purge(txq); 2264 /* Free any existing tx buffers. */ 2265 for (j = 0; j < txq->ift_size; j++) { 2266 iflib_txsd_free(ctx, txq, j); 2267 } 2268 txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; 2269 txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0; 2270 txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; 2271 txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; 2272 txq->ift_pullups = 0; 2273 ifmp_ring_reset_stats(txq->ift_br); 2274 for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwtxqs; j++, di++) 2275 bzero((void *)di->idi_vaddr, di->idi_size); 2276 } 2277 for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) { 2278 /* make sure all receive processing has completed before proceeding XXX */ 2279 2280 for (j = 0, di = rxq->ifr_ifdi; j < ctx->ifc_nhwrxqs; j++, di++) 2281 bzero((void *)di->idi_vaddr, di->idi_size); 2282 /* also resets the free lists pidx/cidx */ 2283 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) 2284 iflib_fl_bufs_free(fl); 2285 } 2286 } 2287 2288 static inline caddr_t 2289 calc_next_rxd(iflib_fl_t fl, int cidx) 2290 { 2291 qidx_t size; 2292 int nrxd; 2293 caddr_t start, end, cur, next; 2294 2295 nrxd = fl->ifl_size; 2296 size = fl->ifl_rxd_size; 2297 start = fl->ifl_ifdi->idi_vaddr; 2298 2299 if (__predict_false(size == 0)) 2300 return (start); 2301 cur = start + size*cidx; 2302 end = start + size*nrxd; 2303 next = CACHE_PTR_NEXT(cur); 2304 return (next < end ?
next : start); 2305 } 2306 2307 static inline void 2308 prefetch_pkts(iflib_fl_t fl, int cidx) 2309 { 2310 int nextptr; 2311 int nrxd = fl->ifl_size; 2312 caddr_t next_rxd; 2313 2314 2315 nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1); 2316 prefetch(&fl->ifl_sds.ifsd_m[nextptr]); 2317 prefetch(&fl->ifl_sds.ifsd_cl[nextptr]); 2318 next_rxd = calc_next_rxd(fl, cidx); 2319 prefetch(next_rxd); 2320 prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]); 2321 prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]); 2322 prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]); 2323 prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]); 2324 prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]); 2325 prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]); 2326 prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]); 2327 prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]); 2328 } 2329 2330 static void 2331 rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd) 2332 { 2333 int flid, cidx; 2334 bus_dmamap_t map; 2335 iflib_fl_t fl; 2336 iflib_dma_info_t di; 2337 int next; 2338 2339 map = NULL; 2340 flid = irf->irf_flid; 2341 cidx = irf->irf_idx; 2342 fl = &rxq->ifr_fl[flid]; 2343 sd->ifsd_fl = fl; 2344 sd->ifsd_cidx = cidx; 2345 sd->ifsd_m = &fl->ifl_sds.ifsd_m[cidx]; 2346 sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx]; 2347 fl->ifl_credits--; 2348 #if MEMORY_LOGGING 2349 fl->ifl_m_dequeued++; 2350 #endif 2351 if (rxq->ifr_ctx->ifc_flags & IFC_PREFETCH) 2352 prefetch_pkts(fl, cidx); 2353 if (fl->ifl_sds.ifsd_map != NULL) { 2354 next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1); 2355 prefetch(&fl->ifl_sds.ifsd_map[next]); 2356 map = fl->ifl_sds.ifsd_map[cidx]; 2357 di = fl->ifl_ifdi; 2358 next = (cidx + CACHE_LINE_SIZE) & (fl->ifl_size-1); 2359 prefetch(&fl->ifl_sds.ifsd_flags[next]); 2360 bus_dmamap_sync(di->idi_tag, di->idi_map, 2361 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 2362 2363 /* not valid assert if bxe really does SGE from non-contiguous elements */ 2364 MPASS(fl->ifl_cidx == cidx); 2365 if (unload) 2366 bus_dmamap_unload(fl->ifl_desc_tag, map); 2367 } 2368 fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1); 2369 if (__predict_false(fl->ifl_cidx == 0)) 2370 fl->ifl_gen = 0; 2371 if (map != NULL) 2372 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, 2373 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 2374 bit_clear(fl->ifl_rx_bitmap, cidx); 2375 } 2376 2377 static struct mbuf * 2378 assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd) 2379 { 2380 int i, padlen , flags; 2381 struct mbuf *m, *mh, *mt; 2382 caddr_t cl; 2383 2384 i = 0; 2385 mh = NULL; 2386 do { 2387 rxd_frag_to_sd(rxq, &ri->iri_frags[i], TRUE, sd); 2388 2389 MPASS(*sd->ifsd_cl != NULL); 2390 MPASS(*sd->ifsd_m != NULL); 2391 2392 /* Don't include zero-length frags */ 2393 if (ri->iri_frags[i].irf_len == 0) { 2394 /* XXX we can save the cluster here, but not the mbuf */ 2395 m_init(*sd->ifsd_m, M_NOWAIT, MT_DATA, 0); 2396 m_free(*sd->ifsd_m); 2397 *sd->ifsd_m = NULL; 2398 continue; 2399 } 2400 m = *sd->ifsd_m; 2401 *sd->ifsd_m = NULL; 2402 if (mh == NULL) { 2403 flags = M_PKTHDR|M_EXT; 2404 mh = mt = m; 2405 padlen = ri->iri_pad; 2406 } else { 2407 flags = M_EXT; 2408 mt->m_next = m; 2409 mt = m; 2410 /* assuming padding is only on the first fragment */ 2411 padlen = 0; 2412 } 2413 cl = *sd->ifsd_cl; 2414 *sd->ifsd_cl = NULL; 2415 2416 /* Can these two be made one ? 
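* (editorial aside: m_init() below reinitializes the mbuf header and
* m_cljset() attaches the receive cluster to it; they are separate
* entry points in the mbuf API, so folding them together would need a
* new helper)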
*/ 2417 m_init(m, M_NOWAIT, MT_DATA, flags); 2418 m_cljset(m, cl, sd->ifsd_fl->ifl_cltype); 2419 /* 2420 * These must follow m_init and m_cljset 2421 */ 2422 m->m_data += padlen; 2423 ri->iri_len -= padlen; 2424 m->m_len = ri->iri_frags[i].irf_len; 2425 } while (++i < ri->iri_nfrags); 2426 2427 return (mh); 2428 } 2429 2430 /* 2431 * Process one software descriptor 2432 */ 2433 static struct mbuf * 2434 iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) 2435 { 2436 struct if_rxsd sd; 2437 struct mbuf *m; 2438 2439 /* should I merge this back in now that the two paths are basically duplicated? */ 2440 if (ri->iri_nfrags == 1 && 2441 ri->iri_frags[0].irf_len <= IFLIB_RX_COPY_THRESH) { 2442 rxd_frag_to_sd(rxq, &ri->iri_frags[0], FALSE, &sd); 2443 m = *sd.ifsd_m; 2444 *sd.ifsd_m = NULL; 2445 m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); 2446 #ifndef __NO_STRICT_ALIGNMENT 2447 if (!IP_ALIGNED(m)) 2448 m->m_data += 2; 2449 #endif 2450 memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); 2451 m->m_len = ri->iri_frags[0].irf_len; 2452 } else { 2453 m = assemble_segments(rxq, ri, &sd); 2454 } 2455 m->m_pkthdr.len = ri->iri_len; 2456 m->m_pkthdr.rcvif = ri->iri_ifp; 2457 m->m_flags |= ri->iri_flags; 2458 m->m_pkthdr.ether_vtag = ri->iri_vtag; 2459 m->m_pkthdr.flowid = ri->iri_flowid; 2460 M_HASHTYPE_SET(m, ri->iri_rsstype); 2461 m->m_pkthdr.csum_flags = ri->iri_csum_flags; 2462 m->m_pkthdr.csum_data = ri->iri_csum_data; 2463 return (m); 2464 } 2465 2466 static bool 2467 iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) 2468 { 2469 if_ctx_t ctx = rxq->ifr_ctx; 2470 if_shared_ctx_t sctx = ctx->ifc_sctx; 2471 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 2472 int avail, i; 2473 qidx_t *cidxp; 2474 struct if_rxd_info ri; 2475 int err, budget_left, rx_bytes, rx_pkts; 2476 iflib_fl_t fl; 2477 struct ifnet *ifp; 2478 int lro_enabled; 2479 2480 /* 2481 * XXX early demux data packets so that if_input processing only handles 2482 * acks in interrupt context 2483 */ 2484 struct mbuf *m, *mh, *mt, *mf; 2485 2486 ifp = ctx->ifc_ifp; 2487 mh = mt = NULL; 2488 MPASS(budget > 0); 2489 rx_pkts = rx_bytes = 0; 2490 if (sctx->isc_flags & IFLIB_HAS_RXCQ) 2491 cidxp = &rxq->ifr_cq_cidx; 2492 else 2493 cidxp = &rxq->ifr_fl[0].ifl_cidx; 2494 if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) { 2495 for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) 2496 __iflib_fl_refill_lt(ctx, fl, budget + 8); 2497 DBG_COUNTER_INC(rx_unavail); 2498 return (false); 2499 } 2500 2501 for (budget_left = budget; (budget_left > 0) && (avail > 0); budget_left--, avail--) { 2502 if (__predict_false(!CTX_ACTIVE(ctx))) { 2503 DBG_COUNTER_INC(rx_ctx_inactive); 2504 break; 2505 } 2506 /* 2507 * Reset client set fields to their default values 2508 */ 2509 rxd_info_zero(&ri); 2510 ri.iri_qsidx = rxq->ifr_id; 2511 ri.iri_cidx = *cidxp; 2512 ri.iri_ifp = ifp; 2513 ri.iri_frags = rxq->ifr_frags; 2514 err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); 2515 2516 if (err) 2517 goto err; 2518 if (sctx->isc_flags & IFLIB_HAS_RXCQ) { 2519 *cidxp = ri.iri_cidx; 2520 /* Update our consumer index */ 2521 /* XXX NB: shurd - check if this is still safe */ 2522 while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) { 2523 rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; 2524 rxq->ifr_cq_gen = 0; 2525 } 2526 /* was this only a completion queue message? 
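* (i.e. an entry that only advances the completion queue and carries no
* packet fragments; the consumer index has already been updated above,
* so there is nothing further to hand to the stack)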
*/ 2527 if (__predict_false(ri.iri_nfrags == 0)) 2528 continue; 2529 } 2530 MPASS(ri.iri_nfrags != 0); 2531 MPASS(ri.iri_len != 0); 2532 2533 /* will advance the cidx on the corresponding free lists */ 2534 m = iflib_rxd_pkt_get(rxq, &ri); 2535 if (avail == 0 && budget_left) 2536 avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); 2537 2538 if (__predict_false(m == NULL)) { 2539 DBG_COUNTER_INC(rx_mbuf_null); 2540 continue; 2541 } 2542 /* imm_pkt: -- cxgb */ 2543 if (mh == NULL) 2544 mh = mt = m; 2545 else { 2546 mt->m_nextpkt = m; 2547 mt = m; 2548 } 2549 } 2550 /* make sure that we can refill faster than drain */ 2551 for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) 2552 __iflib_fl_refill_lt(ctx, fl, budget + 8); 2553 2554 lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); 2555 mt = mf = NULL; 2556 while (mh != NULL) { 2557 m = mh; 2558 if (mf == NULL) 2559 mf = m; 2560 mh = mh->m_nextpkt; 2561 m->m_nextpkt = NULL; 2562 #ifndef __NO_STRICT_ALIGNMENT 2563 if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL) 2564 continue; 2565 #endif 2566 rx_bytes += m->m_pkthdr.len; 2567 rx_pkts++; 2568 #if defined(INET6) || defined(INET) 2569 if (lro_enabled && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) { 2570 if (mf == m) 2571 mf = NULL; 2572 continue; 2573 } 2574 #endif 2575 if (mt != NULL) 2576 mt->m_nextpkt = m; 2577 mt = m; 2578 } 2579 if (mf != NULL) { 2580 ifp->if_input(ifp, mf); 2581 DBG_COUNTER_INC(rx_if_input); 2582 } 2583 2584 if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); 2585 if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); 2586 2587 /* 2588 * Flush any outstanding LRO work 2589 */ 2590 #if defined(INET6) || defined(INET) 2591 tcp_lro_flush_all(&rxq->ifr_lc); 2592 #endif 2593 if (avail) 2594 return true; 2595 return (iflib_rxd_avail(ctx, rxq, *cidxp, 1)); 2596 err: 2597 CTX_LOCK(ctx); 2598 ctx->ifc_flags |= IFC_DO_RESET; 2599 iflib_admin_intr_deferred(ctx); 2600 CTX_UNLOCK(ctx); 2601 return (false); 2602 } 2603 2604 #define TXD_NOTIFY_COUNT(txq) (((txq)->ift_size / (txq)->ift_update_freq)-1) 2605 static inline qidx_t 2606 txq_max_db_deferred(iflib_txq_t txq, qidx_t in_use) 2607 { 2608 qidx_t notify_count = TXD_NOTIFY_COUNT(txq); 2609 qidx_t minthresh = txq->ift_size / 8; 2610 if (in_use > 4*minthresh) 2611 return (notify_count); 2612 if (in_use > 2*minthresh) 2613 return (notify_count >> 1); 2614 if (in_use > minthresh) 2615 return (notify_count >> 3); 2616 return (0); 2617 } 2618 2619 static inline qidx_t 2620 txq_max_rs_deferred(iflib_txq_t txq) 2621 { 2622 qidx_t notify_count = TXD_NOTIFY_COUNT(txq); 2623 qidx_t minthresh = txq->ift_size / 8; 2624 if (txq->ift_in_use > 4*minthresh) 2625 return (notify_count); 2626 if (txq->ift_in_use > 2*minthresh) 2627 return (notify_count >> 1); 2628 if (txq->ift_in_use > minthresh) 2629 return (notify_count >> 2); 2630 return (2); 2631 } 2632 2633 #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags) 2634 #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG) 2635 2636 #define TXQ_MAX_DB_DEFERRED(txq, in_use) txq_max_db_deferred((txq), (in_use)) 2637 #define TXQ_MAX_RS_DEFERRED(txq) txq_max_rs_deferred(txq) 2638 #define TXQ_MAX_DB_CONSUMED(size) (size >> 4) 2639 2640 /* forward compatibility for cxgb */ 2641 #define FIRST_QSET(ctx) 0 2642 #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets) 2643 #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets) 2644 #define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx)) 2645 #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - 
(q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) 2646 2647 /* XXX we should be setting this to something other than zero */ 2648 #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) 2649 #define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max) 2650 2651 static inline bool 2652 iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring, qidx_t in_use) 2653 { 2654 qidx_t dbval, max; 2655 bool rang; 2656 2657 rang = false; 2658 max = TXQ_MAX_DB_DEFERRED(txq, in_use); 2659 if (ring || txq->ift_db_pending >= max) { 2660 dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx; 2661 ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval); 2662 txq->ift_db_pending = txq->ift_npending = 0; 2663 rang = true; 2664 } 2665 return (rang); 2666 } 2667 2668 #ifdef PKT_DEBUG 2669 static void 2670 print_pkt(if_pkt_info_t pi) 2671 { 2672 printf("pi len: %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n", 2673 pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx); 2674 printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n", 2675 pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag); 2676 printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n", 2677 pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto); 2678 } 2679 #endif 2680 2681 #define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO) 2682 #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO) 2683 2684 static int 2685 iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) 2686 { 2687 if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; 2688 struct ether_vlan_header *eh; 2689 struct mbuf *m, *n; 2690 2691 n = m = *mp; 2692 if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && 2693 M_WRITABLE(m) == 0) { 2694 if ((m = m_dup(m, M_NOWAIT)) == NULL) { 2695 return (ENOMEM); 2696 } else { 2697 m_freem(*mp); 2698 n = *mp = m; 2699 } 2700 } 2701 2702 /* 2703 * Determine where frame payload starts. 2704 * Jump over vlan headers if already present, 2705 * helpful for QinQ too. 
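* Illustrative note: ipi_ehdrlen ends up as ETHER_HDR_LEN (14 bytes) for
* untagged frames and ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN (14 + 4 = 18)
* when an 802.1Q tag is present, as computed just below.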
2706 */ 2707 if (__predict_false(m->m_len < sizeof(*eh))) { 2708 txq->ift_pullups++; 2709 if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL)) 2710 return (ENOMEM); 2711 } 2712 eh = mtod(m, struct ether_vlan_header *); 2713 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 2714 pi->ipi_etype = ntohs(eh->evl_proto); 2715 pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 2716 } else { 2717 pi->ipi_etype = ntohs(eh->evl_encap_proto); 2718 pi->ipi_ehdrlen = ETHER_HDR_LEN; 2719 } 2720 2721 switch (pi->ipi_etype) { 2722 #ifdef INET 2723 case ETHERTYPE_IP: 2724 { 2725 struct ip *ip = NULL; 2726 struct tcphdr *th = NULL; 2727 int minthlen; 2728 2729 minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); 2730 if (__predict_false(m->m_len < minthlen)) { 2731 /* 2732 * if this code bloat is causing too much of a hit 2733 * move it to a separate function and mark it noinline 2734 */ 2735 if (m->m_len == pi->ipi_ehdrlen) { 2736 n = m->m_next; 2737 MPASS(n); 2738 if (n->m_len >= sizeof(*ip)) { 2739 ip = (struct ip *)n->m_data; 2740 if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th)) 2741 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 2742 } else { 2743 txq->ift_pullups++; 2744 if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) 2745 return (ENOMEM); 2746 ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); 2747 } 2748 } else { 2749 txq->ift_pullups++; 2750 if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) 2751 return (ENOMEM); 2752 ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); 2753 if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) 2754 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 2755 } 2756 } else { 2757 ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); 2758 if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) 2759 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 2760 } 2761 pi->ipi_ip_hlen = ip->ip_hl << 2; 2762 pi->ipi_ipproto = ip->ip_p; 2763 pi->ipi_flags |= IPI_TX_IPV4; 2764 2765 if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP)) 2766 ip->ip_sum = 0; 2767 2768 if (IS_TSO4(pi)) { 2769 if (pi->ipi_ipproto == IPPROTO_TCP) { 2770 if (__predict_false(th == NULL)) { 2771 txq->ift_pullups++; 2772 if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) 2773 return (ENOMEM); 2774 th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); 2775 } 2776 pi->ipi_tcp_hflags = th->th_flags; 2777 pi->ipi_tcp_hlen = th->th_off << 2; 2778 pi->ipi_tcp_seq = th->th_seq; 2779 } 2780 if (__predict_false(ip->ip_p != IPPROTO_TCP)) 2781 return (ENXIO); 2782 th->th_sum = in_pseudo(ip->ip_src.s_addr, 2783 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 2784 pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; 2785 if (sctx->isc_flags & IFLIB_TSO_INIT_IP) { 2786 ip->ip_sum = 0; 2787 ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz); 2788 } 2789 } 2790 break; 2791 } 2792 #endif 2793 #ifdef INET6 2794 case ETHERTYPE_IPV6: 2795 { 2796 struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen); 2797 struct tcphdr *th; 2798 pi->ipi_ip_hlen = sizeof(struct ip6_hdr); 2799 2800 if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) { 2801 if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL)) 2802 return (ENOMEM); 2803 } 2804 th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen); 2805 2806 /* XXX-BZ this will go badly in case of ext hdrs. 
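* (ip6_nxt is read from the fixed IPv6 header, so if extension headers
* sit between it and TCP, both the th pointer computed above and the
* ipi_ipproto value set below will be wrong)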
*/ 2807 pi->ipi_ipproto = ip6->ip6_nxt; 2808 pi->ipi_flags |= IPI_TX_IPV6; 2809 2810 if (IS_TSO6(pi)) { 2811 if (pi->ipi_ipproto == IPPROTO_TCP) { 2812 if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { 2813 if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) 2814 return (ENOMEM); 2815 } 2816 pi->ipi_tcp_hflags = th->th_flags; 2817 pi->ipi_tcp_hlen = th->th_off << 2; 2818 } 2819 2820 if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) 2821 return (ENXIO); 2822 /* 2823 * The corresponding flag is set by the stack in the IPv4 2824 * TSO case, but not in IPv6 (at least in FreeBSD 10.2). 2825 * So, set it here because the rest of the flow requires it. 2826 */ 2827 pi->ipi_csum_flags |= CSUM_TCP_IPV6; 2828 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 2829 pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; 2830 } 2831 break; 2832 } 2833 #endif 2834 default: 2835 pi->ipi_csum_flags &= ~CSUM_OFFLOAD; 2836 pi->ipi_ip_hlen = 0; 2837 break; 2838 } 2839 *mp = m; 2840 2841 return (0); 2842 } 2843 2844 static __noinline struct mbuf * 2845 collapse_pkthdr(struct mbuf *m0) 2846 { 2847 struct mbuf *m, *m_next, *tmp; 2848 2849 m = m0; 2850 m_next = m->m_next; 2851 while (m_next != NULL && m_next->m_len == 0) { 2852 m = m_next; 2853 m->m_next = NULL; 2854 m_free(m); 2855 m_next = m_next->m_next; 2856 } 2857 m = m0; 2858 m->m_next = m_next; 2859 if ((m_next->m_flags & M_EXT) == 0) { 2860 m = m_defrag(m, M_NOWAIT); 2861 } else { 2862 tmp = m_next->m_next; 2863 memcpy(m_next, m, MPKTHSIZE); 2864 m = m_next; 2865 m->m_next = tmp; 2866 } 2867 return (m); 2868 } 2869 2870 /* 2871 * If dodgy hardware rejects the scatter gather chain we've handed it 2872 * we'll need to remove the mbuf chain from ifsg_m[] before we can add the 2873 * m_defrag'd mbufs 2874 */ 2875 static __noinline struct mbuf * 2876 iflib_remove_mbuf(iflib_txq_t txq) 2877 { 2878 int ntxd, i, pidx; 2879 struct mbuf *m, *mh, **ifsd_m; 2880 2881 pidx = txq->ift_pidx; 2882 ifsd_m = txq->ift_sds.ifsd_m; 2883 ntxd = txq->ift_size; 2884 mh = m = ifsd_m[pidx]; 2885 ifsd_m[pidx] = NULL; 2886 #if MEMORY_LOGGING 2887 txq->ift_dequeued++; 2888 #endif 2889 i = 1; 2890 2891 while (m) { 2892 ifsd_m[(pidx + i) & (ntxd -1)] = NULL; 2893 #if MEMORY_LOGGING 2894 txq->ift_dequeued++; 2895 #endif 2896 m = m->m_next; 2897 i++; 2898 } 2899 return (mh); 2900 } 2901 2902 static int 2903 iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, 2904 struct mbuf **m0, bus_dma_segment_t *segs, int *nsegs, 2905 int max_segs, int flags) 2906 { 2907 if_ctx_t ctx; 2908 if_shared_ctx_t sctx; 2909 if_softc_ctx_t scctx; 2910 int i, next, pidx, err, ntxd, count; 2911 struct mbuf *m, *tmp, **ifsd_m; 2912 2913 m = *m0; 2914 2915 /* 2916 * Please don't ever do this 2917 */ 2918 if (__predict_false(m->m_len == 0)) 2919 *m0 = m = collapse_pkthdr(m); 2920 2921 ctx = txq->ift_ctx; 2922 sctx = ctx->ifc_sctx; 2923 scctx = &ctx->ifc_softc_ctx; 2924 ifsd_m = txq->ift_sds.ifsd_m; 2925 ntxd = txq->ift_size; 2926 pidx = txq->ift_pidx; 2927 if (map != NULL) { 2928 uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags; 2929 2930 err = bus_dmamap_load_mbuf_sg(tag, map, 2931 *m0, segs, nsegs, BUS_DMA_NOWAIT); 2932 if (err) 2933 return (err); 2934 ifsd_flags[pidx] |= TX_SW_DESC_MAPPED; 2935 count = 0; 2936 m = *m0; 2937 do { 2938 if (__predict_false(m->m_len <= 0)) { 2939 tmp = m; 2940 m = m->m_next; 2941 tmp->m_next = NULL; 2942 m_free(tmp); 2943 continue; 2944 } 2945 m = 
m->m_next; 2946 count++; 2947 } while (m != NULL); 2948 if (count > *nsegs) { 2949 ifsd_m[pidx] = *m0; 2950 ifsd_m[pidx]->m_flags |= M_TOOBIG; 2951 return (0); 2952 } 2953 m = *m0; 2954 count = 0; 2955 do { 2956 next = (pidx + count) & (ntxd-1); 2957 MPASS(ifsd_m[next] == NULL); 2958 ifsd_m[next] = m; 2959 count++; 2960 tmp = m; 2961 m = m->m_next; 2962 } while (m != NULL); 2963 } else { 2964 int buflen, sgsize, maxsegsz, max_sgsize; 2965 vm_offset_t vaddr; 2966 vm_paddr_t curaddr; 2967 2968 count = i = 0; 2969 m = *m0; 2970 if (m->m_pkthdr.csum_flags & CSUM_TSO) 2971 maxsegsz = scctx->isc_tx_tso_segsize_max; 2972 else 2973 maxsegsz = sctx->isc_tx_maxsegsize; 2974 2975 do { 2976 if (__predict_false(m->m_len <= 0)) { 2977 tmp = m; 2978 m = m->m_next; 2979 tmp->m_next = NULL; 2980 m_free(tmp); 2981 continue; 2982 } 2983 buflen = m->m_len; 2984 vaddr = (vm_offset_t)m->m_data; 2985 /* 2986 * see if we can't be smarter about physically 2987 * contiguous mappings 2988 */ 2989 next = (pidx + count) & (ntxd-1); 2990 MPASS(ifsd_m[next] == NULL); 2991 #if MEMORY_LOGGING 2992 txq->ift_enqueued++; 2993 #endif 2994 ifsd_m[next] = m; 2995 while (buflen > 0) { 2996 if (i >= max_segs) 2997 goto err; 2998 max_sgsize = MIN(buflen, maxsegsz); 2999 curaddr = pmap_kextract(vaddr); 3000 sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); 3001 sgsize = MIN(sgsize, max_sgsize); 3002 segs[i].ds_addr = curaddr; 3003 segs[i].ds_len = sgsize; 3004 vaddr += sgsize; 3005 buflen -= sgsize; 3006 i++; 3007 } 3008 count++; 3009 tmp = m; 3010 m = m->m_next; 3011 } while (m != NULL); 3012 *nsegs = i; 3013 } 3014 return (0); 3015 err: 3016 *m0 = iflib_remove_mbuf(txq); 3017 return (EFBIG); 3018 } 3019 3020 static inline caddr_t 3021 calc_next_txd(iflib_txq_t txq, int cidx, uint8_t qid) 3022 { 3023 qidx_t size; 3024 int ntxd; 3025 caddr_t start, end, cur, next; 3026 3027 ntxd = txq->ift_size; 3028 size = txq->ift_txd_size[qid]; 3029 start = txq->ift_ifdi[qid].idi_vaddr; 3030 3031 if (__predict_false(size == 0)) 3032 return (start); 3033 cur = start + size*cidx; 3034 end = start + size*ntxd; 3035 next = CACHE_PTR_NEXT(cur); 3036 return (next < end ? 
next : start); 3037 } 3038 3039 static int 3040 iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) 3041 { 3042 if_ctx_t ctx; 3043 if_shared_ctx_t sctx; 3044 if_softc_ctx_t scctx; 3045 bus_dma_segment_t *segs; 3046 struct mbuf *m_head; 3047 void *next_txd; 3048 bus_dmamap_t map; 3049 struct if_pkt_info pi; 3050 int remap = 0; 3051 int err, nsegs, ndesc, max_segs, pidx, cidx, next, ntxd; 3052 bus_dma_tag_t desc_tag; 3053 3054 segs = txq->ift_segs; 3055 ctx = txq->ift_ctx; 3056 sctx = ctx->ifc_sctx; 3057 scctx = &ctx->ifc_softc_ctx; 3058 segs = txq->ift_segs; 3059 ntxd = txq->ift_size; 3060 m_head = *m_headp; 3061 map = NULL; 3062 3063 /* 3064 * If we're doing TSO the next descriptor to clean may be quite far ahead 3065 */ 3066 cidx = txq->ift_cidx; 3067 pidx = txq->ift_pidx; 3068 if (ctx->ifc_flags & IFC_PREFETCH) { 3069 next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1); 3070 if (!(ctx->ifc_flags & IFLIB_HAS_TXCQ)) { 3071 next_txd = calc_next_txd(txq, cidx, 0); 3072 prefetch(next_txd); 3073 } 3074 3075 /* prefetch the next cache line of mbuf pointers and flags */ 3076 prefetch(&txq->ift_sds.ifsd_m[next]); 3077 if (txq->ift_sds.ifsd_map != NULL) { 3078 prefetch(&txq->ift_sds.ifsd_map[next]); 3079 next = (cidx + CACHE_LINE_SIZE) & (ntxd-1); 3080 prefetch(&txq->ift_sds.ifsd_flags[next]); 3081 } 3082 } else if (txq->ift_sds.ifsd_map != NULL) 3083 map = txq->ift_sds.ifsd_map[pidx]; 3084 3085 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3086 desc_tag = txq->ift_tso_desc_tag; 3087 max_segs = scctx->isc_tx_tso_segments_max; 3088 } else { 3089 desc_tag = txq->ift_desc_tag; 3090 max_segs = scctx->isc_tx_nsegments; 3091 } 3092 m_head = *m_headp; 3093 3094 pkt_info_zero(&pi); 3095 pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); 3096 pi.ipi_pidx = pidx; 3097 pi.ipi_qsidx = txq->ift_id; 3098 pi.ipi_len = m_head->m_pkthdr.len; 3099 pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; 3100 pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0; 3101 3102 /* deliberate bitwise OR to make one condition */ 3103 if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { 3104 if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) 3105 return (err); 3106 m_head = *m_headp; 3107 } 3108 3109 retry: 3110 err = iflib_busdma_load_mbuf_sg(txq, desc_tag, map, m_headp, segs, &nsegs, max_segs, BUS_DMA_NOWAIT); 3111 defrag: 3112 if (__predict_false(err)) { 3113 switch (err) { 3114 case EFBIG: 3115 /* try collapse once and defrag once */ 3116 if (remap == 0) 3117 m_head = m_collapse(*m_headp, M_NOWAIT, max_segs); 3118 if (remap == 1) 3119 m_head = m_defrag(*m_headp, M_NOWAIT); 3120 remap++; 3121 if (__predict_false(m_head == NULL)) 3122 goto defrag_failed; 3123 txq->ift_mbuf_defrag++; 3124 *m_headp = m_head; 3125 goto retry; 3126 break; 3127 case ENOMEM: 3128 txq->ift_no_tx_dma_setup++; 3129 break; 3130 default: 3131 txq->ift_no_tx_dma_setup++; 3132 m_freem(*m_headp); 3133 DBG_COUNTER_INC(tx_frees); 3134 *m_headp = NULL; 3135 break; 3136 } 3137 txq->ift_map_failed++; 3138 DBG_COUNTER_INC(encap_load_mbuf_fail); 3139 return (err); 3140 } 3141 3142 /* 3143 * XXX assumes a 1 to 1 relationship between segments and 3144 * descriptors - this does not hold true on all drivers, e.g. 
3145 * cxgb 3146 */ 3147 if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { 3148 txq->ift_no_desc_avail++; 3149 if (map != NULL) 3150 bus_dmamap_unload(desc_tag, map); 3151 DBG_COUNTER_INC(encap_txq_avail_fail); 3152 if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) 3153 GROUPTASK_ENQUEUE(&txq->ift_task); 3154 return (ENOBUFS); 3155 } 3156 /* 3157 * On Intel cards we can greatly reduce the number of TX interrupts 3158 * we see by only setting report status on every Nth descriptor. 3159 * However, this also means that the driver will need to keep track 3160 * of the descriptors that RS was set on to check them for the DD bit. 3161 */ 3162 txq->ift_rs_pending += nsegs + 1; 3163 if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) || 3164 iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs - 1) <= MAX_TX_DESC(ctx)) { 3165 pi.ipi_flags |= IPI_TX_INTR; 3166 txq->ift_rs_pending = 0; 3167 } 3168 3169 pi.ipi_segs = segs; 3170 pi.ipi_nsegs = nsegs; 3171 3172 MPASS(pidx >= 0 && pidx < txq->ift_size); 3173 #ifdef PKT_DEBUG 3174 print_pkt(&pi); 3175 #endif 3176 if (map != NULL) 3177 bus_dmamap_sync(desc_tag, map, BUS_DMASYNC_PREWRITE); 3178 if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) { 3179 if (map != NULL) 3180 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, 3181 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 3182 DBG_COUNTER_INC(tx_encap); 3183 MPASS(pi.ipi_new_pidx < txq->ift_size); 3184 3185 ndesc = pi.ipi_new_pidx - pi.ipi_pidx; 3186 if (pi.ipi_new_pidx < pi.ipi_pidx) { 3187 ndesc += txq->ift_size; 3188 txq->ift_gen = 1; 3189 } 3190 /* 3191 * drivers can need as many as 3192 * two sentinels 3193 */ 3194 MPASS(ndesc <= pi.ipi_nsegs + 2); 3195 MPASS(pi.ipi_new_pidx != pidx); 3196 MPASS(ndesc > 0); 3197 txq->ift_in_use += ndesc; 3198 3199 /* 3200 * We update the last software descriptor again here because there may 3201 * be a sentinel and/or there may be more mbufs than segments 3202 */ 3203 txq->ift_pidx = pi.ipi_new_pidx; 3204 txq->ift_npending += pi.ipi_ndescs; 3205 } else if (__predict_false(err == EFBIG && remap < 2)) { 3206 *m_headp = m_head = iflib_remove_mbuf(txq); 3207 remap = 1; 3208 txq->ift_txd_encap_efbig++; 3209 goto defrag; 3210 } else 3211 DBG_COUNTER_INC(encap_txd_encap_fail); 3212 return (err); 3213 3214 defrag_failed: 3215 txq->ift_mbuf_defrag_failed++; 3216 txq->ift_map_failed++; 3217 m_freem(*m_headp); 3218 DBG_COUNTER_INC(tx_frees); 3219 *m_headp = NULL; 3220 return (ENOMEM); 3221 } 3222 3223 static void 3224 iflib_tx_desc_free(iflib_txq_t txq, int n) 3225 { 3226 int hasmap; 3227 uint32_t qsize, cidx, mask, gen; 3228 struct mbuf *m, **ifsd_m; 3229 uint8_t *ifsd_flags; 3230 bus_dmamap_t *ifsd_map; 3231 bool do_prefetch; 3232 3233 cidx = txq->ift_cidx; 3234 gen = txq->ift_gen; 3235 qsize = txq->ift_size; 3236 mask = qsize-1; 3237 hasmap = txq->ift_sds.ifsd_map != NULL; 3238 ifsd_flags = txq->ift_sds.ifsd_flags; 3239 ifsd_m = txq->ift_sds.ifsd_m; 3240 ifsd_map = txq->ift_sds.ifsd_map; 3241 do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH); 3242 3243 while (n--) { 3244 if (do_prefetch) { 3245 prefetch(ifsd_m[(cidx + 3) & mask]); 3246 prefetch(ifsd_m[(cidx + 4) & mask]); 3247 } 3248 if (ifsd_m[cidx] != NULL) { 3249 prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]); 3250 prefetch(&ifsd_flags[(cidx + CACHE_PTR_INCREMENT) & mask]); 3251 if (hasmap && (ifsd_flags[cidx] & TX_SW_DESC_MAPPED)) { 3252 /* 3253 * does it matter if it's not the TSO tag? 
If so we'll 3254 * have to add the type to flags 3255 */ 3256 bus_dmamap_unload(txq->ift_desc_tag, ifsd_map[cidx]); 3257 ifsd_flags[cidx] &= ~TX_SW_DESC_MAPPED; 3258 } 3259 if ((m = ifsd_m[cidx]) != NULL) { 3260 /* XXX we don't support any drivers that batch packets yet */ 3261 MPASS(m->m_nextpkt == NULL); 3262 /* if the number of clusters exceeds the number of segments 3263 * there won't be space on the ring to save a pointer to each 3264 * cluster so we simply free the list here 3265 */ 3266 if (m->m_flags & M_TOOBIG) { 3267 m_freem(m); 3268 } else { 3269 m_free(m); 3270 } 3271 ifsd_m[cidx] = NULL; 3272 #if MEMORY_LOGGING 3273 txq->ift_dequeued++; 3274 #endif 3275 DBG_COUNTER_INC(tx_frees); 3276 } 3277 } 3278 if (__predict_false(++cidx == qsize)) { 3279 cidx = 0; 3280 gen = 0; 3281 } 3282 } 3283 txq->ift_cidx = cidx; 3284 txq->ift_gen = gen; 3285 } 3286 3287 static __inline int 3288 iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh) 3289 { 3290 int reclaim; 3291 if_ctx_t ctx = txq->ift_ctx; 3292 3293 KASSERT(thresh >= 0, ("invalid threshold to reclaim")); 3294 MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size); 3295 3296 /* 3297 * Need a rate-limiting check so that this isn't called every time 3298 */ 3299 iflib_tx_credits_update(ctx, txq); 3300 reclaim = DESC_RECLAIMABLE(txq); 3301 3302 if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) { 3303 #ifdef INVARIANTS 3304 if (iflib_verbose_debug) { 3305 printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__, 3306 txq->ift_processed, txq->ift_cleaned, txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments, 3307 reclaim, thresh); 3308 3309 } 3310 #endif 3311 return (0); 3312 } 3313 iflib_tx_desc_free(txq, reclaim); 3314 txq->ift_cleaned += reclaim; 3315 txq->ift_in_use -= reclaim; 3316 3317 return (reclaim); 3318 } 3319 3320 static struct mbuf ** 3321 _ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining) 3322 { 3323 int next, size; 3324 struct mbuf **items; 3325 3326 size = r->size; 3327 next = (cidx + CACHE_PTR_INCREMENT) & (size-1); 3328 items = __DEVOLATILE(struct mbuf **, &r->items[0]); 3329 3330 prefetch(items[(cidx + offset) & (size-1)]); 3331 if (remaining > 1) { 3332 prefetch2cachelines(&items[next]); 3333 prefetch2cachelines(items[(cidx + offset + 1) & (size-1)]); 3334 prefetch2cachelines(items[(cidx + offset + 2) & (size-1)]); 3335 prefetch2cachelines(items[(cidx + offset + 3) & (size-1)]); 3336 } 3337 return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)])); 3338 } 3339 3340 static void 3341 iflib_txq_check_drain(iflib_txq_t txq, int budget) 3342 { 3343 3344 ifmp_ring_check_drainage(txq->ift_br, budget); 3345 } 3346 3347 static uint32_t 3348 iflib_txq_can_drain(struct ifmp_ring *r) 3349 { 3350 iflib_txq_t txq = r->cookie; 3351 if_ctx_t ctx = txq->ift_ctx; 3352 3353 return ((TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) || 3354 ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)); 3355 } 3356 3357 static uint32_t 3358 iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) 3359 { 3360 iflib_txq_t txq = r->cookie; 3361 if_ctx_t ctx = txq->ift_ctx; 3362 struct ifnet *ifp = ctx->ifc_ifp; 3363 struct mbuf **mp, *m; 3364 int i, count, consumed, pkt_sent, bytes_sent, mcast_sent, avail; 3365 int reclaimed, err, in_use_prev, desc_used; 3366 bool do_prefetch, ring, rang; 3367 3368 if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) || 3369 !LINK_ACTIVE(ctx))) { 3370 DBG_COUNTER_INC(txq_drain_notready); 3371 return (0); 3372 } 3373 
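	/*
	 * Illustrative example of the doorbell deferral thresholds used by
	 * iflib_txd_db_check() below (values assumed purely for demonstration):
	 * with ift_size = 1024 and ift_update_freq = 16, TXD_NOTIFY_COUNT() is
	 * 1024/16 - 1 = 63 and minthresh is 1024/8 = 128, so
	 * txq_max_db_deferred() permits deferring up to 63 descriptors when
	 * more than 512 are in use, 31 above 256, 7 above 128, and forces an
	 * immediate doorbell otherwise.
	 */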
reclaimed = iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); 3374 rang = iflib_txd_db_check(ctx, txq, reclaimed, txq->ift_in_use); 3375 avail = IDXDIFF(pidx, cidx, r->size); 3376 if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { 3377 DBG_COUNTER_INC(txq_drain_flushing); 3378 for (i = 0; i < avail; i++) { 3379 m_free(r->items[(cidx + i) & (r->size-1)]); 3380 r->items[(cidx + i) & (r->size-1)] = NULL; 3381 } 3382 return (avail); 3383 } 3384 3385 if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { 3386 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 3387 CALLOUT_LOCK(txq); 3388 callout_stop(&txq->ift_timer); 3389 CALLOUT_UNLOCK(txq); 3390 DBG_COUNTER_INC(txq_drain_oactive); 3391 return (0); 3392 } 3393 if (reclaimed) 3394 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 3395 consumed = mcast_sent = bytes_sent = pkt_sent = 0; 3396 count = MIN(avail, TX_BATCH_SIZE); 3397 #ifdef INVARIANTS 3398 if (iflib_verbose_debug) 3399 printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__, 3400 avail, ctx->ifc_flags, TXQ_AVAIL(txq)); 3401 #endif 3402 do_prefetch = (ctx->ifc_flags & IFC_PREFETCH); 3403 avail = TXQ_AVAIL(txq); 3404 for (desc_used = i = 0; i < count && avail > MAX_TX_DESC(ctx) + 2; i++) { 3405 int pidx_prev, rem = do_prefetch ? count - i : 0; 3406 3407 mp = _ring_peek_one(r, cidx, i, rem); 3408 MPASS(mp != NULL && *mp != NULL); 3409 if (__predict_false(*mp == (struct mbuf *)txq)) { 3410 consumed++; 3411 reclaimed++; 3412 continue; 3413 } 3414 in_use_prev = txq->ift_in_use; 3415 pidx_prev = txq->ift_pidx; 3416 err = iflib_encap(txq, mp); 3417 if (__predict_false(err)) { 3418 DBG_COUNTER_INC(txq_drain_encapfail); 3419 /* no room - bail out */ 3420 if (err == ENOBUFS) 3421 break; 3422 consumed++; 3423 DBG_COUNTER_INC(txq_drain_encapfail); 3424 /* we can't send this packet - skip it */ 3425 continue; 3426 } 3427 consumed++; 3428 pkt_sent++; 3429 m = *mp; 3430 DBG_COUNTER_INC(tx_sent); 3431 bytes_sent += m->m_pkthdr.len; 3432 mcast_sent += !!(m->m_flags & M_MCAST); 3433 avail = TXQ_AVAIL(txq); 3434 3435 txq->ift_db_pending += (txq->ift_in_use - in_use_prev); 3436 desc_used += (txq->ift_in_use - in_use_prev); 3437 ETHER_BPF_MTAP(ifp, m); 3438 if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING))) 3439 break; 3440 rang = iflib_txd_db_check(ctx, txq, false, in_use_prev); 3441 } 3442 3443 /* deliberate use of bitwise or to avoid gratuitous short-circuit */ 3444 ring = rang ? 
false : (iflib_min_tx_latency | err) || (TXQ_AVAIL(txq) < MAX_TX_DESC(ctx)); 3445 iflib_txd_db_check(ctx, txq, ring, txq->ift_in_use); 3446 if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); 3447 if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); 3448 if (mcast_sent) 3449 if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); 3450 #ifdef INVARIANTS 3451 if (iflib_verbose_debug) 3452 printf("consumed=%d\n", consumed); 3453 #endif 3454 return (consumed); 3455 } 3456 3457 static uint32_t 3458 iflib_txq_drain_always(struct ifmp_ring *r) 3459 { 3460 return (1); 3461 } 3462 3463 static uint32_t 3464 iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) 3465 { 3466 int i, avail; 3467 struct mbuf **mp; 3468 iflib_txq_t txq; 3469 3470 txq = r->cookie; 3471 3472 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 3473 CALLOUT_LOCK(txq); 3474 callout_stop(&txq->ift_timer); 3475 CALLOUT_UNLOCK(txq); 3476 3477 avail = IDXDIFF(pidx, cidx, r->size); 3478 for (i = 0; i < avail; i++) { 3479 mp = _ring_peek_one(r, cidx, i, avail - i); 3480 if (__predict_false(*mp == (struct mbuf *)txq)) 3481 continue; 3482 m_freem(*mp); 3483 } 3484 MPASS(ifmp_ring_is_stalled(r) == 0); 3485 return (avail); 3486 } 3487 3488 static void 3489 iflib_ifmp_purge(iflib_txq_t txq) 3490 { 3491 struct ifmp_ring *r; 3492 3493 r = txq->ift_br; 3494 r->drain = iflib_txq_drain_free; 3495 r->can_drain = iflib_txq_drain_always; 3496 3497 ifmp_ring_check_drainage(r, r->size); 3498 3499 r->drain = iflib_txq_drain; 3500 r->can_drain = iflib_txq_can_drain; 3501 } 3502 3503 static void 3504 _task_fn_tx(void *context) 3505 { 3506 iflib_txq_t txq = context; 3507 if_ctx_t ctx = txq->ift_ctx; 3508 struct ifnet *ifp = ctx->ifc_ifp; 3509 int rc; 3510 3511 #ifdef IFLIB_DIAGNOSTICS 3512 txq->ift_cpu_exec_count[curcpu]++; 3513 #endif 3514 if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) 3515 return; 3516 if (if_getcapenable(ifp) & IFCAP_NETMAP) { 3517 if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)) 3518 netmap_tx_irq(ifp, txq->ift_id); 3519 IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); 3520 return; 3521 } 3522 if (txq->ift_db_pending) 3523 ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE); 3524 ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); 3525 if (ctx->ifc_flags & IFC_LEGACY) 3526 IFDI_INTR_ENABLE(ctx); 3527 else { 3528 rc = IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); 3529 KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); 3530 } 3531 } 3532 3533 static void 3534 _task_fn_rx(void *context) 3535 { 3536 iflib_rxq_t rxq = context; 3537 if_ctx_t ctx = rxq->ifr_ctx; 3538 bool more; 3539 int rc; 3540 uint16_t budget; 3541 3542 #ifdef IFLIB_DIAGNOSTICS 3543 rxq->ifr_cpu_exec_count[curcpu]++; 3544 #endif 3545 DBG_COUNTER_INC(task_fn_rxs); 3546 if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) 3547 return; 3548 more = true; 3549 #ifdef DEV_NETMAP 3550 if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) { 3551 u_int work = 0; 3552 if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work)) { 3553 more = false; 3554 } 3555 } 3556 #endif 3557 budget = ctx->ifc_sysctl_rx_budget; 3558 if (budget == 0) 3559 budget = 16; /* XXX */ 3560 if (more == false || (more = iflib_rxeof(rxq, budget)) == false) { 3561 if (ctx->ifc_flags & IFC_LEGACY) 3562 IFDI_INTR_ENABLE(ctx); 3563 else { 3564 DBG_COUNTER_INC(rx_intr_enables); 3565 rc = IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); 3566 KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not 
implemented in driver")); 3567 } 3568 } 3569 if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) 3570 return; 3571 if (more) 3572 GROUPTASK_ENQUEUE(&rxq->ifr_task); 3573 } 3574 3575 static void 3576 _task_fn_admin(void *context) 3577 { 3578 if_ctx_t ctx = context; 3579 if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; 3580 iflib_txq_t txq; 3581 int i; 3582 3583 if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) { 3584 if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { 3585 return; 3586 } 3587 } 3588 3589 CTX_LOCK(ctx); 3590 for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { 3591 CALLOUT_LOCK(txq); 3592 callout_stop(&txq->ift_timer); 3593 CALLOUT_UNLOCK(txq); 3594 } 3595 IFDI_UPDATE_ADMIN_STATUS(ctx); 3596 for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) 3597 callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); 3598 IFDI_LINK_INTR_ENABLE(ctx); 3599 if (ctx->ifc_flags & IFC_DO_RESET) { 3600 ctx->ifc_flags &= ~IFC_DO_RESET; 3601 iflib_if_init_locked(ctx); 3602 } 3603 CTX_UNLOCK(ctx); 3604 3605 if (LINK_ACTIVE(ctx) == 0) 3606 return; 3607 for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) 3608 iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); 3609 } 3610 3611 3612 static void 3613 _task_fn_iov(void *context) 3614 { 3615 if_ctx_t ctx = context; 3616 3617 if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) 3618 return; 3619 3620 CTX_LOCK(ctx); 3621 IFDI_VFLR_HANDLE(ctx); 3622 CTX_UNLOCK(ctx); 3623 } 3624 3625 static int 3626 iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS) 3627 { 3628 int err; 3629 if_int_delay_info_t info; 3630 if_ctx_t ctx; 3631 3632 info = (if_int_delay_info_t)arg1; 3633 ctx = info->iidi_ctx; 3634 info->iidi_req = req; 3635 info->iidi_oidp = oidp; 3636 CTX_LOCK(ctx); 3637 err = IFDI_SYSCTL_INT_DELAY(ctx, info); 3638 CTX_UNLOCK(ctx); 3639 return (err); 3640 } 3641 3642 /********************************************************************* 3643 * 3644 * IFNET FUNCTIONS 3645 * 3646 **********************************************************************/ 3647 3648 static void 3649 iflib_if_init_locked(if_ctx_t ctx) 3650 { 3651 iflib_stop(ctx); 3652 iflib_init_locked(ctx); 3653 } 3654 3655 3656 static void 3657 iflib_if_init(void *arg) 3658 { 3659 if_ctx_t ctx = arg; 3660 3661 CTX_LOCK(ctx); 3662 iflib_if_init_locked(ctx); 3663 CTX_UNLOCK(ctx); 3664 } 3665 3666 static int 3667 iflib_if_transmit(if_t ifp, struct mbuf *m) 3668 { 3669 if_ctx_t ctx = if_getsoftc(ifp); 3670 3671 iflib_txq_t txq; 3672 int err, qidx; 3673 3674 if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) { 3675 DBG_COUNTER_INC(tx_frees); 3676 m_freem(m); 3677 return (ENOBUFS); 3678 } 3679 3680 MPASS(m->m_nextpkt == NULL); 3681 qidx = 0; 3682 if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m)) 3683 qidx = QIDX(ctx, m); 3684 /* 3685 * XXX calculate buf_ring based on flowid (divvy up bits?) 
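* Illustrative example (assumed values): with isc_rss_table_mask = 127
* and 8 tx queue sets, a flowid of 0x1234 selects queue
* ((0x1234 & 127) % 8) = (52 % 8) = 4 via the QIDX() macro used above.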
3686 */ 3687 txq = &ctx->ifc_txqs[qidx]; 3688 3689 #ifdef DRIVER_BACKPRESSURE 3690 if (txq->ift_closed) { 3691 while (m != NULL) { 3692 next = m->m_nextpkt; 3693 m->m_nextpkt = NULL; 3694 m_freem(m); 3695 m = next; 3696 } 3697 return (ENOBUFS); 3698 } 3699 #endif 3700 #ifdef notyet 3701 qidx = count = 0; 3702 mp = marr; 3703 next = m; 3704 do { 3705 count++; 3706 next = next->m_nextpkt; 3707 } while (next != NULL); 3708 3709 if (count > nitems(marr)) 3710 if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) { 3711 /* XXX check nextpkt */ 3712 m_freem(m); 3713 /* XXX simplify for now */ 3714 DBG_COUNTER_INC(tx_frees); 3715 return (ENOBUFS); 3716 } 3717 for (next = m, i = 0; next != NULL; i++) { 3718 mp[i] = next; 3719 next = next->m_nextpkt; 3720 mp[i]->m_nextpkt = NULL; 3721 } 3722 #endif 3723 DBG_COUNTER_INC(tx_seen); 3724 err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE); 3725 3726 GROUPTASK_ENQUEUE(&txq->ift_task); 3727 if (err) { 3728 /* support forthcoming later */ 3729 #ifdef DRIVER_BACKPRESSURE 3730 txq->ift_closed = TRUE; 3731 #endif 3732 ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); 3733 m_freem(m); 3734 } 3735 3736 return (err); 3737 } 3738 3739 static void 3740 iflib_if_qflush(if_t ifp) 3741 { 3742 if_ctx_t ctx = if_getsoftc(ifp); 3743 iflib_txq_t txq = ctx->ifc_txqs; 3744 int i; 3745 3746 CTX_LOCK(ctx); 3747 ctx->ifc_flags |= IFC_QFLUSH; 3748 CTX_UNLOCK(ctx); 3749 for (i = 0; i < NTXQSETS(ctx); i++, txq++) 3750 while (!(ifmp_ring_is_idle(txq->ift_br) || ifmp_ring_is_stalled(txq->ift_br))) 3751 iflib_txq_check_drain(txq, 0); 3752 CTX_LOCK(ctx); 3753 ctx->ifc_flags &= ~IFC_QFLUSH; 3754 CTX_UNLOCK(ctx); 3755 3756 if_qflush(ifp); 3757 } 3758 3759 3760 #define IFCAP_FLAGS (IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ 3761 IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \ 3762 IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO) 3763 3764 static int 3765 iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) 3766 { 3767 if_ctx_t ctx = if_getsoftc(ifp); 3768 struct ifreq *ifr = (struct ifreq *)data; 3769 #if defined(INET) || defined(INET6) 3770 struct ifaddr *ifa = (struct ifaddr *)data; 3771 #endif 3772 bool avoid_reset = FALSE; 3773 int err = 0, reinit = 0, bits; 3774 3775 switch (command) { 3776 case SIOCSIFADDR: 3777 #ifdef INET 3778 if (ifa->ifa_addr->sa_family == AF_INET) 3779 avoid_reset = TRUE; 3780 #endif 3781 #ifdef INET6 3782 if (ifa->ifa_addr->sa_family == AF_INET6) 3783 avoid_reset = TRUE; 3784 #endif 3785 /* 3786 ** Calling init results in link renegotiation, 3787 ** so we avoid doing it when possible. 
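** (e.g. for SIOCSIFADDR with an INET or INET6 address the code below just
** marks the interface up and schedules a reinit only if it was not
** already running, rather than unconditionally re-initializing)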
3788 */ 3789 if (avoid_reset) { 3790 if_setflagbits(ifp, IFF_UP,0); 3791 if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING)) 3792 reinit = 1; 3793 #ifdef INET 3794 if (!(if_getflags(ifp) & IFF_NOARP)) 3795 arp_ifinit(ifp, ifa); 3796 #endif 3797 } else 3798 err = ether_ioctl(ifp, command, data); 3799 break; 3800 case SIOCSIFMTU: 3801 CTX_LOCK(ctx); 3802 if (ifr->ifr_mtu == if_getmtu(ifp)) { 3803 CTX_UNLOCK(ctx); 3804 break; 3805 } 3806 bits = if_getdrvflags(ifp); 3807 /* stop the driver and free any clusters before proceeding */ 3808 iflib_stop(ctx); 3809 3810 if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) { 3811 if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size) 3812 ctx->ifc_flags |= IFC_MULTISEG; 3813 else 3814 ctx->ifc_flags &= ~IFC_MULTISEG; 3815 err = if_setmtu(ifp, ifr->ifr_mtu); 3816 } 3817 iflib_init_locked(ctx); 3818 if_setdrvflags(ifp, bits); 3819 CTX_UNLOCK(ctx); 3820 break; 3821 case SIOCSIFFLAGS: 3822 CTX_LOCK(ctx); 3823 if (if_getflags(ifp) & IFF_UP) { 3824 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 3825 if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & 3826 (IFF_PROMISC | IFF_ALLMULTI)) { 3827 err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); 3828 } 3829 } else 3830 reinit = 1; 3831 } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 3832 iflib_stop(ctx); 3833 } 3834 ctx->ifc_if_flags = if_getflags(ifp); 3835 CTX_UNLOCK(ctx); 3836 break; 3837 case SIOCADDMULTI: 3838 case SIOCDELMULTI: 3839 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 3840 CTX_LOCK(ctx); 3841 IFDI_INTR_DISABLE(ctx); 3842 IFDI_MULTI_SET(ctx); 3843 IFDI_INTR_ENABLE(ctx); 3844 CTX_UNLOCK(ctx); 3845 } 3846 break; 3847 case SIOCSIFMEDIA: 3848 CTX_LOCK(ctx); 3849 IFDI_MEDIA_SET(ctx); 3850 CTX_UNLOCK(ctx); 3851 /* falls thru */ 3852 case SIOCGIFMEDIA: 3853 err = ifmedia_ioctl(ifp, ifr, &ctx->ifc_media, command); 3854 break; 3855 case SIOCGI2C: 3856 { 3857 struct ifi2creq i2c; 3858 3859 err = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); 3860 if (err != 0) 3861 break; 3862 if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { 3863 err = EINVAL; 3864 break; 3865 } 3866 if (i2c.len > sizeof(i2c.data)) { 3867 err = EINVAL; 3868 break; 3869 } 3870 3871 if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0) 3872 err = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); 3873 break; 3874 } 3875 case SIOCSIFCAP: 3876 { 3877 int mask, setmask; 3878 3879 mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); 3880 setmask = 0; 3881 #ifdef TCP_OFFLOAD 3882 setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6); 3883 #endif 3884 setmask |= (mask & IFCAP_FLAGS); 3885 3886 if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) 3887 setmask |= (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6); 3888 if ((mask & IFCAP_WOL) && 3889 (if_getcapabilities(ifp) & IFCAP_WOL) != 0) 3890 setmask |= (mask & (IFCAP_WOL_MCAST|IFCAP_WOL_MAGIC)); 3891 if_vlancap(ifp); 3892 /* 3893 * want to ensure that traffic has stopped before we change any of the flags 3894 */ 3895 if (setmask) { 3896 CTX_LOCK(ctx); 3897 bits = if_getdrvflags(ifp); 3898 if (bits & IFF_DRV_RUNNING) 3899 iflib_stop(ctx); 3900 if_togglecapenable(ifp, setmask); 3901 if (bits & IFF_DRV_RUNNING) 3902 iflib_init_locked(ctx); 3903 if_setdrvflags(ifp, bits); 3904 CTX_UNLOCK(ctx); 3905 } 3906 break; 3907 } 3908 case SIOCGPRIVATE_0: 3909 case SIOCSDRVSPEC: 3910 case SIOCGDRVSPEC: 3911 CTX_LOCK(ctx); 3912 err = IFDI_PRIV_IOCTL(ctx, command, data); 3913 CTX_UNLOCK(ctx); 3914 break; 3915 default: 3916 err = ether_ioctl(ifp, command, data); 3917 break; 3918 } 3919 if (reinit) 3920 iflib_if_init(ctx); 3921 return (err); 3922 } 3923 3924 static uint64_t 3925 
iflib_if_get_counter(if_t ifp, ift_counter cnt) 3926 { 3927 if_ctx_t ctx = if_getsoftc(ifp); 3928 3929 return (IFDI_GET_COUNTER(ctx, cnt)); 3930 } 3931 3932 /********************************************************************* 3933 * 3934 * OTHER FUNCTIONS EXPORTED TO THE STACK 3935 * 3936 **********************************************************************/ 3937 3938 static void 3939 iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) 3940 { 3941 if_ctx_t ctx = if_getsoftc(ifp); 3942 3943 if ((void *)ctx != arg) 3944 return; 3945 3946 if ((vtag == 0) || (vtag > 4095)) 3947 return; 3948 3949 CTX_LOCK(ctx); 3950 IFDI_VLAN_REGISTER(ctx, vtag); 3951 /* Re-init to load the changes */ 3952 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) 3953 iflib_if_init_locked(ctx); 3954 CTX_UNLOCK(ctx); 3955 } 3956 3957 static void 3958 iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) 3959 { 3960 if_ctx_t ctx = if_getsoftc(ifp); 3961 3962 if ((void *)ctx != arg) 3963 return; 3964 3965 if ((vtag == 0) || (vtag > 4095)) 3966 return; 3967 3968 CTX_LOCK(ctx); 3969 IFDI_VLAN_UNREGISTER(ctx, vtag); 3970 /* Re-init to load the changes */ 3971 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) 3972 iflib_if_init_locked(ctx); 3973 CTX_UNLOCK(ctx); 3974 } 3975 3976 static void 3977 iflib_led_func(void *arg, int onoff) 3978 { 3979 if_ctx_t ctx = arg; 3980 3981 CTX_LOCK(ctx); 3982 IFDI_LED_FUNC(ctx, onoff); 3983 CTX_UNLOCK(ctx); 3984 } 3985 3986 /********************************************************************* 3987 * 3988 * BUS FUNCTION DEFINITIONS 3989 * 3990 **********************************************************************/ 3991 3992 int 3993 iflib_device_probe(device_t dev) 3994 { 3995 pci_vendor_info_t *ent; 3996 3997 uint16_t pci_vendor_id, pci_device_id; 3998 uint16_t pci_subvendor_id, pci_subdevice_id; 3999 uint16_t pci_rev_id; 4000 if_shared_ctx_t sctx; 4001 4002 if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) 4003 return (ENOTSUP); 4004 4005 pci_vendor_id = pci_get_vendor(dev); 4006 pci_device_id = pci_get_device(dev); 4007 pci_subvendor_id = pci_get_subvendor(dev); 4008 pci_subdevice_id = pci_get_subdevice(dev); 4009 pci_rev_id = pci_get_revid(dev); 4010 if (sctx->isc_parse_devinfo != NULL) 4011 sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id); 4012 4013 ent = sctx->isc_vendor_info; 4014 while (ent->pvi_vendor_id != 0) { 4015 if (pci_vendor_id != ent->pvi_vendor_id) { 4016 ent++; 4017 continue; 4018 } 4019 if ((pci_device_id == ent->pvi_device_id) && 4020 ((pci_subvendor_id == ent->pvi_subvendor_id) || 4021 (ent->pvi_subvendor_id == 0)) && 4022 ((pci_subdevice_id == ent->pvi_subdevice_id) || 4023 (ent->pvi_subdevice_id == 0)) && 4024 ((pci_rev_id == ent->pvi_rev_id) || 4025 (ent->pvi_rev_id == 0))) { 4026 4027 device_set_desc_copy(dev, ent->pvi_name); 4028 /* this needs to be changed to zero if the bus probing code 4029 * ever stops re-probing on best match because the sctx 4030 * may have its values over written by register calls 4031 * in subsequent probes 4032 */ 4033 return (BUS_PROBE_DEFAULT); 4034 } 4035 ent++; 4036 } 4037 return (ENXIO); 4038 } 4039 4040 int 4041 iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) 4042 { 4043 int err, rid, msix, msix_bar; 4044 if_ctx_t ctx; 4045 if_t ifp; 4046 if_softc_ctx_t scctx; 4047 int i; 4048 uint16_t main_txq; 4049 uint16_t main_rxq; 4050 4051 4052 ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); 4053 4054 if (sc == NULL) { 4055 sc = 
malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); 4056 device_set_softc(dev, ctx); 4057 ctx->ifc_flags |= IFC_SC_ALLOCATED; 4058 } 4059 4060 ctx->ifc_sctx = sctx; 4061 ctx->ifc_dev = dev; 4062 ctx->ifc_softc = sc; 4063 4064 if ((err = iflib_register(ctx)) != 0) { 4065 device_printf(dev, "iflib_register failed %d\n", err); 4066 return (err); 4067 } 4068 iflib_add_device_sysctl_pre(ctx); 4069 4070 scctx = &ctx->ifc_softc_ctx; 4071 ifp = ctx->ifc_ifp; 4072 4073 /* 4074 * XXX sanity check that ntxd & nrxd are a power of 2 4075 */ 4076 if (ctx->ifc_sysctl_ntxqs != 0) 4077 scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; 4078 if (ctx->ifc_sysctl_nrxqs != 0) 4079 scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs; 4080 4081 for (i = 0; i < sctx->isc_ntxqs; i++) { 4082 if (ctx->ifc_sysctl_ntxds[i] != 0) 4083 scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i]; 4084 else 4085 scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; 4086 } 4087 4088 for (i = 0; i < sctx->isc_nrxqs; i++) { 4089 if (ctx->ifc_sysctl_nrxds[i] != 0) 4090 scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i]; 4091 else 4092 scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; 4093 } 4094 4095 for (i = 0; i < sctx->isc_nrxqs; i++) { 4096 if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) { 4097 device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n", 4098 i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]); 4099 scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i]; 4100 } 4101 if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) { 4102 device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n", 4103 i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]); 4104 scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i]; 4105 } 4106 } 4107 4108 for (i = 0; i < sctx->isc_ntxqs; i++) { 4109 if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) { 4110 device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n", 4111 i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]); 4112 scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i]; 4113 } 4114 if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) { 4115 device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n", 4116 i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); 4117 scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; 4118 } 4119 } 4120 4121 if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { 4122 device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); 4123 return (err); 4124 } 4125 _iflib_pre_assert(scctx); 4126 ctx->ifc_txrx = *scctx->isc_txrx; 4127 4128 #ifdef INVARIANTS 4129 MPASS(scctx->isc_capenable); 4130 if (scctx->isc_capenable & IFCAP_TXCSUM) 4131 MPASS(scctx->isc_tx_csum_flags); 4132 #endif 4133 4134 if_setcapabilities(ifp, scctx->isc_capenable | IFCAP_HWSTATS); 4135 if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS); 4136 4137 if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) 4138 scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; 4139 if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) 4140 scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; 4141 4142 #ifdef ACPI_DMAR 4143 if (dmar_get_dma_tag(device_get_parent(dev), dev) != NULL) 4144 ctx->ifc_flags |= IFC_DMAR; 4145 #elif !(defined(__i386__) || defined(__amd64__)) 4146 /* set unconditionally for !x86 */ 4147 ctx->ifc_flags |= IFC_DMAR; 4148 #endif 4149 4150 msix_bar = scctx->isc_msix_bar; 4151 main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; 4152 main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 
1 : 0;
4153
4154 	/* XXX change for per-queue sizes */
4155 	device_printf(dev, "using %d tx descriptors and %d rx descriptors\n",
4156 	    scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]);
4157 	for (i = 0; i < sctx->isc_nrxqs; i++) {
4158 		if (!powerof2(scctx->isc_nrxd[i])) {
4159 			/* round down instead? */
4160 			device_printf(dev, "# rx descriptors must be a power of 2\n");
4161 			err = EINVAL;
4162 			goto fail;
4163 		}
4164 	}
4165 	for (i = 0; i < sctx->isc_ntxqs; i++) {
4166 		if (!powerof2(scctx->isc_ntxd[i])) {
4167 			device_printf(dev,
4168 			    "# tx descriptors must be a power of 2\n");
4169 			err = EINVAL;
4170 			goto fail;
4171 		}
4172 	}
4173
4174 	if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] /
4175 	    MAX_SINGLE_PACKET_FRACTION)
4176 		scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] /
4177 		    MAX_SINGLE_PACKET_FRACTION);
4178 	if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] /
4179 	    MAX_SINGLE_PACKET_FRACTION)
4180 		scctx->isc_tx_tso_segments_max = max(1,
4181 		    scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION);
4182
4183 	/*
4184 	 * Protect the stack against modern hardware
4185 	 */
4186 	if (scctx->isc_tx_tso_size_max > FREEBSD_TSO_SIZE_MAX)
4187 		scctx->isc_tx_tso_size_max = FREEBSD_TSO_SIZE_MAX;
4188
4189 	/* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */
4190 	ifp->if_hw_tsomaxsegcount = scctx->isc_tx_tso_segments_max;
4191 	ifp->if_hw_tsomax = scctx->isc_tx_tso_size_max;
4192 	ifp->if_hw_tsomaxsegsize = scctx->isc_tx_tso_segsize_max;
4193 	if (scctx->isc_rss_table_size == 0)
4194 		scctx->isc_rss_table_size = 64;
4195 	scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1;
4196
4197 	GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx);
4198 	/* XXX format name */
4199 	taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin");
4200 	/*
4201 	** Now set up MSI or MSI-X; this should return
4202 	** the number of supported vectors
4203 	** (which will be 1 for MSI).
4204 	*/
4205 	if (sctx->isc_flags & IFLIB_SKIP_MSIX) {
4206 		msix = scctx->isc_vectors;
4207 	} else if (scctx->isc_msix_bar != 0)
4208 		/*
4209 		 * The simple fact that isc_msix_bar is not 0 does not mean
4210 		 * we have a good value there that is known to work.
4211 		 */
4212 		msix = iflib_msix_init(ctx);
4213 	else {
4214 		scctx->isc_vectors = 1;
4215 		scctx->isc_ntxqsets = 1;
4216 		scctx->isc_nrxqsets = 1;
4217 		scctx->isc_intr = IFLIB_INTR_LEGACY;
4218 		msix = 0;
4219 	}
4220 	/* Get memory for the station queues */
4221 	if ((err = iflib_queues_alloc(ctx))) {
4222 		device_printf(dev, "Unable to allocate queue memory\n");
4223 		goto fail;
4224 	}
4225
4226 	if ((err = iflib_qset_structures_setup(ctx))) {
4227 		device_printf(dev, "qset structure setup failed %d\n", err);
4228 		goto fail_queues;
4229 	}
4230
4231 	/*
4232 	 * Group taskqueues aren't properly set up until SMP is started,
4233 	 * so we disable interrupts until we can handle them post
4234 	 * SI_SUB_SMP.
4235 	 *
4236 	 * XXX: disabling interrupts doesn't actually work, at least for
4237 	 * the non-MSI case.  When they occur before SI_SUB_SMP completes,
4238 	 * we do null handling and depend on this not causing too large an
4239 	 * interrupt storm.
4240 */ 4241 IFDI_INTR_DISABLE(ctx); 4242 if (msix > 1 && (err = IFDI_MSIX_INTR_ASSIGN(ctx, msix)) != 0) { 4243 device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err); 4244 goto fail_intr_free; 4245 } 4246 if (msix <= 1) { 4247 rid = 0; 4248 if (scctx->isc_intr == IFLIB_INTR_MSI) { 4249 MPASS(msix == 1); 4250 rid = 1; 4251 } 4252 if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) { 4253 device_printf(dev, "iflib_legacy_setup failed %d\n", err); 4254 goto fail_intr_free; 4255 } 4256 } 4257 ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac); 4258 if ((err = IFDI_ATTACH_POST(ctx)) != 0) { 4259 device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); 4260 goto fail_detach; 4261 } 4262 if ((err = iflib_netmap_attach(ctx))) { 4263 device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err); 4264 goto fail_detach; 4265 } 4266 *ctxp = ctx; 4267 4268 if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); 4269 iflib_add_device_sysctl_post(ctx); 4270 ctx->ifc_flags |= IFC_INIT_DONE; 4271 return (0); 4272 fail_detach: 4273 ether_ifdetach(ctx->ifc_ifp); 4274 fail_intr_free: 4275 if (scctx->isc_intr == IFLIB_INTR_MSIX || scctx->isc_intr == IFLIB_INTR_MSI) 4276 pci_release_msi(ctx->ifc_dev); 4277 fail_queues: 4278 /* XXX free queues */ 4279 fail: 4280 IFDI_DETACH(ctx); 4281 return (err); 4282 } 4283 4284 int 4285 iflib_device_attach(device_t dev) 4286 { 4287 if_ctx_t ctx; 4288 if_shared_ctx_t sctx; 4289 4290 if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) 4291 return (ENOTSUP); 4292 4293 pci_enable_busmaster(dev); 4294 4295 return (iflib_device_register(dev, NULL, sctx, &ctx)); 4296 } 4297 4298 int 4299 iflib_device_deregister(if_ctx_t ctx) 4300 { 4301 if_t ifp = ctx->ifc_ifp; 4302 iflib_txq_t txq; 4303 iflib_rxq_t rxq; 4304 device_t dev = ctx->ifc_dev; 4305 int i, j; 4306 struct taskqgroup *tqg; 4307 iflib_fl_t fl; 4308 4309 /* Make sure VLANS are not using driver */ 4310 if (if_vlantrunkinuse(ifp)) { 4311 device_printf(dev,"Vlan in use, detach first\n"); 4312 return (EBUSY); 4313 } 4314 4315 CTX_LOCK(ctx); 4316 ctx->ifc_in_detach = 1; 4317 iflib_stop(ctx); 4318 CTX_UNLOCK(ctx); 4319 4320 /* Unregister VLAN events */ 4321 if (ctx->ifc_vlan_attach_event != NULL) 4322 EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event); 4323 if (ctx->ifc_vlan_detach_event != NULL) 4324 EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event); 4325 4326 iflib_netmap_detach(ifp); 4327 ether_ifdetach(ifp); 4328 /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ 4329 CTX_LOCK_DESTROY(ctx); 4330 if (ctx->ifc_led_dev != NULL) 4331 led_destroy(ctx->ifc_led_dev); 4332 /* XXX drain any dependent tasks */ 4333 tqg = qgroup_if_io_tqg; 4334 for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { 4335 callout_drain(&txq->ift_timer); 4336 if (txq->ift_task.gt_uniq != NULL) 4337 taskqgroup_detach(tqg, &txq->ift_task); 4338 } 4339 for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { 4340 if (rxq->ifr_task.gt_uniq != NULL) 4341 taskqgroup_detach(tqg, &rxq->ifr_task); 4342 4343 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) 4344 free(fl->ifl_rx_bitmap, M_IFLIB); 4345 4346 } 4347 tqg = qgroup_if_config_tqg; 4348 if (ctx->ifc_admin_task.gt_uniq != NULL) 4349 taskqgroup_detach(tqg, &ctx->ifc_admin_task); 4350 if (ctx->ifc_vflr_task.gt_uniq != NULL) 4351 taskqgroup_detach(tqg, &ctx->ifc_vflr_task); 4352 4353 IFDI_DETACH(ctx); 4354 device_set_softc(ctx->ifc_dev, NULL); 4355 if 
(ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { 4356 pci_release_msi(dev); 4357 } 4358 if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) { 4359 iflib_irq_free(ctx, &ctx->ifc_legacy_irq); 4360 } 4361 if (ctx->ifc_msix_mem != NULL) { 4362 bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY, 4363 ctx->ifc_softc_ctx.isc_msix_bar, ctx->ifc_msix_mem); 4364 ctx->ifc_msix_mem = NULL; 4365 } 4366 4367 bus_generic_detach(dev); 4368 if_free(ifp); 4369 4370 iflib_tx_structures_free(ctx); 4371 iflib_rx_structures_free(ctx); 4372 if (ctx->ifc_flags & IFC_SC_ALLOCATED) 4373 free(ctx->ifc_softc, M_IFLIB); 4374 free(ctx, M_IFLIB); 4375 return (0); 4376 } 4377 4378 4379 int 4380 iflib_device_detach(device_t dev) 4381 { 4382 if_ctx_t ctx = device_get_softc(dev); 4383 4384 return (iflib_device_deregister(ctx)); 4385 } 4386 4387 int 4388 iflib_device_suspend(device_t dev) 4389 { 4390 if_ctx_t ctx = device_get_softc(dev); 4391 4392 CTX_LOCK(ctx); 4393 IFDI_SUSPEND(ctx); 4394 CTX_UNLOCK(ctx); 4395 4396 return bus_generic_suspend(dev); 4397 } 4398 int 4399 iflib_device_shutdown(device_t dev) 4400 { 4401 if_ctx_t ctx = device_get_softc(dev); 4402 4403 CTX_LOCK(ctx); 4404 IFDI_SHUTDOWN(ctx); 4405 CTX_UNLOCK(ctx); 4406 4407 return bus_generic_suspend(dev); 4408 } 4409 4410 4411 int 4412 iflib_device_resume(device_t dev) 4413 { 4414 if_ctx_t ctx = device_get_softc(dev); 4415 iflib_txq_t txq = ctx->ifc_txqs; 4416 4417 CTX_LOCK(ctx); 4418 IFDI_RESUME(ctx); 4419 iflib_init_locked(ctx); 4420 CTX_UNLOCK(ctx); 4421 for (int i = 0; i < NTXQSETS(ctx); i++, txq++) 4422 iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); 4423 4424 return (bus_generic_resume(dev)); 4425 } 4426 4427 int 4428 iflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params) 4429 { 4430 int error; 4431 if_ctx_t ctx = device_get_softc(dev); 4432 4433 CTX_LOCK(ctx); 4434 error = IFDI_IOV_INIT(ctx, num_vfs, params); 4435 CTX_UNLOCK(ctx); 4436 4437 return (error); 4438 } 4439 4440 void 4441 iflib_device_iov_uninit(device_t dev) 4442 { 4443 if_ctx_t ctx = device_get_softc(dev); 4444 4445 CTX_LOCK(ctx); 4446 IFDI_IOV_UNINIT(ctx); 4447 CTX_UNLOCK(ctx); 4448 } 4449 4450 int 4451 iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) 4452 { 4453 int error; 4454 if_ctx_t ctx = device_get_softc(dev); 4455 4456 CTX_LOCK(ctx); 4457 error = IFDI_IOV_VF_ADD(ctx, vfnum, params); 4458 CTX_UNLOCK(ctx); 4459 4460 return (error); 4461 } 4462 4463 /********************************************************************* 4464 * 4465 * MODULE FUNCTION DEFINITIONS 4466 * 4467 **********************************************************************/ 4468 4469 /* 4470 * - Start a fast taskqueue thread for each core 4471 * - Start a taskqueue for control operations 4472 */ 4473 static int 4474 iflib_module_init(void) 4475 { 4476 return (0); 4477 } 4478 4479 static int 4480 iflib_module_event_handler(module_t mod, int what, void *arg) 4481 { 4482 int err; 4483 4484 switch (what) { 4485 case MOD_LOAD: 4486 if ((err = iflib_module_init()) != 0) 4487 return (err); 4488 break; 4489 case MOD_UNLOAD: 4490 return (EBUSY); 4491 default: 4492 return (EOPNOTSUPP); 4493 } 4494 4495 return (0); 4496 } 4497 4498 /********************************************************************* 4499 * 4500 * PUBLIC FUNCTION DEFINITIONS 4501 * ordered as in iflib.h 4502 * 4503 **********************************************************************/ 4504 4505 4506 static void 4507 _iflib_assert(if_shared_ctx_t sctx) 4508 { 4509 MPASS(sctx->isc_tx_maxsize); 4510 
MPASS(sctx->isc_tx_maxsegsize); 4511 4512 MPASS(sctx->isc_rx_maxsize); 4513 MPASS(sctx->isc_rx_nsegments); 4514 MPASS(sctx->isc_rx_maxsegsize); 4515 4516 MPASS(sctx->isc_nrxd_min[0]); 4517 MPASS(sctx->isc_nrxd_max[0]); 4518 MPASS(sctx->isc_nrxd_default[0]); 4519 MPASS(sctx->isc_ntxd_min[0]); 4520 MPASS(sctx->isc_ntxd_max[0]); 4521 MPASS(sctx->isc_ntxd_default[0]); 4522 } 4523 4524 static void 4525 _iflib_pre_assert(if_softc_ctx_t scctx) 4526 { 4527 4528 MPASS(scctx->isc_txrx->ift_txd_encap); 4529 MPASS(scctx->isc_txrx->ift_txd_flush); 4530 MPASS(scctx->isc_txrx->ift_txd_credits_update); 4531 MPASS(scctx->isc_txrx->ift_rxd_available); 4532 MPASS(scctx->isc_txrx->ift_rxd_pkt_get); 4533 MPASS(scctx->isc_txrx->ift_rxd_refill); 4534 MPASS(scctx->isc_txrx->ift_rxd_flush); 4535 } 4536 4537 static int 4538 iflib_register(if_ctx_t ctx) 4539 { 4540 if_shared_ctx_t sctx = ctx->ifc_sctx; 4541 driver_t *driver = sctx->isc_driver; 4542 device_t dev = ctx->ifc_dev; 4543 if_t ifp; 4544 4545 _iflib_assert(sctx); 4546 4547 CTX_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev)); 4548 4549 ifp = ctx->ifc_ifp = if_gethandle(IFT_ETHER); 4550 if (ifp == NULL) { 4551 device_printf(dev, "can not allocate ifnet structure\n"); 4552 return (ENOMEM); 4553 } 4554 4555 /* 4556 * Initialize our context's device specific methods 4557 */ 4558 kobj_init((kobj_t) ctx, (kobj_class_t) driver); 4559 kobj_class_compile((kobj_class_t) driver); 4560 driver->refs++; 4561 4562 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4563 if_setsoftc(ifp, ctx); 4564 if_setdev(ifp, dev); 4565 if_setinitfn(ifp, iflib_if_init); 4566 if_setioctlfn(ifp, iflib_if_ioctl); 4567 if_settransmitfn(ifp, iflib_if_transmit); 4568 if_setqflushfn(ifp, iflib_if_qflush); 4569 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); 4570 4571 ctx->ifc_vlan_attach_event = 4572 EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx, 4573 EVENTHANDLER_PRI_FIRST); 4574 ctx->ifc_vlan_detach_event = 4575 EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx, 4576 EVENTHANDLER_PRI_FIRST); 4577 4578 ifmedia_init(&ctx->ifc_media, IFM_IMASK, 4579 iflib_media_change, iflib_media_status); 4580 4581 return (0); 4582 } 4583 4584 4585 static int 4586 iflib_queues_alloc(if_ctx_t ctx) 4587 { 4588 if_shared_ctx_t sctx = ctx->ifc_sctx; 4589 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 4590 device_t dev = ctx->ifc_dev; 4591 int nrxqsets = scctx->isc_nrxqsets; 4592 int ntxqsets = scctx->isc_ntxqsets; 4593 iflib_txq_t txq; 4594 iflib_rxq_t rxq; 4595 iflib_fl_t fl = NULL; 4596 int i, j, cpu, err, txconf, rxconf; 4597 iflib_dma_info_t ifdip; 4598 uint32_t *rxqsizes = scctx->isc_rxqsizes; 4599 uint32_t *txqsizes = scctx->isc_txqsizes; 4600 uint8_t nrxqs = sctx->isc_nrxqs; 4601 uint8_t ntxqs = sctx->isc_ntxqs; 4602 int nfree_lists = sctx->isc_nfl ? 
sctx->isc_nfl : 1; 4603 caddr_t *vaddrs; 4604 uint64_t *paddrs; 4605 struct ifmp_ring **brscp; 4606 4607 KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1")); 4608 KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1")); 4609 4610 brscp = NULL; 4611 txq = NULL; 4612 rxq = NULL; 4613 4614 /* Allocate the TX ring struct memory */ 4615 if (!(txq = 4616 (iflib_txq_t) malloc(sizeof(struct iflib_txq) * 4617 ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { 4618 device_printf(dev, "Unable to allocate TX ring memory\n"); 4619 err = ENOMEM; 4620 goto fail; 4621 } 4622 4623 /* Now allocate the RX */ 4624 if (!(rxq = 4625 (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) * 4626 nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { 4627 device_printf(dev, "Unable to allocate RX ring memory\n"); 4628 err = ENOMEM; 4629 goto rx_fail; 4630 } 4631 4632 ctx->ifc_txqs = txq; 4633 ctx->ifc_rxqs = rxq; 4634 4635 /* 4636 * XXX handle allocation failure 4637 */ 4638 for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) { 4639 /* Set up some basics */ 4640 4641 if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { 4642 device_printf(dev, "failed to allocate iflib_dma_info\n"); 4643 err = ENOMEM; 4644 goto err_tx_desc; 4645 } 4646 txq->ift_ifdi = ifdip; 4647 for (j = 0; j < ntxqs; j++, ifdip++) { 4648 if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, BUS_DMA_NOWAIT)) { 4649 device_printf(dev, "Unable to allocate Descriptor memory\n"); 4650 err = ENOMEM; 4651 goto err_tx_desc; 4652 } 4653 txq->ift_txd_size[j] = scctx->isc_txd_size[j]; 4654 bzero((void *)ifdip->idi_vaddr, txqsizes[j]); 4655 } 4656 txq->ift_ctx = ctx; 4657 txq->ift_id = i; 4658 if (sctx->isc_flags & IFLIB_HAS_TXCQ) { 4659 txq->ift_br_offset = 1; 4660 } else { 4661 txq->ift_br_offset = 0; 4662 } 4663 /* XXX fix this */ 4664 txq->ift_timer.c_cpu = cpu; 4665 4666 if (iflib_txsd_alloc(txq)) { 4667 device_printf(dev, "Critical Failure setting up TX buffers\n"); 4668 err = ENOMEM; 4669 goto err_tx_desc; 4670 } 4671 4672 /* Initialize the TX lock */ 4673 snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:tx(%d):callout", 4674 device_get_nameunit(dev), txq->ift_id); 4675 mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF); 4676 callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0); 4677 4678 snprintf(txq->ift_db_mtx_name, MTX_NAME_LEN, "%s:tx(%d):db", 4679 device_get_nameunit(dev), txq->ift_id); 4680 4681 err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain, 4682 iflib_txq_can_drain, M_IFLIB, M_WAITOK); 4683 if (err) { 4684 /* XXX free any allocated rings */ 4685 device_printf(dev, "Unable to allocate buf_ring\n"); 4686 goto err_tx_desc; 4687 } 4688 } 4689 4690 for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) { 4691 /* Set up some basics */ 4692 4693 if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { 4694 device_printf(dev, "failed to allocate iflib_dma_info\n"); 4695 err = ENOMEM; 4696 goto err_tx_desc; 4697 } 4698 4699 rxq->ifr_ifdi = ifdip; 4700 /* XXX this needs to be changed if #rx queues != #tx queues */ 4701 rxq->ifr_ntxqirq = 1; 4702 rxq->ifr_txqid[0] = i; 4703 for (j = 0; j < nrxqs; j++, ifdip++) { 4704 if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, BUS_DMA_NOWAIT)) { 4705 device_printf(dev, "Unable to allocate Descriptor memory\n"); 4706 err = ENOMEM; 4707 goto err_tx_desc; 4708 } 4709 bzero((void *)ifdip->idi_vaddr, rxqsizes[j]); 4710 } 4711 rxq->ifr_ctx = ctx; 4712 rxq->ifr_id = i; 4713 if 
(sctx->isc_flags & IFLIB_HAS_RXCQ) { 4714 rxq->ifr_fl_offset = 1; 4715 } else { 4716 rxq->ifr_fl_offset = 0; 4717 } 4718 rxq->ifr_nfl = nfree_lists; 4719 if (!(fl = 4720 (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) { 4721 device_printf(dev, "Unable to allocate free list memory\n"); 4722 err = ENOMEM; 4723 goto err_tx_desc; 4724 } 4725 rxq->ifr_fl = fl; 4726 for (j = 0; j < nfree_lists; j++) { 4727 fl[j].ifl_rxq = rxq; 4728 fl[j].ifl_id = j; 4729 fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset]; 4730 fl[j].ifl_rxd_size = scctx->isc_rxd_size[j]; 4731 } 4732 /* Allocate receive buffers for the ring*/ 4733 if (iflib_rxsd_alloc(rxq)) { 4734 device_printf(dev, 4735 "Critical Failure setting up receive buffers\n"); 4736 err = ENOMEM; 4737 goto err_rx_desc; 4738 } 4739 4740 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) 4741 fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, M_WAITOK|M_ZERO); 4742 } 4743 4744 /* TXQs */ 4745 vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); 4746 paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); 4747 for (i = 0; i < ntxqsets; i++) { 4748 iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi; 4749 4750 for (j = 0; j < ntxqs; j++, di++) { 4751 vaddrs[i*ntxqs + j] = di->idi_vaddr; 4752 paddrs[i*ntxqs + j] = di->idi_paddr; 4753 } 4754 } 4755 if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets)) != 0) { 4756 device_printf(ctx->ifc_dev, "device queue allocation failed\n"); 4757 iflib_tx_structures_free(ctx); 4758 free(vaddrs, M_IFLIB); 4759 free(paddrs, M_IFLIB); 4760 goto err_rx_desc; 4761 } 4762 free(vaddrs, M_IFLIB); 4763 free(paddrs, M_IFLIB); 4764 4765 /* RXQs */ 4766 vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); 4767 paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); 4768 for (i = 0; i < nrxqsets; i++) { 4769 iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi; 4770 4771 for (j = 0; j < nrxqs; j++, di++) { 4772 vaddrs[i*nrxqs + j] = di->idi_vaddr; 4773 paddrs[i*nrxqs + j] = di->idi_paddr; 4774 } 4775 } 4776 if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs, nrxqsets)) != 0) { 4777 device_printf(ctx->ifc_dev, "device queue allocation failed\n"); 4778 iflib_tx_structures_free(ctx); 4779 free(vaddrs, M_IFLIB); 4780 free(paddrs, M_IFLIB); 4781 goto err_rx_desc; 4782 } 4783 free(vaddrs, M_IFLIB); 4784 free(paddrs, M_IFLIB); 4785 4786 return (0); 4787 4788 /* XXX handle allocation failure changes */ 4789 err_rx_desc: 4790 err_tx_desc: 4791 if (ctx->ifc_rxqs != NULL) 4792 free(ctx->ifc_rxqs, M_IFLIB); 4793 ctx->ifc_rxqs = NULL; 4794 if (ctx->ifc_txqs != NULL) 4795 free(ctx->ifc_txqs, M_IFLIB); 4796 ctx->ifc_txqs = NULL; 4797 rx_fail: 4798 if (brscp != NULL) 4799 free(brscp, M_IFLIB); 4800 if (rxq != NULL) 4801 free(rxq, M_IFLIB); 4802 if (txq != NULL) 4803 free(txq, M_IFLIB); 4804 fail: 4805 return (err); 4806 } 4807 4808 static int 4809 iflib_tx_structures_setup(if_ctx_t ctx) 4810 { 4811 iflib_txq_t txq = ctx->ifc_txqs; 4812 int i; 4813 4814 for (i = 0; i < NTXQSETS(ctx); i++, txq++) 4815 iflib_txq_setup(txq); 4816 4817 return (0); 4818 } 4819 4820 static void 4821 iflib_tx_structures_free(if_ctx_t ctx) 4822 { 4823 iflib_txq_t txq = ctx->ifc_txqs; 4824 int i, j; 4825 4826 for (i = 0; i < NTXQSETS(ctx); i++, txq++) { 4827 iflib_txq_destroy(txq); 4828 for (j = 0; j < ctx->ifc_nhwtxqs; j++) 4829 iflib_dma_free(&txq->ift_ifdi[j]); 4830 } 4831 free(ctx->ifc_txqs, M_IFLIB); 4832 ctx->ifc_txqs = NULL; 4833 
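	/* Let the driver free anything it allocated for the hardware queues
	 * (the counterpart to the IFDI_TX/RX_QUEUES_ALLOC calls made in
	 * iflib_queues_alloc()).
	 */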
IFDI_QUEUES_FREE(ctx); 4834 } 4835 4836 /********************************************************************* 4837 * 4838 * Initialize all receive rings. 4839 * 4840 **********************************************************************/ 4841 static int 4842 iflib_rx_structures_setup(if_ctx_t ctx) 4843 { 4844 iflib_rxq_t rxq = ctx->ifc_rxqs; 4845 int q; 4846 #if defined(INET6) || defined(INET) 4847 int i, err; 4848 #endif 4849 4850 for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) { 4851 #if defined(INET6) || defined(INET) 4852 tcp_lro_free(&rxq->ifr_lc); 4853 if ((err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp, 4854 TCP_LRO_ENTRIES, min(1024, 4855 ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]))) != 0) { 4856 device_printf(ctx->ifc_dev, "LRO Initialization failed!\n"); 4857 goto fail; 4858 } 4859 rxq->ifr_lro_enabled = TRUE; 4860 #endif 4861 IFDI_RXQ_SETUP(ctx, rxq->ifr_id); 4862 } 4863 return (0); 4864 #if defined(INET6) || defined(INET) 4865 fail: 4866 /* 4867 * Free RX software descriptors allocated so far, we will only handle 4868 * the rings that completed, the failing case will have 4869 * cleaned up for itself. 'q' failed, so its the terminus. 4870 */ 4871 rxq = ctx->ifc_rxqs; 4872 for (i = 0; i < q; ++i, rxq++) { 4873 iflib_rx_sds_free(rxq); 4874 rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; 4875 } 4876 return (err); 4877 #endif 4878 } 4879 4880 /********************************************************************* 4881 * 4882 * Free all receive rings. 4883 * 4884 **********************************************************************/ 4885 static void 4886 iflib_rx_structures_free(if_ctx_t ctx) 4887 { 4888 iflib_rxq_t rxq = ctx->ifc_rxqs; 4889 4890 for (int i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) { 4891 iflib_rx_sds_free(rxq); 4892 } 4893 } 4894 4895 static int 4896 iflib_qset_structures_setup(if_ctx_t ctx) 4897 { 4898 int err; 4899 4900 if ((err = iflib_tx_structures_setup(ctx)) != 0) 4901 return (err); 4902 4903 if ((err = iflib_rx_structures_setup(ctx)) != 0) { 4904 device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err); 4905 iflib_tx_structures_free(ctx); 4906 iflib_rx_structures_free(ctx); 4907 } 4908 return (err); 4909 } 4910 4911 int 4912 iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, 4913 driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, char *name) 4914 { 4915 4916 return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); 4917 } 4918 4919 static int 4920 find_nth(if_ctx_t ctx, cpuset_t *cpus, int qid) 4921 { 4922 int i, cpuid, eqid, count; 4923 4924 CPU_COPY(&ctx->ifc_cpus, cpus); 4925 count = CPU_COUNT(&ctx->ifc_cpus); 4926 eqid = qid % count; 4927 /* clear up to the qid'th bit */ 4928 for (i = 0; i < eqid; i++) { 4929 cpuid = CPU_FFS(cpus); 4930 MPASS(cpuid != 0); 4931 CPU_CLR(cpuid-1, cpus); 4932 } 4933 cpuid = CPU_FFS(cpus); 4934 MPASS(cpuid != 0); 4935 return (cpuid-1); 4936 } 4937 4938 int 4939 iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, 4940 iflib_intr_type_t type, driver_filter_t *filter, 4941 void *filter_arg, int qid, char *name) 4942 { 4943 struct grouptask *gtask; 4944 struct taskqgroup *tqg; 4945 iflib_filter_info_t info; 4946 cpuset_t cpus; 4947 gtask_fn_t *fn; 4948 int tqrid, err, cpuid; 4949 driver_filter_t *intr_fast; 4950 void *q; 4951 4952 info = &ctx->ifc_filter_info; 4953 tqrid = rid; 4954 4955 switch (type) { 4956 /* XXX merge tx/rx for netmap? 
*/ 4957 case IFLIB_INTR_TX: 4958 q = &ctx->ifc_txqs[qid]; 4959 info = &ctx->ifc_txqs[qid].ift_filter_info; 4960 gtask = &ctx->ifc_txqs[qid].ift_task; 4961 tqg = qgroup_if_io_tqg; 4962 fn = _task_fn_tx; 4963 intr_fast = iflib_fast_intr; 4964 GROUPTASK_INIT(gtask, 0, fn, q); 4965 break; 4966 case IFLIB_INTR_RX: 4967 q = &ctx->ifc_rxqs[qid]; 4968 info = &ctx->ifc_rxqs[qid].ifr_filter_info; 4969 gtask = &ctx->ifc_rxqs[qid].ifr_task; 4970 tqg = qgroup_if_io_tqg; 4971 fn = _task_fn_rx; 4972 intr_fast = iflib_fast_intr; 4973 GROUPTASK_INIT(gtask, 0, fn, q); 4974 break; 4975 case IFLIB_INTR_RXTX: 4976 q = &ctx->ifc_rxqs[qid]; 4977 info = &ctx->ifc_rxqs[qid].ifr_filter_info; 4978 gtask = &ctx->ifc_rxqs[qid].ifr_task; 4979 tqg = qgroup_if_io_tqg; 4980 fn = _task_fn_rx; 4981 intr_fast = iflib_fast_intr_rxtx; 4982 GROUPTASK_INIT(gtask, 0, fn, q); 4983 break; 4984 case IFLIB_INTR_ADMIN: 4985 q = ctx; 4986 tqrid = -1; 4987 info = &ctx->ifc_filter_info; 4988 gtask = &ctx->ifc_admin_task; 4989 tqg = qgroup_if_config_tqg; 4990 fn = _task_fn_admin; 4991 intr_fast = iflib_fast_intr_ctx; 4992 break; 4993 default: 4994 panic("unknown net intr type"); 4995 } 4996 4997 info->ifi_filter = filter; 4998 info->ifi_filter_arg = filter_arg; 4999 info->ifi_task = gtask; 5000 info->ifi_ctx = q; 5001 5002 err = _iflib_irq_alloc(ctx, irq, rid, intr_fast, NULL, info, name); 5003 if (err != 0) { 5004 device_printf(ctx->ifc_dev, "_iflib_irq_alloc failed %d\n", err); 5005 return (err); 5006 } 5007 if (type == IFLIB_INTR_ADMIN) 5008 return (0); 5009 5010 if (tqrid != -1) { 5011 cpuid = find_nth(ctx, &cpus, qid); 5012 taskqgroup_attach_cpu(tqg, gtask, q, cpuid, rman_get_start(irq->ii_res), name); 5013 } else { 5014 taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name); 5015 } 5016 5017 return (0); 5018 } 5019 5020 void 5021 iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, char *name) 5022 { 5023 struct grouptask *gtask; 5024 struct taskqgroup *tqg; 5025 gtask_fn_t *fn; 5026 void *q; 5027 int irq_num = -1; 5028 5029 switch (type) { 5030 case IFLIB_INTR_TX: 5031 q = &ctx->ifc_txqs[qid]; 5032 gtask = &ctx->ifc_txqs[qid].ift_task; 5033 tqg = qgroup_if_io_tqg; 5034 fn = _task_fn_tx; 5035 if (irq != NULL) 5036 irq_num = rman_get_start(irq->ii_res); 5037 break; 5038 case IFLIB_INTR_RX: 5039 q = &ctx->ifc_rxqs[qid]; 5040 gtask = &ctx->ifc_rxqs[qid].ifr_task; 5041 tqg = qgroup_if_io_tqg; 5042 fn = _task_fn_rx; 5043 if (irq != NULL) 5044 irq_num = rman_get_start(irq->ii_res); 5045 break; 5046 case IFLIB_INTR_IOV: 5047 q = ctx; 5048 gtask = &ctx->ifc_vflr_task; 5049 tqg = qgroup_if_config_tqg; 5050 fn = _task_fn_iov; 5051 break; 5052 default: 5053 panic("unknown net intr type"); 5054 } 5055 GROUPTASK_INIT(gtask, 0, fn, q); 5056 taskqgroup_attach(tqg, gtask, q, irq_num, name); 5057 } 5058 5059 void 5060 iflib_irq_free(if_ctx_t ctx, if_irq_t irq) 5061 { 5062 if (irq->ii_tag) 5063 bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag); 5064 5065 if (irq->ii_res) 5066 bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, irq->ii_rid, irq->ii_res); 5067 } 5068 5069 static int 5070 iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, char *name) 5071 { 5072 iflib_txq_t txq = ctx->ifc_txqs; 5073 iflib_rxq_t rxq = ctx->ifc_rxqs; 5074 if_irq_t irq = &ctx->ifc_legacy_irq; 5075 iflib_filter_info_t info; 5076 struct grouptask *gtask; 5077 struct taskqgroup *tqg; 5078 gtask_fn_t *fn; 5079 int tqrid; 5080 void *q; 5081 int err; 5082 5083 q = &ctx->ifc_rxqs[0]; 
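	/* In legacy/MSI mode a single vector services everything, so the
	 * shared filter info and deferred work are hung off RX queue 0.
	 */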
5084 info = &rxq[0].ifr_filter_info; 5085 gtask = &rxq[0].ifr_task; 5086 tqg = qgroup_if_io_tqg; 5087 tqrid = irq->ii_rid = *rid; 5088 fn = _task_fn_rx; 5089 5090 ctx->ifc_flags |= IFC_LEGACY; 5091 info->ifi_filter = filter; 5092 info->ifi_filter_arg = filter_arg; 5093 info->ifi_task = gtask; 5094 info->ifi_ctx = ctx; 5095 5096 /* We allocate a single interrupt resource */ 5097 if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr_ctx, NULL, info, name)) != 0) 5098 return (err); 5099 GROUPTASK_INIT(gtask, 0, fn, q); 5100 taskqgroup_attach(tqg, gtask, q, tqrid, name); 5101 5102 GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); 5103 taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, tqrid, "tx"); 5104 return (0); 5105 } 5106 5107 void 5108 iflib_led_create(if_ctx_t ctx) 5109 { 5110 5111 ctx->ifc_led_dev = led_create(iflib_led_func, ctx, 5112 device_get_nameunit(ctx->ifc_dev)); 5113 } 5114 5115 void 5116 iflib_tx_intr_deferred(if_ctx_t ctx, int txqid) 5117 { 5118 5119 GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); 5120 } 5121 5122 void 5123 iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid) 5124 { 5125 5126 GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task); 5127 } 5128 5129 void 5130 iflib_admin_intr_deferred(if_ctx_t ctx) 5131 { 5132 #ifdef INVARIANTS 5133 struct grouptask *gtask; 5134 5135 gtask = &ctx->ifc_admin_task; 5136 MPASS(gtask != NULL && gtask->gt_taskqueue != NULL); 5137 #endif 5138 5139 GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); 5140 } 5141 5142 void 5143 iflib_iov_intr_deferred(if_ctx_t ctx) 5144 { 5145 5146 GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task); 5147 } 5148 5149 void 5150 iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name) 5151 { 5152 5153 taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, -1, name); 5154 } 5155 5156 void 5157 iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn, 5158 char *name) 5159 { 5160 5161 GROUPTASK_INIT(gtask, 0, fn, ctx); 5162 taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, -1, name); 5163 } 5164 5165 void 5166 iflib_config_gtask_deinit(struct grouptask *gtask) 5167 { 5168 5169 taskqgroup_detach(qgroup_if_config_tqg, gtask); 5170 } 5171 5172 void 5173 iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate) 5174 { 5175 if_t ifp = ctx->ifc_ifp; 5176 iflib_txq_t txq = ctx->ifc_txqs; 5177 5178 if_setbaudrate(ifp, baudrate); 5179 if (baudrate >= IF_Gbps(10)) 5180 ctx->ifc_flags |= IFC_PREFETCH; 5181 5182 /* If link down, disable watchdog */ 5183 if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) { 5184 for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++) 5185 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 5186 } 5187 ctx->ifc_link_state = link_state; 5188 if_link_state_change(ifp, link_state); 5189 } 5190 5191 static int 5192 iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) 5193 { 5194 int credits; 5195 #ifdef INVARIANTS 5196 int credits_pre = txq->ift_cidx_processed; 5197 #endif 5198 5199 if (ctx->isc_txd_credits_update == NULL) 5200 return (0); 5201 5202 if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0) 5203 return (0); 5204 5205 txq->ift_processed += credits; 5206 txq->ift_cidx_processed += credits; 5207 5208 MPASS(credits_pre + credits == txq->ift_cidx_processed); 5209 if (txq->ift_cidx_processed >= txq->ift_size) 5210 txq->ift_cidx_processed -= txq->ift_size; 5211 return (credits); 5212 } 5213 5214 static int 5215 iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget) 5216 
{
5217
5218 	return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx,
5219 	    budget));
5220 }
5221
5222 void
5223 iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name,
5224 	const char *description, if_int_delay_info_t info,
5225 	int offset, int value)
5226 {
5227 	info->iidi_ctx = ctx;
5228 	info->iidi_offset = offset;
5229 	info->iidi_value = value;
5230 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev),
5231 	    SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)),
5232 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5233 	    info, 0, iflib_sysctl_int_delay, "I", description);
5234 }
5235
5236 struct mtx *
5237 iflib_ctx_lock_get(if_ctx_t ctx)
5238 {
5239
5240 	return (&ctx->ifc_mtx);
5241 }
5242
5243 static int
5244 iflib_msix_init(if_ctx_t ctx)
5245 {
5246 	device_t dev = ctx->ifc_dev;
5247 	if_shared_ctx_t sctx = ctx->ifc_sctx;
5248 	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
5249 	int vectors, queues, rx_queues, tx_queues, queuemsgs, msgs;
5250 	int iflib_num_tx_queues, iflib_num_rx_queues;
5251 	int err, admincnt, bar;
5252
5253 	iflib_num_tx_queues = scctx->isc_ntxqsets;
5254 	iflib_num_rx_queues = scctx->isc_nrxqsets;
5255
5256 	device_printf(dev, "msix_init qsets capped at %d\n", iflib_num_tx_queues);
5257
5258 	bar = ctx->ifc_softc_ctx.isc_msix_bar;
5259 	admincnt = sctx->isc_admin_intrcnt;
5260 	/* Override by global tunable */
5261 	{
5262 		int i;
5263 		size_t len = sizeof(i);
5264 		err = kernel_sysctlbyname(curthread, "hw.pci.enable_msix", &i, &len, NULL, 0, NULL, 0);
5265 		if (err == 0) {
5266 			if (i == 0)
5267 				goto msi;
5268 		}
5269 		else {
5270 			device_printf(dev, "unable to read hw.pci.enable_msix.\n");
5271 		}
5272 	}
5273 	/* Override by tunable */
5274 	if (scctx->isc_disable_msix)
5275 		goto msi;
5276
5277 	/*
5278 	** When used in a virtualized environment
5279 	** the PCI BUSMASTER capability may not be set,
5280 	** so explicitly set it here and rewrite
5281 	** the ENABLE bit in the MSI-X control register
5282 	** at this point so that the host can
5283 	** successfully initialize us.
5284 	*/
5285 	{
5286 		int msix_ctrl, rid;
5287
5288 		pci_enable_busmaster(dev);
5289 		rid = 0;
5290 		if (pci_find_cap(dev, PCIY_MSIX, &rid) == 0 && rid != 0) {
5291 			rid += PCIR_MSIX_CTRL;
5292 			msix_ctrl = pci_read_config(dev, rid, 2);
5293 			msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
5294 			pci_write_config(dev, rid, msix_ctrl, 2);
5295 		} else {
5296 			device_printf(dev, "PCIY_MSIX capability not found; "
5297 			    "or rid %d == 0.\n", rid);
5298 			goto msi;
5299 		}
5300 	}
5301
5302 	/*
5303 	 * bar == -1 => "trust me I know what I'm doing"
5304 	 * Some drivers are for hardware that is so shoddily
5305 	 * documented that no one knows which bars are which
5306 	 * so the developer has to map all bars.  This hack
5307 	 * allows shoddy garbage to use msix in this framework.
5308 */ 5309 if (bar != -1) { 5310 ctx->ifc_msix_mem = bus_alloc_resource_any(dev, 5311 SYS_RES_MEMORY, &bar, RF_ACTIVE); 5312 if (ctx->ifc_msix_mem == NULL) { 5313 /* May not be enabled */ 5314 device_printf(dev, "Unable to map MSIX table \n"); 5315 goto msi; 5316 } 5317 } 5318 /* First try MSI/X */ 5319 if ((msgs = pci_msix_count(dev)) == 0) { /* system has msix disabled */ 5320 device_printf(dev, "System has MSIX disabled \n"); 5321 bus_release_resource(dev, SYS_RES_MEMORY, 5322 bar, ctx->ifc_msix_mem); 5323 ctx->ifc_msix_mem = NULL; 5324 goto msi; 5325 } 5326 #if IFLIB_DEBUG 5327 /* use only 1 qset in debug mode */ 5328 queuemsgs = min(msgs - admincnt, 1); 5329 #else 5330 queuemsgs = msgs - admincnt; 5331 #endif 5332 if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) == 0) { 5333 #ifdef RSS 5334 queues = imin(queuemsgs, rss_getnumbuckets()); 5335 #else 5336 queues = queuemsgs; 5337 #endif 5338 queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues); 5339 device_printf(dev, "pxm cpus: %d queue msgs: %d admincnt: %d\n", 5340 CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); 5341 } else { 5342 device_printf(dev, "Unable to fetch CPU list\n"); 5343 /* Figure out a reasonable auto config value */ 5344 queues = min(queuemsgs, mp_ncpus); 5345 } 5346 #ifdef RSS 5347 /* If we're doing RSS, clamp at the number of RSS buckets */ 5348 if (queues > rss_getnumbuckets()) 5349 queues = rss_getnumbuckets(); 5350 #endif 5351 if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt) 5352 rx_queues = iflib_num_rx_queues; 5353 else 5354 rx_queues = queues; 5355 /* 5356 * We want this to be all logical CPUs by default 5357 */ 5358 if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues) 5359 tx_queues = iflib_num_tx_queues; 5360 else 5361 tx_queues = mp_ncpus; 5362 5363 if (ctx->ifc_sysctl_qs_eq_override == 0) { 5364 #ifdef INVARIANTS 5365 if (tx_queues != rx_queues) 5366 device_printf(dev, "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n", 5367 min(rx_queues, tx_queues), min(rx_queues, tx_queues)); 5368 #endif 5369 tx_queues = min(rx_queues, tx_queues); 5370 rx_queues = min(rx_queues, tx_queues); 5371 } 5372 5373 device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues); 5374 5375 vectors = rx_queues + admincnt; 5376 if ((err = pci_alloc_msix(dev, &vectors)) == 0) { 5377 device_printf(dev, 5378 "Using MSIX interrupts with %d vectors\n", vectors); 5379 scctx->isc_vectors = vectors; 5380 scctx->isc_nrxqsets = rx_queues; 5381 scctx->isc_ntxqsets = tx_queues; 5382 scctx->isc_intr = IFLIB_INTR_MSIX; 5383 5384 return (vectors); 5385 } else { 5386 device_printf(dev, "failed to allocate %d msix vectors, err: %d - using MSI\n", vectors, err); 5387 } 5388 msi: 5389 vectors = pci_msi_count(dev); 5390 scctx->isc_nrxqsets = 1; 5391 scctx->isc_ntxqsets = 1; 5392 scctx->isc_vectors = vectors; 5393 if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) { 5394 device_printf(dev,"Using an MSI interrupt\n"); 5395 scctx->isc_intr = IFLIB_INTR_MSI; 5396 } else { 5397 device_printf(dev,"Using a Legacy interrupt\n"); 5398 scctx->isc_intr = IFLIB_INTR_LEGACY; 5399 } 5400 5401 return (vectors); 5402 } 5403 5404 char * ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" }; 5405 5406 static int 5407 mp_ring_state_handler(SYSCTL_HANDLER_ARGS) 5408 { 5409 int rc; 5410 uint16_t *state = ((uint16_t *)oidp->oid_arg1); 5411 struct sbuf *sb; 5412 char *ring_state = "UNKNOWN"; 5413 5414 /* XXX needed ? 
*/ 5415 rc = sysctl_wire_old_buffer(req, 0); 5416 MPASS(rc == 0); 5417 if (rc != 0) 5418 return (rc); 5419 sb = sbuf_new_for_sysctl(NULL, NULL, 80, req); 5420 MPASS(sb != NULL); 5421 if (sb == NULL) 5422 return (ENOMEM); 5423 if (state[3] <= 3) 5424 ring_state = ring_states[state[3]]; 5425 5426 sbuf_printf(sb, "pidx_head: %04hd pidx_tail: %04hd cidx: %04hd state: %s", 5427 state[0], state[1], state[2], ring_state); 5428 rc = sbuf_finish(sb); 5429 sbuf_delete(sb); 5430 return(rc); 5431 } 5432 5433 enum iflib_ndesc_handler { 5434 IFLIB_NTXD_HANDLER, 5435 IFLIB_NRXD_HANDLER, 5436 }; 5437 5438 static int 5439 mp_ndesc_handler(SYSCTL_HANDLER_ARGS) 5440 { 5441 if_ctx_t ctx = (void *)arg1; 5442 enum iflib_ndesc_handler type = arg2; 5443 char buf[256] = {0}; 5444 qidx_t *ndesc; 5445 char *p, *next; 5446 int nqs, rc, i; 5447 5448 MPASS(type == IFLIB_NTXD_HANDLER || type == IFLIB_NRXD_HANDLER); 5449 5450 nqs = 8; 5451 switch(type) { 5452 case IFLIB_NTXD_HANDLER: 5453 ndesc = ctx->ifc_sysctl_ntxds; 5454 if (ctx->ifc_sctx) 5455 nqs = ctx->ifc_sctx->isc_ntxqs; 5456 break; 5457 case IFLIB_NRXD_HANDLER: 5458 ndesc = ctx->ifc_sysctl_nrxds; 5459 if (ctx->ifc_sctx) 5460 nqs = ctx->ifc_sctx->isc_nrxqs; 5461 break; 5462 } 5463 if (nqs == 0) 5464 nqs = 8; 5465 5466 for (i=0; i<8; i++) { 5467 if (i >= nqs) 5468 break; 5469 if (i) 5470 strcat(buf, ","); 5471 sprintf(strchr(buf, 0), "%d", ndesc[i]); 5472 } 5473 5474 rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); 5475 if (rc || req->newptr == NULL) 5476 return rc; 5477 5478 for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p; 5479 i++, p = strsep(&next, " ,")) { 5480 ndesc[i] = strtoul(p, NULL, 10); 5481 } 5482 5483 return(rc); 5484 } 5485 5486 #define NAME_BUFLEN 32 5487 static void 5488 iflib_add_device_sysctl_pre(if_ctx_t ctx) 5489 { 5490 device_t dev = iflib_get_dev(ctx); 5491 struct sysctl_oid_list *child, *oid_list; 5492 struct sysctl_ctx_list *ctx_list; 5493 struct sysctl_oid *node; 5494 5495 ctx_list = device_get_sysctl_ctx(dev); 5496 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 5497 ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib", 5498 CTLFLAG_RD, NULL, "IFLIB fields"); 5499 oid_list = SYSCTL_CHILDREN(node); 5500 5501 SYSCTL_ADD_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", 5502 CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, 0, 5503 "driver version"); 5504 5505 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", 5506 CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, 5507 "# of txqs to use, 0 => use default #"); 5508 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", 5509 CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, 5510 "# of rxqs to use, 0 => use default #"); 5511 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", 5512 CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, 5513 "permit #txq != #rxq"); 5514 SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", 5515 CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, 5516 "disable MSIX (default 0)"); 5517 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", 5518 CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, 5519 "set the rx budget"); 5520 5521 /* XXX change for per-queue sizes */ 5522 SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", 5523 CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER, 5524 mp_ndesc_handler, "A", 5525 "list of # of tx descriptors to use, 0 = use default #"); 5526 SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", 5527 CTLTYPE_STRING|CTLFLAG_RWTUN, 
ctx, IFLIB_NRXD_HANDLER, 5528 mp_ndesc_handler, "A", 5529 "list of # of rx descriptors to use, 0 = use default #"); 5530 } 5531 5532 static void 5533 iflib_add_device_sysctl_post(if_ctx_t ctx) 5534 { 5535 if_shared_ctx_t sctx = ctx->ifc_sctx; 5536 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 5537 device_t dev = iflib_get_dev(ctx); 5538 struct sysctl_oid_list *child; 5539 struct sysctl_ctx_list *ctx_list; 5540 iflib_fl_t fl; 5541 iflib_txq_t txq; 5542 iflib_rxq_t rxq; 5543 int i, j; 5544 char namebuf[NAME_BUFLEN]; 5545 char *qfmt; 5546 struct sysctl_oid *queue_node, *fl_node, *node; 5547 struct sysctl_oid_list *queue_list, *fl_list; 5548 ctx_list = device_get_sysctl_ctx(dev); 5549 5550 node = ctx->ifc_sysctl_node; 5551 child = SYSCTL_CHILDREN(node); 5552 5553 if (scctx->isc_ntxqsets > 100) 5554 qfmt = "txq%03d"; 5555 else if (scctx->isc_ntxqsets > 10) 5556 qfmt = "txq%02d"; 5557 else 5558 qfmt = "txq%d"; 5559 for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) { 5560 snprintf(namebuf, NAME_BUFLEN, qfmt, i); 5561 queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, 5562 CTLFLAG_RD, NULL, "Queue Name"); 5563 queue_list = SYSCTL_CHILDREN(queue_node); 5564 #if MEMORY_LOGGING 5565 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued", 5566 CTLFLAG_RD, 5567 &txq->ift_dequeued, "total mbufs freed"); 5568 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued", 5569 CTLFLAG_RD, 5570 &txq->ift_enqueued, "total mbufs enqueued"); 5571 #endif 5572 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag", 5573 CTLFLAG_RD, 5574 &txq->ift_mbuf_defrag, "# of times m_defrag was called"); 5575 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups", 5576 CTLFLAG_RD, 5577 &txq->ift_pullups, "# of times m_pullup was called"); 5578 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed", 5579 CTLFLAG_RD, 5580 &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed"); 5581 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail", 5582 CTLFLAG_RD, 5583 &txq->ift_no_desc_avail, "# of times no descriptors were available"); 5584 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed", 5585 CTLFLAG_RD, 5586 &txq->ift_map_failed, "# of times dma map failed"); 5587 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig", 5588 CTLFLAG_RD, 5589 &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG"); 5590 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup", 5591 CTLFLAG_RD, 5592 &txq->ift_no_tx_dma_setup, "# of times map failed for other than EFBIG"); 5593 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx", 5594 CTLFLAG_RD, 5595 &txq->ift_pidx, 1, "Producer Index"); 5596 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx", 5597 CTLFLAG_RD, 5598 &txq->ift_cidx, 1, "Consumer Index"); 5599 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx_processed", 5600 CTLFLAG_RD, 5601 &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update"); 5602 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use", 5603 CTLFLAG_RD, 5604 &txq->ift_in_use, 1, "descriptors in use"); 5605 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed", 5606 CTLFLAG_RD, 5607 &txq->ift_processed, "descriptors procesed for clean"); 5608 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned", 5609 CTLFLAG_RD, 5610 &txq->ift_cleaned, "total cleaned"); 5611 SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state", 5612 CTLTYPE_STRING | CTLFLAG_RD, __DEVOLATILE(uint64_t *, &txq->ift_br->state), 
5613 0, mp_ring_state_handler, "A", "soft ring state"); 5614 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues", 5615 CTLFLAG_RD, &txq->ift_br->enqueues, 5616 "# of enqueues to the mp_ring for this queue"); 5617 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops", 5618 CTLFLAG_RD, &txq->ift_br->drops, 5619 "# of drops in the mp_ring for this queue"); 5620 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts", 5621 CTLFLAG_RD, &txq->ift_br->starts, 5622 "# of normal consumer starts in the mp_ring for this queue"); 5623 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls", 5624 CTLFLAG_RD, &txq->ift_br->stalls, 5625 "# of consumer stalls in the mp_ring for this queue"); 5626 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts", 5627 CTLFLAG_RD, &txq->ift_br->restarts, 5628 "# of consumer restarts in the mp_ring for this queue"); 5629 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications", 5630 CTLFLAG_RD, &txq->ift_br->abdications, 5631 "# of consumer abdications in the mp_ring for this queue"); 5632 } 5633 5634 if (scctx->isc_nrxqsets > 100) 5635 qfmt = "rxq%03d"; 5636 else if (scctx->isc_nrxqsets > 10) 5637 qfmt = "rxq%02d"; 5638 else 5639 qfmt = "rxq%d"; 5640 for (i = 0, rxq = ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) { 5641 snprintf(namebuf, NAME_BUFLEN, qfmt, i); 5642 queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, 5643 CTLFLAG_RD, NULL, "Queue Name"); 5644 queue_list = SYSCTL_CHILDREN(queue_node); 5645 if (sctx->isc_flags & IFLIB_HAS_RXCQ) { 5646 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_pidx", 5647 CTLFLAG_RD, 5648 &rxq->ifr_cq_pidx, 1, "Producer Index"); 5649 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx", 5650 CTLFLAG_RD, 5651 &rxq->ifr_cq_cidx, 1, "Consumer Index"); 5652 } 5653 5654 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { 5655 snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j); 5656 fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf, 5657 CTLFLAG_RD, NULL, "freelist Name"); 5658 fl_list = SYSCTL_CHILDREN(fl_node); 5659 SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx", 5660 CTLFLAG_RD, 5661 &fl->ifl_pidx, 1, "Producer Index"); 5662 SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx", 5663 CTLFLAG_RD, 5664 &fl->ifl_cidx, 1, "Consumer Index"); 5665 SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits", 5666 CTLFLAG_RD, 5667 &fl->ifl_credits, 1, "credits available"); 5668 #if MEMORY_LOGGING 5669 SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued", 5670 CTLFLAG_RD, 5671 &fl->ifl_m_enqueued, "mbufs allocated"); 5672 SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued", 5673 CTLFLAG_RD, 5674 &fl->ifl_m_dequeued, "mbufs freed"); 5675 SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued", 5676 CTLFLAG_RD, 5677 &fl->ifl_cl_enqueued, "clusters allocated"); 5678 SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued", 5679 CTLFLAG_RD, 5680 &fl->ifl_cl_dequeued, "clusters freed"); 5681 #endif 5682 5683 } 5684 } 5685 5686 } 5687 5688 #ifndef __NO_STRICT_ALIGNMENT 5689 static struct mbuf * 5690 iflib_fixup_rx(struct mbuf *m) 5691 { 5692 struct mbuf *n; 5693 5694 if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { 5695 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); 5696 m->m_data += ETHER_HDR_LEN; 5697 n = m; 5698 } else { 5699 MGETHDR(n, M_NOWAIT, MT_DATA); 5700 if (n == NULL) { 5701 m_freem(m); 5702 return (NULL); 5703 } 5704 bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); 5705 m->m_data += 
ETHER_HDR_LEN; 5706 m->m_len -= ETHER_HDR_LEN; 5707 n->m_len = ETHER_HDR_LEN; 5708 M_MOVE_PKTHDR(n, m); 5709 n->m_next = m; 5710 } 5711 return (n); 5712 } 5713 #endif 5714
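
/*
 * Illustrative sketch (not part of iflib itself): roughly how a driver wires
 * its newbus entry points to the bus functions defined above.  The "foo"
 * names, vendor/device IDs, and softc are hypothetical placeholders; the
 * shared context returned from the driver's device_register method is what
 * DEVICE_REGISTER() hands to iflib_device_probe()/iflib_device_register().
 */
#if 0
static pci_vendor_info_t foo_vendor_info[] = {
	/* hypothetical vendor/device IDs matched by the probe loop above */
	PVID(0x1234, 0x5678, "Foo Gigabit Ethernet"),
	PVID_END
};

static struct if_shared_ctx foo_sctx = {
	.isc_magic = IFLIB_MAGIC,		/* checked by iflib_device_probe() */
	.isc_vendor_info = foo_vendor_info,
	.isc_driver_version = "1.0.0",
	/* ... descriptor min/max/default arrays, DMA limits, etc. ... */
};

static void *
foo_register(device_t dev)
{
	/* Returned pointer is what DEVICE_REGISTER(dev) gives back to iflib. */
	return (&foo_sctx);
}

static device_method_t foo_methods[] = {
	DEVMETHOD(device_register, foo_register),
	DEVMETHOD(device_probe, iflib_device_probe),
	DEVMETHOD(device_attach, iflib_device_attach),
	DEVMETHOD(device_detach, iflib_device_detach),
	DEVMETHOD(device_shutdown, iflib_device_shutdown),
	DEVMETHOD(device_suspend, iflib_device_suspend),
	DEVMETHOD(device_resume, iflib_device_resume),
	DEVMETHOD_END
};
#endif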