/*-
 * Copyright (c) 2014-2018, Matthew Macy <mmacy@mattmacy.io>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *  2. Neither the name of Matthew Macy nor the names of its
 *     contributors may be used to endorse or promote products derived from
 *     this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_acpi.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/bus.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/kobj.h>
#include <sys/rman.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/taskqueue.h>
#include <sys/limits.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/if_media.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/mp_ring.h>
#include <net/debugnet.h>
#include <net/pfil.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp_lro.h>
#include <netinet/in_systm.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/ip_var.h>
#include <netinet6/ip6_var.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <dev/led/led.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#include <net/iflib.h>
#include <net/iflib_private.h>

#include "ifdi_if.h"

#ifdef PCI_IOV
#include <dev/pci/pci_iov.h>
#endif

#include <sys/bitstring.h>
/*
 * enable accounting of every mbuf as it comes in to and goes out of
 * iflib's software descriptor references
 */
#define MEMORY_LOGGING 0
/*
 * Enable mbuf vectors for compressing long mbuf chains
 */

/*
 * NB:
 * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead
 *   we prefetch needs to be determined by the time spent in m_free vis a vis
 *   the cost of a prefetch.
This will of course vary based on the workload: 112 * - NFLX's m_free path is dominated by vm-based M_EXT manipulation which 113 * is quite expensive, thus suggesting very little prefetch. 114 * - small packet forwarding which is just returning a single mbuf to 115 * UMA will typically be very fast vis a vis the cost of a memory 116 * access. 117 */ 118 119 /* 120 * File organization: 121 * - private structures 122 * - iflib private utility functions 123 * - ifnet functions 124 * - vlan registry and other exported functions 125 * - iflib public core functions 126 * 127 * 128 */ 129 MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library"); 130 131 #define IFLIB_RXEOF_MORE (1U << 0) 132 #define IFLIB_RXEOF_EMPTY (2U << 0) 133 134 struct iflib_txq; 135 typedef struct iflib_txq *iflib_txq_t; 136 struct iflib_rxq; 137 typedef struct iflib_rxq *iflib_rxq_t; 138 struct iflib_fl; 139 typedef struct iflib_fl *iflib_fl_t; 140 141 struct iflib_ctx; 142 143 static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid); 144 static void iflib_timer(void *arg); 145 static void iflib_tqg_detach(if_ctx_t ctx); 146 147 typedef struct iflib_filter_info { 148 driver_filter_t *ifi_filter; 149 void *ifi_filter_arg; 150 struct grouptask *ifi_task; 151 void *ifi_ctx; 152 } *iflib_filter_info_t; 153 154 struct iflib_ctx { 155 KOBJ_FIELDS; 156 /* 157 * Pointer to hardware driver's softc 158 */ 159 void *ifc_softc; 160 device_t ifc_dev; 161 if_t ifc_ifp; 162 163 cpuset_t ifc_cpus; 164 if_shared_ctx_t ifc_sctx; 165 struct if_softc_ctx ifc_softc_ctx; 166 167 struct sx ifc_ctx_sx; 168 struct mtx ifc_state_mtx; 169 170 iflib_txq_t ifc_txqs; 171 iflib_rxq_t ifc_rxqs; 172 uint32_t ifc_if_flags; 173 uint32_t ifc_flags; 174 uint32_t ifc_max_fl_buf_size; 175 uint32_t ifc_rx_mbuf_sz; 176 177 int ifc_link_state; 178 int ifc_watchdog_events; 179 struct cdev *ifc_led_dev; 180 struct resource *ifc_msix_mem; 181 182 struct if_irq ifc_legacy_irq; 183 struct grouptask ifc_admin_task; 184 struct grouptask ifc_vflr_task; 185 struct iflib_filter_info ifc_filter_info; 186 struct ifmedia ifc_media; 187 struct ifmedia *ifc_mediap; 188 189 struct sysctl_oid *ifc_sysctl_node; 190 uint16_t ifc_sysctl_ntxqs; 191 uint16_t ifc_sysctl_nrxqs; 192 uint16_t ifc_sysctl_qs_eq_override; 193 uint16_t ifc_sysctl_rx_budget; 194 uint16_t ifc_sysctl_tx_abdicate; 195 uint16_t ifc_sysctl_core_offset; 196 #define CORE_OFFSET_UNSPECIFIED 0xffff 197 uint8_t ifc_sysctl_separate_txrx; 198 199 qidx_t ifc_sysctl_ntxds[8]; 200 qidx_t ifc_sysctl_nrxds[8]; 201 struct if_txrx ifc_txrx; 202 #define isc_txd_encap ifc_txrx.ift_txd_encap 203 #define isc_txd_flush ifc_txrx.ift_txd_flush 204 #define isc_txd_credits_update ifc_txrx.ift_txd_credits_update 205 #define isc_rxd_available ifc_txrx.ift_rxd_available 206 #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get 207 #define isc_rxd_refill ifc_txrx.ift_rxd_refill 208 #define isc_rxd_flush ifc_txrx.ift_rxd_flush 209 #define isc_legacy_intr ifc_txrx.ift_legacy_intr 210 eventhandler_tag ifc_vlan_attach_event; 211 eventhandler_tag ifc_vlan_detach_event; 212 struct ether_addr ifc_mac; 213 }; 214 215 void * 216 iflib_get_softc(if_ctx_t ctx) 217 { 218 219 return (ctx->ifc_softc); 220 } 221 222 device_t 223 iflib_get_dev(if_ctx_t ctx) 224 { 225 226 return (ctx->ifc_dev); 227 } 228 229 if_t 230 iflib_get_ifp(if_ctx_t ctx) 231 { 232 233 return (ctx->ifc_ifp); 234 } 235 236 struct ifmedia * 237 iflib_get_media(if_ctx_t ctx) 238 { 239 240 return (ctx->ifc_mediap); 241 } 242 243 uint32_t 244 iflib_get_flags(if_ctx_t ctx) 245 { 246 
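	/*
	 * Note: ifc_flags holds iflib-internal IFC_* state bits (e.g.
	 * IFC_IN_DETACH, IFC_LEGACY); it is distinct from the interface
	 * flags tracked in ifc_if_flags.
	 */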
return (ctx->ifc_flags); 247 } 248 249 void 250 iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]) 251 { 252 253 bcopy(mac, ctx->ifc_mac.octet, ETHER_ADDR_LEN); 254 } 255 256 if_softc_ctx_t 257 iflib_get_softc_ctx(if_ctx_t ctx) 258 { 259 260 return (&ctx->ifc_softc_ctx); 261 } 262 263 if_shared_ctx_t 264 iflib_get_sctx(if_ctx_t ctx) 265 { 266 267 return (ctx->ifc_sctx); 268 } 269 270 #define IP_ALIGNED(m) ((((uintptr_t)(m)->m_data) & 0x3) == 0x2) 271 #define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*)) 272 #define CACHE_PTR_NEXT(ptr) ((void *)(((uintptr_t)(ptr)+CACHE_LINE_SIZE-1) & (CACHE_LINE_SIZE-1))) 273 274 #define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP) 275 #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) 276 277 typedef struct iflib_sw_rx_desc_array { 278 bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ 279 struct mbuf **ifsd_m; /* pkthdr mbufs */ 280 caddr_t *ifsd_cl; /* direct cluster pointer for rx */ 281 bus_addr_t *ifsd_ba; /* bus addr of cluster for rx */ 282 } iflib_rxsd_array_t; 283 284 typedef struct iflib_sw_tx_desc_array { 285 bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ 286 bus_dmamap_t *ifsd_tso_map; /* bus_dma maps for TSO packet */ 287 struct mbuf **ifsd_m; /* pkthdr mbufs */ 288 } if_txsd_vec_t; 289 290 /* magic number that should be high enough for any hardware */ 291 #define IFLIB_MAX_TX_SEGS 128 292 #define IFLIB_RX_COPY_THRESH 128 293 #define IFLIB_MAX_RX_REFRESH 32 294 /* The minimum descriptors per second before we start coalescing */ 295 #define IFLIB_MIN_DESC_SEC 16384 296 #define IFLIB_DEFAULT_TX_UPDATE_FREQ 16 297 #define IFLIB_QUEUE_IDLE 0 298 #define IFLIB_QUEUE_HUNG 1 299 #define IFLIB_QUEUE_WORKING 2 300 /* maximum number of txqs that can share an rx interrupt */ 301 #define IFLIB_MAX_TX_SHARED_INTR 4 302 303 /* this should really scale with ring size - this is a fairly arbitrary value */ 304 #define TX_BATCH_SIZE 32 305 306 #define IFLIB_RESTART_BUDGET 8 307 308 #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ 309 CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ 310 CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) 311 312 struct iflib_txq { 313 qidx_t ift_in_use; 314 qidx_t ift_cidx; 315 qidx_t ift_cidx_processed; 316 qidx_t ift_pidx; 317 uint8_t ift_gen; 318 uint8_t ift_br_offset; 319 uint16_t ift_npending; 320 uint16_t ift_db_pending; 321 uint16_t ift_rs_pending; 322 /* implicit pad */ 323 uint8_t ift_txd_size[8]; 324 uint64_t ift_processed; 325 uint64_t ift_cleaned; 326 uint64_t ift_cleaned_prev; 327 #if MEMORY_LOGGING 328 uint64_t ift_enqueued; 329 uint64_t ift_dequeued; 330 #endif 331 uint64_t ift_no_tx_dma_setup; 332 uint64_t ift_no_desc_avail; 333 uint64_t ift_mbuf_defrag_failed; 334 uint64_t ift_mbuf_defrag; 335 uint64_t ift_map_failed; 336 uint64_t ift_txd_encap_efbig; 337 uint64_t ift_pullups; 338 uint64_t ift_last_timer_tick; 339 340 struct mtx ift_mtx; 341 struct mtx ift_db_mtx; 342 343 /* constant values */ 344 if_ctx_t ift_ctx; 345 struct ifmp_ring *ift_br; 346 struct grouptask ift_task; 347 qidx_t ift_size; 348 uint16_t ift_id; 349 struct callout ift_timer; 350 #ifdef DEV_NETMAP 351 struct callout ift_netmap_timer; 352 #endif /* DEV_NETMAP */ 353 354 if_txsd_vec_t ift_sds; 355 uint8_t ift_qstatus; 356 uint8_t ift_closed; 357 uint8_t ift_update_freq; 358 struct iflib_filter_info ift_filter_info; 359 bus_dma_tag_t ift_buf_tag; 360 bus_dma_tag_t ift_tso_buf_tag; 361 iflib_dma_info_t ift_ifdi; 362 #define MTX_NAME_LEN 32 363 char ift_mtx_name[MTX_NAME_LEN]; 364 bus_dma_segment_t 
ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE); 365 #ifdef IFLIB_DIAGNOSTICS 366 uint64_t ift_cpu_exec_count[256]; 367 #endif 368 } __aligned(CACHE_LINE_SIZE); 369 370 struct iflib_fl { 371 qidx_t ifl_cidx; 372 qidx_t ifl_pidx; 373 qidx_t ifl_credits; 374 uint8_t ifl_gen; 375 uint8_t ifl_rxd_size; 376 #if MEMORY_LOGGING 377 uint64_t ifl_m_enqueued; 378 uint64_t ifl_m_dequeued; 379 uint64_t ifl_cl_enqueued; 380 uint64_t ifl_cl_dequeued; 381 #endif 382 /* implicit pad */ 383 bitstr_t *ifl_rx_bitmap; 384 qidx_t ifl_fragidx; 385 /* constant */ 386 qidx_t ifl_size; 387 uint16_t ifl_buf_size; 388 uint16_t ifl_cltype; 389 uma_zone_t ifl_zone; 390 iflib_rxsd_array_t ifl_sds; 391 iflib_rxq_t ifl_rxq; 392 uint8_t ifl_id; 393 bus_dma_tag_t ifl_buf_tag; 394 iflib_dma_info_t ifl_ifdi; 395 uint64_t ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE); 396 qidx_t ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH]; 397 } __aligned(CACHE_LINE_SIZE); 398 399 static inline qidx_t 400 get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen) 401 { 402 qidx_t used; 403 404 if (pidx > cidx) 405 used = pidx - cidx; 406 else if (pidx < cidx) 407 used = size - cidx + pidx; 408 else if (gen == 0 && pidx == cidx) 409 used = 0; 410 else if (gen == 1 && pidx == cidx) 411 used = size; 412 else 413 panic("bad state"); 414 415 return (used); 416 } 417 418 #define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen)) 419 420 #define IDXDIFF(head, tail, wrap) \ 421 ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head)) 422 423 struct iflib_rxq { 424 if_ctx_t ifr_ctx; 425 iflib_fl_t ifr_fl; 426 uint64_t ifr_rx_irq; 427 struct pfil_head *pfil; 428 /* 429 * If there is a separate completion queue (IFLIB_HAS_RXCQ), this is 430 * the completion queue consumer index. Otherwise it's unused. 
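	 * When present, it is consulted in place of free list 0's ifl_cidx
	 * when polling for newly available descriptors (see
	 * iflib_netmap_rxsync() and iflib_fast_intr_rxtx()).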
431 */ 432 qidx_t ifr_cq_cidx; 433 uint16_t ifr_id; 434 uint8_t ifr_nfl; 435 uint8_t ifr_ntxqirq; 436 uint8_t ifr_txqid[IFLIB_MAX_TX_SHARED_INTR]; 437 uint8_t ifr_fl_offset; 438 struct lro_ctrl ifr_lc; 439 struct grouptask ifr_task; 440 struct callout ifr_watchdog; 441 struct iflib_filter_info ifr_filter_info; 442 iflib_dma_info_t ifr_ifdi; 443 444 /* dynamically allocate if any drivers need a value substantially larger than this */ 445 struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); 446 #ifdef IFLIB_DIAGNOSTICS 447 uint64_t ifr_cpu_exec_count[256]; 448 #endif 449 } __aligned(CACHE_LINE_SIZE); 450 451 typedef struct if_rxsd { 452 caddr_t *ifsd_cl; 453 iflib_fl_t ifsd_fl; 454 } *if_rxsd_t; 455 456 /* multiple of word size */ 457 #ifdef __LP64__ 458 #define PKT_INFO_SIZE 6 459 #define RXD_INFO_SIZE 5 460 #define PKT_TYPE uint64_t 461 #else 462 #define PKT_INFO_SIZE 11 463 #define RXD_INFO_SIZE 8 464 #define PKT_TYPE uint32_t 465 #endif 466 #define PKT_LOOP_BOUND ((PKT_INFO_SIZE/3)*3) 467 #define RXD_LOOP_BOUND ((RXD_INFO_SIZE/4)*4) 468 469 typedef struct if_pkt_info_pad { 470 PKT_TYPE pkt_val[PKT_INFO_SIZE]; 471 } *if_pkt_info_pad_t; 472 typedef struct if_rxd_info_pad { 473 PKT_TYPE rxd_val[RXD_INFO_SIZE]; 474 } *if_rxd_info_pad_t; 475 476 CTASSERT(sizeof(struct if_pkt_info_pad) == sizeof(struct if_pkt_info)); 477 CTASSERT(sizeof(struct if_rxd_info_pad) == sizeof(struct if_rxd_info)); 478 479 static inline void 480 pkt_info_zero(if_pkt_info_t pi) 481 { 482 if_pkt_info_pad_t pi_pad; 483 484 pi_pad = (if_pkt_info_pad_t)pi; 485 pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0; 486 pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0; 487 #ifndef __LP64__ 488 pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; 489 pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0; 490 #endif 491 } 492 493 static device_method_t iflib_pseudo_methods[] = { 494 DEVMETHOD(device_attach, noop_attach), 495 DEVMETHOD(device_detach, iflib_pseudo_detach), 496 DEVMETHOD_END 497 }; 498 499 driver_t iflib_pseudodriver = { 500 "iflib_pseudo", iflib_pseudo_methods, sizeof(struct iflib_ctx), 501 }; 502 503 static inline void 504 rxd_info_zero(if_rxd_info_t ri) 505 { 506 if_rxd_info_pad_t ri_pad; 507 int i; 508 509 ri_pad = (if_rxd_info_pad_t)ri; 510 for (i = 0; i < RXD_LOOP_BOUND; i += 4) { 511 ri_pad->rxd_val[i] = 0; 512 ri_pad->rxd_val[i+1] = 0; 513 ri_pad->rxd_val[i+2] = 0; 514 ri_pad->rxd_val[i+3] = 0; 515 } 516 #ifdef __LP64__ 517 ri_pad->rxd_val[RXD_INFO_SIZE-1] = 0; 518 #endif 519 } 520 521 /* 522 * Only allow a single packet to take up most 1/nth of the tx ring 523 */ 524 #define MAX_SINGLE_PACKET_FRACTION 12 525 #define IF_BAD_DMA (bus_addr_t)-1 526 527 #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) 528 529 #define CTX_LOCK_INIT(_sc) sx_init(&(_sc)->ifc_ctx_sx, "iflib ctx lock") 530 #define CTX_LOCK(ctx) sx_xlock(&(ctx)->ifc_ctx_sx) 531 #define CTX_UNLOCK(ctx) sx_xunlock(&(ctx)->ifc_ctx_sx) 532 #define CTX_LOCK_DESTROY(ctx) sx_destroy(&(ctx)->ifc_ctx_sx) 533 534 #define STATE_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_state_mtx, _name, "iflib state lock", MTX_DEF) 535 #define STATE_LOCK(ctx) mtx_lock(&(ctx)->ifc_state_mtx) 536 #define STATE_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_state_mtx) 537 #define STATE_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_state_mtx) 538 539 #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) 540 #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) 541 542 void 543 
iflib_set_detach(if_ctx_t ctx) 544 { 545 STATE_LOCK(ctx); 546 ctx->ifc_flags |= IFC_IN_DETACH; 547 STATE_UNLOCK(ctx); 548 } 549 550 /* Our boot-time initialization hook */ 551 static int iflib_module_event_handler(module_t, int, void *); 552 553 static moduledata_t iflib_moduledata = { 554 "iflib", 555 iflib_module_event_handler, 556 NULL 557 }; 558 559 DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY); 560 MODULE_VERSION(iflib, 1); 561 562 MODULE_DEPEND(iflib, pci, 1, 1, 1); 563 MODULE_DEPEND(iflib, ether, 1, 1, 1); 564 565 TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1); 566 TASKQGROUP_DEFINE(if_config_tqg, 1, 1); 567 568 #ifndef IFLIB_DEBUG_COUNTERS 569 #ifdef INVARIANTS 570 #define IFLIB_DEBUG_COUNTERS 1 571 #else 572 #define IFLIB_DEBUG_COUNTERS 0 573 #endif /* !INVARIANTS */ 574 #endif 575 576 static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 577 "iflib driver parameters"); 578 579 /* 580 * XXX need to ensure that this can't accidentally cause the head to be moved backwards 581 */ 582 static int iflib_min_tx_latency = 0; 583 SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW, 584 &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput"); 585 static int iflib_no_tx_batch = 0; 586 SYSCTL_INT(_net_iflib, OID_AUTO, no_tx_batch, CTLFLAG_RW, 587 &iflib_no_tx_batch, 0, "minimize transmit latency at the possible expense of throughput"); 588 static int iflib_timer_default = 1000; 589 SYSCTL_INT(_net_iflib, OID_AUTO, timer_default, CTLFLAG_RW, 590 &iflib_timer_default, 0, "number of ticks between iflib_timer calls"); 591 592 593 #if IFLIB_DEBUG_COUNTERS 594 595 static int iflib_tx_seen; 596 static int iflib_tx_sent; 597 static int iflib_tx_encap; 598 static int iflib_rx_allocs; 599 static int iflib_fl_refills; 600 static int iflib_fl_refills_large; 601 static int iflib_tx_frees; 602 603 SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD, 604 &iflib_tx_seen, 0, "# TX mbufs seen"); 605 SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD, 606 &iflib_tx_sent, 0, "# TX mbufs sent"); 607 SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD, 608 &iflib_tx_encap, 0, "# TX mbufs encapped"); 609 SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD, 610 &iflib_tx_frees, 0, "# TX frees"); 611 SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD, 612 &iflib_rx_allocs, 0, "# RX allocations"); 613 SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD, 614 &iflib_fl_refills, 0, "# refills"); 615 SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD, 616 &iflib_fl_refills_large, 0, "# large refills"); 617 618 static int iflib_txq_drain_flushing; 619 static int iflib_txq_drain_oactive; 620 static int iflib_txq_drain_notready; 621 622 SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_flushing, CTLFLAG_RD, 623 &iflib_txq_drain_flushing, 0, "# drain flushes"); 624 SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD, 625 &iflib_txq_drain_oactive, 0, "# drain oactives"); 626 SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD, 627 &iflib_txq_drain_notready, 0, "# drain notready"); 628 629 static int iflib_encap_load_mbuf_fail; 630 static int iflib_encap_pad_mbuf_fail; 631 static int iflib_encap_txq_avail_fail; 632 static int iflib_encap_txd_encap_fail; 633 634 SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD, 635 &iflib_encap_load_mbuf_fail, 0, "# busdma load failures"); 636 SYSCTL_INT(_net_iflib, OID_AUTO, encap_pad_mbuf_fail, CTLFLAG_RD, 637 &iflib_encap_pad_mbuf_fail, 0, "# runt 
frame pad failures"); 638 SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD, 639 &iflib_encap_txq_avail_fail, 0, "# txq avail failures"); 640 SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD, 641 &iflib_encap_txd_encap_fail, 0, "# driver encap failures"); 642 643 static int iflib_task_fn_rxs; 644 static int iflib_rx_intr_enables; 645 static int iflib_fast_intrs; 646 static int iflib_rx_unavail; 647 static int iflib_rx_ctx_inactive; 648 static int iflib_rx_if_input; 649 static int iflib_rxd_flush; 650 651 static int iflib_verbose_debug; 652 653 SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD, 654 &iflib_task_fn_rxs, 0, "# task_fn_rx calls"); 655 SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD, 656 &iflib_rx_intr_enables, 0, "# RX intr enables"); 657 SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD, 658 &iflib_fast_intrs, 0, "# fast_intr calls"); 659 SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD, 660 &iflib_rx_unavail, 0, "# times rxeof called with no available data"); 661 SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD, 662 &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context"); 663 SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD, 664 &iflib_rx_if_input, 0, "# times rxeof called if_input"); 665 SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD, 666 &iflib_rxd_flush, 0, "# times rxd_flush called"); 667 SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW, 668 &iflib_verbose_debug, 0, "enable verbose debugging"); 669 670 #define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1) 671 static void 672 iflib_debug_reset(void) 673 { 674 iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs = 675 iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees = 676 iflib_txq_drain_flushing = iflib_txq_drain_oactive = 677 iflib_txq_drain_notready = 678 iflib_encap_load_mbuf_fail = iflib_encap_pad_mbuf_fail = 679 iflib_encap_txq_avail_fail = iflib_encap_txd_encap_fail = 680 iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs = 681 iflib_rx_unavail = 682 iflib_rx_ctx_inactive = iflib_rx_if_input = 683 iflib_rxd_flush = 0; 684 } 685 686 #else 687 #define DBG_COUNTER_INC(name) 688 static void iflib_debug_reset(void) {} 689 #endif 690 691 #define IFLIB_DEBUG 0 692 693 static void iflib_tx_structures_free(if_ctx_t ctx); 694 static void iflib_rx_structures_free(if_ctx_t ctx); 695 static int iflib_queues_alloc(if_ctx_t ctx); 696 static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq); 697 static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget); 698 static int iflib_qset_structures_setup(if_ctx_t ctx); 699 static int iflib_msix_init(if_ctx_t ctx); 700 static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, const char *str); 701 static void iflib_txq_check_drain(iflib_txq_t txq, int budget); 702 static uint32_t iflib_txq_can_drain(struct ifmp_ring *); 703 #ifdef ALTQ 704 static void iflib_altq_if_start(if_t ifp); 705 static int iflib_altq_if_transmit(if_t ifp, struct mbuf *m); 706 #endif 707 static int iflib_register(if_ctx_t); 708 static void iflib_deregister(if_ctx_t); 709 static void iflib_unregister_vlan_handlers(if_ctx_t ctx); 710 static uint16_t iflib_get_mbuf_size_for(unsigned int size); 711 static void iflib_init_locked(if_ctx_t ctx); 712 static void iflib_add_device_sysctl_pre(if_ctx_t ctx); 713 static void iflib_add_device_sysctl_post(if_ctx_t ctx); 714 static void 
iflib_ifmp_purge(iflib_txq_t txq); 715 static void _iflib_pre_assert(if_softc_ctx_t scctx); 716 static void iflib_if_init_locked(if_ctx_t ctx); 717 static void iflib_free_intr_mem(if_ctx_t ctx); 718 #ifndef __NO_STRICT_ALIGNMENT 719 static struct mbuf * iflib_fixup_rx(struct mbuf *m); 720 #endif 721 722 static SLIST_HEAD(cpu_offset_list, cpu_offset) cpu_offsets = 723 SLIST_HEAD_INITIALIZER(cpu_offsets); 724 struct cpu_offset { 725 SLIST_ENTRY(cpu_offset) entries; 726 cpuset_t set; 727 unsigned int refcount; 728 uint16_t offset; 729 }; 730 static struct mtx cpu_offset_mtx; 731 MTX_SYSINIT(iflib_cpu_offset, &cpu_offset_mtx, "iflib_cpu_offset lock", 732 MTX_DEF); 733 734 DEBUGNET_DEFINE(iflib); 735 736 static int 737 iflib_num_rx_descs(if_ctx_t ctx) 738 { 739 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 740 if_shared_ctx_t sctx = ctx->ifc_sctx; 741 uint16_t first_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; 742 743 return scctx->isc_nrxd[first_rxq]; 744 } 745 746 static int 747 iflib_num_tx_descs(if_ctx_t ctx) 748 { 749 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 750 if_shared_ctx_t sctx = ctx->ifc_sctx; 751 uint16_t first_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; 752 753 return scctx->isc_ntxd[first_txq]; 754 } 755 756 #ifdef DEV_NETMAP 757 #include <sys/selinfo.h> 758 #include <net/netmap.h> 759 #include <dev/netmap/netmap_kern.h> 760 761 MODULE_DEPEND(iflib, netmap, 1, 1, 1); 762 763 static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, bool init); 764 static void iflib_netmap_timer(void *arg); 765 766 /* 767 * device-specific sysctl variables: 768 * 769 * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it. 770 * During regular operations the CRC is stripped, but on some 771 * hardware reception of frames not multiple of 64 is slower, 772 * so using crcstrip=0 helps in benchmarks. 773 * 774 * iflib_rx_miss, iflib_rx_miss_bufs: 775 * count packets that might be missed due to lost interrupts. 776 */ 777 SYSCTL_DECL(_dev_netmap); 778 /* 779 * The xl driver by default strips CRCs and we do not override it. 780 */ 781 782 int iflib_crcstrip = 1; 783 SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip, 784 CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on RX frames"); 785 786 int iflib_rx_miss, iflib_rx_miss_bufs; 787 SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss, 788 CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed RX intr"); 789 SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs, 790 CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed RX intr bufs"); 791 792 /* 793 * Register/unregister. We are already under netmap lock. 794 * Only called on the first register or the last unregister. 795 */ 796 static int 797 iflib_netmap_register(struct netmap_adapter *na, int onoff) 798 { 799 if_t ifp = na->ifp; 800 if_ctx_t ctx = ifp->if_softc; 801 int status; 802 803 CTX_LOCK(ctx); 804 if (!CTX_IS_VF(ctx)) 805 IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); 806 807 iflib_stop(ctx); 808 809 /* 810 * Enable (or disable) netmap flags, and intercept (or restore) 811 * ifp->if_transmit. This is done once the device has been stopped 812 * to prevent race conditions. Also, this must be done after 813 * calling netmap_disable_all_rings() and before calling 814 * netmap_enable_all_rings(), so that these two functions see the 815 * updated state of the NAF_NETMAP_ON bit. 
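	 * If the interface does not come back up after iflib_init_locked()
	 * (IFF_DRV_RUNNING remains clear), the native flags are cleared
	 * again below and a non-zero status is returned to the netmap core.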
816 */ 817 if (onoff) { 818 nm_set_native_flags(na); 819 } else { 820 nm_clear_native_flags(na); 821 } 822 823 iflib_init_locked(ctx); 824 IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); // XXX why twice ? 825 status = ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1; 826 if (status) 827 nm_clear_native_flags(na); 828 CTX_UNLOCK(ctx); 829 return (status); 830 } 831 832 static int 833 netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, bool init) 834 { 835 struct netmap_adapter *na = kring->na; 836 u_int const lim = kring->nkr_num_slots - 1; 837 struct netmap_ring *ring = kring->ring; 838 bus_dmamap_t *map; 839 struct if_rxd_update iru; 840 if_ctx_t ctx = rxq->ifr_ctx; 841 iflib_fl_t fl = &rxq->ifr_fl[0]; 842 u_int nic_i_first, nic_i; 843 u_int nm_i; 844 int i, n; 845 #if IFLIB_DEBUG_COUNTERS 846 int rf_count = 0; 847 #endif 848 849 /* 850 * This function is used both at initialization and in rxsync. 851 * At initialization we need to prepare (with isc_rxd_refill()) 852 * all the netmap buffers currently owned by the kernel, in 853 * such a way to keep fl->ifl_pidx and kring->nr_hwcur in sync 854 * (except for kring->nkr_hwofs). These may be less than 855 * kring->nkr_num_slots if netmap_reset() was called while 856 * an application using the kring that still owned some 857 * buffers. 858 * At rxsync time, both indexes point to the next buffer to be 859 * refilled. 860 * In any case we publish (with isc_rxd_flush()) up to 861 * (fl->ifl_pidx - 1) % N (included), to avoid the NIC tail/prod 862 * pointer to overrun the head/cons pointer, although this is 863 * not necessary for some NICs (e.g. vmx). 864 */ 865 if (__predict_false(init)) { 866 n = kring->nkr_num_slots - nm_kr_rxspace(kring); 867 } else { 868 n = kring->rhead - kring->nr_hwcur; 869 if (n == 0) 870 return (0); /* Nothing to do. */ 871 if (n < 0) 872 n += kring->nkr_num_slots; 873 } 874 875 iru_init(&iru, rxq, 0 /* flid */); 876 map = fl->ifl_sds.ifsd_map; 877 nic_i = fl->ifl_pidx; 878 nm_i = netmap_idx_n2k(kring, nic_i); 879 if (__predict_false(init)) { 880 /* 881 * On init/reset, nic_i must be 0, and we must 882 * start to refill from hwtail (see netmap_reset()). 883 */ 884 MPASS(nic_i == 0); 885 MPASS(nm_i == kring->nr_hwtail); 886 } else 887 MPASS(nm_i == kring->nr_hwcur); 888 DBG_COUNTER_INC(fl_refills); 889 while (n > 0) { 890 #if IFLIB_DEBUG_COUNTERS 891 if (++rf_count == 9) 892 DBG_COUNTER_INC(fl_refills_large); 893 #endif 894 nic_i_first = nic_i; 895 for (i = 0; n > 0 && i < IFLIB_MAX_RX_REFRESH; n--, i++) { 896 struct netmap_slot *slot = &ring->slot[nm_i]; 897 void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[i]); 898 899 MPASS(i < IFLIB_MAX_RX_REFRESH); 900 901 if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ 902 return netmap_ring_reinit(kring); 903 904 fl->ifl_rxd_idxs[i] = nic_i; 905 906 if (__predict_false(init)) { 907 netmap_load_map(na, fl->ifl_buf_tag, 908 map[nic_i], addr); 909 } else if (slot->flags & NS_BUF_CHANGED) { 910 /* buffer has changed, reload map */ 911 netmap_reload_map(na, fl->ifl_buf_tag, 912 map[nic_i], addr); 913 } 914 bus_dmamap_sync(fl->ifl_buf_tag, map[nic_i], 915 BUS_DMASYNC_PREREAD); 916 slot->flags &= ~NS_BUF_CHANGED; 917 918 nm_i = nm_next(nm_i, lim); 919 nic_i = nm_next(nic_i, lim); 920 } 921 922 iru.iru_pidx = nic_i_first; 923 iru.iru_count = i; 924 ctx->isc_rxd_refill(ctx->ifc_softc, &iru); 925 } 926 fl->ifl_pidx = nic_i; 927 /* 928 * At the end of the loop we must have refilled everything 929 * we could possibly refill. 
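	 * At this point nm_i has caught up with kring->rhead and
	 * fl->ifl_pidx is the next NIC descriptor to refill; the flush
	 * below publishes descriptors up to nm_prev(nic_i, lim).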
930 */ 931 MPASS(nm_i == kring->rhead); 932 kring->nr_hwcur = nm_i; 933 934 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, 935 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 936 ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, 937 nm_prev(nic_i, lim)); 938 DBG_COUNTER_INC(rxd_flush); 939 940 return (0); 941 } 942 943 #define NETMAP_TX_TIMER_US 90 944 945 /* 946 * Reconcile kernel and user view of the transmit ring. 947 * 948 * All information is in the kring. 949 * Userspace wants to send packets up to the one before kring->rhead, 950 * kernel knows kring->nr_hwcur is the first unsent packet. 951 * 952 * Here we push packets out (as many as possible), and possibly 953 * reclaim buffers from previously completed transmission. 954 * 955 * The caller (netmap) guarantees that there is only one instance 956 * running at any time. Any interference with other driver 957 * methods should be handled by the individual drivers. 958 */ 959 static int 960 iflib_netmap_txsync(struct netmap_kring *kring, int flags) 961 { 962 struct netmap_adapter *na = kring->na; 963 if_t ifp = na->ifp; 964 struct netmap_ring *ring = kring->ring; 965 u_int nm_i; /* index into the netmap kring */ 966 u_int nic_i; /* index into the NIC ring */ 967 u_int n; 968 u_int const lim = kring->nkr_num_slots - 1; 969 u_int const head = kring->rhead; 970 struct if_pkt_info pi; 971 972 /* 973 * interrupts on every tx packet are expensive so request 974 * them every half ring, or where NS_REPORT is set 975 */ 976 u_int report_frequency = kring->nkr_num_slots >> 1; 977 /* device-specific */ 978 if_ctx_t ctx = ifp->if_softc; 979 iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id]; 980 981 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, 982 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 983 984 /* 985 * First part: process new packets to send. 986 * nm_i is the current index in the netmap kring, 987 * nic_i is the corresponding index in the NIC ring. 988 * 989 * If we have packets to send (nm_i != head) 990 * iterate over the netmap ring, fetch length and update 991 * the corresponding slot in the NIC ring. Some drivers also 992 * need to update the buffer's physical address in the NIC slot 993 * even NS_BUF_CHANGED is not set (PNMB computes the addresses). 994 * 995 * The netmap_reload_map() calls is especially expensive, 996 * even when (as in this case) the tag is 0, so do only 997 * when the buffer has actually changed. 998 * 999 * If possible do not set the report/intr bit on all slots, 1000 * but only a few times per ring or when NS_REPORT is set. 1001 * 1002 * Finally, on 10G and faster drivers, it might be useful 1003 * to prefetch the next slot and txr entry. 1004 */ 1005 1006 nm_i = kring->nr_hwcur; 1007 if (nm_i != head) { /* we have new packets to send */ 1008 uint32_t pkt_len = 0, seg_idx = 0; 1009 int nic_i_start = -1, flags = 0; 1010 pkt_info_zero(&pi); 1011 pi.ipi_segs = txq->ift_segs; 1012 pi.ipi_qsidx = kring->ring_id; 1013 nic_i = netmap_idx_k2n(kring, nm_i); 1014 1015 __builtin_prefetch(&ring->slot[nm_i]); 1016 __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]); 1017 __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]); 1018 1019 for (n = 0; nm_i != head; n++) { 1020 struct netmap_slot *slot = &ring->slot[nm_i]; 1021 u_int len = slot->len; 1022 uint64_t paddr; 1023 void *addr = PNMB(na, slot, &paddr); 1024 1025 flags |= (slot->flags & NS_REPORT || 1026 nic_i == 0 || nic_i == report_frequency) ? 
1027 IPI_TX_INTR : 0; 1028 1029 /* 1030 * If this is the first packet fragment, save the 1031 * index of the first NIC slot for later. 1032 */ 1033 if (nic_i_start < 0) 1034 nic_i_start = nic_i; 1035 1036 pi.ipi_segs[seg_idx].ds_addr = paddr; 1037 pi.ipi_segs[seg_idx].ds_len = len; 1038 if (len) { 1039 pkt_len += len; 1040 seg_idx++; 1041 } 1042 1043 if (!(slot->flags & NS_MOREFRAG)) { 1044 pi.ipi_len = pkt_len; 1045 pi.ipi_nsegs = seg_idx; 1046 pi.ipi_pidx = nic_i_start; 1047 pi.ipi_ndescs = 0; 1048 pi.ipi_flags = flags; 1049 1050 /* Prepare the NIC TX ring. */ 1051 ctx->isc_txd_encap(ctx->ifc_softc, &pi); 1052 DBG_COUNTER_INC(tx_encap); 1053 1054 /* Reinit per-packet info for the next one. */ 1055 flags = seg_idx = pkt_len = 0; 1056 nic_i_start = -1; 1057 } 1058 1059 /* prefetch for next round */ 1060 __builtin_prefetch(&ring->slot[nm_i + 1]); 1061 __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]); 1062 __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]); 1063 1064 NM_CHECK_ADDR_LEN(na, addr, len); 1065 1066 if (slot->flags & NS_BUF_CHANGED) { 1067 /* buffer has changed, reload map */ 1068 netmap_reload_map(na, txq->ift_buf_tag, 1069 txq->ift_sds.ifsd_map[nic_i], addr); 1070 } 1071 /* make sure changes to the buffer are synced */ 1072 bus_dmamap_sync(txq->ift_buf_tag, 1073 txq->ift_sds.ifsd_map[nic_i], 1074 BUS_DMASYNC_PREWRITE); 1075 1076 slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED | NS_MOREFRAG); 1077 nm_i = nm_next(nm_i, lim); 1078 nic_i = nm_next(nic_i, lim); 1079 } 1080 kring->nr_hwcur = nm_i; 1081 1082 /* synchronize the NIC ring */ 1083 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, 1084 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1085 1086 /* (re)start the tx unit up to slot nic_i (excluded) */ 1087 ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i); 1088 } 1089 1090 /* 1091 * Second part: reclaim buffers for completed transmissions. 1092 * 1093 * If there are unclaimed buffers, attempt to reclaim them. 1094 * If we don't manage to reclaim them all, and TX IRQs are not in use, 1095 * trigger a per-tx-queue timer to try again later. 1096 */ 1097 if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) { 1098 if (iflib_tx_credits_update(ctx, txq)) { 1099 /* some tx completed, increment avail */ 1100 nic_i = txq->ift_cidx_processed; 1101 kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); 1102 } 1103 } 1104 1105 if (!(ctx->ifc_flags & IFC_NETMAP_TX_IRQ)) 1106 if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) { 1107 callout_reset_sbt_on(&txq->ift_netmap_timer, 1108 NETMAP_TX_TIMER_US * SBT_1US, SBT_1US, 1109 iflib_netmap_timer, txq, 1110 txq->ift_netmap_timer.c_cpu, 0); 1111 } 1112 return (0); 1113 } 1114 1115 /* 1116 * Reconcile kernel and user view of the receive ring. 1117 * Same as for the txsync, this routine must be efficient. 1118 * The caller guarantees a single invocations, but races against 1119 * the rest of the driver should be handled here. 1120 * 1121 * On call, kring->rhead is the first packet that userspace wants 1122 * to keep, and kring->rcur is the wakeup point. 1123 * The kernel has previously reported packets up to kring->rtail. 1124 * 1125 * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective 1126 * of whether or not we received an interrupt. 
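 *
 * The work proceeds in two steps: newly received packets are imported
 * into the netmap ring (advancing nr_hwtail), and buffers released by
 * userspace are handed back to the NIC via netmap_fl_refill().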
1127 */ 1128 static int 1129 iflib_netmap_rxsync(struct netmap_kring *kring, int flags) 1130 { 1131 struct netmap_adapter *na = kring->na; 1132 struct netmap_ring *ring = kring->ring; 1133 if_t ifp = na->ifp; 1134 uint32_t nm_i; /* index into the netmap ring */ 1135 uint32_t nic_i; /* index into the NIC ring */ 1136 u_int n; 1137 u_int const lim = kring->nkr_num_slots - 1; 1138 int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; 1139 int i = 0; 1140 1141 if_ctx_t ctx = ifp->if_softc; 1142 if_shared_ctx_t sctx = ctx->ifc_sctx; 1143 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 1144 iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; 1145 iflib_fl_t fl = &rxq->ifr_fl[0]; 1146 struct if_rxd_info ri; 1147 qidx_t *cidxp; 1148 1149 /* 1150 * netmap only uses free list 0, to avoid out of order consumption 1151 * of receive buffers 1152 */ 1153 1154 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, 1155 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 1156 1157 /* 1158 * First part: import newly received packets. 1159 * 1160 * nm_i is the index of the next free slot in the netmap ring, 1161 * nic_i is the index of the next received packet in the NIC ring 1162 * (or in the free list 0 if IFLIB_HAS_RXCQ is set), and they may 1163 * differ in case if_init() has been called while 1164 * in netmap mode. For the receive ring we have 1165 * 1166 * nic_i = fl->ifl_cidx; 1167 * nm_i = kring->nr_hwtail (previous) 1168 * and 1169 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size 1170 * 1171 * fl->ifl_cidx is set to 0 on a ring reinit 1172 */ 1173 if (netmap_no_pendintr || force_update) { 1174 uint32_t hwtail_lim = nm_prev(kring->nr_hwcur, lim); 1175 bool have_rxcq = sctx->isc_flags & IFLIB_HAS_RXCQ; 1176 int crclen = iflib_crcstrip ? 0 : 4; 1177 int error, avail; 1178 1179 /* 1180 * For the free list consumer index, we use the same 1181 * logic as in iflib_rxeof(). 1182 */ 1183 if (have_rxcq) 1184 cidxp = &rxq->ifr_cq_cidx; 1185 else 1186 cidxp = &fl->ifl_cidx; 1187 avail = ctx->isc_rxd_available(ctx->ifc_softc, 1188 rxq->ifr_id, *cidxp, USHRT_MAX); 1189 1190 nic_i = fl->ifl_cidx; 1191 nm_i = netmap_idx_n2k(kring, nic_i); 1192 MPASS(nm_i == kring->nr_hwtail); 1193 for (n = 0; avail > 0 && nm_i != hwtail_lim; n++, avail--) { 1194 rxd_info_zero(&ri); 1195 ri.iri_frags = rxq->ifr_frags; 1196 ri.iri_qsidx = kring->ring_id; 1197 ri.iri_ifp = ctx->ifc_ifp; 1198 ri.iri_cidx = *cidxp; 1199 1200 error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); 1201 for (i = 0; i < ri.iri_nfrags; i++) { 1202 if (error) { 1203 ring->slot[nm_i].len = 0; 1204 ring->slot[nm_i].flags = 0; 1205 } else { 1206 ring->slot[nm_i].len = ri.iri_frags[i].irf_len; 1207 if (i == (ri.iri_nfrags - 1)) { 1208 ring->slot[nm_i].len -= crclen; 1209 ring->slot[nm_i].flags = 0; 1210 } else 1211 ring->slot[nm_i].flags = NS_MOREFRAG; 1212 } 1213 1214 bus_dmamap_sync(fl->ifl_buf_tag, 1215 fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD); 1216 nm_i = nm_next(nm_i, lim); 1217 fl->ifl_cidx = nic_i = nm_next(nic_i, lim); 1218 } 1219 1220 if (have_rxcq) { 1221 *cidxp = ri.iri_cidx; 1222 while (*cidxp >= scctx->isc_nrxd[0]) 1223 *cidxp -= scctx->isc_nrxd[0]; 1224 } 1225 1226 } 1227 if (n) { /* update the state variables */ 1228 if (netmap_no_pendintr && !force_update) { 1229 /* diagnostics */ 1230 iflib_rx_miss ++; 1231 iflib_rx_miss_bufs += n; 1232 } 1233 kring->nr_hwtail = nm_i; 1234 } 1235 kring->nr_kflags &= ~NKR_PENDINTR; 1236 } 1237 /* 1238 * Second part: skip past packets that userspace has released. 
1239 * (kring->nr_hwcur to head excluded), 1240 * and make the buffers available for reception. 1241 * As usual nm_i is the index in the netmap ring, 1242 * nic_i is the index in the NIC ring, and 1243 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size 1244 */ 1245 netmap_fl_refill(rxq, kring, false); 1246 1247 return (0); 1248 } 1249 1250 static void 1251 iflib_netmap_intr(struct netmap_adapter *na, int onoff) 1252 { 1253 if_ctx_t ctx = na->ifp->if_softc; 1254 1255 CTX_LOCK(ctx); 1256 if (onoff) { 1257 IFDI_INTR_ENABLE(ctx); 1258 } else { 1259 IFDI_INTR_DISABLE(ctx); 1260 } 1261 CTX_UNLOCK(ctx); 1262 } 1263 1264 static int 1265 iflib_netmap_attach(if_ctx_t ctx) 1266 { 1267 struct netmap_adapter na; 1268 1269 bzero(&na, sizeof(na)); 1270 1271 na.ifp = ctx->ifc_ifp; 1272 na.na_flags = NAF_BDG_MAYSLEEP | NAF_MOREFRAG; 1273 MPASS(ctx->ifc_softc_ctx.isc_ntxqsets); 1274 MPASS(ctx->ifc_softc_ctx.isc_nrxqsets); 1275 1276 na.num_tx_desc = iflib_num_tx_descs(ctx); 1277 na.num_rx_desc = iflib_num_rx_descs(ctx); 1278 na.nm_txsync = iflib_netmap_txsync; 1279 na.nm_rxsync = iflib_netmap_rxsync; 1280 na.nm_register = iflib_netmap_register; 1281 na.nm_intr = iflib_netmap_intr; 1282 na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets; 1283 na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets; 1284 return (netmap_attach(&na)); 1285 } 1286 1287 static int 1288 iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) 1289 { 1290 struct netmap_adapter *na = NA(ctx->ifc_ifp); 1291 struct netmap_slot *slot; 1292 1293 slot = netmap_reset(na, NR_TX, txq->ift_id, 0); 1294 if (slot == NULL) 1295 return (0); 1296 for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { 1297 /* 1298 * In netmap mode, set the map for the packet buffer. 1299 * NOTE: Some drivers (not this one) also need to set 1300 * the physical buffer address in the NIC ring. 1301 * netmap_idx_n2k() maps a nic index, i, into the corresponding 1302 * netmap slot index, si 1303 */ 1304 int si = netmap_idx_n2k(na->tx_rings[txq->ift_id], i); 1305 netmap_load_map(na, txq->ift_buf_tag, txq->ift_sds.ifsd_map[i], 1306 NMB(na, slot + si)); 1307 } 1308 return (1); 1309 } 1310 1311 static int 1312 iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) 1313 { 1314 struct netmap_adapter *na = NA(ctx->ifc_ifp); 1315 struct netmap_kring *kring; 1316 struct netmap_slot *slot; 1317 1318 slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); 1319 if (slot == NULL) 1320 return (0); 1321 kring = na->rx_rings[rxq->ifr_id]; 1322 netmap_fl_refill(rxq, kring, true); 1323 return (1); 1324 } 1325 1326 static void 1327 iflib_netmap_timer(void *arg) 1328 { 1329 iflib_txq_t txq = arg; 1330 if_ctx_t ctx = txq->ift_ctx; 1331 1332 /* 1333 * Wake up the netmap application, to give it a chance to 1334 * call txsync and reclaim more completed TX buffers. 
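	 * This callout is armed from iflib_netmap_txsync() when buffers
	 * remain to be reclaimed and TX interrupts are not in use (see
	 * NETMAP_TX_TIMER_US).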
1335 */ 1336 netmap_tx_irq(ctx->ifc_ifp, txq->ift_id); 1337 } 1338 1339 #define iflib_netmap_detach(ifp) netmap_detach(ifp) 1340 1341 #else 1342 #define iflib_netmap_txq_init(ctx, txq) (0) 1343 #define iflib_netmap_rxq_init(ctx, rxq) (0) 1344 #define iflib_netmap_detach(ifp) 1345 #define netmap_enable_all_rings(ifp) 1346 #define netmap_disable_all_rings(ifp) 1347 1348 #define iflib_netmap_attach(ctx) (0) 1349 #define netmap_rx_irq(ifp, qid, budget) (0) 1350 #endif 1351 1352 #if defined(__i386__) || defined(__amd64__) 1353 static __inline void 1354 prefetch(void *x) 1355 { 1356 __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); 1357 } 1358 static __inline void 1359 prefetch2cachelines(void *x) 1360 { 1361 __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); 1362 #if (CACHE_LINE_SIZE < 128) 1363 __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long))))); 1364 #endif 1365 } 1366 #else 1367 #define prefetch(x) 1368 #define prefetch2cachelines(x) 1369 #endif 1370 1371 static void 1372 iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid) 1373 { 1374 iflib_fl_t fl; 1375 1376 fl = &rxq->ifr_fl[flid]; 1377 iru->iru_paddrs = fl->ifl_bus_addrs; 1378 iru->iru_idxs = fl->ifl_rxd_idxs; 1379 iru->iru_qsidx = rxq->ifr_id; 1380 iru->iru_buf_size = fl->ifl_buf_size; 1381 iru->iru_flidx = fl->ifl_id; 1382 } 1383 1384 static void 1385 _iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) 1386 { 1387 if (err) 1388 return; 1389 *(bus_addr_t *) arg = segs[0].ds_addr; 1390 } 1391 1392 int 1393 iflib_dma_alloc_align(if_ctx_t ctx, int size, int align, iflib_dma_info_t dma, int mapflags) 1394 { 1395 int err; 1396 device_t dev = ctx->ifc_dev; 1397 1398 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1399 align, 0, /* alignment, bounds */ 1400 BUS_SPACE_MAXADDR, /* lowaddr */ 1401 BUS_SPACE_MAXADDR, /* highaddr */ 1402 NULL, NULL, /* filter, filterarg */ 1403 size, /* maxsize */ 1404 1, /* nsegments */ 1405 size, /* maxsegsize */ 1406 BUS_DMA_ALLOCNOW, /* flags */ 1407 NULL, /* lockfunc */ 1408 NULL, /* lockarg */ 1409 &dma->idi_tag); 1410 if (err) { 1411 device_printf(dev, 1412 "%s: bus_dma_tag_create failed: %d\n", 1413 __func__, err); 1414 goto fail_0; 1415 } 1416 1417 err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr, 1418 BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map); 1419 if (err) { 1420 device_printf(dev, 1421 "%s: bus_dmamem_alloc(%ju) failed: %d\n", 1422 __func__, (uintmax_t)size, err); 1423 goto fail_1; 1424 } 1425 1426 dma->idi_paddr = IF_BAD_DMA; 1427 err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr, 1428 size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT); 1429 if (err || dma->idi_paddr == IF_BAD_DMA) { 1430 device_printf(dev, 1431 "%s: bus_dmamap_load failed: %d\n", 1432 __func__, err); 1433 goto fail_2; 1434 } 1435 1436 dma->idi_size = size; 1437 return (0); 1438 1439 fail_2: 1440 bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); 1441 fail_1: 1442 bus_dma_tag_destroy(dma->idi_tag); 1443 fail_0: 1444 dma->idi_tag = NULL; 1445 1446 return (err); 1447 } 1448 1449 int 1450 iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags) 1451 { 1452 if_shared_ctx_t sctx = ctx->ifc_sctx; 1453 1454 KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized")); 1455 1456 return (iflib_dma_alloc_align(ctx, size, sctx->isc_q_align, dma, mapflags)); 1457 } 1458 1459 int 1460 iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, 
iflib_dma_info_t *dmalist, int mapflags, int count) 1461 { 1462 int i, err; 1463 iflib_dma_info_t *dmaiter; 1464 1465 dmaiter = dmalist; 1466 for (i = 0; i < count; i++, dmaiter++) { 1467 if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, mapflags)) != 0) 1468 break; 1469 } 1470 if (err) 1471 iflib_dma_free_multi(dmalist, i); 1472 return (err); 1473 } 1474 1475 void 1476 iflib_dma_free(iflib_dma_info_t dma) 1477 { 1478 if (dma->idi_tag == NULL) 1479 return; 1480 if (dma->idi_paddr != IF_BAD_DMA) { 1481 bus_dmamap_sync(dma->idi_tag, dma->idi_map, 1482 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 1483 bus_dmamap_unload(dma->idi_tag, dma->idi_map); 1484 dma->idi_paddr = IF_BAD_DMA; 1485 } 1486 if (dma->idi_vaddr != NULL) { 1487 bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); 1488 dma->idi_vaddr = NULL; 1489 } 1490 bus_dma_tag_destroy(dma->idi_tag); 1491 dma->idi_tag = NULL; 1492 } 1493 1494 void 1495 iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count) 1496 { 1497 int i; 1498 iflib_dma_info_t *dmaiter = dmalist; 1499 1500 for (i = 0; i < count; i++, dmaiter++) 1501 iflib_dma_free(*dmaiter); 1502 } 1503 1504 static int 1505 iflib_fast_intr(void *arg) 1506 { 1507 iflib_filter_info_t info = arg; 1508 struct grouptask *gtask = info->ifi_task; 1509 int result; 1510 1511 DBG_COUNTER_INC(fast_intrs); 1512 if (info->ifi_filter != NULL) { 1513 result = info->ifi_filter(info->ifi_filter_arg); 1514 if ((result & FILTER_SCHEDULE_THREAD) == 0) 1515 return (result); 1516 } 1517 1518 GROUPTASK_ENQUEUE(gtask); 1519 return (FILTER_HANDLED); 1520 } 1521 1522 static int 1523 iflib_fast_intr_rxtx(void *arg) 1524 { 1525 iflib_filter_info_t info = arg; 1526 struct grouptask *gtask = info->ifi_task; 1527 if_ctx_t ctx; 1528 iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; 1529 iflib_txq_t txq; 1530 void *sc; 1531 int i, cidx, result; 1532 qidx_t txqid; 1533 bool intr_enable, intr_legacy; 1534 1535 DBG_COUNTER_INC(fast_intrs); 1536 if (info->ifi_filter != NULL) { 1537 result = info->ifi_filter(info->ifi_filter_arg); 1538 if ((result & FILTER_SCHEDULE_THREAD) == 0) 1539 return (result); 1540 } 1541 1542 ctx = rxq->ifr_ctx; 1543 sc = ctx->ifc_softc; 1544 intr_enable = false; 1545 intr_legacy = !!(ctx->ifc_flags & IFC_LEGACY); 1546 MPASS(rxq->ifr_ntxqirq); 1547 for (i = 0; i < rxq->ifr_ntxqirq; i++) { 1548 txqid = rxq->ifr_txqid[i]; 1549 txq = &ctx->ifc_txqs[txqid]; 1550 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, 1551 BUS_DMASYNC_POSTREAD); 1552 if (!ctx->isc_txd_credits_update(sc, txqid, false)) { 1553 if (intr_legacy) 1554 intr_enable = true; 1555 else 1556 IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); 1557 continue; 1558 } 1559 GROUPTASK_ENQUEUE(&txq->ift_task); 1560 } 1561 if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) 1562 cidx = rxq->ifr_cq_cidx; 1563 else 1564 cidx = rxq->ifr_fl[0].ifl_cidx; 1565 if (iflib_rxd_avail(ctx, rxq, cidx, 1)) 1566 GROUPTASK_ENQUEUE(gtask); 1567 else { 1568 if (intr_legacy) 1569 intr_enable = true; 1570 else 1571 IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); 1572 DBG_COUNTER_INC(rx_intr_enables); 1573 } 1574 if (intr_enable) 1575 IFDI_INTR_ENABLE(ctx); 1576 return (FILTER_HANDLED); 1577 } 1578 1579 static int 1580 iflib_fast_intr_ctx(void *arg) 1581 { 1582 iflib_filter_info_t info = arg; 1583 struct grouptask *gtask = info->ifi_task; 1584 int result; 1585 1586 DBG_COUNTER_INC(fast_intrs); 1587 if (info->ifi_filter != NULL) { 1588 result = info->ifi_filter(info->ifi_filter_arg); 1589 if ((result & FILTER_SCHEDULE_THREAD) == 0) 1590 return (result); 1591 } 
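	/*
	 * No driver filter, or the filter asked for the threaded handler
	 * (FILTER_SCHEDULE_THREAD): defer the work to the group task.
	 */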
1592 1593 GROUPTASK_ENQUEUE(gtask); 1594 return (FILTER_HANDLED); 1595 } 1596 1597 static int 1598 _iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, 1599 driver_filter_t filter, driver_intr_t handler, void *arg, 1600 const char *name) 1601 { 1602 struct resource *res; 1603 void *tag = NULL; 1604 device_t dev = ctx->ifc_dev; 1605 int flags, i, rc; 1606 1607 flags = RF_ACTIVE; 1608 if (ctx->ifc_flags & IFC_LEGACY) 1609 flags |= RF_SHAREABLE; 1610 MPASS(rid < 512); 1611 i = rid; 1612 res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &i, flags); 1613 if (res == NULL) { 1614 device_printf(dev, 1615 "failed to allocate IRQ for rid %d, name %s.\n", rid, name); 1616 return (ENOMEM); 1617 } 1618 irq->ii_res = res; 1619 KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL")); 1620 rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET, 1621 filter, handler, arg, &tag); 1622 if (rc != 0) { 1623 device_printf(dev, 1624 "failed to setup interrupt for rid %d, name %s: %d\n", 1625 rid, name ? name : "unknown", rc); 1626 return (rc); 1627 } else if (name) 1628 bus_describe_intr(dev, res, tag, "%s", name); 1629 1630 irq->ii_tag = tag; 1631 return (0); 1632 } 1633 1634 /********************************************************************* 1635 * 1636 * Allocate DMA resources for TX buffers as well as memory for the TX 1637 * mbuf map. TX DMA maps (non-TSO/TSO) and TX mbuf map are kept in a 1638 * iflib_sw_tx_desc_array structure, storing all the information that 1639 * is needed to transmit a packet on the wire. This is called only 1640 * once at attach, setup is done every reset. 1641 * 1642 **********************************************************************/ 1643 static int 1644 iflib_txsd_alloc(iflib_txq_t txq) 1645 { 1646 if_ctx_t ctx = txq->ift_ctx; 1647 if_shared_ctx_t sctx = ctx->ifc_sctx; 1648 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 1649 device_t dev = ctx->ifc_dev; 1650 bus_size_t tsomaxsize; 1651 int err, nsegments, ntsosegments; 1652 bool tso; 1653 1654 nsegments = scctx->isc_tx_nsegments; 1655 ntsosegments = scctx->isc_tx_tso_segments_max; 1656 tsomaxsize = scctx->isc_tx_tso_size_max; 1657 if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_VLAN_MTU) 1658 tsomaxsize += sizeof(struct ether_vlan_header); 1659 MPASS(scctx->isc_ntxd[0] > 0); 1660 MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0); 1661 MPASS(nsegments > 0); 1662 if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_TSO) { 1663 MPASS(ntsosegments > 0); 1664 MPASS(sctx->isc_tso_maxsize >= tsomaxsize); 1665 } 1666 1667 /* 1668 * Set up DMA tags for TX buffers. 
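	 * One tag bounds ordinary frames (isc_tx_maxsize bytes in at most
	 * isc_tx_nsegments segments); when the interface is TSO-capable a
	 * second tag bounds TSO frames (tsomaxsize bytes in at most
	 * isc_tx_tso_segments_max segments).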
1669 */ 1670 if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1671 1, 0, /* alignment, bounds */ 1672 BUS_SPACE_MAXADDR, /* lowaddr */ 1673 BUS_SPACE_MAXADDR, /* highaddr */ 1674 NULL, NULL, /* filter, filterarg */ 1675 sctx->isc_tx_maxsize, /* maxsize */ 1676 nsegments, /* nsegments */ 1677 sctx->isc_tx_maxsegsize, /* maxsegsize */ 1678 0, /* flags */ 1679 NULL, /* lockfunc */ 1680 NULL, /* lockfuncarg */ 1681 &txq->ift_buf_tag))) { 1682 device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err); 1683 device_printf(dev,"maxsize: %ju nsegments: %d maxsegsize: %ju\n", 1684 (uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize); 1685 goto fail; 1686 } 1687 tso = (if_getcapabilities(ctx->ifc_ifp) & IFCAP_TSO) != 0; 1688 if (tso && (err = bus_dma_tag_create(bus_get_dma_tag(dev), 1689 1, 0, /* alignment, bounds */ 1690 BUS_SPACE_MAXADDR, /* lowaddr */ 1691 BUS_SPACE_MAXADDR, /* highaddr */ 1692 NULL, NULL, /* filter, filterarg */ 1693 tsomaxsize, /* maxsize */ 1694 ntsosegments, /* nsegments */ 1695 sctx->isc_tso_maxsegsize,/* maxsegsize */ 1696 0, /* flags */ 1697 NULL, /* lockfunc */ 1698 NULL, /* lockfuncarg */ 1699 &txq->ift_tso_buf_tag))) { 1700 device_printf(dev, "Unable to allocate TSO TX DMA tag: %d\n", 1701 err); 1702 goto fail; 1703 } 1704 1705 /* Allocate memory for the TX mbuf map. */ 1706 if (!(txq->ift_sds.ifsd_m = 1707 (struct mbuf **) malloc(sizeof(struct mbuf *) * 1708 scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 1709 device_printf(dev, "Unable to allocate TX mbuf map memory\n"); 1710 err = ENOMEM; 1711 goto fail; 1712 } 1713 1714 /* 1715 * Create the DMA maps for TX buffers. 1716 */ 1717 if ((txq->ift_sds.ifsd_map = (bus_dmamap_t *)malloc( 1718 sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], 1719 M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { 1720 device_printf(dev, 1721 "Unable to allocate TX buffer DMA map memory\n"); 1722 err = ENOMEM; 1723 goto fail; 1724 } 1725 if (tso && (txq->ift_sds.ifsd_tso_map = (bus_dmamap_t *)malloc( 1726 sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], 1727 M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { 1728 device_printf(dev, 1729 "Unable to allocate TSO TX buffer map memory\n"); 1730 err = ENOMEM; 1731 goto fail; 1732 } 1733 for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) { 1734 err = bus_dmamap_create(txq->ift_buf_tag, 0, 1735 &txq->ift_sds.ifsd_map[i]); 1736 if (err != 0) { 1737 device_printf(dev, "Unable to create TX DMA map\n"); 1738 goto fail; 1739 } 1740 if (!tso) 1741 continue; 1742 err = bus_dmamap_create(txq->ift_tso_buf_tag, 0, 1743 &txq->ift_sds.ifsd_tso_map[i]); 1744 if (err != 0) { 1745 device_printf(dev, "Unable to create TSO TX DMA map\n"); 1746 goto fail; 1747 } 1748 } 1749 return (0); 1750 fail: 1751 /* We free all, it handles case where we are in the middle */ 1752 iflib_tx_structures_free(ctx); 1753 return (err); 1754 } 1755 1756 static void 1757 iflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i) 1758 { 1759 bus_dmamap_t map; 1760 1761 if (txq->ift_sds.ifsd_map != NULL) { 1762 map = txq->ift_sds.ifsd_map[i]; 1763 bus_dmamap_sync(txq->ift_buf_tag, map, BUS_DMASYNC_POSTWRITE); 1764 bus_dmamap_unload(txq->ift_buf_tag, map); 1765 bus_dmamap_destroy(txq->ift_buf_tag, map); 1766 txq->ift_sds.ifsd_map[i] = NULL; 1767 } 1768 1769 if (txq->ift_sds.ifsd_tso_map != NULL) { 1770 map = txq->ift_sds.ifsd_tso_map[i]; 1771 bus_dmamap_sync(txq->ift_tso_buf_tag, map, 1772 BUS_DMASYNC_POSTWRITE); 1773 bus_dmamap_unload(txq->ift_tso_buf_tag, map); 1774 
bus_dmamap_destroy(txq->ift_tso_buf_tag, map); 1775 txq->ift_sds.ifsd_tso_map[i] = NULL; 1776 } 1777 } 1778 1779 static void 1780 iflib_txq_destroy(iflib_txq_t txq) 1781 { 1782 if_ctx_t ctx = txq->ift_ctx; 1783 1784 for (int i = 0; i < txq->ift_size; i++) 1785 iflib_txsd_destroy(ctx, txq, i); 1786 1787 if (txq->ift_br != NULL) { 1788 ifmp_ring_free(txq->ift_br); 1789 txq->ift_br = NULL; 1790 } 1791 1792 mtx_destroy(&txq->ift_mtx); 1793 1794 if (txq->ift_sds.ifsd_map != NULL) { 1795 free(txq->ift_sds.ifsd_map, M_IFLIB); 1796 txq->ift_sds.ifsd_map = NULL; 1797 } 1798 if (txq->ift_sds.ifsd_tso_map != NULL) { 1799 free(txq->ift_sds.ifsd_tso_map, M_IFLIB); 1800 txq->ift_sds.ifsd_tso_map = NULL; 1801 } 1802 if (txq->ift_sds.ifsd_m != NULL) { 1803 free(txq->ift_sds.ifsd_m, M_IFLIB); 1804 txq->ift_sds.ifsd_m = NULL; 1805 } 1806 if (txq->ift_buf_tag != NULL) { 1807 bus_dma_tag_destroy(txq->ift_buf_tag); 1808 txq->ift_buf_tag = NULL; 1809 } 1810 if (txq->ift_tso_buf_tag != NULL) { 1811 bus_dma_tag_destroy(txq->ift_tso_buf_tag); 1812 txq->ift_tso_buf_tag = NULL; 1813 } 1814 if (txq->ift_ifdi != NULL) { 1815 free(txq->ift_ifdi, M_IFLIB); 1816 } 1817 } 1818 1819 static void 1820 iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i) 1821 { 1822 struct mbuf **mp; 1823 1824 mp = &txq->ift_sds.ifsd_m[i]; 1825 if (*mp == NULL) 1826 return; 1827 1828 if (txq->ift_sds.ifsd_map != NULL) { 1829 bus_dmamap_sync(txq->ift_buf_tag, 1830 txq->ift_sds.ifsd_map[i], BUS_DMASYNC_POSTWRITE); 1831 bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[i]); 1832 } 1833 if (txq->ift_sds.ifsd_tso_map != NULL) { 1834 bus_dmamap_sync(txq->ift_tso_buf_tag, 1835 txq->ift_sds.ifsd_tso_map[i], BUS_DMASYNC_POSTWRITE); 1836 bus_dmamap_unload(txq->ift_tso_buf_tag, 1837 txq->ift_sds.ifsd_tso_map[i]); 1838 } 1839 m_freem(*mp); 1840 DBG_COUNTER_INC(tx_frees); 1841 *mp = NULL; 1842 } 1843 1844 static int 1845 iflib_txq_setup(iflib_txq_t txq) 1846 { 1847 if_ctx_t ctx = txq->ift_ctx; 1848 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 1849 if_shared_ctx_t sctx = ctx->ifc_sctx; 1850 iflib_dma_info_t di; 1851 int i; 1852 1853 /* Set number of descriptors available */ 1854 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 1855 /* XXX make configurable */ 1856 txq->ift_update_freq = IFLIB_DEFAULT_TX_UPDATE_FREQ; 1857 1858 /* Reset indices */ 1859 txq->ift_cidx_processed = 0; 1860 txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0; 1861 txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset]; 1862 1863 for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; i++, di++) 1864 bzero((void *)di->idi_vaddr, di->idi_size); 1865 1866 IFDI_TXQ_SETUP(ctx, txq->ift_id); 1867 for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; i++, di++) 1868 bus_dmamap_sync(di->idi_tag, di->idi_map, 1869 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1870 return (0); 1871 } 1872 1873 /********************************************************************* 1874 * 1875 * Allocate DMA resources for RX buffers as well as memory for the RX 1876 * mbuf map, direct RX cluster pointer map and RX cluster bus address 1877 * map. RX DMA map, RX mbuf map, direct RX cluster pointer map and 1878 * RX cluster map are kept in a iflib_sw_rx_desc_array structure. 1879 * Since we use use one entry in iflib_sw_rx_desc_array per received 1880 * packet, the maximum number of entries we'll need is equal to the 1881 * number of hardware receive descriptors that we've allocated. 
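 * Everything here is allocated per free list; queues carrying more than
 * one free list (ifr_nfl > 1) repeat the allocations for each list.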
1882 * 1883 **********************************************************************/ 1884 static int 1885 iflib_rxsd_alloc(iflib_rxq_t rxq) 1886 { 1887 if_ctx_t ctx = rxq->ifr_ctx; 1888 if_shared_ctx_t sctx = ctx->ifc_sctx; 1889 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 1890 device_t dev = ctx->ifc_dev; 1891 iflib_fl_t fl; 1892 int err; 1893 1894 MPASS(scctx->isc_nrxd[0] > 0); 1895 MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0); 1896 1897 fl = rxq->ifr_fl; 1898 for (int i = 0; i < rxq->ifr_nfl; i++, fl++) { 1899 fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */ 1900 /* Set up DMA tag for RX buffers. */ 1901 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1902 1, 0, /* alignment, bounds */ 1903 BUS_SPACE_MAXADDR, /* lowaddr */ 1904 BUS_SPACE_MAXADDR, /* highaddr */ 1905 NULL, NULL, /* filter, filterarg */ 1906 sctx->isc_rx_maxsize, /* maxsize */ 1907 sctx->isc_rx_nsegments, /* nsegments */ 1908 sctx->isc_rx_maxsegsize, /* maxsegsize */ 1909 0, /* flags */ 1910 NULL, /* lockfunc */ 1911 NULL, /* lockarg */ 1912 &fl->ifl_buf_tag); 1913 if (err) { 1914 device_printf(dev, 1915 "Unable to allocate RX DMA tag: %d\n", err); 1916 goto fail; 1917 } 1918 1919 /* Allocate memory for the RX mbuf map. */ 1920 if (!(fl->ifl_sds.ifsd_m = 1921 (struct mbuf **) malloc(sizeof(struct mbuf *) * 1922 scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 1923 device_printf(dev, 1924 "Unable to allocate RX mbuf map memory\n"); 1925 err = ENOMEM; 1926 goto fail; 1927 } 1928 1929 /* Allocate memory for the direct RX cluster pointer map. */ 1930 if (!(fl->ifl_sds.ifsd_cl = 1931 (caddr_t *) malloc(sizeof(caddr_t) * 1932 scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 1933 device_printf(dev, 1934 "Unable to allocate RX cluster map memory\n"); 1935 err = ENOMEM; 1936 goto fail; 1937 } 1938 1939 /* Allocate memory for the RX cluster bus address map. */ 1940 if (!(fl->ifl_sds.ifsd_ba = 1941 (bus_addr_t *) malloc(sizeof(bus_addr_t) * 1942 scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 1943 device_printf(dev, 1944 "Unable to allocate RX bus address map memory\n"); 1945 err = ENOMEM; 1946 goto fail; 1947 } 1948 1949 /* 1950 * Create the DMA maps for RX buffers. 
1951 */ 1952 if (!(fl->ifl_sds.ifsd_map = 1953 (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 1954 device_printf(dev, 1955 "Unable to allocate RX buffer DMA map memory\n"); 1956 err = ENOMEM; 1957 goto fail; 1958 } 1959 for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) { 1960 err = bus_dmamap_create(fl->ifl_buf_tag, 0, 1961 &fl->ifl_sds.ifsd_map[i]); 1962 if (err != 0) { 1963 device_printf(dev, "Unable to create RX buffer DMA map\n"); 1964 goto fail; 1965 } 1966 } 1967 } 1968 return (0); 1969 1970 fail: 1971 iflib_rx_structures_free(ctx); 1972 return (err); 1973 } 1974 1975 /* 1976 * Internal service routines 1977 */ 1978 1979 struct rxq_refill_cb_arg { 1980 int error; 1981 bus_dma_segment_t seg; 1982 int nseg; 1983 }; 1984 1985 static void 1986 _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 1987 { 1988 struct rxq_refill_cb_arg *cb_arg = arg; 1989 1990 cb_arg->error = error; 1991 cb_arg->seg = segs[0]; 1992 cb_arg->nseg = nseg; 1993 } 1994 1995 /** 1996 * iflib_fl_refill - refill an rxq free-buffer list 1997 * @ctx: the iflib context 1998 * @fl: the free list to refill 1999 * @count: the number of new buffers to allocate 2000 * 2001 * (Re)populate an rxq free-buffer list with up to @count new packet buffers. 2002 * The caller must assure that @count does not exceed the queue's capacity 2003 * minus one (since we always leave a descriptor unavailable). 2004 */ 2005 static uint8_t 2006 iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) 2007 { 2008 struct if_rxd_update iru; 2009 struct rxq_refill_cb_arg cb_arg; 2010 struct mbuf *m; 2011 caddr_t cl, *sd_cl; 2012 struct mbuf **sd_m; 2013 bus_dmamap_t *sd_map; 2014 bus_addr_t bus_addr, *sd_ba; 2015 int err, frag_idx, i, idx, n, pidx; 2016 qidx_t credits; 2017 2018 MPASS(count <= fl->ifl_size - fl->ifl_credits - 1); 2019 2020 sd_m = fl->ifl_sds.ifsd_m; 2021 sd_map = fl->ifl_sds.ifsd_map; 2022 sd_cl = fl->ifl_sds.ifsd_cl; 2023 sd_ba = fl->ifl_sds.ifsd_ba; 2024 pidx = fl->ifl_pidx; 2025 idx = pidx; 2026 frag_idx = fl->ifl_fragidx; 2027 credits = fl->ifl_credits; 2028 2029 i = 0; 2030 n = count; 2031 MPASS(n > 0); 2032 MPASS(credits + n <= fl->ifl_size); 2033 2034 if (pidx < fl->ifl_cidx) 2035 MPASS(pidx + n <= fl->ifl_cidx); 2036 if (pidx == fl->ifl_cidx && (credits < fl->ifl_size)) 2037 MPASS(fl->ifl_gen == 0); 2038 if (pidx > fl->ifl_cidx) 2039 MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx); 2040 2041 DBG_COUNTER_INC(fl_refills); 2042 if (n > 8) 2043 DBG_COUNTER_INC(fl_refills_large); 2044 iru_init(&iru, fl->ifl_rxq, fl->ifl_id); 2045 while (n-- > 0) { 2046 /* 2047 * We allocate an uninitialized mbuf + cluster, mbuf is 2048 * initialized after rx. 
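 * (MT_NOINIT asks m_gethdr() for an mbuf whose header is left
 * untouched; m_init() is run on it in the receive path once a packet
 * has actually landed in the cluster.)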
2049 * 2050 * If the cluster is still set then we know a minimum sized 2051 * packet was received 2052 */ 2053 bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size, 2054 &frag_idx); 2055 if (frag_idx < 0) 2056 bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx); 2057 MPASS(frag_idx >= 0); 2058 if ((cl = sd_cl[frag_idx]) == NULL) { 2059 cl = uma_zalloc(fl->ifl_zone, M_NOWAIT); 2060 if (__predict_false(cl == NULL)) 2061 break; 2062 2063 cb_arg.error = 0; 2064 MPASS(sd_map != NULL); 2065 err = bus_dmamap_load(fl->ifl_buf_tag, sd_map[frag_idx], 2066 cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 2067 BUS_DMA_NOWAIT); 2068 if (__predict_false(err != 0 || cb_arg.error)) { 2069 uma_zfree(fl->ifl_zone, cl); 2070 break; 2071 } 2072 2073 sd_ba[frag_idx] = bus_addr = cb_arg.seg.ds_addr; 2074 sd_cl[frag_idx] = cl; 2075 #if MEMORY_LOGGING 2076 fl->ifl_cl_enqueued++; 2077 #endif 2078 } else { 2079 bus_addr = sd_ba[frag_idx]; 2080 } 2081 bus_dmamap_sync(fl->ifl_buf_tag, sd_map[frag_idx], 2082 BUS_DMASYNC_PREREAD); 2083 2084 if (sd_m[frag_idx] == NULL) { 2085 m = m_gethdr(M_NOWAIT, MT_NOINIT); 2086 if (__predict_false(m == NULL)) 2087 break; 2088 sd_m[frag_idx] = m; 2089 } 2090 bit_set(fl->ifl_rx_bitmap, frag_idx); 2091 #if MEMORY_LOGGING 2092 fl->ifl_m_enqueued++; 2093 #endif 2094 2095 DBG_COUNTER_INC(rx_allocs); 2096 fl->ifl_rxd_idxs[i] = frag_idx; 2097 fl->ifl_bus_addrs[i] = bus_addr; 2098 credits++; 2099 i++; 2100 MPASS(credits <= fl->ifl_size); 2101 if (++idx == fl->ifl_size) { 2102 #ifdef INVARIANTS 2103 fl->ifl_gen = 1; 2104 #endif 2105 idx = 0; 2106 } 2107 if (n == 0 || i == IFLIB_MAX_RX_REFRESH) { 2108 iru.iru_pidx = pidx; 2109 iru.iru_count = i; 2110 ctx->isc_rxd_refill(ctx->ifc_softc, &iru); 2111 fl->ifl_pidx = idx; 2112 fl->ifl_credits = credits; 2113 pidx = idx; 2114 i = 0; 2115 } 2116 } 2117 2118 if (n < count - 1) { 2119 if (i != 0) { 2120 iru.iru_pidx = pidx; 2121 iru.iru_count = i; 2122 ctx->isc_rxd_refill(ctx->ifc_softc, &iru); 2123 fl->ifl_pidx = idx; 2124 fl->ifl_credits = credits; 2125 } 2126 DBG_COUNTER_INC(rxd_flush); 2127 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, 2128 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 2129 ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, 2130 fl->ifl_id, fl->ifl_pidx); 2131 if (__predict_true(bit_test(fl->ifl_rx_bitmap, frag_idx))) { 2132 fl->ifl_fragidx = frag_idx + 1; 2133 if (fl->ifl_fragidx == fl->ifl_size) 2134 fl->ifl_fragidx = 0; 2135 } else { 2136 fl->ifl_fragidx = frag_idx; 2137 } 2138 } 2139 2140 return (n == -1 ? 0 : IFLIB_RXEOF_EMPTY); 2141 } 2142 2143 static inline uint8_t 2144 iflib_fl_refill_all(if_ctx_t ctx, iflib_fl_t fl) 2145 { 2146 /* 2147 * We leave an unused descriptor to avoid pidx to catch up with cidx. 2148 * This is important as it confuses most NICs. For instance, 2149 * Intel NICs have (per receive ring) RDH and RDT registers, where 2150 * RDH points to the next receive descriptor to be used by the NIC, 2151 * and RDT for the next receive descriptor to be published by the 2152 * driver to the NIC (RDT - 1 is thus the last valid one). 2153 * The condition RDH == RDT means no descriptors are available to 2154 * the NIC, and thus it would be ambiguous if it also meant that 2155 * all the descriptors are available to the NIC. 
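 * As a concrete example, a 1024-entry free list never has more than
 * 1023 buffers posted, which is why the reclaimable count below is
 * computed as ifl_size - ifl_credits - 1.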
2156 */ 2157 int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1; 2158 #ifdef INVARIANTS 2159 int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1; 2160 #endif 2161 2162 MPASS(fl->ifl_credits <= fl->ifl_size); 2163 MPASS(reclaimable == delta); 2164 2165 if (reclaimable > 0) 2166 return (iflib_fl_refill(ctx, fl, reclaimable)); 2167 return (0); 2168 } 2169 2170 uint8_t 2171 iflib_in_detach(if_ctx_t ctx) 2172 { 2173 bool in_detach; 2174 2175 STATE_LOCK(ctx); 2176 in_detach = !!(ctx->ifc_flags & IFC_IN_DETACH); 2177 STATE_UNLOCK(ctx); 2178 return (in_detach); 2179 } 2180 2181 static void 2182 iflib_fl_bufs_free(iflib_fl_t fl) 2183 { 2184 iflib_dma_info_t idi = fl->ifl_ifdi; 2185 bus_dmamap_t sd_map; 2186 uint32_t i; 2187 2188 for (i = 0; i < fl->ifl_size; i++) { 2189 struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i]; 2190 caddr_t *sd_cl = &fl->ifl_sds.ifsd_cl[i]; 2191 2192 if (*sd_cl != NULL) { 2193 sd_map = fl->ifl_sds.ifsd_map[i]; 2194 bus_dmamap_sync(fl->ifl_buf_tag, sd_map, 2195 BUS_DMASYNC_POSTREAD); 2196 bus_dmamap_unload(fl->ifl_buf_tag, sd_map); 2197 uma_zfree(fl->ifl_zone, *sd_cl); 2198 *sd_cl = NULL; 2199 if (*sd_m != NULL) { 2200 m_init(*sd_m, M_NOWAIT, MT_DATA, 0); 2201 uma_zfree(zone_mbuf, *sd_m); 2202 *sd_m = NULL; 2203 } 2204 } else { 2205 MPASS(*sd_m == NULL); 2206 } 2207 #if MEMORY_LOGGING 2208 fl->ifl_m_dequeued++; 2209 fl->ifl_cl_dequeued++; 2210 #endif 2211 } 2212 #ifdef INVARIANTS 2213 for (i = 0; i < fl->ifl_size; i++) { 2214 MPASS(fl->ifl_sds.ifsd_cl[i] == NULL); 2215 MPASS(fl->ifl_sds.ifsd_m[i] == NULL); 2216 } 2217 #endif 2218 /* 2219 * Reset free list values 2220 */ 2221 fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0; 2222 bzero(idi->idi_vaddr, idi->idi_size); 2223 } 2224 2225 /********************************************************************* 2226 * 2227 * Initialize a free list and its buffers. 2228 * 2229 **********************************************************************/ 2230 static int 2231 iflib_fl_setup(iflib_fl_t fl) 2232 { 2233 iflib_rxq_t rxq = fl->ifl_rxq; 2234 if_ctx_t ctx = rxq->ifr_ctx; 2235 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 2236 int qidx; 2237 2238 bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size - 1); 2239 /* 2240 ** Free current RX buffer structs and their mbufs 2241 */ 2242 iflib_fl_bufs_free(fl); 2243 /* Now replenish the mbufs */ 2244 MPASS(fl->ifl_credits == 0); 2245 qidx = rxq->ifr_fl_offset + fl->ifl_id; 2246 if (scctx->isc_rxd_buf_size[qidx] != 0) 2247 fl->ifl_buf_size = scctx->isc_rxd_buf_size[qidx]; 2248 else 2249 fl->ifl_buf_size = ctx->ifc_rx_mbuf_sz; 2250 /* 2251 * ifl_buf_size may be a driver-supplied value, so pull it up 2252 * to the selected mbuf size. 2253 */ 2254 fl->ifl_buf_size = iflib_get_mbuf_size_for(fl->ifl_buf_size); 2255 if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size) 2256 ctx->ifc_max_fl_buf_size = fl->ifl_buf_size; 2257 fl->ifl_cltype = m_gettype(fl->ifl_buf_size); 2258 fl->ifl_zone = m_getzone(fl->ifl_buf_size); 2259 2260 /* 2261 * Avoid pre-allocating zillions of clusters to an idle card 2262 * potentially speeding up attach. In any case make sure 2263 * to leave a descriptor unavailable. See the comment in 2264 * iflib_fl_refill_all(). 
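 * Only min(128, ifl_size - 1) buffers are posted at this point; the
 * free list is then topped up on demand by iflib_fl_refill_all() from
 * the rx path once traffic starts flowing.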
2265 */ 2266 MPASS(fl->ifl_size > 0); 2267 (void)iflib_fl_refill(ctx, fl, min(128, fl->ifl_size - 1)); 2268 if (min(128, fl->ifl_size - 1) != fl->ifl_credits) 2269 return (ENOBUFS); 2270 /* 2271 * handle failure 2272 */ 2273 MPASS(rxq != NULL); 2274 MPASS(fl->ifl_ifdi != NULL); 2275 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, 2276 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 2277 return (0); 2278 } 2279 2280 /********************************************************************* 2281 * 2282 * Free receive ring data structures 2283 * 2284 **********************************************************************/ 2285 static void 2286 iflib_rx_sds_free(iflib_rxq_t rxq) 2287 { 2288 iflib_fl_t fl; 2289 int i, j; 2290 2291 if (rxq->ifr_fl != NULL) { 2292 for (i = 0; i < rxq->ifr_nfl; i++) { 2293 fl = &rxq->ifr_fl[i]; 2294 if (fl->ifl_buf_tag != NULL) { 2295 if (fl->ifl_sds.ifsd_map != NULL) { 2296 for (j = 0; j < fl->ifl_size; j++) { 2297 bus_dmamap_sync( 2298 fl->ifl_buf_tag, 2299 fl->ifl_sds.ifsd_map[j], 2300 BUS_DMASYNC_POSTREAD); 2301 bus_dmamap_unload( 2302 fl->ifl_buf_tag, 2303 fl->ifl_sds.ifsd_map[j]); 2304 bus_dmamap_destroy( 2305 fl->ifl_buf_tag, 2306 fl->ifl_sds.ifsd_map[j]); 2307 } 2308 } 2309 bus_dma_tag_destroy(fl->ifl_buf_tag); 2310 fl->ifl_buf_tag = NULL; 2311 } 2312 free(fl->ifl_sds.ifsd_m, M_IFLIB); 2313 free(fl->ifl_sds.ifsd_cl, M_IFLIB); 2314 free(fl->ifl_sds.ifsd_ba, M_IFLIB); 2315 free(fl->ifl_sds.ifsd_map, M_IFLIB); 2316 free(fl->ifl_rx_bitmap, M_IFLIB); 2317 fl->ifl_sds.ifsd_m = NULL; 2318 fl->ifl_sds.ifsd_cl = NULL; 2319 fl->ifl_sds.ifsd_ba = NULL; 2320 fl->ifl_sds.ifsd_map = NULL; 2321 fl->ifl_rx_bitmap = NULL; 2322 } 2323 free(rxq->ifr_fl, M_IFLIB); 2324 rxq->ifr_fl = NULL; 2325 free(rxq->ifr_ifdi, M_IFLIB); 2326 rxq->ifr_ifdi = NULL; 2327 rxq->ifr_cq_cidx = 0; 2328 } 2329 } 2330 2331 /* 2332 * Timer routine 2333 */ 2334 static void 2335 iflib_timer(void *arg) 2336 { 2337 iflib_txq_t txq = arg; 2338 if_ctx_t ctx = txq->ift_ctx; 2339 if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; 2340 uint64_t this_tick = ticks; 2341 2342 if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) 2343 return; 2344 2345 /* 2346 ** Check on the state of the TX queue(s), this 2347 ** can be done without the lock because its RO 2348 ** and the HUNG state will be static if set. 
2349 */ 2350 if (this_tick - txq->ift_last_timer_tick >= iflib_timer_default) { 2351 txq->ift_last_timer_tick = this_tick; 2352 IFDI_TIMER(ctx, txq->ift_id); 2353 if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && 2354 ((txq->ift_cleaned_prev == txq->ift_cleaned) || 2355 (sctx->isc_pause_frames == 0))) 2356 goto hung; 2357 2358 if (txq->ift_qstatus != IFLIB_QUEUE_IDLE && 2359 ifmp_ring_is_stalled(txq->ift_br)) { 2360 KASSERT(ctx->ifc_link_state == LINK_STATE_UP, 2361 ("queue can't be marked as hung if interface is down")); 2362 txq->ift_qstatus = IFLIB_QUEUE_HUNG; 2363 } 2364 txq->ift_cleaned_prev = txq->ift_cleaned; 2365 } 2366 /* handle any laggards */ 2367 if (txq->ift_db_pending) 2368 GROUPTASK_ENQUEUE(&txq->ift_task); 2369 2370 sctx->isc_pause_frames = 0; 2371 if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) 2372 callout_reset_on(&txq->ift_timer, iflib_timer_default, iflib_timer, 2373 txq, txq->ift_timer.c_cpu); 2374 return; 2375 2376 hung: 2377 device_printf(ctx->ifc_dev, 2378 "Watchdog timeout (TX: %d desc avail: %d pidx: %d) -- resetting\n", 2379 txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); 2380 STATE_LOCK(ctx); 2381 if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2382 ctx->ifc_flags |= (IFC_DO_WATCHDOG|IFC_DO_RESET); 2383 iflib_admin_intr_deferred(ctx); 2384 STATE_UNLOCK(ctx); 2385 } 2386 2387 static uint16_t 2388 iflib_get_mbuf_size_for(unsigned int size) 2389 { 2390 2391 if (size <= MCLBYTES) 2392 return (MCLBYTES); 2393 else 2394 return (MJUMPAGESIZE); 2395 } 2396 2397 static void 2398 iflib_calc_rx_mbuf_sz(if_ctx_t ctx) 2399 { 2400 if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; 2401 2402 /* 2403 * XXX don't set the max_frame_size to larger 2404 * than the hardware can handle 2405 */ 2406 ctx->ifc_rx_mbuf_sz = 2407 iflib_get_mbuf_size_for(sctx->isc_max_frame_size); 2408 } 2409 2410 uint32_t 2411 iflib_get_rx_mbuf_sz(if_ctx_t ctx) 2412 { 2413 2414 return (ctx->ifc_rx_mbuf_sz); 2415 } 2416 2417 static void 2418 iflib_init_locked(if_ctx_t ctx) 2419 { 2420 if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; 2421 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 2422 if_t ifp = ctx->ifc_ifp; 2423 iflib_fl_t fl; 2424 iflib_txq_t txq; 2425 iflib_rxq_t rxq; 2426 int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; 2427 2428 if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2429 IFDI_INTR_DISABLE(ctx); 2430 2431 /* 2432 * See iflib_stop(). Useful in case iflib_init_locked() is 2433 * called without first calling iflib_stop(). 
*/ 2435 netmap_disable_all_rings(ifp); 2436 2437 tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); 2438 tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP); 2439 /* Set hardware offload abilities */ 2440 if_clearhwassist(ifp); 2441 if (if_getcapenable(ifp) & IFCAP_TXCSUM) 2442 if_sethwassistbits(ifp, tx_ip_csum_flags, 0); 2443 if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) 2444 if_sethwassistbits(ifp, tx_ip6_csum_flags, 0); 2445 if (if_getcapenable(ifp) & IFCAP_TSO4) 2446 if_sethwassistbits(ifp, CSUM_IP_TSO, 0); 2447 if (if_getcapenable(ifp) & IFCAP_TSO6) 2448 if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); 2449 2450 for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) { 2451 CALLOUT_LOCK(txq); 2452 callout_stop(&txq->ift_timer); 2453 #ifdef DEV_NETMAP 2454 callout_stop(&txq->ift_netmap_timer); 2455 #endif /* DEV_NETMAP */ 2456 CALLOUT_UNLOCK(txq); 2457 iflib_netmap_txq_init(ctx, txq); 2458 } 2459 2460 /* 2461 * Calculate a suitable Rx mbuf size prior to calling IFDI_INIT, so 2462 * that drivers can use the value when setting up the hardware receive 2463 * buffers. 2464 */ 2465 iflib_calc_rx_mbuf_sz(ctx); 2466 2467 #ifdef INVARIANTS 2468 i = if_getdrvflags(ifp); 2469 #endif 2470 IFDI_INIT(ctx); 2471 MPASS(if_getdrvflags(ifp) == i); 2472 for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { 2473 if (iflib_netmap_rxq_init(ctx, rxq) > 0) { 2474 /* This rxq is in netmap mode. Skip normal init. */ 2475 continue; 2476 } 2477 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { 2478 if (iflib_fl_setup(fl)) { 2479 device_printf(ctx->ifc_dev, 2480 "setting up free list %d failed - " 2481 "check cluster settings\n", j); 2482 goto done; 2483 } 2484 } 2485 } 2486 done: 2487 if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); 2488 IFDI_INTR_ENABLE(ctx); 2489 txq = ctx->ifc_txqs; 2490 for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) 2491 callout_reset_on(&txq->ift_timer, iflib_timer_default, iflib_timer, txq, 2492 txq->ift_timer.c_cpu); 2493 2494 /* Re-enable txsync/rxsync. */ 2495 netmap_enable_all_rings(ifp); 2496 } 2497 2498 static int 2499 iflib_media_change(if_t ifp) 2500 { 2501 if_ctx_t ctx = if_getsoftc(ifp); 2502 int err; 2503 2504 CTX_LOCK(ctx); 2505 if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0) 2506 iflib_init_locked(ctx); 2507 CTX_UNLOCK(ctx); 2508 return (err); 2509 } 2510 2511 static void 2512 iflib_media_status(if_t ifp, struct ifmediareq *ifmr) 2513 { 2514 if_ctx_t ctx = if_getsoftc(ifp); 2515 2516 CTX_LOCK(ctx); 2517 IFDI_UPDATE_ADMIN_STATUS(ctx); 2518 IFDI_MEDIA_STATUS(ctx, ifmr); 2519 CTX_UNLOCK(ctx); 2520 } 2521 2522 void 2523 iflib_stop(if_ctx_t ctx) 2524 { 2525 iflib_txq_t txq = ctx->ifc_txqs; 2526 iflib_rxq_t rxq = ctx->ifc_rxqs; 2527 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 2528 if_shared_ctx_t sctx = ctx->ifc_sctx; 2529 iflib_dma_info_t di; 2530 iflib_fl_t fl; 2531 int i, j; 2532 2533 /* Tell the stack that the interface is no longer active */ 2534 if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2535 2536 IFDI_INTR_DISABLE(ctx); 2537 DELAY(1000); 2538 IFDI_STOP(ctx); 2539 DELAY(1000); 2540 2541 /* 2542 * Stop any pending txsync/rxsync and prevent new ones 2543 * from starting. Processes blocked in poll() will get 2544 * POLLERR. 2545 */ 2546 netmap_disable_all_rings(ctx->ifc_ifp); 2547 2548 iflib_debug_reset(); 2549 /* Wait for current tx queue users to exit to disarm watchdog timer.
*/ 2550 for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) { 2551 /* make sure all transmitters have completed before proceeding XXX */ 2552 2553 CALLOUT_LOCK(txq); 2554 callout_stop(&txq->ift_timer); 2555 #ifdef DEV_NETMAP 2556 callout_stop(&txq->ift_netmap_timer); 2557 #endif /* DEV_NETMAP */ 2558 CALLOUT_UNLOCK(txq); 2559 2560 /* clean any enqueued buffers */ 2561 iflib_ifmp_purge(txq); 2562 /* Free any existing tx buffers. */ 2563 for (j = 0; j < txq->ift_size; j++) { 2564 iflib_txsd_free(ctx, txq, j); 2565 } 2566 txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; 2567 txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0; 2568 txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; 2569 txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; 2570 txq->ift_pullups = 0; 2571 ifmp_ring_reset_stats(txq->ift_br); 2572 for (j = 0, di = txq->ift_ifdi; j < sctx->isc_ntxqs; j++, di++) 2573 bzero((void *)di->idi_vaddr, di->idi_size); 2574 } 2575 for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) { 2576 /* make sure all transmitters have completed before proceeding XXX */ 2577 2578 rxq->ifr_cq_cidx = 0; 2579 for (j = 0, di = rxq->ifr_ifdi; j < sctx->isc_nrxqs; j++, di++) 2580 bzero((void *)di->idi_vaddr, di->idi_size); 2581 /* also resets the free lists pidx/cidx */ 2582 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) 2583 iflib_fl_bufs_free(fl); 2584 } 2585 } 2586 2587 static inline caddr_t 2588 calc_next_rxd(iflib_fl_t fl, int cidx) 2589 { 2590 qidx_t size; 2591 int nrxd; 2592 caddr_t start, end, cur, next; 2593 2594 nrxd = fl->ifl_size; 2595 size = fl->ifl_rxd_size; 2596 start = fl->ifl_ifdi->idi_vaddr; 2597 2598 if (__predict_false(size == 0)) 2599 return (start); 2600 cur = start + size*cidx; 2601 end = start + size*nrxd; 2602 next = CACHE_PTR_NEXT(cur); 2603 return (next < end ? 
next : start); 2604 } 2605 2606 static inline void 2607 prefetch_pkts(iflib_fl_t fl, int cidx) 2608 { 2609 int nextptr; 2610 int nrxd = fl->ifl_size; 2611 caddr_t next_rxd; 2612 2613 nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1); 2614 prefetch(&fl->ifl_sds.ifsd_m[nextptr]); 2615 prefetch(&fl->ifl_sds.ifsd_cl[nextptr]); 2616 next_rxd = calc_next_rxd(fl, cidx); 2617 prefetch(next_rxd); 2618 prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]); 2619 prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]); 2620 prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]); 2621 prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]); 2622 prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]); 2623 prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]); 2624 prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]); 2625 prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]); 2626 } 2627 2628 static struct mbuf * 2629 rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, bool unload, if_rxsd_t sd, 2630 int *pf_rv, if_rxd_info_t ri) 2631 { 2632 bus_dmamap_t map; 2633 iflib_fl_t fl; 2634 caddr_t payload; 2635 struct mbuf *m; 2636 int flid, cidx, len, next; 2637 2638 map = NULL; 2639 flid = irf->irf_flid; 2640 cidx = irf->irf_idx; 2641 fl = &rxq->ifr_fl[flid]; 2642 sd->ifsd_fl = fl; 2643 m = fl->ifl_sds.ifsd_m[cidx]; 2644 sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx]; 2645 fl->ifl_credits--; 2646 #if MEMORY_LOGGING 2647 fl->ifl_m_dequeued++; 2648 #endif 2649 if (rxq->ifr_ctx->ifc_flags & IFC_PREFETCH) 2650 prefetch_pkts(fl, cidx); 2651 next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1); 2652 prefetch(&fl->ifl_sds.ifsd_map[next]); 2653 map = fl->ifl_sds.ifsd_map[cidx]; 2654 2655 bus_dmamap_sync(fl->ifl_buf_tag, map, BUS_DMASYNC_POSTREAD); 2656 2657 if (rxq->pfil != NULL && PFIL_HOOKED_IN(rxq->pfil) && pf_rv != NULL && 2658 irf->irf_len != 0) { 2659 payload = *sd->ifsd_cl; 2660 payload += ri->iri_pad; 2661 len = ri->iri_len - ri->iri_pad; 2662 *pf_rv = pfil_run_hooks(rxq->pfil, payload, ri->iri_ifp, 2663 len | PFIL_MEMPTR | PFIL_IN, NULL); 2664 switch (*pf_rv) { 2665 case PFIL_DROPPED: 2666 case PFIL_CONSUMED: 2667 /* 2668 * The filter ate it. Everything is recycled. 2669 */ 2670 m = NULL; 2671 unload = 0; 2672 break; 2673 case PFIL_REALLOCED: 2674 /* 2675 * The filter copied it. Everything is recycled. 
2676 */ 2677 m = pfil_mem2mbuf(payload); 2678 unload = 0; 2679 break; 2680 case PFIL_PASS: 2681 /* 2682 * Filter said it was OK, so receive like 2683 * normal 2684 */ 2685 fl->ifl_sds.ifsd_m[cidx] = NULL; 2686 break; 2687 default: 2688 MPASS(0); 2689 } 2690 } else { 2691 fl->ifl_sds.ifsd_m[cidx] = NULL; 2692 if (pf_rv != NULL) 2693 *pf_rv = PFIL_PASS; 2694 } 2695 2696 if (unload && irf->irf_len != 0) 2697 bus_dmamap_unload(fl->ifl_buf_tag, map); 2698 fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1); 2699 if (__predict_false(fl->ifl_cidx == 0)) 2700 fl->ifl_gen = 0; 2701 bit_clear(fl->ifl_rx_bitmap, cidx); 2702 return (m); 2703 } 2704 2705 static struct mbuf * 2706 assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd, int *pf_rv) 2707 { 2708 struct mbuf *m, *mh, *mt; 2709 caddr_t cl; 2710 int *pf_rv_ptr, flags, i, padlen; 2711 bool consumed; 2712 2713 i = 0; 2714 mh = NULL; 2715 consumed = false; 2716 *pf_rv = PFIL_PASS; 2717 pf_rv_ptr = pf_rv; 2718 do { 2719 m = rxd_frag_to_sd(rxq, &ri->iri_frags[i], !consumed, sd, 2720 pf_rv_ptr, ri); 2721 2722 MPASS(*sd->ifsd_cl != NULL); 2723 2724 /* 2725 * Exclude zero-length frags & frags from 2726 * packets the filter has consumed or dropped 2727 */ 2728 if (ri->iri_frags[i].irf_len == 0 || consumed || 2729 *pf_rv == PFIL_CONSUMED || *pf_rv == PFIL_DROPPED) { 2730 if (mh == NULL) { 2731 /* everything saved here */ 2732 consumed = true; 2733 pf_rv_ptr = NULL; 2734 continue; 2735 } 2736 /* XXX we can save the cluster here, but not the mbuf */ 2737 m_init(m, M_NOWAIT, MT_DATA, 0); 2738 m_free(m); 2739 continue; 2740 } 2741 if (mh == NULL) { 2742 flags = M_PKTHDR|M_EXT; 2743 mh = mt = m; 2744 padlen = ri->iri_pad; 2745 } else { 2746 flags = M_EXT; 2747 mt->m_next = m; 2748 mt = m; 2749 /* assuming padding is only on the first fragment */ 2750 padlen = 0; 2751 } 2752 cl = *sd->ifsd_cl; 2753 *sd->ifsd_cl = NULL; 2754 2755 /* Can these two be made one ? */ 2756 m_init(m, M_NOWAIT, MT_DATA, flags); 2757 m_cljset(m, cl, sd->ifsd_fl->ifl_cltype); 2758 /* 2759 * These must follow m_init and m_cljset 2760 */ 2761 m->m_data += padlen; 2762 ri->iri_len -= padlen; 2763 m->m_len = ri->iri_frags[i].irf_len; 2764 } while (++i < ri->iri_nfrags); 2765 2766 return (mh); 2767 } 2768 2769 /* 2770 * Process one software descriptor 2771 */ 2772 static struct mbuf * 2773 iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) 2774 { 2775 struct if_rxsd sd; 2776 struct mbuf *m; 2777 int pf_rv; 2778 2779 /* should I merge this back in now that the two paths are basically duplicated? 
*/ 2780 if (ri->iri_nfrags == 1 && 2781 ri->iri_frags[0].irf_len != 0 && 2782 ri->iri_frags[0].irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) { 2783 m = rxd_frag_to_sd(rxq, &ri->iri_frags[0], false, &sd, 2784 &pf_rv, ri); 2785 if (pf_rv != PFIL_PASS && pf_rv != PFIL_REALLOCED) 2786 return (m); 2787 if (pf_rv == PFIL_PASS) { 2788 m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); 2789 #ifndef __NO_STRICT_ALIGNMENT 2790 if (!IP_ALIGNED(m)) 2791 m->m_data += 2; 2792 #endif 2793 memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); 2794 m->m_len = ri->iri_frags[0].irf_len; 2795 } 2796 } else { 2797 m = assemble_segments(rxq, ri, &sd, &pf_rv); 2798 if (m == NULL) 2799 return (NULL); 2800 if (pf_rv != PFIL_PASS && pf_rv != PFIL_REALLOCED) 2801 return (m); 2802 } 2803 m->m_pkthdr.len = ri->iri_len; 2804 m->m_pkthdr.rcvif = ri->iri_ifp; 2805 m->m_flags |= ri->iri_flags; 2806 m->m_pkthdr.ether_vtag = ri->iri_vtag; 2807 m->m_pkthdr.flowid = ri->iri_flowid; 2808 M_HASHTYPE_SET(m, ri->iri_rsstype); 2809 m->m_pkthdr.csum_flags = ri->iri_csum_flags; 2810 m->m_pkthdr.csum_data = ri->iri_csum_data; 2811 return (m); 2812 } 2813 2814 #if defined(INET6) || defined(INET) 2815 static void 2816 iflib_get_ip_forwarding(struct lro_ctrl *lc, bool *v4, bool *v6) 2817 { 2818 CURVNET_SET(lc->ifp->if_vnet); 2819 #if defined(INET6) 2820 *v6 = V_ip6_forwarding; 2821 #endif 2822 #if defined(INET) 2823 *v4 = V_ipforwarding; 2824 #endif 2825 CURVNET_RESTORE(); 2826 } 2827 2828 /* 2829 * Returns true if it's possible this packet could be LROed. 2830 * if it returns false, it is guaranteed that tcp_lro_rx() 2831 * would not return zero. 2832 */ 2833 static bool 2834 iflib_check_lro_possible(struct mbuf *m, bool v4_forwarding, bool v6_forwarding) 2835 { 2836 struct ether_header *eh; 2837 2838 eh = mtod(m, struct ether_header *); 2839 switch (eh->ether_type) { 2840 #if defined(INET6) 2841 case htons(ETHERTYPE_IPV6): 2842 return (!v6_forwarding); 2843 #endif 2844 #if defined (INET) 2845 case htons(ETHERTYPE_IP): 2846 return (!v4_forwarding); 2847 #endif 2848 } 2849 2850 return false; 2851 } 2852 #else 2853 static void 2854 iflib_get_ip_forwarding(struct lro_ctrl *lc __unused, bool *v4 __unused, bool *v6 __unused) 2855 { 2856 } 2857 #endif 2858 2859 static void 2860 _task_fn_rx_watchdog(void *context) 2861 { 2862 iflib_rxq_t rxq = context; 2863 2864 GROUPTASK_ENQUEUE(&rxq->ifr_task); 2865 } 2866 2867 static uint8_t 2868 iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) 2869 { 2870 if_t ifp; 2871 if_ctx_t ctx = rxq->ifr_ctx; 2872 if_shared_ctx_t sctx = ctx->ifc_sctx; 2873 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 2874 int avail, i; 2875 qidx_t *cidxp; 2876 struct if_rxd_info ri; 2877 int err, budget_left, rx_bytes, rx_pkts; 2878 iflib_fl_t fl; 2879 int lro_enabled; 2880 bool v4_forwarding, v6_forwarding, lro_possible; 2881 uint8_t retval = 0; 2882 2883 /* 2884 * XXX early demux data packets so that if_input processing only handles 2885 * acks in interrupt context 2886 */ 2887 struct mbuf *m, *mh, *mt, *mf; 2888 2889 NET_EPOCH_ASSERT(); 2890 2891 lro_possible = v4_forwarding = v6_forwarding = false; 2892 ifp = ctx->ifc_ifp; 2893 mh = mt = NULL; 2894 MPASS(budget > 0); 2895 rx_pkts = rx_bytes = 0; 2896 if (sctx->isc_flags & IFLIB_HAS_RXCQ) 2897 cidxp = &rxq->ifr_cq_cidx; 2898 else 2899 cidxp = &rxq->ifr_fl[0].ifl_cidx; 2900 if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) { 2901 for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) 2902 retval |= iflib_fl_refill_all(ctx, fl); 2903 DBG_COUNTER_INC(rx_unavail); 2904 return (retval); 
2905 } 2906 2907 /* pfil needs the vnet to be set */ 2908 CURVNET_SET_QUIET(ifp->if_vnet); 2909 for (budget_left = budget; budget_left > 0 && avail > 0;) { 2910 if (__predict_false(!CTX_ACTIVE(ctx))) { 2911 DBG_COUNTER_INC(rx_ctx_inactive); 2912 break; 2913 } 2914 /* 2915 * Reset client set fields to their default values 2916 */ 2917 rxd_info_zero(&ri); 2918 ri.iri_qsidx = rxq->ifr_id; 2919 ri.iri_cidx = *cidxp; 2920 ri.iri_ifp = ifp; 2921 ri.iri_frags = rxq->ifr_frags; 2922 err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); 2923 2924 if (err) 2925 goto err; 2926 rx_pkts += 1; 2927 rx_bytes += ri.iri_len; 2928 if (sctx->isc_flags & IFLIB_HAS_RXCQ) { 2929 *cidxp = ri.iri_cidx; 2930 /* Update our consumer index */ 2931 /* XXX NB: shurd - check if this is still safe */ 2932 while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) 2933 rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; 2934 /* was this only a completion queue message? */ 2935 if (__predict_false(ri.iri_nfrags == 0)) 2936 continue; 2937 } 2938 MPASS(ri.iri_nfrags != 0); 2939 MPASS(ri.iri_len != 0); 2940 2941 /* will advance the cidx on the corresponding free lists */ 2942 m = iflib_rxd_pkt_get(rxq, &ri); 2943 avail--; 2944 budget_left--; 2945 if (avail == 0 && budget_left) 2946 avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); 2947 2948 if (__predict_false(m == NULL)) 2949 continue; 2950 2951 /* imm_pkt: -- cxgb */ 2952 if (mh == NULL) 2953 mh = mt = m; 2954 else { 2955 mt->m_nextpkt = m; 2956 mt = m; 2957 } 2958 } 2959 CURVNET_RESTORE(); 2960 /* make sure that we can refill faster than drain */ 2961 for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) 2962 retval |= iflib_fl_refill_all(ctx, fl); 2963 2964 lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); 2965 if (lro_enabled) 2966 iflib_get_ip_forwarding(&rxq->ifr_lc, &v4_forwarding, &v6_forwarding); 2967 mt = mf = NULL; 2968 while (mh != NULL) { 2969 m = mh; 2970 mh = mh->m_nextpkt; 2971 m->m_nextpkt = NULL; 2972 #ifndef __NO_STRICT_ALIGNMENT 2973 if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL) 2974 continue; 2975 #endif 2976 rx_bytes += m->m_pkthdr.len; 2977 rx_pkts++; 2978 #if defined(INET6) || defined(INET) 2979 if (lro_enabled) { 2980 if (!lro_possible) { 2981 lro_possible = iflib_check_lro_possible(m, v4_forwarding, v6_forwarding); 2982 if (lro_possible && mf != NULL) { 2983 ifp->if_input(ifp, mf); 2984 DBG_COUNTER_INC(rx_if_input); 2985 mt = mf = NULL; 2986 } 2987 } 2988 if ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC|CSUM_L4_VALID)) == 2989 (CSUM_L4_CALC|CSUM_L4_VALID)) { 2990 if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) 2991 continue; 2992 } 2993 } 2994 #endif 2995 if (lro_possible) { 2996 ifp->if_input(ifp, m); 2997 DBG_COUNTER_INC(rx_if_input); 2998 continue; 2999 } 3000 3001 if (mf == NULL) 3002 mf = m; 3003 if (mt != NULL) 3004 mt->m_nextpkt = m; 3005 mt = m; 3006 } 3007 if (mf != NULL) { 3008 ifp->if_input(ifp, mf); 3009 DBG_COUNTER_INC(rx_if_input); 3010 } 3011 3012 if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); 3013 if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); 3014 3015 /* 3016 * Flush any outstanding LRO work 3017 */ 3018 #if defined(INET6) || defined(INET) 3019 tcp_lro_flush_all(&rxq->ifr_lc); 3020 #endif 3021 if (avail != 0 || iflib_rxd_avail(ctx, rxq, *cidxp, 1) != 0) 3022 retval |= IFLIB_RXEOF_MORE; 3023 return (retval); 3024 err: 3025 STATE_LOCK(ctx); 3026 ctx->ifc_flags |= IFC_DO_RESET; 3027 iflib_admin_intr_deferred(ctx); 3028 STATE_UNLOCK(ctx); 3029 return (0); 3030 } 3031 3032 #define TXD_NOTIFY_COUNT(txq) (((txq)->ift_size / 
(txq)->ift_update_freq)-1) 3033 static inline qidx_t 3034 txq_max_db_deferred(iflib_txq_t txq, qidx_t in_use) 3035 { 3036 qidx_t notify_count = TXD_NOTIFY_COUNT(txq); 3037 qidx_t minthresh = txq->ift_size / 8; 3038 if (in_use > 4*minthresh) 3039 return (notify_count); 3040 if (in_use > 2*minthresh) 3041 return (notify_count >> 1); 3042 if (in_use > minthresh) 3043 return (notify_count >> 3); 3044 return (0); 3045 } 3046 3047 static inline qidx_t 3048 txq_max_rs_deferred(iflib_txq_t txq) 3049 { 3050 qidx_t notify_count = TXD_NOTIFY_COUNT(txq); 3051 qidx_t minthresh = txq->ift_size / 8; 3052 if (txq->ift_in_use > 4*minthresh) 3053 return (notify_count); 3054 if (txq->ift_in_use > 2*minthresh) 3055 return (notify_count >> 1); 3056 if (txq->ift_in_use > minthresh) 3057 return (notify_count >> 2); 3058 return (2); 3059 } 3060 3061 #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags) 3062 #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG) 3063 3064 #define TXQ_MAX_DB_DEFERRED(txq, in_use) txq_max_db_deferred((txq), (in_use)) 3065 #define TXQ_MAX_RS_DEFERRED(txq) txq_max_rs_deferred(txq) 3066 #define TXQ_MAX_DB_CONSUMED(size) (size >> 4) 3067 3068 /* forward compatibility for cxgb */ 3069 #define FIRST_QSET(ctx) 0 3070 #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets) 3071 #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets) 3072 #define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx)) 3073 #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) 3074 3075 /* XXX we should be setting this to something other than zero */ 3076 #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) 3077 #define MAX_TX_DESC(ctx) MAX((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max, \ 3078 (ctx)->ifc_softc_ctx.isc_tx_nsegments) 3079 3080 static inline bool 3081 iflib_txd_db_check(iflib_txq_t txq, int ring) 3082 { 3083 if_ctx_t ctx = txq->ift_ctx; 3084 qidx_t dbval, max; 3085 3086 max = TXQ_MAX_DB_DEFERRED(txq, txq->ift_in_use); 3087 3088 /* force || threshold exceeded || at the edge of the ring */ 3089 if (ring || (txq->ift_db_pending >= max) || (TXQ_AVAIL(txq) <= MAX_TX_DESC(ctx) + 2)) { 3090 3091 /* 3092 * 'npending' is used if the card's doorbell is in terms of the number of descriptors 3093 * pending flush (BRCM). 'pidx' is used in cases where the card's doorbell uses the 3094 * producer index explicitly (INTC). 3095 */ 3096 dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx; 3097 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, 3098 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 3099 ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval); 3100 3101 /* 3102 * Absent bugs there are zero packets pending so reset pending counts to zero.
3103 */ 3104 txq->ift_db_pending = txq->ift_npending = 0; 3105 return (true); 3106 } 3107 return (false); 3108 } 3109 3110 #ifdef PKT_DEBUG 3111 static void 3112 print_pkt(if_pkt_info_t pi) 3113 { 3114 printf("pi len: %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n", 3115 pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx); 3116 printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n", 3117 pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag); 3118 printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n", 3119 pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto); 3120 } 3121 #endif 3122 3123 #define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO) 3124 #define IS_TX_OFFLOAD4(pi) ((pi)->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP_TSO)) 3125 #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO) 3126 #define IS_TX_OFFLOAD6(pi) ((pi)->ipi_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_TSO)) 3127 3128 static int 3129 iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) 3130 { 3131 if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; 3132 struct ether_vlan_header *eh; 3133 struct mbuf *m; 3134 3135 m = *mp; 3136 if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && 3137 M_WRITABLE(m) == 0) { 3138 if ((m = m_dup(m, M_NOWAIT)) == NULL) { 3139 return (ENOMEM); 3140 } else { 3141 m_freem(*mp); 3142 DBG_COUNTER_INC(tx_frees); 3143 *mp = m; 3144 } 3145 } 3146 3147 /* 3148 * Determine where frame payload starts. 3149 * Jump over vlan headers if already present, 3150 * helpful for QinQ too. 3151 */ 3152 if (__predict_false(m->m_len < sizeof(*eh))) { 3153 txq->ift_pullups++; 3154 if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL)) 3155 return (ENOMEM); 3156 } 3157 eh = mtod(m, struct ether_vlan_header *); 3158 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 3159 pi->ipi_etype = ntohs(eh->evl_proto); 3160 pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3161 } else { 3162 pi->ipi_etype = ntohs(eh->evl_encap_proto); 3163 pi->ipi_ehdrlen = ETHER_HDR_LEN; 3164 } 3165 3166 switch (pi->ipi_etype) { 3167 #ifdef INET 3168 case ETHERTYPE_IP: 3169 { 3170 struct mbuf *n; 3171 struct ip *ip = NULL; 3172 struct tcphdr *th = NULL; 3173 int minthlen; 3174 3175 minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); 3176 if (__predict_false(m->m_len < minthlen)) { 3177 /* 3178 * if this code bloat is causing too much of a hit 3179 * move it to a separate function and mark it noinline 3180 */ 3181 if (m->m_len == pi->ipi_ehdrlen) { 3182 n = m->m_next; 3183 MPASS(n); 3184 if (n->m_len >= sizeof(*ip)) { 3185 ip = (struct ip *)n->m_data; 3186 if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th)) 3187 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 3188 } else { 3189 txq->ift_pullups++; 3190 if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) 3191 return (ENOMEM); 3192 ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); 3193 } 3194 } else { 3195 txq->ift_pullups++; 3196 if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) 3197 return (ENOMEM); 3198 ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); 3199 if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) 3200 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 3201 } 3202 } else { 3203 ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); 3204 if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) 3205 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 3206 } 3207 pi->ipi_ip_hlen = ip->ip_hl << 2; 3208 pi->ipi_ipproto = 
ip->ip_p; 3209 pi->ipi_flags |= IPI_TX_IPV4; 3210 3211 /* TCP checksum offload may require TCP header length */ 3212 if (IS_TX_OFFLOAD4(pi)) { 3213 if (__predict_true(pi->ipi_ipproto == IPPROTO_TCP)) { 3214 if (__predict_false(th == NULL)) { 3215 txq->ift_pullups++; 3216 if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) 3217 return (ENOMEM); 3218 th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); 3219 } 3220 pi->ipi_tcp_hflags = th->th_flags; 3221 pi->ipi_tcp_hlen = th->th_off << 2; 3222 pi->ipi_tcp_seq = th->th_seq; 3223 } 3224 if (IS_TSO4(pi)) { 3225 if (__predict_false(ip->ip_p != IPPROTO_TCP)) 3226 return (ENXIO); 3227 /* 3228 * TSO always requires hardware checksum offload. 3229 */ 3230 pi->ipi_csum_flags |= (CSUM_IP_TCP | CSUM_IP); 3231 th->th_sum = in_pseudo(ip->ip_src.s_addr, 3232 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 3233 pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; 3234 if (sctx->isc_flags & IFLIB_TSO_INIT_IP) { 3235 ip->ip_sum = 0; 3236 ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz); 3237 } 3238 } 3239 } 3240 if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP)) 3241 ip->ip_sum = 0; 3242 3243 break; 3244 } 3245 #endif 3246 #ifdef INET6 3247 case ETHERTYPE_IPV6: 3248 { 3249 struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen); 3250 struct tcphdr *th; 3251 pi->ipi_ip_hlen = sizeof(struct ip6_hdr); 3252 3253 if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) { 3254 txq->ift_pullups++; 3255 if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL)) 3256 return (ENOMEM); 3257 } 3258 th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen); 3259 3260 /* XXX-BZ this will go badly in case of ext hdrs. */ 3261 pi->ipi_ipproto = ip6->ip6_nxt; 3262 pi->ipi_flags |= IPI_TX_IPV6; 3263 3264 /* TCP checksum offload may require TCP header length */ 3265 if (IS_TX_OFFLOAD6(pi)) { 3266 if (pi->ipi_ipproto == IPPROTO_TCP) { 3267 if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { 3268 txq->ift_pullups++; 3269 if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) 3270 return (ENOMEM); 3271 } 3272 pi->ipi_tcp_hflags = th->th_flags; 3273 pi->ipi_tcp_hlen = th->th_off << 2; 3274 pi->ipi_tcp_seq = th->th_seq; 3275 } 3276 if (IS_TSO6(pi)) { 3277 if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) 3278 return (ENXIO); 3279 /* 3280 * TSO always requires hardware checksum offload. 
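 * The pseudo-header checksum is seeded below with a zero TCP length,
 * since the hardware fills in the per-segment length when it carves
 * the TSO burst into MSS-sized frames.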
3281 */ 3282 pi->ipi_csum_flags |= CSUM_IP6_TCP; 3283 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 3284 pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; 3285 } 3286 } 3287 break; 3288 } 3289 #endif 3290 default: 3291 pi->ipi_csum_flags &= ~CSUM_OFFLOAD; 3292 pi->ipi_ip_hlen = 0; 3293 break; 3294 } 3295 *mp = m; 3296 3297 return (0); 3298 } 3299 3300 /* 3301 * If dodgy hardware rejects the scatter gather chain we've handed it 3302 * we'll need to remove the mbuf chain from ifsg_m[] before we can add the 3303 * m_defrag'd mbufs 3304 */ 3305 static __noinline struct mbuf * 3306 iflib_remove_mbuf(iflib_txq_t txq) 3307 { 3308 int ntxd, pidx; 3309 struct mbuf *m, **ifsd_m; 3310 3311 ifsd_m = txq->ift_sds.ifsd_m; 3312 ntxd = txq->ift_size; 3313 pidx = txq->ift_pidx & (ntxd - 1); 3314 ifsd_m = txq->ift_sds.ifsd_m; 3315 m = ifsd_m[pidx]; 3316 ifsd_m[pidx] = NULL; 3317 bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[pidx]); 3318 if (txq->ift_sds.ifsd_tso_map != NULL) 3319 bus_dmamap_unload(txq->ift_tso_buf_tag, 3320 txq->ift_sds.ifsd_tso_map[pidx]); 3321 #if MEMORY_LOGGING 3322 txq->ift_dequeued++; 3323 #endif 3324 return (m); 3325 } 3326 3327 static inline caddr_t 3328 calc_next_txd(iflib_txq_t txq, int cidx, uint8_t qid) 3329 { 3330 qidx_t size; 3331 int ntxd; 3332 caddr_t start, end, cur, next; 3333 3334 ntxd = txq->ift_size; 3335 size = txq->ift_txd_size[qid]; 3336 start = txq->ift_ifdi[qid].idi_vaddr; 3337 3338 if (__predict_false(size == 0)) 3339 return (start); 3340 cur = start + size*cidx; 3341 end = start + size*ntxd; 3342 next = CACHE_PTR_NEXT(cur); 3343 return (next < end ? next : start); 3344 } 3345 3346 /* 3347 * Pad an mbuf to ensure a minimum ethernet frame size. 3348 * min_frame_size is the frame size (less CRC) to pad the mbuf to 3349 */ 3350 static __noinline int 3351 iflib_ether_pad(device_t dev, struct mbuf **m_head, uint16_t min_frame_size) 3352 { 3353 /* 3354 * 18 is enough bytes to pad an ARP packet to 46 bytes, and 3355 * and ARP message is the smallest common payload I can think of 3356 */ 3357 static char pad[18]; /* just zeros */ 3358 int n; 3359 struct mbuf *new_head; 3360 3361 if (!M_WRITABLE(*m_head)) { 3362 new_head = m_dup(*m_head, M_NOWAIT); 3363 if (new_head == NULL) { 3364 m_freem(*m_head); 3365 device_printf(dev, "cannot pad short frame, m_dup() failed"); 3366 DBG_COUNTER_INC(encap_pad_mbuf_fail); 3367 DBG_COUNTER_INC(tx_frees); 3368 return ENOMEM; 3369 } 3370 m_freem(*m_head); 3371 *m_head = new_head; 3372 } 3373 3374 for (n = min_frame_size - (*m_head)->m_pkthdr.len; 3375 n > 0; n -= sizeof(pad)) 3376 if (!m_append(*m_head, min(n, sizeof(pad)), pad)) 3377 break; 3378 3379 if (n > 0) { 3380 m_freem(*m_head); 3381 device_printf(dev, "cannot pad short frame\n"); 3382 DBG_COUNTER_INC(encap_pad_mbuf_fail); 3383 DBG_COUNTER_INC(tx_frees); 3384 return (ENOBUFS); 3385 } 3386 3387 return 0; 3388 } 3389 3390 static int 3391 iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) 3392 { 3393 if_ctx_t ctx; 3394 if_shared_ctx_t sctx; 3395 if_softc_ctx_t scctx; 3396 bus_dma_tag_t buf_tag; 3397 bus_dma_segment_t *segs; 3398 struct mbuf *m_head, **ifsd_m; 3399 void *next_txd; 3400 bus_dmamap_t map; 3401 struct if_pkt_info pi; 3402 int remap = 0; 3403 int err, nsegs, ndesc, max_segs, pidx, cidx, next, ntxd; 3404 3405 ctx = txq->ift_ctx; 3406 sctx = ctx->ifc_sctx; 3407 scctx = &ctx->ifc_softc_ctx; 3408 segs = txq->ift_segs; 3409 ntxd = txq->ift_size; 3410 m_head = *m_headp; 3411 map = NULL; 3412 3413 /* 3414 * If we're doing TSO the next descriptor to clean may be quite 
far ahead 3415 */ 3416 cidx = txq->ift_cidx; 3417 pidx = txq->ift_pidx; 3418 if (ctx->ifc_flags & IFC_PREFETCH) { 3419 next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1); 3420 if (!(ctx->ifc_flags & IFLIB_HAS_TXCQ)) { 3421 next_txd = calc_next_txd(txq, cidx, 0); 3422 prefetch(next_txd); 3423 } 3424 3425 /* prefetch the next cache line of mbuf pointers and flags */ 3426 prefetch(&txq->ift_sds.ifsd_m[next]); 3427 prefetch(&txq->ift_sds.ifsd_map[next]); 3428 next = (cidx + CACHE_LINE_SIZE) & (ntxd-1); 3429 } 3430 map = txq->ift_sds.ifsd_map[pidx]; 3431 ifsd_m = txq->ift_sds.ifsd_m; 3432 3433 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3434 buf_tag = txq->ift_tso_buf_tag; 3435 max_segs = scctx->isc_tx_tso_segments_max; 3436 map = txq->ift_sds.ifsd_tso_map[pidx]; 3437 MPASS(buf_tag != NULL); 3438 MPASS(max_segs > 0); 3439 } else { 3440 buf_tag = txq->ift_buf_tag; 3441 max_segs = scctx->isc_tx_nsegments; 3442 map = txq->ift_sds.ifsd_map[pidx]; 3443 } 3444 if ((sctx->isc_flags & IFLIB_NEED_ETHER_PAD) && 3445 __predict_false(m_head->m_pkthdr.len < scctx->isc_min_frame_size)) { 3446 err = iflib_ether_pad(ctx->ifc_dev, m_headp, scctx->isc_min_frame_size); 3447 if (err) { 3448 DBG_COUNTER_INC(encap_txd_encap_fail); 3449 return err; 3450 } 3451 } 3452 m_head = *m_headp; 3453 3454 pkt_info_zero(&pi); 3455 pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); 3456 pi.ipi_pidx = pidx; 3457 pi.ipi_qsidx = txq->ift_id; 3458 pi.ipi_len = m_head->m_pkthdr.len; 3459 pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; 3460 pi.ipi_vtag = M_HAS_VLANTAG(m_head) ? m_head->m_pkthdr.ether_vtag : 0; 3461 3462 /* deliberate bitwise OR to make one condition */ 3463 if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { 3464 if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) { 3465 DBG_COUNTER_INC(encap_txd_encap_fail); 3466 return (err); 3467 } 3468 m_head = *m_headp; 3469 } 3470 3471 retry: 3472 err = bus_dmamap_load_mbuf_sg(buf_tag, map, m_head, segs, &nsegs, 3473 BUS_DMA_NOWAIT); 3474 defrag: 3475 if (__predict_false(err)) { 3476 switch (err) { 3477 case EFBIG: 3478 /* try collapse once and defrag once */ 3479 if (remap == 0) { 3480 m_head = m_collapse(*m_headp, M_NOWAIT, max_segs); 3481 /* try defrag if collapsing fails */ 3482 if (m_head == NULL) 3483 remap++; 3484 } 3485 if (remap == 1) { 3486 txq->ift_mbuf_defrag++; 3487 m_head = m_defrag(*m_headp, M_NOWAIT); 3488 } 3489 /* 3490 * remap should never be >1 unless bus_dmamap_load_mbuf_sg 3491 * failed to map an mbuf that was run through m_defrag 3492 */ 3493 MPASS(remap <= 1); 3494 if (__predict_false(m_head == NULL || remap > 1)) 3495 goto defrag_failed; 3496 remap++; 3497 *m_headp = m_head; 3498 goto retry; 3499 break; 3500 case ENOMEM: 3501 txq->ift_no_tx_dma_setup++; 3502 break; 3503 default: 3504 txq->ift_no_tx_dma_setup++; 3505 m_freem(*m_headp); 3506 DBG_COUNTER_INC(tx_frees); 3507 *m_headp = NULL; 3508 break; 3509 } 3510 txq->ift_map_failed++; 3511 DBG_COUNTER_INC(encap_load_mbuf_fail); 3512 DBG_COUNTER_INC(encap_txd_encap_fail); 3513 return (err); 3514 } 3515 ifsd_m[pidx] = m_head; 3516 /* 3517 * XXX assumes a 1 to 1 relationship between segments and 3518 * descriptors - this does not hold true on all drivers, e.g. 
3519 * cxgb 3520 */ 3521 if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { 3522 txq->ift_no_desc_avail++; 3523 bus_dmamap_unload(buf_tag, map); 3524 DBG_COUNTER_INC(encap_txq_avail_fail); 3525 DBG_COUNTER_INC(encap_txd_encap_fail); 3526 if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) 3527 GROUPTASK_ENQUEUE(&txq->ift_task); 3528 return (ENOBUFS); 3529 } 3530 /* 3531 * On Intel cards we can greatly reduce the number of TX interrupts 3532 * we see by only setting report status on every Nth descriptor. 3533 * However, this also means that the driver will need to keep track 3534 * of the descriptors that RS was set on to check them for the DD bit. 3535 */ 3536 txq->ift_rs_pending += nsegs + 1; 3537 if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) || 3538 iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs) <= MAX_TX_DESC(ctx) + 2) { 3539 pi.ipi_flags |= IPI_TX_INTR; 3540 txq->ift_rs_pending = 0; 3541 } 3542 3543 pi.ipi_segs = segs; 3544 pi.ipi_nsegs = nsegs; 3545 3546 MPASS(pidx >= 0 && pidx < txq->ift_size); 3547 #ifdef PKT_DEBUG 3548 print_pkt(&pi); 3549 #endif 3550 if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) { 3551 bus_dmamap_sync(buf_tag, map, BUS_DMASYNC_PREWRITE); 3552 DBG_COUNTER_INC(tx_encap); 3553 MPASS(pi.ipi_new_pidx < txq->ift_size); 3554 3555 ndesc = pi.ipi_new_pidx - pi.ipi_pidx; 3556 if (pi.ipi_new_pidx < pi.ipi_pidx) { 3557 ndesc += txq->ift_size; 3558 txq->ift_gen = 1; 3559 } 3560 /* 3561 * drivers can need as many as 3562 * two sentinels 3563 */ 3564 MPASS(ndesc <= pi.ipi_nsegs + 2); 3565 MPASS(pi.ipi_new_pidx != pidx); 3566 MPASS(ndesc > 0); 3567 txq->ift_in_use += ndesc; 3568 txq->ift_db_pending += ndesc; 3569 3570 /* 3571 * We update the last software descriptor again here because there may 3572 * be a sentinel and/or there may be more mbufs than segments 3573 */ 3574 txq->ift_pidx = pi.ipi_new_pidx; 3575 txq->ift_npending += pi.ipi_ndescs; 3576 } else { 3577 *m_headp = m_head = iflib_remove_mbuf(txq); 3578 if (err == EFBIG) { 3579 txq->ift_txd_encap_efbig++; 3580 if (remap < 2) { 3581 remap = 1; 3582 goto defrag; 3583 } 3584 } 3585 goto defrag_failed; 3586 } 3587 /* 3588 * err can't possibly be non-zero here, so we don't need to test it 3589 * to see if we need to DBG_COUNTER_INC(encap_txd_encap_fail).
3590 */ 3591 return (err); 3592 3593 defrag_failed: 3594 txq->ift_mbuf_defrag_failed++; 3595 txq->ift_map_failed++; 3596 m_freem(*m_headp); 3597 DBG_COUNTER_INC(tx_frees); 3598 *m_headp = NULL; 3599 DBG_COUNTER_INC(encap_txd_encap_fail); 3600 return (ENOMEM); 3601 } 3602 3603 static void 3604 iflib_tx_desc_free(iflib_txq_t txq, int n) 3605 { 3606 uint32_t qsize, cidx, mask, gen; 3607 struct mbuf *m, **ifsd_m; 3608 bool do_prefetch; 3609 3610 cidx = txq->ift_cidx; 3611 gen = txq->ift_gen; 3612 qsize = txq->ift_size; 3613 mask = qsize-1; 3614 ifsd_m = txq->ift_sds.ifsd_m; 3615 do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH); 3616 3617 while (n-- > 0) { 3618 if (do_prefetch) { 3619 prefetch(ifsd_m[(cidx + 3) & mask]); 3620 prefetch(ifsd_m[(cidx + 4) & mask]); 3621 } 3622 if ((m = ifsd_m[cidx]) != NULL) { 3623 prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]); 3624 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 3625 bus_dmamap_sync(txq->ift_tso_buf_tag, 3626 txq->ift_sds.ifsd_tso_map[cidx], 3627 BUS_DMASYNC_POSTWRITE); 3628 bus_dmamap_unload(txq->ift_tso_buf_tag, 3629 txq->ift_sds.ifsd_tso_map[cidx]); 3630 } else { 3631 bus_dmamap_sync(txq->ift_buf_tag, 3632 txq->ift_sds.ifsd_map[cidx], 3633 BUS_DMASYNC_POSTWRITE); 3634 bus_dmamap_unload(txq->ift_buf_tag, 3635 txq->ift_sds.ifsd_map[cidx]); 3636 } 3637 /* XXX we don't support any drivers that batch packets yet */ 3638 MPASS(m->m_nextpkt == NULL); 3639 m_freem(m); 3640 ifsd_m[cidx] = NULL; 3641 #if MEMORY_LOGGING 3642 txq->ift_dequeued++; 3643 #endif 3644 DBG_COUNTER_INC(tx_frees); 3645 } 3646 if (__predict_false(++cidx == qsize)) { 3647 cidx = 0; 3648 gen = 0; 3649 } 3650 } 3651 txq->ift_cidx = cidx; 3652 txq->ift_gen = gen; 3653 } 3654 3655 static __inline int 3656 iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh) 3657 { 3658 int reclaim; 3659 if_ctx_t ctx = txq->ift_ctx; 3660 3661 KASSERT(thresh >= 0, ("invalid threshold to reclaim")); 3662 MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size); 3663 3664 /* 3665 * Need a rate-limiting check so that this isn't called every time 3666 */ 3667 iflib_tx_credits_update(ctx, txq); 3668 reclaim = DESC_RECLAIMABLE(txq); 3669 3670 if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) { 3671 #ifdef INVARIANTS 3672 if (iflib_verbose_debug) { 3673 printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__, 3674 txq->ift_processed, txq->ift_cleaned, txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments, 3675 reclaim, thresh); 3676 } 3677 #endif 3678 return (0); 3679 } 3680 iflib_tx_desc_free(txq, reclaim); 3681 txq->ift_cleaned += reclaim; 3682 txq->ift_in_use -= reclaim; 3683 3684 return (reclaim); 3685 } 3686 3687 static struct mbuf ** 3688 _ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining) 3689 { 3690 int next, size; 3691 struct mbuf **items; 3692 3693 size = r->size; 3694 next = (cidx + CACHE_PTR_INCREMENT) & (size-1); 3695 items = __DEVOLATILE(struct mbuf **, &r->items[0]); 3696 3697 prefetch(items[(cidx + offset) & (size-1)]); 3698 if (remaining > 1) { 3699 prefetch2cachelines(&items[next]); 3700 prefetch2cachelines(items[(cidx + offset + 1) & (size-1)]); 3701 prefetch2cachelines(items[(cidx + offset + 2) & (size-1)]); 3702 prefetch2cachelines(items[(cidx + offset + 3) & (size-1)]); 3703 } 3704 return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)])); 3705 } 3706 3707 static void 3708 iflib_txq_check_drain(iflib_txq_t txq, int budget) 3709 { 3710 3711 ifmp_ring_check_drainage(txq->ift_br, budget); 3712 } 3713 
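/*
 * mp_ring drain interface.  iflib_txq_can_drain() and iflib_txq_drain()
 * below serve as the ring's can_drain/drain callbacks (iflib_ifmp_purge()
 * temporarily swaps them out while flushing a queue).  Deferred transmit
 * work is kicked by enqueueing the txq pointer itself as a sentinel, as
 * _task_fn_tx() does with
 *
 *	ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE,
 *	    abdicate);
 *
 * so iflib_txq_drain() has to compare each ring item against the txq
 * address and skip such sentinels rather than treating them as mbufs.
 */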
3714 static uint32_t 3715 iflib_txq_can_drain(struct ifmp_ring *r) 3716 { 3717 iflib_txq_t txq = r->cookie; 3718 if_ctx_t ctx = txq->ift_ctx; 3719 3720 if (TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) 3721 return (1); 3722 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, 3723 BUS_DMASYNC_POSTREAD); 3724 return (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, 3725 false)); 3726 } 3727 3728 static uint32_t 3729 iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) 3730 { 3731 iflib_txq_t txq = r->cookie; 3732 if_ctx_t ctx = txq->ift_ctx; 3733 if_t ifp = ctx->ifc_ifp; 3734 struct mbuf *m, **mp; 3735 int avail, bytes_sent, skipped, count, err, i; 3736 int mcast_sent, pkt_sent, reclaimed; 3737 bool do_prefetch, rang, ring; 3738 3739 if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) || 3740 !LINK_ACTIVE(ctx))) { 3741 DBG_COUNTER_INC(txq_drain_notready); 3742 return (0); 3743 } 3744 reclaimed = iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); 3745 rang = iflib_txd_db_check(txq, reclaimed && txq->ift_db_pending); 3746 avail = IDXDIFF(pidx, cidx, r->size); 3747 3748 if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { 3749 /* 3750 * The driver is unloading so we need to free all pending packets. 3751 */ 3752 DBG_COUNTER_INC(txq_drain_flushing); 3753 for (i = 0; i < avail; i++) { 3754 if (__predict_true(r->items[(cidx + i) & (r->size-1)] != (void *)txq)) 3755 m_freem(r->items[(cidx + i) & (r->size-1)]); 3756 r->items[(cidx + i) & (r->size-1)] = NULL; 3757 } 3758 return (avail); 3759 } 3760 3761 if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { 3762 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 3763 CALLOUT_LOCK(txq); 3764 callout_stop(&txq->ift_timer); 3765 CALLOUT_UNLOCK(txq); 3766 DBG_COUNTER_INC(txq_drain_oactive); 3767 return (0); 3768 } 3769 3770 /* 3771 * If we've reclaimed any packets this queue cannot be hung. 3772 */ 3773 if (reclaimed) 3774 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 3775 skipped = mcast_sent = bytes_sent = pkt_sent = 0; 3776 count = MIN(avail, TX_BATCH_SIZE); 3777 #ifdef INVARIANTS 3778 if (iflib_verbose_debug) 3779 printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__, 3780 avail, ctx->ifc_flags, TXQ_AVAIL(txq)); 3781 #endif 3782 do_prefetch = (ctx->ifc_flags & IFC_PREFETCH); 3783 err = 0; 3784 for (i = 0; i < count && TXQ_AVAIL(txq) >= MAX_TX_DESC(ctx) + 2; i++) { 3785 int rem = do_prefetch ? count - i : 0; 3786 3787 mp = _ring_peek_one(r, cidx, i, rem); 3788 MPASS(mp != NULL && *mp != NULL); 3789 3790 /* 3791 * Completion interrupts will use the address of the txq 3792 * as a sentinel to enqueue _something_ in order to acquire 3793 * the lock on the mp_ring (there's no direct lock call). 3794 * We obviously have to check for these sentinel cases 3795 * and skip them. 3796 */ 3797 if (__predict_false(*mp == (struct mbuf *)txq)) { 3798 skipped++; 3799 continue; 3800 } 3801 err = iflib_encap(txq, mp); 3802 if (__predict_false(err)) { 3803 /* no room - bail out */ 3804 if (err == ENOBUFS) 3805 break; 3806 skipped++; 3807 /* we can't send this packet - skip it */ 3808 continue; 3809 } 3810 pkt_sent++; 3811 m = *mp; 3812 DBG_COUNTER_INC(tx_sent); 3813 bytes_sent += m->m_pkthdr.len; 3814 mcast_sent += !!(m->m_flags & M_MCAST); 3815 3816 if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING))) 3817 break; 3818 ETHER_BPF_MTAP(ifp, m); 3819 rang = iflib_txd_db_check(txq, false); 3820 } 3821 3822 /* deliberate use of bitwise or to avoid gratuitous short-circuit */ 3823 ring = rang ?
false : (iflib_min_tx_latency | err); 3824 iflib_txd_db_check(txq, ring); 3825 if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); 3826 if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); 3827 if (mcast_sent) 3828 if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); 3829 #ifdef INVARIANTS 3830 if (iflib_verbose_debug) 3831 printf("consumed=%d\n", skipped + pkt_sent); 3832 #endif 3833 return (skipped + pkt_sent); 3834 } 3835 3836 static uint32_t 3837 iflib_txq_drain_always(struct ifmp_ring *r) 3838 { 3839 return (1); 3840 } 3841 3842 static uint32_t 3843 iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) 3844 { 3845 int i, avail; 3846 struct mbuf **mp; 3847 iflib_txq_t txq; 3848 3849 txq = r->cookie; 3850 3851 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 3852 CALLOUT_LOCK(txq); 3853 callout_stop(&txq->ift_timer); 3854 CALLOUT_UNLOCK(txq); 3855 3856 avail = IDXDIFF(pidx, cidx, r->size); 3857 for (i = 0; i < avail; i++) { 3858 mp = _ring_peek_one(r, cidx, i, avail - i); 3859 if (__predict_false(*mp == (struct mbuf *)txq)) 3860 continue; 3861 m_freem(*mp); 3862 DBG_COUNTER_INC(tx_frees); 3863 } 3864 MPASS(ifmp_ring_is_stalled(r) == 0); 3865 return (avail); 3866 } 3867 3868 static void 3869 iflib_ifmp_purge(iflib_txq_t txq) 3870 { 3871 struct ifmp_ring *r; 3872 3873 r = txq->ift_br; 3874 r->drain = iflib_txq_drain_free; 3875 r->can_drain = iflib_txq_drain_always; 3876 3877 ifmp_ring_check_drainage(r, r->size); 3878 3879 r->drain = iflib_txq_drain; 3880 r->can_drain = iflib_txq_can_drain; 3881 } 3882 3883 static void 3884 _task_fn_tx(void *context) 3885 { 3886 iflib_txq_t txq = context; 3887 if_ctx_t ctx = txq->ift_ctx; 3888 if_t ifp = ctx->ifc_ifp; 3889 int abdicate = ctx->ifc_sysctl_tx_abdicate; 3890 3891 #ifdef IFLIB_DIAGNOSTICS 3892 txq->ift_cpu_exec_count[curcpu]++; 3893 #endif 3894 if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) 3895 return; 3896 #ifdef DEV_NETMAP 3897 if ((if_getcapenable(ifp) & IFCAP_NETMAP) && 3898 netmap_tx_irq(ifp, txq->ift_id)) 3899 goto skip_ifmp; 3900 #endif 3901 #ifdef ALTQ 3902 if (ALTQ_IS_ENABLED(&ifp->if_snd)) 3903 iflib_altq_if_start(ifp); 3904 #endif 3905 if (txq->ift_db_pending) 3906 ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE, abdicate); 3907 else if (!abdicate) 3908 ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); 3909 /* 3910 * When abdicating, we always need to check drainage, not just when we don't enqueue 3911 */ 3912 if (abdicate) 3913 ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); 3914 #ifdef DEV_NETMAP 3915 skip_ifmp: 3916 #endif 3917 if (ctx->ifc_flags & IFC_LEGACY) 3918 IFDI_INTR_ENABLE(ctx); 3919 else 3920 IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); 3921 } 3922 3923 static void 3924 _task_fn_rx(void *context) 3925 { 3926 iflib_rxq_t rxq = context; 3927 if_ctx_t ctx = rxq->ifr_ctx; 3928 uint8_t more; 3929 uint16_t budget; 3930 #ifdef DEV_NETMAP 3931 u_int work = 0; 3932 int nmirq; 3933 #endif 3934 3935 #ifdef IFLIB_DIAGNOSTICS 3936 rxq->ifr_cpu_exec_count[curcpu]++; 3937 #endif 3938 DBG_COUNTER_INC(task_fn_rxs); 3939 if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) 3940 return; 3941 #ifdef DEV_NETMAP 3942 nmirq = netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work); 3943 if (nmirq != NM_IRQ_PASS) { 3944 more = (nmirq == NM_IRQ_RESCHED) ? 
IFLIB_RXEOF_MORE : 0; 3945 goto skip_rxeof; 3946 } 3947 #endif 3948 budget = ctx->ifc_sysctl_rx_budget; 3949 if (budget == 0) 3950 budget = 16; /* XXX */ 3951 more = iflib_rxeof(rxq, budget); 3952 #ifdef DEV_NETMAP 3953 skip_rxeof: 3954 #endif 3955 if ((more & IFLIB_RXEOF_MORE) == 0) { 3956 if (ctx->ifc_flags & IFC_LEGACY) 3957 IFDI_INTR_ENABLE(ctx); 3958 else 3959 IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); 3960 DBG_COUNTER_INC(rx_intr_enables); 3961 } 3962 if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) 3963 return; 3964 3965 if (more & IFLIB_RXEOF_MORE) 3966 GROUPTASK_ENQUEUE(&rxq->ifr_task); 3967 else if (more & IFLIB_RXEOF_EMPTY) 3968 callout_reset_curcpu(&rxq->ifr_watchdog, 1, &_task_fn_rx_watchdog, rxq); 3969 } 3970 3971 static void 3972 _task_fn_admin(void *context) 3973 { 3974 if_ctx_t ctx = context; 3975 if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; 3976 iflib_txq_t txq; 3977 int i; 3978 bool oactive, running, do_reset, do_watchdog, in_detach; 3979 3980 STATE_LOCK(ctx); 3981 running = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); 3982 oactive = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE); 3983 do_reset = (ctx->ifc_flags & IFC_DO_RESET); 3984 do_watchdog = (ctx->ifc_flags & IFC_DO_WATCHDOG); 3985 in_detach = (ctx->ifc_flags & IFC_IN_DETACH); 3986 ctx->ifc_flags &= ~(IFC_DO_RESET|IFC_DO_WATCHDOG); 3987 STATE_UNLOCK(ctx); 3988 3989 if ((!running && !oactive) && !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN)) 3990 return; 3991 if (in_detach) 3992 return; 3993 3994 CTX_LOCK(ctx); 3995 for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { 3996 CALLOUT_LOCK(txq); 3997 callout_stop(&txq->ift_timer); 3998 CALLOUT_UNLOCK(txq); 3999 } 4000 if (do_watchdog) { 4001 ctx->ifc_watchdog_events++; 4002 IFDI_WATCHDOG_RESET(ctx); 4003 } 4004 IFDI_UPDATE_ADMIN_STATUS(ctx); 4005 for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { 4006 callout_reset_on(&txq->ift_timer, iflib_timer_default, iflib_timer, txq, 4007 txq->ift_timer.c_cpu); 4008 } 4009 IFDI_LINK_INTR_ENABLE(ctx); 4010 if (do_reset) 4011 iflib_if_init_locked(ctx); 4012 CTX_UNLOCK(ctx); 4013 4014 if (LINK_ACTIVE(ctx) == 0) 4015 return; 4016 for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) 4017 iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); 4018 } 4019 4020 static void 4021 _task_fn_iov(void *context) 4022 { 4023 if_ctx_t ctx = context; 4024 4025 if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) && 4026 !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN)) 4027 return; 4028 4029 CTX_LOCK(ctx); 4030 IFDI_VFLR_HANDLE(ctx); 4031 CTX_UNLOCK(ctx); 4032 } 4033 4034 static int 4035 iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS) 4036 { 4037 int err; 4038 if_int_delay_info_t info; 4039 if_ctx_t ctx; 4040 4041 info = (if_int_delay_info_t)arg1; 4042 ctx = info->iidi_ctx; 4043 info->iidi_req = req; 4044 info->iidi_oidp = oidp; 4045 CTX_LOCK(ctx); 4046 err = IFDI_SYSCTL_INT_DELAY(ctx, info); 4047 CTX_UNLOCK(ctx); 4048 return (err); 4049 } 4050 4051 /********************************************************************* 4052 * 4053 * IFNET FUNCTIONS 4054 * 4055 **********************************************************************/ 4056 4057 static void 4058 iflib_if_init_locked(if_ctx_t ctx) 4059 { 4060 iflib_stop(ctx); 4061 iflib_init_locked(ctx); 4062 } 4063 4064 static void 4065 iflib_if_init(void *arg) 4066 { 4067 if_ctx_t ctx = arg; 4068 4069 CTX_LOCK(ctx); 4070 iflib_if_init_locked(ctx); 4071 CTX_UNLOCK(ctx); 4072 } 4073 4074 static int 4075 
iflib_if_transmit(if_t ifp, struct mbuf *m) 4076 { 4077 if_ctx_t ctx = if_getsoftc(ifp); 4078 4079 iflib_txq_t txq; 4080 int err, qidx; 4081 int abdicate = ctx->ifc_sysctl_tx_abdicate; 4082 4083 if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) { 4084 DBG_COUNTER_INC(tx_frees); 4085 m_freem(m); 4086 return (ENETDOWN); 4087 } 4088 4089 MPASS(m->m_nextpkt == NULL); 4090 /* ALTQ-enabled interfaces always use queue 0. */ 4091 qidx = 0; 4092 if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m) && !ALTQ_IS_ENABLED(&ifp->if_snd)) 4093 qidx = QIDX(ctx, m); 4094 /* 4095 * XXX calculate buf_ring based on flowid (divvy up bits?) 4096 */ 4097 txq = &ctx->ifc_txqs[qidx]; 4098 4099 #ifdef DRIVER_BACKPRESSURE 4100 if (txq->ift_closed) { 4101 while (m != NULL) { 4102 next = m->m_nextpkt; 4103 m->m_nextpkt = NULL; 4104 m_freem(m); 4105 DBG_COUNTER_INC(tx_frees); 4106 m = next; 4107 } 4108 return (ENOBUFS); 4109 } 4110 #endif 4111 #ifdef notyet 4112 qidx = count = 0; 4113 mp = marr; 4114 next = m; 4115 do { 4116 count++; 4117 next = next->m_nextpkt; 4118 } while (next != NULL); 4119 4120 if (count > nitems(marr)) 4121 if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) { 4122 /* XXX check nextpkt */ 4123 m_freem(m); 4124 /* XXX simplify for now */ 4125 DBG_COUNTER_INC(tx_frees); 4126 return (ENOBUFS); 4127 } 4128 for (next = m, i = 0; next != NULL; i++) { 4129 mp[i] = next; 4130 next = next->m_nextpkt; 4131 mp[i]->m_nextpkt = NULL; 4132 } 4133 #endif 4134 DBG_COUNTER_INC(tx_seen); 4135 err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE, abdicate); 4136 4137 if (abdicate) 4138 GROUPTASK_ENQUEUE(&txq->ift_task); 4139 if (err) { 4140 if (!abdicate) 4141 GROUPTASK_ENQUEUE(&txq->ift_task); 4142 /* support forthcoming later */ 4143 #ifdef DRIVER_BACKPRESSURE 4144 txq->ift_closed = TRUE; 4145 #endif 4146 ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); 4147 m_freem(m); 4148 DBG_COUNTER_INC(tx_frees); 4149 } 4150 4151 return (err); 4152 } 4153 4154 #ifdef ALTQ 4155 /* 4156 * The overall approach to integrating iflib with ALTQ is to continue to use 4157 * the iflib mp_ring machinery between the ALTQ queue(s) and the hardware 4158 * ring. Technically, when using ALTQ, queueing to an intermediate mp_ring 4159 * is redundant/unnecessary, but doing so minimizes the amount of 4160 * ALTQ-specific code required in iflib. It is assumed that the overhead of 4161 * redundantly queueing to an intermediate mp_ring is swamped by the 4162 * performance limitations inherent in using ALTQ. 4163 * 4164 * When ALTQ support is compiled in, all iflib drivers will use a transmit 4165 * routine, iflib_altq_if_transmit(), that checks if ALTQ is enabled for the 4166 * given interface. If ALTQ is enabled for an interface, then all 4167 * transmitted packets for that interface will be submitted to the ALTQ 4168 * subsystem via IFQ_ENQUEUE(). We don't use the legacy if_transmit() 4169 * implementation because it uses IFQ_HANDOFF(), which will duplicatively 4170 * update stats that the iflib machinery handles, and which is sensitive to 4171 * the disused IFF_DRV_OACTIVE flag. Additionally, iflib_altq_if_start() 4172 * will be installed as the start routine for use by ALTQ facilities that 4173 * need to trigger queue drains on a scheduled basis. 
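 * Whether any of this is used is decided both at compile time ("options
 * ALTQ") and at run time: iflib_register() installs iflib_altq_if_transmit()
 * and iflib_altq_if_start() only when ALTQ support is compiled in, and
 * iflib_altq_if_transmit() itself falls back to iflib_if_transmit() when
 * ALTQ is not enabled on the given interface.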
4174 * 4175 */ 4176 static void 4177 iflib_altq_if_start(if_t ifp) 4178 { 4179 struct ifaltq *ifq = &ifp->if_snd; 4180 struct mbuf *m; 4181 4182 IFQ_LOCK(ifq); 4183 IFQ_DEQUEUE_NOLOCK(ifq, m); 4184 while (m != NULL) { 4185 iflib_if_transmit(ifp, m); 4186 IFQ_DEQUEUE_NOLOCK(ifq, m); 4187 } 4188 IFQ_UNLOCK(ifq); 4189 } 4190 4191 static int 4192 iflib_altq_if_transmit(if_t ifp, struct mbuf *m) 4193 { 4194 int err; 4195 4196 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 4197 IFQ_ENQUEUE(&ifp->if_snd, m, err); 4198 if (err == 0) 4199 iflib_altq_if_start(ifp); 4200 } else 4201 err = iflib_if_transmit(ifp, m); 4202 4203 return (err); 4204 } 4205 #endif /* ALTQ */ 4206 4207 static void 4208 iflib_if_qflush(if_t ifp) 4209 { 4210 if_ctx_t ctx = if_getsoftc(ifp); 4211 iflib_txq_t txq = ctx->ifc_txqs; 4212 int i; 4213 4214 STATE_LOCK(ctx); 4215 ctx->ifc_flags |= IFC_QFLUSH; 4216 STATE_UNLOCK(ctx); 4217 for (i = 0; i < NTXQSETS(ctx); i++, txq++) 4218 while (!(ifmp_ring_is_idle(txq->ift_br) || ifmp_ring_is_stalled(txq->ift_br))) 4219 iflib_txq_check_drain(txq, 0); 4220 STATE_LOCK(ctx); 4221 ctx->ifc_flags &= ~IFC_QFLUSH; 4222 STATE_UNLOCK(ctx); 4223 4224 /* 4225 * When ALTQ is enabled, this will also take care of purging the 4226 * ALTQ queue(s). 4227 */ 4228 if_qflush(ifp); 4229 } 4230 4231 #define IFCAP_FLAGS (IFCAP_HWCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ 4232 IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \ 4233 IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | \ 4234 IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM | IFCAP_MEXTPG) 4235 4236 static int 4237 iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) 4238 { 4239 if_ctx_t ctx = if_getsoftc(ifp); 4240 struct ifreq *ifr = (struct ifreq *)data; 4241 #if defined(INET) || defined(INET6) 4242 struct ifaddr *ifa = (struct ifaddr *)data; 4243 #endif 4244 bool avoid_reset = false; 4245 int err = 0, reinit = 0, bits; 4246 4247 switch (command) { 4248 case SIOCSIFADDR: 4249 #ifdef INET 4250 if (ifa->ifa_addr->sa_family == AF_INET) 4251 avoid_reset = true; 4252 #endif 4253 #ifdef INET6 4254 if (ifa->ifa_addr->sa_family == AF_INET6) 4255 avoid_reset = true; 4256 #endif 4257 /* 4258 ** Calling init results in link renegotiation, 4259 ** so we avoid doing it when possible. 
4260 */ 4261 if (avoid_reset) { 4262 if_setflagbits(ifp, IFF_UP,0); 4263 if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) 4264 reinit = 1; 4265 #ifdef INET 4266 if (!(if_getflags(ifp) & IFF_NOARP)) 4267 arp_ifinit(ifp, ifa); 4268 #endif 4269 } else 4270 err = ether_ioctl(ifp, command, data); 4271 break; 4272 case SIOCSIFMTU: 4273 CTX_LOCK(ctx); 4274 if (ifr->ifr_mtu == if_getmtu(ifp)) { 4275 CTX_UNLOCK(ctx); 4276 break; 4277 } 4278 bits = if_getdrvflags(ifp); 4279 /* stop the driver and free any clusters before proceeding */ 4280 iflib_stop(ctx); 4281 4282 if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) { 4283 STATE_LOCK(ctx); 4284 if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size) 4285 ctx->ifc_flags |= IFC_MULTISEG; 4286 else 4287 ctx->ifc_flags &= ~IFC_MULTISEG; 4288 STATE_UNLOCK(ctx); 4289 err = if_setmtu(ifp, ifr->ifr_mtu); 4290 } 4291 iflib_init_locked(ctx); 4292 STATE_LOCK(ctx); 4293 if_setdrvflags(ifp, bits); 4294 STATE_UNLOCK(ctx); 4295 CTX_UNLOCK(ctx); 4296 break; 4297 case SIOCSIFFLAGS: 4298 CTX_LOCK(ctx); 4299 if (if_getflags(ifp) & IFF_UP) { 4300 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 4301 if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & 4302 (IFF_PROMISC | IFF_ALLMULTI)) { 4303 CTX_UNLOCK(ctx); 4304 err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); 4305 CTX_LOCK(ctx); 4306 } 4307 } else 4308 reinit = 1; 4309 } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 4310 iflib_stop(ctx); 4311 } 4312 ctx->ifc_if_flags = if_getflags(ifp); 4313 CTX_UNLOCK(ctx); 4314 break; 4315 case SIOCADDMULTI: 4316 case SIOCDELMULTI: 4317 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 4318 CTX_LOCK(ctx); 4319 IFDI_INTR_DISABLE(ctx); 4320 IFDI_MULTI_SET(ctx); 4321 IFDI_INTR_ENABLE(ctx); 4322 CTX_UNLOCK(ctx); 4323 } 4324 break; 4325 case SIOCSIFMEDIA: 4326 CTX_LOCK(ctx); 4327 IFDI_MEDIA_SET(ctx); 4328 CTX_UNLOCK(ctx); 4329 /* FALLTHROUGH */ 4330 case SIOCGIFMEDIA: 4331 case SIOCGIFXMEDIA: 4332 err = ifmedia_ioctl(ifp, ifr, ctx->ifc_mediap, command); 4333 break; 4334 case SIOCGI2C: 4335 { 4336 struct ifi2creq i2c; 4337 4338 err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); 4339 if (err != 0) 4340 break; 4341 if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { 4342 err = EINVAL; 4343 break; 4344 } 4345 if (i2c.len > sizeof(i2c.data)) { 4346 err = EINVAL; 4347 break; 4348 } 4349 4350 if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0) 4351 err = copyout(&i2c, ifr_data_get_ptr(ifr), 4352 sizeof(i2c)); 4353 break; 4354 } 4355 case SIOCSIFCAP: 4356 { 4357 int mask, setmask, oldmask; 4358 4359 oldmask = if_getcapenable(ifp); 4360 mask = ifr->ifr_reqcap ^ oldmask; 4361 mask &= ctx->ifc_softc_ctx.isc_capabilities | IFCAP_MEXTPG; 4362 setmask = 0; 4363 #ifdef TCP_OFFLOAD 4364 setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6); 4365 #endif 4366 setmask |= (mask & IFCAP_FLAGS); 4367 setmask |= (mask & IFCAP_WOL); 4368 4369 /* 4370 * If any RX csum has changed, change all the ones that 4371 * are supported by the driver. 
4372 */ 4373 if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { 4374 setmask |= ctx->ifc_softc_ctx.isc_capabilities & 4375 (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6); 4376 } 4377 4378 /* 4379 * want to ensure that traffic has stopped before we change any of the flags 4380 */ 4381 if (setmask) { 4382 CTX_LOCK(ctx); 4383 bits = if_getdrvflags(ifp); 4384 if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL) 4385 iflib_stop(ctx); 4386 STATE_LOCK(ctx); 4387 if_togglecapenable(ifp, setmask); 4388 STATE_UNLOCK(ctx); 4389 if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL) 4390 iflib_init_locked(ctx); 4391 STATE_LOCK(ctx); 4392 if_setdrvflags(ifp, bits); 4393 STATE_UNLOCK(ctx); 4394 CTX_UNLOCK(ctx); 4395 } 4396 if_vlancap(ifp); 4397 break; 4398 } 4399 case SIOCGPRIVATE_0: 4400 case SIOCSDRVSPEC: 4401 case SIOCGDRVSPEC: 4402 CTX_LOCK(ctx); 4403 err = IFDI_PRIV_IOCTL(ctx, command, data); 4404 CTX_UNLOCK(ctx); 4405 break; 4406 default: 4407 err = ether_ioctl(ifp, command, data); 4408 break; 4409 } 4410 if (reinit) 4411 iflib_if_init(ctx); 4412 return (err); 4413 } 4414 4415 static uint64_t 4416 iflib_if_get_counter(if_t ifp, ift_counter cnt) 4417 { 4418 if_ctx_t ctx = if_getsoftc(ifp); 4419 4420 return (IFDI_GET_COUNTER(ctx, cnt)); 4421 } 4422 4423 /********************************************************************* 4424 * 4425 * OTHER FUNCTIONS EXPORTED TO THE STACK 4426 * 4427 **********************************************************************/ 4428 4429 static void 4430 iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) 4431 { 4432 if_ctx_t ctx = if_getsoftc(ifp); 4433 4434 if ((void *)ctx != arg) 4435 return; 4436 4437 if ((vtag == 0) || (vtag > 4095)) 4438 return; 4439 4440 if (iflib_in_detach(ctx)) 4441 return; 4442 4443 CTX_LOCK(ctx); 4444 /* Driver may need all untagged packets to be flushed */ 4445 if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) 4446 iflib_stop(ctx); 4447 IFDI_VLAN_REGISTER(ctx, vtag); 4448 /* Re-init to load the changes, if required */ 4449 if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) 4450 iflib_init_locked(ctx); 4451 CTX_UNLOCK(ctx); 4452 } 4453 4454 static void 4455 iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) 4456 { 4457 if_ctx_t ctx = if_getsoftc(ifp); 4458 4459 if ((void *)ctx != arg) 4460 return; 4461 4462 if ((vtag == 0) || (vtag > 4095)) 4463 return; 4464 4465 CTX_LOCK(ctx); 4466 /* Driver may need all tagged packets to be flushed */ 4467 if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) 4468 iflib_stop(ctx); 4469 IFDI_VLAN_UNREGISTER(ctx, vtag); 4470 /* Re-init to load the changes, if required */ 4471 if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) 4472 iflib_init_locked(ctx); 4473 CTX_UNLOCK(ctx); 4474 } 4475 4476 static void 4477 iflib_led_func(void *arg, int onoff) 4478 { 4479 if_ctx_t ctx = arg; 4480 4481 CTX_LOCK(ctx); 4482 IFDI_LED_FUNC(ctx, onoff); 4483 CTX_UNLOCK(ctx); 4484 } 4485 4486 /********************************************************************* 4487 * 4488 * BUS FUNCTION DEFINITIONS 4489 * 4490 **********************************************************************/ 4491 4492 int 4493 iflib_device_probe(device_t dev) 4494 { 4495 const pci_vendor_info_t *ent; 4496 if_shared_ctx_t sctx; 4497 uint16_t pci_device_id, pci_rev_id, pci_subdevice_id, pci_subvendor_id; 4498 uint16_t pci_vendor_id; 4499 4500 if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) 4501 return (ENOTSUP); 4502 4503 pci_vendor_id = pci_get_vendor(dev); 4504 pci_device_id = pci_get_device(dev); 4505 
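	/*
	 * The device, subvendor, subdevice and revision IDs gathered here
	 * may be rewritten by the driver's optional isc_parse_devinfo()
	 * hook before the vendor table below is searched; a zero subvendor,
	 * subdevice or revision ID in a pci_vendor_info entry acts as a
	 * wildcard, and the table itself is terminated by a zero vendor ID.
	 */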
pci_subvendor_id = pci_get_subvendor(dev); 4506 pci_subdevice_id = pci_get_subdevice(dev); 4507 pci_rev_id = pci_get_revid(dev); 4508 if (sctx->isc_parse_devinfo != NULL) 4509 sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id); 4510 4511 ent = sctx->isc_vendor_info; 4512 while (ent->pvi_vendor_id != 0) { 4513 if (pci_vendor_id != ent->pvi_vendor_id) { 4514 ent++; 4515 continue; 4516 } 4517 if ((pci_device_id == ent->pvi_device_id) && 4518 ((pci_subvendor_id == ent->pvi_subvendor_id) || 4519 (ent->pvi_subvendor_id == 0)) && 4520 ((pci_subdevice_id == ent->pvi_subdevice_id) || 4521 (ent->pvi_subdevice_id == 0)) && 4522 ((pci_rev_id == ent->pvi_rev_id) || 4523 (ent->pvi_rev_id == 0))) { 4524 device_set_desc_copy(dev, ent->pvi_name); 4525 /* this needs to be changed to zero if the bus probing code 4526 * ever stops re-probing on best match because the sctx 4527 * may have its values over written by register calls 4528 * in subsequent probes 4529 */ 4530 return (BUS_PROBE_DEFAULT); 4531 } 4532 ent++; 4533 } 4534 return (ENXIO); 4535 } 4536 4537 int 4538 iflib_device_probe_vendor(device_t dev) 4539 { 4540 int probe; 4541 4542 probe = iflib_device_probe(dev); 4543 if (probe == BUS_PROBE_DEFAULT) 4544 return (BUS_PROBE_VENDOR); 4545 else 4546 return (probe); 4547 } 4548 4549 static void 4550 iflib_reset_qvalues(if_ctx_t ctx) 4551 { 4552 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 4553 if_shared_ctx_t sctx = ctx->ifc_sctx; 4554 device_t dev = ctx->ifc_dev; 4555 int i; 4556 4557 if (ctx->ifc_sysctl_ntxqs != 0) 4558 scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; 4559 if (ctx->ifc_sysctl_nrxqs != 0) 4560 scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs; 4561 4562 for (i = 0; i < sctx->isc_ntxqs; i++) { 4563 if (ctx->ifc_sysctl_ntxds[i] != 0) 4564 scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i]; 4565 else 4566 scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; 4567 } 4568 4569 for (i = 0; i < sctx->isc_nrxqs; i++) { 4570 if (ctx->ifc_sysctl_nrxds[i] != 0) 4571 scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i]; 4572 else 4573 scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; 4574 } 4575 4576 for (i = 0; i < sctx->isc_nrxqs; i++) { 4577 if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) { 4578 device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n", 4579 i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]); 4580 scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i]; 4581 } 4582 if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) { 4583 device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n", 4584 i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]); 4585 scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i]; 4586 } 4587 if (!powerof2(scctx->isc_nrxd[i])) { 4588 device_printf(dev, "nrxd%d: %d is not a power of 2 - using default value of %d\n", 4589 i, scctx->isc_nrxd[i], sctx->isc_nrxd_default[i]); 4590 scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; 4591 } 4592 } 4593 4594 for (i = 0; i < sctx->isc_ntxqs; i++) { 4595 if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) { 4596 device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n", 4597 i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]); 4598 scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i]; 4599 } 4600 if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) { 4601 device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n", 4602 i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); 4603 scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; 4604 } 4605 if (!powerof2(scctx->isc_ntxd[i])) { 4606 device_printf(dev, "ntxd%d: %d is not a power 
of 2 - using default value of %d\n", 4607 i, scctx->isc_ntxd[i], sctx->isc_ntxd_default[i]); 4608 scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; 4609 } 4610 } 4611 } 4612 4613 static void 4614 iflib_add_pfil(if_ctx_t ctx) 4615 { 4616 struct pfil_head *pfil; 4617 struct pfil_head_args pa; 4618 iflib_rxq_t rxq; 4619 int i; 4620 4621 pa.pa_version = PFIL_VERSION; 4622 pa.pa_flags = PFIL_IN; 4623 pa.pa_type = PFIL_TYPE_ETHERNET; 4624 pa.pa_headname = ctx->ifc_ifp->if_xname; 4625 pfil = pfil_head_register(&pa); 4626 4627 for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { 4628 rxq->pfil = pfil; 4629 } 4630 } 4631 4632 static void 4633 iflib_rem_pfil(if_ctx_t ctx) 4634 { 4635 struct pfil_head *pfil; 4636 iflib_rxq_t rxq; 4637 int i; 4638 4639 rxq = ctx->ifc_rxqs; 4640 pfil = rxq->pfil; 4641 for (i = 0; i < NRXQSETS(ctx); i++, rxq++) { 4642 rxq->pfil = NULL; 4643 } 4644 pfil_head_unregister(pfil); 4645 } 4646 4647 static uint16_t 4648 get_ctx_core_offset(if_ctx_t ctx) 4649 { 4650 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 4651 struct cpu_offset *op; 4652 uint16_t qc; 4653 uint16_t ret = ctx->ifc_sysctl_core_offset; 4654 4655 if (ret != CORE_OFFSET_UNSPECIFIED) 4656 return (ret); 4657 4658 if (ctx->ifc_sysctl_separate_txrx) 4659 qc = scctx->isc_ntxqsets + scctx->isc_nrxqsets; 4660 else 4661 qc = max(scctx->isc_ntxqsets, scctx->isc_nrxqsets); 4662 4663 mtx_lock(&cpu_offset_mtx); 4664 SLIST_FOREACH(op, &cpu_offsets, entries) { 4665 if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) { 4666 ret = op->offset; 4667 op->offset += qc; 4668 MPASS(op->refcount < UINT_MAX); 4669 op->refcount++; 4670 break; 4671 } 4672 } 4673 if (ret == CORE_OFFSET_UNSPECIFIED) { 4674 ret = 0; 4675 op = malloc(sizeof(struct cpu_offset), M_IFLIB, 4676 M_NOWAIT | M_ZERO); 4677 if (op == NULL) { 4678 device_printf(ctx->ifc_dev, 4679 "allocation for cpu offset failed.\n"); 4680 } else { 4681 op->offset = qc; 4682 op->refcount = 1; 4683 CPU_COPY(&ctx->ifc_cpus, &op->set); 4684 SLIST_INSERT_HEAD(&cpu_offsets, op, entries); 4685 } 4686 } 4687 mtx_unlock(&cpu_offset_mtx); 4688 4689 return (ret); 4690 } 4691 4692 static void 4693 unref_ctx_core_offset(if_ctx_t ctx) 4694 { 4695 struct cpu_offset *op, *top; 4696 4697 mtx_lock(&cpu_offset_mtx); 4698 SLIST_FOREACH_SAFE(op, &cpu_offsets, entries, top) { 4699 if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) { 4700 MPASS(op->refcount > 0); 4701 op->refcount--; 4702 if (op->refcount == 0) { 4703 SLIST_REMOVE(&cpu_offsets, op, cpu_offset, entries); 4704 free(op, M_IFLIB); 4705 } 4706 break; 4707 } 4708 } 4709 mtx_unlock(&cpu_offset_mtx); 4710 } 4711 4712 int 4713 iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) 4714 { 4715 if_ctx_t ctx; 4716 if_t ifp; 4717 if_softc_ctx_t scctx; 4718 kobjop_desc_t kobj_desc; 4719 kobj_method_t *kobj_method; 4720 int err, msix, rid; 4721 int num_txd, num_rxd; 4722 4723 ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); 4724 4725 if (sc == NULL) { 4726 sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); 4727 device_set_softc(dev, ctx); 4728 ctx->ifc_flags |= IFC_SC_ALLOCATED; 4729 } 4730 4731 ctx->ifc_sctx = sctx; 4732 ctx->ifc_dev = dev; 4733 ctx->ifc_softc = sc; 4734 4735 if ((err = iflib_register(ctx)) != 0) { 4736 device_printf(dev, "iflib_register failed %d\n", err); 4737 goto fail_ctx_free; 4738 } 4739 iflib_add_device_sysctl_pre(ctx); 4740 4741 scctx = &ctx->ifc_softc_ctx; 4742 ifp = ctx->ifc_ifp; 4743 4744 iflib_reset_qvalues(ctx); 4745 CTX_LOCK(ctx); 4746 if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { 4747 
device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); 4748 goto fail_unlock; 4749 } 4750 _iflib_pre_assert(scctx); 4751 ctx->ifc_txrx = *scctx->isc_txrx; 4752 4753 if (sctx->isc_flags & IFLIB_DRIVER_MEDIA) 4754 ctx->ifc_mediap = scctx->isc_media; 4755 4756 #ifdef INVARIANTS 4757 if (scctx->isc_capabilities & IFCAP_TXCSUM) 4758 MPASS(scctx->isc_tx_csum_flags); 4759 #endif 4760 4761 if_setcapabilities(ifp, 4762 scctx->isc_capabilities | IFCAP_HWSTATS | IFCAP_MEXTPG); 4763 if_setcapenable(ifp, 4764 scctx->isc_capenable | IFCAP_HWSTATS | IFCAP_MEXTPG); 4765 4766 if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) 4767 scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; 4768 if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) 4769 scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; 4770 4771 num_txd = iflib_num_tx_descs(ctx); 4772 num_rxd = iflib_num_rx_descs(ctx); 4773 4774 /* XXX change for per-queue sizes */ 4775 device_printf(dev, "Using %d TX descriptors and %d RX descriptors\n", 4776 num_txd, num_rxd); 4777 4778 if (scctx->isc_tx_nsegments > num_txd / MAX_SINGLE_PACKET_FRACTION) 4779 scctx->isc_tx_nsegments = max(1, num_txd / 4780 MAX_SINGLE_PACKET_FRACTION); 4781 if (scctx->isc_tx_tso_segments_max > num_txd / 4782 MAX_SINGLE_PACKET_FRACTION) 4783 scctx->isc_tx_tso_segments_max = max(1, 4784 num_txd / MAX_SINGLE_PACKET_FRACTION); 4785 4786 /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ 4787 if (if_getcapabilities(ifp) & IFCAP_TSO) { 4788 /* 4789 * The stack can't handle a TSO size larger than IP_MAXPACKET, 4790 * but some MACs do. 4791 */ 4792 if_sethwtsomax(ifp, min(scctx->isc_tx_tso_size_max, 4793 IP_MAXPACKET)); 4794 /* 4795 * Take maximum number of m_pullup(9)'s in iflib_parse_header() 4796 * into account. In the worst case, each of these calls will 4797 * add another mbuf and, thus, the requirement for another DMA 4798 * segment. So for best performance, it doesn't make sense to 4799 * advertize a maximum of TSO segments that typically will 4800 * require defragmentation in iflib_encap(). 4801 */ 4802 if_sethwtsomaxsegcount(ifp, scctx->isc_tx_tso_segments_max - 3); 4803 if_sethwtsomaxsegsize(ifp, scctx->isc_tx_tso_segsize_max); 4804 } 4805 if (scctx->isc_rss_table_size == 0) 4806 scctx->isc_rss_table_size = 64; 4807 scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; 4808 4809 GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); 4810 /* XXX format name */ 4811 taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, 4812 NULL, NULL, "admin"); 4813 4814 /* Set up cpu set. If it fails, use the set of all CPUs. */ 4815 if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) != 0) { 4816 device_printf(dev, "Unable to fetch CPU list\n"); 4817 CPU_COPY(&all_cpus, &ctx->ifc_cpus); 4818 } 4819 MPASS(CPU_COUNT(&ctx->ifc_cpus) > 0); 4820 4821 /* 4822 ** Now set up MSI or MSI-X, should return us the number of supported 4823 ** vectors (will be 1 for a legacy interrupt and MSI). 4824 */ 4825 if (sctx->isc_flags & IFLIB_SKIP_MSIX) { 4826 msix = scctx->isc_vectors; 4827 } else if (scctx->isc_msix_bar != 0) 4828 /* 4829 * The simple fact that isc_msix_bar is not 0 does not mean we 4830 * have a good value there that is known to work. 
4831 */ 4832 msix = iflib_msix_init(ctx); 4833 else { 4834 scctx->isc_vectors = 1; 4835 scctx->isc_ntxqsets = 1; 4836 scctx->isc_nrxqsets = 1; 4837 scctx->isc_intr = IFLIB_INTR_LEGACY; 4838 msix = 0; 4839 } 4840 /* Get memory for the station queues */ 4841 if ((err = iflib_queues_alloc(ctx))) { 4842 device_printf(dev, "Unable to allocate queue memory\n"); 4843 goto fail_intr_free; 4844 } 4845 4846 if ((err = iflib_qset_structures_setup(ctx))) 4847 goto fail_queues; 4848 4849 /* 4850 * Now that we know how many queues there are, get the core offset. 4851 */ 4852 ctx->ifc_sysctl_core_offset = get_ctx_core_offset(ctx); 4853 4854 if (msix > 1) { 4855 /* 4856 * When using MSI-X, ensure that ifdi_{r,t}x_queue_intr_enable 4857 * aren't the default NULL implementation. 4858 */ 4859 kobj_desc = &ifdi_rx_queue_intr_enable_desc; 4860 kobj_method = kobj_lookup_method(((kobj_t)ctx)->ops->cls, NULL, 4861 kobj_desc); 4862 if (kobj_method == &kobj_desc->deflt) { 4863 device_printf(dev, 4864 "MSI-X requires ifdi_rx_queue_intr_enable method"); 4865 err = EOPNOTSUPP; 4866 goto fail_queues; 4867 } 4868 kobj_desc = &ifdi_tx_queue_intr_enable_desc; 4869 kobj_method = kobj_lookup_method(((kobj_t)ctx)->ops->cls, NULL, 4870 kobj_desc); 4871 if (kobj_method == &kobj_desc->deflt) { 4872 device_printf(dev, 4873 "MSI-X requires ifdi_tx_queue_intr_enable method"); 4874 err = EOPNOTSUPP; 4875 goto fail_queues; 4876 } 4877 4878 /* 4879 * Assign the MSI-X vectors. 4880 * Note that the default NULL ifdi_msix_intr_assign method will 4881 * fail here, too. 4882 */ 4883 err = IFDI_MSIX_INTR_ASSIGN(ctx, msix); 4884 if (err != 0) { 4885 device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", 4886 err); 4887 goto fail_queues; 4888 } 4889 } else if (scctx->isc_intr != IFLIB_INTR_MSIX) { 4890 rid = 0; 4891 if (scctx->isc_intr == IFLIB_INTR_MSI) { 4892 MPASS(msix == 1); 4893 rid = 1; 4894 } 4895 if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) { 4896 device_printf(dev, "iflib_legacy_setup failed %d\n", err); 4897 goto fail_queues; 4898 } 4899 } else { 4900 device_printf(dev, 4901 "Cannot use iflib with only 1 MSI-X interrupt!\n"); 4902 err = ENODEV; 4903 goto fail_queues; 4904 } 4905 4906 ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); 4907 4908 if ((err = IFDI_ATTACH_POST(ctx)) != 0) { 4909 device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); 4910 goto fail_detach; 4911 } 4912 4913 /* 4914 * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. 4915 * This must appear after the call to ether_ifattach() because 4916 * ether_ifattach() sets if_hdrlen to the default value. 
4917 */ 4918 if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) 4919 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); 4920 4921 if ((err = iflib_netmap_attach(ctx))) { 4922 device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err); 4923 goto fail_detach; 4924 } 4925 *ctxp = ctx; 4926 4927 DEBUGNET_SET(ctx->ifc_ifp, iflib); 4928 4929 if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); 4930 iflib_add_device_sysctl_post(ctx); 4931 iflib_add_pfil(ctx); 4932 ctx->ifc_flags |= IFC_INIT_DONE; 4933 CTX_UNLOCK(ctx); 4934 4935 return (0); 4936 4937 fail_detach: 4938 ether_ifdetach(ctx->ifc_ifp); 4939 fail_queues: 4940 iflib_tqg_detach(ctx); 4941 iflib_tx_structures_free(ctx); 4942 iflib_rx_structures_free(ctx); 4943 IFDI_DETACH(ctx); 4944 IFDI_QUEUES_FREE(ctx); 4945 fail_intr_free: 4946 iflib_free_intr_mem(ctx); 4947 fail_unlock: 4948 CTX_UNLOCK(ctx); 4949 iflib_deregister(ctx); 4950 fail_ctx_free: 4951 device_set_softc(ctx->ifc_dev, NULL); 4952 if (ctx->ifc_flags & IFC_SC_ALLOCATED) 4953 free(ctx->ifc_softc, M_IFLIB); 4954 free(ctx, M_IFLIB); 4955 return (err); 4956 } 4957 4958 int 4959 iflib_pseudo_register(device_t dev, if_shared_ctx_t sctx, if_ctx_t *ctxp, 4960 struct iflib_cloneattach_ctx *clctx) 4961 { 4962 int num_txd, num_rxd; 4963 int err; 4964 if_ctx_t ctx; 4965 if_t ifp; 4966 if_softc_ctx_t scctx; 4967 int i; 4968 void *sc; 4969 4970 ctx = malloc(sizeof(*ctx), M_IFLIB, M_WAITOK|M_ZERO); 4971 sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); 4972 ctx->ifc_flags |= IFC_SC_ALLOCATED; 4973 if (sctx->isc_flags & (IFLIB_PSEUDO|IFLIB_VIRTUAL)) 4974 ctx->ifc_flags |= IFC_PSEUDO; 4975 4976 ctx->ifc_sctx = sctx; 4977 ctx->ifc_softc = sc; 4978 ctx->ifc_dev = dev; 4979 4980 if ((err = iflib_register(ctx)) != 0) { 4981 device_printf(dev, "%s: iflib_register failed %d\n", __func__, err); 4982 goto fail_ctx_free; 4983 } 4984 iflib_add_device_sysctl_pre(ctx); 4985 4986 scctx = &ctx->ifc_softc_ctx; 4987 ifp = ctx->ifc_ifp; 4988 4989 iflib_reset_qvalues(ctx); 4990 CTX_LOCK(ctx); 4991 if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { 4992 device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); 4993 goto fail_unlock; 4994 } 4995 if (sctx->isc_flags & IFLIB_GEN_MAC) 4996 ether_gen_addr(ifp, &ctx->ifc_mac); 4997 if ((err = IFDI_CLONEATTACH(ctx, clctx->cc_ifc, clctx->cc_name, 4998 clctx->cc_params)) != 0) { 4999 device_printf(dev, "IFDI_CLONEATTACH failed %d\n", err); 5000 goto fail_unlock; 5001 } 5002 #ifdef INVARIANTS 5003 if (scctx->isc_capabilities & IFCAP_TXCSUM) 5004 MPASS(scctx->isc_tx_csum_flags); 5005 #endif 5006 5007 if_setcapabilities(ifp, scctx->isc_capabilities | IFCAP_HWSTATS | IFCAP_LINKSTATE); 5008 if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS | IFCAP_LINKSTATE); 5009 5010 ifp->if_flags |= IFF_NOGROUP; 5011 if (sctx->isc_flags & IFLIB_PSEUDO) { 5012 ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO, 0, NULL); 5013 ifmedia_set(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO); 5014 if (sctx->isc_flags & IFLIB_PSEUDO_ETHER) { 5015 ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); 5016 } else { 5017 if_attach(ctx->ifc_ifp); 5018 bpfattach(ctx->ifc_ifp, DLT_NULL, sizeof(u_int32_t)); 5019 } 5020 5021 if ((err = IFDI_ATTACH_POST(ctx)) != 0) { 5022 device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); 5023 goto fail_detach; 5024 } 5025 *ctxp = ctx; 5026 5027 /* 5028 * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. 5029 * This must appear after the call to ether_ifattach() because 5030 * ether_ifattach() sets if_hdrlen to the default value. 
5031 */ 5032 if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) 5033 if_setifheaderlen(ifp, 5034 sizeof(struct ether_vlan_header)); 5035 5036 if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); 5037 iflib_add_device_sysctl_post(ctx); 5038 ctx->ifc_flags |= IFC_INIT_DONE; 5039 CTX_UNLOCK(ctx); 5040 return (0); 5041 } 5042 ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); 5043 ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO, 0, NULL); 5044 ifmedia_set(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO); 5045 5046 _iflib_pre_assert(scctx); 5047 ctx->ifc_txrx = *scctx->isc_txrx; 5048 5049 if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) 5050 scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; 5051 if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) 5052 scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; 5053 5054 num_txd = iflib_num_tx_descs(ctx); 5055 num_rxd = iflib_num_rx_descs(ctx); 5056 5057 /* XXX change for per-queue sizes */ 5058 device_printf(dev, "Using %d TX descriptors and %d RX descriptors\n", 5059 num_txd, num_rxd); 5060 5061 if (scctx->isc_tx_nsegments > num_txd / MAX_SINGLE_PACKET_FRACTION) 5062 scctx->isc_tx_nsegments = max(1, num_txd / 5063 MAX_SINGLE_PACKET_FRACTION); 5064 if (scctx->isc_tx_tso_segments_max > num_txd / 5065 MAX_SINGLE_PACKET_FRACTION) 5066 scctx->isc_tx_tso_segments_max = max(1, 5067 num_txd / MAX_SINGLE_PACKET_FRACTION); 5068 5069 /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ 5070 if (if_getcapabilities(ifp) & IFCAP_TSO) { 5071 /* 5072 * The stack can't handle a TSO size larger than IP_MAXPACKET, 5073 * but some MACs do. 5074 */ 5075 if_sethwtsomax(ifp, min(scctx->isc_tx_tso_size_max, 5076 IP_MAXPACKET)); 5077 /* 5078 * Take maximum number of m_pullup(9)'s in iflib_parse_header() 5079 * into account. In the worst case, each of these calls will 5080 * add another mbuf and, thus, the requirement for another DMA 5081 * segment. So for best performance, it doesn't make sense to 5082 * advertize a maximum of TSO segments that typically will 5083 * require defragmentation in iflib_encap(). 5084 */ 5085 if_sethwtsomaxsegcount(ifp, scctx->isc_tx_tso_segments_max - 3); 5086 if_sethwtsomaxsegsize(ifp, scctx->isc_tx_tso_segsize_max); 5087 } 5088 if (scctx->isc_rss_table_size == 0) 5089 scctx->isc_rss_table_size = 64; 5090 scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; 5091 5092 GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); 5093 /* XXX format name */ 5094 taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, 5095 NULL, NULL, "admin"); 5096 5097 /* XXX --- can support > 1 -- but keep it simple for now */ 5098 scctx->isc_intr = IFLIB_INTR_LEGACY; 5099 5100 /* Get memory for the station queues */ 5101 if ((err = iflib_queues_alloc(ctx))) { 5102 device_printf(dev, "Unable to allocate queue memory\n"); 5103 goto fail_iflib_detach; 5104 } 5105 5106 if ((err = iflib_qset_structures_setup(ctx))) { 5107 device_printf(dev, "qset structure setup failed %d\n", err); 5108 goto fail_queues; 5109 } 5110 5111 /* 5112 * XXX What if anything do we want to do about interrupts? 5113 */ 5114 ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); 5115 if ((err = IFDI_ATTACH_POST(ctx)) != 0) { 5116 device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); 5117 goto fail_detach; 5118 } 5119 5120 /* 5121 * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. 
5122 * This must appear after the call to ether_ifattach() because 5123 * ether_ifattach() sets if_hdrlen to the default value. 5124 */ 5125 if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) 5126 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); 5127 5128 /* XXX handle more than one queue */ 5129 for (i = 0; i < scctx->isc_nrxqsets; i++) 5130 IFDI_RX_CLSET(ctx, 0, i, ctx->ifc_rxqs[i].ifr_fl[0].ifl_sds.ifsd_cl); 5131 5132 *ctxp = ctx; 5133 5134 if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); 5135 iflib_add_device_sysctl_post(ctx); 5136 ctx->ifc_flags |= IFC_INIT_DONE; 5137 CTX_UNLOCK(ctx); 5138 5139 return (0); 5140 fail_detach: 5141 ether_ifdetach(ctx->ifc_ifp); 5142 fail_queues: 5143 iflib_tqg_detach(ctx); 5144 iflib_tx_structures_free(ctx); 5145 iflib_rx_structures_free(ctx); 5146 fail_iflib_detach: 5147 IFDI_DETACH(ctx); 5148 IFDI_QUEUES_FREE(ctx); 5149 fail_unlock: 5150 CTX_UNLOCK(ctx); 5151 iflib_deregister(ctx); 5152 fail_ctx_free: 5153 free(ctx->ifc_softc, M_IFLIB); 5154 free(ctx, M_IFLIB); 5155 return (err); 5156 } 5157 5158 int 5159 iflib_pseudo_deregister(if_ctx_t ctx) 5160 { 5161 if_t ifp = ctx->ifc_ifp; 5162 if_shared_ctx_t sctx = ctx->ifc_sctx; 5163 5164 /* Unregister VLAN event handlers early */ 5165 iflib_unregister_vlan_handlers(ctx); 5166 5167 if ((sctx->isc_flags & IFLIB_PSEUDO) && 5168 (sctx->isc_flags & IFLIB_PSEUDO_ETHER) == 0) { 5169 bpfdetach(ifp); 5170 if_detach(ifp); 5171 } else { 5172 ether_ifdetach(ifp); 5173 } 5174 5175 iflib_tqg_detach(ctx); 5176 iflib_tx_structures_free(ctx); 5177 iflib_rx_structures_free(ctx); 5178 IFDI_DETACH(ctx); 5179 IFDI_QUEUES_FREE(ctx); 5180 5181 iflib_deregister(ctx); 5182 5183 if (ctx->ifc_flags & IFC_SC_ALLOCATED) 5184 free(ctx->ifc_softc, M_IFLIB); 5185 free(ctx, M_IFLIB); 5186 return (0); 5187 } 5188 5189 int 5190 iflib_device_attach(device_t dev) 5191 { 5192 if_ctx_t ctx; 5193 if_shared_ctx_t sctx; 5194 5195 if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) 5196 return (ENOTSUP); 5197 5198 pci_enable_busmaster(dev); 5199 5200 return (iflib_device_register(dev, NULL, sctx, &ctx)); 5201 } 5202 5203 int 5204 iflib_device_deregister(if_ctx_t ctx) 5205 { 5206 if_t ifp = ctx->ifc_ifp; 5207 device_t dev = ctx->ifc_dev; 5208 5209 /* Make sure VLANS are not using driver */ 5210 if (if_vlantrunkinuse(ifp)) { 5211 device_printf(dev, "Vlan in use, detach first\n"); 5212 return (EBUSY); 5213 } 5214 #ifdef PCI_IOV 5215 if (!CTX_IS_VF(ctx) && pci_iov_detach(dev) != 0) { 5216 device_printf(dev, "SR-IOV in use; detach first.\n"); 5217 return (EBUSY); 5218 } 5219 #endif 5220 5221 STATE_LOCK(ctx); 5222 ctx->ifc_flags |= IFC_IN_DETACH; 5223 STATE_UNLOCK(ctx); 5224 5225 /* Unregister VLAN handlers before calling iflib_stop() */ 5226 iflib_unregister_vlan_handlers(ctx); 5227 5228 iflib_netmap_detach(ifp); 5229 ether_ifdetach(ifp); 5230 5231 CTX_LOCK(ctx); 5232 iflib_stop(ctx); 5233 CTX_UNLOCK(ctx); 5234 5235 iflib_rem_pfil(ctx); 5236 if (ctx->ifc_led_dev != NULL) 5237 led_destroy(ctx->ifc_led_dev); 5238 5239 iflib_tqg_detach(ctx); 5240 iflib_tx_structures_free(ctx); 5241 iflib_rx_structures_free(ctx); 5242 5243 CTX_LOCK(ctx); 5244 IFDI_DETACH(ctx); 5245 IFDI_QUEUES_FREE(ctx); 5246 CTX_UNLOCK(ctx); 5247 5248 /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ 5249 iflib_free_intr_mem(ctx); 5250 5251 bus_generic_detach(dev); 5252 5253 iflib_deregister(ctx); 5254 5255 device_set_softc(ctx->ifc_dev, NULL); 5256 if (ctx->ifc_flags & IFC_SC_ALLOCATED) 5257 free(ctx->ifc_softc, M_IFLIB); 5258 
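	/*
	 * Release this context's reference on the shared per-cpuset core
	 * offset taken in get_ctx_core_offset() at register time; the
	 * tracking entry is freed once its refcount drops to zero.
	 */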
unref_ctx_core_offset(ctx); 5259 free(ctx, M_IFLIB); 5260 return (0); 5261 } 5262 5263 static void 5264 iflib_tqg_detach(if_ctx_t ctx) 5265 { 5266 iflib_txq_t txq; 5267 iflib_rxq_t rxq; 5268 int i; 5269 struct taskqgroup *tqg; 5270 5271 /* XXX drain any dependent tasks */ 5272 tqg = qgroup_if_io_tqg; 5273 for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { 5274 callout_drain(&txq->ift_timer); 5275 #ifdef DEV_NETMAP 5276 callout_drain(&txq->ift_netmap_timer); 5277 #endif /* DEV_NETMAP */ 5278 if (txq->ift_task.gt_uniq != NULL) 5279 taskqgroup_detach(tqg, &txq->ift_task); 5280 } 5281 for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { 5282 if (rxq->ifr_task.gt_uniq != NULL) 5283 taskqgroup_detach(tqg, &rxq->ifr_task); 5284 } 5285 tqg = qgroup_if_config_tqg; 5286 if (ctx->ifc_admin_task.gt_uniq != NULL) 5287 taskqgroup_detach(tqg, &ctx->ifc_admin_task); 5288 if (ctx->ifc_vflr_task.gt_uniq != NULL) 5289 taskqgroup_detach(tqg, &ctx->ifc_vflr_task); 5290 } 5291 5292 static void 5293 iflib_free_intr_mem(if_ctx_t ctx) 5294 { 5295 5296 if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) { 5297 iflib_irq_free(ctx, &ctx->ifc_legacy_irq); 5298 } 5299 if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { 5300 pci_release_msi(ctx->ifc_dev); 5301 } 5302 if (ctx->ifc_msix_mem != NULL) { 5303 bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY, 5304 rman_get_rid(ctx->ifc_msix_mem), ctx->ifc_msix_mem); 5305 ctx->ifc_msix_mem = NULL; 5306 } 5307 } 5308 5309 int 5310 iflib_device_detach(device_t dev) 5311 { 5312 if_ctx_t ctx = device_get_softc(dev); 5313 5314 return (iflib_device_deregister(ctx)); 5315 } 5316 5317 int 5318 iflib_device_suspend(device_t dev) 5319 { 5320 if_ctx_t ctx = device_get_softc(dev); 5321 5322 CTX_LOCK(ctx); 5323 IFDI_SUSPEND(ctx); 5324 CTX_UNLOCK(ctx); 5325 5326 return bus_generic_suspend(dev); 5327 } 5328 int 5329 iflib_device_shutdown(device_t dev) 5330 { 5331 if_ctx_t ctx = device_get_softc(dev); 5332 5333 CTX_LOCK(ctx); 5334 IFDI_SHUTDOWN(ctx); 5335 CTX_UNLOCK(ctx); 5336 5337 return bus_generic_suspend(dev); 5338 } 5339 5340 int 5341 iflib_device_resume(device_t dev) 5342 { 5343 if_ctx_t ctx = device_get_softc(dev); 5344 iflib_txq_t txq = ctx->ifc_txqs; 5345 5346 CTX_LOCK(ctx); 5347 IFDI_RESUME(ctx); 5348 iflib_if_init_locked(ctx); 5349 CTX_UNLOCK(ctx); 5350 for (int i = 0; i < NTXQSETS(ctx); i++, txq++) 5351 iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); 5352 5353 return (bus_generic_resume(dev)); 5354 } 5355 5356 int 5357 iflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params) 5358 { 5359 int error; 5360 if_ctx_t ctx = device_get_softc(dev); 5361 5362 CTX_LOCK(ctx); 5363 error = IFDI_IOV_INIT(ctx, num_vfs, params); 5364 CTX_UNLOCK(ctx); 5365 5366 return (error); 5367 } 5368 5369 void 5370 iflib_device_iov_uninit(device_t dev) 5371 { 5372 if_ctx_t ctx = device_get_softc(dev); 5373 5374 CTX_LOCK(ctx); 5375 IFDI_IOV_UNINIT(ctx); 5376 CTX_UNLOCK(ctx); 5377 } 5378 5379 int 5380 iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) 5381 { 5382 int error; 5383 if_ctx_t ctx = device_get_softc(dev); 5384 5385 CTX_LOCK(ctx); 5386 error = IFDI_IOV_VF_ADD(ctx, vfnum, params); 5387 CTX_UNLOCK(ctx); 5388 5389 return (error); 5390 } 5391 5392 /********************************************************************* 5393 * 5394 * MODULE FUNCTION DEFINITIONS 5395 * 5396 **********************************************************************/ 5397 5398 /* 5399 * - Start a fast taskqueue thread for each core 5400 * - Start a 
taskqueue for control operations 5401 */ 5402 static int 5403 iflib_module_init(void) 5404 { 5405 iflib_timer_default = hz / 2; 5406 return (0); 5407 } 5408 5409 static int 5410 iflib_module_event_handler(module_t mod, int what, void *arg) 5411 { 5412 int err; 5413 5414 switch (what) { 5415 case MOD_LOAD: 5416 if ((err = iflib_module_init()) != 0) 5417 return (err); 5418 break; 5419 case MOD_UNLOAD: 5420 return (EBUSY); 5421 default: 5422 return (EOPNOTSUPP); 5423 } 5424 5425 return (0); 5426 } 5427 5428 /********************************************************************* 5429 * 5430 * PUBLIC FUNCTION DEFINITIONS 5431 * ordered as in iflib.h 5432 * 5433 **********************************************************************/ 5434 5435 static void 5436 _iflib_assert(if_shared_ctx_t sctx) 5437 { 5438 int i; 5439 5440 MPASS(sctx->isc_tx_maxsize); 5441 MPASS(sctx->isc_tx_maxsegsize); 5442 5443 MPASS(sctx->isc_rx_maxsize); 5444 MPASS(sctx->isc_rx_nsegments); 5445 MPASS(sctx->isc_rx_maxsegsize); 5446 5447 MPASS(sctx->isc_nrxqs >= 1 && sctx->isc_nrxqs <= 8); 5448 for (i = 0; i < sctx->isc_nrxqs; i++) { 5449 MPASS(sctx->isc_nrxd_min[i]); 5450 MPASS(powerof2(sctx->isc_nrxd_min[i])); 5451 MPASS(sctx->isc_nrxd_max[i]); 5452 MPASS(powerof2(sctx->isc_nrxd_max[i])); 5453 MPASS(sctx->isc_nrxd_default[i]); 5454 MPASS(powerof2(sctx->isc_nrxd_default[i])); 5455 } 5456 5457 MPASS(sctx->isc_ntxqs >= 1 && sctx->isc_ntxqs <= 8); 5458 for (i = 0; i < sctx->isc_ntxqs; i++) { 5459 MPASS(sctx->isc_ntxd_min[i]); 5460 MPASS(powerof2(sctx->isc_ntxd_min[i])); 5461 MPASS(sctx->isc_ntxd_max[i]); 5462 MPASS(powerof2(sctx->isc_ntxd_max[i])); 5463 MPASS(sctx->isc_ntxd_default[i]); 5464 MPASS(powerof2(sctx->isc_ntxd_default[i])); 5465 } 5466 } 5467 5468 static void 5469 _iflib_pre_assert(if_softc_ctx_t scctx) 5470 { 5471 5472 MPASS(scctx->isc_txrx->ift_txd_encap); 5473 MPASS(scctx->isc_txrx->ift_txd_flush); 5474 MPASS(scctx->isc_txrx->ift_txd_credits_update); 5475 MPASS(scctx->isc_txrx->ift_rxd_available); 5476 MPASS(scctx->isc_txrx->ift_rxd_pkt_get); 5477 MPASS(scctx->isc_txrx->ift_rxd_refill); 5478 MPASS(scctx->isc_txrx->ift_rxd_flush); 5479 } 5480 5481 static int 5482 iflib_register(if_ctx_t ctx) 5483 { 5484 if_shared_ctx_t sctx = ctx->ifc_sctx; 5485 driver_t *driver = sctx->isc_driver; 5486 device_t dev = ctx->ifc_dev; 5487 if_t ifp; 5488 u_char type; 5489 int iflags; 5490 5491 if ((sctx->isc_flags & IFLIB_PSEUDO) == 0) 5492 _iflib_assert(sctx); 5493 5494 CTX_LOCK_INIT(ctx); 5495 STATE_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev)); 5496 if (sctx->isc_flags & IFLIB_PSEUDO) { 5497 if (sctx->isc_flags & IFLIB_PSEUDO_ETHER) 5498 type = IFT_ETHER; 5499 else 5500 type = IFT_PPP; 5501 } else 5502 type = IFT_ETHER; 5503 ifp = ctx->ifc_ifp = if_alloc(type); 5504 if (ifp == NULL) { 5505 device_printf(dev, "can not allocate ifnet structure\n"); 5506 return (ENOMEM); 5507 } 5508 5509 /* 5510 * Initialize our context's device specific methods 5511 */ 5512 kobj_init((kobj_t) ctx, (kobj_class_t) driver); 5513 kobj_class_compile((kobj_class_t) driver); 5514 5515 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 5516 if_setsoftc(ifp, ctx); 5517 if_setdev(ifp, dev); 5518 if_setinitfn(ifp, iflib_if_init); 5519 if_setioctlfn(ifp, iflib_if_ioctl); 5520 #ifdef ALTQ 5521 if_setstartfn(ifp, iflib_altq_if_start); 5522 if_settransmitfn(ifp, iflib_altq_if_transmit); 5523 if_setsendqready(ifp); 5524 #else 5525 if_settransmitfn(ifp, iflib_if_transmit); 5526 #endif 5527 if_setqflushfn(ifp, iflib_if_qflush); 5528 iflags = 
IFF_MULTICAST | IFF_KNOWSEPOCH; 5529 5530 if ((sctx->isc_flags & IFLIB_PSEUDO) && 5531 (sctx->isc_flags & IFLIB_PSEUDO_ETHER) == 0) 5532 iflags |= IFF_POINTOPOINT; 5533 else 5534 iflags |= IFF_BROADCAST | IFF_SIMPLEX; 5535 if_setflags(ifp, iflags); 5536 ctx->ifc_vlan_attach_event = 5537 EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx, 5538 EVENTHANDLER_PRI_FIRST); 5539 ctx->ifc_vlan_detach_event = 5540 EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx, 5541 EVENTHANDLER_PRI_FIRST); 5542 5543 if ((sctx->isc_flags & IFLIB_DRIVER_MEDIA) == 0) { 5544 ctx->ifc_mediap = &ctx->ifc_media; 5545 ifmedia_init(ctx->ifc_mediap, IFM_IMASK, 5546 iflib_media_change, iflib_media_status); 5547 } 5548 return (0); 5549 } 5550 5551 static void 5552 iflib_unregister_vlan_handlers(if_ctx_t ctx) 5553 { 5554 /* Unregister VLAN events */ 5555 if (ctx->ifc_vlan_attach_event != NULL) { 5556 EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event); 5557 ctx->ifc_vlan_attach_event = NULL; 5558 } 5559 if (ctx->ifc_vlan_detach_event != NULL) { 5560 EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event); 5561 ctx->ifc_vlan_detach_event = NULL; 5562 } 5563 5564 } 5565 5566 static void 5567 iflib_deregister(if_ctx_t ctx) 5568 { 5569 if_t ifp = ctx->ifc_ifp; 5570 5571 /* Remove all media */ 5572 ifmedia_removeall(&ctx->ifc_media); 5573 5574 /* Ensure that VLAN event handlers are unregistered */ 5575 iflib_unregister_vlan_handlers(ctx); 5576 5577 /* Release kobject reference */ 5578 kobj_delete((kobj_t) ctx, NULL); 5579 5580 /* Free the ifnet structure */ 5581 if_free(ifp); 5582 5583 STATE_LOCK_DESTROY(ctx); 5584 5585 /* ether_ifdetach calls if_qflush - lock must be destroyed afterwards */ 5586 CTX_LOCK_DESTROY(ctx); 5587 } 5588 5589 static int 5590 iflib_queues_alloc(if_ctx_t ctx) 5591 { 5592 if_shared_ctx_t sctx = ctx->ifc_sctx; 5593 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 5594 device_t dev = ctx->ifc_dev; 5595 int nrxqsets = scctx->isc_nrxqsets; 5596 int ntxqsets = scctx->isc_ntxqsets; 5597 iflib_txq_t txq; 5598 iflib_rxq_t rxq; 5599 iflib_fl_t fl = NULL; 5600 int i, j, cpu, err, txconf, rxconf; 5601 iflib_dma_info_t ifdip; 5602 uint32_t *rxqsizes = scctx->isc_rxqsizes; 5603 uint32_t *txqsizes = scctx->isc_txqsizes; 5604 uint8_t nrxqs = sctx->isc_nrxqs; 5605 uint8_t ntxqs = sctx->isc_ntxqs; 5606 int nfree_lists = sctx->isc_nfl ? sctx->isc_nfl : 1; 5607 int fl_offset = (sctx->isc_flags & IFLIB_HAS_RXCQ ? 
1 : 0); 5608 caddr_t *vaddrs; 5609 uint64_t *paddrs; 5610 5611 KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1")); 5612 KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1")); 5613 KASSERT(nrxqs >= fl_offset + nfree_lists, 5614 ("there must be at least a rxq for each free list")); 5615 5616 /* Allocate the TX ring struct memory */ 5617 if (!(ctx->ifc_txqs = 5618 (iflib_txq_t) malloc(sizeof(struct iflib_txq) * 5619 ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { 5620 device_printf(dev, "Unable to allocate TX ring memory\n"); 5621 err = ENOMEM; 5622 goto fail; 5623 } 5624 5625 /* Now allocate the RX */ 5626 if (!(ctx->ifc_rxqs = 5627 (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) * 5628 nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { 5629 device_printf(dev, "Unable to allocate RX ring memory\n"); 5630 err = ENOMEM; 5631 goto rx_fail; 5632 } 5633 5634 txq = ctx->ifc_txqs; 5635 rxq = ctx->ifc_rxqs; 5636 5637 /* 5638 * XXX handle allocation failure 5639 */ 5640 for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) { 5641 /* Set up some basics */ 5642 5643 if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, 5644 M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { 5645 device_printf(dev, 5646 "Unable to allocate TX DMA info memory\n"); 5647 err = ENOMEM; 5648 goto err_tx_desc; 5649 } 5650 txq->ift_ifdi = ifdip; 5651 for (j = 0; j < ntxqs; j++, ifdip++) { 5652 if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, 0)) { 5653 device_printf(dev, 5654 "Unable to allocate TX descriptors\n"); 5655 err = ENOMEM; 5656 goto err_tx_desc; 5657 } 5658 txq->ift_txd_size[j] = scctx->isc_txd_size[j]; 5659 bzero((void *)ifdip->idi_vaddr, txqsizes[j]); 5660 } 5661 txq->ift_ctx = ctx; 5662 txq->ift_id = i; 5663 if (sctx->isc_flags & IFLIB_HAS_TXCQ) { 5664 txq->ift_br_offset = 1; 5665 } else { 5666 txq->ift_br_offset = 0; 5667 } 5668 5669 if (iflib_txsd_alloc(txq)) { 5670 device_printf(dev, "Critical Failure setting up TX buffers\n"); 5671 err = ENOMEM; 5672 goto err_tx_desc; 5673 } 5674 5675 /* Initialize the TX lock */ 5676 snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:TX(%d):callout", 5677 device_get_nameunit(dev), txq->ift_id); 5678 mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF); 5679 callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0); 5680 txq->ift_timer.c_cpu = cpu; 5681 #ifdef DEV_NETMAP 5682 callout_init_mtx(&txq->ift_netmap_timer, &txq->ift_mtx, 0); 5683 txq->ift_netmap_timer.c_cpu = cpu; 5684 #endif /* DEV_NETMAP */ 5685 5686 err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain, 5687 iflib_txq_can_drain, M_IFLIB, M_WAITOK); 5688 if (err) { 5689 /* XXX free any allocated rings */ 5690 device_printf(dev, "Unable to allocate buf_ring\n"); 5691 goto err_tx_desc; 5692 } 5693 } 5694 5695 for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) { 5696 /* Set up some basics */ 5697 callout_init(&rxq->ifr_watchdog, 1); 5698 5699 if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, 5700 M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { 5701 device_printf(dev, 5702 "Unable to allocate RX DMA info memory\n"); 5703 err = ENOMEM; 5704 goto err_tx_desc; 5705 } 5706 5707 rxq->ifr_ifdi = ifdip; 5708 /* XXX this needs to be changed if #rx queues != #tx queues */ 5709 rxq->ifr_ntxqirq = 1; 5710 rxq->ifr_txqid[0] = i; 5711 for (j = 0; j < nrxqs; j++, ifdip++) { 5712 if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, 0)) { 5713 device_printf(dev, 5714 "Unable to allocate RX descriptors\n"); 5715 err = ENOMEM; 5716 goto err_tx_desc; 5717 } 5718 bzero((void 
*)ifdip->idi_vaddr, rxqsizes[j]); 5719 } 5720 rxq->ifr_ctx = ctx; 5721 rxq->ifr_id = i; 5722 rxq->ifr_fl_offset = fl_offset; 5723 rxq->ifr_nfl = nfree_lists; 5724 if (!(fl = 5725 (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) { 5726 device_printf(dev, "Unable to allocate free list memory\n"); 5727 err = ENOMEM; 5728 goto err_tx_desc; 5729 } 5730 rxq->ifr_fl = fl; 5731 for (j = 0; j < nfree_lists; j++) { 5732 fl[j].ifl_rxq = rxq; 5733 fl[j].ifl_id = j; 5734 fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset]; 5735 fl[j].ifl_rxd_size = scctx->isc_rxd_size[j]; 5736 } 5737 /* Allocate receive buffers for the ring */ 5738 if (iflib_rxsd_alloc(rxq)) { 5739 device_printf(dev, 5740 "Critical Failure setting up receive buffers\n"); 5741 err = ENOMEM; 5742 goto err_rx_desc; 5743 } 5744 5745 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) 5746 fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, 5747 M_WAITOK); 5748 } 5749 5750 /* TXQs */ 5751 vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); 5752 paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); 5753 for (i = 0; i < ntxqsets; i++) { 5754 iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi; 5755 5756 for (j = 0; j < ntxqs; j++, di++) { 5757 vaddrs[i*ntxqs + j] = di->idi_vaddr; 5758 paddrs[i*ntxqs + j] = di->idi_paddr; 5759 } 5760 } 5761 if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets)) != 0) { 5762 device_printf(ctx->ifc_dev, 5763 "Unable to allocate device TX queue\n"); 5764 iflib_tx_structures_free(ctx); 5765 free(vaddrs, M_IFLIB); 5766 free(paddrs, M_IFLIB); 5767 goto err_rx_desc; 5768 } 5769 free(vaddrs, M_IFLIB); 5770 free(paddrs, M_IFLIB); 5771 5772 /* RXQs */ 5773 vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); 5774 paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); 5775 for (i = 0; i < nrxqsets; i++) { 5776 iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi; 5777 5778 for (j = 0; j < nrxqs; j++, di++) { 5779 vaddrs[i*nrxqs + j] = di->idi_vaddr; 5780 paddrs[i*nrxqs + j] = di->idi_paddr; 5781 } 5782 } 5783 if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs, nrxqsets)) != 0) { 5784 device_printf(ctx->ifc_dev, 5785 "Unable to allocate device RX queue\n"); 5786 iflib_tx_structures_free(ctx); 5787 free(vaddrs, M_IFLIB); 5788 free(paddrs, M_IFLIB); 5789 goto err_rx_desc; 5790 } 5791 free(vaddrs, M_IFLIB); 5792 free(paddrs, M_IFLIB); 5793 5794 return (0); 5795 5796 /* XXX handle allocation failure changes */ 5797 err_rx_desc: 5798 err_tx_desc: 5799 rx_fail: 5800 if (ctx->ifc_rxqs != NULL) 5801 free(ctx->ifc_rxqs, M_IFLIB); 5802 ctx->ifc_rxqs = NULL; 5803 if (ctx->ifc_txqs != NULL) 5804 free(ctx->ifc_txqs, M_IFLIB); 5805 ctx->ifc_txqs = NULL; 5806 fail: 5807 return (err); 5808 } 5809 5810 static int 5811 iflib_tx_structures_setup(if_ctx_t ctx) 5812 { 5813 iflib_txq_t txq = ctx->ifc_txqs; 5814 int i; 5815 5816 for (i = 0; i < NTXQSETS(ctx); i++, txq++) 5817 iflib_txq_setup(txq); 5818 5819 return (0); 5820 } 5821 5822 static void 5823 iflib_tx_structures_free(if_ctx_t ctx) 5824 { 5825 iflib_txq_t txq = ctx->ifc_txqs; 5826 if_shared_ctx_t sctx = ctx->ifc_sctx; 5827 int i, j; 5828 5829 for (i = 0; i < NTXQSETS(ctx); i++, txq++) { 5830 for (j = 0; j < sctx->isc_ntxqs; j++) 5831 iflib_dma_free(&txq->ift_ifdi[j]); 5832 iflib_txq_destroy(txq); 5833 } 5834 free(ctx->ifc_txqs, M_IFLIB); 5835 ctx->ifc_txqs = NULL; 5836 } 5837 5838 /********************************************************************* 5839 * 
5840  *  Initialize all receive rings.
5841  *
5842  **********************************************************************/
5843 static int
5844 iflib_rx_structures_setup(if_ctx_t ctx)
5845 {
5846 	iflib_rxq_t rxq = ctx->ifc_rxqs;
5847 	int q;
5848 #if defined(INET6) || defined(INET)
5849 	int err, i;
5850 #endif
5851 
5852 	for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) {
5853 #if defined(INET6) || defined(INET)
5854 		if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO) {
5855 			err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp,
5856 			    TCP_LRO_ENTRIES, min(1024,
5857 			    ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]));
5858 			if (err != 0) {
5859 				device_printf(ctx->ifc_dev,
5860 				    "LRO Initialization failed!\n");
5861 				goto fail;
5862 			}
5863 		}
5864 #endif
5865 		IFDI_RXQ_SETUP(ctx, rxq->ifr_id);
5866 	}
5867 	return (0);
5868 #if defined(INET6) || defined(INET)
5869 fail:
5870 	/*
5871 	 * Free LRO resources allocated so far; we will only handle
5872 	 * the rings that completed, as the failing case will have
5873 	 * cleaned up for itself.  'q' failed, so it's the terminus.
5874 	 */
5875 	rxq = ctx->ifc_rxqs;
5876 	for (i = 0; i < q; ++i, rxq++) {
5877 		if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO)
5878 			tcp_lro_free(&rxq->ifr_lc);
5879 	}
5880 	return (err);
5881 #endif
5882 }
5883 
5884 /*********************************************************************
5885  *
5886  *  Free all receive rings.
5887  *
5888  **********************************************************************/
5889 static void
5890 iflib_rx_structures_free(if_ctx_t ctx)
5891 {
5892 	iflib_rxq_t rxq = ctx->ifc_rxqs;
5893 	if_shared_ctx_t sctx = ctx->ifc_sctx;
5894 	int i, j;
5895 
5896 	for (i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) {
5897 		for (j = 0; j < sctx->isc_nrxqs; j++)
5898 			iflib_dma_free(&rxq->ifr_ifdi[j]);
5899 		iflib_rx_sds_free(rxq);
5900 #if defined(INET6) || defined(INET)
5901 		if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO)
5902 			tcp_lro_free(&rxq->ifr_lc);
5903 #endif
5904 	}
5905 	free(ctx->ifc_rxqs, M_IFLIB);
5906 	ctx->ifc_rxqs = NULL;
5907 }
5908 
5909 static int
5910 iflib_qset_structures_setup(if_ctx_t ctx)
5911 {
5912 	int err;
5913 
5914 	/*
5915 	 * It is expected that the caller takes care of freeing queues if this
5916 	 * fails.
5917 */ 5918 if ((err = iflib_tx_structures_setup(ctx)) != 0) { 5919 device_printf(ctx->ifc_dev, "iflib_tx_structures_setup failed: %d\n", err); 5920 return (err); 5921 } 5922 5923 if ((err = iflib_rx_structures_setup(ctx)) != 0) 5924 device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err); 5925 5926 return (err); 5927 } 5928 5929 int 5930 iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, 5931 driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, const char *name) 5932 { 5933 5934 return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); 5935 } 5936 5937 #ifdef SMP 5938 static int 5939 find_nth(if_ctx_t ctx, int qid) 5940 { 5941 cpuset_t cpus; 5942 int i, cpuid, eqid, count; 5943 5944 CPU_COPY(&ctx->ifc_cpus, &cpus); 5945 count = CPU_COUNT(&cpus); 5946 eqid = qid % count; 5947 /* clear up to the qid'th bit */ 5948 for (i = 0; i < eqid; i++) { 5949 cpuid = CPU_FFS(&cpus); 5950 MPASS(cpuid != 0); 5951 CPU_CLR(cpuid-1, &cpus); 5952 } 5953 cpuid = CPU_FFS(&cpus); 5954 MPASS(cpuid != 0); 5955 return (cpuid-1); 5956 } 5957 5958 #ifdef SCHED_ULE 5959 extern struct cpu_group *cpu_top; /* CPU topology */ 5960 5961 static int 5962 find_child_with_core(int cpu, struct cpu_group *grp) 5963 { 5964 int i; 5965 5966 if (grp->cg_children == 0) 5967 return -1; 5968 5969 MPASS(grp->cg_child); 5970 for (i = 0; i < grp->cg_children; i++) { 5971 if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask)) 5972 return i; 5973 } 5974 5975 return -1; 5976 } 5977 5978 /* 5979 * Find the nth "close" core to the specified core 5980 * "close" is defined as the deepest level that shares 5981 * at least an L2 cache. With threads, this will be 5982 * threads on the same core. If the shared cache is L3 5983 * or higher, simply returns the same core. 5984 */ 5985 static int 5986 find_close_core(int cpu, int core_offset) 5987 { 5988 struct cpu_group *grp; 5989 int i; 5990 int fcpu; 5991 cpuset_t cs; 5992 5993 grp = cpu_top; 5994 if (grp == NULL) 5995 return cpu; 5996 i = 0; 5997 while ((i = find_child_with_core(cpu, grp)) != -1) { 5998 /* If the child only has one cpu, don't descend */ 5999 if (grp->cg_child[i].cg_count <= 1) 6000 break; 6001 grp = &grp->cg_child[i]; 6002 } 6003 6004 /* If they don't share at least an L2 cache, use the same CPU */ 6005 if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE) 6006 return cpu; 6007 6008 /* Now pick one */ 6009 CPU_COPY(&grp->cg_mask, &cs); 6010 6011 /* Add the selected CPU offset to core offset. 
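 *    The loop below locates the index of 'cpu' within the sharing group's
 *    mask; after adding it to core_offset, the second loop selects the
 *    group member that many entries from the start of the mask, modulo
 *    the group size.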
*/ 6012 for (i = 0; (fcpu = CPU_FFS(&cs)) != 0; i++) { 6013 if (fcpu - 1 == cpu) 6014 break; 6015 CPU_CLR(fcpu - 1, &cs); 6016 } 6017 MPASS(fcpu); 6018 6019 core_offset += i; 6020 6021 CPU_COPY(&grp->cg_mask, &cs); 6022 for (i = core_offset % grp->cg_count; i > 0; i--) { 6023 MPASS(CPU_FFS(&cs)); 6024 CPU_CLR(CPU_FFS(&cs) - 1, &cs); 6025 } 6026 MPASS(CPU_FFS(&cs)); 6027 return CPU_FFS(&cs) - 1; 6028 } 6029 #else 6030 static int 6031 find_close_core(int cpu, int core_offset __unused) 6032 { 6033 return cpu; 6034 } 6035 #endif 6036 6037 static int 6038 get_core_offset(if_ctx_t ctx, iflib_intr_type_t type, int qid) 6039 { 6040 switch (type) { 6041 case IFLIB_INTR_TX: 6042 /* TX queues get cores which share at least an L2 cache with the corresponding RX queue */ 6043 /* XXX handle multiple RX threads per core and more than two core per L2 group */ 6044 return qid / CPU_COUNT(&ctx->ifc_cpus) + 1; 6045 case IFLIB_INTR_RX: 6046 case IFLIB_INTR_RXTX: 6047 /* RX queues get the specified core */ 6048 return qid / CPU_COUNT(&ctx->ifc_cpus); 6049 default: 6050 return -1; 6051 } 6052 } 6053 #else 6054 #define get_core_offset(ctx, type, qid) CPU_FIRST() 6055 #define find_close_core(cpuid, tid) CPU_FIRST() 6056 #define find_nth(ctx, gid) CPU_FIRST() 6057 #endif 6058 6059 /* Just to avoid copy/paste */ 6060 static inline int 6061 iflib_irq_set_affinity(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, 6062 int qid, struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, 6063 const char *name) 6064 { 6065 device_t dev; 6066 int co, cpuid, err, tid; 6067 6068 dev = ctx->ifc_dev; 6069 co = ctx->ifc_sysctl_core_offset; 6070 if (ctx->ifc_sysctl_separate_txrx && type == IFLIB_INTR_TX) 6071 co += ctx->ifc_softc_ctx.isc_nrxqsets; 6072 cpuid = find_nth(ctx, qid + co); 6073 tid = get_core_offset(ctx, type, qid); 6074 if (tid < 0) { 6075 device_printf(dev, "get_core_offset failed\n"); 6076 return (EOPNOTSUPP); 6077 } 6078 cpuid = find_close_core(cpuid, tid); 6079 err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, dev, irq->ii_res, 6080 name); 6081 if (err) { 6082 device_printf(dev, "taskqgroup_attach_cpu failed %d\n", err); 6083 return (err); 6084 } 6085 #ifdef notyet 6086 if (cpuid > ctx->ifc_cpuid_highest) 6087 ctx->ifc_cpuid_highest = cpuid; 6088 #endif 6089 return (0); 6090 } 6091 6092 int 6093 iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, 6094 iflib_intr_type_t type, driver_filter_t *filter, 6095 void *filter_arg, int qid, const char *name) 6096 { 6097 device_t dev; 6098 struct grouptask *gtask; 6099 struct taskqgroup *tqg; 6100 iflib_filter_info_t info; 6101 gtask_fn_t *fn; 6102 int tqrid, err; 6103 driver_filter_t *intr_fast; 6104 void *q; 6105 6106 info = &ctx->ifc_filter_info; 6107 tqrid = rid; 6108 6109 switch (type) { 6110 /* XXX merge tx/rx for netmap? 
*/ 6111 case IFLIB_INTR_TX: 6112 q = &ctx->ifc_txqs[qid]; 6113 info = &ctx->ifc_txqs[qid].ift_filter_info; 6114 gtask = &ctx->ifc_txqs[qid].ift_task; 6115 tqg = qgroup_if_io_tqg; 6116 fn = _task_fn_tx; 6117 intr_fast = iflib_fast_intr; 6118 GROUPTASK_INIT(gtask, 0, fn, q); 6119 ctx->ifc_flags |= IFC_NETMAP_TX_IRQ; 6120 break; 6121 case IFLIB_INTR_RX: 6122 q = &ctx->ifc_rxqs[qid]; 6123 info = &ctx->ifc_rxqs[qid].ifr_filter_info; 6124 gtask = &ctx->ifc_rxqs[qid].ifr_task; 6125 tqg = qgroup_if_io_tqg; 6126 fn = _task_fn_rx; 6127 intr_fast = iflib_fast_intr; 6128 NET_GROUPTASK_INIT(gtask, 0, fn, q); 6129 break; 6130 case IFLIB_INTR_RXTX: 6131 q = &ctx->ifc_rxqs[qid]; 6132 info = &ctx->ifc_rxqs[qid].ifr_filter_info; 6133 gtask = &ctx->ifc_rxqs[qid].ifr_task; 6134 tqg = qgroup_if_io_tqg; 6135 fn = _task_fn_rx; 6136 intr_fast = iflib_fast_intr_rxtx; 6137 NET_GROUPTASK_INIT(gtask, 0, fn, q); 6138 break; 6139 case IFLIB_INTR_ADMIN: 6140 q = ctx; 6141 tqrid = -1; 6142 info = &ctx->ifc_filter_info; 6143 gtask = &ctx->ifc_admin_task; 6144 tqg = qgroup_if_config_tqg; 6145 fn = _task_fn_admin; 6146 intr_fast = iflib_fast_intr_ctx; 6147 break; 6148 default: 6149 device_printf(ctx->ifc_dev, "%s: unknown net intr type\n", 6150 __func__); 6151 return (EINVAL); 6152 } 6153 6154 info->ifi_filter = filter; 6155 info->ifi_filter_arg = filter_arg; 6156 info->ifi_task = gtask; 6157 info->ifi_ctx = q; 6158 6159 dev = ctx->ifc_dev; 6160 err = _iflib_irq_alloc(ctx, irq, rid, intr_fast, NULL, info, name); 6161 if (err != 0) { 6162 device_printf(dev, "_iflib_irq_alloc failed %d\n", err); 6163 return (err); 6164 } 6165 if (type == IFLIB_INTR_ADMIN) 6166 return (0); 6167 6168 if (tqrid != -1) { 6169 err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, 6170 q, name); 6171 if (err) 6172 return (err); 6173 } else { 6174 taskqgroup_attach(tqg, gtask, q, dev, irq->ii_res, name); 6175 } 6176 6177 return (0); 6178 } 6179 6180 void 6181 iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, const char *name) 6182 { 6183 struct grouptask *gtask; 6184 struct taskqgroup *tqg; 6185 gtask_fn_t *fn; 6186 void *q; 6187 int err; 6188 6189 switch (type) { 6190 case IFLIB_INTR_TX: 6191 q = &ctx->ifc_txqs[qid]; 6192 gtask = &ctx->ifc_txqs[qid].ift_task; 6193 tqg = qgroup_if_io_tqg; 6194 fn = _task_fn_tx; 6195 GROUPTASK_INIT(gtask, 0, fn, q); 6196 break; 6197 case IFLIB_INTR_RX: 6198 q = &ctx->ifc_rxqs[qid]; 6199 gtask = &ctx->ifc_rxqs[qid].ifr_task; 6200 tqg = qgroup_if_io_tqg; 6201 fn = _task_fn_rx; 6202 NET_GROUPTASK_INIT(gtask, 0, fn, q); 6203 break; 6204 case IFLIB_INTR_IOV: 6205 q = ctx; 6206 gtask = &ctx->ifc_vflr_task; 6207 tqg = qgroup_if_config_tqg; 6208 fn = _task_fn_iov; 6209 GROUPTASK_INIT(gtask, 0, fn, q); 6210 break; 6211 default: 6212 panic("unknown net intr type"); 6213 } 6214 if (irq != NULL) { 6215 err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, 6216 q, name); 6217 if (err) 6218 taskqgroup_attach(tqg, gtask, q, ctx->ifc_dev, 6219 irq->ii_res, name); 6220 } else { 6221 taskqgroup_attach(tqg, gtask, q, NULL, NULL, name); 6222 } 6223 } 6224 6225 void 6226 iflib_irq_free(if_ctx_t ctx, if_irq_t irq) 6227 { 6228 6229 if (irq->ii_tag) 6230 bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag); 6231 6232 if (irq->ii_res) 6233 bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, 6234 rman_get_rid(irq->ii_res), irq->ii_res); 6235 } 6236 6237 static int 6238 iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, const char *name) 6239 { 
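	/*
	 * Legacy/single-vector setup: one interrupt resource drives both the
	 * first RX queue's and the first TX queue's group tasks.  When the
	 * driver sets IFLIB_SINGLE_IRQ_RX_ONLY, the filter context is the
	 * iflib ctx and iflib_fast_intr_ctx is used instead of the combined
	 * RX/TX fast handler (iflib_fast_intr_rxtx).
	 */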
6240 iflib_txq_t txq = ctx->ifc_txqs; 6241 iflib_rxq_t rxq = ctx->ifc_rxqs; 6242 if_irq_t irq = &ctx->ifc_legacy_irq; 6243 iflib_filter_info_t info; 6244 device_t dev; 6245 struct grouptask *gtask; 6246 struct resource *res; 6247 struct taskqgroup *tqg; 6248 void *q; 6249 int err, tqrid; 6250 bool rx_only; 6251 6252 q = &ctx->ifc_rxqs[0]; 6253 info = &rxq[0].ifr_filter_info; 6254 gtask = &rxq[0].ifr_task; 6255 tqg = qgroup_if_io_tqg; 6256 tqrid = *rid; 6257 rx_only = (ctx->ifc_sctx->isc_flags & IFLIB_SINGLE_IRQ_RX_ONLY) != 0; 6258 6259 ctx->ifc_flags |= IFC_LEGACY; 6260 info->ifi_filter = filter; 6261 info->ifi_filter_arg = filter_arg; 6262 info->ifi_task = gtask; 6263 info->ifi_ctx = rx_only ? ctx : q; 6264 6265 dev = ctx->ifc_dev; 6266 /* We allocate a single interrupt resource */ 6267 err = _iflib_irq_alloc(ctx, irq, tqrid, rx_only ? iflib_fast_intr_ctx : 6268 iflib_fast_intr_rxtx, NULL, info, name); 6269 if (err != 0) 6270 return (err); 6271 NET_GROUPTASK_INIT(gtask, 0, _task_fn_rx, q); 6272 res = irq->ii_res; 6273 taskqgroup_attach(tqg, gtask, q, dev, res, name); 6274 6275 GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); 6276 taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, dev, res, 6277 "tx"); 6278 return (0); 6279 } 6280 6281 void 6282 iflib_led_create(if_ctx_t ctx) 6283 { 6284 6285 ctx->ifc_led_dev = led_create(iflib_led_func, ctx, 6286 device_get_nameunit(ctx->ifc_dev)); 6287 } 6288 6289 void 6290 iflib_tx_intr_deferred(if_ctx_t ctx, int txqid) 6291 { 6292 6293 GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); 6294 } 6295 6296 void 6297 iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid) 6298 { 6299 6300 GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task); 6301 } 6302 6303 void 6304 iflib_admin_intr_deferred(if_ctx_t ctx) 6305 { 6306 6307 MPASS(ctx->ifc_admin_task.gt_taskqueue != NULL); 6308 GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); 6309 } 6310 6311 void 6312 iflib_iov_intr_deferred(if_ctx_t ctx) 6313 { 6314 6315 GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task); 6316 } 6317 6318 void 6319 iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, const char *name) 6320 { 6321 6322 taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, NULL, NULL, 6323 name); 6324 } 6325 6326 void 6327 iflib_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn, 6328 const char *name) 6329 { 6330 6331 GROUPTASK_INIT(gtask, 0, fn, ctx); 6332 taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, NULL, NULL, 6333 name); 6334 } 6335 6336 void 6337 iflib_config_gtask_deinit(struct grouptask *gtask) 6338 { 6339 6340 taskqgroup_detach(qgroup_if_config_tqg, gtask); 6341 } 6342 6343 void 6344 iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate) 6345 { 6346 if_t ifp = ctx->ifc_ifp; 6347 iflib_txq_t txq = ctx->ifc_txqs; 6348 6349 if_setbaudrate(ifp, baudrate); 6350 if (baudrate >= IF_Gbps(10)) { 6351 STATE_LOCK(ctx); 6352 ctx->ifc_flags |= IFC_PREFETCH; 6353 STATE_UNLOCK(ctx); 6354 } 6355 /* If link down, disable watchdog */ 6356 if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) { 6357 for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++) 6358 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 6359 } 6360 ctx->ifc_link_state = link_state; 6361 if_link_state_change(ifp, link_state); 6362 } 6363 6364 static int 6365 iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) 6366 { 6367 int credits; 6368 #ifdef INVARIANTS 6369 int credits_pre = txq->ift_cidx_processed; 6370 #endif 6371 6372 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, 
6373 BUS_DMASYNC_POSTREAD); 6374 if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0) 6375 return (0); 6376 6377 txq->ift_processed += credits; 6378 txq->ift_cidx_processed += credits; 6379 6380 MPASS(credits_pre + credits == txq->ift_cidx_processed); 6381 if (txq->ift_cidx_processed >= txq->ift_size) 6382 txq->ift_cidx_processed -= txq->ift_size; 6383 return (credits); 6384 } 6385 6386 static int 6387 iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget) 6388 { 6389 iflib_fl_t fl; 6390 u_int i; 6391 6392 for (i = 0, fl = &rxq->ifr_fl[0]; i < rxq->ifr_nfl; i++, fl++) 6393 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, 6394 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 6395 return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx, 6396 budget)); 6397 } 6398 6399 void 6400 iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name, 6401 const char *description, if_int_delay_info_t info, 6402 int offset, int value) 6403 { 6404 info->iidi_ctx = ctx; 6405 info->iidi_offset = offset; 6406 info->iidi_value = value; 6407 SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev), 6408 SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)), 6409 OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 6410 info, 0, iflib_sysctl_int_delay, "I", description); 6411 } 6412 6413 struct sx * 6414 iflib_ctx_lock_get(if_ctx_t ctx) 6415 { 6416 6417 return (&ctx->ifc_ctx_sx); 6418 } 6419 6420 static int 6421 iflib_msix_init(if_ctx_t ctx) 6422 { 6423 device_t dev = ctx->ifc_dev; 6424 if_shared_ctx_t sctx = ctx->ifc_sctx; 6425 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 6426 int admincnt, bar, err, iflib_num_rx_queues, iflib_num_tx_queues; 6427 int msgs, queuemsgs, queues, rx_queues, tx_queues, vectors; 6428 6429 iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs; 6430 iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs; 6431 6432 if (bootverbose) 6433 device_printf(dev, "msix_init qsets capped at %d\n", 6434 imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets)); 6435 6436 /* Override by tuneable */ 6437 if (scctx->isc_disable_msix) 6438 goto msi; 6439 6440 /* First try MSI-X */ 6441 if ((msgs = pci_msix_count(dev)) == 0) { 6442 if (bootverbose) 6443 device_printf(dev, "MSI-X not supported or disabled\n"); 6444 goto msi; 6445 } 6446 6447 bar = ctx->ifc_softc_ctx.isc_msix_bar; 6448 /* 6449 * bar == -1 => "trust me I know what I'm doing" 6450 * Some drivers are for hardware that is so shoddily 6451 * documented that no one knows which bars are which 6452 * so the developer has to map all bars. This hack 6453 * allows shoddy garbage to use MSI-X in this framework. 
6454 */ 6455 if (bar != -1) { 6456 ctx->ifc_msix_mem = bus_alloc_resource_any(dev, 6457 SYS_RES_MEMORY, &bar, RF_ACTIVE); 6458 if (ctx->ifc_msix_mem == NULL) { 6459 device_printf(dev, "Unable to map MSI-X table\n"); 6460 goto msi; 6461 } 6462 } 6463 6464 admincnt = sctx->isc_admin_intrcnt; 6465 #if IFLIB_DEBUG 6466 /* use only 1 qset in debug mode */ 6467 queuemsgs = min(msgs - admincnt, 1); 6468 #else 6469 queuemsgs = msgs - admincnt; 6470 #endif 6471 #ifdef RSS 6472 queues = imin(queuemsgs, rss_getnumbuckets()); 6473 #else 6474 queues = queuemsgs; 6475 #endif 6476 queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues); 6477 if (bootverbose) 6478 device_printf(dev, 6479 "intr CPUs: %d queue msgs: %d admincnt: %d\n", 6480 CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); 6481 #ifdef RSS 6482 /* If we're doing RSS, clamp at the number of RSS buckets */ 6483 if (queues > rss_getnumbuckets()) 6484 queues = rss_getnumbuckets(); 6485 #endif 6486 if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt) 6487 rx_queues = iflib_num_rx_queues; 6488 else 6489 rx_queues = queues; 6490 6491 if (rx_queues > scctx->isc_nrxqsets) 6492 rx_queues = scctx->isc_nrxqsets; 6493 6494 /* 6495 * We want this to be all logical CPUs by default 6496 */ 6497 if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues) 6498 tx_queues = iflib_num_tx_queues; 6499 else 6500 tx_queues = mp_ncpus; 6501 6502 if (tx_queues > scctx->isc_ntxqsets) 6503 tx_queues = scctx->isc_ntxqsets; 6504 6505 if (ctx->ifc_sysctl_qs_eq_override == 0) { 6506 #ifdef INVARIANTS 6507 if (tx_queues != rx_queues) 6508 device_printf(dev, 6509 "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n", 6510 min(rx_queues, tx_queues), min(rx_queues, tx_queues)); 6511 #endif 6512 tx_queues = min(rx_queues, tx_queues); 6513 rx_queues = min(rx_queues, tx_queues); 6514 } 6515 6516 vectors = rx_queues + admincnt; 6517 if (msgs < vectors) { 6518 device_printf(dev, 6519 "insufficient number of MSI-X vectors " 6520 "(supported %d, need %d)\n", msgs, vectors); 6521 goto msi; 6522 } 6523 6524 device_printf(dev, "Using %d RX queues %d TX queues\n", rx_queues, 6525 tx_queues); 6526 msgs = vectors; 6527 if ((err = pci_alloc_msix(dev, &vectors)) == 0) { 6528 if (vectors != msgs) { 6529 device_printf(dev, 6530 "Unable to allocate sufficient MSI-X vectors " 6531 "(got %d, need %d)\n", vectors, msgs); 6532 pci_release_msi(dev); 6533 if (bar != -1) { 6534 bus_release_resource(dev, SYS_RES_MEMORY, bar, 6535 ctx->ifc_msix_mem); 6536 ctx->ifc_msix_mem = NULL; 6537 } 6538 goto msi; 6539 } 6540 device_printf(dev, "Using MSI-X interrupts with %d vectors\n", 6541 vectors); 6542 scctx->isc_vectors = vectors; 6543 scctx->isc_nrxqsets = rx_queues; 6544 scctx->isc_ntxqsets = tx_queues; 6545 scctx->isc_intr = IFLIB_INTR_MSIX; 6546 6547 return (vectors); 6548 } else { 6549 device_printf(dev, 6550 "failed to allocate %d MSI-X vectors, err: %d\n", vectors, 6551 err); 6552 if (bar != -1) { 6553 bus_release_resource(dev, SYS_RES_MEMORY, bar, 6554 ctx->ifc_msix_mem); 6555 ctx->ifc_msix_mem = NULL; 6556 } 6557 } 6558 6559 msi: 6560 vectors = pci_msi_count(dev); 6561 scctx->isc_nrxqsets = 1; 6562 scctx->isc_ntxqsets = 1; 6563 scctx->isc_vectors = vectors; 6564 if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) { 6565 device_printf(dev,"Using an MSI interrupt\n"); 6566 scctx->isc_intr = IFLIB_INTR_MSI; 6567 } else { 6568 scctx->isc_vectors = 1; 6569 device_printf(dev,"Using a Legacy interrupt\n"); 6570 scctx->isc_intr = IFLIB_INTR_LEGACY; 6571 } 6572 
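	/*
	 * In both the MSI and the INTx fallback cases iflib runs with a
	 * single TX and a single RX queue set.
	 */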
6573 return (vectors); 6574 } 6575 6576 static const char *ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" }; 6577 6578 static int 6579 mp_ring_state_handler(SYSCTL_HANDLER_ARGS) 6580 { 6581 int rc; 6582 uint16_t *state = ((uint16_t *)oidp->oid_arg1); 6583 struct sbuf *sb; 6584 const char *ring_state = "UNKNOWN"; 6585 6586 /* XXX needed ? */ 6587 rc = sysctl_wire_old_buffer(req, 0); 6588 MPASS(rc == 0); 6589 if (rc != 0) 6590 return (rc); 6591 sb = sbuf_new_for_sysctl(NULL, NULL, 80, req); 6592 MPASS(sb != NULL); 6593 if (sb == NULL) 6594 return (ENOMEM); 6595 if (state[3] <= 3) 6596 ring_state = ring_states[state[3]]; 6597 6598 sbuf_printf(sb, "pidx_head: %04hd pidx_tail: %04hd cidx: %04hd state: %s", 6599 state[0], state[1], state[2], ring_state); 6600 rc = sbuf_finish(sb); 6601 sbuf_delete(sb); 6602 return(rc); 6603 } 6604 6605 enum iflib_ndesc_handler { 6606 IFLIB_NTXD_HANDLER, 6607 IFLIB_NRXD_HANDLER, 6608 }; 6609 6610 static int 6611 mp_ndesc_handler(SYSCTL_HANDLER_ARGS) 6612 { 6613 if_ctx_t ctx = (void *)arg1; 6614 enum iflib_ndesc_handler type = arg2; 6615 char buf[256] = {0}; 6616 qidx_t *ndesc; 6617 char *p, *next; 6618 int nqs, rc, i; 6619 6620 nqs = 8; 6621 switch(type) { 6622 case IFLIB_NTXD_HANDLER: 6623 ndesc = ctx->ifc_sysctl_ntxds; 6624 if (ctx->ifc_sctx) 6625 nqs = ctx->ifc_sctx->isc_ntxqs; 6626 break; 6627 case IFLIB_NRXD_HANDLER: 6628 ndesc = ctx->ifc_sysctl_nrxds; 6629 if (ctx->ifc_sctx) 6630 nqs = ctx->ifc_sctx->isc_nrxqs; 6631 break; 6632 default: 6633 printf("%s: unhandled type\n", __func__); 6634 return (EINVAL); 6635 } 6636 if (nqs == 0) 6637 nqs = 8; 6638 6639 for (i=0; i<8; i++) { 6640 if (i >= nqs) 6641 break; 6642 if (i) 6643 strcat(buf, ","); 6644 sprintf(strchr(buf, 0), "%d", ndesc[i]); 6645 } 6646 6647 rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); 6648 if (rc || req->newptr == NULL) 6649 return rc; 6650 6651 for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p; 6652 i++, p = strsep(&next, " ,")) { 6653 ndesc[i] = strtoul(p, NULL, 10); 6654 } 6655 6656 return(rc); 6657 } 6658 6659 #define NAME_BUFLEN 32 6660 static void 6661 iflib_add_device_sysctl_pre(if_ctx_t ctx) 6662 { 6663 device_t dev = iflib_get_dev(ctx); 6664 struct sysctl_oid_list *child, *oid_list; 6665 struct sysctl_ctx_list *ctx_list; 6666 struct sysctl_oid *node; 6667 6668 ctx_list = device_get_sysctl_ctx(dev); 6669 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 6670 ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib", 6671 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "IFLIB fields"); 6672 oid_list = SYSCTL_CHILDREN(node); 6673 6674 SYSCTL_ADD_CONST_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", 6675 CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, 6676 "driver version"); 6677 6678 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", 6679 CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, 6680 "# of txqs to use, 0 => use default #"); 6681 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", 6682 CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, 6683 "# of rxqs to use, 0 => use default #"); 6684 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", 6685 CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, 6686 "permit #txq != #rxq"); 6687 SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", 6688 CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, 6689 "disable MSI-X (default 0)"); 6690 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", 6691 CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, 6692 "set the RX 
budget"); 6693 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "tx_abdicate", 6694 CTLFLAG_RWTUN, &ctx->ifc_sysctl_tx_abdicate, 0, 6695 "cause TX to abdicate instead of running to completion"); 6696 ctx->ifc_sysctl_core_offset = CORE_OFFSET_UNSPECIFIED; 6697 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "core_offset", 6698 CTLFLAG_RDTUN, &ctx->ifc_sysctl_core_offset, 0, 6699 "offset to start using cores at"); 6700 SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "separate_txrx", 6701 CTLFLAG_RDTUN, &ctx->ifc_sysctl_separate_txrx, 0, 6702 "use separate cores for TX and RX"); 6703 6704 /* XXX change for per-queue sizes */ 6705 SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", 6706 CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, ctx, 6707 IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A", 6708 "list of # of TX descriptors to use, 0 = use default #"); 6709 SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", 6710 CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, ctx, 6711 IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", 6712 "list of # of RX descriptors to use, 0 = use default #"); 6713 } 6714 6715 static void 6716 iflib_add_device_sysctl_post(if_ctx_t ctx) 6717 { 6718 if_shared_ctx_t sctx = ctx->ifc_sctx; 6719 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 6720 device_t dev = iflib_get_dev(ctx); 6721 struct sysctl_oid_list *child; 6722 struct sysctl_ctx_list *ctx_list; 6723 iflib_fl_t fl; 6724 iflib_txq_t txq; 6725 iflib_rxq_t rxq; 6726 int i, j; 6727 char namebuf[NAME_BUFLEN]; 6728 char *qfmt; 6729 struct sysctl_oid *queue_node, *fl_node, *node; 6730 struct sysctl_oid_list *queue_list, *fl_list; 6731 ctx_list = device_get_sysctl_ctx(dev); 6732 6733 node = ctx->ifc_sysctl_node; 6734 child = SYSCTL_CHILDREN(node); 6735 6736 if (scctx->isc_ntxqsets > 100) 6737 qfmt = "txq%03d"; 6738 else if (scctx->isc_ntxqsets > 10) 6739 qfmt = "txq%02d"; 6740 else 6741 qfmt = "txq%d"; 6742 for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) { 6743 snprintf(namebuf, NAME_BUFLEN, qfmt, i); 6744 queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, 6745 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); 6746 queue_list = SYSCTL_CHILDREN(queue_node); 6747 #if MEMORY_LOGGING 6748 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued", 6749 CTLFLAG_RD, 6750 &txq->ift_dequeued, "total mbufs freed"); 6751 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued", 6752 CTLFLAG_RD, 6753 &txq->ift_enqueued, "total mbufs enqueued"); 6754 #endif 6755 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag", 6756 CTLFLAG_RD, 6757 &txq->ift_mbuf_defrag, "# of times m_defrag was called"); 6758 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups", 6759 CTLFLAG_RD, 6760 &txq->ift_pullups, "# of times m_pullup was called"); 6761 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed", 6762 CTLFLAG_RD, 6763 &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed"); 6764 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail", 6765 CTLFLAG_RD, 6766 &txq->ift_no_desc_avail, "# of times no descriptors were available"); 6767 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed", 6768 CTLFLAG_RD, 6769 &txq->ift_map_failed, "# of times DMA map failed"); 6770 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig", 6771 CTLFLAG_RD, 6772 &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG"); 6773 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup", 6774 CTLFLAG_RD, 6775 &txq->ift_no_tx_dma_setup, "# of times map 
failed for other than EFBIG");
6776 		SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx",
6777 		    CTLFLAG_RD,
6778 		    &txq->ift_pidx, 1, "Producer Index");
6779 		SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx",
6780 		    CTLFLAG_RD,
6781 		    &txq->ift_cidx, 1, "Consumer Index");
6782 		SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx_processed",
6783 		    CTLFLAG_RD,
6784 		    &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update");
6785 		SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use",
6786 		    CTLFLAG_RD,
6787 		    &txq->ift_in_use, 1, "descriptors in use");
6788 		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed",
6789 		    CTLFLAG_RD,
6790 		    &txq->ift_processed, "descriptors processed for clean");
6791 		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned",
6792 		    CTLFLAG_RD,
6793 		    &txq->ift_cleaned, "total cleaned");
6794 		SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state",
6795 		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
6796 		    __DEVOLATILE(uint64_t *, &txq->ift_br->state), 0,
6797 		    mp_ring_state_handler, "A", "soft ring state");
6798 		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues",
6799 		    CTLFLAG_RD, &txq->ift_br->enqueues,
6800 		    "# of enqueues to the mp_ring for this queue");
6801 		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops",
6802 		    CTLFLAG_RD, &txq->ift_br->drops,
6803 		    "# of drops in the mp_ring for this queue");
6804 		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts",
6805 		    CTLFLAG_RD, &txq->ift_br->starts,
6806 		    "# of normal consumer starts in the mp_ring for this queue");
6807 		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls",
6808 		    CTLFLAG_RD, &txq->ift_br->stalls,
6809 		    "# of consumer stalls in the mp_ring for this queue");
6810 		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts",
6811 		    CTLFLAG_RD, &txq->ift_br->restarts,
6812 		    "# of consumer restarts in the mp_ring for this queue");
6813 		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications",
6814 		    CTLFLAG_RD, &txq->ift_br->abdications,
6815 		    "# of consumer abdications in the mp_ring for this queue");
6816 	}
6817 
6818 	if (scctx->isc_nrxqsets > 100)
6819 		qfmt = "rxq%03d";
6820 	else if (scctx->isc_nrxqsets > 10)
6821 		qfmt = "rxq%02d";
6822 	else
6823 		qfmt = "rxq%d";
6824 	for (i = 0, rxq = ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) {
6825 		snprintf(namebuf, NAME_BUFLEN, qfmt, i);
6826 		queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf,
6827 		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name");
6828 		queue_list = SYSCTL_CHILDREN(queue_node);
6829 		if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
6830 			SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx",
6831 			    CTLFLAG_RD,
6832 			    &rxq->ifr_cq_cidx, 1, "Consumer Index");
6833 		}
6834 
6835 		for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) {
6836 			snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j);
6837 			fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf,
6838 			    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "freelist Name");
6839 			fl_list = SYSCTL_CHILDREN(fl_node);
6840 			SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx",
6841 			    CTLFLAG_RD,
6842 			    &fl->ifl_pidx, 1, "Producer Index");
6843 			SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx",
6844 			    CTLFLAG_RD,
6845 			    &fl->ifl_cidx, 1, "Consumer Index");
6846 			SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits",
6847 			    CTLFLAG_RD,
6848 			    &fl->ifl_credits, 1, "credits available");
6849 			SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "buf_size",
6850 			    CTLFLAG_RD,
6851 			    &fl->ifl_buf_size, 1, "buffer size");
6852 #if
MEMORY_LOGGING 6853 SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued", 6854 CTLFLAG_RD, 6855 &fl->ifl_m_enqueued, "mbufs allocated"); 6856 SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued", 6857 CTLFLAG_RD, 6858 &fl->ifl_m_dequeued, "mbufs freed"); 6859 SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued", 6860 CTLFLAG_RD, 6861 &fl->ifl_cl_enqueued, "clusters allocated"); 6862 SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued", 6863 CTLFLAG_RD, 6864 &fl->ifl_cl_dequeued, "clusters freed"); 6865 #endif 6866 } 6867 } 6868 6869 } 6870 6871 void 6872 iflib_request_reset(if_ctx_t ctx) 6873 { 6874 6875 STATE_LOCK(ctx); 6876 ctx->ifc_flags |= IFC_DO_RESET; 6877 STATE_UNLOCK(ctx); 6878 } 6879 6880 #ifndef __NO_STRICT_ALIGNMENT 6881 static struct mbuf * 6882 iflib_fixup_rx(struct mbuf *m) 6883 { 6884 struct mbuf *n; 6885 6886 if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { 6887 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); 6888 m->m_data += ETHER_HDR_LEN; 6889 n = m; 6890 } else { 6891 MGETHDR(n, M_NOWAIT, MT_DATA); 6892 if (n == NULL) { 6893 m_freem(m); 6894 return (NULL); 6895 } 6896 bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); 6897 m->m_data += ETHER_HDR_LEN; 6898 m->m_len -= ETHER_HDR_LEN; 6899 n->m_len = ETHER_HDR_LEN; 6900 M_MOVE_PKTHDR(n, m); 6901 n->m_next = m; 6902 } 6903 return (n); 6904 } 6905 #endif 6906 6907 #ifdef DEBUGNET 6908 static void 6909 iflib_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize) 6910 { 6911 if_ctx_t ctx; 6912 6913 ctx = if_getsoftc(ifp); 6914 CTX_LOCK(ctx); 6915 *nrxr = NRXQSETS(ctx); 6916 *ncl = ctx->ifc_rxqs[0].ifr_fl->ifl_size; 6917 *clsize = ctx->ifc_rxqs[0].ifr_fl->ifl_buf_size; 6918 CTX_UNLOCK(ctx); 6919 } 6920 6921 static void 6922 iflib_debugnet_event(if_t ifp, enum debugnet_ev event) 6923 { 6924 if_ctx_t ctx; 6925 if_softc_ctx_t scctx; 6926 iflib_fl_t fl; 6927 iflib_rxq_t rxq; 6928 int i, j; 6929 6930 ctx = if_getsoftc(ifp); 6931 scctx = &ctx->ifc_softc_ctx; 6932 6933 switch (event) { 6934 case DEBUGNET_START: 6935 for (i = 0; i < scctx->isc_nrxqsets; i++) { 6936 rxq = &ctx->ifc_rxqs[i]; 6937 for (j = 0; j < rxq->ifr_nfl; j++) { 6938 fl = rxq->ifr_fl; 6939 fl->ifl_zone = m_getzone(fl->ifl_buf_size); 6940 } 6941 } 6942 iflib_no_tx_batch = 1; 6943 break; 6944 default: 6945 break; 6946 } 6947 } 6948 6949 static int 6950 iflib_debugnet_transmit(if_t ifp, struct mbuf *m) 6951 { 6952 if_ctx_t ctx; 6953 iflib_txq_t txq; 6954 int error; 6955 6956 ctx = if_getsoftc(ifp); 6957 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 6958 IFF_DRV_RUNNING) 6959 return (EBUSY); 6960 6961 txq = &ctx->ifc_txqs[0]; 6962 error = iflib_encap(txq, &m); 6963 if (error == 0) 6964 (void)iflib_txd_db_check(txq, true); 6965 return (error); 6966 } 6967 6968 static int 6969 iflib_debugnet_poll(if_t ifp, int count) 6970 { 6971 struct epoch_tracker et; 6972 if_ctx_t ctx; 6973 if_softc_ctx_t scctx; 6974 iflib_txq_t txq; 6975 int i; 6976 6977 ctx = if_getsoftc(ifp); 6978 scctx = &ctx->ifc_softc_ctx; 6979 6980 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 6981 IFF_DRV_RUNNING) 6982 return (EBUSY); 6983 6984 txq = &ctx->ifc_txqs[0]; 6985 (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); 6986 6987 NET_EPOCH_ENTER(et); 6988 for (i = 0; i < scctx->isc_nrxqsets; i++) 6989 (void)iflib_rxeof(&ctx->ifc_rxqs[i], 16 /* XXX */); 6990 NET_EPOCH_EXIT(et); 6991 return (0); 6992 } 6993 #endif /* DEBUGNET */ 6994
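/*
 * Example (editorial addition, not part of iflib): a minimal sketch of how a
 * client driver's MSI-X assignment routine might use the interrupt helpers
 * defined above.  The hypothetical_* names, the softc layout, and the queue
 * structures are illustrative assumptions only; the iflib_irq_alloc_generic()
 * and iflib_softirq_alloc_generic() calls follow the signatures in this file.
 * Guarded out so it is never compiled.
 */
#if 0
struct hypothetical_rx_queue {
	struct if_irq	que_irq;	/* per-queue interrupt, filled in by iflib */
	int		me;		/* iflib RX queue index */
};

struct hypothetical_tx_queue {
	int		me;		/* iflib TX queue index */
};

struct hypothetical_softc {
	int				num_queues;
	struct hypothetical_rx_queue	*rx_queues;
	struct hypothetical_tx_queue	*tx_queues;
	struct if_irq			adm_irq;
};

/* Hypothetical fast-interrupt filters supplied by the driver. */
static driver_filter_t	hypothetical_msix_que;
static driver_filter_t	hypothetical_msix_admin;

static int
hypothetical_if_msix_intr_assign(if_ctx_t ctx, int msix __unused)
{
	struct hypothetical_softc *sc = iflib_get_softc(ctx);
	struct hypothetical_rx_queue *que;
	char buf[16];
	int error, i, rid;

	/* One RX/TX vector per queue pair, using rids 1..num_queues. */
	for (i = 0, que = sc->rx_queues; i < sc->num_queues; i++, que++) {
		rid = i + 1;
		snprintf(buf, sizeof(buf), "rxq%d", i);
		error = iflib_irq_alloc_generic(ctx, &que->que_irq, rid,
		    IFLIB_INTR_RXTX, hypothetical_msix_que, que, i, buf);
		if (error != 0)
			return (error);
		/* TX completion work runs as a group task on the same vector. */
		snprintf(buf, sizeof(buf), "txq%d", i);
		iflib_softirq_alloc_generic(ctx, &que->que_irq, IFLIB_INTR_TX,
		    &sc->tx_queues[i], i, buf);
	}

	/* A final vector for link/admin events. */
	rid = sc->num_queues + 1;
	return (iflib_irq_alloc_generic(ctx, &sc->adm_irq, rid,
	    IFLIB_INTR_ADMIN, hypothetical_msix_admin, sc, 0, "admin"));
}
#endif
/*
 * Note: for the RX/TX case iflib_irq_alloc_generic() both installs the fast
 * interrupt and binds the queue's group task to a CPU via
 * iflib_irq_set_affinity(); the softirq variant only attaches the TX group
 * task, reusing the RX queue's interrupt resource.
 */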