/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_em.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifdef DDB
#include <sys/types.h>
#include <ddb/ddb.h>
#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.6.1-k";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3, PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(if_t, struct mbuf *);
static int	em_mq_start_locked(if_t, struct tx_ring *);
static void	em_qflush(if_t);
#else
static void	em_start(if_t);
static void	em_start_locked(if_t, struct tx_ring *);
#endif
static int	em_ioctl(if_t, u_long, caddr_t);
static uint64_t	em_get_counter(if_t, ift_counter);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(if_t, struct ifmediareq *);
static int	em_media_change(if_t);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);
static void	em_flush_desc_rings(struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
		    const struct em_rxbuffer *rxbuf);
static void	em_receive_checksum(uint32_t status, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, if_t, u16);
static void	em_unregister_vlan(void *, if_t, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void	em_enable_vectors_82574(struct adapter *);
#endif

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(em, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
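/*
 * The hardware delay timers tick in 1.024 usec units, which is what the
 * 1024/1000 scaling in the conversions above accounts for.  The ITR
 * register counts in 256 nanosecond increments, so DEFAULT_ITR works out
 * to roughly MAX_INTS_PER_SEC interrupts per second.
 */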

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

#define TSO_WORKAROUND	4

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store last used CPU when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF */
static int eee_setting = 1;
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on
 *  adapter based on PCI vendor/device id of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
			    em_strings[ent->index],
			    em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}
	/*
	** In the new SPT device flash is not a
	** separate BAR, rather it is also in BAR0,
	** so use the same tag and an offset handle for the
	** FLASH read/write macros in the shared code.
	*/
	else if (hw->mac.type == e1000_pch_spt) {
		adapter->osdep.flash_bus_space_tag =
		    adapter->osdep.mem_bus_space_tag;
		adapter->osdep.flash_bus_space_handle =
		    adapter->osdep.mem_bus_space_handle
		    + E1000_FLASH_BASE_ADDR;
	}

	/* Do Shared Code initialization */
	error = e1000_setup_init_funcs(hw, TRUE);
	if (error) {
		device_printf(dev, "Setup of Shared code failed, error %d\n",
		    error);
		error = ENXIO;
		goto err_pci;
	}

	/*
	 * Setup MSI/X or MSI if PCI Express
	 */
	adapter->msix = em_setup_msix(adapter);

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of E1000_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;
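	/*
	 * With report_tx_early set the MAC is expected to write back
	 * transmit descriptors once a packet has been fetched into the
	 * transmit FIFO, rather than only after it has gone out on the
	 * wire (see the shared code for the exact semantics).
	 */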

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state, this is
	** important in reading the nvm and
	** mac from that.
	*/
	e1000_reset_hw(hw);


	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state, call it again,
		** if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Disable ULP support */
	e1000_disable_ulp_lpt_lp(hw, TRUE);

	/*
	** Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != (void *)NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (if_vlantrunkinuse(ifp)) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((if_getflags(ifp) & IFF_UP) &&
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr);
#else
			if (!if_sendq_empty(ifp))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#ifndef EM_MULTIQUEUE
static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!if_sendq_empty(ifp)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
			break;
		}
		m_head = if_dequeue(ifp);
		if (m_head == NULL)
			break;
		/*
		 * Encapsulation can modify our pointer, and/or make it
		 * NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			if_sendq_prepend(ifp, m_head);
			break;
		}

		/* Mark the queue as having work */
		if (txr->busy == EM_TX_IDLE)
			txr->busy = EM_TX_BUSY;

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

	}

	return;
}

static void
em_start(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  however, if busy the driver can queue the request rather
 *  than do an immediate send. It is this that is an advantage
 *  in this driver, rather than also having multiple tx queues.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
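/*
 * Keep each flow on a single ring: when the stack supplies an RSS/flowid
 * hash it is used (modulo the queue count) to select the TX ring, which
 * preserves per-flow ordering; without a hash the current CPU is used as
 * a reasonable default.
 */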
static int
em_mq_start(if_t ifp, struct mbuf *m)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	unsigned int	i, error;

	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	error = drbr_enqueue(ifp, txr->br, m);
	if (error)
		return (error);

	if (EM_TX_TRYLOCK(txr)) {
		em_mq_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(txr->tq, &txr->tx_task);

	return (0);
}

static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	EM_TX_LOCK_ASSERT(txr);

	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0) {
		return (ENETDOWN);
	}

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				/* It was freed, move forward */
				drbr_advance(ifp, txr->br);
			} else {
				/*
				 * Still have one left, it may not be
				 * the same since the transmit function
				 * may have changed it.
				 */
				drbr_putback(ifp, txr->br, next);
			}
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
		if (next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
		ETHER_BPF_MTAP(ifp, next);
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
			break;
	}

	/* Mark the queue as having work */
	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
		txr->busy = EM_TX_BUSY;

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER) {
		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
	}
	return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			if_setflagbits(ifp, IFF_UP, 0);
			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(if_getflags(ifp) & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_pch_spt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		adapter->hw.mac.max_frame_size =
		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
			em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (if_getflags(ifp) & IFF_UP) {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				if ((if_getflags(ifp) ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = if_getflags(ifp);
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			if_togglecapenable(ifp, IFCAP_HWCSUM);
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			if_togglecapenable(ifp, IFCAP_TSO4);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
			if (mask & IFCAP_WOL_MAGIC)
				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
		}
		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
			em_init(adapter);
		if_vlancap(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we make a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	if_clearhwassist(ifp);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	/*
	** There have proven to be problems with TSO when not
	** at full gigabit speed, so disable the assist automatically
	** when at lower speeds.  -jfv
	*/
	if (if_getcapenable(ifp) & IFCAP_TSO4) {
		if (adapter->link_speed == SPEED_1000)
			if_sethwassistbits(ifp, CSUM_TSO, 0);
	}

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
static int
em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
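/*
 * This runs as an interrupt filter: it only reads (and thereby
 * acknowledges) ICR and defers the real RX/TX work to the taskqueue.
 * FILTER_STRAY tells the system the interrupt was not ours (e.g. a
 * shared line), FILTER_HANDLED that it was.
 */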
1570 */ 1571 if (adapter->hw.mac.type >= e1000_82571 && 1572 (reg_icr & E1000_ICR_INT_ASSERTED) == 0) 1573 return FILTER_STRAY; 1574 1575 em_disable_intr(adapter); 1576 taskqueue_enqueue(adapter->tq, &adapter->que_task); 1577 1578 /* Link status change */ 1579 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 1580 adapter->hw.mac.get_link_status = 1; 1581 taskqueue_enqueue(taskqueue_fast, &adapter->link_task); 1582 } 1583 1584 if (reg_icr & E1000_ICR_RXO) 1585 adapter->rx_overruns++; 1586 return FILTER_HANDLED; 1587 } 1588 1589 /* Combined RX/TX handler, used by Legacy and MSI */ 1590 static void 1591 em_handle_que(void *context, int pending) 1592 { 1593 struct adapter *adapter = context; 1594 if_t ifp = adapter->ifp; 1595 struct tx_ring *txr = adapter->tx_rings; 1596 struct rx_ring *rxr = adapter->rx_rings; 1597 1598 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 1599 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL); 1600 1601 EM_TX_LOCK(txr); 1602 em_txeof(txr); 1603 #ifdef EM_MULTIQUEUE 1604 if (!drbr_empty(ifp, txr->br)) 1605 em_mq_start_locked(ifp, txr); 1606 #else 1607 if (!if_sendq_empty(ifp)) 1608 em_start_locked(ifp, txr); 1609 #endif 1610 EM_TX_UNLOCK(txr); 1611 if (more) { 1612 taskqueue_enqueue(adapter->tq, &adapter->que_task); 1613 return; 1614 } 1615 } 1616 1617 em_enable_intr(adapter); 1618 return; 1619 } 1620 1621 1622 /********************************************************************* 1623 * 1624 * MSIX Interrupt Service Routines 1625 * 1626 **********************************************************************/ 1627 static void 1628 em_msix_tx(void *arg) 1629 { 1630 struct tx_ring *txr = arg; 1631 struct adapter *adapter = txr->adapter; 1632 if_t ifp = adapter->ifp; 1633 1634 ++txr->tx_irq; 1635 EM_TX_LOCK(txr); 1636 em_txeof(txr); 1637 #ifdef EM_MULTIQUEUE 1638 if (!drbr_empty(ifp, txr->br)) 1639 em_mq_start_locked(ifp, txr); 1640 #else 1641 if (!if_sendq_empty(ifp)) 1642 em_start_locked(ifp, txr); 1643 #endif 1644 1645 /* Reenable this interrupt */ 1646 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); 1647 EM_TX_UNLOCK(txr); 1648 return; 1649 } 1650 1651 /********************************************************************* 1652 * 1653 * MSIX RX Interrupt Service routine 1654 * 1655 **********************************************************************/ 1656 1657 static void 1658 em_msix_rx(void *arg) 1659 { 1660 struct rx_ring *rxr = arg; 1661 struct adapter *adapter = rxr->adapter; 1662 bool more; 1663 1664 ++rxr->rx_irq; 1665 if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)) 1666 return; 1667 more = em_rxeof(rxr, adapter->rx_process_limit, NULL); 1668 if (more) 1669 taskqueue_enqueue(rxr->tq, &rxr->rx_task); 1670 else { 1671 /* Reenable this interrupt */ 1672 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); 1673 } 1674 return; 1675 } 1676 1677 /********************************************************************* 1678 * 1679 * MSIX Link Fast Interrupt Service routine 1680 * 1681 **********************************************************************/ 1682 static void 1683 em_msix_link(void *arg) 1684 { 1685 struct adapter *adapter = arg; 1686 u32 reg_icr; 1687 1688 ++adapter->link_irq; 1689 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); 1690 1691 if (reg_icr & E1000_ICR_RXO) 1692 adapter->rx_overruns++; 1693 1694 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 1695 adapter->hw.mac.get_link_status = 1; 1696 em_handle_link(adapter, 0); 1697 } else 1698 E1000_WRITE_REG(&adapter->hw, E1000_IMS, 1699 EM_MSIX_LINK | E1000_IMS_LSC); 1700 /* 1701 
** Because we must read the ICR for this interrupt 1702 ** it may clear other causes using autoclear, for 1703 ** this reason we simply create a soft interrupt 1704 ** for all these vectors. 1705 */ 1706 if (reg_icr) { 1707 E1000_WRITE_REG(&adapter->hw, 1708 E1000_ICS, adapter->ims); 1709 } 1710 return; 1711 } 1712 1713 static void 1714 em_handle_rx(void *context, int pending) 1715 { 1716 struct rx_ring *rxr = context; 1717 struct adapter *adapter = rxr->adapter; 1718 bool more; 1719 1720 more = em_rxeof(rxr, adapter->rx_process_limit, NULL); 1721 if (more) 1722 taskqueue_enqueue(rxr->tq, &rxr->rx_task); 1723 else { 1724 /* Reenable this interrupt */ 1725 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); 1726 } 1727 } 1728 1729 static void 1730 em_handle_tx(void *context, int pending) 1731 { 1732 struct tx_ring *txr = context; 1733 struct adapter *adapter = txr->adapter; 1734 if_t ifp = adapter->ifp; 1735 1736 EM_TX_LOCK(txr); 1737 em_txeof(txr); 1738 #ifdef EM_MULTIQUEUE 1739 if (!drbr_empty(ifp, txr->br)) 1740 em_mq_start_locked(ifp, txr); 1741 #else 1742 if (!if_sendq_empty(ifp)) 1743 em_start_locked(ifp, txr); 1744 #endif 1745 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); 1746 EM_TX_UNLOCK(txr); 1747 } 1748 1749 static void 1750 em_handle_link(void *context, int pending) 1751 { 1752 struct adapter *adapter = context; 1753 struct tx_ring *txr = adapter->tx_rings; 1754 if_t ifp = adapter->ifp; 1755 1756 if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) 1757 return; 1758 1759 EM_CORE_LOCK(adapter); 1760 callout_stop(&adapter->timer); 1761 em_update_link_status(adapter); 1762 callout_reset(&adapter->timer, hz, em_local_timer, adapter); 1763 E1000_WRITE_REG(&adapter->hw, E1000_IMS, 1764 EM_MSIX_LINK | E1000_IMS_LSC); 1765 if (adapter->link_active) { 1766 for (int i = 0; i < adapter->num_queues; i++, txr++) { 1767 EM_TX_LOCK(txr); 1768 #ifdef EM_MULTIQUEUE 1769 if (!drbr_empty(ifp, txr->br)) 1770 em_mq_start_locked(ifp, txr); 1771 #else 1772 if (if_sendq_empty(ifp)) 1773 em_start_locked(ifp, txr); 1774 #endif 1775 EM_TX_UNLOCK(txr); 1776 } 1777 } 1778 EM_CORE_UNLOCK(adapter); 1779 } 1780 1781 1782 /********************************************************************* 1783 * 1784 * Media Ioctl callback 1785 * 1786 * This routine is called whenever the user queries the status of 1787 * the interface using ifconfig. 
1788 * 1789 **********************************************************************/ 1790 static void 1791 em_media_status(if_t ifp, struct ifmediareq *ifmr) 1792 { 1793 struct adapter *adapter = if_getsoftc(ifp); 1794 u_char fiber_type = IFM_1000_SX; 1795 1796 INIT_DEBUGOUT("em_media_status: begin"); 1797 1798 EM_CORE_LOCK(adapter); 1799 em_update_link_status(adapter); 1800 1801 ifmr->ifm_status = IFM_AVALID; 1802 ifmr->ifm_active = IFM_ETHER; 1803 1804 if (!adapter->link_active) { 1805 EM_CORE_UNLOCK(adapter); 1806 return; 1807 } 1808 1809 ifmr->ifm_status |= IFM_ACTIVE; 1810 1811 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || 1812 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { 1813 ifmr->ifm_active |= fiber_type | IFM_FDX; 1814 } else { 1815 switch (adapter->link_speed) { 1816 case 10: 1817 ifmr->ifm_active |= IFM_10_T; 1818 break; 1819 case 100: 1820 ifmr->ifm_active |= IFM_100_TX; 1821 break; 1822 case 1000: 1823 ifmr->ifm_active |= IFM_1000_T; 1824 break; 1825 } 1826 if (adapter->link_duplex == FULL_DUPLEX) 1827 ifmr->ifm_active |= IFM_FDX; 1828 else 1829 ifmr->ifm_active |= IFM_HDX; 1830 } 1831 EM_CORE_UNLOCK(adapter); 1832 } 1833 1834 /********************************************************************* 1835 * 1836 * Media Ioctl callback 1837 * 1838 * This routine is called when the user changes speed/duplex using 1839 * media/mediopt option with ifconfig. 1840 * 1841 **********************************************************************/ 1842 static int 1843 em_media_change(if_t ifp) 1844 { 1845 struct adapter *adapter = if_getsoftc(ifp); 1846 struct ifmedia *ifm = &adapter->media; 1847 1848 INIT_DEBUGOUT("em_media_change: begin"); 1849 1850 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) 1851 return (EINVAL); 1852 1853 EM_CORE_LOCK(adapter); 1854 switch (IFM_SUBTYPE(ifm->ifm_media)) { 1855 case IFM_AUTO: 1856 adapter->hw.mac.autoneg = DO_AUTO_NEG; 1857 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; 1858 break; 1859 case IFM_1000_LX: 1860 case IFM_1000_SX: 1861 case IFM_1000_T: 1862 adapter->hw.mac.autoneg = DO_AUTO_NEG; 1863 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; 1864 break; 1865 case IFM_100_TX: 1866 adapter->hw.mac.autoneg = FALSE; 1867 adapter->hw.phy.autoneg_advertised = 0; 1868 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) 1869 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; 1870 else 1871 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; 1872 break; 1873 case IFM_10_T: 1874 adapter->hw.mac.autoneg = FALSE; 1875 adapter->hw.phy.autoneg_advertised = 0; 1876 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) 1877 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; 1878 else 1879 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; 1880 break; 1881 default: 1882 device_printf(adapter->dev, "Unsupported media type\n"); 1883 } 1884 1885 em_init_locked(adapter); 1886 EM_CORE_UNLOCK(adapter); 1887 1888 return (0); 1889 } 1890 1891 /********************************************************************* 1892 * 1893 * This routine maps the mbufs to tx descriptors. 
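 * On several of the failure paths the chain referenced by *m_headp is
 * freed and *m_headp is set to NULL before returning, so a caller that
 * sees a non-zero return should not assume it still owns the mbuf
 * (informal sketch of the contract as implemented below).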
1894 * 1895 * return 0 on success, positive on failure 1896 **********************************************************************/ 1897 1898 static int 1899 em_xmit(struct tx_ring *txr, struct mbuf **m_headp) 1900 { 1901 struct adapter *adapter = txr->adapter; 1902 bus_dma_segment_t segs[EM_MAX_SCATTER]; 1903 bus_dmamap_t map; 1904 struct em_txbuffer *tx_buffer, *tx_buffer_mapped; 1905 struct e1000_tx_desc *ctxd = NULL; 1906 struct mbuf *m_head; 1907 struct ether_header *eh; 1908 struct ip *ip = NULL; 1909 struct tcphdr *tp = NULL; 1910 u32 txd_upper = 0, txd_lower = 0; 1911 int ip_off, poff; 1912 int nsegs, i, j, first, last = 0; 1913 int error; 1914 bool do_tso, tso_desc, remap = TRUE; 1915 1916 m_head = *m_headp; 1917 do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO); 1918 tso_desc = FALSE; 1919 ip_off = poff = 0; 1920 1921 /* 1922 * Intel recommends entire IP/TCP header length reside in a single 1923 * buffer. If multiple descriptors are used to describe the IP and 1924 * TCP header, each descriptor should describe one or more 1925 * complete headers; descriptors referencing only parts of headers 1926 * are not supported. If all layer headers are not coalesced into 1927 * a single buffer, each buffer should not cross a 4KB boundary, 1928 * or be larger than the maximum read request size. 1929 * Controller also requires modifing IP/TCP header to make TSO work 1930 * so we firstly get a writable mbuf chain then coalesce ethernet/ 1931 * IP/TCP header into a single buffer to meet the requirement of 1932 * controller. This also simplifies IP/TCP/UDP checksum offloading 1933 * which also has similar restrictions. 1934 */ 1935 if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) { 1936 if (do_tso || (m_head->m_next != NULL && 1937 m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) { 1938 if (M_WRITABLE(*m_headp) == 0) { 1939 m_head = m_dup(*m_headp, M_NOWAIT); 1940 m_freem(*m_headp); 1941 if (m_head == NULL) { 1942 *m_headp = NULL; 1943 return (ENOBUFS); 1944 } 1945 *m_headp = m_head; 1946 } 1947 } 1948 /* 1949 * XXX 1950 * Assume IPv4, we don't have TSO/checksum offload support 1951 * for IPv6 yet. 1952 */ 1953 ip_off = sizeof(struct ether_header); 1954 if (m_head->m_len < ip_off) { 1955 m_head = m_pullup(m_head, ip_off); 1956 if (m_head == NULL) { 1957 *m_headp = NULL; 1958 return (ENOBUFS); 1959 } 1960 } 1961 eh = mtod(m_head, struct ether_header *); 1962 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 1963 ip_off = sizeof(struct ether_vlan_header); 1964 if (m_head->m_len < ip_off) { 1965 m_head = m_pullup(m_head, ip_off); 1966 if (m_head == NULL) { 1967 *m_headp = NULL; 1968 return (ENOBUFS); 1969 } 1970 } 1971 } 1972 if (m_head->m_len < ip_off + sizeof(struct ip)) { 1973 m_head = m_pullup(m_head, ip_off + sizeof(struct ip)); 1974 if (m_head == NULL) { 1975 *m_headp = NULL; 1976 return (ENOBUFS); 1977 } 1978 } 1979 ip = (struct ip *)(mtod(m_head, char *) + ip_off); 1980 poff = ip_off + (ip->ip_hl << 2); 1981 1982 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) { 1983 if (m_head->m_len < poff + sizeof(struct tcphdr)) { 1984 m_head = m_pullup(m_head, poff + 1985 sizeof(struct tcphdr)); 1986 if (m_head == NULL) { 1987 *m_headp = NULL; 1988 return (ENOBUFS); 1989 } 1990 } 1991 tp = (struct tcphdr *)(mtod(m_head, char *) + poff); 1992 /* 1993 * TSO workaround: 1994 * pull 4 more bytes of data into it. 
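			 *
			 * The extra TSO_WORKAROUND (4) bytes pulled up here
			 * give the descriptor loop further down room to split
			 * the final segment and emit a small sentinel
			 * descriptor (see the seg_len adjustment below),
			 * which avoids a premature write-back.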
1995 */ 1996 if (m_head->m_len < poff + (tp->th_off << 2)) { 1997 m_head = m_pullup(m_head, poff + 1998 (tp->th_off << 2) + 1999 TSO_WORKAROUND); 2000 if (m_head == NULL) { 2001 *m_headp = NULL; 2002 return (ENOBUFS); 2003 } 2004 } 2005 ip = (struct ip *)(mtod(m_head, char *) + ip_off); 2006 tp = (struct tcphdr *)(mtod(m_head, char *) + poff); 2007 if (do_tso) { 2008 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz + 2009 (ip->ip_hl << 2) + 2010 (tp->th_off << 2)); 2011 ip->ip_sum = 0; 2012 /* 2013 * The pseudo TCP checksum does not include the TCP 2014 * payload length, so the driver must recompute 2015 * the checksum here as the hardware expects to 2016 * see it. This follows Microsoft's Large 2017 * Send Offload specification. 2018 */ 2019 tp->th_sum = in_pseudo(ip->ip_src.s_addr, 2020 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 2021 } 2022 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { 2023 if (m_head->m_len < poff + sizeof(struct udphdr)) { 2024 m_head = m_pullup(m_head, poff + 2025 sizeof(struct udphdr)); 2026 if (m_head == NULL) { 2027 *m_headp = NULL; 2028 return (ENOBUFS); 2029 } 2030 } 2031 ip = (struct ip *)(mtod(m_head, char *) + ip_off); 2032 } 2033 *m_headp = m_head; 2034 } 2035 2036 /* 2037 * Map the packet for DMA 2038 * 2039 * Capture the first descriptor index; 2040 * this descriptor will hold the index 2041 * of the EOP, which is the only one that 2042 * now gets a DONE bit writeback. 2043 */ 2044 first = txr->next_avail_desc; 2045 tx_buffer = &txr->tx_buffers[first]; 2046 tx_buffer_mapped = tx_buffer; 2047 map = tx_buffer->map; 2048 2049 retry: 2050 error = bus_dmamap_load_mbuf_sg(txr->txtag, map, 2051 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); 2052 2053 /* 2054 * There are two types of errors we can (try) to handle: 2055 * - EFBIG means the mbuf chain was too long and bus_dma ran 2056 * out of segments. Defragment the mbuf chain and try again. 2057 * - ENOMEM means bus_dma could not obtain enough bounce buffers 2058 * at this point in time. Defer sending and try again later. 2059 * All other errors, in particular EINVAL, are fatal and prevent the 2060 * mbuf chain from ever going through. Drop it and report error. 2061 */ 2062 if (error == EFBIG && remap) { 2063 struct mbuf *m; 2064 2065 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER); 2066 if (m == NULL) { 2067 adapter->mbuf_defrag_failed++; 2068 m_freem(*m_headp); 2069 *m_headp = NULL; 2070 return (ENOBUFS); 2071 } 2072 *m_headp = m; 2073 2074 /* Try it again, but only once */ 2075 remap = FALSE; 2076 goto retry; 2077 } else if (error != 0) { 2078 adapter->no_tx_dma_setup++; 2079 m_freem(*m_headp); 2080 *m_headp = NULL; 2081 return (error); 2082 } 2083 2084 /* 2085 * TSO Hardware workaround, if this packet is not 2086 * TSO, and is only a single descriptor long, and 2087 * it follows a TSO burst, then we need to add a 2088 * sentinel descriptor to prevent premature writeback.
2089 */ 2090 if ((!do_tso) && (txr->tx_tso == TRUE)) { 2091 if (nsegs == 1) 2092 tso_desc = TRUE; 2093 txr->tx_tso = FALSE; 2094 } 2095 2096 if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) { 2097 txr->no_desc_avail++; 2098 bus_dmamap_unload(txr->txtag, map); 2099 return (ENOBUFS); 2100 } 2101 m_head = *m_headp; 2102 2103 /* Do hardware assists */ 2104 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 2105 em_tso_setup(txr, m_head, ip_off, ip, tp, 2106 &txd_upper, &txd_lower); 2107 /* we need to make a final sentinel transmit desc */ 2108 tso_desc = TRUE; 2109 } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) 2110 em_transmit_checksum_setup(txr, m_head, 2111 ip_off, ip, &txd_upper, &txd_lower); 2112 2113 if (m_head->m_flags & M_VLANTAG) { 2114 /* Set the vlan id. */ 2115 txd_upper |= htole16(if_getvtag(m_head)) << 16; 2116 /* Tell hardware to add tag */ 2117 txd_lower |= htole32(E1000_TXD_CMD_VLE); 2118 } 2119 2120 i = txr->next_avail_desc; 2121 2122 /* Set up our transmit descriptors */ 2123 for (j = 0; j < nsegs; j++) { 2124 bus_size_t seg_len; 2125 bus_addr_t seg_addr; 2126 2127 tx_buffer = &txr->tx_buffers[i]; 2128 ctxd = &txr->tx_base[i]; 2129 seg_addr = segs[j].ds_addr; 2130 seg_len = segs[j].ds_len; 2131 /* 2132 ** TSO Workaround: 2133 ** If this is the last descriptor, we want to 2134 ** split it so we have a small final sentinel 2135 */ 2136 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) { 2137 seg_len -= TSO_WORKAROUND; 2138 ctxd->buffer_addr = htole64(seg_addr); 2139 ctxd->lower.data = htole32( 2140 adapter->txd_cmd | txd_lower | seg_len); 2141 ctxd->upper.data = htole32(txd_upper); 2142 if (++i == adapter->num_tx_desc) 2143 i = 0; 2144 2145 /* Now make the sentinel */ 2146 txr->tx_avail--; 2147 ctxd = &txr->tx_base[i]; 2148 tx_buffer = &txr->tx_buffers[i]; 2149 ctxd->buffer_addr = 2150 htole64(seg_addr + seg_len); 2151 ctxd->lower.data = htole32( 2152 adapter->txd_cmd | txd_lower | TSO_WORKAROUND); 2153 ctxd->upper.data = 2154 htole32(txd_upper); 2155 last = i; 2156 if (++i == adapter->num_tx_desc) 2157 i = 0; 2158 } else { 2159 ctxd->buffer_addr = htole64(seg_addr); 2160 ctxd->lower.data = htole32( 2161 adapter->txd_cmd | txd_lower | seg_len); 2162 ctxd->upper.data = htole32(txd_upper); 2163 last = i; 2164 if (++i == adapter->num_tx_desc) 2165 i = 0; 2166 } 2167 tx_buffer->m_head = NULL; 2168 tx_buffer->next_eop = -1; 2169 } 2170 2171 txr->next_avail_desc = i; 2172 txr->tx_avail -= nsegs; 2173 2174 tx_buffer->m_head = m_head; 2175 /* 2176 ** Here we swap the map so the last descriptor, 2177 ** which gets the completion interrupt has the 2178 ** real map, and the first descriptor gets the 2179 ** unused map from this descriptor. 2180 */ 2181 tx_buffer_mapped->map = tx_buffer->map; 2182 tx_buffer->map = map; 2183 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); 2184 2185 /* 2186 * Last Descriptor of Packet 2187 * needs End Of Packet (EOP) 2188 * and Report Status (RS) 2189 */ 2190 ctxd->lower.data |= 2191 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); 2192 /* 2193 * Keep track in the first buffer which 2194 * descriptor will be written back 2195 */ 2196 tx_buffer = &txr->tx_buffers[first]; 2197 tx_buffer->next_eop = last; 2198 2199 /* 2200 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000 2201 * that this frame is available to transmit. 
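	 * The descriptor ring is bus_dmamap_sync()'d first so the hardware
	 * is guaranteed to see fully written descriptors before the tail
	 * update makes them visible.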
2202 */ 2203 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 2204 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 2205 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); 2206 2207 return (0); 2208 } 2209 2210 static void 2211 em_set_promisc(struct adapter *adapter) 2212 { 2213 if_t ifp = adapter->ifp; 2214 u32 reg_rctl; 2215 2216 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 2217 2218 if (if_getflags(ifp) & IFF_PROMISC) { 2219 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); 2220 /* Turn this on if you want to see bad packets */ 2221 if (em_debug_sbp) 2222 reg_rctl |= E1000_RCTL_SBP; 2223 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2224 } else if (if_getflags(ifp) & IFF_ALLMULTI) { 2225 reg_rctl |= E1000_RCTL_MPE; 2226 reg_rctl &= ~E1000_RCTL_UPE; 2227 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2228 } 2229 } 2230 2231 static void 2232 em_disable_promisc(struct adapter *adapter) 2233 { 2234 if_t ifp = adapter->ifp; 2235 u32 reg_rctl; 2236 int mcnt = 0; 2237 2238 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 2239 reg_rctl &= (~E1000_RCTL_UPE); 2240 if (if_getflags(ifp) & IFF_ALLMULTI) 2241 mcnt = MAX_NUM_MULTICAST_ADDRESSES; 2242 else 2243 mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES); 2244 /* Don't disable if in MAX groups */ 2245 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) 2246 reg_rctl &= (~E1000_RCTL_MPE); 2247 reg_rctl &= (~E1000_RCTL_SBP); 2248 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2249 } 2250 2251 2252 /********************************************************************* 2253 * Multicast Update 2254 * 2255 * This routine is called whenever multicast address list is updated. 2256 * 2257 **********************************************************************/ 2258 2259 static void 2260 em_set_multi(struct adapter *adapter) 2261 { 2262 if_t ifp = adapter->ifp; 2263 u32 reg_rctl = 0; 2264 u8 *mta; /* Multicast array memory */ 2265 int mcnt = 0; 2266 2267 IOCTL_DEBUGOUT("em_set_multi: begin"); 2268 2269 mta = adapter->mta; 2270 bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); 2271 2272 if (adapter->hw.mac.type == e1000_82542 && 2273 adapter->hw.revision_id == E1000_REVISION_2) { 2274 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 2275 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) 2276 e1000_pci_clear_mwi(&adapter->hw); 2277 reg_rctl |= E1000_RCTL_RST; 2278 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2279 msec_delay(5); 2280 } 2281 2282 if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES); 2283 2284 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { 2285 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 2286 reg_rctl |= E1000_RCTL_MPE; 2287 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2288 } else 2289 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); 2290 2291 if (adapter->hw.mac.type == e1000_82542 && 2292 adapter->hw.revision_id == E1000_REVISION_2) { 2293 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 2294 reg_rctl &= ~E1000_RCTL_RST; 2295 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2296 msec_delay(5); 2297 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) 2298 e1000_pci_set_mwi(&adapter->hw); 2299 } 2300 } 2301 2302 2303 /********************************************************************* 2304 * Timer routine 2305 * 2306 * This routine checks for link status and updates statistics. 
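 * It also drives the per-queue transmit watchdog (the EM_TX_HUNG check
 * below) and re-arms itself once per second with callout_reset().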
2307 * 2308 **********************************************************************/ 2309 2310 static void 2311 em_local_timer(void *arg) 2312 { 2313 struct adapter *adapter = arg; 2314 if_t ifp = adapter->ifp; 2315 struct tx_ring *txr = adapter->tx_rings; 2316 struct rx_ring *rxr = adapter->rx_rings; 2317 u32 trigger = 0; 2318 2319 EM_CORE_LOCK_ASSERT(adapter); 2320 2321 em_update_link_status(adapter); 2322 em_update_stats_counters(adapter); 2323 2324 /* Reset LAA into RAR[0] on 82571 */ 2325 if ((adapter->hw.mac.type == e1000_82571) && 2326 e1000_get_laa_state_82571(&adapter->hw)) 2327 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); 2328 2329 /* Mask to use in the irq trigger */ 2330 if (adapter->msix_mem) { 2331 for (int i = 0; i < adapter->num_queues; i++, rxr++) 2332 trigger |= rxr->ims; 2333 rxr = adapter->rx_rings; 2334 } else 2335 trigger = E1000_ICS_RXDMT0; 2336 2337 /* 2338 ** Check on the state of the TX queue(s), this 2339 ** can be done without the lock because its RO 2340 ** and the HUNG state will be static if set. 2341 */ 2342 for (int i = 0; i < adapter->num_queues; i++, txr++) { 2343 if (txr->busy == EM_TX_HUNG) 2344 goto hung; 2345 if (txr->busy >= EM_TX_MAXTRIES) 2346 txr->busy = EM_TX_HUNG; 2347 /* Schedule a TX tasklet if needed */ 2348 if (txr->tx_avail <= EM_MAX_SCATTER) 2349 taskqueue_enqueue(txr->tq, &txr->tx_task); 2350 } 2351 2352 callout_reset(&adapter->timer, hz, em_local_timer, adapter); 2353 #ifndef DEVICE_POLLING 2354 /* Trigger an RX interrupt to guarantee mbuf refresh */ 2355 E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger); 2356 #endif 2357 return; 2358 hung: 2359 /* Looks like we're hung */ 2360 device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n", 2361 txr->me); 2362 em_print_debug_info(adapter); 2363 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); 2364 adapter->watchdog_events++; 2365 em_init_locked(adapter); 2366 } 2367 2368 2369 static void 2370 em_update_link_status(struct adapter *adapter) 2371 { 2372 struct e1000_hw *hw = &adapter->hw; 2373 if_t ifp = adapter->ifp; 2374 device_t dev = adapter->dev; 2375 struct tx_ring *txr = adapter->tx_rings; 2376 u32 link_check = 0; 2377 2378 /* Get the cached link value or read phy for real */ 2379 switch (hw->phy.media_type) { 2380 case e1000_media_type_copper: 2381 if (hw->mac.get_link_status) { 2382 if (hw->mac.type == e1000_pch_spt) 2383 msec_delay(50); 2384 /* Do the work to read phy */ 2385 e1000_check_for_link(hw); 2386 link_check = !hw->mac.get_link_status; 2387 if (link_check) /* ESB2 fix */ 2388 e1000_cfg_on_link_up(hw); 2389 } else 2390 link_check = TRUE; 2391 break; 2392 case e1000_media_type_fiber: 2393 e1000_check_for_link(hw); 2394 link_check = (E1000_READ_REG(hw, E1000_STATUS) & 2395 E1000_STATUS_LU); 2396 break; 2397 case e1000_media_type_internal_serdes: 2398 e1000_check_for_link(hw); 2399 link_check = adapter->hw.mac.serdes_has_link; 2400 break; 2401 default: 2402 case e1000_media_type_unknown: 2403 break; 2404 } 2405 2406 /* Now check for a transition */ 2407 if (link_check && (adapter->link_active == 0)) { 2408 e1000_get_speed_and_duplex(hw, &adapter->link_speed, 2409 &adapter->link_duplex); 2410 /* Check if we must disable SPEED_MODE bit on PCI-E */ 2411 if ((adapter->link_speed != SPEED_1000) && 2412 ((hw->mac.type == e1000_82571) || 2413 (hw->mac.type == e1000_82572))) { 2414 int tarc0; 2415 tarc0 = E1000_READ_REG(hw, E1000_TARC(0)); 2416 tarc0 &= ~TARC_SPEED_MODE_BIT; 2417 E1000_WRITE_REG(hw, E1000_TARC(0), tarc0); 2418 } 2419 if (bootverbose) 2420 
device_printf(dev, "Link is up %d Mbps %s\n", 2421 adapter->link_speed, 2422 ((adapter->link_duplex == FULL_DUPLEX) ? 2423 "Full Duplex" : "Half Duplex")); 2424 adapter->link_active = 1; 2425 adapter->smartspeed = 0; 2426 if_setbaudrate(ifp, adapter->link_speed * 1000000); 2427 if_link_state_change(ifp, LINK_STATE_UP); 2428 } else if (!link_check && (adapter->link_active == 1)) { 2429 if_setbaudrate(ifp, 0); 2430 adapter->link_speed = 0; 2431 adapter->link_duplex = 0; 2432 if (bootverbose) 2433 device_printf(dev, "Link is Down\n"); 2434 adapter->link_active = 0; 2435 /* Link down, disable hang detection */ 2436 for (int i = 0; i < adapter->num_queues; i++, txr++) 2437 txr->busy = EM_TX_IDLE; 2438 if_link_state_change(ifp, LINK_STATE_DOWN); 2439 } 2440 } 2441 2442 /********************************************************************* 2443 * 2444 * This routine disables all traffic on the adapter by issuing a 2445 * global reset on the MAC and deallocates TX/RX buffers. 2446 * 2447 * This routine should always be called with BOTH the CORE 2448 * and TX locks. 2449 **********************************************************************/ 2450 2451 static void 2452 em_stop(void *arg) 2453 { 2454 struct adapter *adapter = arg; 2455 if_t ifp = adapter->ifp; 2456 struct tx_ring *txr = adapter->tx_rings; 2457 2458 EM_CORE_LOCK_ASSERT(adapter); 2459 2460 INIT_DEBUGOUT("em_stop: begin"); 2461 2462 em_disable_intr(adapter); 2463 callout_stop(&adapter->timer); 2464 2465 /* Tell the stack that the interface is no longer active */ 2466 if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2467 2468 /* Disarm Hang Detection. */ 2469 for (int i = 0; i < adapter->num_queues; i++, txr++) { 2470 EM_TX_LOCK(txr); 2471 txr->busy = EM_TX_IDLE; 2472 EM_TX_UNLOCK(txr); 2473 } 2474 2475 /* I219 needs some special flushing to avoid hangs */ 2476 if (adapter->hw.mac.type == e1000_pch_spt) 2477 em_flush_desc_rings(adapter); 2478 2479 e1000_reset_hw(&adapter->hw); 2480 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); 2481 2482 e1000_led_off(&adapter->hw); 2483 e1000_cleanup_led(&adapter->hw); 2484 } 2485 2486 2487 /********************************************************************* 2488 * 2489 * Determine hardware revision. 
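 * The PCI vendor/device/revision and subsystem IDs cached here are what
 * the shared code's e1000_set_mac_type() uses to select the MAC type.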
2490 * 2491 **********************************************************************/ 2492 static void 2493 em_identify_hardware(struct adapter *adapter) 2494 { 2495 device_t dev = adapter->dev; 2496 2497 /* Make sure our PCI config space has the necessary stuff set */ 2498 pci_enable_busmaster(dev); 2499 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); 2500 2501 /* Save off the information about this board */ 2502 adapter->hw.vendor_id = pci_get_vendor(dev); 2503 adapter->hw.device_id = pci_get_device(dev); 2504 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); 2505 adapter->hw.subsystem_vendor_id = 2506 pci_read_config(dev, PCIR_SUBVEND_0, 2); 2507 adapter->hw.subsystem_device_id = 2508 pci_read_config(dev, PCIR_SUBDEV_0, 2); 2509 2510 /* Do Shared Code Init and Setup */ 2511 if (e1000_set_mac_type(&adapter->hw)) { 2512 device_printf(dev, "Setup init failure\n"); 2513 return; 2514 } 2515 } 2516 2517 static int 2518 em_allocate_pci_resources(struct adapter *adapter) 2519 { 2520 device_t dev = adapter->dev; 2521 int rid; 2522 2523 rid = PCIR_BAR(0); 2524 adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 2525 &rid, RF_ACTIVE); 2526 if (adapter->memory == NULL) { 2527 device_printf(dev, "Unable to allocate bus resource: memory\n"); 2528 return (ENXIO); 2529 } 2530 adapter->osdep.mem_bus_space_tag = 2531 rman_get_bustag(adapter->memory); 2532 adapter->osdep.mem_bus_space_handle = 2533 rman_get_bushandle(adapter->memory); 2534 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; 2535 2536 adapter->hw.back = &adapter->osdep; 2537 2538 return (0); 2539 } 2540 2541 /********************************************************************* 2542 * 2543 * Setup the Legacy or MSI Interrupt handler 2544 * 2545 **********************************************************************/ 2546 int 2547 em_allocate_legacy(struct adapter *adapter) 2548 { 2549 device_t dev = adapter->dev; 2550 struct tx_ring *txr = adapter->tx_rings; 2551 int error, rid = 0; 2552 2553 /* Manually turn off all interrupts */ 2554 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); 2555 2556 if (adapter->msix == 1) /* using MSI */ 2557 rid = 1; 2558 /* We allocate a single interrupt resource */ 2559 adapter->res = bus_alloc_resource_any(dev, 2560 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); 2561 if (adapter->res == NULL) { 2562 device_printf(dev, "Unable to allocate bus resource: " 2563 "interrupt\n"); 2564 return (ENXIO); 2565 } 2566 2567 /* 2568 * Allocate a fast interrupt and the associated 2569 * deferred processing contexts. 
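	 *
	 * Rough flow for the legacy/MSI case (a sketch of the handlers
	 * defined earlier in this file, not additional code):
	 *
	 *   em_irq_fast()  -- filter: reads ICR, disables interrupts
	 *     -> taskqueue_enqueue(adapter->tq, &adapter->que_task)
	 *       -> em_handle_que()  -- cleans RX/TX, re-enables interrupts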
2570 */ 2571 TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter); 2572 adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT, 2573 taskqueue_thread_enqueue, &adapter->tq); 2574 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que", 2575 device_get_nameunit(adapter->dev)); 2576 /* Use a TX only tasklet for local timer */ 2577 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); 2578 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, 2579 taskqueue_thread_enqueue, &txr->tq); 2580 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq", 2581 device_get_nameunit(adapter->dev)); 2582 TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter); 2583 if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET, 2584 em_irq_fast, NULL, adapter, &adapter->tag)) != 0) { 2585 device_printf(dev, "Failed to register fast interrupt " 2586 "handler: %d\n", error); 2587 taskqueue_free(adapter->tq); 2588 adapter->tq = NULL; 2589 return (error); 2590 } 2591 2592 return (0); 2593 } 2594 2595 /********************************************************************* 2596 * 2597 * Setup the MSIX Interrupt handlers 2598 * This is not really Multiqueue, rather 2599 * its just separate interrupt vectors 2600 * for TX, RX, and Link. 2601 * 2602 **********************************************************************/ 2603 int 2604 em_allocate_msix(struct adapter *adapter) 2605 { 2606 device_t dev = adapter->dev; 2607 struct tx_ring *txr = adapter->tx_rings; 2608 struct rx_ring *rxr = adapter->rx_rings; 2609 int error, rid, vector = 0; 2610 int cpu_id = 0; 2611 2612 2613 /* Make sure all interrupts are disabled */ 2614 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); 2615 2616 /* First set up ring resources */ 2617 for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) { 2618 2619 /* RX ring */ 2620 rid = vector + 1; 2621 2622 rxr->res = bus_alloc_resource_any(dev, 2623 SYS_RES_IRQ, &rid, RF_ACTIVE); 2624 if (rxr->res == NULL) { 2625 device_printf(dev, 2626 "Unable to allocate bus resource: " 2627 "RX MSIX Interrupt %d\n", i); 2628 return (ENXIO); 2629 } 2630 if ((error = bus_setup_intr(dev, rxr->res, 2631 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx, 2632 rxr, &rxr->tag)) != 0) { 2633 device_printf(dev, "Failed to register RX handler"); 2634 return (error); 2635 } 2636 #if __FreeBSD_version >= 800504 2637 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i); 2638 #endif 2639 rxr->msix = vector; 2640 2641 if (em_last_bind_cpu < 0) 2642 em_last_bind_cpu = CPU_FIRST(); 2643 cpu_id = em_last_bind_cpu; 2644 bus_bind_intr(dev, rxr->res, cpu_id); 2645 2646 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr); 2647 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT, 2648 taskqueue_thread_enqueue, &rxr->tq); 2649 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)", 2650 device_get_nameunit(adapter->dev), cpu_id); 2651 /* 2652 ** Set the bit to enable interrupt 2653 ** in E1000_IMS -- bits 20 and 21 2654 ** are for RX0 and RX1, note this has 2655 ** NOTHING to do with the MSIX vector 2656 */ 2657 rxr->ims = 1 << (20 + i); 2658 adapter->ims |= rxr->ims; 2659 adapter->ivars |= (8 | rxr->msix) << (i * 4); 2660 2661 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); 2662 } 2663 2664 for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) { 2665 /* TX ring */ 2666 rid = vector + 1; 2667 txr->res = bus_alloc_resource_any(dev, 2668 SYS_RES_IRQ, &rid, RF_ACTIVE); 2669 if (txr->res == NULL) { 2670 device_printf(dev, 2671 "Unable to allocate bus resource: " 2672 "TX MSIX Interrupt %d\n", i); 2673 return 
(ENXIO); 2674 } 2675 if ((error = bus_setup_intr(dev, txr->res, 2676 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx, 2677 txr, &txr->tag)) != 0) { 2678 device_printf(dev, "Failed to register TX handler"); 2679 return (error); 2680 } 2681 #if __FreeBSD_version >= 800504 2682 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i); 2683 #endif 2684 txr->msix = vector; 2685 2686 if (em_last_bind_cpu < 0) 2687 em_last_bind_cpu = CPU_FIRST(); 2688 cpu_id = em_last_bind_cpu; 2689 bus_bind_intr(dev, txr->res, cpu_id); 2690 2691 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); 2692 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, 2693 taskqueue_thread_enqueue, &txr->tq); 2694 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)", 2695 device_get_nameunit(adapter->dev), cpu_id); 2696 /* 2697 ** Set the bit to enable interrupt 2698 ** in E1000_IMS -- bits 22 and 23 2699 ** are for TX0 and TX1, note this has 2700 ** NOTHING to do with the MSIX vector 2701 */ 2702 txr->ims = 1 << (22 + i); 2703 adapter->ims |= txr->ims; 2704 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4)); 2705 2706 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); 2707 } 2708 2709 /* Link interrupt */ 2710 rid = vector + 1; 2711 adapter->res = bus_alloc_resource_any(dev, 2712 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); 2713 if (!adapter->res) { 2714 device_printf(dev,"Unable to allocate " 2715 "bus resource: Link interrupt [%d]\n", rid); 2716 return (ENXIO); 2717 } 2718 /* Set the link handler function */ 2719 error = bus_setup_intr(dev, adapter->res, 2720 INTR_TYPE_NET | INTR_MPSAFE, NULL, 2721 em_msix_link, adapter, &adapter->tag); 2722 if (error) { 2723 adapter->res = NULL; 2724 device_printf(dev, "Failed to register LINK handler"); 2725 return (error); 2726 } 2727 #if __FreeBSD_version >= 800504 2728 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); 2729 #endif 2730 adapter->linkvec = vector; 2731 adapter->ivars |= (8 | vector) << 16; 2732 adapter->ivars |= 0x80000000; 2733 2734 return (0); 2735 } 2736 2737 2738 static void 2739 em_free_pci_resources(struct adapter *adapter) 2740 { 2741 device_t dev = adapter->dev; 2742 struct tx_ring *txr; 2743 struct rx_ring *rxr; 2744 int rid; 2745 2746 2747 /* 2748 ** Release all the queue interrupt resources: 2749 */ 2750 for (int i = 0; i < adapter->num_queues; i++) { 2751 txr = &adapter->tx_rings[i]; 2752 /* an early abort? */ 2753 if (txr == NULL) 2754 break; 2755 rid = txr->msix +1; 2756 if (txr->tag != NULL) { 2757 bus_teardown_intr(dev, txr->res, txr->tag); 2758 txr->tag = NULL; 2759 } 2760 if (txr->res != NULL) 2761 bus_release_resource(dev, SYS_RES_IRQ, 2762 rid, txr->res); 2763 2764 rxr = &adapter->rx_rings[i]; 2765 /* an early abort? */ 2766 if (rxr == NULL) 2767 break; 2768 rid = rxr->msix +1; 2769 if (rxr->tag != NULL) { 2770 bus_teardown_intr(dev, rxr->res, rxr->tag); 2771 rxr->tag = NULL; 2772 } 2773 if (rxr->res != NULL) 2774 bus_release_resource(dev, SYS_RES_IRQ, 2775 rid, rxr->res); 2776 } 2777 2778 if (adapter->linkvec) /* we are doing MSIX */ 2779 rid = adapter->linkvec + 1; 2780 else 2781 (adapter->msix != 0) ? 
(rid = 1):(rid = 0); 2782 2783 if (adapter->tag != NULL) { 2784 bus_teardown_intr(dev, adapter->res, adapter->tag); 2785 adapter->tag = NULL; 2786 } 2787 2788 if (adapter->res != NULL) 2789 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); 2790 2791 2792 if (adapter->msix) 2793 pci_release_msi(dev); 2794 2795 if (adapter->msix_mem != NULL) 2796 bus_release_resource(dev, SYS_RES_MEMORY, 2797 PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); 2798 2799 if (adapter->memory != NULL) 2800 bus_release_resource(dev, SYS_RES_MEMORY, 2801 PCIR_BAR(0), adapter->memory); 2802 2803 if (adapter->flash != NULL) 2804 bus_release_resource(dev, SYS_RES_MEMORY, 2805 EM_FLASH, adapter->flash); 2806 } 2807 2808 /* 2809 * Setup MSI or MSI/X 2810 */ 2811 static int 2812 em_setup_msix(struct adapter *adapter) 2813 { 2814 device_t dev = adapter->dev; 2815 int val; 2816 2817 /* Nearly always going to use one queue */ 2818 adapter->num_queues = 1; 2819 2820 /* 2821 ** Try using MSI-X for Hartwell adapters 2822 */ 2823 if ((adapter->hw.mac.type == e1000_82574) && 2824 (em_enable_msix == TRUE)) { 2825 #ifdef EM_MULTIQUEUE 2826 adapter->num_queues = (em_num_queues == 1) ? 1 : 2; 2827 if (adapter->num_queues > 1) 2828 em_enable_vectors_82574(adapter); 2829 #endif 2830 /* Map the MSIX BAR */ 2831 int rid = PCIR_BAR(EM_MSIX_BAR); 2832 adapter->msix_mem = bus_alloc_resource_any(dev, 2833 SYS_RES_MEMORY, &rid, RF_ACTIVE); 2834 if (adapter->msix_mem == NULL) { 2835 /* May not be enabled */ 2836 device_printf(adapter->dev, 2837 "Unable to map MSIX table \n"); 2838 goto msi; 2839 } 2840 val = pci_msix_count(dev); 2841 2842 #ifdef EM_MULTIQUEUE 2843 /* We need 5 vectors in the multiqueue case */ 2844 if (adapter->num_queues > 1 ) { 2845 if (val >= 5) 2846 val = 5; 2847 else { 2848 adapter->num_queues = 1; 2849 device_printf(adapter->dev, 2850 "Insufficient MSIX vectors for >1 queue, " 2851 "using single queue...\n"); 2852 goto msix_one; 2853 } 2854 } else { 2855 msix_one: 2856 #endif 2857 if (val >= 3) 2858 val = 3; 2859 else { 2860 device_printf(adapter->dev, 2861 "Insufficient MSIX vectors, using MSI\n"); 2862 goto msi; 2863 } 2864 #ifdef EM_MULTIQUEUE 2865 } 2866 #endif 2867 2868 if ((pci_alloc_msix(dev, &val) == 0)) { 2869 device_printf(adapter->dev, 2870 "Using MSIX interrupts " 2871 "with %d vectors\n", val); 2872 return (val); 2873 } 2874 2875 /* 2876 ** If MSIX alloc failed or provided us with 2877 ** less than needed, free and fall through to MSI 2878 */ 2879 pci_release_msi(dev); 2880 } 2881 msi: 2882 if (adapter->msix_mem != NULL) { 2883 bus_release_resource(dev, SYS_RES_MEMORY, 2884 PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); 2885 adapter->msix_mem = NULL; 2886 } 2887 val = 1; 2888 if (pci_alloc_msi(dev, &val) == 0) { 2889 device_printf(adapter->dev, "Using an MSI interrupt\n"); 2890 return (val); 2891 } 2892 /* Should only happen due to manual configuration */ 2893 device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n"); 2894 return (0); 2895 } 2896 2897 2898 /* 2899 ** The 3 following flush routines are used as a workaround in the 2900 ** I219 client parts and only for them. 2901 ** 2902 ** em_flush_tx_ring - remove all descriptors from the tx_ring 2903 ** 2904 ** We want to clear all pending descriptors from the TX ring. 2905 ** zeroing happens when the HW reads the regs. We assign the ring itself as 2906 ** the data of the next descriptor. We don't care about the data we are about 2907 ** to reset the HW. 
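**
** (The dummy descriptor queued by em_flush_tx_ring() simply points at
** the ring's own physical address; its contents are irrelevant since
** the hardware is reset immediately afterwards.)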
2908 */ 2909 static void 2910 em_flush_tx_ring(struct adapter *adapter) 2911 { 2912 struct e1000_hw *hw = &adapter->hw; 2913 struct tx_ring *txr = adapter->tx_rings; 2914 struct e1000_tx_desc *txd; 2915 u32 tctl, txd_lower = E1000_TXD_CMD_IFCS; 2916 u16 size = 512; 2917 2918 tctl = E1000_READ_REG(hw, E1000_TCTL); 2919 E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN); 2920 2921 txd = &txr->tx_base[txr->next_avail_desc++]; 2922 if (txr->next_avail_desc == adapter->num_tx_desc) 2923 txr->next_avail_desc = 0; 2924 2925 /* Just use the ring as a dummy buffer addr */ 2926 txd->buffer_addr = txr->txdma.dma_paddr; 2927 txd->lower.data = htole32(txd_lower | size); 2928 txd->upper.data = 0; 2929 2930 /* flush descriptors to memory before notifying the HW */ 2931 wmb(); 2932 2933 E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc); 2934 mb(); 2935 usec_delay(250); 2936 } 2937 2938 /* 2939 ** em_flush_rx_ring - remove all descriptors from the rx_ring 2940 ** 2941 ** Mark all descriptors in the RX ring as consumed and disable the rx ring 2942 */ 2943 static void 2944 em_flush_rx_ring(struct adapter *adapter) 2945 { 2946 struct e1000_hw *hw = &adapter->hw; 2947 u32 rctl, rxdctl; 2948 2949 rctl = E1000_READ_REG(hw, E1000_RCTL); 2950 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); 2951 E1000_WRITE_FLUSH(hw); 2952 usec_delay(150); 2953 2954 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); 2955 /* zero the lower 14 bits (prefetch and host thresholds) */ 2956 rxdctl &= 0xffffc000; 2957 /* 2958 * update thresholds: prefetch threshold to 31, host threshold to 1 2959 * and make sure the granularity is "descriptors" and not "cache lines" 2960 */ 2961 rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC); 2962 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl); 2963 2964 /* momentarily enable the RX ring for the changes to take effect */ 2965 E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN); 2966 E1000_WRITE_FLUSH(hw); 2967 usec_delay(150); 2968 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); 2969 } 2970 2971 /* 2972 ** em_flush_desc_rings - remove all descriptors from the descriptor rings 2973 ** 2974 ** In i219, the descriptor rings must be emptied before resetting the HW 2975 ** or before changing the device state to D3 during runtime (runtime PM). 
2976 ** 2977 ** Failure to do this will cause the HW to enter a unit hang state which can 2978 ** only be released by PCI reset on the device 2979 ** 2980 */ 2981 static void 2982 em_flush_desc_rings(struct adapter *adapter) 2983 { 2984 struct e1000_hw *hw = &adapter->hw; 2985 device_t dev = adapter->dev; 2986 u16 hang_state; 2987 u32 fext_nvm11, tdlen; 2988 2989 /* First, disable MULR fix in FEXTNVM11 */ 2990 fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11); 2991 fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX; 2992 E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11); 2993 2994 /* do nothing if we're not in faulty state, or if the queue is empty */ 2995 tdlen = E1000_READ_REG(hw, E1000_TDLEN(0)); 2996 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2); 2997 if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen) 2998 return; 2999 em_flush_tx_ring(adapter); 3000 3001 /* recheck, maybe the fault is caused by the rx ring */ 3002 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2); 3003 if (hang_state & FLUSH_DESC_REQUIRED) 3004 em_flush_rx_ring(adapter); 3005 } 3006 3007 3008 /********************************************************************* 3009 * 3010 * Initialize the hardware to a configuration 3011 * as specified by the adapter structure. 3012 * 3013 **********************************************************************/ 3014 static void 3015 em_reset(struct adapter *adapter) 3016 { 3017 device_t dev = adapter->dev; 3018 if_t ifp = adapter->ifp; 3019 struct e1000_hw *hw = &adapter->hw; 3020 u16 rx_buffer_size; 3021 u32 pba; 3022 3023 INIT_DEBUGOUT("em_reset: begin"); 3024 3025 /* Set up smart power down as default off on newer adapters. */ 3026 if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 || 3027 hw->mac.type == e1000_82572)) { 3028 u16 phy_tmp = 0; 3029 3030 /* Speed up time to link by disabling smart power down. */ 3031 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp); 3032 phy_tmp &= ~IGP02E1000_PM_SPD; 3033 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp); 3034 } 3035 3036 /* 3037 * Packet Buffer Allocation (PBA) 3038 * Writing PBA sets the receive portion of the buffer 3039 * the remainder is used for the transmit buffer. 3040 */ 3041 switch (hw->mac.type) { 3042 /* Total Packet Buffer on these is 48K */ 3043 case e1000_82571: 3044 case e1000_82572: 3045 case e1000_80003es2lan: 3046 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */ 3047 break; 3048 case e1000_82573: /* 82573: Total Packet Buffer is 32K */ 3049 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */ 3050 break; 3051 case e1000_82574: 3052 case e1000_82583: 3053 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */ 3054 break; 3055 case e1000_ich8lan: 3056 pba = E1000_PBA_8K; 3057 break; 3058 case e1000_ich9lan: 3059 case e1000_ich10lan: 3060 /* Boost Receive side for jumbo frames */ 3061 if (adapter->hw.mac.max_frame_size > 4096) 3062 pba = E1000_PBA_14K; 3063 else 3064 pba = E1000_PBA_10K; 3065 break; 3066 case e1000_pchlan: 3067 case e1000_pch2lan: 3068 case e1000_pch_lpt: 3069 case e1000_pch_spt: 3070 pba = E1000_PBA_26K; 3071 break; 3072 default: 3073 if (adapter->hw.mac.max_frame_size > 8192) 3074 pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ 3075 else 3076 pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ 3077 } 3078 E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); 3079 3080 /* 3081 * These parameters control the automatic generation (Tx) and 3082 * response (Rx) to Ethernet PAUSE frames. 
3083 * - High water mark should allow for at least two frames to be 3084 * received after sending an XOFF. 3085 * - Low water mark works best when it is very near the high water mark. 3086 * This allows the receiver to restart by sending XON when it has 3087 * drained a bit. Here we use an arbitrary value of 1500 which will 3088 * restart after one full frame is pulled from the buffer. There 3089 * could be several smaller frames in the buffer and if so they will 3090 * not trigger the XON until their total number reduces the buffer 3091 * by 1500. 3092 * - The pause time is fairly large at 1000 x 512ns = 512 usec. 3093 */ 3094 rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 ); 3095 hw->fc.high_water = rx_buffer_size - 3096 roundup2(adapter->hw.mac.max_frame_size, 1024); 3097 hw->fc.low_water = hw->fc.high_water - 1500; 3098 3099 if (adapter->fc) /* locally set flow control value? */ 3100 hw->fc.requested_mode = adapter->fc; 3101 else 3102 hw->fc.requested_mode = e1000_fc_full; 3103 3104 if (hw->mac.type == e1000_80003es2lan) 3105 hw->fc.pause_time = 0xFFFF; 3106 else 3107 hw->fc.pause_time = EM_FC_PAUSE_TIME; 3108 3109 hw->fc.send_xon = TRUE; 3110 3111 /* Device specific overrides/settings */ 3112 switch (hw->mac.type) { 3113 case e1000_pchlan: 3114 /* Workaround: no TX flow ctrl for PCH */ 3115 hw->fc.requested_mode = e1000_fc_rx_pause; 3116 hw->fc.pause_time = 0xFFFF; /* override */ 3117 if (if_getmtu(ifp) > ETHERMTU) { 3118 hw->fc.high_water = 0x3500; 3119 hw->fc.low_water = 0x1500; 3120 } else { 3121 hw->fc.high_water = 0x5000; 3122 hw->fc.low_water = 0x3000; 3123 } 3124 hw->fc.refresh_time = 0x1000; 3125 break; 3126 case e1000_pch2lan: 3127 case e1000_pch_lpt: 3128 case e1000_pch_spt: 3129 hw->fc.high_water = 0x5C20; 3130 hw->fc.low_water = 0x5048; 3131 hw->fc.pause_time = 0x0650; 3132 hw->fc.refresh_time = 0x0400; 3133 /* Jumbos need adjusted PBA */ 3134 if (if_getmtu(ifp) > ETHERMTU) 3135 E1000_WRITE_REG(hw, E1000_PBA, 12); 3136 else 3137 E1000_WRITE_REG(hw, E1000_PBA, 26); 3138 break; 3139 case e1000_ich9lan: 3140 case e1000_ich10lan: 3141 if (if_getmtu(ifp) > ETHERMTU) { 3142 hw->fc.high_water = 0x2800; 3143 hw->fc.low_water = hw->fc.high_water - 8; 3144 break; 3145 } 3146 /* else fall thru */ 3147 default: 3148 if (hw->mac.type == e1000_80003es2lan) 3149 hw->fc.pause_time = 0xFFFF; 3150 break; 3151 } 3152 3153 /* I219 needs some special flushing to avoid hangs */ 3154 if (hw->mac.type == e1000_pch_spt) 3155 em_flush_desc_rings(adapter); 3156 3157 /* Issue a global reset */ 3158 e1000_reset_hw(hw); 3159 E1000_WRITE_REG(hw, E1000_WUC, 0); 3160 em_disable_aspm(adapter); 3161 /* and a re-init */ 3162 if (e1000_init_hw(hw) < 0) { 3163 device_printf(dev, "Hardware Initialization Failed\n"); 3164 return; 3165 } 3166 3167 E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN); 3168 e1000_get_phy_info(hw); 3169 e1000_check_for_link(hw); 3170 return; 3171 } 3172 3173 /********************************************************************* 3174 * 3175 * Setup networking device structure and register an interface. 
3176 * 3177 **********************************************************************/ 3178 static int 3179 em_setup_interface(device_t dev, struct adapter *adapter) 3180 { 3181 if_t ifp; 3182 3183 INIT_DEBUGOUT("em_setup_interface: begin"); 3184 3185 ifp = adapter->ifp = if_gethandle(IFT_ETHER); 3186 if (ifp == 0) { 3187 device_printf(dev, "can not allocate ifnet structure\n"); 3188 return (-1); 3189 } 3190 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 3191 if_setdev(ifp, dev); 3192 if_setinitfn(ifp, em_init); 3193 if_setsoftc(ifp, adapter); 3194 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); 3195 if_setioctlfn(ifp, em_ioctl); 3196 if_setgetcounterfn(ifp, em_get_counter); 3197 3198 /* TSO parameters */ 3199 ifp->if_hw_tsomax = IP_MAXPACKET; 3200 /* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */ 3201 ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5; 3202 ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE; 3203 3204 #ifdef EM_MULTIQUEUE 3205 /* Multiqueue stack interface */ 3206 if_settransmitfn(ifp, em_mq_start); 3207 if_setqflushfn(ifp, em_qflush); 3208 #else 3209 if_setstartfn(ifp, em_start); 3210 if_setsendqlen(ifp, adapter->num_tx_desc - 1); 3211 if_setsendqready(ifp); 3212 #endif 3213 3214 ether_ifattach(ifp, adapter->hw.mac.addr); 3215 3216 if_setcapabilities(ifp, 0); 3217 if_setcapenable(ifp, 0); 3218 3219 3220 if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | 3221 IFCAP_TSO4, 0); 3222 /* 3223 * Tell the upper layer(s) we 3224 * support full VLAN capability 3225 */ 3226 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); 3227 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | 3228 IFCAP_VLAN_MTU, 0); 3229 if_setcapenable(ifp, if_getcapabilities(ifp)); 3230 3231 /* 3232 ** Don't turn this on by default, if vlans are 3233 ** created on another pseudo device (eg. lagg) 3234 ** then vlan events are not passed thru, breaking 3235 ** operation, but with HW FILTER off it works. If 3236 ** using vlans directly on the em driver you can 3237 ** enable this and get full hardware tag filtering. 
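**
** (For example, something like "ifconfig em0 vlanhwfilter" should
** toggle the capability at runtime when vlans are configured directly
** on the port; illustrative command only, the interface name and the
** exact ifconfig spelling of the capability are assumptions.)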
3238 */ 3239 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0); 3240 3241 #ifdef DEVICE_POLLING 3242 if_setcapabilitiesbit(ifp, IFCAP_POLLING,0); 3243 #endif 3244 3245 /* Enable only WOL MAGIC by default */ 3246 if (adapter->wol) { 3247 if_setcapabilitiesbit(ifp, IFCAP_WOL, 0); 3248 if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0); 3249 } 3250 3251 /* 3252 * Specify the media types supported by this adapter and register 3253 * callbacks to update media and link information 3254 */ 3255 ifmedia_init(&adapter->media, IFM_IMASK, 3256 em_media_change, em_media_status); 3257 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || 3258 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { 3259 u_char fiber_type = IFM_1000_SX; /* default type */ 3260 3261 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 3262 0, NULL); 3263 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL); 3264 } else { 3265 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); 3266 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 3267 0, NULL); 3268 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 3269 0, NULL); 3270 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 3271 0, NULL); 3272 if (adapter->hw.phy.type != e1000_phy_ife) { 3273 ifmedia_add(&adapter->media, 3274 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); 3275 ifmedia_add(&adapter->media, 3276 IFM_ETHER | IFM_1000_T, 0, NULL); 3277 } 3278 } 3279 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); 3280 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); 3281 return (0); 3282 } 3283 3284 3285 /* 3286 * Manage DMA'able memory. 3287 */ 3288 static void 3289 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 3290 { 3291 if (error) 3292 return; 3293 *(bus_addr_t *) arg = segs[0].ds_addr; 3294 } 3295 3296 static int 3297 em_dma_malloc(struct adapter *adapter, bus_size_t size, 3298 struct em_dma_alloc *dma, int mapflags) 3299 { 3300 int error; 3301 3302 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ 3303 EM_DBA_ALIGN, 0, /* alignment, bounds */ 3304 BUS_SPACE_MAXADDR, /* lowaddr */ 3305 BUS_SPACE_MAXADDR, /* highaddr */ 3306 NULL, NULL, /* filter, filterarg */ 3307 size, /* maxsize */ 3308 1, /* nsegments */ 3309 size, /* maxsegsize */ 3310 0, /* flags */ 3311 NULL, /* lockfunc */ 3312 NULL, /* lockarg */ 3313 &dma->dma_tag); 3314 if (error) { 3315 device_printf(adapter->dev, 3316 "%s: bus_dma_tag_create failed: %d\n", 3317 __func__, error); 3318 goto fail_0; 3319 } 3320 3321 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, 3322 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); 3323 if (error) { 3324 device_printf(adapter->dev, 3325 "%s: bus_dmamem_alloc(%ju) failed: %d\n", 3326 __func__, (uintmax_t)size, error); 3327 goto fail_2; 3328 } 3329 3330 dma->dma_paddr = 0; 3331 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, 3332 size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); 3333 if (error || dma->dma_paddr == 0) { 3334 device_printf(adapter->dev, 3335 "%s: bus_dmamap_load failed: %d\n", 3336 __func__, error); 3337 goto fail_3; 3338 } 3339 3340 return (0); 3341 3342 fail_3: 3343 bus_dmamap_unload(dma->dma_tag, dma->dma_map); 3344 fail_2: 3345 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); 3346 bus_dma_tag_destroy(dma->dma_tag); 3347 fail_0: 3348 dma->dma_tag = NULL; 3349 3350 return (error); 3351 } 3352 3353 static void 3354 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma) 3355 
{ 3356 if (dma->dma_tag == NULL) 3357 return; 3358 if (dma->dma_paddr != 0) { 3359 bus_dmamap_sync(dma->dma_tag, dma->dma_map, 3360 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 3361 bus_dmamap_unload(dma->dma_tag, dma->dma_map); 3362 dma->dma_paddr = 0; 3363 } 3364 if (dma->dma_vaddr != NULL) { 3365 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); 3366 dma->dma_vaddr = NULL; 3367 } 3368 bus_dma_tag_destroy(dma->dma_tag); 3369 dma->dma_tag = NULL; 3370 } 3371 3372 3373 /********************************************************************* 3374 * 3375 * Allocate memory for the transmit and receive rings, and then 3376 * the descriptors associated with each, called only once at attach. 3377 * 3378 **********************************************************************/ 3379 static int 3380 em_allocate_queues(struct adapter *adapter) 3381 { 3382 device_t dev = adapter->dev; 3383 struct tx_ring *txr = NULL; 3384 struct rx_ring *rxr = NULL; 3385 int rsize, tsize, error = E1000_SUCCESS; 3386 int txconf = 0, rxconf = 0; 3387 3388 3389 /* Allocate the TX ring struct memory */ 3390 if (!(adapter->tx_rings = 3391 (struct tx_ring *) malloc(sizeof(struct tx_ring) * 3392 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 3393 device_printf(dev, "Unable to allocate TX ring memory\n"); 3394 error = ENOMEM; 3395 goto fail; 3396 } 3397 3398 /* Now allocate the RX */ 3399 if (!(adapter->rx_rings = 3400 (struct rx_ring *) malloc(sizeof(struct rx_ring) * 3401 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 3402 device_printf(dev, "Unable to allocate RX ring memory\n"); 3403 error = ENOMEM; 3404 goto rx_fail; 3405 } 3406 3407 tsize = roundup2(adapter->num_tx_desc * 3408 sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); 3409 /* 3410 * Now set up the TX queues, txconf is needed to handle the 3411 * possibility that things fail midcourse and we need to 3412 * undo memory gracefully 3413 */ 3414 for (int i = 0; i < adapter->num_queues; i++, txconf++) { 3415 /* Set up some basics */ 3416 txr = &adapter->tx_rings[i]; 3417 txr->adapter = adapter; 3418 txr->me = i; 3419 3420 /* Initialize the TX lock */ 3421 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", 3422 device_get_nameunit(dev), txr->me); 3423 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); 3424 3425 if (em_dma_malloc(adapter, tsize, 3426 &txr->txdma, BUS_DMA_NOWAIT)) { 3427 device_printf(dev, 3428 "Unable to allocate TX Descriptor memory\n"); 3429 error = ENOMEM; 3430 goto err_tx_desc; 3431 } 3432 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr; 3433 bzero((void *)txr->tx_base, tsize); 3434 3435 if (em_allocate_transmit_buffers(txr)) { 3436 device_printf(dev, 3437 "Critical Failure setting up transmit buffers\n"); 3438 error = ENOMEM; 3439 goto err_tx_desc; 3440 } 3441 #if __FreeBSD_version >= 800000 3442 /* Allocate a buf ring */ 3443 txr->br = buf_ring_alloc(4096, M_DEVBUF, 3444 M_WAITOK, &txr->tx_mtx); 3445 #endif 3446 } 3447 3448 /* 3449 * Next the RX queues... 
3450 */ 3451 rsize = roundup2(adapter->num_rx_desc * 3452 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); 3453 for (int i = 0; i < adapter->num_queues; i++, rxconf++) { 3454 rxr = &adapter->rx_rings[i]; 3455 rxr->adapter = adapter; 3456 rxr->me = i; 3457 3458 /* Initialize the RX lock */ 3459 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", 3460 device_get_nameunit(dev), txr->me); 3461 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); 3462 3463 if (em_dma_malloc(adapter, rsize, 3464 &rxr->rxdma, BUS_DMA_NOWAIT)) { 3465 device_printf(dev, 3466 "Unable to allocate RxDescriptor memory\n"); 3467 error = ENOMEM; 3468 goto err_rx_desc; 3469 } 3470 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr; 3471 bzero((void *)rxr->rx_base, rsize); 3472 3473 /* Allocate receive buffers for the ring*/ 3474 if (em_allocate_receive_buffers(rxr)) { 3475 device_printf(dev, 3476 "Critical Failure setting up receive buffers\n"); 3477 error = ENOMEM; 3478 goto err_rx_desc; 3479 } 3480 } 3481 3482 return (0); 3483 3484 err_rx_desc: 3485 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) 3486 em_dma_free(adapter, &rxr->rxdma); 3487 err_tx_desc: 3488 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) 3489 em_dma_free(adapter, &txr->txdma); 3490 free(adapter->rx_rings, M_DEVBUF); 3491 rx_fail: 3492 #if __FreeBSD_version >= 800000 3493 buf_ring_free(txr->br, M_DEVBUF); 3494 #endif 3495 free(adapter->tx_rings, M_DEVBUF); 3496 fail: 3497 return (error); 3498 } 3499 3500 3501 /********************************************************************* 3502 * 3503 * Allocate memory for tx_buffer structures. The tx_buffer stores all 3504 * the information needed to transmit a packet on the wire. This is 3505 * called only once at attach, setup is done every reset. 3506 * 3507 **********************************************************************/ 3508 static int 3509 em_allocate_transmit_buffers(struct tx_ring *txr) 3510 { 3511 struct adapter *adapter = txr->adapter; 3512 device_t dev = adapter->dev; 3513 struct em_txbuffer *txbuf; 3514 int error, i; 3515 3516 /* 3517 * Setup DMA descriptor areas. 3518 */ 3519 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 3520 1, 0, /* alignment, bounds */ 3521 BUS_SPACE_MAXADDR, /* lowaddr */ 3522 BUS_SPACE_MAXADDR, /* highaddr */ 3523 NULL, NULL, /* filter, filterarg */ 3524 EM_TSO_SIZE, /* maxsize */ 3525 EM_MAX_SCATTER, /* nsegments */ 3526 PAGE_SIZE, /* maxsegsize */ 3527 0, /* flags */ 3528 NULL, /* lockfunc */ 3529 NULL, /* lockfuncarg */ 3530 &txr->txtag))) { 3531 device_printf(dev,"Unable to allocate TX DMA tag\n"); 3532 goto fail; 3533 } 3534 3535 if (!(txr->tx_buffers = 3536 (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) * 3537 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { 3538 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 3539 error = ENOMEM; 3540 goto fail; 3541 } 3542 3543 /* Create the descriptor buffer dma maps */ 3544 txbuf = txr->tx_buffers; 3545 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { 3546 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); 3547 if (error != 0) { 3548 device_printf(dev, "Unable to create TX DMA map\n"); 3549 goto fail; 3550 } 3551 } 3552 3553 return 0; 3554 fail: 3555 /* We free all, it handles case where we are in the middle */ 3556 em_free_transmit_structures(adapter); 3557 return (error); 3558 } 3559 3560 /********************************************************************* 3561 * 3562 * Initialize a transmit ring. 
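 * Called for every ring from em_setup_transmit_structures(), i.e. on
 * each init/reset (per the allocation comment above), not just at
 * attach; with DEV_NETMAP it also re-seeds the descriptors from the
 * netmap slots as seen below.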
3563 * 3564 **********************************************************************/ 3565 static void 3566 em_setup_transmit_ring(struct tx_ring *txr) 3567 { 3568 struct adapter *adapter = txr->adapter; 3569 struct em_txbuffer *txbuf; 3570 int i; 3571 #ifdef DEV_NETMAP 3572 struct netmap_slot *slot; 3573 struct netmap_adapter *na = netmap_getna(adapter->ifp); 3574 #endif /* DEV_NETMAP */ 3575 3576 /* Clear the old descriptor contents */ 3577 EM_TX_LOCK(txr); 3578 #ifdef DEV_NETMAP 3579 slot = netmap_reset(na, NR_TX, txr->me, 0); 3580 #endif /* DEV_NETMAP */ 3581 3582 bzero((void *)txr->tx_base, 3583 (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc); 3584 /* Reset indices */ 3585 txr->next_avail_desc = 0; 3586 txr->next_to_clean = 0; 3587 3588 /* Free any existing tx buffers. */ 3589 txbuf = txr->tx_buffers; 3590 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { 3591 if (txbuf->m_head != NULL) { 3592 bus_dmamap_sync(txr->txtag, txbuf->map, 3593 BUS_DMASYNC_POSTWRITE); 3594 bus_dmamap_unload(txr->txtag, txbuf->map); 3595 m_freem(txbuf->m_head); 3596 txbuf->m_head = NULL; 3597 } 3598 #ifdef DEV_NETMAP 3599 if (slot) { 3600 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); 3601 uint64_t paddr; 3602 void *addr; 3603 3604 addr = PNMB(na, slot + si, &paddr); 3605 txr->tx_base[i].buffer_addr = htole64(paddr); 3606 /* reload the map for netmap mode */ 3607 netmap_load_map(na, txr->txtag, txbuf->map, addr); 3608 } 3609 #endif /* DEV_NETMAP */ 3610 3611 /* clear the watch index */ 3612 txbuf->next_eop = -1; 3613 } 3614 3615 /* Set number of descriptors available */ 3616 txr->tx_avail = adapter->num_tx_desc; 3617 txr->busy = EM_TX_IDLE; 3618 3619 /* Clear checksum offload context. */ 3620 txr->last_hw_offload = 0; 3621 txr->last_hw_ipcss = 0; 3622 txr->last_hw_ipcso = 0; 3623 txr->last_hw_tucss = 0; 3624 txr->last_hw_tucso = 0; 3625 3626 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 3627 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 3628 EM_TX_UNLOCK(txr); 3629 } 3630 3631 /********************************************************************* 3632 * 3633 * Initialize all transmit rings. 3634 * 3635 **********************************************************************/ 3636 static void 3637 em_setup_transmit_structures(struct adapter *adapter) 3638 { 3639 struct tx_ring *txr = adapter->tx_rings; 3640 3641 for (int i = 0; i < adapter->num_queues; i++, txr++) 3642 em_setup_transmit_ring(txr); 3643 3644 return; 3645 } 3646 3647 /********************************************************************* 3648 * 3649 * Enable transmit unit. 
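 * The per-queue TXDCTL value assembled in the loop below packs the
 * prefetch/host/write-back thresholds into one register, roughly:
 *
 *   txdctl = 0x1f              (PTHRESH)
 *          | 1 << 8            (HTHRESH)
 *          | 1 << 16           (WTHRESH)
 *          | 1 << 22           (reserved bit, must be 1)
 *          | E1000_TXDCTL_GRAN
 *          | 1 << 25;          (LWTHRESH)
 *
 * (informal restatement of the assignments below, not extra setup)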
3650 * 3651 **********************************************************************/ 3652 static void 3653 em_initialize_transmit_unit(struct adapter *adapter) 3654 { 3655 struct tx_ring *txr = adapter->tx_rings; 3656 struct e1000_hw *hw = &adapter->hw; 3657 u32 tctl, txdctl = 0, tarc, tipg = 0; 3658 3659 INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); 3660 3661 for (int i = 0; i < adapter->num_queues; i++, txr++) { 3662 u64 bus_addr = txr->txdma.dma_paddr; 3663 /* Base and Len of TX Ring */ 3664 E1000_WRITE_REG(hw, E1000_TDLEN(i), 3665 adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); 3666 E1000_WRITE_REG(hw, E1000_TDBAH(i), 3667 (u32)(bus_addr >> 32)); 3668 E1000_WRITE_REG(hw, E1000_TDBAL(i), 3669 (u32)bus_addr); 3670 /* Init the HEAD/TAIL indices */ 3671 E1000_WRITE_REG(hw, E1000_TDT(i), 0); 3672 E1000_WRITE_REG(hw, E1000_TDH(i), 0); 3673 3674 HW_DEBUGOUT2("Base = %x, Length = %x\n", 3675 E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)), 3676 E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); 3677 3678 txr->busy = EM_TX_IDLE; 3679 txdctl = 0; /* clear txdctl */ 3680 txdctl |= 0x1f; /* PTHRESH */ 3681 txdctl |= 1 << 8; /* HTHRESH */ 3682 txdctl |= 1 << 16;/* WTHRESH */ 3683 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */ 3684 txdctl |= E1000_TXDCTL_GRAN; 3685 txdctl |= 1 << 25; /* LWTHRESH */ 3686 3687 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); 3688 } 3689 3690 /* Set the default values for the Tx Inter Packet Gap timer */ 3691 switch (adapter->hw.mac.type) { 3692 case e1000_80003es2lan: 3693 tipg = DEFAULT_82543_TIPG_IPGR1; 3694 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << 3695 E1000_TIPG_IPGR2_SHIFT; 3696 break; 3697 default: 3698 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || 3699 (adapter->hw.phy.media_type == 3700 e1000_media_type_internal_serdes)) 3701 tipg = DEFAULT_82543_TIPG_IPGT_FIBER; 3702 else 3703 tipg = DEFAULT_82543_TIPG_IPGT_COPPER; 3704 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; 3705 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; 3706 } 3707 3708 E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); 3709 E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); 3710 3711 if(adapter->hw.mac.type >= e1000_82540) 3712 E1000_WRITE_REG(&adapter->hw, E1000_TADV, 3713 adapter->tx_abs_int_delay.value); 3714 3715 if ((adapter->hw.mac.type == e1000_82571) || 3716 (adapter->hw.mac.type == e1000_82572)) { 3717 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); 3718 tarc |= TARC_SPEED_MODE_BIT; 3719 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); 3720 } else if (adapter->hw.mac.type == e1000_80003es2lan) { 3721 /* errata: program both queues to unweighted RR */ 3722 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); 3723 tarc |= 1; 3724 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); 3725 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1)); 3726 tarc |= 1; 3727 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); 3728 } else if (adapter->hw.mac.type == e1000_82574) { 3729 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); 3730 tarc |= TARC_ERRATA_BIT; 3731 if ( adapter->num_queues > 1) { 3732 tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX); 3733 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); 3734 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); 3735 } else 3736 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); 3737 } 3738 3739 adapter->txd_cmd = E1000_TXD_CMD_IFCS; 3740 if (adapter->tx_int_delay.value > 0) 3741 adapter->txd_cmd |= E1000_TXD_CMD_IDE; 3742 3743 /* Program the Transmit Control Register */ 
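/*
 * For reference: E1000_TCTL_PSP pads short frames out to the minimum
 * Ethernet size, E1000_TCTL_RTLC re-transmits on late collisions and
 * E1000_TCTL_EN enables the transmitter; the collision threshold
 * (E1000_COLLISION_THRESHOLD, typically 15) is shifted into the CT
 * field by E1000_CT_SHIFT below.
 */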
3744 tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); 3745 tctl &= ~E1000_TCTL_CT; 3746 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | 3747 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); 3748 3749 if (adapter->hw.mac.type >= e1000_82571) 3750 tctl |= E1000_TCTL_MULR; 3751 3752 /* This write will effectively turn on the transmit unit. */ 3753 E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); 3754 3755 if (hw->mac.type == e1000_pch_spt) { 3756 u32 reg; 3757 reg = E1000_READ_REG(hw, E1000_IOSFPC); 3758 reg |= E1000_RCTL_RDMTS_HEX; 3759 E1000_WRITE_REG(hw, E1000_IOSFPC, reg); 3760 reg = E1000_READ_REG(hw, E1000_TARC(0)); 3761 reg |= E1000_TARC0_CB_MULTIQ_3_REQ; 3762 E1000_WRITE_REG(hw, E1000_TARC(0), reg); 3763 } 3764 } 3765 3766 3767 /********************************************************************* 3768 * 3769 * Free all transmit rings. 3770 * 3771 **********************************************************************/ 3772 static void 3773 em_free_transmit_structures(struct adapter *adapter) 3774 { 3775 struct tx_ring *txr = adapter->tx_rings; 3776 3777 for (int i = 0; i < adapter->num_queues; i++, txr++) { 3778 EM_TX_LOCK(txr); 3779 em_free_transmit_buffers(txr); 3780 em_dma_free(adapter, &txr->txdma); 3781 EM_TX_UNLOCK(txr); 3782 EM_TX_LOCK_DESTROY(txr); 3783 } 3784 3785 free(adapter->tx_rings, M_DEVBUF); 3786 } 3787 3788 /********************************************************************* 3789 * 3790 * Free transmit ring related data structures. 3791 * 3792 **********************************************************************/ 3793 static void 3794 em_free_transmit_buffers(struct tx_ring *txr) 3795 { 3796 struct adapter *adapter = txr->adapter; 3797 struct em_txbuffer *txbuf; 3798 3799 INIT_DEBUGOUT("free_transmit_ring: begin"); 3800 3801 if (txr->tx_buffers == NULL) 3802 return; 3803 3804 for (int i = 0; i < adapter->num_tx_desc; i++) { 3805 txbuf = &txr->tx_buffers[i]; 3806 if (txbuf->m_head != NULL) { 3807 bus_dmamap_sync(txr->txtag, txbuf->map, 3808 BUS_DMASYNC_POSTWRITE); 3809 bus_dmamap_unload(txr->txtag, 3810 txbuf->map); 3811 m_freem(txbuf->m_head); 3812 txbuf->m_head = NULL; 3813 if (txbuf->map != NULL) { 3814 bus_dmamap_destroy(txr->txtag, 3815 txbuf->map); 3816 txbuf->map = NULL; 3817 } 3818 } else if (txbuf->map != NULL) { 3819 bus_dmamap_unload(txr->txtag, 3820 txbuf->map); 3821 bus_dmamap_destroy(txr->txtag, 3822 txbuf->map); 3823 txbuf->map = NULL; 3824 } 3825 } 3826 #if __FreeBSD_version >= 800000 3827 if (txr->br != NULL) 3828 buf_ring_free(txr->br, M_DEVBUF); 3829 #endif 3830 if (txr->tx_buffers != NULL) { 3831 free(txr->tx_buffers, M_DEVBUF); 3832 txr->tx_buffers = NULL; 3833 } 3834 if (txr->txtag != NULL) { 3835 bus_dma_tag_destroy(txr->txtag); 3836 txr->txtag = NULL; 3837 } 3838 return; 3839 } 3840 3841 3842 /********************************************************************* 3843 * The offload context is protocol specific (TCP/UDP) and thus 3844 * only needs to be set when the protocol changes. The occasion 3845 * of a context change can be a performance detriment, and 3846 * might be better just disabled. The reason arises in the way 3847 * in which the controller supports pipelined requests from the 3848 * Tx data DMA. Up to four requests can be pipelined, and they may 3849 * belong to the same packet or to multiple packets. 
However all 3850 * requests for one packet are issued before a request is issued 3851 * for a subsequent packet and if a request for the next packet 3852 * requires a context change, that request will be stalled 3853 * until the previous request completes. This means setting up 3854 * a new context effectively disables pipelined Tx data DMA which 3855 * in turn greatly slow down performance to send small sized 3856 * frames. 3857 **********************************************************************/ 3858 static void 3859 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, 3860 struct ip *ip, u32 *txd_upper, u32 *txd_lower) 3861 { 3862 struct adapter *adapter = txr->adapter; 3863 struct e1000_context_desc *TXD = NULL; 3864 struct em_txbuffer *tx_buffer; 3865 int cur, hdr_len; 3866 u32 cmd = 0; 3867 u16 offload = 0; 3868 u8 ipcso, ipcss, tucso, tucss; 3869 3870 ipcss = ipcso = tucss = tucso = 0; 3871 hdr_len = ip_off + (ip->ip_hl << 2); 3872 cur = txr->next_avail_desc; 3873 3874 /* Setup of IP header checksum. */ 3875 if (mp->m_pkthdr.csum_flags & CSUM_IP) { 3876 *txd_upper |= E1000_TXD_POPTS_IXSM << 8; 3877 offload |= CSUM_IP; 3878 ipcss = ip_off; 3879 ipcso = ip_off + offsetof(struct ip, ip_sum); 3880 /* 3881 * Start offset for header checksum calculation. 3882 * End offset for header checksum calculation. 3883 * Offset of place to put the checksum. 3884 */ 3885 TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; 3886 TXD->lower_setup.ip_fields.ipcss = ipcss; 3887 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len); 3888 TXD->lower_setup.ip_fields.ipcso = ipcso; 3889 cmd |= E1000_TXD_CMD_IP; 3890 } 3891 3892 if (mp->m_pkthdr.csum_flags & CSUM_TCP) { 3893 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; 3894 *txd_upper |= E1000_TXD_POPTS_TXSM << 8; 3895 offload |= CSUM_TCP; 3896 tucss = hdr_len; 3897 tucso = hdr_len + offsetof(struct tcphdr, th_sum); 3898 /* 3899 * The 82574L can only remember the *last* context used 3900 * regardless of queue that it was use for. We cannot reuse 3901 * contexts on this hardware platform and must generate a new 3902 * context every time. 82574L hardware spec, section 7.2.6, 3903 * second note. 3904 */ 3905 if (adapter->num_queues < 2) { 3906 /* 3907 * Setting up new checksum offload context for every 3908 * frames takes a lot of processing time for hardware. 3909 * This also reduces performance a lot for small sized 3910 * frames so avoid it if driver can use previously 3911 * configured checksum offload context. 3912 */ 3913 if (txr->last_hw_offload == offload) { 3914 if (offload & CSUM_IP) { 3915 if (txr->last_hw_ipcss == ipcss && 3916 txr->last_hw_ipcso == ipcso && 3917 txr->last_hw_tucss == tucss && 3918 txr->last_hw_tucso == tucso) 3919 return; 3920 } else { 3921 if (txr->last_hw_tucss == tucss && 3922 txr->last_hw_tucso == tucso) 3923 return; 3924 } 3925 } 3926 txr->last_hw_offload = offload; 3927 txr->last_hw_tucss = tucss; 3928 txr->last_hw_tucso = tucso; 3929 } 3930 /* 3931 * Start offset for payload checksum calculation. 3932 * End offset for payload checksum calculation. 3933 * Offset of place to put the checksum. 
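 * A tucse of zero tells the hardware to checksum everything from
 * tucss through the end of the packet.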
3934 */ 3935 TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; 3936 TXD->upper_setup.tcp_fields.tucss = hdr_len; 3937 TXD->upper_setup.tcp_fields.tucse = htole16(0); 3938 TXD->upper_setup.tcp_fields.tucso = tucso; 3939 cmd |= E1000_TXD_CMD_TCP; 3940 } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) { 3941 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; 3942 *txd_upper |= E1000_TXD_POPTS_TXSM << 8; 3943 tucss = hdr_len; 3944 tucso = hdr_len + offsetof(struct udphdr, uh_sum); 3945 /* 3946 * The 82574L can only remember the *last* context used 3947 * regardless of queue that it was use for. We cannot reuse 3948 * contexts on this hardware platform and must generate a new 3949 * context every time. 82574L hardware spec, section 7.2.6, 3950 * second note. 3951 */ 3952 if (adapter->num_queues < 2) { 3953 /* 3954 * Setting up new checksum offload context for every 3955 * frames takes a lot of processing time for hardware. 3956 * This also reduces performance a lot for small sized 3957 * frames so avoid it if driver can use previously 3958 * configured checksum offload context. 3959 */ 3960 if (txr->last_hw_offload == offload) { 3961 if (offload & CSUM_IP) { 3962 if (txr->last_hw_ipcss == ipcss && 3963 txr->last_hw_ipcso == ipcso && 3964 txr->last_hw_tucss == tucss && 3965 txr->last_hw_tucso == tucso) 3966 return; 3967 } else { 3968 if (txr->last_hw_tucss == tucss && 3969 txr->last_hw_tucso == tucso) 3970 return; 3971 } 3972 } 3973 txr->last_hw_offload = offload; 3974 txr->last_hw_tucss = tucss; 3975 txr->last_hw_tucso = tucso; 3976 } 3977 /* 3978 * Start offset for header checksum calculation. 3979 * End offset for header checksum calculation. 3980 * Offset of place to put the checksum. 3981 */ 3982 TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; 3983 TXD->upper_setup.tcp_fields.tucss = tucss; 3984 TXD->upper_setup.tcp_fields.tucse = htole16(0); 3985 TXD->upper_setup.tcp_fields.tucso = tucso; 3986 } 3987 3988 if (offload & CSUM_IP) { 3989 txr->last_hw_ipcss = ipcss; 3990 txr->last_hw_ipcso = ipcso; 3991 } 3992 3993 TXD->tcp_seg_setup.data = htole32(0); 3994 TXD->cmd_and_length = 3995 htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd); 3996 tx_buffer = &txr->tx_buffers[cur]; 3997 tx_buffer->m_head = NULL; 3998 tx_buffer->next_eop = -1; 3999 4000 if (++cur == adapter->num_tx_desc) 4001 cur = 0; 4002 4003 txr->tx_avail--; 4004 txr->next_avail_desc = cur; 4005 } 4006 4007 4008 /********************************************************************** 4009 * 4010 * Setup work for hardware segmentation offload (TSO) 4011 * 4012 **********************************************************************/ 4013 static void 4014 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, 4015 struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower) 4016 { 4017 struct adapter *adapter = txr->adapter; 4018 struct e1000_context_desc *TXD; 4019 struct em_txbuffer *tx_buffer; 4020 int cur, hdr_len; 4021 4022 /* 4023 * In theory we can use the same TSO context if and only if 4024 * frame is the same type(IP/TCP) and the same MSS. However 4025 * checking whether a frame has the same IP/TCP structure is 4026 * hard thing so just ignore that and always restablish a 4027 * new TSO context. 4028 */ 4029 hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2); 4030 *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */ 4031 E1000_TXD_DTYP_D | /* Data descr type */ 4032 E1000_TXD_CMD_TSE); /* Do TSE on this packet */ 4033 4034 /* IP and/or TCP header checksum calculation and insertion. 
*/ 4035 *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8; 4036 4037 cur = txr->next_avail_desc; 4038 tx_buffer = &txr->tx_buffers[cur]; 4039 TXD = (struct e1000_context_desc *) &txr->tx_base[cur]; 4040 4041 /* 4042 * Start offset for header checksum calculation. 4043 * End offset for header checksum calculation. 4044 * Offset of place put the checksum. 4045 */ 4046 TXD->lower_setup.ip_fields.ipcss = ip_off; 4047 TXD->lower_setup.ip_fields.ipcse = 4048 htole16(ip_off + (ip->ip_hl << 2) - 1); 4049 TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum); 4050 /* 4051 * Start offset for payload checksum calculation. 4052 * End offset for payload checksum calculation. 4053 * Offset of place to put the checksum. 4054 */ 4055 TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2); 4056 TXD->upper_setup.tcp_fields.tucse = 0; 4057 TXD->upper_setup.tcp_fields.tucso = 4058 ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum); 4059 /* 4060 * Payload size per packet w/o any headers. 4061 * Length of all headers up to payload. 4062 */ 4063 TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz); 4064 TXD->tcp_seg_setup.fields.hdr_len = hdr_len; 4065 4066 TXD->cmd_and_length = htole32(adapter->txd_cmd | 4067 E1000_TXD_CMD_DEXT | /* Extended descr */ 4068 E1000_TXD_CMD_TSE | /* TSE context */ 4069 E1000_TXD_CMD_IP | /* Do IP csum */ 4070 E1000_TXD_CMD_TCP | /* Do TCP checksum */ 4071 (mp->m_pkthdr.len - (hdr_len))); /* Total len */ 4072 4073 tx_buffer->m_head = NULL; 4074 tx_buffer->next_eop = -1; 4075 4076 if (++cur == adapter->num_tx_desc) 4077 cur = 0; 4078 4079 txr->tx_avail--; 4080 txr->next_avail_desc = cur; 4081 txr->tx_tso = TRUE; 4082 } 4083 4084 4085 /********************************************************************** 4086 * 4087 * Examine each tx_buffer in the used queue. If the hardware is done 4088 * processing the packet then free associated resources. The 4089 * tx_buffer is put back on the free queue. 4090 * 4091 **********************************************************************/ 4092 static void 4093 em_txeof(struct tx_ring *txr) 4094 { 4095 struct adapter *adapter = txr->adapter; 4096 int first, last, done, processed; 4097 struct em_txbuffer *tx_buffer; 4098 struct e1000_tx_desc *tx_desc, *eop_desc; 4099 if_t ifp = adapter->ifp; 4100 4101 EM_TX_LOCK_ASSERT(txr); 4102 #ifdef DEV_NETMAP 4103 if (netmap_tx_irq(ifp, txr->me)) 4104 return; 4105 #endif /* DEV_NETMAP */ 4106 4107 /* No work, make sure hang detection is disabled */ 4108 if (txr->tx_avail == adapter->num_tx_desc) { 4109 txr->busy = EM_TX_IDLE; 4110 return; 4111 } 4112 4113 processed = 0; 4114 first = txr->next_to_clean; 4115 tx_desc = &txr->tx_base[first]; 4116 tx_buffer = &txr->tx_buffers[first]; 4117 last = tx_buffer->next_eop; 4118 eop_desc = &txr->tx_base[last]; 4119 4120 /* 4121 * What this does is get the index of the 4122 * first descriptor AFTER the EOP of the 4123 * first packet, that way we can do the 4124 * simple comparison on the inner while loop. 
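 * For example, with 1024 descriptors and an EOP index of 1023,
 * 'done' wraps back around to 0.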
4125 */ 4126 if (++last == adapter->num_tx_desc) 4127 last = 0; 4128 done = last; 4129 4130 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 4131 BUS_DMASYNC_POSTREAD); 4132 4133 while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) { 4134 /* We clean the range of the packet */ 4135 while (first != done) { 4136 tx_desc->upper.data = 0; 4137 tx_desc->lower.data = 0; 4138 tx_desc->buffer_addr = 0; 4139 ++txr->tx_avail; 4140 ++processed; 4141 4142 if (tx_buffer->m_head) { 4143 bus_dmamap_sync(txr->txtag, 4144 tx_buffer->map, 4145 BUS_DMASYNC_POSTWRITE); 4146 bus_dmamap_unload(txr->txtag, 4147 tx_buffer->map); 4148 m_freem(tx_buffer->m_head); 4149 tx_buffer->m_head = NULL; 4150 } 4151 tx_buffer->next_eop = -1; 4152 4153 if (++first == adapter->num_tx_desc) 4154 first = 0; 4155 4156 tx_buffer = &txr->tx_buffers[first]; 4157 tx_desc = &txr->tx_base[first]; 4158 } 4159 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 4160 /* See if we can continue to the next packet */ 4161 last = tx_buffer->next_eop; 4162 if (last != -1) { 4163 eop_desc = &txr->tx_base[last]; 4164 /* Get new done point */ 4165 if (++last == adapter->num_tx_desc) last = 0; 4166 done = last; 4167 } else 4168 break; 4169 } 4170 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 4171 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 4172 4173 txr->next_to_clean = first; 4174 4175 /* 4176 ** Hang detection: we know there's work outstanding 4177 ** or the entry return would have been taken, so no 4178 ** descriptor processed here indicates a potential hang. 4179 ** The local timer will examine this and do a reset if needed. 4180 */ 4181 if (processed == 0) { 4182 if (txr->busy != EM_TX_HUNG) 4183 ++txr->busy; 4184 } else /* At least one descriptor was cleaned */ 4185 txr->busy = EM_TX_BUSY; /* note this clears HUNG */ 4186 4187 /* 4188 * If we have a minimum free, clear IFF_DRV_OACTIVE 4189 * to tell the stack that it is OK to send packets. 4190 * Notice that all writes of OACTIVE happen under the 4191 * TX lock which, with a single queue, guarantees 4192 * sanity. 4193 */ 4194 if (txr->tx_avail >= EM_MAX_SCATTER) { 4195 if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); 4196 } 4197 4198 /* Disable hang detection if all clean */ 4199 if (txr->tx_avail == adapter->num_tx_desc) 4200 txr->busy = EM_TX_IDLE; 4201 } 4202 4203 /********************************************************************* 4204 * 4205 * Refresh RX descriptor mbufs from system mbuf buffer pool. 4206 * 4207 **********************************************************************/ 4208 static void 4209 em_refresh_mbufs(struct rx_ring *rxr, int limit) 4210 { 4211 struct adapter *adapter = rxr->adapter; 4212 struct mbuf *m; 4213 bus_dma_segment_t segs; 4214 struct em_rxbuffer *rxbuf; 4215 int i, j, error, nsegs; 4216 bool cleaned = FALSE; 4217 4218 i = j = rxr->next_to_refresh; 4219 /* 4220 ** Get one descriptor beyond 4221 ** our work mark to control 4222 ** the loop. 4223 */ 4224 if (++j == adapter->num_rx_desc) 4225 j = 0; 4226 4227 while (j != limit) { 4228 rxbuf = &rxr->rx_buffers[i]; 4229 if (rxbuf->m_head == NULL) { 4230 m = m_getjcl(M_NOWAIT, MT_DATA, 4231 M_PKTHDR, adapter->rx_mbuf_sz); 4232 /* 4233 ** If we have a temporary resource shortage 4234 ** that causes a failure, just abort refresh 4235 ** for now, we will return to this point when 4236 ** reinvoked from em_rxeof. 
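 ** (m_getjcl() with M_NOWAIT simply returns NULL when the cluster
 ** zone is exhausted, so the failure is not treated as fatal.)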
4237 */ 4238 if (m == NULL) 4239 goto update; 4240 } else 4241 m = rxbuf->m_head; 4242 4243 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz; 4244 m->m_flags |= M_PKTHDR; 4245 m->m_data = m->m_ext.ext_buf; 4246 4247 /* Use bus_dma machinery to setup the memory mapping */ 4248 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map, 4249 m, &segs, &nsegs, BUS_DMA_NOWAIT); 4250 if (error != 0) { 4251 printf("Refresh mbufs: hdr dmamap load" 4252 " failure - %d\n", error); 4253 m_free(m); 4254 rxbuf->m_head = NULL; 4255 goto update; 4256 } 4257 rxbuf->m_head = m; 4258 rxbuf->paddr = segs.ds_addr; 4259 bus_dmamap_sync(rxr->rxtag, 4260 rxbuf->map, BUS_DMASYNC_PREREAD); 4261 em_setup_rxdesc(&rxr->rx_base[i], rxbuf); 4262 cleaned = TRUE; 4263 4264 i = j; /* Next is precalulated for us */ 4265 rxr->next_to_refresh = i; 4266 /* Calculate next controlling index */ 4267 if (++j == adapter->num_rx_desc) 4268 j = 0; 4269 } 4270 update: 4271 /* 4272 ** Update the tail pointer only if, 4273 ** and as far as we have refreshed. 4274 */ 4275 if (cleaned) 4276 E1000_WRITE_REG(&adapter->hw, 4277 E1000_RDT(rxr->me), rxr->next_to_refresh); 4278 4279 return; 4280 } 4281 4282 4283 /********************************************************************* 4284 * 4285 * Allocate memory for rx_buffer structures. Since we use one 4286 * rx_buffer per received packet, the maximum number of rx_buffer's 4287 * that we'll need is equal to the number of receive descriptors 4288 * that we've allocated. 4289 * 4290 **********************************************************************/ 4291 static int 4292 em_allocate_receive_buffers(struct rx_ring *rxr) 4293 { 4294 struct adapter *adapter = rxr->adapter; 4295 device_t dev = adapter->dev; 4296 struct em_rxbuffer *rxbuf; 4297 int error; 4298 4299 rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) * 4300 adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); 4301 if (rxr->rx_buffers == NULL) { 4302 device_printf(dev, "Unable to allocate rx_buffer memory\n"); 4303 return (ENOMEM); 4304 } 4305 4306 error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 4307 1, 0, /* alignment, bounds */ 4308 BUS_SPACE_MAXADDR, /* lowaddr */ 4309 BUS_SPACE_MAXADDR, /* highaddr */ 4310 NULL, NULL, /* filter, filterarg */ 4311 MJUM9BYTES, /* maxsize */ 4312 1, /* nsegments */ 4313 MJUM9BYTES, /* maxsegsize */ 4314 0, /* flags */ 4315 NULL, /* lockfunc */ 4316 NULL, /* lockarg */ 4317 &rxr->rxtag); 4318 if (error) { 4319 device_printf(dev, "%s: bus_dma_tag_create failed %d\n", 4320 __func__, error); 4321 goto fail; 4322 } 4323 4324 rxbuf = rxr->rx_buffers; 4325 for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) { 4326 rxbuf = &rxr->rx_buffers[i]; 4327 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map); 4328 if (error) { 4329 device_printf(dev, "%s: bus_dmamap_create failed: %d\n", 4330 __func__, error); 4331 goto fail; 4332 } 4333 } 4334 4335 return (0); 4336 4337 fail: 4338 em_free_receive_structures(adapter); 4339 return (error); 4340 } 4341 4342 4343 /********************************************************************* 4344 * 4345 * Initialize a receive ring and its buffers. 
4346 * 4347 **********************************************************************/ 4348 static int 4349 em_setup_receive_ring(struct rx_ring *rxr) 4350 { 4351 struct adapter *adapter = rxr->adapter; 4352 struct em_rxbuffer *rxbuf; 4353 bus_dma_segment_t seg[1]; 4354 int rsize, nsegs, error = 0; 4355 #ifdef DEV_NETMAP 4356 struct netmap_slot *slot; 4357 struct netmap_adapter *na = netmap_getna(adapter->ifp); 4358 #endif 4359 4360 4361 /* Clear the ring contents */ 4362 EM_RX_LOCK(rxr); 4363 rsize = roundup2(adapter->num_rx_desc * 4364 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); 4365 bzero((void *)rxr->rx_base, rsize); 4366 #ifdef DEV_NETMAP 4367 slot = netmap_reset(na, NR_RX, rxr->me, 0); 4368 #endif 4369 4370 /* 4371 ** Free current RX buffer structs and their mbufs 4372 */ 4373 for (int i = 0; i < adapter->num_rx_desc; i++) { 4374 rxbuf = &rxr->rx_buffers[i]; 4375 if (rxbuf->m_head != NULL) { 4376 bus_dmamap_sync(rxr->rxtag, rxbuf->map, 4377 BUS_DMASYNC_POSTREAD); 4378 bus_dmamap_unload(rxr->rxtag, rxbuf->map); 4379 m_freem(rxbuf->m_head); 4380 rxbuf->m_head = NULL; /* mark as freed */ 4381 } 4382 } 4383 4384 /* Now replenish the mbufs */ 4385 for (int j = 0; j != adapter->num_rx_desc; ++j) { 4386 rxbuf = &rxr->rx_buffers[j]; 4387 #ifdef DEV_NETMAP 4388 if (slot) { 4389 int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j); 4390 uint64_t paddr; 4391 void *addr; 4392 4393 addr = PNMB(na, slot + si, &paddr); 4394 netmap_load_map(na, rxr->rxtag, rxbuf->map, addr); 4395 rxbuf->paddr = paddr; 4396 em_setup_rxdesc(&rxr->rx_base[j], rxbuf); 4397 continue; 4398 } 4399 #endif /* DEV_NETMAP */ 4400 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA, 4401 M_PKTHDR, adapter->rx_mbuf_sz); 4402 if (rxbuf->m_head == NULL) { 4403 error = ENOBUFS; 4404 goto fail; 4405 } 4406 rxbuf->m_head->m_len = adapter->rx_mbuf_sz; 4407 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */ 4408 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz; 4409 4410 /* Get the memory mapping */ 4411 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, 4412 rxbuf->map, rxbuf->m_head, seg, 4413 &nsegs, BUS_DMA_NOWAIT); 4414 if (error != 0) { 4415 m_freem(rxbuf->m_head); 4416 rxbuf->m_head = NULL; 4417 goto fail; 4418 } 4419 bus_dmamap_sync(rxr->rxtag, 4420 rxbuf->map, BUS_DMASYNC_PREREAD); 4421 4422 rxbuf->paddr = seg[0].ds_addr; 4423 em_setup_rxdesc(&rxr->rx_base[j], rxbuf); 4424 } 4425 rxr->next_to_check = 0; 4426 rxr->next_to_refresh = 0; 4427 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 4428 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 4429 4430 fail: 4431 EM_RX_UNLOCK(rxr); 4432 return (error); 4433 } 4434 4435 /********************************************************************* 4436 * 4437 * Initialize all receive rings. 4438 * 4439 **********************************************************************/ 4440 static int 4441 em_setup_receive_structures(struct adapter *adapter) 4442 { 4443 struct rx_ring *rxr = adapter->rx_rings; 4444 int q; 4445 4446 for (q = 0; q < adapter->num_queues; q++, rxr++) 4447 if (em_setup_receive_ring(rxr)) 4448 goto fail; 4449 4450 return (0); 4451 fail: 4452 /* 4453 * Free RX buffers allocated so far, we will only handle 4454 * the rings that completed, the failing case will have 4455 * cleaned up for itself. 'q' failed, so its the terminus. 
4456 */ 4457 for (int i = 0; i < q; ++i) { 4458 rxr = &adapter->rx_rings[i]; 4459 for (int n = 0; n < adapter->num_rx_desc; n++) { 4460 struct em_rxbuffer *rxbuf; 4461 rxbuf = &rxr->rx_buffers[n]; 4462 if (rxbuf->m_head != NULL) { 4463 bus_dmamap_sync(rxr->rxtag, rxbuf->map, 4464 BUS_DMASYNC_POSTREAD); 4465 bus_dmamap_unload(rxr->rxtag, rxbuf->map); 4466 m_freem(rxbuf->m_head); 4467 rxbuf->m_head = NULL; 4468 } 4469 } 4470 rxr->next_to_check = 0; 4471 rxr->next_to_refresh = 0; 4472 } 4473 4474 return (ENOBUFS); 4475 } 4476 4477 /********************************************************************* 4478 * 4479 * Free all receive rings. 4480 * 4481 **********************************************************************/ 4482 static void 4483 em_free_receive_structures(struct adapter *adapter) 4484 { 4485 struct rx_ring *rxr = adapter->rx_rings; 4486 4487 for (int i = 0; i < adapter->num_queues; i++, rxr++) { 4488 em_free_receive_buffers(rxr); 4489 /* Free the ring memory as well */ 4490 em_dma_free(adapter, &rxr->rxdma); 4491 EM_RX_LOCK_DESTROY(rxr); 4492 } 4493 4494 free(adapter->rx_rings, M_DEVBUF); 4495 } 4496 4497 4498 /********************************************************************* 4499 * 4500 * Free receive ring data structures 4501 * 4502 **********************************************************************/ 4503 static void 4504 em_free_receive_buffers(struct rx_ring *rxr) 4505 { 4506 struct adapter *adapter = rxr->adapter; 4507 struct em_rxbuffer *rxbuf = NULL; 4508 4509 INIT_DEBUGOUT("free_receive_buffers: begin"); 4510 4511 if (rxr->rx_buffers != NULL) { 4512 for (int i = 0; i < adapter->num_rx_desc; i++) { 4513 rxbuf = &rxr->rx_buffers[i]; 4514 if (rxbuf->map != NULL) { 4515 bus_dmamap_sync(rxr->rxtag, rxbuf->map, 4516 BUS_DMASYNC_POSTREAD); 4517 bus_dmamap_unload(rxr->rxtag, rxbuf->map); 4518 bus_dmamap_destroy(rxr->rxtag, rxbuf->map); 4519 } 4520 if (rxbuf->m_head != NULL) { 4521 m_freem(rxbuf->m_head); 4522 rxbuf->m_head = NULL; 4523 } 4524 } 4525 free(rxr->rx_buffers, M_DEVBUF); 4526 rxr->rx_buffers = NULL; 4527 rxr->next_to_check = 0; 4528 rxr->next_to_refresh = 0; 4529 } 4530 4531 if (rxr->rxtag != NULL) { 4532 bus_dma_tag_destroy(rxr->rxtag); 4533 rxr->rxtag = NULL; 4534 } 4535 4536 return; 4537 } 4538 4539 4540 /********************************************************************* 4541 * 4542 * Enable receive unit. 
4543 * 4544 **********************************************************************/ 4545 4546 static void 4547 em_initialize_receive_unit(struct adapter *adapter) 4548 { 4549 struct rx_ring *rxr = adapter->rx_rings; 4550 if_t ifp = adapter->ifp; 4551 struct e1000_hw *hw = &adapter->hw; 4552 u32 rctl, rxcsum, rfctl; 4553 4554 INIT_DEBUGOUT("em_initialize_receive_units: begin"); 4555 4556 /* 4557 * Make sure receives are disabled while setting 4558 * up the descriptor ring 4559 */ 4560 rctl = E1000_READ_REG(hw, E1000_RCTL); 4561 /* Do not disable if ever enabled on this hardware */ 4562 if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583)) 4563 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); 4564 4565 /* Setup the Receive Control Register */ 4566 rctl &= ~(3 << E1000_RCTL_MO_SHIFT); 4567 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | 4568 E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | 4569 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); 4570 4571 /* Do not store bad packets */ 4572 rctl &= ~E1000_RCTL_SBP; 4573 4574 /* Enable Long Packet receive */ 4575 if (if_getmtu(ifp) > ETHERMTU) 4576 rctl |= E1000_RCTL_LPE; 4577 else 4578 rctl &= ~E1000_RCTL_LPE; 4579 4580 /* Strip the CRC */ 4581 if (!em_disable_crc_stripping) 4582 rctl |= E1000_RCTL_SECRC; 4583 4584 E1000_WRITE_REG(&adapter->hw, E1000_RADV, 4585 adapter->rx_abs_int_delay.value); 4586 4587 E1000_WRITE_REG(&adapter->hw, E1000_RDTR, 4588 adapter->rx_int_delay.value); 4589 /* 4590 * Set the interrupt throttling rate. Value is calculated 4591 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) 4592 */ 4593 E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); 4594 4595 /* Use extended rx descriptor formats */ 4596 rfctl = E1000_READ_REG(hw, E1000_RFCTL); 4597 rfctl |= E1000_RFCTL_EXTEN; 4598 /* 4599 ** When using MSIX interrupts we need to throttle 4600 ** using the EITR register (82574 only) 4601 */ 4602 if (hw->mac.type == e1000_82574) { 4603 for (int i = 0; i < 4; i++) 4604 E1000_WRITE_REG(hw, E1000_EITR_82574(i), 4605 DEFAULT_ITR); 4606 /* Disable accelerated acknowledge */ 4607 rfctl |= E1000_RFCTL_ACK_DIS; 4608 } 4609 E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); 4610 4611 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); 4612 if (if_getcapenable(ifp) & IFCAP_RXCSUM) { 4613 #ifdef EM_MULTIQUEUE 4614 rxcsum |= E1000_RXCSUM_TUOFL | 4615 E1000_RXCSUM_IPOFL | 4616 E1000_RXCSUM_PCSD; 4617 #else 4618 rxcsum |= E1000_RXCSUM_TUOFL; 4619 #endif 4620 } else 4621 rxcsum &= ~E1000_RXCSUM_TUOFL; 4622 4623 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); 4624 4625 #ifdef EM_MULTIQUEUE 4626 #define RSSKEYLEN 10 4627 if (adapter->num_queues > 1) { 4628 uint8_t rss_key[4 * RSSKEYLEN]; 4629 uint32_t reta = 0; 4630 int i; 4631 4632 /* 4633 * Configure RSS key 4634 */ 4635 arc4rand(rss_key, sizeof(rss_key), 0); 4636 for (i = 0; i < RSSKEYLEN; ++i) { 4637 uint32_t rssrk = 0; 4638 4639 rssrk = EM_RSSRK_VAL(rss_key, i); 4640 E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk); 4641 } 4642 4643 /* 4644 * Configure RSS redirect table in following fashion: 4645 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] 4646 */ 4647 for (i = 0; i < sizeof(reta); ++i) { 4648 uint32_t q; 4649 4650 q = (i % adapter->num_queues) << 7; 4651 reta |= q << (8 * i); 4652 } 4653 4654 for (i = 0; i < 32; ++i) { 4655 E1000_WRITE_REG(hw, E1000_RETA(i), reta); 4656 } 4657 4658 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | 4659 E1000_MRQC_RSS_FIELD_IPV4_TCP | 4660 E1000_MRQC_RSS_FIELD_IPV4 | 4661 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX | 4662 E1000_MRQC_RSS_FIELD_IPV6_EX | 4663 
E1000_MRQC_RSS_FIELD_IPV6); 4664 } 4665 #endif 4666 /* 4667 ** XXX TEMPORARY WORKAROUND: on some systems with 82573 4668 ** long latencies are observed, like Lenovo X60. This 4669 ** change eliminates the problem, but since having positive 4670 ** values in RDTR is a known source of problems on other 4671 ** platforms another solution is being sought. 4672 */ 4673 if (hw->mac.type == e1000_82573) 4674 E1000_WRITE_REG(hw, E1000_RDTR, 0x20); 4675 4676 for (int i = 0; i < adapter->num_queues; i++, rxr++) { 4677 /* Setup the Base and Length of the Rx Descriptor Ring */ 4678 u64 bus_addr = rxr->rxdma.dma_paddr; 4679 u32 rdt = adapter->num_rx_desc - 1; /* default */ 4680 4681 E1000_WRITE_REG(hw, E1000_RDLEN(i), 4682 adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended)); 4683 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); 4684 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); 4685 /* Setup the Head and Tail Descriptor Pointers */ 4686 E1000_WRITE_REG(hw, E1000_RDH(i), 0); 4687 #ifdef DEV_NETMAP 4688 /* 4689 * an init() while a netmap client is active must 4690 * preserve the rx buffers passed to userspace. 4691 */ 4692 if (if_getcapenable(ifp) & IFCAP_NETMAP) { 4693 struct netmap_adapter *na = netmap_getna(adapter->ifp); 4694 rdt -= nm_kr_rxspace(&na->rx_rings[i]); 4695 } 4696 #endif /* DEV_NETMAP */ 4697 E1000_WRITE_REG(hw, E1000_RDT(i), rdt); 4698 } 4699 4700 /* 4701 * Set PTHRESH for improved jumbo performance 4702 * According to 10.2.5.11 of Intel 82574 Datasheet, 4703 * RXDCTL(1) is written whenever RXDCTL(0) is written. 4704 * Only write to RXDCTL(1) if there is a need for different 4705 * settings. 4706 */ 4707 if (((adapter->hw.mac.type == e1000_ich9lan) || 4708 (adapter->hw.mac.type == e1000_pch2lan) || 4709 (adapter->hw.mac.type == e1000_ich10lan)) && 4710 (if_getmtu(ifp) > ETHERMTU)) { 4711 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); 4712 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); 4713 } else if (adapter->hw.mac.type == e1000_82574) { 4714 for (int i = 0; i < adapter->num_queues; i++) { 4715 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); 4716 4717 rxdctl |= 0x20; /* PTHRESH */ 4718 rxdctl |= 4 << 8; /* HTHRESH */ 4719 rxdctl |= 4 << 16;/* WTHRESH */ 4720 rxdctl |= 1 << 24; /* Switch to granularity */ 4721 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); 4722 } 4723 } 4724 4725 if (adapter->hw.mac.type >= e1000_pch2lan) { 4726 if (if_getmtu(ifp) > ETHERMTU) 4727 e1000_lv_jumbo_workaround_ich8lan(hw, TRUE); 4728 else 4729 e1000_lv_jumbo_workaround_ich8lan(hw, FALSE); 4730 } 4731 4732 /* Make sure VLAN Filters are off */ 4733 rctl &= ~E1000_RCTL_VFE; 4734 4735 if (adapter->rx_mbuf_sz == MCLBYTES) 4736 rctl |= E1000_RCTL_SZ_2048; 4737 else if (adapter->rx_mbuf_sz == MJUMPAGESIZE) 4738 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; 4739 else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) 4740 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; 4741 4742 /* ensure we clear use DTYPE of 00 here */ 4743 rctl &= ~0x00000C00; 4744 /* Write out the settings */ 4745 E1000_WRITE_REG(hw, E1000_RCTL, rctl); 4746 4747 return; 4748 } 4749 4750 4751 /********************************************************************* 4752 * 4753 * This routine executes in interrupt context. It replenishes 4754 * the mbufs in the descriptor and sends data which has been 4755 * dma'ed into host memory to upper layer. 4756 * 4757 * We loop at most count times if count is > 0, or until done if 4758 * count < 0. 
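 * The RX lock is held while the ring is walked, but it is dropped
 * around the call to if_input() so the stack can run without it.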
4759 * 4760 * For polling we also now return the number of cleaned packets 4761 *********************************************************************/ 4762 static bool 4763 em_rxeof(struct rx_ring *rxr, int count, int *done) 4764 { 4765 struct adapter *adapter = rxr->adapter; 4766 if_t ifp = adapter->ifp; 4767 struct mbuf *mp, *sendmp; 4768 u32 status = 0; 4769 u16 len; 4770 int i, processed, rxdone = 0; 4771 bool eop; 4772 union e1000_rx_desc_extended *cur; 4773 4774 EM_RX_LOCK(rxr); 4775 4776 /* Sync the ring */ 4777 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 4778 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 4779 4780 4781 #ifdef DEV_NETMAP 4782 if (netmap_rx_irq(ifp, rxr->me, &processed)) { 4783 EM_RX_UNLOCK(rxr); 4784 return (FALSE); 4785 } 4786 #endif /* DEV_NETMAP */ 4787 4788 for (i = rxr->next_to_check, processed = 0; count != 0;) { 4789 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) 4790 break; 4791 4792 cur = &rxr->rx_base[i]; 4793 status = le32toh(cur->wb.upper.status_error); 4794 mp = sendmp = NULL; 4795 4796 if ((status & E1000_RXD_STAT_DD) == 0) 4797 break; 4798 4799 len = le16toh(cur->wb.upper.length); 4800 eop = (status & E1000_RXD_STAT_EOP) != 0; 4801 4802 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) || 4803 (rxr->discard == TRUE)) { 4804 adapter->dropped_pkts++; 4805 ++rxr->rx_discarded; 4806 if (!eop) /* Catch subsequent segs */ 4807 rxr->discard = TRUE; 4808 else 4809 rxr->discard = FALSE; 4810 em_rx_discard(rxr, i); 4811 goto next_desc; 4812 } 4813 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map); 4814 4815 /* Assign correct length to the current fragment */ 4816 mp = rxr->rx_buffers[i].m_head; 4817 mp->m_len = len; 4818 4819 /* Trigger for refresh */ 4820 rxr->rx_buffers[i].m_head = NULL; 4821 4822 /* First segment? */ 4823 if (rxr->fmp == NULL) { 4824 mp->m_pkthdr.len = len; 4825 rxr->fmp = rxr->lmp = mp; 4826 } else { 4827 /* Chain mbuf's together */ 4828 mp->m_flags &= ~M_PKTHDR; 4829 rxr->lmp->m_next = mp; 4830 rxr->lmp = mp; 4831 rxr->fmp->m_pkthdr.len += len; 4832 } 4833 4834 if (eop) { 4835 --count; 4836 sendmp = rxr->fmp; 4837 if_setrcvif(sendmp, ifp); 4838 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 4839 em_receive_checksum(status, sendmp); 4840 #ifndef __NO_STRICT_ALIGNMENT 4841 if (adapter->hw.mac.max_frame_size > 4842 (MCLBYTES - ETHER_ALIGN) && 4843 em_fixup_rx(rxr) != 0) 4844 goto skip; 4845 #endif 4846 if (status & E1000_RXD_STAT_VP) { 4847 if_setvtag(sendmp, 4848 le16toh(cur->wb.upper.vlan)); 4849 sendmp->m_flags |= M_VLANTAG; 4850 } 4851 #ifndef __NO_STRICT_ALIGNMENT 4852 skip: 4853 #endif 4854 rxr->fmp = rxr->lmp = NULL; 4855 } 4856 next_desc: 4857 /* Sync the ring */ 4858 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 4859 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 4860 4861 /* Zero out the receive descriptors status. */ 4862 cur->wb.upper.status_error &= htole32(~0xFF); 4863 ++rxdone; /* cumulative for POLL */ 4864 ++processed; 4865 4866 /* Advance our pointers to the next descriptor. 
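 * wrapping back to entry zero once the end of the ring is reached.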
*/ 4867 if (++i == adapter->num_rx_desc) 4868 i = 0; 4869 4870 /* Send to the stack */ 4871 if (sendmp != NULL) { 4872 rxr->next_to_check = i; 4873 EM_RX_UNLOCK(rxr); 4874 if_input(ifp, sendmp); 4875 EM_RX_LOCK(rxr); 4876 i = rxr->next_to_check; 4877 } 4878 4879 /* Only refresh mbufs every 8 descriptors */ 4880 if (processed == 8) { 4881 em_refresh_mbufs(rxr, i); 4882 processed = 0; 4883 } 4884 } 4885 4886 /* Catch any remaining refresh work */ 4887 if (e1000_rx_unrefreshed(rxr)) 4888 em_refresh_mbufs(rxr, i); 4889 4890 rxr->next_to_check = i; 4891 if (done != NULL) 4892 *done = rxdone; 4893 EM_RX_UNLOCK(rxr); 4894 4895 return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE); 4896 } 4897 4898 static __inline void 4899 em_rx_discard(struct rx_ring *rxr, int i) 4900 { 4901 struct em_rxbuffer *rbuf; 4902 4903 rbuf = &rxr->rx_buffers[i]; 4904 bus_dmamap_unload(rxr->rxtag, rbuf->map); 4905 4906 /* Free any previous pieces */ 4907 if (rxr->fmp != NULL) { 4908 rxr->fmp->m_flags |= M_PKTHDR; 4909 m_freem(rxr->fmp); 4910 rxr->fmp = NULL; 4911 rxr->lmp = NULL; 4912 } 4913 /* 4914 ** Free buffer and allow em_refresh_mbufs() 4915 ** to clean up and recharge buffer. 4916 */ 4917 if (rbuf->m_head) { 4918 m_free(rbuf->m_head); 4919 rbuf->m_head = NULL; 4920 } 4921 return; 4922 } 4923 4924 #ifndef __NO_STRICT_ALIGNMENT 4925 /* 4926 * When jumbo frames are enabled we should realign entire payload on 4927 * architecures with strict alignment. This is serious design mistake of 8254x 4928 * as it nullifies DMA operations. 8254x just allows RX buffer size to be 4929 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its 4930 * payload. On architecures without strict alignment restrictions 8254x still 4931 * performs unaligned memory access which would reduce the performance too. 4932 * To avoid copying over an entire frame to align, we allocate a new mbuf and 4933 * copy ethernet header to the new mbuf. The new mbuf is prepended into the 4934 * existing mbuf chain. 4935 * 4936 * Be aware, best performance of the 8254x is achived only when jumbo frame is 4937 * not used at all on architectures with strict alignment. 4938 */ 4939 static int 4940 em_fixup_rx(struct rx_ring *rxr) 4941 { 4942 struct adapter *adapter = rxr->adapter; 4943 struct mbuf *m, *n; 4944 int error; 4945 4946 error = 0; 4947 m = rxr->fmp; 4948 if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { 4949 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); 4950 m->m_data += ETHER_HDR_LEN; 4951 } else { 4952 MGETHDR(n, M_NOWAIT, MT_DATA); 4953 if (n != NULL) { 4954 bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); 4955 m->m_data += ETHER_HDR_LEN; 4956 m->m_len -= ETHER_HDR_LEN; 4957 n->m_len = ETHER_HDR_LEN; 4958 M_MOVE_PKTHDR(n, m); 4959 n->m_next = m; 4960 rxr->fmp = n; 4961 } else { 4962 adapter->dropped_pkts++; 4963 m_freem(rxr->fmp); 4964 rxr->fmp = NULL; 4965 error = ENOMEM; 4966 } 4967 } 4968 4969 return (error); 4970 } 4971 #endif 4972 4973 static void 4974 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf) 4975 { 4976 rxd->read.buffer_addr = htole64(rxbuf->paddr); 4977 /* DD bits must be cleared */ 4978 rxd->wb.upper.status_error= 0; 4979 } 4980 4981 /********************************************************************* 4982 * 4983 * Verify that the hardware indicated that the checksum is valid. 4984 * Inform the stack about the status of checksum so that stack 4985 * doesn't spend time verifying the checksum. 
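 * Setting CSUM_DATA_VALID | CSUM_PSEUDO_HDR with a csum_data of
 * 0xffff is the conventional way to tell the stack that the L4
 * checksum has already been verified.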
4986 * 4987 *********************************************************************/ 4988 static void 4989 em_receive_checksum(uint32_t status, struct mbuf *mp) 4990 { 4991 mp->m_pkthdr.csum_flags = 0; 4992 4993 /* Ignore Checksum bit is set */ 4994 if (status & E1000_RXD_STAT_IXSM) 4995 return; 4996 4997 /* If the IP checksum exists and there is no IP Checksum error */ 4998 if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) == 4999 E1000_RXD_STAT_IPCS) { 5000 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); 5001 } 5002 5003 /* TCP or UDP checksum */ 5004 if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) == 5005 E1000_RXD_STAT_TCPCS) { 5006 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 5007 mp->m_pkthdr.csum_data = htons(0xffff); 5008 } 5009 if (status & E1000_RXD_STAT_UDPCS) { 5010 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 5011 mp->m_pkthdr.csum_data = htons(0xffff); 5012 } 5013 } 5014 5015 /* 5016 * This routine is run via an vlan 5017 * config EVENT 5018 */ 5019 static void 5020 em_register_vlan(void *arg, if_t ifp, u16 vtag) 5021 { 5022 struct adapter *adapter = if_getsoftc(ifp); 5023 u32 index, bit; 5024 5025 if ((void*)adapter != arg) /* Not our event */ 5026 return; 5027 5028 if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */ 5029 return; 5030 5031 EM_CORE_LOCK(adapter); 5032 index = (vtag >> 5) & 0x7F; 5033 bit = vtag & 0x1F; 5034 adapter->shadow_vfta[index] |= (1 << bit); 5035 ++adapter->num_vlans; 5036 /* Re-init to load the changes */ 5037 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) 5038 em_init_locked(adapter); 5039 EM_CORE_UNLOCK(adapter); 5040 } 5041 5042 /* 5043 * This routine is run via an vlan 5044 * unconfig EVENT 5045 */ 5046 static void 5047 em_unregister_vlan(void *arg, if_t ifp, u16 vtag) 5048 { 5049 struct adapter *adapter = if_getsoftc(ifp); 5050 u32 index, bit; 5051 5052 if (adapter != arg) 5053 return; 5054 5055 if ((vtag == 0) || (vtag > 4095)) /* Invalid */ 5056 return; 5057 5058 EM_CORE_LOCK(adapter); 5059 index = (vtag >> 5) & 0x7F; 5060 bit = vtag & 0x1F; 5061 adapter->shadow_vfta[index] &= ~(1 << bit); 5062 --adapter->num_vlans; 5063 /* Re-init to load the changes */ 5064 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) 5065 em_init_locked(adapter); 5066 EM_CORE_UNLOCK(adapter); 5067 } 5068 5069 static void 5070 em_setup_vlan_hw_support(struct adapter *adapter) 5071 { 5072 struct e1000_hw *hw = &adapter->hw; 5073 u32 reg; 5074 5075 /* 5076 ** We get here thru init_locked, meaning 5077 ** a soft reset, this has already cleared 5078 ** the VFTA and other state, so if there 5079 ** have been no vlan's registered do nothing. 5080 */ 5081 if (adapter->num_vlans == 0) 5082 return; 5083 5084 /* 5085 ** A soft reset zero's out the VFTA, so 5086 ** we need to repopulate it now. 
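 ** The VFTA holds one bit per VLAN id in 32-bit words, so id 'n'
 ** maps to word (n >> 5), bit (n & 0x1F); e.g. VLAN 100 is word 3,
 ** bit 4, mirroring the shadow_vfta bookkeeping above.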
5087 */ 5088 for (int i = 0; i < EM_VFTA_SIZE; i++) 5089 if (adapter->shadow_vfta[i] != 0) 5090 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, 5091 i, adapter->shadow_vfta[i]); 5092 5093 reg = E1000_READ_REG(hw, E1000_CTRL); 5094 reg |= E1000_CTRL_VME; 5095 E1000_WRITE_REG(hw, E1000_CTRL, reg); 5096 5097 /* Enable the Filter Table */ 5098 reg = E1000_READ_REG(hw, E1000_RCTL); 5099 reg &= ~E1000_RCTL_CFIEN; 5100 reg |= E1000_RCTL_VFE; 5101 E1000_WRITE_REG(hw, E1000_RCTL, reg); 5102 } 5103 5104 static void 5105 em_enable_intr(struct adapter *adapter) 5106 { 5107 struct e1000_hw *hw = &adapter->hw; 5108 u32 ims_mask = IMS_ENABLE_MASK; 5109 5110 if (hw->mac.type == e1000_82574) { 5111 E1000_WRITE_REG(hw, EM_EIAC, adapter->ims); 5112 ims_mask |= adapter->ims; 5113 } 5114 E1000_WRITE_REG(hw, E1000_IMS, ims_mask); 5115 } 5116 5117 static void 5118 em_disable_intr(struct adapter *adapter) 5119 { 5120 struct e1000_hw *hw = &adapter->hw; 5121 5122 if (hw->mac.type == e1000_82574) 5123 E1000_WRITE_REG(hw, EM_EIAC, 0); 5124 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); 5125 } 5126 5127 /* 5128 * Bit of a misnomer, what this really means is 5129 * to enable OS management of the system... aka 5130 * to disable special hardware management features 5131 */ 5132 static void 5133 em_init_manageability(struct adapter *adapter) 5134 { 5135 /* A shared code workaround */ 5136 #define E1000_82542_MANC2H E1000_MANC2H 5137 if (adapter->has_manage) { 5138 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); 5139 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); 5140 5141 /* disable hardware interception of ARP */ 5142 manc &= ~(E1000_MANC_ARP_EN); 5143 5144 /* enable receiving management packets to the host */ 5145 manc |= E1000_MANC_EN_MNG2HOST; 5146 #define E1000_MNG2HOST_PORT_623 (1 << 5) 5147 #define E1000_MNG2HOST_PORT_664 (1 << 6) 5148 manc2h |= E1000_MNG2HOST_PORT_623; 5149 manc2h |= E1000_MNG2HOST_PORT_664; 5150 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); 5151 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); 5152 } 5153 } 5154 5155 /* 5156 * Give control back to hardware management 5157 * controller if there is one. 5158 */ 5159 static void 5160 em_release_manageability(struct adapter *adapter) 5161 { 5162 if (adapter->has_manage) { 5163 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); 5164 5165 /* re-enable hardware interception of ARP */ 5166 manc |= E1000_MANC_ARP_EN; 5167 manc &= ~E1000_MANC_EN_MNG2HOST; 5168 5169 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); 5170 } 5171 } 5172 5173 /* 5174 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. 5175 * For ASF and Pass Through versions of f/w this means 5176 * that the driver is loaded. For AMT version type f/w 5177 * this means that the network i/f is open. 5178 */ 5179 static void 5180 em_get_hw_control(struct adapter *adapter) 5181 { 5182 u32 ctrl_ext, swsm; 5183 5184 if (adapter->hw.mac.type == e1000_82573) { 5185 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); 5186 E1000_WRITE_REG(&adapter->hw, E1000_SWSM, 5187 swsm | E1000_SWSM_DRV_LOAD); 5188 return; 5189 } 5190 /* else */ 5191 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); 5192 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, 5193 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); 5194 return; 5195 } 5196 5197 /* 5198 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. 5199 * For ASF and Pass Through versions of f/w this means that 5200 * the driver is no longer loaded. For AMT versions of the 5201 * f/w this means that the network i/f is closed. 
5202 */ 5203 static void 5204 em_release_hw_control(struct adapter *adapter) 5205 { 5206 u32 ctrl_ext, swsm; 5207 5208 if (!adapter->has_manage) 5209 return; 5210 5211 if (adapter->hw.mac.type == e1000_82573) { 5212 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); 5213 E1000_WRITE_REG(&adapter->hw, E1000_SWSM, 5214 swsm & ~E1000_SWSM_DRV_LOAD); 5215 return; 5216 } 5217 /* else */ 5218 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); 5219 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, 5220 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); 5221 return; 5222 } 5223 5224 static int 5225 em_is_valid_ether_addr(u8 *addr) 5226 { 5227 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; 5228 5229 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { 5230 return (FALSE); 5231 } 5232 5233 return (TRUE); 5234 } 5235 5236 /* 5237 ** Parse the interface capabilities with regard 5238 ** to both system management and wake-on-lan for 5239 ** later use. 5240 */ 5241 static void 5242 em_get_wakeup(device_t dev) 5243 { 5244 struct adapter *adapter = device_get_softc(dev); 5245 u16 eeprom_data = 0, device_id, apme_mask; 5246 5247 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); 5248 apme_mask = EM_EEPROM_APME; 5249 5250 switch (adapter->hw.mac.type) { 5251 case e1000_82573: 5252 case e1000_82583: 5253 adapter->has_amt = TRUE; 5254 /* Falls thru */ 5255 case e1000_82571: 5256 case e1000_82572: 5257 case e1000_80003es2lan: 5258 if (adapter->hw.bus.func == 1) { 5259 e1000_read_nvm(&adapter->hw, 5260 NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); 5261 break; 5262 } else 5263 e1000_read_nvm(&adapter->hw, 5264 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); 5265 break; 5266 case e1000_ich8lan: 5267 case e1000_ich9lan: 5268 case e1000_ich10lan: 5269 case e1000_pchlan: 5270 case e1000_pch2lan: 5271 apme_mask = E1000_WUC_APME; 5272 adapter->has_amt = TRUE; 5273 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC); 5274 break; 5275 default: 5276 e1000_read_nvm(&adapter->hw, 5277 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); 5278 break; 5279 } 5280 if (eeprom_data & apme_mask) 5281 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); 5282 /* 5283 * We have the eeprom settings, now apply the special cases 5284 * where the eeprom may be wrong or the board won't support 5285 * wake on lan on a particular port 5286 */ 5287 device_id = pci_get_device(dev); 5288 switch (device_id) { 5289 case E1000_DEV_ID_82571EB_FIBER: 5290 /* Wake events only supported on port A for dual fiber 5291 * regardless of eeprom setting */ 5292 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & 5293 E1000_STATUS_FUNC_1) 5294 adapter->wol = 0; 5295 break; 5296 case E1000_DEV_ID_82571EB_QUAD_COPPER: 5297 case E1000_DEV_ID_82571EB_QUAD_FIBER: 5298 case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: 5299 /* if quad port adapter, disable WoL on all but port A */ 5300 if (global_quad_port_a != 0) 5301 adapter->wol = 0; 5302 /* Reset for multiple quad port adapters */ 5303 if (++global_quad_port_a == 4) 5304 global_quad_port_a = 0; 5305 break; 5306 } 5307 return; 5308 } 5309 5310 5311 /* 5312 * Enable PCI Wake On Lan capability 5313 */ 5314 static void 5315 em_enable_wakeup(device_t dev) 5316 { 5317 struct adapter *adapter = device_get_softc(dev); 5318 if_t ifp = adapter->ifp; 5319 u32 pmc, ctrl, ctrl_ext, rctl; 5320 u16 status; 5321 5322 if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0)) 5323 return; 5324 5325 /* Advertise the wakeup capability */ 5326 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); 5327 ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); 5328 
E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); 5329 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); 5330 5331 if ((adapter->hw.mac.type == e1000_ich8lan) || 5332 (adapter->hw.mac.type == e1000_pchlan) || 5333 (adapter->hw.mac.type == e1000_ich9lan) || 5334 (adapter->hw.mac.type == e1000_ich10lan)) 5335 e1000_suspend_workarounds_ich8lan(&adapter->hw); 5336 5337 /* Keep the laser running on Fiber adapters */ 5338 if (adapter->hw.phy.media_type == e1000_media_type_fiber || 5339 adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { 5340 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); 5341 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; 5342 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext); 5343 } 5344 5345 /* 5346 ** Determine type of Wakeup: note that wol 5347 ** is set with all bits on by default. 5348 */ 5349 if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0) 5350 adapter->wol &= ~E1000_WUFC_MAG; 5351 5352 if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0) 5353 adapter->wol &= ~E1000_WUFC_MC; 5354 else { 5355 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 5356 rctl |= E1000_RCTL_MPE; 5357 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); 5358 } 5359 5360 if ((adapter->hw.mac.type == e1000_pchlan) || 5361 (adapter->hw.mac.type == e1000_pch2lan)) { 5362 if (em_enable_phy_wakeup(adapter)) 5363 return; 5364 } else { 5365 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); 5366 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); 5367 } 5368 5369 if (adapter->hw.phy.type == e1000_phy_igp_3) 5370 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); 5371 5372 /* Request PME */ 5373 status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); 5374 status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); 5375 if (if_getcapenable(ifp) & IFCAP_WOL) 5376 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; 5377 pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); 5378 5379 return; 5380 } 5381 5382 /* 5383 ** WOL in the newer chipset interfaces (pchlan) 5384 ** require thing to be copied into the phy 5385 */ 5386 static int 5387 em_enable_phy_wakeup(struct adapter *adapter) 5388 { 5389 struct e1000_hw *hw = &adapter->hw; 5390 u32 mreg, ret = 0; 5391 u16 preg; 5392 5393 /* copy MAC RARs to PHY RARs */ 5394 e1000_copy_rx_addrs_to_phy_ich8lan(hw); 5395 5396 /* copy MAC MTA to PHY MTA */ 5397 for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) { 5398 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); 5399 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); 5400 e1000_write_phy_reg(hw, BM_MTA(i) + 1, 5401 (u16)((mreg >> 16) & 0xFFFF)); 5402 } 5403 5404 /* configure PHY Rx Control register */ 5405 e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg); 5406 mreg = E1000_READ_REG(hw, E1000_RCTL); 5407 if (mreg & E1000_RCTL_UPE) 5408 preg |= BM_RCTL_UPE; 5409 if (mreg & E1000_RCTL_MPE) 5410 preg |= BM_RCTL_MPE; 5411 preg &= ~(BM_RCTL_MO_MASK); 5412 if (mreg & E1000_RCTL_MO_3) 5413 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) 5414 << BM_RCTL_MO_SHIFT); 5415 if (mreg & E1000_RCTL_BAM) 5416 preg |= BM_RCTL_BAM; 5417 if (mreg & E1000_RCTL_PMCF) 5418 preg |= BM_RCTL_PMCF; 5419 mreg = E1000_READ_REG(hw, E1000_CTRL); 5420 if (mreg & E1000_CTRL_RFCE) 5421 preg |= BM_RCTL_RFCE; 5422 e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg); 5423 5424 /* enable PHY wakeup in MAC register */ 5425 E1000_WRITE_REG(hw, E1000_WUC, 5426 E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN); 5427 E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); 5428 5429 /* configure and enable PHY wakeup in 
PHY registers */ 5430 e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol); 5431 e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); 5432 5433 /* activate PHY wakeup */ 5434 ret = hw->phy.ops.acquire(hw); 5435 if (ret) { 5436 printf("Could not acquire PHY\n"); 5437 return ret; 5438 } 5439 e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, 5440 (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); 5441 ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); 5442 if (ret) { 5443 printf("Could not read PHY page 769\n"); 5444 goto out; 5445 } 5446 preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; 5447 ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); 5448 if (ret) 5449 printf("Could not set PHY Host Wakeup bit\n"); 5450 out: 5451 hw->phy.ops.release(hw); 5452 5453 return ret; 5454 } 5455 5456 static void 5457 em_led_func(void *arg, int onoff) 5458 { 5459 struct adapter *adapter = arg; 5460 5461 EM_CORE_LOCK(adapter); 5462 if (onoff) { 5463 e1000_setup_led(&adapter->hw); 5464 e1000_led_on(&adapter->hw); 5465 } else { 5466 e1000_led_off(&adapter->hw); 5467 e1000_cleanup_led(&adapter->hw); 5468 } 5469 EM_CORE_UNLOCK(adapter); 5470 } 5471 5472 /* 5473 ** Disable the L0S and L1 LINK states 5474 */ 5475 static void 5476 em_disable_aspm(struct adapter *adapter) 5477 { 5478 int base, reg; 5479 u16 link_cap,link_ctrl; 5480 device_t dev = adapter->dev; 5481 5482 switch (adapter->hw.mac.type) { 5483 case e1000_82573: 5484 case e1000_82574: 5485 case e1000_82583: 5486 break; 5487 default: 5488 return; 5489 } 5490 if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0) 5491 return; 5492 reg = base + PCIER_LINK_CAP; 5493 link_cap = pci_read_config(dev, reg, 2); 5494 if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0) 5495 return; 5496 reg = base + PCIER_LINK_CTL; 5497 link_ctrl = pci_read_config(dev, reg, 2); 5498 link_ctrl &= ~PCIEM_LINK_CTL_ASPMC; 5499 pci_write_config(dev, reg, link_ctrl, 2); 5500 return; 5501 } 5502 5503 /********************************************************************** 5504 * 5505 * Update the board statistics counters. 
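 * Most of these hardware statistics registers are clear-on-read, so
 * each value read below is accumulated into the running totals kept
 * in the softc.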
5506 * 5507 **********************************************************************/ 5508 static void 5509 em_update_stats_counters(struct adapter *adapter) 5510 { 5511 5512 if(adapter->hw.phy.media_type == e1000_media_type_copper || 5513 (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { 5514 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS); 5515 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC); 5516 } 5517 adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS); 5518 adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC); 5519 adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC); 5520 adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL); 5521 5522 adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC); 5523 adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL); 5524 adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC); 5525 adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC); 5526 adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC); 5527 adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC); 5528 adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC); 5529 adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); 5530 adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); 5531 adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); 5532 adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); 5533 adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); 5534 adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255); 5535 adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); 5536 adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); 5537 adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); 5538 adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); 5539 adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); 5540 adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); 5541 adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); 5542 5543 /* For the 64-bit byte counters the low dword must be read first. 
*/ 5544 /* Both registers clear on the read of the high dword */ 5545 5546 adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + 5547 ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32); 5548 adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + 5549 ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32); 5550 5551 adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); 5552 adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); 5553 adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); 5554 adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); 5555 adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); 5556 5557 adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); 5558 adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); 5559 5560 adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); 5561 adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); 5562 adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); 5563 adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); 5564 adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); 5565 adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); 5566 adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); 5567 adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); 5568 adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); 5569 adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC); 5570 5571 /* Interrupt Counts */ 5572 5573 adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC); 5574 adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC); 5575 adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC); 5576 adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC); 5577 adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC); 5578 adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC); 5579 adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC); 5580 adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC); 5581 adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC); 5582 5583 if (adapter->hw.mac.type >= e1000_82543) { 5584 adapter->stats.algnerrc += 5585 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); 5586 adapter->stats.rxerrc += 5587 E1000_READ_REG(&adapter->hw, E1000_RXERRC); 5588 adapter->stats.tncrs += 5589 E1000_READ_REG(&adapter->hw, E1000_TNCRS); 5590 adapter->stats.cexterr += 5591 E1000_READ_REG(&adapter->hw, E1000_CEXTERR); 5592 adapter->stats.tsctc += 5593 E1000_READ_REG(&adapter->hw, E1000_TSCTC); 5594 adapter->stats.tsctfc += 5595 E1000_READ_REG(&adapter->hw, E1000_TSCTFC); 5596 } 5597 } 5598 5599 static uint64_t 5600 em_get_counter(if_t ifp, ift_counter cnt) 5601 { 5602 struct adapter *adapter; 5603 5604 adapter = if_getsoftc(ifp); 5605 5606 switch (cnt) { 5607 case IFCOUNTER_COLLISIONS: 5608 return (adapter->stats.colc); 5609 case IFCOUNTER_IERRORS: 5610 return (adapter->dropped_pkts + adapter->stats.rxerrc + 5611 adapter->stats.crcerrs + adapter->stats.algnerrc + 5612 adapter->stats.ruc + adapter->stats.roc + 5613 adapter->stats.mpc + adapter->stats.cexterr); 5614 case IFCOUNTER_OERRORS: 5615 return (adapter->stats.ecol + adapter->stats.latecol + 5616 adapter->watchdog_events); 5617 default: 5618 return (if_get_counter_default(ifp, cnt)); 5619 } 5620 } 5621 5622 /* Export a single 32-bit register via a read-only 
sysctl. */ 5623 static int 5624 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) 5625 { 5626 struct adapter *adapter; 5627 u_int val; 5628 5629 adapter = oidp->oid_arg1; 5630 val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); 5631 return (sysctl_handle_int(oidp, &val, 0, req)); 5632 } 5633 5634 /* 5635 * Add sysctl variables, one per statistic, to the system. 5636 */ 5637 static void 5638 em_add_hw_stats(struct adapter *adapter) 5639 { 5640 device_t dev = adapter->dev; 5641 5642 struct tx_ring *txr = adapter->tx_rings; 5643 struct rx_ring *rxr = adapter->rx_rings; 5644 5645 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); 5646 struct sysctl_oid *tree = device_get_sysctl_tree(dev); 5647 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); 5648 struct e1000_hw_stats *stats = &adapter->stats; 5649 5650 struct sysctl_oid *stat_node, *queue_node, *int_node; 5651 struct sysctl_oid_list *stat_list, *queue_list, *int_list; 5652 5653 #define QUEUE_NAME_LEN 32 5654 char namebuf[QUEUE_NAME_LEN]; 5655 5656 /* Driver Statistics */ 5657 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 5658 CTLFLAG_RD, &adapter->dropped_pkts, 5659 "Driver dropped packets"); 5660 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", 5661 CTLFLAG_RD, &adapter->link_irq, 5662 "Link MSIX IRQ Handled"); 5663 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", 5664 CTLFLAG_RD, &adapter->mbuf_defrag_failed, 5665 "Defragmenting mbuf chain failed"); 5666 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 5667 CTLFLAG_RD, &adapter->no_tx_dma_setup, 5668 "Driver tx dma failure in xmit"); 5669 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", 5670 CTLFLAG_RD, &adapter->rx_overruns, 5671 "RX overruns"); 5672 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", 5673 CTLFLAG_RD, &adapter->watchdog_events, 5674 "Watchdog timeouts"); 5675 5676 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", 5677 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL, 5678 em_sysctl_reg_handler, "IU", 5679 "Device Control Register"); 5680 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", 5681 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL, 5682 em_sysctl_reg_handler, "IU", 5683 "Receiver Control Register"); 5684 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", 5685 CTLFLAG_RD, &adapter->hw.fc.high_water, 0, 5686 "Flow Control High Watermark"); 5687 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 5688 CTLFLAG_RD, &adapter->hw.fc.low_water, 0, 5689 "Flow Control Low Watermark"); 5690 5691 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { 5692 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i); 5693 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 5694 CTLFLAG_RD, NULL, "TX Queue Name"); 5695 queue_list = SYSCTL_CHILDREN(queue_node); 5696 5697 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 5698 CTLTYPE_UINT | CTLFLAG_RD, adapter, 5699 E1000_TDH(txr->me), 5700 em_sysctl_reg_handler, "IU", 5701 "Transmit Descriptor Head"); 5702 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 5703 CTLTYPE_UINT | CTLFLAG_RD, adapter, 5704 E1000_TDT(txr->me), 5705 em_sysctl_reg_handler, "IU", 5706 "Transmit Descriptor Tail"); 5707 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq", 5708 CTLFLAG_RD, &txr->tx_irq, 5709 "Queue MSI-X Transmit Interrupts"); 5710 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 5711 CTLFLAG_RD, &txr->no_desc_avail, 5712 "Queue No Descriptor Available"); 5713 5714 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i); 5715 queue_node = SYSCTL_ADD_NODE(ctx, child, 
OID_AUTO, namebuf, 5716 CTLFLAG_RD, NULL, "RX Queue Name"); 5717 queue_list = SYSCTL_CHILDREN(queue_node); 5718 5719 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 5720 CTLTYPE_UINT | CTLFLAG_RD, adapter, 5721 E1000_RDH(rxr->me), 5722 em_sysctl_reg_handler, "IU", 5723 "Receive Descriptor Head"); 5724 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 5725 CTLTYPE_UINT | CTLFLAG_RD, adapter, 5726 E1000_RDT(rxr->me), 5727 em_sysctl_reg_handler, "IU", 5728 "Receive Descriptor Tail"); 5729 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq", 5730 CTLFLAG_RD, &rxr->rx_irq, 5731 "Queue MSI-X Receive Interrupts"); 5732 } 5733 5734 /* MAC stats get their own sub node */ 5735 5736 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 5737 CTLFLAG_RD, NULL, "Statistics"); 5738 stat_list = SYSCTL_CHILDREN(stat_node); 5739 5740 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll", 5741 CTLFLAG_RD, &stats->ecol, 5742 "Excessive collisions"); 5743 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll", 5744 CTLFLAG_RD, &stats->scc, 5745 "Single collisions"); 5746 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 5747 CTLFLAG_RD, &stats->mcc, 5748 "Multiple collisions"); 5749 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll", 5750 CTLFLAG_RD, &stats->latecol, 5751 "Late collisions"); 5752 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count", 5753 CTLFLAG_RD, &stats->colc, 5754 "Collision Count"); 5755 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors", 5756 CTLFLAG_RD, &adapter->stats.symerrs, 5757 "Symbol Errors"); 5758 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors", 5759 CTLFLAG_RD, &adapter->stats.sec, 5760 "Sequence Errors"); 5761 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count", 5762 CTLFLAG_RD, &adapter->stats.dc, 5763 "Defer Count"); 5764 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets", 5765 CTLFLAG_RD, &adapter->stats.mpc, 5766 "Missed Packets"); 5767 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", 5768 CTLFLAG_RD, &adapter->stats.rnbc, 5769 "Receive No Buffers"); 5770 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize", 5771 CTLFLAG_RD, &adapter->stats.ruc, 5772 "Receive Undersize"); 5773 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", 5774 CTLFLAG_RD, &adapter->stats.rfc, 5775 "Fragmented Packets Received"); 5776 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize", 5777 CTLFLAG_RD, &adapter->stats.roc, 5778 "Oversized Packets Received"); 5779 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber", 5780 CTLFLAG_RD, &adapter->stats.rjc, 5781 "Received Jabber"); 5782 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs", 5783 CTLFLAG_RD, &adapter->stats.rxerrc, 5784 "Receive Errors"); 5785 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", 5786 CTLFLAG_RD, &adapter->stats.crcerrs, 5787 "CRC errors"); 5788 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs", 5789 CTLFLAG_RD, &adapter->stats.algnerrc, 5790 "Alignment Errors"); 5791 /* On 82575 these are collision counts */ 5792 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", 5793 CTLFLAG_RD, &adapter->stats.cexterr, 5794 "Collision/Carrier extension errors"); 5795 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", 5796 CTLFLAG_RD, &adapter->stats.xonrxc, 5797 "XON Received"); 5798 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", 5799 CTLFLAG_RD, &adapter->stats.xontxc, 5800 "XON Transmitted"); 5801 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", 5802 CTLFLAG_RD,
&adapter->stats.xoffrxc, 5803 "XOFF Received"); 5804 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", 5805 CTLFLAG_RD, &adapter->stats.xofftxc, 5806 "XOFF Transmitted"); 5807 5808 /* Packet Reception Stats */ 5809 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", 5810 CTLFLAG_RD, &adapter->stats.tpr, 5811 "Total Packets Received"); 5812 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", 5813 CTLFLAG_RD, &adapter->stats.gprc, 5814 "Good Packets Received"); 5815 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", 5816 CTLFLAG_RD, &adapter->stats.bprc, 5817 "Broadcast Packets Received"); 5818 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", 5819 CTLFLAG_RD, &adapter->stats.mprc, 5820 "Multicast Packets Received"); 5821 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", 5822 CTLFLAG_RD, &adapter->stats.prc64, 5823 "64 byte frames received"); 5824 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", 5825 CTLFLAG_RD, &adapter->stats.prc127, 5826 "65-127 byte frames received"); 5827 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", 5828 CTLFLAG_RD, &adapter->stats.prc255, 5829 "128-255 byte frames received"); 5830 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", 5831 CTLFLAG_RD, &adapter->stats.prc511, 5832 "256-511 byte frames received"); 5833 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", 5834 CTLFLAG_RD, &adapter->stats.prc1023, 5835 "512-1023 byte frames received"); 5836 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", 5837 CTLFLAG_RD, &adapter->stats.prc1522, 5838 "1024-1522 byte frames received"); 5839 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 5840 CTLFLAG_RD, &adapter->stats.gorc, 5841 "Good Octets Received"); 5842 5843 /* Packet Transmission Stats */ 5844 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 5845 CTLFLAG_RD, &adapter->stats.gotc, 5846 "Good Octets Transmitted"); 5847 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", 5848 CTLFLAG_RD, &adapter->stats.tpt, 5849 "Total Packets Transmitted"); 5850 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", 5851 CTLFLAG_RD, &adapter->stats.gptc, 5852 "Good Packets Transmitted"); 5853 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", 5854 CTLFLAG_RD, &adapter->stats.bptc, 5855 "Broadcast Packets Transmitted"); 5856 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", 5857 CTLFLAG_RD, &adapter->stats.mptc, 5858 "Multicast Packets Transmitted"); 5859 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", 5860 CTLFLAG_RD, &adapter->stats.ptc64, 5861 "64 byte frames transmitted"); 5862 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", 5863 CTLFLAG_RD, &adapter->stats.ptc127, 5864 "65-127 byte frames transmitted"); 5865 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", 5866 CTLFLAG_RD, &adapter->stats.ptc255, 5867 "128-255 byte frames transmitted"); 5868 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", 5869 CTLFLAG_RD, &adapter->stats.ptc511, 5870 "256-511 byte frames transmitted"); 5871 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", 5872 CTLFLAG_RD, &adapter->stats.ptc1023, 5873 "512-1023 byte frames transmitted"); 5874 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", 5875 CTLFLAG_RD, &adapter->stats.ptc1522, 5876 "1024-1522 byte frames transmitted"); 5877 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd", 5878 CTLFLAG_RD, &adapter->stats.tsctc, 5879
"TSO Contexts Transmitted"); 5880 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", 5881 CTLFLAG_RD, &adapter->stats.tsctfc, 5882 "TSO Contexts Failed"); 5883 5884 5885 /* Interrupt Stats */ 5886 5887 int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 5888 CTLFLAG_RD, NULL, "Interrupt Statistics"); 5889 int_list = SYSCTL_CHILDREN(int_node); 5890 5891 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts", 5892 CTLFLAG_RD, &adapter->stats.iac, 5893 "Interrupt Assertion Count"); 5894 5895 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", 5896 CTLFLAG_RD, &adapter->stats.icrxptc, 5897 "Interrupt Cause Rx Pkt Timer Expire Count"); 5898 5899 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", 5900 CTLFLAG_RD, &adapter->stats.icrxatc, 5901 "Interrupt Cause Rx Abs Timer Expire Count"); 5902 5903 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", 5904 CTLFLAG_RD, &adapter->stats.ictxptc, 5905 "Interrupt Cause Tx Pkt Timer Expire Count"); 5906 5907 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", 5908 CTLFLAG_RD, &adapter->stats.ictxatc, 5909 "Interrupt Cause Tx Abs Timer Expire Count"); 5910 5911 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", 5912 CTLFLAG_RD, &adapter->stats.ictxqec, 5913 "Interrupt Cause Tx Queue Empty Count"); 5914 5915 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", 5916 CTLFLAG_RD, &adapter->stats.ictxqmtc, 5917 "Interrupt Cause Tx Queue Min Thresh Count"); 5918 5919 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", 5920 CTLFLAG_RD, &adapter->stats.icrxdmtc, 5921 "Interrupt Cause Rx Desc Min Thresh Count"); 5922 5923 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun", 5924 CTLFLAG_RD, &adapter->stats.icrxoc, 5925 "Interrupt Cause Receiver Overrun Count"); 5926 } 5927 5928 /********************************************************************** 5929 * 5930 * This routine provides a way to dump out the adapter eeprom, 5931 * often a useful debug/service tool. This only dumps the first 5932 * 32 words, stuff that matters is in that extent. 5933 * 5934 **********************************************************************/ 5935 static int 5936 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) 5937 { 5938 struct adapter *adapter = (struct adapter *)arg1; 5939 int error; 5940 int result; 5941 5942 result = -1; 5943 error = sysctl_handle_int(oidp, &result, 0, req); 5944 5945 if (error || !req->newptr) 5946 return (error); 5947 5948 /* 5949 * This value will cause a hex dump of the 5950 * first 32 16-bit words of the EEPROM to 5951 * the screen. 
5952 */ 5953 if (result == 1) 5954 em_print_nvm_info(adapter); 5955 5956 return (error); 5957 } 5958 5959 static void 5960 em_print_nvm_info(struct adapter *adapter) 5961 { 5962 u16 eeprom_data; 5963 int i, j, row = 0; 5964 5965 /* Its a bit crude, but it gets the job done */ 5966 printf("\nInterface EEPROM Dump:\n"); 5967 printf("Offset\n0x0000 "); 5968 for (i = 0, j = 0; i < 32; i++, j++) { 5969 if (j == 8) { /* Make the offset block */ 5970 j = 0; ++row; 5971 printf("\n0x00%x0 ",row); 5972 } 5973 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); 5974 printf("%04x ", eeprom_data); 5975 } 5976 printf("\n"); 5977 } 5978 5979 static int 5980 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) 5981 { 5982 struct em_int_delay_info *info; 5983 struct adapter *adapter; 5984 u32 regval; 5985 int error, usecs, ticks; 5986 5987 info = (struct em_int_delay_info *)arg1; 5988 usecs = info->value; 5989 error = sysctl_handle_int(oidp, &usecs, 0, req); 5990 if (error != 0 || req->newptr == NULL) 5991 return (error); 5992 if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) 5993 return (EINVAL); 5994 info->value = usecs; 5995 ticks = EM_USECS_TO_TICKS(usecs); 5996 if (info->offset == E1000_ITR) /* units are 256ns here */ 5997 ticks *= 4; 5998 5999 adapter = info->adapter; 6000 6001 EM_CORE_LOCK(adapter); 6002 regval = E1000_READ_OFFSET(&adapter->hw, info->offset); 6003 regval = (regval & ~0xffff) | (ticks & 0xffff); 6004 /* Handle a few special cases. */ 6005 switch (info->offset) { 6006 case E1000_RDTR: 6007 break; 6008 case E1000_TIDV: 6009 if (ticks == 0) { 6010 adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; 6011 /* Don't write 0 into the TIDV register. */ 6012 regval++; 6013 } else 6014 adapter->txd_cmd |= E1000_TXD_CMD_IDE; 6015 break; 6016 } 6017 E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); 6018 EM_CORE_UNLOCK(adapter); 6019 return (0); 6020 } 6021 6022 static void 6023 em_add_int_delay_sysctl(struct adapter *adapter, const char *name, 6024 const char *description, struct em_int_delay_info *info, 6025 int offset, int value) 6026 { 6027 info->adapter = adapter; 6028 info->offset = offset; 6029 info->value = value; 6030 SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), 6031 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), 6032 OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, 6033 info, 0, em_sysctl_int_delay, "I", description); 6034 } 6035 6036 static void 6037 em_set_sysctl_value(struct adapter *adapter, const char *name, 6038 const char *description, int *limit, int value) 6039 { 6040 *limit = value; 6041 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), 6042 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), 6043 OID_AUTO, name, CTLFLAG_RW, limit, value, description); 6044 } 6045 6046 6047 /* 6048 ** Set flow control using sysctl: 6049 ** Flow control values: 6050 ** 0 - off 6051 ** 1 - rx pause 6052 ** 2 - tx pause 6053 ** 3 - full 6054 */ 6055 static int 6056 em_set_flowcntl(SYSCTL_HANDLER_ARGS) 6057 { 6058 int error; 6059 static int input = 3; /* default is full */ 6060 struct adapter *adapter = (struct adapter *) arg1; 6061 6062 error = sysctl_handle_int(oidp, &input, 0, req); 6063 6064 if ((error) || (req->newptr == NULL)) 6065 return (error); 6066 6067 if (input == adapter->fc) /* no change? 
*/ 6068 return (error); 6069 6070 switch (input) { 6071 case e1000_fc_rx_pause: 6072 case e1000_fc_tx_pause: 6073 case e1000_fc_full: 6074 case e1000_fc_none: 6075 adapter->hw.fc.requested_mode = input; 6076 adapter->fc = input; 6077 break; 6078 default: 6079 /* Do nothing */ 6080 return (error); 6081 } 6082 6083 adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; 6084 e1000_force_mac_fc(&adapter->hw); 6085 return (error); 6086 } 6087 6088 /* 6089 ** Manage Energy Efficient Ethernet: 6090 ** Control values: 6091 ** 0/1 - enabled/disabled 6092 */ 6093 static int 6094 em_sysctl_eee(SYSCTL_HANDLER_ARGS) 6095 { 6096 struct adapter *adapter = (struct adapter *) arg1; 6097 int error, value; 6098 6099 value = adapter->hw.dev_spec.ich8lan.eee_disable; 6100 error = sysctl_handle_int(oidp, &value, 0, req); 6101 if (error || req->newptr == NULL) 6102 return (error); 6103 EM_CORE_LOCK(adapter); 6104 adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0); 6105 em_init_locked(adapter); 6106 EM_CORE_UNLOCK(adapter); 6107 return (0); 6108 } 6109 6110 static int 6111 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS) 6112 { 6113 struct adapter *adapter; 6114 int error; 6115 int result; 6116 6117 result = -1; 6118 error = sysctl_handle_int(oidp, &result, 0, req); 6119 6120 if (error || !req->newptr) 6121 return (error); 6122 6123 if (result == 1) { 6124 adapter = (struct adapter *)arg1; 6125 em_print_debug_info(adapter); 6126 } 6127 6128 return (error); 6129 } 6130 6131 /* 6132 ** This routine is meant to be fluid, add whatever is 6133 ** needed for debugging a problem. -jfv 6134 */ 6135 static void 6136 em_print_debug_info(struct adapter *adapter) 6137 { 6138 device_t dev = adapter->dev; 6139 struct tx_ring *txr = adapter->tx_rings; 6140 struct rx_ring *rxr = adapter->rx_rings; 6141 6142 if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) 6143 printf("Interface is RUNNING "); 6144 else 6145 printf("Interface is NOT RUNNING\n"); 6146 6147 if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE) 6148 printf("and INACTIVE\n"); 6149 else 6150 printf("and ACTIVE\n"); 6151 6152 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { 6153 device_printf(dev, "TX Queue %d ------\n", i); 6154 device_printf(dev, "hw tdh = %d, hw tdt = %d\n", 6155 E1000_READ_REG(&adapter->hw, E1000_TDH(i)), 6156 E1000_READ_REG(&adapter->hw, E1000_TDT(i))); 6157 device_printf(dev, "Tx Queue Status = %d\n", txr->busy); 6158 device_printf(dev, "TX descriptors avail = %d\n", 6159 txr->tx_avail); 6160 device_printf(dev, "Tx Descriptors avail failure = %ld\n", 6161 txr->no_desc_avail); 6162 device_printf(dev, "RX Queue %d ------\n", i); 6163 device_printf(dev, "hw rdh = %d, hw rdt = %d\n", 6164 E1000_READ_REG(&adapter->hw, E1000_RDH(i)), 6165 E1000_READ_REG(&adapter->hw, E1000_RDT(i))); 6166 device_printf(dev, "RX discarded packets = %ld\n", 6167 rxr->rx_discarded); 6168 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check); 6169 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh); 6170 } 6171 } 6172 6173 #ifdef EM_MULTIQUEUE 6174 /* 6175 * 82574 only: 6176 * Write a new value to the EEPROM increasing the number of MSIX 6177 * vectors from 3 to 5, for proper multiqueue support. 
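 *
 * From the code below, the MSI-X count field in EM_NVM_PCIE_CTRL appears
 * to encode (vectors - 1): a field value of 4 is what yields the 5
 * vectors reported in the message.  The word is written back with
 * e1000_write_nvm() and the NVM checksum is then recomputed with
 * e1000_update_nvm_checksum() so the image stays valid.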
6178 */ 6179 static void 6180 em_enable_vectors_82574(struct adapter *adapter) 6181 { 6182 struct e1000_hw *hw = &adapter->hw; 6183 device_t dev = adapter->dev; 6184 u16 edata; 6185 6186 e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); 6187 printf("Current cap: %#06x\n", edata); 6188 if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) { 6189 device_printf(dev, "Writing to eeprom: increasing " 6190 "reported MSIX vectors from 3 to 5...\n"); 6191 edata &= ~(EM_NVM_MSIX_N_MASK); 6192 edata |= 4 << EM_NVM_MSIX_N_SHIFT; 6193 e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); 6194 e1000_update_nvm_checksum(hw); 6195 device_printf(dev, "Writing to eeprom: done\n"); 6196 } 6197 } 6198 #endif 6199 6200 #ifdef DDB 6201 DB_COMMAND(em_reset_dev, em_ddb_reset_dev) 6202 { 6203 devclass_t dc; 6204 int max_em; 6205 6206 dc = devclass_find("em"); 6207 max_em = devclass_get_maxunit(dc); 6208 6209 for (int index = 0; index < (max_em - 1); index++) { 6210 device_t dev; 6211 dev = devclass_get_device(dc, index); 6212 if (device_get_driver(dev) == &em_driver) { 6213 struct adapter *adapter = device_get_softc(dev); 6214 EM_CORE_LOCK(adapter); 6215 em_init_locked(adapter); 6216 EM_CORE_UNLOCK(adapter); 6217 } 6218 } 6219 } 6220 DB_COMMAND(em_dump_queue, em_ddb_dump_queue) 6221 { 6222 devclass_t dc; 6223 int max_em; 6224 6225 dc = devclass_find("em"); 6226 max_em = devclass_get_maxunit(dc); 6227 6228 for (int index = 0; index < (max_em - 1); index++) { 6229 device_t dev; 6230 dev = devclass_get_device(dc, index); 6231 if (device_get_driver(dev) == &em_driver) 6232 em_print_debug_info(device_get_softc(dev)); 6233 } 6234 6235 } 6236 #endif 6237
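/*
 * Illustrative only, not part of the driver build: a minimal userland
 * reader for the per-MAC statistics exported by em_add_hw_stats() above,
 * using sysctlbyname(3).  The OID path assumes the standard
 * dev.<driver>.<unit> device sysctl tree and unit 0 ("dev.em.0"); adjust
 * the unit and statistic name as needed.  Kept under "#if 0" so it does
 * not affect compilation of this file.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t good_rx;
	size_t len = sizeof(good_rx);

	/* "good_pkts_recvd" is a 64-bit (UQUAD) node under "mac_stats". */
	if (sysctlbyname("dev.em.0.mac_stats.good_pkts_recvd",
	    &good_rx, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("good packets received: %ju\n", (uintmax_t)good_rx);
	return (0);
}
#endif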