1 /****************************************************************************** 2 3 Copyright (c) 2001-2015, Intel Corporation 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 3. Neither the name of the Intel Corporation nor the names of its 17 contributors may be used to endorse or promote products derived from 18 this software without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 POSSIBILITY OF SUCH DAMAGE. 
31 32 ******************************************************************************/ 33 /*$FreeBSD$*/ 34 35 #include "opt_em.h" 36 #include "opt_ddb.h" 37 #include "opt_inet.h" 38 #include "opt_inet6.h" 39 40 #ifdef HAVE_KERNEL_OPTION_HEADERS 41 #include "opt_device_polling.h" 42 #endif 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #ifdef DDB 47 #include <sys/types.h> 48 #include <ddb/ddb.h> 49 #endif 50 #if __FreeBSD_version >= 800000 51 #include <sys/buf_ring.h> 52 #endif 53 #include <sys/bus.h> 54 #include <sys/endian.h> 55 #include <sys/kernel.h> 56 #include <sys/kthread.h> 57 #include <sys/malloc.h> 58 #include <sys/mbuf.h> 59 #include <sys/module.h> 60 #include <sys/rman.h> 61 #include <sys/smp.h> 62 #include <sys/socket.h> 63 #include <sys/sockio.h> 64 #include <sys/sysctl.h> 65 #include <sys/taskqueue.h> 66 #include <sys/eventhandler.h> 67 #include <machine/bus.h> 68 #include <machine/resource.h> 69 70 #include <net/bpf.h> 71 #include <net/ethernet.h> 72 #include <net/if.h> 73 #include <net/if_var.h> 74 #include <net/if_arp.h> 75 #include <net/if_dl.h> 76 #include <net/if_media.h> 77 78 #include <net/if_types.h> 79 #include <net/if_vlan_var.h> 80 81 #include <netinet/in_systm.h> 82 #include <netinet/in.h> 83 #include <netinet/if_ether.h> 84 #include <netinet/ip.h> 85 #include <netinet/ip6.h> 86 #include <netinet/tcp.h> 87 #include <netinet/udp.h> 88 89 #include <machine/in_cksum.h> 90 #include <dev/led/led.h> 91 #include <dev/pci/pcivar.h> 92 #include <dev/pci/pcireg.h> 93 94 #include "e1000_api.h" 95 #include "e1000_82571.h" 96 #include "if_em.h" 97 98 /********************************************************************* 99 * Driver version: 100 *********************************************************************/ 101 char em_driver_version[] = "7.6.1-k"; 102 103 /********************************************************************* 104 * PCI Device ID Table 105 * 106 * Used by probe to select devices to load on 107 * Last field stores an 
index into e1000_strings 108 * Last entry must be all 0s 109 * 110 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } 111 *********************************************************************/ 112 113 static em_vendor_info_t em_vendor_info_array[] = 114 { 115 /* Intel(R) PRO/1000 Network Connection */ 116 { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, 117 { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, 118 { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, 119 { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, 120 PCI_ANY_ID, PCI_ANY_ID, 0}, 121 { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, 122 PCI_ANY_ID, PCI_ANY_ID, 0}, 123 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, 124 PCI_ANY_ID, PCI_ANY_ID, 0}, 125 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, 126 PCI_ANY_ID, PCI_ANY_ID, 0}, 127 { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, 128 PCI_ANY_ID, PCI_ANY_ID, 0}, 129 { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, 130 PCI_ANY_ID, PCI_ANY_ID, 0}, 131 { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, 132 { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, 133 { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, 134 { 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0}, 135 136 { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0}, 137 { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0}, 138 { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0}, 139 { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0}, 140 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, 141 PCI_ANY_ID, PCI_ANY_ID, 0}, 142 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, 143 PCI_ANY_ID, PCI_ANY_ID, 0}, 144 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, 145 PCI_ANY_ID, PCI_ANY_ID, 0}, 146 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, 147 PCI_ANY_ID, PCI_ANY_ID, 0}, 148 { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, 149 { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, 
PCI_ANY_ID, PCI_ANY_ID, 0}, 150 { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, 151 { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, 152 { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, 153 { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, 154 { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, 155 { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0}, 156 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, 157 { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, 158 { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, 159 { 0x8086, E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, 160 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0}, 161 { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, 162 { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, 163 { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, 164 { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0}, 165 { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0}, 166 { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0}, 167 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, 168 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, 169 { 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, 170 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, 171 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, 172 { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, 173 { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, 174 { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0}, 175 { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0}, 176 { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0}, 177 { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, 178 { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0}, 179 { 
0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, 180 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V, PCI_ANY_ID, PCI_ANY_ID, 0}, 181 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM, 182 PCI_ANY_ID, PCI_ANY_ID, 0}, 183 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V, 184 PCI_ANY_ID, PCI_ANY_ID, 0}, 185 { 0x8086, E1000_DEV_ID_PCH_I218_LM2, PCI_ANY_ID, PCI_ANY_ID, 0}, 186 { 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0}, 187 { 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0}, 188 { 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0}, 189 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, 190 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V, PCI_ANY_ID, PCI_ANY_ID, 0}, 191 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2, 192 PCI_ANY_ID, PCI_ANY_ID, 0}, 193 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0}, 194 { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3, 195 PCI_ANY_ID, PCI_ANY_ID, 0}, 196 /* required last entry */ 197 { 0, 0, 0, 0, 0} 198 }; 199 200 /********************************************************************* 201 * Table of branding strings for all supported NICs. 
202 *********************************************************************/ 203 204 static char *em_strings[] = { 205 "Intel(R) PRO/1000 Network Connection" 206 }; 207 208 /********************************************************************* 209 * Function prototypes 210 *********************************************************************/ 211 static int em_probe(device_t); 212 static int em_attach(device_t); 213 static int em_detach(device_t); 214 static int em_shutdown(device_t); 215 static int em_suspend(device_t); 216 static int em_resume(device_t); 217 #ifdef EM_MULTIQUEUE 218 static int em_mq_start(if_t, struct mbuf *); 219 static int em_mq_start_locked(if_t, 220 struct tx_ring *); 221 static void em_qflush(if_t); 222 #else 223 static void em_start(if_t); 224 static void em_start_locked(if_t, struct tx_ring *); 225 #endif 226 static int em_ioctl(if_t, u_long, caddr_t); 227 static uint64_t em_get_counter(if_t, ift_counter); 228 static void em_init(void *); 229 static void em_init_locked(struct adapter *); 230 static void em_stop(void *); 231 static void em_media_status(if_t, struct ifmediareq *); 232 static int em_media_change(if_t); 233 static void em_identify_hardware(struct adapter *); 234 static int em_allocate_pci_resources(struct adapter *); 235 static int em_allocate_legacy(struct adapter *); 236 static int em_allocate_msix(struct adapter *); 237 static int em_allocate_queues(struct adapter *); 238 static int em_setup_msix(struct adapter *); 239 static void em_free_pci_resources(struct adapter *); 240 static void em_local_timer(void *); 241 static void em_reset(struct adapter *); 242 static int em_setup_interface(device_t, struct adapter *); 243 static void em_flush_desc_rings(struct adapter *); 244 245 static void em_setup_transmit_structures(struct adapter *); 246 static void em_initialize_transmit_unit(struct adapter *); 247 static int em_allocate_transmit_buffers(struct tx_ring *); 248 static void em_free_transmit_structures(struct adapter *); 249 
static void em_free_transmit_buffers(struct tx_ring *); 250 251 static int em_setup_receive_structures(struct adapter *); 252 static int em_allocate_receive_buffers(struct rx_ring *); 253 static void em_initialize_receive_unit(struct adapter *); 254 static void em_free_receive_structures(struct adapter *); 255 static void em_free_receive_buffers(struct rx_ring *); 256 257 static void em_enable_intr(struct adapter *); 258 static void em_disable_intr(struct adapter *); 259 static void em_update_stats_counters(struct adapter *); 260 static void em_add_hw_stats(struct adapter *adapter); 261 static void em_txeof(struct tx_ring *); 262 static bool em_rxeof(struct rx_ring *, int, int *); 263 #ifndef __NO_STRICT_ALIGNMENT 264 static int em_fixup_rx(struct rx_ring *); 265 #endif 266 static void em_setup_rxdesc(union e1000_rx_desc_extended *, 267 const struct em_rxbuffer *rxbuf); 268 static void em_receive_checksum(uint32_t status, struct mbuf *); 269 static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int, 270 struct ip *, u32 *, u32 *); 271 static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *, 272 struct tcphdr *, u32 *, u32 *); 273 static void em_set_promisc(struct adapter *); 274 static void em_disable_promisc(struct adapter *); 275 static void em_set_multi(struct adapter *); 276 static void em_update_link_status(struct adapter *); 277 static void em_refresh_mbufs(struct rx_ring *, int); 278 static void em_register_vlan(void *, if_t, u16); 279 static void em_unregister_vlan(void *, if_t, u16); 280 static void em_setup_vlan_hw_support(struct adapter *); 281 static int em_xmit(struct tx_ring *, struct mbuf **); 282 static int em_dma_malloc(struct adapter *, bus_size_t, 283 struct em_dma_alloc *, int); 284 static void em_dma_free(struct adapter *, struct em_dma_alloc *); 285 static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); 286 static void em_print_nvm_info(struct adapter *); 287 static int 
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS); 288 static void em_print_debug_info(struct adapter *); 289 static int em_is_valid_ether_addr(u8 *); 290 static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS); 291 static void em_add_int_delay_sysctl(struct adapter *, const char *, 292 const char *, struct em_int_delay_info *, int, int); 293 /* Management and WOL Support */ 294 static void em_init_manageability(struct adapter *); 295 static void em_release_manageability(struct adapter *); 296 static void em_get_hw_control(struct adapter *); 297 static void em_release_hw_control(struct adapter *); 298 static void em_get_wakeup(device_t); 299 static void em_enable_wakeup(device_t); 300 static int em_enable_phy_wakeup(struct adapter *); 301 static void em_led_func(void *, int); 302 static void em_disable_aspm(struct adapter *); 303 304 static int em_irq_fast(void *); 305 306 /* MSIX handlers */ 307 static void em_msix_tx(void *); 308 static void em_msix_rx(void *); 309 static void em_msix_link(void *); 310 static void em_handle_tx(void *context, int pending); 311 static void em_handle_rx(void *context, int pending); 312 static void em_handle_link(void *context, int pending); 313 314 #ifdef EM_MULTIQUEUE 315 static void em_enable_vectors_82574(struct adapter *); 316 #endif 317 318 static void em_set_sysctl_value(struct adapter *, const char *, 319 const char *, int *, int); 320 static int em_set_flowcntl(SYSCTL_HANDLER_ARGS); 321 static int em_sysctl_eee(SYSCTL_HANDLER_ARGS); 322 323 static __inline void em_rx_discard(struct rx_ring *, int); 324 325 #ifdef DEVICE_POLLING 326 static poll_handler_t em_poll; 327 #endif /* POLLING */ 328 329 /********************************************************************* 330 * FreeBSD Device Interface Entry Points 331 *********************************************************************/ 332 333 static device_method_t em_methods[] = { 334 /* Device interface */ 335 DEVMETHOD(device_probe, em_probe), 336 DEVMETHOD(device_attach, em_attach), 337 
DEVMETHOD(device_detach, em_detach), 338 DEVMETHOD(device_shutdown, em_shutdown), 339 DEVMETHOD(device_suspend, em_suspend), 340 DEVMETHOD(device_resume, em_resume), 341 DEVMETHOD_END 342 }; 343 344 static driver_t em_driver = { 345 "em", em_methods, sizeof(struct adapter), 346 }; 347 348 devclass_t em_devclass; 349 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0); 350 MODULE_DEPEND(em, pci, 1, 1, 1); 351 MODULE_DEPEND(em, ether, 1, 1, 1); 352 #ifdef DEV_NETMAP 353 MODULE_DEPEND(em, netmap, 1, 1, 1); 354 #endif /* DEV_NETMAP */ 355 356 /********************************************************************* 357 * Tunable default values. 358 *********************************************************************/ 359 360 #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) 361 #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) 362 #define M_TSO_LEN 66 363 364 #define MAX_INTS_PER_SEC 8000 365 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) 366 367 /* Allow common code without TSO */ 368 #ifndef CSUM_TSO 369 #define CSUM_TSO 0 370 #endif 371 372 #define TSO_WORKAROUND 4 373 374 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters"); 375 376 static int em_disable_crc_stripping = 0; 377 SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN, 378 &em_disable_crc_stripping, 0, "Disable CRC Stripping"); 379 380 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); 381 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); 382 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt, 383 0, "Default transmit interrupt delay in usecs"); 384 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt, 385 0, "Default receive interrupt delay in usecs"); 386 387 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); 388 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); 389 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, 
CTLFLAG_RDTUN, 390 &em_tx_abs_int_delay_dflt, 0, 391 "Default transmit interrupt delay limit in usecs"); 392 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN, 393 &em_rx_abs_int_delay_dflt, 0, 394 "Default receive interrupt delay limit in usecs"); 395 396 static int em_rxd = EM_DEFAULT_RXD; 397 static int em_txd = EM_DEFAULT_TXD; 398 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0, 399 "Number of receive descriptors per queue"); 400 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0, 401 "Number of transmit descriptors per queue"); 402 403 static int em_smart_pwr_down = FALSE; 404 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down, 405 0, "Set to true to leave smart power down enabled on newer adapters"); 406 407 /* Controls whether promiscuous also shows bad packets */ 408 static int em_debug_sbp = FALSE; 409 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0, 410 "Show bad packets in promiscuous mode"); 411 412 static int em_enable_msix = TRUE; 413 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0, 414 "Enable MSI-X interrupts"); 415 416 #ifdef EM_MULTIQUEUE 417 static int em_num_queues = 1; 418 SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0, 419 "82574 only: Number of queues to configure, 0 indicates autoconfigure"); 420 #endif 421 422 /* 423 ** Global variable to store last used CPU when binding queues 424 ** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a 425 ** queue is bound to a cpu. 
426 */ 427 static int em_last_bind_cpu = -1; 428 429 /* How many packets rxeof tries to clean at a time */ 430 static int em_rx_process_limit = 100; 431 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, 432 &em_rx_process_limit, 0, 433 "Maximum number of received packets to process " 434 "at a time, -1 means unlimited"); 435 436 /* Energy efficient ethernet - default to OFF */ 437 static int eee_setting = 1; 438 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0, 439 "Enable Energy Efficient Ethernet"); 440 441 /* Global used in WOL setup with multiport cards */ 442 static int global_quad_port_a = 0; 443 444 #ifdef DEV_NETMAP /* see ixgbe.c for details */ 445 #include <dev/netmap/if_em_netmap.h> 446 #endif /* DEV_NETMAP */ 447 448 /********************************************************************* 449 * Device identification routine 450 * 451 * em_probe determines if the driver should be loaded on 452 * adapter based on PCI vendor/device id of the adapter. 
453 * 454 * return BUS_PROBE_DEFAULT on success, positive on failure 455 *********************************************************************/ 456 457 static int 458 em_probe(device_t dev) 459 { 460 char adapter_name[60]; 461 uint16_t pci_vendor_id = 0; 462 uint16_t pci_device_id = 0; 463 uint16_t pci_subvendor_id = 0; 464 uint16_t pci_subdevice_id = 0; 465 em_vendor_info_t *ent; 466 467 INIT_DEBUGOUT("em_probe: begin"); 468 469 pci_vendor_id = pci_get_vendor(dev); 470 if (pci_vendor_id != EM_VENDOR_ID) 471 return (ENXIO); 472 473 pci_device_id = pci_get_device(dev); 474 pci_subvendor_id = pci_get_subvendor(dev); 475 pci_subdevice_id = pci_get_subdevice(dev); 476 477 ent = em_vendor_info_array; 478 while (ent->vendor_id != 0) { 479 if ((pci_vendor_id == ent->vendor_id) && 480 (pci_device_id == ent->device_id) && 481 482 ((pci_subvendor_id == ent->subvendor_id) || 483 (ent->subvendor_id == PCI_ANY_ID)) && 484 485 ((pci_subdevice_id == ent->subdevice_id) || 486 (ent->subdevice_id == PCI_ANY_ID))) { 487 sprintf(adapter_name, "%s %s", 488 em_strings[ent->index], 489 em_driver_version); 490 device_set_desc_copy(dev, adapter_name); 491 return (BUS_PROBE_DEFAULT); 492 } 493 ent++; 494 } 495 496 return (ENXIO); 497 } 498 499 /********************************************************************* 500 * Device initialization routine 501 * 502 * The attach entry point is called when the driver is being loaded. 503 * This routine identifies the type of hardware, allocates all resources 504 * and initializes the hardware. 
505 * 506 * return 0 on success, positive on failure 507 *********************************************************************/ 508 509 static int 510 em_attach(device_t dev) 511 { 512 struct adapter *adapter; 513 struct e1000_hw *hw; 514 int error = 0; 515 516 INIT_DEBUGOUT("em_attach: begin"); 517 518 if (resource_disabled("em", device_get_unit(dev))) { 519 device_printf(dev, "Disabled by device hint\n"); 520 return (ENXIO); 521 } 522 523 adapter = device_get_softc(dev); 524 adapter->dev = adapter->osdep.dev = dev; 525 hw = &adapter->hw; 526 EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); 527 528 /* SYSCTL stuff */ 529 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), 530 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 531 OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, 532 em_sysctl_nvm_info, "I", "NVM Information"); 533 534 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), 535 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 536 OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, 537 em_sysctl_debug_info, "I", "Debug Information"); 538 539 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), 540 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 541 OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, 542 em_set_flowcntl, "I", "Flow Control"); 543 544 callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); 545 546 /* Determine hardware and mac info */ 547 em_identify_hardware(adapter); 548 549 /* Setup PCI resources */ 550 if (em_allocate_pci_resources(adapter)) { 551 device_printf(dev, "Allocation of PCI resources failed\n"); 552 error = ENXIO; 553 goto err_pci; 554 } 555 556 /* 557 ** For ICH8 and family we need to 558 ** map the flash memory, and this 559 ** must happen after the MAC is 560 ** identified 561 */ 562 if ((hw->mac.type == e1000_ich8lan) || 563 (hw->mac.type == e1000_ich9lan) || 564 (hw->mac.type == e1000_ich10lan) || 565 (hw->mac.type == e1000_pchlan) || 566 (hw->mac.type == e1000_pch2lan) || 567 (hw->mac.type == e1000_pch_lpt)) { 568 int rid = 
EM_BAR_TYPE_FLASH; 569 adapter->flash = bus_alloc_resource_any(dev, 570 SYS_RES_MEMORY, &rid, RF_ACTIVE); 571 if (adapter->flash == NULL) { 572 device_printf(dev, "Mapping of Flash failed\n"); 573 error = ENXIO; 574 goto err_pci; 575 } 576 /* This is used in the shared code */ 577 hw->flash_address = (u8 *)adapter->flash; 578 adapter->osdep.flash_bus_space_tag = 579 rman_get_bustag(adapter->flash); 580 adapter->osdep.flash_bus_space_handle = 581 rman_get_bushandle(adapter->flash); 582 } 583 /* 584 ** In the new SPT device flash is not a 585 ** separate BAR, rather it is also in BAR0, 586 ** so use the same tag and an offset handle for the 587 ** FLASH read/write macros in the shared code. 588 */ 589 else if (hw->mac.type == e1000_pch_spt) { 590 adapter->osdep.flash_bus_space_tag = 591 adapter->osdep.mem_bus_space_tag; 592 adapter->osdep.flash_bus_space_handle = 593 adapter->osdep.mem_bus_space_handle 594 + E1000_FLASH_BASE_ADDR; 595 } 596 597 /* Do Shared Code initialization */ 598 error = e1000_setup_init_funcs(hw, TRUE); 599 if (error) { 600 device_printf(dev, "Setup of Shared code failed, error %d\n", 601 error); 602 error = ENXIO; 603 goto err_pci; 604 } 605 606 /* 607 * Setup MSI/X or MSI if PCI Express 608 */ 609 adapter->msix = em_setup_msix(adapter); 610 611 e1000_get_bus_info(hw); 612 613 /* Set up some sysctls for the tunable interrupt delays */ 614 em_add_int_delay_sysctl(adapter, "rx_int_delay", 615 "receive interrupt delay in usecs", &adapter->rx_int_delay, 616 E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt); 617 em_add_int_delay_sysctl(adapter, "tx_int_delay", 618 "transmit interrupt delay in usecs", &adapter->tx_int_delay, 619 E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt); 620 em_add_int_delay_sysctl(adapter, "rx_abs_int_delay", 621 "receive interrupt delay limit in usecs", 622 &adapter->rx_abs_int_delay, 623 E1000_REGISTER(hw, E1000_RADV), 624 em_rx_abs_int_delay_dflt); 625 em_add_int_delay_sysctl(adapter, "tx_abs_int_delay", 626 
"transmit interrupt delay limit in usecs", 627 &adapter->tx_abs_int_delay, 628 E1000_REGISTER(hw, E1000_TADV), 629 em_tx_abs_int_delay_dflt); 630 em_add_int_delay_sysctl(adapter, "itr", 631 "interrupt delay limit in usecs/4", 632 &adapter->tx_itr, 633 E1000_REGISTER(hw, E1000_ITR), 634 DEFAULT_ITR); 635 636 /* Sysctl for limiting the amount of work done in the taskqueue */ 637 em_set_sysctl_value(adapter, "rx_processing_limit", 638 "max number of rx packets to process", &adapter->rx_process_limit, 639 em_rx_process_limit); 640 641 /* 642 * Validate number of transmit and receive descriptors. It 643 * must not exceed hardware maximum, and must be multiple 644 * of E1000_DBA_ALIGN. 645 */ 646 if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 || 647 (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) { 648 device_printf(dev, "Using %d TX descriptors instead of %d!\n", 649 EM_DEFAULT_TXD, em_txd); 650 adapter->num_tx_desc = EM_DEFAULT_TXD; 651 } else 652 adapter->num_tx_desc = em_txd; 653 654 if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 || 655 (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) { 656 device_printf(dev, "Using %d RX descriptors instead of %d!\n", 657 EM_DEFAULT_RXD, em_rxd); 658 adapter->num_rx_desc = EM_DEFAULT_RXD; 659 } else 660 adapter->num_rx_desc = em_rxd; 661 662 hw->mac.autoneg = DO_AUTO_NEG; 663 hw->phy.autoneg_wait_to_complete = FALSE; 664 hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; 665 666 /* Copper options */ 667 if (hw->phy.media_type == e1000_media_type_copper) { 668 hw->phy.mdix = AUTO_ALL_MODES; 669 hw->phy.disable_polarity_correction = FALSE; 670 hw->phy.ms_type = EM_MASTER_SLAVE; 671 } 672 673 /* 674 * Set the frame limits assuming 675 * standard ethernet sized frames. 676 */ 677 adapter->hw.mac.max_frame_size = 678 ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; 679 680 /* 681 * This controls when hardware reports transmit completion 682 * status. 
683 */ 684 hw->mac.report_tx_early = 1; 685 686 /* 687 ** Get queue/ring memory 688 */ 689 if (em_allocate_queues(adapter)) { 690 error = ENOMEM; 691 goto err_pci; 692 } 693 694 /* Allocate multicast array memory. */ 695 adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * 696 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); 697 if (adapter->mta == NULL) { 698 device_printf(dev, "Can not allocate multicast setup array\n"); 699 error = ENOMEM; 700 goto err_late; 701 } 702 703 /* Check SOL/IDER usage */ 704 if (e1000_check_reset_block(hw)) 705 device_printf(dev, "PHY reset is blocked" 706 " due to SOL/IDER session.\n"); 707 708 /* Sysctl for setting Energy Efficient Ethernet */ 709 hw->dev_spec.ich8lan.eee_disable = eee_setting; 710 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), 711 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 712 OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW, 713 adapter, 0, em_sysctl_eee, "I", 714 "Disable Energy Efficient Ethernet"); 715 716 /* 717 ** Start from a known state, this is 718 ** important in reading the nvm and 719 ** mac from that. 720 */ 721 e1000_reset_hw(hw); 722 723 724 /* Make sure we have a good EEPROM before we read from it */ 725 if (e1000_validate_nvm_checksum(hw) < 0) { 726 /* 727 ** Some PCI-E parts fail the first check due to 728 ** the link being in sleep state, call it again, 729 ** if it fails a second time its a real issue. 
730 */ 731 if (e1000_validate_nvm_checksum(hw) < 0) { 732 device_printf(dev, 733 "The EEPROM Checksum Is Not Valid\n"); 734 error = EIO; 735 goto err_late; 736 } 737 } 738 739 /* Copy the permanent MAC address out of the EEPROM */ 740 if (e1000_read_mac_addr(hw) < 0) { 741 device_printf(dev, "EEPROM read error while reading MAC" 742 " address\n"); 743 error = EIO; 744 goto err_late; 745 } 746 747 if (!em_is_valid_ether_addr(hw->mac.addr)) { 748 device_printf(dev, "Invalid MAC address\n"); 749 error = EIO; 750 goto err_late; 751 } 752 753 /* Disable ULP support */ 754 e1000_disable_ulp_lpt_lp(hw, TRUE); 755 756 /* 757 ** Do interrupt configuration 758 */ 759 if (adapter->msix > 1) /* Do MSIX */ 760 error = em_allocate_msix(adapter); 761 else /* MSI or Legacy */ 762 error = em_allocate_legacy(adapter); 763 if (error) 764 goto err_late; 765 766 /* 767 * Get Wake-on-Lan and Management info for later use 768 */ 769 em_get_wakeup(dev); 770 771 /* Setup OS specific network interface */ 772 if (em_setup_interface(dev, adapter) != 0) 773 goto err_late; 774 775 em_reset(adapter); 776 777 /* Initialize statistics */ 778 em_update_stats_counters(adapter); 779 780 hw->mac.get_link_status = 1; 781 em_update_link_status(adapter); 782 783 /* Register for VLAN events */ 784 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, 785 em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); 786 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, 787 em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); 788 789 em_add_hw_stats(adapter); 790 791 /* Non-AMT based hardware can now take control from firmware */ 792 if (adapter->has_manage && !adapter->has_amt) 793 em_get_hw_control(adapter); 794 795 /* Tell the stack that the interface is not active */ 796 if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 797 798 adapter->led_dev = led_create(em_led_func, adapter, 799 device_get_nameunit(dev)); 800 #ifdef DEV_NETMAP 801 em_netmap_attach(adapter); 802 #endif /* 
DEV_NETMAP */ 803 804 INIT_DEBUGOUT("em_attach: end"); 805 806 return (0); 807 808 err_late: 809 em_free_transmit_structures(adapter); 810 em_free_receive_structures(adapter); 811 em_release_hw_control(adapter); 812 if (adapter->ifp != (void *)NULL) 813 if_free(adapter->ifp); 814 err_pci: 815 em_free_pci_resources(adapter); 816 free(adapter->mta, M_DEVBUF); 817 EM_CORE_LOCK_DESTROY(adapter); 818 819 return (error); 820 } 821 822 /********************************************************************* 823 * Device removal routine 824 * 825 * The detach entry point is called when the driver is being removed. 826 * This routine stops the adapter and deallocates all the resources 827 * that were allocated for driver operation. 828 * 829 * return 0 on success, positive on failure 830 *********************************************************************/ 831 832 static int 833 em_detach(device_t dev) 834 { 835 struct adapter *adapter = device_get_softc(dev); 836 if_t ifp = adapter->ifp; 837 838 INIT_DEBUGOUT("em_detach: begin"); 839 840 /* Make sure VLANS are not using driver */ 841 if (if_vlantrunkinuse(ifp)) { 842 device_printf(dev,"Vlan in use, detach first\n"); 843 return (EBUSY); 844 } 845 846 #ifdef DEVICE_POLLING 847 if (if_getcapenable(ifp) & IFCAP_POLLING) 848 ether_poll_deregister(ifp); 849 #endif 850 851 if (adapter->led_dev != NULL) 852 led_destroy(adapter->led_dev); 853 854 EM_CORE_LOCK(adapter); 855 adapter->in_detach = 1; 856 em_stop(adapter); 857 EM_CORE_UNLOCK(adapter); 858 EM_CORE_LOCK_DESTROY(adapter); 859 860 e1000_phy_hw_reset(&adapter->hw); 861 862 em_release_manageability(adapter); 863 em_release_hw_control(adapter); 864 865 /* Unregister VLAN events */ 866 if (adapter->vlan_attach != NULL) 867 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); 868 if (adapter->vlan_detach != NULL) 869 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); 870 871 ether_ifdetach(adapter->ifp); 872 callout_drain(&adapter->timer); 873 874 #ifdef 
DEV_NETMAP 875 netmap_detach(ifp); 876 #endif /* DEV_NETMAP */ 877 878 em_free_pci_resources(adapter); 879 bus_generic_detach(dev); 880 if_free(ifp); 881 882 em_free_transmit_structures(adapter); 883 em_free_receive_structures(adapter); 884 885 em_release_hw_control(adapter); 886 free(adapter->mta, M_DEVBUF); 887 888 return (0); 889 } 890 891 /********************************************************************* 892 * 893 * Shutdown entry point 894 * 895 **********************************************************************/ 896 897 static int 898 em_shutdown(device_t dev) 899 { 900 return em_suspend(dev); 901 } 902 903 /* 904 * Suspend/resume device methods. 905 */ 906 static int 907 em_suspend(device_t dev) 908 { 909 struct adapter *adapter = device_get_softc(dev); 910 911 EM_CORE_LOCK(adapter); 912 913 em_release_manageability(adapter); 914 em_release_hw_control(adapter); 915 em_enable_wakeup(dev); 916 917 EM_CORE_UNLOCK(adapter); 918 919 return bus_generic_suspend(dev); 920 } 921 922 static int 923 em_resume(device_t dev) 924 { 925 struct adapter *adapter = device_get_softc(dev); 926 struct tx_ring *txr = adapter->tx_rings; 927 if_t ifp = adapter->ifp; 928 929 EM_CORE_LOCK(adapter); 930 if (adapter->hw.mac.type == e1000_pch2lan) 931 e1000_resume_workarounds_pchlan(&adapter->hw); 932 em_init_locked(adapter); 933 em_init_manageability(adapter); 934 935 if ((if_getflags(ifp) & IFF_UP) && 936 (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) { 937 for (int i = 0; i < adapter->num_queues; i++, txr++) { 938 EM_TX_LOCK(txr); 939 #ifdef EM_MULTIQUEUE 940 if (!drbr_empty(ifp, txr->br)) 941 em_mq_start_locked(ifp, txr); 942 #else 943 if (!if_sendq_empty(ifp)) 944 em_start_locked(ifp, txr); 945 #endif 946 EM_TX_UNLOCK(txr); 947 } 948 } 949 EM_CORE_UNLOCK(adapter); 950 951 return bus_generic_resume(dev); 952 } 953 954 955 #ifndef EM_MULTIQUEUE 956 static void 957 em_start_locked(if_t ifp, struct tx_ring *txr) 958 { 959 struct adapter *adapter = 
if_getsoftc(ifp);
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	/* Nothing to do unless running and not flow-blocked */
	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	/* Drain the interface send queue into the TX ring */
	while (!if_sendq_empty(ifp)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			/* Ring is full: set OACTIVE so the stack backs off */
			if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
			break;
		}
		m_head = if_dequeue(ifp);
		if (m_head == NULL)
			break;
		/*
		 * Encapsulation can modify our pointer, and or make it
		 * NULL on failure. In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			if_sendq_prepend(ifp, m_head);
			break;
		}

		/* Mark the queue as having work */
		if (txr->busy == EM_TX_IDLE)
			txr->busy = EM_TX_BUSY;

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

	}

	return;
}

/* Legacy if_start entry point: single TX ring only */
static void
em_start(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  however, if busy the driver can queue the request rather
 *  than do an immediate send. It is this that is an advantage
 *  in this driver, rather than also having multiple tx queues.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(if_t ifp, struct mbuf *m)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	unsigned int	i, error;

	/* Pick a queue: by flowid when the stack hashed the mbuf, else by CPU */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	/* Always enqueue first; the drain below pulls from the buf_ring */
	error = drbr_enqueue(ifp, txr->br, m);
	if (error)
		return (error);

	/* Drain now if the lock is free, otherwise defer to the taskqueue */
	if (EM_TX_TRYLOCK(txr)) {
		em_mq_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(txr->tq, &txr->tx_task);

	return (0);
}

/* Drain one ring's buf_ring onto the hardware; caller holds the TX lock */
static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	EM_TX_LOCK_ASSERT(txr);

	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0) {
		return (ENETDOWN);
	}

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				/* It was freed, move forward */
				drbr_advance(ifp, txr->br);
			} else {
				/*
				 * Still have one left, it may not be
				 * the same since the transmit function
				 * may have changed it.
				 */
				drbr_putback(ifp, txr->br, next);
			}
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
		if (next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
		ETHER_BPF_MTAP(ifp, next);
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
			break;
	}

	/* Mark the queue as having work */
	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
		txr->busy = EM_TX_BUSY;

	/* Reclaim descriptors; if still short, flow-control the stack */
	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER) {
		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
	}
	return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		/* Free every mbuf still parked in this ring's buf_ring */
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
1134 * 1135 * return 0 on success, positive on failure 1136 **********************************************************************/ 1137 1138 static int 1139 em_ioctl(if_t ifp, u_long command, caddr_t data) 1140 { 1141 struct adapter *adapter = if_getsoftc(ifp); 1142 struct ifreq *ifr = (struct ifreq *)data; 1143 #if defined(INET) || defined(INET6) 1144 struct ifaddr *ifa = (struct ifaddr *)data; 1145 #endif 1146 bool avoid_reset = FALSE; 1147 int error = 0; 1148 1149 if (adapter->in_detach) 1150 return (error); 1151 1152 switch (command) { 1153 case SIOCSIFADDR: 1154 #ifdef INET 1155 if (ifa->ifa_addr->sa_family == AF_INET) 1156 avoid_reset = TRUE; 1157 #endif 1158 #ifdef INET6 1159 if (ifa->ifa_addr->sa_family == AF_INET6) 1160 avoid_reset = TRUE; 1161 #endif 1162 /* 1163 ** Calling init results in link renegotiation, 1164 ** so we avoid doing it when possible. 1165 */ 1166 if (avoid_reset) { 1167 if_setflagbits(ifp,IFF_UP,0); 1168 if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING)) 1169 em_init(adapter); 1170 #ifdef INET 1171 if (!(if_getflags(ifp) & IFF_NOARP)) 1172 arp_ifinit(ifp, ifa); 1173 #endif 1174 } else 1175 error = ether_ioctl(ifp, command, data); 1176 break; 1177 case SIOCSIFMTU: 1178 { 1179 int max_frame_size; 1180 1181 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); 1182 1183 EM_CORE_LOCK(adapter); 1184 switch (adapter->hw.mac.type) { 1185 case e1000_82571: 1186 case e1000_82572: 1187 case e1000_ich9lan: 1188 case e1000_ich10lan: 1189 case e1000_pch2lan: 1190 case e1000_pch_lpt: 1191 case e1000_pch_spt: 1192 case e1000_82574: 1193 case e1000_82583: 1194 case e1000_80003es2lan: /* 9K Jumbo Frame size */ 1195 max_frame_size = 9234; 1196 break; 1197 case e1000_pchlan: 1198 max_frame_size = 4096; 1199 break; 1200 /* Adapters that do not support jumbo frames */ 1201 case e1000_ich8lan: 1202 max_frame_size = ETHER_MAX_LEN; 1203 break; 1204 default: 1205 max_frame_size = MAX_JUMBO_FRAME_SIZE; 1206 } 1207 if (ifr->ifr_mtu > max_frame_size - 
ETHER_HDR_LEN - 1208 ETHER_CRC_LEN) { 1209 EM_CORE_UNLOCK(adapter); 1210 error = EINVAL; 1211 break; 1212 } 1213 1214 if_setmtu(ifp, ifr->ifr_mtu); 1215 adapter->hw.mac.max_frame_size = 1216 if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN; 1217 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) 1218 em_init_locked(adapter); 1219 EM_CORE_UNLOCK(adapter); 1220 break; 1221 } 1222 case SIOCSIFFLAGS: 1223 IOCTL_DEBUGOUT("ioctl rcv'd:\ 1224 SIOCSIFFLAGS (Set Interface Flags)"); 1225 EM_CORE_LOCK(adapter); 1226 if (if_getflags(ifp) & IFF_UP) { 1227 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 1228 if ((if_getflags(ifp) ^ adapter->if_flags) & 1229 (IFF_PROMISC | IFF_ALLMULTI)) { 1230 em_disable_promisc(adapter); 1231 em_set_promisc(adapter); 1232 } 1233 } else 1234 em_init_locked(adapter); 1235 } else 1236 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) 1237 em_stop(adapter); 1238 adapter->if_flags = if_getflags(ifp); 1239 EM_CORE_UNLOCK(adapter); 1240 break; 1241 case SIOCADDMULTI: 1242 case SIOCDELMULTI: 1243 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); 1244 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 1245 EM_CORE_LOCK(adapter); 1246 em_disable_intr(adapter); 1247 em_set_multi(adapter); 1248 #ifdef DEVICE_POLLING 1249 if (!(if_getcapenable(ifp) & IFCAP_POLLING)) 1250 #endif 1251 em_enable_intr(adapter); 1252 EM_CORE_UNLOCK(adapter); 1253 } 1254 break; 1255 case SIOCSIFMEDIA: 1256 /* Check SOL/IDER usage */ 1257 EM_CORE_LOCK(adapter); 1258 if (e1000_check_reset_block(&adapter->hw)) { 1259 EM_CORE_UNLOCK(adapter); 1260 device_printf(adapter->dev, "Media change is" 1261 " blocked due to SOL/IDER session.\n"); 1262 break; 1263 } 1264 EM_CORE_UNLOCK(adapter); 1265 /* falls thru */ 1266 case SIOCGIFMEDIA: 1267 IOCTL_DEBUGOUT("ioctl rcv'd: \ 1268 SIOCxIFMEDIA (Get/Set Interface Media)"); 1269 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); 1270 break; 1271 case SIOCSIFCAP: 1272 { 1273 int mask, reinit; 1274 1275 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); 
1276 reinit = 0; 1277 mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); 1278 #ifdef DEVICE_POLLING 1279 if (mask & IFCAP_POLLING) { 1280 if (ifr->ifr_reqcap & IFCAP_POLLING) { 1281 error = ether_poll_register(em_poll, ifp); 1282 if (error) 1283 return (error); 1284 EM_CORE_LOCK(adapter); 1285 em_disable_intr(adapter); 1286 if_setcapenablebit(ifp, IFCAP_POLLING, 0); 1287 EM_CORE_UNLOCK(adapter); 1288 } else { 1289 error = ether_poll_deregister(ifp); 1290 /* Enable interrupt even in error case */ 1291 EM_CORE_LOCK(adapter); 1292 em_enable_intr(adapter); 1293 if_setcapenablebit(ifp, 0, IFCAP_POLLING); 1294 EM_CORE_UNLOCK(adapter); 1295 } 1296 } 1297 #endif 1298 if (mask & IFCAP_HWCSUM) { 1299 if_togglecapenable(ifp,IFCAP_HWCSUM); 1300 reinit = 1; 1301 } 1302 if (mask & IFCAP_TSO4) { 1303 if_togglecapenable(ifp,IFCAP_TSO4); 1304 reinit = 1; 1305 } 1306 if (mask & IFCAP_VLAN_HWTAGGING) { 1307 if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING); 1308 reinit = 1; 1309 } 1310 if (mask & IFCAP_VLAN_HWFILTER) { 1311 if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER); 1312 reinit = 1; 1313 } 1314 if (mask & IFCAP_VLAN_HWTSO) { 1315 if_togglecapenable(ifp, IFCAP_VLAN_HWTSO); 1316 reinit = 1; 1317 } 1318 if ((mask & IFCAP_WOL) && 1319 (if_getcapabilities(ifp) & IFCAP_WOL) != 0) { 1320 if (mask & IFCAP_WOL_MCAST) 1321 if_togglecapenable(ifp, IFCAP_WOL_MCAST); 1322 if (mask & IFCAP_WOL_MAGIC) 1323 if_togglecapenable(ifp, IFCAP_WOL_MAGIC); 1324 } 1325 if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING)) 1326 em_init(adapter); 1327 if_vlancap(ifp); 1328 break; 1329 } 1330 1331 default: 1332 error = ether_ioctl(ifp, command, data); 1333 break; 1334 } 1335 1336 return (error); 1337 } 1338 1339 1340 /********************************************************************* 1341 * Init entry point 1342 * 1343 * This routine is used in two ways. It is used by the stack as 1344 * init entry point in network interface structure. 
It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	/* Quiesce the device before reprogramming it */
	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, we make a duplicate
	 * in RAR[14] for that eventuality, this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	if_clearhwassist(ifp);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	/*
	** There have proven to be problems with TSO when not
	** at full gigabit speed, so disable the assist automatically
	** when at lower speeds.  -jfv
	*/
	if (if_getcapenable(ifp) & IFCAP_TSO4) {
		if (adapter->link_speed == SPEED_1000)
			if_sethwassistbits(ifp, CSUM_TSO, 0);
	}

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

/* Lock-acquiring wrapper used as the stack's if_init entry point */
static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	/* Periodically check for link state changes */
	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	/* Reclaim completed TX and restart if work is queued */
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	/* Reading ICR acknowledges/clears the interrupt causes */
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/* Mask interrupts and defer the real work to the taskqueue */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr);
#else
		if (!if_sendq_empty(ifp))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		/* More RX pending: reschedule ourselves, keep intr masked */
		if (more) {
			taskqueue_enqueue(adapter->tq,
			    &adapter->que_task);
			return;
		}
	}

	/* All caught up: unmask interrupts again */
	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif

	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		/* Limit hit: continue in task context, intr stays masked */
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else {
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	}
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1690 1691 if (reg_icr & E1000_ICR_RXO) 1692 adapter->rx_overruns++; 1693 1694 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 1695 adapter->hw.mac.get_link_status = 1; 1696 em_handle_link(adapter, 0); 1697 } else 1698 E1000_WRITE_REG(&adapter->hw, E1000_IMS, 1699 EM_MSIX_LINK | E1000_IMS_LSC); 1700 /* 1701 ** Because we must read the ICR for this interrupt 1702 ** it may clear other causes using autoclear, for 1703 ** this reason we simply create a soft interrupt 1704 ** for all these vectors. 1705 */ 1706 if (reg_icr) { 1707 E1000_WRITE_REG(&adapter->hw, 1708 E1000_ICS, adapter->ims); 1709 } 1710 return; 1711 } 1712 1713 static void 1714 em_handle_rx(void *context, int pending) 1715 { 1716 struct rx_ring *rxr = context; 1717 struct adapter *adapter = rxr->adapter; 1718 bool more; 1719 1720 more = em_rxeof(rxr, adapter->rx_process_limit, NULL); 1721 if (more) 1722 taskqueue_enqueue(rxr->tq, &rxr->rx_task); 1723 else { 1724 /* Reenable this interrupt */ 1725 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); 1726 } 1727 } 1728 1729 static void 1730 em_handle_tx(void *context, int pending) 1731 { 1732 struct tx_ring *txr = context; 1733 struct adapter *adapter = txr->adapter; 1734 if_t ifp = adapter->ifp; 1735 1736 EM_TX_LOCK(txr); 1737 em_txeof(txr); 1738 #ifdef EM_MULTIQUEUE 1739 if (!drbr_empty(ifp, txr->br)) 1740 em_mq_start_locked(ifp, txr); 1741 #else 1742 if (!if_sendq_empty(ifp)) 1743 em_start_locked(ifp, txr); 1744 #endif 1745 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); 1746 EM_TX_UNLOCK(txr); 1747 } 1748 1749 static void 1750 em_handle_link(void *context, int pending) 1751 { 1752 struct adapter *adapter = context; 1753 struct tx_ring *txr = adapter->tx_rings; 1754 if_t ifp = adapter->ifp; 1755 1756 if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) 1757 return; 1758 1759 EM_CORE_LOCK(adapter); 1760 callout_stop(&adapter->timer); 1761 em_update_link_status(adapter); 1762 callout_reset(&adapter->timer, hz, em_local_timer, adapter); 1763 
E1000_WRITE_REG(&adapter->hw, E1000_IMS, 1764 EM_MSIX_LINK | E1000_IMS_LSC); 1765 if (adapter->link_active) { 1766 for (int i = 0; i < adapter->num_queues; i++, txr++) { 1767 EM_TX_LOCK(txr); 1768 #ifdef EM_MULTIQUEUE 1769 if (!drbr_empty(ifp, txr->br)) 1770 em_mq_start_locked(ifp, txr); 1771 #else 1772 if (if_sendq_empty(ifp)) 1773 em_start_locked(ifp, txr); 1774 #endif 1775 EM_TX_UNLOCK(txr); 1776 } 1777 } 1778 EM_CORE_UNLOCK(adapter); 1779 } 1780 1781 1782 /********************************************************************* 1783 * 1784 * Media Ioctl callback 1785 * 1786 * This routine is called whenever the user queries the status of 1787 * the interface using ifconfig. 1788 * 1789 **********************************************************************/ 1790 static void 1791 em_media_status(if_t ifp, struct ifmediareq *ifmr) 1792 { 1793 struct adapter *adapter = if_getsoftc(ifp); 1794 u_char fiber_type = IFM_1000_SX; 1795 1796 INIT_DEBUGOUT("em_media_status: begin"); 1797 1798 EM_CORE_LOCK(adapter); 1799 em_update_link_status(adapter); 1800 1801 ifmr->ifm_status = IFM_AVALID; 1802 ifmr->ifm_active = IFM_ETHER; 1803 1804 if (!adapter->link_active) { 1805 EM_CORE_UNLOCK(adapter); 1806 return; 1807 } 1808 1809 ifmr->ifm_status |= IFM_ACTIVE; 1810 1811 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || 1812 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { 1813 ifmr->ifm_active |= fiber_type | IFM_FDX; 1814 } else { 1815 switch (adapter->link_speed) { 1816 case 10: 1817 ifmr->ifm_active |= IFM_10_T; 1818 break; 1819 case 100: 1820 ifmr->ifm_active |= IFM_100_TX; 1821 break; 1822 case 1000: 1823 ifmr->ifm_active |= IFM_1000_T; 1824 break; 1825 } 1826 if (adapter->link_duplex == FULL_DUPLEX) 1827 ifmr->ifm_active |= IFM_FDX; 1828 else 1829 ifmr->ifm_active |= IFM_HDX; 1830 } 1831 EM_CORE_UNLOCK(adapter); 1832 } 1833 1834 /********************************************************************* 1835 * 1836 * Media Ioctl callback 1837 * 1838 * 
This routine is called when the user changes speed/duplex using
 *  media/mediopt option with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(if_t ifp)
{
	struct adapter	*sc = if_getsoftc(ifp);
	struct ifmedia	*ifm = &sc->media;
	bool		fdx;

	INIT_DEBUGOUT("em_media_change: begin");

	/* Only Ethernet media types are meaningful here */
	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	/* Requested duplex, used by the forced-speed cases below */
	fdx = ((ifm->ifm_media & IFM_GMASK) == IFM_FDX);

	EM_CORE_LOCK(sc);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		sc->hw.mac.autoneg = DO_AUTO_NEG;
		sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		/* Gigabit is negotiated, full-duplex only */
		sc->hw.mac.autoneg = DO_AUTO_NEG;
		sc->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		/* Force 100 Mb at the requested duplex */
		sc->hw.mac.autoneg = FALSE;
		sc->hw.phy.autoneg_advertised = 0;
		sc->hw.mac.forced_speed_duplex =
		    fdx ? ADVERTISE_100_FULL : ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		/* Force 10 Mb at the requested duplex */
		sc->hw.mac.autoneg = FALSE;
		sc->hw.phy.autoneg_advertised = 0;
		sc->hw.mac.forced_speed_duplex =
		    fdx ? ADVERTISE_10_FULL : ADVERTISE_10_HALF;
		break;
	default:
		device_printf(sc->dev, "Unsupported media type\n");
	}

	/* Re-init so the new media settings reach the hardware */
	em_init_locked(sc);
	EM_CORE_UNLOCK(sc);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
1894 * 1895 * return 0 on success, positive on failure 1896 **********************************************************************/ 1897 1898 static int 1899 em_xmit(struct tx_ring *txr, struct mbuf **m_headp) 1900 { 1901 struct adapter *adapter = txr->adapter; 1902 bus_dma_segment_t segs[EM_MAX_SCATTER]; 1903 bus_dmamap_t map; 1904 struct em_txbuffer *tx_buffer, *tx_buffer_mapped; 1905 struct e1000_tx_desc *ctxd = NULL; 1906 struct mbuf *m_head; 1907 struct ether_header *eh; 1908 struct ip *ip = NULL; 1909 struct tcphdr *tp = NULL; 1910 u32 txd_upper = 0, txd_lower = 0; 1911 int ip_off, poff; 1912 int nsegs, i, j, first, last = 0; 1913 int error; 1914 bool do_tso, tso_desc, remap = TRUE; 1915 1916 m_head = *m_headp; 1917 do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO); 1918 tso_desc = FALSE; 1919 ip_off = poff = 0; 1920 1921 /* 1922 * Intel recommends entire IP/TCP header length reside in a single 1923 * buffer. If multiple descriptors are used to describe the IP and 1924 * TCP header, each descriptor should describe one or more 1925 * complete headers; descriptors referencing only parts of headers 1926 * are not supported. If all layer headers are not coalesced into 1927 * a single buffer, each buffer should not cross a 4KB boundary, 1928 * or be larger than the maximum read request size. 1929 * Controller also requires modifing IP/TCP header to make TSO work 1930 * so we firstly get a writable mbuf chain then coalesce ethernet/ 1931 * IP/TCP header into a single buffer to meet the requirement of 1932 * controller. This also simplifies IP/TCP/UDP checksum offloading 1933 * which also has similar restrictions. 
1934 */ 1935 if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) { 1936 if (do_tso || (m_head->m_next != NULL && 1937 m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) { 1938 if (M_WRITABLE(*m_headp) == 0) { 1939 m_head = m_dup(*m_headp, M_NOWAIT); 1940 m_freem(*m_headp); 1941 if (m_head == NULL) { 1942 *m_headp = NULL; 1943 return (ENOBUFS); 1944 } 1945 *m_headp = m_head; 1946 } 1947 } 1948 /* 1949 * XXX 1950 * Assume IPv4, we don't have TSO/checksum offload support 1951 * for IPv6 yet. 1952 */ 1953 ip_off = sizeof(struct ether_header); 1954 if (m_head->m_len < ip_off) { 1955 m_head = m_pullup(m_head, ip_off); 1956 if (m_head == NULL) { 1957 *m_headp = NULL; 1958 return (ENOBUFS); 1959 } 1960 } 1961 eh = mtod(m_head, struct ether_header *); 1962 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 1963 ip_off = sizeof(struct ether_vlan_header); 1964 if (m_head->m_len < ip_off) { 1965 m_head = m_pullup(m_head, ip_off); 1966 if (m_head == NULL) { 1967 *m_headp = NULL; 1968 return (ENOBUFS); 1969 } 1970 } 1971 } 1972 if (m_head->m_len < ip_off + sizeof(struct ip)) { 1973 m_head = m_pullup(m_head, ip_off + sizeof(struct ip)); 1974 if (m_head == NULL) { 1975 *m_headp = NULL; 1976 return (ENOBUFS); 1977 } 1978 } 1979 ip = (struct ip *)(mtod(m_head, char *) + ip_off); 1980 poff = ip_off + (ip->ip_hl << 2); 1981 1982 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) { 1983 if (m_head->m_len < poff + sizeof(struct tcphdr)) { 1984 m_head = m_pullup(m_head, poff + 1985 sizeof(struct tcphdr)); 1986 if (m_head == NULL) { 1987 *m_headp = NULL; 1988 return (ENOBUFS); 1989 } 1990 } 1991 tp = (struct tcphdr *)(mtod(m_head, char *) + poff); 1992 /* 1993 * TSO workaround: 1994 * pull 4 more bytes of data into it. 
1995 */ 1996 if (m_head->m_len < poff + (tp->th_off << 2)) { 1997 m_head = m_pullup(m_head, poff + 1998 (tp->th_off << 2) + 1999 TSO_WORKAROUND); 2000 if (m_head == NULL) { 2001 *m_headp = NULL; 2002 return (ENOBUFS); 2003 } 2004 } 2005 ip = (struct ip *)(mtod(m_head, char *) + ip_off); 2006 tp = (struct tcphdr *)(mtod(m_head, char *) + poff); 2007 if (do_tso) { 2008 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz + 2009 (ip->ip_hl << 2) + 2010 (tp->th_off << 2)); 2011 ip->ip_sum = 0; 2012 /* 2013 * The pseudo TCP checksum does not include TCP 2014 * payload length so driver should recompute 2015 * the checksum here what hardware expect to 2016 * see. This is adherence of Microsoft's Large 2017 * Send specification. 2018 */ 2019 tp->th_sum = in_pseudo(ip->ip_src.s_addr, 2020 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 2021 } 2022 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { 2023 if (m_head->m_len < poff + sizeof(struct udphdr)) { 2024 m_head = m_pullup(m_head, poff + 2025 sizeof(struct udphdr)); 2026 if (m_head == NULL) { 2027 *m_headp = NULL; 2028 return (ENOBUFS); 2029 } 2030 } 2031 ip = (struct ip *)(mtod(m_head, char *) + ip_off); 2032 } 2033 *m_headp = m_head; 2034 } 2035 2036 /* 2037 * Map the packet for DMA 2038 * 2039 * Capture the first descriptor index, 2040 * this descriptor will have the index 2041 * of the EOP which is the only one that 2042 * now gets a DONE bit writeback. 2043 */ 2044 first = txr->next_avail_desc; 2045 tx_buffer = &txr->tx_buffers[first]; 2046 tx_buffer_mapped = tx_buffer; 2047 map = tx_buffer->map; 2048 2049 retry: 2050 error = bus_dmamap_load_mbuf_sg(txr->txtag, map, 2051 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); 2052 2053 /* 2054 * There are two types of errors we can (try) to handle: 2055 * - EFBIG means the mbuf chain was too long and bus_dma ran 2056 * out of segments. Defragment the mbuf chain and try again. 2057 * - ENOMEM means bus_dma could not obtain enough bounce buffers 2058 * at this point in time. 
Defer sending and try again later. 2059 * All other errors, in particular EINVAL, are fatal and prevent the 2060 * mbuf chain from ever going through. Drop it and report error. 2061 */ 2062 if (error == EFBIG && remap) { 2063 struct mbuf *m; 2064 2065 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER); 2066 if (m == NULL) { 2067 adapter->mbuf_defrag_failed++; 2068 m_freem(*m_headp); 2069 *m_headp = NULL; 2070 return (ENOBUFS); 2071 } 2072 *m_headp = m; 2073 2074 /* Try it again, but only once */ 2075 remap = FALSE; 2076 goto retry; 2077 } else if (error != 0) { 2078 adapter->no_tx_dma_setup++; 2079 m_freem(*m_headp); 2080 *m_headp = NULL; 2081 return (error); 2082 } 2083 2084 /* 2085 * TSO Hardware workaround, if this packet is not 2086 * TSO, and is only a single descriptor long, and 2087 * it follows a TSO burst, then we need to add a 2088 * sentinel descriptor to prevent premature writeback. 2089 */ 2090 if ((!do_tso) && (txr->tx_tso == TRUE)) { 2091 if (nsegs == 1) 2092 tso_desc = TRUE; 2093 txr->tx_tso = FALSE; 2094 } 2095 2096 if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) { 2097 txr->no_desc_avail++; 2098 bus_dmamap_unload(txr->txtag, map); 2099 return (ENOBUFS); 2100 } 2101 m_head = *m_headp; 2102 2103 /* Do hardware assists */ 2104 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 2105 em_tso_setup(txr, m_head, ip_off, ip, tp, 2106 &txd_upper, &txd_lower); 2107 /* we need to make a final sentinel transmit desc */ 2108 tso_desc = TRUE; 2109 } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) 2110 em_transmit_checksum_setup(txr, m_head, 2111 ip_off, ip, &txd_upper, &txd_lower); 2112 2113 if (m_head->m_flags & M_VLANTAG) { 2114 /* Set the vlan id. 
*/ 2115 txd_upper |= htole16(if_getvtag(m_head)) << 16; 2116 /* Tell hardware to add tag */ 2117 txd_lower |= htole32(E1000_TXD_CMD_VLE); 2118 } 2119 2120 i = txr->next_avail_desc; 2121 2122 /* Set up our transmit descriptors */ 2123 for (j = 0; j < nsegs; j++) { 2124 bus_size_t seg_len; 2125 bus_addr_t seg_addr; 2126 2127 tx_buffer = &txr->tx_buffers[i]; 2128 ctxd = &txr->tx_base[i]; 2129 seg_addr = segs[j].ds_addr; 2130 seg_len = segs[j].ds_len; 2131 /* 2132 ** TSO Workaround: 2133 ** If this is the last descriptor, we want to 2134 ** split it so we have a small final sentinel 2135 */ 2136 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) { 2137 seg_len -= TSO_WORKAROUND; 2138 ctxd->buffer_addr = htole64(seg_addr); 2139 ctxd->lower.data = htole32( 2140 adapter->txd_cmd | txd_lower | seg_len); 2141 ctxd->upper.data = htole32(txd_upper); 2142 if (++i == adapter->num_tx_desc) 2143 i = 0; 2144 2145 /* Now make the sentinel */ 2146 txr->tx_avail--; 2147 ctxd = &txr->tx_base[i]; 2148 tx_buffer = &txr->tx_buffers[i]; 2149 ctxd->buffer_addr = 2150 htole64(seg_addr + seg_len); 2151 ctxd->lower.data = htole32( 2152 adapter->txd_cmd | txd_lower | TSO_WORKAROUND); 2153 ctxd->upper.data = 2154 htole32(txd_upper); 2155 last = i; 2156 if (++i == adapter->num_tx_desc) 2157 i = 0; 2158 } else { 2159 ctxd->buffer_addr = htole64(seg_addr); 2160 ctxd->lower.data = htole32( 2161 adapter->txd_cmd | txd_lower | seg_len); 2162 ctxd->upper.data = htole32(txd_upper); 2163 last = i; 2164 if (++i == adapter->num_tx_desc) 2165 i = 0; 2166 } 2167 tx_buffer->m_head = NULL; 2168 tx_buffer->next_eop = -1; 2169 } 2170 2171 txr->next_avail_desc = i; 2172 txr->tx_avail -= nsegs; 2173 2174 tx_buffer->m_head = m_head; 2175 /* 2176 ** Here we swap the map so the last descriptor, 2177 ** which gets the completion interrupt has the 2178 ** real map, and the first descriptor gets the 2179 ** unused map from this descriptor. 
	 */
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	ctxd->lower.data |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer->next_eop = last;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);

	return (0);
}

/*
 * Enable unicast/multicast promiscuous reception in RCTL according to
 * the interface flags.  Only writes the register when a promiscuous
 * mode is actually requested; em_disable_promisc() handles the
 * transition back to normal filtering.
 */
static void
em_set_promisc(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	u32 reg_rctl;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

	if (if_getflags(ifp) & IFF_PROMISC) {
		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		/* Turn this on if you want to see bad packets */
		if (em_debug_sbp)
			reg_rctl |= E1000_RCTL_SBP;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
		/* ALLMULTI: accept all multicast but keep unicast filtering */
		reg_rctl |= E1000_RCTL_MPE;
		reg_rctl &= ~E1000_RCTL_UPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	}
}

/*
 * Clear promiscuous bits in RCTL.  MPE is left set while the number of
 * multicast groups is at the filter-table maximum, since the hardware
 * filter cannot represent them all; SBP (store bad packets) is always
 * cleared.
 */
static void
em_disable_promisc(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	u32 reg_rctl;
	int mcnt = 0;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
	reg_rctl &= (~E1000_RCTL_UPE);
	if (if_getflags(ifp) & IFF_ALLMULTI)
		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
	else
		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
	/* Don't disable if in MAX groups */
	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
		reg_rctl &= (~E1000_RCTL_MPE);
	reg_rctl &= (~E1000_RCTL_SBP);
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
}


/*********************************************************************
 *  Multicast Update
 *
 *  This routine is called whenever multicast address list is updated.
 *
 **********************************************************************/

static void
em_set_multi(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	u32 reg_rctl = 0;
	u8  *mta; /* Multicast array memory */
	int mcnt = 0;

	IOCTL_DEBUGOUT("em_set_multi: begin");

	mta = adapter->mta;
	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);

	/*
	 * 82542 rev2 errata: the receiver must be held in reset (and MWI
	 * disabled) while the multicast table is rewritten.
	 */
	if (adapter->hw.mac.type == e1000_82542 &&
	    adapter->hw.revision_id == E1000_REVISION_2) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			e1000_pci_clear_mwi(&adapter->hw);
		reg_rctl |= E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
		msec_delay(5);
	}

	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);

	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		/* Table overflow: fall back to accepting all multicast */
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else
		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);

	/* Take the 82542 rev2 receiver back out of reset, restore MWI */
	if (adapter->hw.mac.type == e1000_82542 &&
	    adapter->hw.revision_id == E1000_REVISION_2) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl &= ~E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
		msec_delay(5);
		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			e1000_pci_set_mwi(&adapter->hw);
	}
}


/*********************************************************************
 *  Timer routine
 *
 *  This routine
 *  checks for link status and updates statistics.
 *
 **********************************************************************/

static void
em_local_timer(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		trigger = 0;

	EM_CORE_LOCK_ASSERT(adapter);

	em_update_link_status(adapter);
	em_update_stats_counters(adapter);

	/* Reset LAA into RAR[0] on 82571 */
	if ((adapter->hw.mac.type == e1000_82571) &&
	    e1000_get_laa_state_82571(&adapter->hw))
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/* Mask to use in the irq trigger */
	if (adapter->msix_mem) {
		/* MSIX: OR together every queue's interrupt cause bits */
		for (int i = 0; i < adapter->num_queues; i++, rxr++)
			trigger |= rxr->ims;
		rxr = adapter->rx_rings;
	} else
		trigger = E1000_ICS_RXDMT0;

	/*
	** Check on the state of the TX queue(s), this
	** can be done without the lock because its RO
	** and the HUNG state will be static if set.
	*/
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		if (txr->busy == EM_TX_HUNG)
			goto hung;
		if (txr->busy >= EM_TX_MAXTRIES)
			txr->busy = EM_TX_HUNG;
		/* Schedule a TX tasklet if needed */
		if (txr->tx_avail <= EM_MAX_SCATTER)
			taskqueue_enqueue(txr->tq, &txr->tx_task);
	}

	/* Re-arm ourselves: fires once per second (hz ticks) */
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
#ifndef DEVICE_POLLING
	/* Trigger an RX interrupt to guarantee mbuf refresh */
	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
#endif
	return;
hung:
	/* Looks like we're hung: log it, mark not-running, and reinit */
	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
	    txr->me);
	em_print_debug_info(adapter);
	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
	adapter->watchdog_events++;
	em_init_locked(adapter);
}


/*
 * Determine the current link state and propagate up/down transitions
 * to the network stack (baudrate, link state, hang-detection disarm).
 */
static void
em_update_link_status(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	if_t ifp = adapter->ifp;
	device_t dev = adapter->dev;
	struct tx_ring *txr = adapter->tx_rings;
	u32 link_check = 0;

	/* Get the cached link value or read phy for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			if (hw->mac.type == e1000_pch_spt)
				msec_delay(50);
			/* Do the work to read phy */
			e1000_check_for_link(hw);
			link_check = !hw->mac.get_link_status;
			if (link_check) /* ESB2 fix */
				e1000_cfg_on_link_up(hw);
		} else
			link_check = TRUE;
		break;
	case e1000_media_type_fiber:
		e1000_check_for_link(hw);
		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
		    E1000_STATUS_LU);
		break;
	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = adapter->hw.mac.serdes_has_link;
		break;
	default:
	case e1000_media_type_unknown:
		break;
	}

	/* Now check for a transition */
	if (link_check && (adapter->link_active == 0)) {
		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
		    &adapter->link_duplex);
		/* Check if we must disable SPEED_MODE bit on PCI-E */
		if ((adapter->link_speed != SPEED_1000) &&
		    ((hw->mac.type == e1000_82571) ||
		    (hw->mac.type == e1000_82572))) {
			int tarc0;
			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
			tarc0 &= ~TARC_SPEED_MODE_BIT;
			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
		}
		if (bootverbose)
			device_printf(dev, "Link is up %d Mbps %s\n",
			    adapter->link_speed,
			    ((adapter->link_duplex == FULL_DUPLEX) ?
			    "Full Duplex" : "Half Duplex"));
		adapter->link_active = 1;
		adapter->smartspeed = 0;
		/* link_speed is in Mbps; stack wants bits/sec */
		if_setbaudrate(ifp, adapter->link_speed * 1000000);
		if_link_state_change(ifp, LINK_STATE_UP);
	} else if (!link_check && (adapter->link_active == 1)) {
		if_setbaudrate(ifp, 0);
		adapter->link_speed = 0;
		adapter->link_duplex = 0;
		if (bootverbose)
			device_printf(dev, "Link is Down\n");
		adapter->link_active = 0;
		/* Link down, disable hang detection */
		for (int i = 0; i < adapter->num_queues; i++, txr++)
			txr->busy = EM_TX_IDLE;
		if_link_state_change(ifp, LINK_STATE_DOWN);
	}
}

/*********************************************************************
 *
 *  This routine disables all traffic on the adapter by issuing a
 *  global reset on the MAC and deallocates TX/RX buffers.
 *
 *  This routine should always be called with BOTH the CORE
 *  and TX locks.
 **********************************************************************/

static void
em_stop(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;

	EM_CORE_LOCK_ASSERT(adapter);

	INIT_DEBUGOUT("em_stop: begin");

	/* Order matters: mask interrupts before stopping the timer */
	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Tell the stack that the interface is no longer active */
	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	/* Disarm Hang Detection. */
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		txr->busy = EM_TX_IDLE;
		EM_TX_UNLOCK(txr);
	}

	/* I219 needs some special flushing to avoid hangs */
	if (adapter->hw.mac.type == e1000_pch_spt)
		em_flush_desc_rings(adapter);

	/* Global MAC reset, then clear wake-up control */
	e1000_reset_hw(&adapter->hw);
	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);

	e1000_led_off(&adapter->hw);
	e1000_cleanup_led(&adapter->hw);
}


/*********************************************************************
 *
 *  Determine hardware revision.
 *
 **********************************************************************/
static void
em_identify_hardware(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	/* Make sure our PCI config space has the necessary stuff set */
	pci_enable_busmaster(dev);
	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);

	/* Save off the information about this board */
	adapter->hw.vendor_id = pci_get_vendor(dev);
	adapter->hw.device_id = pci_get_device(dev);
	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
	adapter->hw.subsystem_vendor_id =
	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
	adapter->hw.subsystem_device_id =
	    pci_read_config(dev, PCIR_SUBDEV_0, 2);

	/* Do Shared Code Init and Setup */
	if (e1000_set_mac_type(&adapter->hw)) {
		/*
		 * NOTE(review): failure here leaves hw.mac.type unset but is
		 * only reported, not propagated — presumably attach fails
		 * later; verify against caller.
		 */
		device_printf(dev, "Setup init failure\n");
		return;
	}
}

/*
 * Map BAR(0) register space and hook it into the shared-code handle.
 * Returns 0 on success or ENXIO if the memory resource cannot be
 * allocated.  Resources are released in em_free_pci_resources().
 */
static int
em_allocate_pci_resources(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int rid;

	rid = PCIR_BAR(0);
	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (adapter->memory == NULL) {
		device_printf(dev, "Unable to allocate bus resource: memory\n");
		return (ENXIO);
	}
	adapter->osdep.mem_bus_space_tag =
	    rman_get_bustag(adapter->memory);
	adapter->osdep.mem_bus_space_handle =
	    rman_get_bushandle(adapter->memory);
	/* Shared code accesses registers through this opaque handle */
	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;

	adapter->hw.back = &adapter->osdep;

	return (0);
}

/*********************************************************************
 *
 *  Setup the Legacy or MSI Interrupt handler
 *
 **********************************************************************/
int
em_allocate_legacy(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct tx_ring	*txr = adapter->tx_rings;
	int error, rid = 0;

	/* Manually turn off all interrupts */
	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

	if (adapter->msix == 1) /* using MSI */
		rid = 1;
	/* We allocate a single interrupt resource */
	adapter->res = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
	if (adapter->res == NULL) {
		device_printf(dev, "Unable to allocate bus resource: "
		    "interrupt\n");
		return (ENXIO);
	}

	/*
	 * Allocate a fast interrupt and the associated
	 * deferred processing contexts.
	 */
	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
	    device_get_nameunit(adapter->dev));
	/* Use a TX only tasklet for local timer */
	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
	    taskqueue_thread_enqueue, &txr->tq);
	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
	    device_get_nameunit(adapter->dev));
	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
		/*
		 * NOTE(review): txr->tq is not freed on this error path,
		 * only adapter->tq — presumably released in detach; verify.
		 */
		device_printf(dev, "Failed to register fast interrupt "
			    "handler: %d\n", error);
		taskqueue_free(adapter->tq);
		adapter->tq = NULL;
		return (error);
	}

	return (0);
}

/*********************************************************************
 *
 *  Setup the MSIX Interrupt handlers
 *   This is not really Multiqueue, rather
 *   its just separate interrupt vectors
 *   for TX, RX, and Link.
 *
 **********************************************************************/
int
em_allocate_msix(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct tx_ring *txr = adapter->tx_rings;
	struct rx_ring *rxr = adapter->rx_rings;
	int error, rid, vector = 0;
	int cpu_id = 0;


	/* Make sure all interrupts are disabled */
	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

	/* First set up ring resources */
	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {

		/* RX ring: MSI-X resource IDs are 1-based */
		rid = vector + 1;

		rxr->res = bus_alloc_resource_any(dev,
		    SYS_RES_IRQ, &rid, RF_ACTIVE);
		if (rxr->res == NULL) {
			device_printf(dev,
			    "Unable to allocate bus resource: "
			    "RX MSIX Interrupt %d\n", i);
			return (ENXIO);
		}
		if ((error = bus_setup_intr(dev, rxr->res,
		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
		    rxr, &rxr->tag)) != 0) {
			device_printf(dev, "Failed to register RX handler");
			return (error);
		}
#if __FreeBSD_version >= 800504
		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
#endif
		rxr->msix = vector;

		/* Round-robin the queue interrupts across CPUs */
		if (em_last_bind_cpu < 0)
			em_last_bind_cpu = CPU_FIRST();
		cpu_id = em_last_bind_cpu;
		bus_bind_intr(dev, rxr->res, cpu_id);

		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
		    taskqueue_thread_enqueue, &rxr->tq);
		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
		    device_get_nameunit(adapter->dev), cpu_id);
		/*
		** Set the bit to enable interrupt
		** in E1000_IMS -- bits 20 and 21
		** are for RX0 and RX1, note this has
		** NOTHING to do with the MSIX vector
		*/
		rxr->ims = 1 << (20 + i);
		adapter->ims |= rxr->ims;
		/* IVAR: 4 bits per RX queue, bit 3 = valid */
		adapter->ivars |= (8 | rxr->msix) << (i * 4);

		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
	}

	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
		/* TX ring */
		rid = vector + 1;
		txr->res = bus_alloc_resource_any(dev,
		    SYS_RES_IRQ, &rid, RF_ACTIVE);
		if (txr->res == NULL) {
			device_printf(dev,
			    "Unable to allocate bus resource: "
			    "TX MSIX Interrupt %d\n", i);
			return (ENXIO);
		}
		if ((error = bus_setup_intr(dev, txr->res,
		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
		    txr, &txr->tag)) != 0) {
			device_printf(dev, "Failed to register TX handler");
			return (error);
		}
#if __FreeBSD_version >= 800504
		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
#endif
		txr->msix = vector;

		if (em_last_bind_cpu < 0)
			em_last_bind_cpu = CPU_FIRST();
		cpu_id = em_last_bind_cpu;
		bus_bind_intr(dev, txr->res, cpu_id);

		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
		    taskqueue_thread_enqueue, &txr->tq);
		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
		    device_get_nameunit(adapter->dev), cpu_id);
		/*
		** Set the bit to enable interrupt
		** in E1000_IMS -- bits 22 and 23
		** are for TX0 and TX1, note this has
		** NOTHING to do with the MSIX vector
		*/
		txr->ims = 1 << (22 + i);
		adapter->ims |= txr->ims;
		/* IVAR: TX entries live in the second nibble group (bit 8+) */
		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));

		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
	}

	/* Link interrupt: last vector, shareable */
	rid = vector + 1;
	adapter->res = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
	if (!adapter->res) {
		device_printf(dev,"Unable to allocate "
		    "bus resource: Link interrupt [%d]\n", rid);
		return (ENXIO);
	}
	/* Set the link handler function */
	error = bus_setup_intr(dev, adapter->res,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
	    em_msix_link, adapter, &adapter->tag);
	if (error) {
		adapter->res = NULL;
		device_printf(dev, "Failed to register LINK handler");
		return (error);
	}
#if __FreeBSD_version >= 800504
	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
#endif
	adapter->linkvec = vector;
	adapter->ivars |= (8 | vector) << 16;
	/* Top bit enables the IVAR-based interrupt scheme on 82574 */
	adapter->ivars |= 0x80000000;

	return (0);
}


/*
 * Release every interrupt resource, MSI allocation, and memory BAR
 * mapping acquired during attach.  Safe to call on a partially
 * initialized adapter: every release is guarded by a NULL check.
 */
static void
em_free_pci_resources(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct tx_ring	*txr;
	struct rx_ring	*rxr;
	int		rid;


	/*
	** Release all the queue interrupt resources:
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		txr = &adapter->tx_rings[i];
		/*
		 * an early abort?
		 * NOTE(review): &array[i] can never be NULL, so this check
		 * (and the rxr one below) looks dead; likely a guard against
		 * tx_rings itself being unallocated — confirm before removal.
		 */
		if (txr == NULL)
			break;
		rid = txr->msix +1;
		if (txr->tag != NULL) {
			bus_teardown_intr(dev, txr->res, txr->tag);
			txr->tag = NULL;
		}
		if (txr->res != NULL)
			bus_release_resource(dev, SYS_RES_IRQ,
			    rid, txr->res);

		rxr = &adapter->rx_rings[i];
		/* an early abort? */
		if (rxr == NULL)
			break;
		rid = rxr->msix +1;
		if (rxr->tag != NULL) {
			bus_teardown_intr(dev, rxr->res, rxr->tag);
			rxr->tag = NULL;
		}
		if (rxr->res != NULL)
			bus_release_resource(dev, SYS_RES_IRQ,
			    rid, rxr->res);
	}

	if (adapter->linkvec) /* we are doing MSIX */
		rid = adapter->linkvec + 1;
	else
		(adapter->msix != 0) ? (rid = 1):(rid = 0);

	if (adapter->tag != NULL) {
		bus_teardown_intr(dev, adapter->res, adapter->tag);
		adapter->tag = NULL;
	}

	if (adapter->res != NULL)
		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);


	if (adapter->msix)
		pci_release_msi(dev);

	if (adapter->msix_mem != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);

	if (adapter->memory != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(0), adapter->memory);

	if (adapter->flash != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    EM_FLASH, adapter->flash);
}

/*
 * Setup MSI or MSI/X
 *
 * Returns the number of vectors allocated (5 for multiqueue MSI-X,
 * 3 for single-queue MSI-X, 1 for MSI) or 0 to fall back to a
 * legacy IRQ.  Also sets adapter->num_queues.
 */
static int
em_setup_msix(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int val;

	/* Nearly always going to use one queue */
	adapter->num_queues = 1;

	/*
	** Try using MSI-X for Hartwell adapters
	*/
	if ((adapter->hw.mac.type == e1000_82574) &&
	    (em_enable_msix == TRUE)) {
#ifdef EM_MULTIQUEUE
		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
		if (adapter->num_queues > 1)
			em_enable_vectors_82574(adapter);
#endif
		/* Map the MSIX BAR */
		int rid = PCIR_BAR(EM_MSIX_BAR);
		adapter->msix_mem = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->msix_mem == NULL) {
			/* May not be enabled */
			device_printf(adapter->dev,
			    "Unable to map MSIX table \n");
			goto msi;
		}
		val = pci_msix_count(dev);

#ifdef EM_MULTIQUEUE
		/* We need 5 vectors in the multiqueue case */
		if (adapter->num_queues > 1 ) {
			if (val >= 5)
				val = 5;
			else {
				adapter->num_queues = 1;
				device_printf(adapter->dev,
				    "Insufficient MSIX vectors for >1 queue, "
				    "using single queue...\n");
				goto msix_one;
			}
		} else {
msix_one:
#endif
			/* Single queue: RX + TX + link = 3 vectors */
			if (val >= 3)
				val = 3;
			else {
				device_printf(adapter->dev,
				    "Insufficient MSIX vectors, using MSI\n");
				goto msi;
			}
#ifdef EM_MULTIQUEUE
		}
#endif

		if ((pci_alloc_msix(dev, &val) == 0)) {
			device_printf(adapter->dev,
			    "Using MSIX interrupts "
			    "with %d vectors\n", val);
			return (val);
		}

		/*
		** If MSIX alloc failed or provided us with
		** less than needed, free and fall through to MSI
		*/
		pci_release_msi(dev);
	}
msi:
	/* MSI-X unavailable: drop the BAR mapping before trying MSI */
	if (adapter->msix_mem != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
		adapter->msix_mem = NULL;
	}
	val = 1;
	if (pci_alloc_msi(dev, &val) == 0) {
		device_printf(adapter->dev, "Using an MSI interrupt\n");
		return (val);
	}
	/* Should only happen due to manual configuration */
	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
	return (0);
}


/*
** The 3 following flush routines are used as a workaround in the
** I219 client parts and only for them.
**
** em_flush_tx_ring - remove all descriptors from the tx_ring
**
** We want to clear all pending descriptors from the TX ring.
** zeroing happens when the HW reads the regs. We assign the ring itself as
** the data of the next descriptor. We don't care about the data we are about
** to reset the HW.
*/
static void
em_flush_tx_ring(struct adapter *adapter)
{
	struct e1000_hw		*hw = &adapter->hw;
	struct tx_ring		*txr = adapter->tx_rings;
	struct e1000_tx_desc	*txd;
	u32			tctl, txd_lower = E1000_TXD_CMD_IFCS;
	u16			size = 512;

	/* Force the transmitter enabled while we push the dummy descriptor */
	tctl = E1000_READ_REG(hw, E1000_TCTL);
	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);

	txd = &txr->tx_base[txr->next_avail_desc++];
	if (txr->next_avail_desc == adapter->num_tx_desc)
		txr->next_avail_desc = 0;

	/* Just use the ring as a dummy buffer addr */
	txd->buffer_addr = txr->txdma.dma_paddr;
	txd->lower.data = htole32(txd_lower | size);
	txd->upper.data = 0;

	/* flush descriptors to memory before notifying the HW */
	wmb();

	E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
	mb();
	usec_delay(250);
}

/*
** em_flush_rx_ring - remove all descriptors from the rx_ring
**
** Mark all descriptors in the RX ring as consumed and disable the rx ring
*/
static void
em_flush_rx_ring(struct adapter *adapter)
{
	struct e1000_hw	*hw = &adapter->hw;
	u32		rctl, rxdctl;

	rctl = E1000_READ_REG(hw, E1000_RCTL);
	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
	E1000_WRITE_FLUSH(hw);
	usec_delay(150);

	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
	/* zero the lower 14 bits (prefetch and host thresholds) */
	rxdctl &= 0xffffc000;
	/*
	 * update thresholds: prefetch threshold to 31, host threshold to 1
	 * and make sure the granularity is "descriptors" and not "cache lines"
	 */
	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
	E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);

	/* momentarily enable the RX ring for the changes to take effect */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
	E1000_WRITE_FLUSH(hw);
	usec_delay(150);
	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
}

/*
** em_flush_desc_rings - remove all descriptors from the descriptor rings
**
** In i219, the descriptor rings must be emptied before resetting the HW
** or before changing the device state to D3 during runtime (runtime PM).
**
** Failure to do this will cause the HW to enter a unit hang state which can
** only be released by PCI reset on the device
**
*/
static void
em_flush_desc_rings(struct adapter *adapter)
{
	struct e1000_hw	*hw = &adapter->hw;
	device_t	dev = adapter->dev;
	u16		hang_state;
	u32		fext_nvm11, tdlen;

	/* First, disable MULR fix in FEXTNVM11 */
	fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
	fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
	E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);

	/* do nothing if we're not in faulty state, or if the queue is empty */
	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
		return;
	em_flush_tx_ring(adapter);

	/* recheck, maybe the fault is caused by the rx ring */
	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
	if (hang_state & FLUSH_DESC_REQUIRED)
		em_flush_rx_ring(adapter);
}


/*********************************************************************
 *
 *  Initialize the hardware to a configuration
 *  as specified by the adapter structure.
 *
 **********************************************************************/
static void
em_reset(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	if_t ifp = adapter->ifp;
	struct e1000_hw	*hw = &adapter->hw;
	u16		rx_buffer_size;
	u32		pba;

	INIT_DEBUGOUT("em_reset: begin");

	/* Set up smart power down as default off on newer adapters. */
	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
	    hw->mac.type == e1000_82572)) {
		u16 phy_tmp = 0;

		/* Speed up time to link by disabling smart power down. */
		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
		phy_tmp &= ~IGP02E1000_PM_SPD;
		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
	}

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer
	 * the remainder is used for the transmit buffer.
	 */
	switch (hw->mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
		/* Boost Receive side for jumbo frames */
		if (adapter->hw.mac.max_frame_size > 4096)
			pba = E1000_PBA_14K;
		else
			pba = E1000_PBA_10K;
		break;
	case e1000_pchlan:
	case e1000_pch2lan:
	case e1000_pch_lpt:
	case e1000_pch_spt:
		pba = E1000_PBA_26K;
		break;
	default:
		if (adapter->hw.mac.max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/*
	 * These parameters control the automatic generation (Tx) and
	 * response (Rx) to Ethernet PAUSE frames.
	 * - High water mark should allow for at least two frames to be
	 *   received after sending an XOFF.
	 * - Low water mark works best when it is very near the high water mark.
	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit. Here we use an arbitrary value of 1500 which will
	 *   restart after one full frame is pulled from the buffer. There
	 *   could be several smaller frames in the buffer and if so they will
	 *   not trigger the XON until their total number reduces the buffer
	 *   by 1500.
	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
	 */
	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
	hw->fc.high_water = rx_buffer_size -
	    roundup2(adapter->hw.mac.max_frame_size, 1024);
	hw->fc.low_water = hw->fc.high_water - 1500;

	if (adapter->fc) /* locally set flow control value? */
		hw->fc.requested_mode = adapter->fc;
	else
		hw->fc.requested_mode = e1000_fc_full;

	if (hw->mac.type == e1000_80003es2lan)
		hw->fc.pause_time = 0xFFFF;
	else
		hw->fc.pause_time = EM_FC_PAUSE_TIME;

	hw->fc.send_xon = TRUE;

	/* Device specific overrides/settings */
	switch (hw->mac.type) {
	case e1000_pchlan:
		/* Workaround: no TX flow ctrl for PCH */
		hw->fc.requested_mode = e1000_fc_rx_pause;
		hw->fc.pause_time = 0xFFFF; /* override */
		if (if_getmtu(ifp) > ETHERMTU) {
			hw->fc.high_water = 0x3500;
			hw->fc.low_water = 0x1500;
		} else {
			hw->fc.high_water = 0x5000;
			hw->fc.low_water = 0x3000;
		}
		hw->fc.refresh_time = 0x1000;
		break;
	case e1000_pch2lan:
	case e1000_pch_lpt:
	case e1000_pch_spt:
		hw->fc.high_water = 0x5C20;
		hw->fc.low_water = 0x5048;
		hw->fc.pause_time = 0x0650;
		hw->fc.refresh_time = 0x0400;
		/* Jumbos need adjusted PBA */
		if (if_getmtu(ifp) > ETHERMTU)
			E1000_WRITE_REG(hw, E1000_PBA, 12);
		else
			E1000_WRITE_REG(hw, E1000_PBA, 26);
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
		if (if_getmtu(ifp) > ETHERMTU) {
			hw->fc.high_water = 0x2800;
			hw->fc.low_water = hw->fc.high_water - 8;
			break;
		}
		/* else fall thru */
	default:
		if (hw->mac.type == e1000_80003es2lan)
			hw->fc.pause_time = 0xFFFF;
		break;
	}

	/* I219 needs some special flushing to avoid hangs */
	if (hw->mac.type == e1000_pch_spt)
		em_flush_desc_rings(adapter);

	/* Issue a global reset */
	e1000_reset_hw(hw);
	E1000_WRITE_REG(hw, E1000_WUC, 0);
	em_disable_aspm(adapter);
	/* and a re-init */
	if (e1000_init_hw(hw) < 0) {
		device_printf(dev, "Hardware Initialization Failed\n");
		return;
	}

	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
	e1000_get_phy_info(hw);
	e1000_check_for_link(hw);
	return;
}

/*********************************************************************
 *
 *  Setup networking device structure and register an interface.
 *
 **********************************************************************/
static int
em_setup_interface(device_t dev, struct adapter *adapter)
{
	if_t ifp;

	INIT_DEBUGOUT("em_setup_interface: begin");

	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
	if (ifp == 0) {
		device_printf(dev, "can not allocate ifnet structure\n");
		return (-1);
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	if_setdev(ifp, dev);
	if_setinitfn(ifp, em_init);
	if_setsoftc(ifp, adapter);
	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
	if_setioctlfn(ifp, em_ioctl);
	if_setgetcounterfn(ifp, em_get_counter);

	/* TSO parameters */
	ifp->if_hw_tsomax = IP_MAXPACKET;
	/* Take m_pullup(9)'s in em_xmit() w/ TSO into acount.
*/ 3201 ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5; 3202 ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE; 3203 3204 #ifdef EM_MULTIQUEUE 3205 /* Multiqueue stack interface */ 3206 if_settransmitfn(ifp, em_mq_start); 3207 if_setqflushfn(ifp, em_qflush); 3208 #else 3209 if_setstartfn(ifp, em_start); 3210 if_setsendqlen(ifp, adapter->num_tx_desc - 1); 3211 if_setsendqready(ifp); 3212 #endif 3213 3214 ether_ifattach(ifp, adapter->hw.mac.addr); 3215 3216 if_setcapabilities(ifp, 0); 3217 if_setcapenable(ifp, 0); 3218 3219 3220 if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | 3221 IFCAP_TSO4, 0); 3222 /* 3223 * Tell the upper layer(s) we 3224 * support full VLAN capability 3225 */ 3226 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); 3227 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | 3228 IFCAP_VLAN_MTU, 0); 3229 if_setcapenable(ifp, if_getcapabilities(ifp)); 3230 3231 /* 3232 ** Don't turn this on by default, if vlans are 3233 ** created on another pseudo device (eg. lagg) 3234 ** then vlan events are not passed thru, breaking 3235 ** operation, but with HW FILTER off it works. If 3236 ** using vlans directly on the em driver you can 3237 ** enable this and get full hardware tag filtering. 
3238 */ 3239 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0); 3240 3241 #ifdef DEVICE_POLLING 3242 if_setcapabilitiesbit(ifp, IFCAP_POLLING,0); 3243 #endif 3244 3245 /* Enable only WOL MAGIC by default */ 3246 if (adapter->wol) { 3247 if_setcapabilitiesbit(ifp, IFCAP_WOL, 0); 3248 if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0); 3249 } 3250 3251 /* 3252 * Specify the media types supported by this adapter and register 3253 * callbacks to update media and link information 3254 */ 3255 ifmedia_init(&adapter->media, IFM_IMASK, 3256 em_media_change, em_media_status); 3257 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || 3258 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { 3259 u_char fiber_type = IFM_1000_SX; /* default type */ 3260 3261 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 3262 0, NULL); 3263 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL); 3264 } else { 3265 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); 3266 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 3267 0, NULL); 3268 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 3269 0, NULL); 3270 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 3271 0, NULL); 3272 if (adapter->hw.phy.type != e1000_phy_ife) { 3273 ifmedia_add(&adapter->media, 3274 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); 3275 ifmedia_add(&adapter->media, 3276 IFM_ETHER | IFM_1000_T, 0, NULL); 3277 } 3278 } 3279 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); 3280 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); 3281 return (0); 3282 } 3283 3284 3285 /* 3286 * Manage DMA'able memory. 
3287 */ 3288 static void 3289 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 3290 { 3291 if (error) 3292 return; 3293 *(bus_addr_t *) arg = segs[0].ds_addr; 3294 } 3295 3296 static int 3297 em_dma_malloc(struct adapter *adapter, bus_size_t size, 3298 struct em_dma_alloc *dma, int mapflags) 3299 { 3300 int error; 3301 3302 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ 3303 EM_DBA_ALIGN, 0, /* alignment, bounds */ 3304 BUS_SPACE_MAXADDR, /* lowaddr */ 3305 BUS_SPACE_MAXADDR, /* highaddr */ 3306 NULL, NULL, /* filter, filterarg */ 3307 size, /* maxsize */ 3308 1, /* nsegments */ 3309 size, /* maxsegsize */ 3310 0, /* flags */ 3311 NULL, /* lockfunc */ 3312 NULL, /* lockarg */ 3313 &dma->dma_tag); 3314 if (error) { 3315 device_printf(adapter->dev, 3316 "%s: bus_dma_tag_create failed: %d\n", 3317 __func__, error); 3318 goto fail_0; 3319 } 3320 3321 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, 3322 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); 3323 if (error) { 3324 device_printf(adapter->dev, 3325 "%s: bus_dmamem_alloc(%ju) failed: %d\n", 3326 __func__, (uintmax_t)size, error); 3327 goto fail_2; 3328 } 3329 3330 dma->dma_paddr = 0; 3331 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, 3332 size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); 3333 if (error || dma->dma_paddr == 0) { 3334 device_printf(adapter->dev, 3335 "%s: bus_dmamap_load failed: %d\n", 3336 __func__, error); 3337 goto fail_3; 3338 } 3339 3340 return (0); 3341 3342 fail_3: 3343 bus_dmamap_unload(dma->dma_tag, dma->dma_map); 3344 fail_2: 3345 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); 3346 bus_dma_tag_destroy(dma->dma_tag); 3347 fail_0: 3348 dma->dma_tag = NULL; 3349 3350 return (error); 3351 } 3352 3353 static void 3354 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma) 3355 { 3356 if (dma->dma_tag == NULL) 3357 return; 3358 if (dma->dma_paddr != 0) { 3359 
bus_dmamap_sync(dma->dma_tag, dma->dma_map, 3360 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 3361 bus_dmamap_unload(dma->dma_tag, dma->dma_map); 3362 dma->dma_paddr = 0; 3363 } 3364 if (dma->dma_vaddr != NULL) { 3365 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); 3366 dma->dma_vaddr = NULL; 3367 } 3368 bus_dma_tag_destroy(dma->dma_tag); 3369 dma->dma_tag = NULL; 3370 } 3371 3372 3373 /********************************************************************* 3374 * 3375 * Allocate memory for the transmit and receive rings, and then 3376 * the descriptors associated with each, called only once at attach. 3377 * 3378 **********************************************************************/ 3379 static int 3380 em_allocate_queues(struct adapter *adapter) 3381 { 3382 device_t dev = adapter->dev; 3383 struct tx_ring *txr = NULL; 3384 struct rx_ring *rxr = NULL; 3385 int rsize, tsize, error = E1000_SUCCESS; 3386 int txconf = 0, rxconf = 0; 3387 3388 3389 /* Allocate the TX ring struct memory */ 3390 if (!(adapter->tx_rings = 3391 (struct tx_ring *) malloc(sizeof(struct tx_ring) * 3392 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 3393 device_printf(dev, "Unable to allocate TX ring memory\n"); 3394 error = ENOMEM; 3395 goto fail; 3396 } 3397 3398 /* Now allocate the RX */ 3399 if (!(adapter->rx_rings = 3400 (struct rx_ring *) malloc(sizeof(struct rx_ring) * 3401 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 3402 device_printf(dev, "Unable to allocate RX ring memory\n"); 3403 error = ENOMEM; 3404 goto rx_fail; 3405 } 3406 3407 tsize = roundup2(adapter->num_tx_desc * 3408 sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); 3409 /* 3410 * Now set up the TX queues, txconf is needed to handle the 3411 * possibility that things fail midcourse and we need to 3412 * undo memory gracefully 3413 */ 3414 for (int i = 0; i < adapter->num_queues; i++, txconf++) { 3415 /* Set up some basics */ 3416 txr = &adapter->tx_rings[i]; 3417 txr->adapter = adapter; 3418 
txr->me = i; 3419 3420 /* Initialize the TX lock */ 3421 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", 3422 device_get_nameunit(dev), txr->me); 3423 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); 3424 3425 if (em_dma_malloc(adapter, tsize, 3426 &txr->txdma, BUS_DMA_NOWAIT)) { 3427 device_printf(dev, 3428 "Unable to allocate TX Descriptor memory\n"); 3429 error = ENOMEM; 3430 goto err_tx_desc; 3431 } 3432 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr; 3433 bzero((void *)txr->tx_base, tsize); 3434 3435 if (em_allocate_transmit_buffers(txr)) { 3436 device_printf(dev, 3437 "Critical Failure setting up transmit buffers\n"); 3438 error = ENOMEM; 3439 goto err_tx_desc; 3440 } 3441 #if __FreeBSD_version >= 800000 3442 /* Allocate a buf ring */ 3443 txr->br = buf_ring_alloc(4096, M_DEVBUF, 3444 M_WAITOK, &txr->tx_mtx); 3445 #endif 3446 } 3447 3448 /* 3449 * Next the RX queues... 3450 */ 3451 rsize = roundup2(adapter->num_rx_desc * 3452 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); 3453 for (int i = 0; i < adapter->num_queues; i++, rxconf++) { 3454 rxr = &adapter->rx_rings[i]; 3455 rxr->adapter = adapter; 3456 rxr->me = i; 3457 3458 /* Initialize the RX lock */ 3459 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", 3460 device_get_nameunit(dev), txr->me); 3461 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); 3462 3463 if (em_dma_malloc(adapter, rsize, 3464 &rxr->rxdma, BUS_DMA_NOWAIT)) { 3465 device_printf(dev, 3466 "Unable to allocate RxDescriptor memory\n"); 3467 error = ENOMEM; 3468 goto err_rx_desc; 3469 } 3470 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr; 3471 bzero((void *)rxr->rx_base, rsize); 3472 3473 /* Allocate receive buffers for the ring*/ 3474 if (em_allocate_receive_buffers(rxr)) { 3475 device_printf(dev, 3476 "Critical Failure setting up receive buffers\n"); 3477 error = ENOMEM; 3478 goto err_rx_desc; 3479 } 3480 } 3481 3482 return (0); 3483 3484 err_rx_desc: 3485 for (rxr = 
adapter->rx_rings; rxconf > 0; rxr++, rxconf--) 3486 em_dma_free(adapter, &rxr->rxdma); 3487 err_tx_desc: 3488 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) 3489 em_dma_free(adapter, &txr->txdma); 3490 free(adapter->rx_rings, M_DEVBUF); 3491 rx_fail: 3492 #if __FreeBSD_version >= 800000 3493 buf_ring_free(txr->br, M_DEVBUF); 3494 #endif 3495 free(adapter->tx_rings, M_DEVBUF); 3496 fail: 3497 return (error); 3498 } 3499 3500 3501 /********************************************************************* 3502 * 3503 * Allocate memory for tx_buffer structures. The tx_buffer stores all 3504 * the information needed to transmit a packet on the wire. This is 3505 * called only once at attach, setup is done every reset. 3506 * 3507 **********************************************************************/ 3508 static int 3509 em_allocate_transmit_buffers(struct tx_ring *txr) 3510 { 3511 struct adapter *adapter = txr->adapter; 3512 device_t dev = adapter->dev; 3513 struct em_txbuffer *txbuf; 3514 int error, i; 3515 3516 /* 3517 * Setup DMA descriptor areas. 
3518 */ 3519 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 3520 1, 0, /* alignment, bounds */ 3521 BUS_SPACE_MAXADDR, /* lowaddr */ 3522 BUS_SPACE_MAXADDR, /* highaddr */ 3523 NULL, NULL, /* filter, filterarg */ 3524 EM_TSO_SIZE, /* maxsize */ 3525 EM_MAX_SCATTER, /* nsegments */ 3526 PAGE_SIZE, /* maxsegsize */ 3527 0, /* flags */ 3528 NULL, /* lockfunc */ 3529 NULL, /* lockfuncarg */ 3530 &txr->txtag))) { 3531 device_printf(dev,"Unable to allocate TX DMA tag\n"); 3532 goto fail; 3533 } 3534 3535 if (!(txr->tx_buffers = 3536 (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) * 3537 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { 3538 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 3539 error = ENOMEM; 3540 goto fail; 3541 } 3542 3543 /* Create the descriptor buffer dma maps */ 3544 txbuf = txr->tx_buffers; 3545 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { 3546 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); 3547 if (error != 0) { 3548 device_printf(dev, "Unable to create TX DMA map\n"); 3549 goto fail; 3550 } 3551 } 3552 3553 return 0; 3554 fail: 3555 /* We free all, it handles case where we are in the middle */ 3556 em_free_transmit_structures(adapter); 3557 return (error); 3558 } 3559 3560 /********************************************************************* 3561 * 3562 * Initialize a transmit ring. 
 *
 **********************************************************************/
/*
 * em_setup_transmit_ring - (re)initialize one TX ring for use: zero
 * the descriptor area, reset the ring indices, free any mbufs left
 * from a previous run, reattach netmap buffers when active, and clear
 * the cached checksum-offload context.  Called under no lock; takes
 * and releases the ring's own TX lock.
 */
static void
em_setup_transmit_ring(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct em_txbuffer *txbuf;
	int i;
#ifdef DEV_NETMAP
	struct netmap_slot *slot;
	struct netmap_adapter *na = netmap_getna(adapter->ifp);
#endif /* DEV_NETMAP */

	/* Clear the old descriptor contents */
	EM_TX_LOCK(txr);
#ifdef DEV_NETMAP
	/* Non-NULL only when netmap mode is enabled on this ring. */
	slot = netmap_reset(na, NR_TX, txr->me, 0);
#endif /* DEV_NETMAP */

	bzero((void *)txr->tx_base,
	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
	/* Reset indices */
	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;

	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		if (slot) {
			/* Map netmap's buffer into this descriptor slot. */
			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + si, &paddr);
			txr->tx_base[i].buffer_addr = htole64(paddr);
			/* reload the map for netmap mode */
			netmap_load_map(na, txr->txtag, txbuf->map, addr);
		}
#endif /* DEV_NETMAP */

		/* clear the watch index */
		txbuf->next_eop = -1;
	}

	/* Set number of descriptors available */
	txr->tx_avail = adapter->num_tx_desc;
	txr->busy = EM_TX_IDLE;

	/*
	 * Clear checksum offload context; forces the first packet after
	 * a reset to program a fresh context descriptor.
	 */
	txr->last_hw_offload = 0;
	txr->last_hw_ipcss = 0;
	txr->last_hw_ipcso = 0;
	txr->last_hw_tucss = 0;
	txr->last_hw_tucso = 0;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	EM_TX_UNLOCK(txr);
}

/*********************************************************************
 *
 *  Initialize all transmit rings.
 *
 **********************************************************************/
static void
em_setup_transmit_structures(struct adapter *adapter)
{
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++)
		em_setup_transmit_ring(txr);

	return;
}

/*********************************************************************
 *
 *  Enable transmit unit.
 *
 **********************************************************************/
/*
 * em_initialize_transmit_unit - program the MAC's TX registers:
 * per-queue ring base/length/head/tail and TXDCTL thresholds, the
 * inter-packet gap (TIPG), interrupt delays (TIDV/TADV), per-MAC TARC
 * errata bits, and finally TCTL to turn the transmitter on.
 */
static void
em_initialize_transmit_unit(struct adapter *adapter)
{
	struct tx_ring	*txr = adapter->tx_rings;
	struct e1000_hw	*hw = &adapter->hw;
	u32 tctl, txdctl = 0, tarc, tipg = 0;

	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		u64 bus_addr = txr->txdma.dma_paddr;
		/* Base and Len of TX Ring */
		E1000_WRITE_REG(hw, E1000_TDLEN(i),
		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
		E1000_WRITE_REG(hw, E1000_TDBAH(i),
		    (u32)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_TDBAL(i),
		    (u32)bus_addr);
		/* Init the HEAD/TAIL indices */
		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
		E1000_WRITE_REG(hw, E1000_TDH(i), 0);

		HW_DEBUGOUT2("Base = %x, Length = %x\n",
		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));

		txr->busy = EM_TX_IDLE;
		txdctl = 0; /* clear txdctl */
		txdctl |= 0x1f; /* PTHRESH */
		txdctl |= 1 << 8; /* HTHRESH */
		txdctl |= 1 << 16;/* WTHRESH */
		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
		txdctl |= E1000_TXDCTL_GRAN;
		txdctl |= 1 << 25; /* LWTHRESH */

		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
	}

	/* Set the default values for the Tx Inter Packet Gap timer */
	switch (adapter->hw.mac.type) {
	case e1000_80003es2lan:
		tipg = DEFAULT_82543_TIPG_IPGR1;
		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
		    E1000_TIPG_IPGR2_SHIFT;
		break;
	default:
		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
		    (adapter->hw.phy.media_type ==
		    e1000_media_type_internal_serdes))
			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
		else
			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	}

	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);

	if(adapter->hw.mac.type >= e1000_82540)
		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
		    adapter->tx_abs_int_delay.value);

	if ((adapter->hw.mac.type == e1000_82571) ||
	    (adapter->hw.mac.type == e1000_82572)) {
		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
		tarc |= TARC_SPEED_MODE_BIT;
		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
		/* errata: program both queues to unweighted RR */
		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
		tarc |= 1;
		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
		tarc |= 1;
		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
	} else if (adapter->hw.mac.type == e1000_82574) {
		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
		tarc |= TARC_ERRATA_BIT;
		if ( adapter->num_queues > 1) {
			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
		} else
			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
	}

	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
	if (adapter->tx_int_delay.value > 0)
		adapter->txd_cmd |= E1000_TXD_CMD_IDE;

	/* Program the Transmit Control Register */
	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));

	if (adapter->hw.mac.type >= e1000_82571)
		tctl |= E1000_TCTL_MULR;

	/* This write will effectively turn on the transmit unit. */
	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);

	/* SPT Si errata workaround to avoid data corruption */
	if (hw->mac.type == e1000_pch_spt) {
		u32 reg;
		reg = E1000_READ_REG(hw, E1000_IOSFPC);
		reg |= E1000_RCTL_RDMTS_HEX;
		E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
		reg = E1000_READ_REG(hw, E1000_TARC(0));
		reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
		E1000_WRITE_REG(hw, E1000_TARC(0), reg);
	}
}


/*********************************************************************
 *
 *  Free all transmit rings.
 *
 **********************************************************************/
static void
em_free_transmit_structures(struct adapter *adapter)
{
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		em_free_transmit_buffers(txr);
		em_dma_free(adapter, &txr->txdma);
		EM_TX_UNLOCK(txr);
		EM_TX_LOCK_DESTROY(txr);
	}

	free(adapter->tx_rings, M_DEVBUF);
}

/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
/*
 * em_free_transmit_buffers - release one ring's mbufs, DMA maps,
 * buf_ring, tx_buffer array and DMA tag.  Tolerates partially
 * constructed rings (NULL checks throughout), so it is also used on
 * attach error paths.  Caller holds the ring's TX lock.
 */
static void
em_free_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct em_txbuffer *txbuf;

	INIT_DEBUGOUT("free_transmit_ring: begin");

	if (txr->tx_buffers == NULL)
		return;

	for (int i = 0; i < adapter->num_tx_desc; i++) {
		txbuf = &txr->tx_buffers[i];
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			    txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
			if (txbuf->map != NULL) {
				bus_dmamap_destroy(txr->txtag,
				    txbuf->map);
				txbuf->map = NULL;
			}
		} else if (txbuf->map != NULL) {
			bus_dmamap_unload(txr->txtag,
			    txbuf->map);
			bus_dmamap_destroy(txr->txtag,
			    txbuf->map);
			txbuf->map = NULL;
		}
	}
#if __FreeBSD_version >= 800000
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
#endif
	if (txr->tx_buffers != NULL) {
		free(txr->tx_buffers, M_DEVBUF);
		txr->tx_buffers = NULL;
	}
	if (txr->txtag != NULL) {
		bus_dma_tag_destroy(txr->txtag);
		txr->txtag = NULL;
	}
	return;
}


/*********************************************************************
 *  The offload context is protocol specific (TCP/UDP) and thus
 *  only needs to be set when the protocol changes. The occasion
 *  of a context change can be a performance detriment, and
 *  might be better just disabled. The reason arises in the way
 *  in which the controller supports pipelined requests from the
 *  Tx data DMA. Up to four requests can be pipelined, and they may
 *  belong to the same packet or to multiple packets.
However all 3850 * requests for one packet are issued before a request is issued 3851 * for a subsequent packet and if a request for the next packet 3852 * requires a context change, that request will be stalled 3853 * until the previous request completes. This means setting up 3854 * a new context effectively disables pipelined Tx data DMA which 3855 * in turn greatly slow down performance to send small sized 3856 * frames. 3857 **********************************************************************/ 3858 static void 3859 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, 3860 struct ip *ip, u32 *txd_upper, u32 *txd_lower) 3861 { 3862 struct adapter *adapter = txr->adapter; 3863 struct e1000_context_desc *TXD = NULL; 3864 struct em_txbuffer *tx_buffer; 3865 int cur, hdr_len; 3866 u32 cmd = 0; 3867 u16 offload = 0; 3868 u8 ipcso, ipcss, tucso, tucss; 3869 3870 ipcss = ipcso = tucss = tucso = 0; 3871 hdr_len = ip_off + (ip->ip_hl << 2); 3872 cur = txr->next_avail_desc; 3873 3874 /* Setup of IP header checksum. */ 3875 if (mp->m_pkthdr.csum_flags & CSUM_IP) { 3876 *txd_upper |= E1000_TXD_POPTS_IXSM << 8; 3877 offload |= CSUM_IP; 3878 ipcss = ip_off; 3879 ipcso = ip_off + offsetof(struct ip, ip_sum); 3880 /* 3881 * Start offset for header checksum calculation. 3882 * End offset for header checksum calculation. 3883 * Offset of place to put the checksum. 
3884 */ 3885 TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; 3886 TXD->lower_setup.ip_fields.ipcss = ipcss; 3887 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len); 3888 TXD->lower_setup.ip_fields.ipcso = ipcso; 3889 cmd |= E1000_TXD_CMD_IP; 3890 } 3891 3892 if (mp->m_pkthdr.csum_flags & CSUM_TCP) { 3893 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; 3894 *txd_upper |= E1000_TXD_POPTS_TXSM << 8; 3895 offload |= CSUM_TCP; 3896 tucss = hdr_len; 3897 tucso = hdr_len + offsetof(struct tcphdr, th_sum); 3898 /* 3899 * The 82574L can only remember the *last* context used 3900 * regardless of queue that it was use for. We cannot reuse 3901 * contexts on this hardware platform and must generate a new 3902 * context every time. 82574L hardware spec, section 7.2.6, 3903 * second note. 3904 */ 3905 if (adapter->num_queues < 2) { 3906 /* 3907 * Setting up new checksum offload context for every 3908 * frames takes a lot of processing time for hardware. 3909 * This also reduces performance a lot for small sized 3910 * frames so avoid it if driver can use previously 3911 * configured checksum offload context. 3912 */ 3913 if (txr->last_hw_offload == offload) { 3914 if (offload & CSUM_IP) { 3915 if (txr->last_hw_ipcss == ipcss && 3916 txr->last_hw_ipcso == ipcso && 3917 txr->last_hw_tucss == tucss && 3918 txr->last_hw_tucso == tucso) 3919 return; 3920 } else { 3921 if (txr->last_hw_tucss == tucss && 3922 txr->last_hw_tucso == tucso) 3923 return; 3924 } 3925 } 3926 txr->last_hw_offload = offload; 3927 txr->last_hw_tucss = tucss; 3928 txr->last_hw_tucso = tucso; 3929 } 3930 /* 3931 * Start offset for payload checksum calculation. 3932 * End offset for payload checksum calculation. 3933 * Offset of place to put the checksum. 
3934 */ 3935 TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; 3936 TXD->upper_setup.tcp_fields.tucss = hdr_len; 3937 TXD->upper_setup.tcp_fields.tucse = htole16(0); 3938 TXD->upper_setup.tcp_fields.tucso = tucso; 3939 cmd |= E1000_TXD_CMD_TCP; 3940 } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) { 3941 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; 3942 *txd_upper |= E1000_TXD_POPTS_TXSM << 8; 3943 tucss = hdr_len; 3944 tucso = hdr_len + offsetof(struct udphdr, uh_sum); 3945 /* 3946 * The 82574L can only remember the *last* context used 3947 * regardless of queue that it was use for. We cannot reuse 3948 * contexts on this hardware platform and must generate a new 3949 * context every time. 82574L hardware spec, section 7.2.6, 3950 * second note. 3951 */ 3952 if (adapter->num_queues < 2) { 3953 /* 3954 * Setting up new checksum offload context for every 3955 * frames takes a lot of processing time for hardware. 3956 * This also reduces performance a lot for small sized 3957 * frames so avoid it if driver can use previously 3958 * configured checksum offload context. 3959 */ 3960 if (txr->last_hw_offload == offload) { 3961 if (offload & CSUM_IP) { 3962 if (txr->last_hw_ipcss == ipcss && 3963 txr->last_hw_ipcso == ipcso && 3964 txr->last_hw_tucss == tucss && 3965 txr->last_hw_tucso == tucso) 3966 return; 3967 } else { 3968 if (txr->last_hw_tucss == tucss && 3969 txr->last_hw_tucso == tucso) 3970 return; 3971 } 3972 } 3973 txr->last_hw_offload = offload; 3974 txr->last_hw_tucss = tucss; 3975 txr->last_hw_tucso = tucso; 3976 } 3977 /* 3978 * Start offset for header checksum calculation. 3979 * End offset for header checksum calculation. 3980 * Offset of place to put the checksum. 
3981 */ 3982 TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; 3983 TXD->upper_setup.tcp_fields.tucss = tucss; 3984 TXD->upper_setup.tcp_fields.tucse = htole16(0); 3985 TXD->upper_setup.tcp_fields.tucso = tucso; 3986 } 3987 3988 if (offload & CSUM_IP) { 3989 txr->last_hw_ipcss = ipcss; 3990 txr->last_hw_ipcso = ipcso; 3991 } 3992 3993 TXD->tcp_seg_setup.data = htole32(0); 3994 TXD->cmd_and_length = 3995 htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd); 3996 tx_buffer = &txr->tx_buffers[cur]; 3997 tx_buffer->m_head = NULL; 3998 tx_buffer->next_eop = -1; 3999 4000 if (++cur == adapter->num_tx_desc) 4001 cur = 0; 4002 4003 txr->tx_avail--; 4004 txr->next_avail_desc = cur; 4005 } 4006 4007 4008 /********************************************************************** 4009 * 4010 * Setup work for hardware segmentation offload (TSO) 4011 * 4012 **********************************************************************/ 4013 static void 4014 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, 4015 struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower) 4016 { 4017 struct adapter *adapter = txr->adapter; 4018 struct e1000_context_desc *TXD; 4019 struct em_txbuffer *tx_buffer; 4020 int cur, hdr_len; 4021 4022 /* 4023 * In theory we can use the same TSO context if and only if 4024 * frame is the same type(IP/TCP) and the same MSS. However 4025 * checking whether a frame has the same IP/TCP structure is 4026 * hard thing so just ignore that and always restablish a 4027 * new TSO context. 4028 */ 4029 hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2); 4030 *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */ 4031 E1000_TXD_DTYP_D | /* Data descr type */ 4032 E1000_TXD_CMD_TSE); /* Do TSE on this packet */ 4033 4034 /* IP and/or TCP header checksum calculation and insertion. 
*/ 4035 *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8; 4036 4037 cur = txr->next_avail_desc; 4038 tx_buffer = &txr->tx_buffers[cur]; 4039 TXD = (struct e1000_context_desc *) &txr->tx_base[cur]; 4040 4041 /* 4042 * Start offset for header checksum calculation. 4043 * End offset for header checksum calculation. 4044 * Offset of place put the checksum. 4045 */ 4046 TXD->lower_setup.ip_fields.ipcss = ip_off; 4047 TXD->lower_setup.ip_fields.ipcse = 4048 htole16(ip_off + (ip->ip_hl << 2) - 1); 4049 TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum); 4050 /* 4051 * Start offset for payload checksum calculation. 4052 * End offset for payload checksum calculation. 4053 * Offset of place to put the checksum. 4054 */ 4055 TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2); 4056 TXD->upper_setup.tcp_fields.tucse = 0; 4057 TXD->upper_setup.tcp_fields.tucso = 4058 ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum); 4059 /* 4060 * Payload size per packet w/o any headers. 4061 * Length of all headers up to payload. 4062 */ 4063 TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz); 4064 TXD->tcp_seg_setup.fields.hdr_len = hdr_len; 4065 4066 TXD->cmd_and_length = htole32(adapter->txd_cmd | 4067 E1000_TXD_CMD_DEXT | /* Extended descr */ 4068 E1000_TXD_CMD_TSE | /* TSE context */ 4069 E1000_TXD_CMD_IP | /* Do IP csum */ 4070 E1000_TXD_CMD_TCP | /* Do TCP checksum */ 4071 (mp->m_pkthdr.len - (hdr_len))); /* Total len */ 4072 4073 tx_buffer->m_head = NULL; 4074 tx_buffer->next_eop = -1; 4075 4076 if (++cur == adapter->num_tx_desc) 4077 cur = 0; 4078 4079 txr->tx_avail--; 4080 txr->next_avail_desc = cur; 4081 txr->tx_tso = TRUE; 4082 } 4083 4084 4085 /********************************************************************** 4086 * 4087 * Examine each tx_buffer in the used queue. If the hardware is done 4088 * processing the packet then free associated resources. The 4089 * tx_buffer is put back on the free queue. 
 *
 **********************************************************************/
static void
em_txeof(struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	int first, last, done, processed;
	struct em_txbuffer *tx_buffer;
	struct e1000_tx_desc   *tx_desc, *eop_desc;
	if_t ifp = adapter->ifp;

	EM_TX_LOCK_ASSERT(txr);
#ifdef DEV_NETMAP
	/* Netmap owns the ring in netmap mode; it handles completion. */
	if (netmap_tx_irq(ifp, txr->me))
		return;
#endif /* DEV_NETMAP */

	/* No work, make sure hang detection is disabled */
	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->busy = EM_TX_IDLE;
		return;
	}

	processed = 0;
	first = txr->next_to_clean;
	tx_desc = &txr->tx_base[first];
	tx_buffer = &txr->tx_buffers[first];
	last = tx_buffer->next_eop;
	eop_desc = &txr->tx_base[last];

	/*
	 * What this does is get the index of the
	 * first descriptor AFTER the EOP of the
	 * first packet, that way we can do the
	 * simple comparison on the inner while loop.
	 */
	if (++last == adapter->num_tx_desc)
		last = 0;
	done = last;

	/* Pick up status bits written back by the hardware. */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			++txr->tx_avail;
			++processed;

			/* Only the slot carrying the mbuf has a mapping. */
			if (tx_buffer->m_head) {
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
			tx_buffer->next_eop = -1;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
			tx_desc = &txr->tx_base[first];
		}
		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
			eop_desc = &txr->tx_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc) last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	** Hang detection: we know there's work outstanding
	** or the entry return would have been taken, so no
	** descriptor processed here indicates a potential hang.
	** The local timer will examine this and do a reset if needed.
	*/
	if (processed == 0) {
		if (txr->busy != EM_TX_HUNG)
			++txr->busy;
	} else /* At least one descriptor was cleaned */
		txr->busy = EM_TX_BUSY; /* note this clears HUNG */

	/*
	 * If we have a minimum free, clear IFF_DRV_OACTIVE
	 * to tell the stack that it is OK to send packets.
	 * Notice that all writes of OACTIVE happen under the
	 * TX lock which, with a single queue, guarantees
	 * sanity.
	 */
	if (txr->tx_avail >= EM_MAX_SCATTER) {
		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
	}

	/* Disable hang detection if all clean */
	if (txr->tx_avail == adapter->num_tx_desc)
		txr->busy = EM_TX_IDLE;
}

/*********************************************************************
 *
 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
 *
 *  Re-arms descriptors in [next_to_refresh, limit) with fresh (or
 *  recycled) jumbo-cluster mbufs and advances the hardware tail
 *  pointer over whatever was successfully refreshed.
 *
 **********************************************************************/
static void
em_refresh_mbufs(struct rx_ring *rxr, int limit)
{
	struct adapter		*adapter = rxr->adapter;
	struct mbuf		*m;
	bus_dma_segment_t	segs;
	struct em_rxbuffer	*rxbuf;
	int			i, j, error, nsegs;
	bool			cleaned = FALSE;

	i = j = rxr->next_to_refresh;
	/*
	** Get one descriptor beyond
	** our work mark to control
	** the loop.
	*/
	if (++j == adapter->num_rx_desc)
		j = 0;

	while (j != limit) {
		rxbuf = &rxr->rx_buffers[i];
		/* A NULL m_head marks a slot consumed by em_rxeof. */
		if (rxbuf->m_head == NULL) {
			m = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, adapter->rx_mbuf_sz);
			/*
			** If we have a temporary resource shortage
			** that causes a failure, just abort refresh
			** for now, we will return to this point when
			** reinvoked from em_rxeof.
			*/
			if (m == NULL)
				goto update;
		} else
			m = rxbuf->m_head;

		/* Reset the recycled mbuf to a full, empty cluster. */
		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
		m->m_flags |= M_PKTHDR;
		m->m_data = m->m_ext.ext_buf;

		/* Use bus_dma machinery to setup the memory mapping  */
		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
		    m, &segs, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: hdr dmamap load"
			    " failure - %d\n", error);
			m_free(m);
			rxbuf->m_head = NULL;
			goto update;
		}
		rxbuf->m_head = m;
		rxbuf->paddr = segs.ds_addr;
		bus_dmamap_sync(rxr->rxtag,
		    rxbuf->map, BUS_DMASYNC_PREREAD);
		em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
		cleaned = TRUE;

		i = j; /* Next is precalulated for us */
		rxr->next_to_refresh = i;
		/* Calculate next controlling index */
		if (++j == adapter->num_rx_desc)
			j = 0;
	}
update:
	/*
	** Update the tail pointer only if,
	** and as far as we have refreshed.
	*/
	if (cleaned)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_RDT(rxr->me), rxr->next_to_refresh);

	return;
}


/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've allocated.
4289 * 4290 **********************************************************************/ 4291 static int 4292 em_allocate_receive_buffers(struct rx_ring *rxr) 4293 { 4294 struct adapter *adapter = rxr->adapter; 4295 device_t dev = adapter->dev; 4296 struct em_rxbuffer *rxbuf; 4297 int error; 4298 4299 rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) * 4300 adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); 4301 if (rxr->rx_buffers == NULL) { 4302 device_printf(dev, "Unable to allocate rx_buffer memory\n"); 4303 return (ENOMEM); 4304 } 4305 4306 error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 4307 1, 0, /* alignment, bounds */ 4308 BUS_SPACE_MAXADDR, /* lowaddr */ 4309 BUS_SPACE_MAXADDR, /* highaddr */ 4310 NULL, NULL, /* filter, filterarg */ 4311 MJUM9BYTES, /* maxsize */ 4312 1, /* nsegments */ 4313 MJUM9BYTES, /* maxsegsize */ 4314 0, /* flags */ 4315 NULL, /* lockfunc */ 4316 NULL, /* lockarg */ 4317 &rxr->rxtag); 4318 if (error) { 4319 device_printf(dev, "%s: bus_dma_tag_create failed %d\n", 4320 __func__, error); 4321 goto fail; 4322 } 4323 4324 rxbuf = rxr->rx_buffers; 4325 for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) { 4326 rxbuf = &rxr->rx_buffers[i]; 4327 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map); 4328 if (error) { 4329 device_printf(dev, "%s: bus_dmamap_create failed: %d\n", 4330 __func__, error); 4331 goto fail; 4332 } 4333 } 4334 4335 return (0); 4336 4337 fail: 4338 em_free_receive_structures(adapter); 4339 return (error); 4340 } 4341 4342 4343 /********************************************************************* 4344 * 4345 * Initialize a receive ring and its buffers. 
 *
 **********************************************************************/
static int
em_setup_receive_ring(struct rx_ring *rxr)
{
	struct	adapter 	*adapter = rxr->adapter;
	struct em_rxbuffer	*rxbuf;
	bus_dma_segment_t	seg[1];
	int			rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_slot *slot;
	struct netmap_adapter *na = netmap_getna(adapter->ifp);
#endif


	/* Clear the ring contents */
	EM_RX_LOCK(rxr);
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
	bzero((void *)rxr->rx_base, rsize);
#ifdef DEV_NETMAP
	/* Non-NULL slot means netmap owns the buffers for this ring. */
	slot = netmap_reset(na, NR_RX, rxr->me, 0);
#endif

	/*
	** Free current RX buffer structs and their mbufs
	*/
	for (int i = 0; i < adapter->num_rx_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->m_head != NULL) {
			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
			m_freem(rxbuf->m_head);
			rxbuf->m_head = NULL; /* mark as freed */
		}
	}

	/* Now replenish the mbufs */
	for (int j = 0; j != adapter->num_rx_desc; ++j) {
		rxbuf = &rxr->rx_buffers[j];
#ifdef DEV_NETMAP
		if (slot) {
			/* Point the descriptor at netmap's buffer instead. */
			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + si, &paddr);
			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
			em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
			continue;
		}
#endif /* DEV_NETMAP */
		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, adapter->rx_mbuf_sz);
		if (rxbuf->m_head == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;

		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
		    rxbuf->map, rxbuf->m_head, seg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			m_freem(rxbuf->m_head);
			rxbuf->m_head = NULL;
			goto fail;
		}
		bus_dmamap_sync(rxr->rxtag,
		    rxbuf->map, BUS_DMASYNC_PREREAD);

		rxbuf->paddr = seg[0].ds_addr;
		em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
	}
	rxr->next_to_check = 0;
	rxr->next_to_refresh = 0;
	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/* NOTE: the success path deliberately falls through here with
	 * error == 0; "fail" only names the common unlock+return. */
fail:
	EM_RX_UNLOCK(rxr);
	return (error);
}

/*********************************************************************
 *
 *  Initialize all receive rings.
 *
 **********************************************************************/
static int
em_setup_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;
	int q;

	for (q = 0; q < adapter->num_queues; q++, rxr++)
		if (em_setup_receive_ring(rxr))
			goto fail;

	return (0);
fail:
	/*
	 * Free RX buffers allocated so far, we will only handle
	 * the rings that completed, the failing case will have
	 * cleaned up for itself. 'q' failed, so its the terminus.
	 */
	for (int i = 0; i < q; ++i) {
		rxr = &adapter->rx_rings[i];
		for (int n = 0; n < adapter->num_rx_desc; n++) {
			struct em_rxbuffer *rxbuf;
			rxbuf = &rxr->rx_buffers[n];
			if (rxbuf->m_head != NULL) {
				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
				m_freem(rxbuf->m_head);
				rxbuf->m_head = NULL;
			}
		}
		rxr->next_to_check = 0;
		rxr->next_to_refresh = 0;
	}

	return (ENOBUFS);
}

/*********************************************************************
 *
 *  Free all receive rings.
4479 * 4480 **********************************************************************/ 4481 static void 4482 em_free_receive_structures(struct adapter *adapter) 4483 { 4484 struct rx_ring *rxr = adapter->rx_rings; 4485 4486 for (int i = 0; i < adapter->num_queues; i++, rxr++) { 4487 em_free_receive_buffers(rxr); 4488 /* Free the ring memory as well */ 4489 em_dma_free(adapter, &rxr->rxdma); 4490 EM_RX_LOCK_DESTROY(rxr); 4491 } 4492 4493 free(adapter->rx_rings, M_DEVBUF); 4494 } 4495 4496 4497 /********************************************************************* 4498 * 4499 * Free receive ring data structures 4500 * 4501 **********************************************************************/ 4502 static void 4503 em_free_receive_buffers(struct rx_ring *rxr) 4504 { 4505 struct adapter *adapter = rxr->adapter; 4506 struct em_rxbuffer *rxbuf = NULL; 4507 4508 INIT_DEBUGOUT("free_receive_buffers: begin"); 4509 4510 if (rxr->rx_buffers != NULL) { 4511 for (int i = 0; i < adapter->num_rx_desc; i++) { 4512 rxbuf = &rxr->rx_buffers[i]; 4513 if (rxbuf->map != NULL) { 4514 bus_dmamap_sync(rxr->rxtag, rxbuf->map, 4515 BUS_DMASYNC_POSTREAD); 4516 bus_dmamap_unload(rxr->rxtag, rxbuf->map); 4517 bus_dmamap_destroy(rxr->rxtag, rxbuf->map); 4518 } 4519 if (rxbuf->m_head != NULL) { 4520 m_freem(rxbuf->m_head); 4521 rxbuf->m_head = NULL; 4522 } 4523 } 4524 free(rxr->rx_buffers, M_DEVBUF); 4525 rxr->rx_buffers = NULL; 4526 rxr->next_to_check = 0; 4527 rxr->next_to_refresh = 0; 4528 } 4529 4530 if (rxr->rxtag != NULL) { 4531 bus_dma_tag_destroy(rxr->rxtag); 4532 rxr->rxtag = NULL; 4533 } 4534 4535 return; 4536 } 4537 4538 4539 /********************************************************************* 4540 * 4541 * Enable receive unit. 
 *
 **********************************************************************/

static void
em_initialize_receive_unit(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;
	if_t ifp = adapter->ifp;
	struct e1000_hw	*hw = &adapter->hw;
	u32 rctl, rxcsum, rfctl;

	INIT_DEBUGOUT("em_initialize_receive_units: begin");

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring
	 */
	rctl = E1000_READ_REG(hw, E1000_RCTL);
	/* Do not disable if ever enabled on this hardware */
	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);

	/* Setup the Receive Control Register */
	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/* Do not store bad packets */
	rctl &= ~E1000_RCTL_SBP;

	/* Enable Long Packet receive */
	if (if_getmtu(ifp) > ETHERMTU)
		rctl |= E1000_RCTL_LPE;
	else
		rctl &= ~E1000_RCTL_LPE;

	/* Strip the CRC */
	if (!em_disable_crc_stripping)
		rctl |= E1000_RCTL_SECRC;

	/* Absolute and packet interrupt delay timers (sysctl-tunable). */
	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
	    adapter->rx_abs_int_delay.value);

	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
	    adapter->rx_int_delay.value);
	/*
	 * Set the interrupt throttling rate. Value is calculated
	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
	 */
	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);

	/* Use extended rx descriptor formats */
	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
	rfctl |= E1000_RFCTL_EXTEN;
	/*
	** When using MSIX interrupts we need to throttle
	** using the EITR register (82574 only)
	*/
	if (hw->mac.type == e1000_82574) {
		for (int i = 0; i < 4; i++)
			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
			    DEFAULT_ITR);
		/* Disable accelerated acknowledge */
		rfctl |= E1000_RFCTL_ACK_DIS;
	}
	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);

	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
#ifdef EM_MULTIQUEUE
		/* PCSD is required for RSS hash delivery */
		rxcsum |= E1000_RXCSUM_TUOFL |
			  E1000_RXCSUM_IPOFL |
			  E1000_RXCSUM_PCSD;
#else
		rxcsum |= E1000_RXCSUM_TUOFL;
#endif
	} else
		rxcsum &= ~E1000_RXCSUM_TUOFL;

	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);

#ifdef EM_MULTIQUEUE
#define RSSKEYLEN 10
	if (adapter->num_queues > 1) {
		uint8_t  rss_key[4 * RSSKEYLEN];
		uint32_t reta = 0;
		int i;

		/*
		* Configure RSS key
		*/
		arc4rand(rss_key, sizeof(rss_key), 0);
		for (i = 0; i < RSSKEYLEN; ++i) {
			uint32_t rssrk = 0;

			rssrk = EM_RSSRK_VAL(rss_key, i);
			E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
		}

		/*
		* Configure RSS redirect table in following fashion:
		* (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
		*/
		for (i = 0; i < sizeof(reta); ++i) {
			uint32_t q;

			q = (i % adapter->num_queues) << 7;
			reta |= q << (8 * i);
		}

		/* Same 4-entry pattern replicated across all 32 RETA regs. */
		for (i = 0; i < 32; ++i) {
			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
		}

		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
				E1000_MRQC_RSS_FIELD_IPV4_TCP |
				E1000_MRQC_RSS_FIELD_IPV4 |
				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
				E1000_MRQC_RSS_FIELD_IPV6_EX |
				E1000_MRQC_RSS_FIELD_IPV6);
	}
#endif
	/*
	** XXX TEMPORARY WORKAROUND: on some systems with 82573
	** long latencies are observed, like Lenovo X60. This
	** change eliminates the problem, but since having positive
	** values in RDTR is a known source of problems on other
	** platforms another solution is being sought.
	*/
	if (hw->mac.type == e1000_82573)
		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);

	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		/* Setup the Base and Length of the Rx Descriptor Ring */
		u64 bus_addr = rxr->rxdma.dma_paddr;
		u32 rdt = adapter->num_rx_desc - 1; /* default */

		E1000_WRITE_REG(hw, E1000_RDLEN(i),
		    adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
		/* Setup the Head and Tail Descriptor Pointers */
		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
#ifdef DEV_NETMAP
		/*
		 * an init() while a netmap client is active must
		 * preserve the rx buffers passed to userspace.
		 */
		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
			struct netmap_adapter *na = netmap_getna(adapter->ifp);
			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
		}
#endif /* DEV_NETMAP */
		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
	}

	/*
	 * Set PTHRESH for improved jumbo performance
	 * According to 10.2.5.11 of Intel 82574 Datasheet,
	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
	 * Only write to RXDCTL(1) if there is a need for different
	 * settings.
	 */
	if (((adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_pch2lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan)) &&
	    (if_getmtu(ifp) > ETHERMTU)) {
		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
	} else if (adapter->hw.mac.type == e1000_82574) {
		for (int i = 0; i < adapter->num_queues; i++) {
			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));

			rxdctl |= 0x20;		/* PTHRESH */
			rxdctl |= 4 << 8;	/* HTHRESH */
			rxdctl |= 4 << 16;	/* WTHRESH */
			rxdctl |= 1 << 24;	/* Switch to granularity */
			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
		}
	}

	if (adapter->hw.mac.type >= e1000_pch2lan) {
		if (if_getmtu(ifp) > ETHERMTU)
			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
		else
			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
	}

	/* Make sure VLAN Filters are off */
	rctl &= ~E1000_RCTL_VFE;

	/* Buffer-size bits must match the mbuf size chosen at init. */
	if (adapter->rx_mbuf_sz == MCLBYTES)
		rctl |= E1000_RCTL_SZ_2048;
	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;

	/* ensure we clear use DTYPE of 00 here */
	rctl &= ~0x00000C00;
	/* Write out the settings */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);

	return;
}


/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor and sends data which has been
 *  dma'ed into host memory to upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  For polling we also now return the number of cleaned packets
 *********************************************************************/
static bool
em_rxeof(struct rx_ring *rxr, int count, int *done)
{
	struct adapter		*adapter = rxr->adapter;
	if_t ifp = adapter->ifp;
	struct mbuf		*mp, *sendmp;
	u32			status = 0;
	u16			len;
	int			i, processed, rxdone = 0;
	bool			eop;
	union e1000_rx_desc_extended	*cur;

	EM_RX_LOCK(rxr);

	/* Sync the ring */
	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);


#ifdef DEV_NETMAP
	/* In netmap mode the host stack never sees these packets. */
	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
		EM_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	/* Note: count only decrements on EOP, i.e. per packet. */
	for (i = rxr->next_to_check, processed = 0; count != 0;) {
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
			break;

		cur = &rxr->rx_base[i];
		status = le32toh(cur->wb.upper.status_error);
		mp = sendmp = NULL;

		/* DD clear: hardware has not written this one back yet. */
		if ((status & E1000_RXD_STAT_DD) == 0)
			break;

		len = le16toh(cur->wb.upper.length);
		eop = (status & E1000_RXD_STAT_EOP) != 0;

		if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
		    (rxr->discard == TRUE)) {
			adapter->dropped_pkts++;
			++rxr->rx_discarded;
			if (!eop) /* Catch subsequent segs */
				rxr->discard = TRUE;
			else
				rxr->discard = FALSE;
			em_rx_discard(rxr, i);
			goto next_desc;
		}
		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);

		/* Assign correct length to the current fragment */
		mp = rxr->rx_buffers[i].m_head;
		mp->m_len = len;

		/* Trigger for refresh */
		rxr->rx_buffers[i].m_head = NULL;

		/* First segment? */
		if (rxr->fmp == NULL) {
			mp->m_pkthdr.len = len;
			rxr->fmp = rxr->lmp = mp;
		} else {
			/* Chain mbuf's together */
			mp->m_flags &= ~M_PKTHDR;
			rxr->lmp->m_next = mp;
			rxr->lmp = mp;
			rxr->fmp->m_pkthdr.len += len;
		}

		if (eop) {
			--count;
			sendmp = rxr->fmp;
			if_setrcvif(sendmp, ifp);
			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
			em_receive_checksum(status, sendmp);
#ifndef __NO_STRICT_ALIGNMENT
			if (adapter->hw.mac.max_frame_size >
			    (MCLBYTES - ETHER_ALIGN) &&
			    em_fixup_rx(rxr) != 0)
				goto skip;
#endif
			if (status & E1000_RXD_STAT_VP) {
				if_setvtag(sendmp,
				    le16toh(cur->wb.upper.vlan));
				sendmp->m_flags |= M_VLANTAG;
			}
#ifndef __NO_STRICT_ALIGNMENT
skip:
#endif
			rxr->fmp = rxr->lmp = NULL;
		}
next_desc:
		/* Sync the ring */
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		/* Zero out the receive descriptors status. */
		cur->wb.upper.status_error &= htole32(~0xFF);
		++rxdone;	/* cumulative for POLL */
		++processed;

		/* Advance our pointers to the next descriptor. */
		if (++i == adapter->num_rx_desc)
			i = 0;

		/*
		 * Send to the stack; the RX lock is dropped around
		 * if_input(), so re-read next_to_check afterwards in
		 * case another context advanced it.
		 */
		if (sendmp != NULL) {
			rxr->next_to_check = i;
			EM_RX_UNLOCK(rxr);
			if_input(ifp, sendmp);
			EM_RX_LOCK(rxr);
			i = rxr->next_to_check;
		}

		/* Only refresh mbufs every 8 descriptors */
		if (processed == 8) {
			em_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Catch any remaining refresh work */
	if (e1000_rx_unrefreshed(rxr))
		em_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;
	if (done != NULL)
		*done = rxdone;
	EM_RX_UNLOCK(rxr);

	/* TRUE means the last examined descriptor still had DD set,
	 * i.e. more work may be pending. */
	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
}

/*
 * Drop the fragment at index i plus any partially assembled chain,
 * leaving the slot NULL so em_refresh_mbufs() re-arms it.
 */
static __inline void
em_rx_discard(struct rx_ring *rxr, int i)
{
	struct em_rxbuffer	*rbuf;

	rbuf = &rxr->rx_buffers[i];
	bus_dmamap_unload(rxr->rxtag, rbuf->map);

	/* Free any previous pieces */
	if (rxr->fmp != NULL) {
		rxr->fmp->m_flags |= M_PKTHDR;
		m_freem(rxr->fmp);
		rxr->fmp = NULL;
		rxr->lmp = NULL;
	}
	/*
	** Free buffer and allow em_refresh_mbufs()
	** to clean up and recharge buffer.
	*/
	if (rbuf->m_head) {
		m_free(rbuf->m_head);
		rbuf->m_head = NULL;
	}
	return;
}

#ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign entire payload on
 * architecures with strict alignment. This is serious design mistake of 8254x
 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
 * payload. On architecures without strict alignment restrictions 8254x still
 * performs unaligned memory access which would reduce the performance too.
 * To avoid copying over an entire frame to align, we allocate a new mbuf and
 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
 * existing mbuf chain.
 *
 * Be aware, best performance of the 8254x is achived only when jumbo frame is
 * not used at all on architectures with strict alignment.
 */
static int
em_fixup_rx(struct rx_ring *rxr)
{
	struct adapter *adapter = rxr->adapter;
	struct mbuf *m, *n;
	int error;

	error = 0;
	m = rxr->fmp;
	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
		/* Room in the first mbuf: shift data so the payload
		 * (after the 14-byte Ethernet header) lands aligned. */
		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
		m->m_data += ETHER_HDR_LEN;
	} else {
		/* No room: prepend a fresh mbuf holding just the header. */
		MGETHDR(n, M_NOWAIT, MT_DATA);
		if (n != NULL) {
			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
			m->m_data += ETHER_HDR_LEN;
			m->m_len -= ETHER_HDR_LEN;
			n->m_len = ETHER_HDR_LEN;
			/* n becomes the packet header mbuf of the chain. */
			M_MOVE_PKTHDR(n, m);
			n->m_next = m;
			rxr->fmp = n;
		} else {
			adapter->dropped_pkts++;
			m_freem(rxr->fmp);
			rxr->fmp = NULL;
			error = ENOMEM;
		}
	}

	return (error);
}
#endif

/*
 * Write the buffer's bus address into the extended RX descriptor and
 * clear the write-back status so the DD bit starts out unset.
 */
static void
em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
{
	rxd->read.buffer_addr = htole64(rxbuf->paddr);
	/* DD bits must be cleared */
	rxd->wb.upper.status_error= 0;
}

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of checksum so that stack
 *  doesn't spend time verifying the checksum.
4985 * 4986 *********************************************************************/ 4987 static void 4988 em_receive_checksum(uint32_t status, struct mbuf *mp) 4989 { 4990 mp->m_pkthdr.csum_flags = 0; 4991 4992 /* Ignore Checksum bit is set */ 4993 if (status & E1000_RXD_STAT_IXSM) 4994 return; 4995 4996 /* If the IP checksum exists and there is no IP Checksum error */ 4997 if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) == 4998 E1000_RXD_STAT_IPCS) { 4999 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); 5000 } 5001 5002 /* TCP or UDP checksum */ 5003 if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) == 5004 E1000_RXD_STAT_TCPCS) { 5005 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 5006 mp->m_pkthdr.csum_data = htons(0xffff); 5007 } 5008 if (status & E1000_RXD_STAT_UDPCS) { 5009 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 5010 mp->m_pkthdr.csum_data = htons(0xffff); 5011 } 5012 } 5013 5014 /* 5015 * This routine is run via an vlan 5016 * config EVENT 5017 */ 5018 static void 5019 em_register_vlan(void *arg, if_t ifp, u16 vtag) 5020 { 5021 struct adapter *adapter = if_getsoftc(ifp); 5022 u32 index, bit; 5023 5024 if ((void*)adapter != arg) /* Not our event */ 5025 return; 5026 5027 if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */ 5028 return; 5029 5030 EM_CORE_LOCK(adapter); 5031 index = (vtag >> 5) & 0x7F; 5032 bit = vtag & 0x1F; 5033 adapter->shadow_vfta[index] |= (1 << bit); 5034 ++adapter->num_vlans; 5035 /* Re-init to load the changes */ 5036 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) 5037 em_init_locked(adapter); 5038 EM_CORE_UNLOCK(adapter); 5039 } 5040 5041 /* 5042 * This routine is run via an vlan 5043 * unconfig EVENT 5044 */ 5045 static void 5046 em_unregister_vlan(void *arg, if_t ifp, u16 vtag) 5047 { 5048 struct adapter *adapter = if_getsoftc(ifp); 5049 u32 index, bit; 5050 5051 if (adapter != arg) 5052 return; 5053 5054 if ((vtag == 0) || (vtag > 4095)) /* Invalid 
*/ 5055 return; 5056 5057 EM_CORE_LOCK(adapter); 5058 index = (vtag >> 5) & 0x7F; 5059 bit = vtag & 0x1F; 5060 adapter->shadow_vfta[index] &= ~(1 << bit); 5061 --adapter->num_vlans; 5062 /* Re-init to load the changes */ 5063 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) 5064 em_init_locked(adapter); 5065 EM_CORE_UNLOCK(adapter); 5066 } 5067 5068 static void 5069 em_setup_vlan_hw_support(struct adapter *adapter) 5070 { 5071 struct e1000_hw *hw = &adapter->hw; 5072 u32 reg; 5073 5074 /* 5075 ** We get here thru init_locked, meaning 5076 ** a soft reset, this has already cleared 5077 ** the VFTA and other state, so if there 5078 ** have been no vlan's registered do nothing. 5079 */ 5080 if (adapter->num_vlans == 0) 5081 return; 5082 5083 /* 5084 ** A soft reset zero's out the VFTA, so 5085 ** we need to repopulate it now. 5086 */ 5087 for (int i = 0; i < EM_VFTA_SIZE; i++) 5088 if (adapter->shadow_vfta[i] != 0) 5089 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, 5090 i, adapter->shadow_vfta[i]); 5091 5092 reg = E1000_READ_REG(hw, E1000_CTRL); 5093 reg |= E1000_CTRL_VME; 5094 E1000_WRITE_REG(hw, E1000_CTRL, reg); 5095 5096 /* Enable the Filter Table */ 5097 reg = E1000_READ_REG(hw, E1000_RCTL); 5098 reg &= ~E1000_RCTL_CFIEN; 5099 reg |= E1000_RCTL_VFE; 5100 E1000_WRITE_REG(hw, E1000_RCTL, reg); 5101 } 5102 5103 static void 5104 em_enable_intr(struct adapter *adapter) 5105 { 5106 struct e1000_hw *hw = &adapter->hw; 5107 u32 ims_mask = IMS_ENABLE_MASK; 5108 5109 if (hw->mac.type == e1000_82574) { 5110 E1000_WRITE_REG(hw, EM_EIAC, adapter->ims); 5111 ims_mask |= adapter->ims; 5112 } 5113 E1000_WRITE_REG(hw, E1000_IMS, ims_mask); 5114 } 5115 5116 static void 5117 em_disable_intr(struct adapter *adapter) 5118 { 5119 struct e1000_hw *hw = &adapter->hw; 5120 5121 if (hw->mac.type == e1000_82574) 5122 E1000_WRITE_REG(hw, EM_EIAC, 0); 5123 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); 5124 } 5125 5126 /* 5127 * Bit of a misnomer, what this really means is 5128 * to enable OS 
management of the system... aka
 * to disable special hardware management features
 */
static void
em_init_manageability(struct adapter *adapter)
{
	/* A shared code workaround */
#define E1000_82542_MANC2H E1000_MANC2H
	if (adapter->has_manage) {
		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);

		/* disable hardware interception of ARP */
		manc &= ~(E1000_MANC_ARP_EN);

		/* enable receiving management packets to the host */
		manc |= E1000_MANC_EN_MNG2HOST;
		/* Forward the ASF/management UDP ports to the host. */
#define E1000_MNG2HOST_PORT_623 (1 << 5)
#define E1000_MNG2HOST_PORT_664 (1 << 6)
		manc2h |= E1000_MNG2HOST_PORT_623;
		manc2h |= E1000_MNG2HOST_PORT_664;
		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
	}
}

/*
 * Give control back to hardware management
 * controller if there is one.
 */
static void
em_release_manageability(struct adapter *adapter)
{
	if (adapter->has_manage) {
		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);

		/* re-enable hardware interception of ARP */
		manc |= E1000_MANC_ARP_EN;
		manc &= ~E1000_MANC_EN_MNG2HOST;

		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
	}
}

/*
 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means
 * that the driver is loaded. For AMT version type f/w
 * this means that the network i/f is open.
5177 */ 5178 static void 5179 em_get_hw_control(struct adapter *adapter) 5180 { 5181 u32 ctrl_ext, swsm; 5182 5183 if (adapter->hw.mac.type == e1000_82573) { 5184 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); 5185 E1000_WRITE_REG(&adapter->hw, E1000_SWSM, 5186 swsm | E1000_SWSM_DRV_LOAD); 5187 return; 5188 } 5189 /* else */ 5190 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); 5191 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, 5192 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); 5193 return; 5194 } 5195 5196 /* 5197 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. 5198 * For ASF and Pass Through versions of f/w this means that 5199 * the driver is no longer loaded. For AMT versions of the 5200 * f/w this means that the network i/f is closed. 5201 */ 5202 static void 5203 em_release_hw_control(struct adapter *adapter) 5204 { 5205 u32 ctrl_ext, swsm; 5206 5207 if (!adapter->has_manage) 5208 return; 5209 5210 if (adapter->hw.mac.type == e1000_82573) { 5211 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); 5212 E1000_WRITE_REG(&adapter->hw, E1000_SWSM, 5213 swsm & ~E1000_SWSM_DRV_LOAD); 5214 return; 5215 } 5216 /* else */ 5217 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); 5218 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, 5219 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); 5220 return; 5221 } 5222 5223 static int 5224 em_is_valid_ether_addr(u8 *addr) 5225 { 5226 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; 5227 5228 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { 5229 return (FALSE); 5230 } 5231 5232 return (TRUE); 5233 } 5234 5235 /* 5236 ** Parse the interface capabilities with regard 5237 ** to both system management and wake-on-lan for 5238 ** later use. 
5239 */ 5240 static void 5241 em_get_wakeup(device_t dev) 5242 { 5243 struct adapter *adapter = device_get_softc(dev); 5244 u16 eeprom_data = 0, device_id, apme_mask; 5245 5246 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); 5247 apme_mask = EM_EEPROM_APME; 5248 5249 switch (adapter->hw.mac.type) { 5250 case e1000_82573: 5251 case e1000_82583: 5252 adapter->has_amt = TRUE; 5253 /* Falls thru */ 5254 case e1000_82571: 5255 case e1000_82572: 5256 case e1000_80003es2lan: 5257 if (adapter->hw.bus.func == 1) { 5258 e1000_read_nvm(&adapter->hw, 5259 NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); 5260 break; 5261 } else 5262 e1000_read_nvm(&adapter->hw, 5263 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); 5264 break; 5265 case e1000_ich8lan: 5266 case e1000_ich9lan: 5267 case e1000_ich10lan: 5268 case e1000_pchlan: 5269 case e1000_pch2lan: 5270 apme_mask = E1000_WUC_APME; 5271 adapter->has_amt = TRUE; 5272 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC); 5273 break; 5274 default: 5275 e1000_read_nvm(&adapter->hw, 5276 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); 5277 break; 5278 } 5279 if (eeprom_data & apme_mask) 5280 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); 5281 /* 5282 * We have the eeprom settings, now apply the special cases 5283 * where the eeprom may be wrong or the board won't support 5284 * wake on lan on a particular port 5285 */ 5286 device_id = pci_get_device(dev); 5287 switch (device_id) { 5288 case E1000_DEV_ID_82571EB_FIBER: 5289 /* Wake events only supported on port A for dual fiber 5290 * regardless of eeprom setting */ 5291 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & 5292 E1000_STATUS_FUNC_1) 5293 adapter->wol = 0; 5294 break; 5295 case E1000_DEV_ID_82571EB_QUAD_COPPER: 5296 case E1000_DEV_ID_82571EB_QUAD_FIBER: 5297 case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: 5298 /* if quad port adapter, disable WoL on all but port A */ 5299 if (global_quad_port_a != 0) 5300 adapter->wol = 0; 5301 /* Reset for multiple quad port adapters */ 
5302 if (++global_quad_port_a == 4) 5303 global_quad_port_a = 0; 5304 break; 5305 } 5306 return; 5307 } 5308 5309 5310 /* 5311 * Enable PCI Wake On Lan capability 5312 */ 5313 static void 5314 em_enable_wakeup(device_t dev) 5315 { 5316 struct adapter *adapter = device_get_softc(dev); 5317 if_t ifp = adapter->ifp; 5318 u32 pmc, ctrl, ctrl_ext, rctl; 5319 u16 status; 5320 5321 if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0)) 5322 return; 5323 5324 /* Advertise the wakeup capability */ 5325 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); 5326 ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); 5327 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); 5328 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); 5329 5330 if ((adapter->hw.mac.type == e1000_ich8lan) || 5331 (adapter->hw.mac.type == e1000_pchlan) || 5332 (adapter->hw.mac.type == e1000_ich9lan) || 5333 (adapter->hw.mac.type == e1000_ich10lan)) 5334 e1000_suspend_workarounds_ich8lan(&adapter->hw); 5335 5336 /* Keep the laser running on Fiber adapters */ 5337 if (adapter->hw.phy.media_type == e1000_media_type_fiber || 5338 adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { 5339 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); 5340 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; 5341 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext); 5342 } 5343 5344 /* 5345 ** Determine type of Wakeup: note that wol 5346 ** is set with all bits on by default. 
5347 */ 5348 if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0) 5349 adapter->wol &= ~E1000_WUFC_MAG; 5350 5351 if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0) 5352 adapter->wol &= ~E1000_WUFC_MC; 5353 else { 5354 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 5355 rctl |= E1000_RCTL_MPE; 5356 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); 5357 } 5358 5359 if ((adapter->hw.mac.type == e1000_pchlan) || 5360 (adapter->hw.mac.type == e1000_pch2lan)) { 5361 if (em_enable_phy_wakeup(adapter)) 5362 return; 5363 } else { 5364 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); 5365 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); 5366 } 5367 5368 if (adapter->hw.phy.type == e1000_phy_igp_3) 5369 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); 5370 5371 /* Request PME */ 5372 status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); 5373 status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); 5374 if (if_getcapenable(ifp) & IFCAP_WOL) 5375 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; 5376 pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); 5377 5378 return; 5379 } 5380 5381 /* 5382 ** WOL in the newer chipset interfaces (pchlan) 5383 ** require thing to be copied into the phy 5384 */ 5385 static int 5386 em_enable_phy_wakeup(struct adapter *adapter) 5387 { 5388 struct e1000_hw *hw = &adapter->hw; 5389 u32 mreg, ret = 0; 5390 u16 preg; 5391 5392 /* copy MAC RARs to PHY RARs */ 5393 e1000_copy_rx_addrs_to_phy_ich8lan(hw); 5394 5395 /* copy MAC MTA to PHY MTA */ 5396 for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) { 5397 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); 5398 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); 5399 e1000_write_phy_reg(hw, BM_MTA(i) + 1, 5400 (u16)((mreg >> 16) & 0xFFFF)); 5401 } 5402 5403 /* configure PHY Rx Control register */ 5404 e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg); 5405 mreg = E1000_READ_REG(hw, E1000_RCTL); 5406 if (mreg & E1000_RCTL_UPE) 5407 preg |= BM_RCTL_UPE; 5408 if 
(mreg & E1000_RCTL_MPE) 5409 preg |= BM_RCTL_MPE; 5410 preg &= ~(BM_RCTL_MO_MASK); 5411 if (mreg & E1000_RCTL_MO_3) 5412 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) 5413 << BM_RCTL_MO_SHIFT); 5414 if (mreg & E1000_RCTL_BAM) 5415 preg |= BM_RCTL_BAM; 5416 if (mreg & E1000_RCTL_PMCF) 5417 preg |= BM_RCTL_PMCF; 5418 mreg = E1000_READ_REG(hw, E1000_CTRL); 5419 if (mreg & E1000_CTRL_RFCE) 5420 preg |= BM_RCTL_RFCE; 5421 e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg); 5422 5423 /* enable PHY wakeup in MAC register */ 5424 E1000_WRITE_REG(hw, E1000_WUC, 5425 E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN); 5426 E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); 5427 5428 /* configure and enable PHY wakeup in PHY registers */ 5429 e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol); 5430 e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); 5431 5432 /* activate PHY wakeup */ 5433 ret = hw->phy.ops.acquire(hw); 5434 if (ret) { 5435 printf("Could not acquire PHY\n"); 5436 return ret; 5437 } 5438 e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, 5439 (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); 5440 ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); 5441 if (ret) { 5442 printf("Could not read PHY page 769\n"); 5443 goto out; 5444 } 5445 preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; 5446 ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); 5447 if (ret) 5448 printf("Could not set PHY Host Wakeup bit\n"); 5449 out: 5450 hw->phy.ops.release(hw); 5451 5452 return ret; 5453 } 5454 5455 static void 5456 em_led_func(void *arg, int onoff) 5457 { 5458 struct adapter *adapter = arg; 5459 5460 EM_CORE_LOCK(adapter); 5461 if (onoff) { 5462 e1000_setup_led(&adapter->hw); 5463 e1000_led_on(&adapter->hw); 5464 } else { 5465 e1000_led_off(&adapter->hw); 5466 e1000_cleanup_led(&adapter->hw); 5467 } 5468 EM_CORE_UNLOCK(adapter); 5469 } 5470 5471 /* 5472 ** Disable the L0S and L1 LINK states 5473 */ 5474 static void 5475 em_disable_aspm(struct adapter 
*adapter) 5476 { 5477 int base, reg; 5478 u16 link_cap,link_ctrl; 5479 device_t dev = adapter->dev; 5480 5481 switch (adapter->hw.mac.type) { 5482 case e1000_82573: 5483 case e1000_82574: 5484 case e1000_82583: 5485 break; 5486 default: 5487 return; 5488 } 5489 if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0) 5490 return; 5491 reg = base + PCIER_LINK_CAP; 5492 link_cap = pci_read_config(dev, reg, 2); 5493 if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0) 5494 return; 5495 reg = base + PCIER_LINK_CTL; 5496 link_ctrl = pci_read_config(dev, reg, 2); 5497 link_ctrl &= ~PCIEM_LINK_CTL_ASPMC; 5498 pci_write_config(dev, reg, link_ctrl, 2); 5499 return; 5500 } 5501 5502 /********************************************************************** 5503 * 5504 * Update the board statistics counters. 5505 * 5506 **********************************************************************/ 5507 static void 5508 em_update_stats_counters(struct adapter *adapter) 5509 { 5510 5511 if(adapter->hw.phy.media_type == e1000_media_type_copper || 5512 (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { 5513 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS); 5514 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC); 5515 } 5516 adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS); 5517 adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC); 5518 adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC); 5519 adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL); 5520 5521 adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC); 5522 adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL); 5523 adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC); 5524 adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC); 5525 adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC); 5526 adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC); 5527 adapter->stats.xontxc += 
E1000_READ_REG(&adapter->hw, E1000_XONTXC); 5528 adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); 5529 adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); 5530 adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); 5531 adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); 5532 adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); 5533 adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255); 5534 adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); 5535 adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); 5536 adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); 5537 adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); 5538 adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); 5539 adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); 5540 adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); 5541 5542 /* For the 64-bit byte counters the low dword must be read first. 
*/ 5543 /* Both registers clear on the read of the high dword */ 5544 5545 adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + 5546 ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32); 5547 adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + 5548 ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32); 5549 5550 adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); 5551 adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); 5552 adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); 5553 adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); 5554 adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); 5555 5556 adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); 5557 adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); 5558 5559 adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); 5560 adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); 5561 adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); 5562 adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); 5563 adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); 5564 adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); 5565 adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); 5566 adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); 5567 adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); 5568 adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC); 5569 5570 /* Interrupt Counts */ 5571 5572 adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC); 5573 adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC); 5574 adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC); 5575 adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC); 5576 adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC); 5577 adapter->stats.ictxqec += 
E1000_READ_REG(&adapter->hw, E1000_ICTXQEC); 5578 adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC); 5579 adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC); 5580 adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC); 5581 5582 if (adapter->hw.mac.type >= e1000_82543) { 5583 adapter->stats.algnerrc += 5584 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); 5585 adapter->stats.rxerrc += 5586 E1000_READ_REG(&adapter->hw, E1000_RXERRC); 5587 adapter->stats.tncrs += 5588 E1000_READ_REG(&adapter->hw, E1000_TNCRS); 5589 adapter->stats.cexterr += 5590 E1000_READ_REG(&adapter->hw, E1000_CEXTERR); 5591 adapter->stats.tsctc += 5592 E1000_READ_REG(&adapter->hw, E1000_TSCTC); 5593 adapter->stats.tsctfc += 5594 E1000_READ_REG(&adapter->hw, E1000_TSCTFC); 5595 } 5596 } 5597 5598 static uint64_t 5599 em_get_counter(if_t ifp, ift_counter cnt) 5600 { 5601 struct adapter *adapter; 5602 5603 adapter = if_getsoftc(ifp); 5604 5605 switch (cnt) { 5606 case IFCOUNTER_COLLISIONS: 5607 return (adapter->stats.colc); 5608 case IFCOUNTER_IERRORS: 5609 return (adapter->dropped_pkts + adapter->stats.rxerrc + 5610 adapter->stats.crcerrs + adapter->stats.algnerrc + 5611 adapter->stats.ruc + adapter->stats.roc + 5612 adapter->stats.mpc + adapter->stats.cexterr); 5613 case IFCOUNTER_OERRORS: 5614 return (adapter->stats.ecol + adapter->stats.latecol + 5615 adapter->watchdog_events); 5616 default: 5617 return (if_get_counter_default(ifp, cnt)); 5618 } 5619 } 5620 5621 /* Export a single 32-bit register via a read-only sysctl. */ 5622 static int 5623 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) 5624 { 5625 struct adapter *adapter; 5626 u_int val; 5627 5628 adapter = oidp->oid_arg1; 5629 val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); 5630 return (sysctl_handle_int(oidp, &val, 0, req)); 5631 } 5632 5633 /* 5634 * Add sysctl variables, one per statistic, to the system. 
5635 */ 5636 static void 5637 em_add_hw_stats(struct adapter *adapter) 5638 { 5639 device_t dev = adapter->dev; 5640 5641 struct tx_ring *txr = adapter->tx_rings; 5642 struct rx_ring *rxr = adapter->rx_rings; 5643 5644 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); 5645 struct sysctl_oid *tree = device_get_sysctl_tree(dev); 5646 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); 5647 struct e1000_hw_stats *stats = &adapter->stats; 5648 5649 struct sysctl_oid *stat_node, *queue_node, *int_node; 5650 struct sysctl_oid_list *stat_list, *queue_list, *int_list; 5651 5652 #define QUEUE_NAME_LEN 32 5653 char namebuf[QUEUE_NAME_LEN]; 5654 5655 /* Driver Statistics */ 5656 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 5657 CTLFLAG_RD, &adapter->dropped_pkts, 5658 "Driver dropped packets"); 5659 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", 5660 CTLFLAG_RD, &adapter->link_irq, 5661 "Link MSIX IRQ Handled"); 5662 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", 5663 CTLFLAG_RD, &adapter->mbuf_defrag_failed, 5664 "Defragmenting mbuf chain failed"); 5665 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 5666 CTLFLAG_RD, &adapter->no_tx_dma_setup, 5667 "Driver tx dma failure in xmit"); 5668 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", 5669 CTLFLAG_RD, &adapter->rx_overruns, 5670 "RX overruns"); 5671 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", 5672 CTLFLAG_RD, &adapter->watchdog_events, 5673 "Watchdog timeouts"); 5674 5675 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", 5676 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL, 5677 em_sysctl_reg_handler, "IU", 5678 "Device Control Register"); 5679 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", 5680 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL, 5681 em_sysctl_reg_handler, "IU", 5682 "Receiver Control Register"); 5683 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", 5684 CTLFLAG_RD, &adapter->hw.fc.high_water, 0, 5685 "Flow Control High Watermark"); 5686 
SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 5687 CTLFLAG_RD, &adapter->hw.fc.low_water, 0, 5688 "Flow Control Low Watermark"); 5689 5690 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { 5691 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i); 5692 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 5693 CTLFLAG_RD, NULL, "TX Queue Name"); 5694 queue_list = SYSCTL_CHILDREN(queue_node); 5695 5696 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 5697 CTLTYPE_UINT | CTLFLAG_RD, adapter, 5698 E1000_TDH(txr->me), 5699 em_sysctl_reg_handler, "IU", 5700 "Transmit Descriptor Head"); 5701 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 5702 CTLTYPE_UINT | CTLFLAG_RD, adapter, 5703 E1000_TDT(txr->me), 5704 em_sysctl_reg_handler, "IU", 5705 "Transmit Descriptor Tail"); 5706 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq", 5707 CTLFLAG_RD, &txr->tx_irq, 5708 "Queue MSI-X Transmit Interrupts"); 5709 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 5710 CTLFLAG_RD, &txr->no_desc_avail, 5711 "Queue No Descriptor Available"); 5712 5713 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i); 5714 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 5715 CTLFLAG_RD, NULL, "RX Queue Name"); 5716 queue_list = SYSCTL_CHILDREN(queue_node); 5717 5718 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 5719 CTLTYPE_UINT | CTLFLAG_RD, adapter, 5720 E1000_RDH(rxr->me), 5721 em_sysctl_reg_handler, "IU", 5722 "Receive Descriptor Head"); 5723 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 5724 CTLTYPE_UINT | CTLFLAG_RD, adapter, 5725 E1000_RDT(rxr->me), 5726 em_sysctl_reg_handler, "IU", 5727 "Receive Descriptor Tail"); 5728 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq", 5729 CTLFLAG_RD, &rxr->rx_irq, 5730 "Queue MSI-X Receive Interrupts"); 5731 } 5732 5733 /* MAC stats get their own sub node */ 5734 5735 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 5736 CTLFLAG_RD, NULL, "Statistics"); 5737 
stat_list = SYSCTL_CHILDREN(stat_node); 5738 5739 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll", 5740 CTLFLAG_RD, &stats->ecol, 5741 "Excessive collisions"); 5742 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll", 5743 CTLFLAG_RD, &stats->scc, 5744 "Single collisions"); 5745 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 5746 CTLFLAG_RD, &stats->mcc, 5747 "Multiple collisions"); 5748 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll", 5749 CTLFLAG_RD, &stats->latecol, 5750 "Late collisions"); 5751 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count", 5752 CTLFLAG_RD, &stats->colc, 5753 "Collision Count"); 5754 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors", 5755 CTLFLAG_RD, &adapter->stats.symerrs, 5756 "Symbol Errors"); 5757 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors", 5758 CTLFLAG_RD, &adapter->stats.sec, 5759 "Sequence Errors"); 5760 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count", 5761 CTLFLAG_RD, &adapter->stats.dc, 5762 "Defer Count"); 5763 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets", 5764 CTLFLAG_RD, &adapter->stats.mpc, 5765 "Missed Packets"); 5766 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", 5767 CTLFLAG_RD, &adapter->stats.rnbc, 5768 "Receive No Buffers"); 5769 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize", 5770 CTLFLAG_RD, &adapter->stats.ruc, 5771 "Receive Undersize"); 5772 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", 5773 CTLFLAG_RD, &adapter->stats.rfc, 5774 "Fragmented Packets Received "); 5775 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize", 5776 CTLFLAG_RD, &adapter->stats.roc, 5777 "Oversized Packets Received"); 5778 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber", 5779 CTLFLAG_RD, &adapter->stats.rjc, 5780 "Recevied Jabber"); 5781 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs", 5782 CTLFLAG_RD, &adapter->stats.rxerrc, 5783 "Receive Errors"); 5784 SYSCTL_ADD_UQUAD(ctx, stat_list, 
OID_AUTO, "crc_errs", 5785 CTLFLAG_RD, &adapter->stats.crcerrs, 5786 "CRC errors"); 5787 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs", 5788 CTLFLAG_RD, &adapter->stats.algnerrc, 5789 "Alignment Errors"); 5790 /* On 82575 these are collision counts */ 5791 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", 5792 CTLFLAG_RD, &adapter->stats.cexterr, 5793 "Collision/Carrier extension errors"); 5794 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", 5795 CTLFLAG_RD, &adapter->stats.xonrxc, 5796 "XON Received"); 5797 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", 5798 CTLFLAG_RD, &adapter->stats.xontxc, 5799 "XON Transmitted"); 5800 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", 5801 CTLFLAG_RD, &adapter->stats.xoffrxc, 5802 "XOFF Received"); 5803 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", 5804 CTLFLAG_RD, &adapter->stats.xofftxc, 5805 "XOFF Transmitted"); 5806 5807 /* Packet Reception Stats */ 5808 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", 5809 CTLFLAG_RD, &adapter->stats.tpr, 5810 "Total Packets Received "); 5811 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", 5812 CTLFLAG_RD, &adapter->stats.gprc, 5813 "Good Packets Received"); 5814 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", 5815 CTLFLAG_RD, &adapter->stats.bprc, 5816 "Broadcast Packets Received"); 5817 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", 5818 CTLFLAG_RD, &adapter->stats.mprc, 5819 "Multicast Packets Received"); 5820 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", 5821 CTLFLAG_RD, &adapter->stats.prc64, 5822 "64 byte frames received "); 5823 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", 5824 CTLFLAG_RD, &adapter->stats.prc127, 5825 "65-127 byte frames received"); 5826 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", 5827 CTLFLAG_RD, &adapter->stats.prc255, 5828 "128-255 byte frames received"); 5829 SYSCTL_ADD_UQUAD(ctx, stat_list, 
OID_AUTO, "rx_frames_256_511", 5830 CTLFLAG_RD, &adapter->stats.prc511, 5831 "256-511 byte frames received"); 5832 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", 5833 CTLFLAG_RD, &adapter->stats.prc1023, 5834 "512-1023 byte frames received"); 5835 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", 5836 CTLFLAG_RD, &adapter->stats.prc1522, 5837 "1023-1522 byte frames received"); 5838 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 5839 CTLFLAG_RD, &adapter->stats.gorc, 5840 "Good Octets Received"); 5841 5842 /* Packet Transmission Stats */ 5843 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 5844 CTLFLAG_RD, &adapter->stats.gotc, 5845 "Good Octets Transmitted"); 5846 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", 5847 CTLFLAG_RD, &adapter->stats.tpt, 5848 "Total Packets Transmitted"); 5849 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", 5850 CTLFLAG_RD, &adapter->stats.gptc, 5851 "Good Packets Transmitted"); 5852 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", 5853 CTLFLAG_RD, &adapter->stats.bptc, 5854 "Broadcast Packets Transmitted"); 5855 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", 5856 CTLFLAG_RD, &adapter->stats.mptc, 5857 "Multicast Packets Transmitted"); 5858 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", 5859 CTLFLAG_RD, &adapter->stats.ptc64, 5860 "64 byte frames transmitted "); 5861 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", 5862 CTLFLAG_RD, &adapter->stats.ptc127, 5863 "65-127 byte frames transmitted"); 5864 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", 5865 CTLFLAG_RD, &adapter->stats.ptc255, 5866 "128-255 byte frames transmitted"); 5867 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", 5868 CTLFLAG_RD, &adapter->stats.ptc511, 5869 "256-511 byte frames transmitted"); 5870 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", 5871 CTLFLAG_RD, 
&adapter->stats.ptc1023, 5872 "512-1023 byte frames transmitted"); 5873 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", 5874 CTLFLAG_RD, &adapter->stats.ptc1522, 5875 "1024-1522 byte frames transmitted"); 5876 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd", 5877 CTLFLAG_RD, &adapter->stats.tsctc, 5878 "TSO Contexts Transmitted"); 5879 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", 5880 CTLFLAG_RD, &adapter->stats.tsctfc, 5881 "TSO Contexts Failed"); 5882 5883 5884 /* Interrupt Stats */ 5885 5886 int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 5887 CTLFLAG_RD, NULL, "Interrupt Statistics"); 5888 int_list = SYSCTL_CHILDREN(int_node); 5889 5890 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts", 5891 CTLFLAG_RD, &adapter->stats.iac, 5892 "Interrupt Assertion Count"); 5893 5894 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", 5895 CTLFLAG_RD, &adapter->stats.icrxptc, 5896 "Interrupt Cause Rx Pkt Timer Expire Count"); 5897 5898 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", 5899 CTLFLAG_RD, &adapter->stats.icrxatc, 5900 "Interrupt Cause Rx Abs Timer Expire Count"); 5901 5902 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", 5903 CTLFLAG_RD, &adapter->stats.ictxptc, 5904 "Interrupt Cause Tx Pkt Timer Expire Count"); 5905 5906 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", 5907 CTLFLAG_RD, &adapter->stats.ictxatc, 5908 "Interrupt Cause Tx Abs Timer Expire Count"); 5909 5910 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", 5911 CTLFLAG_RD, &adapter->stats.ictxqec, 5912 "Interrupt Cause Tx Queue Empty Count"); 5913 5914 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", 5915 CTLFLAG_RD, &adapter->stats.ictxqmtc, 5916 "Interrupt Cause Tx Queue Min Thresh Count"); 5917 5918 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", 5919 CTLFLAG_RD, &adapter->stats.icrxdmtc, 5920 "Interrupt Cause Rx Desc Min Thresh Count"); 5921 5922 SYSCTL_ADD_UQUAD(ctx, 
int_list, OID_AUTO, "rx_overrun", 5923 CTLFLAG_RD, &adapter->stats.icrxoc, 5924 "Interrupt Cause Receiver Overrun Count"); 5925 } 5926 5927 /********************************************************************** 5928 * 5929 * This routine provides a way to dump out the adapter eeprom, 5930 * often a useful debug/service tool. This only dumps the first 5931 * 32 words, stuff that matters is in that extent. 5932 * 5933 **********************************************************************/ 5934 static int 5935 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) 5936 { 5937 struct adapter *adapter = (struct adapter *)arg1; 5938 int error; 5939 int result; 5940 5941 result = -1; 5942 error = sysctl_handle_int(oidp, &result, 0, req); 5943 5944 if (error || !req->newptr) 5945 return (error); 5946 5947 /* 5948 * This value will cause a hex dump of the 5949 * first 32 16-bit words of the EEPROM to 5950 * the screen. 5951 */ 5952 if (result == 1) 5953 em_print_nvm_info(adapter); 5954 5955 return (error); 5956 } 5957 5958 static void 5959 em_print_nvm_info(struct adapter *adapter) 5960 { 5961 u16 eeprom_data; 5962 int i, j, row = 0; 5963 5964 /* Its a bit crude, but it gets the job done */ 5965 printf("\nInterface EEPROM Dump:\n"); 5966 printf("Offset\n0x0000 "); 5967 for (i = 0, j = 0; i < 32; i++, j++) { 5968 if (j == 8) { /* Make the offset block */ 5969 j = 0; ++row; 5970 printf("\n0x00%x0 ",row); 5971 } 5972 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); 5973 printf("%04x ", eeprom_data); 5974 } 5975 printf("\n"); 5976 } 5977 5978 static int 5979 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) 5980 { 5981 struct em_int_delay_info *info; 5982 struct adapter *adapter; 5983 u32 regval; 5984 int error, usecs, ticks; 5985 5986 info = (struct em_int_delay_info *)arg1; 5987 usecs = info->value; 5988 error = sysctl_handle_int(oidp, &usecs, 0, req); 5989 if (error != 0 || req->newptr == NULL) 5990 return (error); 5991 if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) 5992 return (EINVAL); 5993 
info->value = usecs; 5994 ticks = EM_USECS_TO_TICKS(usecs); 5995 if (info->offset == E1000_ITR) /* units are 256ns here */ 5996 ticks *= 4; 5997 5998 adapter = info->adapter; 5999 6000 EM_CORE_LOCK(adapter); 6001 regval = E1000_READ_OFFSET(&adapter->hw, info->offset); 6002 regval = (regval & ~0xffff) | (ticks & 0xffff); 6003 /* Handle a few special cases. */ 6004 switch (info->offset) { 6005 case E1000_RDTR: 6006 break; 6007 case E1000_TIDV: 6008 if (ticks == 0) { 6009 adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; 6010 /* Don't write 0 into the TIDV register. */ 6011 regval++; 6012 } else 6013 adapter->txd_cmd |= E1000_TXD_CMD_IDE; 6014 break; 6015 } 6016 E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); 6017 EM_CORE_UNLOCK(adapter); 6018 return (0); 6019 } 6020 6021 static void 6022 em_add_int_delay_sysctl(struct adapter *adapter, const char *name, 6023 const char *description, struct em_int_delay_info *info, 6024 int offset, int value) 6025 { 6026 info->adapter = adapter; 6027 info->offset = offset; 6028 info->value = value; 6029 SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), 6030 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), 6031 OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, 6032 info, 0, em_sysctl_int_delay, "I", description); 6033 } 6034 6035 static void 6036 em_set_sysctl_value(struct adapter *adapter, const char *name, 6037 const char *description, int *limit, int value) 6038 { 6039 *limit = value; 6040 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), 6041 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), 6042 OID_AUTO, name, CTLFLAG_RW, limit, value, description); 6043 } 6044 6045 6046 /* 6047 ** Set flow control using sysctl: 6048 ** Flow control values: 6049 ** 0 - off 6050 ** 1 - rx pause 6051 ** 2 - tx pause 6052 ** 3 - full 6053 */ 6054 static int 6055 em_set_flowcntl(SYSCTL_HANDLER_ARGS) 6056 { 6057 int error; 6058 static int input = 3; /* default is full */ 6059 struct adapter *adapter = (struct adapter *) arg1; 6060 6061 error = 
sysctl_handle_int(oidp, &input, 0, req); 6062 6063 if ((error) || (req->newptr == NULL)) 6064 return (error); 6065 6066 if (input == adapter->fc) /* no change? */ 6067 return (error); 6068 6069 switch (input) { 6070 case e1000_fc_rx_pause: 6071 case e1000_fc_tx_pause: 6072 case e1000_fc_full: 6073 case e1000_fc_none: 6074 adapter->hw.fc.requested_mode = input; 6075 adapter->fc = input; 6076 break; 6077 default: 6078 /* Do nothing */ 6079 return (error); 6080 } 6081 6082 adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; 6083 e1000_force_mac_fc(&adapter->hw); 6084 return (error); 6085 } 6086 6087 /* 6088 ** Manage Energy Efficient Ethernet: 6089 ** Control values: 6090 ** 0/1 - enabled/disabled 6091 */ 6092 static int 6093 em_sysctl_eee(SYSCTL_HANDLER_ARGS) 6094 { 6095 struct adapter *adapter = (struct adapter *) arg1; 6096 int error, value; 6097 6098 value = adapter->hw.dev_spec.ich8lan.eee_disable; 6099 error = sysctl_handle_int(oidp, &value, 0, req); 6100 if (error || req->newptr == NULL) 6101 return (error); 6102 EM_CORE_LOCK(adapter); 6103 adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0); 6104 em_init_locked(adapter); 6105 EM_CORE_UNLOCK(adapter); 6106 return (0); 6107 } 6108 6109 static int 6110 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS) 6111 { 6112 struct adapter *adapter; 6113 int error; 6114 int result; 6115 6116 result = -1; 6117 error = sysctl_handle_int(oidp, &result, 0, req); 6118 6119 if (error || !req->newptr) 6120 return (error); 6121 6122 if (result == 1) { 6123 adapter = (struct adapter *)arg1; 6124 em_print_debug_info(adapter); 6125 } 6126 6127 return (error); 6128 } 6129 6130 /* 6131 ** This routine is meant to be fluid, add whatever is 6132 ** needed for debugging a problem. 
-jfv 6133 */ 6134 static void 6135 em_print_debug_info(struct adapter *adapter) 6136 { 6137 device_t dev = adapter->dev; 6138 struct tx_ring *txr = adapter->tx_rings; 6139 struct rx_ring *rxr = adapter->rx_rings; 6140 6141 if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) 6142 printf("Interface is RUNNING "); 6143 else 6144 printf("Interface is NOT RUNNING\n"); 6145 6146 if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE) 6147 printf("and INACTIVE\n"); 6148 else 6149 printf("and ACTIVE\n"); 6150 6151 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { 6152 device_printf(dev, "TX Queue %d ------\n", i); 6153 device_printf(dev, "hw tdh = %d, hw tdt = %d\n", 6154 E1000_READ_REG(&adapter->hw, E1000_TDH(i)), 6155 E1000_READ_REG(&adapter->hw, E1000_TDT(i))); 6156 device_printf(dev, "Tx Queue Status = %d\n", txr->busy); 6157 device_printf(dev, "TX descriptors avail = %d\n", 6158 txr->tx_avail); 6159 device_printf(dev, "Tx Descriptors avail failure = %ld\n", 6160 txr->no_desc_avail); 6161 device_printf(dev, "RX Queue %d ------\n", i); 6162 device_printf(dev, "hw rdh = %d, hw rdt = %d\n", 6163 E1000_READ_REG(&adapter->hw, E1000_RDH(i)), 6164 E1000_READ_REG(&adapter->hw, E1000_RDT(i))); 6165 device_printf(dev, "RX discarded packets = %ld\n", 6166 rxr->rx_discarded); 6167 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check); 6168 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh); 6169 } 6170 } 6171 6172 #ifdef EM_MULTIQUEUE 6173 /* 6174 * 82574 only: 6175 * Write a new value to the EEPROM increasing the number of MSIX 6176 * vectors from 3 to 5, for proper multiqueue support. 
6177 */ 6178 static void 6179 em_enable_vectors_82574(struct adapter *adapter) 6180 { 6181 struct e1000_hw *hw = &adapter->hw; 6182 device_t dev = adapter->dev; 6183 u16 edata; 6184 6185 e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); 6186 printf("Current cap: %#06x\n", edata); 6187 if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) { 6188 device_printf(dev, "Writing to eeprom: increasing " 6189 "reported MSIX vectors from 3 to 5...\n"); 6190 edata &= ~(EM_NVM_MSIX_N_MASK); 6191 edata |= 4 << EM_NVM_MSIX_N_SHIFT; 6192 e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); 6193 e1000_update_nvm_checksum(hw); 6194 device_printf(dev, "Writing to eeprom: done\n"); 6195 } 6196 } 6197 #endif 6198 6199 #ifdef DDB 6200 DB_COMMAND(em_reset_dev, em_ddb_reset_dev) 6201 { 6202 devclass_t dc; 6203 int max_em; 6204 6205 dc = devclass_find("em"); 6206 max_em = devclass_get_maxunit(dc); 6207 6208 for (int index = 0; index < (max_em - 1); index++) { 6209 device_t dev; 6210 dev = devclass_get_device(dc, index); 6211 if (device_get_driver(dev) == &em_driver) { 6212 struct adapter *adapter = device_get_softc(dev); 6213 EM_CORE_LOCK(adapter); 6214 em_init_locked(adapter); 6215 EM_CORE_UNLOCK(adapter); 6216 } 6217 } 6218 } 6219 DB_COMMAND(em_dump_queue, em_ddb_dump_queue) 6220 { 6221 devclass_t dc; 6222 int max_em; 6223 6224 dc = devclass_find("em"); 6225 max_em = devclass_get_maxunit(dc); 6226 6227 for (int index = 0; index < (max_em - 1); index++) { 6228 device_t dev; 6229 dev = devclass_get_device(dc, index); 6230 if (device_get_driver(dev) == &em_driver) 6231 em_print_debug_info(device_get_softc(dev)); 6232 } 6233 6234 } 6235 #endif 6236