1 /****************************************************************************** 2 3 Copyright (c) 2001-2015, Intel Corporation 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 3. Neither the name of the Intel Corporation nor the names of its 17 contributors may be used to endorse or promote products derived from 18 this software without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 POSSIBILITY OF SUCH DAMAGE. 31 32 ******************************************************************************/ 33 /*$FreeBSD$*/ 34 35 #include "opt_em.h" 36 #include "opt_ddb.h" 37 #include "opt_inet.h" 38 #include "opt_inet6.h" 39 40 #ifdef HAVE_KERNEL_OPTION_HEADERS 41 #include "opt_device_polling.h" 42 #endif 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #ifdef DDB 47 #include <sys/types.h> 48 #include <ddb/ddb.h> 49 #endif 50 #if __FreeBSD_version >= 800000 51 #include <sys/buf_ring.h> 52 #endif 53 #include <sys/bus.h> 54 #include <sys/endian.h> 55 #include <sys/kernel.h> 56 #include <sys/kthread.h> 57 #include <sys/malloc.h> 58 #include <sys/mbuf.h> 59 #include <sys/module.h> 60 #include <sys/rman.h> 61 #include <sys/smp.h> 62 #include <sys/socket.h> 63 #include <sys/sockio.h> 64 #include <sys/sysctl.h> 65 #include <sys/taskqueue.h> 66 #include <sys/eventhandler.h> 67 #include <machine/bus.h> 68 #include <machine/resource.h> 69 70 #include <net/bpf.h> 71 #include <net/ethernet.h> 72 #include <net/if.h> 73 #include <net/if_var.h> 74 #include <net/if_arp.h> 75 #include <net/if_dl.h> 76 #include <net/if_media.h> 77 78 #include <net/if_types.h> 79 #include <net/if_vlan_var.h> 80 81 #include <netinet/in_systm.h> 82 #include <netinet/in.h> 83 #include <netinet/if_ether.h> 84 #include <netinet/ip.h> 85 #include <netinet/ip6.h> 86 #include <netinet/tcp.h> 87 #include <netinet/udp.h> 88 89 #include <machine/in_cksum.h> 90 #include <dev/led/led.h> 91 #include <dev/pci/pcivar.h> 92 #include <dev/pci/pcireg.h> 93 94 #include "e1000_api.h" 95 #include "e1000_82571.h" 96 #include "if_em.h" 97 98 /********************************************************************* 99 * Driver version: 100 *********************************************************************/ 101 char em_driver_version[] = "7.6.1-k"; 102 103 /********************************************************************* 104 * PCI Device 
ID Table 105 * 106 * Used by probe to select devices to load on 107 * Last field stores an index into e1000_strings 108 * Last entry must be all 0s 109 * 110 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } 111 *********************************************************************/ 112 113 static em_vendor_info_t em_vendor_info_array[] = 114 { 115 /* Intel(R) PRO/1000 Network Connection */ 116 { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, 117 { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, 118 { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, 119 { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, 120 PCI_ANY_ID, PCI_ANY_ID, 0}, 121 { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, 122 PCI_ANY_ID, PCI_ANY_ID, 0}, 123 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, 124 PCI_ANY_ID, PCI_ANY_ID, 0}, 125 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, 126 PCI_ANY_ID, PCI_ANY_ID, 0}, 127 { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, 128 PCI_ANY_ID, PCI_ANY_ID, 0}, 129 { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, 130 PCI_ANY_ID, PCI_ANY_ID, 0}, 131 { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, 132 { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, 133 { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, 134 { 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0}, 135 136 { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0}, 137 { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0}, 138 { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0}, 139 { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0}, 140 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, 141 PCI_ANY_ID, PCI_ANY_ID, 0}, 142 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, 143 PCI_ANY_ID, PCI_ANY_ID, 0}, 144 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, 145 PCI_ANY_ID, PCI_ANY_ID, 0}, 146 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, 147 PCI_ANY_ID, PCI_ANY_ID, 0}, 148 { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, 149 { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, 150 { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, 151 { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, 152 { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, 153 { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, 154 { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, 155 { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0}, 156 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, 157 { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, 158 { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, 159 { 0x8086, E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, 160 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0}, 161 { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, 162 { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, 163 { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, 164 { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0}, 165 { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0}, 166 { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0}, 167 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, 168 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, 169 { 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, 170 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, 171 { 0x8086, 
E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, 172 { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, 173 { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, 174 { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0}, 175 { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0}, 176 { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0}, 177 { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, 178 { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0}, 179 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, 180 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V, PCI_ANY_ID, PCI_ANY_ID, 0}, 181 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM, 182 PCI_ANY_ID, PCI_ANY_ID, 0}, 183 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V, 184 PCI_ANY_ID, PCI_ANY_ID, 0}, 185 { 0x8086, E1000_DEV_ID_PCH_I218_LM2, PCI_ANY_ID, PCI_ANY_ID, 0}, 186 { 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0}, 187 { 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0}, 188 { 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0}, 189 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, 190 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V, PCI_ANY_ID, PCI_ANY_ID, 0}, 191 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2, 192 PCI_ANY_ID, PCI_ANY_ID, 0}, 193 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0}, 194 { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3, 195 PCI_ANY_ID, PCI_ANY_ID, 0}, 196 /* required last entry */ 197 { 0, 0, 0, 0, 0} 198 }; 199 200 /********************************************************************* 201 * Table of branding strings for all supported NICs. 202 *********************************************************************/ 203 204 static char *em_strings[] = { 205 "Intel(R) PRO/1000 Network Connection" 206 }; 207 208 /********************************************************************* 209 * Function prototypes 210 *********************************************************************/ 211 static int em_probe(device_t); 212 static int em_attach(device_t); 213 static int em_detach(device_t); 214 static int em_shutdown(device_t); 215 static int em_suspend(device_t); 216 static int em_resume(device_t); 217 #ifdef EM_MULTIQUEUE 218 static int em_mq_start(if_t, struct mbuf *); 219 static int em_mq_start_locked(if_t, 220 struct tx_ring *); 221 static void em_qflush(if_t); 222 #else 223 static void em_start(if_t); 224 static void em_start_locked(if_t, struct tx_ring *); 225 #endif 226 static int em_ioctl(if_t, u_long, caddr_t); 227 static uint64_t em_get_counter(if_t, ift_counter); 228 static void em_init(void *); 229 static void em_init_locked(struct adapter *); 230 static void em_stop(void *); 231 static void em_media_status(if_t, struct ifmediareq *); 232 static int em_media_change(if_t); 233 static void em_identify_hardware(struct adapter *); 234 static int em_allocate_pci_resources(struct adapter *); 235 static int em_allocate_legacy(struct adapter *); 236 static int em_allocate_msix(struct adapter *); 237 static int em_allocate_queues(struct adapter *); 238 static int em_setup_msix(struct adapter *); 239 static void em_free_pci_resources(struct adapter *); 240 static void em_local_timer(void *); 241 static void em_reset(struct adapter *); 242 static int em_setup_interface(device_t, struct adapter *); 243 static void em_flush_desc_rings(struct adapter *); 244 245 static void em_setup_transmit_structures(struct adapter *); 246 static void em_initialize_transmit_unit(struct adapter *); 247 static int 
em_allocate_transmit_buffers(struct tx_ring *); 248 static void em_free_transmit_structures(struct adapter *); 249 static void em_free_transmit_buffers(struct tx_ring *); 250 251 static int em_setup_receive_structures(struct adapter *); 252 static int em_allocate_receive_buffers(struct rx_ring *); 253 static void em_initialize_receive_unit(struct adapter *); 254 static void em_free_receive_structures(struct adapter *); 255 static void em_free_receive_buffers(struct rx_ring *); 256 257 static void em_enable_intr(struct adapter *); 258 static void em_disable_intr(struct adapter *); 259 static void em_update_stats_counters(struct adapter *); 260 static void em_add_hw_stats(struct adapter *adapter); 261 static void em_txeof(struct tx_ring *); 262 static bool em_rxeof(struct rx_ring *, int, int *); 263 #ifndef __NO_STRICT_ALIGNMENT 264 static int em_fixup_rx(struct rx_ring *); 265 #endif 266 static void em_setup_rxdesc(union e1000_rx_desc_extended *, 267 const struct em_rxbuffer *rxbuf); 268 static void em_receive_checksum(uint32_t status, struct mbuf *); 269 static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int, 270 struct ip *, u32 *, u32 *); 271 static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *, 272 struct tcphdr *, u32 *, u32 *); 273 static void em_set_promisc(struct adapter *); 274 static void em_disable_promisc(struct adapter *); 275 static void em_set_multi(struct adapter *); 276 static void em_update_link_status(struct adapter *); 277 static void em_refresh_mbufs(struct rx_ring *, int); 278 static void em_register_vlan(void *, if_t, u16); 279 static void em_unregister_vlan(void *, if_t, u16); 280 static void em_setup_vlan_hw_support(struct adapter *); 281 static int em_xmit(struct tx_ring *, struct mbuf **); 282 static int em_dma_malloc(struct adapter *, bus_size_t, 283 struct em_dma_alloc *, int); 284 static void em_dma_free(struct adapter *, struct em_dma_alloc *); 285 static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); 286 static void em_print_nvm_info(struct adapter *); 287 static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS); 288 static void em_print_debug_info(struct adapter *); 289 static int em_is_valid_ether_addr(u8 *); 290 static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS); 291 static void em_add_int_delay_sysctl(struct adapter *, const char *, 292 const char *, struct em_int_delay_info *, int, int); 293 /* Management and WOL Support */ 294 static void em_init_manageability(struct adapter *); 295 static void em_release_manageability(struct adapter *); 296 static void em_get_hw_control(struct adapter *); 297 static void em_release_hw_control(struct adapter *); 298 static void em_get_wakeup(device_t); 299 static void em_enable_wakeup(device_t); 300 static int em_enable_phy_wakeup(struct adapter *); 301 static void em_led_func(void *, int); 302 static void em_disable_aspm(struct adapter *); 303 304 static int em_irq_fast(void *); 305 306 /* MSIX handlers */ 307 static void em_msix_tx(void *); 308 static void em_msix_rx(void *); 309 static void em_msix_link(void *); 310 static void em_handle_tx(void *context, int pending); 311 static void em_handle_rx(void *context, int pending); 312 static void em_handle_link(void *context, int pending); 313 314 #ifdef EM_MULTIQUEUE 315 static void em_enable_vectors_82574(struct adapter *); 316 #endif 317 318 static void em_set_sysctl_value(struct adapter *, const char *, 319 const char *, int *, int); 320 static int em_set_flowcntl(SYSCTL_HANDLER_ARGS); 321 static int 
em_sysctl_eee(SYSCTL_HANDLER_ARGS); 322 323 static __inline void em_rx_discard(struct rx_ring *, int); 324 325 #ifdef DEVICE_POLLING 326 static poll_handler_t em_poll; 327 #endif /* POLLING */ 328 329 /********************************************************************* 330 * FreeBSD Device Interface Entry Points 331 *********************************************************************/ 332 333 static device_method_t em_methods[] = { 334 /* Device interface */ 335 DEVMETHOD(device_probe, em_probe), 336 DEVMETHOD(device_attach, em_attach), 337 DEVMETHOD(device_detach, em_detach), 338 DEVMETHOD(device_shutdown, em_shutdown), 339 DEVMETHOD(device_suspend, em_suspend), 340 DEVMETHOD(device_resume, em_resume), 341 DEVMETHOD_END 342 }; 343 344 static driver_t em_driver = { 345 "em", em_methods, sizeof(struct adapter), 346 }; 347 348 devclass_t em_devclass; 349 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0); 350 MODULE_DEPEND(em, pci, 1, 1, 1); 351 MODULE_DEPEND(em, ether, 1, 1, 1); 352 #ifdef DEV_NETMAP 353 MODULE_DEPEND(em, netmap, 1, 1, 1); 354 #endif /* DEV_NETMAP */ 355 356 /********************************************************************* 357 * Tunable default values. 358 *********************************************************************/ 359 360 #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) 361 #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) 362 #define M_TSO_LEN 66 363 364 #define MAX_INTS_PER_SEC 8000 365 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) 366 367 /* Allow common code without TSO */ 368 #ifndef CSUM_TSO 369 #define CSUM_TSO 0 370 #endif 371 372 #define TSO_WORKAROUND 4 373 374 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters"); 375 376 static int em_disable_crc_stripping = 0; 377 SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN, 378 &em_disable_crc_stripping, 0, "Disable CRC Stripping"); 379 380 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); 381 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); 382 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt, 383 0, "Default transmit interrupt delay in usecs"); 384 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt, 385 0, "Default receive interrupt delay in usecs"); 386 387 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); 388 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); 389 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN, 390 &em_tx_abs_int_delay_dflt, 0, 391 "Default transmit interrupt delay limit in usecs"); 392 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN, 393 &em_rx_abs_int_delay_dflt, 0, 394 "Default receive interrupt delay limit in usecs"); 395 396 static int em_rxd = EM_DEFAULT_RXD; 397 static int em_txd = EM_DEFAULT_TXD; 398 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0, 399 "Number of receive descriptors per queue"); 400 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0, 401 "Number of transmit descriptors per queue"); 402 403 static int em_smart_pwr_down = FALSE; 404 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down, 405 0, "Set to true to leave smart power down enabled on newer adapters"); 406 407 /* Controls whether promiscuous also shows bad packets */ 408 static int em_debug_sbp = FALSE; 409 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0, 410 "Show bad packets in promiscuous mode"); 411 412 static 
int em_enable_msix = TRUE; 413 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0, 414 "Enable MSI-X interrupts"); 415 416 #ifdef EM_MULTIQUEUE 417 static int em_num_queues = 1; 418 SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0, 419 "82574 only: Number of queues to configure, 0 indicates autoconfigure"); 420 #endif 421 422 /* 423 ** Global variable to store last used CPU when binding queues 424 ** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a 425 ** queue is bound to a cpu. 426 */ 427 static int em_last_bind_cpu = -1; 428 429 /* How many packets rxeof tries to clean at a time */ 430 static int em_rx_process_limit = 100; 431 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, 432 &em_rx_process_limit, 0, 433 "Maximum number of received packets to process " 434 "at a time, -1 means unlimited"); 435 436 /* Energy efficient ethernet - default to OFF */ 437 static int eee_setting = 1; 438 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0, 439 "Enable Energy Efficient Ethernet"); 440 441 /* Global used in WOL setup with multiport cards */ 442 static int global_quad_port_a = 0; 443 444 #ifdef DEV_NETMAP /* see ixgbe.c for details */ 445 #include <dev/netmap/if_em_netmap.h> 446 #endif /* DEV_NETMAP */ 447 448 /********************************************************************* 449 * Device identification routine 450 * 451 * em_probe determines if the driver should be loaded on 452 * adapter based on PCI vendor/device id of the adapter. 453 * 454 * return BUS_PROBE_DEFAULT on success, positive on failure 455 *********************************************************************/ 456 457 static int 458 em_probe(device_t dev) 459 { 460 char adapter_name[60]; 461 uint16_t pci_vendor_id = 0; 462 uint16_t pci_device_id = 0; 463 uint16_t pci_subvendor_id = 0; 464 uint16_t pci_subdevice_id = 0; 465 em_vendor_info_t *ent; 466 467 INIT_DEBUGOUT("em_probe: begin"); 468 469 pci_vendor_id = pci_get_vendor(dev); 470 if (pci_vendor_id != EM_VENDOR_ID) 471 return (ENXIO); 472 473 pci_device_id = pci_get_device(dev); 474 pci_subvendor_id = pci_get_subvendor(dev); 475 pci_subdevice_id = pci_get_subdevice(dev); 476 477 ent = em_vendor_info_array; 478 while (ent->vendor_id != 0) { 479 if ((pci_vendor_id == ent->vendor_id) && 480 (pci_device_id == ent->device_id) && 481 482 ((pci_subvendor_id == ent->subvendor_id) || 483 (ent->subvendor_id == PCI_ANY_ID)) && 484 485 ((pci_subdevice_id == ent->subdevice_id) || 486 (ent->subdevice_id == PCI_ANY_ID))) { 487 sprintf(adapter_name, "%s %s", 488 em_strings[ent->index], 489 em_driver_version); 490 device_set_desc_copy(dev, adapter_name); 491 return (BUS_PROBE_DEFAULT); 492 } 493 ent++; 494 } 495 496 return (ENXIO); 497 } 498 499 /********************************************************************* 500 * Device initialization routine 501 * 502 * The attach entry point is called when the driver is being loaded. 503 * This routine identifies the type of hardware, allocates all resources 504 * and initializes the hardware. 
505 * 506 * return 0 on success, positive on failure 507 *********************************************************************/ 508 509 static int 510 em_attach(device_t dev) 511 { 512 struct adapter *adapter; 513 struct e1000_hw *hw; 514 int error = 0; 515 516 INIT_DEBUGOUT("em_attach: begin"); 517 518 if (resource_disabled("em", device_get_unit(dev))) { 519 device_printf(dev, "Disabled by device hint\n"); 520 return (ENXIO); 521 } 522 523 adapter = device_get_softc(dev); 524 adapter->dev = adapter->osdep.dev = dev; 525 hw = &adapter->hw; 526 EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); 527 528 /* SYSCTL stuff */ 529 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), 530 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 531 OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, 532 em_sysctl_nvm_info, "I", "NVM Information"); 533 534 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), 535 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 536 OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, 537 em_sysctl_debug_info, "I", "Debug Information"); 538 539 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), 540 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 541 OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, 542 em_set_flowcntl, "I", "Flow Control"); 543 544 callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); 545 546 /* Determine hardware and mac info */ 547 em_identify_hardware(adapter); 548 549 /* Setup PCI resources */ 550 if (em_allocate_pci_resources(adapter)) { 551 device_printf(dev, "Allocation of PCI resources failed\n"); 552 error = ENXIO; 553 goto err_pci; 554 } 555 556 /* 557 ** For ICH8 and family we need to 558 ** map the flash memory, and this 559 ** must happen after the MAC is 560 ** identified 561 */ 562 if ((hw->mac.type == e1000_ich8lan) || 563 (hw->mac.type == e1000_ich9lan) || 564 (hw->mac.type == e1000_ich10lan) || 565 (hw->mac.type == e1000_pchlan) || 566 (hw->mac.type == e1000_pch2lan) || 567 (hw->mac.type == e1000_pch_lpt)) { 568 int rid = EM_BAR_TYPE_FLASH; 569 adapter->flash = bus_alloc_resource_any(dev, 570 SYS_RES_MEMORY, &rid, RF_ACTIVE); 571 if (adapter->flash == NULL) { 572 device_printf(dev, "Mapping of Flash failed\n"); 573 error = ENXIO; 574 goto err_pci; 575 } 576 /* This is used in the shared code */ 577 hw->flash_address = (u8 *)adapter->flash; 578 adapter->osdep.flash_bus_space_tag = 579 rman_get_bustag(adapter->flash); 580 adapter->osdep.flash_bus_space_handle = 581 rman_get_bushandle(adapter->flash); 582 } 583 /* 584 ** In the new SPT device flash is not a 585 ** seperate BAR, rather it is also in BAR0, 586 ** so use the same tag and an offset handle for the 587 ** FLASH read/write macros in the shared code. 
588 */ 589 else if (hw->mac.type == e1000_pch_spt) { 590 adapter->osdep.flash_bus_space_tag = 591 adapter->osdep.mem_bus_space_tag; 592 adapter->osdep.flash_bus_space_handle = 593 adapter->osdep.mem_bus_space_handle 594 + E1000_FLASH_BASE_ADDR; 595 } 596 597 /* Do Shared Code initialization */ 598 error = e1000_setup_init_funcs(hw, TRUE); 599 if (error) { 600 device_printf(dev, "Setup of Shared code failed, error %d\n", 601 error); 602 error = ENXIO; 603 goto err_pci; 604 } 605 606 /* 607 * Setup MSI/X or MSI if PCI Express 608 */ 609 adapter->msix = em_setup_msix(adapter); 610 611 e1000_get_bus_info(hw); 612 613 /* Set up some sysctls for the tunable interrupt delays */ 614 em_add_int_delay_sysctl(adapter, "rx_int_delay", 615 "receive interrupt delay in usecs", &adapter->rx_int_delay, 616 E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt); 617 em_add_int_delay_sysctl(adapter, "tx_int_delay", 618 "transmit interrupt delay in usecs", &adapter->tx_int_delay, 619 E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt); 620 em_add_int_delay_sysctl(adapter, "rx_abs_int_delay", 621 "receive interrupt delay limit in usecs", 622 &adapter->rx_abs_int_delay, 623 E1000_REGISTER(hw, E1000_RADV), 624 em_rx_abs_int_delay_dflt); 625 em_add_int_delay_sysctl(adapter, "tx_abs_int_delay", 626 "transmit interrupt delay limit in usecs", 627 &adapter->tx_abs_int_delay, 628 E1000_REGISTER(hw, E1000_TADV), 629 em_tx_abs_int_delay_dflt); 630 em_add_int_delay_sysctl(adapter, "itr", 631 "interrupt delay limit in usecs/4", 632 &adapter->tx_itr, 633 E1000_REGISTER(hw, E1000_ITR), 634 DEFAULT_ITR); 635 636 /* Sysctl for limiting the amount of work done in the taskqueue */ 637 em_set_sysctl_value(adapter, "rx_processing_limit", 638 "max number of rx packets to process", &adapter->rx_process_limit, 639 em_rx_process_limit); 640 641 /* 642 * Validate number of transmit and receive descriptors. It 643 * must not exceed hardware maximum, and must be multiple 644 * of E1000_DBA_ALIGN. 645 */ 646 if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 || 647 (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) { 648 device_printf(dev, "Using %d TX descriptors instead of %d!\n", 649 EM_DEFAULT_TXD, em_txd); 650 adapter->num_tx_desc = EM_DEFAULT_TXD; 651 } else 652 adapter->num_tx_desc = em_txd; 653 654 if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 || 655 (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) { 656 device_printf(dev, "Using %d RX descriptors instead of %d!\n", 657 EM_DEFAULT_RXD, em_rxd); 658 adapter->num_rx_desc = EM_DEFAULT_RXD; 659 } else 660 adapter->num_rx_desc = em_rxd; 661 662 hw->mac.autoneg = DO_AUTO_NEG; 663 hw->phy.autoneg_wait_to_complete = FALSE; 664 hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; 665 666 /* Copper options */ 667 if (hw->phy.media_type == e1000_media_type_copper) { 668 hw->phy.mdix = AUTO_ALL_MODES; 669 hw->phy.disable_polarity_correction = FALSE; 670 hw->phy.ms_type = EM_MASTER_SLAVE; 671 } 672 673 /* 674 * Set the frame limits assuming 675 * standard ethernet sized frames. 676 */ 677 adapter->hw.mac.max_frame_size = 678 ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; 679 680 /* 681 * This controls when hardware reports transmit completion 682 * status. 683 */ 684 hw->mac.report_tx_early = 1; 685 686 /* 687 ** Get queue/ring memory 688 */ 689 if (em_allocate_queues(adapter)) { 690 error = ENOMEM; 691 goto err_pci; 692 } 693 694 /* Allocate multicast array memory. 
*/ 695 adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * 696 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); 697 if (adapter->mta == NULL) { 698 device_printf(dev, "Can not allocate multicast setup array\n"); 699 error = ENOMEM; 700 goto err_late; 701 } 702 703 /* Check SOL/IDER usage */ 704 if (e1000_check_reset_block(hw)) 705 device_printf(dev, "PHY reset is blocked" 706 " due to SOL/IDER session.\n"); 707 708 /* Sysctl for setting Energy Efficient Ethernet */ 709 hw->dev_spec.ich8lan.eee_disable = eee_setting; 710 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), 711 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 712 OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW, 713 adapter, 0, em_sysctl_eee, "I", 714 "Disable Energy Efficient Ethernet"); 715 716 /* 717 ** Start from a known state, this is 718 ** important in reading the nvm and 719 ** mac from that. 720 */ 721 e1000_reset_hw(hw); 722 723 724 /* Make sure we have a good EEPROM before we read from it */ 725 if (e1000_validate_nvm_checksum(hw) < 0) { 726 /* 727 ** Some PCI-E parts fail the first check due to 728 ** the link being in sleep state, call it again, 729 ** if it fails a second time its a real issue. 730 */ 731 if (e1000_validate_nvm_checksum(hw) < 0) { 732 device_printf(dev, 733 "The EEPROM Checksum Is Not Valid\n"); 734 error = EIO; 735 goto err_late; 736 } 737 } 738 739 /* Copy the permanent MAC address out of the EEPROM */ 740 if (e1000_read_mac_addr(hw) < 0) { 741 device_printf(dev, "EEPROM read error while reading MAC" 742 " address\n"); 743 error = EIO; 744 goto err_late; 745 } 746 747 if (!em_is_valid_ether_addr(hw->mac.addr)) { 748 device_printf(dev, "Invalid MAC address\n"); 749 error = EIO; 750 goto err_late; 751 } 752 753 /* Disable ULP support */ 754 e1000_disable_ulp_lpt_lp(hw, TRUE); 755 756 /* 757 ** Do interrupt configuration 758 */ 759 if (adapter->msix > 1) /* Do MSIX */ 760 error = em_allocate_msix(adapter); 761 else /* MSI or Legacy */ 762 error = em_allocate_legacy(adapter); 763 if (error) 764 goto err_late; 765 766 /* 767 * Get Wake-on-Lan and Management info for later use 768 */ 769 em_get_wakeup(dev); 770 771 /* Setup OS specific network interface */ 772 if (em_setup_interface(dev, adapter) != 0) 773 goto err_late; 774 775 em_reset(adapter); 776 777 /* Initialize statistics */ 778 em_update_stats_counters(adapter); 779 780 hw->mac.get_link_status = 1; 781 em_update_link_status(adapter); 782 783 /* Register for VLAN events */ 784 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, 785 em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); 786 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, 787 em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); 788 789 em_add_hw_stats(adapter); 790 791 /* Non-AMT based hardware can now take control from firmware */ 792 if (adapter->has_manage && !adapter->has_amt) 793 em_get_hw_control(adapter); 794 795 /* Tell the stack that the interface is not active */ 796 if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 797 798 adapter->led_dev = led_create(em_led_func, adapter, 799 device_get_nameunit(dev)); 800 #ifdef DEV_NETMAP 801 em_netmap_attach(adapter); 802 #endif /* DEV_NETMAP */ 803 804 INIT_DEBUGOUT("em_attach: end"); 805 806 return (0); 807 808 err_late: 809 em_free_transmit_structures(adapter); 810 em_free_receive_structures(adapter); 811 em_release_hw_control(adapter); 812 if (adapter->ifp != (void *)NULL) 813 if_free(adapter->ifp); 814 err_pci: 815 em_free_pci_resources(adapter); 816 free(adapter->mta, M_DEVBUF); 817 
EM_CORE_LOCK_DESTROY(adapter); 818 819 return (error); 820 } 821 822 /********************************************************************* 823 * Device removal routine 824 * 825 * The detach entry point is called when the driver is being removed. 826 * This routine stops the adapter and deallocates all the resources 827 * that were allocated for driver operation. 828 * 829 * return 0 on success, positive on failure 830 *********************************************************************/ 831 832 static int 833 em_detach(device_t dev) 834 { 835 struct adapter *adapter = device_get_softc(dev); 836 if_t ifp = adapter->ifp; 837 838 INIT_DEBUGOUT("em_detach: begin"); 839 840 /* Make sure VLANS are not using driver */ 841 if (if_vlantrunkinuse(ifp)) { 842 device_printf(dev,"Vlan in use, detach first\n"); 843 return (EBUSY); 844 } 845 846 #ifdef DEVICE_POLLING 847 if (if_getcapenable(ifp) & IFCAP_POLLING) 848 ether_poll_deregister(ifp); 849 #endif 850 851 if (adapter->led_dev != NULL) 852 led_destroy(adapter->led_dev); 853 854 EM_CORE_LOCK(adapter); 855 adapter->in_detach = 1; 856 em_stop(adapter); 857 EM_CORE_UNLOCK(adapter); 858 EM_CORE_LOCK_DESTROY(adapter); 859 860 e1000_phy_hw_reset(&adapter->hw); 861 862 em_release_manageability(adapter); 863 em_release_hw_control(adapter); 864 865 /* Unregister VLAN events */ 866 if (adapter->vlan_attach != NULL) 867 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); 868 if (adapter->vlan_detach != NULL) 869 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); 870 871 ether_ifdetach(adapter->ifp); 872 callout_drain(&adapter->timer); 873 874 #ifdef DEV_NETMAP 875 netmap_detach(ifp); 876 #endif /* DEV_NETMAP */ 877 878 em_free_pci_resources(adapter); 879 bus_generic_detach(dev); 880 if_free(ifp); 881 882 em_free_transmit_structures(adapter); 883 em_free_receive_structures(adapter); 884 885 em_release_hw_control(adapter); 886 free(adapter->mta, M_DEVBUF); 887 888 return (0); 889 } 890 891 /********************************************************************* 892 * 893 * Shutdown entry point 894 * 895 **********************************************************************/ 896 897 static int 898 em_shutdown(device_t dev) 899 { 900 return em_suspend(dev); 901 } 902 903 /* 904 * Suspend/resume device methods. 
905 */ 906 static int 907 em_suspend(device_t dev) 908 { 909 struct adapter *adapter = device_get_softc(dev); 910 911 EM_CORE_LOCK(adapter); 912 913 em_release_manageability(adapter); 914 em_release_hw_control(adapter); 915 em_enable_wakeup(dev); 916 917 EM_CORE_UNLOCK(adapter); 918 919 return bus_generic_suspend(dev); 920 } 921 922 static int 923 em_resume(device_t dev) 924 { 925 struct adapter *adapter = device_get_softc(dev); 926 struct tx_ring *txr = adapter->tx_rings; 927 if_t ifp = adapter->ifp; 928 929 EM_CORE_LOCK(adapter); 930 if (adapter->hw.mac.type == e1000_pch2lan) 931 e1000_resume_workarounds_pchlan(&adapter->hw); 932 em_init_locked(adapter); 933 em_init_manageability(adapter); 934 935 if ((if_getflags(ifp) & IFF_UP) && 936 (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) { 937 for (int i = 0; i < adapter->num_queues; i++, txr++) { 938 EM_TX_LOCK(txr); 939 #ifdef EM_MULTIQUEUE 940 if (!drbr_empty(ifp, txr->br)) 941 em_mq_start_locked(ifp, txr); 942 #else 943 if (!if_sendq_empty(ifp)) 944 em_start_locked(ifp, txr); 945 #endif 946 EM_TX_UNLOCK(txr); 947 } 948 } 949 EM_CORE_UNLOCK(adapter); 950 951 return bus_generic_resume(dev); 952 } 953 954 955 #ifndef EM_MULTIQUEUE 956 static void 957 em_start_locked(if_t ifp, struct tx_ring *txr) 958 { 959 struct adapter *adapter = if_getsoftc(ifp); 960 struct mbuf *m_head; 961 962 EM_TX_LOCK_ASSERT(txr); 963 964 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 965 IFF_DRV_RUNNING) 966 return; 967 968 if (!adapter->link_active) 969 return; 970 971 while (!if_sendq_empty(ifp)) { 972 /* Call cleanup if number of TX descriptors low */ 973 if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD) 974 em_txeof(txr); 975 if (txr->tx_avail < EM_MAX_SCATTER) { 976 if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0); 977 break; 978 } 979 m_head = if_dequeue(ifp); 980 if (m_head == NULL) 981 break; 982 /* 983 * Encapsulation can modify our pointer, and or make it 984 * NULL on failure. In that event, we can't requeue. 985 */ 986 if (em_xmit(txr, &m_head)) { 987 if (m_head == NULL) 988 break; 989 if_sendq_prepend(ifp, m_head); 990 break; 991 } 992 993 /* Mark the queue as having work */ 994 if (txr->busy == EM_TX_IDLE) 995 txr->busy = EM_TX_BUSY; 996 997 /* Send a copy of the frame to the BPF listener */ 998 ETHER_BPF_MTAP(ifp, m_head); 999 1000 } 1001 1002 return; 1003 } 1004 1005 static void 1006 em_start(if_t ifp) 1007 { 1008 struct adapter *adapter = if_getsoftc(ifp); 1009 struct tx_ring *txr = adapter->tx_rings; 1010 1011 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 1012 EM_TX_LOCK(txr); 1013 em_start_locked(ifp, txr); 1014 EM_TX_UNLOCK(txr); 1015 } 1016 return; 1017 } 1018 #else /* EM_MULTIQUEUE */ 1019 /********************************************************************* 1020 * Multiqueue Transmit routines 1021 * 1022 * em_mq_start is called by the stack to initiate a transmit. 1023 * however, if busy the driver can queue the request rather 1024 * than do an immediate send. It is this that is an advantage 1025 * in this driver, rather than also having multiple tx queues. 
1026 **********************************************************************/ 1027 /* 1028 ** Multiqueue capable stack interface 1029 */ 1030 static int 1031 em_mq_start(if_t ifp, struct mbuf *m) 1032 { 1033 struct adapter *adapter = if_getsoftc(ifp); 1034 struct tx_ring *txr = adapter->tx_rings; 1035 unsigned int i, error; 1036 1037 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 1038 i = m->m_pkthdr.flowid % adapter->num_queues; 1039 else 1040 i = curcpu % adapter->num_queues; 1041 1042 txr = &adapter->tx_rings[i]; 1043 1044 error = drbr_enqueue(ifp, txr->br, m); 1045 if (error) 1046 return (error); 1047 1048 if (EM_TX_TRYLOCK(txr)) { 1049 em_mq_start_locked(ifp, txr); 1050 EM_TX_UNLOCK(txr); 1051 } else 1052 taskqueue_enqueue(txr->tq, &txr->tx_task); 1053 1054 return (0); 1055 } 1056 1057 static int 1058 em_mq_start_locked(if_t ifp, struct tx_ring *txr) 1059 { 1060 struct adapter *adapter = txr->adapter; 1061 struct mbuf *next; 1062 int err = 0, enq = 0; 1063 1064 EM_TX_LOCK_ASSERT(txr); 1065 1066 if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) || 1067 adapter->link_active == 0) { 1068 return (ENETDOWN); 1069 } 1070 1071 /* Process the queue */ 1072 while ((next = drbr_peek(ifp, txr->br)) != NULL) { 1073 if ((err = em_xmit(txr, &next)) != 0) { 1074 if (next == NULL) { 1075 /* It was freed, move forward */ 1076 drbr_advance(ifp, txr->br); 1077 } else { 1078 /* 1079 * Still have one left, it may not be 1080 * the same since the transmit function 1081 * may have changed it. 1082 */ 1083 drbr_putback(ifp, txr->br, next); 1084 } 1085 break; 1086 } 1087 drbr_advance(ifp, txr->br); 1088 enq++; 1089 if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len); 1090 if (next->m_flags & M_MCAST) 1091 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 1092 ETHER_BPF_MTAP(ifp, next); 1093 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) 1094 break; 1095 } 1096 1097 /* Mark the queue as having work */ 1098 if ((enq > 0) && (txr->busy == EM_TX_IDLE)) 1099 txr->busy = EM_TX_BUSY; 1100 1101 if (txr->tx_avail < EM_MAX_SCATTER) 1102 em_txeof(txr); 1103 if (txr->tx_avail < EM_MAX_SCATTER) { 1104 if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0); 1105 } 1106 return (err); 1107 } 1108 1109 /* 1110 ** Flush all ring buffers 1111 */ 1112 static void 1113 em_qflush(if_t ifp) 1114 { 1115 struct adapter *adapter = if_getsoftc(ifp); 1116 struct tx_ring *txr = adapter->tx_rings; 1117 struct mbuf *m; 1118 1119 for (int i = 0; i < adapter->num_queues; i++, txr++) { 1120 EM_TX_LOCK(txr); 1121 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) 1122 m_freem(m); 1123 EM_TX_UNLOCK(txr); 1124 } 1125 if_qflush(ifp); 1126 } 1127 #endif /* EM_MULTIQUEUE */ 1128 1129 /********************************************************************* 1130 * Ioctl entry point 1131 * 1132 * em_ioctl is called when the user wants to configure the 1133 * interface. 
1134 * 1135 * return 0 on success, positive on failure 1136 **********************************************************************/ 1137 1138 static int 1139 em_ioctl(if_t ifp, u_long command, caddr_t data) 1140 { 1141 struct adapter *adapter = if_getsoftc(ifp); 1142 struct ifreq *ifr = (struct ifreq *)data; 1143 #if defined(INET) || defined(INET6) 1144 struct ifaddr *ifa = (struct ifaddr *)data; 1145 #endif 1146 bool avoid_reset = FALSE; 1147 int error = 0; 1148 1149 if (adapter->in_detach) 1150 return (error); 1151 1152 switch (command) { 1153 case SIOCSIFADDR: 1154 #ifdef INET 1155 if (ifa->ifa_addr->sa_family == AF_INET) 1156 avoid_reset = TRUE; 1157 #endif 1158 #ifdef INET6 1159 if (ifa->ifa_addr->sa_family == AF_INET6) 1160 avoid_reset = TRUE; 1161 #endif 1162 /* 1163 ** Calling init results in link renegotiation, 1164 ** so we avoid doing it when possible. 1165 */ 1166 if (avoid_reset) { 1167 if_setflagbits(ifp,IFF_UP,0); 1168 if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING)) 1169 em_init(adapter); 1170 #ifdef INET 1171 if (!(if_getflags(ifp) & IFF_NOARP)) 1172 arp_ifinit(ifp, ifa); 1173 #endif 1174 } else 1175 error = ether_ioctl(ifp, command, data); 1176 break; 1177 case SIOCSIFMTU: 1178 { 1179 int max_frame_size; 1180 1181 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); 1182 1183 EM_CORE_LOCK(adapter); 1184 switch (adapter->hw.mac.type) { 1185 case e1000_82571: 1186 case e1000_82572: 1187 case e1000_ich9lan: 1188 case e1000_ich10lan: 1189 case e1000_pch2lan: 1190 case e1000_pch_lpt: 1191 case e1000_pch_spt: 1192 case e1000_82574: 1193 case e1000_82583: 1194 case e1000_80003es2lan: /* 9K Jumbo Frame size */ 1195 max_frame_size = 9234; 1196 break; 1197 case e1000_pchlan: 1198 max_frame_size = 4096; 1199 break; 1200 /* Adapters that do not support jumbo frames */ 1201 case e1000_ich8lan: 1202 max_frame_size = ETHER_MAX_LEN; 1203 break; 1204 default: 1205 max_frame_size = MAX_JUMBO_FRAME_SIZE; 1206 } 1207 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - 1208 ETHER_CRC_LEN) { 1209 EM_CORE_UNLOCK(adapter); 1210 error = EINVAL; 1211 break; 1212 } 1213 1214 if_setmtu(ifp, ifr->ifr_mtu); 1215 adapter->hw.mac.max_frame_size = 1216 if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN; 1217 em_init_locked(adapter); 1218 EM_CORE_UNLOCK(adapter); 1219 break; 1220 } 1221 case SIOCSIFFLAGS: 1222 IOCTL_DEBUGOUT("ioctl rcv'd:\ 1223 SIOCSIFFLAGS (Set Interface Flags)"); 1224 EM_CORE_LOCK(adapter); 1225 if (if_getflags(ifp) & IFF_UP) { 1226 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 1227 if ((if_getflags(ifp) ^ adapter->if_flags) & 1228 (IFF_PROMISC | IFF_ALLMULTI)) { 1229 em_disable_promisc(adapter); 1230 em_set_promisc(adapter); 1231 } 1232 } else 1233 em_init_locked(adapter); 1234 } else 1235 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) 1236 em_stop(adapter); 1237 adapter->if_flags = if_getflags(ifp); 1238 EM_CORE_UNLOCK(adapter); 1239 break; 1240 case SIOCADDMULTI: 1241 case SIOCDELMULTI: 1242 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); 1243 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 1244 EM_CORE_LOCK(adapter); 1245 em_disable_intr(adapter); 1246 em_set_multi(adapter); 1247 #ifdef DEVICE_POLLING 1248 if (!(if_getcapenable(ifp) & IFCAP_POLLING)) 1249 #endif 1250 em_enable_intr(adapter); 1251 EM_CORE_UNLOCK(adapter); 1252 } 1253 break; 1254 case SIOCSIFMEDIA: 1255 /* Check SOL/IDER usage */ 1256 EM_CORE_LOCK(adapter); 1257 if (e1000_check_reset_block(&adapter->hw)) { 1258 EM_CORE_UNLOCK(adapter); 1259 device_printf(adapter->dev, "Media change is" 1260 " blocked due to SOL/IDER 
session.\n"); 1261 break; 1262 } 1263 EM_CORE_UNLOCK(adapter); 1264 /* falls thru */ 1265 case SIOCGIFMEDIA: 1266 IOCTL_DEBUGOUT("ioctl rcv'd: \ 1267 SIOCxIFMEDIA (Get/Set Interface Media)"); 1268 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); 1269 break; 1270 case SIOCSIFCAP: 1271 { 1272 int mask, reinit; 1273 1274 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); 1275 reinit = 0; 1276 mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); 1277 #ifdef DEVICE_POLLING 1278 if (mask & IFCAP_POLLING) { 1279 if (ifr->ifr_reqcap & IFCAP_POLLING) { 1280 error = ether_poll_register(em_poll, ifp); 1281 if (error) 1282 return (error); 1283 EM_CORE_LOCK(adapter); 1284 em_disable_intr(adapter); 1285 if_setcapenablebit(ifp, IFCAP_POLLING, 0); 1286 EM_CORE_UNLOCK(adapter); 1287 } else { 1288 error = ether_poll_deregister(ifp); 1289 /* Enable interrupt even in error case */ 1290 EM_CORE_LOCK(adapter); 1291 em_enable_intr(adapter); 1292 if_setcapenablebit(ifp, 0, IFCAP_POLLING); 1293 EM_CORE_UNLOCK(adapter); 1294 } 1295 } 1296 #endif 1297 if (mask & IFCAP_HWCSUM) { 1298 if_togglecapenable(ifp,IFCAP_HWCSUM); 1299 reinit = 1; 1300 } 1301 if (mask & IFCAP_TSO4) { 1302 if_togglecapenable(ifp,IFCAP_TSO4); 1303 reinit = 1; 1304 } 1305 if (mask & IFCAP_VLAN_HWTAGGING) { 1306 if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING); 1307 reinit = 1; 1308 } 1309 if (mask & IFCAP_VLAN_HWFILTER) { 1310 if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER); 1311 reinit = 1; 1312 } 1313 if (mask & IFCAP_VLAN_HWTSO) { 1314 if_togglecapenable(ifp, IFCAP_VLAN_HWTSO); 1315 reinit = 1; 1316 } 1317 if ((mask & IFCAP_WOL) && 1318 (if_getcapabilities(ifp) & IFCAP_WOL) != 0) { 1319 if (mask & IFCAP_WOL_MCAST) 1320 if_togglecapenable(ifp, IFCAP_WOL_MCAST); 1321 if (mask & IFCAP_WOL_MAGIC) 1322 if_togglecapenable(ifp, IFCAP_WOL_MAGIC); 1323 } 1324 if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING)) 1325 em_init(adapter); 1326 if_vlancap(ifp); 1327 break; 1328 } 1329 1330 default: 1331 error = ether_ioctl(ifp, command, data); 1332 break; 1333 } 1334 1335 return (error); 1336 } 1337 1338 1339 /********************************************************************* 1340 * Init entry point 1341 * 1342 * This routine is used in two ways. It is used by the stack as 1343 * init entry point in network interface structure. It is also used 1344 * by the driver as a hw/sw initialization routine to get to a 1345 * consistent state. 1346 * 1347 * return 0 on success, positive on failure 1348 **********************************************************************/ 1349 1350 static void 1351 em_init_locked(struct adapter *adapter) 1352 { 1353 if_t ifp = adapter->ifp; 1354 device_t dev = adapter->dev; 1355 1356 INIT_DEBUGOUT("em_init: begin"); 1357 1358 EM_CORE_LOCK_ASSERT(adapter); 1359 1360 em_disable_intr(adapter); 1361 callout_stop(&adapter->timer); 1362 1363 /* Get the latest mac address, User can use a LAA */ 1364 bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr, 1365 ETHER_ADDR_LEN); 1366 1367 /* Put the address into the Receive Address Array */ 1368 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); 1369 1370 /* 1371 * With the 82571 adapter, RAR[0] may be overwritten 1372 * when the other port is reset, we make a duplicate 1373 * in RAR[14] for that eventuality, this assures 1374 * the interface continues to function. 
1375 */ 1376 if (adapter->hw.mac.type == e1000_82571) { 1377 e1000_set_laa_state_82571(&adapter->hw, TRUE); 1378 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 1379 E1000_RAR_ENTRIES - 1); 1380 } 1381 1382 /* Initialize the hardware */ 1383 em_reset(adapter); 1384 em_update_link_status(adapter); 1385 1386 /* Setup VLAN support, basic and offload if available */ 1387 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); 1388 1389 /* Set hardware offload abilities */ 1390 if_clearhwassist(ifp); 1391 if (if_getcapenable(ifp) & IFCAP_TXCSUM) 1392 if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0); 1393 /* 1394 ** There have proven to be problems with TSO when not 1395 ** at full gigabit speed, so disable the assist automatically 1396 ** when at lower speeds. -jfv 1397 */ 1398 if (if_getcapenable(ifp) & IFCAP_TSO4) { 1399 if (adapter->link_speed == SPEED_1000) 1400 if_sethwassistbits(ifp, CSUM_TSO, 0); 1401 } 1402 1403 /* Configure for OS presence */ 1404 em_init_manageability(adapter); 1405 1406 /* Prepare transmit descriptors and buffers */ 1407 em_setup_transmit_structures(adapter); 1408 em_initialize_transmit_unit(adapter); 1409 1410 /* Setup Multicast table */ 1411 em_set_multi(adapter); 1412 1413 /* 1414 ** Figure out the desired mbuf 1415 ** pool for doing jumbos 1416 */ 1417 if (adapter->hw.mac.max_frame_size <= 2048) 1418 adapter->rx_mbuf_sz = MCLBYTES; 1419 else if (adapter->hw.mac.max_frame_size <= 4096) 1420 adapter->rx_mbuf_sz = MJUMPAGESIZE; 1421 else 1422 adapter->rx_mbuf_sz = MJUM9BYTES; 1423 1424 /* Prepare receive descriptors and buffers */ 1425 if (em_setup_receive_structures(adapter)) { 1426 device_printf(dev, "Could not setup receive structures\n"); 1427 em_stop(adapter); 1428 return; 1429 } 1430 em_initialize_receive_unit(adapter); 1431 1432 /* Use real VLAN Filter support? */ 1433 if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) { 1434 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) 1435 /* Use real VLAN Filter support */ 1436 em_setup_vlan_hw_support(adapter); 1437 else { 1438 u32 ctrl; 1439 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); 1440 ctrl |= E1000_CTRL_VME; 1441 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); 1442 } 1443 } 1444 1445 /* Don't lose promiscuous settings */ 1446 em_set_promisc(adapter); 1447 1448 /* Set the interface as ACTIVE */ 1449 if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); 1450 1451 callout_reset(&adapter->timer, hz, em_local_timer, adapter); 1452 e1000_clear_hw_cntrs_base_generic(&adapter->hw); 1453 1454 /* MSI/X configuration for 82574 */ 1455 if (adapter->hw.mac.type == e1000_82574) { 1456 int tmp; 1457 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); 1458 tmp |= E1000_CTRL_EXT_PBA_CLR; 1459 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp); 1460 /* Set the IVAR - interrupt vector routing. */ 1461 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); 1462 } 1463 1464 #ifdef DEVICE_POLLING 1465 /* 1466 * Only enable interrupts if we are not polling, make sure 1467 * they are off otherwise. 
1468 */ 1469 if (if_getcapenable(ifp) & IFCAP_POLLING) 1470 em_disable_intr(adapter); 1471 else 1472 #endif /* DEVICE_POLLING */ 1473 em_enable_intr(adapter); 1474 1475 /* AMT based hardware can now take control from firmware */ 1476 if (adapter->has_manage && adapter->has_amt) 1477 em_get_hw_control(adapter); 1478 } 1479 1480 static void 1481 em_init(void *arg) 1482 { 1483 struct adapter *adapter = arg; 1484 1485 EM_CORE_LOCK(adapter); 1486 em_init_locked(adapter); 1487 EM_CORE_UNLOCK(adapter); 1488 } 1489 1490 1491 #ifdef DEVICE_POLLING 1492 /********************************************************************* 1493 * 1494 * Legacy polling routine: note this only works with single queue 1495 * 1496 *********************************************************************/ 1497 static int 1498 em_poll(if_t ifp, enum poll_cmd cmd, int count) 1499 { 1500 struct adapter *adapter = if_getsoftc(ifp); 1501 struct tx_ring *txr = adapter->tx_rings; 1502 struct rx_ring *rxr = adapter->rx_rings; 1503 u32 reg_icr; 1504 int rx_done; 1505 1506 EM_CORE_LOCK(adapter); 1507 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { 1508 EM_CORE_UNLOCK(adapter); 1509 return (0); 1510 } 1511 1512 if (cmd == POLL_AND_CHECK_STATUS) { 1513 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); 1514 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 1515 callout_stop(&adapter->timer); 1516 adapter->hw.mac.get_link_status = 1; 1517 em_update_link_status(adapter); 1518 callout_reset(&adapter->timer, hz, 1519 em_local_timer, adapter); 1520 } 1521 } 1522 EM_CORE_UNLOCK(adapter); 1523 1524 em_rxeof(rxr, count, &rx_done); 1525 1526 EM_TX_LOCK(txr); 1527 em_txeof(txr); 1528 #ifdef EM_MULTIQUEUE 1529 if (!drbr_empty(ifp, txr->br)) 1530 em_mq_start_locked(ifp, txr); 1531 #else 1532 if (!if_sendq_empty(ifp)) 1533 em_start_locked(ifp, txr); 1534 #endif 1535 EM_TX_UNLOCK(txr); 1536 1537 return (rx_done); 1538 } 1539 #endif /* DEVICE_POLLING */ 1540 1541 1542 /********************************************************************* 1543 * 1544 * Fast Legacy/MSI Combined Interrupt Service routine 1545 * 1546 *********************************************************************/ 1547 static int 1548 em_irq_fast(void *arg) 1549 { 1550 struct adapter *adapter = arg; 1551 if_t ifp; 1552 u32 reg_icr; 1553 1554 ifp = adapter->ifp; 1555 1556 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); 1557 1558 /* Hot eject? */ 1559 if (reg_icr == 0xffffffff) 1560 return FILTER_STRAY; 1561 1562 /* Definitely not our interrupt. */ 1563 if (reg_icr == 0x0) 1564 return FILTER_STRAY; 1565 1566 /* 1567 * Starting with the 82571 chip, bit 31 should be used to 1568 * determine whether the interrupt belongs to us. 
1569 */ 1570 if (adapter->hw.mac.type >= e1000_82571 && 1571 (reg_icr & E1000_ICR_INT_ASSERTED) == 0) 1572 return FILTER_STRAY; 1573 1574 em_disable_intr(adapter); 1575 taskqueue_enqueue(adapter->tq, &adapter->que_task); 1576 1577 /* Link status change */ 1578 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 1579 adapter->hw.mac.get_link_status = 1; 1580 taskqueue_enqueue(taskqueue_fast, &adapter->link_task); 1581 } 1582 1583 if (reg_icr & E1000_ICR_RXO) 1584 adapter->rx_overruns++; 1585 return FILTER_HANDLED; 1586 } 1587 1588 /* Combined RX/TX handler, used by Legacy and MSI */ 1589 static void 1590 em_handle_que(void *context, int pending) 1591 { 1592 struct adapter *adapter = context; 1593 if_t ifp = adapter->ifp; 1594 struct tx_ring *txr = adapter->tx_rings; 1595 struct rx_ring *rxr = adapter->rx_rings; 1596 1597 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 1598 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL); 1599 1600 EM_TX_LOCK(txr); 1601 em_txeof(txr); 1602 #ifdef EM_MULTIQUEUE 1603 if (!drbr_empty(ifp, txr->br)) 1604 em_mq_start_locked(ifp, txr); 1605 #else 1606 if (!if_sendq_empty(ifp)) 1607 em_start_locked(ifp, txr); 1608 #endif 1609 EM_TX_UNLOCK(txr); 1610 if (more) { 1611 taskqueue_enqueue(adapter->tq, &adapter->que_task); 1612 return; 1613 } 1614 } 1615 1616 em_enable_intr(adapter); 1617 return; 1618 } 1619 1620 1621 /********************************************************************* 1622 * 1623 * MSIX Interrupt Service Routines 1624 * 1625 **********************************************************************/ 1626 static void 1627 em_msix_tx(void *arg) 1628 { 1629 struct tx_ring *txr = arg; 1630 struct adapter *adapter = txr->adapter; 1631 if_t ifp = adapter->ifp; 1632 1633 ++txr->tx_irq; 1634 EM_TX_LOCK(txr); 1635 em_txeof(txr); 1636 #ifdef EM_MULTIQUEUE 1637 if (!drbr_empty(ifp, txr->br)) 1638 em_mq_start_locked(ifp, txr); 1639 #else 1640 if (!if_sendq_empty(ifp)) 1641 em_start_locked(ifp, txr); 1642 #endif 1643 1644 /* Reenable this interrupt */ 1645 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); 1646 EM_TX_UNLOCK(txr); 1647 return; 1648 } 1649 1650 /********************************************************************* 1651 * 1652 * MSIX RX Interrupt Service routine 1653 * 1654 **********************************************************************/ 1655 1656 static void 1657 em_msix_rx(void *arg) 1658 { 1659 struct rx_ring *rxr = arg; 1660 struct adapter *adapter = rxr->adapter; 1661 bool more; 1662 1663 ++rxr->rx_irq; 1664 if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)) 1665 return; 1666 more = em_rxeof(rxr, adapter->rx_process_limit, NULL); 1667 if (more) 1668 taskqueue_enqueue(rxr->tq, &rxr->rx_task); 1669 else { 1670 /* Reenable this interrupt */ 1671 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); 1672 } 1673 return; 1674 } 1675 1676 /********************************************************************* 1677 * 1678 * MSIX Link Fast Interrupt Service routine 1679 * 1680 **********************************************************************/ 1681 static void 1682 em_msix_link(void *arg) 1683 { 1684 struct adapter *adapter = arg; 1685 u32 reg_icr; 1686 1687 ++adapter->link_irq; 1688 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); 1689 1690 if (reg_icr & E1000_ICR_RXO) 1691 adapter->rx_overruns++; 1692 1693 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 1694 adapter->hw.mac.get_link_status = 1; 1695 em_handle_link(adapter, 0); 1696 } else 1697 E1000_WRITE_REG(&adapter->hw, E1000_IMS, 1698 EM_MSIX_LINK | E1000_IMS_LSC); 1699 /* 1700 
** Because we must read the ICR for this interrupt 1701 ** it may clear other causes using autoclear, for 1702 ** this reason we simply create a soft interrupt 1703 ** for all these vectors. 1704 */ 1705 if (reg_icr) { 1706 E1000_WRITE_REG(&adapter->hw, 1707 E1000_ICS, adapter->ims); 1708 } 1709 return; 1710 } 1711 1712 static void 1713 em_handle_rx(void *context, int pending) 1714 { 1715 struct rx_ring *rxr = context; 1716 struct adapter *adapter = rxr->adapter; 1717 bool more; 1718 1719 more = em_rxeof(rxr, adapter->rx_process_limit, NULL); 1720 if (more) 1721 taskqueue_enqueue(rxr->tq, &rxr->rx_task); 1722 else { 1723 /* Reenable this interrupt */ 1724 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); 1725 } 1726 } 1727 1728 static void 1729 em_handle_tx(void *context, int pending) 1730 { 1731 struct tx_ring *txr = context; 1732 struct adapter *adapter = txr->adapter; 1733 if_t ifp = adapter->ifp; 1734 1735 EM_TX_LOCK(txr); 1736 em_txeof(txr); 1737 #ifdef EM_MULTIQUEUE 1738 if (!drbr_empty(ifp, txr->br)) 1739 em_mq_start_locked(ifp, txr); 1740 #else 1741 if (!if_sendq_empty(ifp)) 1742 em_start_locked(ifp, txr); 1743 #endif 1744 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); 1745 EM_TX_UNLOCK(txr); 1746 } 1747 1748 static void 1749 em_handle_link(void *context, int pending) 1750 { 1751 struct adapter *adapter = context; 1752 struct tx_ring *txr = adapter->tx_rings; 1753 if_t ifp = adapter->ifp; 1754 1755 if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) 1756 return; 1757 1758 EM_CORE_LOCK(adapter); 1759 callout_stop(&adapter->timer); 1760 em_update_link_status(adapter); 1761 callout_reset(&adapter->timer, hz, em_local_timer, adapter); 1762 E1000_WRITE_REG(&adapter->hw, E1000_IMS, 1763 EM_MSIX_LINK | E1000_IMS_LSC); 1764 if (adapter->link_active) { 1765 for (int i = 0; i < adapter->num_queues; i++, txr++) { 1766 EM_TX_LOCK(txr); 1767 #ifdef EM_MULTIQUEUE 1768 if (!drbr_empty(ifp, txr->br)) 1769 em_mq_start_locked(ifp, txr); 1770 #else 1771 if (if_sendq_empty(ifp)) 1772 em_start_locked(ifp, txr); 1773 #endif 1774 EM_TX_UNLOCK(txr); 1775 } 1776 } 1777 EM_CORE_UNLOCK(adapter); 1778 } 1779 1780 1781 /********************************************************************* 1782 * 1783 * Media Ioctl callback 1784 * 1785 * This routine is called whenever the user queries the status of 1786 * the interface using ifconfig. 
1787 * 1788 **********************************************************************/ 1789 static void 1790 em_media_status(if_t ifp, struct ifmediareq *ifmr) 1791 { 1792 struct adapter *adapter = if_getsoftc(ifp); 1793 u_char fiber_type = IFM_1000_SX; 1794 1795 INIT_DEBUGOUT("em_media_status: begin"); 1796 1797 EM_CORE_LOCK(adapter); 1798 em_update_link_status(adapter); 1799 1800 ifmr->ifm_status = IFM_AVALID; 1801 ifmr->ifm_active = IFM_ETHER; 1802 1803 if (!adapter->link_active) { 1804 EM_CORE_UNLOCK(adapter); 1805 return; 1806 } 1807 1808 ifmr->ifm_status |= IFM_ACTIVE; 1809 1810 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || 1811 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { 1812 ifmr->ifm_active |= fiber_type | IFM_FDX; 1813 } else { 1814 switch (adapter->link_speed) { 1815 case 10: 1816 ifmr->ifm_active |= IFM_10_T; 1817 break; 1818 case 100: 1819 ifmr->ifm_active |= IFM_100_TX; 1820 break; 1821 case 1000: 1822 ifmr->ifm_active |= IFM_1000_T; 1823 break; 1824 } 1825 if (adapter->link_duplex == FULL_DUPLEX) 1826 ifmr->ifm_active |= IFM_FDX; 1827 else 1828 ifmr->ifm_active |= IFM_HDX; 1829 } 1830 EM_CORE_UNLOCK(adapter); 1831 } 1832 1833 /********************************************************************* 1834 * 1835 * Media Ioctl callback 1836 * 1837 * This routine is called when the user changes speed/duplex using 1838 * media/mediopt option with ifconfig. 1839 * 1840 **********************************************************************/ 1841 static int 1842 em_media_change(if_t ifp) 1843 { 1844 struct adapter *adapter = if_getsoftc(ifp); 1845 struct ifmedia *ifm = &adapter->media; 1846 1847 INIT_DEBUGOUT("em_media_change: begin"); 1848 1849 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) 1850 return (EINVAL); 1851 1852 EM_CORE_LOCK(adapter); 1853 switch (IFM_SUBTYPE(ifm->ifm_media)) { 1854 case IFM_AUTO: 1855 adapter->hw.mac.autoneg = DO_AUTO_NEG; 1856 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; 1857 break; 1858 case IFM_1000_LX: 1859 case IFM_1000_SX: 1860 case IFM_1000_T: 1861 adapter->hw.mac.autoneg = DO_AUTO_NEG; 1862 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; 1863 break; 1864 case IFM_100_TX: 1865 adapter->hw.mac.autoneg = FALSE; 1866 adapter->hw.phy.autoneg_advertised = 0; 1867 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) 1868 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; 1869 else 1870 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; 1871 break; 1872 case IFM_10_T: 1873 adapter->hw.mac.autoneg = FALSE; 1874 adapter->hw.phy.autoneg_advertised = 0; 1875 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) 1876 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; 1877 else 1878 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; 1879 break; 1880 default: 1881 device_printf(adapter->dev, "Unsupported media type\n"); 1882 } 1883 1884 em_init_locked(adapter); 1885 EM_CORE_UNLOCK(adapter); 1886 1887 return (0); 1888 } 1889 1890 /********************************************************************* 1891 * 1892 * This routine maps the mbufs to tx descriptors. 
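 * In outline: coalesce the Ethernet/IP/TCP headers when checksum offload
 * or TSO is requested, DMA-map the mbuf chain, fill one descriptor per
 * segment, and finally bump TDT to hand the frame to the hardware.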
1893 * 1894 * return 0 on success, positive on failure 1895 **********************************************************************/ 1896 1897 static int 1898 em_xmit(struct tx_ring *txr, struct mbuf **m_headp) 1899 { 1900 struct adapter *adapter = txr->adapter; 1901 bus_dma_segment_t segs[EM_MAX_SCATTER]; 1902 bus_dmamap_t map; 1903 struct em_txbuffer *tx_buffer, *tx_buffer_mapped; 1904 struct e1000_tx_desc *ctxd = NULL; 1905 struct mbuf *m_head; 1906 struct ether_header *eh; 1907 struct ip *ip = NULL; 1908 struct tcphdr *tp = NULL; 1909 u32 txd_upper = 0, txd_lower = 0; 1910 int ip_off, poff; 1911 int nsegs, i, j, first, last = 0; 1912 int error; 1913 bool do_tso, tso_desc, remap = TRUE; 1914 1915 m_head = *m_headp; 1916 do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO); 1917 tso_desc = FALSE; 1918 ip_off = poff = 0; 1919 1920 /* 1921 * Intel recommends entire IP/TCP header length reside in a single 1922 * buffer. If multiple descriptors are used to describe the IP and 1923 * TCP header, each descriptor should describe one or more 1924 * complete headers; descriptors referencing only parts of headers 1925 * are not supported. If all layer headers are not coalesced into 1926 * a single buffer, each buffer should not cross a 4KB boundary, 1927 * or be larger than the maximum read request size. 1928 * Controller also requires modifing IP/TCP header to make TSO work 1929 * so we firstly get a writable mbuf chain then coalesce ethernet/ 1930 * IP/TCP header into a single buffer to meet the requirement of 1931 * controller. This also simplifies IP/TCP/UDP checksum offloading 1932 * which also has similiar restrictions. 1933 */ 1934 if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) { 1935 if (do_tso || (m_head->m_next != NULL && 1936 m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) { 1937 if (M_WRITABLE(*m_headp) == 0) { 1938 m_head = m_dup(*m_headp, M_NOWAIT); 1939 m_freem(*m_headp); 1940 if (m_head == NULL) { 1941 *m_headp = NULL; 1942 return (ENOBUFS); 1943 } 1944 *m_headp = m_head; 1945 } 1946 } 1947 /* 1948 * XXX 1949 * Assume IPv4, we don't have TSO/checksum offload support 1950 * for IPv6 yet. 1951 */ 1952 ip_off = sizeof(struct ether_header); 1953 if (m_head->m_len < ip_off) { 1954 m_head = m_pullup(m_head, ip_off); 1955 if (m_head == NULL) { 1956 *m_headp = NULL; 1957 return (ENOBUFS); 1958 } 1959 } 1960 eh = mtod(m_head, struct ether_header *); 1961 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 1962 ip_off = sizeof(struct ether_vlan_header); 1963 if (m_head->m_len < ip_off) { 1964 m_head = m_pullup(m_head, ip_off); 1965 if (m_head == NULL) { 1966 *m_headp = NULL; 1967 return (ENOBUFS); 1968 } 1969 } 1970 } 1971 if (m_head->m_len < ip_off + sizeof(struct ip)) { 1972 m_head = m_pullup(m_head, ip_off + sizeof(struct ip)); 1973 if (m_head == NULL) { 1974 *m_headp = NULL; 1975 return (ENOBUFS); 1976 } 1977 } 1978 ip = (struct ip *)(mtod(m_head, char *) + ip_off); 1979 poff = ip_off + (ip->ip_hl << 2); 1980 1981 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) { 1982 if (m_head->m_len < poff + sizeof(struct tcphdr)) { 1983 m_head = m_pullup(m_head, poff + 1984 sizeof(struct tcphdr)); 1985 if (m_head == NULL) { 1986 *m_headp = NULL; 1987 return (ENOBUFS); 1988 } 1989 } 1990 tp = (struct tcphdr *)(mtod(m_head, char *) + poff); 1991 /* 1992 * TSO workaround: 1993 * pull 4 more bytes of data into it. 
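	 * (TSO_WORKAROUND is the same 4-byte pad that later sizes the
	 * sentinel descriptor appended to the end of a TSO packet.)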
1994 */ 1995 if (m_head->m_len < poff + (tp->th_off << 2)) { 1996 m_head = m_pullup(m_head, poff + 1997 (tp->th_off << 2) + 1998 TSO_WORKAROUND); 1999 if (m_head == NULL) { 2000 *m_headp = NULL; 2001 return (ENOBUFS); 2002 } 2003 } 2004 ip = (struct ip *)(mtod(m_head, char *) + ip_off); 2005 tp = (struct tcphdr *)(mtod(m_head, char *) + poff); 2006 if (do_tso) { 2007 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz + 2008 (ip->ip_hl << 2) + 2009 (tp->th_off << 2)); 2010 ip->ip_sum = 0; 2011 /* 2012 * The pseudo TCP checksum does not include TCP 2013 * payload length so driver should recompute 2014 * the checksum here what hardware expect to 2015 * see. This is adherence of Microsoft's Large 2016 * Send specification. 2017 */ 2018 tp->th_sum = in_pseudo(ip->ip_src.s_addr, 2019 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 2020 } 2021 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { 2022 if (m_head->m_len < poff + sizeof(struct udphdr)) { 2023 m_head = m_pullup(m_head, poff + 2024 sizeof(struct udphdr)); 2025 if (m_head == NULL) { 2026 *m_headp = NULL; 2027 return (ENOBUFS); 2028 } 2029 } 2030 ip = (struct ip *)(mtod(m_head, char *) + ip_off); 2031 } 2032 *m_headp = m_head; 2033 } 2034 2035 /* 2036 * Map the packet for DMA 2037 * 2038 * Capture the first descriptor index, 2039 * this descriptor will have the index 2040 * of the EOP which is the only one that 2041 * now gets a DONE bit writeback. 2042 */ 2043 first = txr->next_avail_desc; 2044 tx_buffer = &txr->tx_buffers[first]; 2045 tx_buffer_mapped = tx_buffer; 2046 map = tx_buffer->map; 2047 2048 retry: 2049 error = bus_dmamap_load_mbuf_sg(txr->txtag, map, 2050 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); 2051 2052 /* 2053 * There are two types of errors we can (try) to handle: 2054 * - EFBIG means the mbuf chain was too long and bus_dma ran 2055 * out of segments. Defragment the mbuf chain and try again. 2056 * - ENOMEM means bus_dma could not obtain enough bounce buffers 2057 * at this point in time. Defer sending and try again later. 2058 * All other errors, in particular EINVAL, are fatal and prevent the 2059 * mbuf chain from ever going through. Drop it and report error. 2060 */ 2061 if (error == EFBIG && remap) { 2062 struct mbuf *m; 2063 2064 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER); 2065 if (m == NULL) { 2066 adapter->mbuf_defrag_failed++; 2067 m_freem(*m_headp); 2068 *m_headp = NULL; 2069 return (ENOBUFS); 2070 } 2071 *m_headp = m; 2072 2073 /* Try it again, but only once */ 2074 remap = FALSE; 2075 goto retry; 2076 } else if (error != 0) { 2077 adapter->no_tx_dma_setup++; 2078 m_freem(*m_headp); 2079 *m_headp = NULL; 2080 return (error); 2081 } 2082 2083 /* 2084 * TSO Hardware workaround, if this packet is not 2085 * TSO, and is only a single descriptor long, and 2086 * it follows a TSO burst, then we need to add a 2087 * sentinel descriptor to prevent premature writeback. 
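	 * (txr->tx_tso records that the previous frame on this ring was a
	 * TSO frame; it is cleared below once the single-descriptor case
	 * has been flagged for a sentinel.)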
2088 */ 2089 if ((!do_tso) && (txr->tx_tso == TRUE)) { 2090 if (nsegs == 1) 2091 tso_desc = TRUE; 2092 txr->tx_tso = FALSE; 2093 } 2094 2095 if (nsegs > (txr->tx_avail - EM_MAX_SCATTER)) { 2096 txr->no_desc_avail++; 2097 bus_dmamap_unload(txr->txtag, map); 2098 return (ENOBUFS); 2099 } 2100 m_head = *m_headp; 2101 2102 /* Do hardware assists */ 2103 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 2104 em_tso_setup(txr, m_head, ip_off, ip, tp, 2105 &txd_upper, &txd_lower); 2106 /* we need to make a final sentinel transmit desc */ 2107 tso_desc = TRUE; 2108 } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) 2109 em_transmit_checksum_setup(txr, m_head, 2110 ip_off, ip, &txd_upper, &txd_lower); 2111 2112 if (m_head->m_flags & M_VLANTAG) { 2113 /* Set the vlan id. */ 2114 txd_upper |= htole16(if_getvtag(m_head)) << 16; 2115 /* Tell hardware to add tag */ 2116 txd_lower |= htole32(E1000_TXD_CMD_VLE); 2117 } 2118 2119 i = txr->next_avail_desc; 2120 2121 /* Set up our transmit descriptors */ 2122 for (j = 0; j < nsegs; j++) { 2123 bus_size_t seg_len; 2124 bus_addr_t seg_addr; 2125 2126 tx_buffer = &txr->tx_buffers[i]; 2127 ctxd = &txr->tx_base[i]; 2128 seg_addr = segs[j].ds_addr; 2129 seg_len = segs[j].ds_len; 2130 /* 2131 ** TSO Workaround: 2132 ** If this is the last descriptor, we want to 2133 ** split it so we have a small final sentinel 2134 */ 2135 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) { 2136 seg_len -= TSO_WORKAROUND; 2137 ctxd->buffer_addr = htole64(seg_addr); 2138 ctxd->lower.data = htole32( 2139 adapter->txd_cmd | txd_lower | seg_len); 2140 ctxd->upper.data = htole32(txd_upper); 2141 if (++i == adapter->num_tx_desc) 2142 i = 0; 2143 2144 /* Now make the sentinel */ 2145 txr->tx_avail--; 2146 ctxd = &txr->tx_base[i]; 2147 tx_buffer = &txr->tx_buffers[i]; 2148 ctxd->buffer_addr = 2149 htole64(seg_addr + seg_len); 2150 ctxd->lower.data = htole32( 2151 adapter->txd_cmd | txd_lower | TSO_WORKAROUND); 2152 ctxd->upper.data = 2153 htole32(txd_upper); 2154 last = i; 2155 if (++i == adapter->num_tx_desc) 2156 i = 0; 2157 } else { 2158 ctxd->buffer_addr = htole64(seg_addr); 2159 ctxd->lower.data = htole32( 2160 adapter->txd_cmd | txd_lower | seg_len); 2161 ctxd->upper.data = htole32(txd_upper); 2162 last = i; 2163 if (++i == adapter->num_tx_desc) 2164 i = 0; 2165 } 2166 tx_buffer->m_head = NULL; 2167 tx_buffer->next_eop = -1; 2168 } 2169 2170 txr->next_avail_desc = i; 2171 txr->tx_avail -= nsegs; 2172 2173 tx_buffer->m_head = m_head; 2174 /* 2175 ** Here we swap the map so the last descriptor, 2176 ** which gets the completion interrupt has the 2177 ** real map, and the first descriptor gets the 2178 ** unused map from this descriptor. 2179 */ 2180 tx_buffer_mapped->map = tx_buffer->map; 2181 tx_buffer->map = map; 2182 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); 2183 2184 /* 2185 * Last Descriptor of Packet 2186 * needs End Of Packet (EOP) 2187 * and Report Status (RS) 2188 */ 2189 ctxd->lower.data |= 2190 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); 2191 /* 2192 * Keep track in the first buffer which 2193 * descriptor will be written back 2194 */ 2195 tx_buffer = &txr->tx_buffers[first]; 2196 tx_buffer->next_eop = last; 2197 2198 /* 2199 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000 2200 * that this frame is available to transmit. 
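	 * The descriptor ring is synced (BUS_DMASYNC_PREWRITE) before the
	 * tail write so the hardware sees fully written descriptors when
	 * it starts fetching from the ring.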
2201 */ 2202 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 2203 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 2204 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); 2205 2206 return (0); 2207 } 2208 2209 static void 2210 em_set_promisc(struct adapter *adapter) 2211 { 2212 if_t ifp = adapter->ifp; 2213 u32 reg_rctl; 2214 2215 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 2216 2217 if (if_getflags(ifp) & IFF_PROMISC) { 2218 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); 2219 /* Turn this on if you want to see bad packets */ 2220 if (em_debug_sbp) 2221 reg_rctl |= E1000_RCTL_SBP; 2222 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2223 } else if (if_getflags(ifp) & IFF_ALLMULTI) { 2224 reg_rctl |= E1000_RCTL_MPE; 2225 reg_rctl &= ~E1000_RCTL_UPE; 2226 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2227 } 2228 } 2229 2230 static void 2231 em_disable_promisc(struct adapter *adapter) 2232 { 2233 if_t ifp = adapter->ifp; 2234 u32 reg_rctl; 2235 int mcnt = 0; 2236 2237 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 2238 reg_rctl &= (~E1000_RCTL_UPE); 2239 if (if_getflags(ifp) & IFF_ALLMULTI) 2240 mcnt = MAX_NUM_MULTICAST_ADDRESSES; 2241 else 2242 mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES); 2243 /* Don't disable if in MAX groups */ 2244 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) 2245 reg_rctl &= (~E1000_RCTL_MPE); 2246 reg_rctl &= (~E1000_RCTL_SBP); 2247 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2248 } 2249 2250 2251 /********************************************************************* 2252 * Multicast Update 2253 * 2254 * This routine is called whenever multicast address list is updated. 2255 * 2256 **********************************************************************/ 2257 2258 static void 2259 em_set_multi(struct adapter *adapter) 2260 { 2261 if_t ifp = adapter->ifp; 2262 u32 reg_rctl = 0; 2263 u8 *mta; /* Multicast array memory */ 2264 int mcnt = 0; 2265 2266 IOCTL_DEBUGOUT("em_set_multi: begin"); 2267 2268 mta = adapter->mta; 2269 bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); 2270 2271 if (adapter->hw.mac.type == e1000_82542 && 2272 adapter->hw.revision_id == E1000_REVISION_2) { 2273 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 2274 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) 2275 e1000_pci_clear_mwi(&adapter->hw); 2276 reg_rctl |= E1000_RCTL_RST; 2277 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2278 msec_delay(5); 2279 } 2280 2281 if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES); 2282 2283 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { 2284 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 2285 reg_rctl |= E1000_RCTL_MPE; 2286 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2287 } else 2288 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); 2289 2290 if (adapter->hw.mac.type == e1000_82542 && 2291 adapter->hw.revision_id == E1000_REVISION_2) { 2292 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 2293 reg_rctl &= ~E1000_RCTL_RST; 2294 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2295 msec_delay(5); 2296 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) 2297 e1000_pci_set_mwi(&adapter->hw); 2298 } 2299 } 2300 2301 2302 /********************************************************************* 2303 * Timer routine 2304 * 2305 * This routine checks for link status and updates statistics. 
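 * It is rescheduled every second (callout_reset with hz ticks) and also
 * acts as the transmit watchdog: a queue that reaches EM_TX_HUNG causes
 * the interface to be reinitialized.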
2306 * 2307 **********************************************************************/ 2308 2309 static void 2310 em_local_timer(void *arg) 2311 { 2312 struct adapter *adapter = arg; 2313 if_t ifp = adapter->ifp; 2314 struct tx_ring *txr = adapter->tx_rings; 2315 struct rx_ring *rxr = adapter->rx_rings; 2316 u32 trigger = 0; 2317 2318 EM_CORE_LOCK_ASSERT(adapter); 2319 2320 em_update_link_status(adapter); 2321 em_update_stats_counters(adapter); 2322 2323 /* Reset LAA into RAR[0] on 82571 */ 2324 if ((adapter->hw.mac.type == e1000_82571) && 2325 e1000_get_laa_state_82571(&adapter->hw)) 2326 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); 2327 2328 /* Mask to use in the irq trigger */ 2329 if (adapter->msix_mem) { 2330 for (int i = 0; i < adapter->num_queues; i++, rxr++) 2331 trigger |= rxr->ims; 2332 rxr = adapter->rx_rings; 2333 } else 2334 trigger = E1000_ICS_RXDMT0; 2335 2336 /* 2337 ** Check on the state of the TX queue(s), this 2338 ** can be done without the lock because its RO 2339 ** and the HUNG state will be static if set. 2340 */ 2341 for (int i = 0; i < adapter->num_queues; i++, txr++) { 2342 if (txr->busy == EM_TX_HUNG) 2343 goto hung; 2344 if (txr->busy >= EM_TX_MAXTRIES) 2345 txr->busy = EM_TX_HUNG; 2346 /* Schedule a TX tasklet if needed */ 2347 if (txr->tx_avail <= EM_MAX_SCATTER) 2348 taskqueue_enqueue(txr->tq, &txr->tx_task); 2349 } 2350 2351 callout_reset(&adapter->timer, hz, em_local_timer, adapter); 2352 #ifndef DEVICE_POLLING 2353 /* Trigger an RX interrupt to guarantee mbuf refresh */ 2354 E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger); 2355 #endif 2356 return; 2357 hung: 2358 /* Looks like we're hung */ 2359 device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n", 2360 txr->me); 2361 em_print_debug_info(adapter); 2362 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); 2363 adapter->watchdog_events++; 2364 em_init_locked(adapter); 2365 } 2366 2367 2368 static void 2369 em_update_link_status(struct adapter *adapter) 2370 { 2371 struct e1000_hw *hw = &adapter->hw; 2372 if_t ifp = adapter->ifp; 2373 device_t dev = adapter->dev; 2374 struct tx_ring *txr = adapter->tx_rings; 2375 u32 link_check = 0; 2376 2377 /* Get the cached link value or read phy for real */ 2378 switch (hw->phy.media_type) { 2379 case e1000_media_type_copper: 2380 if (hw->mac.get_link_status) { 2381 if (hw->mac.type == e1000_pch_spt) 2382 msec_delay(50); 2383 /* Do the work to read phy */ 2384 e1000_check_for_link(hw); 2385 link_check = !hw->mac.get_link_status; 2386 if (link_check) /* ESB2 fix */ 2387 e1000_cfg_on_link_up(hw); 2388 } else 2389 link_check = TRUE; 2390 break; 2391 case e1000_media_type_fiber: 2392 e1000_check_for_link(hw); 2393 link_check = (E1000_READ_REG(hw, E1000_STATUS) & 2394 E1000_STATUS_LU); 2395 break; 2396 case e1000_media_type_internal_serdes: 2397 e1000_check_for_link(hw); 2398 link_check = adapter->hw.mac.serdes_has_link; 2399 break; 2400 default: 2401 case e1000_media_type_unknown: 2402 break; 2403 } 2404 2405 /* Now check for a transition */ 2406 if (link_check && (adapter->link_active == 0)) { 2407 e1000_get_speed_and_duplex(hw, &adapter->link_speed, 2408 &adapter->link_duplex); 2409 /* Check if we must disable SPEED_MODE bit on PCI-E */ 2410 if ((adapter->link_speed != SPEED_1000) && 2411 ((hw->mac.type == e1000_82571) || 2412 (hw->mac.type == e1000_82572))) { 2413 int tarc0; 2414 tarc0 = E1000_READ_REG(hw, E1000_TARC(0)); 2415 tarc0 &= ~TARC_SPEED_MODE_BIT; 2416 E1000_WRITE_REG(hw, E1000_TARC(0), tarc0); 2417 } 2418 if (bootverbose) 2419 
device_printf(dev, "Link is up %d Mbps %s\n", 2420 adapter->link_speed, 2421 ((adapter->link_duplex == FULL_DUPLEX) ? 2422 "Full Duplex" : "Half Duplex")); 2423 adapter->link_active = 1; 2424 adapter->smartspeed = 0; 2425 if_setbaudrate(ifp, adapter->link_speed * 1000000); 2426 if_link_state_change(ifp, LINK_STATE_UP); 2427 } else if (!link_check && (adapter->link_active == 1)) { 2428 if_setbaudrate(ifp, 0); 2429 adapter->link_speed = 0; 2430 adapter->link_duplex = 0; 2431 if (bootverbose) 2432 device_printf(dev, "Link is Down\n"); 2433 adapter->link_active = 0; 2434 /* Link down, disable hang detection */ 2435 for (int i = 0; i < adapter->num_queues; i++, txr++) 2436 txr->busy = EM_TX_IDLE; 2437 if_link_state_change(ifp, LINK_STATE_DOWN); 2438 } 2439 } 2440 2441 /********************************************************************* 2442 * 2443 * This routine disables all traffic on the adapter by issuing a 2444 * global reset on the MAC and deallocates TX/RX buffers. 2445 * 2446 * This routine should always be called with BOTH the CORE 2447 * and TX locks. 2448 **********************************************************************/ 2449 2450 static void 2451 em_stop(void *arg) 2452 { 2453 struct adapter *adapter = arg; 2454 if_t ifp = adapter->ifp; 2455 struct tx_ring *txr = adapter->tx_rings; 2456 2457 EM_CORE_LOCK_ASSERT(adapter); 2458 2459 INIT_DEBUGOUT("em_stop: begin"); 2460 2461 em_disable_intr(adapter); 2462 callout_stop(&adapter->timer); 2463 2464 /* Tell the stack that the interface is no longer active */ 2465 if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2466 2467 /* Disarm Hang Detection. */ 2468 for (int i = 0; i < adapter->num_queues; i++, txr++) { 2469 EM_TX_LOCK(txr); 2470 txr->busy = EM_TX_IDLE; 2471 EM_TX_UNLOCK(txr); 2472 } 2473 2474 /* I219 needs some special flushing to avoid hangs */ 2475 if (adapter->hw.mac.type == e1000_pch_spt) 2476 em_flush_desc_rings(adapter); 2477 2478 e1000_reset_hw(&adapter->hw); 2479 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); 2480 2481 e1000_led_off(&adapter->hw); 2482 e1000_cleanup_led(&adapter->hw); 2483 } 2484 2485 2486 /********************************************************************* 2487 * 2488 * Determine hardware revision. 
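 * (Enables bus mastering, copies the PCI vendor/device/revision and
 * subsystem IDs into the shared-code hw structure, and lets
 * e1000_set_mac_type() select the matching MAC type.)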
2489 * 2490 **********************************************************************/ 2491 static void 2492 em_identify_hardware(struct adapter *adapter) 2493 { 2494 device_t dev = adapter->dev; 2495 2496 /* Make sure our PCI config space has the necessary stuff set */ 2497 pci_enable_busmaster(dev); 2498 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); 2499 2500 /* Save off the information about this board */ 2501 adapter->hw.vendor_id = pci_get_vendor(dev); 2502 adapter->hw.device_id = pci_get_device(dev); 2503 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); 2504 adapter->hw.subsystem_vendor_id = 2505 pci_read_config(dev, PCIR_SUBVEND_0, 2); 2506 adapter->hw.subsystem_device_id = 2507 pci_read_config(dev, PCIR_SUBDEV_0, 2); 2508 2509 /* Do Shared Code Init and Setup */ 2510 if (e1000_set_mac_type(&adapter->hw)) { 2511 device_printf(dev, "Setup init failure\n"); 2512 return; 2513 } 2514 } 2515 2516 static int 2517 em_allocate_pci_resources(struct adapter *adapter) 2518 { 2519 device_t dev = adapter->dev; 2520 int rid; 2521 2522 rid = PCIR_BAR(0); 2523 adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 2524 &rid, RF_ACTIVE); 2525 if (adapter->memory == NULL) { 2526 device_printf(dev, "Unable to allocate bus resource: memory\n"); 2527 return (ENXIO); 2528 } 2529 adapter->osdep.mem_bus_space_tag = 2530 rman_get_bustag(adapter->memory); 2531 adapter->osdep.mem_bus_space_handle = 2532 rman_get_bushandle(adapter->memory); 2533 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; 2534 2535 adapter->hw.back = &adapter->osdep; 2536 2537 return (0); 2538 } 2539 2540 /********************************************************************* 2541 * 2542 * Setup the Legacy or MSI Interrupt handler 2543 * 2544 **********************************************************************/ 2545 int 2546 em_allocate_legacy(struct adapter *adapter) 2547 { 2548 device_t dev = adapter->dev; 2549 struct tx_ring *txr = adapter->tx_rings; 2550 int error, rid = 0; 2551 2552 /* Manually turn off all interrupts */ 2553 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); 2554 2555 if (adapter->msix == 1) /* using MSI */ 2556 rid = 1; 2557 /* We allocate a single interrupt resource */ 2558 adapter->res = bus_alloc_resource_any(dev, 2559 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); 2560 if (adapter->res == NULL) { 2561 device_printf(dev, "Unable to allocate bus resource: " 2562 "interrupt\n"); 2563 return (ENXIO); 2564 } 2565 2566 /* 2567 * Allocate a fast interrupt and the associated 2568 * deferred processing contexts. 
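	 * The filter handler (em_irq_fast) only acknowledges the cause and
	 * enqueues que_task; the actual RX/TX cleanup runs later in
	 * em_handle_que() on the taskqueue created here.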
2569 */ 2570 TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter); 2571 adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT, 2572 taskqueue_thread_enqueue, &adapter->tq); 2573 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que", 2574 device_get_nameunit(adapter->dev)); 2575 /* Use a TX only tasklet for local timer */ 2576 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); 2577 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, 2578 taskqueue_thread_enqueue, &txr->tq); 2579 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq", 2580 device_get_nameunit(adapter->dev)); 2581 TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter); 2582 if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET, 2583 em_irq_fast, NULL, adapter, &adapter->tag)) != 0) { 2584 device_printf(dev, "Failed to register fast interrupt " 2585 "handler: %d\n", error); 2586 taskqueue_free(adapter->tq); 2587 adapter->tq = NULL; 2588 return (error); 2589 } 2590 2591 return (0); 2592 } 2593 2594 /********************************************************************* 2595 * 2596 * Setup the MSIX Interrupt handlers 2597 * This is not really Multiqueue, rather 2598 * its just seperate interrupt vectors 2599 * for TX, RX, and Link. 2600 * 2601 **********************************************************************/ 2602 int 2603 em_allocate_msix(struct adapter *adapter) 2604 { 2605 device_t dev = adapter->dev; 2606 struct tx_ring *txr = adapter->tx_rings; 2607 struct rx_ring *rxr = adapter->rx_rings; 2608 int error, rid, vector = 0; 2609 int cpu_id = 0; 2610 2611 2612 /* Make sure all interrupts are disabled */ 2613 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); 2614 2615 /* First set up ring resources */ 2616 for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) { 2617 2618 /* RX ring */ 2619 rid = vector + 1; 2620 2621 rxr->res = bus_alloc_resource_any(dev, 2622 SYS_RES_IRQ, &rid, RF_ACTIVE); 2623 if (rxr->res == NULL) { 2624 device_printf(dev, 2625 "Unable to allocate bus resource: " 2626 "RX MSIX Interrupt %d\n", i); 2627 return (ENXIO); 2628 } 2629 if ((error = bus_setup_intr(dev, rxr->res, 2630 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx, 2631 rxr, &rxr->tag)) != 0) { 2632 device_printf(dev, "Failed to register RX handler"); 2633 return (error); 2634 } 2635 #if __FreeBSD_version >= 800504 2636 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i); 2637 #endif 2638 rxr->msix = vector; 2639 2640 if (em_last_bind_cpu < 0) 2641 em_last_bind_cpu = CPU_FIRST(); 2642 cpu_id = em_last_bind_cpu; 2643 bus_bind_intr(dev, rxr->res, cpu_id); 2644 2645 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr); 2646 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT, 2647 taskqueue_thread_enqueue, &rxr->tq); 2648 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)", 2649 device_get_nameunit(adapter->dev), cpu_id); 2650 /* 2651 ** Set the bit to enable interrupt 2652 ** in E1000_IMS -- bits 20 and 21 2653 ** are for RX0 and RX1, note this has 2654 ** NOTHING to do with the MSIX vector 2655 */ 2656 rxr->ims = 1 << (20 + i); 2657 adapter->ims |= rxr->ims; 2658 adapter->ivars |= (8 | rxr->msix) << (i * 4); 2659 2660 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); 2661 } 2662 2663 for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) { 2664 /* TX ring */ 2665 rid = vector + 1; 2666 txr->res = bus_alloc_resource_any(dev, 2667 SYS_RES_IRQ, &rid, RF_ACTIVE); 2668 if (txr->res == NULL) { 2669 device_printf(dev, 2670 "Unable to allocate bus resource: " 2671 "TX MSIX Interrupt %d\n", i); 2672 return 
(ENXIO); 2673 } 2674 if ((error = bus_setup_intr(dev, txr->res, 2675 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx, 2676 txr, &txr->tag)) != 0) { 2677 device_printf(dev, "Failed to register TX handler"); 2678 return (error); 2679 } 2680 #if __FreeBSD_version >= 800504 2681 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i); 2682 #endif 2683 txr->msix = vector; 2684 2685 if (em_last_bind_cpu < 0) 2686 em_last_bind_cpu = CPU_FIRST(); 2687 cpu_id = em_last_bind_cpu; 2688 bus_bind_intr(dev, txr->res, cpu_id); 2689 2690 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); 2691 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, 2692 taskqueue_thread_enqueue, &txr->tq); 2693 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)", 2694 device_get_nameunit(adapter->dev), cpu_id); 2695 /* 2696 ** Set the bit to enable interrupt 2697 ** in E1000_IMS -- bits 22 and 23 2698 ** are for TX0 and TX1, note this has 2699 ** NOTHING to do with the MSIX vector 2700 */ 2701 txr->ims = 1 << (22 + i); 2702 adapter->ims |= txr->ims; 2703 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4)); 2704 2705 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); 2706 } 2707 2708 /* Link interrupt */ 2709 rid = vector + 1; 2710 adapter->res = bus_alloc_resource_any(dev, 2711 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); 2712 if (!adapter->res) { 2713 device_printf(dev,"Unable to allocate " 2714 "bus resource: Link interrupt [%d]\n", rid); 2715 return (ENXIO); 2716 } 2717 /* Set the link handler function */ 2718 error = bus_setup_intr(dev, adapter->res, 2719 INTR_TYPE_NET | INTR_MPSAFE, NULL, 2720 em_msix_link, adapter, &adapter->tag); 2721 if (error) { 2722 adapter->res = NULL; 2723 device_printf(dev, "Failed to register LINK handler"); 2724 return (error); 2725 } 2726 #if __FreeBSD_version >= 800504 2727 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); 2728 #endif 2729 adapter->linkvec = vector; 2730 adapter->ivars |= (8 | vector) << 16; 2731 adapter->ivars |= 0x80000000; 2732 2733 return (0); 2734 } 2735 2736 2737 static void 2738 em_free_pci_resources(struct adapter *adapter) 2739 { 2740 device_t dev = adapter->dev; 2741 struct tx_ring *txr; 2742 struct rx_ring *rxr; 2743 int rid; 2744 2745 2746 /* 2747 ** Release all the queue interrupt resources: 2748 */ 2749 for (int i = 0; i < adapter->num_queues; i++) { 2750 txr = &adapter->tx_rings[i]; 2751 /* an early abort? */ 2752 if (txr == NULL) 2753 break; 2754 rid = txr->msix +1; 2755 if (txr->tag != NULL) { 2756 bus_teardown_intr(dev, txr->res, txr->tag); 2757 txr->tag = NULL; 2758 } 2759 if (txr->res != NULL) 2760 bus_release_resource(dev, SYS_RES_IRQ, 2761 rid, txr->res); 2762 2763 rxr = &adapter->rx_rings[i]; 2764 /* an early abort? */ 2765 if (rxr == NULL) 2766 break; 2767 rid = rxr->msix +1; 2768 if (rxr->tag != NULL) { 2769 bus_teardown_intr(dev, rxr->res, rxr->tag); 2770 rxr->tag = NULL; 2771 } 2772 if (rxr->res != NULL) 2773 bus_release_resource(dev, SYS_RES_IRQ, 2774 rid, rxr->res); 2775 } 2776 2777 if (adapter->linkvec) /* we are doing MSIX */ 2778 rid = adapter->linkvec + 1; 2779 else 2780 (adapter->msix != 0) ? 
(rid = 1):(rid = 0); 2781 2782 if (adapter->tag != NULL) { 2783 bus_teardown_intr(dev, adapter->res, adapter->tag); 2784 adapter->tag = NULL; 2785 } 2786 2787 if (adapter->res != NULL) 2788 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); 2789 2790 2791 if (adapter->msix) 2792 pci_release_msi(dev); 2793 2794 if (adapter->msix_mem != NULL) 2795 bus_release_resource(dev, SYS_RES_MEMORY, 2796 PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); 2797 2798 if (adapter->memory != NULL) 2799 bus_release_resource(dev, SYS_RES_MEMORY, 2800 PCIR_BAR(0), adapter->memory); 2801 2802 if (adapter->flash != NULL) 2803 bus_release_resource(dev, SYS_RES_MEMORY, 2804 EM_FLASH, adapter->flash); 2805 } 2806 2807 /* 2808 * Setup MSI or MSI/X 2809 */ 2810 static int 2811 em_setup_msix(struct adapter *adapter) 2812 { 2813 device_t dev = adapter->dev; 2814 int val; 2815 2816 /* Nearly always going to use one queue */ 2817 adapter->num_queues = 1; 2818 2819 /* 2820 ** Try using MSI-X for Hartwell adapters 2821 */ 2822 if ((adapter->hw.mac.type == e1000_82574) && 2823 (em_enable_msix == TRUE)) { 2824 #ifdef EM_MULTIQUEUE 2825 adapter->num_queues = (em_num_queues == 1) ? 1 : 2; 2826 if (adapter->num_queues > 1) 2827 em_enable_vectors_82574(adapter); 2828 #endif 2829 /* Map the MSIX BAR */ 2830 int rid = PCIR_BAR(EM_MSIX_BAR); 2831 adapter->msix_mem = bus_alloc_resource_any(dev, 2832 SYS_RES_MEMORY, &rid, RF_ACTIVE); 2833 if (adapter->msix_mem == NULL) { 2834 /* May not be enabled */ 2835 device_printf(adapter->dev, 2836 "Unable to map MSIX table \n"); 2837 goto msi; 2838 } 2839 val = pci_msix_count(dev); 2840 2841 #ifdef EM_MULTIQUEUE 2842 /* We need 5 vectors in the multiqueue case */ 2843 if (adapter->num_queues > 1 ) { 2844 if (val >= 5) 2845 val = 5; 2846 else { 2847 adapter->num_queues = 1; 2848 device_printf(adapter->dev, 2849 "Insufficient MSIX vectors for >1 queue, " 2850 "using single queue...\n"); 2851 goto msix_one; 2852 } 2853 } else { 2854 msix_one: 2855 #endif 2856 if (val >= 3) 2857 val = 3; 2858 else { 2859 device_printf(adapter->dev, 2860 "Insufficient MSIX vectors, using MSI\n"); 2861 goto msi; 2862 } 2863 #ifdef EM_MULTIQUEUE 2864 } 2865 #endif 2866 2867 if ((pci_alloc_msix(dev, &val) == 0)) { 2868 device_printf(adapter->dev, 2869 "Using MSIX interrupts " 2870 "with %d vectors\n", val); 2871 return (val); 2872 } 2873 2874 /* 2875 ** If MSIX alloc failed or provided us with 2876 ** less than needed, free and fall through to MSI 2877 */ 2878 pci_release_msi(dev); 2879 } 2880 msi: 2881 if (adapter->msix_mem != NULL) { 2882 bus_release_resource(dev, SYS_RES_MEMORY, 2883 PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); 2884 adapter->msix_mem = NULL; 2885 } 2886 val = 1; 2887 if (pci_alloc_msi(dev, &val) == 0) { 2888 device_printf(adapter->dev, "Using an MSI interrupt\n"); 2889 return (val); 2890 } 2891 /* Should only happen due to manual configuration */ 2892 device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n"); 2893 return (0); 2894 } 2895 2896 2897 /* 2898 ** The 3 following flush routines are used as a workaround in the 2899 ** I219 client parts and only for them. 2900 ** 2901 ** em_flush_tx_ring - remove all descriptors from the tx_ring 2902 ** 2903 ** We want to clear all pending descriptors from the TX ring. 2904 ** zeroing happens when the HW reads the regs. We assign the ring itself as 2905 ** the data of the next descriptor. We don't care about the data we are about 2906 ** to reset the HW. 
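** (The dummy descriptor points at the ring's own physical address with a
** 512-byte length; advancing TDT once makes the hardware fetch and retire
** it, which drains whatever descriptors it was still holding.)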
2907 */ 2908 static void 2909 em_flush_tx_ring(struct adapter *adapter) 2910 { 2911 struct e1000_hw *hw = &adapter->hw; 2912 struct tx_ring *txr = adapter->tx_rings; 2913 struct e1000_tx_desc *txd; 2914 u32 tctl, txd_lower = E1000_TXD_CMD_IFCS; 2915 u16 size = 512; 2916 2917 tctl = E1000_READ_REG(hw, E1000_TCTL); 2918 E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN); 2919 2920 txd = &txr->tx_base[txr->next_avail_desc++]; 2921 if (txr->next_avail_desc == adapter->num_tx_desc) 2922 txr->next_avail_desc = 0; 2923 2924 /* Just use the ring as a dummy buffer addr */ 2925 txd->buffer_addr = txr->txdma.dma_paddr; 2926 txd->lower.data = htole32(txd_lower | size); 2927 txd->upper.data = 0; 2928 2929 /* flush descriptors to memory before notifying the HW */ 2930 wmb(); 2931 2932 E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc); 2933 mb(); 2934 usec_delay(250); 2935 } 2936 2937 /* 2938 ** em_flush_rx_ring - remove all descriptors from the rx_ring 2939 ** 2940 ** Mark all descriptors in the RX ring as consumed and disable the rx ring 2941 */ 2942 static void 2943 em_flush_rx_ring(struct adapter *adapter) 2944 { 2945 struct e1000_hw *hw = &adapter->hw; 2946 u32 rctl, rxdctl; 2947 2948 rctl = E1000_READ_REG(hw, E1000_RCTL); 2949 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); 2950 E1000_WRITE_FLUSH(hw); 2951 usec_delay(150); 2952 2953 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); 2954 /* zero the lower 14 bits (prefetch and host thresholds) */ 2955 rxdctl &= 0xffffc000; 2956 /* 2957 * update thresholds: prefetch threshold to 31, host threshold to 1 2958 * and make sure the granularity is "descriptors" and not "cache lines" 2959 */ 2960 rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC); 2961 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl); 2962 2963 /* momentarily enable the RX ring for the changes to take effect */ 2964 E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN); 2965 E1000_WRITE_FLUSH(hw); 2966 usec_delay(150); 2967 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); 2968 } 2969 2970 /* 2971 ** em_flush_desc_rings - remove all descriptors from the descriptor rings 2972 ** 2973 ** In i219, the descriptor rings must be emptied before resetting the HW 2974 ** or before changing the device state to D3 during runtime (runtime PM). 
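** (This flush is invoked from em_stop() and em_reset() when the MAC type
** is e1000_pch_spt, i.e. only on the I219 client parts.)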
2975 ** 2976 ** Failure to do this will cause the HW to enter a unit hang state which can 2977 ** only be released by PCI reset on the device 2978 ** 2979 */ 2980 static void 2981 em_flush_desc_rings(struct adapter *adapter) 2982 { 2983 struct e1000_hw *hw = &adapter->hw; 2984 device_t dev = adapter->dev; 2985 u16 hang_state; 2986 u32 fext_nvm11, tdlen; 2987 2988 /* First, disable MULR fix in FEXTNVM11 */ 2989 fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11); 2990 fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX; 2991 E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11); 2992 2993 /* do nothing if we're not in faulty state, or if the queue is empty */ 2994 tdlen = E1000_READ_REG(hw, E1000_TDLEN(0)); 2995 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2); 2996 if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen) 2997 return; 2998 em_flush_tx_ring(adapter); 2999 3000 /* recheck, maybe the fault is caused by the rx ring */ 3001 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2); 3002 if (hang_state & FLUSH_DESC_REQUIRED) 3003 em_flush_rx_ring(adapter); 3004 } 3005 3006 3007 /********************************************************************* 3008 * 3009 * Initialize the hardware to a configuration 3010 * as specified by the adapter structure. 3011 * 3012 **********************************************************************/ 3013 static void 3014 em_reset(struct adapter *adapter) 3015 { 3016 device_t dev = adapter->dev; 3017 if_t ifp = adapter->ifp; 3018 struct e1000_hw *hw = &adapter->hw; 3019 u16 rx_buffer_size; 3020 u32 pba; 3021 3022 INIT_DEBUGOUT("em_reset: begin"); 3023 3024 /* Set up smart power down as default off on newer adapters. */ 3025 if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 || 3026 hw->mac.type == e1000_82572)) { 3027 u16 phy_tmp = 0; 3028 3029 /* Speed up time to link by disabling smart power down. */ 3030 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp); 3031 phy_tmp &= ~IGP02E1000_PM_SPD; 3032 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp); 3033 } 3034 3035 /* 3036 * Packet Buffer Allocation (PBA) 3037 * Writing PBA sets the receive portion of the buffer 3038 * the remainder is used for the transmit buffer. 3039 */ 3040 switch (hw->mac.type) { 3041 /* Total Packet Buffer on these is 48K */ 3042 case e1000_82571: 3043 case e1000_82572: 3044 case e1000_80003es2lan: 3045 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */ 3046 break; 3047 case e1000_82573: /* 82573: Total Packet Buffer is 32K */ 3048 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */ 3049 break; 3050 case e1000_82574: 3051 case e1000_82583: 3052 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */ 3053 break; 3054 case e1000_ich8lan: 3055 pba = E1000_PBA_8K; 3056 break; 3057 case e1000_ich9lan: 3058 case e1000_ich10lan: 3059 /* Boost Receive side for jumbo frames */ 3060 if (adapter->hw.mac.max_frame_size > 4096) 3061 pba = E1000_PBA_14K; 3062 else 3063 pba = E1000_PBA_10K; 3064 break; 3065 case e1000_pchlan: 3066 case e1000_pch2lan: 3067 case e1000_pch_lpt: 3068 case e1000_pch_spt: 3069 pba = E1000_PBA_26K; 3070 break; 3071 default: 3072 if (adapter->hw.mac.max_frame_size > 8192) 3073 pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ 3074 else 3075 pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ 3076 } 3077 E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); 3078 3079 /* 3080 * These parameters control the automatic generation (Tx) and 3081 * response (Rx) to Ethernet PAUSE frames. 
3082 * - High water mark should allow for at least two frames to be 3083 * received after sending an XOFF. 3084 * - Low water mark works best when it is very near the high water mark. 3085 * This allows the receiver to restart by sending XON when it has 3086 * drained a bit. Here we use an arbitary value of 1500 which will 3087 * restart after one full frame is pulled from the buffer. There 3088 * could be several smaller frames in the buffer and if so they will 3089 * not trigger the XON until their total number reduces the buffer 3090 * by 1500. 3091 * - The pause time is fairly large at 1000 x 512ns = 512 usec. 3092 */ 3093 rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 ); 3094 hw->fc.high_water = rx_buffer_size - 3095 roundup2(adapter->hw.mac.max_frame_size, 1024); 3096 hw->fc.low_water = hw->fc.high_water - 1500; 3097 3098 if (adapter->fc) /* locally set flow control value? */ 3099 hw->fc.requested_mode = adapter->fc; 3100 else 3101 hw->fc.requested_mode = e1000_fc_full; 3102 3103 if (hw->mac.type == e1000_80003es2lan) 3104 hw->fc.pause_time = 0xFFFF; 3105 else 3106 hw->fc.pause_time = EM_FC_PAUSE_TIME; 3107 3108 hw->fc.send_xon = TRUE; 3109 3110 /* Device specific overrides/settings */ 3111 switch (hw->mac.type) { 3112 case e1000_pchlan: 3113 /* Workaround: no TX flow ctrl for PCH */ 3114 hw->fc.requested_mode = e1000_fc_rx_pause; 3115 hw->fc.pause_time = 0xFFFF; /* override */ 3116 if (if_getmtu(ifp) > ETHERMTU) { 3117 hw->fc.high_water = 0x3500; 3118 hw->fc.low_water = 0x1500; 3119 } else { 3120 hw->fc.high_water = 0x5000; 3121 hw->fc.low_water = 0x3000; 3122 } 3123 hw->fc.refresh_time = 0x1000; 3124 break; 3125 case e1000_pch2lan: 3126 case e1000_pch_lpt: 3127 case e1000_pch_spt: 3128 hw->fc.high_water = 0x5C20; 3129 hw->fc.low_water = 0x5048; 3130 hw->fc.pause_time = 0x0650; 3131 hw->fc.refresh_time = 0x0400; 3132 /* Jumbos need adjusted PBA */ 3133 if (if_getmtu(ifp) > ETHERMTU) 3134 E1000_WRITE_REG(hw, E1000_PBA, 12); 3135 else 3136 E1000_WRITE_REG(hw, E1000_PBA, 26); 3137 break; 3138 case e1000_ich9lan: 3139 case e1000_ich10lan: 3140 if (if_getmtu(ifp) > ETHERMTU) { 3141 hw->fc.high_water = 0x2800; 3142 hw->fc.low_water = hw->fc.high_water - 8; 3143 break; 3144 } 3145 /* else fall thru */ 3146 default: 3147 if (hw->mac.type == e1000_80003es2lan) 3148 hw->fc.pause_time = 0xFFFF; 3149 break; 3150 } 3151 3152 /* I219 needs some special flushing to avoid hangs */ 3153 if (hw->mac.type == e1000_pch_spt) 3154 em_flush_desc_rings(adapter); 3155 3156 /* Issue a global reset */ 3157 e1000_reset_hw(hw); 3158 E1000_WRITE_REG(hw, E1000_WUC, 0); 3159 em_disable_aspm(adapter); 3160 /* and a re-init */ 3161 if (e1000_init_hw(hw) < 0) { 3162 device_printf(dev, "Hardware Initialization Failed\n"); 3163 return; 3164 } 3165 3166 E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN); 3167 e1000_get_phy_info(hw); 3168 e1000_check_for_link(hw); 3169 return; 3170 } 3171 3172 /********************************************************************* 3173 * 3174 * Setup networking device structure and register an interface. 
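 * (Registers the ifnet callbacks, advertises the HWCSUM/TSO4/VLAN
 * capabilities, attaches the Ethernet address via ether_ifattach(), and
 * populates the supported ifmedia list.)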
3175 * 3176 **********************************************************************/ 3177 static int 3178 em_setup_interface(device_t dev, struct adapter *adapter) 3179 { 3180 if_t ifp; 3181 3182 INIT_DEBUGOUT("em_setup_interface: begin"); 3183 3184 ifp = adapter->ifp = if_gethandle(IFT_ETHER); 3185 if (ifp == 0) { 3186 device_printf(dev, "can not allocate ifnet structure\n"); 3187 return (-1); 3188 } 3189 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 3190 if_setdev(ifp, dev); 3191 if_setinitfn(ifp, em_init); 3192 if_setsoftc(ifp, adapter); 3193 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); 3194 if_setioctlfn(ifp, em_ioctl); 3195 if_setgetcounterfn(ifp, em_get_counter); 3196 3197 /* TSO parameters */ 3198 ifp->if_hw_tsomax = IP_MAXPACKET; 3199 /* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */ 3200 ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5; 3201 ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE; 3202 3203 #ifdef EM_MULTIQUEUE 3204 /* Multiqueue stack interface */ 3205 if_settransmitfn(ifp, em_mq_start); 3206 if_setqflushfn(ifp, em_qflush); 3207 #else 3208 if_setstartfn(ifp, em_start); 3209 if_setsendqlen(ifp, adapter->num_tx_desc - 1); 3210 if_setsendqready(ifp); 3211 #endif 3212 3213 ether_ifattach(ifp, adapter->hw.mac.addr); 3214 3215 if_setcapabilities(ifp, 0); 3216 if_setcapenable(ifp, 0); 3217 3218 3219 if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | 3220 IFCAP_TSO4, 0); 3221 /* 3222 * Tell the upper layer(s) we 3223 * support full VLAN capability 3224 */ 3225 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); 3226 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | 3227 IFCAP_VLAN_MTU, 0); 3228 if_setcapenable(ifp, if_getcapabilities(ifp)); 3229 3230 /* 3231 ** Don't turn this on by default, if vlans are 3232 ** created on another pseudo device (eg. lagg) 3233 ** then vlan events are not passed thru, breaking 3234 ** operation, but with HW FILTER off it works. If 3235 ** using vlans directly on the em driver you can 3236 ** enable this and get full hardware tag filtering. 
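** (The bit set just below only advertises the capability; it stays
** disabled until toggled at run time, typically with ifconfig's
** "vlanhwfilter" option.)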
3237 */ 3238 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0); 3239 3240 #ifdef DEVICE_POLLING 3241 if_setcapabilitiesbit(ifp, IFCAP_POLLING,0); 3242 #endif 3243 3244 /* Enable only WOL MAGIC by default */ 3245 if (adapter->wol) { 3246 if_setcapabilitiesbit(ifp, IFCAP_WOL, 0); 3247 if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0); 3248 } 3249 3250 /* 3251 * Specify the media types supported by this adapter and register 3252 * callbacks to update media and link information 3253 */ 3254 ifmedia_init(&adapter->media, IFM_IMASK, 3255 em_media_change, em_media_status); 3256 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || 3257 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { 3258 u_char fiber_type = IFM_1000_SX; /* default type */ 3259 3260 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 3261 0, NULL); 3262 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL); 3263 } else { 3264 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); 3265 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 3266 0, NULL); 3267 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 3268 0, NULL); 3269 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 3270 0, NULL); 3271 if (adapter->hw.phy.type != e1000_phy_ife) { 3272 ifmedia_add(&adapter->media, 3273 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); 3274 ifmedia_add(&adapter->media, 3275 IFM_ETHER | IFM_1000_T, 0, NULL); 3276 } 3277 } 3278 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); 3279 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); 3280 return (0); 3281 } 3282 3283 3284 /* 3285 * Manage DMA'able memory. 3286 */ 3287 static void 3288 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 3289 { 3290 if (error) 3291 return; 3292 *(bus_addr_t *) arg = segs[0].ds_addr; 3293 } 3294 3295 static int 3296 em_dma_malloc(struct adapter *adapter, bus_size_t size, 3297 struct em_dma_alloc *dma, int mapflags) 3298 { 3299 int error; 3300 3301 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ 3302 EM_DBA_ALIGN, 0, /* alignment, bounds */ 3303 BUS_SPACE_MAXADDR, /* lowaddr */ 3304 BUS_SPACE_MAXADDR, /* highaddr */ 3305 NULL, NULL, /* filter, filterarg */ 3306 size, /* maxsize */ 3307 1, /* nsegments */ 3308 size, /* maxsegsize */ 3309 0, /* flags */ 3310 NULL, /* lockfunc */ 3311 NULL, /* lockarg */ 3312 &dma->dma_tag); 3313 if (error) { 3314 device_printf(adapter->dev, 3315 "%s: bus_dma_tag_create failed: %d\n", 3316 __func__, error); 3317 goto fail_0; 3318 } 3319 3320 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, 3321 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); 3322 if (error) { 3323 device_printf(adapter->dev, 3324 "%s: bus_dmamem_alloc(%ju) failed: %d\n", 3325 __func__, (uintmax_t)size, error); 3326 goto fail_2; 3327 } 3328 3329 dma->dma_paddr = 0; 3330 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, 3331 size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); 3332 if (error || dma->dma_paddr == 0) { 3333 device_printf(adapter->dev, 3334 "%s: bus_dmamap_load failed: %d\n", 3335 __func__, error); 3336 goto fail_3; 3337 } 3338 3339 return (0); 3340 3341 fail_3: 3342 bus_dmamap_unload(dma->dma_tag, dma->dma_map); 3343 fail_2: 3344 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); 3345 bus_dma_tag_destroy(dma->dma_tag); 3346 fail_0: 3347 dma->dma_tag = NULL; 3348 3349 return (error); 3350 } 3351 3352 static void 3353 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma) 3354 
{ 3355 if (dma->dma_tag == NULL) 3356 return; 3357 if (dma->dma_paddr != 0) { 3358 bus_dmamap_sync(dma->dma_tag, dma->dma_map, 3359 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 3360 bus_dmamap_unload(dma->dma_tag, dma->dma_map); 3361 dma->dma_paddr = 0; 3362 } 3363 if (dma->dma_vaddr != NULL) { 3364 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); 3365 dma->dma_vaddr = NULL; 3366 } 3367 bus_dma_tag_destroy(dma->dma_tag); 3368 dma->dma_tag = NULL; 3369 } 3370 3371 3372 /********************************************************************* 3373 * 3374 * Allocate memory for the transmit and receive rings, and then 3375 * the descriptors associated with each, called only once at attach. 3376 * 3377 **********************************************************************/ 3378 static int 3379 em_allocate_queues(struct adapter *adapter) 3380 { 3381 device_t dev = adapter->dev; 3382 struct tx_ring *txr = NULL; 3383 struct rx_ring *rxr = NULL; 3384 int rsize, tsize, error = E1000_SUCCESS; 3385 int txconf = 0, rxconf = 0; 3386 3387 3388 /* Allocate the TX ring struct memory */ 3389 if (!(adapter->tx_rings = 3390 (struct tx_ring *) malloc(sizeof(struct tx_ring) * 3391 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 3392 device_printf(dev, "Unable to allocate TX ring memory\n"); 3393 error = ENOMEM; 3394 goto fail; 3395 } 3396 3397 /* Now allocate the RX */ 3398 if (!(adapter->rx_rings = 3399 (struct rx_ring *) malloc(sizeof(struct rx_ring) * 3400 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 3401 device_printf(dev, "Unable to allocate RX ring memory\n"); 3402 error = ENOMEM; 3403 goto rx_fail; 3404 } 3405 3406 tsize = roundup2(adapter->num_tx_desc * 3407 sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); 3408 /* 3409 * Now set up the TX queues, txconf is needed to handle the 3410 * possibility that things fail midcourse and we need to 3411 * undo memory gracefully 3412 */ 3413 for (int i = 0; i < adapter->num_queues; i++, txconf++) { 3414 /* Set up some basics */ 3415 txr = &adapter->tx_rings[i]; 3416 txr->adapter = adapter; 3417 txr->me = i; 3418 3419 /* Initialize the TX lock */ 3420 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", 3421 device_get_nameunit(dev), txr->me); 3422 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); 3423 3424 if (em_dma_malloc(adapter, tsize, 3425 &txr->txdma, BUS_DMA_NOWAIT)) { 3426 device_printf(dev, 3427 "Unable to allocate TX Descriptor memory\n"); 3428 error = ENOMEM; 3429 goto err_tx_desc; 3430 } 3431 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr; 3432 bzero((void *)txr->tx_base, tsize); 3433 3434 if (em_allocate_transmit_buffers(txr)) { 3435 device_printf(dev, 3436 "Critical Failure setting up transmit buffers\n"); 3437 error = ENOMEM; 3438 goto err_tx_desc; 3439 } 3440 #if __FreeBSD_version >= 800000 3441 /* Allocate a buf ring */ 3442 txr->br = buf_ring_alloc(4096, M_DEVBUF, 3443 M_WAITOK, &txr->tx_mtx); 3444 #endif 3445 } 3446 3447 /* 3448 * Next the RX queues... 
3449 */ 3450 rsize = roundup2(adapter->num_rx_desc * 3451 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); 3452 for (int i = 0; i < adapter->num_queues; i++, rxconf++) { 3453 rxr = &adapter->rx_rings[i]; 3454 rxr->adapter = adapter; 3455 rxr->me = i; 3456 3457 /* Initialize the RX lock */ 3458 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", 3459 device_get_nameunit(dev), txr->me); 3460 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); 3461 3462 if (em_dma_malloc(adapter, rsize, 3463 &rxr->rxdma, BUS_DMA_NOWAIT)) { 3464 device_printf(dev, 3465 "Unable to allocate RxDescriptor memory\n"); 3466 error = ENOMEM; 3467 goto err_rx_desc; 3468 } 3469 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr; 3470 bzero((void *)rxr->rx_base, rsize); 3471 3472 /* Allocate receive buffers for the ring*/ 3473 if (em_allocate_receive_buffers(rxr)) { 3474 device_printf(dev, 3475 "Critical Failure setting up receive buffers\n"); 3476 error = ENOMEM; 3477 goto err_rx_desc; 3478 } 3479 } 3480 3481 return (0); 3482 3483 err_rx_desc: 3484 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) 3485 em_dma_free(adapter, &rxr->rxdma); 3486 err_tx_desc: 3487 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) 3488 em_dma_free(adapter, &txr->txdma); 3489 free(adapter->rx_rings, M_DEVBUF); 3490 rx_fail: 3491 #if __FreeBSD_version >= 800000 3492 buf_ring_free(txr->br, M_DEVBUF); 3493 #endif 3494 free(adapter->tx_rings, M_DEVBUF); 3495 fail: 3496 return (error); 3497 } 3498 3499 3500 /********************************************************************* 3501 * 3502 * Allocate memory for tx_buffer structures. The tx_buffer stores all 3503 * the information needed to transmit a packet on the wire. This is 3504 * called only once at attach, setup is done every reset. 3505 * 3506 **********************************************************************/ 3507 static int 3508 em_allocate_transmit_buffers(struct tx_ring *txr) 3509 { 3510 struct adapter *adapter = txr->adapter; 3511 device_t dev = adapter->dev; 3512 struct em_txbuffer *txbuf; 3513 int error, i; 3514 3515 /* 3516 * Setup DMA descriptor areas. 3517 */ 3518 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 3519 1, 0, /* alignment, bounds */ 3520 BUS_SPACE_MAXADDR, /* lowaddr */ 3521 BUS_SPACE_MAXADDR, /* highaddr */ 3522 NULL, NULL, /* filter, filterarg */ 3523 EM_TSO_SIZE, /* maxsize */ 3524 EM_MAX_SCATTER, /* nsegments */ 3525 PAGE_SIZE, /* maxsegsize */ 3526 0, /* flags */ 3527 NULL, /* lockfunc */ 3528 NULL, /* lockfuncarg */ 3529 &txr->txtag))) { 3530 device_printf(dev,"Unable to allocate TX DMA tag\n"); 3531 goto fail; 3532 } 3533 3534 if (!(txr->tx_buffers = 3535 (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) * 3536 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { 3537 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 3538 error = ENOMEM; 3539 goto fail; 3540 } 3541 3542 /* Create the descriptor buffer dma maps */ 3543 txbuf = txr->tx_buffers; 3544 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { 3545 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); 3546 if (error != 0) { 3547 device_printf(dev, "Unable to create TX DMA map\n"); 3548 goto fail; 3549 } 3550 } 3551 3552 return 0; 3553 fail: 3554 /* We free all, it handles case where we are in the middle */ 3555 em_free_transmit_structures(adapter); 3556 return (error); 3557 } 3558 3559 /********************************************************************* 3560 * 3561 * Initialize a transmit ring. 
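 * (Clears the descriptor area, releases any stale mbufs, resets the ring
 * indices and the cached checksum-offload context; under DEV_NETMAP the
 * slots are rewired to netmap-owned buffers.)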
3562 * 3563 **********************************************************************/ 3564 static void 3565 em_setup_transmit_ring(struct tx_ring *txr) 3566 { 3567 struct adapter *adapter = txr->adapter; 3568 struct em_txbuffer *txbuf; 3569 int i; 3570 #ifdef DEV_NETMAP 3571 struct netmap_slot *slot; 3572 struct netmap_adapter *na = netmap_getna(adapter->ifp); 3573 #endif /* DEV_NETMAP */ 3574 3575 /* Clear the old descriptor contents */ 3576 EM_TX_LOCK(txr); 3577 #ifdef DEV_NETMAP 3578 slot = netmap_reset(na, NR_TX, txr->me, 0); 3579 #endif /* DEV_NETMAP */ 3580 3581 bzero((void *)txr->tx_base, 3582 (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc); 3583 /* Reset indices */ 3584 txr->next_avail_desc = 0; 3585 txr->next_to_clean = 0; 3586 3587 /* Free any existing tx buffers. */ 3588 txbuf = txr->tx_buffers; 3589 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { 3590 if (txbuf->m_head != NULL) { 3591 bus_dmamap_sync(txr->txtag, txbuf->map, 3592 BUS_DMASYNC_POSTWRITE); 3593 bus_dmamap_unload(txr->txtag, txbuf->map); 3594 m_freem(txbuf->m_head); 3595 txbuf->m_head = NULL; 3596 } 3597 #ifdef DEV_NETMAP 3598 if (slot) { 3599 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); 3600 uint64_t paddr; 3601 void *addr; 3602 3603 addr = PNMB(na, slot + si, &paddr); 3604 txr->tx_base[i].buffer_addr = htole64(paddr); 3605 /* reload the map for netmap mode */ 3606 netmap_load_map(na, txr->txtag, txbuf->map, addr); 3607 } 3608 #endif /* DEV_NETMAP */ 3609 3610 /* clear the watch index */ 3611 txbuf->next_eop = -1; 3612 } 3613 3614 /* Set number of descriptors available */ 3615 txr->tx_avail = adapter->num_tx_desc; 3616 txr->busy = EM_TX_IDLE; 3617 3618 /* Clear checksum offload context. */ 3619 txr->last_hw_offload = 0; 3620 txr->last_hw_ipcss = 0; 3621 txr->last_hw_ipcso = 0; 3622 txr->last_hw_tucss = 0; 3623 txr->last_hw_tucso = 0; 3624 3625 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 3626 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 3627 EM_TX_UNLOCK(txr); 3628 } 3629 3630 /********************************************************************* 3631 * 3632 * Initialize all transmit rings. 3633 * 3634 **********************************************************************/ 3635 static void 3636 em_setup_transmit_structures(struct adapter *adapter) 3637 { 3638 struct tx_ring *txr = adapter->tx_rings; 3639 3640 for (int i = 0; i < adapter->num_queues; i++, txr++) 3641 em_setup_transmit_ring(txr); 3642 3643 return; 3644 } 3645 3646 /********************************************************************* 3647 * 3648 * Enable transmit unit. 
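 * (Programs TDBAL/TDBAH/TDLEN and TXDCTL for every ring, sets the
 * inter-packet gap and interrupt delay registers, and applies the
 * per-MAC TARC errata workarounds.)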
3649 * 3650 **********************************************************************/ 3651 static void 3652 em_initialize_transmit_unit(struct adapter *adapter) 3653 { 3654 struct tx_ring *txr = adapter->tx_rings; 3655 struct e1000_hw *hw = &adapter->hw; 3656 u32 tctl, txdctl = 0, tarc, tipg = 0; 3657 3658 INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); 3659 3660 for (int i = 0; i < adapter->num_queues; i++, txr++) { 3661 u64 bus_addr = txr->txdma.dma_paddr; 3662 /* Base and Len of TX Ring */ 3663 E1000_WRITE_REG(hw, E1000_TDLEN(i), 3664 adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); 3665 E1000_WRITE_REG(hw, E1000_TDBAH(i), 3666 (u32)(bus_addr >> 32)); 3667 E1000_WRITE_REG(hw, E1000_TDBAL(i), 3668 (u32)bus_addr); 3669 /* Init the HEAD/TAIL indices */ 3670 E1000_WRITE_REG(hw, E1000_TDT(i), 0); 3671 E1000_WRITE_REG(hw, E1000_TDH(i), 0); 3672 3673 HW_DEBUGOUT2("Base = %x, Length = %x\n", 3674 E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)), 3675 E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); 3676 3677 txr->busy = EM_TX_IDLE; 3678 txdctl = 0; /* clear txdctl */ 3679 txdctl |= 0x1f; /* PTHRESH */ 3680 txdctl |= 1 << 8; /* HTHRESH */ 3681 txdctl |= 1 << 16;/* WTHRESH */ 3682 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */ 3683 txdctl |= E1000_TXDCTL_GRAN; 3684 txdctl |= 1 << 25; /* LWTHRESH */ 3685 3686 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); 3687 } 3688 3689 /* Set the default values for the Tx Inter Packet Gap timer */ 3690 switch (adapter->hw.mac.type) { 3691 case e1000_80003es2lan: 3692 tipg = DEFAULT_82543_TIPG_IPGR1; 3693 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << 3694 E1000_TIPG_IPGR2_SHIFT; 3695 break; 3696 default: 3697 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || 3698 (adapter->hw.phy.media_type == 3699 e1000_media_type_internal_serdes)) 3700 tipg = DEFAULT_82543_TIPG_IPGT_FIBER; 3701 else 3702 tipg = DEFAULT_82543_TIPG_IPGT_COPPER; 3703 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; 3704 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; 3705 } 3706 3707 E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); 3708 E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); 3709 3710 if(adapter->hw.mac.type >= e1000_82540) 3711 E1000_WRITE_REG(&adapter->hw, E1000_TADV, 3712 adapter->tx_abs_int_delay.value); 3713 3714 if ((adapter->hw.mac.type == e1000_82571) || 3715 (adapter->hw.mac.type == e1000_82572)) { 3716 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); 3717 tarc |= TARC_SPEED_MODE_BIT; 3718 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); 3719 } else if (adapter->hw.mac.type == e1000_80003es2lan) { 3720 /* errata: program both queues to unweighted RR */ 3721 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); 3722 tarc |= 1; 3723 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); 3724 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1)); 3725 tarc |= 1; 3726 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); 3727 } else if (adapter->hw.mac.type == e1000_82574) { 3728 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); 3729 tarc |= TARC_ERRATA_BIT; 3730 if ( adapter->num_queues > 1) { 3731 tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX); 3732 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); 3733 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); 3734 } else 3735 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); 3736 } 3737 3738 adapter->txd_cmd = E1000_TXD_CMD_IFCS; 3739 if (adapter->tx_int_delay.value > 0) 3740 adapter->txd_cmd |= E1000_TXD_CMD_IDE; 3741 3742 /* Program the Transmit Control Register */ 
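	/*
	 * Sketch of the read-modify-write below: the collision threshold
	 * field is cleared and reloaded with E1000_COLLISION_THRESHOLD
	 * (typically 15) shifted by E1000_CT_SHIFT, PSP and RTLC are set
	 * along with the enable bit, and multiple-request support (MULR)
	 * is added only on 82571 and newer MACs.
	 */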
3743 tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); 3744 tctl &= ~E1000_TCTL_CT; 3745 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | 3746 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); 3747 3748 if (adapter->hw.mac.type >= e1000_82571) 3749 tctl |= E1000_TCTL_MULR; 3750 3751 /* This write will effectively turn on the transmit unit. */ 3752 E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); 3753 3754 if (hw->mac.type == e1000_pch_spt) { 3755 u32 reg; 3756 reg = E1000_READ_REG(hw, E1000_IOSFPC); 3757 reg |= E1000_RCTL_RDMTS_HEX; 3758 E1000_WRITE_REG(hw, E1000_IOSFPC, reg); 3759 reg = E1000_READ_REG(hw, E1000_TARC(0)); 3760 reg |= E1000_TARC0_CB_MULTIQ_3_REQ; 3761 E1000_WRITE_REG(hw, E1000_TARC(0), reg); 3762 } 3763 } 3764 3765 3766 /********************************************************************* 3767 * 3768 * Free all transmit rings. 3769 * 3770 **********************************************************************/ 3771 static void 3772 em_free_transmit_structures(struct adapter *adapter) 3773 { 3774 struct tx_ring *txr = adapter->tx_rings; 3775 3776 for (int i = 0; i < adapter->num_queues; i++, txr++) { 3777 EM_TX_LOCK(txr); 3778 em_free_transmit_buffers(txr); 3779 em_dma_free(adapter, &txr->txdma); 3780 EM_TX_UNLOCK(txr); 3781 EM_TX_LOCK_DESTROY(txr); 3782 } 3783 3784 free(adapter->tx_rings, M_DEVBUF); 3785 } 3786 3787 /********************************************************************* 3788 * 3789 * Free transmit ring related data structures. 3790 * 3791 **********************************************************************/ 3792 static void 3793 em_free_transmit_buffers(struct tx_ring *txr) 3794 { 3795 struct adapter *adapter = txr->adapter; 3796 struct em_txbuffer *txbuf; 3797 3798 INIT_DEBUGOUT("free_transmit_ring: begin"); 3799 3800 if (txr->tx_buffers == NULL) 3801 return; 3802 3803 for (int i = 0; i < adapter->num_tx_desc; i++) { 3804 txbuf = &txr->tx_buffers[i]; 3805 if (txbuf->m_head != NULL) { 3806 bus_dmamap_sync(txr->txtag, txbuf->map, 3807 BUS_DMASYNC_POSTWRITE); 3808 bus_dmamap_unload(txr->txtag, 3809 txbuf->map); 3810 m_freem(txbuf->m_head); 3811 txbuf->m_head = NULL; 3812 if (txbuf->map != NULL) { 3813 bus_dmamap_destroy(txr->txtag, 3814 txbuf->map); 3815 txbuf->map = NULL; 3816 } 3817 } else if (txbuf->map != NULL) { 3818 bus_dmamap_unload(txr->txtag, 3819 txbuf->map); 3820 bus_dmamap_destroy(txr->txtag, 3821 txbuf->map); 3822 txbuf->map = NULL; 3823 } 3824 } 3825 #if __FreeBSD_version >= 800000 3826 if (txr->br != NULL) 3827 buf_ring_free(txr->br, M_DEVBUF); 3828 #endif 3829 if (txr->tx_buffers != NULL) { 3830 free(txr->tx_buffers, M_DEVBUF); 3831 txr->tx_buffers = NULL; 3832 } 3833 if (txr->txtag != NULL) { 3834 bus_dma_tag_destroy(txr->txtag); 3835 txr->txtag = NULL; 3836 } 3837 return; 3838 } 3839 3840 3841 /********************************************************************* 3842 * The offload context is protocol specific (TCP/UDP) and thus 3843 * only needs to be set when the protocol changes. The occasion 3844 * of a context change can be a performance detriment, and 3845 * might be better just disabled. The reason arises in the way 3846 * in which the controller supports pipelined requests from the 3847 * Tx data DMA. Up to four requests can be pipelined, and they may 3848 * belong to the same packet or to multiple packets. 
However all 3849 * requests for one packet are issued before a request is issued 3850 * for a subsequent packet and if a request for the next packet 3851 * requires a context change, that request will be stalled 3852 * until the previous request completes. This means setting up 3853 * a new context effectively disables pipelined Tx data DMA which 3854 * in turn greatly slow down performance to send small sized 3855 * frames. 3856 **********************************************************************/ 3857 static void 3858 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, 3859 struct ip *ip, u32 *txd_upper, u32 *txd_lower) 3860 { 3861 struct adapter *adapter = txr->adapter; 3862 struct e1000_context_desc *TXD = NULL; 3863 struct em_txbuffer *tx_buffer; 3864 int cur, hdr_len; 3865 u32 cmd = 0; 3866 u16 offload = 0; 3867 u8 ipcso, ipcss, tucso, tucss; 3868 3869 ipcss = ipcso = tucss = tucso = 0; 3870 hdr_len = ip_off + (ip->ip_hl << 2); 3871 cur = txr->next_avail_desc; 3872 3873 /* Setup of IP header checksum. */ 3874 if (mp->m_pkthdr.csum_flags & CSUM_IP) { 3875 *txd_upper |= E1000_TXD_POPTS_IXSM << 8; 3876 offload |= CSUM_IP; 3877 ipcss = ip_off; 3878 ipcso = ip_off + offsetof(struct ip, ip_sum); 3879 /* 3880 * Start offset for header checksum calculation. 3881 * End offset for header checksum calculation. 3882 * Offset of place to put the checksum. 3883 */ 3884 TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; 3885 TXD->lower_setup.ip_fields.ipcss = ipcss; 3886 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len); 3887 TXD->lower_setup.ip_fields.ipcso = ipcso; 3888 cmd |= E1000_TXD_CMD_IP; 3889 } 3890 3891 if (mp->m_pkthdr.csum_flags & CSUM_TCP) { 3892 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; 3893 *txd_upper |= E1000_TXD_POPTS_TXSM << 8; 3894 offload |= CSUM_TCP; 3895 tucss = hdr_len; 3896 tucso = hdr_len + offsetof(struct tcphdr, th_sum); 3897 /* 3898 * The 82574L can only remember the *last* context used 3899 * regardless of queue that it was use for. We cannot reuse 3900 * contexts on this hardware platform and must generate a new 3901 * context every time. 82574L hardware spec, section 7.2.6, 3902 * second note. 3903 */ 3904 if (adapter->num_queues < 2) { 3905 /* 3906 * Setting up new checksum offload context for every 3907 * frames takes a lot of processing time for hardware. 3908 * This also reduces performance a lot for small sized 3909 * frames so avoid it if driver can use previously 3910 * configured checksum offload context. 3911 */ 3912 if (txr->last_hw_offload == offload) { 3913 if (offload & CSUM_IP) { 3914 if (txr->last_hw_ipcss == ipcss && 3915 txr->last_hw_ipcso == ipcso && 3916 txr->last_hw_tucss == tucss && 3917 txr->last_hw_tucso == tucso) 3918 return; 3919 } else { 3920 if (txr->last_hw_tucss == tucss && 3921 txr->last_hw_tucso == tucso) 3922 return; 3923 } 3924 } 3925 txr->last_hw_offload = offload; 3926 txr->last_hw_tucss = tucss; 3927 txr->last_hw_tucso = tucso; 3928 } 3929 /* 3930 * Start offset for payload checksum calculation. 3931 * End offset for payload checksum calculation. 3932 * Offset of place to put the checksum. 
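		 *
		 * For example (assuming an untagged frame with a 20-byte IP
		 * header), ip_off = 14 and hdr_len = 34, so tucss = 34,
		 * tucso = 34 + offsetof(struct tcphdr, th_sum) = 50, and
		 * tucse = 0, which tells the hardware to checksum through
		 * the end of the packet.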
3933 */ 3934 TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; 3935 TXD->upper_setup.tcp_fields.tucss = hdr_len; 3936 TXD->upper_setup.tcp_fields.tucse = htole16(0); 3937 TXD->upper_setup.tcp_fields.tucso = tucso; 3938 cmd |= E1000_TXD_CMD_TCP; 3939 } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) { 3940 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; 3941 *txd_upper |= E1000_TXD_POPTS_TXSM << 8; 3942 tucss = hdr_len; 3943 tucso = hdr_len + offsetof(struct udphdr, uh_sum); 3944 /* 3945 * The 82574L can only remember the *last* context used 3946 * regardless of queue that it was use for. We cannot reuse 3947 * contexts on this hardware platform and must generate a new 3948 * context every time. 82574L hardware spec, section 7.2.6, 3949 * second note. 3950 */ 3951 if (adapter->num_queues < 2) { 3952 /* 3953 * Setting up new checksum offload context for every 3954 * frames takes a lot of processing time for hardware. 3955 * This also reduces performance a lot for small sized 3956 * frames so avoid it if driver can use previously 3957 * configured checksum offload context. 3958 */ 3959 if (txr->last_hw_offload == offload) { 3960 if (offload & CSUM_IP) { 3961 if (txr->last_hw_ipcss == ipcss && 3962 txr->last_hw_ipcso == ipcso && 3963 txr->last_hw_tucss == tucss && 3964 txr->last_hw_tucso == tucso) 3965 return; 3966 } else { 3967 if (txr->last_hw_tucss == tucss && 3968 txr->last_hw_tucso == tucso) 3969 return; 3970 } 3971 } 3972 txr->last_hw_offload = offload; 3973 txr->last_hw_tucss = tucss; 3974 txr->last_hw_tucso = tucso; 3975 } 3976 /* 3977 * Start offset for header checksum calculation. 3978 * End offset for header checksum calculation. 3979 * Offset of place to put the checksum. 3980 */ 3981 TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; 3982 TXD->upper_setup.tcp_fields.tucss = tucss; 3983 TXD->upper_setup.tcp_fields.tucse = htole16(0); 3984 TXD->upper_setup.tcp_fields.tucso = tucso; 3985 } 3986 3987 if (offload & CSUM_IP) { 3988 txr->last_hw_ipcss = ipcss; 3989 txr->last_hw_ipcso = ipcso; 3990 } 3991 3992 TXD->tcp_seg_setup.data = htole32(0); 3993 TXD->cmd_and_length = 3994 htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd); 3995 tx_buffer = &txr->tx_buffers[cur]; 3996 tx_buffer->m_head = NULL; 3997 tx_buffer->next_eop = -1; 3998 3999 if (++cur == adapter->num_tx_desc) 4000 cur = 0; 4001 4002 txr->tx_avail--; 4003 txr->next_avail_desc = cur; 4004 } 4005 4006 4007 /********************************************************************** 4008 * 4009 * Setup work for hardware segmentation offload (TSO) 4010 * 4011 **********************************************************************/ 4012 static void 4013 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, 4014 struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower) 4015 { 4016 struct adapter *adapter = txr->adapter; 4017 struct e1000_context_desc *TXD; 4018 struct em_txbuffer *tx_buffer; 4019 int cur, hdr_len; 4020 4021 /* 4022 * In theory we can use the same TSO context if and only if 4023 * frame is the same type(IP/TCP) and the same MSS. However 4024 * checking whether a frame has the same IP/TCP structure is 4025 * hard thing so just ignore that and always restablish a 4026 * new TSO context. 4027 */ 4028 hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2); 4029 *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */ 4030 E1000_TXD_DTYP_D | /* Data descr type */ 4031 E1000_TXD_CMD_TSE); /* Do TSE on this packet */ 4032 4033 /* IP and/or TCP header checksum calculation and insertion. 
*/ 4034 *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8; 4035 4036 cur = txr->next_avail_desc; 4037 tx_buffer = &txr->tx_buffers[cur]; 4038 TXD = (struct e1000_context_desc *) &txr->tx_base[cur]; 4039 4040 /* 4041 * Start offset for header checksum calculation. 4042 * End offset for header checksum calculation. 4043 * Offset of place put the checksum. 4044 */ 4045 TXD->lower_setup.ip_fields.ipcss = ip_off; 4046 TXD->lower_setup.ip_fields.ipcse = 4047 htole16(ip_off + (ip->ip_hl << 2) - 1); 4048 TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum); 4049 /* 4050 * Start offset for payload checksum calculation. 4051 * End offset for payload checksum calculation. 4052 * Offset of place to put the checksum. 4053 */ 4054 TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2); 4055 TXD->upper_setup.tcp_fields.tucse = 0; 4056 TXD->upper_setup.tcp_fields.tucso = 4057 ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum); 4058 /* 4059 * Payload size per packet w/o any headers. 4060 * Length of all headers up to payload. 4061 */ 4062 TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz); 4063 TXD->tcp_seg_setup.fields.hdr_len = hdr_len; 4064 4065 TXD->cmd_and_length = htole32(adapter->txd_cmd | 4066 E1000_TXD_CMD_DEXT | /* Extended descr */ 4067 E1000_TXD_CMD_TSE | /* TSE context */ 4068 E1000_TXD_CMD_IP | /* Do IP csum */ 4069 E1000_TXD_CMD_TCP | /* Do TCP checksum */ 4070 (mp->m_pkthdr.len - (hdr_len))); /* Total len */ 4071 4072 tx_buffer->m_head = NULL; 4073 tx_buffer->next_eop = -1; 4074 4075 if (++cur == adapter->num_tx_desc) 4076 cur = 0; 4077 4078 txr->tx_avail--; 4079 txr->next_avail_desc = cur; 4080 txr->tx_tso = TRUE; 4081 } 4082 4083 4084 /********************************************************************** 4085 * 4086 * Examine each tx_buffer in the used queue. If the hardware is done 4087 * processing the packet then free associated resources. The 4088 * tx_buffer is put back on the free queue. 4089 * 4090 **********************************************************************/ 4091 static void 4092 em_txeof(struct tx_ring *txr) 4093 { 4094 struct adapter *adapter = txr->adapter; 4095 int first, last, done, processed; 4096 struct em_txbuffer *tx_buffer; 4097 struct e1000_tx_desc *tx_desc, *eop_desc; 4098 if_t ifp = adapter->ifp; 4099 4100 EM_TX_LOCK_ASSERT(txr); 4101 #ifdef DEV_NETMAP 4102 if (netmap_tx_irq(ifp, txr->me)) 4103 return; 4104 #endif /* DEV_NETMAP */ 4105 4106 /* No work, make sure hang detection is disabled */ 4107 if (txr->tx_avail == adapter->num_tx_desc) { 4108 txr->busy = EM_TX_IDLE; 4109 return; 4110 } 4111 4112 processed = 0; 4113 first = txr->next_to_clean; 4114 tx_desc = &txr->tx_base[first]; 4115 tx_buffer = &txr->tx_buffers[first]; 4116 last = tx_buffer->next_eop; 4117 eop_desc = &txr->tx_base[last]; 4118 4119 /* 4120 * What this does is get the index of the 4121 * first descriptor AFTER the EOP of the 4122 * first packet, that way we can do the 4123 * simple comparison on the inner while loop. 
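	 *
	 * For example, a packet occupying descriptors 10..13 has
	 * next_eop = 13, so "last" advances to 14 (wrapping to 0 at the
	 * end of the ring), "done" is 14, and the inner loop cleans
	 * 10 through 13 before the next EOP descriptor is checked for DD.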
4124 */ 4125 if (++last == adapter->num_tx_desc) 4126 last = 0; 4127 done = last; 4128 4129 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 4130 BUS_DMASYNC_POSTREAD); 4131 4132 while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) { 4133 /* We clean the range of the packet */ 4134 while (first != done) { 4135 tx_desc->upper.data = 0; 4136 tx_desc->lower.data = 0; 4137 tx_desc->buffer_addr = 0; 4138 ++txr->tx_avail; 4139 ++processed; 4140 4141 if (tx_buffer->m_head) { 4142 bus_dmamap_sync(txr->txtag, 4143 tx_buffer->map, 4144 BUS_DMASYNC_POSTWRITE); 4145 bus_dmamap_unload(txr->txtag, 4146 tx_buffer->map); 4147 m_freem(tx_buffer->m_head); 4148 tx_buffer->m_head = NULL; 4149 } 4150 tx_buffer->next_eop = -1; 4151 4152 if (++first == adapter->num_tx_desc) 4153 first = 0; 4154 4155 tx_buffer = &txr->tx_buffers[first]; 4156 tx_desc = &txr->tx_base[first]; 4157 } 4158 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 4159 /* See if we can continue to the next packet */ 4160 last = tx_buffer->next_eop; 4161 if (last != -1) { 4162 eop_desc = &txr->tx_base[last]; 4163 /* Get new done point */ 4164 if (++last == adapter->num_tx_desc) last = 0; 4165 done = last; 4166 } else 4167 break; 4168 } 4169 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 4170 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 4171 4172 txr->next_to_clean = first; 4173 4174 /* 4175 ** Hang detection: we know there's work outstanding 4176 ** or the entry return would have been taken, so no 4177 ** descriptor processed here indicates a potential hang. 4178 ** The local timer will examine this and do a reset if needed. 4179 */ 4180 if (processed == 0) { 4181 if (txr->busy != EM_TX_HUNG) 4182 ++txr->busy; 4183 } else /* At least one descriptor was cleaned */ 4184 txr->busy = EM_TX_BUSY; /* note this clears HUNG */ 4185 4186 /* 4187 * If we have a minimum free, clear IFF_DRV_OACTIVE 4188 * to tell the stack that it is OK to send packets. 4189 * Notice that all writes of OACTIVE happen under the 4190 * TX lock which, with a single queue, guarantees 4191 * sanity. 4192 */ 4193 if (txr->tx_avail >= EM_MAX_SCATTER) { 4194 if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); 4195 } 4196 4197 /* Disable hang detection if all clean */ 4198 if (txr->tx_avail == adapter->num_tx_desc) 4199 txr->busy = EM_TX_IDLE; 4200 } 4201 4202 /********************************************************************* 4203 * 4204 * Refresh RX descriptor mbufs from system mbuf buffer pool. 4205 * 4206 **********************************************************************/ 4207 static void 4208 em_refresh_mbufs(struct rx_ring *rxr, int limit) 4209 { 4210 struct adapter *adapter = rxr->adapter; 4211 struct mbuf *m; 4212 bus_dma_segment_t segs; 4213 struct em_rxbuffer *rxbuf; 4214 int i, j, error, nsegs; 4215 bool cleaned = FALSE; 4216 4217 i = j = rxr->next_to_refresh; 4218 /* 4219 ** Get one descriptor beyond 4220 ** our work mark to control 4221 ** the loop. 4222 */ 4223 if (++j == adapter->num_rx_desc) 4224 j = 0; 4225 4226 while (j != limit) { 4227 rxbuf = &rxr->rx_buffers[i]; 4228 if (rxbuf->m_head == NULL) { 4229 m = m_getjcl(M_NOWAIT, MT_DATA, 4230 M_PKTHDR, adapter->rx_mbuf_sz); 4231 /* 4232 ** If we have a temporary resource shortage 4233 ** that causes a failure, just abort refresh 4234 ** for now, we will return to this point when 4235 ** reinvoked from em_rxeof. 
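			** Bailing out is safe because the update path
			** below only pushes the tail register as far as
			** next_to_refresh, so the hardware is never
			** handed an unrefreshed slot.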
4236 */ 4237 if (m == NULL) 4238 goto update; 4239 } else 4240 m = rxbuf->m_head; 4241 4242 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz; 4243 m->m_flags |= M_PKTHDR; 4244 m->m_data = m->m_ext.ext_buf; 4245 4246 /* Use bus_dma machinery to setup the memory mapping */ 4247 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map, 4248 m, &segs, &nsegs, BUS_DMA_NOWAIT); 4249 if (error != 0) { 4250 printf("Refresh mbufs: hdr dmamap load" 4251 " failure - %d\n", error); 4252 m_free(m); 4253 rxbuf->m_head = NULL; 4254 goto update; 4255 } 4256 rxbuf->m_head = m; 4257 rxbuf->paddr = segs.ds_addr; 4258 bus_dmamap_sync(rxr->rxtag, 4259 rxbuf->map, BUS_DMASYNC_PREREAD); 4260 em_setup_rxdesc(&rxr->rx_base[i], rxbuf); 4261 cleaned = TRUE; 4262 4263 i = j; /* Next is precalulated for us */ 4264 rxr->next_to_refresh = i; 4265 /* Calculate next controlling index */ 4266 if (++j == adapter->num_rx_desc) 4267 j = 0; 4268 } 4269 update: 4270 /* 4271 ** Update the tail pointer only if, 4272 ** and as far as we have refreshed. 4273 */ 4274 if (cleaned) 4275 E1000_WRITE_REG(&adapter->hw, 4276 E1000_RDT(rxr->me), rxr->next_to_refresh); 4277 4278 return; 4279 } 4280 4281 4282 /********************************************************************* 4283 * 4284 * Allocate memory for rx_buffer structures. Since we use one 4285 * rx_buffer per received packet, the maximum number of rx_buffer's 4286 * that we'll need is equal to the number of receive descriptors 4287 * that we've allocated. 4288 * 4289 **********************************************************************/ 4290 static int 4291 em_allocate_receive_buffers(struct rx_ring *rxr) 4292 { 4293 struct adapter *adapter = rxr->adapter; 4294 device_t dev = adapter->dev; 4295 struct em_rxbuffer *rxbuf; 4296 int error; 4297 4298 rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) * 4299 adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); 4300 if (rxr->rx_buffers == NULL) { 4301 device_printf(dev, "Unable to allocate rx_buffer memory\n"); 4302 return (ENOMEM); 4303 } 4304 4305 error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 4306 1, 0, /* alignment, bounds */ 4307 BUS_SPACE_MAXADDR, /* lowaddr */ 4308 BUS_SPACE_MAXADDR, /* highaddr */ 4309 NULL, NULL, /* filter, filterarg */ 4310 MJUM9BYTES, /* maxsize */ 4311 1, /* nsegments */ 4312 MJUM9BYTES, /* maxsegsize */ 4313 0, /* flags */ 4314 NULL, /* lockfunc */ 4315 NULL, /* lockarg */ 4316 &rxr->rxtag); 4317 if (error) { 4318 device_printf(dev, "%s: bus_dma_tag_create failed %d\n", 4319 __func__, error); 4320 goto fail; 4321 } 4322 4323 rxbuf = rxr->rx_buffers; 4324 for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) { 4325 rxbuf = &rxr->rx_buffers[i]; 4326 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map); 4327 if (error) { 4328 device_printf(dev, "%s: bus_dmamap_create failed: %d\n", 4329 __func__, error); 4330 goto fail; 4331 } 4332 } 4333 4334 return (0); 4335 4336 fail: 4337 em_free_receive_structures(adapter); 4338 return (error); 4339 } 4340 4341 4342 /********************************************************************* 4343 * 4344 * Initialize a receive ring and its buffers. 
4345 * 4346 **********************************************************************/ 4347 static int 4348 em_setup_receive_ring(struct rx_ring *rxr) 4349 { 4350 struct adapter *adapter = rxr->adapter; 4351 struct em_rxbuffer *rxbuf; 4352 bus_dma_segment_t seg[1]; 4353 int rsize, nsegs, error = 0; 4354 #ifdef DEV_NETMAP 4355 struct netmap_slot *slot; 4356 struct netmap_adapter *na = netmap_getna(adapter->ifp); 4357 #endif 4358 4359 4360 /* Clear the ring contents */ 4361 EM_RX_LOCK(rxr); 4362 rsize = roundup2(adapter->num_rx_desc * 4363 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); 4364 bzero((void *)rxr->rx_base, rsize); 4365 #ifdef DEV_NETMAP 4366 slot = netmap_reset(na, NR_RX, rxr->me, 0); 4367 #endif 4368 4369 /* 4370 ** Free current RX buffer structs and their mbufs 4371 */ 4372 for (int i = 0; i < adapter->num_rx_desc; i++) { 4373 rxbuf = &rxr->rx_buffers[i]; 4374 if (rxbuf->m_head != NULL) { 4375 bus_dmamap_sync(rxr->rxtag, rxbuf->map, 4376 BUS_DMASYNC_POSTREAD); 4377 bus_dmamap_unload(rxr->rxtag, rxbuf->map); 4378 m_freem(rxbuf->m_head); 4379 rxbuf->m_head = NULL; /* mark as freed */ 4380 } 4381 } 4382 4383 /* Now replenish the mbufs */ 4384 for (int j = 0; j != adapter->num_rx_desc; ++j) { 4385 rxbuf = &rxr->rx_buffers[j]; 4386 #ifdef DEV_NETMAP 4387 if (slot) { 4388 int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j); 4389 uint64_t paddr; 4390 void *addr; 4391 4392 addr = PNMB(na, slot + si, &paddr); 4393 netmap_load_map(na, rxr->rxtag, rxbuf->map, addr); 4394 em_setup_rxdesc(&rxr->rx_base[j], rxbuf); 4395 continue; 4396 } 4397 #endif /* DEV_NETMAP */ 4398 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA, 4399 M_PKTHDR, adapter->rx_mbuf_sz); 4400 if (rxbuf->m_head == NULL) { 4401 error = ENOBUFS; 4402 goto fail; 4403 } 4404 rxbuf->m_head->m_len = adapter->rx_mbuf_sz; 4405 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */ 4406 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz; 4407 4408 /* Get the memory mapping */ 4409 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, 4410 rxbuf->map, rxbuf->m_head, seg, 4411 &nsegs, BUS_DMA_NOWAIT); 4412 if (error != 0) { 4413 m_freem(rxbuf->m_head); 4414 rxbuf->m_head = NULL; 4415 goto fail; 4416 } 4417 bus_dmamap_sync(rxr->rxtag, 4418 rxbuf->map, BUS_DMASYNC_PREREAD); 4419 4420 rxbuf->paddr = seg[0].ds_addr; 4421 em_setup_rxdesc(&rxr->rx_base[j], rxbuf); 4422 } 4423 rxr->next_to_check = 0; 4424 rxr->next_to_refresh = 0; 4425 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 4426 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 4427 4428 fail: 4429 EM_RX_UNLOCK(rxr); 4430 return (error); 4431 } 4432 4433 /********************************************************************* 4434 * 4435 * Initialize all receive rings. 4436 * 4437 **********************************************************************/ 4438 static int 4439 em_setup_receive_structures(struct adapter *adapter) 4440 { 4441 struct rx_ring *rxr = adapter->rx_rings; 4442 int q; 4443 4444 for (q = 0; q < adapter->num_queues; q++, rxr++) 4445 if (em_setup_receive_ring(rxr)) 4446 goto fail; 4447 4448 return (0); 4449 fail: 4450 /* 4451 * Free RX buffers allocated so far, we will only handle 4452 * the rings that completed, the failing case will have 4453 * cleaned up for itself. 'q' failed, so its the terminus. 
4454 */ 4455 for (int i = 0; i < q; ++i) { 4456 rxr = &adapter->rx_rings[i]; 4457 for (int n = 0; n < adapter->num_rx_desc; n++) { 4458 struct em_rxbuffer *rxbuf; 4459 rxbuf = &rxr->rx_buffers[n]; 4460 if (rxbuf->m_head != NULL) { 4461 bus_dmamap_sync(rxr->rxtag, rxbuf->map, 4462 BUS_DMASYNC_POSTREAD); 4463 bus_dmamap_unload(rxr->rxtag, rxbuf->map); 4464 m_freem(rxbuf->m_head); 4465 rxbuf->m_head = NULL; 4466 } 4467 } 4468 rxr->next_to_check = 0; 4469 rxr->next_to_refresh = 0; 4470 } 4471 4472 return (ENOBUFS); 4473 } 4474 4475 /********************************************************************* 4476 * 4477 * Free all receive rings. 4478 * 4479 **********************************************************************/ 4480 static void 4481 em_free_receive_structures(struct adapter *adapter) 4482 { 4483 struct rx_ring *rxr = adapter->rx_rings; 4484 4485 for (int i = 0; i < adapter->num_queues; i++, rxr++) { 4486 em_free_receive_buffers(rxr); 4487 /* Free the ring memory as well */ 4488 em_dma_free(adapter, &rxr->rxdma); 4489 EM_RX_LOCK_DESTROY(rxr); 4490 } 4491 4492 free(adapter->rx_rings, M_DEVBUF); 4493 } 4494 4495 4496 /********************************************************************* 4497 * 4498 * Free receive ring data structures 4499 * 4500 **********************************************************************/ 4501 static void 4502 em_free_receive_buffers(struct rx_ring *rxr) 4503 { 4504 struct adapter *adapter = rxr->adapter; 4505 struct em_rxbuffer *rxbuf = NULL; 4506 4507 INIT_DEBUGOUT("free_receive_buffers: begin"); 4508 4509 if (rxr->rx_buffers != NULL) { 4510 for (int i = 0; i < adapter->num_rx_desc; i++) { 4511 rxbuf = &rxr->rx_buffers[i]; 4512 if (rxbuf->map != NULL) { 4513 bus_dmamap_sync(rxr->rxtag, rxbuf->map, 4514 BUS_DMASYNC_POSTREAD); 4515 bus_dmamap_unload(rxr->rxtag, rxbuf->map); 4516 bus_dmamap_destroy(rxr->rxtag, rxbuf->map); 4517 } 4518 if (rxbuf->m_head != NULL) { 4519 m_freem(rxbuf->m_head); 4520 rxbuf->m_head = NULL; 4521 } 4522 } 4523 free(rxr->rx_buffers, M_DEVBUF); 4524 rxr->rx_buffers = NULL; 4525 rxr->next_to_check = 0; 4526 rxr->next_to_refresh = 0; 4527 } 4528 4529 if (rxr->rxtag != NULL) { 4530 bus_dma_tag_destroy(rxr->rxtag); 4531 rxr->rxtag = NULL; 4532 } 4533 4534 return; 4535 } 4536 4537 4538 /********************************************************************* 4539 * 4540 * Enable receive unit. 
4541 * 4542 **********************************************************************/ 4543 4544 static void 4545 em_initialize_receive_unit(struct adapter *adapter) 4546 { 4547 struct rx_ring *rxr = adapter->rx_rings; 4548 if_t ifp = adapter->ifp; 4549 struct e1000_hw *hw = &adapter->hw; 4550 u32 rctl, rxcsum, rfctl; 4551 4552 INIT_DEBUGOUT("em_initialize_receive_units: begin"); 4553 4554 /* 4555 * Make sure receives are disabled while setting 4556 * up the descriptor ring 4557 */ 4558 rctl = E1000_READ_REG(hw, E1000_RCTL); 4559 /* Do not disable if ever enabled on this hardware */ 4560 if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583)) 4561 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); 4562 4563 /* Setup the Receive Control Register */ 4564 rctl &= ~(3 << E1000_RCTL_MO_SHIFT); 4565 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | 4566 E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | 4567 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); 4568 4569 /* Do not store bad packets */ 4570 rctl &= ~E1000_RCTL_SBP; 4571 4572 /* Enable Long Packet receive */ 4573 if (if_getmtu(ifp) > ETHERMTU) 4574 rctl |= E1000_RCTL_LPE; 4575 else 4576 rctl &= ~E1000_RCTL_LPE; 4577 4578 /* Strip the CRC */ 4579 if (!em_disable_crc_stripping) 4580 rctl |= E1000_RCTL_SECRC; 4581 4582 E1000_WRITE_REG(&adapter->hw, E1000_RADV, 4583 adapter->rx_abs_int_delay.value); 4584 4585 E1000_WRITE_REG(&adapter->hw, E1000_RDTR, 4586 adapter->rx_int_delay.value); 4587 /* 4588 * Set the interrupt throttling rate. Value is calculated 4589 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) 4590 */ 4591 E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); 4592 4593 /* Use extended rx descriptor formats */ 4594 rfctl = E1000_READ_REG(hw, E1000_RFCTL); 4595 rfctl |= E1000_RFCTL_EXTEN; 4596 /* 4597 ** When using MSIX interrupts we need to throttle 4598 ** using the EITR register (82574 only) 4599 */ 4600 if (hw->mac.type == e1000_82574) { 4601 for (int i = 0; i < 4; i++) 4602 E1000_WRITE_REG(hw, E1000_EITR_82574(i), 4603 DEFAULT_ITR); 4604 /* Disable accelerated acknowledge */ 4605 rfctl |= E1000_RFCTL_ACK_DIS; 4606 } 4607 E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); 4608 4609 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); 4610 if (if_getcapenable(ifp) & IFCAP_RXCSUM) { 4611 #ifdef EM_MULTIQUEUE 4612 rxcsum |= E1000_RXCSUM_TUOFL | 4613 E1000_RXCSUM_IPOFL | 4614 E1000_RXCSUM_PCSD; 4615 #else 4616 rxcsum |= E1000_RXCSUM_TUOFL; 4617 #endif 4618 } else 4619 rxcsum &= ~E1000_RXCSUM_TUOFL; 4620 4621 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); 4622 4623 #ifdef EM_MULTIQUEUE 4624 #define RSSKEYLEN 10 4625 if (adapter->num_queues > 1) { 4626 uint8_t rss_key[4 * RSSKEYLEN]; 4627 uint32_t reta = 0; 4628 int i; 4629 4630 /* 4631 * Configure RSS key 4632 */ 4633 arc4rand(rss_key, sizeof(rss_key), 0); 4634 for (i = 0; i < RSSKEYLEN; ++i) { 4635 uint32_t rssrk = 0; 4636 4637 rssrk = EM_RSSRK_VAL(rss_key, i); 4638 E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk); 4639 } 4640 4641 /* 4642 * Configure RSS redirect table in following fashion: 4643 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] 4644 */ 4645 for (i = 0; i < sizeof(reta); ++i) { 4646 uint32_t q; 4647 4648 q = (i % adapter->num_queues) << 7; 4649 reta |= q << (8 * i); 4650 } 4651 4652 for (i = 0; i < 32; ++i) { 4653 E1000_WRITE_REG(hw, E1000_RETA(i), reta); 4654 } 4655 4656 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | 4657 E1000_MRQC_RSS_FIELD_IPV4_TCP | 4658 E1000_MRQC_RSS_FIELD_IPV4 | 4659 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX | 4660 E1000_MRQC_RSS_FIELD_IPV6_EX | 4661 
E1000_MRQC_RSS_FIELD_IPV6); 4662 } 4663 #endif 4664 /* 4665 ** XXX TEMPORARY WORKAROUND: on some systems with 82573 4666 ** long latencies are observed, like Lenovo X60. This 4667 ** change eliminates the problem, but since having positive 4668 ** values in RDTR is a known source of problems on other 4669 ** platforms another solution is being sought. 4670 */ 4671 if (hw->mac.type == e1000_82573) 4672 E1000_WRITE_REG(hw, E1000_RDTR, 0x20); 4673 4674 for (int i = 0; i < adapter->num_queues; i++, rxr++) { 4675 /* Setup the Base and Length of the Rx Descriptor Ring */ 4676 u64 bus_addr = rxr->rxdma.dma_paddr; 4677 u32 rdt = adapter->num_rx_desc - 1; /* default */ 4678 4679 E1000_WRITE_REG(hw, E1000_RDLEN(i), 4680 adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended)); 4681 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); 4682 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); 4683 /* Setup the Head and Tail Descriptor Pointers */ 4684 E1000_WRITE_REG(hw, E1000_RDH(i), 0); 4685 #ifdef DEV_NETMAP 4686 /* 4687 * an init() while a netmap client is active must 4688 * preserve the rx buffers passed to userspace. 4689 */ 4690 if (if_getcapenable(ifp) & IFCAP_NETMAP) { 4691 struct netmap_adapter *na = netmap_getna(adapter->ifp); 4692 rdt -= nm_kr_rxspace(&na->rx_rings[i]); 4693 } 4694 #endif /* DEV_NETMAP */ 4695 E1000_WRITE_REG(hw, E1000_RDT(i), rdt); 4696 } 4697 4698 /* 4699 * Set PTHRESH for improved jumbo performance 4700 * According to 10.2.5.11 of Intel 82574 Datasheet, 4701 * RXDCTL(1) is written whenever RXDCTL(0) is written. 4702 * Only write to RXDCTL(1) if there is a need for different 4703 * settings. 4704 */ 4705 if (((adapter->hw.mac.type == e1000_ich9lan) || 4706 (adapter->hw.mac.type == e1000_pch2lan) || 4707 (adapter->hw.mac.type == e1000_ich10lan)) && 4708 (if_getmtu(ifp) > ETHERMTU)) { 4709 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); 4710 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); 4711 } else if (adapter->hw.mac.type == e1000_82574) { 4712 for (int i = 0; i < adapter->num_queues; i++) { 4713 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); 4714 4715 rxdctl |= 0x20; /* PTHRESH */ 4716 rxdctl |= 4 << 8; /* HTHRESH */ 4717 rxdctl |= 4 << 16;/* WTHRESH */ 4718 rxdctl |= 1 << 24; /* Switch to granularity */ 4719 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); 4720 } 4721 } 4722 4723 if (adapter->hw.mac.type >= e1000_pch2lan) { 4724 if (if_getmtu(ifp) > ETHERMTU) 4725 e1000_lv_jumbo_workaround_ich8lan(hw, TRUE); 4726 else 4727 e1000_lv_jumbo_workaround_ich8lan(hw, FALSE); 4728 } 4729 4730 /* Make sure VLAN Filters are off */ 4731 rctl &= ~E1000_RCTL_VFE; 4732 4733 if (adapter->rx_mbuf_sz == MCLBYTES) 4734 rctl |= E1000_RCTL_SZ_2048; 4735 else if (adapter->rx_mbuf_sz == MJUMPAGESIZE) 4736 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; 4737 else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) 4738 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; 4739 4740 /* ensure we clear use DTYPE of 00 here */ 4741 rctl &= ~0x00000C00; 4742 /* Write out the settings */ 4743 E1000_WRITE_REG(hw, E1000_RCTL, rctl); 4744 4745 return; 4746 } 4747 4748 4749 /********************************************************************* 4750 * 4751 * This routine executes in interrupt context. It replenishes 4752 * the mbufs in the descriptor and sends data which has been 4753 * dma'ed into host memory to upper layer. 4754 * 4755 * We loop at most count times if count is > 0, or until done if 4756 * count < 0. 
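 *  Received fragments are chained through rxr->fmp/lmp until a
 *  descriptor with EOP is seen; only then is the completed chain
 *  handed to the stack, with the RX lock dropped around if_input().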
4757 * 4758 * For polling we also now return the number of cleaned packets 4759 *********************************************************************/ 4760 static bool 4761 em_rxeof(struct rx_ring *rxr, int count, int *done) 4762 { 4763 struct adapter *adapter = rxr->adapter; 4764 if_t ifp = adapter->ifp; 4765 struct mbuf *mp, *sendmp; 4766 u32 status = 0; 4767 u16 len; 4768 int i, processed, rxdone = 0; 4769 bool eop; 4770 union e1000_rx_desc_extended *cur; 4771 4772 EM_RX_LOCK(rxr); 4773 4774 /* Sync the ring */ 4775 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 4776 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 4777 4778 4779 #ifdef DEV_NETMAP 4780 if (netmap_rx_irq(ifp, rxr->me, &processed)) { 4781 EM_RX_UNLOCK(rxr); 4782 return (FALSE); 4783 } 4784 #endif /* DEV_NETMAP */ 4785 4786 for (i = rxr->next_to_check, processed = 0; count != 0;) { 4787 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) 4788 break; 4789 4790 cur = &rxr->rx_base[i]; 4791 status = le32toh(cur->wb.upper.status_error); 4792 mp = sendmp = NULL; 4793 4794 if ((status & E1000_RXD_STAT_DD) == 0) 4795 break; 4796 4797 len = le16toh(cur->wb.upper.length); 4798 eop = (status & E1000_RXD_STAT_EOP) != 0; 4799 4800 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) || 4801 (rxr->discard == TRUE)) { 4802 adapter->dropped_pkts++; 4803 ++rxr->rx_discarded; 4804 if (!eop) /* Catch subsequent segs */ 4805 rxr->discard = TRUE; 4806 else 4807 rxr->discard = FALSE; 4808 em_rx_discard(rxr, i); 4809 goto next_desc; 4810 } 4811 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map); 4812 4813 /* Assign correct length to the current fragment */ 4814 mp = rxr->rx_buffers[i].m_head; 4815 mp->m_len = len; 4816 4817 /* Trigger for refresh */ 4818 rxr->rx_buffers[i].m_head = NULL; 4819 4820 /* First segment? */ 4821 if (rxr->fmp == NULL) { 4822 mp->m_pkthdr.len = len; 4823 rxr->fmp = rxr->lmp = mp; 4824 } else { 4825 /* Chain mbuf's together */ 4826 mp->m_flags &= ~M_PKTHDR; 4827 rxr->lmp->m_next = mp; 4828 rxr->lmp = mp; 4829 rxr->fmp->m_pkthdr.len += len; 4830 } 4831 4832 if (eop) { 4833 --count; 4834 sendmp = rxr->fmp; 4835 if_setrcvif(sendmp, ifp); 4836 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 4837 em_receive_checksum(status, sendmp); 4838 #ifndef __NO_STRICT_ALIGNMENT 4839 if (adapter->hw.mac.max_frame_size > 4840 (MCLBYTES - ETHER_ALIGN) && 4841 em_fixup_rx(rxr) != 0) 4842 goto skip; 4843 #endif 4844 if (status & E1000_RXD_STAT_VP) { 4845 if_setvtag(sendmp, 4846 le16toh(cur->wb.upper.vlan)); 4847 sendmp->m_flags |= M_VLANTAG; 4848 } 4849 #ifndef __NO_STRICT_ALIGNMENT 4850 skip: 4851 #endif 4852 rxr->fmp = rxr->lmp = NULL; 4853 } 4854 next_desc: 4855 /* Sync the ring */ 4856 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 4857 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 4858 4859 /* Zero out the receive descriptors status. */ 4860 cur->wb.upper.status_error &= htole32(~0xFF); 4861 ++rxdone; /* cumulative for POLL */ 4862 ++processed; 4863 4864 /* Advance our pointers to the next descriptor. 
*/ 4865 if (++i == adapter->num_rx_desc) 4866 i = 0; 4867 4868 /* Send to the stack */ 4869 if (sendmp != NULL) { 4870 rxr->next_to_check = i; 4871 EM_RX_UNLOCK(rxr); 4872 if_input(ifp, sendmp); 4873 EM_RX_LOCK(rxr); 4874 i = rxr->next_to_check; 4875 } 4876 4877 /* Only refresh mbufs every 8 descriptors */ 4878 if (processed == 8) { 4879 em_refresh_mbufs(rxr, i); 4880 processed = 0; 4881 } 4882 } 4883 4884 /* Catch any remaining refresh work */ 4885 if (e1000_rx_unrefreshed(rxr)) 4886 em_refresh_mbufs(rxr, i); 4887 4888 rxr->next_to_check = i; 4889 if (done != NULL) 4890 *done = rxdone; 4891 EM_RX_UNLOCK(rxr); 4892 4893 return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE); 4894 } 4895 4896 static __inline void 4897 em_rx_discard(struct rx_ring *rxr, int i) 4898 { 4899 struct em_rxbuffer *rbuf; 4900 4901 rbuf = &rxr->rx_buffers[i]; 4902 bus_dmamap_unload(rxr->rxtag, rbuf->map); 4903 4904 /* Free any previous pieces */ 4905 if (rxr->fmp != NULL) { 4906 rxr->fmp->m_flags |= M_PKTHDR; 4907 m_freem(rxr->fmp); 4908 rxr->fmp = NULL; 4909 rxr->lmp = NULL; 4910 } 4911 /* 4912 ** Free buffer and allow em_refresh_mbufs() 4913 ** to clean up and recharge buffer. 4914 */ 4915 if (rbuf->m_head) { 4916 m_free(rbuf->m_head); 4917 rbuf->m_head = NULL; 4918 } 4919 return; 4920 } 4921 4922 #ifndef __NO_STRICT_ALIGNMENT 4923 /* 4924 * When jumbo frames are enabled we should realign entire payload on 4925 * architecures with strict alignment. This is serious design mistake of 8254x 4926 * as it nullifies DMA operations. 8254x just allows RX buffer size to be 4927 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its 4928 * payload. On architecures without strict alignment restrictions 8254x still 4929 * performs unaligned memory access which would reduce the performance too. 4930 * To avoid copying over an entire frame to align, we allocate a new mbuf and 4931 * copy ethernet header to the new mbuf. The new mbuf is prepended into the 4932 * existing mbuf chain. 4933 * 4934 * Be aware, best performance of the 8254x is achived only when jumbo frame is 4935 * not used at all on architectures with strict alignment. 4936 */ 4937 static int 4938 em_fixup_rx(struct rx_ring *rxr) 4939 { 4940 struct adapter *adapter = rxr->adapter; 4941 struct mbuf *m, *n; 4942 int error; 4943 4944 error = 0; 4945 m = rxr->fmp; 4946 if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { 4947 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); 4948 m->m_data += ETHER_HDR_LEN; 4949 } else { 4950 MGETHDR(n, M_NOWAIT, MT_DATA); 4951 if (n != NULL) { 4952 bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); 4953 m->m_data += ETHER_HDR_LEN; 4954 m->m_len -= ETHER_HDR_LEN; 4955 n->m_len = ETHER_HDR_LEN; 4956 M_MOVE_PKTHDR(n, m); 4957 n->m_next = m; 4958 rxr->fmp = n; 4959 } else { 4960 adapter->dropped_pkts++; 4961 m_freem(rxr->fmp); 4962 rxr->fmp = NULL; 4963 error = ENOMEM; 4964 } 4965 } 4966 4967 return (error); 4968 } 4969 #endif 4970 4971 static void 4972 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf) 4973 { 4974 rxd->read.buffer_addr = htole64(rxbuf->paddr); 4975 /* DD bits must be cleared */ 4976 rxd->wb.upper.status_error= 0; 4977 } 4978 4979 /********************************************************************* 4980 * 4981 * Verify that the hardware indicated that the checksum is valid. 4982 * Inform the stack about the status of checksum so that stack 4983 * doesn't spend time verifying the checksum. 
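 *
 *  Concretely: IPCS set with the IPE error bit clear yields
 *  CSUM_IP_CHECKED | CSUM_IP_VALID; TCPCS set with TCPE clear (or
 *  UDPCS set) yields CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data
 *  forced to 0xffff.  If IXSM is set the hardware did not validate
 *  the packet and nothing is reported.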
4984 * 4985 *********************************************************************/ 4986 static void 4987 em_receive_checksum(uint32_t status, struct mbuf *mp) 4988 { 4989 mp->m_pkthdr.csum_flags = 0; 4990 4991 /* Ignore Checksum bit is set */ 4992 if (status & E1000_RXD_STAT_IXSM) 4993 return; 4994 4995 /* If the IP checksum exists and there is no IP Checksum error */ 4996 if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) == 4997 E1000_RXD_STAT_IPCS) { 4998 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); 4999 } 5000 5001 /* TCP or UDP checksum */ 5002 if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) == 5003 E1000_RXD_STAT_TCPCS) { 5004 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 5005 mp->m_pkthdr.csum_data = htons(0xffff); 5006 } 5007 if (status & E1000_RXD_STAT_UDPCS) { 5008 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 5009 mp->m_pkthdr.csum_data = htons(0xffff); 5010 } 5011 } 5012 5013 /* 5014 * This routine is run via an vlan 5015 * config EVENT 5016 */ 5017 static void 5018 em_register_vlan(void *arg, if_t ifp, u16 vtag) 5019 { 5020 struct adapter *adapter = if_getsoftc(ifp); 5021 u32 index, bit; 5022 5023 if ((void*)adapter != arg) /* Not our event */ 5024 return; 5025 5026 if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */ 5027 return; 5028 5029 EM_CORE_LOCK(adapter); 5030 index = (vtag >> 5) & 0x7F; 5031 bit = vtag & 0x1F; 5032 adapter->shadow_vfta[index] |= (1 << bit); 5033 ++adapter->num_vlans; 5034 /* Re-init to load the changes */ 5035 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) 5036 em_init_locked(adapter); 5037 EM_CORE_UNLOCK(adapter); 5038 } 5039 5040 /* 5041 * This routine is run via an vlan 5042 * unconfig EVENT 5043 */ 5044 static void 5045 em_unregister_vlan(void *arg, if_t ifp, u16 vtag) 5046 { 5047 struct adapter *adapter = if_getsoftc(ifp); 5048 u32 index, bit; 5049 5050 if (adapter != arg) 5051 return; 5052 5053 if ((vtag == 0) || (vtag > 4095)) /* Invalid */ 5054 return; 5055 5056 EM_CORE_LOCK(adapter); 5057 index = (vtag >> 5) & 0x7F; 5058 bit = vtag & 0x1F; 5059 adapter->shadow_vfta[index] &= ~(1 << bit); 5060 --adapter->num_vlans; 5061 /* Re-init to load the changes */ 5062 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) 5063 em_init_locked(adapter); 5064 EM_CORE_UNLOCK(adapter); 5065 } 5066 5067 static void 5068 em_setup_vlan_hw_support(struct adapter *adapter) 5069 { 5070 struct e1000_hw *hw = &adapter->hw; 5071 u32 reg; 5072 5073 /* 5074 ** We get here thru init_locked, meaning 5075 ** a soft reset, this has already cleared 5076 ** the VFTA and other state, so if there 5077 ** have been no vlan's registered do nothing. 5078 */ 5079 if (adapter->num_vlans == 0) 5080 return; 5081 5082 /* 5083 ** A soft reset zero's out the VFTA, so 5084 ** we need to repopulate it now. 
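	** The shadow copy uses the same layout the register/unregister
	** handlers write above: for example VLAN id 100 lands at index
	** (100 >> 5) & 0x7F = 3, bit 100 & 0x1F = 4 of shadow_vfta[].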
5085 */ 5086 for (int i = 0; i < EM_VFTA_SIZE; i++) 5087 if (adapter->shadow_vfta[i] != 0) 5088 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, 5089 i, adapter->shadow_vfta[i]); 5090 5091 reg = E1000_READ_REG(hw, E1000_CTRL); 5092 reg |= E1000_CTRL_VME; 5093 E1000_WRITE_REG(hw, E1000_CTRL, reg); 5094 5095 /* Enable the Filter Table */ 5096 reg = E1000_READ_REG(hw, E1000_RCTL); 5097 reg &= ~E1000_RCTL_CFIEN; 5098 reg |= E1000_RCTL_VFE; 5099 E1000_WRITE_REG(hw, E1000_RCTL, reg); 5100 } 5101 5102 static void 5103 em_enable_intr(struct adapter *adapter) 5104 { 5105 struct e1000_hw *hw = &adapter->hw; 5106 u32 ims_mask = IMS_ENABLE_MASK; 5107 5108 if (hw->mac.type == e1000_82574) { 5109 E1000_WRITE_REG(hw, EM_EIAC, adapter->ims); 5110 ims_mask |= adapter->ims; 5111 } 5112 E1000_WRITE_REG(hw, E1000_IMS, ims_mask); 5113 } 5114 5115 static void 5116 em_disable_intr(struct adapter *adapter) 5117 { 5118 struct e1000_hw *hw = &adapter->hw; 5119 5120 if (hw->mac.type == e1000_82574) 5121 E1000_WRITE_REG(hw, EM_EIAC, 0); 5122 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); 5123 } 5124 5125 /* 5126 * Bit of a misnomer, what this really means is 5127 * to enable OS management of the system... aka 5128 * to disable special hardware management features 5129 */ 5130 static void 5131 em_init_manageability(struct adapter *adapter) 5132 { 5133 /* A shared code workaround */ 5134 #define E1000_82542_MANC2H E1000_MANC2H 5135 if (adapter->has_manage) { 5136 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); 5137 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); 5138 5139 /* disable hardware interception of ARP */ 5140 manc &= ~(E1000_MANC_ARP_EN); 5141 5142 /* enable receiving management packets to the host */ 5143 manc |= E1000_MANC_EN_MNG2HOST; 5144 #define E1000_MNG2HOST_PORT_623 (1 << 5) 5145 #define E1000_MNG2HOST_PORT_664 (1 << 6) 5146 manc2h |= E1000_MNG2HOST_PORT_623; 5147 manc2h |= E1000_MNG2HOST_PORT_664; 5148 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); 5149 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); 5150 } 5151 } 5152 5153 /* 5154 * Give control back to hardware management 5155 * controller if there is one. 5156 */ 5157 static void 5158 em_release_manageability(struct adapter *adapter) 5159 { 5160 if (adapter->has_manage) { 5161 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); 5162 5163 /* re-enable hardware interception of ARP */ 5164 manc |= E1000_MANC_ARP_EN; 5165 manc &= ~E1000_MANC_EN_MNG2HOST; 5166 5167 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); 5168 } 5169 } 5170 5171 /* 5172 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. 5173 * For ASF and Pass Through versions of f/w this means 5174 * that the driver is loaded. For AMT version type f/w 5175 * this means that the network i/f is open. 5176 */ 5177 static void 5178 em_get_hw_control(struct adapter *adapter) 5179 { 5180 u32 ctrl_ext, swsm; 5181 5182 if (adapter->hw.mac.type == e1000_82573) { 5183 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); 5184 E1000_WRITE_REG(&adapter->hw, E1000_SWSM, 5185 swsm | E1000_SWSM_DRV_LOAD); 5186 return; 5187 } 5188 /* else */ 5189 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); 5190 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, 5191 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); 5192 return; 5193 } 5194 5195 /* 5196 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. 5197 * For ASF and Pass Through versions of f/w this means that 5198 * the driver is no longer loaded. For AMT versions of the 5199 * f/w this means that the network i/f is closed. 
5200 */ 5201 static void 5202 em_release_hw_control(struct adapter *adapter) 5203 { 5204 u32 ctrl_ext, swsm; 5205 5206 if (!adapter->has_manage) 5207 return; 5208 5209 if (adapter->hw.mac.type == e1000_82573) { 5210 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); 5211 E1000_WRITE_REG(&adapter->hw, E1000_SWSM, 5212 swsm & ~E1000_SWSM_DRV_LOAD); 5213 return; 5214 } 5215 /* else */ 5216 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); 5217 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, 5218 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); 5219 return; 5220 } 5221 5222 static int 5223 em_is_valid_ether_addr(u8 *addr) 5224 { 5225 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; 5226 5227 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { 5228 return (FALSE); 5229 } 5230 5231 return (TRUE); 5232 } 5233 5234 /* 5235 ** Parse the interface capabilities with regard 5236 ** to both system management and wake-on-lan for 5237 ** later use. 5238 */ 5239 static void 5240 em_get_wakeup(device_t dev) 5241 { 5242 struct adapter *adapter = device_get_softc(dev); 5243 u16 eeprom_data = 0, device_id, apme_mask; 5244 5245 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); 5246 apme_mask = EM_EEPROM_APME; 5247 5248 switch (adapter->hw.mac.type) { 5249 case e1000_82573: 5250 case e1000_82583: 5251 adapter->has_amt = TRUE; 5252 /* Falls thru */ 5253 case e1000_82571: 5254 case e1000_82572: 5255 case e1000_80003es2lan: 5256 if (adapter->hw.bus.func == 1) { 5257 e1000_read_nvm(&adapter->hw, 5258 NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); 5259 break; 5260 } else 5261 e1000_read_nvm(&adapter->hw, 5262 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); 5263 break; 5264 case e1000_ich8lan: 5265 case e1000_ich9lan: 5266 case e1000_ich10lan: 5267 case e1000_pchlan: 5268 case e1000_pch2lan: 5269 apme_mask = E1000_WUC_APME; 5270 adapter->has_amt = TRUE; 5271 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC); 5272 break; 5273 default: 5274 e1000_read_nvm(&adapter->hw, 5275 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); 5276 break; 5277 } 5278 if (eeprom_data & apme_mask) 5279 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); 5280 /* 5281 * We have the eeprom settings, now apply the special cases 5282 * where the eeprom may be wrong or the board won't support 5283 * wake on lan on a particular port 5284 */ 5285 device_id = pci_get_device(dev); 5286 switch (device_id) { 5287 case E1000_DEV_ID_82571EB_FIBER: 5288 /* Wake events only supported on port A for dual fiber 5289 * regardless of eeprom setting */ 5290 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & 5291 E1000_STATUS_FUNC_1) 5292 adapter->wol = 0; 5293 break; 5294 case E1000_DEV_ID_82571EB_QUAD_COPPER: 5295 case E1000_DEV_ID_82571EB_QUAD_FIBER: 5296 case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: 5297 /* if quad port adapter, disable WoL on all but port A */ 5298 if (global_quad_port_a != 0) 5299 adapter->wol = 0; 5300 /* Reset for multiple quad port adapters */ 5301 if (++global_quad_port_a == 4) 5302 global_quad_port_a = 0; 5303 break; 5304 } 5305 return; 5306 } 5307 5308 5309 /* 5310 * Enable PCI Wake On Lan capability 5311 */ 5312 static void 5313 em_enable_wakeup(device_t dev) 5314 { 5315 struct adapter *adapter = device_get_softc(dev); 5316 if_t ifp = adapter->ifp; 5317 u32 pmc, ctrl, ctrl_ext, rctl; 5318 u16 status; 5319 5320 if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0)) 5321 return; 5322 5323 /* Advertise the wakeup capability */ 5324 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); 5325 ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); 5326 
E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); 5327 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); 5328 5329 if ((adapter->hw.mac.type == e1000_ich8lan) || 5330 (adapter->hw.mac.type == e1000_pchlan) || 5331 (adapter->hw.mac.type == e1000_ich9lan) || 5332 (adapter->hw.mac.type == e1000_ich10lan)) 5333 e1000_suspend_workarounds_ich8lan(&adapter->hw); 5334 5335 /* Keep the laser running on Fiber adapters */ 5336 if (adapter->hw.phy.media_type == e1000_media_type_fiber || 5337 adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { 5338 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); 5339 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; 5340 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext); 5341 } 5342 5343 /* 5344 ** Determine type of Wakeup: note that wol 5345 ** is set with all bits on by default. 5346 */ 5347 if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0) 5348 adapter->wol &= ~E1000_WUFC_MAG; 5349 5350 if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0) 5351 adapter->wol &= ~E1000_WUFC_MC; 5352 else { 5353 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 5354 rctl |= E1000_RCTL_MPE; 5355 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); 5356 } 5357 5358 if ((adapter->hw.mac.type == e1000_pchlan) || 5359 (adapter->hw.mac.type == e1000_pch2lan)) { 5360 if (em_enable_phy_wakeup(adapter)) 5361 return; 5362 } else { 5363 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); 5364 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); 5365 } 5366 5367 if (adapter->hw.phy.type == e1000_phy_igp_3) 5368 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); 5369 5370 /* Request PME */ 5371 status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); 5372 status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); 5373 if (if_getcapenable(ifp) & IFCAP_WOL) 5374 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; 5375 pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); 5376 5377 return; 5378 } 5379 5380 /* 5381 ** WOL in the newer chipset interfaces (pchlan) 5382 ** require thing to be copied into the phy 5383 */ 5384 static int 5385 em_enable_phy_wakeup(struct adapter *adapter) 5386 { 5387 struct e1000_hw *hw = &adapter->hw; 5388 u32 mreg, ret = 0; 5389 u16 preg; 5390 5391 /* copy MAC RARs to PHY RARs */ 5392 e1000_copy_rx_addrs_to_phy_ich8lan(hw); 5393 5394 /* copy MAC MTA to PHY MTA */ 5395 for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) { 5396 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); 5397 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); 5398 e1000_write_phy_reg(hw, BM_MTA(i) + 1, 5399 (u16)((mreg >> 16) & 0xFFFF)); 5400 } 5401 5402 /* configure PHY Rx Control register */ 5403 e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg); 5404 mreg = E1000_READ_REG(hw, E1000_RCTL); 5405 if (mreg & E1000_RCTL_UPE) 5406 preg |= BM_RCTL_UPE; 5407 if (mreg & E1000_RCTL_MPE) 5408 preg |= BM_RCTL_MPE; 5409 preg &= ~(BM_RCTL_MO_MASK); 5410 if (mreg & E1000_RCTL_MO_3) 5411 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) 5412 << BM_RCTL_MO_SHIFT); 5413 if (mreg & E1000_RCTL_BAM) 5414 preg |= BM_RCTL_BAM; 5415 if (mreg & E1000_RCTL_PMCF) 5416 preg |= BM_RCTL_PMCF; 5417 mreg = E1000_READ_REG(hw, E1000_CTRL); 5418 if (mreg & E1000_CTRL_RFCE) 5419 preg |= BM_RCTL_RFCE; 5420 e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg); 5421 5422 /* enable PHY wakeup in MAC register */ 5423 E1000_WRITE_REG(hw, E1000_WUC, 5424 E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN); 5425 E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); 5426 5427 /* configure and enable PHY wakeup in 
PHY registers */ 5428 e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol); 5429 e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); 5430 5431 /* activate PHY wakeup */ 5432 ret = hw->phy.ops.acquire(hw); 5433 if (ret) { 5434 printf("Could not acquire PHY\n"); 5435 return ret; 5436 } 5437 e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, 5438 (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); 5439 ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); 5440 if (ret) { 5441 printf("Could not read PHY page 769\n"); 5442 goto out; 5443 } 5444 preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; 5445 ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); 5446 if (ret) 5447 printf("Could not set PHY Host Wakeup bit\n"); 5448 out: 5449 hw->phy.ops.release(hw); 5450 5451 return ret; 5452 } 5453 5454 static void 5455 em_led_func(void *arg, int onoff) 5456 { 5457 struct adapter *adapter = arg; 5458 5459 EM_CORE_LOCK(adapter); 5460 if (onoff) { 5461 e1000_setup_led(&adapter->hw); 5462 e1000_led_on(&adapter->hw); 5463 } else { 5464 e1000_led_off(&adapter->hw); 5465 e1000_cleanup_led(&adapter->hw); 5466 } 5467 EM_CORE_UNLOCK(adapter); 5468 } 5469 5470 /* 5471 ** Disable the L0S and L1 LINK states 5472 */ 5473 static void 5474 em_disable_aspm(struct adapter *adapter) 5475 { 5476 int base, reg; 5477 u16 link_cap,link_ctrl; 5478 device_t dev = adapter->dev; 5479 5480 switch (adapter->hw.mac.type) { 5481 case e1000_82573: 5482 case e1000_82574: 5483 case e1000_82583: 5484 break; 5485 default: 5486 return; 5487 } 5488 if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0) 5489 return; 5490 reg = base + PCIER_LINK_CAP; 5491 link_cap = pci_read_config(dev, reg, 2); 5492 if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0) 5493 return; 5494 reg = base + PCIER_LINK_CTL; 5495 link_ctrl = pci_read_config(dev, reg, 2); 5496 link_ctrl &= ~PCIEM_LINK_CTL_ASPMC; 5497 pci_write_config(dev, reg, link_ctrl, 2); 5498 return; 5499 } 5500 5501 /********************************************************************** 5502 * 5503 * Update the board statistics counters. 
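 *
 *  The hardware statistics registers are clear-on-read, so each pass
 *  adds the deltas into the adapter->stats soft copy rather than
 *  overwriting it.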
5504 * 5505 **********************************************************************/ 5506 static void 5507 em_update_stats_counters(struct adapter *adapter) 5508 { 5509 5510 if(adapter->hw.phy.media_type == e1000_media_type_copper || 5511 (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { 5512 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS); 5513 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC); 5514 } 5515 adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS); 5516 adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC); 5517 adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC); 5518 adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL); 5519 5520 adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC); 5521 adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL); 5522 adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC); 5523 adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC); 5524 adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC); 5525 adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC); 5526 adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC); 5527 adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); 5528 adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); 5529 adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); 5530 adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); 5531 adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); 5532 adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255); 5533 adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); 5534 adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); 5535 adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); 5536 adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); 5537 adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); 5538 adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); 5539 adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); 5540 5541 /* For the 64-bit byte counters the low dword must be read first. 
*/ 5542 /* Both registers clear on the read of the high dword */ 5543 5544 adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + 5545 ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32); 5546 adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + 5547 ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32); 5548 5549 adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); 5550 adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); 5551 adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); 5552 adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); 5553 adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); 5554 5555 adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); 5556 adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); 5557 5558 adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); 5559 adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); 5560 adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); 5561 adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); 5562 adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); 5563 adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); 5564 adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); 5565 adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); 5566 adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); 5567 adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC); 5568 5569 /* Interrupt Counts */ 5570 5571 adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC); 5572 adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC); 5573 adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC); 5574 adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC); 5575 adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC); 5576 adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC); 5577 adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC); 5578 adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC); 5579 adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC); 5580 5581 if (adapter->hw.mac.type >= e1000_82543) { 5582 adapter->stats.algnerrc += 5583 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); 5584 adapter->stats.rxerrc += 5585 E1000_READ_REG(&adapter->hw, E1000_RXERRC); 5586 adapter->stats.tncrs += 5587 E1000_READ_REG(&adapter->hw, E1000_TNCRS); 5588 adapter->stats.cexterr += 5589 E1000_READ_REG(&adapter->hw, E1000_CEXTERR); 5590 adapter->stats.tsctc += 5591 E1000_READ_REG(&adapter->hw, E1000_TSCTC); 5592 adapter->stats.tsctfc += 5593 E1000_READ_REG(&adapter->hw, E1000_TSCTFC); 5594 } 5595 } 5596 5597 static uint64_t 5598 em_get_counter(if_t ifp, ift_counter cnt) 5599 { 5600 struct adapter *adapter; 5601 5602 adapter = if_getsoftc(ifp); 5603 5604 switch (cnt) { 5605 case IFCOUNTER_COLLISIONS: 5606 return (adapter->stats.colc); 5607 case IFCOUNTER_IERRORS: 5608 return (adapter->dropped_pkts + adapter->stats.rxerrc + 5609 adapter->stats.crcerrs + adapter->stats.algnerrc + 5610 adapter->stats.ruc + adapter->stats.roc + 5611 adapter->stats.mpc + adapter->stats.cexterr); 5612 case IFCOUNTER_OERRORS: 5613 return (adapter->stats.ecol + adapter->stats.latecol + 5614 adapter->watchdog_events); 5615 default: 5616 return (if_get_counter_default(ifp, cnt)); 5617 } 5618 } 5619 5620 /* Export a single 32-bit register via a read-only 
sysctl. */ 5621 static int 5622 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) 5623 { 5624 struct adapter *adapter; 5625 u_int val; 5626 5627 adapter = oidp->oid_arg1; 5628 val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); 5629 return (sysctl_handle_int(oidp, &val, 0, req)); 5630 } 5631 5632 /* 5633 * Add sysctl variables, one per statistic, to the system. 5634 */ 5635 static void 5636 em_add_hw_stats(struct adapter *adapter) 5637 { 5638 device_t dev = adapter->dev; 5639 5640 struct tx_ring *txr = adapter->tx_rings; 5641 struct rx_ring *rxr = adapter->rx_rings; 5642 5643 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); 5644 struct sysctl_oid *tree = device_get_sysctl_tree(dev); 5645 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); 5646 struct e1000_hw_stats *stats = &adapter->stats; 5647 5648 struct sysctl_oid *stat_node, *queue_node, *int_node; 5649 struct sysctl_oid_list *stat_list, *queue_list, *int_list; 5650 5651 #define QUEUE_NAME_LEN 32 5652 char namebuf[QUEUE_NAME_LEN]; 5653 5654 /* Driver Statistics */ 5655 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 5656 CTLFLAG_RD, &adapter->dropped_pkts, 5657 "Driver dropped packets"); 5658 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", 5659 CTLFLAG_RD, &adapter->link_irq, 5660 "Link MSIX IRQ Handled"); 5661 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", 5662 CTLFLAG_RD, &adapter->mbuf_defrag_failed, 5663 "Defragmenting mbuf chain failed"); 5664 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 5665 CTLFLAG_RD, &adapter->no_tx_dma_setup, 5666 "Driver tx dma failure in xmit"); 5667 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", 5668 CTLFLAG_RD, &adapter->rx_overruns, 5669 "RX overruns"); 5670 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", 5671 CTLFLAG_RD, &adapter->watchdog_events, 5672 "Watchdog timeouts"); 5673 5674 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", 5675 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL, 5676 em_sysctl_reg_handler, "IU", 5677 "Device Control Register"); 5678 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", 5679 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL, 5680 em_sysctl_reg_handler, "IU", 5681 "Receiver Control Register"); 5682 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", 5683 CTLFLAG_RD, &adapter->hw.fc.high_water, 0, 5684 "Flow Control High Watermark"); 5685 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 5686 CTLFLAG_RD, &adapter->hw.fc.low_water, 0, 5687 "Flow Control Low Watermark"); 5688 5689 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { 5690 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i); 5691 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 5692 CTLFLAG_RD, NULL, "TX Queue Name"); 5693 queue_list = SYSCTL_CHILDREN(queue_node); 5694 5695 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 5696 CTLTYPE_UINT | CTLFLAG_RD, adapter, 5697 E1000_TDH(txr->me), 5698 em_sysctl_reg_handler, "IU", 5699 "Transmit Descriptor Head"); 5700 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 5701 CTLTYPE_UINT | CTLFLAG_RD, adapter, 5702 E1000_TDT(txr->me), 5703 em_sysctl_reg_handler, "IU", 5704 "Transmit Descriptor Tail"); 5705 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq", 5706 CTLFLAG_RD, &txr->tx_irq, 5707 "Queue MSI-X Transmit Interrupts"); 5708 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 5709 CTLFLAG_RD, &txr->no_desc_avail, 5710 "Queue No Descriptor Available"); 5711 5712 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i); 5713 queue_node = SYSCTL_ADD_NODE(ctx, child, 
OID_AUTO, namebuf, 5714 CTLFLAG_RD, NULL, "RX Queue Name"); 5715 queue_list = SYSCTL_CHILDREN(queue_node); 5716 5717 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 5718 CTLTYPE_UINT | CTLFLAG_RD, adapter, 5719 E1000_RDH(rxr->me), 5720 em_sysctl_reg_handler, "IU", 5721 "Receive Descriptor Head"); 5722 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 5723 CTLTYPE_UINT | CTLFLAG_RD, adapter, 5724 E1000_RDT(rxr->me), 5725 em_sysctl_reg_handler, "IU", 5726 "Receive Descriptor Tail"); 5727 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq", 5728 CTLFLAG_RD, &rxr->rx_irq, 5729 "Queue MSI-X Receive Interrupts"); 5730 } 5731 5732 /* MAC stats get their own sub node */ 5733 5734 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 5735 CTLFLAG_RD, NULL, "Statistics"); 5736 stat_list = SYSCTL_CHILDREN(stat_node); 5737 5738 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll", 5739 CTLFLAG_RD, &stats->ecol, 5740 "Excessive collisions"); 5741 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll", 5742 CTLFLAG_RD, &stats->scc, 5743 "Single collisions"); 5744 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 5745 CTLFLAG_RD, &stats->mcc, 5746 "Multiple collisions"); 5747 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll", 5748 CTLFLAG_RD, &stats->latecol, 5749 "Late collisions"); 5750 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count", 5751 CTLFLAG_RD, &stats->colc, 5752 "Collision Count"); 5753 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors", 5754 CTLFLAG_RD, &adapter->stats.symerrs, 5755 "Symbol Errors"); 5756 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors", 5757 CTLFLAG_RD, &adapter->stats.sec, 5758 "Sequence Errors"); 5759 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count", 5760 CTLFLAG_RD, &adapter->stats.dc, 5761 "Defer Count"); 5762 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets", 5763 CTLFLAG_RD, &adapter->stats.mpc, 5764 "Missed Packets"); 5765 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", 5766 CTLFLAG_RD, &adapter->stats.rnbc, 5767 "Receive No Buffers"); 5768 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize", 5769 CTLFLAG_RD, &adapter->stats.ruc, 5770 "Receive Undersize"); 5771 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", 5772 CTLFLAG_RD, &adapter->stats.rfc, 5773 "Fragmented Packets Received"); 5774 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize", 5775 CTLFLAG_RD, &adapter->stats.roc, 5776 "Oversized Packets Received"); 5777 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber", 5778 CTLFLAG_RD, &adapter->stats.rjc, 5779 "Received Jabber"); 5780 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs", 5781 CTLFLAG_RD, &adapter->stats.rxerrc, 5782 "Receive Errors"); 5783 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", 5784 CTLFLAG_RD, &adapter->stats.crcerrs, 5785 "CRC errors"); 5786 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs", 5787 CTLFLAG_RD, &adapter->stats.algnerrc, 5788 "Alignment Errors"); 5789 /* On 82575 these are collision counts */ 5790 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", 5791 CTLFLAG_RD, &adapter->stats.cexterr, 5792 "Collision/Carrier extension errors"); 5793 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", 5794 CTLFLAG_RD, &adapter->stats.xonrxc, 5795 "XON Received"); 5796 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", 5797 CTLFLAG_RD, &adapter->stats.xontxc, 5798 "XON Transmitted"); 5799 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", 5800 CTLFLAG_RD, 
&adapter->stats.xoffrxc, 5801 "XOFF Received"); 5802 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", 5803 CTLFLAG_RD, &adapter->stats.xofftxc, 5804 "XOFF Transmitted"); 5805 5806 /* Packet Reception Stats */ 5807 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", 5808 CTLFLAG_RD, &adapter->stats.tpr, 5809 "Total Packets Received"); 5810 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", 5811 CTLFLAG_RD, &adapter->stats.gprc, 5812 "Good Packets Received"); 5813 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", 5814 CTLFLAG_RD, &adapter->stats.bprc, 5815 "Broadcast Packets Received"); 5816 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", 5817 CTLFLAG_RD, &adapter->stats.mprc, 5818 "Multicast Packets Received"); 5819 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", 5820 CTLFLAG_RD, &adapter->stats.prc64, 5821 "64 byte frames received"); 5822 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", 5823 CTLFLAG_RD, &adapter->stats.prc127, 5824 "65-127 byte frames received"); 5825 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", 5826 CTLFLAG_RD, &adapter->stats.prc255, 5827 "128-255 byte frames received"); 5828 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", 5829 CTLFLAG_RD, &adapter->stats.prc511, 5830 "256-511 byte frames received"); 5831 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", 5832 CTLFLAG_RD, &adapter->stats.prc1023, 5833 "512-1023 byte frames received"); 5834 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", 5835 CTLFLAG_RD, &adapter->stats.prc1522, 5836 "1024-1522 byte frames received"); 5837 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 5838 CTLFLAG_RD, &adapter->stats.gorc, 5839 "Good Octets Received"); 5840 5841 /* Packet Transmission Stats */ 5842 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 5843 CTLFLAG_RD, &adapter->stats.gotc, 5844 "Good Octets Transmitted"); 5845 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", 5846 CTLFLAG_RD, &adapter->stats.tpt, 5847 "Total Packets Transmitted"); 5848 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", 5849 CTLFLAG_RD, &adapter->stats.gptc, 5850 "Good Packets Transmitted"); 5851 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", 5852 CTLFLAG_RD, &adapter->stats.bptc, 5853 "Broadcast Packets Transmitted"); 5854 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", 5855 CTLFLAG_RD, &adapter->stats.mptc, 5856 "Multicast Packets Transmitted"); 5857 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", 5858 CTLFLAG_RD, &adapter->stats.ptc64, 5859 "64 byte frames transmitted"); 5860 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", 5861 CTLFLAG_RD, &adapter->stats.ptc127, 5862 "65-127 byte frames transmitted"); 5863 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", 5864 CTLFLAG_RD, &adapter->stats.ptc255, 5865 "128-255 byte frames transmitted"); 5866 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", 5867 CTLFLAG_RD, &adapter->stats.ptc511, 5868 "256-511 byte frames transmitted"); 5869 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", 5870 CTLFLAG_RD, &adapter->stats.ptc1023, 5871 "512-1023 byte frames transmitted"); 5872 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", 5873 CTLFLAG_RD, &adapter->stats.ptc1522, 5874 "1024-1522 byte frames transmitted"); 5875 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd", 5876 CTLFLAG_RD, &adapter->stats.tsctc, 5877 
"TSO Contexts Transmitted"); 5878 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", 5879 CTLFLAG_RD, &adapter->stats.tsctfc, 5880 "TSO Contexts Failed"); 5881 5882 5883 /* Interrupt Stats */ 5884 5885 int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 5886 CTLFLAG_RD, NULL, "Interrupt Statistics"); 5887 int_list = SYSCTL_CHILDREN(int_node); 5888 5889 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts", 5890 CTLFLAG_RD, &adapter->stats.iac, 5891 "Interrupt Assertion Count"); 5892 5893 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", 5894 CTLFLAG_RD, &adapter->stats.icrxptc, 5895 "Interrupt Cause Rx Pkt Timer Expire Count"); 5896 5897 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", 5898 CTLFLAG_RD, &adapter->stats.icrxatc, 5899 "Interrupt Cause Rx Abs Timer Expire Count"); 5900 5901 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", 5902 CTLFLAG_RD, &adapter->stats.ictxptc, 5903 "Interrupt Cause Tx Pkt Timer Expire Count"); 5904 5905 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", 5906 CTLFLAG_RD, &adapter->stats.ictxatc, 5907 "Interrupt Cause Tx Abs Timer Expire Count"); 5908 5909 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", 5910 CTLFLAG_RD, &adapter->stats.ictxqec, 5911 "Interrupt Cause Tx Queue Empty Count"); 5912 5913 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", 5914 CTLFLAG_RD, &adapter->stats.ictxqmtc, 5915 "Interrupt Cause Tx Queue Min Thresh Count"); 5916 5917 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", 5918 CTLFLAG_RD, &adapter->stats.icrxdmtc, 5919 "Interrupt Cause Rx Desc Min Thresh Count"); 5920 5921 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun", 5922 CTLFLAG_RD, &adapter->stats.icrxoc, 5923 "Interrupt Cause Receiver Overrun Count"); 5924 } 5925 5926 /********************************************************************** 5927 * 5928 * This routine provides a way to dump out the adapter eeprom, 5929 * often a useful debug/service tool. This only dumps the first 5930 * 32 words, stuff that matters is in that extent. 5931 * 5932 **********************************************************************/ 5933 static int 5934 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) 5935 { 5936 struct adapter *adapter = (struct adapter *)arg1; 5937 int error; 5938 int result; 5939 5940 result = -1; 5941 error = sysctl_handle_int(oidp, &result, 0, req); 5942 5943 if (error || !req->newptr) 5944 return (error); 5945 5946 /* 5947 * This value will cause a hex dump of the 5948 * first 32 16-bit words of the EEPROM to 5949 * the screen. 
5950 */ 5951 if (result == 1) 5952 em_print_nvm_info(adapter); 5953 5954 return (error); 5955 } 5956 5957 static void 5958 em_print_nvm_info(struct adapter *adapter) 5959 { 5960 u16 eeprom_data; 5961 int i, j, row = 0; 5962 5963 /* Its a bit crude, but it gets the job done */ 5964 printf("\nInterface EEPROM Dump:\n"); 5965 printf("Offset\n0x0000 "); 5966 for (i = 0, j = 0; i < 32; i++, j++) { 5967 if (j == 8) { /* Make the offset block */ 5968 j = 0; ++row; 5969 printf("\n0x00%x0 ",row); 5970 } 5971 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); 5972 printf("%04x ", eeprom_data); 5973 } 5974 printf("\n"); 5975 } 5976 5977 static int 5978 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) 5979 { 5980 struct em_int_delay_info *info; 5981 struct adapter *adapter; 5982 u32 regval; 5983 int error, usecs, ticks; 5984 5985 info = (struct em_int_delay_info *)arg1; 5986 usecs = info->value; 5987 error = sysctl_handle_int(oidp, &usecs, 0, req); 5988 if (error != 0 || req->newptr == NULL) 5989 return (error); 5990 if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) 5991 return (EINVAL); 5992 info->value = usecs; 5993 ticks = EM_USECS_TO_TICKS(usecs); 5994 if (info->offset == E1000_ITR) /* units are 256ns here */ 5995 ticks *= 4; 5996 5997 adapter = info->adapter; 5998 5999 EM_CORE_LOCK(adapter); 6000 regval = E1000_READ_OFFSET(&adapter->hw, info->offset); 6001 regval = (regval & ~0xffff) | (ticks & 0xffff); 6002 /* Handle a few special cases. */ 6003 switch (info->offset) { 6004 case E1000_RDTR: 6005 break; 6006 case E1000_TIDV: 6007 if (ticks == 0) { 6008 adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; 6009 /* Don't write 0 into the TIDV register. */ 6010 regval++; 6011 } else 6012 adapter->txd_cmd |= E1000_TXD_CMD_IDE; 6013 break; 6014 } 6015 E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); 6016 EM_CORE_UNLOCK(adapter); 6017 return (0); 6018 } 6019 6020 static void 6021 em_add_int_delay_sysctl(struct adapter *adapter, const char *name, 6022 const char *description, struct em_int_delay_info *info, 6023 int offset, int value) 6024 { 6025 info->adapter = adapter; 6026 info->offset = offset; 6027 info->value = value; 6028 SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), 6029 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), 6030 OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, 6031 info, 0, em_sysctl_int_delay, "I", description); 6032 } 6033 6034 static void 6035 em_set_sysctl_value(struct adapter *adapter, const char *name, 6036 const char *description, int *limit, int value) 6037 { 6038 *limit = value; 6039 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), 6040 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), 6041 OID_AUTO, name, CTLFLAG_RW, limit, value, description); 6042 } 6043 6044 6045 /* 6046 ** Set flow control using sysctl: 6047 ** Flow control values: 6048 ** 0 - off 6049 ** 1 - rx pause 6050 ** 2 - tx pause 6051 ** 3 - full 6052 */ 6053 static int 6054 em_set_flowcntl(SYSCTL_HANDLER_ARGS) 6055 { 6056 int error; 6057 static int input = 3; /* default is full */ 6058 struct adapter *adapter = (struct adapter *) arg1; 6059 6060 error = sysctl_handle_int(oidp, &input, 0, req); 6061 6062 if ((error) || (req->newptr == NULL)) 6063 return (error); 6064 6065 if (input == adapter->fc) /* no change? 
*/ 6066 return (error); 6067 6068 switch (input) { 6069 case e1000_fc_rx_pause: 6070 case e1000_fc_tx_pause: 6071 case e1000_fc_full: 6072 case e1000_fc_none: 6073 adapter->hw.fc.requested_mode = input; 6074 adapter->fc = input; 6075 break; 6076 default: 6077 /* Do nothing */ 6078 return (error); 6079 } 6080 6081 adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; 6082 e1000_force_mac_fc(&adapter->hw); 6083 return (error); 6084 } 6085 6086 /* 6087 ** Manage Energy Efficient Ethernet: 6088 ** Control values: 6089 ** 0/1 - enabled/disabled 6090 */ 6091 static int 6092 em_sysctl_eee(SYSCTL_HANDLER_ARGS) 6093 { 6094 struct adapter *adapter = (struct adapter *) arg1; 6095 int error, value; 6096 6097 value = adapter->hw.dev_spec.ich8lan.eee_disable; 6098 error = sysctl_handle_int(oidp, &value, 0, req); 6099 if (error || req->newptr == NULL) 6100 return (error); 6101 EM_CORE_LOCK(adapter); 6102 adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0); 6103 em_init_locked(adapter); 6104 EM_CORE_UNLOCK(adapter); 6105 return (0); 6106 } 6107 6108 static int 6109 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS) 6110 { 6111 struct adapter *adapter; 6112 int error; 6113 int result; 6114 6115 result = -1; 6116 error = sysctl_handle_int(oidp, &result, 0, req); 6117 6118 if (error || !req->newptr) 6119 return (error); 6120 6121 if (result == 1) { 6122 adapter = (struct adapter *)arg1; 6123 em_print_debug_info(adapter); 6124 } 6125 6126 return (error); 6127 } 6128 6129 /* 6130 ** This routine is meant to be fluid, add whatever is 6131 ** needed for debugging a problem. -jfv 6132 */ 6133 static void 6134 em_print_debug_info(struct adapter *adapter) 6135 { 6136 device_t dev = adapter->dev; 6137 struct tx_ring *txr = adapter->tx_rings; 6138 struct rx_ring *rxr = adapter->rx_rings; 6139 6140 if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) 6141 printf("Interface is RUNNING "); 6142 else 6143 printf("Interface is NOT RUNNING\n"); 6144 6145 if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE) 6146 printf("and INACTIVE\n"); 6147 else 6148 printf("and ACTIVE\n"); 6149 6150 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { 6151 device_printf(dev, "TX Queue %d ------\n", i); 6152 device_printf(dev, "hw tdh = %d, hw tdt = %d\n", 6153 E1000_READ_REG(&adapter->hw, E1000_TDH(i)), 6154 E1000_READ_REG(&adapter->hw, E1000_TDT(i))); 6155 device_printf(dev, "Tx Queue Status = %d\n", txr->busy); 6156 device_printf(dev, "TX descriptors avail = %d\n", 6157 txr->tx_avail); 6158 device_printf(dev, "Tx Descriptors avail failure = %ld\n", 6159 txr->no_desc_avail); 6160 device_printf(dev, "RX Queue %d ------\n", i); 6161 device_printf(dev, "hw rdh = %d, hw rdt = %d\n", 6162 E1000_READ_REG(&adapter->hw, E1000_RDH(i)), 6163 E1000_READ_REG(&adapter->hw, E1000_RDT(i))); 6164 device_printf(dev, "RX discarded packets = %ld\n", 6165 rxr->rx_discarded); 6166 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check); 6167 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh); 6168 } 6169 } 6170 6171 #ifdef EM_MULTIQUEUE 6172 /* 6173 * 82574 only: 6174 * Write a new value to the EEPROM increasing the number of MSIX 6175 * vectors from 3 to 5, for proper multiqueue support. 
6176 */ 6177 static void 6178 em_enable_vectors_82574(struct adapter *adapter) 6179 { 6180 struct e1000_hw *hw = &adapter->hw; 6181 device_t dev = adapter->dev; 6182 u16 edata; 6183 6184 e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); 6185 printf("Current cap: %#06x\n", edata); 6186 if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) { 6187 device_printf(dev, "Writing to eeprom: increasing " 6188 "reported MSIX vectors from 3 to 5...\n"); 6189 edata &= ~(EM_NVM_MSIX_N_MASK); 6190 edata |= 4 << EM_NVM_MSIX_N_SHIFT; 6191 e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); 6192 e1000_update_nvm_checksum(hw); 6193 device_printf(dev, "Writing to eeprom: done\n"); 6194 } 6195 } 6196 #endif 6197 6198 #ifdef DDB 6199 DB_COMMAND(em_reset_dev, em_ddb_reset_dev) 6200 { 6201 devclass_t dc; 6202 int max_em; 6203 6204 dc = devclass_find("em"); 6205 max_em = devclass_get_maxunit(dc); 6206 6207 for (int index = 0; index < (max_em - 1); index++) { 6208 device_t dev; 6209 dev = devclass_get_device(dc, index); 6210 if (device_get_driver(dev) == &em_driver) { 6211 struct adapter *adapter = device_get_softc(dev); 6212 EM_CORE_LOCK(adapter); 6213 em_init_locked(adapter); 6214 EM_CORE_UNLOCK(adapter); 6215 } 6216 } 6217 } 6218 DB_COMMAND(em_dump_queue, em_ddb_dump_queue) 6219 { 6220 devclass_t dc; 6221 int max_em; 6222 6223 dc = devclass_find("em"); 6224 max_em = devclass_get_maxunit(dc); 6225 6226 for (int index = 0; index < (max_em - 1); index++) { 6227 device_t dev; 6228 dev = devclass_get_device(dc, index); 6229 if (device_get_driver(dev) == &em_driver) 6230 em_print_debug_info(device_get_softc(dev)); 6231 } 6232 6233 } 6234 #endif 6235
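/*
 * Editor's illustrative sketch (not part of the driver): em_enable_phy_wakeup()
 * above mirrors each 32-bit MTA entry into a pair of 16-bit BM_MTA PHY
 * registers.  The standalone program below only demonstrates that low/high
 * halving; the sample value and the program itself are hypothetical.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t mreg = 0xDEADBEEF;				/* example MTA word */
	uint16_t lo = (uint16_t)(mreg & 0xFFFF);		/* would go to BM_MTA(i) */
	uint16_t hi = (uint16_t)((mreg >> 16) & 0xFFFF);	/* would go to BM_MTA(i) + 1 */

	printf("lo=0x%04x hi=0x%04x\n", lo, hi);
	return (0);
}
#endif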
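/*
 * Editor's illustrative sketch (not part of the driver): the GORC/GOTC
 * accumulation in em_update_stats_counters() reads the low dword first and
 * then the high dword, which clears both halves; the 64-bit value is the low
 * dword plus the high dword shifted up 32 bits.  The helper below is a
 * hypothetical restatement of that pattern and is never compiled.
 */
#ifdef notdef
static u64
em_example_read_stat64(struct adapter *adapter, u32 lo_reg, u32 hi_reg)
{
	u64 lo, hi;

	/* Low dword first; the high-dword read clears the counter pair. */
	lo = E1000_READ_REG(&adapter->hw, lo_reg);
	hi = E1000_READ_REG(&adapter->hw, hi_reg);

	return (lo + (hi << 32));
}
#endif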
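/*
 * Editor's illustrative sketch (not part of the driver): the counters that
 * em_add_hw_stats() registers hang off the device's sysctl tree, so they can
 * be read from userland with sysctlbyname(3).  The OID below assumes unit 0,
 * i.e. "dev.em.0.mac_stats.missed_packets"; adjust the unit number as needed.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t missed;
	size_t len = sizeof(missed);

	if (sysctlbyname("dev.em.0.mac_stats.missed_packets",
	    &missed, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("missed packets: %ju\n", (uintmax_t)missed);
	return (0);
}
#endif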
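/*
 * Editor's illustrative sketch (not part of the driver): em_print_nvm_info()
 * prints the first 32 EEPROM words eight per row, bumping the offset label on
 * every eighth word.  The standalone program below reproduces just that
 * formatting with made-up data in place of e1000_read_nvm().
 */
#if 0
#include <stdio.h>

int
main(void)
{
	int i, j, row = 0;

	printf("\nInterface EEPROM Dump:\n");
	printf("Offset\n0x0000  ");
	for (i = 0, j = 0; i < 32; i++, j++) {
		if (j == 8) {			/* start a new row of eight words */
			j = 0;
			++row;
			printf("\n0x00%x0  ", row);
		}
		printf("%04x ", (i * 0x1111) & 0xFFFF);	/* stand-in for NVM data */
	}
	printf("\n");
	return (0);
}
#endif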
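/*
 * Editor's illustrative sketch (not part of the driver): em_set_flowcntl()
 * accepts the e1000_fc_* values 0 (off), 1 (rx pause), 2 (tx pause) and
 * 3 (full).  A userland write through sysctlbyname(3) is shown below; the OID
 * name "dev.em.0.fc" is an assumption, since the handler's sysctl name is
 * assigned elsewhere in the driver.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int fc = 3;	/* 3 == full flow control */

	if (sysctlbyname("dev.em.0.fc", NULL, NULL, &fc, sizeof(fc)) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	return (0);
}
#endif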
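/*
 * Editor's illustrative sketch (not part of the driver): em_enable_vectors_82574()
 * updates the MSI-X vector-count field of the EM_NVM_PCIE_CTRL word with a
 * read-modify-write before rewriting the NVM checksum.  The snippet below
 * shows that field update on a plain integer; the mask and shift values are
 * made up and are not the driver's real EM_NVM_MSIX_N_* definitions.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_MSIX_N_MASK	0x0070	/* hypothetical field mask */
#define EXAMPLE_MSIX_N_SHIFT	4	/* hypothetical field shift */

int
main(void)
{
	uint16_t edata = 0x1234;		/* pretend NVM word */

	edata &= ~EXAMPLE_MSIX_N_MASK;		/* clear the vector-count field */
	edata |= 4 << EXAMPLE_MSIX_N_SHIFT;	/* per the driver comment, field value 4 reports 5 vectors */
	printf("new word: %#06x\n", edata);
	return (0);
}
#endif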