/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_em.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifdef DDB
#include <sys/types.h>
#include <ddb/ddb.h>
#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.6.1-k";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(if_t, struct mbuf *);
static int	em_mq_start_locked(if_t, struct tx_ring *);
static void	em_qflush(if_t);
#else
static void	em_start(if_t);
static void	em_start_locked(if_t, struct tx_ring *);
#endif
static int	em_ioctl(if_t, u_long, caddr_t);
static uint64_t	em_get_counter(if_t, ift_counter);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(if_t, struct ifmediareq *);
static int	em_media_change(if_t);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);
static void	em_flush_desc_rings(struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
		    const struct em_rxbuffer *rxbuf);
static void	em_receive_checksum(uint32_t status, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, if_t, u16);
static void	em_unregister_vlan(void *, if_t, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void	em_enable_vectors_82574(struct adapter *);
#endif

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		em_probe),
	DEVMETHOD(device_attach,	em_attach),
	DEVMETHOD(device_detach,	em_detach),
	DEVMETHOD(device_shutdown,	em_shutdown),
	DEVMETHOD(device_suspend,	em_suspend),
	DEVMETHOD(device_resume,	em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(em, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

#define TSO_WORKAROUND	4

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store last used CPU when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF */
static int eee_setting = 1;
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on
 *  adapter based on PCI vendor/device id of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
			    em_strings[ent->index],
			    em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}
	/*
	** In the new SPT device flash is not a
	** separate BAR, rather it is also in BAR0,
	** so use the same tag and an offset handle for the
	** FLASH read/write macros in the shared code.
	*/
	else if (hw->mac.type == e1000_pch_spt) {
		adapter->osdep.flash_bus_space_tag =
		    adapter->osdep.mem_bus_space_tag;
		adapter->osdep.flash_bus_space_handle =
		    adapter->osdep.mem_bus_space_handle
		    + E1000_FLASH_BASE_ADDR;
	}

	/* Do Shared Code initialization */
	error = e1000_setup_init_funcs(hw, TRUE);
	if (error) {
		device_printf(dev, "Setup of Shared code failed, error %d\n",
		    error);
		error = ENXIO;
		goto err_pci;
	}

	/*
	 * Setup MSI/X or MSI if PCI Express
	 */
	adapter->msix = em_setup_msix(adapter);

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of E1000_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state, this is
	** important in reading the nvm and
	** mac from that.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state, call it again,
		** if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Disable ULP support */
	e1000_disable_ulp_lpt_lp(hw, TRUE);

	/*
	** Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != (void *)NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (if_vlantrunkinuse(ifp)) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((if_getflags(ifp) & IFF_UP) &&
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr);
#else
			if (!if_sendq_empty(ifp))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#ifndef EM_MULTIQUEUE
static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!if_sendq_empty(ifp)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
			break;
		}
		m_head = if_dequeue(ifp);
		if (m_head == NULL)
			break;
		/*
		 * Encapsulation can modify our pointer, and/or make it
		 * NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			if_sendq_prepend(ifp, m_head);
			break;
		}

		/* Mark the queue as having work */
		if (txr->busy == EM_TX_IDLE)
			txr->busy = EM_TX_BUSY;

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

	}

	return;
}

static void
em_start(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  however, if busy the driver can queue the request rather
 *  than do an immediate send. It is this that is an advantage
 *  in this driver, rather than also having multiple tx queues.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(if_t ifp, struct mbuf *m)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	unsigned int	i, error;

	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	error = drbr_enqueue(ifp, txr->br, m);
	if (error)
		return (error);

	if (EM_TX_TRYLOCK(txr)) {
		em_mq_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(txr->tq, &txr->tx_task);

	return (0);
}

static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	EM_TX_LOCK_ASSERT(txr);

	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0) {
		return (ENETDOWN);
	}

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				/* It was freed, move forward */
				drbr_advance(ifp, txr->br);
			} else {
				/*
				 * Still have one left, it may not be
				 * the same since the transmit function
				 * may have changed it.
				 */
				drbr_putback(ifp, txr->br, next);
			}
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
		if (next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
		ETHER_BPF_MTAP(ifp, next);
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
			break;
	}

	/* Mark the queue as having work */
	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
		txr->busy = EM_TX_BUSY;

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER) {
		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
	}
	return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			if_setflagbits(ifp, IFF_UP, 0);
			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(if_getflags(ifp) & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_pch_spt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		adapter->hw.mac.max_frame_size =
		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
			em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (if_getflags(ifp) & IFF_UP) {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				if ((if_getflags(ifp) ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = if_getflags(ifp);
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev,
"Media change is" 1267 " blocked due to SOL/IDER session.\n"); 1268 break; 1269 } 1270 EM_CORE_UNLOCK(adapter); 1271 /* falls thru */ 1272 case SIOCGIFMEDIA: 1273 IOCTL_DEBUGOUT("ioctl rcv'd: \ 1274 SIOCxIFMEDIA (Get/Set Interface Media)"); 1275 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); 1276 break; 1277 case SIOCSIFCAP: 1278 { 1279 int mask, reinit; 1280 1281 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); 1282 reinit = 0; 1283 mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); 1284 #ifdef DEVICE_POLLING 1285 if (mask & IFCAP_POLLING) { 1286 if (ifr->ifr_reqcap & IFCAP_POLLING) { 1287 error = ether_poll_register(em_poll, ifp); 1288 if (error) 1289 return (error); 1290 EM_CORE_LOCK(adapter); 1291 em_disable_intr(adapter); 1292 if_setcapenablebit(ifp, IFCAP_POLLING, 0); 1293 EM_CORE_UNLOCK(adapter); 1294 } else { 1295 error = ether_poll_deregister(ifp); 1296 /* Enable interrupt even in error case */ 1297 EM_CORE_LOCK(adapter); 1298 em_enable_intr(adapter); 1299 if_setcapenablebit(ifp, 0, IFCAP_POLLING); 1300 EM_CORE_UNLOCK(adapter); 1301 } 1302 } 1303 #endif 1304 if (mask & IFCAP_HWCSUM) { 1305 if_togglecapenable(ifp,IFCAP_HWCSUM); 1306 reinit = 1; 1307 } 1308 if (mask & IFCAP_TSO4) { 1309 if_togglecapenable(ifp,IFCAP_TSO4); 1310 reinit = 1; 1311 } 1312 if (mask & IFCAP_VLAN_HWTAGGING) { 1313 if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING); 1314 reinit = 1; 1315 } 1316 if (mask & IFCAP_VLAN_HWFILTER) { 1317 if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER); 1318 reinit = 1; 1319 } 1320 if (mask & IFCAP_VLAN_HWTSO) { 1321 if_togglecapenable(ifp, IFCAP_VLAN_HWTSO); 1322 reinit = 1; 1323 } 1324 if ((mask & IFCAP_WOL) && 1325 (if_getcapabilities(ifp) & IFCAP_WOL) != 0) { 1326 if (mask & IFCAP_WOL_MCAST) 1327 if_togglecapenable(ifp, IFCAP_WOL_MCAST); 1328 if (mask & IFCAP_WOL_MAGIC) 1329 if_togglecapenable(ifp, IFCAP_WOL_MAGIC); 1330 } 1331 if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING)) 1332 em_init(adapter); 1333 if_vlancap(ifp); 1334 break; 1335 } 1336 1337 default: 1338 error = ether_ioctl(ifp, command, data); 1339 break; 1340 } 1341 1342 return (error); 1343 } 1344 1345 1346 /********************************************************************* 1347 * Init entry point 1348 * 1349 * This routine is used in two ways. It is used by the stack as 1350 * init entry point in network interface structure. It is also used 1351 * by the driver as a hw/sw initialization routine to get to a 1352 * consistent state. 1353 * 1354 * return 0 on success, positive on failure 1355 **********************************************************************/ 1356 1357 static void 1358 em_init_locked(struct adapter *adapter) 1359 { 1360 if_t ifp = adapter->ifp; 1361 device_t dev = adapter->dev; 1362 1363 INIT_DEBUGOUT("em_init: begin"); 1364 1365 EM_CORE_LOCK_ASSERT(adapter); 1366 1367 em_disable_intr(adapter); 1368 callout_stop(&adapter->timer); 1369 1370 /* Get the latest mac address, User can use a LAA */ 1371 bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr, 1372 ETHER_ADDR_LEN); 1373 1374 /* Put the address into the Receive Address Array */ 1375 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); 1376 1377 /* 1378 * With the 82571 adapter, RAR[0] may be overwritten 1379 * when the other port is reset, we make a duplicate 1380 * in RAR[14] for that eventuality, this assures 1381 * the interface continues to function. 
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	if_clearhwassist(ifp);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	/*
	** There have proven to be problems with TSO when not
	** at full gigabit speed, so disable the assist automatically
	** when at lower speeds.  -jfv
	*/
	if (if_getcapenable(ifp) & IFCAP_TSO4) {
		if (adapter->link_speed == SPEED_1000)
			if_sethwassistbits(ifp, CSUM_TSO, 0);
	}

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr);
#else
		if (!if_sendq_empty(ifp))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif

	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else {
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	}
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	/*
	** Because we must read the ICR for this interrupt
	** it may clear other causes using autoclear, for
	** this reason we simply create a soft interrupt
	** for all these vectors.
	*/
	if (reg_icr) {
		E1000_WRITE_REG(&adapter->hw,
		    E1000_ICS, adapter->ims);
	}
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else {
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	}
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	if (adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr);
#else
			if (!if_sendq_empty(ifp))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
1794 * 1795 **********************************************************************/ 1796 static void 1797 em_media_status(if_t ifp, struct ifmediareq *ifmr) 1798 { 1799 struct adapter *adapter = if_getsoftc(ifp); 1800 u_char fiber_type = IFM_1000_SX; 1801 1802 INIT_DEBUGOUT("em_media_status: begin"); 1803 1804 EM_CORE_LOCK(adapter); 1805 em_update_link_status(adapter); 1806 1807 ifmr->ifm_status = IFM_AVALID; 1808 ifmr->ifm_active = IFM_ETHER; 1809 1810 if (!adapter->link_active) { 1811 EM_CORE_UNLOCK(adapter); 1812 return; 1813 } 1814 1815 ifmr->ifm_status |= IFM_ACTIVE; 1816 1817 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || 1818 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { 1819 ifmr->ifm_active |= fiber_type | IFM_FDX; 1820 } else { 1821 switch (adapter->link_speed) { 1822 case 10: 1823 ifmr->ifm_active |= IFM_10_T; 1824 break; 1825 case 100: 1826 ifmr->ifm_active |= IFM_100_TX; 1827 break; 1828 case 1000: 1829 ifmr->ifm_active |= IFM_1000_T; 1830 break; 1831 } 1832 if (adapter->link_duplex == FULL_DUPLEX) 1833 ifmr->ifm_active |= IFM_FDX; 1834 else 1835 ifmr->ifm_active |= IFM_HDX; 1836 } 1837 EM_CORE_UNLOCK(adapter); 1838 } 1839 1840 /********************************************************************* 1841 * 1842 * Media Ioctl callback 1843 * 1844 * This routine is called when the user changes speed/duplex using 1845 * media/mediopt option with ifconfig. 1846 * 1847 **********************************************************************/ 1848 static int 1849 em_media_change(if_t ifp) 1850 { 1851 struct adapter *adapter = if_getsoftc(ifp); 1852 struct ifmedia *ifm = &adapter->media; 1853 1854 INIT_DEBUGOUT("em_media_change: begin"); 1855 1856 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) 1857 return (EINVAL); 1858 1859 EM_CORE_LOCK(adapter); 1860 switch (IFM_SUBTYPE(ifm->ifm_media)) { 1861 case IFM_AUTO: 1862 adapter->hw.mac.autoneg = DO_AUTO_NEG; 1863 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; 1864 break; 1865 case IFM_1000_LX: 1866 case IFM_1000_SX: 1867 case IFM_1000_T: 1868 adapter->hw.mac.autoneg = DO_AUTO_NEG; 1869 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; 1870 break; 1871 case IFM_100_TX: 1872 adapter->hw.mac.autoneg = FALSE; 1873 adapter->hw.phy.autoneg_advertised = 0; 1874 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) 1875 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; 1876 else 1877 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; 1878 break; 1879 case IFM_10_T: 1880 adapter->hw.mac.autoneg = FALSE; 1881 adapter->hw.phy.autoneg_advertised = 0; 1882 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) 1883 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; 1884 else 1885 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; 1886 break; 1887 default: 1888 device_printf(adapter->dev, "Unsupported media type\n"); 1889 } 1890 1891 em_init_locked(adapter); 1892 EM_CORE_UNLOCK(adapter); 1893 1894 return (0); 1895 } 1896 1897 /********************************************************************* 1898 * 1899 * This routine maps the mbufs to tx descriptors. 
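 *
 * As a rough, simplified sketch (not verbatim driver code), a start routine
 * such as em_start_locked() drives this function approximately like so:
 *
 *	while (!if_sendq_empty(ifp) && txr->tx_avail > EM_MAX_SCATTER) {
 *		m_head = if_dequeue(ifp);
 *		if (m_head == NULL)
 *			break;
 *		if (em_xmit(txr, &m_head) != 0) {
 *			if (m_head != NULL)
 *				if_sendq_prepend(ifp, m_head);
 *			break;
 *		}
 *		if_etherbpfmtap(ifp, m_head);    (hand a copy to BPF)
 *	}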
1900 * 1901 * return 0 on success, positive on failure 1902 **********************************************************************/ 1903 1904 static int 1905 em_xmit(struct tx_ring *txr, struct mbuf **m_headp) 1906 { 1907 struct adapter *adapter = txr->adapter; 1908 bus_dma_segment_t segs[EM_MAX_SCATTER]; 1909 bus_dmamap_t map; 1910 struct em_txbuffer *tx_buffer, *tx_buffer_mapped; 1911 struct e1000_tx_desc *ctxd = NULL; 1912 struct mbuf *m_head; 1913 struct ether_header *eh; 1914 struct ip *ip = NULL; 1915 struct tcphdr *tp = NULL; 1916 u32 txd_upper = 0, txd_lower = 0; 1917 int ip_off, poff; 1918 int nsegs, i, j, first, last = 0; 1919 int error; 1920 bool do_tso, tso_desc, remap = TRUE; 1921 1922 m_head = *m_headp; 1923 do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO); 1924 tso_desc = FALSE; 1925 ip_off = poff = 0; 1926 1927 /* 1928 * Intel recommends entire IP/TCP header length reside in a single 1929 * buffer. If multiple descriptors are used to describe the IP and 1930 * TCP header, each descriptor should describe one or more 1931 * complete headers; descriptors referencing only parts of headers 1932 * are not supported. If all layer headers are not coalesced into 1933 * a single buffer, each buffer should not cross a 4KB boundary, 1934 * or be larger than the maximum read request size. 1935 * Controller also requires modifing IP/TCP header to make TSO work 1936 * so we firstly get a writable mbuf chain then coalesce ethernet/ 1937 * IP/TCP header into a single buffer to meet the requirement of 1938 * controller. This also simplifies IP/TCP/UDP checksum offloading 1939 * which also has similar restrictions. 1940 */ 1941 if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) { 1942 if (do_tso || (m_head->m_next != NULL && 1943 m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) { 1944 if (M_WRITABLE(*m_headp) == 0) { 1945 m_head = m_dup(*m_headp, M_NOWAIT); 1946 m_freem(*m_headp); 1947 if (m_head == NULL) { 1948 *m_headp = NULL; 1949 return (ENOBUFS); 1950 } 1951 *m_headp = m_head; 1952 } 1953 } 1954 /* 1955 * XXX 1956 * Assume IPv4, we don't have TSO/checksum offload support 1957 * for IPv6 yet. 1958 */ 1959 ip_off = sizeof(struct ether_header); 1960 if (m_head->m_len < ip_off) { 1961 m_head = m_pullup(m_head, ip_off); 1962 if (m_head == NULL) { 1963 *m_headp = NULL; 1964 return (ENOBUFS); 1965 } 1966 } 1967 eh = mtod(m_head, struct ether_header *); 1968 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 1969 ip_off = sizeof(struct ether_vlan_header); 1970 if (m_head->m_len < ip_off) { 1971 m_head = m_pullup(m_head, ip_off); 1972 if (m_head == NULL) { 1973 *m_headp = NULL; 1974 return (ENOBUFS); 1975 } 1976 } 1977 } 1978 if (m_head->m_len < ip_off + sizeof(struct ip)) { 1979 m_head = m_pullup(m_head, ip_off + sizeof(struct ip)); 1980 if (m_head == NULL) { 1981 *m_headp = NULL; 1982 return (ENOBUFS); 1983 } 1984 } 1985 ip = (struct ip *)(mtod(m_head, char *) + ip_off); 1986 poff = ip_off + (ip->ip_hl << 2); 1987 1988 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) { 1989 if (m_head->m_len < poff + sizeof(struct tcphdr)) { 1990 m_head = m_pullup(m_head, poff + 1991 sizeof(struct tcphdr)); 1992 if (m_head == NULL) { 1993 *m_headp = NULL; 1994 return (ENOBUFS); 1995 } 1996 } 1997 tp = (struct tcphdr *)(mtod(m_head, char *) + poff); 1998 /* 1999 * TSO workaround: 2000 * pull 4 more bytes of data into it. 
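			 * (Illustration: with a 20-byte IP header (ip_hl == 5)
			 * and a 32-byte TCP header (th_off == 8, e.g. with
			 * timestamps), the pullup below covers
			 * poff + 32 + TSO_WORKAROUND(4) bytes, so the whole
			 * header plus the 4-byte pad ends up contiguous in
			 * the first mbuf.)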
			 */
			if (m_head->m_len < poff + (tp->th_off << 2)) {
				m_head = m_pullup(m_head, poff +
				    (tp->th_off << 2) +
				    TSO_WORKAROUND);
				if (m_head == NULL) {
					*m_headp = NULL;
					return (ENOBUFS);
				}
			}
			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
			if (do_tso) {
				ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
				    (ip->ip_hl << 2) +
				    (tp->th_off << 2));
				ip->ip_sum = 0;
				/*
				 * The pseudo TCP checksum does not include
				 * the TCP payload length, so the driver must
				 * recompute the checksum here to match what
				 * the hardware expects to see.  This follows
				 * Microsoft's Large Send specification.
				 */
				tp->th_sum = in_pseudo(ip->ip_src.s_addr,
				    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
			}
		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
			if (m_head->m_len < poff + sizeof(struct udphdr)) {
				m_head = m_pullup(m_head, poff +
				    sizeof(struct udphdr));
				if (m_head == NULL) {
					*m_headp = NULL;
					return (ENOBUFS);
				}
			}
			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
		}
		*m_headp = m_head;
	}

	/*
	 * Map the packet for DMA
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

retry:
	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG && remap) {
		struct mbuf *m;

		m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
		if (m == NULL) {
			adapter->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again, but only once */
		remap = FALSE;
		goto retry;
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/*
	 * TSO Hardware workaround: if this packet is not TSO, is only a
	 * single descriptor long, and follows a TSO burst, then we need to
	 * add a sentinel descriptor to prevent premature writeback.
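	 * In that case nsegs == 1 below simply sets tso_desc, and the single
	 * segment is later split into a (len - TSO_WORKAROUND) descriptor plus
	 * a small 4-byte sentinel descriptor.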
2095 */ 2096 if ((!do_tso) && (txr->tx_tso == TRUE)) { 2097 if (nsegs == 1) 2098 tso_desc = TRUE; 2099 txr->tx_tso = FALSE; 2100 } 2101 2102 if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) { 2103 txr->no_desc_avail++; 2104 bus_dmamap_unload(txr->txtag, map); 2105 return (ENOBUFS); 2106 } 2107 m_head = *m_headp; 2108 2109 /* Do hardware assists */ 2110 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 2111 em_tso_setup(txr, m_head, ip_off, ip, tp, 2112 &txd_upper, &txd_lower); 2113 /* we need to make a final sentinel transmit desc */ 2114 tso_desc = TRUE; 2115 } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) 2116 em_transmit_checksum_setup(txr, m_head, 2117 ip_off, ip, &txd_upper, &txd_lower); 2118 2119 if (m_head->m_flags & M_VLANTAG) { 2120 /* Set the vlan id. */ 2121 txd_upper |= htole16(if_getvtag(m_head)) << 16; 2122 /* Tell hardware to add tag */ 2123 txd_lower |= htole32(E1000_TXD_CMD_VLE); 2124 } 2125 2126 i = txr->next_avail_desc; 2127 2128 /* Set up our transmit descriptors */ 2129 for (j = 0; j < nsegs; j++) { 2130 bus_size_t seg_len; 2131 bus_addr_t seg_addr; 2132 2133 tx_buffer = &txr->tx_buffers[i]; 2134 ctxd = &txr->tx_base[i]; 2135 seg_addr = segs[j].ds_addr; 2136 seg_len = segs[j].ds_len; 2137 /* 2138 ** TSO Workaround: 2139 ** If this is the last descriptor, we want to 2140 ** split it so we have a small final sentinel 2141 */ 2142 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) { 2143 seg_len -= TSO_WORKAROUND; 2144 ctxd->buffer_addr = htole64(seg_addr); 2145 ctxd->lower.data = htole32( 2146 adapter->txd_cmd | txd_lower | seg_len); 2147 ctxd->upper.data = htole32(txd_upper); 2148 if (++i == adapter->num_tx_desc) 2149 i = 0; 2150 2151 /* Now make the sentinel */ 2152 txr->tx_avail--; 2153 ctxd = &txr->tx_base[i]; 2154 tx_buffer = &txr->tx_buffers[i]; 2155 ctxd->buffer_addr = 2156 htole64(seg_addr + seg_len); 2157 ctxd->lower.data = htole32( 2158 adapter->txd_cmd | txd_lower | TSO_WORKAROUND); 2159 ctxd->upper.data = 2160 htole32(txd_upper); 2161 last = i; 2162 if (++i == adapter->num_tx_desc) 2163 i = 0; 2164 } else { 2165 ctxd->buffer_addr = htole64(seg_addr); 2166 ctxd->lower.data = htole32( 2167 adapter->txd_cmd | txd_lower | seg_len); 2168 ctxd->upper.data = htole32(txd_upper); 2169 last = i; 2170 if (++i == adapter->num_tx_desc) 2171 i = 0; 2172 } 2173 tx_buffer->m_head = NULL; 2174 tx_buffer->next_eop = -1; 2175 } 2176 2177 txr->next_avail_desc = i; 2178 txr->tx_avail -= nsegs; 2179 2180 tx_buffer->m_head = m_head; 2181 /* 2182 ** Here we swap the map so the last descriptor, 2183 ** which gets the completion interrupt has the 2184 ** real map, and the first descriptor gets the 2185 ** unused map from this descriptor. 2186 */ 2187 tx_buffer_mapped->map = tx_buffer->map; 2188 tx_buffer->map = map; 2189 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); 2190 2191 /* 2192 * Last Descriptor of Packet 2193 * needs End Of Packet (EOP) 2194 * and Report Status (RS) 2195 */ 2196 ctxd->lower.data |= 2197 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); 2198 /* 2199 * Keep track in the first buffer which 2200 * descriptor will be written back 2201 */ 2202 tx_buffer = &txr->tx_buffers[first]; 2203 tx_buffer->next_eop = last; 2204 2205 /* 2206 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000 2207 * that this frame is available to transmit. 
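	 * The hardware treats TDT as "one past the last ready descriptor", so
	 * writing the post-increment index 'i' hands off everything filled in
	 * above.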
2208 */ 2209 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 2210 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 2211 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); 2212 2213 return (0); 2214 } 2215 2216 static void 2217 em_set_promisc(struct adapter *adapter) 2218 { 2219 if_t ifp = adapter->ifp; 2220 u32 reg_rctl; 2221 2222 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 2223 2224 if (if_getflags(ifp) & IFF_PROMISC) { 2225 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); 2226 /* Turn this on if you want to see bad packets */ 2227 if (em_debug_sbp) 2228 reg_rctl |= E1000_RCTL_SBP; 2229 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2230 } else if (if_getflags(ifp) & IFF_ALLMULTI) { 2231 reg_rctl |= E1000_RCTL_MPE; 2232 reg_rctl &= ~E1000_RCTL_UPE; 2233 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2234 } 2235 } 2236 2237 static void 2238 em_disable_promisc(struct adapter *adapter) 2239 { 2240 if_t ifp = adapter->ifp; 2241 u32 reg_rctl; 2242 int mcnt = 0; 2243 2244 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 2245 reg_rctl &= (~E1000_RCTL_UPE); 2246 if (if_getflags(ifp) & IFF_ALLMULTI) 2247 mcnt = MAX_NUM_MULTICAST_ADDRESSES; 2248 else 2249 mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES); 2250 /* Don't disable if in MAX groups */ 2251 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) 2252 reg_rctl &= (~E1000_RCTL_MPE); 2253 reg_rctl &= (~E1000_RCTL_SBP); 2254 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2255 } 2256 2257 2258 /********************************************************************* 2259 * Multicast Update 2260 * 2261 * This routine is called whenever multicast address list is updated. 2262 * 2263 **********************************************************************/ 2264 2265 static void 2266 em_set_multi(struct adapter *adapter) 2267 { 2268 if_t ifp = adapter->ifp; 2269 u32 reg_rctl = 0; 2270 u8 *mta; /* Multicast array memory */ 2271 int mcnt = 0; 2272 2273 IOCTL_DEBUGOUT("em_set_multi: begin"); 2274 2275 mta = adapter->mta; 2276 bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); 2277 2278 if (adapter->hw.mac.type == e1000_82542 && 2279 adapter->hw.revision_id == E1000_REVISION_2) { 2280 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 2281 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) 2282 e1000_pci_clear_mwi(&adapter->hw); 2283 reg_rctl |= E1000_RCTL_RST; 2284 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2285 msec_delay(5); 2286 } 2287 2288 if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES); 2289 2290 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { 2291 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 2292 reg_rctl |= E1000_RCTL_MPE; 2293 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2294 } else 2295 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); 2296 2297 if (adapter->hw.mac.type == e1000_82542 && 2298 adapter->hw.revision_id == E1000_REVISION_2) { 2299 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 2300 reg_rctl &= ~E1000_RCTL_RST; 2301 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2302 msec_delay(5); 2303 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) 2304 e1000_pci_set_mwi(&adapter->hw); 2305 } 2306 } 2307 2308 2309 /********************************************************************* 2310 * Timer routine 2311 * 2312 * This routine checks for link status and updates statistics. 
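 * It reschedules itself via callout_reset() every hz ticks (roughly once a
 * second) and also doubles as the TX watchdog via the EM_TX_HUNG checks below.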
2313 * 2314 **********************************************************************/ 2315 2316 static void 2317 em_local_timer(void *arg) 2318 { 2319 struct adapter *adapter = arg; 2320 if_t ifp = adapter->ifp; 2321 struct tx_ring *txr = adapter->tx_rings; 2322 struct rx_ring *rxr = adapter->rx_rings; 2323 u32 trigger = 0; 2324 2325 EM_CORE_LOCK_ASSERT(adapter); 2326 2327 em_update_link_status(adapter); 2328 em_update_stats_counters(adapter); 2329 2330 /* Reset LAA into RAR[0] on 82571 */ 2331 if ((adapter->hw.mac.type == e1000_82571) && 2332 e1000_get_laa_state_82571(&adapter->hw)) 2333 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); 2334 2335 /* Mask to use in the irq trigger */ 2336 if (adapter->msix_mem) { 2337 for (int i = 0; i < adapter->num_queues; i++, rxr++) 2338 trigger |= rxr->ims; 2339 rxr = adapter->rx_rings; 2340 } else 2341 trigger = E1000_ICS_RXDMT0; 2342 2343 /* 2344 ** Check on the state of the TX queue(s), this 2345 ** can be done without the lock because its RO 2346 ** and the HUNG state will be static if set. 2347 */ 2348 for (int i = 0; i < adapter->num_queues; i++, txr++) { 2349 if (txr->busy == EM_TX_HUNG) 2350 goto hung; 2351 if (txr->busy >= EM_TX_MAXTRIES) 2352 txr->busy = EM_TX_HUNG; 2353 /* Schedule a TX tasklet if needed */ 2354 if (txr->tx_avail <= EM_MAX_SCATTER) 2355 taskqueue_enqueue(txr->tq, &txr->tx_task); 2356 } 2357 2358 callout_reset(&adapter->timer, hz, em_local_timer, adapter); 2359 #ifndef DEVICE_POLLING 2360 /* Trigger an RX interrupt to guarantee mbuf refresh */ 2361 E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger); 2362 #endif 2363 return; 2364 hung: 2365 /* Looks like we're hung */ 2366 device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n", 2367 txr->me); 2368 em_print_debug_info(adapter); 2369 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); 2370 adapter->watchdog_events++; 2371 em_init_locked(adapter); 2372 } 2373 2374 2375 static void 2376 em_update_link_status(struct adapter *adapter) 2377 { 2378 struct e1000_hw *hw = &adapter->hw; 2379 if_t ifp = adapter->ifp; 2380 device_t dev = adapter->dev; 2381 struct tx_ring *txr = adapter->tx_rings; 2382 u32 link_check = 0; 2383 2384 /* Get the cached link value or read phy for real */ 2385 switch (hw->phy.media_type) { 2386 case e1000_media_type_copper: 2387 if (hw->mac.get_link_status) { 2388 if (hw->mac.type == e1000_pch_spt) 2389 msec_delay(50); 2390 /* Do the work to read phy */ 2391 e1000_check_for_link(hw); 2392 link_check = !hw->mac.get_link_status; 2393 if (link_check) /* ESB2 fix */ 2394 e1000_cfg_on_link_up(hw); 2395 } else 2396 link_check = TRUE; 2397 break; 2398 case e1000_media_type_fiber: 2399 e1000_check_for_link(hw); 2400 link_check = (E1000_READ_REG(hw, E1000_STATUS) & 2401 E1000_STATUS_LU); 2402 break; 2403 case e1000_media_type_internal_serdes: 2404 e1000_check_for_link(hw); 2405 link_check = adapter->hw.mac.serdes_has_link; 2406 break; 2407 default: 2408 case e1000_media_type_unknown: 2409 break; 2410 } 2411 2412 /* Now check for a transition */ 2413 if (link_check && (adapter->link_active == 0)) { 2414 e1000_get_speed_and_duplex(hw, &adapter->link_speed, 2415 &adapter->link_duplex); 2416 /* Check if we must disable SPEED_MODE bit on PCI-E */ 2417 if ((adapter->link_speed != SPEED_1000) && 2418 ((hw->mac.type == e1000_82571) || 2419 (hw->mac.type == e1000_82572))) { 2420 int tarc0; 2421 tarc0 = E1000_READ_REG(hw, E1000_TARC(0)); 2422 tarc0 &= ~TARC_SPEED_MODE_BIT; 2423 E1000_WRITE_REG(hw, E1000_TARC(0), tarc0); 2424 } 2425 if (bootverbose) 2426 
device_printf(dev, "Link is up %d Mbps %s\n", 2427 adapter->link_speed, 2428 ((adapter->link_duplex == FULL_DUPLEX) ? 2429 "Full Duplex" : "Half Duplex")); 2430 adapter->link_active = 1; 2431 adapter->smartspeed = 0; 2432 if_setbaudrate(ifp, adapter->link_speed * 1000000); 2433 if_link_state_change(ifp, LINK_STATE_UP); 2434 } else if (!link_check && (adapter->link_active == 1)) { 2435 if_setbaudrate(ifp, 0); 2436 adapter->link_speed = 0; 2437 adapter->link_duplex = 0; 2438 if (bootverbose) 2439 device_printf(dev, "Link is Down\n"); 2440 adapter->link_active = 0; 2441 /* Link down, disable hang detection */ 2442 for (int i = 0; i < adapter->num_queues; i++, txr++) 2443 txr->busy = EM_TX_IDLE; 2444 if_link_state_change(ifp, LINK_STATE_DOWN); 2445 } 2446 } 2447 2448 /********************************************************************* 2449 * 2450 * This routine disables all traffic on the adapter by issuing a 2451 * global reset on the MAC and deallocates TX/RX buffers. 2452 * 2453 * This routine should always be called with BOTH the CORE 2454 * and TX locks. 2455 **********************************************************************/ 2456 2457 static void 2458 em_stop(void *arg) 2459 { 2460 struct adapter *adapter = arg; 2461 if_t ifp = adapter->ifp; 2462 struct tx_ring *txr = adapter->tx_rings; 2463 2464 EM_CORE_LOCK_ASSERT(adapter); 2465 2466 INIT_DEBUGOUT("em_stop: begin"); 2467 2468 em_disable_intr(adapter); 2469 callout_stop(&adapter->timer); 2470 2471 /* Tell the stack that the interface is no longer active */ 2472 if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2473 2474 /* Disarm Hang Detection. */ 2475 for (int i = 0; i < adapter->num_queues; i++, txr++) { 2476 EM_TX_LOCK(txr); 2477 txr->busy = EM_TX_IDLE; 2478 EM_TX_UNLOCK(txr); 2479 } 2480 2481 /* I219 needs some special flushing to avoid hangs */ 2482 if (adapter->hw.mac.type == e1000_pch_spt) 2483 em_flush_desc_rings(adapter); 2484 2485 e1000_reset_hw(&adapter->hw); 2486 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); 2487 2488 e1000_led_off(&adapter->hw); 2489 e1000_cleanup_led(&adapter->hw); 2490 } 2491 2492 2493 /********************************************************************* 2494 * 2495 * Determine hardware revision. 
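 * (For reference: PCIR_COMMAND is the 16-bit command word at config offset
 * 0x04, PCIR_REVID the 8-bit revision ID at 0x08, and the subsystem
 * vendor/device IDs sit at offsets 0x2c/0x2e.)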
2496 * 2497 **********************************************************************/ 2498 static void 2499 em_identify_hardware(struct adapter *adapter) 2500 { 2501 device_t dev = adapter->dev; 2502 2503 /* Make sure our PCI config space has the necessary stuff set */ 2504 pci_enable_busmaster(dev); 2505 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); 2506 2507 /* Save off the information about this board */ 2508 adapter->hw.vendor_id = pci_get_vendor(dev); 2509 adapter->hw.device_id = pci_get_device(dev); 2510 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); 2511 adapter->hw.subsystem_vendor_id = 2512 pci_read_config(dev, PCIR_SUBVEND_0, 2); 2513 adapter->hw.subsystem_device_id = 2514 pci_read_config(dev, PCIR_SUBDEV_0, 2); 2515 2516 /* Do Shared Code Init and Setup */ 2517 if (e1000_set_mac_type(&adapter->hw)) { 2518 device_printf(dev, "Setup init failure\n"); 2519 return; 2520 } 2521 } 2522 2523 static int 2524 em_allocate_pci_resources(struct adapter *adapter) 2525 { 2526 device_t dev = adapter->dev; 2527 int rid; 2528 2529 rid = PCIR_BAR(0); 2530 adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 2531 &rid, RF_ACTIVE); 2532 if (adapter->memory == NULL) { 2533 device_printf(dev, "Unable to allocate bus resource: memory\n"); 2534 return (ENXIO); 2535 } 2536 adapter->osdep.mem_bus_space_tag = 2537 rman_get_bustag(adapter->memory); 2538 adapter->osdep.mem_bus_space_handle = 2539 rman_get_bushandle(adapter->memory); 2540 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; 2541 2542 adapter->hw.back = &adapter->osdep; 2543 2544 return (0); 2545 } 2546 2547 /********************************************************************* 2548 * 2549 * Setup the Legacy or MSI Interrupt handler 2550 * 2551 **********************************************************************/ 2552 int 2553 em_allocate_legacy(struct adapter *adapter) 2554 { 2555 device_t dev = adapter->dev; 2556 struct tx_ring *txr = adapter->tx_rings; 2557 int error, rid = 0; 2558 2559 /* Manually turn off all interrupts */ 2560 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); 2561 2562 if (adapter->msix == 1) /* using MSI */ 2563 rid = 1; 2564 /* We allocate a single interrupt resource */ 2565 adapter->res = bus_alloc_resource_any(dev, 2566 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); 2567 if (adapter->res == NULL) { 2568 device_printf(dev, "Unable to allocate bus resource: " 2569 "interrupt\n"); 2570 return (ENXIO); 2571 } 2572 2573 /* 2574 * Allocate a fast interrupt and the associated 2575 * deferred processing contexts. 
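	 * em_irq_fast() is installed as an interrupt filter, so it must not
	 * sleep; the actual RX/TX cleanup is deferred to the que_task and
	 * tx_task handlers created here.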
2576 */ 2577 TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter); 2578 adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT, 2579 taskqueue_thread_enqueue, &adapter->tq); 2580 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que", 2581 device_get_nameunit(adapter->dev)); 2582 /* Use a TX only tasklet for local timer */ 2583 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); 2584 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, 2585 taskqueue_thread_enqueue, &txr->tq); 2586 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq", 2587 device_get_nameunit(adapter->dev)); 2588 TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter); 2589 if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET, 2590 em_irq_fast, NULL, adapter, &adapter->tag)) != 0) { 2591 device_printf(dev, "Failed to register fast interrupt " 2592 "handler: %d\n", error); 2593 taskqueue_free(adapter->tq); 2594 adapter->tq = NULL; 2595 return (error); 2596 } 2597 2598 return (0); 2599 } 2600 2601 /********************************************************************* 2602 * 2603 * Setup the MSIX Interrupt handlers 2604 * This is not really Multiqueue, rather 2605 * its just separate interrupt vectors 2606 * for TX, RX, and Link. 2607 * 2608 **********************************************************************/ 2609 int 2610 em_allocate_msix(struct adapter *adapter) 2611 { 2612 device_t dev = adapter->dev; 2613 struct tx_ring *txr = adapter->tx_rings; 2614 struct rx_ring *rxr = adapter->rx_rings; 2615 int error, rid, vector = 0; 2616 int cpu_id = 0; 2617 2618 2619 /* Make sure all interrupts are disabled */ 2620 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); 2621 2622 /* First set up ring resources */ 2623 for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) { 2624 2625 /* RX ring */ 2626 rid = vector + 1; 2627 2628 rxr->res = bus_alloc_resource_any(dev, 2629 SYS_RES_IRQ, &rid, RF_ACTIVE); 2630 if (rxr->res == NULL) { 2631 device_printf(dev, 2632 "Unable to allocate bus resource: " 2633 "RX MSIX Interrupt %d\n", i); 2634 return (ENXIO); 2635 } 2636 if ((error = bus_setup_intr(dev, rxr->res, 2637 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx, 2638 rxr, &rxr->tag)) != 0) { 2639 device_printf(dev, "Failed to register RX handler"); 2640 return (error); 2641 } 2642 #if __FreeBSD_version >= 800504 2643 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i); 2644 #endif 2645 rxr->msix = vector; 2646 2647 if (em_last_bind_cpu < 0) 2648 em_last_bind_cpu = CPU_FIRST(); 2649 cpu_id = em_last_bind_cpu; 2650 bus_bind_intr(dev, rxr->res, cpu_id); 2651 2652 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr); 2653 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT, 2654 taskqueue_thread_enqueue, &rxr->tq); 2655 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)", 2656 device_get_nameunit(adapter->dev), cpu_id); 2657 /* 2658 ** Set the bit to enable interrupt 2659 ** in E1000_IMS -- bits 20 and 21 2660 ** are for RX0 and RX1, note this has 2661 ** NOTHING to do with the MSIX vector 2662 */ 2663 rxr->ims = 1 << (20 + i); 2664 adapter->ims |= rxr->ims; 2665 adapter->ivars |= (8 | rxr->msix) << (i * 4); 2666 2667 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); 2668 } 2669 2670 for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) { 2671 /* TX ring */ 2672 rid = vector + 1; 2673 txr->res = bus_alloc_resource_any(dev, 2674 SYS_RES_IRQ, &rid, RF_ACTIVE); 2675 if (txr->res == NULL) { 2676 device_printf(dev, 2677 "Unable to allocate bus resource: " 2678 "TX MSIX Interrupt %d\n", i); 2679 return 
(ENXIO); 2680 } 2681 if ((error = bus_setup_intr(dev, txr->res, 2682 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx, 2683 txr, &txr->tag)) != 0) { 2684 device_printf(dev, "Failed to register TX handler"); 2685 return (error); 2686 } 2687 #if __FreeBSD_version >= 800504 2688 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i); 2689 #endif 2690 txr->msix = vector; 2691 2692 if (em_last_bind_cpu < 0) 2693 em_last_bind_cpu = CPU_FIRST(); 2694 cpu_id = em_last_bind_cpu; 2695 bus_bind_intr(dev, txr->res, cpu_id); 2696 2697 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); 2698 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, 2699 taskqueue_thread_enqueue, &txr->tq); 2700 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)", 2701 device_get_nameunit(adapter->dev), cpu_id); 2702 /* 2703 ** Set the bit to enable interrupt 2704 ** in E1000_IMS -- bits 22 and 23 2705 ** are for TX0 and TX1, note this has 2706 ** NOTHING to do with the MSIX vector 2707 */ 2708 txr->ims = 1 << (22 + i); 2709 adapter->ims |= txr->ims; 2710 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4)); 2711 2712 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); 2713 } 2714 2715 /* Link interrupt */ 2716 rid = vector + 1; 2717 adapter->res = bus_alloc_resource_any(dev, 2718 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); 2719 if (!adapter->res) { 2720 device_printf(dev,"Unable to allocate " 2721 "bus resource: Link interrupt [%d]\n", rid); 2722 return (ENXIO); 2723 } 2724 /* Set the link handler function */ 2725 error = bus_setup_intr(dev, adapter->res, 2726 INTR_TYPE_NET | INTR_MPSAFE, NULL, 2727 em_msix_link, adapter, &adapter->tag); 2728 if (error) { 2729 adapter->res = NULL; 2730 device_printf(dev, "Failed to register LINK handler"); 2731 return (error); 2732 } 2733 #if __FreeBSD_version >= 800504 2734 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); 2735 #endif 2736 adapter->linkvec = vector; 2737 adapter->ivars |= (8 | vector) << 16; 2738 adapter->ivars |= 0x80000000; 2739 2740 return (0); 2741 } 2742 2743 2744 static void 2745 em_free_pci_resources(struct adapter *adapter) 2746 { 2747 device_t dev = adapter->dev; 2748 struct tx_ring *txr; 2749 struct rx_ring *rxr; 2750 int rid; 2751 2752 2753 /* 2754 ** Release all the queue interrupt resources: 2755 */ 2756 for (int i = 0; i < adapter->num_queues; i++) { 2757 txr = &adapter->tx_rings[i]; 2758 /* an early abort? */ 2759 if (txr == NULL) 2760 break; 2761 rid = txr->msix +1; 2762 if (txr->tag != NULL) { 2763 bus_teardown_intr(dev, txr->res, txr->tag); 2764 txr->tag = NULL; 2765 } 2766 if (txr->res != NULL) 2767 bus_release_resource(dev, SYS_RES_IRQ, 2768 rid, txr->res); 2769 2770 rxr = &adapter->rx_rings[i]; 2771 /* an early abort? */ 2772 if (rxr == NULL) 2773 break; 2774 rid = rxr->msix +1; 2775 if (rxr->tag != NULL) { 2776 bus_teardown_intr(dev, rxr->res, rxr->tag); 2777 rxr->tag = NULL; 2778 } 2779 if (rxr->res != NULL) 2780 bus_release_resource(dev, SYS_RES_IRQ, 2781 rid, rxr->res); 2782 } 2783 2784 if (adapter->linkvec) /* we are doing MSIX */ 2785 rid = adapter->linkvec + 1; 2786 else 2787 (adapter->msix != 0) ? 
(rid = 1):(rid = 0); 2788 2789 if (adapter->tag != NULL) { 2790 bus_teardown_intr(dev, adapter->res, adapter->tag); 2791 adapter->tag = NULL; 2792 } 2793 2794 if (adapter->res != NULL) 2795 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); 2796 2797 2798 if (adapter->msix) 2799 pci_release_msi(dev); 2800 2801 if (adapter->msix_mem != NULL) 2802 bus_release_resource(dev, SYS_RES_MEMORY, 2803 PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); 2804 2805 if (adapter->memory != NULL) 2806 bus_release_resource(dev, SYS_RES_MEMORY, 2807 PCIR_BAR(0), adapter->memory); 2808 2809 if (adapter->flash != NULL) 2810 bus_release_resource(dev, SYS_RES_MEMORY, 2811 EM_FLASH, adapter->flash); 2812 } 2813 2814 /* 2815 * Setup MSI or MSI/X 2816 */ 2817 static int 2818 em_setup_msix(struct adapter *adapter) 2819 { 2820 device_t dev = adapter->dev; 2821 int val; 2822 2823 /* Nearly always going to use one queue */ 2824 adapter->num_queues = 1; 2825 2826 /* 2827 ** Try using MSI-X for Hartwell adapters 2828 */ 2829 if ((adapter->hw.mac.type == e1000_82574) && 2830 (em_enable_msix == TRUE)) { 2831 #ifdef EM_MULTIQUEUE 2832 adapter->num_queues = (em_num_queues == 1) ? 1 : 2; 2833 if (adapter->num_queues > 1) 2834 em_enable_vectors_82574(adapter); 2835 #endif 2836 /* Map the MSIX BAR */ 2837 int rid = PCIR_BAR(EM_MSIX_BAR); 2838 adapter->msix_mem = bus_alloc_resource_any(dev, 2839 SYS_RES_MEMORY, &rid, RF_ACTIVE); 2840 if (adapter->msix_mem == NULL) { 2841 /* May not be enabled */ 2842 device_printf(adapter->dev, 2843 "Unable to map MSIX table \n"); 2844 goto msi; 2845 } 2846 val = pci_msix_count(dev); 2847 2848 #ifdef EM_MULTIQUEUE 2849 /* We need 5 vectors in the multiqueue case */ 2850 if (adapter->num_queues > 1 ) { 2851 if (val >= 5) 2852 val = 5; 2853 else { 2854 adapter->num_queues = 1; 2855 device_printf(adapter->dev, 2856 "Insufficient MSIX vectors for >1 queue, " 2857 "using single queue...\n"); 2858 goto msix_one; 2859 } 2860 } else { 2861 msix_one: 2862 #endif 2863 if (val >= 3) 2864 val = 3; 2865 else { 2866 device_printf(adapter->dev, 2867 "Insufficient MSIX vectors, using MSI\n"); 2868 goto msi; 2869 } 2870 #ifdef EM_MULTIQUEUE 2871 } 2872 #endif 2873 2874 if ((pci_alloc_msix(dev, &val) == 0)) { 2875 device_printf(adapter->dev, 2876 "Using MSIX interrupts " 2877 "with %d vectors\n", val); 2878 return (val); 2879 } 2880 2881 /* 2882 ** If MSIX alloc failed or provided us with 2883 ** less than needed, free and fall through to MSI 2884 */ 2885 pci_release_msi(dev); 2886 } 2887 msi: 2888 if (adapter->msix_mem != NULL) { 2889 bus_release_resource(dev, SYS_RES_MEMORY, 2890 PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); 2891 adapter->msix_mem = NULL; 2892 } 2893 val = 1; 2894 if (pci_alloc_msi(dev, &val) == 0) { 2895 device_printf(adapter->dev, "Using an MSI interrupt\n"); 2896 return (val); 2897 } 2898 /* Should only happen due to manual configuration */ 2899 device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n"); 2900 return (0); 2901 } 2902 2903 2904 /* 2905 ** The 3 following flush routines are used as a workaround in the 2906 ** I219 client parts and only for them. 2907 ** 2908 ** em_flush_tx_ring - remove all descriptors from the tx_ring 2909 ** 2910 ** We want to clear all pending descriptors from the TX ring. 2911 ** zeroing happens when the HW reads the regs. We assign the ring itself as 2912 ** the data of the next descriptor. We don't care about the data we are about 2913 ** to reset the HW. 
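** Only one dummy descriptor is queued (512 bytes, pointing at the ring's own
** physical address); advancing TDT past it is enough to make the hardware
** fetch it and drain whatever else was pending.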
2914 */ 2915 static void 2916 em_flush_tx_ring(struct adapter *adapter) 2917 { 2918 struct e1000_hw *hw = &adapter->hw; 2919 struct tx_ring *txr = adapter->tx_rings; 2920 struct e1000_tx_desc *txd; 2921 u32 tctl, txd_lower = E1000_TXD_CMD_IFCS; 2922 u16 size = 512; 2923 2924 tctl = E1000_READ_REG(hw, E1000_TCTL); 2925 E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN); 2926 2927 txd = &txr->tx_base[txr->next_avail_desc++]; 2928 if (txr->next_avail_desc == adapter->num_tx_desc) 2929 txr->next_avail_desc = 0; 2930 2931 /* Just use the ring as a dummy buffer addr */ 2932 txd->buffer_addr = txr->txdma.dma_paddr; 2933 txd->lower.data = htole32(txd_lower | size); 2934 txd->upper.data = 0; 2935 2936 /* flush descriptors to memory before notifying the HW */ 2937 wmb(); 2938 2939 E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc); 2940 mb(); 2941 usec_delay(250); 2942 } 2943 2944 /* 2945 ** em_flush_rx_ring - remove all descriptors from the rx_ring 2946 ** 2947 ** Mark all descriptors in the RX ring as consumed and disable the rx ring 2948 */ 2949 static void 2950 em_flush_rx_ring(struct adapter *adapter) 2951 { 2952 struct e1000_hw *hw = &adapter->hw; 2953 u32 rctl, rxdctl; 2954 2955 rctl = E1000_READ_REG(hw, E1000_RCTL); 2956 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); 2957 E1000_WRITE_FLUSH(hw); 2958 usec_delay(150); 2959 2960 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); 2961 /* zero the lower 14 bits (prefetch and host thresholds) */ 2962 rxdctl &= 0xffffc000; 2963 /* 2964 * update thresholds: prefetch threshold to 31, host threshold to 1 2965 * and make sure the granularity is "descriptors" and not "cache lines" 2966 */ 2967 rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC); 2968 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl); 2969 2970 /* momentarily enable the RX ring for the changes to take effect */ 2971 E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN); 2972 E1000_WRITE_FLUSH(hw); 2973 usec_delay(150); 2974 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); 2975 } 2976 2977 /* 2978 ** em_flush_desc_rings - remove all descriptors from the descriptor rings 2979 ** 2980 ** In i219, the descriptor rings must be emptied before resetting the HW 2981 ** or before changing the device state to D3 during runtime (runtime PM). 
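** The TX ring is always flushed first; the RX ring is flushed only if the
** FLUSH_DESC_REQUIRED bit in the descriptor-ring status word is still set
** afterwards.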
2982 ** 2983 ** Failure to do this will cause the HW to enter a unit hang state which can 2984 ** only be released by PCI reset on the device 2985 ** 2986 */ 2987 static void 2988 em_flush_desc_rings(struct adapter *adapter) 2989 { 2990 struct e1000_hw *hw = &adapter->hw; 2991 device_t dev = adapter->dev; 2992 u16 hang_state; 2993 u32 fext_nvm11, tdlen; 2994 2995 /* First, disable MULR fix in FEXTNVM11 */ 2996 fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11); 2997 fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX; 2998 E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11); 2999 3000 /* do nothing if we're not in faulty state, or if the queue is empty */ 3001 tdlen = E1000_READ_REG(hw, E1000_TDLEN(0)); 3002 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2); 3003 if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen) 3004 return; 3005 em_flush_tx_ring(adapter); 3006 3007 /* recheck, maybe the fault is caused by the rx ring */ 3008 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2); 3009 if (hang_state & FLUSH_DESC_REQUIRED) 3010 em_flush_rx_ring(adapter); 3011 } 3012 3013 3014 /********************************************************************* 3015 * 3016 * Initialize the hardware to a configuration 3017 * as specified by the adapter structure. 3018 * 3019 **********************************************************************/ 3020 static void 3021 em_reset(struct adapter *adapter) 3022 { 3023 device_t dev = adapter->dev; 3024 if_t ifp = adapter->ifp; 3025 struct e1000_hw *hw = &adapter->hw; 3026 u16 rx_buffer_size; 3027 u32 pba; 3028 3029 INIT_DEBUGOUT("em_reset: begin"); 3030 3031 /* Set up smart power down as default off on newer adapters. */ 3032 if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 || 3033 hw->mac.type == e1000_82572)) { 3034 u16 phy_tmp = 0; 3035 3036 /* Speed up time to link by disabling smart power down. */ 3037 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp); 3038 phy_tmp &= ~IGP02E1000_PM_SPD; 3039 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp); 3040 } 3041 3042 /* 3043 * Packet Buffer Allocation (PBA) 3044 * Writing PBA sets the receive portion of the buffer 3045 * the remainder is used for the transmit buffer. 3046 */ 3047 switch (hw->mac.type) { 3048 /* Total Packet Buffer on these is 48K */ 3049 case e1000_82571: 3050 case e1000_82572: 3051 case e1000_80003es2lan: 3052 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */ 3053 break; 3054 case e1000_82573: /* 82573: Total Packet Buffer is 32K */ 3055 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */ 3056 break; 3057 case e1000_82574: 3058 case e1000_82583: 3059 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */ 3060 break; 3061 case e1000_ich8lan: 3062 pba = E1000_PBA_8K; 3063 break; 3064 case e1000_ich9lan: 3065 case e1000_ich10lan: 3066 /* Boost Receive side for jumbo frames */ 3067 if (adapter->hw.mac.max_frame_size > 4096) 3068 pba = E1000_PBA_14K; 3069 else 3070 pba = E1000_PBA_10K; 3071 break; 3072 case e1000_pchlan: 3073 case e1000_pch2lan: 3074 case e1000_pch_lpt: 3075 case e1000_pch_spt: 3076 pba = E1000_PBA_26K; 3077 break; 3078 default: 3079 if (adapter->hw.mac.max_frame_size > 8192) 3080 pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ 3081 else 3082 pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ 3083 } 3084 E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); 3085 3086 /* 3087 * These parameters control the automatic generation (Tx) and 3088 * response (Rx) to Ethernet PAUSE frames. 
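	 * (Hedged worked example of the settings computed below: assuming a
	 * part whose PBA was just set to 26K and a standard 1518-byte max
	 * frame, rx_buffer_size is 26 * 1024 = 26624, high_water is
	 * 26624 - roundup2(1518, 1024) = 24576, and low_water is
	 * 24576 - 1500 = 23076.)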
3089 * - High water mark should allow for at least two frames to be 3090 * received after sending an XOFF. 3091 * - Low water mark works best when it is very near the high water mark. 3092 * This allows the receiver to restart by sending XON when it has 3093 * drained a bit. Here we use an arbitrary value of 1500 which will 3094 * restart after one full frame is pulled from the buffer. There 3095 * could be several smaller frames in the buffer and if so they will 3096 * not trigger the XON until their total number reduces the buffer 3097 * by 1500. 3098 * - The pause time is fairly large at 1000 x 512ns = 512 usec. 3099 */ 3100 rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 ); 3101 hw->fc.high_water = rx_buffer_size - 3102 roundup2(adapter->hw.mac.max_frame_size, 1024); 3103 hw->fc.low_water = hw->fc.high_water - 1500; 3104 3105 if (adapter->fc) /* locally set flow control value? */ 3106 hw->fc.requested_mode = adapter->fc; 3107 else 3108 hw->fc.requested_mode = e1000_fc_full; 3109 3110 if (hw->mac.type == e1000_80003es2lan) 3111 hw->fc.pause_time = 0xFFFF; 3112 else 3113 hw->fc.pause_time = EM_FC_PAUSE_TIME; 3114 3115 hw->fc.send_xon = TRUE; 3116 3117 /* Device specific overrides/settings */ 3118 switch (hw->mac.type) { 3119 case e1000_pchlan: 3120 /* Workaround: no TX flow ctrl for PCH */ 3121 hw->fc.requested_mode = e1000_fc_rx_pause; 3122 hw->fc.pause_time = 0xFFFF; /* override */ 3123 if (if_getmtu(ifp) > ETHERMTU) { 3124 hw->fc.high_water = 0x3500; 3125 hw->fc.low_water = 0x1500; 3126 } else { 3127 hw->fc.high_water = 0x5000; 3128 hw->fc.low_water = 0x3000; 3129 } 3130 hw->fc.refresh_time = 0x1000; 3131 break; 3132 case e1000_pch2lan: 3133 case e1000_pch_lpt: 3134 case e1000_pch_spt: 3135 hw->fc.high_water = 0x5C20; 3136 hw->fc.low_water = 0x5048; 3137 hw->fc.pause_time = 0x0650; 3138 hw->fc.refresh_time = 0x0400; 3139 /* Jumbos need adjusted PBA */ 3140 if (if_getmtu(ifp) > ETHERMTU) 3141 E1000_WRITE_REG(hw, E1000_PBA, 12); 3142 else 3143 E1000_WRITE_REG(hw, E1000_PBA, 26); 3144 break; 3145 case e1000_ich9lan: 3146 case e1000_ich10lan: 3147 if (if_getmtu(ifp) > ETHERMTU) { 3148 hw->fc.high_water = 0x2800; 3149 hw->fc.low_water = hw->fc.high_water - 8; 3150 break; 3151 } 3152 /* else fall thru */ 3153 default: 3154 if (hw->mac.type == e1000_80003es2lan) 3155 hw->fc.pause_time = 0xFFFF; 3156 break; 3157 } 3158 3159 /* I219 needs some special flushing to avoid hangs */ 3160 if (hw->mac.type == e1000_pch_spt) 3161 em_flush_desc_rings(adapter); 3162 3163 /* Issue a global reset */ 3164 e1000_reset_hw(hw); 3165 E1000_WRITE_REG(hw, E1000_WUC, 0); 3166 em_disable_aspm(adapter); 3167 /* and a re-init */ 3168 if (e1000_init_hw(hw) < 0) { 3169 device_printf(dev, "Hardware Initialization Failed\n"); 3170 return; 3171 } 3172 3173 E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN); 3174 e1000_get_phy_info(hw); 3175 e1000_check_for_link(hw); 3176 return; 3177 } 3178 3179 /********************************************************************* 3180 * 3181 * Setup networking device structure and register an interface. 
 *
 **********************************************************************/
static int
em_setup_interface(device_t dev, struct adapter *adapter)
{
	if_t ifp;

	INIT_DEBUGOUT("em_setup_interface: begin");

	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not allocate ifnet structure\n");
		return (-1);
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	if_setdev(ifp, dev);
	if_setinitfn(ifp, em_init);
	if_setsoftc(ifp, adapter);
	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
	if_setioctlfn(ifp, em_ioctl);
	if_setgetcounterfn(ifp, em_get_counter);

	/* TSO parameters */
	ifp->if_hw_tsomax = IP_MAXPACKET;
	/* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
	ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
	ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;

#ifdef EM_MULTIQUEUE
	/* Multiqueue stack interface */
	if_settransmitfn(ifp, em_mq_start);
	if_setqflushfn(ifp, em_qflush);
#else
	if_setstartfn(ifp, em_start);
	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
	if_setsendqready(ifp);
#endif

	ether_ifattach(ifp, adapter->hw.mac.addr);

	if_setcapabilities(ifp, 0);
	if_setcapenable(ifp, 0);


	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
	    IFCAP_TSO4, 0);
	/*
	 * Tell the upper layer(s) we
	 * support full VLAN capability
	 */
	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
	    IFCAP_VLAN_MTU, 0);
	if_setcapenable(ifp, if_getcapabilities(ifp));

	/*
	** Don't turn this on by default: if VLANs are created on another
	** pseudo device (e.g. lagg) then VLAN events are not passed through,
	** breaking operation, but with the HW filter off it works.  If you
	** are using VLANs directly on the em driver you can enable this and
	** get full hardware tag filtering.
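	** It can still be switched on at runtime if desired, e.g. (assuming
	** the interface is em0): "ifconfig em0 vlanhwfilter".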
3244 */ 3245 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0); 3246 3247 #ifdef DEVICE_POLLING 3248 if_setcapabilitiesbit(ifp, IFCAP_POLLING,0); 3249 #endif 3250 3251 /* Enable only WOL MAGIC by default */ 3252 if (adapter->wol) { 3253 if_setcapabilitiesbit(ifp, IFCAP_WOL, 0); 3254 if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0); 3255 } 3256 3257 /* 3258 * Specify the media types supported by this adapter and register 3259 * callbacks to update media and link information 3260 */ 3261 ifmedia_init(&adapter->media, IFM_IMASK, 3262 em_media_change, em_media_status); 3263 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || 3264 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { 3265 u_char fiber_type = IFM_1000_SX; /* default type */ 3266 3267 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 3268 0, NULL); 3269 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL); 3270 } else { 3271 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); 3272 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 3273 0, NULL); 3274 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 3275 0, NULL); 3276 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 3277 0, NULL); 3278 if (adapter->hw.phy.type != e1000_phy_ife) { 3279 ifmedia_add(&adapter->media, 3280 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); 3281 ifmedia_add(&adapter->media, 3282 IFM_ETHER | IFM_1000_T, 0, NULL); 3283 } 3284 } 3285 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); 3286 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); 3287 return (0); 3288 } 3289 3290 3291 /* 3292 * Manage DMA'able memory. 3293 */ 3294 static void 3295 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 3296 { 3297 if (error) 3298 return; 3299 *(bus_addr_t *) arg = segs[0].ds_addr; 3300 } 3301 3302 static int 3303 em_dma_malloc(struct adapter *adapter, bus_size_t size, 3304 struct em_dma_alloc *dma, int mapflags) 3305 { 3306 int error; 3307 3308 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ 3309 EM_DBA_ALIGN, 0, /* alignment, bounds */ 3310 BUS_SPACE_MAXADDR, /* lowaddr */ 3311 BUS_SPACE_MAXADDR, /* highaddr */ 3312 NULL, NULL, /* filter, filterarg */ 3313 size, /* maxsize */ 3314 1, /* nsegments */ 3315 size, /* maxsegsize */ 3316 0, /* flags */ 3317 NULL, /* lockfunc */ 3318 NULL, /* lockarg */ 3319 &dma->dma_tag); 3320 if (error) { 3321 device_printf(adapter->dev, 3322 "%s: bus_dma_tag_create failed: %d\n", 3323 __func__, error); 3324 goto fail_0; 3325 } 3326 3327 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, 3328 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); 3329 if (error) { 3330 device_printf(adapter->dev, 3331 "%s: bus_dmamem_alloc(%ju) failed: %d\n", 3332 __func__, (uintmax_t)size, error); 3333 goto fail_2; 3334 } 3335 3336 dma->dma_paddr = 0; 3337 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, 3338 size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); 3339 if (error || dma->dma_paddr == 0) { 3340 device_printf(adapter->dev, 3341 "%s: bus_dmamap_load failed: %d\n", 3342 __func__, error); 3343 goto fail_3; 3344 } 3345 3346 return (0); 3347 3348 fail_3: 3349 bus_dmamap_unload(dma->dma_tag, dma->dma_map); 3350 fail_2: 3351 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); 3352 bus_dma_tag_destroy(dma->dma_tag); 3353 fail_0: 3354 dma->dma_tag = NULL; 3355 3356 return (error); 3357 } 3358 3359 static void 3360 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma) 3361 
{ 3362 if (dma->dma_tag == NULL) 3363 return; 3364 if (dma->dma_paddr != 0) { 3365 bus_dmamap_sync(dma->dma_tag, dma->dma_map, 3366 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 3367 bus_dmamap_unload(dma->dma_tag, dma->dma_map); 3368 dma->dma_paddr = 0; 3369 } 3370 if (dma->dma_vaddr != NULL) { 3371 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); 3372 dma->dma_vaddr = NULL; 3373 } 3374 bus_dma_tag_destroy(dma->dma_tag); 3375 dma->dma_tag = NULL; 3376 } 3377 3378 3379 /********************************************************************* 3380 * 3381 * Allocate memory for the transmit and receive rings, and then 3382 * the descriptors associated with each, called only once at attach. 3383 * 3384 **********************************************************************/ 3385 static int 3386 em_allocate_queues(struct adapter *adapter) 3387 { 3388 device_t dev = adapter->dev; 3389 struct tx_ring *txr = NULL; 3390 struct rx_ring *rxr = NULL; 3391 int rsize, tsize, error = E1000_SUCCESS; 3392 int txconf = 0, rxconf = 0; 3393 3394 3395 /* Allocate the TX ring struct memory */ 3396 if (!(adapter->tx_rings = 3397 (struct tx_ring *) malloc(sizeof(struct tx_ring) * 3398 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 3399 device_printf(dev, "Unable to allocate TX ring memory\n"); 3400 error = ENOMEM; 3401 goto fail; 3402 } 3403 3404 /* Now allocate the RX */ 3405 if (!(adapter->rx_rings = 3406 (struct rx_ring *) malloc(sizeof(struct rx_ring) * 3407 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 3408 device_printf(dev, "Unable to allocate RX ring memory\n"); 3409 error = ENOMEM; 3410 goto rx_fail; 3411 } 3412 3413 tsize = roundup2(adapter->num_tx_desc * 3414 sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); 3415 /* 3416 * Now set up the TX queues, txconf is needed to handle the 3417 * possibility that things fail midcourse and we need to 3418 * undo memory gracefully 3419 */ 3420 for (int i = 0; i < adapter->num_queues; i++, txconf++) { 3421 /* Set up some basics */ 3422 txr = &adapter->tx_rings[i]; 3423 txr->adapter = adapter; 3424 txr->me = i; 3425 3426 /* Initialize the TX lock */ 3427 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", 3428 device_get_nameunit(dev), txr->me); 3429 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); 3430 3431 if (em_dma_malloc(adapter, tsize, 3432 &txr->txdma, BUS_DMA_NOWAIT)) { 3433 device_printf(dev, 3434 "Unable to allocate TX Descriptor memory\n"); 3435 error = ENOMEM; 3436 goto err_tx_desc; 3437 } 3438 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr; 3439 bzero((void *)txr->tx_base, tsize); 3440 3441 if (em_allocate_transmit_buffers(txr)) { 3442 device_printf(dev, 3443 "Critical Failure setting up transmit buffers\n"); 3444 error = ENOMEM; 3445 goto err_tx_desc; 3446 } 3447 #if __FreeBSD_version >= 800000 3448 /* Allocate a buf ring */ 3449 txr->br = buf_ring_alloc(4096, M_DEVBUF, 3450 M_WAITOK, &txr->tx_mtx); 3451 #endif 3452 } 3453 3454 /* 3455 * Next the RX queues... 
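	 * (Sizing note: an extended RX descriptor is 16 bytes, so e.g. the
	 * default of 1024 descriptors needs 16 KB per ring before the
	 * EM_DBA_ALIGN roundup.)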
3456 */ 3457 rsize = roundup2(adapter->num_rx_desc * 3458 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); 3459 for (int i = 0; i < adapter->num_queues; i++, rxconf++) { 3460 rxr = &adapter->rx_rings[i]; 3461 rxr->adapter = adapter; 3462 rxr->me = i; 3463 3464 /* Initialize the RX lock */ 3465 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", 3466 device_get_nameunit(dev), txr->me); 3467 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); 3468 3469 if (em_dma_malloc(adapter, rsize, 3470 &rxr->rxdma, BUS_DMA_NOWAIT)) { 3471 device_printf(dev, 3472 "Unable to allocate RxDescriptor memory\n"); 3473 error = ENOMEM; 3474 goto err_rx_desc; 3475 } 3476 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr; 3477 bzero((void *)rxr->rx_base, rsize); 3478 3479 /* Allocate receive buffers for the ring*/ 3480 if (em_allocate_receive_buffers(rxr)) { 3481 device_printf(dev, 3482 "Critical Failure setting up receive buffers\n"); 3483 error = ENOMEM; 3484 goto err_rx_desc; 3485 } 3486 } 3487 3488 return (0); 3489 3490 err_rx_desc: 3491 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) 3492 em_dma_free(adapter, &rxr->rxdma); 3493 err_tx_desc: 3494 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) 3495 em_dma_free(adapter, &txr->txdma); 3496 free(adapter->rx_rings, M_DEVBUF); 3497 rx_fail: 3498 #if __FreeBSD_version >= 800000 3499 buf_ring_free(txr->br, M_DEVBUF); 3500 #endif 3501 free(adapter->tx_rings, M_DEVBUF); 3502 fail: 3503 return (error); 3504 } 3505 3506 3507 /********************************************************************* 3508 * 3509 * Allocate memory for tx_buffer structures. The tx_buffer stores all 3510 * the information needed to transmit a packet on the wire. This is 3511 * called only once at attach, setup is done every reset. 3512 * 3513 **********************************************************************/ 3514 static int 3515 em_allocate_transmit_buffers(struct tx_ring *txr) 3516 { 3517 struct adapter *adapter = txr->adapter; 3518 device_t dev = adapter->dev; 3519 struct em_txbuffer *txbuf; 3520 int error, i; 3521 3522 /* 3523 * Setup DMA descriptor areas. 3524 */ 3525 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 3526 1, 0, /* alignment, bounds */ 3527 BUS_SPACE_MAXADDR, /* lowaddr */ 3528 BUS_SPACE_MAXADDR, /* highaddr */ 3529 NULL, NULL, /* filter, filterarg */ 3530 EM_TSO_SIZE, /* maxsize */ 3531 EM_MAX_SCATTER, /* nsegments */ 3532 PAGE_SIZE, /* maxsegsize */ 3533 0, /* flags */ 3534 NULL, /* lockfunc */ 3535 NULL, /* lockfuncarg */ 3536 &txr->txtag))) { 3537 device_printf(dev,"Unable to allocate TX DMA tag\n"); 3538 goto fail; 3539 } 3540 3541 if (!(txr->tx_buffers = 3542 (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) * 3543 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { 3544 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 3545 error = ENOMEM; 3546 goto fail; 3547 } 3548 3549 /* Create the descriptor buffer dma maps */ 3550 txbuf = txr->tx_buffers; 3551 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { 3552 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); 3553 if (error != 0) { 3554 device_printf(dev, "Unable to create TX DMA map\n"); 3555 goto fail; 3556 } 3557 } 3558 3559 return 0; 3560 fail: 3561 /* We free all, it handles case where we are in the middle */ 3562 em_free_transmit_structures(adapter); 3563 return (error); 3564 } 3565 3566 /********************************************************************* 3567 * 3568 * Initialize a transmit ring. 
3569 * 3570 **********************************************************************/ 3571 static void 3572 em_setup_transmit_ring(struct tx_ring *txr) 3573 { 3574 struct adapter *adapter = txr->adapter; 3575 struct em_txbuffer *txbuf; 3576 int i; 3577 #ifdef DEV_NETMAP 3578 struct netmap_slot *slot; 3579 struct netmap_adapter *na = netmap_getna(adapter->ifp); 3580 #endif /* DEV_NETMAP */ 3581 3582 /* Clear the old descriptor contents */ 3583 EM_TX_LOCK(txr); 3584 #ifdef DEV_NETMAP 3585 slot = netmap_reset(na, NR_TX, txr->me, 0); 3586 #endif /* DEV_NETMAP */ 3587 3588 bzero((void *)txr->tx_base, 3589 (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc); 3590 /* Reset indices */ 3591 txr->next_avail_desc = 0; 3592 txr->next_to_clean = 0; 3593 3594 /* Free any existing tx buffers. */ 3595 txbuf = txr->tx_buffers; 3596 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { 3597 if (txbuf->m_head != NULL) { 3598 bus_dmamap_sync(txr->txtag, txbuf->map, 3599 BUS_DMASYNC_POSTWRITE); 3600 bus_dmamap_unload(txr->txtag, txbuf->map); 3601 m_freem(txbuf->m_head); 3602 txbuf->m_head = NULL; 3603 } 3604 #ifdef DEV_NETMAP 3605 if (slot) { 3606 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); 3607 uint64_t paddr; 3608 void *addr; 3609 3610 addr = PNMB(na, slot + si, &paddr); 3611 txr->tx_base[i].buffer_addr = htole64(paddr); 3612 /* reload the map for netmap mode */ 3613 netmap_load_map(na, txr->txtag, txbuf->map, addr); 3614 } 3615 #endif /* DEV_NETMAP */ 3616 3617 /* clear the watch index */ 3618 txbuf->next_eop = -1; 3619 } 3620 3621 /* Set number of descriptors available */ 3622 txr->tx_avail = adapter->num_tx_desc; 3623 txr->busy = EM_TX_IDLE; 3624 3625 /* Clear checksum offload context. */ 3626 txr->last_hw_offload = 0; 3627 txr->last_hw_ipcss = 0; 3628 txr->last_hw_ipcso = 0; 3629 txr->last_hw_tucss = 0; 3630 txr->last_hw_tucso = 0; 3631 3632 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 3633 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 3634 EM_TX_UNLOCK(txr); 3635 } 3636 3637 /********************************************************************* 3638 * 3639 * Initialize all transmit rings. 3640 * 3641 **********************************************************************/ 3642 static void 3643 em_setup_transmit_structures(struct adapter *adapter) 3644 { 3645 struct tx_ring *txr = adapter->tx_rings; 3646 3647 for (int i = 0; i < adapter->num_queues; i++, txr++) 3648 em_setup_transmit_ring(txr); 3649 3650 return; 3651 } 3652 3653 /********************************************************************* 3654 * 3655 * Enable transmit unit. 
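 * Each queue's TXDCTL is programmed below with PTHRESH=31, HTHRESH=1,
 * WTHRESH=1, LWTHRESH=1 and descriptor (not cache-line) granularity.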
3656 * 3657 **********************************************************************/ 3658 static void 3659 em_initialize_transmit_unit(struct adapter *adapter) 3660 { 3661 struct tx_ring *txr = adapter->tx_rings; 3662 struct e1000_hw *hw = &adapter->hw; 3663 u32 tctl, txdctl = 0, tarc, tipg = 0; 3664 3665 INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); 3666 3667 for (int i = 0; i < adapter->num_queues; i++, txr++) { 3668 u64 bus_addr = txr->txdma.dma_paddr; 3669 /* Base and Len of TX Ring */ 3670 E1000_WRITE_REG(hw, E1000_TDLEN(i), 3671 adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); 3672 E1000_WRITE_REG(hw, E1000_TDBAH(i), 3673 (u32)(bus_addr >> 32)); 3674 E1000_WRITE_REG(hw, E1000_TDBAL(i), 3675 (u32)bus_addr); 3676 /* Init the HEAD/TAIL indices */ 3677 E1000_WRITE_REG(hw, E1000_TDT(i), 0); 3678 E1000_WRITE_REG(hw, E1000_TDH(i), 0); 3679 3680 HW_DEBUGOUT2("Base = %x, Length = %x\n", 3681 E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)), 3682 E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); 3683 3684 txr->busy = EM_TX_IDLE; 3685 txdctl = 0; /* clear txdctl */ 3686 txdctl |= 0x1f; /* PTHRESH */ 3687 txdctl |= 1 << 8; /* HTHRESH */ 3688 txdctl |= 1 << 16;/* WTHRESH */ 3689 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */ 3690 txdctl |= E1000_TXDCTL_GRAN; 3691 txdctl |= 1 << 25; /* LWTHRESH */ 3692 3693 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); 3694 } 3695 3696 /* Set the default values for the Tx Inter Packet Gap timer */ 3697 switch (adapter->hw.mac.type) { 3698 case e1000_80003es2lan: 3699 tipg = DEFAULT_82543_TIPG_IPGR1; 3700 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << 3701 E1000_TIPG_IPGR2_SHIFT; 3702 break; 3703 default: 3704 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || 3705 (adapter->hw.phy.media_type == 3706 e1000_media_type_internal_serdes)) 3707 tipg = DEFAULT_82543_TIPG_IPGT_FIBER; 3708 else 3709 tipg = DEFAULT_82543_TIPG_IPGT_COPPER; 3710 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; 3711 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; 3712 } 3713 3714 E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); 3715 E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); 3716 3717 if(adapter->hw.mac.type >= e1000_82540) 3718 E1000_WRITE_REG(&adapter->hw, E1000_TADV, 3719 adapter->tx_abs_int_delay.value); 3720 3721 if ((adapter->hw.mac.type == e1000_82571) || 3722 (adapter->hw.mac.type == e1000_82572)) { 3723 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); 3724 tarc |= TARC_SPEED_MODE_BIT; 3725 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); 3726 } else if (adapter->hw.mac.type == e1000_80003es2lan) { 3727 /* errata: program both queues to unweighted RR */ 3728 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); 3729 tarc |= 1; 3730 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); 3731 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1)); 3732 tarc |= 1; 3733 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); 3734 } else if (adapter->hw.mac.type == e1000_82574) { 3735 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); 3736 tarc |= TARC_ERRATA_BIT; 3737 if ( adapter->num_queues > 1) { 3738 tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX); 3739 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); 3740 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); 3741 } else 3742 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); 3743 } 3744 3745 adapter->txd_cmd = E1000_TXD_CMD_IFCS; 3746 if (adapter->tx_int_delay.value > 0) 3747 adapter->txd_cmd |= E1000_TXD_CMD_IDE; 3748 3749 /* Program the Transmit Control Register */ 
3750 tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); 3751 tctl &= ~E1000_TCTL_CT; 3752 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | 3753 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); 3754 3755 if (adapter->hw.mac.type >= e1000_82571) 3756 tctl |= E1000_TCTL_MULR; 3757 3758 /* This write will effectively turn on the transmit unit. */ 3759 E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); 3760 3761 if (hw->mac.type == e1000_pch_spt) { 3762 u32 reg; 3763 reg = E1000_READ_REG(hw, E1000_IOSFPC); 3764 reg |= E1000_RCTL_RDMTS_HEX; 3765 E1000_WRITE_REG(hw, E1000_IOSFPC, reg); 3766 reg = E1000_READ_REG(hw, E1000_TARC(0)); 3767 reg |= E1000_TARC0_CB_MULTIQ_3_REQ; 3768 E1000_WRITE_REG(hw, E1000_TARC(0), reg); 3769 } 3770 } 3771 3772 3773 /********************************************************************* 3774 * 3775 * Free all transmit rings. 3776 * 3777 **********************************************************************/ 3778 static void 3779 em_free_transmit_structures(struct adapter *adapter) 3780 { 3781 struct tx_ring *txr = adapter->tx_rings; 3782 3783 for (int i = 0; i < adapter->num_queues; i++, txr++) { 3784 EM_TX_LOCK(txr); 3785 em_free_transmit_buffers(txr); 3786 em_dma_free(adapter, &txr->txdma); 3787 EM_TX_UNLOCK(txr); 3788 EM_TX_LOCK_DESTROY(txr); 3789 } 3790 3791 free(adapter->tx_rings, M_DEVBUF); 3792 } 3793 3794 /********************************************************************* 3795 * 3796 * Free transmit ring related data structures. 3797 * 3798 **********************************************************************/ 3799 static void 3800 em_free_transmit_buffers(struct tx_ring *txr) 3801 { 3802 struct adapter *adapter = txr->adapter; 3803 struct em_txbuffer *txbuf; 3804 3805 INIT_DEBUGOUT("free_transmit_ring: begin"); 3806 3807 if (txr->tx_buffers == NULL) 3808 return; 3809 3810 for (int i = 0; i < adapter->num_tx_desc; i++) { 3811 txbuf = &txr->tx_buffers[i]; 3812 if (txbuf->m_head != NULL) { 3813 bus_dmamap_sync(txr->txtag, txbuf->map, 3814 BUS_DMASYNC_POSTWRITE); 3815 bus_dmamap_unload(txr->txtag, 3816 txbuf->map); 3817 m_freem(txbuf->m_head); 3818 txbuf->m_head = NULL; 3819 if (txbuf->map != NULL) { 3820 bus_dmamap_destroy(txr->txtag, 3821 txbuf->map); 3822 txbuf->map = NULL; 3823 } 3824 } else if (txbuf->map != NULL) { 3825 bus_dmamap_unload(txr->txtag, 3826 txbuf->map); 3827 bus_dmamap_destroy(txr->txtag, 3828 txbuf->map); 3829 txbuf->map = NULL; 3830 } 3831 } 3832 #if __FreeBSD_version >= 800000 3833 if (txr->br != NULL) 3834 buf_ring_free(txr->br, M_DEVBUF); 3835 #endif 3836 if (txr->tx_buffers != NULL) { 3837 free(txr->tx_buffers, M_DEVBUF); 3838 txr->tx_buffers = NULL; 3839 } 3840 if (txr->txtag != NULL) { 3841 bus_dma_tag_destroy(txr->txtag); 3842 txr->txtag = NULL; 3843 } 3844 return; 3845 } 3846 3847 3848 /********************************************************************* 3849 * The offload context is protocol specific (TCP/UDP) and thus 3850 * only needs to be set when the protocol changes. The occasion 3851 * of a context change can be a performance detriment, and 3852 * might be better just disabled. The reason arises in the way 3853 * in which the controller supports pipelined requests from the 3854 * Tx data DMA. Up to four requests can be pipelined, and they may 3855 * belong to the same packet or to multiple packets. 
However all 3856 * requests for one packet are issued before a request is issued 3857 * for a subsequent packet and if a request for the next packet 3858 * requires a context change, that request will be stalled 3859 * until the previous request completes. This means setting up 3860 * a new context effectively disables pipelined Tx data DMA which 3861 * in turn greatly slow down performance to send small sized 3862 * frames. 3863 **********************************************************************/ 3864 static void 3865 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, 3866 struct ip *ip, u32 *txd_upper, u32 *txd_lower) 3867 { 3868 struct adapter *adapter = txr->adapter; 3869 struct e1000_context_desc *TXD = NULL; 3870 struct em_txbuffer *tx_buffer; 3871 int cur, hdr_len; 3872 u32 cmd = 0; 3873 u16 offload = 0; 3874 u8 ipcso, ipcss, tucso, tucss; 3875 3876 ipcss = ipcso = tucss = tucso = 0; 3877 hdr_len = ip_off + (ip->ip_hl << 2); 3878 cur = txr->next_avail_desc; 3879 3880 /* Setup of IP header checksum. */ 3881 if (mp->m_pkthdr.csum_flags & CSUM_IP) { 3882 *txd_upper |= E1000_TXD_POPTS_IXSM << 8; 3883 offload |= CSUM_IP; 3884 ipcss = ip_off; 3885 ipcso = ip_off + offsetof(struct ip, ip_sum); 3886 /* 3887 * Start offset for header checksum calculation. 3888 * End offset for header checksum calculation. 3889 * Offset of place to put the checksum. 3890 */ 3891 TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; 3892 TXD->lower_setup.ip_fields.ipcss = ipcss; 3893 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len); 3894 TXD->lower_setup.ip_fields.ipcso = ipcso; 3895 cmd |= E1000_TXD_CMD_IP; 3896 } 3897 3898 if (mp->m_pkthdr.csum_flags & CSUM_TCP) { 3899 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; 3900 *txd_upper |= E1000_TXD_POPTS_TXSM << 8; 3901 offload |= CSUM_TCP; 3902 tucss = hdr_len; 3903 tucso = hdr_len + offsetof(struct tcphdr, th_sum); 3904 /* 3905 * The 82574L can only remember the *last* context used 3906 * regardless of queue that it was use for. We cannot reuse 3907 * contexts on this hardware platform and must generate a new 3908 * context every time. 82574L hardware spec, section 7.2.6, 3909 * second note. 3910 */ 3911 if (adapter->num_queues < 2) { 3912 /* 3913 * Setting up new checksum offload context for every 3914 * frames takes a lot of processing time for hardware. 3915 * This also reduces performance a lot for small sized 3916 * frames so avoid it if driver can use previously 3917 * configured checksum offload context. 3918 */ 3919 if (txr->last_hw_offload == offload) { 3920 if (offload & CSUM_IP) { 3921 if (txr->last_hw_ipcss == ipcss && 3922 txr->last_hw_ipcso == ipcso && 3923 txr->last_hw_tucss == tucss && 3924 txr->last_hw_tucso == tucso) 3925 return; 3926 } else { 3927 if (txr->last_hw_tucss == tucss && 3928 txr->last_hw_tucso == tucso) 3929 return; 3930 } 3931 } 3932 txr->last_hw_offload = offload; 3933 txr->last_hw_tucss = tucss; 3934 txr->last_hw_tucso = tucso; 3935 } 3936 /* 3937 * Start offset for payload checksum calculation. 3938 * End offset for payload checksum calculation. 3939 * Offset of place to put the checksum. 
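 * As an illustration only: with a 14-byte Ethernet header and a
 * 20-byte IP header, hdr_len is 34, so tucss would be 34 and
 * tucso 34 + offsetof(struct tcphdr, th_sum) = 50.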
3940 */ 3941 TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; 3942 TXD->upper_setup.tcp_fields.tucss = hdr_len; 3943 TXD->upper_setup.tcp_fields.tucse = htole16(0); 3944 TXD->upper_setup.tcp_fields.tucso = tucso; 3945 cmd |= E1000_TXD_CMD_TCP; 3946 } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) { 3947 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; 3948 *txd_upper |= E1000_TXD_POPTS_TXSM << 8; 3949 tucss = hdr_len; 3950 tucso = hdr_len + offsetof(struct udphdr, uh_sum); 3951 /* 3952 * The 82574L can only remember the *last* context used 3953 * regardless of queue that it was use for. We cannot reuse 3954 * contexts on this hardware platform and must generate a new 3955 * context every time. 82574L hardware spec, section 7.2.6, 3956 * second note. 3957 */ 3958 if (adapter->num_queues < 2) { 3959 /* 3960 * Setting up new checksum offload context for every 3961 * frames takes a lot of processing time for hardware. 3962 * This also reduces performance a lot for small sized 3963 * frames so avoid it if driver can use previously 3964 * configured checksum offload context. 3965 */ 3966 if (txr->last_hw_offload == offload) { 3967 if (offload & CSUM_IP) { 3968 if (txr->last_hw_ipcss == ipcss && 3969 txr->last_hw_ipcso == ipcso && 3970 txr->last_hw_tucss == tucss && 3971 txr->last_hw_tucso == tucso) 3972 return; 3973 } else { 3974 if (txr->last_hw_tucss == tucss && 3975 txr->last_hw_tucso == tucso) 3976 return; 3977 } 3978 } 3979 txr->last_hw_offload = offload; 3980 txr->last_hw_tucss = tucss; 3981 txr->last_hw_tucso = tucso; 3982 } 3983 /* 3984 * Start offset for header checksum calculation. 3985 * End offset for header checksum calculation. 3986 * Offset of place to put the checksum. 3987 */ 3988 TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; 3989 TXD->upper_setup.tcp_fields.tucss = tucss; 3990 TXD->upper_setup.tcp_fields.tucse = htole16(0); 3991 TXD->upper_setup.tcp_fields.tucso = tucso; 3992 } 3993 3994 if (offload & CSUM_IP) { 3995 txr->last_hw_ipcss = ipcss; 3996 txr->last_hw_ipcso = ipcso; 3997 } 3998 3999 TXD->tcp_seg_setup.data = htole32(0); 4000 TXD->cmd_and_length = 4001 htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd); 4002 tx_buffer = &txr->tx_buffers[cur]; 4003 tx_buffer->m_head = NULL; 4004 tx_buffer->next_eop = -1; 4005 4006 if (++cur == adapter->num_tx_desc) 4007 cur = 0; 4008 4009 txr->tx_avail--; 4010 txr->next_avail_desc = cur; 4011 } 4012 4013 4014 /********************************************************************** 4015 * 4016 * Setup work for hardware segmentation offload (TSO) 4017 * 4018 **********************************************************************/ 4019 static void 4020 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, 4021 struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower) 4022 { 4023 struct adapter *adapter = txr->adapter; 4024 struct e1000_context_desc *TXD; 4025 struct em_txbuffer *tx_buffer; 4026 int cur, hdr_len; 4027 4028 /* 4029 * In theory we can use the same TSO context if and only if 4030 * frame is the same type(IP/TCP) and the same MSS. However 4031 * checking whether a frame has the same IP/TCP structure is 4032 * hard thing so just ignore that and always restablish a 4033 * new TSO context. 4034 */ 4035 hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2); 4036 *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */ 4037 E1000_TXD_DTYP_D | /* Data descr type */ 4038 E1000_TXD_CMD_TSE); /* Do TSE on this packet */ 4039 4040 /* IP and/or TCP header checksum calculation and insertion. 
*/ 4041 *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8; 4042 4043 cur = txr->next_avail_desc; 4044 tx_buffer = &txr->tx_buffers[cur]; 4045 TXD = (struct e1000_context_desc *) &txr->tx_base[cur]; 4046 4047 /* 4048 * Start offset for header checksum calculation. 4049 * End offset for header checksum calculation. 4050 * Offset of place put the checksum. 4051 */ 4052 TXD->lower_setup.ip_fields.ipcss = ip_off; 4053 TXD->lower_setup.ip_fields.ipcse = 4054 htole16(ip_off + (ip->ip_hl << 2) - 1); 4055 TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum); 4056 /* 4057 * Start offset for payload checksum calculation. 4058 * End offset for payload checksum calculation. 4059 * Offset of place to put the checksum. 4060 */ 4061 TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2); 4062 TXD->upper_setup.tcp_fields.tucse = 0; 4063 TXD->upper_setup.tcp_fields.tucso = 4064 ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum); 4065 /* 4066 * Payload size per packet w/o any headers. 4067 * Length of all headers up to payload. 4068 */ 4069 TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz); 4070 TXD->tcp_seg_setup.fields.hdr_len = hdr_len; 4071 4072 TXD->cmd_and_length = htole32(adapter->txd_cmd | 4073 E1000_TXD_CMD_DEXT | /* Extended descr */ 4074 E1000_TXD_CMD_TSE | /* TSE context */ 4075 E1000_TXD_CMD_IP | /* Do IP csum */ 4076 E1000_TXD_CMD_TCP | /* Do TCP checksum */ 4077 (mp->m_pkthdr.len - (hdr_len))); /* Total len */ 4078 4079 tx_buffer->m_head = NULL; 4080 tx_buffer->next_eop = -1; 4081 4082 if (++cur == adapter->num_tx_desc) 4083 cur = 0; 4084 4085 txr->tx_avail--; 4086 txr->next_avail_desc = cur; 4087 txr->tx_tso = TRUE; 4088 } 4089 4090 4091 /********************************************************************** 4092 * 4093 * Examine each tx_buffer in the used queue. If the hardware is done 4094 * processing the packet then free associated resources. The 4095 * tx_buffer is put back on the free queue. 4096 * 4097 **********************************************************************/ 4098 static void 4099 em_txeof(struct tx_ring *txr) 4100 { 4101 struct adapter *adapter = txr->adapter; 4102 int first, last, done, processed; 4103 struct em_txbuffer *tx_buffer; 4104 struct e1000_tx_desc *tx_desc, *eop_desc; 4105 if_t ifp = adapter->ifp; 4106 4107 EM_TX_LOCK_ASSERT(txr); 4108 #ifdef DEV_NETMAP 4109 if (netmap_tx_irq(ifp, txr->me)) 4110 return; 4111 #endif /* DEV_NETMAP */ 4112 4113 /* No work, make sure hang detection is disabled */ 4114 if (txr->tx_avail == adapter->num_tx_desc) { 4115 txr->busy = EM_TX_IDLE; 4116 return; 4117 } 4118 4119 processed = 0; 4120 first = txr->next_to_clean; 4121 tx_desc = &txr->tx_base[first]; 4122 tx_buffer = &txr->tx_buffers[first]; 4123 last = tx_buffer->next_eop; 4124 eop_desc = &txr->tx_base[last]; 4125 4126 /* 4127 * What this does is get the index of the 4128 * first descriptor AFTER the EOP of the 4129 * first packet, that way we can do the 4130 * simple comparison on the inner while loop. 
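 * For example, on a 1024-descriptor ring with the first packet's
 * EOP sitting in slot 1023, 'done' wraps to 0 and cleaning stops
 * once 'first' catches up to it.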
4131 */ 4132 if (++last == adapter->num_tx_desc) 4133 last = 0; 4134 done = last; 4135 4136 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 4137 BUS_DMASYNC_POSTREAD); 4138 4139 while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) { 4140 /* We clean the range of the packet */ 4141 while (first != done) { 4142 tx_desc->upper.data = 0; 4143 tx_desc->lower.data = 0; 4144 tx_desc->buffer_addr = 0; 4145 ++txr->tx_avail; 4146 ++processed; 4147 4148 if (tx_buffer->m_head) { 4149 bus_dmamap_sync(txr->txtag, 4150 tx_buffer->map, 4151 BUS_DMASYNC_POSTWRITE); 4152 bus_dmamap_unload(txr->txtag, 4153 tx_buffer->map); 4154 m_freem(tx_buffer->m_head); 4155 tx_buffer->m_head = NULL; 4156 } 4157 tx_buffer->next_eop = -1; 4158 4159 if (++first == adapter->num_tx_desc) 4160 first = 0; 4161 4162 tx_buffer = &txr->tx_buffers[first]; 4163 tx_desc = &txr->tx_base[first]; 4164 } 4165 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 4166 /* See if we can continue to the next packet */ 4167 last = tx_buffer->next_eop; 4168 if (last != -1) { 4169 eop_desc = &txr->tx_base[last]; 4170 /* Get new done point */ 4171 if (++last == adapter->num_tx_desc) last = 0; 4172 done = last; 4173 } else 4174 break; 4175 } 4176 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 4177 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 4178 4179 txr->next_to_clean = first; 4180 4181 /* 4182 ** Hang detection: we know there's work outstanding 4183 ** or the entry return would have been taken, so no 4184 ** descriptor processed here indicates a potential hang. 4185 ** The local timer will examine this and do a reset if needed. 4186 */ 4187 if (processed == 0) { 4188 if (txr->busy != EM_TX_HUNG) 4189 ++txr->busy; 4190 } else /* At least one descriptor was cleaned */ 4191 txr->busy = EM_TX_BUSY; /* note this clears HUNG */ 4192 4193 /* 4194 * If we have a minimum free, clear IFF_DRV_OACTIVE 4195 * to tell the stack that it is OK to send packets. 4196 * Notice that all writes of OACTIVE happen under the 4197 * TX lock which, with a single queue, guarantees 4198 * sanity. 4199 */ 4200 if (txr->tx_avail >= EM_MAX_SCATTER) { 4201 if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); 4202 } 4203 4204 /* Disable hang detection if all clean */ 4205 if (txr->tx_avail == adapter->num_tx_desc) 4206 txr->busy = EM_TX_IDLE; 4207 } 4208 4209 /********************************************************************* 4210 * 4211 * Refresh RX descriptor mbufs from system mbuf buffer pool. 4212 * 4213 **********************************************************************/ 4214 static void 4215 em_refresh_mbufs(struct rx_ring *rxr, int limit) 4216 { 4217 struct adapter *adapter = rxr->adapter; 4218 struct mbuf *m; 4219 bus_dma_segment_t segs; 4220 struct em_rxbuffer *rxbuf; 4221 int i, j, error, nsegs; 4222 bool cleaned = FALSE; 4223 4224 i = j = rxr->next_to_refresh; 4225 /* 4226 ** Get one descriptor beyond 4227 ** our work mark to control 4228 ** the loop. 4229 */ 4230 if (++j == adapter->num_rx_desc) 4231 j = 0; 4232 4233 while (j != limit) { 4234 rxbuf = &rxr->rx_buffers[i]; 4235 if (rxbuf->m_head == NULL) { 4236 m = m_getjcl(M_NOWAIT, MT_DATA, 4237 M_PKTHDR, adapter->rx_mbuf_sz); 4238 /* 4239 ** If we have a temporary resource shortage 4240 ** that causes a failure, just abort refresh 4241 ** for now, we will return to this point when 4242 ** reinvoked from em_rxeof. 
4243 */ 4244 if (m == NULL) 4245 goto update; 4246 } else 4247 m = rxbuf->m_head; 4248 4249 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz; 4250 m->m_flags |= M_PKTHDR; 4251 m->m_data = m->m_ext.ext_buf; 4252 4253 /* Use bus_dma machinery to setup the memory mapping */ 4254 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map, 4255 m, &segs, &nsegs, BUS_DMA_NOWAIT); 4256 if (error != 0) { 4257 printf("Refresh mbufs: hdr dmamap load" 4258 " failure - %d\n", error); 4259 m_free(m); 4260 rxbuf->m_head = NULL; 4261 goto update; 4262 } 4263 rxbuf->m_head = m; 4264 rxbuf->paddr = segs.ds_addr; 4265 bus_dmamap_sync(rxr->rxtag, 4266 rxbuf->map, BUS_DMASYNC_PREREAD); 4267 em_setup_rxdesc(&rxr->rx_base[i], rxbuf); 4268 cleaned = TRUE; 4269 4270 i = j; /* Next is precalulated for us */ 4271 rxr->next_to_refresh = i; 4272 /* Calculate next controlling index */ 4273 if (++j == adapter->num_rx_desc) 4274 j = 0; 4275 } 4276 update: 4277 /* 4278 ** Update the tail pointer only if, 4279 ** and as far as we have refreshed. 4280 */ 4281 if (cleaned) 4282 E1000_WRITE_REG(&adapter->hw, 4283 E1000_RDT(rxr->me), rxr->next_to_refresh); 4284 4285 return; 4286 } 4287 4288 4289 /********************************************************************* 4290 * 4291 * Allocate memory for rx_buffer structures. Since we use one 4292 * rx_buffer per received packet, the maximum number of rx_buffer's 4293 * that we'll need is equal to the number of receive descriptors 4294 * that we've allocated. 4295 * 4296 **********************************************************************/ 4297 static int 4298 em_allocate_receive_buffers(struct rx_ring *rxr) 4299 { 4300 struct adapter *adapter = rxr->adapter; 4301 device_t dev = adapter->dev; 4302 struct em_rxbuffer *rxbuf; 4303 int error; 4304 4305 rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) * 4306 adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); 4307 if (rxr->rx_buffers == NULL) { 4308 device_printf(dev, "Unable to allocate rx_buffer memory\n"); 4309 return (ENOMEM); 4310 } 4311 4312 error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 4313 1, 0, /* alignment, bounds */ 4314 BUS_SPACE_MAXADDR, /* lowaddr */ 4315 BUS_SPACE_MAXADDR, /* highaddr */ 4316 NULL, NULL, /* filter, filterarg */ 4317 MJUM9BYTES, /* maxsize */ 4318 1, /* nsegments */ 4319 MJUM9BYTES, /* maxsegsize */ 4320 0, /* flags */ 4321 NULL, /* lockfunc */ 4322 NULL, /* lockarg */ 4323 &rxr->rxtag); 4324 if (error) { 4325 device_printf(dev, "%s: bus_dma_tag_create failed %d\n", 4326 __func__, error); 4327 goto fail; 4328 } 4329 4330 rxbuf = rxr->rx_buffers; 4331 for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) { 4332 rxbuf = &rxr->rx_buffers[i]; 4333 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map); 4334 if (error) { 4335 device_printf(dev, "%s: bus_dmamap_create failed: %d\n", 4336 __func__, error); 4337 goto fail; 4338 } 4339 } 4340 4341 return (0); 4342 4343 fail: 4344 em_free_receive_structures(adapter); 4345 return (error); 4346 } 4347 4348 4349 /********************************************************************* 4350 * 4351 * Initialize a receive ring and its buffers. 
4352 * 4353 **********************************************************************/ 4354 static int 4355 em_setup_receive_ring(struct rx_ring *rxr) 4356 { 4357 struct adapter *adapter = rxr->adapter; 4358 struct em_rxbuffer *rxbuf; 4359 bus_dma_segment_t seg[1]; 4360 int rsize, nsegs, error = 0; 4361 #ifdef DEV_NETMAP 4362 struct netmap_slot *slot; 4363 struct netmap_adapter *na = netmap_getna(adapter->ifp); 4364 #endif 4365 4366 4367 /* Clear the ring contents */ 4368 EM_RX_LOCK(rxr); 4369 rsize = roundup2(adapter->num_rx_desc * 4370 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); 4371 bzero((void *)rxr->rx_base, rsize); 4372 #ifdef DEV_NETMAP 4373 slot = netmap_reset(na, NR_RX, rxr->me, 0); 4374 #endif 4375 4376 /* 4377 ** Free current RX buffer structs and their mbufs 4378 */ 4379 for (int i = 0; i < adapter->num_rx_desc; i++) { 4380 rxbuf = &rxr->rx_buffers[i]; 4381 if (rxbuf->m_head != NULL) { 4382 bus_dmamap_sync(rxr->rxtag, rxbuf->map, 4383 BUS_DMASYNC_POSTREAD); 4384 bus_dmamap_unload(rxr->rxtag, rxbuf->map); 4385 m_freem(rxbuf->m_head); 4386 rxbuf->m_head = NULL; /* mark as freed */ 4387 } 4388 } 4389 4390 /* Now replenish the mbufs */ 4391 for (int j = 0; j != adapter->num_rx_desc; ++j) { 4392 rxbuf = &rxr->rx_buffers[j]; 4393 #ifdef DEV_NETMAP 4394 if (slot) { 4395 int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j); 4396 uint64_t paddr; 4397 void *addr; 4398 4399 addr = PNMB(na, slot + si, &paddr); 4400 netmap_load_map(na, rxr->rxtag, rxbuf->map, addr); 4401 rxbuf->paddr = paddr; 4402 em_setup_rxdesc(&rxr->rx_base[j], rxbuf); 4403 continue; 4404 } 4405 #endif /* DEV_NETMAP */ 4406 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA, 4407 M_PKTHDR, adapter->rx_mbuf_sz); 4408 if (rxbuf->m_head == NULL) { 4409 error = ENOBUFS; 4410 goto fail; 4411 } 4412 rxbuf->m_head->m_len = adapter->rx_mbuf_sz; 4413 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */ 4414 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz; 4415 4416 /* Get the memory mapping */ 4417 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, 4418 rxbuf->map, rxbuf->m_head, seg, 4419 &nsegs, BUS_DMA_NOWAIT); 4420 if (error != 0) { 4421 m_freem(rxbuf->m_head); 4422 rxbuf->m_head = NULL; 4423 goto fail; 4424 } 4425 bus_dmamap_sync(rxr->rxtag, 4426 rxbuf->map, BUS_DMASYNC_PREREAD); 4427 4428 rxbuf->paddr = seg[0].ds_addr; 4429 em_setup_rxdesc(&rxr->rx_base[j], rxbuf); 4430 } 4431 rxr->next_to_check = 0; 4432 rxr->next_to_refresh = 0; 4433 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 4434 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 4435 4436 fail: 4437 EM_RX_UNLOCK(rxr); 4438 return (error); 4439 } 4440 4441 /********************************************************************* 4442 * 4443 * Initialize all receive rings. 4444 * 4445 **********************************************************************/ 4446 static int 4447 em_setup_receive_structures(struct adapter *adapter) 4448 { 4449 struct rx_ring *rxr = adapter->rx_rings; 4450 int q; 4451 4452 for (q = 0; q < adapter->num_queues; q++, rxr++) 4453 if (em_setup_receive_ring(rxr)) 4454 goto fail; 4455 4456 return (0); 4457 fail: 4458 /* 4459 * Free RX buffers allocated so far, we will only handle 4460 * the rings that completed, the failing case will have 4461 * cleaned up for itself. 'q' failed, so its the terminus. 
4462 */ 4463 for (int i = 0; i < q; ++i) { 4464 rxr = &adapter->rx_rings[i]; 4465 for (int n = 0; n < adapter->num_rx_desc; n++) { 4466 struct em_rxbuffer *rxbuf; 4467 rxbuf = &rxr->rx_buffers[n]; 4468 if (rxbuf->m_head != NULL) { 4469 bus_dmamap_sync(rxr->rxtag, rxbuf->map, 4470 BUS_DMASYNC_POSTREAD); 4471 bus_dmamap_unload(rxr->rxtag, rxbuf->map); 4472 m_freem(rxbuf->m_head); 4473 rxbuf->m_head = NULL; 4474 } 4475 } 4476 rxr->next_to_check = 0; 4477 rxr->next_to_refresh = 0; 4478 } 4479 4480 return (ENOBUFS); 4481 } 4482 4483 /********************************************************************* 4484 * 4485 * Free all receive rings. 4486 * 4487 **********************************************************************/ 4488 static void 4489 em_free_receive_structures(struct adapter *adapter) 4490 { 4491 struct rx_ring *rxr = adapter->rx_rings; 4492 4493 for (int i = 0; i < adapter->num_queues; i++, rxr++) { 4494 em_free_receive_buffers(rxr); 4495 /* Free the ring memory as well */ 4496 em_dma_free(adapter, &rxr->rxdma); 4497 EM_RX_LOCK_DESTROY(rxr); 4498 } 4499 4500 free(adapter->rx_rings, M_DEVBUF); 4501 } 4502 4503 4504 /********************************************************************* 4505 * 4506 * Free receive ring data structures 4507 * 4508 **********************************************************************/ 4509 static void 4510 em_free_receive_buffers(struct rx_ring *rxr) 4511 { 4512 struct adapter *adapter = rxr->adapter; 4513 struct em_rxbuffer *rxbuf = NULL; 4514 4515 INIT_DEBUGOUT("free_receive_buffers: begin"); 4516 4517 if (rxr->rx_buffers != NULL) { 4518 for (int i = 0; i < adapter->num_rx_desc; i++) { 4519 rxbuf = &rxr->rx_buffers[i]; 4520 if (rxbuf->map != NULL) { 4521 bus_dmamap_sync(rxr->rxtag, rxbuf->map, 4522 BUS_DMASYNC_POSTREAD); 4523 bus_dmamap_unload(rxr->rxtag, rxbuf->map); 4524 bus_dmamap_destroy(rxr->rxtag, rxbuf->map); 4525 } 4526 if (rxbuf->m_head != NULL) { 4527 m_freem(rxbuf->m_head); 4528 rxbuf->m_head = NULL; 4529 } 4530 } 4531 free(rxr->rx_buffers, M_DEVBUF); 4532 rxr->rx_buffers = NULL; 4533 rxr->next_to_check = 0; 4534 rxr->next_to_refresh = 0; 4535 } 4536 4537 if (rxr->rxtag != NULL) { 4538 bus_dma_tag_destroy(rxr->rxtag); 4539 rxr->rxtag = NULL; 4540 } 4541 4542 return; 4543 } 4544 4545 4546 /********************************************************************* 4547 * 4548 * Enable receive unit. 
4549 * 4550 **********************************************************************/ 4551 4552 static void 4553 em_initialize_receive_unit(struct adapter *adapter) 4554 { 4555 struct rx_ring *rxr = adapter->rx_rings; 4556 if_t ifp = adapter->ifp; 4557 struct e1000_hw *hw = &adapter->hw; 4558 u32 rctl, rxcsum, rfctl; 4559 4560 INIT_DEBUGOUT("em_initialize_receive_units: begin"); 4561 4562 /* 4563 * Make sure receives are disabled while setting 4564 * up the descriptor ring 4565 */ 4566 rctl = E1000_READ_REG(hw, E1000_RCTL); 4567 /* Do not disable if ever enabled on this hardware */ 4568 if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583)) 4569 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); 4570 4571 /* Setup the Receive Control Register */ 4572 rctl &= ~(3 << E1000_RCTL_MO_SHIFT); 4573 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | 4574 E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | 4575 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); 4576 4577 /* Do not store bad packets */ 4578 rctl &= ~E1000_RCTL_SBP; 4579 4580 /* Enable Long Packet receive */ 4581 if (if_getmtu(ifp) > ETHERMTU) 4582 rctl |= E1000_RCTL_LPE; 4583 else 4584 rctl &= ~E1000_RCTL_LPE; 4585 4586 /* Strip the CRC */ 4587 if (!em_disable_crc_stripping) 4588 rctl |= E1000_RCTL_SECRC; 4589 4590 E1000_WRITE_REG(&adapter->hw, E1000_RADV, 4591 adapter->rx_abs_int_delay.value); 4592 4593 E1000_WRITE_REG(&adapter->hw, E1000_RDTR, 4594 adapter->rx_int_delay.value); 4595 /* 4596 * Set the interrupt throttling rate. Value is calculated 4597 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) 4598 */ 4599 E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); 4600 4601 /* Use extended rx descriptor formats */ 4602 rfctl = E1000_READ_REG(hw, E1000_RFCTL); 4603 rfctl |= E1000_RFCTL_EXTEN; 4604 /* 4605 ** When using MSIX interrupts we need to throttle 4606 ** using the EITR register (82574 only) 4607 */ 4608 if (hw->mac.type == e1000_82574) { 4609 for (int i = 0; i < 4; i++) 4610 E1000_WRITE_REG(hw, E1000_EITR_82574(i), 4611 DEFAULT_ITR); 4612 /* Disable accelerated acknowledge */ 4613 rfctl |= E1000_RFCTL_ACK_DIS; 4614 } 4615 E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); 4616 4617 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); 4618 if (if_getcapenable(ifp) & IFCAP_RXCSUM) { 4619 #ifdef EM_MULTIQUEUE 4620 rxcsum |= E1000_RXCSUM_TUOFL | 4621 E1000_RXCSUM_IPOFL | 4622 E1000_RXCSUM_PCSD; 4623 #else 4624 rxcsum |= E1000_RXCSUM_TUOFL; 4625 #endif 4626 } else 4627 rxcsum &= ~E1000_RXCSUM_TUOFL; 4628 4629 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); 4630 4631 #ifdef EM_MULTIQUEUE 4632 #define RSSKEYLEN 10 4633 if (adapter->num_queues > 1) { 4634 uint8_t rss_key[4 * RSSKEYLEN]; 4635 uint32_t reta = 0; 4636 int i; 4637 4638 /* 4639 * Configure RSS key 4640 */ 4641 arc4rand(rss_key, sizeof(rss_key), 0); 4642 for (i = 0; i < RSSKEYLEN; ++i) { 4643 uint32_t rssrk = 0; 4644 4645 rssrk = EM_RSSRK_VAL(rss_key, i); 4646 E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk); 4647 } 4648 4649 /* 4650 * Configure RSS redirect table in following fashion: 4651 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] 4652 */ 4653 for (i = 0; i < sizeof(reta); ++i) { 4654 uint32_t q; 4655 4656 q = (i % adapter->num_queues) << 7; 4657 reta |= q << (8 * i); 4658 } 4659 4660 for (i = 0; i < 32; ++i) { 4661 E1000_WRITE_REG(hw, E1000_RETA(i), reta); 4662 } 4663 4664 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | 4665 E1000_MRQC_RSS_FIELD_IPV4_TCP | 4666 E1000_MRQC_RSS_FIELD_IPV4 | 4667 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX | 4668 E1000_MRQC_RSS_FIELD_IPV6_EX | 4669 
E1000_MRQC_RSS_FIELD_IPV6); 4670 } 4671 #endif 4672 /* 4673 ** XXX TEMPORARY WORKAROUND: on some systems with 82573 4674 ** long latencies are observed, like Lenovo X60. This 4675 ** change eliminates the problem, but since having positive 4676 ** values in RDTR is a known source of problems on other 4677 ** platforms another solution is being sought. 4678 */ 4679 if (hw->mac.type == e1000_82573) 4680 E1000_WRITE_REG(hw, E1000_RDTR, 0x20); 4681 4682 for (int i = 0; i < adapter->num_queues; i++, rxr++) { 4683 /* Setup the Base and Length of the Rx Descriptor Ring */ 4684 u64 bus_addr = rxr->rxdma.dma_paddr; 4685 u32 rdt = adapter->num_rx_desc - 1; /* default */ 4686 4687 E1000_WRITE_REG(hw, E1000_RDLEN(i), 4688 adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended)); 4689 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); 4690 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); 4691 /* Setup the Head and Tail Descriptor Pointers */ 4692 E1000_WRITE_REG(hw, E1000_RDH(i), 0); 4693 #ifdef DEV_NETMAP 4694 /* 4695 * an init() while a netmap client is active must 4696 * preserve the rx buffers passed to userspace. 4697 */ 4698 if (if_getcapenable(ifp) & IFCAP_NETMAP) { 4699 struct netmap_adapter *na = netmap_getna(adapter->ifp); 4700 rdt -= nm_kr_rxspace(&na->rx_rings[i]); 4701 } 4702 #endif /* DEV_NETMAP */ 4703 E1000_WRITE_REG(hw, E1000_RDT(i), rdt); 4704 } 4705 4706 /* 4707 * Set PTHRESH for improved jumbo performance 4708 * According to 10.2.5.11 of Intel 82574 Datasheet, 4709 * RXDCTL(1) is written whenever RXDCTL(0) is written. 4710 * Only write to RXDCTL(1) if there is a need for different 4711 * settings. 4712 */ 4713 if (((adapter->hw.mac.type == e1000_ich9lan) || 4714 (adapter->hw.mac.type == e1000_pch2lan) || 4715 (adapter->hw.mac.type == e1000_ich10lan)) && 4716 (if_getmtu(ifp) > ETHERMTU)) { 4717 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); 4718 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); 4719 } else if (adapter->hw.mac.type == e1000_82574) { 4720 for (int i = 0; i < adapter->num_queues; i++) { 4721 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); 4722 4723 rxdctl |= 0x20; /* PTHRESH */ 4724 rxdctl |= 4 << 8; /* HTHRESH */ 4725 rxdctl |= 4 << 16;/* WTHRESH */ 4726 rxdctl |= 1 << 24; /* Switch to granularity */ 4727 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); 4728 } 4729 } 4730 4731 if (adapter->hw.mac.type >= e1000_pch2lan) { 4732 if (if_getmtu(ifp) > ETHERMTU) 4733 e1000_lv_jumbo_workaround_ich8lan(hw, TRUE); 4734 else 4735 e1000_lv_jumbo_workaround_ich8lan(hw, FALSE); 4736 } 4737 4738 /* Make sure VLAN Filters are off */ 4739 rctl &= ~E1000_RCTL_VFE; 4740 4741 if (adapter->rx_mbuf_sz == MCLBYTES) 4742 rctl |= E1000_RCTL_SZ_2048; 4743 else if (adapter->rx_mbuf_sz == MJUMPAGESIZE) 4744 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; 4745 else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) 4746 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; 4747 4748 /* ensure we clear use DTYPE of 00 here */ 4749 rctl &= ~0x00000C00; 4750 /* Write out the settings */ 4751 E1000_WRITE_REG(hw, E1000_RCTL, rctl); 4752 4753 return; 4754 } 4755 4756 4757 /********************************************************************* 4758 * 4759 * This routine executes in interrupt context. It replenishes 4760 * the mbufs in the descriptor and sends data which has been 4761 * dma'ed into host memory to upper layer. 4762 * 4763 * We loop at most count times if count is > 0, or until done if 4764 * count < 0. 
4765 * 4766 * For polling we also now return the number of cleaned packets 4767 *********************************************************************/ 4768 static bool 4769 em_rxeof(struct rx_ring *rxr, int count, int *done) 4770 { 4771 struct adapter *adapter = rxr->adapter; 4772 if_t ifp = adapter->ifp; 4773 struct mbuf *mp, *sendmp; 4774 u32 status = 0; 4775 u16 len; 4776 int i, processed, rxdone = 0; 4777 bool eop; 4778 union e1000_rx_desc_extended *cur; 4779 4780 EM_RX_LOCK(rxr); 4781 4782 /* Sync the ring */ 4783 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 4784 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 4785 4786 4787 #ifdef DEV_NETMAP 4788 if (netmap_rx_irq(ifp, rxr->me, &processed)) { 4789 EM_RX_UNLOCK(rxr); 4790 return (FALSE); 4791 } 4792 #endif /* DEV_NETMAP */ 4793 4794 for (i = rxr->next_to_check, processed = 0; count != 0;) { 4795 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) 4796 break; 4797 4798 cur = &rxr->rx_base[i]; 4799 status = le32toh(cur->wb.upper.status_error); 4800 mp = sendmp = NULL; 4801 4802 if ((status & E1000_RXD_STAT_DD) == 0) 4803 break; 4804 4805 len = le16toh(cur->wb.upper.length); 4806 eop = (status & E1000_RXD_STAT_EOP) != 0; 4807 4808 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) || 4809 (rxr->discard == TRUE)) { 4810 adapter->dropped_pkts++; 4811 ++rxr->rx_discarded; 4812 if (!eop) /* Catch subsequent segs */ 4813 rxr->discard = TRUE; 4814 else 4815 rxr->discard = FALSE; 4816 em_rx_discard(rxr, i); 4817 goto next_desc; 4818 } 4819 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map); 4820 4821 /* Assign correct length to the current fragment */ 4822 mp = rxr->rx_buffers[i].m_head; 4823 mp->m_len = len; 4824 4825 /* Trigger for refresh */ 4826 rxr->rx_buffers[i].m_head = NULL; 4827 4828 /* First segment? */ 4829 if (rxr->fmp == NULL) { 4830 mp->m_pkthdr.len = len; 4831 rxr->fmp = rxr->lmp = mp; 4832 } else { 4833 /* Chain mbuf's together */ 4834 mp->m_flags &= ~M_PKTHDR; 4835 rxr->lmp->m_next = mp; 4836 rxr->lmp = mp; 4837 rxr->fmp->m_pkthdr.len += len; 4838 } 4839 4840 if (eop) { 4841 --count; 4842 sendmp = rxr->fmp; 4843 if_setrcvif(sendmp, ifp); 4844 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 4845 em_receive_checksum(status, sendmp); 4846 #ifndef __NO_STRICT_ALIGNMENT 4847 if (adapter->hw.mac.max_frame_size > 4848 (MCLBYTES - ETHER_ALIGN) && 4849 em_fixup_rx(rxr) != 0) 4850 goto skip; 4851 #endif 4852 if (status & E1000_RXD_STAT_VP) { 4853 if_setvtag(sendmp, 4854 le16toh(cur->wb.upper.vlan)); 4855 sendmp->m_flags |= M_VLANTAG; 4856 } 4857 #ifndef __NO_STRICT_ALIGNMENT 4858 skip: 4859 #endif 4860 rxr->fmp = rxr->lmp = NULL; 4861 } 4862 next_desc: 4863 /* Sync the ring */ 4864 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 4865 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 4866 4867 /* Zero out the receive descriptors status. */ 4868 cur->wb.upper.status_error &= htole32(~0xFF); 4869 ++rxdone; /* cumulative for POLL */ 4870 ++processed; 4871 4872 /* Advance our pointers to the next descriptor. 
*/
4873 if (++i == adapter->num_rx_desc)
4874 i = 0;
4875 
4876 /* Send to the stack */
4877 if (sendmp != NULL) {
4878 rxr->next_to_check = i;
4879 EM_RX_UNLOCK(rxr);
4880 if_input(ifp, sendmp);
4881 EM_RX_LOCK(rxr);
4882 i = rxr->next_to_check;
4883 }
4884 
4885 /* Only refresh mbufs every 8 descriptors */
4886 if (processed == 8) {
4887 em_refresh_mbufs(rxr, i);
4888 processed = 0;
4889 }
4890 }
4891 
4892 /* Catch any remaining refresh work */
4893 if (e1000_rx_unrefreshed(rxr))
4894 em_refresh_mbufs(rxr, i);
4895 
4896 rxr->next_to_check = i;
4897 if (done != NULL)
4898 *done = rxdone;
4899 EM_RX_UNLOCK(rxr);
4900 
4901 return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4902 }
4903 
4904 static __inline void
4905 em_rx_discard(struct rx_ring *rxr, int i)
4906 {
4907 struct em_rxbuffer *rbuf;
4908 
4909 rbuf = &rxr->rx_buffers[i];
4910 bus_dmamap_unload(rxr->rxtag, rbuf->map);
4911 
4912 /* Free any previous pieces */
4913 if (rxr->fmp != NULL) {
4914 rxr->fmp->m_flags |= M_PKTHDR;
4915 m_freem(rxr->fmp);
4916 rxr->fmp = NULL;
4917 rxr->lmp = NULL;
4918 }
4919 /*
4920 ** Free buffer and allow em_refresh_mbufs()
4921 ** to clean up and recharge buffer.
4922 */
4923 if (rbuf->m_head) {
4924 m_free(rbuf->m_head);
4925 rbuf->m_head = NULL;
4926 }
4927 return;
4928 }
4929 
4930 #ifndef __NO_STRICT_ALIGNMENT
4931 /*
4932 * When jumbo frames are enabled we should realign the entire payload on
4933 * architectures with strict alignment. This is a serious design mistake of
4934 * the 8254x as it nullifies DMA operations. The 8254x only allows the RX
4935 * buffer size to be 2048/4096/8192/16384. What we really want is
4936 * 2048 - ETHER_ALIGN to align its payload. On architectures without strict
4937 * alignment restrictions the 8254x still performs unaligned memory accesses,
4938 * which reduces performance as well. To avoid copying an entire frame just to
4939 * align it, we allocate a new mbuf and copy the ethernet header to the new
4940 * mbuf. The new mbuf is prepended to the existing mbuf chain.
4941 *
4942 * Be aware, best performance of the 8254x is achieved only when jumbo frames
4943 * are not used at all on architectures with strict alignment.
4944 */
4945 static int
4946 em_fixup_rx(struct rx_ring *rxr)
4947 {
4948 struct adapter *adapter = rxr->adapter;
4949 struct mbuf *m, *n;
4950 int error;
4951 
4952 error = 0;
4953 m = rxr->fmp;
4954 if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4955 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4956 m->m_data += ETHER_HDR_LEN;
4957 } else {
4958 MGETHDR(n, M_NOWAIT, MT_DATA);
4959 if (n != NULL) {
4960 bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4961 m->m_data += ETHER_HDR_LEN;
4962 m->m_len -= ETHER_HDR_LEN;
4963 n->m_len = ETHER_HDR_LEN;
4964 M_MOVE_PKTHDR(n, m);
4965 n->m_next = m;
4966 rxr->fmp = n;
4967 } else {
4968 adapter->dropped_pkts++;
4969 m_freem(rxr->fmp);
4970 rxr->fmp = NULL;
4971 error = ENOMEM;
4972 }
4973 }
4974 
4975 return (error);
4976 }
4977 #endif
4978 
4979 static void
4980 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
4981 {
4982 rxd->read.buffer_addr = htole64(rxbuf->paddr);
4983 /* DD bits must be cleared */
4984 rxd->wb.upper.status_error = 0;
4985 }
4986 
4987 /*********************************************************************
4988 *
4989 * Verify that the hardware indicated that the checksum is valid.
4990 * Inform the stack about the status of the checksum so that the
4991 * stack doesn't spend time verifying it.
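 * (CSUM_IP_CHECKED/CSUM_IP_VALID is reported for a good IP header
 * checksum; CSUM_DATA_VALID/CSUM_PSEUDO_HDR with a 0xffff value is
 * reported for a good TCP or UDP checksum.)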
4992 * 4993 *********************************************************************/ 4994 static void 4995 em_receive_checksum(uint32_t status, struct mbuf *mp) 4996 { 4997 mp->m_pkthdr.csum_flags = 0; 4998 4999 /* Ignore Checksum bit is set */ 5000 if (status & E1000_RXD_STAT_IXSM) 5001 return; 5002 5003 /* If the IP checksum exists and there is no IP Checksum error */ 5004 if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) == 5005 E1000_RXD_STAT_IPCS) { 5006 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); 5007 } 5008 5009 /* TCP or UDP checksum */ 5010 if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) == 5011 E1000_RXD_STAT_TCPCS) { 5012 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 5013 mp->m_pkthdr.csum_data = htons(0xffff); 5014 } 5015 if (status & E1000_RXD_STAT_UDPCS) { 5016 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 5017 mp->m_pkthdr.csum_data = htons(0xffff); 5018 } 5019 } 5020 5021 /* 5022 * This routine is run via an vlan 5023 * config EVENT 5024 */ 5025 static void 5026 em_register_vlan(void *arg, if_t ifp, u16 vtag) 5027 { 5028 struct adapter *adapter = if_getsoftc(ifp); 5029 u32 index, bit; 5030 5031 if ((void*)adapter != arg) /* Not our event */ 5032 return; 5033 5034 if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */ 5035 return; 5036 5037 EM_CORE_LOCK(adapter); 5038 index = (vtag >> 5) & 0x7F; 5039 bit = vtag & 0x1F; 5040 adapter->shadow_vfta[index] |= (1 << bit); 5041 ++adapter->num_vlans; 5042 /* Re-init to load the changes */ 5043 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) 5044 em_init_locked(adapter); 5045 EM_CORE_UNLOCK(adapter); 5046 } 5047 5048 /* 5049 * This routine is run via an vlan 5050 * unconfig EVENT 5051 */ 5052 static void 5053 em_unregister_vlan(void *arg, if_t ifp, u16 vtag) 5054 { 5055 struct adapter *adapter = if_getsoftc(ifp); 5056 u32 index, bit; 5057 5058 if (adapter != arg) 5059 return; 5060 5061 if ((vtag == 0) || (vtag > 4095)) /* Invalid */ 5062 return; 5063 5064 EM_CORE_LOCK(adapter); 5065 index = (vtag >> 5) & 0x7F; 5066 bit = vtag & 0x1F; 5067 adapter->shadow_vfta[index] &= ~(1 << bit); 5068 --adapter->num_vlans; 5069 /* Re-init to load the changes */ 5070 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) 5071 em_init_locked(adapter); 5072 EM_CORE_UNLOCK(adapter); 5073 } 5074 5075 static void 5076 em_setup_vlan_hw_support(struct adapter *adapter) 5077 { 5078 struct e1000_hw *hw = &adapter->hw; 5079 u32 reg; 5080 5081 /* 5082 ** We get here thru init_locked, meaning 5083 ** a soft reset, this has already cleared 5084 ** the VFTA and other state, so if there 5085 ** have been no vlan's registered do nothing. 5086 */ 5087 if (adapter->num_vlans == 0) 5088 return; 5089 5090 /* 5091 ** A soft reset zero's out the VFTA, so 5092 ** we need to repopulate it now. 
5093 */ 5094 for (int i = 0; i < EM_VFTA_SIZE; i++) 5095 if (adapter->shadow_vfta[i] != 0) 5096 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, 5097 i, adapter->shadow_vfta[i]); 5098 5099 reg = E1000_READ_REG(hw, E1000_CTRL); 5100 reg |= E1000_CTRL_VME; 5101 E1000_WRITE_REG(hw, E1000_CTRL, reg); 5102 5103 /* Enable the Filter Table */ 5104 reg = E1000_READ_REG(hw, E1000_RCTL); 5105 reg &= ~E1000_RCTL_CFIEN; 5106 reg |= E1000_RCTL_VFE; 5107 E1000_WRITE_REG(hw, E1000_RCTL, reg); 5108 } 5109 5110 static void 5111 em_enable_intr(struct adapter *adapter) 5112 { 5113 struct e1000_hw *hw = &adapter->hw; 5114 u32 ims_mask = IMS_ENABLE_MASK; 5115 5116 if (hw->mac.type == e1000_82574) { 5117 E1000_WRITE_REG(hw, EM_EIAC, adapter->ims); 5118 ims_mask |= adapter->ims; 5119 } 5120 E1000_WRITE_REG(hw, E1000_IMS, ims_mask); 5121 } 5122 5123 static void 5124 em_disable_intr(struct adapter *adapter) 5125 { 5126 struct e1000_hw *hw = &adapter->hw; 5127 5128 if (hw->mac.type == e1000_82574) 5129 E1000_WRITE_REG(hw, EM_EIAC, 0); 5130 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); 5131 } 5132 5133 /* 5134 * Bit of a misnomer, what this really means is 5135 * to enable OS management of the system... aka 5136 * to disable special hardware management features 5137 */ 5138 static void 5139 em_init_manageability(struct adapter *adapter) 5140 { 5141 /* A shared code workaround */ 5142 #define E1000_82542_MANC2H E1000_MANC2H 5143 if (adapter->has_manage) { 5144 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); 5145 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); 5146 5147 /* disable hardware interception of ARP */ 5148 manc &= ~(E1000_MANC_ARP_EN); 5149 5150 /* enable receiving management packets to the host */ 5151 manc |= E1000_MANC_EN_MNG2HOST; 5152 #define E1000_MNG2HOST_PORT_623 (1 << 5) 5153 #define E1000_MNG2HOST_PORT_664 (1 << 6) 5154 manc2h |= E1000_MNG2HOST_PORT_623; 5155 manc2h |= E1000_MNG2HOST_PORT_664; 5156 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); 5157 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); 5158 } 5159 } 5160 5161 /* 5162 * Give control back to hardware management 5163 * controller if there is one. 5164 */ 5165 static void 5166 em_release_manageability(struct adapter *adapter) 5167 { 5168 if (adapter->has_manage) { 5169 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); 5170 5171 /* re-enable hardware interception of ARP */ 5172 manc |= E1000_MANC_ARP_EN; 5173 manc &= ~E1000_MANC_EN_MNG2HOST; 5174 5175 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); 5176 } 5177 } 5178 5179 /* 5180 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. 5181 * For ASF and Pass Through versions of f/w this means 5182 * that the driver is loaded. For AMT version type f/w 5183 * this means that the network i/f is open. 5184 */ 5185 static void 5186 em_get_hw_control(struct adapter *adapter) 5187 { 5188 u32 ctrl_ext, swsm; 5189 5190 if (adapter->hw.mac.type == e1000_82573) { 5191 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); 5192 E1000_WRITE_REG(&adapter->hw, E1000_SWSM, 5193 swsm | E1000_SWSM_DRV_LOAD); 5194 return; 5195 } 5196 /* else */ 5197 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); 5198 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, 5199 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); 5200 return; 5201 } 5202 5203 /* 5204 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. 5205 * For ASF and Pass Through versions of f/w this means that 5206 * the driver is no longer loaded. For AMT versions of the 5207 * f/w this means that the network i/f is closed. 
5208 */ 5209 static void 5210 em_release_hw_control(struct adapter *adapter) 5211 { 5212 u32 ctrl_ext, swsm; 5213 5214 if (!adapter->has_manage) 5215 return; 5216 5217 if (adapter->hw.mac.type == e1000_82573) { 5218 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); 5219 E1000_WRITE_REG(&adapter->hw, E1000_SWSM, 5220 swsm & ~E1000_SWSM_DRV_LOAD); 5221 return; 5222 } 5223 /* else */ 5224 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); 5225 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, 5226 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); 5227 return; 5228 } 5229 5230 static int 5231 em_is_valid_ether_addr(u8 *addr) 5232 { 5233 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; 5234 5235 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { 5236 return (FALSE); 5237 } 5238 5239 return (TRUE); 5240 } 5241 5242 /* 5243 ** Parse the interface capabilities with regard 5244 ** to both system management and wake-on-lan for 5245 ** later use. 5246 */ 5247 static void 5248 em_get_wakeup(device_t dev) 5249 { 5250 struct adapter *adapter = device_get_softc(dev); 5251 u16 eeprom_data = 0, device_id, apme_mask; 5252 5253 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); 5254 apme_mask = EM_EEPROM_APME; 5255 5256 switch (adapter->hw.mac.type) { 5257 case e1000_82573: 5258 case e1000_82583: 5259 adapter->has_amt = TRUE; 5260 /* Falls thru */ 5261 case e1000_82571: 5262 case e1000_82572: 5263 case e1000_80003es2lan: 5264 if (adapter->hw.bus.func == 1) { 5265 e1000_read_nvm(&adapter->hw, 5266 NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); 5267 break; 5268 } else 5269 e1000_read_nvm(&adapter->hw, 5270 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); 5271 break; 5272 case e1000_ich8lan: 5273 case e1000_ich9lan: 5274 case e1000_ich10lan: 5275 case e1000_pchlan: 5276 case e1000_pch2lan: 5277 apme_mask = E1000_WUC_APME; 5278 adapter->has_amt = TRUE; 5279 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC); 5280 break; 5281 default: 5282 e1000_read_nvm(&adapter->hw, 5283 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); 5284 break; 5285 } 5286 if (eeprom_data & apme_mask) 5287 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); 5288 /* 5289 * We have the eeprom settings, now apply the special cases 5290 * where the eeprom may be wrong or the board won't support 5291 * wake on lan on a particular port 5292 */ 5293 device_id = pci_get_device(dev); 5294 switch (device_id) { 5295 case E1000_DEV_ID_82571EB_FIBER: 5296 /* Wake events only supported on port A for dual fiber 5297 * regardless of eeprom setting */ 5298 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & 5299 E1000_STATUS_FUNC_1) 5300 adapter->wol = 0; 5301 break; 5302 case E1000_DEV_ID_82571EB_QUAD_COPPER: 5303 case E1000_DEV_ID_82571EB_QUAD_FIBER: 5304 case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: 5305 /* if quad port adapter, disable WoL on all but port A */ 5306 if (global_quad_port_a != 0) 5307 adapter->wol = 0; 5308 /* Reset for multiple quad port adapters */ 5309 if (++global_quad_port_a == 4) 5310 global_quad_port_a = 0; 5311 break; 5312 } 5313 return; 5314 } 5315 5316 5317 /* 5318 * Enable PCI Wake On Lan capability 5319 */ 5320 static void 5321 em_enable_wakeup(device_t dev) 5322 { 5323 struct adapter *adapter = device_get_softc(dev); 5324 if_t ifp = adapter->ifp; 5325 u32 pmc, ctrl, ctrl_ext, rctl; 5326 u16 status; 5327 5328 if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0)) 5329 return; 5330 5331 /* Advertise the wakeup capability */ 5332 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); 5333 ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); 5334 
E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); 5335 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); 5336 5337 if ((adapter->hw.mac.type == e1000_ich8lan) || 5338 (adapter->hw.mac.type == e1000_pchlan) || 5339 (adapter->hw.mac.type == e1000_ich9lan) || 5340 (adapter->hw.mac.type == e1000_ich10lan)) 5341 e1000_suspend_workarounds_ich8lan(&adapter->hw); 5342 5343 /* Keep the laser running on Fiber adapters */ 5344 if (adapter->hw.phy.media_type == e1000_media_type_fiber || 5345 adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { 5346 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); 5347 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; 5348 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext); 5349 } 5350 5351 /* 5352 ** Determine type of Wakeup: note that wol 5353 ** is set with all bits on by default. 5354 */ 5355 if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0) 5356 adapter->wol &= ~E1000_WUFC_MAG; 5357 5358 if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0) 5359 adapter->wol &= ~E1000_WUFC_MC; 5360 else { 5361 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 5362 rctl |= E1000_RCTL_MPE; 5363 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); 5364 } 5365 5366 if ((adapter->hw.mac.type == e1000_pchlan) || 5367 (adapter->hw.mac.type == e1000_pch2lan)) { 5368 if (em_enable_phy_wakeup(adapter)) 5369 return; 5370 } else { 5371 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); 5372 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); 5373 } 5374 5375 if (adapter->hw.phy.type == e1000_phy_igp_3) 5376 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); 5377 5378 /* Request PME */ 5379 status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); 5380 status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); 5381 if (if_getcapenable(ifp) & IFCAP_WOL) 5382 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; 5383 pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); 5384 5385 return; 5386 } 5387 5388 /* 5389 ** WOL in the newer chipset interfaces (pchlan) 5390 ** require thing to be copied into the phy 5391 */ 5392 static int 5393 em_enable_phy_wakeup(struct adapter *adapter) 5394 { 5395 struct e1000_hw *hw = &adapter->hw; 5396 u32 mreg, ret = 0; 5397 u16 preg; 5398 5399 /* copy MAC RARs to PHY RARs */ 5400 e1000_copy_rx_addrs_to_phy_ich8lan(hw); 5401 5402 /* copy MAC MTA to PHY MTA */ 5403 for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) { 5404 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); 5405 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); 5406 e1000_write_phy_reg(hw, BM_MTA(i) + 1, 5407 (u16)((mreg >> 16) & 0xFFFF)); 5408 } 5409 5410 /* configure PHY Rx Control register */ 5411 e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg); 5412 mreg = E1000_READ_REG(hw, E1000_RCTL); 5413 if (mreg & E1000_RCTL_UPE) 5414 preg |= BM_RCTL_UPE; 5415 if (mreg & E1000_RCTL_MPE) 5416 preg |= BM_RCTL_MPE; 5417 preg &= ~(BM_RCTL_MO_MASK); 5418 if (mreg & E1000_RCTL_MO_3) 5419 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) 5420 << BM_RCTL_MO_SHIFT); 5421 if (mreg & E1000_RCTL_BAM) 5422 preg |= BM_RCTL_BAM; 5423 if (mreg & E1000_RCTL_PMCF) 5424 preg |= BM_RCTL_PMCF; 5425 mreg = E1000_READ_REG(hw, E1000_CTRL); 5426 if (mreg & E1000_CTRL_RFCE) 5427 preg |= BM_RCTL_RFCE; 5428 e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg); 5429 5430 /* enable PHY wakeup in MAC register */ 5431 E1000_WRITE_REG(hw, E1000_WUC, 5432 E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN); 5433 E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); 5434 5435 /* configure and enable PHY wakeup in 
PHY registers */ 5436 e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol); 5437 e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); 5438 5439 /* activate PHY wakeup */ 5440 ret = hw->phy.ops.acquire(hw); 5441 if (ret) { 5442 printf("Could not acquire PHY\n"); 5443 return ret; 5444 } 5445 e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, 5446 (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); 5447 ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); 5448 if (ret) { 5449 printf("Could not read PHY page 769\n"); 5450 goto out; 5451 } 5452 preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; 5453 ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); 5454 if (ret) 5455 printf("Could not set PHY Host Wakeup bit\n"); 5456 out: 5457 hw->phy.ops.release(hw); 5458 5459 return ret; 5460 } 5461 5462 static void 5463 em_led_func(void *arg, int onoff) 5464 { 5465 struct adapter *adapter = arg; 5466 5467 EM_CORE_LOCK(adapter); 5468 if (onoff) { 5469 e1000_setup_led(&adapter->hw); 5470 e1000_led_on(&adapter->hw); 5471 } else { 5472 e1000_led_off(&adapter->hw); 5473 e1000_cleanup_led(&adapter->hw); 5474 } 5475 EM_CORE_UNLOCK(adapter); 5476 } 5477 5478 /* 5479 ** Disable the L0S and L1 LINK states 5480 */ 5481 static void 5482 em_disable_aspm(struct adapter *adapter) 5483 { 5484 int base, reg; 5485 u16 link_cap,link_ctrl; 5486 device_t dev = adapter->dev; 5487 5488 switch (adapter->hw.mac.type) { 5489 case e1000_82573: 5490 case e1000_82574: 5491 case e1000_82583: 5492 break; 5493 default: 5494 return; 5495 } 5496 if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0) 5497 return; 5498 reg = base + PCIER_LINK_CAP; 5499 link_cap = pci_read_config(dev, reg, 2); 5500 if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0) 5501 return; 5502 reg = base + PCIER_LINK_CTL; 5503 link_ctrl = pci_read_config(dev, reg, 2); 5504 link_ctrl &= ~PCIEM_LINK_CTL_ASPMC; 5505 pci_write_config(dev, reg, link_ctrl, 2); 5506 return; 5507 } 5508 5509 /********************************************************************** 5510 * 5511 * Update the board statistics counters. 
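 * (Most of the hardware statistics registers read here are
 * clear-on-read, which is why each one is accumulated into the
 * driver's soft copy with +=.)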
5512 * 5513 **********************************************************************/ 5514 static void 5515 em_update_stats_counters(struct adapter *adapter) 5516 { 5517 5518 if(adapter->hw.phy.media_type == e1000_media_type_copper || 5519 (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { 5520 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS); 5521 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC); 5522 } 5523 adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS); 5524 adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC); 5525 adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC); 5526 adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL); 5527 5528 adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC); 5529 adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL); 5530 adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC); 5531 adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC); 5532 adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC); 5533 adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC); 5534 adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC); 5535 adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); 5536 adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); 5537 adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); 5538 adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); 5539 adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); 5540 adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255); 5541 adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); 5542 adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); 5543 adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); 5544 adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); 5545 adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); 5546 adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); 5547 adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); 5548 5549 /* For the 64-bit byte counters the low dword must be read first. 
*/ 5550 /* Both registers clear on the read of the high dword */ 5551 5552 adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + 5553 ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32); 5554 adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + 5555 ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32); 5556 5557 adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); 5558 adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); 5559 adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); 5560 adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); 5561 adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); 5562 5563 adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); 5564 adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); 5565 5566 adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); 5567 adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); 5568 adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); 5569 adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); 5570 adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); 5571 adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); 5572 adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); 5573 adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); 5574 adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); 5575 adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC); 5576 5577 /* Interrupt Counts */ 5578 5579 adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC); 5580 adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC); 5581 adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC); 5582 adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC); 5583 adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC); 5584 adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC); 5585 adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC); 5586 adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC); 5587 adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC); 5588 5589 if (adapter->hw.mac.type >= e1000_82543) { 5590 adapter->stats.algnerrc += 5591 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); 5592 adapter->stats.rxerrc += 5593 E1000_READ_REG(&adapter->hw, E1000_RXERRC); 5594 adapter->stats.tncrs += 5595 E1000_READ_REG(&adapter->hw, E1000_TNCRS); 5596 adapter->stats.cexterr += 5597 E1000_READ_REG(&adapter->hw, E1000_CEXTERR); 5598 adapter->stats.tsctc += 5599 E1000_READ_REG(&adapter->hw, E1000_TSCTC); 5600 adapter->stats.tsctfc += 5601 E1000_READ_REG(&adapter->hw, E1000_TSCTFC); 5602 } 5603 } 5604 5605 static uint64_t 5606 em_get_counter(if_t ifp, ift_counter cnt) 5607 { 5608 struct adapter *adapter; 5609 5610 adapter = if_getsoftc(ifp); 5611 5612 switch (cnt) { 5613 case IFCOUNTER_COLLISIONS: 5614 return (adapter->stats.colc); 5615 case IFCOUNTER_IERRORS: 5616 return (adapter->dropped_pkts + adapter->stats.rxerrc + 5617 adapter->stats.crcerrs + adapter->stats.algnerrc + 5618 adapter->stats.ruc + adapter->stats.roc + 5619 adapter->stats.mpc + adapter->stats.cexterr); 5620 case IFCOUNTER_OERRORS: 5621 return (adapter->stats.ecol + adapter->stats.latecol + 5622 adapter->watchdog_events); 5623 default: 5624 return (if_get_counter_default(ifp, cnt)); 5625 } 5626 } 5627 5628 /* Export a single 32-bit register via a read-only 
sysctl. */ 5629 static int 5630 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) 5631 { 5632 struct adapter *adapter; 5633 u_int val; 5634 5635 adapter = oidp->oid_arg1; 5636 val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); 5637 return (sysctl_handle_int(oidp, &val, 0, req)); 5638 } 5639 5640 /* 5641 * Add sysctl variables, one per statistic, to the system. 5642 */ 5643 static void 5644 em_add_hw_stats(struct adapter *adapter) 5645 { 5646 device_t dev = adapter->dev; 5647 5648 struct tx_ring *txr = adapter->tx_rings; 5649 struct rx_ring *rxr = adapter->rx_rings; 5650 5651 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); 5652 struct sysctl_oid *tree = device_get_sysctl_tree(dev); 5653 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); 5654 struct e1000_hw_stats *stats = &adapter->stats; 5655 5656 struct sysctl_oid *stat_node, *queue_node, *int_node; 5657 struct sysctl_oid_list *stat_list, *queue_list, *int_list; 5658 5659 #define QUEUE_NAME_LEN 32 5660 char namebuf[QUEUE_NAME_LEN]; 5661 5662 /* Driver Statistics */ 5663 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 5664 CTLFLAG_RD, &adapter->dropped_pkts, 5665 "Driver dropped packets"); 5666 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", 5667 CTLFLAG_RD, &adapter->link_irq, 5668 "Link MSIX IRQ Handled"); 5669 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", 5670 CTLFLAG_RD, &adapter->mbuf_defrag_failed, 5671 "Defragmenting mbuf chain failed"); 5672 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 5673 CTLFLAG_RD, &adapter->no_tx_dma_setup, 5674 "Driver tx dma failure in xmit"); 5675 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", 5676 CTLFLAG_RD, &adapter->rx_overruns, 5677 "RX overruns"); 5678 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", 5679 CTLFLAG_RD, &adapter->watchdog_events, 5680 "Watchdog timeouts"); 5681 5682 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", 5683 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL, 5684 em_sysctl_reg_handler, "IU", 5685 "Device Control Register"); 5686 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", 5687 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL, 5688 em_sysctl_reg_handler, "IU", 5689 "Receiver Control Register"); 5690 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", 5691 CTLFLAG_RD, &adapter->hw.fc.high_water, 0, 5692 "Flow Control High Watermark"); 5693 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 5694 CTLFLAG_RD, &adapter->hw.fc.low_water, 0, 5695 "Flow Control Low Watermark"); 5696 5697 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { 5698 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i); 5699 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 5700 CTLFLAG_RD, NULL, "TX Queue Name"); 5701 queue_list = SYSCTL_CHILDREN(queue_node); 5702 5703 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 5704 CTLTYPE_UINT | CTLFLAG_RD, adapter, 5705 E1000_TDH(txr->me), 5706 em_sysctl_reg_handler, "IU", 5707 "Transmit Descriptor Head"); 5708 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 5709 CTLTYPE_UINT | CTLFLAG_RD, adapter, 5710 E1000_TDT(txr->me), 5711 em_sysctl_reg_handler, "IU", 5712 "Transmit Descriptor Tail"); 5713 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq", 5714 CTLFLAG_RD, &txr->tx_irq, 5715 "Queue MSI-X Transmit Interrupts"); 5716 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 5717 CTLFLAG_RD, &txr->no_desc_avail, 5718 "Queue No Descriptor Available"); 5719 5720 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i); 5721 queue_node = SYSCTL_ADD_NODE(ctx, child, 
OID_AUTO, namebuf,
5722             CTLFLAG_RD, NULL, "RX Queue Name");
5723         queue_list = SYSCTL_CHILDREN(queue_node);
5724
5725         SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5726             CTLTYPE_UINT | CTLFLAG_RD, adapter,
5727             E1000_RDH(rxr->me),
5728             em_sysctl_reg_handler, "IU",
5729             "Receive Descriptor Head");
5730         SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5731             CTLTYPE_UINT | CTLFLAG_RD, adapter,
5732             E1000_RDT(rxr->me),
5733             em_sysctl_reg_handler, "IU",
5734             "Receive Descriptor Tail");
5735         SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5736             CTLFLAG_RD, &rxr->rx_irq,
5737             "Queue MSI-X Receive Interrupts");
5738     }
5739
5740     /* MAC stats get their own sub node */
5741
5742     stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5743         CTLFLAG_RD, NULL, "Statistics");
5744     stat_list = SYSCTL_CHILDREN(stat_node);
5745
5746     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5747         CTLFLAG_RD, &stats->ecol,
5748         "Excessive collisions");
5749     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5750         CTLFLAG_RD, &stats->scc,
5751         "Single collisions");
5752     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5753         CTLFLAG_RD, &stats->mcc,
5754         "Multiple collisions");
5755     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5756         CTLFLAG_RD, &stats->latecol,
5757         "Late collisions");
5758     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5759         CTLFLAG_RD, &stats->colc,
5760         "Collision Count");
5761     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5762         CTLFLAG_RD, &adapter->stats.symerrs,
5763         "Symbol Errors");
5764     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5765         CTLFLAG_RD, &adapter->stats.sec,
5766         "Sequence Errors");
5767     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5768         CTLFLAG_RD, &adapter->stats.dc,
5769         "Defer Count");
5770     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5771         CTLFLAG_RD, &adapter->stats.mpc,
5772         "Missed Packets");
5773     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5774         CTLFLAG_RD, &adapter->stats.rnbc,
5775         "Receive No Buffers");
5776     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5777         CTLFLAG_RD, &adapter->stats.ruc,
5778         "Receive Undersize");
5779     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5780         CTLFLAG_RD, &adapter->stats.rfc,
5781         "Fragmented Packets Received");
5782     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5783         CTLFLAG_RD, &adapter->stats.roc,
5784         "Oversized Packets Received");
5785     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5786         CTLFLAG_RD, &adapter->stats.rjc,
5787         "Received Jabber");
5788     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5789         CTLFLAG_RD, &adapter->stats.rxerrc,
5790         "Receive Errors");
5791     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5792         CTLFLAG_RD, &adapter->stats.crcerrs,
5793         "CRC errors");
5794     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5795         CTLFLAG_RD, &adapter->stats.algnerrc,
5796         "Alignment Errors");
5797     /* On 82575 these are collision counts */
5798     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5799         CTLFLAG_RD, &adapter->stats.cexterr,
5800         "Collision/Carrier extension errors");
5801     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5802         CTLFLAG_RD, &adapter->stats.xonrxc,
5803         "XON Received");
5804     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5805         CTLFLAG_RD, &adapter->stats.xontxc,
5806         "XON Transmitted");
5807     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5808         CTLFLAG_RD, &adapter->stats.xoffrxc,
5809         "XOFF Received");
5810     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5811         CTLFLAG_RD, &adapter->stats.xofftxc,
5812         "XOFF Transmitted");
5813
5814     /* Packet Reception Stats */
5815     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5816         CTLFLAG_RD, &adapter->stats.tpr,
5817         "Total Packets Received");
5818     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5819         CTLFLAG_RD, &adapter->stats.gprc,
5820         "Good Packets Received");
5821     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5822         CTLFLAG_RD, &adapter->stats.bprc,
5823         "Broadcast Packets Received");
5824     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5825         CTLFLAG_RD, &adapter->stats.mprc,
5826         "Multicast Packets Received");
5827     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5828         CTLFLAG_RD, &adapter->stats.prc64,
5829         "64 byte frames received");
5830     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5831         CTLFLAG_RD, &adapter->stats.prc127,
5832         "65-127 byte frames received");
5833     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5834         CTLFLAG_RD, &adapter->stats.prc255,
5835         "128-255 byte frames received");
5836     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5837         CTLFLAG_RD, &adapter->stats.prc511,
5838         "256-511 byte frames received");
5839     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5840         CTLFLAG_RD, &adapter->stats.prc1023,
5841         "512-1023 byte frames received");
5842     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5843         CTLFLAG_RD, &adapter->stats.prc1522,
5844         "1024-1522 byte frames received");
5845     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5846         CTLFLAG_RD, &adapter->stats.gorc,
5847         "Good Octets Received");
5848
5849     /* Packet Transmission Stats */
5850     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5851         CTLFLAG_RD, &adapter->stats.gotc,
5852         "Good Octets Transmitted");
5853     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5854         CTLFLAG_RD, &adapter->stats.tpt,
5855         "Total Packets Transmitted");
5856     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5857         CTLFLAG_RD, &adapter->stats.gptc,
5858         "Good Packets Transmitted");
5859     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5860         CTLFLAG_RD, &adapter->stats.bptc,
5861         "Broadcast Packets Transmitted");
5862     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5863         CTLFLAG_RD, &adapter->stats.mptc,
5864         "Multicast Packets Transmitted");
5865     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5866         CTLFLAG_RD, &adapter->stats.ptc64,
5867         "64 byte frames transmitted");
5868     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5869         CTLFLAG_RD, &adapter->stats.ptc127,
5870         "65-127 byte frames transmitted");
5871     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5872         CTLFLAG_RD, &adapter->stats.ptc255,
5873         "128-255 byte frames transmitted");
5874     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5875         CTLFLAG_RD, &adapter->stats.ptc511,
5876         "256-511 byte frames transmitted");
5877     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5878         CTLFLAG_RD, &adapter->stats.ptc1023,
5879         "512-1023 byte frames transmitted");
5880     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5881         CTLFLAG_RD, &adapter->stats.ptc1522,
5882         "1024-1522 byte frames transmitted");
5883     SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5884         CTLFLAG_RD, &adapter->stats.tsctc,
5885
"TSO Contexts Transmitted"); 5886 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", 5887 CTLFLAG_RD, &adapter->stats.tsctfc, 5888 "TSO Contexts Failed"); 5889 5890 5891 /* Interrupt Stats */ 5892 5893 int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 5894 CTLFLAG_RD, NULL, "Interrupt Statistics"); 5895 int_list = SYSCTL_CHILDREN(int_node); 5896 5897 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts", 5898 CTLFLAG_RD, &adapter->stats.iac, 5899 "Interrupt Assertion Count"); 5900 5901 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", 5902 CTLFLAG_RD, &adapter->stats.icrxptc, 5903 "Interrupt Cause Rx Pkt Timer Expire Count"); 5904 5905 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", 5906 CTLFLAG_RD, &adapter->stats.icrxatc, 5907 "Interrupt Cause Rx Abs Timer Expire Count"); 5908 5909 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", 5910 CTLFLAG_RD, &adapter->stats.ictxptc, 5911 "Interrupt Cause Tx Pkt Timer Expire Count"); 5912 5913 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", 5914 CTLFLAG_RD, &adapter->stats.ictxatc, 5915 "Interrupt Cause Tx Abs Timer Expire Count"); 5916 5917 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", 5918 CTLFLAG_RD, &adapter->stats.ictxqec, 5919 "Interrupt Cause Tx Queue Empty Count"); 5920 5921 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", 5922 CTLFLAG_RD, &adapter->stats.ictxqmtc, 5923 "Interrupt Cause Tx Queue Min Thresh Count"); 5924 5925 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", 5926 CTLFLAG_RD, &adapter->stats.icrxdmtc, 5927 "Interrupt Cause Rx Desc Min Thresh Count"); 5928 5929 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun", 5930 CTLFLAG_RD, &adapter->stats.icrxoc, 5931 "Interrupt Cause Receiver Overrun Count"); 5932 } 5933 5934 /********************************************************************** 5935 * 5936 * This routine provides a way to dump out the adapter eeprom, 5937 * often a useful debug/service tool. This only dumps the first 5938 * 32 words, stuff that matters is in that extent. 5939 * 5940 **********************************************************************/ 5941 static int 5942 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) 5943 { 5944 struct adapter *adapter = (struct adapter *)arg1; 5945 int error; 5946 int result; 5947 5948 result = -1; 5949 error = sysctl_handle_int(oidp, &result, 0, req); 5950 5951 if (error || !req->newptr) 5952 return (error); 5953 5954 /* 5955 * This value will cause a hex dump of the 5956 * first 32 16-bit words of the EEPROM to 5957 * the screen. 
5958 */ 5959 if (result == 1) 5960 em_print_nvm_info(adapter); 5961 5962 return (error); 5963 } 5964 5965 static void 5966 em_print_nvm_info(struct adapter *adapter) 5967 { 5968 u16 eeprom_data; 5969 int i, j, row = 0; 5970 5971 /* Its a bit crude, but it gets the job done */ 5972 printf("\nInterface EEPROM Dump:\n"); 5973 printf("Offset\n0x0000 "); 5974 for (i = 0, j = 0; i < 32; i++, j++) { 5975 if (j == 8) { /* Make the offset block */ 5976 j = 0; ++row; 5977 printf("\n0x00%x0 ",row); 5978 } 5979 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); 5980 printf("%04x ", eeprom_data); 5981 } 5982 printf("\n"); 5983 } 5984 5985 static int 5986 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) 5987 { 5988 struct em_int_delay_info *info; 5989 struct adapter *adapter; 5990 u32 regval; 5991 int error, usecs, ticks; 5992 5993 info = (struct em_int_delay_info *)arg1; 5994 usecs = info->value; 5995 error = sysctl_handle_int(oidp, &usecs, 0, req); 5996 if (error != 0 || req->newptr == NULL) 5997 return (error); 5998 if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) 5999 return (EINVAL); 6000 info->value = usecs; 6001 ticks = EM_USECS_TO_TICKS(usecs); 6002 if (info->offset == E1000_ITR) /* units are 256ns here */ 6003 ticks *= 4; 6004 6005 adapter = info->adapter; 6006 6007 EM_CORE_LOCK(adapter); 6008 regval = E1000_READ_OFFSET(&adapter->hw, info->offset); 6009 regval = (regval & ~0xffff) | (ticks & 0xffff); 6010 /* Handle a few special cases. */ 6011 switch (info->offset) { 6012 case E1000_RDTR: 6013 break; 6014 case E1000_TIDV: 6015 if (ticks == 0) { 6016 adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; 6017 /* Don't write 0 into the TIDV register. */ 6018 regval++; 6019 } else 6020 adapter->txd_cmd |= E1000_TXD_CMD_IDE; 6021 break; 6022 } 6023 E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); 6024 EM_CORE_UNLOCK(adapter); 6025 return (0); 6026 } 6027 6028 static void 6029 em_add_int_delay_sysctl(struct adapter *adapter, const char *name, 6030 const char *description, struct em_int_delay_info *info, 6031 int offset, int value) 6032 { 6033 info->adapter = adapter; 6034 info->offset = offset; 6035 info->value = value; 6036 SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), 6037 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), 6038 OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, 6039 info, 0, em_sysctl_int_delay, "I", description); 6040 } 6041 6042 static void 6043 em_set_sysctl_value(struct adapter *adapter, const char *name, 6044 const char *description, int *limit, int value) 6045 { 6046 *limit = value; 6047 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), 6048 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), 6049 OID_AUTO, name, CTLFLAG_RW, limit, value, description); 6050 } 6051 6052 6053 /* 6054 ** Set flow control using sysctl: 6055 ** Flow control values: 6056 ** 0 - off 6057 ** 1 - rx pause 6058 ** 2 - tx pause 6059 ** 3 - full 6060 */ 6061 static int 6062 em_set_flowcntl(SYSCTL_HANDLER_ARGS) 6063 { 6064 int error; 6065 static int input = 3; /* default is full */ 6066 struct adapter *adapter = (struct adapter *) arg1; 6067 6068 error = sysctl_handle_int(oidp, &input, 0, req); 6069 6070 if ((error) || (req->newptr == NULL)) 6071 return (error); 6072 6073 if (input == adapter->fc) /* no change? 
*/ 6074 return (error); 6075 6076 switch (input) { 6077 case e1000_fc_rx_pause: 6078 case e1000_fc_tx_pause: 6079 case e1000_fc_full: 6080 case e1000_fc_none: 6081 adapter->hw.fc.requested_mode = input; 6082 adapter->fc = input; 6083 break; 6084 default: 6085 /* Do nothing */ 6086 return (error); 6087 } 6088 6089 adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; 6090 e1000_force_mac_fc(&adapter->hw); 6091 return (error); 6092 } 6093 6094 /* 6095 ** Manage Energy Efficient Ethernet: 6096 ** Control values: 6097 ** 0/1 - enabled/disabled 6098 */ 6099 static int 6100 em_sysctl_eee(SYSCTL_HANDLER_ARGS) 6101 { 6102 struct adapter *adapter = (struct adapter *) arg1; 6103 int error, value; 6104 6105 value = adapter->hw.dev_spec.ich8lan.eee_disable; 6106 error = sysctl_handle_int(oidp, &value, 0, req); 6107 if (error || req->newptr == NULL) 6108 return (error); 6109 EM_CORE_LOCK(adapter); 6110 adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0); 6111 em_init_locked(adapter); 6112 EM_CORE_UNLOCK(adapter); 6113 return (0); 6114 } 6115 6116 static int 6117 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS) 6118 { 6119 struct adapter *adapter; 6120 int error; 6121 int result; 6122 6123 result = -1; 6124 error = sysctl_handle_int(oidp, &result, 0, req); 6125 6126 if (error || !req->newptr) 6127 return (error); 6128 6129 if (result == 1) { 6130 adapter = (struct adapter *)arg1; 6131 em_print_debug_info(adapter); 6132 } 6133 6134 return (error); 6135 } 6136 6137 /* 6138 ** This routine is meant to be fluid, add whatever is 6139 ** needed for debugging a problem. -jfv 6140 */ 6141 static void 6142 em_print_debug_info(struct adapter *adapter) 6143 { 6144 device_t dev = adapter->dev; 6145 struct tx_ring *txr = adapter->tx_rings; 6146 struct rx_ring *rxr = adapter->rx_rings; 6147 6148 if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) 6149 printf("Interface is RUNNING "); 6150 else 6151 printf("Interface is NOT RUNNING\n"); 6152 6153 if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE) 6154 printf("and INACTIVE\n"); 6155 else 6156 printf("and ACTIVE\n"); 6157 6158 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { 6159 device_printf(dev, "TX Queue %d ------\n", i); 6160 device_printf(dev, "hw tdh = %d, hw tdt = %d\n", 6161 E1000_READ_REG(&adapter->hw, E1000_TDH(i)), 6162 E1000_READ_REG(&adapter->hw, E1000_TDT(i))); 6163 device_printf(dev, "Tx Queue Status = %d\n", txr->busy); 6164 device_printf(dev, "TX descriptors avail = %d\n", 6165 txr->tx_avail); 6166 device_printf(dev, "Tx Descriptors avail failure = %ld\n", 6167 txr->no_desc_avail); 6168 device_printf(dev, "RX Queue %d ------\n", i); 6169 device_printf(dev, "hw rdh = %d, hw rdt = %d\n", 6170 E1000_READ_REG(&adapter->hw, E1000_RDH(i)), 6171 E1000_READ_REG(&adapter->hw, E1000_RDT(i))); 6172 device_printf(dev, "RX discarded packets = %ld\n", 6173 rxr->rx_discarded); 6174 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check); 6175 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh); 6176 } 6177 } 6178 6179 #ifdef EM_MULTIQUEUE 6180 /* 6181 * 82574 only: 6182 * Write a new value to the EEPROM increasing the number of MSIX 6183 * vectors from 3 to 5, for proper multiqueue support. 
6184 */
6185 static void
6186 em_enable_vectors_82574(struct adapter *adapter)
6187 {
6188     struct e1000_hw *hw = &adapter->hw;
6189     device_t dev = adapter->dev;
6190     u16 edata;
6191
6192     e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6193     printf("Current cap: %#06x\n", edata);
6194     if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6195         device_printf(dev, "Writing to eeprom: increasing "
6196             "reported MSIX vectors from 3 to 5...\n");
6197         edata &= ~(EM_NVM_MSIX_N_MASK);
6198         edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6199         e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6200         e1000_update_nvm_checksum(hw);
6201         device_printf(dev, "Writing to eeprom: done\n");
6202     }
6203 }
6204 #endif
6205
6206 #ifdef DDB
6207 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6208 {
6209     devclass_t dc;
6210     int max_em;
6211
6212     dc = devclass_find("em");
6213     max_em = devclass_get_maxunit(dc);
6214
6215     for (int index = 0; index < max_em; index++) {
6216         device_t dev;
6217         dev = devclass_get_device(dc, index); /* NULL if the unit is absent */
6218         if (dev != NULL && device_get_driver(dev) == &em_driver) {
6219             struct adapter *adapter = device_get_softc(dev);
6220             EM_CORE_LOCK(adapter);
6221             em_init_locked(adapter);
6222             EM_CORE_UNLOCK(adapter);
6223         }
6224     }
6225 }
6226 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6227 {
6228     devclass_t dc;
6229     int max_em;
6230
6231     dc = devclass_find("em");
6232     max_em = devclass_get_maxunit(dc);
6233
6234     for (int index = 0; index < max_em; index++) {
6235         device_t dev;
6236         dev = devclass_get_device(dc, index); /* NULL if the unit is absent */
6237         if (dev != NULL && device_get_driver(dev) == &em_driver)
6238             em_print_debug_info(device_get_softc(dev));
6239     }
6240
6241 }
6242 #endif
6243
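/*
 * Usage note: the two DB_COMMAND() entries above register their first
 * argument as the command name, so on a kernel built with DDB they are
 * invoked by name from the debugger prompt, e.g. "db> em_reset_dev" to
 * re-run em_init_locked() on every attached em interface, and
 * "db> em_dump_queue" to print the per-queue debug state via
 * em_print_debug_info().
 */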