/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.2";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select which devices to attach to
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#else
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");
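/*
 * DRIVER_MODULE() above registers the "em" driver with the pci bus
 * through newbus, so em_probe()/em_attach() are invoked for matching
 * devices, while MODULE_DEPEND() records the pci and ether modules
 * as load-order dependencies for the kernel linker.
 */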
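/*
 * A worked example of the rounding conversions above, assuming the
 * hardware delay timers tick in 1.024 usec units (hence the 1024/1000
 * ratio): EM_USECS_TO_TICKS(128) = (1000 * 128 + 512) / 1024 = 125,
 * and EM_TICKS_TO_USECS(125) = (1024 * 125 + 500) / 1000 = 128.
 */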
/* Energy efficient ethernet - default to OFF */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
	/*
	 * Validate the number of transmit and receive descriptors. They
	 * must not exceed the hardware maximum, and the ring size must
	 * be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

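	/*
	 * A worked example of the alignment check above: the legacy
	 * descriptors used here are 16 bytes each, so with the customary
	 * EM_DBA_ALIGN of 128 the count must be a multiple of 8; e.g.
	 * 1024 descriptors * 16 bytes = 16384 bytes, and 16384 % 128 == 0.
	 */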
	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it is a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the hardware is busy the driver can queue the
 *  request rather than do an immediate send. It is this queueing,
 *  rather than having multiple hardware tx queues, that is the
 *  advantage in this driver.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		ifp->if_obytes += next->m_pkthdr.len;
		if (next->m_flags & M_MCAST)
			ifp->if_omcasts++;
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER)
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
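/*
 * Note that the drbr_*() calls above are thin wrappers around the
 * lockless buf_ring in txr->br, and that em_mq_start_locked() is
 * always entered with the TX lock held, either from em_mq_start()
 * below or from the interrupt/taskqueue handlers that drain the
 * ring after a transmit completion.
 */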
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset. We make a duplicate
	 * in RAR[14] for that eventuality; this ensures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	if (adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
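	/*
	 * For reference: MCLBYTES is a standard 2K mbuf cluster,
	 * MJUMPAGESIZE is a PAGE_SIZE (typically 4K) cluster, and
	 * MJUM9BYTES is a 9K jumbo cluster.
	 */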
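/*
 * em_irq_fast() runs as a fast interrupt filter in primary interrupt
 * context, so it only classifies the cause (returning FILTER_STRAY or
 * FILTER_HANDLED) and defers the actual RX/TX cleanup to the que_task
 * taskqueue, serviced by em_handle_que() below.
 */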
1734  *  media/mediopt option with ifconfig.
1735  *
1736  **********************************************************************/
1737 static int
1738 em_media_change(struct ifnet *ifp)
1739 {
1740 	struct adapter *adapter = ifp->if_softc;
1741 	struct ifmedia  *ifm = &adapter->media;
1742 
1743 	INIT_DEBUGOUT("em_media_change: begin");
1744 
1745 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1746 		return (EINVAL);
1747 
1748 	EM_CORE_LOCK(adapter);
1749 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1750 	case IFM_AUTO:
1751 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1752 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1753 		break;
1754 	case IFM_1000_LX:
1755 	case IFM_1000_SX:
1756 	case IFM_1000_T:
1757 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1758 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1759 		break;
1760 	case IFM_100_TX:
1761 		adapter->hw.mac.autoneg = FALSE;
1762 		adapter->hw.phy.autoneg_advertised = 0;
1763 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1764 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1765 		else
1766 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1767 		break;
1768 	case IFM_10_T:
1769 		adapter->hw.mac.autoneg = FALSE;
1770 		adapter->hw.phy.autoneg_advertised = 0;
1771 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1772 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1773 		else
1774 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1775 		break;
1776 	default:
1777 		device_printf(adapter->dev, "Unsupported media type\n");
1778 	}
1779 
1780 	em_init_locked(adapter);
1781 	EM_CORE_UNLOCK(adapter);
1782 
1783 	return (0);
1784 }
1785 
1786 /*********************************************************************
1787  *
1788  *  This routine maps the mbufs to tx descriptors.
1789  *
1790  *  return 0 on success, positive on failure
1791  **********************************************************************/
1792 
1793 static int
1794 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1795 {
1796 	struct adapter		*adapter = txr->adapter;
1797 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1798 	bus_dmamap_t		map;
1799 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1800 	struct e1000_tx_desc	*ctxd = NULL;
1801 	struct mbuf		*m_head;
1802 	struct ether_header	*eh;
1803 	struct ip		*ip = NULL;
1804 	struct tcphdr		*tp = NULL;
1805 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1806 	int			ip_off, poff;
1807 	int			nsegs, i, j, first, last = 0;
1808 	int			error, do_tso, tso_desc = 0, remap = 1;
1809 
1810 retry:
1811 	m_head = *m_headp;
1812 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1813 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1814 	ip_off = poff = 0;
1815 
1816 	/*
1817 	 * Intel recommends entire IP/TCP header length reside in a single
1818 	 * buffer. If multiple descriptors are used to describe the IP and
1819 	 * TCP header, each descriptor should describe one or more
1820 	 * complete headers; descriptors referencing only parts of headers
1821 	 * are not supported. If all layer headers are not coalesced into
1822 	 * a single buffer, each buffer should not cross a 4KB boundary,
1823 	 * or be larger than the maximum read request size.
1824 	 * The controller also requires modifying the IP/TCP header to make
1825 	 * TSO work, so we first get a writable mbuf chain, then coalesce the
1826 	 * ethernet/IP/TCP header into a single buffer to meet the controller's
1827 	 * requirement. This also simplifies IP/TCP/UDP checksum offloading,
1828 	 * which has similar restrictions.
1829 	 */
1830 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1831 		if (do_tso || (m_head->m_next != NULL &&
1832 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1833 			if (M_WRITABLE(*m_headp) == 0) {
1834 				m_head = m_dup(*m_headp, M_NOWAIT);
1835 				m_freem(*m_headp);
1836 				if (m_head == NULL) {
1837 					*m_headp = NULL;
1838 					return (ENOBUFS);
1839 				}
1840 				*m_headp = m_head;
1841 			}
1842 		}
1843 		/*
1844 		 * XXX
1845 		 * Assume IPv4, we don't have TSO/checksum offload support
1846 		 * for IPv6 yet.
1847 		 */
1848 		ip_off = sizeof(struct ether_header);
1849 		m_head = m_pullup(m_head, ip_off);
1850 		if (m_head == NULL) {
1851 			*m_headp = NULL;
1852 			return (ENOBUFS);
1853 		}
1854 		eh = mtod(m_head, struct ether_header *);
1855 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1856 			ip_off = sizeof(struct ether_vlan_header);
1857 			m_head = m_pullup(m_head, ip_off);
1858 			if (m_head == NULL) {
1859 				*m_headp = NULL;
1860 				return (ENOBUFS);
1861 			}
1862 		}
1863 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1864 		if (m_head == NULL) {
1865 			*m_headp = NULL;
1866 			return (ENOBUFS);
1867 		}
1868 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1869 		poff = ip_off + (ip->ip_hl << 2);
1870 		if (do_tso) {
1871 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1872 			if (m_head == NULL) {
1873 				*m_headp = NULL;
1874 				return (ENOBUFS);
1875 			}
1876 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1877 			/*
1878 			 * TSO workaround:
1879 			 *   pull 4 more bytes of data into it.
1880 			 */
1881 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1882 			if (m_head == NULL) {
1883 				*m_headp = NULL;
1884 				return (ENOBUFS);
1885 			}
1886 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1887 			ip->ip_len = 0;
1888 			ip->ip_sum = 0;
1889 			/*
1890 			 * The pseudo TCP checksum does not include the TCP
1891 			 * payload length, so the driver must recompute the
1892 			 * checksum here to match what the hardware expects
1893 			 * to see, per Microsoft's Large Send specification.
1894 			 */
1895 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1896 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1897 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1898 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1899 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1900 			if (m_head == NULL) {
1901 				*m_headp = NULL;
1902 				return (ENOBUFS);
1903 			}
1904 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1905 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1906 			if (m_head == NULL) {
1907 				*m_headp = NULL;
1908 				return (ENOBUFS);
1909 			}
1910 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1911 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1912 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1913 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1914 			if (m_head == NULL) {
1915 				*m_headp = NULL;
1916 				return (ENOBUFS);
1917 			}
1918 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1919 		}
1920 		*m_headp = m_head;
1921 	}
1922 
1923 	/*
1924 	 * Map the packet for DMA
1925 	 *
1926 	 * Capture the first descriptor index,
1927 	 * this descriptor will have the index
1928 	 * of the EOP which is the only one that
1929 	 * now gets a DONE bit writeback.
1930 	 */
1931 	first = txr->next_avail_desc;
1932 	tx_buffer = &txr->tx_buffers[first];
1933 	tx_buffer_mapped = tx_buffer;
1934 	map = tx_buffer->map;
1935 
1936 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1937 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1938 
1939 	/*
1940 	 * There are two types of errors we can (try) to handle:
1941 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1942 	 *   out of segments.  Defragment the mbuf chain and try again.
1943 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1944 	 *   at this point in time.  Defer sending and try again later.
1945 	 * All other errors, in particular EINVAL, are fatal and prevent the
1946 	 * mbuf chain from ever going through.  Drop it and report error.
1947 	 */
1948 	if (error == EFBIG && remap) {
1949 		struct mbuf *m;
1950 
1951 		m = m_defrag(*m_headp, M_NOWAIT);
1952 		if (m == NULL) {
1953 			adapter->mbuf_alloc_failed++;
1954 			m_freem(*m_headp);
1955 			*m_headp = NULL;
1956 			return (ENOBUFS);
1957 		}
1958 		*m_headp = m;
1959 
1960 		/* Try it again, but only once */
1961 		remap = 0;
1962 		goto retry;
1963 	} else if (error == ENOMEM) {
1964 		adapter->no_tx_dma_setup++;
1965 		return (error);
1966 	} else if (error != 0) {
1967 		adapter->no_tx_dma_setup++;
1968 		m_freem(*m_headp);
1969 		*m_headp = NULL;
1970 		return (error);
1971 	}
1972 
1973 	/*
1974 	 * TSO Hardware workaround, if this packet is not
1975 	 * TSO, and is only a single descriptor long, and
1976 	 * it follows a TSO burst, then we need to add a
1977 	 * sentinel descriptor to prevent premature writeback.
1978 	 */
1979 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1980 		if (nsegs == 1)
1981 			tso_desc = TRUE;
1982 		txr->tx_tso = FALSE;
1983 	}
1984 
1985 	if (nsegs > (txr->tx_avail - 2)) {
1986 		txr->no_desc_avail++;
1987 		bus_dmamap_unload(txr->txtag, map);
1988 		return (ENOBUFS);
1989 	}
1990 	m_head = *m_headp;
1991 
1992 	/* Do hardware assists */
1993 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1994 		em_tso_setup(txr, m_head, ip_off, ip, tp,
1995 		    &txd_upper, &txd_lower);
1996 		/* we need to make a final sentinel transmit desc */
1997 		tso_desc = TRUE;
1998 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1999 		em_transmit_checksum_setup(txr, m_head,
2000 		    ip_off, ip, &txd_upper, &txd_lower);
2001 
2002 	if (m_head->m_flags & M_VLANTAG) {
2003 		/* Set the vlan id. */
2004 		txd_upper |=
2005 		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2006 		/* Tell hardware to add tag */
2007 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2008 	}
2009 
2010 	i = txr->next_avail_desc;
2011 
2012 	/* Set up our transmit descriptors */
2013 	for (j = 0; j < nsegs; j++) {
2014 		bus_size_t seg_len;
2015 		bus_addr_t seg_addr;
2016 
2017 		tx_buffer = &txr->tx_buffers[i];
2018 		ctxd = &txr->tx_base[i];
2019 		seg_addr = segs[j].ds_addr;
2020 		seg_len  = segs[j].ds_len;
2021 		/*
2022 		** TSO Workaround:
2023 		** If this is the last descriptor, we want to
2024 		** split it so we have a small final sentinel
2025 		*/
2026 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2027 			seg_len -= 4;
2028 			ctxd->buffer_addr = htole64(seg_addr);
2029 			ctxd->lower.data = htole32(
2030 			adapter->txd_cmd | txd_lower | seg_len);
2031 			ctxd->upper.data =
2032 			    htole32(txd_upper);
2033 			if (++i == adapter->num_tx_desc)
2034 				i = 0;
2035 			/* Now make the sentinel */
2036 			++txd_used; /* using an extra txd */
2037 			ctxd = &txr->tx_base[i];
2038 			tx_buffer = &txr->tx_buffers[i];
2039 			ctxd->buffer_addr =
2040 			    htole64(seg_addr + seg_len);
2041 			ctxd->lower.data = htole32(
2042 			adapter->txd_cmd | txd_lower | 4);
2043 			ctxd->upper.data =
2044 			    htole32(txd_upper);
2045 			last = i;
2046 			if (++i == adapter->num_tx_desc)
2047 				i = 0;
2048 		} else {
2049 			ctxd->buffer_addr = htole64(seg_addr);
2050 			ctxd->lower.data = htole32(
2051 			adapter->txd_cmd | txd_lower | seg_len);
2052 			ctxd->upper.data =
2053 			    htole32(txd_upper);
2054 			last = i;
2055 			if (++i == adapter->num_tx_desc)
2056 				i = 0;
2057 		}
2058 		tx_buffer->m_head = NULL;
2059 		tx_buffer->next_eop = -1;
2060 	}
2061 
2062 	txr->next_avail_desc = i;
2063 	txr->tx_avail -= nsegs;
2064 	if (tso_desc) /* TSO used an extra for sentinel */
2065 		txr->tx_avail -= txd_used;
2066 
2067 	tx_buffer->m_head = m_head;
2068 	/*
2069 	** Here we swap the map so the last descriptor,
2070 	** which gets the completion interrupt has the
2071 	** real map, and the first descriptor gets the
2072 	** unused map from this descriptor.
2073 	*/
2074 	tx_buffer_mapped->map = tx_buffer->map;
2075 	tx_buffer->map = map;
2076 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2077 
2078 	/*
2079 	 * Last Descriptor of Packet
2080 	 * needs End Of Packet (EOP)
2081 	 * and Report Status (RS)
2082 	 */
2083 	ctxd->lower.data |=
2084 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2085 	/*
2086 	 * Keep track in the first buffer which
2087 	 * descriptor will be written back
2088 	 */
2089 	tx_buffer = &txr->tx_buffers[first];
2090 	tx_buffer->next_eop = last;
2091 	/* Update the watchdog time early and often */
2092 	txr->watchdog_time = ticks;
2093 
2094 	/*
2095 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2096 	 * that this frame is available to transmit.
2097 	 */
2098 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2099 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2100 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2101 
2102 	return (0);
2103 }
2104 
2105 static void
2106 em_set_promisc(struct adapter *adapter)
2107 {
2108 	struct ifnet	*ifp = adapter->ifp;
2109 	u32		reg_rctl;
2110 
2111 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2112 
2113 	if (ifp->if_flags & IFF_PROMISC) {
2114 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2115 		/* Turn this on if you want to see bad packets */
2116 		if (em_debug_sbp)
2117 			reg_rctl |= E1000_RCTL_SBP;
2118 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2119 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2120 		reg_rctl |= E1000_RCTL_MPE;
2121 		reg_rctl &= ~E1000_RCTL_UPE;
2122 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2123 	}
2124 }
2125 
2126 static void
2127 em_disable_promisc(struct adapter *adapter)
2128 {
2129 	u32	reg_rctl;
2130 
2131 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2132 
2133 	reg_rctl &=  (~E1000_RCTL_UPE);
2134 	reg_rctl &=  (~E1000_RCTL_MPE);
2135 	reg_rctl &=  (~E1000_RCTL_SBP);
2136 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2137 }
2138 
2139 
2140 /*********************************************************************
2141  *  Multicast Update
2142  *
2143  *  This routine is called whenever multicast address list is updated.
2144  *
2145  **********************************************************************/
2146 
2147 static void
2148 em_set_multi(struct adapter *adapter)
2149 {
2150 	struct ifnet	*ifp = adapter->ifp;
2151 	struct ifmultiaddr *ifma;
2152 	u32 reg_rctl = 0;
2153 	u8  *mta; /* Multicast array memory */
2154 	int mcnt = 0;
2155 
2156 	IOCTL_DEBUGOUT("em_set_multi: begin");
2157 
2158 	mta = adapter->mta;
2159 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2160 
2161 	if (adapter->hw.mac.type == e1000_82542 &&
2162 	    adapter->hw.revision_id == E1000_REVISION_2) {
2163 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2164 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2165 			e1000_pci_clear_mwi(&adapter->hw);
2166 		reg_rctl |= E1000_RCTL_RST;
2167 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2168 		msec_delay(5);
2169 	}
2170 
2171 #if __FreeBSD_version < 800000
2172 	IF_ADDR_LOCK(ifp);
2173 #else
2174 	if_maddr_rlock(ifp);
2175 #endif
2176 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2177 		if (ifma->ifma_addr->sa_family != AF_LINK)
2178 			continue;
2179 
2180 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2181 			break;
2182 
2183 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2184 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2185 		mcnt++;
2186 	}
2187 #if __FreeBSD_version < 800000
2188 	IF_ADDR_UNLOCK(ifp);
2189 #else
2190 	if_maddr_runlock(ifp);
2191 #endif
2192 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2193 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2194 		reg_rctl |= E1000_RCTL_MPE;
2195 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2196 	} else
2197 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2198 
2199 	if (adapter->hw.mac.type == e1000_82542 &&
2200 	    adapter->hw.revision_id == E1000_REVISION_2) {
2201 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2202 		reg_rctl &= ~E1000_RCTL_RST;
2203 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2204 		msec_delay(5);
2205 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2206 			e1000_pci_set_mwi(&adapter->hw);
2207 	}
2208 }
2209 
2210 
2211 /*********************************************************************
2212  *  Timer routine
2213  *
2214  *  This routine checks for link status and updates statistics.
2215  *
2216  **********************************************************************/
2217 
2218 static void
2219 em_local_timer(void *arg)
2220 {
2221 	struct adapter	*adapter = arg;
2222 	struct ifnet	*ifp = adapter->ifp;
2223 	struct tx_ring	*txr = adapter->tx_rings;
2224 	struct rx_ring	*rxr = adapter->rx_rings;
2225 	u32		trigger;
2226 
2227 	EM_CORE_LOCK_ASSERT(adapter);
2228 
2229 	em_update_link_status(adapter);
2230 	em_update_stats_counters(adapter);
2231 
2232 	/* Reset LAA into RAR[0] on 82571 */
2233 	if ((adapter->hw.mac.type == e1000_82571) &&
2234 	    e1000_get_laa_state_82571(&adapter->hw))
2235 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2236 
2237 	/* Mask to use in the irq trigger */
2238 	if (adapter->msix_mem)
2239 		trigger = rxr->ims; /* RX for 82574 */
2240 	else
2241 		trigger = E1000_ICS_RXDMT0;
2242 
2243 	/*
2244 	** Check on the state of the TX queue(s); this
2245 	** can be done without the lock because it's RO
2246 	** and the HUNG state will be static if set.
2247 	*/
2248 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2249 		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2250 		    (adapter->pause_frames == 0))
2251 			goto hung;
2252 		/* Schedule a TX tasklet if needed */
2253 		if (txr->tx_avail <= EM_MAX_SCATTER)
2254 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2255 	}
2256 
2257 	adapter->pause_frames = 0;
2258 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2259 #ifndef DEVICE_POLLING
2260 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2261 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2262 #endif
2263 	return;
2264 hung:
2265 	/* Looks like we're hung */
2266 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2267 	device_printf(adapter->dev,
2268 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2269 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2270 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2271 	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2272 	    "Next TX to Clean = %d\n",
2273 	    txr->me, txr->tx_avail, txr->next_to_clean);
2274 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2275 	adapter->watchdog_events++;
2276 	adapter->pause_frames = 0;
2277 	em_init_locked(adapter);
2278 }
2279 
2280 
2281 static void
2282 em_update_link_status(struct adapter *adapter)
2283 {
2284 	struct e1000_hw *hw = &adapter->hw;
2285 	struct ifnet *ifp = adapter->ifp;
2286 	device_t dev = adapter->dev;
2287 	struct tx_ring *txr = adapter->tx_rings;
2288 	u32 link_check = 0;
2289 
2290 	/* Get the cached link value or read phy for real */
2291 	switch (hw->phy.media_type) {
2292 	case e1000_media_type_copper:
2293 		if (hw->mac.get_link_status) {
2294 			/* Do the work to read phy */
2295 			e1000_check_for_link(hw);
2296 			link_check = !hw->mac.get_link_status;
2297 			if (link_check) /* ESB2 fix */
2298 				e1000_cfg_on_link_up(hw);
2299 		} else
2300 			link_check = TRUE;
2301 		break;
2302 	case e1000_media_type_fiber:
2303 		e1000_check_for_link(hw);
2304 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2305 		    E1000_STATUS_LU);
2306 		break;
2307 	case e1000_media_type_internal_serdes:
2308 		e1000_check_for_link(hw);
2309 		link_check = adapter->hw.mac.serdes_has_link;
2310 		break;
2311 	default:
2312 	case e1000_media_type_unknown:
2313 		break;
2314 	}
2315 
2316 	/* Now check for a transition */
2317 	if (link_check && (adapter->link_active == 0)) {
2318 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2319 		    &adapter->link_duplex);
2320 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2321 		if ((adapter->link_speed != SPEED_1000) &&
2322 		    ((hw->mac.type == e1000_82571) ||
2323 		    (hw->mac.type == e1000_82572))) {
2324 			int tarc0;
2325 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2326 			tarc0 &= ~SPEED_MODE_BIT;
2327 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2328 		}
2329 		if (bootverbose)
2330 			device_printf(dev, "Link is up %d Mbps %s\n",
2331 			    adapter->link_speed,
2332 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2333 			    "Full Duplex" : "Half Duplex"));
2334 		adapter->link_active = 1;
2335 		adapter->smartspeed = 0;
2336 		ifp->if_baudrate = adapter->link_speed * 1000000;
2337 		if_link_state_change(ifp, LINK_STATE_UP);
2338 	} else if (!link_check && (adapter->link_active == 1)) {
2339 		ifp->if_baudrate = adapter->link_speed = 0;
2340 		adapter->link_duplex = 0;
2341 		if (bootverbose)
2342 			device_printf(dev, "Link is Down\n");
2343 		adapter->link_active = 0;
2344 		/* Link down, disable watchdog */
2345 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2346 			txr->queue_status = EM_QUEUE_IDLE;
2347 		if_link_state_change(ifp, LINK_STATE_DOWN);
2348 	}
2349 }
2350 
2351 /*********************************************************************
2352  *
2353  *  This routine disables all traffic on the adapter by issuing a
2354  *  global reset on the MAC and deallocates TX/RX buffers.
2355  *
2356  *  This routine should always be called with BOTH the CORE
2357  *  and TX locks.
2358  **********************************************************************/
2359 
2360 static void
2361 em_stop(void *arg)
2362 {
2363 	struct adapter	*adapter = arg;
2364 	struct ifnet	*ifp = adapter->ifp;
2365 	struct tx_ring	*txr = adapter->tx_rings;
2366 
2367 	EM_CORE_LOCK_ASSERT(adapter);
2368 
2369 	INIT_DEBUGOUT("em_stop: begin");
2370 
2371 	em_disable_intr(adapter);
2372 	callout_stop(&adapter->timer);
2373 
2374 	/* Tell the stack that the interface is no longer active */
2375 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2376 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2377 
2378 	/* Unarm watchdog timer. */
2379 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2380 		EM_TX_LOCK(txr);
2381 		txr->queue_status = EM_QUEUE_IDLE;
2382 		EM_TX_UNLOCK(txr);
2383 	}
2384 
2385 	e1000_reset_hw(&adapter->hw);
2386 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2387 
2388 	e1000_led_off(&adapter->hw);
2389 	e1000_cleanup_led(&adapter->hw);
2390 }
2391 
2392 
2393 /*********************************************************************
2394  *
2395  *  Determine hardware revision.
2396  *
2397  **********************************************************************/
2398 static void
2399 em_identify_hardware(struct adapter *adapter)
2400 {
2401 	device_t dev = adapter->dev;
2402 
2403 	/* Make sure our PCI config space has the necessary stuff set */
2404 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2405 	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2406 	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2407 		device_printf(dev, "Memory Access and/or Bus Master bits "
2408 		    "were not set!\n");
2409 		adapter->hw.bus.pci_cmd_word |=
2410 		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2411 		pci_write_config(dev, PCIR_COMMAND,
2412 		    adapter->hw.bus.pci_cmd_word, 2);
2413 	}
2414 
2415 	/* Save off the information about this board */
2416 	adapter->hw.vendor_id = pci_get_vendor(dev);
2417 	adapter->hw.device_id = pci_get_device(dev);
2418 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2419 	adapter->hw.subsystem_vendor_id =
2420 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2421 	adapter->hw.subsystem_device_id =
2422 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2423 
2424 	/* Do Shared Code Init and Setup */
2425 	if (e1000_set_mac_type(&adapter->hw)) {
2426 		device_printf(dev, "Setup init failure\n");
2427 		return;
2428 	}
2429 }
2430 
2431 static int
2432 em_allocate_pci_resources(struct adapter *adapter)
2433 {
2434 	device_t	dev = adapter->dev;
2435 	int		rid;
2436 
2437 	rid = PCIR_BAR(0);
2438 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2439 	    &rid, RF_ACTIVE);
2440 	if (adapter->memory == NULL) {
2441 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2442 		return (ENXIO);
2443 	}
2444 	adapter->osdep.mem_bus_space_tag =
2445 	    rman_get_bustag(adapter->memory);
2446 	adapter->osdep.mem_bus_space_handle =
2447 	    rman_get_bushandle(adapter->memory);
2448 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
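	/* hw_addr is only a token; MMIO access goes through the osdep bus_space tag/handle. */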
2449 
2450 	/* Default to a single queue */
2451 	adapter->num_queues = 1;
2452 
2453 	/*
2454 	 * Setup MSI/X or MSI if PCI Express
2455 	 */
2456 	adapter->msix = em_setup_msix(adapter);
2457 
2458 	adapter->hw.back = &adapter->osdep;
2459 
2460 	return (0);
2461 }
2462 
2463 /*********************************************************************
2464  *
2465  *  Setup the Legacy or MSI Interrupt handler
2466  *
2467  **********************************************************************/
2468 int
2469 em_allocate_legacy(struct adapter *adapter)
2470 {
2471 	device_t dev = adapter->dev;
2472 	struct tx_ring	*txr = adapter->tx_rings;
2473 	int error, rid = 0;
2474 
2475 	/* Manually turn off all interrupts */
2476 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2477 
2478 	if (adapter->msix == 1) /* using MSI */
2479 		rid = 1;
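	/* Legacy INTx uses IRQ rid 0; MSI messages start at rid 1. */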
2480 	/* We allocate a single interrupt resource */
2481 	adapter->res = bus_alloc_resource_any(dev,
2482 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2483 	if (adapter->res == NULL) {
2484 		device_printf(dev, "Unable to allocate bus resource: "
2485 		    "interrupt\n");
2486 		return (ENXIO);
2487 	}
2488 
2489 	/*
2490 	 * Allocate a fast interrupt and the associated
2491 	 * deferred processing contexts.
2492 	 */
2493 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2494 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2495 	    taskqueue_thread_enqueue, &adapter->tq);
2496 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2497 	    device_get_nameunit(adapter->dev));
2498 	/* Use a TX only tasklet for local timer */
2499 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2500 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2501 	    taskqueue_thread_enqueue, &txr->tq);
2502 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2503 	    device_get_nameunit(adapter->dev));
2504 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2505 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2506 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2507 		device_printf(dev, "Failed to register fast interrupt "
2508 			    "handler: %d\n", error);
2509 		taskqueue_free(adapter->tq);
2510 		adapter->tq = NULL;
2511 		return (error);
2512 	}
2513 
2514 	return (0);
2515 }
2516 
2517 /*********************************************************************
2518  *
2519  *  Setup the MSIX Interrupt handlers
2520  *   This is not really Multiqueue, rather
2521  *   it's just separate interrupt vectors
2522  *   for TX, RX, and Link.
2523  *
2524  **********************************************************************/
2525 int
2526 em_allocate_msix(struct adapter *adapter)
2527 {
2528 	device_t	dev = adapter->dev;
2529 	struct		tx_ring *txr = adapter->tx_rings;
2530 	struct		rx_ring *rxr = adapter->rx_rings;
2531 	int		error, rid, vector = 0;
2532 
2533 
2534 	/* Make sure all interrupts are disabled */
2535 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2536 
2537 	/* First set up ring resources */
2538 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2539 
2540 		/* RX ring */
2541 		rid = vector + 1;
2542 
2543 		rxr->res = bus_alloc_resource_any(dev,
2544 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2545 		if (rxr->res == NULL) {
2546 			device_printf(dev,
2547 			    "Unable to allocate bus resource: "
2548 			    "RX MSIX Interrupt %d\n", i);
2549 			return (ENXIO);
2550 		}
2551 		if ((error = bus_setup_intr(dev, rxr->res,
2552 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2553 		    rxr, &rxr->tag)) != 0) {
2554 			device_printf(dev, "Failed to register RX handler");
2555 			return (error);
2556 		}
2557 #if __FreeBSD_version >= 800504
2558 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2559 #endif
2560 		rxr->msix = vector++; /* NOTE increment vector for TX */
2561 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2562 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2563 		    taskqueue_thread_enqueue, &rxr->tq);
2564 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2565 		    device_get_nameunit(adapter->dev));
2566 		/*
2567 		** Set the bit to enable interrupt
2568 		** in E1000_IMS -- bits 20 and 21
2569 		** are for RX0 and RX1, note this has
2570 		** NOTHING to do with the MSIX vector
2571 		*/
2572 		rxr->ims = 1 << (20 + i);
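		/* Each 4-bit IVAR field: bits 2:0 select the MSIX vector, bit 3 marks it valid. */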
2573 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2574 
2575 		/* TX ring */
2576 		rid = vector + 1;
2577 		txr->res = bus_alloc_resource_any(dev,
2578 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2579 		if (txr->res == NULL) {
2580 			device_printf(dev,
2581 			    "Unable to allocate bus resource: "
2582 			    "TX MSIX Interrupt %d\n", i);
2583 			return (ENXIO);
2584 		}
2585 		if ((error = bus_setup_intr(dev, txr->res,
2586 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2587 		    txr, &txr->tag)) != 0) {
2588 			device_printf(dev, "Failed to register TX handler");
2589 			return (error);
2590 		}
2591 #if __FreeBSD_version >= 800504
2592 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2593 #endif
2594 		txr->msix = vector++; /* Increment vector for next pass */
2595 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2596 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2597 		    taskqueue_thread_enqueue, &txr->tq);
2598 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2599 		    device_get_nameunit(adapter->dev));
2600 		/*
2601 		** Set the bit to enable interrupt
2602 		** in E1000_IMS -- bits 22 and 23
2603 		** are for TX0 and TX1, note this has
2604 		** NOTHING to do with the MSIX vector
2605 		*/
2606 		txr->ims = 1 << (22 + i);
2607 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2608 	}
2609 
2610 	/* Link interrupt */
2611 	++rid;
2612 	adapter->res = bus_alloc_resource_any(dev,
2613 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2614 	if (!adapter->res) {
2615 		device_printf(dev, "Unable to allocate "
2616 		    "bus resource: Link interrupt [%d]\n", rid);
2617 		return (ENXIO);
2618 	}
2619 	/* Set the link handler function */
2620 	error = bus_setup_intr(dev, adapter->res,
2621 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2622 	    em_msix_link, adapter, &adapter->tag);
2623 	if (error) {
2624 		adapter->res = NULL;
2625 		device_printf(dev, "Failed to register LINK handler");
2626 		return (error);
2627 	}
2628 #if __FreeBSD_version >= 800504
2629 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2630 #endif
2631 	adapter->linkvec = vector;
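	/* Other-cause (link) routing: IVAR bits 16-18 hold the vector, bit 19 marks it valid. */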
2632 	adapter->ivars |= (8 | vector) << 16;
2633 	adapter->ivars |= 0x80000000;
2634 
2635 	return (0);
2636 }
2637 
2638 
2639 static void
2640 em_free_pci_resources(struct adapter *adapter)
2641 {
2642 	device_t	dev = adapter->dev;
2643 	struct tx_ring	*txr;
2644 	struct rx_ring	*rxr;
2645 	int		rid;
2646 
2647 
2648 	/*
2649 	** Release all the queue interrupt resources:
2650 	*/
2651 	for (int i = 0; i < adapter->num_queues; i++) {
2652 		txr = &adapter->tx_rings[i];
2653 		rxr = &adapter->rx_rings[i];
2654 		/* an early abort? */
2655 		if ((txr == NULL) || (rxr == NULL))
2656 			break;
2657 		rid = txr->msix + 1;
2658 		if (txr->tag != NULL) {
2659 			bus_teardown_intr(dev, txr->res, txr->tag);
2660 			txr->tag = NULL;
2661 		}
2662 		if (txr->res != NULL)
2663 			bus_release_resource(dev, SYS_RES_IRQ,
2664 			    rid, txr->res);
2665 		rid = rxr->msix + 1;
2666 		if (rxr->tag != NULL) {
2667 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2668 			rxr->tag = NULL;
2669 		}
2670 		if (rxr->res != NULL)
2671 			bus_release_resource(dev, SYS_RES_IRQ,
2672 			    rid, rxr->res);
2673 	}
2674 
2675 	if (adapter->linkvec) /* we are doing MSIX */
2676 		rid = adapter->linkvec + 1;
2677 	else
2678 		rid = (adapter->msix != 0) ? 1 : 0;
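	/* This rid must match the one used when adapter->res was allocated. */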
2679 
2680 	if (adapter->tag != NULL) {
2681 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2682 		adapter->tag = NULL;
2683 	}
2684 
2685 	if (adapter->res != NULL)
2686 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2687 
2688 
2689 	if (adapter->msix)
2690 		pci_release_msi(dev);
2691 
2692 	if (adapter->msix_mem != NULL)
2693 		bus_release_resource(dev, SYS_RES_MEMORY,
2694 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2695 
2696 	if (adapter->memory != NULL)
2697 		bus_release_resource(dev, SYS_RES_MEMORY,
2698 		    PCIR_BAR(0), adapter->memory);
2699 
2700 	if (adapter->flash != NULL)
2701 		bus_release_resource(dev, SYS_RES_MEMORY,
2702 		    EM_FLASH, adapter->flash);
2703 }
2704 
2705 /*
2706  * Setup MSI or MSI/X
2707  */
2708 static int
2709 em_setup_msix(struct adapter *adapter)
2710 {
2711 	device_t dev = adapter->dev;
2712 	int val = 0;
2713 
2714 	/*
2715 	** Setup MSI/X for Hartwell: tests have shown
2716 	** use of two queues to be unstable, and to
2717 	** provide no great gain anyway, so we simply
2718 	** separate the interrupts and use a single queue.
2719 	*/
2720 	if ((adapter->hw.mac.type == e1000_82574) &&
2721 	    (em_enable_msix == TRUE)) {
2722 		/* Map the MSIX BAR */
2723 		int rid = PCIR_BAR(EM_MSIX_BAR);
2724 		adapter->msix_mem = bus_alloc_resource_any(dev,
2725 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2726 		if (!adapter->msix_mem) {
2727 			/* May not be enabled */
2728 			device_printf(adapter->dev,
2729 			    "Unable to map MSIX table\n");
2730 			goto msi;
2731 		}
2732 		val = pci_msix_count(dev);
2733 		/* We only need 3 vectors */
2734 		if (val > 3)
2735 			val = 3;
2736 		if (val != 3) {
2737 			bus_release_resource(dev, SYS_RES_MEMORY,
2738 			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2739 			adapter->msix_mem = NULL;
2740 			device_printf(adapter->dev,
2741 			    "MSIX: incorrect vectors, using MSI\n");
2742 			goto msi;
2743 		}
2744 
2745 		if (pci_alloc_msix(dev, &val) == 0) {
2746 			device_printf(adapter->dev,
2747 			    "Using MSIX interrupts "
2748 			    "with %d vectors\n", val);
2749 		}
2750 
2751 		return (val);
2752 	}
2753 msi:
2754 	val = pci_msi_count(dev);
2755 	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2756 		adapter->msix = 1;
2757 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2758 		return (val);
2759 	}
2760 	/* Should only happen due to manual configuration */
2761 	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2762 	return (0);
2763 }
2764 
2765 
2766 /*********************************************************************
2767  *
2768  *  Initialize the hardware to a configuration
2769  *  as specified by the adapter structure.
2770  *
2771  **********************************************************************/
2772 static void
2773 em_reset(struct adapter *adapter)
2774 {
2775 	device_t	dev = adapter->dev;
2776 	struct ifnet	*ifp = adapter->ifp;
2777 	struct e1000_hw	*hw = &adapter->hw;
2778 	u16		rx_buffer_size;
2779 	u32		pba;
2780 
2781 	INIT_DEBUGOUT("em_reset: begin");
2782 
2783 	/* Set up smart power down as default off on newer adapters. */
2784 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2785 	    hw->mac.type == e1000_82572)) {
2786 		u16 phy_tmp = 0;
2787 
2788 		/* Speed up time to link by disabling smart power down. */
2789 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2790 		phy_tmp &= ~IGP02E1000_PM_SPD;
2791 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2792 	}
2793 
2794 	/*
2795 	 * Packet Buffer Allocation (PBA)
2796 	 * Writing PBA sets the receive portion of the buffer;
2797 	 * the remainder is used for the transmit buffer.
2798 	 */
2799 	switch (hw->mac.type) {
2800 	/* Total Packet Buffer on these is 48K */
2801 	case e1000_82571:
2802 	case e1000_82572:
2803 	case e1000_80003es2lan:
2804 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2805 		break;
2806 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2807 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2808 		break;
2809 	case e1000_82574:
2810 	case e1000_82583:
2811 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2812 		break;
2813 	case e1000_ich8lan:
2814 		pba = E1000_PBA_8K;
2815 		break;
2816 	case e1000_ich9lan:
2817 	case e1000_ich10lan:
2818 		/* Boost Receive side for jumbo frames */
2819 		if (adapter->max_frame_size > 4096)
2820 			pba = E1000_PBA_14K;
2821 		else
2822 			pba = E1000_PBA_10K;
2823 		break;
2824 	case e1000_pchlan:
2825 	case e1000_pch2lan:
2826 		pba = E1000_PBA_26K;
2827 		break;
2828 	default:
2829 		if (adapter->max_frame_size > 8192)
2830 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2831 		else
2832 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2833 	}
2834 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2835 
2836 	/*
2837 	 * These parameters control the automatic generation (Tx) and
2838 	 * response (Rx) to Ethernet PAUSE frames.
2839 	 * - High water mark should allow for at least two frames to be
2840 	 *   received after sending an XOFF.
2841 	 * - Low water mark works best when it is very near the high water mark.
2842 	 *   This allows the receiver to restart by sending XON when it has
2843 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2844 	 *   restart after one full frame is pulled from the buffer. There
2845 	 *   could be several smaller frames in the buffer and if so they will
2846 	 *   not trigger the XON until their total number reduces the buffer
2847 	 *   by 1500.
2848 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2849 	 */
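	/* PBA's low 16 bits report the RX allocation in KB; << 10 converts to bytes. */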
2850 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2851 	hw->fc.high_water = rx_buffer_size -
2852 	    roundup2(adapter->max_frame_size, 1024);
2853 	hw->fc.low_water = hw->fc.high_water - 1500;
2854 
2855 	if (adapter->fc) /* locally set flow control value? */
2856 		hw->fc.requested_mode = adapter->fc;
2857 	else
2858 		hw->fc.requested_mode = e1000_fc_full;
2859 
2860 	if (hw->mac.type == e1000_80003es2lan)
2861 		hw->fc.pause_time = 0xFFFF;
2862 	else
2863 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2864 
2865 	hw->fc.send_xon = TRUE;
2866 
2867 	/* Device specific overrides/settings */
2868 	switch (hw->mac.type) {
2869 	case e1000_pchlan:
2870 		/* Workaround: no TX flow ctrl for PCH */
2871 		hw->fc.requested_mode = e1000_fc_rx_pause;
2872 		hw->fc.pause_time = 0xFFFF; /* override */
2873 		if (ifp->if_mtu > ETHERMTU) {
2874 			hw->fc.high_water = 0x3500;
2875 			hw->fc.low_water = 0x1500;
2876 		} else {
2877 			hw->fc.high_water = 0x5000;
2878 			hw->fc.low_water = 0x3000;
2879 		}
2880 		hw->fc.refresh_time = 0x1000;
2881 		break;
2882 	case e1000_pch2lan:
2883 		hw->fc.high_water = 0x5C20;
2884 		hw->fc.low_water = 0x5048;
2885 		hw->fc.pause_time = 0x0650;
2886 		hw->fc.refresh_time = 0x0400;
2887 		/* Jumbos need adjusted PBA */
2888 		if (ifp->if_mtu > ETHERMTU)
2889 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2890 		else
2891 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2892 		break;
2893 	case e1000_ich9lan:
2894 	case e1000_ich10lan:
2895 		if (ifp->if_mtu > ETHERMTU) {
2896 			hw->fc.high_water = 0x2800;
2897 			hw->fc.low_water = hw->fc.high_water - 8;
2898 			break;
2899 		}
2900 		/* else fall thru */
2901 	default:
2902 		if (hw->mac.type == e1000_80003es2lan)
2903 			hw->fc.pause_time = 0xFFFF;
2904 		break;
2905 	}
2906 
2907 	/* Issue a global reset */
2908 	e1000_reset_hw(hw);
2909 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2910 	em_disable_aspm(adapter);
2911 	/* and a re-init */
2912 	if (e1000_init_hw(hw) < 0) {
2913 		device_printf(dev, "Hardware Initialization Failed\n");
2914 		return;
2915 	}
2916 
2917 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2918 	e1000_get_phy_info(hw);
2919 	e1000_check_for_link(hw);
2920 	return;
2921 }
2922 
2923 /*********************************************************************
2924  *
2925  *  Setup networking device structure and register an interface.
2926  *
2927  **********************************************************************/
2928 static int
2929 em_setup_interface(device_t dev, struct adapter *adapter)
2930 {
2931 	struct ifnet   *ifp;
2932 
2933 	INIT_DEBUGOUT("em_setup_interface: begin");
2934 
2935 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2936 	if (ifp == NULL) {
2937 		device_printf(dev, "can not allocate ifnet structure\n");
2938 		return (-1);
2939 	}
2940 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2941 	ifp->if_init =  em_init;
2942 	ifp->if_softc = adapter;
2943 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2944 	ifp->if_ioctl = em_ioctl;
2945 #ifdef EM_MULTIQUEUE
2946 	/* Multiqueue stack interface */
2947 	ifp->if_transmit = em_mq_start;
2948 	ifp->if_qflush = em_qflush;
2949 #else
2950 	ifp->if_start = em_start;
2951 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2952 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2953 	IFQ_SET_READY(&ifp->if_snd);
2954 #endif
2955 
2956 	ether_ifattach(ifp, adapter->hw.mac.addr);
2957 
2958 	ifp->if_capabilities = ifp->if_capenable = 0;
2959 
2960 
2961 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2962 	ifp->if_capabilities |= IFCAP_TSO4;
2963 	/*
2964 	 * Tell the upper layer(s) we
2965 	 * support full VLAN capability
2966 	 */
2967 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2968 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2969 			     |  IFCAP_VLAN_HWTSO
2970 			     |  IFCAP_VLAN_MTU;
2971 	ifp->if_capenable = ifp->if_capabilities;
2972 
2973 	/*
2974 	** Don't turn this on by default: if vlans are
2975 	** created on another pseudo device (e.g. lagg)
2976 	** then vlan events are not passed thru, breaking
2977 	** operation, but with HW FILTER off it works. If
2978 	** using vlans directly on the em driver you can
2979 	** enable this and get full hardware tag filtering.
2980 	*/
2981 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2982 
2983 #ifdef DEVICE_POLLING
2984 	ifp->if_capabilities |= IFCAP_POLLING;
2985 #endif
2986 
2987 	/* Enable only WOL MAGIC by default */
2988 	if (adapter->wol) {
2989 		ifp->if_capabilities |= IFCAP_WOL;
2990 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2991 	}
2992 
2993 	/*
2994 	 * Specify the media types supported by this adapter and register
2995 	 * callbacks to update media and link information
2996 	 */
2997 	ifmedia_init(&adapter->media, IFM_IMASK,
2998 	    em_media_change, em_media_status);
2999 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3000 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3001 		u_char fiber_type = IFM_1000_SX;	/* default type */
3002 
3003 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3004 			    0, NULL);
3005 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3006 	} else {
3007 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3008 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3009 			    0, NULL);
3010 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3011 			    0, NULL);
3012 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3013 			    0, NULL);
3014 		if (adapter->hw.phy.type != e1000_phy_ife) {
3015 			ifmedia_add(&adapter->media,
3016 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3017 			ifmedia_add(&adapter->media,
3018 				IFM_ETHER | IFM_1000_T, 0, NULL);
3019 		}
3020 	}
3021 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3022 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3023 	return (0);
3024 }
3025 
3026 
3027 /*
3028  * Manage DMA'able memory.
3029  */
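/* Callback for bus_dmamap_load(): record the bus address of the single DMA segment. */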
3030 static void
3031 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3032 {
3033 	if (error)
3034 		return;
3035 	*(bus_addr_t *) arg = segs[0].ds_addr;
3036 }
3037 
3038 static int
3039 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3040         struct em_dma_alloc *dma, int mapflags)
3041 {
3042 	int error;
3043 
3044 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3045 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3046 				BUS_SPACE_MAXADDR,	/* lowaddr */
3047 				BUS_SPACE_MAXADDR,	/* highaddr */
3048 				NULL, NULL,		/* filter, filterarg */
3049 				size,			/* maxsize */
3050 				1,			/* nsegments */
3051 				size,			/* maxsegsize */
3052 				0,			/* flags */
3053 				NULL,			/* lockfunc */
3054 				NULL,			/* lockarg */
3055 				&dma->dma_tag);
3056 	if (error) {
3057 		device_printf(adapter->dev,
3058 		    "%s: bus_dma_tag_create failed: %d\n",
3059 		    __func__, error);
3060 		goto fail_0;
3061 	}
3062 
3063 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3064 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3065 	if (error) {
3066 		device_printf(adapter->dev,
3067 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3068 		    __func__, (uintmax_t)size, error);
3069 		goto fail_2;
3070 	}
3071 
3072 	dma->dma_paddr = 0;
3073 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3074 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3075 	if (error || dma->dma_paddr == 0) {
3076 		device_printf(adapter->dev,
3077 		    "%s: bus_dmamap_load failed: %d\n",
3078 		    __func__, error);
3079 		goto fail_3;
3080 	}
3081 
3082 	return (0);
3083 
3084 fail_3:
3085 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3086 fail_2:
3087 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3088 	bus_dma_tag_destroy(dma->dma_tag);
3089 fail_0:
3090 	dma->dma_map = NULL;
3091 	dma->dma_tag = NULL;
3092 
3093 	return (error);
3094 }
3095 
3096 static void
3097 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3098 {
3099 	if (dma->dma_tag == NULL)
3100 		return;
3101 	if (dma->dma_map != NULL) {
3102 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3103 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3104 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3105 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3106 		dma->dma_map = NULL;
3107 	}
3108 	bus_dma_tag_destroy(dma->dma_tag);
3109 	dma->dma_tag = NULL;
3110 }
3111 
3112 
3113 /*********************************************************************
3114  *
3115  *  Allocate memory for the transmit and receive rings, and then
3116  *  the descriptors associated with each, called only once at attach.
3117  *
3118  **********************************************************************/
3119 static int
3120 em_allocate_queues(struct adapter *adapter)
3121 {
3122 	device_t		dev = adapter->dev;
3123 	struct tx_ring		*txr = NULL;
3124 	struct rx_ring		*rxr = NULL;
3125 	int rsize, tsize, error = E1000_SUCCESS;
3126 	int txconf = 0, rxconf = 0;
3127 
3128 
3129 	/* Allocate the TX ring struct memory */
3130 	if (!(adapter->tx_rings =
3131 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3132 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3133 		device_printf(dev, "Unable to allocate TX ring memory\n");
3134 		error = ENOMEM;
3135 		goto fail;
3136 	}
3137 
3138 	/* Now allocate the RX */
3139 	if (!(adapter->rx_rings =
3140 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3141 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3142 		device_printf(dev, "Unable to allocate RX ring memory\n");
3143 		error = ENOMEM;
3144 		goto rx_fail;
3145 	}
3146 
3147 	tsize = roundup2(adapter->num_tx_desc *
3148 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
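	/* roundup2() pads the ring size to a multiple of EM_DBA_ALIGN. */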
3149 	/*
3150 	 * Now set up the TX queues, txconf is needed to handle the
3151 	 * possibility that things fail midcourse and we need to
3152 	 * undo memory gracefully
3153 	 */
3154 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3155 		/* Set up some basics */
3156 		txr = &adapter->tx_rings[i];
3157 		txr->adapter = adapter;
3158 		txr->me = i;
3159 
3160 		/* Initialize the TX lock */
3161 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3162 		    device_get_nameunit(dev), txr->me);
3163 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3164 
3165 		if (em_dma_malloc(adapter, tsize,
3166 			&txr->txdma, BUS_DMA_NOWAIT)) {
3167 			device_printf(dev,
3168 			    "Unable to allocate TX Descriptor memory\n");
3169 			error = ENOMEM;
3170 			goto err_tx_desc;
3171 		}
3172 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3173 		bzero((void *)txr->tx_base, tsize);
3174 
3175 		if (em_allocate_transmit_buffers(txr)) {
3176 			device_printf(dev,
3177 			    "Critical Failure setting up transmit buffers\n");
3178 			error = ENOMEM;
3179 			goto err_tx_desc;
3180 		}
3181 #if __FreeBSD_version >= 800000
3182 		/* Allocate a buf ring */
3183 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3184 		    M_WAITOK, &txr->tx_mtx);
3185 #endif
3186 	}
3187 
3188 	/*
3189 	 * Next the RX queues...
3190 	 */
3191 	rsize = roundup2(adapter->num_rx_desc *
3192 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3193 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3194 		rxr = &adapter->rx_rings[i];
3195 		rxr->adapter = adapter;
3196 		rxr->me = i;
3197 
3198 		/* Initialize the RX lock */
3199 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3200 		    device_get_nameunit(dev), rxr->me);
3201 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3202 
3203 		if (em_dma_malloc(adapter, rsize,
3204 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3205 			device_printf(dev,
3206 			    "Unable to allocate RX Descriptor memory\n");
3207 			error = ENOMEM;
3208 			goto err_rx_desc;
3209 		}
3210 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3211 		bzero((void *)rxr->rx_base, rsize);
3212 
3213 		/* Allocate receive buffers for the ring */
3214 		if (em_allocate_receive_buffers(rxr)) {
3215 			device_printf(dev,
3216 			    "Critical Failure setting up receive buffers\n");
3217 			error = ENOMEM;
3218 			goto err_rx_desc;
3219 		}
3220 	}
3221 
3222 	return (0);
3223 
3224 err_rx_desc:
3225 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3226 		em_dma_free(adapter, &rxr->rxdma);
3227 err_tx_desc:
3228 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3229 		em_dma_free(adapter, &txr->txdma);
3230 	free(adapter->rx_rings, M_DEVBUF);
3231 rx_fail:
3232 #if __FreeBSD_version >= 800000
3233 	if (txr != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
3234 #endif
3235 	free(adapter->tx_rings, M_DEVBUF);
3236 fail:
3237 	return (error);
3238 }
3239 
3240 
3241 /*********************************************************************
3242  *
3243  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3244  *  the information needed to transmit a packet on the wire. This is
3245  *  called only once at attach, setup is done every reset.
3246  *
3247  **********************************************************************/
3248 static int
3249 em_allocate_transmit_buffers(struct tx_ring *txr)
3250 {
3251 	struct adapter *adapter = txr->adapter;
3252 	device_t dev = adapter->dev;
3253 	struct em_buffer *txbuf;
3254 	int error, i;
3255 
3256 	/*
3257 	 * Setup DMA descriptor areas.
3258 	 */
3259 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3260 			       1, 0,			/* alignment, bounds */
3261 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3262 			       BUS_SPACE_MAXADDR,	/* highaddr */
3263 			       NULL, NULL,		/* filter, filterarg */
3264 			       EM_TSO_SIZE,		/* maxsize */
3265 			       EM_MAX_SCATTER,		/* nsegments */
3266 			       PAGE_SIZE,		/* maxsegsize */
3267 			       0,			/* flags */
3268 			       NULL,			/* lockfunc */
3269 			       NULL,			/* lockfuncarg */
3270 			       &txr->txtag))) {
3271 		device_printf(dev, "Unable to allocate TX DMA tag\n");
3272 		goto fail;
3273 	}
3274 
3275 	if (!(txr->tx_buffers =
3276 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3277 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3278 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3279 		error = ENOMEM;
3280 		goto fail;
3281 	}
3282 
3283 	/* Create the descriptor buffer dma maps */
3284 	txbuf = txr->tx_buffers;
3285 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3286 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3287 		if (error != 0) {
3288 			device_printf(dev, "Unable to create TX DMA map\n");
3289 			goto fail;
3290 		}
3291 	}
3292 
3293 	return (0);
3294 fail:
3295 	/* We free all, it handles case where we are in the middle */
3296 	em_free_transmit_structures(adapter);
3297 	return (error);
3298 }
3299 
3300 /*********************************************************************
3301  *
3302  *  Initialize a transmit ring.
3303  *
3304  **********************************************************************/
3305 static void
3306 em_setup_transmit_ring(struct tx_ring *txr)
3307 {
3308 	struct adapter *adapter = txr->adapter;
3309 	struct em_buffer *txbuf;
3310 	int i;
3311 #ifdef DEV_NETMAP
3312 	struct netmap_adapter *na = NA(adapter->ifp);
3313 	struct netmap_slot *slot;
3314 #endif /* DEV_NETMAP */
3315 
3316 	/* Clear the old descriptor contents */
3317 	EM_TX_LOCK(txr);
3318 #ifdef DEV_NETMAP
3319 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3320 #endif /* DEV_NETMAP */
3321 
3322 	bzero((void *)txr->tx_base,
3323 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3324 	/* Reset indices */
3325 	txr->next_avail_desc = 0;
3326 	txr->next_to_clean = 0;
3327 
3328 	/* Free any existing tx buffers. */
3329 	txbuf = txr->tx_buffers;
3330 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3331 		if (txbuf->m_head != NULL) {
3332 			bus_dmamap_sync(txr->txtag, txbuf->map,
3333 			    BUS_DMASYNC_POSTWRITE);
3334 			bus_dmamap_unload(txr->txtag, txbuf->map);
3335 			m_freem(txbuf->m_head);
3336 			txbuf->m_head = NULL;
3337 		}
3338 #ifdef DEV_NETMAP
3339 		if (slot) {
3340 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3341 			uint64_t paddr;
3342 			void *addr;
3343 
3344 			addr = PNMB(slot + si, &paddr);
3345 			txr->tx_base[i].buffer_addr = htole64(paddr);
3346 			/* reload the map for netmap mode */
3347 			netmap_load_map(txr->txtag, txbuf->map, addr);
3348 		}
3349 #endif /* DEV_NETMAP */
3350 
3351 		/* clear the watch index */
3352 		txbuf->next_eop = -1;
3353 	}
3354 
3355 	/* Set number of descriptors available */
3356 	txr->tx_avail = adapter->num_tx_desc;
3357 	txr->queue_status = EM_QUEUE_IDLE;
3358 
3359 	/* Clear checksum offload context. */
3360 	txr->last_hw_offload = 0;
3361 	txr->last_hw_ipcss = 0;
3362 	txr->last_hw_ipcso = 0;
3363 	txr->last_hw_tucss = 0;
3364 	txr->last_hw_tucso = 0;
3365 
3366 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3367 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3368 	EM_TX_UNLOCK(txr);
3369 }
3370 
3371 /*********************************************************************
3372  *
3373  *  Initialize all transmit rings.
3374  *
3375  **********************************************************************/
3376 static void
3377 em_setup_transmit_structures(struct adapter *adapter)
3378 {
3379 	struct tx_ring *txr = adapter->tx_rings;
3380 
3381 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3382 		em_setup_transmit_ring(txr);
3383 
3384 	return;
3385 }
3386 
3387 /*********************************************************************
3388  *
3389  *  Enable transmit unit.
3390  *
3391  **********************************************************************/
3392 static void
3393 em_initialize_transmit_unit(struct adapter *adapter)
3394 {
3395 	struct tx_ring	*txr = adapter->tx_rings;
3396 	struct e1000_hw	*hw = &adapter->hw;
3397 	u32	tctl, tarc, tipg = 0;
3398 
3399 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3400 
3401 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3402 		u64 bus_addr = txr->txdma.dma_paddr;
3403 		/* Base and Len of TX Ring */
3404 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3405 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3406 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3407 	    	    (u32)(bus_addr >> 32));
3408 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3409 	    	    (u32)bus_addr);
3410 		/* Init the HEAD/TAIL indices */
3411 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3412 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3413 
3414 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3415 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3416 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3417 
3418 		txr->queue_status = EM_QUEUE_IDLE;
3419 	}
3420 
3421 	/* Set the default values for the Tx Inter Packet Gap timer */
3422 	switch (adapter->hw.mac.type) {
3423 	case e1000_80003es2lan:
3424 		tipg = DEFAULT_82543_TIPG_IPGR1;
3425 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3426 		    E1000_TIPG_IPGR2_SHIFT;
3427 		break;
3428 	default:
3429 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3430 		    (adapter->hw.phy.media_type ==
3431 		    e1000_media_type_internal_serdes))
3432 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3433 		else
3434 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3435 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3436 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3437 	}
3438 
3439 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3440 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3441 
3442 	if (adapter->hw.mac.type >= e1000_82540)
3443 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3444 		    adapter->tx_abs_int_delay.value);
3445 
3446 	if ((adapter->hw.mac.type == e1000_82571) ||
3447 	    (adapter->hw.mac.type == e1000_82572)) {
3448 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3449 		tarc |= SPEED_MODE_BIT;
3450 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3451 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3452 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3453 		tarc |= 1;
3454 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3455 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3456 		tarc |= 1;
3457 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3458 	}
3459 
3460 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3461 	if (adapter->tx_int_delay.value > 0)
3462 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3463 
3464 	/* Program the Transmit Control Register */
3465 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3466 	tctl &= ~E1000_TCTL_CT;
3467 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3468 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3469 
3470 	if (adapter->hw.mac.type >= e1000_82571)
3471 		tctl |= E1000_TCTL_MULR;
3472 
3473 	/* This write will effectively turn on the transmit unit. */
3474 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3476 }
3477 
3478 
3479 /*********************************************************************
3480  *
3481  *  Free all transmit rings.
3482  *
3483  **********************************************************************/
3484 static void
3485 em_free_transmit_structures(struct adapter *adapter)
3486 {
3487 	struct tx_ring *txr = adapter->tx_rings;
3488 
3489 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3490 		EM_TX_LOCK(txr);
3491 		em_free_transmit_buffers(txr);
3492 		em_dma_free(adapter, &txr->txdma);
3493 		EM_TX_UNLOCK(txr);
3494 		EM_TX_LOCK_DESTROY(txr);
3495 	}
3496 
3497 	free(adapter->tx_rings, M_DEVBUF);
3498 }
3499 
3500 /*********************************************************************
3501  *
3502  *  Free transmit ring related data structures.
3503  *
3504  **********************************************************************/
3505 static void
3506 em_free_transmit_buffers(struct tx_ring *txr)
3507 {
3508 	struct adapter		*adapter = txr->adapter;
3509 	struct em_buffer	*txbuf;
3510 
3511 	INIT_DEBUGOUT("free_transmit_ring: begin");
3512 
3513 	if (txr->tx_buffers == NULL)
3514 		return;
3515 
3516 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3517 		txbuf = &txr->tx_buffers[i];
3518 		if (txbuf->m_head != NULL) {
3519 			bus_dmamap_sync(txr->txtag, txbuf->map,
3520 			    BUS_DMASYNC_POSTWRITE);
3521 			bus_dmamap_unload(txr->txtag,
3522 			    txbuf->map);
3523 			m_freem(txbuf->m_head);
3524 			txbuf->m_head = NULL;
3525 			if (txbuf->map != NULL) {
3526 				bus_dmamap_destroy(txr->txtag,
3527 				    txbuf->map);
3528 				txbuf->map = NULL;
3529 			}
3530 		} else if (txbuf->map != NULL) {
3531 			bus_dmamap_unload(txr->txtag,
3532 			    txbuf->map);
3533 			bus_dmamap_destroy(txr->txtag,
3534 			    txbuf->map);
3535 			txbuf->map = NULL;
3536 		}
3537 	}
3538 #if __FreeBSD_version >= 800000
3539 	if (txr->br != NULL)
3540 		buf_ring_free(txr->br, M_DEVBUF);
3541 #endif
3542 	if (txr->tx_buffers != NULL) {
3543 		free(txr->tx_buffers, M_DEVBUF);
3544 		txr->tx_buffers = NULL;
3545 	}
3546 	if (txr->txtag != NULL) {
3547 		bus_dma_tag_destroy(txr->txtag);
3548 		txr->txtag = NULL;
3549 	}
3550 	return;
3551 }
3552 
3553 
3554 /*********************************************************************
3555  *  The offload context is protocol specific (TCP/UDP) and thus
3556  *  only needs to be set when the protocol changes. The occasion
3557  *  of a context change can be a performance detriment, and
3558  *  might be better just disabled. The reason arises in the way
3559  *  in which the controller supports pipelined requests from the
3560  *  Tx data DMA. Up to four requests can be pipelined, and they may
3561  *  belong to the same packet or to multiple packets. However all
3562  *  requests for one packet are issued before a request is issued
3563  *  for a subsequent packet and if a request for the next packet
3564  *  requires a context change, that request will be stalled
3565  *  until the previous request completes. This means setting up
3566  *  a new context effectively disables pipelined Tx data DMA, which
3567  *  in turn greatly slows down performance when sending small
3568  *  frames.
3569  **********************************************************************/
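
/*
 * Editor's note, an illustrative sketch rather than driver code: the
 * caching done below amounts to re-programming a context descriptor
 * only when the would-be offsets differ from the last ones written:
 *
 *	if (txr->last_hw_offload == offload &&
 *	    txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso)
 *		return;		(context already loaded in hardware)
 */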
3570 static void
3571 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3572     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3573 {
3574 	struct adapter			*adapter = txr->adapter;
3575 	struct e1000_context_desc	*TXD = NULL;
3576 	struct em_buffer		*tx_buffer;
3577 	int				cur, hdr_len;
3578 	u32				cmd = 0;
3579 	u16				offload = 0;
3580 	u8				ipcso, ipcss, tucso, tucss;
3581 
3582 	ipcss = ipcso = tucss = tucso = 0;
3583 	hdr_len = ip_off + (ip->ip_hl << 2);
3584 	cur = txr->next_avail_desc;
3585 
3586 	/* Setup of IP header checksum. */
3587 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3588 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3589 		offload |= CSUM_IP;
3590 		ipcss = ip_off;
3591 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3592 		/*
3593 		 * Start offset for header checksum calculation.
3594 		 * End offset for header checksum calculation.
3595 		 * Offset of place to put the checksum.
3596 		 */
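		/*
		 * Worked example (illustrative values): with ip_off = 14
		 * (untagged Ethernet header) and no IP options (ip_hl = 5),
		 * ipcss = 14, ipcse = hdr_len = 34 and ipcso = 14 + 10 = 24,
		 * since ip_sum sits 10 bytes into the IP header.
		 */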
3597 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3598 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3599 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3600 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3601 		cmd |= E1000_TXD_CMD_IP;
3602 	}
3603 
3604 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3605  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3606  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3607  		offload |= CSUM_TCP;
3608  		tucss = hdr_len;
3609  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3610  		/*
3611  		 * Setting up a new checksum offload context for every frame
3612  		 * takes a lot of hardware processing time and hurts
3613  		 * performance for small frames, so avoid it if the driver
3614  		 * can reuse the previously configured checksum offload
3615  		 * context.
3616  		 */
3617  		if (txr->last_hw_offload == offload) {
3618  			if (offload & CSUM_IP) {
3619  				if (txr->last_hw_ipcss == ipcss &&
3620  				    txr->last_hw_ipcso == ipcso &&
3621  				    txr->last_hw_tucss == tucss &&
3622  				    txr->last_hw_tucso == tucso)
3623  					return;
3624  			} else {
3625  				if (txr->last_hw_tucss == tucss &&
3626  				    txr->last_hw_tucso == tucso)
3627  					return;
3628  			}
3629   		}
3630  		txr->last_hw_offload = offload;
3631  		txr->last_hw_tucss = tucss;
3632  		txr->last_hw_tucso = tucso;
3633  		/*
3634  		 * Start offset for payload checksum calculation.
3635  		 * End offset for payload checksum calculation.
3636  		 * Offset of place to put the checksum.
3637  		 */
3638 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3639  		TXD->upper_setup.tcp_fields.tucss = tucss;
3640  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3641  		TXD->upper_setup.tcp_fields.tucso = tucso;
3642  		cmd |= E1000_TXD_CMD_TCP;
3643  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3644  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3645  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
 		offload |= CSUM_UDP;	/* keep the context cache key accurate, as in the TCP branch */
3646  		tucss = hdr_len;
3647  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3648  		/*
3649  		 * Setting up a new checksum offload context for every frame
3650  		 * takes a lot of hardware processing time and hurts
3651  		 * performance for small frames, so avoid it if the driver
3652  		 * can reuse the previously configured checksum offload
3653  		 * context.
3654  		 */
3655  		if (txr->last_hw_offload == offload) {
3656  			if (offload & CSUM_IP) {
3657  				if (txr->last_hw_ipcss == ipcss &&
3658  				    txr->last_hw_ipcso == ipcso &&
3659  				    txr->last_hw_tucss == tucss &&
3660  				    txr->last_hw_tucso == tucso)
3661  					return;
3662  			} else {
3663  				if (txr->last_hw_tucss == tucss &&
3664  				    txr->last_hw_tucso == tucso)
3665  					return;
3666  			}
3667  		}
3668  		txr->last_hw_offload = offload;
3669  		txr->last_hw_tucss = tucss;
3670  		txr->last_hw_tucso = tucso;
3671  		/*
3672  		 * Start offset for payload checksum calculation.
3673  		 * End offset for payload checksum calculation.
3674  		 * Offset of place to put the checksum.
3675  		 */
3676 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3677  		TXD->upper_setup.tcp_fields.tucss = tucss;
3678  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3679  		TXD->upper_setup.tcp_fields.tucso = tucso;
3680   	}
3681 
3682  	if (offload & CSUM_IP) {
3683  		txr->last_hw_ipcss = ipcss;
3684  		txr->last_hw_ipcso = ipcso;
3685   	}
3686 
3687 	TXD->tcp_seg_setup.data = htole32(0);
3688 	TXD->cmd_and_length =
3689 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3690 	tx_buffer = &txr->tx_buffers[cur];
3691 	tx_buffer->m_head = NULL;
3692 	tx_buffer->next_eop = -1;
3693 
3694 	if (++cur == adapter->num_tx_desc)
3695 		cur = 0;
3696 
3697 	txr->tx_avail--;
3698 	txr->next_avail_desc = cur;
3699 }
3700 
3701 
3702 /**********************************************************************
3703  *
3704  *  Setup work for hardware segmentation offload (TSO)
3705  *
3706  **********************************************************************/
3707 static void
3708 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3709     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3710 {
3711 	struct adapter			*adapter = txr->adapter;
3712 	struct e1000_context_desc	*TXD;
3713 	struct em_buffer		*tx_buffer;
3714 	int cur, hdr_len;
3715 
3716 	/*
3717 	 * In theory we can reuse the same TSO context if and only if
3718 	 * the frame is the same type (IP/TCP) and has the same MSS.
3719 	 * However, checking whether a frame has the same IP/TCP
3720 	 * structure is hard, so just ignore that and always establish
3721 	 * a new TSO context.
3722 	 */
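	/*
	 * Worked example (illustrative values): for a plain TCP/IPv4
	 * frame, ip_off = 14, ip_hl = 5 and th_off = 5, so hdr_len =
	 * 14 + 20 + 20 = 54 bytes of headers precede the TSO payload.
	 */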
3723 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3724 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3725 		      E1000_TXD_DTYP_D |	/* Data descr type */
3726 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3727 
3728 	/* IP and/or TCP header checksum calculation and insertion. */
3729 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3730 
3731 	cur = txr->next_avail_desc;
3732 	tx_buffer = &txr->tx_buffers[cur];
3733 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3734 
3735 	/*
3736 	 * Start offset for header checksum calculation.
3737 	 * End offset for header checksum calculation.
3738 	 * Offset of place to put the checksum.
3739 	 */
3740 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3741 	TXD->lower_setup.ip_fields.ipcse =
3742 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3743 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3744 	/*
3745 	 * Start offset for payload checksum calculation.
3746 	 * End offset for payload checksum calculation.
3747 	 * Offset of place to put the checksum.
3748 	 */
3749 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3750 	TXD->upper_setup.tcp_fields.tucse = 0;
3751 	TXD->upper_setup.tcp_fields.tucso =
3752 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3753 	/*
3754 	 * Payload size per packet w/o any headers.
3755 	 * Length of all headers up to payload.
3756 	 */
3757 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3758 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3759 
3760 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3761 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3762 				E1000_TXD_CMD_TSE |	/* TSE context */
3763 				E1000_TXD_CMD_IP |	/* Do IP csum */
3764 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3765 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3766 
3767 	tx_buffer->m_head = NULL;
3768 	tx_buffer->next_eop = -1;
3769 
3770 	if (++cur == adapter->num_tx_desc)
3771 		cur = 0;
3772 
3773 	txr->tx_avail--;
3774 	txr->next_avail_desc = cur;
3775 	txr->tx_tso = TRUE;
3776 }
3777 
3778 
3779 /**********************************************************************
3780  *
3781  *  Examine each tx_buffer in the used queue. If the hardware is done
3782  *  processing the packet then free associated resources. The
3783  *  tx_buffer is put back on the free queue.
3784  *
3785  **********************************************************************/
3786 static void
3787 em_txeof(struct tx_ring *txr)
3788 {
3789 	struct adapter	*adapter = txr->adapter;
3790         int first, last, done, processed;
3791         struct em_buffer *tx_buffer;
3792         struct e1000_tx_desc   *tx_desc, *eop_desc;
3793 	struct ifnet   *ifp = adapter->ifp;
3794 
3795 	EM_TX_LOCK_ASSERT(txr);
3796 #ifdef DEV_NETMAP
3797 	if (ifp->if_capenable & IFCAP_NETMAP) {
3798 		struct netmap_adapter *na = NA(ifp);
3799 
3800 		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3801 		EM_TX_UNLOCK(txr);
3802 		EM_CORE_LOCK(adapter);
3803 		selwakeuppri(&na->tx_si, PI_NET);
3804 		EM_CORE_UNLOCK(adapter);
3805 		EM_TX_LOCK(txr);
3806 		return;
3807 	}
3808 #endif /* DEV_NETMAP */
3809 
3810 	/* No work, make sure watchdog is off */
3811         if (txr->tx_avail == adapter->num_tx_desc) {
3812 		txr->queue_status = EM_QUEUE_IDLE;
3813                 return;
3814 	}
3815 
3816 	processed = 0;
3817         first = txr->next_to_clean;
3818         tx_desc = &txr->tx_base[first];
3819         tx_buffer = &txr->tx_buffers[first];
3820 	last = tx_buffer->next_eop;
3821         eop_desc = &txr->tx_base[last];
3822 
3823 	/*
3824 	 * What this does is get the index of the
3825 	 * first descriptor AFTER the EOP of the
3826 	 * first packet, so that we can do the
3827 	 * simple comparison in the inner while loop.
3828 	 */
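	/*
	 * Worked example (hypothetical ring state): with 256 descriptors,
	 * first = 250 and next_eop = 255 give done = 0, so the inner loop
	 * below cleans 250..255 and stops once 'first' wraps to 0.
	 */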
3829 	if (++last == adapter->num_tx_desc)
3830  		last = 0;
3831 	done = last;
3832 
3833         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3834             BUS_DMASYNC_POSTREAD);
3835 
3836         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3837 		/* We clean the range of the packet */
3838 		while (first != done) {
3839                 	tx_desc->upper.data = 0;
3840                 	tx_desc->lower.data = 0;
3841                 	tx_desc->buffer_addr = 0;
3842                 	++txr->tx_avail;
3843 			++processed;
3844 
3845 			if (tx_buffer->m_head) {
3846 				bus_dmamap_sync(txr->txtag,
3847 				    tx_buffer->map,
3848 				    BUS_DMASYNC_POSTWRITE);
3849 				bus_dmamap_unload(txr->txtag,
3850 				    tx_buffer->map);
3851                         	m_freem(tx_buffer->m_head);
3852                         	tx_buffer->m_head = NULL;
3853                 	}
3854 			tx_buffer->next_eop = -1;
3855 			txr->watchdog_time = ticks;
3856 
3857 	                if (++first == adapter->num_tx_desc)
3858 				first = 0;
3859 
3860 	                tx_buffer = &txr->tx_buffers[first];
3861 			tx_desc = &txr->tx_base[first];
3862 		}
3863 		++ifp->if_opackets;
3864 		/* See if we can continue to the next packet */
3865 		last = tx_buffer->next_eop;
3866 		if (last != -1) {
3867         		eop_desc = &txr->tx_base[last];
3868 			/* Get new done point */
3869 			if (++last == adapter->num_tx_desc) last = 0;
3870 			done = last;
3871 		} else
3872 			break;
3873         }
3874         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3875             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3876 
3877         txr->next_to_clean = first;
3878 
3879 	/*
3880 	** Watchdog calculation: we know there's
3881 	** work outstanding or the first return
3882 	** would have been taken, so nothing processed
3883 	** for too long indicates a hang. The local timer
3884 	** will examine this and do a reset if needed.
3885 	*/
3886 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3887 		txr->queue_status = EM_QUEUE_HUNG;
3888 
3889         /*
3890          * If we have a minimum free, clear IFF_DRV_OACTIVE
3891          * to tell the stack that it is OK to send packets.
3892 	 * Notice that all writes of OACTIVE happen under the
3893 	 * TX lock which, with a single queue, guarantees
3894 	 * sanity.
3895          */
3896         if (txr->tx_avail >= EM_MAX_SCATTER)
3897 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3898 
3899 	/* Disable watchdog if all clean */
3900 	if (txr->tx_avail == adapter->num_tx_desc) {
3901 		txr->queue_status = EM_QUEUE_IDLE;
3902 	}
3903 }
3904 
3905 
3906 /*********************************************************************
3907  *
3908  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3909  *
3910  **********************************************************************/
3911 static void
3912 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3913 {
3914 	struct adapter		*adapter = rxr->adapter;
3915 	struct mbuf		*m;
3916 	bus_dma_segment_t	segs[1];
3917 	struct em_buffer	*rxbuf;
3918 	int			i, j, error, nsegs;
3919 	bool			cleaned = FALSE;
3920 
3921 	i = j = rxr->next_to_refresh;
3922 	/*
3923 	** Get one descriptor beyond
3924 	** our work mark to control
3925 	** the loop.
3926 	*/
3927 	if (++j == adapter->num_rx_desc)
3928 		j = 0;
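	/*
	 * Worked example (hypothetical values): with 1024 RX descriptors
	 * and next_to_refresh = 1023, i starts at 1023 and j wraps to 0;
	 * each pass refreshes slot i, then advances both indices with
	 * this same wrap logic until j reaches 'limit'.
	 */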
3929 
3930 	while (j != limit) {
3931 		rxbuf = &rxr->rx_buffers[i];
3932 		if (rxbuf->m_head == NULL) {
3933 			m = m_getjcl(M_NOWAIT, MT_DATA,
3934 			    M_PKTHDR, adapter->rx_mbuf_sz);
3935 			/*
3936 			** If we have a temporary resource shortage
3937 			** that causes a failure, just abort refresh
3938 			** for now; we will return to this point when
3939 			** reinvoked from em_rxeof.
3940 			*/
3941 			if (m == NULL)
3942 				goto update;
3943 		} else
3944 			m = rxbuf->m_head;
3945 
3946 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3947 		m->m_flags |= M_PKTHDR;
3948 		m->m_data = m->m_ext.ext_buf;
3949 
3950 		/* Use bus_dma machinery to setup the memory mapping  */
3951 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3952 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3953 		if (error != 0) {
3954 			printf("Refresh mbufs: hdr dmamap load"
3955 			    " failure - %d\n", error);
3956 			m_free(m);
3957 			rxbuf->m_head = NULL;
3958 			goto update;
3959 		}
3960 		rxbuf->m_head = m;
3961 		bus_dmamap_sync(rxr->rxtag,
3962 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3963 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3964 		cleaned = TRUE;
3965 
3966 		i = j; /* Next is precalculated for us */
3967 		rxr->next_to_refresh = i;
3968 		/* Calculate next controlling index */
3969 		if (++j == adapter->num_rx_desc)
3970 			j = 0;
3971 	}
3972 update:
3973 	/*
3974 	** Update the tail pointer only if,
3975 	** and only as far as, we have refreshed.
3976 	*/
3977 	if (cleaned)
3978 		E1000_WRITE_REG(&adapter->hw,
3979 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3980 
3981 	return;
3982 }
3983 
3984 
3985 /*********************************************************************
3986  *
3987  *  Allocate memory for rx_buffer structures. Since we use one
3988  *  rx_buffer per received packet, the maximum number of rx_buffer's
3989  *  that we'll need is equal to the number of receive descriptors
3990  *  that we've allocated.
3991  *
3992  **********************************************************************/
3993 static int
3994 em_allocate_receive_buffers(struct rx_ring *rxr)
3995 {
3996 	struct adapter		*adapter = rxr->adapter;
3997 	device_t		dev = adapter->dev;
3998 	struct em_buffer	*rxbuf;
3999 	int			error;
4000 
4001 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4002 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4003 	if (rxr->rx_buffers == NULL) {
4004 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4005 		return (ENOMEM);
4006 	}
4007 
4008 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4009 				1, 0,			/* alignment, bounds */
4010 				BUS_SPACE_MAXADDR,	/* lowaddr */
4011 				BUS_SPACE_MAXADDR,	/* highaddr */
4012 				NULL, NULL,		/* filter, filterarg */
4013 				MJUM9BYTES,		/* maxsize */
4014 				1,			/* nsegments */
4015 				MJUM9BYTES,		/* maxsegsize */
4016 				0,			/* flags */
4017 				NULL,			/* lockfunc */
4018 				NULL,			/* lockarg */
4019 				&rxr->rxtag);
4020 	if (error) {
4021 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4022 		    __func__, error);
4023 		goto fail;
4024 	}
4025 
4026 	rxbuf = rxr->rx_buffers;
4027 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4029 		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4030 		    &rxbuf->map);
4031 		if (error) {
4032 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4033 			    __func__, error);
4034 			goto fail;
4035 		}
4036 	}
4037 
4038 	return (0);
4039 
4040 fail:
4041 	em_free_receive_structures(adapter);
4042 	return (error);
4043 }
4044 
4045 
4046 /*********************************************************************
4047  *
4048  *  Initialize a receive ring and its buffers.
4049  *
4050  **********************************************************************/
4051 static int
4052 em_setup_receive_ring(struct rx_ring *rxr)
4053 {
4054 	struct	adapter 	*adapter = rxr->adapter;
4055 	struct em_buffer	*rxbuf;
4056 	bus_dma_segment_t	seg[1];
4057 	int			rsize, nsegs, error = 0;
4058 #ifdef DEV_NETMAP
4059 	struct netmap_adapter *na = NA(adapter->ifp);
4060 	struct netmap_slot *slot;
4061 #endif
4062 
4064 	/* Clear the ring contents */
4065 	EM_RX_LOCK(rxr);
4066 	rsize = roundup2(adapter->num_rx_desc *
4067 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4068 	bzero((void *)rxr->rx_base, rsize);
4069 #ifdef DEV_NETMAP
4070 	slot = netmap_reset(na, NR_RX, 0, 0);
4071 #endif
4072 
4073 	/*
4074 	** Free current RX buffer structs and their mbufs
4075 	*/
4076 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4077 		rxbuf = &rxr->rx_buffers[i];
4078 		if (rxbuf->m_head != NULL) {
4079 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4080 			    BUS_DMASYNC_POSTREAD);
4081 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4082 			m_freem(rxbuf->m_head);
4083 			rxbuf->m_head = NULL; /* mark as freed */
4084 		}
4085 	}
4086 
4087 	/* Now replenish the mbufs */
4088         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4089 		rxbuf = &rxr->rx_buffers[j];
4090 #ifdef DEV_NETMAP
4091 		if (slot) {
4092 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4093 			uint64_t paddr;
4094 			void *addr;
4095 
4096 			addr = PNMB(slot + si, &paddr);
4097 			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4098 			/* Update descriptor */
4099 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4100 			continue;
4101 		}
4102 #endif /* DEV_NETMAP */
4103 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4104 		    M_PKTHDR, adapter->rx_mbuf_sz);
4105 		if (rxbuf->m_head == NULL) {
4106 			error = ENOBUFS;
4107 			goto fail;
4108 		}
4109 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4110 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4111 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4112 
4113 		/* Get the memory mapping */
4114 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4115 		    rxbuf->map, rxbuf->m_head, seg,
4116 		    &nsegs, BUS_DMA_NOWAIT);
4117 		if (error != 0) {
4118 			m_freem(rxbuf->m_head);
4119 			rxbuf->m_head = NULL;
4120 			goto fail;
4121 		}
4122 		bus_dmamap_sync(rxr->rxtag,
4123 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4124 
4125 		/* Update descriptor */
4126 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4127 	}
4128 	rxr->next_to_check = 0;
4129 	rxr->next_to_refresh = 0;
4130 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4131 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4132 
4133 fail:
4134 	EM_RX_UNLOCK(rxr);
4135 	return (error);
4136 }
4137 
4138 /*********************************************************************
4139  *
4140  *  Initialize all receive rings.
4141  *
4142  **********************************************************************/
4143 static int
4144 em_setup_receive_structures(struct adapter *adapter)
4145 {
4146 	struct rx_ring *rxr = adapter->rx_rings;
4147 	int q;
4148 
4149 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4150 		if (em_setup_receive_ring(rxr))
4151 			goto fail;
4152 
4153 	return (0);
4154 fail:
4155 	/*
4156 	 * Free RX buffers allocated so far; we only handle
4157 	 * the rings that completed, since the failing case will
4158 	 * have cleaned up for itself. 'q' failed, so it's the terminus.
4159 	 */
4160 	for (int i = 0; i < q; ++i) {
4161 		rxr = &adapter->rx_rings[i];
4162 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4163 			struct em_buffer *rxbuf;
4164 			rxbuf = &rxr->rx_buffers[n];
4165 			if (rxbuf->m_head != NULL) {
4166 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4167 			  	  BUS_DMASYNC_POSTREAD);
4168 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4169 				m_freem(rxbuf->m_head);
4170 				rxbuf->m_head = NULL;
4171 			}
4172 		}
4173 		rxr->next_to_check = 0;
4174 		rxr->next_to_refresh = 0;
4175 	}
4176 
4177 	return (ENOBUFS);
4178 }
4179 
4180 /*********************************************************************
4181  *
4182  *  Free all receive rings.
4183  *
4184  **********************************************************************/
4185 static void
4186 em_free_receive_structures(struct adapter *adapter)
4187 {
4188 	struct rx_ring *rxr = adapter->rx_rings;
4189 
4190 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4191 		em_free_receive_buffers(rxr);
4192 		/* Free the ring memory as well */
4193 		em_dma_free(adapter, &rxr->rxdma);
4194 		EM_RX_LOCK_DESTROY(rxr);
4195 	}
4196 
4197 	free(adapter->rx_rings, M_DEVBUF);
4198 }
4199 
4200 
4201 /*********************************************************************
4202  *
4203  *  Free receive ring data structures
4204  *
4205  **********************************************************************/
4206 static void
4207 em_free_receive_buffers(struct rx_ring *rxr)
4208 {
4209 	struct adapter		*adapter = rxr->adapter;
4210 	struct em_buffer	*rxbuf = NULL;
4211 
4212 	INIT_DEBUGOUT("free_receive_buffers: begin");
4213 
4214 	if (rxr->rx_buffers != NULL) {
4215 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4216 			rxbuf = &rxr->rx_buffers[i];
4217 			if (rxbuf->map != NULL) {
4218 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4219 				    BUS_DMASYNC_POSTREAD);
4220 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4221 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4222 			}
4223 			if (rxbuf->m_head != NULL) {
4224 				m_freem(rxbuf->m_head);
4225 				rxbuf->m_head = NULL;
4226 			}
4227 		}
4228 		free(rxr->rx_buffers, M_DEVBUF);
4229 		rxr->rx_buffers = NULL;
4230 		rxr->next_to_check = 0;
4231 		rxr->next_to_refresh = 0;
4232 	}
4233 
4234 	if (rxr->rxtag != NULL) {
4235 		bus_dma_tag_destroy(rxr->rxtag);
4236 		rxr->rxtag = NULL;
4237 	}
4238 
4239 	return;
4240 }
4241 
4242 
4243 /*********************************************************************
4244  *
4245  *  Enable receive unit.
4246  *
4247  **********************************************************************/
4248 #define MAX_INTS_PER_SEC	8000
4249 #define DEFAULT_ITR	     (1000000000/(MAX_INTS_PER_SEC * 256))
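
/*
 * Worked example: with MAX_INTS_PER_SEC = 8000 the macro evaluates to
 * 1000000000 / (8000 * 256) = 488 (integer division); the ITR register
 * counts 256ns units, so 488 * 256ns is roughly 125us between
 * interrupts, i.e. about 8000 interrupts per second.
 */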
4250 
4251 static void
4252 em_initialize_receive_unit(struct adapter *adapter)
4253 {
4254 	struct rx_ring	*rxr = adapter->rx_rings;
4255 	struct ifnet	*ifp = adapter->ifp;
4256 	struct e1000_hw	*hw = &adapter->hw;
4257 	u64	bus_addr;
4258 	u32	rctl, rxcsum;
4259 
4260 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4261 
4262 	/*
4263 	 * Make sure receives are disabled while setting
4264 	 * up the descriptor ring
4265 	 */
4266 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4267 	/* Do not disable if ever enabled on this hardware */
4268 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4269 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4270 
4271 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4272 	    adapter->rx_abs_int_delay.value);
4273 	/*
4274 	 * Set the interrupt throttling rate. The value written is the
4275 	 * interrupt interval in 256ns units: 10^9/(MAX_INTS_PER_SEC * 256).
4276 	 */
4277 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4278 
4279 	/*
4280 	** When using MSIX interrupts we need to throttle
4281 	** using the EITR register (82574 only)
4282 	*/
4283 	if (hw->mac.type == e1000_82574) {
4284 		for (int i = 0; i < 4; i++)
4285 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4286 			    DEFAULT_ITR);
4287 		/* Disable accelerated acknowledge */
4288 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4289 	}
4290 
4291 	if (ifp->if_capenable & IFCAP_RXCSUM) {
4292 		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4293 		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4294 		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4295 	}
4296 
4297 	/*
4298 	** XXX TEMPORARY WORKAROUND: on some systems with the 82573,
4299 	** long latencies are observed (e.g. Lenovo X60). This
4300 	** change eliminates the problem, but since having positive
4301 	** values in RDTR is a known source of problems on other
4302 	** platforms, another solution is being sought.
4303 	*/
4304 	if (hw->mac.type == e1000_82573)
4305 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4306 
4307 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4308 		/* Setup the Base and Length of the Rx Descriptor Ring */
4309 		bus_addr = rxr->rxdma.dma_paddr;
4310 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4311 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4312 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4313 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4314 		/* Setup the Head and Tail Descriptor Pointers */
4315 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4316 #ifdef DEV_NETMAP
4317 		/*
4318 		 * An init() while a netmap client is active must
4319 		 * preserve the rx buffers passed to userspace.
4320 		 * In this driver it means we adjust RDT to
4321 		 * something different from na->num_rx_desc - 1.
4322 		 */
4323 		if (ifp->if_capenable & IFCAP_NETMAP) {
4324 			struct netmap_adapter *na = NA(adapter->ifp);
4325 			struct netmap_kring *kring = &na->rx_rings[i];
4326 			int t = na->num_rx_desc - 1 - kring->nr_hwavail;
4327 
4328 			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4329 		} else
4330 #endif /* DEV_NETMAP */
4331 		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4332 	}
4333 
4334 	/* Set PTHRESH for improved jumbo performance */
4335 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4336 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4337 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4338 	    (ifp->if_mtu > ETHERMTU)) {
4339 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4340 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4341 	}
4342 
4343 	if (adapter->hw.mac.type == e1000_pch2lan) {
4344 		if (ifp->if_mtu > ETHERMTU)
4345 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4346 		else
4347 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4348 	}
4349 
4350 	/* Setup the Receive Control Register */
4351 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4352 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4353 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4354 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4355 
4356         /* Strip the CRC */
4357         rctl |= E1000_RCTL_SECRC;
4358 
4359         /* Make sure VLAN Filters are off */
4360         rctl &= ~E1000_RCTL_VFE;
4361 	rctl &= ~E1000_RCTL_SBP;
4362 
4363 	if (adapter->rx_mbuf_sz == MCLBYTES)
4364 		rctl |= E1000_RCTL_SZ_2048;
4365 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4366 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4367 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4368 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4369 
4370 	if (ifp->if_mtu > ETHERMTU)
4371 		rctl |= E1000_RCTL_LPE;
4372 	else
4373 		rctl &= ~E1000_RCTL_LPE;
4374 
4375 	/* Write out the settings */
4376 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4377 
4378 	return;
4379 }
4380 
4381 
4382 /*********************************************************************
4383  *
4384  *  This routine executes in interrupt context. It replenishes
4385  *  the mbufs in the descriptor ring and passes data which has been
4386  *  DMA'ed into host memory up to the stack.
4387  *
4388  *  We loop at most count times if count is > 0, or until done if
4389  *  count < 0.
4390  *
4391  *  For polling we also now return the number of cleaned packets
4392  *********************************************************************/
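/*
 * Editor's usage sketch (hypothetical caller): a polling handler would
 * clean at most 'count' packets and report the work done back to the
 * polling framework, e.g.
 *
 *	int rx_done;
 *	em_rxeof(rxr, count, &rx_done);
 *	return (rx_done);
 */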
4393 static bool
4394 em_rxeof(struct rx_ring *rxr, int count, int *done)
4395 {
4396 	struct adapter		*adapter = rxr->adapter;
4397 	struct ifnet		*ifp = adapter->ifp;
4398 	struct mbuf		*mp, *sendmp;
4399 	u8			status = 0;
4400 	u16 			len;
4401 	int			i, processed, rxdone = 0;
4402 	bool			eop;
4403 	struct e1000_rx_desc	*cur;
4404 
4405 	EM_RX_LOCK(rxr);
4406 
4407 #ifdef DEV_NETMAP
4408 	if (ifp->if_capenable & IFCAP_NETMAP) {
4409 		struct netmap_adapter *na = NA(ifp);
4410 
4411 		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4412 		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4413 		EM_RX_UNLOCK(rxr);
4414 		EM_CORE_LOCK(adapter);
4415 		selwakeuppri(&na->rx_si, PI_NET);
4416 		EM_CORE_UNLOCK(adapter);
4417 		return (FALSE);
4418 	}
4419 #endif /* DEV_NETMAP */
4420 
4421 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4422 
4423 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4424 			break;
4425 
4426 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4427 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4428 
4429 		cur = &rxr->rx_base[i];
4430 		status = cur->status;
4431 		mp = sendmp = NULL;
4432 
4433 		if ((status & E1000_RXD_STAT_DD) == 0)
4434 			break;
4435 
4436 		len = le16toh(cur->length);
4437 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4438 
4439 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4440 		    (rxr->discard == TRUE)) {
4441 			adapter->dropped_pkts++;
4442 			++rxr->rx_discarded;
4443 			if (!eop) /* Catch subsequent segs */
4444 				rxr->discard = TRUE;
4445 			else
4446 				rxr->discard = FALSE;
4447 			em_rx_discard(rxr, i);
4448 			goto next_desc;
4449 		}
4450 
4451 		/* Assign correct length to the current fragment */
4452 		mp = rxr->rx_buffers[i].m_head;
4453 		mp->m_len = len;
4454 
4455 		/* Trigger for refresh */
4456 		rxr->rx_buffers[i].m_head = NULL;
4457 
4458 		/* First segment? */
4459 		if (rxr->fmp == NULL) {
4460 			mp->m_pkthdr.len = len;
4461 			rxr->fmp = rxr->lmp = mp;
4462 		} else {
4463 			/* Chain mbuf's together */
4464 			mp->m_flags &= ~M_PKTHDR;
4465 			rxr->lmp->m_next = mp;
4466 			rxr->lmp = mp;
4467 			rxr->fmp->m_pkthdr.len += len;
4468 		}
4469 
4470 		if (eop) {
4471 			--count;
4472 			sendmp = rxr->fmp;
4473 			sendmp->m_pkthdr.rcvif = ifp;
4474 			ifp->if_ipackets++;
4475 			em_receive_checksum(cur, sendmp);
4476 #ifndef __NO_STRICT_ALIGNMENT
4477 			if (adapter->max_frame_size >
4478 			    (MCLBYTES - ETHER_ALIGN) &&
4479 			    em_fixup_rx(rxr) != 0)
4480 				goto skip;
4481 #endif
4482 			if (status & E1000_RXD_STAT_VP) {
4483 				sendmp->m_pkthdr.ether_vtag =
4484 				    le16toh(cur->special);
4485 				sendmp->m_flags |= M_VLANTAG;
4486 			}
4487 #ifndef __NO_STRICT_ALIGNMENT
4488 skip:
4489 #endif
4490 			rxr->fmp = rxr->lmp = NULL;
4491 		}
4492 next_desc:
4493 		/* Zero out the receive descriptors status. */
4494 		cur->status = 0;
4495 		++rxdone;	/* cumulative for POLL */
4496 		++processed;
4497 
4498 		/* Advance our pointers to the next descriptor. */
4499 		if (++i == adapter->num_rx_desc)
4500 			i = 0;
4501 
4502 		/* Send to the stack */
4503 		if (sendmp != NULL) {
4504 			rxr->next_to_check = i;
4505 			EM_RX_UNLOCK(rxr);
4506 			(*ifp->if_input)(ifp, sendmp);
4507 			EM_RX_LOCK(rxr);
4508 			i = rxr->next_to_check;
4509 		}
4510 
4511 		/* Only refresh mbufs every 8 descriptors */
4512 		if (processed == 8) {
4513 			em_refresh_mbufs(rxr, i);
4514 			processed = 0;
4515 		}
4516 	}
4517 
4518 	/* Catch any remaining refresh work */
4519 	if (e1000_rx_unrefreshed(rxr))
4520 		em_refresh_mbufs(rxr, i);
4521 
4522 	rxr->next_to_check = i;
4523 	if (done != NULL)
4524 		*done = rxdone;
4525 	EM_RX_UNLOCK(rxr);
4526 
4527 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4528 }
4529 
4530 static __inline void
4531 em_rx_discard(struct rx_ring *rxr, int i)
4532 {
4533 	struct em_buffer	*rbuf;
4534 
4535 	rbuf = &rxr->rx_buffers[i];
4536 	/* Free any previous pieces */
4537 	if (rxr->fmp != NULL) {
4538 		rxr->fmp->m_flags |= M_PKTHDR;
4539 		m_freem(rxr->fmp);
4540 		rxr->fmp = NULL;
4541 		rxr->lmp = NULL;
4542 	}
4543 	/*
4544 	** Free buffer and allow em_refresh_mbufs()
4545 	** to clean up and recharge buffer.
4546 	*/
4547 	if (rbuf->m_head) {
4548 		m_free(rbuf->m_head);
4549 		rbuf->m_head = NULL;
4550 	}
4551 	return;
4552 }
4553 
4554 #ifndef __NO_STRICT_ALIGNMENT
4555 /*
4556  * When jumbo frames are enabled we should realign the entire payload on
4557  * architectures with strict alignment. This is a serious design mistake of
4558  * the 8254x as it nullifies the gain of DMA: the 8254x only allows RX
4559  * buffer sizes of 2048/4096/8192/16384, while what we really want is
4560  * 2048 - ETHER_ALIGN to align the payload. On architectures without
4561  * strict alignment restrictions the 8254x still performs unaligned memory
4562  * accesses, which reduces performance too. To avoid copying an entire
4563  * frame to align it, we allocate a new mbuf and copy the Ethernet header
4564  * to the new mbuf. The new mbuf is prepended to the existing mbuf chain.
4565  *
4566  * Be aware, the best performance of the 8254x is achieved only when jumbo
4567  * frames are not used at all on architectures with strict alignment.
4568  */
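/*
 * Editor's note (illustrative, assumed approach): NICs that accept an
 * arbitrary per-buffer length avoid this fixup by shifting each fresh
 * cluster before loading it, e.g.
 *
 *	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 *	if (m != NULL) {
 *		m->m_len = m->m_pkthdr.len = MCLBYTES;
 *		m_adj(m, ETHER_ALIGN);	(2-byte shift aligns the IP header)
 *	}
 *
 * The 8254x cannot do this, since its RX buffer sizes are fixed powers
 * of two, hence the header-copy workaround below.
 */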
4569 static int
4570 em_fixup_rx(struct rx_ring *rxr)
4571 {
4572 	struct adapter *adapter = rxr->adapter;
4573 	struct mbuf *m, *n;
4574 	int error;
4575 
4576 	error = 0;
4577 	m = rxr->fmp;
4578 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4579 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4580 		m->m_data += ETHER_HDR_LEN;
4581 	} else {
4582 		MGETHDR(n, M_NOWAIT, MT_DATA);
4583 		if (n != NULL) {
4584 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4585 			m->m_data += ETHER_HDR_LEN;
4586 			m->m_len -= ETHER_HDR_LEN;
4587 			n->m_len = ETHER_HDR_LEN;
4588 			M_MOVE_PKTHDR(n, m);
4589 			n->m_next = m;
4590 			rxr->fmp = n;
4591 		} else {
4592 			adapter->dropped_pkts++;
4593 			m_freem(rxr->fmp);
4594 			rxr->fmp = NULL;
4595 			error = ENOMEM;
4596 		}
4597 	}
4598 
4599 	return (error);
4600 }
4601 #endif
4602 
4603 /*********************************************************************
4604  *
4605  *  Verify that the hardware indicated that the checksum is valid.
4606  *  Inform the stack about the status of checksum so that stack
4607  *  doesn't spend time verifying the checksum.
4608  *
4609  *********************************************************************/
4610 static void
4611 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4612 {
4613 	/* Ignore Checksum bit is set */
4614 	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4615 		mp->m_pkthdr.csum_flags = 0;
4616 		return;
4617 	}
4618 
4619 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4620 		/* Did it pass? */
4621 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4622 			/* IP Checksum Good */
4623 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4624 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4625 
4626 		} else {
4627 			mp->m_pkthdr.csum_flags = 0;
4628 		}
4629 	}
4630 
4631 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4632 		/* Did it pass? */
4633 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4634 			mp->m_pkthdr.csum_flags |=
4635 			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4636 			mp->m_pkthdr.csum_data = htons(0xffff);
4637 		}
4638 	}
4639 }
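
/*
 * Editor's note: setting CSUM_DATA_VALID | CSUM_PSEUDO_HDR together
 * with csum_data = 0xffff is the conventional FreeBSD idiom for
 * "hardware verified the TCP/UDP checksum": the protocol input path
 * sees an already-folded pseudo-header checksum and skips software
 * verification.
 */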
4640 
4641 /*
4642  * This routine is run via a vlan
4643  * config EVENT.
4644  */
4645 static void
4646 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4647 {
4648 	struct adapter	*adapter = ifp->if_softc;
4649 	u32		index, bit;
4650 
4651 	if (ifp->if_softc !=  arg)   /* Not our event */
4652 		return;
4653 
4654 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4655                 return;
4656 
4657 	EM_CORE_LOCK(adapter);
4658 	index = (vtag >> 5) & 0x7F;
4659 	bit = vtag & 0x1F;
4660 	adapter->shadow_vfta[index] |= (1 << bit);
4661 	++adapter->num_vlans;
4662 	/* Re-init to load the changes */
4663 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4664 		em_init_locked(adapter);
4665 	EM_CORE_UNLOCK(adapter);
4666 }
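
/*
 * Worked example (hypothetical vtag): for vtag = 1000,
 * index = (1000 >> 5) & 0x7F = 31 and bit = 1000 & 0x1F = 8, so bit 8
 * of shadow_vfta word 31 is set; the 128 32-bit words cover all 4096
 * possible VLAN IDs.
 */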
4667 
4668 /*
4669  * This routine is run via a vlan
4670  * unconfig EVENT.
4671  */
4672 static void
4673 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4674 {
4675 	struct adapter	*adapter = ifp->if_softc;
4676 	u32		index, bit;
4677 
4678 	if (ifp->if_softc !=  arg)
4679 		return;
4680 
4681 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4682                 return;
4683 
4684 	EM_CORE_LOCK(adapter);
4685 	index = (vtag >> 5) & 0x7F;
4686 	bit = vtag & 0x1F;
4687 	adapter->shadow_vfta[index] &= ~(1 << bit);
4688 	--adapter->num_vlans;
4689 	/* Re-init to load the changes */
4690 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4691 		em_init_locked(adapter);
4692 	EM_CORE_UNLOCK(adapter);
4693 }
4694 
4695 static void
4696 em_setup_vlan_hw_support(struct adapter *adapter)
4697 {
4698 	struct e1000_hw *hw = &adapter->hw;
4699 	u32             reg;
4700 
4701 	/*
4702 	** We get here thru init_locked, meaning
4703 	** a soft reset, which has already cleared
4704 	** the VFTA and other state; if no vlans
4705 	** have been registered, do nothing.
4706 	*/
4707 	if (adapter->num_vlans == 0)
4708                 return;
4709 
4710 	/*
4711 	** A soft reset zeroes out the VFTA, so
4712 	** we need to repopulate it now.
4713 	*/
4714 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4715                 if (adapter->shadow_vfta[i] != 0)
4716 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4717                             i, adapter->shadow_vfta[i]);
4718 
4719 	reg = E1000_READ_REG(hw, E1000_CTRL);
4720 	reg |= E1000_CTRL_VME;
4721 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4722 
4723 	/* Enable the Filter Table */
4724 	reg = E1000_READ_REG(hw, E1000_RCTL);
4725 	reg &= ~E1000_RCTL_CFIEN;
4726 	reg |= E1000_RCTL_VFE;
4727 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4728 }
4729 
4730 static void
4731 em_enable_intr(struct adapter *adapter)
4732 {
4733 	struct e1000_hw *hw = &adapter->hw;
4734 	u32 ims_mask = IMS_ENABLE_MASK;
4735 
4736 	if (hw->mac.type == e1000_82574) {
4737 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4738 		ims_mask |= EM_MSIX_MASK;
4739 	}
4740 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4741 }
4742 
4743 static void
4744 em_disable_intr(struct adapter *adapter)
4745 {
4746 	struct e1000_hw *hw = &adapter->hw;
4747 
4748 	if (hw->mac.type == e1000_82574)
4749 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4750 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4751 }
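
/*
 * Editor's note: IMS and IMC are a set/clear register pair; bits
 * written as 1 to IMS unmask those interrupt causes, and bits written
 * as 1 to IMC mask them. That is why em_disable_intr() can mask
 * everything with a single 0xffffffff write instead of
 * read-modify-writing one mask register.
 */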
4752 
4753 /*
4754  * Bit of a misnomer: what this really means is
4755  * to enable OS management of the system... aka
4756  * to disable special hardware management features
4757  */
4758 static void
4759 em_init_manageability(struct adapter *adapter)
4760 {
4761 	/* A shared code workaround */
4762 #define E1000_82542_MANC2H E1000_MANC2H
4763 	if (adapter->has_manage) {
4764 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4765 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4766 
4767 		/* disable hardware interception of ARP */
4768 		manc &= ~(E1000_MANC_ARP_EN);
4769 
4770                 /* enable receiving management packets to the host */
4771 		manc |= E1000_MANC_EN_MNG2HOST;
4772 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4773 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4774 		manc2h |= E1000_MNG2HOST_PORT_623;
4775 		manc2h |= E1000_MNG2HOST_PORT_664;
4776 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4777 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4778 	}
4779 }
4780 
4781 /*
4782  * Give control back to hardware management
4783  * controller if there is one.
4784  */
4785 static void
4786 em_release_manageability(struct adapter *adapter)
4787 {
4788 	if (adapter->has_manage) {
4789 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4790 
4791 		/* re-enable hardware interception of ARP */
4792 		manc |= E1000_MANC_ARP_EN;
4793 		manc &= ~E1000_MANC_EN_MNG2HOST;
4794 
4795 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4796 	}
4797 }
4798 
4799 /*
4800  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4801  * For ASF and Pass Through versions of f/w this means
4802  * that the driver is loaded. For AMT version type f/w
4803  * this means that the network i/f is open.
4804  */
4805 static void
4806 em_get_hw_control(struct adapter *adapter)
4807 {
4808 	u32 ctrl_ext, swsm;
4809 
4810 	if (adapter->hw.mac.type == e1000_82573) {
4811 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4812 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4813 		    swsm | E1000_SWSM_DRV_LOAD);
4814 		return;
4815 	}
4816 	/* else */
4817 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4818 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4819 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4820 	return;
4821 }
4822 
4823 /*
4824  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4825  * For ASF and Pass Through versions of f/w this means that
4826  * the driver is no longer loaded. For AMT versions of the
4827  * f/w this means that the network i/f is closed.
4828  */
4829 static void
4830 em_release_hw_control(struct adapter *adapter)
4831 {
4832 	u32 ctrl_ext, swsm;
4833 
4834 	if (!adapter->has_manage)
4835 		return;
4836 
4837 	if (adapter->hw.mac.type == e1000_82573) {
4838 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4839 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4840 		    swsm & ~E1000_SWSM_DRV_LOAD);
4841 		return;
4842 	}
4843 	/* else */
4844 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4845 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4846 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4847 	return;
4848 }
4849 
4850 static int
4851 em_is_valid_ether_addr(u8 *addr)
4852 {
4853 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4854 
4855 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4856 		return (FALSE);
4857 	}
4858 
4859 	return (TRUE);
4860 }
4861 
4862 /*
4863 ** Parse the interface capabilities with regard
4864 ** to both system management and wake-on-lan for
4865 ** later use.
4866 */
4867 static void
4868 em_get_wakeup(device_t dev)
4869 {
4870 	struct adapter	*adapter = device_get_softc(dev);
4871 	u16		eeprom_data = 0, device_id, apme_mask;
4872 
4873 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4874 	apme_mask = EM_EEPROM_APME;
4875 
4876 	switch (adapter->hw.mac.type) {
4877 	case e1000_82573:
4878 	case e1000_82583:
4879 		adapter->has_amt = TRUE;
4880 		/* Falls thru */
4881 	case e1000_82571:
4882 	case e1000_82572:
4883 	case e1000_80003es2lan:
4884 		if (adapter->hw.bus.func == 1) {
4885 			e1000_read_nvm(&adapter->hw,
4886 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4887 			break;
4888 		} else
4889 			e1000_read_nvm(&adapter->hw,
4890 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4891 		break;
4892 	case e1000_ich8lan:
4893 	case e1000_ich9lan:
4894 	case e1000_ich10lan:
4895 	case e1000_pchlan:
4896 	case e1000_pch2lan:
4897 		apme_mask = E1000_WUC_APME;
4898 		adapter->has_amt = TRUE;
4899 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4900 		break;
4901 	default:
4902 		e1000_read_nvm(&adapter->hw,
4903 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4904 		break;
4905 	}
4906 	if (eeprom_data & apme_mask)
4907 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4908 	/*
4909 	 * We have the eeprom settings; now apply the special cases
4910 	 * where the eeprom may be wrong or the board won't support
4911 	 * wake on lan on a particular port.
4912 	 */
4913 	device_id = pci_get_device(dev);
4914         switch (device_id) {
4915 	case E1000_DEV_ID_82571EB_FIBER:
4916 		/* Wake events only supported on port A for dual fiber
4917 		 * regardless of eeprom setting */
4918 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4919 		    E1000_STATUS_FUNC_1)
4920 			adapter->wol = 0;
4921 		break;
4922 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4923 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4924 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4925                 /* if quad port adapter, disable WoL on all but port A */
4926 		if (global_quad_port_a != 0)
4927 			adapter->wol = 0;
4928 		/* Reset for multiple quad port adapters */
4929 		if (++global_quad_port_a == 4)
4930 			global_quad_port_a = 0;
4931                 break;
4932 	}
4933 	return;
4934 }
4935 
4936 
4937 /*
4938  * Enable PCI Wake On Lan capability
4939  */
4940 static void
4941 em_enable_wakeup(device_t dev)
4942 {
4943 	struct adapter	*adapter = device_get_softc(dev);
4944 	struct ifnet	*ifp = adapter->ifp;
4945 	u32		pmc, ctrl, ctrl_ext, rctl;
4946 	u16     	status;
4947 
4948 	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
4949 		return;
4950 
4951 	/* Advertise the wakeup capability */
4952 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4953 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4954 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4955 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4956 
4957 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4958 	    (adapter->hw.mac.type == e1000_pchlan) ||
4959 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4960 	    (adapter->hw.mac.type == e1000_ich10lan))
4961 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4962 
4963 	/* Keep the laser running on Fiber adapters */
4964 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4965 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4966 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4967 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4968 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4969 	}
4970 
4971 	/*
4972 	** Determine type of Wakeup: note that wol
4973 	** is set with all bits on by default.
4974 	*/
4975 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4976 		adapter->wol &= ~E1000_WUFC_MAG;
4977 
4978 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4979 		adapter->wol &= ~E1000_WUFC_MC;
4980 	else {
4981 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4982 		rctl |= E1000_RCTL_MPE;
4983 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4984 	}
4985 
4986 	if ((adapter->hw.mac.type == e1000_pchlan) ||
4987 	    (adapter->hw.mac.type == e1000_pch2lan)) {
4988 		if (em_enable_phy_wakeup(adapter))
4989 			return;
4990 	} else {
4991 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4992 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4993 	}
4994 
4995 	if (adapter->hw.phy.type == e1000_phy_igp_3)
4996 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4997 
4998         /* Request PME */
4999         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5000 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5001 	if (ifp->if_capenable & IFCAP_WOL)
5002 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5003         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5004 
5005 	return;
5006 }
5007 
5008 /*
5009 ** WOL in the newer chipset interfaces (pchlan)
5010 ** requires things to be copied into the PHY.
5011 */
5012 static int
5013 em_enable_phy_wakeup(struct adapter *adapter)
5014 {
5015 	struct e1000_hw *hw = &adapter->hw;
5016 	u32 mreg, ret = 0;
5017 	u16 preg;
5018 
5019 	/* copy MAC RARs to PHY RARs */
5020 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5021 
5022 	/* copy MAC MTA to PHY MTA */
5023 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5024 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5025 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5026 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5027 		    (u16)((mreg >> 16) & 0xFFFF));
5028 	}
5029 
5030 	/* configure PHY Rx Control register */
5031 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5032 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5033 	if (mreg & E1000_RCTL_UPE)
5034 		preg |= BM_RCTL_UPE;
5035 	if (mreg & E1000_RCTL_MPE)
5036 		preg |= BM_RCTL_MPE;
5037 	preg &= ~(BM_RCTL_MO_MASK);
5038 	if (mreg & E1000_RCTL_MO_3)
5039 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5040 				<< BM_RCTL_MO_SHIFT);
5041 	if (mreg & E1000_RCTL_BAM)
5042 		preg |= BM_RCTL_BAM;
5043 	if (mreg & E1000_RCTL_PMCF)
5044 		preg |= BM_RCTL_PMCF;
5045 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5046 	if (mreg & E1000_CTRL_RFCE)
5047 		preg |= BM_RCTL_RFCE;
5048 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5049 
5050 	/* enable PHY wakeup in MAC register */
5051 	E1000_WRITE_REG(hw, E1000_WUC,
5052 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5053 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5054 
5055 	/* configure and enable PHY wakeup in PHY registers */
5056 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5057 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5058 
5059 	/* activate PHY wakeup */
5060 	ret = hw->phy.ops.acquire(hw);
5061 	if (ret) {
5062 		printf("Could not acquire PHY\n");
5063 		return ret;
5064 	}
5065 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5066 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5067 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5068 	if (ret) {
5069 		printf("Could not read PHY page 769\n");
5070 		goto out;
5071 	}
5072 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5073 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5074 	if (ret)
5075 		printf("Could not set PHY Host Wakeup bit\n");
5076 out:
5077 	hw->phy.ops.release(hw);
5078 
5079 	return ret;
5080 }
5081 
5082 static void
5083 em_led_func(void *arg, int onoff)
5084 {
5085 	struct adapter	*adapter = arg;
5086 
5087 	EM_CORE_LOCK(adapter);
5088 	if (onoff) {
5089 		e1000_setup_led(&adapter->hw);
5090 		e1000_led_on(&adapter->hw);
5091 	} else {
5092 		e1000_led_off(&adapter->hw);
5093 		e1000_cleanup_led(&adapter->hw);
5094 	}
5095 	EM_CORE_UNLOCK(adapter);
5096 }
5097 
5098 /*
5099 ** Disable the L0s and L1 link states.
5100 */
5101 static void
5102 em_disable_aspm(struct adapter *adapter)
5103 {
5104 	int		base, reg;
5105 	u16		link_cap, link_ctrl;
5106 	device_t	dev = adapter->dev;
5107 
5108 	switch (adapter->hw.mac.type) {
5109 	case e1000_82573:
5110 	case e1000_82574:
5111 	case e1000_82583:
5112 		break;
5113 	default:
5114 		return;
5115 	}
5116 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5117 		return;
5118 	reg = base + PCIER_LINK_CAP;
5119 	link_cap = pci_read_config(dev, reg, 2);
5120 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5121 		return;
5122 	reg = base + PCIER_LINK_CTL;
5123 	link_ctrl = pci_read_config(dev, reg, 2);
5124 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5125 	pci_write_config(dev, reg, link_ctrl, 2);
5126 	return;
5127 }
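
/*
 * PCIEM_LINK_CTL_ASPMC is the two-bit ASPM Control field (the L0s and
 * L1 enables) of the PCIe Link Control register, so the single
 * read-modify-write above disables active-state power management
 * entirely on the affected 82573/82574/82583 parts.
 */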
5128 
5129 /**********************************************************************
5130  *
5131  *  Update the board statistics counters.
5132  *
5133  **********************************************************************/
5134 static void
5135 em_update_stats_counters(struct adapter *adapter)
5136 {
5137 	struct ifnet   *ifp;
5138 
5139 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5140 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5141 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5142 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5143 	}
5144 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5145 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5146 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5147 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5148 
5149 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5150 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5151 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5152 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5153 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5154 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5155 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5156 	/*
5157 	** For watchdog management we need to know if we have been
5158 	** paused during the last interval, so capture that here.
5159 	*/
5160 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5161 	adapter->stats.xoffrxc += adapter->pause_frames;
5162 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5163 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5164 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5165 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5166 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5167 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5168 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5169 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5170 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5171 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5172 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5173 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5174 
5175 	/* For the 64-bit byte counters the low dword must be read first; */
5176 	/* both registers clear on the read of the high dword. */
5177 
5178 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5179 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5180 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5181 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5182 
5183 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5184 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5185 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5186 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5187 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5188 
5189 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5190 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5191 
5192 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5193 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5194 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5195 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5196 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5197 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5198 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5199 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5200 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5201 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5202 
5203 	/* Interrupt Counts */
5204 
5205 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5206 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5207 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5208 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5209 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5210 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5211 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5212 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5213 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5214 
5215 	if (adapter->hw.mac.type >= e1000_82543) {
5216 		adapter->stats.algnerrc +=
5217 		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5218 		adapter->stats.rxerrc +=
5219 		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5220 		adapter->stats.tncrs +=
5221 		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5222 		adapter->stats.cexterr +=
5223 		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5224 		adapter->stats.tsctc +=
5225 		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5226 		adapter->stats.tsctfc +=
5227 		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5228 	}
5229 	ifp = adapter->ifp;
5230 
5231 	ifp->if_collisions = adapter->stats.colc;
5232 
5233 	/* Rx Errors */
5234 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5235 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5236 	    adapter->stats.ruc + adapter->stats.roc +
5237 	    adapter->stats.mpc + adapter->stats.cexterr;
5238 
5239 	/* Tx Errors */
5240 	ifp->if_oerrors = adapter->stats.ecol +
5241 	    adapter->stats.latecol + adapter->watchdog_events;
5242 }
5243 
5244 /* Export a single 32-bit register via a read-only sysctl. */
5245 static int
5246 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5247 {
5248 	struct adapter *adapter;
5249 	u_int val;
5250 
5251 	adapter = oidp->oid_arg1;
5252 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5253 	return (sysctl_handle_int(oidp, &val, 0, req));
5254 }
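
/*
 * The handler above is generic: the register offset travels in
 * oid_arg2.  em_add_hw_stats() below wires it up, for example:
 *
 *	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
 *	    CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
 *	    em_sysctl_reg_handler, "IU", "Device Control Register");
 *
 * which, assuming unit 0, is then readable from userland as
 * "sysctl dev.em.0.device_control".
 */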
5255 
5256 /*
5257  * Add sysctl variables, one per statistic, to the system.
5258  */
5259 static void
5260 em_add_hw_stats(struct adapter *adapter)
5261 {
5262 	device_t dev = adapter->dev;
5263 
5264 	struct tx_ring *txr = adapter->tx_rings;
5265 	struct rx_ring *rxr = adapter->rx_rings;
5266 
5267 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5268 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5269 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5270 	struct e1000_hw_stats *stats = &adapter->stats;
5271 
5272 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5273 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5274 
5275 #define QUEUE_NAME_LEN 32
5276 	char namebuf[QUEUE_NAME_LEN];
5277 
5278 	/* Driver Statistics */
5279 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5280 			CTLFLAG_RD, &adapter->link_irq,
5281 			"Link MSIX IRQ Handled");
5282 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5283 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5284 			 "Std mbuf failed");
5285 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5286 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5287 			 "Std mbuf cluster failed");
5288 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5289 			CTLFLAG_RD, &adapter->dropped_pkts,
5290 			"Driver dropped packets");
5291 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5292 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5293 			"Driver tx dma failure in xmit");
5294 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5295 			CTLFLAG_RD, &adapter->rx_overruns,
5296 			"RX overruns");
5297 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5298 			CTLFLAG_RD, &adapter->watchdog_events,
5299 			"Watchdog timeouts");
5300 
5301 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5302 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5303 			em_sysctl_reg_handler, "IU",
5304 			"Device Control Register");
5305 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5306 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5307 			em_sysctl_reg_handler, "IU",
5308 			"Receiver Control Register");
5309 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5310 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5311 			"Flow Control High Watermark");
5312 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5313 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5314 			"Flow Control Low Watermark");
5315 
5316 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5317 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5318 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5319 					    CTLFLAG_RD, NULL, "Queue Name");
5320 		queue_list = SYSCTL_CHILDREN(queue_node);
5321 
5322 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5323 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5324 				E1000_TDH(txr->me),
5325 				em_sysctl_reg_handler, "IU",
5326  				"Transmit Descriptor Head");
5327 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5328 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5329 				E1000_TDT(txr->me),
5330 				em_sysctl_reg_handler, "IU",
5331  				"Transmit Descriptor Tail");
5332 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5333 				CTLFLAG_RD, &txr->tx_irq,
5334 				"Queue MSI-X Transmit Interrupts");
5335 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5336 				CTLFLAG_RD, &txr->no_desc_avail,
5337 				"Queue No Descriptor Available");
5338 
5339 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5340 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5341 				E1000_RDH(rxr->me),
5342 				em_sysctl_reg_handler, "IU",
5343 				"Receive Descriptor Head");
5344 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5345 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5346 				E1000_RDT(rxr->me),
5347 				em_sysctl_reg_handler, "IU",
5348 				"Receive Descriptor Tail");
5349 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5350 				CTLFLAG_RD, &rxr->rx_irq,
5351 				"Queue MSI-X Receive Interrupts");
5352 	}
5353 
5354 	/* MAC stats get their own sub node */
5355 
5356 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5357 				    CTLFLAG_RD, NULL, "Statistics");
5358 	stat_list = SYSCTL_CHILDREN(stat_node);
5359 
5360 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5361 			CTLFLAG_RD, &stats->ecol,
5362 			"Excessive collisions");
5363 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5364 			CTLFLAG_RD, &stats->scc,
5365 			"Single collisions");
5366 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5367 			CTLFLAG_RD, &stats->mcc,
5368 			"Multiple collisions");
5369 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5370 			CTLFLAG_RD, &stats->latecol,
5371 			"Late collisions");
5372 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5373 			CTLFLAG_RD, &stats->colc,
5374 			"Collision Count");
5375 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5376 			CTLFLAG_RD, &adapter->stats.symerrs,
5377 			"Symbol Errors");
5378 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5379 			CTLFLAG_RD, &adapter->stats.sec,
5380 			"Sequence Errors");
5381 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5382 			CTLFLAG_RD, &adapter->stats.dc,
5383 			"Defer Count");
5384 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5385 			CTLFLAG_RD, &adapter->stats.mpc,
5386 			"Missed Packets");
5387 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5388 			CTLFLAG_RD, &adapter->stats.rnbc,
5389 			"Receive No Buffers");
5390 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5391 			CTLFLAG_RD, &adapter->stats.ruc,
5392 			"Receive Undersize");
5393 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5394 			CTLFLAG_RD, &adapter->stats.rfc,
5395 			"Fragmented Packets Received");
5396 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5397 			CTLFLAG_RD, &adapter->stats.roc,
5398 			"Oversized Packets Received");
5399 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5400 			CTLFLAG_RD, &adapter->stats.rjc,
5401 			"Received Jabber");
5402 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5403 			CTLFLAG_RD, &adapter->stats.rxerrc,
5404 			"Receive Errors");
5405 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5406 			CTLFLAG_RD, &adapter->stats.crcerrs,
5407 			"CRC errors");
5408 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5409 			CTLFLAG_RD, &adapter->stats.algnerrc,
5410 			"Alignment Errors");
5411 	/* On 82575 these are collision counts */
5412 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5413 			CTLFLAG_RD, &adapter->stats.cexterr,
5414 			"Collision/Carrier extension errors");
5415 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5416 			CTLFLAG_RD, &adapter->stats.xonrxc,
5417 			"XON Received");
5418 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5419 			CTLFLAG_RD, &adapter->stats.xontxc,
5420 			"XON Transmitted");
5421 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5422 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5423 			"XOFF Received");
5424 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5425 			CTLFLAG_RD, &adapter->stats.xofftxc,
5426 			"XOFF Transmitted");
5427 
5428 	/* Packet Reception Stats */
5429 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5430 			CTLFLAG_RD, &adapter->stats.tpr,
5431 			"Total Packets Received");
5432 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5433 			CTLFLAG_RD, &adapter->stats.gprc,
5434 			"Good Packets Received");
5435 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5436 			CTLFLAG_RD, &adapter->stats.bprc,
5437 			"Broadcast Packets Received");
5438 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5439 			CTLFLAG_RD, &adapter->stats.mprc,
5440 			"Multicast Packets Received");
5441 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5442 			CTLFLAG_RD, &adapter->stats.prc64,
5443 			"64 byte frames received");
5444 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5445 			CTLFLAG_RD, &adapter->stats.prc127,
5446 			"65-127 byte frames received");
5447 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5448 			CTLFLAG_RD, &adapter->stats.prc255,
5449 			"128-255 byte frames received");
5450 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5451 			CTLFLAG_RD, &adapter->stats.prc511,
5452 			"256-511 byte frames received");
5453 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5454 			CTLFLAG_RD, &adapter->stats.prc1023,
5455 			"512-1023 byte frames received");
5456 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5457 			CTLFLAG_RD, &adapter->stats.prc1522,
5458 			"1024-1522 byte frames received");
5459  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5460  			CTLFLAG_RD, &adapter->stats.gorc,
5461  			"Good Octets Received");
5462 
5463 	/* Packet Transmission Stats */
5464  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5465  			CTLFLAG_RD, &adapter->stats.gotc,
5466  			"Good Octets Transmitted");
5467 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5468 			CTLFLAG_RD, &adapter->stats.tpt,
5469 			"Total Packets Transmitted");
5470 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5471 			CTLFLAG_RD, &adapter->stats.gptc,
5472 			"Good Packets Transmitted");
5473 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5474 			CTLFLAG_RD, &adapter->stats.bptc,
5475 			"Broadcast Packets Transmitted");
5476 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5477 			CTLFLAG_RD, &adapter->stats.mptc,
5478 			"Multicast Packets Transmitted");
5479 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5480 			CTLFLAG_RD, &adapter->stats.ptc64,
5481 			"64 byte frames transmitted");
5482 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5483 			CTLFLAG_RD, &adapter->stats.ptc127,
5484 			"65-127 byte frames transmitted");
5485 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5486 			CTLFLAG_RD, &adapter->stats.ptc255,
5487 			"128-255 byte frames transmitted");
5488 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5489 			CTLFLAG_RD, &adapter->stats.ptc511,
5490 			"256-511 byte frames transmitted");
5491 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5492 			CTLFLAG_RD, &adapter->stats.ptc1023,
5493 			"512-1023 byte frames transmitted");
5494 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5495 			CTLFLAG_RD, &adapter->stats.ptc1522,
5496 			"1024-1522 byte frames transmitted");
5497 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5498 			CTLFLAG_RD, &adapter->stats.tsctc,
5499 			"TSO Contexts Transmitted");
5500 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5501 			CTLFLAG_RD, &adapter->stats.tsctfc,
5502 			"TSO Contexts Failed");
5503 
5504 
5505 	/* Interrupt Stats */
5506 
5507 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5508 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5509 	int_list = SYSCTL_CHILDREN(int_node);
5510 
5511 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5512 			CTLFLAG_RD, &adapter->stats.iac,
5513 			"Interrupt Assertion Count");
5514 
5515 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5516 			CTLFLAG_RD, &adapter->stats.icrxptc,
5517 			"Interrupt Cause Rx Pkt Timer Expire Count");
5518 
5519 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5520 			CTLFLAG_RD, &adapter->stats.icrxatc,
5521 			"Interrupt Cause Rx Abs Timer Expire Count");
5522 
5523 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5524 			CTLFLAG_RD, &adapter->stats.ictxptc,
5525 			"Interrupt Cause Tx Pkt Timer Expire Count");
5526 
5527 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5528 			CTLFLAG_RD, &adapter->stats.ictxatc,
5529 			"Interrupt Cause Tx Abs Timer Expire Count");
5530 
5531 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5532 			CTLFLAG_RD, &adapter->stats.ictxqec,
5533 			"Interrupt Cause Tx Queue Empty Count");
5534 
5535 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5536 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5537 			"Interrupt Cause Tx Queue Min Thresh Count");
5538 
5539 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5540 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5541 			"Interrupt Cause Rx Desc Min Thresh Count");
5542 
5543 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5544 			CTLFLAG_RD, &adapter->stats.icrxoc,
5545 			"Interrupt Cause Receiver Overrun Count");
5546 }
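
/*
 * A usage sketch (assuming unit 0): the nodes created above can be
 * inspected from userland with, e.g.:
 *
 *	# sysctl dev.em.0.mac_stats
 *	# sysctl dev.em.0.queue0.txd_head
 *	# sysctl dev.em.0.interrupts.asserts
 */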
5547 
5548 /**********************************************************************
5549  *
5550  *  This routine provides a way to dump out the adapter eeprom,
5551  *  often a useful debug/service tool. It dumps only the first
5552  *  32 words; the data that matters lies within that extent.
5553  *
5554  **********************************************************************/
5555 static int
5556 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5557 {
5558 	struct adapter *adapter = (struct adapter *)arg1;
5559 	int error;
5560 	int result;
5561 
5562 	result = -1;
5563 	error = sysctl_handle_int(oidp, &result, 0, req);
5564 
5565 	if (error || !req->newptr)
5566 		return (error);
5567 
5568 	/*
5569 	 * This value will cause a hex dump of the
5570 	 * first 32 16-bit words of the EEPROM to
5571 	 * the screen.
5572 	 */
5573 	if (result == 1)
5574 		em_print_nvm_info(adapter);
5575 
5576 	return (error);
5577 }
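
/*
 * Minimal usage sketch, assuming the handler is attached during
 * device attach under a node named "nvm" (unit 0); writing 1
 * triggers em_print_nvm_info() below:
 *
 *	# sysctl dev.em.0.nvm=1
 */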
5578 
5579 static void
5580 em_print_nvm_info(struct adapter *adapter)
5581 {
5582 	u16	eeprom_data;
5583 	int	i, j, row = 0;
5584 
5585 	/* It's a bit crude, but it gets the job done. */
5586 	printf("\nInterface EEPROM Dump:\n");
5587 	printf("Offset\n0x0000  ");
5588 	for (i = 0, j = 0; i < 32; i++, j++) {
5589 		if (j == 8) { /* Make the offset block */
5590 			j = 0; ++row;
5591 			printf("\n0x00%x0  ", row);
5592 		}
5593 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5594 		printf("%04x ", eeprom_data);
5595 	}
5596 	printf("\n");
5597 }
5598 
5599 static int
5600 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5601 {
5602 	struct em_int_delay_info *info;
5603 	struct adapter *adapter;
5604 	u32 regval;
5605 	int error, usecs, ticks;
5606 
5607 	info = (struct em_int_delay_info *)arg1;
5608 	usecs = info->value;
5609 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5610 	if (error != 0 || req->newptr == NULL)
5611 		return (error);
5612 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5613 		return (EINVAL);
5614 	info->value = usecs;
5615 	ticks = EM_USECS_TO_TICKS(usecs);
5616 
5617 	adapter = info->adapter;
5618 
5619 	EM_CORE_LOCK(adapter);
5620 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5621 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5622 	/* Handle a few special cases. */
5623 	switch (info->offset) {
5624 	case E1000_RDTR:
5625 		break;
5626 	case E1000_TIDV:
5627 		if (ticks == 0) {
5628 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5629 			/* Don't write 0 into the TIDV register. */
5630 			regval++;
5631 		} else
5632 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5633 		break;
5634 	}
5635 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5636 	EM_CORE_UNLOCK(adapter);
5637 	return (0);
5638 }
5639 
5640 static void
5641 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5642 	const char *description, struct em_int_delay_info *info,
5643 	int offset, int value)
5644 {
5645 	info->adapter = adapter;
5646 	info->offset = offset;
5647 	info->value = value;
5648 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5649 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5650 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5651 	    info, 0, em_sysctl_int_delay, "I", description);
5652 }
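
/*
 * Illustrative call, roughly as attach code might use this helper for
 * the receive interrupt delay (the names are taken from this driver,
 * but the exact invocation is a sketch):
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *	    E1000_REGISTER(&adapter->hw, E1000_RDTR),
 *	    em_rx_int_delay_dflt);
 *
 * after which "sysctl dev.em.0.rx_int_delay=32" (assuming unit 0)
 * flows through em_sysctl_int_delay() above.
 */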
5653 
5654 static void
5655 em_set_sysctl_value(struct adapter *adapter, const char *name,
5656 	const char *description, int *limit, int value)
5657 {
5658 	*limit = value;
5659 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5660 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5661 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5662 }
5663 
5664 
5665 /*
5666 ** Set flow control using sysctl:
5667 ** Flow control values:
5668 **      0 - off
5669 **      1 - rx pause
5670 **      2 - tx pause
5671 **      3 - full
5672 */
5673 static int
5674 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5675 {
5676 	int		error;
5677 	static int	input = 3; /* default is full */
5678 	struct adapter	*adapter = (struct adapter *) arg1;
5679 
5680 	error = sysctl_handle_int(oidp, &input, 0, req);
5681 
5682 	if ((error) || (req->newptr == NULL))
5683 		return (error);
5684 
5685 	if (input == adapter->fc) /* no change? */
5686 		return (error);
5687 
5688 	switch (input) {
5689 	case e1000_fc_rx_pause:
5690 	case e1000_fc_tx_pause:
5691 	case e1000_fc_full:
5692 	case e1000_fc_none:
5693 		adapter->hw.fc.requested_mode = input;
5694 		adapter->fc = input;
5695 		break;
5696 	default:
5697 		/* Do nothing */
5698 		return (error);
5699 	}
5700 
5701 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5702 	e1000_force_mac_fc(&adapter->hw);
5703 	return (error);
5704 }
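
/*
 * Usage sketch, assuming the handler is attached as "fc" on unit 0.
 * The accepted values follow the e1000_fc_mode enum (0 none,
 * 1 rx pause, 2 tx pause, 3 full), matching the comment above:
 *
 *	# sysctl dev.em.0.fc=3
 */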
5705 
5706 /*
5707 ** Manage Energy Efficient Ethernet:
5708 ** Control values:
5709 **     0 - enabled, 1 - disabled
5710 */
5711 static int
5712 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5713 {
5714 	struct adapter *adapter = (struct adapter *) arg1;
5715 	int		error, value;
5716 
5717 	value = adapter->hw.dev_spec.ich8lan.eee_disable;
5718 	error = sysctl_handle_int(oidp, &value, 0, req);
5719 	if (error || req->newptr == NULL)
5720 		return (error);
5721 	EM_CORE_LOCK(adapter);
5722 	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5723 	em_init_locked(adapter);
5724 	EM_CORE_UNLOCK(adapter);
5725 	return (0);
5726 }
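
/*
 * Usage sketch, assuming the handler is attached as "eee_control" on
 * unit 0.  The value maps onto eee_disable, so writing 1 turns EEE
 * off and writing 0 turns it back on:
 *
 *	# sysctl dev.em.0.eee_control=1
 */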
5727 
5728 static int
5729 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5730 {
5731 	struct adapter *adapter;
5732 	int error;
5733 	int result;
5734 
5735 	result = -1;
5736 	error = sysctl_handle_int(oidp, &result, 0, req);
5737 
5738 	if (error || !req->newptr)
5739 		return (error);
5740 
5741 	if (result == 1) {
5742 		adapter = (struct adapter *)arg1;
5743 		em_print_debug_info(adapter);
5744 	}
5745 
5746 	return (error);
5747 }
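
/*
 * Usage sketch, assuming the handler is attached as "debug" on
 * unit 0; writing 1 dumps em_print_debug_info() to the console:
 *
 *	# sysctl dev.em.0.debug=1
 */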
5748 
5749 /*
5750 ** This routine is meant to be fluid, add whatever is
5751 ** needed for debugging a problem.  -jfv
5752 */
5753 static void
5754 em_print_debug_info(struct adapter *adapter)
5755 {
5756 	device_t dev = adapter->dev;
5757 	struct tx_ring *txr = adapter->tx_rings;
5758 	struct rx_ring *rxr = adapter->rx_rings;
5759 
5760 	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5761 		printf("Interface is RUNNING ");
5762 	else
5763 		printf("Interface is NOT RUNNING ");
5764 
5765 	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5766 		printf("and INACTIVE\n");
5767 	else
5768 		printf("and ACTIVE\n");
5769 
5770 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5771 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5772 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5773 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5774 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5775 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5776 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5777 	device_printf(dev, "TX descriptors avail = %d\n",
5778 	    txr->tx_avail);
5779 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5780 	    txr->no_desc_avail);
5781 	device_printf(dev, "RX discarded packets = %ld\n",
5782 	    rxr->rx_discarded);
5783 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5784 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5785 }
5786