xref: /freebsd/sys/dev/e1000/if_em.c (revision 0e1497aefd602cea581d2380d22e67dfdcac6b4e)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2010, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #ifdef HAVE_KERNEL_OPTION_HEADERS
36 #include "opt_device_polling.h"
37 #include "opt_inet.h"
38 #endif
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #if __FreeBSD_version >= 800000
43 #include <sys/buf_ring.h>
44 #endif
45 #include <sys/bus.h>
46 #include <sys/endian.h>
47 #include <sys/kernel.h>
48 #include <sys/kthread.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/module.h>
52 #include <sys/rman.h>
53 #include <sys/socket.h>
54 #include <sys/sockio.h>
55 #include <sys/sysctl.h>
56 #include <sys/taskqueue.h>
57 #include <sys/eventhandler.h>
58 #include <machine/bus.h>
59 #include <machine/resource.h>
60 
61 #include <net/bpf.h>
62 #include <net/ethernet.h>
63 #include <net/if.h>
64 #include <net/if_arp.h>
65 #include <net/if_dl.h>
66 #include <net/if_media.h>
67 
68 #include <net/if_types.h>
69 #include <net/if_vlan_var.h>
70 
71 #include <netinet/in_systm.h>
72 #include <netinet/in.h>
73 #include <netinet/if_ether.h>
74 #include <netinet/ip.h>
75 #include <netinet/ip6.h>
76 #include <netinet/tcp.h>
77 #include <netinet/udp.h>
78 
79 #include <machine/in_cksum.h>
80 #include <dev/led/led.h>
81 #include <dev/pci/pcivar.h>
82 #include <dev/pci/pcireg.h>
83 
84 #include "e1000_api.h"
85 #include "e1000_82571.h"
86 #include "if_em.h"
87 
88 /*********************************************************************
89  *  Set this to one to display debug statistics
90  *********************************************************************/
91 int	em_display_debug_stats = 0;
92 
93 /*********************************************************************
94  *  Driver version:
95  *********************************************************************/
96 char em_driver_version[] = "7.1.8";
97 
98 /*********************************************************************
99  *  PCI Device ID Table
100  *
101  *  Used by probe to select devices to load on
102  *  Last field stores an index into e1000_strings
103  *  Last entry must be all 0s
104  *
105  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
106  *********************************************************************/
107 
108 static em_vendor_info_t em_vendor_info_array[] =
109 {
110 	/* Intel(R) PRO/1000 Network Connection */
111 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
112 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
113 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
114 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
115 						PCI_ANY_ID, PCI_ANY_ID, 0},
116 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
117 						PCI_ANY_ID, PCI_ANY_ID, 0},
118 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
119 						PCI_ANY_ID, PCI_ANY_ID, 0},
120 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
121 						PCI_ANY_ID, PCI_ANY_ID, 0},
122 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
123 						PCI_ANY_ID, PCI_ANY_ID, 0},
124 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
125 						PCI_ANY_ID, PCI_ANY_ID, 0},
126 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
129 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
130 
131 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
132 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
133 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
134 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
135 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
136 						PCI_ANY_ID, PCI_ANY_ID, 0},
137 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
138 						PCI_ANY_ID, PCI_ANY_ID, 0},
139 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
140 						PCI_ANY_ID, PCI_ANY_ID, 0},
141 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
142 						PCI_ANY_ID, PCI_ANY_ID, 0},
143 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
144 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
145 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
146 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
147 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
148 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
149 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
150 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
151 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
152 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
153 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
154 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
155 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
156 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
157 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
158 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
159 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
160 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
161 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
162 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
163 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
164 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
165 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
166 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
167 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
168 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
169 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
170 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
171 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
172 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
173 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
174 	/* required last entry */
175 	{ 0, 0, 0, 0, 0}
176 };
177 
178 /*********************************************************************
179  *  Table of branding strings for all supported NICs.
180  *********************************************************************/
181 
182 static char *em_strings[] = {
183 	"Intel(R) PRO/1000 Network Connection"
184 };
185 
186 /*********************************************************************
187  *  Function prototypes
188  *********************************************************************/
189 static int	em_probe(device_t);
190 static int	em_attach(device_t);
191 static int	em_detach(device_t);
192 static int	em_shutdown(device_t);
193 static int	em_suspend(device_t);
194 static int	em_resume(device_t);
195 static void	em_start(struct ifnet *);
196 static void	em_start_locked(struct ifnet *, struct tx_ring *);
197 #ifdef EM_MULTIQUEUE
198 static int	em_mq_start(struct ifnet *, struct mbuf *);
199 static int	em_mq_start_locked(struct ifnet *,
200 		    struct tx_ring *, struct mbuf *);
201 static void	em_qflush(struct ifnet *);
202 #endif
203 static int	em_ioctl(struct ifnet *, u_long, caddr_t);
204 static void	em_init(void *);
205 static void	em_init_locked(struct adapter *);
206 static void	em_stop(void *);
207 static void	em_media_status(struct ifnet *, struct ifmediareq *);
208 static int	em_media_change(struct ifnet *);
209 static void	em_identify_hardware(struct adapter *);
210 static int	em_allocate_pci_resources(struct adapter *);
211 static int	em_allocate_legacy(struct adapter *);
212 static int	em_allocate_msix(struct adapter *);
213 static int	em_allocate_queues(struct adapter *);
214 static int	em_setup_msix(struct adapter *);
215 static void	em_free_pci_resources(struct adapter *);
216 static void	em_local_timer(void *);
217 static void	em_reset(struct adapter *);
218 static int	em_setup_interface(device_t, struct adapter *);
219 
220 static void	em_setup_transmit_structures(struct adapter *);
221 static void	em_initialize_transmit_unit(struct adapter *);
222 static int	em_allocate_transmit_buffers(struct tx_ring *);
223 static void	em_free_transmit_structures(struct adapter *);
224 static void	em_free_transmit_buffers(struct tx_ring *);
225 
226 static int	em_setup_receive_structures(struct adapter *);
227 static int	em_allocate_receive_buffers(struct rx_ring *);
228 static void	em_initialize_receive_unit(struct adapter *);
229 static void	em_free_receive_structures(struct adapter *);
230 static void	em_free_receive_buffers(struct rx_ring *);
231 
232 static void	em_enable_intr(struct adapter *);
233 static void	em_disable_intr(struct adapter *);
234 static void	em_update_stats_counters(struct adapter *);
235 static void	em_add_hw_stats(struct adapter *adapter);
236 static bool	em_txeof(struct tx_ring *);
237 static bool	em_rxeof(struct rx_ring *, int, int *);
238 #ifndef __NO_STRICT_ALIGNMENT
239 static int	em_fixup_rx(struct rx_ring *);
240 #endif
241 static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
242 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
243 		    struct ip *, u32 *, u32 *);
244 static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
245 		    struct tcphdr *, u32 *, u32 *);
246 static void	em_set_promisc(struct adapter *);
247 static void	em_disable_promisc(struct adapter *);
248 static void	em_set_multi(struct adapter *);
249 static void	em_update_link_status(struct adapter *);
250 static void	em_refresh_mbufs(struct rx_ring *, int);
251 static void	em_register_vlan(void *, struct ifnet *, u16);
252 static void	em_unregister_vlan(void *, struct ifnet *, u16);
253 static void	em_setup_vlan_hw_support(struct adapter *);
254 static int	em_xmit(struct tx_ring *, struct mbuf **);
255 static int	em_dma_malloc(struct adapter *, bus_size_t,
256 		    struct em_dma_alloc *, int);
257 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
258 static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
259 static void	em_print_nvm_info(struct adapter *);
260 static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
261 static void	em_print_debug_info(struct adapter *);
262 static int 	em_is_valid_ether_addr(u8 *);
263 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
264 static void	em_add_int_delay_sysctl(struct adapter *, const char *,
265 		    const char *, struct em_int_delay_info *, int, int);
266 /* Management and WOL Support */
267 static void	em_init_manageability(struct adapter *);
268 static void	em_release_manageability(struct adapter *);
269 static void     em_get_hw_control(struct adapter *);
270 static void     em_release_hw_control(struct adapter *);
271 static void	em_get_wakeup(device_t);
272 static void     em_enable_wakeup(device_t);
273 static int	em_enable_phy_wakeup(struct adapter *);
274 static void	em_led_func(void *, int);
275 static void	em_disable_aspm(struct adapter *);
276 
277 static int	em_irq_fast(void *);
278 
279 /* MSIX handlers */
280 static void	em_msix_tx(void *);
281 static void	em_msix_rx(void *);
282 static void	em_msix_link(void *);
283 static void	em_handle_tx(void *context, int pending);
284 static void	em_handle_rx(void *context, int pending);
285 static void	em_handle_link(void *context, int pending);
286 
287 static void	em_add_rx_process_limit(struct adapter *, const char *,
288 		    const char *, int *, int);
289 static void	em_set_flow_cntrl(struct adapter *, const char *,
290 		    const char *, int *, int);
291 
292 static __inline void em_rx_discard(struct rx_ring *, int);
293 
294 #ifdef DEVICE_POLLING
295 static poll_handler_t em_poll;
296 #endif /* DEVICE_POLLING */
297 
298 /*********************************************************************
299  *  FreeBSD Device Interface Entry Points
300  *********************************************************************/
301 
302 static device_method_t em_methods[] = {
303 	/* Device interface */
304 	DEVMETHOD(device_probe, em_probe),
305 	DEVMETHOD(device_attach, em_attach),
306 	DEVMETHOD(device_detach, em_detach),
307 	DEVMETHOD(device_shutdown, em_shutdown),
308 	DEVMETHOD(device_suspend, em_suspend),
309 	DEVMETHOD(device_resume, em_resume),
310 	{0, 0}
311 };
312 
313 static driver_t em_driver = {
314 	"em", em_methods, sizeof(struct adapter),
315 };
316 
317 devclass_t em_devclass;
318 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
319 MODULE_DEPEND(em, pci, 1, 1, 1);
320 MODULE_DEPEND(em, ether, 1, 1, 1);
321 
322 /*********************************************************************
323  *  Tunable default values.
324  *********************************************************************/
325 
326 #define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
327 #define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
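/*
 * The hardware counts these delay values in 1.024-usec units, so the
 * macros above convert with rounding. A worked example, assuming the
 * default EM_TIDV of 64 ticks:
 *
 *   EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66036 / 1000 = 66
 *   EM_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 66512 / 1024 = 64
 *
 * so a setting survives the round trip despite the integer division.
 */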
328 #define M_TSO_LEN			66
329 
330 /* Allow common code without TSO */
331 #ifndef CSUM_TSO
332 #define CSUM_TSO	0
333 #endif
334 
335 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
336 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
337 TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
338 TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
339 
340 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
341 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
342 TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
343 TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
344 
345 static int em_rxd = EM_DEFAULT_RXD;
346 static int em_txd = EM_DEFAULT_TXD;
347 TUNABLE_INT("hw.em.rxd", &em_rxd);
348 TUNABLE_INT("hw.em.txd", &em_txd);
349 
350 static int em_smart_pwr_down = FALSE;
351 TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
352 
353 /* Controls whether promiscuous mode also shows bad packets */
354 static int em_debug_sbp = FALSE;
355 TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
356 
357 static int em_enable_msix = TRUE;
358 TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
359 
360 /* How many packets rxeof tries to clean at a time */
361 static int em_rx_process_limit = 100;
362 TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
363 
364 /* Flow control setting - default to FULL */
365 static int em_fc_setting = e1000_fc_full;
366 TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
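/*
 * All of the hw.em.* tunables above are read from the kernel environment
 * at module load time, so the usual way to set them is /boot/loader.conf,
 * e.g. (the values here are only illustrative):
 *
 *   hw.em.rxd="2048"
 *   hw.em.txd="2048"
 *   hw.em.fc_setting="0"	# e1000_fc_none
 *
 * The descriptor counts are validated in em_attach() below and fall back
 * to the defaults when out of range or misaligned.
 */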
367 
368 /* Global used in WOL setup with multiport cards */
369 static int global_quad_port_a = 0;
370 
371 /*********************************************************************
372  *  Device identification routine
373  *
374  *  em_probe determines whether the driver should be loaded on an
375  *  adapter, based on that adapter's PCI vendor/device ID.
376  *
377  *  return BUS_PROBE_DEFAULT on success, positive on failure
378  *********************************************************************/
379 
380 static int
381 em_probe(device_t dev)
382 {
383 	char		adapter_name[60];
384 	u16		pci_vendor_id = 0;
385 	u16		pci_device_id = 0;
386 	u16		pci_subvendor_id = 0;
387 	u16		pci_subdevice_id = 0;
388 	em_vendor_info_t *ent;
389 
390 	INIT_DEBUGOUT("em_probe: begin");
391 
392 	pci_vendor_id = pci_get_vendor(dev);
393 	if (pci_vendor_id != EM_VENDOR_ID)
394 		return (ENXIO);
395 
396 	pci_device_id = pci_get_device(dev);
397 	pci_subvendor_id = pci_get_subvendor(dev);
398 	pci_subdevice_id = pci_get_subdevice(dev);
399 
400 	ent = em_vendor_info_array;
401 	while (ent->vendor_id != 0) {
402 		if ((pci_vendor_id == ent->vendor_id) &&
403 		    (pci_device_id == ent->device_id) &&
404 
405 		    ((pci_subvendor_id == ent->subvendor_id) ||
406 		    (ent->subvendor_id == PCI_ANY_ID)) &&
407 
408 		    ((pci_subdevice_id == ent->subdevice_id) ||
409 		    (ent->subdevice_id == PCI_ANY_ID))) {
410 			sprintf(adapter_name, "%s %s",
411 				em_strings[ent->index],
412 				em_driver_version);
413 			device_set_desc_copy(dev, adapter_name);
414 			return (BUS_PROBE_DEFAULT);
415 		}
416 		ent++;
417 	}
418 
419 	return (ENXIO);
420 }
421 
422 /*********************************************************************
423  *  Device initialization routine
424  *
425  *  The attach entry point is called when the driver is being loaded.
426  *  This routine identifies the type of hardware, allocates all resources
427  *  and initializes the hardware.
428  *
429  *  return 0 on success, positive on failure
430  *********************************************************************/
431 
432 static int
433 em_attach(device_t dev)
434 {
435 	struct adapter	*adapter;
436 	int		error = 0;
437 
438 	INIT_DEBUGOUT("em_attach: begin");
439 
440 	adapter = device_get_softc(dev);
441 	adapter->dev = adapter->osdep.dev = dev;
442 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
443 
444 	/* SYSCTL stuff */
445 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
446 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
447 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
448 	    em_sysctl_nvm_info, "I", "NVM Information");
449 
450 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
451 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
452 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
453 	    em_sysctl_debug_info, "I", "Debug Information");
454 
455 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
456 
457 	/* Determine hardware and mac info */
458 	em_identify_hardware(adapter);
459 
460 	/* Setup PCI resources */
461 	if (em_allocate_pci_resources(adapter)) {
462 		device_printf(dev, "Allocation of PCI resources failed\n");
463 		error = ENXIO;
464 		goto err_pci;
465 	}
466 
467 	/*
468 	** For ICH8 and family we need to
469 	** map the flash memory, and this
470 	** must happen after the MAC is
471 	** identified
472 	*/
473 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
474 	    (adapter->hw.mac.type == e1000_ich9lan) ||
475 	    (adapter->hw.mac.type == e1000_ich10lan) ||
476 	    (adapter->hw.mac.type == e1000_pchlan) ||
477 	    (adapter->hw.mac.type == e1000_pch2lan)) {
478 		int rid = EM_BAR_TYPE_FLASH;
479 		adapter->flash = bus_alloc_resource_any(dev,
480 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
481 		if (adapter->flash == NULL) {
482 			device_printf(dev, "Mapping of Flash failed\n");
483 			error = ENXIO;
484 			goto err_pci;
485 		}
486 		/* This is used in the shared code */
487 		adapter->hw.flash_address = (u8 *)adapter->flash;
488 		adapter->osdep.flash_bus_space_tag =
489 		    rman_get_bustag(adapter->flash);
490 		adapter->osdep.flash_bus_space_handle =
491 		    rman_get_bushandle(adapter->flash);
492 	}
493 
494 	/* Do Shared Code initialization */
495 	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
496 		device_printf(dev, "Setup of Shared code failed\n");
497 		error = ENXIO;
498 		goto err_pci;
499 	}
500 
501 	e1000_get_bus_info(&adapter->hw);
502 
503 	/* Set up some sysctls for the tunable interrupt delays */
504 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
505 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
506 	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
507 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
508 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
509 	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
510 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
511 	    "receive interrupt delay limit in usecs",
512 	    &adapter->rx_abs_int_delay,
513 	    E1000_REGISTER(&adapter->hw, E1000_RADV),
514 	    em_rx_abs_int_delay_dflt);
515 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
516 	    "transmit interrupt delay limit in usecs",
517 	    &adapter->tx_abs_int_delay,
518 	    E1000_REGISTER(&adapter->hw, E1000_TADV),
519 	    em_tx_abs_int_delay_dflt);
520 
521 	/* Sysctl for limiting the amount of work done in the taskqueue */
522 	em_add_rx_process_limit(adapter, "rx_processing_limit",
523 	    "max number of rx packets to process", &adapter->rx_process_limit,
524 	    em_rx_process_limit);
525 
526 	/* Sysctl for setting the interface flow control */
527 	em_set_flow_cntrl(adapter, "flow_control",
528 	    "configure flow control",
529 	    &adapter->fc_setting, em_fc_setting);
530 
531 	/*
532 	 * Validate the number of transmit and receive descriptors. It
533 	 * must not exceed the hardware maximum, and must be a multiple
534 	 * of EM_DBA_ALIGN.
535 	 */
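	/*
	 * A worked example, assuming a 16-byte legacy descriptor,
	 * EM_DBA_ALIGN of 128 and EM_DEFAULT_TXD of 1024: the default
	 * ring occupies 1024 * 16 = 16384 bytes, which is evenly
	 * divisible by 128 and so passes the check below.
	 */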
536 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
537 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
538 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
539 		    EM_DEFAULT_TXD, em_txd);
540 		adapter->num_tx_desc = EM_DEFAULT_TXD;
541 	} else
542 		adapter->num_tx_desc = em_txd;
543 
544 	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
545 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
546 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
547 		    EM_DEFAULT_RXD, em_rxd);
548 		adapter->num_rx_desc = EM_DEFAULT_RXD;
549 	} else
550 		adapter->num_rx_desc = em_rxd;
551 
552 	adapter->hw.mac.autoneg = DO_AUTO_NEG;
553 	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
554 	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
555 
556 	/* Copper options */
557 	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
558 		adapter->hw.phy.mdix = AUTO_ALL_MODES;
559 		adapter->hw.phy.disable_polarity_correction = FALSE;
560 		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
561 	}
562 
563 	/*
564 	 * Set the frame limits assuming
565 	 * standard ethernet sized frames.
566 	 */
567 	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
568 	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
569 
570 	/*
571 	 * This controls when hardware reports transmit completion
572 	 * status.
573 	 */
574 	adapter->hw.mac.report_tx_early = 1;
575 
576 	/*
577 	** Get queue/ring memory
578 	*/
579 	if (em_allocate_queues(adapter)) {
580 		error = ENOMEM;
581 		goto err_pci;
582 	}
583 
584 	/* Allocate multicast array memory. */
585 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
586 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
587 	if (adapter->mta == NULL) {
588 		device_printf(dev, "Can not allocate multicast setup array\n");
589 		error = ENOMEM;
590 		goto err_late;
591 	}
592 
593 	/* Check SOL/IDER usage */
594 	if (e1000_check_reset_block(&adapter->hw))
595 		device_printf(dev, "PHY reset is blocked"
596 		    " due to SOL/IDER session.\n");
597 
598 	/*
599 	** Start from a known state; this is
600 	** important for reading the NVM and
601 	** MAC address afterwards.
602 	*/
603 	e1000_reset_hw(&adapter->hw);
604 
605 	/* Make sure we have a good EEPROM before we read from it */
606 	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
607 		/*
608 		** Some PCI-E parts fail the first check due to
609 		** the link being in a sleep state; call it again.
610 		** If it fails a second time, it's a real issue.
611 		*/
612 		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
613 			device_printf(dev,
614 			    "The EEPROM Checksum Is Not Valid\n");
615 			error = EIO;
616 			goto err_late;
617 		}
618 	}
619 
620 	/* Copy the permanent MAC address out of the EEPROM */
621 	if (e1000_read_mac_addr(&adapter->hw) < 0) {
622 		device_printf(dev, "EEPROM read error while reading MAC"
623 		    " address\n");
624 		error = EIO;
625 		goto err_late;
626 	}
627 
628 	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
629 		device_printf(dev, "Invalid MAC address\n");
630 		error = EIO;
631 		goto err_late;
632 	}
633 
634 	/*
635 	**  Do interrupt configuration
636 	*/
637 	if (adapter->msix > 1) /* Do MSIX */
638 		error = em_allocate_msix(adapter);
639 	else  /* MSI or Legacy */
640 		error = em_allocate_legacy(adapter);
641 	if (error)
642 		goto err_late;
643 
644 	/*
645 	 * Get Wake-on-Lan and Management info for later use
646 	 */
647 	em_get_wakeup(dev);
648 
649 	/* Setup OS specific network interface */
650 	if (em_setup_interface(dev, adapter) != 0)
651 		goto err_late;
652 
653 	em_reset(adapter);
654 
655 	/* Initialize statistics */
656 	em_update_stats_counters(adapter);
657 
658 	adapter->hw.mac.get_link_status = 1;
659 	em_update_link_status(adapter);
660 
661 	/* Register for VLAN events */
662 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
663 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
664 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
665 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
666 
667 	em_add_hw_stats(adapter);
668 
669 	/* Non-AMT based hardware can now take control from firmware */
670 	if (adapter->has_manage && !adapter->has_amt)
671 		em_get_hw_control(adapter);
672 
673 	/* Tell the stack that the interface is not active */
674 	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
675 
676 	adapter->led_dev = led_create(em_led_func, adapter,
677 	    device_get_nameunit(dev));
678 
679 	INIT_DEBUGOUT("em_attach: end");
680 
681 	return (0);
682 
683 err_late:
684 	em_free_transmit_structures(adapter);
685 	em_free_receive_structures(adapter);
686 	em_release_hw_control(adapter);
687 	if (adapter->ifp != NULL)
688 		if_free(adapter->ifp);
689 err_pci:
690 	em_free_pci_resources(adapter);
691 	free(adapter->mta, M_DEVBUF);
692 	EM_CORE_LOCK_DESTROY(adapter);
693 
694 	return (error);
695 }
696 
697 /*********************************************************************
698  *  Device removal routine
699  *
700  *  The detach entry point is called when the driver is being removed.
701  *  This routine stops the adapter and deallocates all the resources
702  *  that were allocated for driver operation.
703  *
704  *  return 0 on success, positive on failure
705  *********************************************************************/
706 
707 static int
708 em_detach(device_t dev)
709 {
710 	struct adapter	*adapter = device_get_softc(dev);
711 	struct ifnet	*ifp = adapter->ifp;
712 
713 	INIT_DEBUGOUT("em_detach: begin");
714 
715 	/* Make sure VLANs are not using the driver */
716 	if (adapter->ifp->if_vlantrunk != NULL) {
717 		device_printf(dev, "VLAN in use, detach first\n");
718 		return (EBUSY);
719 	}
720 
721 #ifdef DEVICE_POLLING
722 	if (ifp->if_capenable & IFCAP_POLLING)
723 		ether_poll_deregister(ifp);
724 #endif
725 
726 	if (adapter->led_dev != NULL)
727 		led_destroy(adapter->led_dev);
728 
729 	EM_CORE_LOCK(adapter);
730 	adapter->in_detach = 1;
731 	em_stop(adapter);
732 	EM_CORE_UNLOCK(adapter);
733 	EM_CORE_LOCK_DESTROY(adapter);
734 
735 	e1000_phy_hw_reset(&adapter->hw);
736 
737 	em_release_manageability(adapter);
738 	em_release_hw_control(adapter);
739 
740 	/* Unregister VLAN events */
741 	if (adapter->vlan_attach != NULL)
742 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
743 	if (adapter->vlan_detach != NULL)
744 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
745 
746 	ether_ifdetach(adapter->ifp);
747 	callout_drain(&adapter->timer);
748 
749 	em_free_pci_resources(adapter);
750 	bus_generic_detach(dev);
751 	if_free(ifp);
752 
753 	em_free_transmit_structures(adapter);
754 	em_free_receive_structures(adapter);
755 
756 	em_release_hw_control(adapter);
757 	free(adapter->mta, M_DEVBUF);
758 
759 	return (0);
760 }
761 
762 /*********************************************************************
763  *
764  *  Shutdown entry point
765  *
766  **********************************************************************/
767 
768 static int
769 em_shutdown(device_t dev)
770 {
771 	return em_suspend(dev);
772 }
773 
774 /*
775  * Suspend/resume device methods.
776  */
777 static int
778 em_suspend(device_t dev)
779 {
780 	struct adapter *adapter = device_get_softc(dev);
781 
782 	EM_CORE_LOCK(adapter);
783 
784 	em_release_manageability(adapter);
785 	em_release_hw_control(adapter);
786 	em_enable_wakeup(dev);
787 
788 	EM_CORE_UNLOCK(adapter);
789 
790 	return bus_generic_suspend(dev);
791 }
792 
793 static int
794 em_resume(device_t dev)
795 {
796 	struct adapter *adapter = device_get_softc(dev);
797 	struct ifnet *ifp = adapter->ifp;
798 
799 	EM_CORE_LOCK(adapter);
800 	em_init_locked(adapter);
801 	em_init_manageability(adapter);
802 	EM_CORE_UNLOCK(adapter);
803 	em_start(ifp);
804 
805 	return bus_generic_resume(dev);
806 }
807 
808 
809 /*********************************************************************
810  *  Transmit entry point
811  *
812  *  em_start is called by the stack to initiate a transmit.
813  *  The driver will remain in this routine as long as there are
814  *  packets to transmit and transmit resources are available.
815  *  If resources are not available, the stack is notified and
816  *  the packet is requeued.
817  **********************************************************************/
818 
819 #ifdef EM_MULTIQUEUE
820 static int
821 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
822 {
823 	struct adapter  *adapter = txr->adapter;
824 	struct mbuf	*next;
825 	int		err = 0, enq = 0;
826 
827 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
828 	    IFF_DRV_RUNNING || adapter->link_active == 0) {
829 		if (m != NULL)
830 			err = drbr_enqueue(ifp, txr->br, m);
831 		return (err);
832 	}
833 
834 	/* Call cleanup if the number of available TX descriptors is low */
835 	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
836 		em_txeof(txr);
837 
838 	enq = 0;
839 	if (m == NULL) {
840 		next = drbr_dequeue(ifp, txr->br);
841 	} else if (drbr_needs_enqueue(ifp, txr->br)) {
842 		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
843 			return (err);
844 		next = drbr_dequeue(ifp, txr->br);
845 	} else
846 		next = m;
847 
848 	/* Process the queue */
849 	while (next != NULL) {
850 		if ((err = em_xmit(txr, &next)) != 0) {
851 			if (next != NULL)
852 				err = drbr_enqueue(ifp, txr->br, next);
853 			break;
854 		}
855 		enq++;
856 		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
857 		ETHER_BPF_MTAP(ifp, next);
858 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
859 			break;
860 		if (txr->tx_avail < EM_MAX_SCATTER) {
861 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
862 			break;
863 		}
864 		next = drbr_dequeue(ifp, txr->br);
865 	}
866 
867 	if (enq > 0) {
868 		/* Set the watchdog */
869 		txr->queue_status = EM_QUEUE_WORKING;
870 		txr->watchdog_time = ticks;
871 	}
872 	return (err);
873 }
874 
875 /*
876 ** Multiqueue capable stack interface
877 */
878 static int
879 em_mq_start(struct ifnet *ifp, struct mbuf *m)
880 {
881 	struct adapter	*adapter = ifp->if_softc;
882 	struct tx_ring	*txr = adapter->tx_rings;
883 	int 		error;
884 
885 	if (EM_TX_TRYLOCK(txr)) {
886 		error = em_mq_start_locked(ifp, txr, m);
887 		EM_TX_UNLOCK(txr);
888 	} else
889 		error = drbr_enqueue(ifp, txr->br, m);
890 
891 	return (error);
892 }
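/*
 * The trylock above is a deliberate design choice: rather than blocking
 * when another thread holds the TX lock, the frame is pushed onto the
 * buf_ring and will be drained by the current lock holder or by a later
 * transmit call.
 */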
893 
894 /*
895 ** Flush all ring buffers
896 */
897 static void
898 em_qflush(struct ifnet *ifp)
899 {
900 	struct adapter  *adapter = ifp->if_softc;
901 	struct tx_ring  *txr = adapter->tx_rings;
902 	struct mbuf     *m;
903 
904 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
905 		EM_TX_LOCK(txr);
906 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
907 			m_freem(m);
908 		EM_TX_UNLOCK(txr);
909 	}
910 	if_qflush(ifp);
911 }
912 
913 #endif /* EM_MULTIQUEUE */
914 
915 static void
916 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
917 {
918 	struct adapter	*adapter = ifp->if_softc;
919 	struct mbuf	*m_head;
920 
921 	EM_TX_LOCK_ASSERT(txr);
922 
923 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
924 	    IFF_DRV_RUNNING)
925 		return;
926 
927 	if (!adapter->link_active)
928 		return;
929 
930 	/* Call cleanup if the number of available TX descriptors is low */
931 	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
932 		em_txeof(txr);
933 
934 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
935 		if (txr->tx_avail < EM_MAX_SCATTER) {
936 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
937 			break;
938 		}
939 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
940 		if (m_head == NULL)
941 			break;
942 		/*
943 		 *  Encapsulation can modify our pointer, and/or make it
944 		 *  NULL on failure.  In that event, we can't requeue.
945 		 */
946 		if (em_xmit(txr, &m_head)) {
947 			if (m_head == NULL)
948 				break;
949 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
950 			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
951 			break;
952 		}
953 
954 		/* Send a copy of the frame to the BPF listener */
955 		ETHER_BPF_MTAP(ifp, m_head);
956 
957 		/* Set timeout in case hardware has problems transmitting. */
958 		txr->watchdog_time = ticks;
959 		txr->queue_status = EM_QUEUE_WORKING;
960 	}
961 
962 	return;
963 }
964 
965 static void
966 em_start(struct ifnet *ifp)
967 {
968 	struct adapter	*adapter = ifp->if_softc;
969 	struct tx_ring	*txr = adapter->tx_rings;
970 
971 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
972 		EM_TX_LOCK(txr);
973 		em_start_locked(ifp, txr);
974 		EM_TX_UNLOCK(txr);
975 	}
976 	return;
977 }
978 
979 /*********************************************************************
980  *  Ioctl entry point
981  *
982  *  em_ioctl is called when the user wants to configure the
983  *  interface.
984  *
985  *  return 0 on success, positive on failure
986  **********************************************************************/
987 
988 static int
989 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
990 {
991 	struct adapter	*adapter = ifp->if_softc;
992 	struct ifreq *ifr = (struct ifreq *)data;
993 #ifdef INET
994 	struct ifaddr *ifa = (struct ifaddr *)data;
995 #endif
996 	int error = 0;
997 
998 	if (adapter->in_detach)
999 		return (error);
1000 
1001 	switch (command) {
1002 	case SIOCSIFADDR:
1003 #ifdef INET
1004 		if (ifa->ifa_addr->sa_family == AF_INET) {
1005 			/*
1006 			 * XXX
1007 			 * Since resetting hardware takes a very long time
1008 			 * and results in link renegotiation, we
1009 			 * initialize the hardware only when it is
1010 			 * absolutely required.
1011 			 */
1012 			ifp->if_flags |= IFF_UP;
1013 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1014 				EM_CORE_LOCK(adapter);
1015 				em_init_locked(adapter);
1016 				EM_CORE_UNLOCK(adapter);
1017 			}
1018 			arp_ifinit(ifp, ifa);
1019 		} else
1020 #endif
1021 			error = ether_ioctl(ifp, command, data);
1022 		break;
1023 	case SIOCSIFMTU:
1024 	    {
1025 		int max_frame_size;
1026 
1027 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1028 
1029 		EM_CORE_LOCK(adapter);
1030 		switch (adapter->hw.mac.type) {
1031 		case e1000_82571:
1032 		case e1000_82572:
1033 		case e1000_ich9lan:
1034 		case e1000_ich10lan:
1035 		case e1000_pch2lan:
1036 		case e1000_82574:
1037 		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1038 			max_frame_size = 9234;
1039 			break;
1040 		case e1000_pchlan:
1041 			max_frame_size = 4096;
1042 			break;
1043 			/* Adapters that do not support jumbo frames */
1044 		case e1000_82583:
1045 		case e1000_ich8lan:
1046 			max_frame_size = ETHER_MAX_LEN;
1047 			break;
1048 		default:
1049 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1050 		}
1051 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1052 		    ETHER_CRC_LEN) {
1053 			EM_CORE_UNLOCK(adapter);
1054 			error = EINVAL;
1055 			break;
1056 		}
1057 
1058 		ifp->if_mtu = ifr->ifr_mtu;
1059 		adapter->max_frame_size =
1060 		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1061 		em_init_locked(adapter);
1062 		EM_CORE_UNLOCK(adapter);
1063 		break;
1064 	    }
1065 	case SIOCSIFFLAGS:
1066 		IOCTL_DEBUGOUT("ioctl rcv'd: "
1067 		    "SIOCSIFFLAGS (Set Interface Flags)");
1068 		EM_CORE_LOCK(adapter);
1069 		if (ifp->if_flags & IFF_UP) {
1070 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1071 				if ((ifp->if_flags ^ adapter->if_flags) &
1072 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1073 					em_disable_promisc(adapter);
1074 					em_set_promisc(adapter);
1075 				}
1076 			} else
1077 				em_init_locked(adapter);
1078 		} else
1079 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1080 				em_stop(adapter);
1081 		adapter->if_flags = ifp->if_flags;
1082 		EM_CORE_UNLOCK(adapter);
1083 		break;
1084 	case SIOCADDMULTI:
1085 	case SIOCDELMULTI:
1086 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1087 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1088 			EM_CORE_LOCK(adapter);
1089 			em_disable_intr(adapter);
1090 			em_set_multi(adapter);
1091 #ifdef DEVICE_POLLING
1092 			if (!(ifp->if_capenable & IFCAP_POLLING))
1093 #endif
1094 				em_enable_intr(adapter);
1095 			EM_CORE_UNLOCK(adapter);
1096 		}
1097 		break;
1098 	case SIOCSIFMEDIA:
1099 		/*
1100 		** As the speed/duplex settings are being
1101 		** changed, we need to reset the PHY.
1102 		*/
1103 		adapter->hw.phy.reset_disable = FALSE;
1104 		/* Check SOL/IDER usage */
1105 		EM_CORE_LOCK(adapter);
1106 		if (e1000_check_reset_block(&adapter->hw)) {
1107 			EM_CORE_UNLOCK(adapter);
1108 			device_printf(adapter->dev, "Media change is"
1109 			    " blocked due to SOL/IDER session.\n");
1110 			break;
1111 		}
1112 		EM_CORE_UNLOCK(adapter);
1113 		/* falls thru */
1114 	case SIOCGIFMEDIA:
1115 		IOCTL_DEBUGOUT("ioctl rcv'd: "
1116 		    "SIOCxIFMEDIA (Get/Set Interface Media)");
1117 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1118 		break;
1119 	case SIOCSIFCAP:
1120 	    {
1121 		int mask, reinit;
1122 
1123 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1124 		reinit = 0;
1125 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1126 #ifdef DEVICE_POLLING
1127 		if (mask & IFCAP_POLLING) {
1128 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1129 				error = ether_poll_register(em_poll, ifp);
1130 				if (error)
1131 					return (error);
1132 				EM_CORE_LOCK(adapter);
1133 				em_disable_intr(adapter);
1134 				ifp->if_capenable |= IFCAP_POLLING;
1135 				EM_CORE_UNLOCK(adapter);
1136 			} else {
1137 				error = ether_poll_deregister(ifp);
1138 				/* Enable interrupt even in error case */
1139 				EM_CORE_LOCK(adapter);
1140 				em_enable_intr(adapter);
1141 				ifp->if_capenable &= ~IFCAP_POLLING;
1142 				EM_CORE_UNLOCK(adapter);
1143 			}
1144 		}
1145 #endif
1146 		if (mask & IFCAP_HWCSUM) {
1147 			ifp->if_capenable ^= IFCAP_HWCSUM;
1148 			reinit = 1;
1149 		}
1150 		if (mask & IFCAP_TSO4) {
1151 			ifp->if_capenable ^= IFCAP_TSO4;
1152 			reinit = 1;
1153 		}
1154 		if (mask & IFCAP_VLAN_HWTAGGING) {
1155 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1156 			reinit = 1;
1157 		}
1158 		if (mask & IFCAP_VLAN_HWFILTER) {
1159 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1160 			reinit = 1;
1161 		}
1162 		if ((mask & IFCAP_WOL) &&
1163 		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
1164 			if (mask & IFCAP_WOL_MCAST)
1165 				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1166 			if (mask & IFCAP_WOL_MAGIC)
1167 				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1168 		}
1169 		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1170 			em_init(adapter);
1171 		VLAN_CAPABILITIES(ifp);
1172 		break;
1173 	    }
1174 
1175 	default:
1176 		error = ether_ioctl(ifp, command, data);
1177 		break;
1178 	}
1179 
1180 	return (error);
1181 }
1182 
1183 
1184 /*********************************************************************
1185  *  Init entry point
1186  *
1187  *  This routine is used in two ways. It is used by the stack as the
1188  *  init entry point in the network interface structure. It is also
1189  *  used by the driver as a hw/sw initialization routine to get the
1190  *  adapter to a consistent state.
1191  *
1192  *  This routine returns nothing; on failure it stops the adapter.
1193  **********************************************************************/
1194 
1195 static void
1196 em_init_locked(struct adapter *adapter)
1197 {
1198 	struct ifnet	*ifp = adapter->ifp;
1199 	device_t	dev = adapter->dev;
1200 	u32		pba;
1201 
1202 	INIT_DEBUGOUT("em_init: begin");
1203 
1204 	EM_CORE_LOCK_ASSERT(adapter);
1205 
1206 	em_disable_intr(adapter);
1207 	callout_stop(&adapter->timer);
1208 
1209 	/*
1210 	 * Packet Buffer Allocation (PBA)
1211 	 * Writing PBA sets the receive portion of the buffer;
1212 	 * the remainder is used for the transmit buffer.
1213 	 */
1214 	switch (adapter->hw.mac.type) {
1215 	/* Total Packet Buffer on these is 48K */
1216 	case e1000_82571:
1217 	case e1000_82572:
1218 	case e1000_80003es2lan:
1219 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
1220 		break;
1221 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
1222 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
1223 		break;
1224 	case e1000_82574:
1225 	case e1000_82583:
1226 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
1227 		break;
1228 	case e1000_ich8lan:
1229 		pba = E1000_PBA_8K;
1230 		break;
1231 	case e1000_ich9lan:
1232 	case e1000_ich10lan:
1233 		pba = E1000_PBA_10K;
1234 		break;
1235 	case e1000_pchlan:
1236 	case e1000_pch2lan:
1237 		pba = E1000_PBA_26K;
1238 		break;
1239 	default:
1240 		if (adapter->max_frame_size > 8192)
1241 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
1242 		else
1243 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
1244 	}
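	/*
	 * The E1000_PBA_* constants presumably encode the receive share in
	 * kilobytes (e.g. E1000_PBA_32K as 32); the write below tells the
	 * MAC how much of its fixed on-chip packet buffer to reserve for
	 * receive, with the rest left for transmit.
	 */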
1245 
1246 	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
1247 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
1248 
1249 	/* Get the latest mac address, User can use a LAA */
1250 	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1251 	    ETHER_ADDR_LEN);
1252 
1253 	/* Put the address into the Receive Address Array */
1254 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1255 
1256 	/*
1257 	 * With the 82571 adapter, RAR[0] may be overwritten
1258 	 * when the other port is reset, so we keep a duplicate
1259 	 * in RAR[14] for that eventuality; this assures that
1260 	 * the interface continues to function.
1261 	 */
1262 	if (adapter->hw.mac.type == e1000_82571) {
1263 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1264 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1265 		    E1000_RAR_ENTRIES - 1);
1266 	}
1267 
1268 	/* Initialize the hardware */
1269 	em_reset(adapter);
1270 	em_update_link_status(adapter);
1271 
1272 	/* Setup VLAN support, basic and offload if available */
1273 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1274 
1275 	/* Set hardware offload abilities */
1276 	ifp->if_hwassist = 0;
1277 	if (ifp->if_capenable & IFCAP_TXCSUM)
1278 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1279 	if (ifp->if_capenable & IFCAP_TSO4)
1280 		ifp->if_hwassist |= CSUM_TSO;
1281 
1282 	/* Configure for OS presence */
1283 	em_init_manageability(adapter);
1284 
1285 	/* Prepare transmit descriptors and buffers */
1286 	em_setup_transmit_structures(adapter);
1287 	em_initialize_transmit_unit(adapter);
1288 
1289 	/* Setup Multicast table */
1290 	em_set_multi(adapter);
1291 
1292 	/*
1293 	** Figure out the desired mbuf
1294 	** pool for doing jumbos
1295 	*/
1296 	if (adapter->max_frame_size <= 2048)
1297 		adapter->rx_mbuf_sz = MCLBYTES;
1298 	else if (adapter->max_frame_size <= 4096)
1299 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1300 	else
1301 		adapter->rx_mbuf_sz = MJUM9BYTES;
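	/*
	 * For example, with the standard 1500-byte MTU, max_frame_size is
	 * 1518 bytes, so 2K clusters (MCLBYTES) suffice; a 9000-byte jumbo
	 * MTU gives 9018 bytes, which lands in the 9K pool (MJUM9BYTES).
	 */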
1302 
1303 	/* Prepare receive descriptors and buffers */
1304 	if (em_setup_receive_structures(adapter)) {
1305 		device_printf(dev, "Could not setup receive structures\n");
1306 		em_stop(adapter);
1307 		return;
1308 	}
1309 	em_initialize_receive_unit(adapter);
1310 
1311 	/* Use real VLAN Filter support? */
1312 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1313 		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1314 			/* Use real VLAN Filter support */
1315 			em_setup_vlan_hw_support(adapter);
1316 		else {
1317 			u32 ctrl;
1318 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1319 			ctrl |= E1000_CTRL_VME;
1320 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1321 		}
1322 	}
1323 
1324 	/* Don't lose promiscuous settings */
1325 	em_set_promisc(adapter);
1326 
1327 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1328 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1329 
1330 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1331 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1332 
1333 	/* MSI/X configuration for 82574 */
1334 	if (adapter->hw.mac.type == e1000_82574) {
1335 		int tmp;
1336 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1337 		tmp |= E1000_CTRL_EXT_PBA_CLR;
1338 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1339 		/* Set the IVAR - interrupt vector routing. */
1340 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1341 	}
1342 
1343 #ifdef DEVICE_POLLING
1344 	/*
1345 	 * Only enable interrupts if we are not polling; make sure
1346 	 * they are off otherwise.
1347 	 */
1348 	if (ifp->if_capenable & IFCAP_POLLING)
1349 		em_disable_intr(adapter);
1350 	else
1351 #endif /* DEVICE_POLLING */
1352 		em_enable_intr(adapter);
1353 
1354 	/* AMT based hardware can now take control from firmware */
1355 	if (adapter->has_manage && adapter->has_amt)
1356 		em_get_hw_control(adapter);
1357 
1358 	/* Don't reset the phy next time init gets called */
1359 	adapter->hw.phy.reset_disable = TRUE;
1360 }
1361 
1362 static void
1363 em_init(void *arg)
1364 {
1365 	struct adapter *adapter = arg;
1366 
1367 	EM_CORE_LOCK(adapter);
1368 	em_init_locked(adapter);
1369 	EM_CORE_UNLOCK(adapter);
1370 }
1371 
1372 
1373 #ifdef DEVICE_POLLING
1374 /*********************************************************************
1375  *
1376  *  Legacy polling routine: note this only works with a single queue
1377  *
1378  *********************************************************************/
1379 static int
1380 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1381 {
1382 	struct adapter *adapter = ifp->if_softc;
1383 	struct tx_ring	*txr = adapter->tx_rings;
1384 	struct rx_ring	*rxr = adapter->rx_rings;
1385 	u32		reg_icr;
1386 	int		rx_done;
1387 
1388 	EM_CORE_LOCK(adapter);
1389 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1390 		EM_CORE_UNLOCK(adapter);
1391 		return (0);
1392 	}
1393 
1394 	if (cmd == POLL_AND_CHECK_STATUS) {
1395 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1396 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1397 			callout_stop(&adapter->timer);
1398 			adapter->hw.mac.get_link_status = 1;
1399 			em_update_link_status(adapter);
1400 			callout_reset(&adapter->timer, hz,
1401 			    em_local_timer, adapter);
1402 		}
1403 	}
1404 	EM_CORE_UNLOCK(adapter);
1405 
1406 	em_rxeof(rxr, count, &rx_done);
1407 
1408 	EM_TX_LOCK(txr);
1409 	em_txeof(txr);
1410 #ifdef EM_MULTIQUEUE
1411 	if (!drbr_empty(ifp, txr->br))
1412 		em_mq_start_locked(ifp, txr, NULL);
1413 #else
1414 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1415 		em_start_locked(ifp, txr);
1416 #endif
1417 	EM_TX_UNLOCK(txr);
1418 
1419 	return (rx_done);
1420 }
1421 #endif /* DEVICE_POLLING */
1422 
1423 
1424 /*********************************************************************
1425  *
1426  *  Fast Legacy/MSI Combined Interrupt Service routine
1427  *
1428  *********************************************************************/
1429 static int
1430 em_irq_fast(void *arg)
1431 {
1432 	struct adapter	*adapter = arg;
1433 	struct ifnet	*ifp;
1434 	u32		reg_icr;
1435 
1436 	ifp = adapter->ifp;
1437 
1438 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1439 
1440 	/* Hot eject?  */
1441 	if (reg_icr == 0xffffffff)
1442 		return FILTER_STRAY;
1443 
1444 	/* Definitely not our interrupt.  */
1445 	if (reg_icr == 0x0)
1446 		return FILTER_STRAY;
1447 
1448 	/*
1449 	 * Starting with the 82571 chip, bit 31 should be used to
1450 	 * determine whether the interrupt belongs to us.
1451 	 */
1452 	if (adapter->hw.mac.type >= e1000_82571 &&
1453 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1454 		return FILTER_STRAY;
1455 
1456 	em_disable_intr(adapter);
1457 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1458 
1459 	/* Link status change */
1460 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1461 		adapter->hw.mac.get_link_status = 1;
1462 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1463 	}
1464 
1465 	if (reg_icr & E1000_ICR_RXO)
1466 		adapter->rx_overruns++;
1467 	return FILTER_HANDLED;
1468 }
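/*
 * Note the fast-interrupt pattern above: the filter runs in interrupt
 * context, so it only samples ICR, masks further interrupts and defers
 * the actual RX/TX cleanup to the que taskqueue; em_handle_que() below
 * re-enables interrupts once the rings have been serviced.
 */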
1469 
1470 /* Combined RX/TX handler, used by Legacy and MSI */
1471 static void
1472 em_handle_que(void *context, int pending)
1473 {
1474 	struct adapter	*adapter = context;
1475 	struct ifnet	*ifp = adapter->ifp;
1476 	struct tx_ring	*txr = adapter->tx_rings;
1477 	struct rx_ring	*rxr = adapter->rx_rings;
1478 	bool		more;
1479 
1480 
1481 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1482 		more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1483 
1484 		EM_TX_LOCK(txr);
1485 		em_txeof(txr);
1486 #ifdef EM_MULTIQUEUE
1487 		if (!drbr_empty(ifp, txr->br))
1488 			em_mq_start_locked(ifp, txr, NULL);
1489 #else
1490 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1491 			em_start_locked(ifp, txr);
1492 #endif
1493 		em_txeof(txr);
1494 		EM_TX_UNLOCK(txr);
1495 		if (more) {
1496 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1497 			return;
1498 		}
1499 	}
1500 
1501 	em_enable_intr(adapter);
1502 	return;
1503 }
1504 
1505 
1506 /*********************************************************************
1507  *
1508  *  MSIX Interrupt Service Routines
1509  *
1510  **********************************************************************/
1511 static void
1512 em_msix_tx(void *arg)
1513 {
1514 	struct tx_ring *txr = arg;
1515 	struct adapter *adapter = txr->adapter;
1516 	bool		more;
1517 
1518 	++txr->tx_irq;
1519 	EM_TX_LOCK(txr);
1520 	more = em_txeof(txr);
1521 	EM_TX_UNLOCK(txr);
1522 	if (more)
1523 		taskqueue_enqueue(txr->tq, &txr->tx_task);
1524 	else
1525 		/* Reenable this interrupt */
1526 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1527 	return;
1528 }
1529 
1530 /*********************************************************************
1531  *
1532  *  MSIX RX Interrupt Service routine
1533  *
1534  **********************************************************************/
1535 
1536 static void
1537 em_msix_rx(void *arg)
1538 {
1539 	struct rx_ring	*rxr = arg;
1540 	struct adapter	*adapter = rxr->adapter;
1541 	bool		more;
1542 
1543 	++rxr->rx_irq;
1544 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1545 	if (more)
1546 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1547 	else
1548 		/* Reenable this interrupt */
1549 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1550 	return;
1551 }
1552 
1553 /*********************************************************************
1554  *
1555  *  MSIX Link Fast Interrupt Service routine
1556  *
1557  **********************************************************************/
1558 static void
1559 em_msix_link(void *arg)
1560 {
1561 	struct adapter	*adapter = arg;
1562 	u32		reg_icr;
1563 
1564 	++adapter->link_irq;
1565 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1566 
1567 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1568 		adapter->hw.mac.get_link_status = 1;
1569 		em_handle_link(adapter, 0);
1570 	} else
1571 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1572 		    EM_MSIX_LINK | E1000_IMS_LSC);
1573 	return;
1574 }
1575 
1576 static void
1577 em_handle_rx(void *context, int pending)
1578 {
1579 	struct rx_ring	*rxr = context;
1580 	struct adapter	*adapter = rxr->adapter;
1581 	bool		more;
1582 
1583 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1584 	if (more)
1585 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1586 	else
1587 		/* Reenable this interrupt */
1588 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1589 }
1590 
1591 static void
1592 em_handle_tx(void *context, int pending)
1593 {
1594 	struct tx_ring	*txr = context;
1595 	struct adapter	*adapter = txr->adapter;
1596 	struct ifnet	*ifp = adapter->ifp;
1597 
1598 	EM_TX_LOCK(txr);
1599 	em_txeof(txr);
1600 #ifdef EM_MULTIQUEUE
1601 	if (!drbr_empty(ifp, txr->br))
1602 		em_mq_start_locked(ifp, txr, NULL);
1603 #else
1604 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1605 		em_start_locked(ifp, txr);
1606 #endif
1607 	em_txeof(txr);
1608 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1609 	EM_TX_UNLOCK(txr);
1610 }
1611 
1612 static void
1613 em_handle_link(void *context, int pending)
1614 {
1615 	struct adapter	*adapter = context;
1616 	struct ifnet *ifp = adapter->ifp;
1617 
1618 	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1619 		return;
1620 
1621 	EM_CORE_LOCK(adapter);
1622 	callout_stop(&adapter->timer);
1623 	em_update_link_status(adapter);
1624 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1625 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1626 	    EM_MSIX_LINK | E1000_IMS_LSC);
1627 	EM_CORE_UNLOCK(adapter);
1628 }
1629 
1630 
1631 /*********************************************************************
1632  *
1633  *  Media Ioctl callback
1634  *
1635  *  This routine is called whenever the user queries the status of
1636  *  the interface using ifconfig.
1637  *
1638  **********************************************************************/
1639 static void
1640 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1641 {
1642 	struct adapter *adapter = ifp->if_softc;
1643 	u_char fiber_type = IFM_1000_SX;
1644 
1645 	INIT_DEBUGOUT("em_media_status: begin");
1646 
1647 	EM_CORE_LOCK(adapter);
1648 	em_update_link_status(adapter);
1649 
1650 	ifmr->ifm_status = IFM_AVALID;
1651 	ifmr->ifm_active = IFM_ETHER;
1652 
1653 	if (!adapter->link_active) {
1654 		EM_CORE_UNLOCK(adapter);
1655 		return;
1656 	}
1657 
1658 	ifmr->ifm_status |= IFM_ACTIVE;
1659 
1660 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1661 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1662 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1663 	} else {
1664 		switch (adapter->link_speed) {
1665 		case 10:
1666 			ifmr->ifm_active |= IFM_10_T;
1667 			break;
1668 		case 100:
1669 			ifmr->ifm_active |= IFM_100_TX;
1670 			break;
1671 		case 1000:
1672 			ifmr->ifm_active |= IFM_1000_T;
1673 			break;
1674 		}
1675 		if (adapter->link_duplex == FULL_DUPLEX)
1676 			ifmr->ifm_active |= IFM_FDX;
1677 		else
1678 			ifmr->ifm_active |= IFM_HDX;
1679 	}
1680 	EM_CORE_UNLOCK(adapter);
1681 }
1682 
1683 /*********************************************************************
1684  *
1685  *  Media Ioctl callback
1686  *
1687  *  This routine is called when the user changes speed/duplex using
1688  *  the media/mediaopt options with ifconfig.
1689  *
1690  **********************************************************************/
1691 static int
1692 em_media_change(struct ifnet *ifp)
1693 {
1694 	struct adapter *adapter = ifp->if_softc;
1695 	struct ifmedia  *ifm = &adapter->media;
1696 
1697 	INIT_DEBUGOUT("em_media_change: begin");
1698 
1699 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1700 		return (EINVAL);
1701 
1702 	EM_CORE_LOCK(adapter);
1703 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1704 	case IFM_AUTO:
1705 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1706 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1707 		break;
1708 	case IFM_1000_LX:
1709 	case IFM_1000_SX:
1710 	case IFM_1000_T:
1711 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1712 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1713 		break;
1714 	case IFM_100_TX:
1715 		adapter->hw.mac.autoneg = FALSE;
1716 		adapter->hw.phy.autoneg_advertised = 0;
1717 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1718 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1719 		else
1720 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1721 		break;
1722 	case IFM_10_T:
1723 		adapter->hw.mac.autoneg = FALSE;
1724 		adapter->hw.phy.autoneg_advertised = 0;
1725 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1726 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1727 		else
1728 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1729 		break;
1730 	default:
1731 		device_printf(adapter->dev, "Unsupported media type\n");
1732 	}
1733 
1734 	em_init_locked(adapter);
1735 	EM_CORE_UNLOCK(adapter);
1736 
1737 	return (0);
1738 }
1739 
1740 /*********************************************************************
1741  *
1742  *  This routine maps the mbufs to tx descriptors.
1743  *
1744  *  return 0 on success, positive on failure
1745  **********************************************************************/
1746 
1747 static int
1748 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1749 {
1750 	struct adapter		*adapter = txr->adapter;
1751 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1752 	bus_dmamap_t		map;
1753 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1754 	struct e1000_tx_desc	*ctxd = NULL;
1755 	struct mbuf		*m_head;
1756 	struct ether_header	*eh;
1757 	struct ip		*ip = NULL;
1758 	struct tcphdr		*tp = NULL;
1759 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1760 	int			ip_off, poff;
1761 	int			nsegs, i, j, first, last = 0;
1762 	int			error, do_tso, tso_desc = 0;
1763 
1764 	m_head = *m_headp;
1765 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1766 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1767 	ip_off = poff = 0;
1768 
1769 	/*
1770 	 * Intel recommends entire IP/TCP header length reside in a single
1771 	 * buffer. If multiple descriptors are used to describe the IP and
1772 	 * TCP header, each descriptor should describe one or more
1773 	 * complete headers; descriptors referencing only parts of headers
1774 	 * are not supported. If all layer headers are not coalesced into
1775 	 * a single buffer, each buffer should not cross a 4KB boundary,
1776 	 * or be larger than the maximum read request size.
1777 	 * The controller also requires the IP/TCP header to be modified
1778 	 * to make TSO work, so we first get a writable mbuf chain, then
1779 	 * coalesce the ethernet/IP/TCP header into a single buffer to
1780 	 * meet the controller's requirement. This also simplifies
1781 	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1782 	 */
1783 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1784 		if (do_tso || (m_head->m_next != NULL &&
1785 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1786 			if (M_WRITABLE(*m_headp) == 0) {
1787 				m_head = m_dup(*m_headp, M_DONTWAIT);
1788 				m_freem(*m_headp);
1789 				if (m_head == NULL) {
1790 					*m_headp = NULL;
1791 					return (ENOBUFS);
1792 				}
1793 				*m_headp = m_head;
1794 			}
1795 		}
1796 		/*
1797 		 * XXX
1798 		 * Assume IPv4, we don't have TSO/checksum offload support
1799 		 * for IPv6 yet.
1800 		 */
1801 		ip_off = sizeof(struct ether_header);
1802 		m_head = m_pullup(m_head, ip_off);
1803 		if (m_head == NULL) {
1804 			*m_headp = NULL;
1805 			return (ENOBUFS);
1806 		}
1807 		eh = mtod(m_head, struct ether_header *);
1808 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1809 			ip_off = sizeof(struct ether_vlan_header);
1810 			m_head = m_pullup(m_head, ip_off);
1811 			if (m_head == NULL) {
1812 				*m_headp = NULL;
1813 				return (ENOBUFS);
1814 			}
1815 		}
1816 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1817 		if (m_head == NULL) {
1818 			*m_headp = NULL;
1819 			return (ENOBUFS);
1820 		}
1821 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1822 		poff = ip_off + (ip->ip_hl << 2);
1823 		m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1824 		if (m_head == NULL) {
1825 			*m_headp = NULL;
1826 			return (ENOBUFS);
1827 		}
1828 		if (do_tso) {
1829 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1830 			/*
1831 			 * TSO workaround:
1832 			 *   pull 4 more bytes of data into the first mbuf.
1833 			 */
1834 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1835 			if (m_head == NULL) {
1836 				*m_headp = NULL;
1837 				return (ENOBUFS);
1838 			}
1839 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1840 			ip->ip_len = 0;
1841 			ip->ip_sum = 0;
1842 			/*
1843 			 * The TSO pseudo-header checksum must not include
1844 			 * the TCP payload length, so recompute the checksum
1845 			 * here to match what the hardware expects, per
1846 			 * Microsoft's Large Send specification.
1847 			 */
1848 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1849 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1850 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1851 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1852 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1853 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1854 			if (m_head == NULL) {
1855 				*m_headp = NULL;
1856 				return (ENOBUFS);
1857 			}
1858 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1859 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1860 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1861 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1862 			if (m_head == NULL) {
1863 				*m_headp = NULL;
1864 				return (ENOBUFS);
1865 			}
1866 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1867 		}
1868 		*m_headp = m_head;
1869 	}
1870 
1871 	/*
1872 	 * Map the packet for DMA
1873 	 *
1874 	 * Capture the first descriptor index,
1875 	 * this descriptor will have the index
1876 	 * of the EOP which is the only one that
1877 	 * now gets a DONE bit writeback.
1878 	 */
1879 	first = txr->next_avail_desc;
1880 	tx_buffer = &txr->tx_buffers[first];
1881 	tx_buffer_mapped = tx_buffer;
1882 	map = tx_buffer->map;
1883 
1884 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1885 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1886 
1887 	/*
1888 	 * There are two types of errors we can (try) to handle:
1889 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1890 	 *   out of segments.  Defragment the mbuf chain and try again.
1891 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1892 	 *   at this point in time.  Defer sending and try again later.
1893 	 * All other errors, in particular EINVAL, are fatal and prevent the
1894 	 * mbuf chain from ever going through.  Drop it and report error.
1895 	 */
1896 	if (error == EFBIG) {
1897 		struct mbuf *m;
1898 
1899 		m = m_defrag(*m_headp, M_DONTWAIT);
1900 		if (m == NULL) {
1901 			adapter->mbuf_alloc_failed++;
1902 			m_freem(*m_headp);
1903 			*m_headp = NULL;
1904 			return (ENOBUFS);
1905 		}
1906 		*m_headp = m;
1907 
1908 		/* Try it again */
1909 		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1910 		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1911 
1912 		if (error) {
1913 			adapter->no_tx_dma_setup++;
1914 			m_freem(*m_headp);
1915 			*m_headp = NULL;
1916 			return (error);
1917 		}
1918 	} else if (error != 0) {
1919 		adapter->no_tx_dma_setup++;
1920 		return (error);
1921 	}
1922 
1923 	/*
1924 	 * TSO Hardware workaround, if this packet is not
1925 	 * TSO, and is only a single descriptor long, and
1926 	 * it follows a TSO burst, then we need to add a
1927 	 * sentinel descriptor to prevent premature writeback.
1928 	 */
1929 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1930 		if (nsegs == 1)
1931 			tso_desc = TRUE;
1932 		txr->tx_tso = FALSE;
1933 	}
1934 
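	/*
	 * Keep two descriptors of headroom below: one so the ring can
	 * never fill completely (head == tail would read as empty) and
	 * one for the possible TSO sentinel descriptor noted above.
	 */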
1935 	if (nsegs > (txr->tx_avail - 2)) {
1936 		txr->no_desc_avail++;
1937 		bus_dmamap_unload(txr->txtag, map);
1938 		return (ENOBUFS);
1939 	}
1940 	m_head = *m_headp;
1941 
1942 	/* Do hardware assists */
1943 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1944 		em_tso_setup(txr, m_head, ip_off, ip, tp,
1945 		    &txd_upper, &txd_lower);
1946 		/* we need to make a final sentinel transmit desc */
1947 		tso_desc = TRUE;
1948 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1949 		em_transmit_checksum_setup(txr, m_head,
1950 		    ip_off, ip, &txd_upper, &txd_lower);
1951 
1952 	i = txr->next_avail_desc;
1953 
1954 	/* Set up our transmit descriptors */
1955 	for (j = 0; j < nsegs; j++) {
1956 		bus_size_t seg_len;
1957 		bus_addr_t seg_addr;
1958 
1959 		tx_buffer = &txr->tx_buffers[i];
1960 		ctxd = &txr->tx_base[i];
1961 		seg_addr = segs[j].ds_addr;
1962 		seg_len  = segs[j].ds_len;
1963 		/*
1964 		** TSO Workaround:
1965 		** If this is the last descriptor, we want to
1966 		** split it so we have a small final sentinel
1967 		*/
1968 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1969 			seg_len -= 4;
1970 			ctxd->buffer_addr = htole64(seg_addr);
1971 			ctxd->lower.data = htole32(
1972 			    adapter->txd_cmd | txd_lower | seg_len);
1973 			ctxd->upper.data =
1974 			    htole32(txd_upper);
1975 			if (++i == adapter->num_tx_desc)
1976 				i = 0;
1977 			/* Now make the sentinel */
1978 			++txd_used; /* using an extra txd */
1979 			ctxd = &txr->tx_base[i];
1980 			tx_buffer = &txr->tx_buffers[i];
1981 			ctxd->buffer_addr =
1982 			    htole64(seg_addr + seg_len);
1983 			ctxd->lower.data = htole32(
1984 			    adapter->txd_cmd | txd_lower | 4);
1985 			ctxd->upper.data =
1986 			    htole32(txd_upper);
1987 			last = i;
1988 			if (++i == adapter->num_tx_desc)
1989 				i = 0;
1990 		} else {
1991 			ctxd->buffer_addr = htole64(seg_addr);
1992 			ctxd->lower.data = htole32(
1993 			    adapter->txd_cmd | txd_lower | seg_len);
1994 			ctxd->upper.data =
1995 			    htole32(txd_upper);
1996 			last = i;
1997 			if (++i == adapter->num_tx_desc)
1998 				i = 0;
1999 		}
2000 		tx_buffer->m_head = NULL;
2001 		tx_buffer->next_eop = -1;
2002 	}
2003 
2004 	txr->next_avail_desc = i;
2005 	txr->tx_avail -= nsegs;
2006 	if (tso_desc) /* TSO used an extra for sentinel */
2007 		txr->tx_avail -= txd_used;
2008 
2009 	if (m_head->m_flags & M_VLANTAG) {
2010 		/* Set the vlan id. */
2011 		ctxd->upper.fields.special =
2012 		    htole16(m_head->m_pkthdr.ether_vtag);
2013 		/* Tell hardware to add tag */
2014 		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
2015 	}
2016 
2017 	tx_buffer->m_head = m_head;
2018 	tx_buffer_mapped->map = tx_buffer->map;
2019 	tx_buffer->map = map;
2020 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2021 
2022 	/*
2023 	 * Last Descriptor of Packet
2024 	 * needs End Of Packet (EOP)
2025 	 * and Report Status (RS)
2026 	 */
2027 	ctxd->lower.data |=
2028 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2029 	/*
2030 	 * Keep track in the first buffer which
2031 	 * descriptor will be written back
2032 	 */
2033 	tx_buffer = &txr->tx_buffers[first];
2034 	tx_buffer->next_eop = last;
2035 	/* Update the watchdog time early and often */
2036 	txr->watchdog_time = ticks;
2037 
2038 	/*
2039 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2040 	 * that this frame is available to transmit.
2041 	 */
2042 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2043 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2044 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
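	/*
	 * Note: 'i' is now one past the last descriptor written, which
	 * is exactly what the tail register expects; the hardware owns
	 * descriptors from head up to, but not including, tail.
	 */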
2045 
2046 	return (0);
2047 }
2048 
2049 static void
2050 em_set_promisc(struct adapter *adapter)
2051 {
2052 	struct ifnet	*ifp = adapter->ifp;
2053 	u32		reg_rctl;
2054 
2055 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2056 
2057 	if (ifp->if_flags & IFF_PROMISC) {
2058 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2059 		/* Turn this on if you want to see bad packets */
2060 		if (em_debug_sbp)
2061 			reg_rctl |= E1000_RCTL_SBP;
2062 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2063 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2064 		reg_rctl |= E1000_RCTL_MPE;
2065 		reg_rctl &= ~E1000_RCTL_UPE;
2066 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2067 	}
2068 }
2069 
2070 static void
2071 em_disable_promisc(struct adapter *adapter)
2072 {
2073 	u32	reg_rctl;
2074 
2075 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2076 
2077 	reg_rctl &=  (~E1000_RCTL_UPE);
2078 	reg_rctl &=  (~E1000_RCTL_MPE);
2079 	reg_rctl &=  (~E1000_RCTL_SBP);
2080 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2081 }
2082 
2083 
2084 /*********************************************************************
2085  *  Multicast Update
2086  *
2087  *  This routine is called whenever multicast address list is updated.
2088  *
2089  **********************************************************************/
2090 
2091 static void
2092 em_set_multi(struct adapter *adapter)
2093 {
2094 	struct ifnet	*ifp = adapter->ifp;
2095 	struct ifmultiaddr *ifma;
2096 	u32 reg_rctl = 0;
2097 	u8  *mta; /* Multicast array memory */
2098 	int mcnt = 0;
2099 
2100 	IOCTL_DEBUGOUT("em_set_multi: begin");
2101 
2102 	mta = adapter->mta;
2103 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
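	/*
	 * mta is a flat array of MAX_NUM_MULTICAST_ADDRESSES entries of
	 * ETH_ADDR_LEN (6) bytes each; entry i is copied to
	 * &mta[i * ETH_ADDR_LEN] in the loop below.
	 */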
2104 
2105 	if (adapter->hw.mac.type == e1000_82542 &&
2106 	    adapter->hw.revision_id == E1000_REVISION_2) {
2107 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2108 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2109 			e1000_pci_clear_mwi(&adapter->hw);
2110 		reg_rctl |= E1000_RCTL_RST;
2111 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2112 		msec_delay(5);
2113 	}
2114 
2115 #if __FreeBSD_version < 800000
2116 	IF_ADDR_LOCK(ifp);
2117 #else
2118 	if_maddr_rlock(ifp);
2119 #endif
2120 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2121 		if (ifma->ifma_addr->sa_family != AF_LINK)
2122 			continue;
2123 
2124 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2125 			break;
2126 
2127 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2128 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2129 		mcnt++;
2130 	}
2131 #if __FreeBSD_version < 800000
2132 	IF_ADDR_UNLOCK(ifp);
2133 #else
2134 	if_maddr_runlock(ifp);
2135 #endif
2136 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2137 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2138 		reg_rctl |= E1000_RCTL_MPE;
2139 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2140 	} else
2141 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2142 
2143 	if (adapter->hw.mac.type == e1000_82542 &&
2144 	    adapter->hw.revision_id == E1000_REVISION_2) {
2145 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2146 		reg_rctl &= ~E1000_RCTL_RST;
2147 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2148 		msec_delay(5);
2149 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2150 			e1000_pci_set_mwi(&adapter->hw);
2151 	}
2152 }
2153 
2154 
2155 /*********************************************************************
2156  *  Timer routine
2157  *
2158  *  This routine checks for link status and updates statistics.
2159  *
2160  **********************************************************************/
2161 
2162 static void
2163 em_local_timer(void *arg)
2164 {
2165 	struct adapter	*adapter = arg;
2166 	struct ifnet	*ifp = adapter->ifp;
2167 	struct tx_ring	*txr = adapter->tx_rings;
2168 
2169 	EM_CORE_LOCK_ASSERT(adapter);
2170 
2171 	em_update_link_status(adapter);
2172 	em_update_stats_counters(adapter);
2173 
2174 	/* Reset LAA into RAR[0] on 82571 */
2175 	if ((adapter->hw.mac.type == e1000_82571) &&
2176 	    e1000_get_laa_state_82571(&adapter->hw))
2177 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2178 
2179 	/*
2180 	** Don't do TX watchdog check if we've been paused
2181 	*/
2182 	if (adapter->pause_frames) {
2183 		adapter->pause_frames = 0;
2184 		goto out;
2185 	}
2186 	/*
2187 	** Check the state of the TX queue(s); this can be
2188 	** done without the lock because the access is RO
2189 	** and the HUNG state, once set, is static.
2190 	*/
2191 	for (int i = 0; i < adapter->num_queues; i++, txr++)
2192 		if (txr->queue_status == EM_QUEUE_HUNG)
2193 			goto hung;
2194 out:
2195 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2196 	return;
2197 hung:
2198 	/* Looks like we're hung */
2199 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2200 	device_printf(adapter->dev,
2201 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2202 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2203 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2204 	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2205 	    "Next TX to Clean = %d\n",
2206 	    txr->me, txr->tx_avail, txr->next_to_clean);
2207 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2208 	adapter->watchdog_events++;
2209 	/* No TX lock is held here; the hung check above runs lockless. */
2210 	em_init_locked(adapter);
2211 }
2212 
2213 
2214 static void
2215 em_update_link_status(struct adapter *adapter)
2216 {
2217 	struct e1000_hw *hw = &adapter->hw;
2218 	struct ifnet *ifp = adapter->ifp;
2219 	device_t dev = adapter->dev;
2220 	struct tx_ring *txr = adapter->tx_rings;
2221 	u32 link_check = 0;
2222 
2223 	/* Get the cached link value or read phy for real */
2224 	switch (hw->phy.media_type) {
2225 	case e1000_media_type_copper:
2226 		if (hw->mac.get_link_status) {
2227 			/* Do the work to read phy */
2228 			e1000_check_for_link(hw);
2229 			link_check = !hw->mac.get_link_status;
2230 			if (link_check) /* ESB2 fix */
2231 				e1000_cfg_on_link_up(hw);
2232 		} else
2233 			link_check = TRUE;
2234 		break;
2235 	case e1000_media_type_fiber:
2236 		e1000_check_for_link(hw);
2237 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2238                                  E1000_STATUS_LU);
2239 		break;
2240 	case e1000_media_type_internal_serdes:
2241 		e1000_check_for_link(hw);
2242 		link_check = adapter->hw.mac.serdes_has_link;
2243 		break;
2244 	default:
2245 	case e1000_media_type_unknown:
2246 		break;
2247 	}
2248 
2249 	/* Now check for a transition */
2250 	if (link_check && (adapter->link_active == 0)) {
2251 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2252 		    &adapter->link_duplex);
2253 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2254 		if ((adapter->link_speed != SPEED_1000) &&
2255 		    ((hw->mac.type == e1000_82571) ||
2256 		    (hw->mac.type == e1000_82572))) {
2257 			int tarc0;
2258 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2259 			tarc0 &= ~SPEED_MODE_BIT;
2260 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2261 		}
2262 		if (bootverbose)
2263 			device_printf(dev, "Link is up %d Mbps %s\n",
2264 			    adapter->link_speed,
2265 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2266 			    "Full Duplex" : "Half Duplex"));
2267 		adapter->link_active = 1;
2268 		adapter->smartspeed = 0;
2269 		ifp->if_baudrate = adapter->link_speed * 1000000;
2270 		if_link_state_change(ifp, LINK_STATE_UP);
2271 	} else if (!link_check && (adapter->link_active == 1)) {
2272 		ifp->if_baudrate = adapter->link_speed = 0;
2273 		adapter->link_duplex = 0;
2274 		if (bootverbose)
2275 			device_printf(dev, "Link is Down\n");
2276 		adapter->link_active = 0;
2277 		/* Link down, disable watchdog */
2278 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2279 			txr->queue_status = EM_QUEUE_IDLE;
2280 		if_link_state_change(ifp, LINK_STATE_DOWN);
2281 	}
2282 }
2283 
2284 /*********************************************************************
2285  *
2286  *  This routine disables all traffic on the adapter by issuing a
2287  *  global reset on the MAC and deallocates TX/RX buffers.
2288  *
2289  *  This routine should always be called with BOTH the CORE
2290  *  and TX locks.
2291  **********************************************************************/
2292 
2293 static void
2294 em_stop(void *arg)
2295 {
2296 	struct adapter	*adapter = arg;
2297 	struct ifnet	*ifp = adapter->ifp;
2298 	struct tx_ring	*txr = adapter->tx_rings;
2299 
2300 	EM_CORE_LOCK_ASSERT(adapter);
2301 
2302 	INIT_DEBUGOUT("em_stop: begin");
2303 
2304 	em_disable_intr(adapter);
2305 	callout_stop(&adapter->timer);
2306 
2307 	/* Tell the stack that the interface is no longer active */
2308 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2309 
2310 	/* Unarm watchdog timer. */
2311 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2312 		EM_TX_LOCK(txr);
2313 		txr->queue_status = EM_QUEUE_IDLE;
2314 		EM_TX_UNLOCK(txr);
2315 	}
2316 
2317 	e1000_reset_hw(&adapter->hw);
2318 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2319 
2320 	e1000_led_off(&adapter->hw);
2321 	e1000_cleanup_led(&adapter->hw);
2322 }
2323 
2324 
2325 /*********************************************************************
2326  *
2327  *  Determine hardware revision.
2328  *
2329  **********************************************************************/
2330 static void
2331 em_identify_hardware(struct adapter *adapter)
2332 {
2333 	device_t dev = adapter->dev;
2334 
2335 	/* Make sure our PCI config space has the necessary stuff set */
2336 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2337 	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2338 	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2339 		device_printf(dev, "Memory Access and/or Bus Master bits "
2340 		    "were not set!\n");
2341 		adapter->hw.bus.pci_cmd_word |=
2342 		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2343 		pci_write_config(dev, PCIR_COMMAND,
2344 		    adapter->hw.bus.pci_cmd_word, 2);
2345 	}
2346 
2347 	/* Save off the information about this board */
2348 	adapter->hw.vendor_id = pci_get_vendor(dev);
2349 	adapter->hw.device_id = pci_get_device(dev);
2350 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2351 	adapter->hw.subsystem_vendor_id =
2352 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2353 	adapter->hw.subsystem_device_id =
2354 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2355 
2356 	/* Do Shared Code Init and Setup */
2357 	if (e1000_set_mac_type(&adapter->hw)) {
2358 		device_printf(dev, "Setup init failure\n");
2359 		return;
2360 	}
2361 }
2362 
2363 static int
2364 em_allocate_pci_resources(struct adapter *adapter)
2365 {
2366 	device_t	dev = adapter->dev;
2367 	int		rid;
2368 
2369 	rid = PCIR_BAR(0);
2370 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2371 	    &rid, RF_ACTIVE);
2372 	if (adapter->memory == NULL) {
2373 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2374 		return (ENXIO);
2375 	}
2376 	adapter->osdep.mem_bus_space_tag =
2377 	    rman_get_bustag(adapter->memory);
2378 	adapter->osdep.mem_bus_space_handle =
2379 	    rman_get_bushandle(adapter->memory);
2380 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
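	/*
	 * hw_addr is not a CPU-mapped register pointer here: the register
	 * access macros go through the osdep bus-space tag and handle (via
	 * hw->back), so hw_addr merely records the handle's address for
	 * the shared code.
	 */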
2381 
2382 	/* Default to a single queue */
2383 	adapter->num_queues = 1;
2384 
2385 	/*
2386 	 * Setup MSI/X or MSI if PCI Express
2387 	 */
2388 	adapter->msix = em_setup_msix(adapter);
2389 
2390 	adapter->hw.back = &adapter->osdep;
2391 
2392 	return (0);
2393 }
2394 
2395 /*********************************************************************
2396  *
2397  *  Setup the Legacy or MSI Interrupt handler
2398  *
2399  **********************************************************************/
2400 int
2401 em_allocate_legacy(struct adapter *adapter)
2402 {
2403 	device_t dev = adapter->dev;
2404 	int error, rid = 0;
2405 
2406 	/* Manually turn off all interrupts */
2407 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2408 
2409 	if (adapter->msix == 1) /* using MSI */
2410 		rid = 1;
2411 	/* We allocate a single interrupt resource */
2412 	adapter->res = bus_alloc_resource_any(dev,
2413 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2414 	if (adapter->res == NULL) {
2415 		device_printf(dev, "Unable to allocate bus resource: "
2416 		    "interrupt\n");
2417 		return (ENXIO);
2418 	}
2419 
2420 	/*
2421 	 * Allocate a fast interrupt and the associated
2422 	 * deferred processing contexts.
2423 	 */
2424 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2425 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2426 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2427 	    taskqueue_thread_enqueue, &adapter->tq);
2428 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2429 	    device_get_nameunit(adapter->dev));
2430 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2431 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2432 		device_printf(dev, "Failed to register fast interrupt "
2433 			    "handler: %d\n", error);
2434 		taskqueue_free(adapter->tq);
2435 		adapter->tq = NULL;
2436 		return (error);
2437 	}
2438 
2439 	return (0);
2440 }
2441 
2442 /*********************************************************************
2443  *
2444  *  Setup the MSIX Interrupt handlers
2445  *   This is not really multiqueue; rather,
2446  *   it is just multiple interrupt vectors.
2447  *
2448  **********************************************************************/
2449 int
2450 em_allocate_msix(struct adapter *adapter)
2451 {
2452 	device_t	dev = adapter->dev;
2453 	struct		tx_ring *txr = adapter->tx_rings;
2454 	struct		rx_ring *rxr = adapter->rx_rings;
2455 	int		error, rid, vector = 0;
2456 
2457 
2458 	/* Make sure all interrupts are disabled */
2459 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2460 
2461 	/* First set up ring resources */
2462 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2463 
2464 		/* RX ring */
2465 		rid = vector + 1;
2466 
2467 		rxr->res = bus_alloc_resource_any(dev,
2468 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2469 		if (rxr->res == NULL) {
2470 			device_printf(dev,
2471 			    "Unable to allocate bus resource: "
2472 			    "RX MSIX Interrupt %d\n", i);
2473 			return (ENXIO);
2474 		}
2475 		if ((error = bus_setup_intr(dev, rxr->res,
2476 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2477 		    rxr, &rxr->tag)) != 0) {
2478 			device_printf(dev, "Failed to register RX handler");
2479 			return (error);
2480 		}
2481 #if __FreeBSD_version >= 800504
2482 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2483 #endif
2484 		rxr->msix = vector++; /* NOTE increment vector for TX */
2485 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2486 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2487 		    taskqueue_thread_enqueue, &rxr->tq);
2488 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2489 		    device_get_nameunit(adapter->dev));
2490 		/*
2491 		** Set the bit to enable interrupt
2492 		** in E1000_IMS -- bits 20 and 21
2493 		** are for RX0 and RX1, note this has
2494 		** NOTHING to do with the MSIX vector
2495 		*/
2496 		rxr->ims = 1 << (20 + i);
2497 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
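		/*
		 * Each IVAR entry is a 4-bit field: bits 0-2 select the
		 * MSI-X vector and bit 3 (the 8 above) marks the entry
		 * valid (per the 82574 datasheet). RX queue i uses nibble
		 * i; the TX queues and the link cause use the nibbles
		 * programmed below.
		 */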
2498 
2499 		/* TX ring */
2500 		rid = vector + 1;
2501 		txr->res = bus_alloc_resource_any(dev,
2502 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2503 		if (txr->res == NULL) {
2504 			device_printf(dev,
2505 			    "Unable to allocate bus resource: "
2506 			    "TX MSIX Interrupt %d\n", i);
2507 			return (ENXIO);
2508 		}
2509 		if ((error = bus_setup_intr(dev, txr->res,
2510 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2511 		    txr, &txr->tag)) != 0) {
2512 			device_printf(dev, "Failed to register TX handler");
2513 			return (error);
2514 		}
2515 #if __FreeBSD_version >= 800504
2516 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2517 #endif
2518 		txr->msix = vector++; /* Increment vector for next pass */
2519 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2520 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2521 		    taskqueue_thread_enqueue, &txr->tq);
2522 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2523 		    device_get_nameunit(adapter->dev));
2524 		/*
2525 		** Set the bit to enable interrupt
2526 		** in E1000_IMS -- bits 22 and 23
2527 		** are for TX0 and TX1, note this has
2528 		** NOTHING to do with the MSIX vector
2529 		*/
2530 		txr->ims = 1 << (22 + i);
2531 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2532 	}
2533 
2534 	/* Link interrupt */
2535 	++rid;
2536 	adapter->res = bus_alloc_resource_any(dev,
2537 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2538 	if (!adapter->res) {
2539 		device_printf(dev,"Unable to allocate "
2540 		    "bus resource: Link interrupt [%d]\n", rid);
2541 		return (ENXIO);
2542         }
2543 	/* Set the link handler function */
2544 	error = bus_setup_intr(dev, adapter->res,
2545 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2546 	    em_msix_link, adapter, &adapter->tag);
2547 	if (error) {
2548 		adapter->res = NULL;
2549 		device_printf(dev, "Failed to register LINK handler");
2550 		return (error);
2551 	}
2552 #if __FreeBSD_version >= 800504
2553 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2554 #endif
2555 	adapter->linkvec = vector;
2556 	adapter->ivars |=  (8 | vector) << 16;
2557 	adapter->ivars |= 0x80000000;
2558 
2559 	return (0);
2560 }
2561 
2562 
2563 static void
2564 em_free_pci_resources(struct adapter *adapter)
2565 {
2566 	device_t	dev = adapter->dev;
2567 	struct tx_ring	*txr;
2568 	struct rx_ring	*rxr;
2569 	int		rid;
2570 
2571 
2572 	/*
2573 	** Release all the queue interrupt resources:
2574 	*/
2575 	for (int i = 0; i < adapter->num_queues; i++) {
2576 		txr = &adapter->tx_rings[i];
2577 		rxr = &adapter->rx_rings[i];
2578 		/* an early abort? */
2579 		if ((txr == NULL) || (rxr == NULL))
2580 			break;
2581 		rid = txr->msix + 1;
2582 		if (txr->tag != NULL) {
2583 			bus_teardown_intr(dev, txr->res, txr->tag);
2584 			txr->tag = NULL;
2585 		}
2586 		if (txr->res != NULL)
2587 			bus_release_resource(dev, SYS_RES_IRQ,
2588 			    rid, txr->res);
2589 		rid = rxr->msix + 1;
2590 		if (rxr->tag != NULL) {
2591 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2592 			rxr->tag = NULL;
2593 		}
2594 		if (rxr->res != NULL)
2595 			bus_release_resource(dev, SYS_RES_IRQ,
2596 			    rid, rxr->res);
2597 	}
2598 
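	/*
	 * The rid selection mirrors the allocation scheme above: under
	 * MSI-X the link vector follows the queue vectors (rid = vector
	 * + 1), plain MSI uses rid 1, and legacy INTx uses rid 0.
	 */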
2599 	if (adapter->linkvec) /* we are doing MSIX */
2600 		rid = adapter->linkvec + 1;
2601 	else
2602 		rid = (adapter->msix != 0) ? 1 : 0;
2603 
2604 	if (adapter->tag != NULL) {
2605 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2606 		adapter->tag = NULL;
2607 	}
2608 
2609 	if (adapter->res != NULL)
2610 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2611 
2612 
2613 	if (adapter->msix)
2614 		pci_release_msi(dev);
2615 
2616 	if (adapter->msix_mem != NULL)
2617 		bus_release_resource(dev, SYS_RES_MEMORY,
2618 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2619 
2620 	if (adapter->memory != NULL)
2621 		bus_release_resource(dev, SYS_RES_MEMORY,
2622 		    PCIR_BAR(0), adapter->memory);
2623 
2624 	if (adapter->flash != NULL)
2625 		bus_release_resource(dev, SYS_RES_MEMORY,
2626 		    EM_FLASH, adapter->flash);
2627 }
2628 
2629 /*
2630  * Setup MSI or MSI/X
2631  */
2632 static int
2633 em_setup_msix(struct adapter *adapter)
2634 {
2635 	device_t dev = adapter->dev;
2636 	int val = 0;
2637 
2638 
2639 	/*
2640 	** Setup MSI/X for Hartwell: tests have shown
2641 	** use of two queues to be unstable, and to
2642 	** provide no great gain anyway, so we simply
2643 	** seperate the interrupts and use a single queue.
2644 	** separate the interrupts and use a single queue.
2645 	if ((adapter->hw.mac.type == e1000_82574) &&
2646 	    (em_enable_msix == TRUE)) {
2647 		/* Map the MSIX BAR */
2648 		int rid = PCIR_BAR(EM_MSIX_BAR);
2649 		adapter->msix_mem = bus_alloc_resource_any(dev,
2650 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2651 		if (!adapter->msix_mem) {
2652 			/* May not be enabled */
2653 			device_printf(adapter->dev,
2654 			    "Unable to map MSIX table\n");
2655 			goto msi;
2656 		}
2657 		val = pci_msix_count(dev);
2658 		if (val < 3) {
2659 			bus_release_resource(dev, SYS_RES_MEMORY,
2660 			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2661 			adapter->msix_mem = NULL;
2662 			device_printf(adapter->dev,
2663 			    "MSIX: insufficient vectors, using MSI\n");
2664 			goto msi;
2665 		}
2666 		val = 3;
2667 		adapter->num_queues = 1;
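		/*
		 * Three vectors map to the single queue pair configured
		 * above: one for RX, one for TX, and one for link/other
		 * causes; see em_allocate_msix().
		 */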
2668 		if (pci_alloc_msix(dev, &val) == 0) {
2669 			device_printf(adapter->dev,
2670 			    "Using MSIX interrupts "
2671 			    "with %d vectors\n", val);
2672 		}
2673 
2674 		return (val);
2675 	}
2676 msi:
2677 	val = pci_msi_count(dev);
2678 	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2679 		adapter->msix = 1;
2680 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2681 		return (val);
2682 	}
2683 	/* Should only happen due to manual configuration */
2684 	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2685 	return (0);
2686 }
2687 
2688 
2689 /*********************************************************************
2690  *
2691  *  Initialize the hardware to a configuration
2692  *  as specified by the adapter structure.
2693  *
2694  **********************************************************************/
2695 static void
2696 em_reset(struct adapter *adapter)
2697 {
2698 	device_t	dev = adapter->dev;
2699 	struct ifnet	*ifp = adapter->ifp;
2700 	struct e1000_hw	*hw = &adapter->hw;
2701 	u16		rx_buffer_size;
2702 
2703 	INIT_DEBUGOUT("em_reset: begin");
2704 
2705 	/* Set up smart power down as default off on newer adapters. */
2706 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2707 	    hw->mac.type == e1000_82572)) {
2708 		u16 phy_tmp = 0;
2709 
2710 		/* Speed up time to link by disabling smart power down. */
2711 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2712 		phy_tmp &= ~IGP02E1000_PM_SPD;
2713 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2714 	}
2715 
2716 	/*
2717 	 * These parameters control the automatic generation (Tx) and
2718 	 * response (Rx) to Ethernet PAUSE frames.
2719 	 * - High water mark should allow for at least two frames to be
2720 	 *   received after sending an XOFF.
2721 	 * - Low water mark works best when it is very near the high water mark.
2722 	 *   This allows the receiver to restart by sending XON when it has
2723 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2724 	 *   restart after one full frame is pulled from the buffer. There
2725 	 *   could be several smaller frames in the buffer and if so they will
2726 	 *   not trigger the XON until their total number reduces the buffer
2727 	 *   by 1500.
2728 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2729 	 */
2730 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2731 
2732 	hw->fc.high_water = rx_buffer_size -
2733 	    roundup2(adapter->max_frame_size, 1024);
2734 	hw->fc.low_water = hw->fc.high_water - 1500;
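	/*
	 * Worked example (values illustrative): if E1000_PBA reads 0x30,
	 * the RX buffer is 48 KB, so rx_buffer_size = 48 * 1024 = 49152
	 * bytes. With a 1518-byte max frame, high_water = 49152 -
	 * roundup2(1518, 1024) = 49152 - 2048 = 47104, and low_water =
	 * 47104 - 1500 = 45604.
	 */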
2735 
2736 	if (hw->mac.type == e1000_80003es2lan)
2737 		hw->fc.pause_time = 0xFFFF;
2738 	else
2739 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2740 
2741 	hw->fc.send_xon = TRUE;
2742 
2743 	/* Set flow control, using the tunable value if it is sane */
2744 	hw->fc.requested_mode = adapter->fc_setting;
2745 
2746 	/* Workaround: no TX flow control for PCH */
2747 	if (hw->mac.type == e1000_pchlan)
2748 		hw->fc.requested_mode = e1000_fc_rx_pause;
2749 
2750 	/* Override with the magic values prescribed for PCH2LAN */
2751 	if (hw->mac.type == e1000_pch2lan) {
2752 		hw->fc.high_water = 0x5C20;
2753 		hw->fc.low_water = 0x5048;
2754 		hw->fc.pause_time = 0x0650;
2755 		hw->fc.refresh_time = 0x0400;
2756 		/* Jumbos need adjusted PBA */
2757 		if (ifp->if_mtu > ETHERMTU)
2758 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2759 		else
2760 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2761 	}
2762 
2763 	/* Issue a global reset */
2764 	e1000_reset_hw(hw);
2765 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2766 	em_disable_aspm(adapter);
2767 
2768 	if (e1000_init_hw(hw) < 0) {
2769 		device_printf(dev, "Hardware Initialization Failed\n");
2770 		return;
2771 	}
2772 
2773 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2774 	e1000_get_phy_info(hw);
2775 	e1000_check_for_link(hw);
2776 	return;
2777 }
2778 
2779 /*********************************************************************
2780  *
2781  *  Setup networking device structure and register an interface.
2782  *
2783  **********************************************************************/
2784 static int
2785 em_setup_interface(device_t dev, struct adapter *adapter)
2786 {
2787 	struct ifnet   *ifp;
2788 
2789 	INIT_DEBUGOUT("em_setup_interface: begin");
2790 
2791 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2792 	if (ifp == NULL) {
2793 		device_printf(dev, "can not allocate ifnet structure\n");
2794 		return (-1);
2795 	}
2796 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2797 	ifp->if_mtu = ETHERMTU;
2798 	ifp->if_init =  em_init;
2799 	ifp->if_softc = adapter;
2800 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2801 	ifp->if_ioctl = em_ioctl;
2802 	ifp->if_start = em_start;
2803 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2804 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2805 	IFQ_SET_READY(&ifp->if_snd);
2806 
2807 	ether_ifattach(ifp, adapter->hw.mac.addr);
2808 
2809 	ifp->if_capabilities = ifp->if_capenable = 0;
2810 
2811 #ifdef EM_MULTIQUEUE
2812 	/* Multiqueue tx functions */
2813 	ifp->if_transmit = em_mq_start;
2814 	ifp->if_qflush = em_qflush;
2815 #endif
2816 
2817 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2818 	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2819 
2820 	/* Enable TSO by default, can disable with ifconfig */
2821 	ifp->if_capabilities |= IFCAP_TSO4;
2822 	ifp->if_capenable |= IFCAP_TSO4;
2823 
2824 	/*
2825 	 * Tell the upper layer(s) we
2826 	 * support full VLAN capability
2827 	 */
2828 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2829 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2830 	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2831 
2832 	/*
2833 	** Don't turn this on by default: if vlans are
2834 	** created on another pseudo device (e.g. lagg)
2835 	** then vlan events are not passed through, breaking
2836 	** operation, though with HW FILTER off it works. If
2837 	** you use vlans directly on the em driver you can
2838 	** enable this and get full hardware tag filtering.
2839 	*/
2840 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2841 
2842 #ifdef DEVICE_POLLING
2843 	ifp->if_capabilities |= IFCAP_POLLING;
2844 #endif
2845 
2846 	/* Enable only WOL MAGIC by default */
2847 	if (adapter->wol) {
2848 		ifp->if_capabilities |= IFCAP_WOL;
2849 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2850 	}
2851 
2852 	/*
2853 	 * Specify the media types supported by this adapter and register
2854 	 * callbacks to update media and link information
2855 	 */
2856 	ifmedia_init(&adapter->media, IFM_IMASK,
2857 	    em_media_change, em_media_status);
2858 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2859 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2860 		u_char fiber_type = IFM_1000_SX;	/* default type */
2861 
2862 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2863 			    0, NULL);
2864 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2865 	} else {
2866 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2867 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2868 			    0, NULL);
2869 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2870 			    0, NULL);
2871 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2872 			    0, NULL);
2873 		if (adapter->hw.phy.type != e1000_phy_ife) {
2874 			ifmedia_add(&adapter->media,
2875 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2876 			ifmedia_add(&adapter->media,
2877 				IFM_ETHER | IFM_1000_T, 0, NULL);
2878 		}
2879 	}
2880 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2881 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2882 	return (0);
2883 }
2884 
2885 
2886 /*
2887  * Manage DMA'able memory.
2888  */
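/*
 * bus_dmamap_load() invokes this callback with the resolved segment
 * list; the tag created below specifies a single segment, so segs[0]
 * covers the whole allocation and its address is handed back through
 * the opaque arg pointer. On error the address is left untouched.
 */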
2889 static void
2890 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2891 {
2892 	if (error)
2893 		return;
2894 	*(bus_addr_t *) arg = segs[0].ds_addr;
2895 }
2896 
2897 static int
2898 em_dma_malloc(struct adapter *adapter, bus_size_t size,
2899         struct em_dma_alloc *dma, int mapflags)
2900 {
2901 	int error;
2902 
2903 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2904 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2905 				BUS_SPACE_MAXADDR,	/* lowaddr */
2906 				BUS_SPACE_MAXADDR,	/* highaddr */
2907 				NULL, NULL,		/* filter, filterarg */
2908 				size,			/* maxsize */
2909 				1,			/* nsegments */
2910 				size,			/* maxsegsize */
2911 				0,			/* flags */
2912 				NULL,			/* lockfunc */
2913 				NULL,			/* lockarg */
2914 				&dma->dma_tag);
2915 	if (error) {
2916 		device_printf(adapter->dev,
2917 		    "%s: bus_dma_tag_create failed: %d\n",
2918 		    __func__, error);
2919 		goto fail_0;
2920 	}
2921 
2922 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2923 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2924 	if (error) {
2925 		device_printf(adapter->dev,
2926 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2927 		    __func__, (uintmax_t)size, error);
2928 		goto fail_1;
2929 	}
2930 
2931 	dma->dma_paddr = 0;
2932 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2933 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2934 	if (error || dma->dma_paddr == 0) {
2935 		device_printf(adapter->dev,
2936 		    "%s: bus_dmamap_load failed: %d\n",
2937 		    __func__, error);
2938 		goto fail_3;
2939 	}
2940 
2941 	return (0);
2942 
2943 fail_3:
2944 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2945 fail_2:
2946 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
2947 	bus_dma_tag_destroy(dma->dma_tag);
2948 fail_0:
2949 	dma->dma_map = NULL;
2950 	dma->dma_tag = NULL;
2951 
2952 	return (error);
2953 }
2954 
2955 static void
2956 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2957 {
2958 	if (dma->dma_tag == NULL)
2959 		return;
2960 	if (dma->dma_map != NULL) {
2961 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2962 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2963 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2964 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2965 		dma->dma_map = NULL;
2966 	}
2967 	bus_dma_tag_destroy(dma->dma_tag);
2968 	dma->dma_tag = NULL;
2969 }
2970 
2971 
2972 /*********************************************************************
2973  *
2974  *  Allocate memory for the transmit and receive rings, and then
2975  *  the descriptors associated with each, called only once at attach.
2976  *
2977  **********************************************************************/
2978 static int
2979 em_allocate_queues(struct adapter *adapter)
2980 {
2981 	device_t		dev = adapter->dev;
2982 	struct tx_ring		*txr = NULL;
2983 	struct rx_ring		*rxr = NULL;
2984 	int rsize, tsize, error = E1000_SUCCESS;
2985 	int txconf = 0, rxconf = 0;
2986 
2987 
2988 	/* Allocate the TX ring struct memory */
2989 	if (!(adapter->tx_rings =
2990 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2991 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2992 		device_printf(dev, "Unable to allocate TX ring memory\n");
2993 		error = ENOMEM;
2994 		goto fail;
2995 	}
2996 
2997 	/* Now allocate the RX */
2998 	if (!(adapter->rx_rings =
2999 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3000 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3001 		device_printf(dev, "Unable to allocate RX ring memory\n");
3002 		error = ENOMEM;
3003 		goto rx_fail;
3004 	}
3005 
3006 	tsize = roundup2(adapter->num_tx_desc *
3007 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3008 	/*
3009 	 * Now set up the TX queues, txconf is needed to handle the
3010 	 * possibility that things fail midcourse and we need to
3011 	 * undo memory gracefully
3012 	 */
3013 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3014 		/* Set up some basics */
3015 		txr = &adapter->tx_rings[i];
3016 		txr->adapter = adapter;
3017 		txr->me = i;
3018 
3019 		/* Initialize the TX lock */
3020 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3021 		    device_get_nameunit(dev), txr->me);
3022 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3023 
3024 		if (em_dma_malloc(adapter, tsize,
3025 			&txr->txdma, BUS_DMA_NOWAIT)) {
3026 			device_printf(dev,
3027 			    "Unable to allocate TX Descriptor memory\n");
3028 			error = ENOMEM;
3029 			goto err_tx_desc;
3030 		}
3031 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3032 		bzero((void *)txr->tx_base, tsize);
3033 
3034 		if (em_allocate_transmit_buffers(txr)) {
3035 			device_printf(dev,
3036 			    "Critical Failure setting up transmit buffers\n");
3037 			error = ENOMEM;
3038 			goto err_tx_desc;
3039 		}
3040 #if __FreeBSD_version >= 800000
3041 		/* Allocate a buf ring */
3042 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3043 		    M_WAITOK, &txr->tx_mtx);
3044 #endif
3045 	}
3046 
3047 	/*
3048 	 * Next the RX queues...
3049 	 */
3050 	rsize = roundup2(adapter->num_rx_desc *
3051 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3052 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3053 		rxr = &adapter->rx_rings[i];
3054 		rxr->adapter = adapter;
3055 		rxr->me = i;
3056 
3057 		/* Initialize the RX lock */
3058 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3059 		    device_get_nameunit(dev), rxr->me);
3060 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3061 
3062 		if (em_dma_malloc(adapter, rsize,
3063 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3064 			device_printf(dev,
3065 			    "Unable to allocate RxDescriptor memory\n");
3066 			error = ENOMEM;
3067 			goto err_rx_desc;
3068 		}
3069 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3070 		bzero((void *)rxr->rx_base, rsize);
3071 
3072 		/* Allocate receive buffers for the ring */
3073 		if (em_allocate_receive_buffers(rxr)) {
3074 			device_printf(dev,
3075 			    "Critical Failure setting up receive buffers\n");
3076 			error = ENOMEM;
3077 			goto err_rx_desc;
3078 		}
3079 	}
3080 
3081 	return (0);
3082 
3083 err_rx_desc:
3084 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3085 		em_dma_free(adapter, &rxr->rxdma);
3086 err_tx_desc:
3087 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3088 		em_dma_free(adapter, &txr->txdma);
3089 	free(adapter->rx_rings, M_DEVBUF);
3090 rx_fail:
3091 #if __FreeBSD_version >= 800000
3092 	if (txr != NULL && txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
3093 #endif
3094 	free(adapter->tx_rings, M_DEVBUF);
3095 fail:
3096 	return (error);
3097 }
3098 
3099 
3100 /*********************************************************************
3101  *
3102  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3103  *  the information needed to transmit a packet on the wire. This is
3104  *  called only once at attach, setup is done every reset.
3105  *
3106  **********************************************************************/
3107 static int
3108 em_allocate_transmit_buffers(struct tx_ring *txr)
3109 {
3110 	struct adapter *adapter = txr->adapter;
3111 	device_t dev = adapter->dev;
3112 	struct em_buffer *txbuf;
3113 	int error, i;
3114 
3115 	/*
3116 	 * Setup DMA descriptor areas.
3117 	 */
3118 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3119 			       1, 0,			/* alignment, bounds */
3120 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3121 			       BUS_SPACE_MAXADDR,	/* highaddr */
3122 			       NULL, NULL,		/* filter, filterarg */
3123 			       EM_TSO_SIZE,		/* maxsize */
3124 			       EM_MAX_SCATTER,		/* nsegments */
3125 			       PAGE_SIZE,		/* maxsegsize */
3126 			       0,			/* flags */
3127 			       NULL,			/* lockfunc */
3128 			       NULL,			/* lockfuncarg */
3129 			       &txr->txtag))) {
3130 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3131 		goto fail;
3132 	}
3133 
3134 	if (!(txr->tx_buffers =
3135 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3136 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3137 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3138 		error = ENOMEM;
3139 		goto fail;
3140 	}
3141 
3142 	/* Create the descriptor buffer dma maps */
3143 	txbuf = txr->tx_buffers;
3144 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3145 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3146 		if (error != 0) {
3147 			device_printf(dev, "Unable to create TX DMA map\n");
3148 			goto fail;
3149 		}
3150 	}
3151 
3152 	return 0;
3153 fail:
3154 	/* We free all, it handles case where we are in the middle */
3155 	em_free_transmit_structures(adapter);
3156 	return (error);
3157 }
3158 
3159 /*********************************************************************
3160  *
3161  *  Initialize a transmit ring.
3162  *
3163  **********************************************************************/
3164 static void
3165 em_setup_transmit_ring(struct tx_ring *txr)
3166 {
3167 	struct adapter *adapter = txr->adapter;
3168 	struct em_buffer *txbuf;
3169 	int i;
3170 
3171 	/* Clear the old descriptor contents */
3172 	EM_TX_LOCK(txr);
3173 	bzero((void *)txr->tx_base,
3174 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3175 	/* Reset indices */
3176 	txr->next_avail_desc = 0;
3177 	txr->next_to_clean = 0;
3178 
3179 	/* Free any existing tx buffers. */
3180 	txbuf = txr->tx_buffers;
3181 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3182 		if (txbuf->m_head != NULL) {
3183 			bus_dmamap_sync(txr->txtag, txbuf->map,
3184 			    BUS_DMASYNC_POSTWRITE);
3185 			bus_dmamap_unload(txr->txtag, txbuf->map);
3186 			m_freem(txbuf->m_head);
3187 			txbuf->m_head = NULL;
3188 		}
3189 		/* clear the watch index */
3190 		txbuf->next_eop = -1;
3191 	}
3192 
3193 	/* Set number of descriptors available */
3194 	txr->tx_avail = adapter->num_tx_desc;
3195 	txr->queue_status = EM_QUEUE_IDLE;
3196 
3197 	/* Clear checksum offload context. */
3198 	txr->last_hw_offload = 0;
3199 	txr->last_hw_ipcss = 0;
3200 	txr->last_hw_ipcso = 0;
3201 	txr->last_hw_tucss = 0;
3202 	txr->last_hw_tucso = 0;
3203 
3204 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3205 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3206 	EM_TX_UNLOCK(txr);
3207 }
3208 
3209 /*********************************************************************
3210  *
3211  *  Initialize all transmit rings.
3212  *
3213  **********************************************************************/
3214 static void
3215 em_setup_transmit_structures(struct adapter *adapter)
3216 {
3217 	struct tx_ring *txr = adapter->tx_rings;
3218 
3219 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3220 		em_setup_transmit_ring(txr);
3221 
3222 	return;
3223 }
3224 
3225 /*********************************************************************
3226  *
3227  *  Enable transmit unit.
3228  *
3229  **********************************************************************/
3230 static void
3231 em_initialize_transmit_unit(struct adapter *adapter)
3232 {
3233 	struct tx_ring	*txr = adapter->tx_rings;
3234 	struct e1000_hw	*hw = &adapter->hw;
3235 	u32	tctl, tarc, tipg = 0;
3236 
3237 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3238 
3239 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3240 		u64 bus_addr = txr->txdma.dma_paddr;
3241 		/* Base and Len of TX Ring */
3242 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3243 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3244 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3245 	    	    (u32)(bus_addr >> 32));
3246 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3247 	    	    (u32)bus_addr);
3248 		/* Init the HEAD/TAIL indices */
3249 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3250 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3251 
3252 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3253 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3254 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3255 
3256 		txr->queue_status = EM_QUEUE_IDLE;
3257 	}
3258 
3259 	/* Set the default values for the Tx Inter Packet Gap timer */
3260 	switch (adapter->hw.mac.type) {
3261 	case e1000_82542:
3262 		tipg = DEFAULT_82542_TIPG_IPGT;
3263 		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3264 		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3265 		break;
3266 	case e1000_80003es2lan:
3267 		tipg = DEFAULT_82543_TIPG_IPGR1;
3268 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3269 		    E1000_TIPG_IPGR2_SHIFT;
3270 		break;
3271 	default:
3272 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3273 		    (adapter->hw.phy.media_type ==
3274 		    e1000_media_type_internal_serdes))
3275 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3276 		else
3277 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3278 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3279 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3280 	}
3281 
3282 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3283 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3284 
3285 	if (adapter->hw.mac.type >= e1000_82540)
3286 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3287 		    adapter->tx_abs_int_delay.value);
3288 
3289 	if ((adapter->hw.mac.type == e1000_82571) ||
3290 	    (adapter->hw.mac.type == e1000_82572)) {
3291 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3292 		tarc |= SPEED_MODE_BIT;
3293 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3294 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3295 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3296 		tarc |= 1;
3297 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3298 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3299 		tarc |= 1;
3300 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3301 	}
3302 
3303 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3304 	if (adapter->tx_int_delay.value > 0)
3305 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3306 
3307 	/* Program the Transmit Control Register */
3308 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3309 	tctl &= ~E1000_TCTL_CT;
3310 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3311 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3312 
3313 	if (adapter->hw.mac.type >= e1000_82571)
3314 		tctl |= E1000_TCTL_MULR;
3315 
3316 	/* This write will effectively turn on the transmit unit. */
3317 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3318 
3319 }
3320 
3321 
3322 /*********************************************************************
3323  *
3324  *  Free all transmit rings.
3325  *
3326  **********************************************************************/
3327 static void
3328 em_free_transmit_structures(struct adapter *adapter)
3329 {
3330 	struct tx_ring *txr = adapter->tx_rings;
3331 
3332 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3333 		EM_TX_LOCK(txr);
3334 		em_free_transmit_buffers(txr);
3335 		em_dma_free(adapter, &txr->txdma);
3336 		EM_TX_UNLOCK(txr);
3337 		EM_TX_LOCK_DESTROY(txr);
3338 	}
3339 
3340 	free(adapter->tx_rings, M_DEVBUF);
3341 }
3342 
3343 /*********************************************************************
3344  *
3345  *  Free transmit ring related data structures.
3346  *
3347  **********************************************************************/
3348 static void
3349 em_free_transmit_buffers(struct tx_ring *txr)
3350 {
3351 	struct adapter		*adapter = txr->adapter;
3352 	struct em_buffer	*txbuf;
3353 
3354 	INIT_DEBUGOUT("free_transmit_ring: begin");
3355 
3356 	if (txr->tx_buffers == NULL)
3357 		return;
3358 
3359 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3360 		txbuf = &txr->tx_buffers[i];
3361 		if (txbuf->m_head != NULL) {
3362 			bus_dmamap_sync(txr->txtag, txbuf->map,
3363 			    BUS_DMASYNC_POSTWRITE);
3364 			bus_dmamap_unload(txr->txtag,
3365 			    txbuf->map);
3366 			m_freem(txbuf->m_head);
3367 			txbuf->m_head = NULL;
3368 			if (txbuf->map != NULL) {
3369 				bus_dmamap_destroy(txr->txtag,
3370 				    txbuf->map);
3371 				txbuf->map = NULL;
3372 			}
3373 		} else if (txbuf->map != NULL) {
3374 			bus_dmamap_unload(txr->txtag,
3375 			    txbuf->map);
3376 			bus_dmamap_destroy(txr->txtag,
3377 			    txbuf->map);
3378 			txbuf->map = NULL;
3379 		}
3380 	}
3381 #if __FreeBSD_version >= 800000
3382 	if (txr->br != NULL)
3383 		buf_ring_free(txr->br, M_DEVBUF);
3384 #endif
3385 	if (txr->tx_buffers != NULL) {
3386 		free(txr->tx_buffers, M_DEVBUF);
3387 		txr->tx_buffers = NULL;
3388 	}
3389 	if (txr->txtag != NULL) {
3390 		bus_dma_tag_destroy(txr->txtag);
3391 		txr->txtag = NULL;
3392 	}
3393 	return;
3394 }
3395 
3396 
3397 /*********************************************************************
3398  *  The offload context is protocol specific (TCP/UDP) and thus
3399  *  only needs to be set when the protocol changes. A context
3400  *  change can be a performance detriment, however, and might be
3401  *  better just disabled. The reason lies in the way the
3402  *  controller supports pipelined requests from the Tx data
3403  *  DMA: up to four requests can be pipelined, and they may
3404  *  belong to the same packet or to multiple packets. All
3405  *  requests for one packet, however, are issued before a
3406  *  request is issued for a subsequent packet, and if a request
3407  *  for the next packet requires a context change, that request
3408  *  is stalled until the previous request completes. Setting up
3409  *  a new context therefore effectively disables pipelined Tx
3410  *  data DMA, which in turn greatly slows down the sending of
3411  *  small frames.
3412  **********************************************************************/
3413 static void
3414 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3415     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3416 {
3417 	struct adapter			*adapter = txr->adapter;
3418 	struct e1000_context_desc	*TXD = NULL;
3419 	struct em_buffer		*tx_buffer;
3420 	int				cur, hdr_len;
3421 	u32				cmd = 0;
3422 	u16				offload = 0;
3423 	u8				ipcso, ipcss, tucso, tucss;
3424 
3425 	ipcss = ipcso = tucss = tucso = 0;
3426 	hdr_len = ip_off + (ip->ip_hl << 2);
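	/*
	 * Illustrative example: for an untagged ethernet frame ip_off
	 * is 14, and a minimal 20-byte IP header (ip_hl == 5) gives
	 * hdr_len = 14 + 20 = 34; ipcso below then lands on ip_sum at
	 * byte offset 14 + 10 = 24.
	 */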
3427 	cur = txr->next_avail_desc;
3428 
3429 	/* Setup of IP header checksum. */
3430 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3431 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3432 		offload |= CSUM_IP;
3433 		ipcss = ip_off;
3434 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3435 		/*
3436 		 * Start offset for header checksum calculation.
3437 		 * End offset for header checksum calculation.
3438 		 * Offset of place to put the checksum.
3439 		 */
3440 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3441 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3442 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3443 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3444 		cmd |= E1000_TXD_CMD_IP;
3445 	}
3446 
3447 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3448  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3449  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3450  		offload |= CSUM_TCP;
3451  		tucss = hdr_len;
3452  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
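 		/*
 		 * Continuing the example above: th_sum sits 16 bytes
 		 * into the TCP header, so tucso = 34 + 16 = 50 for a
 		 * minimal IPv4/TCP frame.
 		 */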
3453  		/*
3454  		 * Setting up a new checksum offload context for every
3455  		 * frame takes a lot of processing time for the hardware.
3456  		 * This also hurts performance badly for small frames, so
3457  		 * avoid it if the driver can reuse the previously
3458  		 * configured checksum offload context.
3459  		 */
3460  		if (txr->last_hw_offload == offload) {
3461  			if (offload & CSUM_IP) {
3462  				if (txr->last_hw_ipcss == ipcss &&
3463  				    txr->last_hw_ipcso == ipcso &&
3464  				    txr->last_hw_tucss == tucss &&
3465  				    txr->last_hw_tucso == tucso)
3466  					return;
3467  			} else {
3468  				if (txr->last_hw_tucss == tucss &&
3469  				    txr->last_hw_tucso == tucso)
3470  					return;
3471  			}
3472   		}
3473  		txr->last_hw_offload = offload;
3474  		txr->last_hw_tucss = tucss;
3475  		txr->last_hw_tucso = tucso;
3476  		/*
3477  		 * Start offset for payload checksum calculation.
3478  		 * End offset for payload checksum calculation.
3479  		 * Offset of place to put the checksum.
3480  		 */
3481 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3482  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3483  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3484  		TXD->upper_setup.tcp_fields.tucso = tucso;
3485  		cmd |= E1000_TXD_CMD_TCP;
3486  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3487  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3488  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3489  		tucss = hdr_len;
3490  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3491  		/*
3492  		 * Setting up a new checksum offload context for every
3493  		 * frame takes a lot of processing time for the hardware.
3494  		 * This also hurts performance badly for small frames, so
3495  		 * avoid it if the driver can reuse the previously
3496  		 * configured checksum offload context.
3497  		 */
3498  		if (txr->last_hw_offload == offload) {
3499  			if (offload & CSUM_IP) {
3500  				if (txr->last_hw_ipcss == ipcss &&
3501  				    txr->last_hw_ipcso == ipcso &&
3502  				    txr->last_hw_tucss == tucss &&
3503  				    txr->last_hw_tucso == tucso)
3504  					return;
3505  			} else {
3506  				if (txr->last_hw_tucss == tucss &&
3507  				    txr->last_hw_tucso == tucso)
3508  					return;
3509  			}
3510  		}
3511  		txr->last_hw_offload = offload;
3512  		txr->last_hw_tucss = tucss;
3513  		txr->last_hw_tucso = tucso;
3514  		/*
3515  		 * Start offset for payload checksum calculation.
3516  		 * End offset for payload checksum calculation.
3517  		 * Offset of place to put the checksum.
3518  		 */
3519 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3520  		TXD->upper_setup.tcp_fields.tucss = tucss;
3521  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3522  		TXD->upper_setup.tcp_fields.tucso = tucso;
3523   	}
3524 
3525  	if (offload & CSUM_IP) {
3526  		txr->last_hw_ipcss = ipcss;
3527  		txr->last_hw_ipcso = ipcso;
3528   	}
3529 
3530 	TXD->tcp_seg_setup.data = htole32(0);
3531 	TXD->cmd_and_length =
3532 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3533 	tx_buffer = &txr->tx_buffers[cur];
3534 	tx_buffer->m_head = NULL;
3535 	tx_buffer->next_eop = -1;
3536 
3537 	if (++cur == adapter->num_tx_desc)
3538 		cur = 0;
3539 
3540 	txr->tx_avail--;
3541 	txr->next_avail_desc = cur;
3542 }
3543 
3544 
3545 /**********************************************************************
3546  *
3547  *  Setup work for hardware segmentation offload (TSO)
3548  *
3549  **********************************************************************/
3550 static void
3551 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3552     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3553 {
3554 	struct adapter			*adapter = txr->adapter;
3555 	struct e1000_context_desc	*TXD;
3556 	struct em_buffer		*tx_buffer;
3557 	int cur, hdr_len;
3558 
3559 	/*
3560 	 * In theory we could reuse the same TSO context if and only
3561 	 * if the frame is the same type (IP/TCP) and has the same
3562 	 * MSS. However, checking whether a frame has the same IP/TCP
3563 	 * structure is hard, so just ignore that and always establish
3564 	 * a new TSO context.
3565 	 */
3566 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
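	/*
	 * Illustrative example: an untagged frame with minimal IP and
	 * TCP headers gives hdr_len = 14 + 20 + 20 = 54.
	 */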
3567 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3568 		      E1000_TXD_DTYP_D |	/* Data descr type */
3569 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3570 
3571 	/* IP and/or TCP header checksum calculation and insertion. */
3572 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3573 
3574 	cur = txr->next_avail_desc;
3575 	tx_buffer = &txr->tx_buffers[cur];
3576 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3577 
3578 	/*
3579 	 * Start offset for header checksum calculation.
3580 	 * End offset for header checksum calculation.
3581 	 * Offset of place put the checksum.
3582 	 */
3583 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3584 	TXD->lower_setup.ip_fields.ipcse =
3585 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3586 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3587 	/*
3588 	 * Start offset for payload checksum calculation.
3589 	 * End offset for payload checksum calculation.
3590 	 * Offset of place to put the checksum.
3591 	 */
3592 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3593 	TXD->upper_setup.tcp_fields.tucse = 0;
3594 	TXD->upper_setup.tcp_fields.tucso =
3595 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3596 	/*
3597 	 * Payload size per packet w/o any headers.
3598 	 * Length of all headers up to payload.
3599 	 */
3600 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3601 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3602 
3603 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3604 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3605 				E1000_TXD_CMD_TSE |	/* TSE context */
3606 				E1000_TXD_CMD_IP |	/* Do IP csum */
3607 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3608 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3609 
3610 	tx_buffer->m_head = NULL;
3611 	tx_buffer->next_eop = -1;
3612 
3613 	if (++cur == adapter->num_tx_desc)
3614 		cur = 0;
3615 
3616 	txr->tx_avail--;
3617 	txr->next_avail_desc = cur;
3618 	txr->tx_tso = TRUE;
3619 }
3620 
3621 
3622 /**********************************************************************
3623  *
3624  *  Examine each tx_buffer in the used queue. If the hardware is done
3625  *  processing the packet then free associated resources. The
3626  *  tx_buffer is put back on the free queue.
3627  *
3628  **********************************************************************/
3629 static bool
3630 em_txeof(struct tx_ring *txr)
3631 {
3632 	struct adapter	*adapter = txr->adapter;
3633         int first, last, done, processed;
3634         struct em_buffer *tx_buffer;
3635         struct e1000_tx_desc   *tx_desc, *eop_desc;
3636 	struct ifnet   *ifp = adapter->ifp;
3637 
3638 	EM_TX_LOCK_ASSERT(txr);
3639 
3640 	/* No work, make sure watchdog is off */
3641         if (txr->tx_avail == adapter->num_tx_desc) {
3642 		txr->queue_status = EM_QUEUE_IDLE;
3643                 return (FALSE);
3644 	}
3645 
3646 	processed = 0;
3647         first = txr->next_to_clean;
3648         tx_desc = &txr->tx_base[first];
3649         tx_buffer = &txr->tx_buffers[first];
3650 	last = tx_buffer->next_eop;
3651         eop_desc = &txr->tx_base[last];
3652 
3653 	/*
3654 	 * What this does is get the index of the
3655 	 * first descriptor AFTER the EOP of the
3656 	 * first packet; that way we can do the
3657 	 * simple comparison in the inner while loop.
3658 	 */
3659 	if (++last == adapter->num_tx_desc)
3660  		last = 0;
3661 	done = last;
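	/*
	 * Illustrative example: if next_to_clean is 10 and the first
	 * packet's EOP descriptor is 12, done becomes 13 and the inner
	 * loop below cleans descriptors 10 through 12.
	 */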
3662 
3663         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3664             BUS_DMASYNC_POSTREAD);
3665 
3666         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3667 		/* We clean the range of the packet */
3668 		while (first != done) {
3669                 	tx_desc->upper.data = 0;
3670                 	tx_desc->lower.data = 0;
3671                 	tx_desc->buffer_addr = 0;
3672                 	++txr->tx_avail;
3673 			++processed;
3674 
3675 			if (tx_buffer->m_head) {
3676 				bus_dmamap_sync(txr->txtag,
3677 				    tx_buffer->map,
3678 				    BUS_DMASYNC_POSTWRITE);
3679 				bus_dmamap_unload(txr->txtag,
3680 				    tx_buffer->map);
3681                         	m_freem(tx_buffer->m_head);
3682                         	tx_buffer->m_head = NULL;
3683                 	}
3684 			tx_buffer->next_eop = -1;
3685 			txr->watchdog_time = ticks;
3686 
3687 	                if (++first == adapter->num_tx_desc)
3688 				first = 0;
3689 
3690 	                tx_buffer = &txr->tx_buffers[first];
3691 			tx_desc = &txr->tx_base[first];
3692 		}
3693 		++ifp->if_opackets;
3694 		/* See if we can continue to the next packet */
3695 		last = tx_buffer->next_eop;
3696 		if (last != -1) {
3697         		eop_desc = &txr->tx_base[last];
3698 			/* Get new done point */
3699 			if (++last == adapter->num_tx_desc) last = 0;
3700 			done = last;
3701 		} else
3702 			break;
3703         }
3704         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3705             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3706 
3707         txr->next_to_clean = first;
3708 
3709 	/*
3710 	** Watchdog calculation: we know there's
3711 	** work outstanding or the first return
3712 	** would have been taken, so nothing processed
3713 	** for too long indicates a hang. The local
3714 	** timer will examine this and reset if needed.
3715 	*/
3716 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3717 		txr->queue_status = EM_QUEUE_HUNG;
3718 
3719         /*
3720          * If we have enough room, clear IFF_DRV_OACTIVE
3721          * to tell the stack that it is OK to send packets.
3722          */
3723         if (txr->tx_avail > EM_TX_CLEANUP_THRESHOLD) {
3724                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3725 		/* Disable watchdog if all clean */
3726                 if (txr->tx_avail == adapter->num_tx_desc) {
3727 			txr->queue_status = EM_QUEUE_IDLE;
3728 			return (FALSE);
3729 		}
3730         }
3731 
3732 	return (TRUE);
3733 }
3734 
3735 
3736 /*********************************************************************
3737  *
3738  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3739  *
3740  **********************************************************************/
3741 static void
3742 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3743 {
3744 	struct adapter		*adapter = rxr->adapter;
3745 	struct mbuf		*m;
3746 	bus_dma_segment_t	segs[1];
3747 	struct em_buffer	*rxbuf;
3748 	int			i, error, nsegs, cleaned;
3749 
3750 	i = rxr->next_to_refresh;
3751 	cleaned = -1;
3752 	while (i != limit) {
3753 		rxbuf = &rxr->rx_buffers[i];
3754 		if (rxbuf->m_head == NULL) {
3755 			m = m_getjcl(M_DONTWAIT, MT_DATA,
3756 			    M_PKTHDR, adapter->rx_mbuf_sz);
3757 			/*
3758 			** If we have a temporary resource shortage
3759 			** that causes a failure, just abort refresh
3760 			** for now; we will return to this point when
3761 			** reinvoked from em_rxeof.
3762 			*/
3763 			if (m == NULL)
3764 				goto update;
3765 		} else
3766 			m = rxbuf->m_head;
3767 
3768 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3769 		m->m_flags |= M_PKTHDR;
3770 		m->m_data = m->m_ext.ext_buf;
3771 
3772 		/* Use bus_dma machinery to setup the memory mapping  */
3773 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3774 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3775 		if (error != 0) {
3776 			printf("Refresh mbufs: hdr dmamap load"
3777 			    " failure - %d\n", error);
3778 			m_free(m);
3779 			rxbuf->m_head = NULL;
3780 			goto update;
3781 		}
3782 		rxbuf->m_head = m;
3783 		bus_dmamap_sync(rxr->rxtag,
3784 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3785 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3786 
3787 		cleaned = i;
3788 		/* Calculate next index */
3789 		if (++i == adapter->num_rx_desc)
3790 			i = 0;
3791 		rxr->next_to_refresh = i;
3792 	}
3793 update:
3794 	/*
3795 	** Update the tail pointer only if, and
3796 	** only as far as, we have refreshed.
3797 	*/
3798 	if (cleaned != -1) /* Update tail index */
3799 		E1000_WRITE_REG(&adapter->hw,
3800 		    E1000_RDT(rxr->me), cleaned);
3801 
3802 	return;
3803 }
3804 
3805 
3806 /*********************************************************************
3807  *
3808  *  Allocate memory for rx_buffer structures. Since we use one
3809  *  rx_buffer per received packet, the maximum number of rx_buffer's
3810  *  that we'll need is equal to the number of receive descriptors
3811  *  that we've allocated.
3812  *
3813  **********************************************************************/
3814 static int
3815 em_allocate_receive_buffers(struct rx_ring *rxr)
3816 {
3817 	struct adapter		*adapter = rxr->adapter;
3818 	device_t		dev = adapter->dev;
3819 	struct em_buffer	*rxbuf;
3820 	int			error;
3821 
3822 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3823 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3824 	if (rxr->rx_buffers == NULL) {
3825 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3826 		return (ENOMEM);
3827 	}
3828 
3829 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3830 				1, 0,			/* alignment, bounds */
3831 				BUS_SPACE_MAXADDR,	/* lowaddr */
3832 				BUS_SPACE_MAXADDR,	/* highaddr */
3833 				NULL, NULL,		/* filter, filterarg */
3834 				MJUM9BYTES,		/* maxsize */
3835 				1,			/* nsegments */
3836 				MJUM9BYTES,		/* maxsegsize */
3837 				0,			/* flags */
3838 				NULL,			/* lockfunc */
3839 				NULL,			/* lockarg */
3840 				&rxr->rxtag);
3841 	if (error) {
3842 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3843 		    __func__, error);
3844 		goto fail;
3845 	}
3846 
3847 	rxbuf = rxr->rx_buffers;
3848 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3849 		/* rxbuf already points at rx_buffers[i] via the loop increment */
3850 		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3851 		    &rxbuf->map);
3852 		if (error) {
3853 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3854 			    __func__, error);
3855 			goto fail;
3856 		}
3857 	}
3858 
3859 	return (0);
3860 
3861 fail:
3862 	em_free_receive_structures(adapter);
3863 	return (error);
3864 }
3865 
3866 
3867 /*********************************************************************
3868  *
3869  *  Initialize a receive ring and its buffers.
3870  *
3871  **********************************************************************/
3872 static int
3873 em_setup_receive_ring(struct rx_ring *rxr)
3874 {
3875 	struct	adapter 	*adapter = rxr->adapter;
3876 	struct em_buffer	*rxbuf;
3877 	bus_dma_segment_t	seg[1];
3878 	int			rsize, nsegs, error;
3879 
3880 
3881 	/* Clear the ring contents */
3882 	EM_RX_LOCK(rxr);
3883 	rsize = roundup2(adapter->num_rx_desc *
3884 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3885 	bzero((void *)rxr->rx_base, rsize);
3886 
3887 	/*
3888 	** Free current RX buffer structs and their mbufs
3889 	*/
3890 	for (int i = 0; i < adapter->num_rx_desc; i++) {
3891 		rxbuf = &rxr->rx_buffers[i];
3892 		if (rxbuf->m_head != NULL) {
3893 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3894 			    BUS_DMASYNC_POSTREAD);
3895 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3896 			m_freem(rxbuf->m_head);
3897 		}
3898 	}
3899 
3900 	/* Now replenish the mbufs */
3901 	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3902 
3903 		rxbuf = &rxr->rx_buffers[j];
3904 		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
3905 		    M_PKTHDR, adapter->rx_mbuf_sz);
3906 		if (rxbuf->m_head == NULL) {
			EM_RX_UNLOCK(rxr);	/* don't leak the RX lock on failure */
3907 			return (ENOBUFS);
		}
3908 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
3909 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3910 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
3911 
3912 		/* Get the memory mapping */
3913 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3914 		    rxbuf->map, rxbuf->m_head, seg,
3915 		    &nsegs, BUS_DMA_NOWAIT);
3916 		if (error != 0) {
3917 			m_freem(rxbuf->m_head);
3918 			rxbuf->m_head = NULL;
			EM_RX_UNLOCK(rxr);	/* don't leak the RX lock on failure */
3919 			return (error);
3920 		}
3921 		bus_dmamap_sync(rxr->rxtag,
3922 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3923 
3924 		/* Update descriptor */
3925 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3926 	}
3927 
3928 
3929 	/* Setup our descriptor indices */
3930 	rxr->next_to_check = 0;
3931 	rxr->next_to_refresh = 0;
3932 
3933 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3934 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3935 
3936 	EM_RX_UNLOCK(rxr);
3937 	return (0);
3938 }
3939 
3940 /*********************************************************************
3941  *
3942  *  Initialize all receive rings.
3943  *
3944  **********************************************************************/
3945 static int
3946 em_setup_receive_structures(struct adapter *adapter)
3947 {
3948 	struct rx_ring *rxr = adapter->rx_rings;
3949 	int j;
3950 
3951 	for (j = 0; j < adapter->num_queues; j++, rxr++)
3952 		if (em_setup_receive_ring(rxr))
3953 			goto fail;
3954 
3955 	return (0);
3956 fail:
3957 	/*
3958 	 * Free RX buffers allocated so far; we will only handle
3959 	 * the rings that completed, the failing case will have
3960 	 * cleaned up for itself. 'j' failed, so it's the terminus.
3961 	 */
3962 	for (int i = 0; i < j; ++i) {
3963 		rxr = &adapter->rx_rings[i];
3964 		for (int n = 0; n < adapter->num_rx_desc; n++) {
3965 			struct em_buffer *rxbuf;
3966 			rxbuf = &rxr->rx_buffers[n];
3967 			if (rxbuf->m_head != NULL) {
3968 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3969 			  	  BUS_DMASYNC_POSTREAD);
3970 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3971 				m_freem(rxbuf->m_head);
3972 				rxbuf->m_head = NULL;
3973 			}
3974 		}
3975 	}
3976 
3977 	return (ENOBUFS);
3978 }
3979 
3980 /*********************************************************************
3981  *
3982  *  Free all receive rings.
3983  *
3984  **********************************************************************/
3985 static void
3986 em_free_receive_structures(struct adapter *adapter)
3987 {
3988 	struct rx_ring *rxr = adapter->rx_rings;
3989 
3990 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3991 		em_free_receive_buffers(rxr);
3992 		/* Free the ring memory as well */
3993 		em_dma_free(adapter, &rxr->rxdma);
3994 		EM_RX_LOCK_DESTROY(rxr);
3995 	}
3996 
3997 	free(adapter->rx_rings, M_DEVBUF);
3998 }
3999 
4000 
4001 /*********************************************************************
4002  *
4003  *  Free receive ring data structures
4004  *
4005  **********************************************************************/
4006 static void
4007 em_free_receive_buffers(struct rx_ring *rxr)
4008 {
4009 	struct adapter		*adapter = rxr->adapter;
4010 	struct em_buffer	*rxbuf = NULL;
4011 
4012 	INIT_DEBUGOUT("free_receive_buffers: begin");
4013 
4014 	if (rxr->rx_buffers != NULL) {
4015 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4016 			rxbuf = &rxr->rx_buffers[i];
4017 			if (rxbuf->map != NULL) {
4018 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4019 				    BUS_DMASYNC_POSTREAD);
4020 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4021 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4022 			}
4023 			if (rxbuf->m_head != NULL) {
4024 				m_freem(rxbuf->m_head);
4025 				rxbuf->m_head = NULL;
4026 			}
4027 		}
4028 		free(rxr->rx_buffers, M_DEVBUF);
4029 		rxr->rx_buffers = NULL;
4030 	}
4031 
4032 	if (rxr->rxtag != NULL) {
4033 		bus_dma_tag_destroy(rxr->rxtag);
4034 		rxr->rxtag = NULL;
4035 	}
4036 
4037 	return;
4038 }
4039 
4040 
4041 /*********************************************************************
4042  *
4043  *  Enable receive unit.
4044  *
4045  **********************************************************************/
4046 #define MAX_INTS_PER_SEC	8000
4047 #define DEFAULT_ITR	     (1000000000/(MAX_INTS_PER_SEC * 256))
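/*
 * Worked example: the ITR register counts in 256ns units, so
 * DEFAULT_ITR = 10^9 / (8000 * 256) ~= 488, giving an interval of
 * 488 * 256ns ~= 125us, i.e. roughly 8000 interrupts per second.
 */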
4048 
4049 static void
4050 em_initialize_receive_unit(struct adapter *adapter)
4051 {
4052 	struct rx_ring	*rxr = adapter->rx_rings;
4053 	struct ifnet	*ifp = adapter->ifp;
4054 	struct e1000_hw	*hw = &adapter->hw;
4055 	u64	bus_addr;
4056 	u32	rctl, rxcsum;
4057 
4058 	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4059 
4060 	/*
4061 	 * Make sure receives are disabled while setting
4062 	 * up the descriptor ring
4063 	 */
4064 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4065 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4066 
4067 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4068 	    adapter->rx_abs_int_delay.value);
4069 	/*
4070 	 * Set the interrupt throttling rate. Value is calculated
4071 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4072 	 */
4073 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4074 
4075 	/*
4076 	** When using MSIX interrupts we need to throttle
4077 	** using the EITR register (82574 only)
4078 	*/
4079 	if (hw->mac.type == e1000_82574)
4080 		for (int i = 0; i < 4; i++)
4081 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4082 			    DEFAULT_ITR);
4083 
4084 	/* Disable accelerated acknowledgement */
4085 	if (adapter->hw.mac.type == e1000_82574)
4086 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4087 
4088 	if (ifp->if_capenable & IFCAP_RXCSUM) {
4089 		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4090 		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4091 		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4092 	}
4093 
4094 	/*
4095 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4096 	** long latencies are observed, like Lenovo X60. This
4097 	** change eliminates the problem, but since having positive
4098 	** values in RDTR is a known source of problems on other
4099 	** platforms another solution is being sought.
4100 	*/
4101 	if (hw->mac.type == e1000_82573)
4102 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4103 
4104 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4105 		/* Setup the Base and Length of the Rx Descriptor Ring */
4106 		bus_addr = rxr->rxdma.dma_paddr;
4107 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4108 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4109 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4110 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4111 		/* Setup the Head and Tail Descriptor Pointers */
4112 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4113 		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4114 	}
4115 
4116 	/* Set early receive threshold on appropriate hw */
4117 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4118 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4119 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4120 	    (ifp->if_mtu > ETHERMTU)) {
4121 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4122 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4123 		E1000_WRITE_REG(hw, E1000_ERT, 0x100 | (1 << 13));
4124 	}
4125 
4126 	if (adapter->hw.mac.type == e1000_pch2lan) {
4127 		if (ifp->if_mtu > ETHERMTU)
4128 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4129 		else
4130 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4131 	}
4132 
4133 	/* Setup the Receive Control Register */
4134 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4135 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4136 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4137 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4138 
4139         /* Strip the CRC */
4140         rctl |= E1000_RCTL_SECRC;
4141 
4142         /* Make sure VLAN Filters are off */
4143         rctl &= ~E1000_RCTL_VFE;
4144 	rctl &= ~E1000_RCTL_SBP;
4145 
4146 	if (adapter->rx_mbuf_sz == MCLBYTES)
4147 		rctl |= E1000_RCTL_SZ_2048;
4148 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4149 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4150 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4151 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4152 
4153 	if (ifp->if_mtu > ETHERMTU)
4154 		rctl |= E1000_RCTL_LPE;
4155 	else
4156 		rctl &= ~E1000_RCTL_LPE;
4157 
4158 	/* Write out the settings */
4159 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4160 
4161 	return;
4162 }
4163 
4164 
4165 /*********************************************************************
4166  *
4167  *  This routine executes in interrupt context. It replenishes
4168  *  the mbufs in the descriptor ring and sends data which has
4169  *  been DMA'ed into host memory up to the upper layer.
4170  *
4171  *  We loop at most count times if count is > 0, or until done if
4172  *  count < 0.
4173  *
4174  *  For polling we also now return the number of cleaned packets
4175  *********************************************************************/
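/*
 * Illustrative calling convention (assumed from the description
 * above): a polling handler passes its budget and collects the
 * cleaned count, e.g. em_rxeof(rxr, count, &rx_done), while an
 * interrupt path may pass a negative count to run until done.
 */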
4176 static bool
4177 em_rxeof(struct rx_ring *rxr, int count, int *done)
4178 {
4179 	struct adapter		*adapter = rxr->adapter;
4180 	struct ifnet		*ifp = adapter->ifp;
4181 	struct mbuf		*mp, *sendmp;
4182 	u8			status = 0;
4183 	u16 			len;
4184 	int			i, processed, rxdone = 0;
4185 	bool			eop;
4186 	struct e1000_rx_desc	*cur;
4187 
4188 	EM_RX_LOCK(rxr);
4189 
4190 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4191 
4192 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4193 			break;
4194 
4195 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4196 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4197 
4198 		cur = &rxr->rx_base[i];
4199 		status = cur->status;
4200 		mp = sendmp = NULL;
4201 
4202 		if ((status & E1000_RXD_STAT_DD) == 0)
4203 			break;
4204 
4205 		len = le16toh(cur->length);
4206 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4207 
4208 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4209 		    (rxr->discard == TRUE)) {
4210 			ifp->if_ierrors++;
4211 			++rxr->rx_discarded;
4212 			if (!eop) /* Catch subsequent segs */
4213 				rxr->discard = TRUE;
4214 			else
4215 				rxr->discard = FALSE;
4216 			em_rx_discard(rxr, i);
4217 			goto next_desc;
4218 		}
4219 
4220 		/* Assign correct length to the current fragment */
4221 		mp = rxr->rx_buffers[i].m_head;
4222 		mp->m_len = len;
4223 
4224 		/* Trigger for refresh */
4225 		rxr->rx_buffers[i].m_head = NULL;
4226 
4227 		/* First segment? */
4228 		if (rxr->fmp == NULL) {
4229 			mp->m_pkthdr.len = len;
4230 			rxr->fmp = rxr->lmp = mp;
4231 		} else {
4232 			/* Chain mbuf's together */
4233 			mp->m_flags &= ~M_PKTHDR;
4234 			rxr->lmp->m_next = mp;
4235 			rxr->lmp = mp;
4236 			rxr->fmp->m_pkthdr.len += len;
4237 		}
4238 
4239 		if (eop) {
4240 			--count;
4241 			sendmp = rxr->fmp;
4242 			sendmp->m_pkthdr.rcvif = ifp;
4243 			ifp->if_ipackets++;
4244 			em_receive_checksum(cur, sendmp);
4245 #ifndef __NO_STRICT_ALIGNMENT
4246 			if (adapter->max_frame_size >
4247 			    (MCLBYTES - ETHER_ALIGN) &&
4248 			    em_fixup_rx(rxr) != 0)
4249 				goto skip;
4250 #endif
4251 			if (status & E1000_RXD_STAT_VP) {
4252 				sendmp->m_pkthdr.ether_vtag =
4253 				    (le16toh(cur->special) &
4254 				    E1000_RXD_SPC_VLAN_MASK);
4255 				sendmp->m_flags |= M_VLANTAG;
4256 			}
4257 #ifdef EM_MULTIQUEUE
4258 			sendmp->m_pkthdr.flowid = rxr->msix;
4259 			sendmp->m_flags |= M_FLOWID;
4260 #endif
4261 #ifndef __NO_STRICT_ALIGNMENT
4262 skip:
4263 #endif
4264 			rxr->fmp = rxr->lmp = NULL;
4265 		}
4266 next_desc:
4267 		/* Zero out the receive descriptors status. */
4268 		cur->status = 0;
4269 		++rxdone;	/* cumulative for POLL */
4270 		++processed;
4271 
4272 		/* Advance our pointers to the next descriptor. */
4273 		if (++i == adapter->num_rx_desc)
4274 			i = 0;
4275 
4276 		/* Send to the stack */
4277 		if (sendmp != NULL) {
4278 			rxr->next_to_check = i;
4279 			EM_RX_UNLOCK(rxr);
4280 			(*ifp->if_input)(ifp, sendmp);
4281 			EM_RX_LOCK(rxr);
4282 			i = rxr->next_to_check;
4283 		}
4284 
4285 		/* Only refresh mbufs every 8 descriptors */
4286 		if (processed == 8) {
4287 			em_refresh_mbufs(rxr, i);
4288 			processed = 0;
4289 		}
4290 	}
4291 
4292 	/* Catch any remaining refresh work */
4293 	em_refresh_mbufs(rxr, i);
4294 
4295 	rxr->next_to_check = i;
4296 	if (done != NULL)
4297 		*done = rxdone;
4298 	EM_RX_UNLOCK(rxr);
4299 
4300 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4301 }
4302 
4303 static __inline void
4304 em_rx_discard(struct rx_ring *rxr, int i)
4305 {
4306 	struct em_buffer	*rbuf;
4307 
4308 	rbuf = &rxr->rx_buffers[i];
4309 	/* Free any previous pieces */
4310 	if (rxr->fmp != NULL) {
4311 		rxr->fmp->m_flags |= M_PKTHDR;
4312 		m_freem(rxr->fmp);
4313 		rxr->fmp = NULL;
4314 		rxr->lmp = NULL;
4315 	}
4316 	/*
4317 	** Free buffer and allow em_refresh_mbufs()
4318 	** to clean up and recharge buffer.
4319 	*/
4320 	if (rbuf->m_head) {
4321 		m_free(rbuf->m_head);
4322 		rbuf->m_head = NULL;
4323 	}
4324 	return;
4325 }
4326 
4327 #ifndef __NO_STRICT_ALIGNMENT
4328 /*
4329  * When jumbo frames are enabled we should realign the entire payload on
4330  * architectures with strict alignment. This is a serious design mistake
4331  * of the 8254x, as it nullifies the benefit of DMA: the 8254x only allows
4332  * RX buffer sizes of 2048/4096/8192/16384, while what we really want is
4333  * 2048 - ETHER_ALIGN so the payload ends up aligned. On architectures
4334  * without strict alignment restrictions the 8254x still performs unaligned
4335  * memory accesses, which reduce performance as well. To avoid copying an
4336  * entire frame just to align it, we allocate a new mbuf, copy the ethernet
4337  * header into it, and prepend the new mbuf to the existing mbuf chain.
4338  *
4339  * Be aware that the best performance of the 8254x is achieved only when
4340  * jumbo frames are not used at all on architectures with strict alignment.
4341  */
4342 static int
4343 em_fixup_rx(struct rx_ring *rxr)
4344 {
4345 	struct adapter *adapter = rxr->adapter;
4346 	struct mbuf *m, *n;
4347 	int error;
4348 
4349 	error = 0;
4350 	m = rxr->fmp;
4351 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4352 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4353 		m->m_data += ETHER_HDR_LEN;
4354 	} else {
4355 		MGETHDR(n, M_DONTWAIT, MT_DATA);
4356 		if (n != NULL) {
4357 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4358 			m->m_data += ETHER_HDR_LEN;
4359 			m->m_len -= ETHER_HDR_LEN;
4360 			n->m_len = ETHER_HDR_LEN;
4361 			M_MOVE_PKTHDR(n, m);
4362 			n->m_next = m;
4363 			rxr->fmp = n;
4364 		} else {
4365 			adapter->dropped_pkts++;
4366 			m_freem(rxr->fmp);
4367 			rxr->fmp = NULL;
4368 			error = ENOMEM;
4369 		}
4370 	}
4371 
4372 	return (error);
4373 }
4374 #endif
4375 
4376 /*********************************************************************
4377  *
4378  *  Verify that the hardware indicated that the checksum is valid.
4379  *  Inform the stack about the status of checksum so that stack
4380  *  doesn't spend time verifying the checksum.
4381  *
4382  *********************************************************************/
4383 static void
4384 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4385 {
4386 	/* If the Ignore Checksum Indication bit is set, report nothing */
4387 	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4388 		mp->m_pkthdr.csum_flags = 0;
4389 		return;
4390 	}
4391 
4392 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4393 		/* Did it pass? */
4394 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4395 			/* IP Checksum Good */
4396 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4397 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4398 
4399 		} else {
4400 			mp->m_pkthdr.csum_flags = 0;
4401 		}
4402 	}
4403 
4404 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4405 		/* Did it pass? */
4406 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4407 			mp->m_pkthdr.csum_flags |=
4408 			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4409 			mp->m_pkthdr.csum_data = htons(0xffff);
4410 		}
4411 	}
4412 }
4413 
4414 /*
4415  * This routine is run via a VLAN
4416  * config EVENT
4417  */
4418 static void
4419 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4420 {
4421 	struct adapter	*adapter = ifp->if_softc;
4422 	u32		index, bit;
4423 
4424 	if (ifp->if_softc !=  arg)   /* Not our event */
4425 		return;
4426 
4427 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4428                 return;
4429 
4430 	EM_CORE_LOCK(adapter);
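	/*
	 * The shadow VFTA mirrors the hardware table: 128 32-bit words
	 * covering 4096 VLAN IDs, with bits 11..5 of the tag selecting
	 * the word and bits 4..0 the bit.  Illustrative example:
	 * vtag 100 -> index 3, bit 4.
	 */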
4431 	index = (vtag >> 5) & 0x7F;
4432 	bit = vtag & 0x1F;
4433 	adapter->shadow_vfta[index] |= (1 << bit);
4434 	++adapter->num_vlans;
4435 	/* Re-init to load the changes */
4436 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4437 		em_init_locked(adapter);
4438 	EM_CORE_UNLOCK(adapter);
4439 }
4440 
4441 /*
4442  * This routine is run via a VLAN
4443  * unconfig EVENT
4444  */
4445 static void
4446 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4447 {
4448 	struct adapter	*adapter = ifp->if_softc;
4449 	u32		index, bit;
4450 
4451 	if (ifp->if_softc !=  arg)
4452 		return;
4453 
4454 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4455                 return;
4456 
4457 	EM_CORE_LOCK(adapter);
4458 	index = (vtag >> 5) & 0x7F;
4459 	bit = vtag & 0x1F;
4460 	adapter->shadow_vfta[index] &= ~(1 << bit);
4461 	--adapter->num_vlans;
4462 	/* Re-init to load the changes */
4463 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4464 		em_init_locked(adapter);
4465 	EM_CORE_UNLOCK(adapter);
4466 }
4467 
4468 static void
4469 em_setup_vlan_hw_support(struct adapter *adapter)
4470 {
4471 	struct e1000_hw *hw = &adapter->hw;
4472 	u32             reg;
4473 
4474 	/*
4475 	** We get here thru init_locked, meaning
4476 	** a soft reset; that has already cleared
4477 	** the VFTA and other state, so if no
4478 	** VLANs have been registered do nothing.
4479 	*/
4480 	if (adapter->num_vlans == 0)
4481                 return;
4482 
4483 	/*
4484 	** A soft reset zeroes out the VFTA, so
4485 	** we need to repopulate it now.
4486 	*/
4487 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4488                 if (adapter->shadow_vfta[i] != 0)
4489 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4490                             i, adapter->shadow_vfta[i]);
4491 
4492 	reg = E1000_READ_REG(hw, E1000_CTRL);
4493 	reg |= E1000_CTRL_VME;
4494 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4495 
4496 	/* Enable the Filter Table */
4497 	reg = E1000_READ_REG(hw, E1000_RCTL);
4498 	reg &= ~E1000_RCTL_CFIEN;
4499 	reg |= E1000_RCTL_VFE;
4500 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4501 }
4502 
4503 static void
4504 em_enable_intr(struct adapter *adapter)
4505 {
4506 	struct e1000_hw *hw = &adapter->hw;
4507 	u32 ims_mask = IMS_ENABLE_MASK;
4508 
4509 	if (hw->mac.type == e1000_82574) {
4510 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4511 		ims_mask |= EM_MSIX_MASK;
4512 	}
4513 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4514 }
4515 
4516 static void
4517 em_disable_intr(struct adapter *adapter)
4518 {
4519 	struct e1000_hw *hw = &adapter->hw;
4520 
4521 	if (hw->mac.type == e1000_82574)
4522 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4523 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4524 }
4525 
4526 /*
4527  * Bit of a misnomer, what this really means is
4528  * to enable OS management of the system... aka
4529  * to disable special hardware management features
4530  */
4531 static void
4532 em_init_manageability(struct adapter *adapter)
4533 {
4534 	/* A shared code workaround */
4535 #define E1000_82542_MANC2H E1000_MANC2H
4536 	if (adapter->has_manage) {
4537 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4538 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4539 
4540 		/* disable hardware interception of ARP */
4541 		manc &= ~(E1000_MANC_ARP_EN);
4542 
4543                 /* enable receiving management packets to the host */
4544 		manc |= E1000_MANC_EN_MNG2HOST;
4545 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4546 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4547 		manc2h |= E1000_MNG2HOST_PORT_623;
4548 		manc2h |= E1000_MNG2HOST_PORT_664;
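		/*
		 * Ports 623 and 664 are the RMCP and secure-RMCP ports
		 * used by ASF/IPMI-style management agents; forwarding
		 * them lets that traffic reach the host as well.
		 */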
4549 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4550 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4551 	}
4552 }
4553 
4554 /*
4555  * Give control back to hardware management
4556  * controller if there is one.
4557  */
4558 static void
4559 em_release_manageability(struct adapter *adapter)
4560 {
4561 	if (adapter->has_manage) {
4562 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4563 
4564 		/* re-enable hardware interception of ARP */
4565 		manc |= E1000_MANC_ARP_EN;
4566 		manc &= ~E1000_MANC_EN_MNG2HOST;
4567 
4568 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4569 	}
4570 }
4571 
4572 /*
4573  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4574  * For ASF and Pass Through versions of f/w this means
4575  * that the driver is loaded. For AMT versions of the f/w
4576  * this means that the network i/f is open.
4577  */
4578 static void
4579 em_get_hw_control(struct adapter *adapter)
4580 {
4581 	u32 ctrl_ext, swsm;
4582 
4583 	if (adapter->hw.mac.type == e1000_82573) {
4584 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4585 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4586 		    swsm | E1000_SWSM_DRV_LOAD);
4587 		return;
4588 	}
4589 	/* else */
4590 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4591 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4592 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4593 	return;
4594 }
4595 
4596 /*
4597  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4598  * For ASF and Pass Through versions of f/w this means that
4599  * the driver is no longer loaded. For AMT versions of the
4600  * f/w this means that the network i/f is closed.
4601  */
4602 static void
4603 em_release_hw_control(struct adapter *adapter)
4604 {
4605 	u32 ctrl_ext, swsm;
4606 
4607 	if (!adapter->has_manage)
4608 		return;
4609 
4610 	if (adapter->hw.mac.type == e1000_82573) {
4611 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4612 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4613 		    swsm & ~E1000_SWSM_DRV_LOAD);
4614 		return;
4615 	}
4616 	/* else */
4617 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4618 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4619 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4620 	return;
4621 }
4622 
4623 static int
4624 em_is_valid_ether_addr(u8 *addr)
4625 {
4626 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4627 
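	/*
	 * A valid station address must be unicast (bit 0 of the first
	 * octet clear) and not all zeros.
	 */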
4628 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4629 		return (FALSE);
4630 	}
4631 
4632 	return (TRUE);
4633 }
4634 
4635 /*
4636 ** Parse the interface capabilities with regard
4637 ** to both system management and wake-on-lan for
4638 ** later use.
4639 */
4640 static void
4641 em_get_wakeup(device_t dev)
4642 {
4643 	struct adapter	*adapter = device_get_softc(dev);
4644 	u16		eeprom_data = 0, device_id, apme_mask;
4645 
4646 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4647 	apme_mask = EM_EEPROM_APME;
4648 
4649 	switch (adapter->hw.mac.type) {
4650 	case e1000_82573:
4651 	case e1000_82583:
4652 		adapter->has_amt = TRUE;
4653 		/* Falls thru */
4654 	case e1000_82571:
4655 	case e1000_82572:
4656 	case e1000_80003es2lan:
4657 		if (adapter->hw.bus.func == 1) {
4658 			e1000_read_nvm(&adapter->hw,
4659 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4660 			break;
4661 		} else
4662 			e1000_read_nvm(&adapter->hw,
4663 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4664 		break;
4665 	case e1000_ich8lan:
4666 	case e1000_ich9lan:
4667 	case e1000_ich10lan:
4668 	case e1000_pchlan:
4669 	case e1000_pch2lan:
4670 		apme_mask = E1000_WUC_APME;
4671 		adapter->has_amt = TRUE;
4672 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4673 		break;
4674 	default:
4675 		e1000_read_nvm(&adapter->hw,
4676 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4677 		break;
4678 	}
4679 	if (eeprom_data & apme_mask)
4680 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4681 	/*
4682          * We have the eeprom settings, now apply the special cases
4683          * where the eeprom may be wrong or the board won't support
4684          * wake on lan on a particular port
4685 	 */
4686 	device_id = pci_get_device(dev);
4687         switch (device_id) {
4688 	case E1000_DEV_ID_82571EB_FIBER:
4689 		/* Wake events only supported on port A for dual fiber
4690 		 * regardless of eeprom setting */
4691 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4692 		    E1000_STATUS_FUNC_1)
4693 			adapter->wol = 0;
4694 		break;
4695 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4696 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4697 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4698                 /* if quad port adapter, disable WoL on all but port A */
4699 		if (global_quad_port_a != 0)
4700 			adapter->wol = 0;
4701 		/* Reset for multiple quad port adapters */
4702 		if (++global_quad_port_a == 4)
4703 			global_quad_port_a = 0;
4704                 break;
4705 	}
4706 	return;
4707 }
4708 
4709 
4710 /*
4711  * Enable PCI Wake On Lan capability
4712  */
4713 static void
4714 em_enable_wakeup(device_t dev)
4715 {
4716 	struct adapter	*adapter = device_get_softc(dev);
4717 	struct ifnet	*ifp = adapter->ifp;
4718 	u32		pmc, ctrl, ctrl_ext, rctl;
4719 	u16     	status;
4720 
4721 	if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4722 		return;
4723 
4724 	/* Advertise the wakeup capability */
4725 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4726 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4727 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4728 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4729 
4730 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4731 	    (adapter->hw.mac.type == e1000_pchlan) ||
4732 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4733 	    (adapter->hw.mac.type == e1000_ich10lan)) {
4734 		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4735 		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4736 	}
4737 
4738 	/* Keep the laser running on Fiber adapters */
4739 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4740 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4741 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4742 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4743 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4744 	}
4745 
4746 	/*
4747 	** Determine type of Wakeup: note that wol
4748 	** is set with all bits on by default.
4749 	*/
4750 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4751 		adapter->wol &= ~E1000_WUFC_MAG;
4752 
4753 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4754 		adapter->wol &= ~E1000_WUFC_MC;
4755 	else {
4756 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4757 		rctl |= E1000_RCTL_MPE;
4758 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4759 	}
4760 
4761 	if ((adapter->hw.mac.type == e1000_pchlan) ||
4762 	    (adapter->hw.mac.type == e1000_pch2lan)) {
4763 		if (em_enable_phy_wakeup(adapter))
4764 			return;
4765 	} else {
4766 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4767 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4768 	}
4769 
4770 	if (adapter->hw.phy.type == e1000_phy_igp_3)
4771 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4772 
4773         /* Request PME */
4774         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4775 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4776 	if (ifp->if_capenable & IFCAP_WOL)
4777 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4778         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4779 
4780 	return;
4781 }
4782 
4783 /*
4784 ** WOL in the newer chipset interfaces (pchlan)
4785 ** requires things to be copied into the PHY
4786 */
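/*
 * The sequence implemented below: mirror the MAC receive-address
 * registers and multicast table into the PHY, translate RCTL into
 * BM_RCTL, arm wakeup in the MAC (WUC/WUFC) and in the PHY
 * (BM_WUC/BM_WUFC), and finally set the host-wakeup enable bits
 * on PHY page 769.
 */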
4787 static int
4788 em_enable_phy_wakeup(struct adapter *adapter)
4789 {
4790 	struct e1000_hw *hw = &adapter->hw;
4791 	u32 mreg, ret = 0;
4792 	u16 preg;
4793 
4794 	/* copy MAC RARs to PHY RARs */
4795 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4796 
4797 	/* copy MAC MTA to PHY MTA */
4798 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4799 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4800 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4801 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4802 		    (u16)((mreg >> 16) & 0xFFFF));
4803 	}
4804 
4805 	/* configure PHY Rx Control register */
4806 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4807 	mreg = E1000_READ_REG(hw, E1000_RCTL);
4808 	if (mreg & E1000_RCTL_UPE)
4809 		preg |= BM_RCTL_UPE;
4810 	if (mreg & E1000_RCTL_MPE)
4811 		preg |= BM_RCTL_MPE;
4812 	preg &= ~(BM_RCTL_MO_MASK);
4813 	if (mreg & E1000_RCTL_MO_3)
4814 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4815 				<< BM_RCTL_MO_SHIFT);
4816 	if (mreg & E1000_RCTL_BAM)
4817 		preg |= BM_RCTL_BAM;
4818 	if (mreg & E1000_RCTL_PMCF)
4819 		preg |= BM_RCTL_PMCF;
4820 	mreg = E1000_READ_REG(hw, E1000_CTRL);
4821 	if (mreg & E1000_CTRL_RFCE)
4822 		preg |= BM_RCTL_RFCE;
4823 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4824 
4825 	/* enable PHY wakeup in MAC register */
4826 	E1000_WRITE_REG(hw, E1000_WUC,
4827 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4828 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4829 
4830 	/* configure and enable PHY wakeup in PHY registers */
4831 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4832 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4833 
4834 	/* activate PHY wakeup */
4835 	ret = hw->phy.ops.acquire(hw);
4836 	if (ret) {
4837 		printf("Could not acquire PHY\n");
4838 		return ret;
4839 	}
4840 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4841 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4842 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4843 	if (ret) {
4844 		printf("Could not read PHY page 769\n");
4845 		goto out;
4846 	}
4847 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4848 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4849 	if (ret)
4850 		printf("Could not set PHY Host Wakeup bit\n");
4851 out:
4852 	hw->phy.ops.release(hw);
4853 
4854 	return ret;
4855 }
4856 
4857 static void
4858 em_led_func(void *arg, int onoff)
4859 {
4860 	struct adapter	*adapter = arg;
4861 
4862 	EM_CORE_LOCK(adapter);
4863 	if (onoff) {
4864 		e1000_setup_led(&adapter->hw);
4865 		e1000_led_on(&adapter->hw);
4866 	} else {
4867 		e1000_led_off(&adapter->hw);
4868 		e1000_cleanup_led(&adapter->hw);
4869 	}
4870 	EM_CORE_UNLOCK(adapter);
4871 }
4872 
4873 /*
4874 ** Disable the L0S and L1 LINK states
4875 */
4876 static void
4877 em_disable_aspm(struct adapter *adapter)
4878 {
4879 	int		base, reg;
4880 	u16		link_cap, link_ctrl;
4881 	device_t	dev = adapter->dev;
4882 
4883 	switch (adapter->hw.mac.type) {
4884 		case e1000_82573:
4885 		case e1000_82574:
4886 		case e1000_82583:
4887 			break;
4888 		default:
4889 			return;
4890 	}
4891 	if (pci_find_extcap(dev, PCIY_EXPRESS, &base) != 0)
4892 		return;
4893 	reg = base + PCIR_EXPRESS_LINK_CAP;
4894 	link_cap = pci_read_config(dev, reg, 2);
4895 	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
4896 		return;
4897 	reg = base + PCIR_EXPRESS_LINK_CTL;
4898 	link_ctrl = pci_read_config(dev, reg, 2);
4899 	link_ctrl &= 0xFFFC; /* clear the ASPM L0s/L1 enable bits (bits 0-1) */
4900 	pci_write_config(dev, reg, link_ctrl, 2);
4901 	return;
4902 }
4903 
4904 /**********************************************************************
4905  *
4906  *  Update the board statistics counters.
4907  *
4908  **********************************************************************/
4909 static void
4910 em_update_stats_counters(struct adapter *adapter)
4911 {
4912 	struct ifnet   *ifp;
4913 
4914 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4915 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4916 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4917 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4918 	}
4919 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4920 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4921 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4922 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4923 
4924 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4925 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4926 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4927 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4928 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4929 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4930 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4931 	/*
4932 	** For watchdog management we need to know if we have been
4933 	** paused during the last interval, so capture that here.
4934 	*/
4935 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4936 	adapter->stats.xoffrxc += adapter->pause_frames;
4937 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4938 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4939 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4940 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4941 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4942 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4943 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4944 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4945 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4946 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4947 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4948 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4949 
4950 	/* For the 64-bit byte counters the low dword must be read first; */
4951 	/* both registers clear on the read of the high dword.  Read them */
4952 	/* in separate statements so the accesses cannot be reordered.    */
4953 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL);
4954 	adapter->stats.gorc += (u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32;
4955 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL);
4956 	adapter->stats.gotc += (u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32;
4957 
4958 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4959 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4960 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4961 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4962 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4963 
4964 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4965 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4966 
4967 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4968 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4969 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4970 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4971 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4972 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4973 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4974 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4975 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4976 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4977 
4978 	/* Interrupt Counts */
4979 
4980 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
4981 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
4982 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
4983 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
4984 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
4985 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
4986 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
4987 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
4988 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
4989 
4990 	if (adapter->hw.mac.type >= e1000_82543) {
4991 		adapter->stats.algnerrc +=
4992 		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4993 		adapter->stats.rxerrc +=
4994 		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4995 		adapter->stats.tncrs +=
4996 		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4997 		adapter->stats.cexterr +=
4998 		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4999 		adapter->stats.tsctc +=
5000 		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5001 		adapter->stats.tsctfc +=
5002 		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5003 	}
5004 	ifp = adapter->ifp;
5005 
5006 	ifp->if_collisions = adapter->stats.colc;
5007 
5008 	/* Rx Errors */
5009 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5010 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5011 	    adapter->stats.ruc + adapter->stats.roc +
5012 	    adapter->stats.mpc + adapter->stats.cexterr;
5013 
5014 	/* Tx Errors */
5015 	ifp->if_oerrors = adapter->stats.ecol +
5016 	    adapter->stats.latecol + adapter->watchdog_events;
5017 }
5018 
5019 /* Export a single 32-bit register via a read-only sysctl. */
5020 static int
5021 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5022 {
5023 	struct adapter *adapter;
5024 	u_int val;
5025 
5026 	adapter = oidp->oid_arg1;
5027 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5028 	return (sysctl_handle_int(oidp, &val, 0, req));
5029 }
5030 
5031 /*
5032  * Add sysctl variables, one per statistic, to the system.
5033  */
5034 static void
5035 em_add_hw_stats(struct adapter *adapter)
5036 {
5037 	device_t dev = adapter->dev;
5038 
5039 	struct tx_ring *txr = adapter->tx_rings;
5040 	struct rx_ring *rxr = adapter->rx_rings;
5041 
5042 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5043 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5044 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5045 	struct e1000_hw_stats *stats = &adapter->stats;
5046 
5047 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5048 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5049 
5050 #define QUEUE_NAME_LEN 32
5051 	char namebuf[QUEUE_NAME_LEN];
5052 
5053 	/* Driver Statistics */
5054 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5055 			CTLFLAG_RD, &adapter->link_irq, 0,
5056 			"Link MSIX IRQ Handled");
5057 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5058 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5059 			 "Std mbuf failed");
5060 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5061 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5062 			 "Std mbuf cluster failed");
5063 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5064 			CTLFLAG_RD, &adapter->dropped_pkts,
5065 			"Driver dropped packets");
5066 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5067 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5068 			"Driver tx dma failure in xmit");
5069 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5070 			CTLFLAG_RD, &adapter->rx_overruns,
5071 			"RX overruns");
5072 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5073 			CTLFLAG_RD, &adapter->watchdog_events,
5074 			"Watchdog timeouts");
5075 
5076 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5077 			CTLFLAG_RD, adapter, E1000_CTRL,
5078 			em_sysctl_reg_handler, "IU",
5079 			"Device Control Register");
5080 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5081 			CTLFLAG_RD, adapter, E1000_RCTL,
5082 			em_sysctl_reg_handler, "IU",
5083 			"Receiver Control Register");
5084 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5085 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5086 			"Flow Control High Watermark");
5087 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5088 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5089 			"Flow Control Low Watermark");
5090 
5091 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5092 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5093 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5094 					    CTLFLAG_RD, NULL, "Queue Name");
5095 		queue_list = SYSCTL_CHILDREN(queue_node);
5096 
5097 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5098 				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5099 				em_sysctl_reg_handler, "IU",
5100  				"Transmit Descriptor Head");
5101 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5102 				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5103 				em_sysctl_reg_handler, "IU",
5104  				"Transmit Descriptor Tail");
5105 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5106 				CTLFLAG_RD, &txr->tx_irq,
5107 				"Queue MSI-X Transmit Interrupts");
5108 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5109 				CTLFLAG_RD, &txr->no_desc_avail,
5110 				"Queue No Descriptor Available");
5111 
5112 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5113 				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5114 				em_sysctl_reg_handler, "IU",
5115 				"Receive Descriptor Head");
5116 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5117 				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5118 				em_sysctl_reg_handler, "IU",
5119 				"Receive Descriptor Tail");
5120 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5121 				CTLFLAG_RD, &rxr->rx_irq,
5122 				"Queue MSI-X Receive Interrupts");
5123 	}
5124 
5125 	/* MAC stats get their own sub node */
5126 
5127 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5128 				    CTLFLAG_RD, NULL, "Statistics");
5129 	stat_list = SYSCTL_CHILDREN(stat_node);
5130 
5131 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5132 			CTLFLAG_RD, &stats->ecol,
5133 			"Excessive collisions");
5134 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5135 			CTLFLAG_RD, &stats->scc,
5136 			"Single collisions");
5137 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5138 			CTLFLAG_RD, &stats->mcc,
5139 			"Multiple collisions");
5140 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5141 			CTLFLAG_RD, &stats->latecol,
5142 			"Late collisions");
5143 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5144 			CTLFLAG_RD, &stats->colc,
5145 			"Collision Count");
5146 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5147 			CTLFLAG_RD, &adapter->stats.symerrs,
5148 			"Symbol Errors");
5149 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5150 			CTLFLAG_RD, &adapter->stats.sec,
5151 			"Sequence Errors");
5152 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5153 			CTLFLAG_RD, &adapter->stats.dc,
5154 			"Defer Count");
5155 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5156 			CTLFLAG_RD, &adapter->stats.mpc,
5157 			"Missed Packets");
5158 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5159 			CTLFLAG_RD, &adapter->stats.rnbc,
5160 			"Receive No Buffers");
5161 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5162 			CTLFLAG_RD, &adapter->stats.ruc,
5163 			"Receive Undersize");
5164 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5165 			CTLFLAG_RD, &adapter->stats.rfc,
5166 			"Fragmented Packets Received ");
5167 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5168 			CTLFLAG_RD, &adapter->stats.roc,
5169 			"Oversized Packets Received");
5170 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5171 			CTLFLAG_RD, &adapter->stats.rjc,
5172 			"Recevied Jabber");
5173 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5174 			CTLFLAG_RD, &adapter->stats.rxerrc,
5175 			"Receive Errors");
5176 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5177 			CTLFLAG_RD, &adapter->stats.crcerrs,
5178 			"CRC errors");
5179 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5180 			CTLFLAG_RD, &adapter->stats.algnerrc,
5181 			"Alignment Errors");
5182 	/* On 82575 these are collision counts */
5183 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5184 			CTLFLAG_RD, &adapter->stats.cexterr,
5185 			"Collision/Carrier extension errors");
5186 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5187 			CTLFLAG_RD, &adapter->stats.xonrxc,
5188 			"XON Received");
5189 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5190 			CTLFLAG_RD, &adapter->stats.xontxc,
5191 			"XON Transmitted");
5192 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5193 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5194 			"XOFF Received");
5195 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5196 			CTLFLAG_RD, &adapter->stats.xofftxc,
5197 			"XOFF Transmitted");
5198 
5199 	/* Packet Reception Stats */
5200 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5201 			CTLFLAG_RD, &adapter->stats.tpr,
5202 			"Total Packets Received ");
5203 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5204 			CTLFLAG_RD, &adapter->stats.gprc,
5205 			"Good Packets Received");
5206 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5207 			CTLFLAG_RD, &adapter->stats.bprc,
5208 			"Broadcast Packets Received");
5209 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5210 			CTLFLAG_RD, &adapter->stats.mprc,
5211 			"Multicast Packets Received");
5212 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5213 			CTLFLAG_RD, &adapter->stats.prc64,
5214 			"64 byte frames received ");
5215 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5216 			CTLFLAG_RD, &adapter->stats.prc127,
5217 			"65-127 byte frames received");
5218 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5219 			CTLFLAG_RD, &adapter->stats.prc255,
5220 			"128-255 byte frames received");
5221 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5222 			CTLFLAG_RD, &adapter->stats.prc511,
5223 			"256-511 byte frames received");
5224 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5225 			CTLFLAG_RD, &adapter->stats.prc1023,
5226 			"512-1023 byte frames received");
5227 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5228 			CTLFLAG_RD, &adapter->stats.prc1522,
5229 			"1023-1522 byte frames received");
5230  	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5231  			CTLFLAG_RD, &adapter->stats.gorc,
5232  			"Good Octets Received");
5233 
5234 	/* Packet Transmission Stats */
5235  	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5236  			CTLFLAG_RD, &adapter->stats.gotc,
5237  			"Good Octets Transmitted");
5238 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5239 			CTLFLAG_RD, &adapter->stats.tpt,
5240 			"Total Packets Transmitted");
5241 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5242 			CTLFLAG_RD, &adapter->stats.gptc,
5243 			"Good Packets Transmitted");
5244 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5245 			CTLFLAG_RD, &adapter->stats.bptc,
5246 			"Broadcast Packets Transmitted");
5247 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5248 			CTLFLAG_RD, &adapter->stats.mptc,
5249 			"Multicast Packets Transmitted");
5250 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5251 			CTLFLAG_RD, &adapter->stats.ptc64,
5252 			"64 byte frames transmitted ");
5253 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5254 			CTLFLAG_RD, &adapter->stats.ptc127,
5255 			"65-127 byte frames transmitted");
5256 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5257 			CTLFLAG_RD, &adapter->stats.ptc255,
5258 			"128-255 byte frames transmitted");
5259 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5260 			CTLFLAG_RD, &adapter->stats.ptc511,
5261 			"256-511 byte frames transmitted");
5262 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5263 			CTLFLAG_RD, &adapter->stats.ptc1023,
5264 			"512-1023 byte frames transmitted");
5265 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5266 			CTLFLAG_RD, &adapter->stats.ptc1522,
5267 			"1024-1522 byte frames transmitted");
5268 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5269 			CTLFLAG_RD, &adapter->stats.tsctc,
5270 			"TSO Contexts Transmitted");
5271 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5272 			CTLFLAG_RD, &adapter->stats.tsctfc,
5273 			"TSO Contexts Failed");
5274 
5275 
5276 	/* Interrupt Stats */
5277 
5278 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5279 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5280 	int_list = SYSCTL_CHILDREN(int_node);
5281 
5282 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5283 			CTLFLAG_RD, &adapter->stats.iac,
5284 			"Interrupt Assertion Count");
5285 
5286 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5287 			CTLFLAG_RD, &adapter->stats.icrxptc,
5288 			"Interrupt Cause Rx Pkt Timer Expire Count");
5289 
5290 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5291 			CTLFLAG_RD, &adapter->stats.icrxatc,
5292 			"Interrupt Cause Rx Abs Timer Expire Count");
5293 
5294 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5295 			CTLFLAG_RD, &adapter->stats.ictxptc,
5296 			"Interrupt Cause Tx Pkt Timer Expire Count");
5297 
5298 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5299 			CTLFLAG_RD, &adapter->stats.ictxatc,
5300 			"Interrupt Cause Tx Abs Timer Expire Count");
5301 
5302 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5303 			CTLFLAG_RD, &adapter->stats.ictxqec,
5304 			"Interrupt Cause Tx Queue Empty Count");
5305 
5306 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5307 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5308 			"Interrupt Cause Tx Queue Min Thresh Count");
5309 
5310 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5311 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5312 			"Interrupt Cause Rx Desc Min Thresh Count");
5313 
5314 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5315 			CTLFLAG_RD, &adapter->stats.icrxoc,
5316 			"Interrupt Cause Receiver Overrun Count");
5317 }
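
/*
 * The tree built above lands under the device's sysctl node; for
 * unit 0 that means dev.em.0 for the driver counters, with
 * dev.em.0.queue0.*, dev.em.0.mac_stats.* and dev.em.0.interrupts.*
 * as the sub-nodes added by this routine.
 */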
5318 
5319 /**********************************************************************
5320  *
5321  *  This routine provides a way to dump out the adapter EEPROM,
5322  *  often a useful debug/service tool. It dumps only the first
5323  *  32 words, which is where the data that matters lives.
5324  *
5325  **********************************************************************/
5326 static int
5327 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5328 {
5329 	struct adapter *adapter;
5330 	int error;
5331 	int result;
5332 
5333 	result = -1;
5334 	error = sysctl_handle_int(oidp, &result, 0, req);
5335 
5336 	if (error || !req->newptr)
5337 		return (error);
5338 
5339 	/*
5340 	 * This value will cause a hex dump of the
5341 	 * first 32 16-bit words of the EEPROM to
5342 	 * the screen.
5343 	 */
5344 	if (result == 1) {
5345 		adapter = (struct adapter *)arg1;
5346 		em_print_nvm_info(adapter);
5347 	}
5348 
5349 	return (error);
5350 }
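
/*
 * Usage sketch: writing 1 to the OID this handler is attached to
 * triggers the dump to the console.  Assuming the node is registered
 * as "nvm" on unit 0:
 *
 *	$ sysctl dev.em.0.nvm=1
 */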
5351 
5352 static void
5353 em_print_nvm_info(struct adapter *adapter)
5354 {
5355 	u16	eeprom_data;
5356 	int	i, j, row = 0;
5357 
5358 	/* It's a bit crude, but it gets the job done. */
5359 	printf("\nInterface EEPROM Dump:\n");
5360 	printf("Offset\n0x0000  ");
5361 	for (i = 0, j = 0; i < 32; i++, j++) {
5362 		if (j == 8) { /* Make the offset block */
5363 			j = 0; ++row;
5364 			printf("\n0x00%x0  ",row);
5365 		}
5366 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5367 		printf("%04x ", eeprom_data);
5368 	}
5369 	printf("\n");
5370 }
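
/*
 * The dump prints eight 16-bit words per row, each row labeled with
 * the byte offset of its first word; the shape (values illustrative)
 * is:
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	...
 */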
5371 
5372 static int
5373 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5374 {
5375 	struct em_int_delay_info *info;
5376 	struct adapter *adapter;
5377 	u32 regval;
5378 	int error, usecs, ticks;
5379 
5380 	info = (struct em_int_delay_info *)arg1;
5381 	usecs = info->value;
5382 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5383 	if (error != 0 || req->newptr == NULL)
5384 		return (error);
5385 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5386 		return (EINVAL);
5387 	info->value = usecs;
5388 	ticks = EM_USECS_TO_TICKS(usecs);
5389 
5390 	adapter = info->adapter;
5391 
5392 	EM_CORE_LOCK(adapter);
5393 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5394 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5395 	/* Handle a few special cases. */
5396 	switch (info->offset) {
5397 	case E1000_RDTR:
5398 		break;
5399 	case E1000_TIDV:
5400 		if (ticks == 0) {
5401 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5402 			/* Don't write 0 into the TIDV register. */
5403 			regval++;
5404 		} else
5405 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5406 		break;
5407 	}
5408 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5409 	EM_CORE_UNLOCK(adapter);
5410 	return (0);
5411 }
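
/*
 * Worked example of the conversion above, assuming the usual e1000
 * delay-timer granularity of 1.024 usec per tick (the exact rounding
 * is in the EM_USECS_TO_TICKS()/EM_TICKS_TO_USECS() macros): a request
 * of 100 usec becomes about 98 ticks, and because only the low 16 bits
 * of the register are rewritten the programmed delay can never exceed
 * 65535 ticks.
 */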
5412 
5413 static void
5414 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5415 	const char *description, struct em_int_delay_info *info,
5416 	int offset, int value)
5417 {
5418 	info->adapter = adapter;
5419 	info->offset = offset;
5420 	info->value = value;
5421 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5422 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5423 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5424 	    info, 0, em_sysctl_int_delay, "I", description);
5425 }
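
/*
 * A hypothetical registration call, mirroring how the attach path
 * typically wires these up (the names and default shown here are
 * illustrative, not taken from this file):
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs",
 *	    &adapter->rx_int_delay,
 *	    E1000_REGISTER(&adapter->hw, E1000_RDTR),
 *	    em_rx_int_delay_dflt);
 */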
5426 
5427 static void
5428 em_add_rx_process_limit(struct adapter *adapter, const char *name,
5429 	const char *description, int *limit, int value)
5430 {
5431 	*limit = value;
5432 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5433 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5434 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5435 }
5436 
5437 static void
5438 em_set_flow_cntrl(struct adapter *adapter, const char *name,
5439 	const char *description, int *limit, int value)
5440 {
5441 	*limit = value;
5442 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5443 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5444 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5445 }
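
/*
 * Note that em_set_flow_cntrl() and em_add_rx_process_limit() are
 * body-identical: both seed a caller-owned int and expose it as a
 * read/write sysctl.  A shared helper would look like this (the name
 * em_add_int_sysctl is hypothetical):
 *
 *	static void
 *	em_add_int_sysctl(struct adapter *adapter, const char *name,
 *	    const char *description, int *var, int value)
 *	{
 *		*var = value;
 *		SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
 *		    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
 *		    OID_AUTO, name, CTLFLAG_RW, var, value, description);
 *	}
 */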
5446 
5447 static int
5448 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5449 {
5450 	struct adapter *adapter;
5451 	int error;
5452 	int result;
5453 
5454 	result = -1;
5455 	error = sysctl_handle_int(oidp, &result, 0, req);
5456 
5457 	if (error || !req->newptr)
5458 		return (error);
5459 
5460 	if (result == 1) {
5461 		adapter = (struct adapter *)arg1;
5462 		em_print_debug_info(adapter);
5463 	}
5464 
5465 	return (error);
5466 }
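
/*
 * Like the NVM dump above, this is a write-1-to-trigger sysctl.
 * Assuming it is registered as "debug" on unit 0:
 *
 *	$ sysctl dev.em.0.debug=1
 */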
5467 
5468 /*
5469 ** This routine is meant to be fluid; add whatever is
5470 ** needed for debugging a problem.  -jfv
5471 */
5472 static void
5473 em_print_debug_info(struct adapter *adapter)
5474 {
5475 	device_t dev = adapter->dev;
5476 	struct tx_ring *txr = adapter->tx_rings;
5477 	struct rx_ring *rxr = adapter->rx_rings;
5478 
5479 	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5480 		printf("Interface is RUNNING ");
5481 	else
5482 		printf("Interface is NOT RUNNING\n");
5483 	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5484 		printf("and ACTIVE\n");
5485 	else
5486 		printf("and INACTIVE\n");
5487 
5488 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5489 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5490 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5491 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5492 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5493 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5494 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5495 	device_printf(dev, "TX descriptors avail = %d\n",
5496 	    txr->tx_avail);
5497 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5498 	    txr->no_desc_avail);
5499 	device_printf(dev, "RX discarded packets = %ld\n",
5500 	    rxr->rx_discarded);
5501 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5502 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5503 }
5504