/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.1.9";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select which devices to load on.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static bool	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	em_set_flow_cntrl(struct adapter *, const char *,
		    const char *, int *, int);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif
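
/*
 * Note (not from the original source): the hardware counts interrupt
 * delays in units of roughly 1.024 usecs, so the two macros above
 * convert with rounding in each direction.  A quick worked example,
 * assuming a tunable value of 64 ticks:
 *
 *	EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66 usecs
 *	EM_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64 ticks
 *
 * so a value round-trips through the sysctl handlers unchanged.
 */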

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

372  *  Device identification routine
373  *
374  *  em_probe determines if the driver should be loaded on
375  *  adapter based on PCI vendor/device id of the adapter.
376  *
377  *  return BUS_PROBE_DEFAULT on success, positive on failure
378  *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_pch2lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/* Sysctl for setting the interface flow control */
	em_set_flow_cntrl(adapter, "flow_control",
	    "configure flow control",
	    &adapter->fc_setting, em_fc_setting);

	/*
	 * Validate the number of transmit and receive descriptors.  The
	 * ring size in bytes must be a multiple of EM_DBA_ALIGN, and the
	 * count must not exceed the hardware maximum or fall below the
	 * hardware minimum.
	 */
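	/*
	 * Note (not from the original source): with EM_DBA_ALIGN at 128
	 * bytes and 16-byte descriptors (values taken from if_em.h and the
	 * shared code, so treat them as assumptions), the alignment check
	 * below reduces to requiring a descriptor count that is a multiple
	 * of 128 / 16 = 8.
	 */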
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
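
	/*
	 * Note (not from the original source): for a standard frame this
	 * works out to 1500 (ETHERMTU) + 14 (ETHER_HDR_LEN) + 4 (FCS)
	 * = 1518 bytes, the classic maximum Ethernet frame size.
	 */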

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/*
	** Start from a known state; this is
	** important when reading the NVM
	** and MAC address from it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "VLAN in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}

/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure; in that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting the hardware takes a very
			 * long time and results in link renegotiation,
			 * we only initialize the hardware when it is
			 * absolutely required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_CORE_LOCK(adapter);
				em_init_locked(adapter);
				EM_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_82574:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_82583:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}
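
		/*
		 * Note (not from the original source): on a 9234-byte
		 * part, for example, this caps the MTU at
		 * 9234 - 14 (ETHER_HDR_LEN) - 4 (ETHER_CRC_LEN) = 9216.
		 */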

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/*
		** As the speed/duplex settings are being
		** changed, we need to reset the PHY.
		*/
		adapter->hw.phy.reset_disable = FALSE;
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  the init entry point in the network interface structure.  It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get the adapter to a consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
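	/*
	 * Note (not from the original source): the PBA register takes the
	 * RX allocation in KB, so e.g. E1000_PBA_32K (0x20 = 32 in the
	 * shared code; treat the exact constant as an assumption) gives
	 * 32K to RX and leaves 48K - 32K = 16K for TX on the 48K parts
	 * handled first in the switch below.
	 */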
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
		pba = E1000_PBA_10K;
		break;
	case e1000_pchlan:
	case e1000_pch2lan:
		pba = E1000_PBA_26K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest MAC address; the user may have set an LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset.  We make a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
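
	/*
	 * Note (not from the original source): on typical configurations
	 * these cluster sizes are MCLBYTES = 2048, MJUMPAGESIZE = PAGE_SIZE
	 * (4096 on most platforms) and MJUM9BYTES = 9216, so each frame
	 * still fits in a single receive buffer.
	 */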

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling;
	 * make sure they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
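	/*
	 * Note (not from the original source): E1000_ICR_INT_ASSERTED is
	 * bit 31 (0x80000000) of the ICR; on 82571 and later parts it is
	 * set only when the device actually asserted the interrupt line,
	 * so a read with that bit clear means the interrupt came from
	 * another device sharing the line.
	 */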
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	bool		more;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		em_txeof(txr);
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}

/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	bool		more;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	more = em_txeof(txr);
	EM_TX_UNLOCK(txr);
	if (more)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	em_txeof(txr);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
1691 static int
1692 em_media_change(struct ifnet *ifp)
1693 {
1694 	struct adapter *adapter = ifp->if_softc;
1695 	struct ifmedia  *ifm = &adapter->media;
1696 
1697 	INIT_DEBUGOUT("em_media_change: begin");
1698 
1699 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1700 		return (EINVAL);
1701 
1702 	EM_CORE_LOCK(adapter);
1703 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1704 	case IFM_AUTO:
1705 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1706 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1707 		break;
1708 	case IFM_1000_LX:
1709 	case IFM_1000_SX:
1710 	case IFM_1000_T:
1711 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1712 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1713 		break;
1714 	case IFM_100_TX:
1715 		adapter->hw.mac.autoneg = FALSE;
1716 		adapter->hw.phy.autoneg_advertised = 0;
1717 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1718 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1719 		else
1720 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1721 		break;
1722 	case IFM_10_T:
1723 		adapter->hw.mac.autoneg = FALSE;
1724 		adapter->hw.phy.autoneg_advertised = 0;
1725 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1726 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1727 		else
1728 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1729 		break;
1730 	default:
1731 		device_printf(adapter->dev, "Unsupported media type\n");
1732 	}
1733 
1734 	em_init_locked(adapter);
1735 	EM_CORE_UNLOCK(adapter);
1736 
1737 	return (0);
1738 }
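
/*
 * Example usage (the unit name em0 here is hypothetical; the media words
 * are standard ifmedia(4) syntax):
 *
 *   # ifconfig em0 media 100baseTX mediaopt full-duplex
 *
 * lands in the IFM_100_TX case above with IFM_FDX set, forcing
 * ADVERTISE_100_FULL with autonegotiation disabled.
 */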
1739 
1740 /*********************************************************************
1741  *
1742  *  This routine maps the mbufs to tx descriptors.
1743  *
1744  *  return 0 on success, positive on failure
1745  **********************************************************************/
1746 
1747 static int
1748 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1749 {
1750 	struct adapter		*adapter = txr->adapter;
1751 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1752 	bus_dmamap_t		map;
1753 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1754 	struct e1000_tx_desc	*ctxd = NULL;
1755 	struct mbuf		*m_head;
1756 	struct ether_header	*eh;
1757 	struct ip		*ip = NULL;
1758 	struct tcphdr		*tp = NULL;
1759 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1760 	int			ip_off, poff;
1761 	int			nsegs, i, j, first, last = 0;
1762 	int			error, do_tso, tso_desc = 0;
1763 
1764 	m_head = *m_headp;
1765 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1766 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1767 	ip_off = poff = 0;
1768 
1769 	/*
1770 	 * Intel recommends entire IP/TCP header length reside in a single
1771 	 * buffer. If multiple descriptors are used to describe the IP and
1772 	 * TCP header, each descriptor should describe one or more
1773 	 * complete headers; descriptors referencing only parts of headers
1774 	 * are not supported. If all layer headers are not coalesced into
1775 	 * a single buffer, each buffer should not cross a 4KB boundary,
1776 	 * or be larger than the maximum read request size.
1777 	 * The controller also requires modifying the IP/TCP header to make
1778 	 * TSO work, so we first get a writable mbuf chain, then coalesce the
1779 	 * ethernet/IP/TCP headers into a single buffer to meet the
1780 	 * controller's requirement. This also simplifies IP/TCP/UDP checksum
1781 	 * offloading, which has similar restrictions.
1782 	 */
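	/*
	 * For example, assuming an untagged IPv4/TCP frame with no IP
	 * options: a 14-byte ethernet header, 20-byte IP header and
	 * 20-byte TCP header give ip_off = 14 and poff = 34, and the
	 * m_pullup() calls below leave all 54 header bytes contiguous
	 * in the first mbuf.
	 */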
1783 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1784 		if (do_tso || (m_head->m_next != NULL &&
1785 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1786 			if (M_WRITABLE(*m_headp) == 0) {
1787 				m_head = m_dup(*m_headp, M_DONTWAIT);
1788 				m_freem(*m_headp);
1789 				if (m_head == NULL) {
1790 					*m_headp = NULL;
1791 					return (ENOBUFS);
1792 				}
1793 				*m_headp = m_head;
1794 			}
1795 		}
1796 		/*
1797 		 * XXX
1798 		 * Assume IPv4, we don't have TSO/checksum offload support
1799 		 * for IPv6 yet.
1800 		 */
1801 		ip_off = sizeof(struct ether_header);
1802 		m_head = m_pullup(m_head, ip_off);
1803 		if (m_head == NULL) {
1804 			*m_headp = NULL;
1805 			return (ENOBUFS);
1806 		}
1807 		eh = mtod(m_head, struct ether_header *);
1808 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1809 			ip_off = sizeof(struct ether_vlan_header);
1810 			m_head = m_pullup(m_head, ip_off);
1811 			if (m_head == NULL) {
1812 				*m_headp = NULL;
1813 				return (ENOBUFS);
1814 			}
1815 		}
1816 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1817 		if (m_head == NULL) {
1818 			*m_headp = NULL;
1819 			return (ENOBUFS);
1820 		}
1821 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1822 		poff = ip_off + (ip->ip_hl << 2);
1823 		m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1824 		if (m_head == NULL) {
1825 			*m_headp = NULL;
1826 			return (ENOBUFS);
1827 		}
1828 		if (do_tso) {
1829 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1830 			/*
1831 			 * TSO workaround:
1832 			 *   pull 4 more bytes of data into the header mbuf.
1833 			 */
1834 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1835 			if (m_head == NULL) {
1836 				*m_headp = NULL;
1837 				return (ENOBUFS);
1838 			}
1839 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1840 			ip->ip_len = 0;
1841 			ip->ip_sum = 0;
1842 			/*
1843 			 * The TCP pseudo checksum does not include the TCP
1844 			 * payload length, so the driver must recompute the
1845 			 * checksum here as the hardware expects to see it.
1846 			 * This adheres to Microsoft's Large Send specification.
1847 			 */
1848 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1849 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1850 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
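			/*
			 * Note in_pseudo() above sums only the addresses
			 * and protocol, deliberately omitting the TCP
			 * length word; the hardware folds in the correct
			 * per-segment length as it carves the TSO payload
			 * into MSS-sized frames.
			 */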
1851 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1852 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1853 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1854 			if (m_head == NULL) {
1855 				*m_headp = NULL;
1856 				return (ENOBUFS);
1857 			}
1858 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1859 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1860 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1861 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1862 			if (m_head == NULL) {
1863 				*m_headp = NULL;
1864 				return (ENOBUFS);
1865 			}
1866 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1867 		}
1868 		*m_headp = m_head;
1869 	}
1870 
1871 	/*
1872 	 * Map the packet for DMA
1873 	 *
1874 	 * Capture the first descriptor index,
1875 	 * this descriptor will have the index
1876 	 * of the EOP which is the only one that
1877 	 * now gets a DONE bit writeback.
1878 	 */
1879 	first = txr->next_avail_desc;
1880 	tx_buffer = &txr->tx_buffers[first];
1881 	tx_buffer_mapped = tx_buffer;
1882 	map = tx_buffer->map;
1883 
1884 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1885 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1886 
1887 	/*
1888 	 * There are two types of errors we can (try) to handle:
1889 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1890 	 *   out of segments.  Defragment the mbuf chain and try again.
1891 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1892 	 *   at this point in time.  Defer sending and try again later.
1893 	 * All other errors, in particular EINVAL, are fatal and prevent the
1894 	 * mbuf chain from ever going through.  Drop it and report error.
1895 	 */
1896 	if (error == EFBIG) {
1897 		struct mbuf *m;
1898 
1899 		m = m_defrag(*m_headp, M_DONTWAIT);
1900 		if (m == NULL) {
1901 			adapter->mbuf_alloc_failed++;
1902 			m_freem(*m_headp);
1903 			*m_headp = NULL;
1904 			return (ENOBUFS);
1905 		}
1906 		*m_headp = m;
1907 
1908 		/* Try it again */
1909 		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1910 		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1911 
1912 		if (error == ENOMEM) {
1913 			adapter->no_tx_dma_setup++;
1914 			return (error);
1915 		} else if (error != 0) {
1916 			adapter->no_tx_dma_setup++;
1917 			m_freem(*m_headp);
1918 			*m_headp = NULL;
1919 			return (error);
1920 		}
1921 
1922 	} else if (error == ENOMEM) {
1923 		adapter->no_tx_dma_setup++;
1924 		return (error);
1925 	} else if (error != 0) {
1926 		adapter->no_tx_dma_setup++;
1927 		m_freem(*m_headp);
1928 		*m_headp = NULL;
1929 		return (error);
1930 	}
1931 
1932 	/*
1933 	 * TSO Hardware workaround, if this packet is not
1934 	 * TSO, and is only a single descriptor long, and
1935 	 * it follows a TSO burst, then we need to add a
1936 	 * sentinel descriptor to prevent premature writeback.
1937 	 */
1938 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1939 		if (nsegs == 1)
1940 			tso_desc = TRUE;
1941 		txr->tx_tso = FALSE;
1942 	}
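	/*
	 * Concretely: a single-segment frame that follows a TSO burst
	 * is split further below into a (seg_len - 4)-byte descriptor
	 * plus a 4-byte sentinel descriptor, implementing the
	 * premature-writeback workaround described above.
	 */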
1943 
1944 	if (nsegs > (txr->tx_avail - 2)) {
1945 		txr->no_desc_avail++;
1946 		bus_dmamap_unload(txr->txtag, map);
1947 		return (ENOBUFS);
1948 	}
1949 	m_head = *m_headp;
1950 
1951 	/* Do hardware assists */
1952 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1953 		em_tso_setup(txr, m_head, ip_off, ip, tp,
1954 		    &txd_upper, &txd_lower);
1955 		/* we need to make a final sentinel transmit desc */
1956 		tso_desc = TRUE;
1957 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1958 		em_transmit_checksum_setup(txr, m_head,
1959 		    ip_off, ip, &txd_upper, &txd_lower);
1960 
1961 	i = txr->next_avail_desc;
1962 
1963 	/* Set up our transmit descriptors */
1964 	for (j = 0; j < nsegs; j++) {
1965 		bus_size_t seg_len;
1966 		bus_addr_t seg_addr;
1967 
1968 		tx_buffer = &txr->tx_buffers[i];
1969 		ctxd = &txr->tx_base[i];
1970 		seg_addr = segs[j].ds_addr;
1971 		seg_len  = segs[j].ds_len;
1972 		/*
1973 		** TSO Workaround:
1974 		** If this is the last descriptor, we want to
1975 		** split it so we have a small final sentinel
1976 		*/
1977 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1978 			seg_len -= 4;
1979 			ctxd->buffer_addr = htole64(seg_addr);
1980 			ctxd->lower.data = htole32(
1981 			    adapter->txd_cmd | txd_lower | seg_len);
1982 			ctxd->upper.data =
1983 			    htole32(txd_upper);
1984 			if (++i == adapter->num_tx_desc)
1985 				i = 0;
1986 			/* Now make the sentinel */
1987 			++txd_used; /* using an extra txd */
1988 			ctxd = &txr->tx_base[i];
1989 			tx_buffer = &txr->tx_buffers[i];
1990 			ctxd->buffer_addr =
1991 			    htole64(seg_addr + seg_len);
1992 			ctxd->lower.data = htole32(
1993 			    adapter->txd_cmd | txd_lower | 4);
1994 			ctxd->upper.data =
1995 			    htole32(txd_upper);
1996 			last = i;
1997 			if (++i == adapter->num_tx_desc)
1998 				i = 0;
1999 		} else {
2000 			ctxd->buffer_addr = htole64(seg_addr);
2001 			ctxd->lower.data = htole32(
2002 			    adapter->txd_cmd | txd_lower | seg_len);
2003 			ctxd->upper.data =
2004 			    htole32(txd_upper);
2005 			last = i;
2006 			if (++i == adapter->num_tx_desc)
2007 				i = 0;
2008 		}
2009 		tx_buffer->m_head = NULL;
2010 		tx_buffer->next_eop = -1;
2011 	}
2012 
2013 	txr->next_avail_desc = i;
2014 	txr->tx_avail -= nsegs;
2015 	if (tso_desc) /* TSO used an extra for sentinel */
2016 		txr->tx_avail -= txd_used;
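	/*
	 * Accounting example: a 3-segment packet consumes nsegs = 3
	 * descriptors, plus one more (txd_used = 1) when the sentinel
	 * split above fired, so tx_avail drops by 4 in total.
	 */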
2017 
2018 	if (m_head->m_flags & M_VLANTAG) {
2019 		/* Set the vlan id. */
2020 		ctxd->upper.fields.special =
2021 		    htole16(m_head->m_pkthdr.ether_vtag);
2022 		/* Tell hardware to add tag */
2023 		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
2024 	}
2025 
2026 	tx_buffer->m_head = m_head;
2027 	tx_buffer_mapped->map = tx_buffer->map;
2028 	tx_buffer->map = map;
2029 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2030 
2031 	/*
2032 	 * Last Descriptor of Packet
2033 	 * needs End Of Packet (EOP)
2034 	 * and Report Status (RS)
2035 	 */
2036 	ctxd->lower.data |=
2037 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2038 	/*
2039 	 * Keep track in the first buffer which
2040 	 * descriptor will be written back
2041 	 */
2042 	tx_buffer = &txr->tx_buffers[first];
2043 	tx_buffer->next_eop = last;
2044 	/* Update the watchdog time early and often */
2045 	txr->watchdog_time = ticks;
2046 
2047 	/*
2048 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2049 	 * that this frame is available to transmit.
2050 	 */
2051 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2052 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2053 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2054 
2055 	return (0);
2056 }
2057 
2058 static void
2059 em_set_promisc(struct adapter *adapter)
2060 {
2061 	struct ifnet	*ifp = adapter->ifp;
2062 	u32		reg_rctl;
2063 
2064 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2065 
2066 	if (ifp->if_flags & IFF_PROMISC) {
2067 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2068 		/* Turn this on if you want to see bad packets */
2069 		if (em_debug_sbp)
2070 			reg_rctl |= E1000_RCTL_SBP;
2071 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2072 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2073 		reg_rctl |= E1000_RCTL_MPE;
2074 		reg_rctl &= ~E1000_RCTL_UPE;
2075 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2076 	}
2077 }
2078 
2079 static void
2080 em_disable_promisc(struct adapter *adapter)
2081 {
2082 	u32	reg_rctl;
2083 
2084 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2085 
2086 	reg_rctl &= ~E1000_RCTL_UPE;
2087 	reg_rctl &= ~E1000_RCTL_MPE;
2088 	reg_rctl &= ~E1000_RCTL_SBP;
2089 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2090 }
2091 
2092 
2093 /*********************************************************************
2094  *  Multicast Update
2095  *
2096  *  This routine is called whenever multicast address list is updated.
2097  *
2098  **********************************************************************/
2099 
2100 static void
2101 em_set_multi(struct adapter *adapter)
2102 {
2103 	struct ifnet	*ifp = adapter->ifp;
2104 	struct ifmultiaddr *ifma;
2105 	u32 reg_rctl = 0;
2106 	u8  *mta; /* Multicast array memory */
2107 	int mcnt = 0;
2108 
2109 	IOCTL_DEBUGOUT("em_set_multi: begin");
2110 
2111 	mta = adapter->mta;
2112 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2113 
2114 	if (adapter->hw.mac.type == e1000_82542 &&
2115 	    adapter->hw.revision_id == E1000_REVISION_2) {
2116 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2117 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2118 			e1000_pci_clear_mwi(&adapter->hw);
2119 		reg_rctl |= E1000_RCTL_RST;
2120 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2121 		msec_delay(5);
2122 	}
2123 
2124 #if __FreeBSD_version < 800000
2125 	IF_ADDR_LOCK(ifp);
2126 #else
2127 	if_maddr_rlock(ifp);
2128 #endif
2129 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2130 		if (ifma->ifma_addr->sa_family != AF_LINK)
2131 			continue;
2132 
2133 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2134 			break;
2135 
2136 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2137 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2138 		mcnt++;
2139 	}
2140 #if __FreeBSD_version < 800000
2141 	IF_ADDR_UNLOCK(ifp);
2142 #else
2143 	if_maddr_runlock(ifp);
2144 #endif
2145 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2146 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2147 		reg_rctl |= E1000_RCTL_MPE;
2148 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2149 	} else
2150 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2151 
2152 	if (adapter->hw.mac.type == e1000_82542 &&
2153 	    adapter->hw.revision_id == E1000_REVISION_2) {
2154 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2155 		reg_rctl &= ~E1000_RCTL_RST;
2156 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2157 		msec_delay(5);
2158 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2159 			e1000_pci_set_mwi(&adapter->hw);
2160 	}
2161 }
2162 
2163 
2164 /*********************************************************************
2165  *  Timer routine
2166  *
2167  *  This routine checks for link status and updates statistics.
2168  *
2169  **********************************************************************/
2170 
2171 static void
2172 em_local_timer(void *arg)
2173 {
2174 	struct adapter	*adapter = arg;
2175 	struct ifnet	*ifp = adapter->ifp;
2176 	struct tx_ring	*txr = adapter->tx_rings;
2177 
2178 	EM_CORE_LOCK_ASSERT(adapter);
2179 
2180 	em_update_link_status(adapter);
2181 	em_update_stats_counters(adapter);
2182 
2183 	/* Reset LAA into RAR[0] on 82571 */
2184 	if ((adapter->hw.mac.type == e1000_82571) &&
2185 	    e1000_get_laa_state_82571(&adapter->hw))
2186 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2187 
2188 	/*
2189 	** Don't do TX watchdog check if we've been paused
2190 	*/
2191 	if (adapter->pause_frames) {
2192 		adapter->pause_frames = 0;
2193 		goto out;
2194 	}
2195 	/*
2196 	** Check on the state of the TX queue(s); this
2197 	** can be done without the lock because it's read-only
2198 	** and the HUNG state will be static if set.
2199 	*/
2200 	for (int i = 0; i < adapter->num_queues; i++, txr++)
2201 		if (txr->queue_status == EM_QUEUE_HUNG)
2202 			goto hung;
2203 out:
2204 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2205 	return;
2206 hung:
2207 	/* Looks like we're hung */
2208 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2209 	device_printf(adapter->dev,
2210 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2211 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2212 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2213 	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2214 	    "Next TX to Clean = %d\n",
2215 	    txr->me, txr->tx_avail, txr->next_to_clean);
2216 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2217 	adapter->watchdog_events++;
2218 	em_init_locked(adapter);
2219 }
2220 
2221 
2222 static void
2223 em_update_link_status(struct adapter *adapter)
2224 {
2225 	struct e1000_hw *hw = &adapter->hw;
2226 	struct ifnet *ifp = adapter->ifp;
2227 	device_t dev = adapter->dev;
2228 	struct tx_ring *txr = adapter->tx_rings;
2229 	u32 link_check = 0;
2230 
2231 	/* Get the cached link value or read phy for real */
2232 	switch (hw->phy.media_type) {
2233 	case e1000_media_type_copper:
2234 		if (hw->mac.get_link_status) {
2235 			/* Do the work to read phy */
2236 			e1000_check_for_link(hw);
2237 			link_check = !hw->mac.get_link_status;
2238 			if (link_check) /* ESB2 fix */
2239 				e1000_cfg_on_link_up(hw);
2240 		} else
2241 			link_check = TRUE;
2242 		break;
2243 	case e1000_media_type_fiber:
2244 		e1000_check_for_link(hw);
2245 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2246 		    E1000_STATUS_LU);
2247 		break;
2248 	case e1000_media_type_internal_serdes:
2249 		e1000_check_for_link(hw);
2250 		link_check = adapter->hw.mac.serdes_has_link;
2251 		break;
2252 	default:
2253 	case e1000_media_type_unknown:
2254 		break;
2255 	}
2256 
2257 	/* Now check for a transition */
2258 	if (link_check && (adapter->link_active == 0)) {
2259 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2260 		    &adapter->link_duplex);
2261 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2262 		if ((adapter->link_speed != SPEED_1000) &&
2263 		    ((hw->mac.type == e1000_82571) ||
2264 		    (hw->mac.type == e1000_82572))) {
2265 			int tarc0;
2266 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2267 			tarc0 &= ~SPEED_MODE_BIT;
2268 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2269 		}
2270 		if (bootverbose)
2271 			device_printf(dev, "Link is up %d Mbps %s\n",
2272 			    adapter->link_speed,
2273 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2274 			    "Full Duplex" : "Half Duplex"));
2275 		adapter->link_active = 1;
2276 		adapter->smartspeed = 0;
2277 		ifp->if_baudrate = adapter->link_speed * 1000000;
2278 		if_link_state_change(ifp, LINK_STATE_UP);
2279 	} else if (!link_check && (adapter->link_active == 1)) {
2280 		ifp->if_baudrate = adapter->link_speed = 0;
2281 		adapter->link_duplex = 0;
2282 		if (bootverbose)
2283 			device_printf(dev, "Link is Down\n");
2284 		adapter->link_active = 0;
2285 		/* Link down, disable watchdog */
2286 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2287 			txr->queue_status = EM_QUEUE_IDLE;
2288 		if_link_state_change(ifp, LINK_STATE_DOWN);
2289 	}
2290 }
2291 
2292 /*********************************************************************
2293  *
2294  *  This routine disables all traffic on the adapter by issuing a
2295  *  global reset on the MAC and deallocates TX/RX buffers.
2296  *
2297  *  This routine should always be called with BOTH the CORE
2298  *  and TX locks.
2299  **********************************************************************/
2300 
2301 static void
2302 em_stop(void *arg)
2303 {
2304 	struct adapter	*adapter = arg;
2305 	struct ifnet	*ifp = adapter->ifp;
2306 	struct tx_ring	*txr = adapter->tx_rings;
2307 
2308 	EM_CORE_LOCK_ASSERT(adapter);
2309 
2310 	INIT_DEBUGOUT("em_stop: begin");
2311 
2312 	em_disable_intr(adapter);
2313 	callout_stop(&adapter->timer);
2314 
2315 	/* Tell the stack that the interface is no longer active */
2316 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2317 
2318 	/* Unarm watchdog timer. */
2319 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2320 		EM_TX_LOCK(txr);
2321 		txr->queue_status = EM_QUEUE_IDLE;
2322 		EM_TX_UNLOCK(txr);
2323 	}
2324 
2325 	e1000_reset_hw(&adapter->hw);
2326 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2327 
2328 	e1000_led_off(&adapter->hw);
2329 	e1000_cleanup_led(&adapter->hw);
2330 }
2331 
2332 
2333 /*********************************************************************
2334  *
2335  *  Determine hardware revision.
2336  *
2337  **********************************************************************/
2338 static void
2339 em_identify_hardware(struct adapter *adapter)
2340 {
2341 	device_t dev = adapter->dev;
2342 
2343 	/* Make sure our PCI config space has the necessary stuff set */
2344 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2345 	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2346 	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2347 		device_printf(dev, "Memory Access and/or Bus Master bits "
2348 		    "were not set!\n");
2349 		adapter->hw.bus.pci_cmd_word |=
2350 		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2351 		pci_write_config(dev, PCIR_COMMAND,
2352 		    adapter->hw.bus.pci_cmd_word, 2);
2353 	}
2354 
2355 	/* Save off the information about this board */
2356 	adapter->hw.vendor_id = pci_get_vendor(dev);
2357 	adapter->hw.device_id = pci_get_device(dev);
2358 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2359 	adapter->hw.subsystem_vendor_id =
2360 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2361 	adapter->hw.subsystem_device_id =
2362 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2363 
2364 	/* Do Shared Code Init and Setup */
2365 	if (e1000_set_mac_type(&adapter->hw)) {
2366 		device_printf(dev, "Setup init failure\n");
2367 		return;
2368 	}
2369 }
2370 
2371 static int
2372 em_allocate_pci_resources(struct adapter *adapter)
2373 {
2374 	device_t	dev = adapter->dev;
2375 	int		rid;
2376 
2377 	rid = PCIR_BAR(0);
2378 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2379 	    &rid, RF_ACTIVE);
2380 	if (adapter->memory == NULL) {
2381 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2382 		return (ENXIO);
2383 	}
2384 	adapter->osdep.mem_bus_space_tag =
2385 	    rman_get_bustag(adapter->memory);
2386 	adapter->osdep.mem_bus_space_handle =
2387 	    rman_get_bushandle(adapter->memory);
2388 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2389 
2390 	/* Default to a single queue */
2391 	adapter->num_queues = 1;
2392 
2393 	/*
2394 	 * Setup MSI/X or MSI if PCI Express
2395 	 */
2396 	adapter->msix = em_setup_msix(adapter);
2397 
2398 	adapter->hw.back = &adapter->osdep;
2399 
2400 	return (0);
2401 }
2402 
2403 /*********************************************************************
2404  *
2405  *  Setup the Legacy or MSI Interrupt handler
2406  *
2407  **********************************************************************/
2408 int
2409 em_allocate_legacy(struct adapter *adapter)
2410 {
2411 	device_t dev = adapter->dev;
2412 	int error, rid = 0;
2413 
2414 	/* Manually turn off all interrupts */
2415 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2416 
2417 	if (adapter->msix == 1) /* using MSI */
2418 		rid = 1;
2419 	/* We allocate a single interrupt resource */
2420 	adapter->res = bus_alloc_resource_any(dev,
2421 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2422 	if (adapter->res == NULL) {
2423 		device_printf(dev, "Unable to allocate bus resource: "
2424 		    "interrupt\n");
2425 		return (ENXIO);
2426 	}
2427 
2428 	/*
2429 	 * Allocate a fast interrupt and the associated
2430 	 * deferred processing contexts.
2431 	 */
2432 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2433 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2434 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2435 	    taskqueue_thread_enqueue, &adapter->tq);
2436 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2437 	    device_get_nameunit(adapter->dev));
2438 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2439 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2440 		device_printf(dev, "Failed to register fast interrupt "
2441 			    "handler: %d\n", error);
2442 		taskqueue_free(adapter->tq);
2443 		adapter->tq = NULL;
2444 		return (error);
2445 	}
2446 
2447 	return (0);
2448 }
2449 
2450 /*********************************************************************
2451  *
2452  *  Setup the MSIX Interrupt handlers
2453  *   This is not really multiqueue; rather,
2454  *   it is just multiple interrupt vectors.
2455  *
2456  **********************************************************************/
2457 int
2458 em_allocate_msix(struct adapter *adapter)
2459 {
2460 	device_t	dev = adapter->dev;
2461 	struct		tx_ring *txr = adapter->tx_rings;
2462 	struct		rx_ring *rxr = adapter->rx_rings;
2463 	int		error, rid, vector = 0;
2464 
2465 
2466 	/* Make sure all interrupts are disabled */
2467 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2468 
2469 	/* First set up ring resources */
2470 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2471 
2472 		/* RX ring */
2473 		rid = vector + 1;
2474 
2475 		rxr->res = bus_alloc_resource_any(dev,
2476 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2477 		if (rxr->res == NULL) {
2478 			device_printf(dev,
2479 			    "Unable to allocate bus resource: "
2480 			    "RX MSIX Interrupt %d\n", i);
2481 			return (ENXIO);
2482 		}
2483 		if ((error = bus_setup_intr(dev, rxr->res,
2484 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2485 		    rxr, &rxr->tag)) != 0) {
2486 			device_printf(dev, "Failed to register RX handler");
2487 			return (error);
2488 		}
2489 #if __FreeBSD_version >= 800504
2490 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2491 #endif
2492 		rxr->msix = vector++; /* NOTE increment vector for TX */
2493 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2494 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2495 		    taskqueue_thread_enqueue, &rxr->tq);
2496 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2497 		    device_get_nameunit(adapter->dev));
2498 		/*
2499 		** Set the bit to enable interrupt
2500 		** in E1000_IMS -- bits 20 and 21
2501 		** are for RX0 and RX1, note this has
2502 		** NOTHING to do with the MSIX vector
2503 		*/
2504 		rxr->ims = 1 << (20 + i);
2505 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2506 
2507 		/* TX ring */
2508 		rid = vector + 1;
2509 		txr->res = bus_alloc_resource_any(dev,
2510 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2511 		if (txr->res == NULL) {
2512 			device_printf(dev,
2513 			    "Unable to allocate bus resource: "
2514 			    "TX MSIX Interrupt %d\n", i);
2515 			return (ENXIO);
2516 		}
2517 		if ((error = bus_setup_intr(dev, txr->res,
2518 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2519 		    txr, &txr->tag)) != 0) {
2520 			device_printf(dev, "Failed to register TX handler");
2521 			return (error);
2522 		}
2523 #if __FreeBSD_version >= 800504
2524 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2525 #endif
2526 		txr->msix = vector++; /* Increment vector for next pass */
2527 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2528 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2529 		    taskqueue_thread_enqueue, &txr->tq);
2530 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2531 		    device_get_nameunit(adapter->dev));
2532 		/*
2533 		** Set the bit to enable interrupt
2534 		** in E1000_IMS -- bits 22 and 23
2535 		** are for TX0 and TX1, note this has
2536 		** NOTHING to do with the MSIX vector
2537 		*/
2538 		txr->ims = 1 << (22 + i);
2539 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2540 	}
2541 
2542 	/* Link interrupt */
2543 	++rid;
2544 	adapter->res = bus_alloc_resource_any(dev,
2545 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2546 	if (!adapter->res) {
2547 		device_printf(dev, "Unable to allocate "
2548 		    "bus resource: Link interrupt [%d]\n", rid);
2549 		return (ENXIO);
2550 	}
2551 	/* Set the link handler function */
2552 	error = bus_setup_intr(dev, adapter->res,
2553 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2554 	    em_msix_link, adapter, &adapter->tag);
2555 	if (error) {
2556 		adapter->res = NULL;
2557 		device_printf(dev, "Failed to register LINK handler");
2558 		return (error);
2559 	}
2560 #if __FreeBSD_version >= 800504
2561 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2562 #endif
2563 	adapter->linkvec = vector;
2564 	adapter->ivars |= (8 | vector) << 16;
2565 	adapter->ivars |= 0x80000000;
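	/*
	 * Worked example for the single-queue case set up above (RX
	 * vector 0, TX vector 1, link vector 2; the 8 ORed into each
	 * 4-bit field enables that IVAR entry):
	 *
	 *   ivars = (8 | 0)         RX0  -> 0x00000008
	 *         | (8 | 1) << 8    TX0  -> 0x00000900
	 *         | (8 | 2) << 16   link -> 0x000a0000
	 *         | 0x80000000      global enable
	 *         = 0x800a0908
	 */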
2566 
2567 	return (0);
2568 }
2569 
2570 
2571 static void
2572 em_free_pci_resources(struct adapter *adapter)
2573 {
2574 	device_t	dev = adapter->dev;
2575 	struct tx_ring	*txr;
2576 	struct rx_ring	*rxr;
2577 	int		rid;
2578 
2579 
2580 	/*
2581 	** Release all the queue interrupt resources:
2582 	*/
2583 	for (int i = 0; i < adapter->num_queues; i++) {
2584 		txr = &adapter->tx_rings[i];
2585 		rxr = &adapter->rx_rings[i];
2586 		/* an early abort? */
2587 		if ((txr == NULL) || (rxr == NULL))
2588 			break;
2589 		rid = txr->msix + 1;
2590 		if (txr->tag != NULL) {
2591 			bus_teardown_intr(dev, txr->res, txr->tag);
2592 			txr->tag = NULL;
2593 		}
2594 		if (txr->res != NULL)
2595 			bus_release_resource(dev, SYS_RES_IRQ,
2596 			    rid, txr->res);
2597 		rid = rxr->msix + 1;
2598 		if (rxr->tag != NULL) {
2599 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2600 			rxr->tag = NULL;
2601 		}
2602 		if (rxr->res != NULL)
2603 			bus_release_resource(dev, SYS_RES_IRQ,
2604 			    rid, rxr->res);
2605 	}
2606 
2607 	if (adapter->linkvec) /* we are doing MSIX */
2608 		rid = adapter->linkvec + 1;
2609 	else
2610 		rid = (adapter->msix != 0) ? 1 : 0;
2611 
2612 	if (adapter->tag != NULL) {
2613 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2614 		adapter->tag = NULL;
2615 	}
2616 
2617 	if (adapter->res != NULL)
2618 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2619 
2620 
2621 	if (adapter->msix)
2622 		pci_release_msi(dev);
2623 
2624 	if (adapter->msix_mem != NULL)
2625 		bus_release_resource(dev, SYS_RES_MEMORY,
2626 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2627 
2628 	if (adapter->memory != NULL)
2629 		bus_release_resource(dev, SYS_RES_MEMORY,
2630 		    PCIR_BAR(0), adapter->memory);
2631 
2632 	if (adapter->flash != NULL)
2633 		bus_release_resource(dev, SYS_RES_MEMORY,
2634 		    EM_FLASH, adapter->flash);
2635 }
2636 
2637 /*
2638  * Setup MSI or MSI/X
2639  */
2640 static int
2641 em_setup_msix(struct adapter *adapter)
2642 {
2643 	device_t dev = adapter->dev;
2644 	int val = 0;
2645 
2646 
2647 	/*
2648 	** Set up MSI-X for Hartwell: tests have shown
2649 	** use of two queues to be unstable, and to
2650 	** provide no great gain anyway, so we simply
2651 	** separate the interrupts and use a single queue.
2652 	*/
2653 	if ((adapter->hw.mac.type == e1000_82574) &&
2654 	    (em_enable_msix == TRUE)) {
2655 		/* Map the MSIX BAR */
2656 		int rid = PCIR_BAR(EM_MSIX_BAR);
2657 		adapter->msix_mem = bus_alloc_resource_any(dev,
2658 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2659 		if (!adapter->msix_mem) {
2660 			/* May not be enabled */
2661 			device_printf(adapter->dev,
2662 			    "Unable to map MSIX table\n");
2663 			goto msi;
2664 		}
2665 		val = pci_msix_count(dev);
2666 		if (val < 3) {
2667 			bus_release_resource(dev, SYS_RES_MEMORY,
2668 			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2669 			adapter->msix_mem = NULL;
2670 			device_printf(adapter->dev,
2671 			    "MSIX: insufficient vectors, using MSI\n");
2672 			goto msi;
2673 		}
2674 		val = 3;
2675 		adapter->num_queues = 1;
2676 		if (pci_alloc_msix(dev, &val) == 0) {
2677 			device_printf(adapter->dev,
2678 			    "Using MSIX interrupts "
2679 			    "with %d vectors\n", val);
2680 		}
2681 
2682 		return (val);
2683 	}
2684 msi:
2685 	val = pci_msi_count(dev);
2686 	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2687 		adapter->msix = 1;
2688 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2689 		return (val);
2690 	}
2691 	/* Should only happen due to manual configuration */
2692 	device_printf(adapter->dev, "No MSI/MSI-X; using a Legacy IRQ\n");
2693 	return (0);
2694 }
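
/*
 * Note: em_enable_msix is a driver global, typically exposed as the
 * loader tunable hw.em.enable_msix, so MSI-X on the 82574 can be
 * vetoed from the boot environment without rebuilding the driver.
 */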
2695 
2696 
2697 /*********************************************************************
2698  *
2699  *  Initialize the hardware to a configuration
2700  *  as specified by the adapter structure.
2701  *
2702  **********************************************************************/
2703 static void
2704 em_reset(struct adapter *adapter)
2705 {
2706 	device_t	dev = adapter->dev;
2707 	struct ifnet	*ifp = adapter->ifp;
2708 	struct e1000_hw	*hw = &adapter->hw;
2709 	u16		rx_buffer_size;
2710 
2711 	INIT_DEBUGOUT("em_reset: begin");
2712 
2713 	/* Set up smart power down as default off on newer adapters. */
2714 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2715 	    hw->mac.type == e1000_82572)) {
2716 		u16 phy_tmp = 0;
2717 
2718 		/* Speed up time to link by disabling smart power down. */
2719 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2720 		phy_tmp &= ~IGP02E1000_PM_SPD;
2721 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2722 	}
2723 
2724 	/*
2725 	 * These parameters control the automatic generation (Tx) and
2726 	 * response (Rx) to Ethernet PAUSE frames.
2727 	 * - High water mark should allow for at least two frames to be
2728 	 *   received after sending an XOFF.
2729 	 * - Low water mark works best when it is very near the high water mark.
2730 	 *   This allows the receiver to restart by sending XON when it has
2731 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2732 	 *   restart after one full frame is pulled from the buffer. There
2733 	 *   could be several smaller frames in the buffer and if so they will
2734 	 *   not trigger the XON until their total number reduces the buffer
2735 	 *   by 1500.
2736 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2737 	 */
2738 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2739 
2740 	hw->fc.high_water = rx_buffer_size -
2741 	    roundup2(adapter->max_frame_size, 1024);
2742 	hw->fc.low_water = hw->fc.high_water - 1500;
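	/*
	 * Worked example with hypothetical numbers: if the PBA register
	 * reports 20 KB of RX packet buffer, rx_buffer_size = 20 << 10
	 * = 20480 bytes. With a 1518-byte max frame,
	 * roundup2(1518, 1024) = 2048, so high_water = 18432 and
	 * low_water = 16932.
	 */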
2743 
2744 	if (hw->mac.type == e1000_80003es2lan)
2745 		hw->fc.pause_time = 0xFFFF;
2746 	else
2747 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2748 
2749 	hw->fc.send_xon = TRUE;
2750 
2751 	/* Set flow control; use the tunable value if sane */
2752 	hw->fc.requested_mode = adapter->fc_setting;
2753 
2754 	/* Workaround: no TX flow ctrl for PCH */
2755 	if (hw->mac.type == e1000_pchlan)
2756 		hw->fc.requested_mode = e1000_fc_rx_pause;
2757 
2758 	/* Override settings for PCH2LAN; yes, it's magic :) */
2759 	if (hw->mac.type == e1000_pch2lan) {
2760 		hw->fc.high_water = 0x5C20;
2761 		hw->fc.low_water = 0x5048;
2762 		hw->fc.pause_time = 0x0650;
2763 		hw->fc.refresh_time = 0x0400;
2764 		/* Jumbos need adjusted PBA */
2765 		if (ifp->if_mtu > ETHERMTU)
2766 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2767 		else
2768 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2769 	}
2770 
2771 	/* Issue a global reset */
2772 	e1000_reset_hw(hw);
2773 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2774 	em_disable_aspm(adapter);
2775 
2776 	if (e1000_init_hw(hw) < 0) {
2777 		device_printf(dev, "Hardware Initialization Failed\n");
2778 		return;
2779 	}
2780 
2781 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2782 	e1000_get_phy_info(hw);
2783 	e1000_check_for_link(hw);
2784 	return;
2785 }
2786 
2787 /*********************************************************************
2788  *
2789  *  Setup networking device structure and register an interface.
2790  *
2791  **********************************************************************/
2792 static int
2793 em_setup_interface(device_t dev, struct adapter *adapter)
2794 {
2795 	struct ifnet   *ifp;
2796 
2797 	INIT_DEBUGOUT("em_setup_interface: begin");
2798 
2799 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2800 	if (ifp == NULL) {
2801 		device_printf(dev, "cannot allocate ifnet structure\n");
2802 		return (-1);
2803 	}
2804 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2805 	ifp->if_mtu = ETHERMTU;
2806 	ifp->if_init = em_init;
2807 	ifp->if_softc = adapter;
2808 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2809 	ifp->if_ioctl = em_ioctl;
2810 	ifp->if_start = em_start;
2811 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2812 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2813 	IFQ_SET_READY(&ifp->if_snd);
2814 
2815 	ether_ifattach(ifp, adapter->hw.mac.addr);
2816 
2817 	ifp->if_capabilities = ifp->if_capenable = 0;
2818 
2819 #ifdef EM_MULTIQUEUE
2820 	/* Multiqueue tx functions */
2821 	ifp->if_transmit = em_mq_start;
2822 	ifp->if_qflush = em_qflush;
2823 #endif
2824 
2825 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2826 	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2827 
2828 	/* Enable TSO by default, can disable with ifconfig */
2829 	ifp->if_capabilities |= IFCAP_TSO4;
2830 	ifp->if_capenable |= IFCAP_TSO4;
2831 
2832 	/*
2833 	 * Tell the upper layer(s) we
2834 	 * support full VLAN capability
2835 	 */
2836 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2837 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2838 	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2839 
2840 	/*
2841 	** Don't turn this on by default: if vlans are
2842 	** created on another pseudo device (e.g. lagg)
2843 	** then vlan events are not passed through, breaking
2844 	** operation, but with HW FILTER off it works. If
2845 	** you use vlans directly on the em driver you can
2846 	** enable this and get full hardware tag filtering.
2847 	*/
2848 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
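	/*
	 * A user running vlans directly on em can opt in at runtime,
	 * e.g. (hypothetical unit name):
	 *
	 *   # ifconfig em0 vlanhwfilter
	 */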
2849 
2850 #ifdef DEVICE_POLLING
2851 	ifp->if_capabilities |= IFCAP_POLLING;
2852 #endif
2853 
2854 	/* Enable only WOL MAGIC by default */
2855 	if (adapter->wol) {
2856 		ifp->if_capabilities |= IFCAP_WOL;
2857 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2858 	}
2859 
2860 	/*
2861 	 * Specify the media types supported by this adapter and register
2862 	 * callbacks to update media and link information
2863 	 */
2864 	ifmedia_init(&adapter->media, IFM_IMASK,
2865 	    em_media_change, em_media_status);
2866 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2867 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2868 		u_char fiber_type = IFM_1000_SX;	/* default type */
2869 
2870 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2871 			    0, NULL);
2872 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2873 	} else {
2874 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2875 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2876 			    0, NULL);
2877 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2878 			    0, NULL);
2879 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2880 			    0, NULL);
2881 		if (adapter->hw.phy.type != e1000_phy_ife) {
2882 			ifmedia_add(&adapter->media,
2883 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2884 			ifmedia_add(&adapter->media,
2885 				IFM_ETHER | IFM_1000_T, 0, NULL);
2886 		}
2887 	}
2888 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2889 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2890 	return (0);
2891 }
2892 
2893 
2894 /*
2895  * Manage DMA'able memory.
2896  */
2897 static void
2898 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2899 {
2900 	if (error)
2901 		return;
2902 	*(bus_addr_t *) arg = segs[0].ds_addr;
2903 }
2904 
2905 static int
2906 em_dma_malloc(struct adapter *adapter, bus_size_t size,
2907         struct em_dma_alloc *dma, int mapflags)
2908 {
2909 	int error;
2910 
2911 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2912 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2913 				BUS_SPACE_MAXADDR,	/* lowaddr */
2914 				BUS_SPACE_MAXADDR,	/* highaddr */
2915 				NULL, NULL,		/* filter, filterarg */
2916 				size,			/* maxsize */
2917 				1,			/* nsegments */
2918 				size,			/* maxsegsize */
2919 				0,			/* flags */
2920 				NULL,			/* lockfunc */
2921 				NULL,			/* lockarg */
2922 				&dma->dma_tag);
2923 	if (error) {
2924 		device_printf(adapter->dev,
2925 		    "%s: bus_dma_tag_create failed: %d\n",
2926 		    __func__, error);
2927 		goto fail_0;
2928 	}
2929 
2930 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2931 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2932 	if (error) {
2933 		device_printf(adapter->dev,
2934 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2935 		    __func__, (uintmax_t)size, error);
2936 		goto fail_2;
2937 	}
2938 
2939 	dma->dma_paddr = 0;
2940 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2941 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2942 	if (error || dma->dma_paddr == 0) {
2943 		device_printf(adapter->dev,
2944 		    "%s: bus_dmamap_load failed: %d\n",
2945 		    __func__, error);
2946 		goto fail_3;
2947 	}
2948 
2949 	return (0);
2950 
2951 fail_3:
2952 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2953 fail_2:
2954 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2955 	bus_dma_tag_destroy(dma->dma_tag);
2956 fail_0:
2957 	dma->dma_map = NULL;
2958 	dma->dma_tag = NULL;
2959 
2960 	return (error);
2961 }
2962 
2963 static void
2964 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2965 {
2966 	if (dma->dma_tag == NULL)
2967 		return;
2968 	if (dma->dma_map != NULL) {
2969 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2970 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2971 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2972 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2973 		dma->dma_map = NULL;
2974 	}
2975 	bus_dma_tag_destroy(dma->dma_tag);
2976 	dma->dma_tag = NULL;
2977 }
2978 
2979 
2980 /*********************************************************************
2981  *
2982  *  Allocate memory for the transmit and receive rings, and then
2983  *  the descriptors associated with each, called only once at attach.
2984  *
2985  **********************************************************************/
2986 static int
2987 em_allocate_queues(struct adapter *adapter)
2988 {
2989 	device_t		dev = adapter->dev;
2990 	struct tx_ring		*txr = NULL;
2991 	struct rx_ring		*rxr = NULL;
2992 	int rsize, tsize, error = E1000_SUCCESS;
2993 	int txconf = 0, rxconf = 0;
2994 
2995 
2996 	/* Allocate the TX ring struct memory */
2997 	if (!(adapter->tx_rings =
2998 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2999 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3000 		device_printf(dev, "Unable to allocate TX ring memory\n");
3001 		error = ENOMEM;
3002 		goto fail;
3003 	}
3004 
3005 	/* Now allocate the RX */
3006 	if (!(adapter->rx_rings =
3007 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3008 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3009 		device_printf(dev, "Unable to allocate RX ring memory\n");
3010 		error = ENOMEM;
3011 		goto rx_fail;
3012 	}
3013 
3014 	tsize = roundup2(adapter->num_tx_desc *
3015 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3016 	/*
3017 	 * Now set up the TX queues, txconf is needed to handle the
3018 	 * possibility that things fail midcourse and we need to
3019 	 * undo memory gracefully
3020 	 */
3021 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3022 		/* Set up some basics */
3023 		txr = &adapter->tx_rings[i];
3024 		txr->adapter = adapter;
3025 		txr->me = i;
3026 
3027 		/* Initialize the TX lock */
3028 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3029 		    device_get_nameunit(dev), txr->me);
3030 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3031 
3032 		if (em_dma_malloc(adapter, tsize,
3033 			&txr->txdma, BUS_DMA_NOWAIT)) {
3034 			device_printf(dev,
3035 			    "Unable to allocate TX Descriptor memory\n");
3036 			error = ENOMEM;
3037 			goto err_tx_desc;
3038 		}
3039 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3040 		bzero((void *)txr->tx_base, tsize);
3041 
3042 		if (em_allocate_transmit_buffers(txr)) {
3043 			device_printf(dev,
3044 			    "Critical Failure setting up transmit buffers\n");
3045 			error = ENOMEM;
3046 			goto err_tx_desc;
3047 		}
3048 #if __FreeBSD_version >= 800000
3049 		/* Allocate a buf ring */
3050 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3051 		    M_WAITOK, &txr->tx_mtx);
3052 #endif
3053 	}
3054 
3055 	/*
3056 	 * Next the RX queues...
3057 	 */
3058 	rsize = roundup2(adapter->num_rx_desc *
3059 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3060 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3061 		rxr = &adapter->rx_rings[i];
3062 		rxr->adapter = adapter;
3063 		rxr->me = i;
3064 
3065 		/* Initialize the RX lock */
3066 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3067 		    device_get_nameunit(dev), rxr->me);
3068 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3069 
3070 		if (em_dma_malloc(adapter, rsize,
3071 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3072 			device_printf(dev,
3073 			    "Unable to allocate RX Descriptor memory\n");
3074 			error = ENOMEM;
3075 			goto err_rx_desc;
3076 		}
3077 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3078 		bzero((void *)rxr->rx_base, rsize);
3079 
3080 		/* Allocate receive buffers for the ring */
3081 		if (em_allocate_receive_buffers(rxr)) {
3082 			device_printf(dev,
3083 			    "Critical Failure setting up receive buffers\n");
3084 			error = ENOMEM;
3085 			goto err_rx_desc;
3086 		}
3087 	}
3088 
3089 	return (0);
3090 
3091 err_rx_desc:
3092 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3093 		em_dma_free(adapter, &rxr->rxdma);
3094 err_tx_desc:
3095 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3096 		em_dma_free(adapter, &txr->txdma);
3097 	free(adapter->rx_rings, M_DEVBUF);
3098 rx_fail:
3099 #if __FreeBSD_version >= 800000
3100 	buf_ring_free(txr->br, M_DEVBUF);
3101 #endif
3102 	free(adapter->tx_rings, M_DEVBUF);
3103 fail:
3104 	return (error);
3105 }
3106 
3107 
3108 /*********************************************************************
3109  *
3110  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3111  *  the information needed to transmit a packet on the wire. This is
3112  *  called only once at attach, setup is done every reset.
3113  *
3114  **********************************************************************/
3115 static int
3116 em_allocate_transmit_buffers(struct tx_ring *txr)
3117 {
3118 	struct adapter *adapter = txr->adapter;
3119 	device_t dev = adapter->dev;
3120 	struct em_buffer *txbuf;
3121 	int error, i;
3122 
3123 	/*
3124 	 * Setup DMA descriptor areas.
3125 	 */
3126 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3127 			       1, 0,			/* alignment, bounds */
3128 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3129 			       BUS_SPACE_MAXADDR,	/* highaddr */
3130 			       NULL, NULL,		/* filter, filterarg */
3131 			       EM_TSO_SIZE,		/* maxsize */
3132 			       EM_MAX_SCATTER,		/* nsegments */
3133 			       PAGE_SIZE,		/* maxsegsize */
3134 			       0,			/* flags */
3135 			       NULL,			/* lockfunc */
3136 			       NULL,			/* lockfuncarg */
3137 			       &txr->txtag))) {
3138 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3139 		goto fail;
3140 	}
3141 
3142 	if (!(txr->tx_buffers =
3143 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3144 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3145 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3146 		error = ENOMEM;
3147 		goto fail;
3148 	}
3149 
3150 	/* Create the descriptor buffer dma maps */
3151 	txbuf = txr->tx_buffers;
3152 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3153 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3154 		if (error != 0) {
3155 			device_printf(dev, "Unable to create TX DMA map\n");
3156 			goto fail;
3157 		}
3158 	}
3159 
3160 	return (0);
3161 fail:
3162 	/* We free all, it handles case where we are in the middle */
3163 	em_free_transmit_structures(adapter);
3164 	return (error);
3165 }
3166 
3167 /*********************************************************************
3168  *
3169  *  Initialize a transmit ring.
3170  *
3171  **********************************************************************/
3172 static void
3173 em_setup_transmit_ring(struct tx_ring *txr)
3174 {
3175 	struct adapter *adapter = txr->adapter;
3176 	struct em_buffer *txbuf;
3177 	int i;
3178 
3179 	/* Clear the old descriptor contents */
3180 	EM_TX_LOCK(txr);
3181 	bzero((void *)txr->tx_base,
3182 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3183 	/* Reset indices */
3184 	txr->next_avail_desc = 0;
3185 	txr->next_to_clean = 0;
3186 
3187 	/* Free any existing tx buffers. */
3188 	txbuf = txr->tx_buffers;
3189 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3190 		if (txbuf->m_head != NULL) {
3191 			bus_dmamap_sync(txr->txtag, txbuf->map,
3192 			    BUS_DMASYNC_POSTWRITE);
3193 			bus_dmamap_unload(txr->txtag, txbuf->map);
3194 			m_freem(txbuf->m_head);
3195 			txbuf->m_head = NULL;
3196 		}
3197 		/* clear the watch index */
3198 		txbuf->next_eop = -1;
3199 	}
3200 
3201 	/* Set number of descriptors available */
3202 	txr->tx_avail = adapter->num_tx_desc;
3203 	txr->queue_status = EM_QUEUE_IDLE;
3204 
3205 	/* Clear checksum offload context. */
3206 	txr->last_hw_offload = 0;
3207 	txr->last_hw_ipcss = 0;
3208 	txr->last_hw_ipcso = 0;
3209 	txr->last_hw_tucss = 0;
3210 	txr->last_hw_tucso = 0;
3211 
3212 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3213 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3214 	EM_TX_UNLOCK(txr);
3215 }
3216 
3217 /*********************************************************************
3218  *
3219  *  Initialize all transmit rings.
3220  *
3221  **********************************************************************/
3222 static void
3223 em_setup_transmit_structures(struct adapter *adapter)
3224 {
3225 	struct tx_ring *txr = adapter->tx_rings;
3226 
3227 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3228 		em_setup_transmit_ring(txr);
3229 
3230 	return;
3231 }
3232 
3233 /*********************************************************************
3234  *
3235  *  Enable transmit unit.
3236  *
3237  **********************************************************************/
3238 static void
3239 em_initialize_transmit_unit(struct adapter *adapter)
3240 {
3241 	struct tx_ring	*txr = adapter->tx_rings;
3242 	struct e1000_hw	*hw = &adapter->hw;
3243 	u32	tctl, tarc, tipg = 0;
3244 
3245 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3246 
3247 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3248 		u64 bus_addr = txr->txdma.dma_paddr;
3249 		/* Base and Len of TX Ring */
3250 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3251 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3252 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3253 	    	    (u32)(bus_addr >> 32));
3254 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3255 	    	    (u32)bus_addr);
3256 		/* Init the HEAD/TAIL indices */
3257 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3258 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3259 
3260 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3261 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3262 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3263 
3264 		txr->queue_status = EM_QUEUE_IDLE;
3265 	}
3266 
3267 	/* Set the default values for the Tx Inter Packet Gap timer */
3268 	switch (adapter->hw.mac.type) {
3269 	case e1000_82542:
3270 		tipg = DEFAULT_82542_TIPG_IPGT;
3271 		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3272 		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3273 		break;
3274 	case e1000_80003es2lan:
3275 		tipg = DEFAULT_82543_TIPG_IPGR1;
3276 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3277 		    E1000_TIPG_IPGR2_SHIFT;
3278 		break;
3279 	default:
3280 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3281 		    (adapter->hw.phy.media_type ==
3282 		    e1000_media_type_internal_serdes))
3283 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3284 		else
3285 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3286 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3287 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3288 	}
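	/*
	 * TIPG packs three fields into one register: IPGT in bits 9:0,
	 * IPGR1 at E1000_TIPG_IPGR1_SHIFT (10) and IPGR2 at
	 * E1000_TIPG_IPGR2_SHIFT (20), i.e.
	 *
	 *   tipg = IPGT | (IPGR1 << 10) | (IPGR2 << 20);
	 */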
3289 
3290 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3291 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3292 
3293 	if (adapter->hw.mac.type >= e1000_82540)
3294 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3295 		    adapter->tx_abs_int_delay.value);
3296 
3297 	if ((adapter->hw.mac.type == e1000_82571) ||
3298 	    (adapter->hw.mac.type == e1000_82572)) {
3299 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3300 		tarc |= SPEED_MODE_BIT;
3301 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3302 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3303 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3304 		tarc |= 1;
3305 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3306 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3307 		tarc |= 1;
3308 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3309 	}
3310 
3311 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3312 	if (adapter->tx_int_delay.value > 0)
3313 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3314 
3315 	/* Program the Transmit Control Register */
3316 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3317 	tctl &= ~E1000_TCTL_CT;
3318 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3319 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3320 
3321 	if (adapter->hw.mac.type >= e1000_82571)
3322 		tctl |= E1000_TCTL_MULR;
3323 
3324 	/* This write will effectively turn on the transmit unit. */
3325 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3326 
3327 }
3328 
3329 
3330 /*********************************************************************
3331  *
3332  *  Free all transmit rings.
3333  *
3334  **********************************************************************/
3335 static void
3336 em_free_transmit_structures(struct adapter *adapter)
3337 {
3338 	struct tx_ring *txr = adapter->tx_rings;
3339 
3340 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3341 		EM_TX_LOCK(txr);
3342 		em_free_transmit_buffers(txr);
3343 		em_dma_free(adapter, &txr->txdma);
3344 		EM_TX_UNLOCK(txr);
3345 		EM_TX_LOCK_DESTROY(txr);
3346 	}
3347 
3348 	free(adapter->tx_rings, M_DEVBUF);
3349 }
3350 
3351 /*********************************************************************
3352  *
3353  *  Free transmit ring related data structures.
3354  *
3355  **********************************************************************/
3356 static void
3357 em_free_transmit_buffers(struct tx_ring *txr)
3358 {
3359 	struct adapter		*adapter = txr->adapter;
3360 	struct em_buffer	*txbuf;
3361 
3362 	INIT_DEBUGOUT("free_transmit_ring: begin");
3363 
3364 	if (txr->tx_buffers == NULL)
3365 		return;
3366 
3367 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3368 		txbuf = &txr->tx_buffers[i];
3369 		if (txbuf->m_head != NULL) {
3370 			bus_dmamap_sync(txr->txtag, txbuf->map,
3371 			    BUS_DMASYNC_POSTWRITE);
3372 			bus_dmamap_unload(txr->txtag,
3373 			    txbuf->map);
3374 			m_freem(txbuf->m_head);
3375 			txbuf->m_head = NULL;
3376 			if (txbuf->map != NULL) {
3377 				bus_dmamap_destroy(txr->txtag,
3378 				    txbuf->map);
3379 				txbuf->map = NULL;
3380 			}
3381 		} else if (txbuf->map != NULL) {
3382 			bus_dmamap_unload(txr->txtag,
3383 			    txbuf->map);
3384 			bus_dmamap_destroy(txr->txtag,
3385 			    txbuf->map);
3386 			txbuf->map = NULL;
3387 		}
3388 	}
3389 #if __FreeBSD_version >= 800000
3390 	if (txr->br != NULL)
3391 		buf_ring_free(txr->br, M_DEVBUF);
3392 #endif
3393 	if (txr->tx_buffers != NULL) {
3394 		free(txr->tx_buffers, M_DEVBUF);
3395 		txr->tx_buffers = NULL;
3396 	}
3397 	if (txr->txtag != NULL) {
3398 		bus_dma_tag_destroy(txr->txtag);
3399 		txr->txtag = NULL;
3400 	}
3401 	return;
3402 }
3403 
3404 
3405 /*********************************************************************
3406  *  The offload context is protocol specific (TCP/UDP) and thus
3407  *  only needs to be set when the protocol changes. A context
3408  *  change can itself be a performance detriment, and it may be
3409  *  better to disable offload entirely. The reason lies in the
3410  *  way the controller supports pipelined requests from the
3411  *  Tx data DMA. Up to four requests can be pipelined, and they
3412  *  may belong to the same packet or to multiple packets. However,
3413  *  all requests for one packet are issued before any request for
3414  *  a subsequent packet, and if a request for the next packet
3415  *  requires a context change, that request stalls until the
3416  *  previous request completes. Setting up a new context thus
3417  *  effectively disables pipelined Tx data DMA, which in turn
3418  *  greatly slows down performance when sending small
3419  *  frames.
3420  **********************************************************************/
3421 static void
3422 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3423     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3424 {
3425 	struct adapter			*adapter = txr->adapter;
3426 	struct e1000_context_desc	*TXD = NULL;
3427 	struct em_buffer		*tx_buffer;
3428 	int				cur, hdr_len;
3429 	u32				cmd = 0;
3430 	u16				offload = 0;
3431 	u8				ipcso, ipcss, tucso, tucss;
3432 
3433 	ipcss = ipcso = tucss = tucso = 0;
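	/* ip_hl counts 32-bit words, so << 2 converts it to bytes. */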
3434 	hdr_len = ip_off + (ip->ip_hl << 2);
3435 	cur = txr->next_avail_desc;
3436 
3437 	/* Setup of IP header checksum. */
3438 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3439 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3440 		offload |= CSUM_IP;
3441 		ipcss = ip_off;
3442 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3443 		/*
3444 		 * Start offset for header checksum calculation.
3445 		 * End offset for header checksum calculation.
3446 		 * Offset of place to put the checksum.
3447 		 */
3448 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3449 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3450 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3451 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3452 		cmd |= E1000_TXD_CMD_IP;
3453 	}
3454 
3455 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3456  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3457  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3458  		offload |= CSUM_TCP;
3459  		tucss = hdr_len;
3460  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3461  		/*
3462  		 * Setting up a new checksum offload context for every
3463  		 * frame takes a lot of processing time in hardware, and
3464  		 * it hurts performance badly for small frames, so avoid
3465  		 * it when the previously configured checksum offload
3466  		 * context can be reused.
3467  		 */
3468  		if (txr->last_hw_offload == offload) {
3469  			if (offload & CSUM_IP) {
3470  				if (txr->last_hw_ipcss == ipcss &&
3471  				    txr->last_hw_ipcso == ipcso &&
3472  				    txr->last_hw_tucss == tucss &&
3473  				    txr->last_hw_tucso == tucso)
3474  					return;
3475  			} else {
3476  				if (txr->last_hw_tucss == tucss &&
3477  				    txr->last_hw_tucso == tucso)
3478  					return;
3479  			}
3480   		}
3481  		txr->last_hw_offload = offload;
3482  		txr->last_hw_tucss = tucss;
3483  		txr->last_hw_tucso = tucso;
3484  		/*
3485  		 * Start offset for payload checksum calculation.
3486  		 * End offset for payload checksum calculation.
3487  		 * Offset of place to put the checksum.
3488  		 */
3489 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3490  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
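		/* A hedged note: a tucse of 0 means checksum to the end of the packet. */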
3491  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3492  		TXD->upper_setup.tcp_fields.tucso = tucso;
3493  		cmd |= E1000_TXD_CMD_TCP;
3494  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3495  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3496  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3497  		tucss = hdr_len;
3498  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3499  		/*
3500  		 * Setting up a new checksum offload context for every
3501  		 * frame takes a lot of processing time in hardware, and
3502  		 * it hurts performance badly for small frames, so avoid
3503  		 * it when the previously configured checksum offload
3504  		 * context can be reused.
3505  		 */
3506  		if (txr->last_hw_offload == offload) {
3507  			if (offload & CSUM_IP) {
3508  				if (txr->last_hw_ipcss == ipcss &&
3509  				    txr->last_hw_ipcso == ipcso &&
3510  				    txr->last_hw_tucss == tucss &&
3511  				    txr->last_hw_tucso == tucso)
3512  					return;
3513  			} else {
3514  				if (txr->last_hw_tucss == tucss &&
3515  				    txr->last_hw_tucso == tucso)
3516  					return;
3517  			}
3518  		}
3519  		txr->last_hw_offload = offload;
3520  		txr->last_hw_tucss = tucss;
3521  		txr->last_hw_tucso = tucso;
3522  		/*
3523  		 * Start offset for payload checksum calculation.
3524  		 * End offset for payload checksum calculation.
3525  		 * Offset of place to put the checksum.
3526  		 */
3527 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3528  		TXD->upper_setup.tcp_fields.tucss = tucss;
3529  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3530  		TXD->upper_setup.tcp_fields.tucso = tucso;
3531   	}
3532 
3533  	if (offload & CSUM_IP) {
3534  		txr->last_hw_ipcss = ipcss;
3535  		txr->last_hw_ipcso = ipcso;
3536   	}
3537 
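	/*
	 * TXD was set in one of the offload branches above; this assumes
	 * callers invoke this routine only with a checksum flag set.
	 */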
3538 	TXD->tcp_seg_setup.data = htole32(0);
3539 	TXD->cmd_and_length =
3540 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3541 	tx_buffer = &txr->tx_buffers[cur];
3542 	tx_buffer->m_head = NULL;
3543 	tx_buffer->next_eop = -1;
3544 
3545 	if (++cur == adapter->num_tx_desc)
3546 		cur = 0;
3547 
3548 	txr->tx_avail--;
3549 	txr->next_avail_desc = cur;
3550 }
3551 
3552 
3553 /**********************************************************************
3554  *
3555  *  Setup work for hardware segmentation offload (TSO)
3556  *
3557  **********************************************************************/
3558 static void
3559 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3560     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3561 {
3562 	struct adapter			*adapter = txr->adapter;
3563 	struct e1000_context_desc	*TXD;
3564 	struct em_buffer		*tx_buffer;
3565 	int cur, hdr_len;
3566 
3567 	/*
3568 	 * In theory we could reuse the same TSO context if and only if
3569 	 * the frame is the same type (IP/TCP) and has the same MSS.
3570 	 * However, checking whether a frame has the same IP/TCP
3571 	 * structure is hard, so just ignore that and always establish
3572 	 * a new TSO context.
3573 	 */
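	/* ip_hl and th_off are both in 32-bit words; << 2 yields bytes. */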
3574 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3575 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3576 		      E1000_TXD_DTYP_D |	/* Data descr type */
3577 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3578 
3579 	/* IP and/or TCP header checksum calculation and insertion. */
3580 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3581 
3582 	cur = txr->next_avail_desc;
3583 	tx_buffer = &txr->tx_buffers[cur];
3584 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3585 
3586 	/*
3587 	 * Start offset for header checksum calculation.
3588 	 * End offset for header checksum calculation.
3589 	 * Offset of place to put the checksum.
3590 	 */
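	/* A hedged note: ipcse is an inclusive end offset, hence the -1 below. */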
3591 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3592 	TXD->lower_setup.ip_fields.ipcse =
3593 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3594 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3595 	/*
3596 	 * Start offset for payload checksum calculation.
3597 	 * End offset for payload checksum calculation.
3598 	 * Offset of place to put the checksum.
3599 	 */
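	/* A hedged note: a tucse of 0 means checksum through the end of the packet. */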
3600 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3601 	TXD->upper_setup.tcp_fields.tucse = 0;
3602 	TXD->upper_setup.tcp_fields.tucso =
3603 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3604 	/*
3605 	 * Payload size per packet w/o any headers.
3606 	 * Length of all headers up to payload.
3607 	 */
3608 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3609 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3610 
3611 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3612 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3613 				E1000_TXD_CMD_TSE |	/* TSE context */
3614 				E1000_TXD_CMD_IP |	/* Do IP csum */
3615 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3616 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3617 
3618 	tx_buffer->m_head = NULL;
3619 	tx_buffer->next_eop = -1;
3620 
3621 	if (++cur == adapter->num_tx_desc)
3622 		cur = 0;
3623 
3624 	txr->tx_avail--;
3625 	txr->next_avail_desc = cur;
3626 	txr->tx_tso = TRUE;
3627 }
3628 
3629 
3630 /**********************************************************************
3631  *
3632  *  Examine each tx_buffer in the used queue. If the hardware is done
3633  *  processing the packet then free associated resources. The
3634  *  tx_buffer is put back on the free queue.
3635  *
3636  **********************************************************************/
3637 static bool
3638 em_txeof(struct tx_ring *txr)
3639 {
3640 	struct adapter	*adapter = txr->adapter;
3641 	int first, last, done, processed;
3642 	struct em_buffer *tx_buffer;
3643 	struct e1000_tx_desc	*tx_desc, *eop_desc;
3644 	struct ifnet   *ifp = adapter->ifp;
3645 
3646 	EM_TX_LOCK_ASSERT(txr);
3647 
3648 	/* No work, make sure watchdog is off */
3649 	if (txr->tx_avail == adapter->num_tx_desc) {
3650 		txr->queue_status = EM_QUEUE_IDLE;
3651 		return (FALSE);
3652 	}
3653 
3654 	processed = 0;
3655 	first = txr->next_to_clean;
3656 	tx_desc = &txr->tx_base[first];
3657 	tx_buffer = &txr->tx_buffers[first];
3658 	last = tx_buffer->next_eop;
3659 	eop_desc = &txr->tx_base[last];
3660 
3661 	/*
3662 	 * Get the index of the first descriptor AFTER
3663 	 * the EOP of the first packet, so that we can
3664 	 * do a simple comparison in the inner while
3665 	 * loop below.
3666 	 */
3667 	if (++last == adapter->num_tx_desc)
3668 		last = 0;
3669 	done = last;
3670 
3671 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3672 	    BUS_DMASYNC_POSTREAD);
3673 
3674 	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3675 		/* We clean the range of the packet */
3676 		while (first != done) {
3677 			tx_desc->upper.data = 0;
3678 			tx_desc->lower.data = 0;
3679 			tx_desc->buffer_addr = 0;
3680 			++txr->tx_avail;
3681 			++processed;
3682 
3683 			if (tx_buffer->m_head) {
3684 				bus_dmamap_sync(txr->txtag,
3685 				    tx_buffer->map,
3686 				    BUS_DMASYNC_POSTWRITE);
3687 				bus_dmamap_unload(txr->txtag,
3688 				    tx_buffer->map);
3689 				m_freem(tx_buffer->m_head);
3690 				tx_buffer->m_head = NULL;
3691 			}
3692 			tx_buffer->next_eop = -1;
3693 			txr->watchdog_time = ticks;
3694 
3695 			if (++first == adapter->num_tx_desc)
3696 				first = 0;
3697 
3698 			tx_buffer = &txr->tx_buffers[first];
3699 			tx_desc = &txr->tx_base[first];
3700 		}
3701 		++ifp->if_opackets;
3702 		/* See if we can continue to the next packet */
3703 		last = tx_buffer->next_eop;
3704 		if (last != -1) {
3705 			eop_desc = &txr->tx_base[last];
3706 			/* Get new done point */
3707 			if (++last == adapter->num_tx_desc) last = 0;
3708 			done = last;
3709 		} else
3710 			break;
3711 	}
3712 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3713 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3714 
3715 	txr->next_to_clean = first;
3716 
3717 	/*
3718 	** Watchdog calculation: we know there is work
3719 	** outstanding, or the first return above would
3720 	** have been taken, so no progress for too long
3721 	** indicates a hang. The local timer will examine
3722 	** this and do a reset if needed.
3723 	*/
3724 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3725 		txr->queue_status = EM_QUEUE_HUNG;
3726 
3727 	/*
3728 	 * If we have enough room, clear IFF_DRV_OACTIVE
3729 	 * to tell the stack that it is OK to send packets.
3730 	 */
3731 	if (txr->tx_avail > EM_TX_CLEANUP_THRESHOLD) {
3732 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3733 		/* Disable watchdog if all clean */
3734 		if (txr->tx_avail == adapter->num_tx_desc) {
3735 			txr->queue_status = EM_QUEUE_IDLE;
3736 			return (FALSE);
3737 		}
3738 	}
3739 
3740 	return (TRUE);
3741 }
3742 
3743 
3744 /*********************************************************************
3745  *
3746  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3747  *
3748  **********************************************************************/
3749 static void
3750 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3751 {
3752 	struct adapter		*adapter = rxr->adapter;
3753 	struct mbuf		*m;
3754 	bus_dma_segment_t	segs[1];
3755 	struct em_buffer	*rxbuf;
3756 	int			i, error, nsegs, cleaned;
3757 
3758 	i = rxr->next_to_refresh;
3759 	cleaned = -1;
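	/* -1 flags that nothing was refreshed, so the tail write below is skipped. */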
3760 	while (i != limit) {
3761 		rxbuf = &rxr->rx_buffers[i];
3762 		if (rxbuf->m_head == NULL) {
3763 			m = m_getjcl(M_DONTWAIT, MT_DATA,
3764 			    M_PKTHDR, adapter->rx_mbuf_sz);
3765 			/*
3766 			** If we have a temporary resource shortage
3767 			** that causes a failure, just abort refresh
3768 			** for now, we will return to this point when
3769 			** reinvoked from em_rxeof.
3770 			*/
3771 			if (m == NULL)
3772 				goto update;
3773 		} else
3774 			m = rxbuf->m_head;
3775 
3776 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3777 		m->m_flags |= M_PKTHDR;
3778 		m->m_data = m->m_ext.ext_buf;
3779 
3780 		/* Use bus_dma machinery to setup the memory mapping  */
3781 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3782 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3783 		if (error != 0) {
3784 			printf("Refresh mbufs: hdr dmamap load"
3785 			    " failure - %d\n", error);
3786 			m_free(m);
3787 			rxbuf->m_head = NULL;
3788 			goto update;
3789 		}
3790 		rxbuf->m_head = m;
3791 		bus_dmamap_sync(rxr->rxtag,
3792 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3793 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3794 
3795 		cleaned = i;
3796 		/* Calculate next index */
3797 		if (++i == adapter->num_rx_desc)
3798 			i = 0;
3799 		rxr->next_to_refresh = i;
3800 	}
3801 update:
3802 	/*
3803 	** Update the tail pointer only if, and only as
3804 	** far as, we have actually refreshed.
3805 	*/
3806 	if (cleaned != -1) /* Update tail index */
3807 		E1000_WRITE_REG(&adapter->hw,
3808 		    E1000_RDT(rxr->me), cleaned);
3809 
3810 	return;
3811 }
3812 
3813 
3814 /*********************************************************************
3815  *
3816  *  Allocate memory for rx_buffer structures. Since we use one
3817  *  rx_buffer per received packet, the maximum number of rx_buffer's
3818  *  that we'll need is equal to the number of receive descriptors
3819  *  that we've allocated.
3820  *
3821  **********************************************************************/
3822 static int
3823 em_allocate_receive_buffers(struct rx_ring *rxr)
3824 {
3825 	struct adapter		*adapter = rxr->adapter;
3826 	device_t		dev = adapter->dev;
3827 	struct em_buffer	*rxbuf;
3828 	int			error;
3829 
3830 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3831 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3832 	if (rxr->rx_buffers == NULL) {
3833 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3834 		return (ENOMEM);
3835 	}
3836 
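	/*
	 * A hedged note: the tag below is sized for a single segment of
	 * MJUM9BYTES (9 KB), the largest receive cluster this driver uses.
	 */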
3837 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3838 				1, 0,			/* alignment, bounds */
3839 				BUS_SPACE_MAXADDR,	/* lowaddr */
3840 				BUS_SPACE_MAXADDR,	/* highaddr */
3841 				NULL, NULL,		/* filter, filterarg */
3842 				MJUM9BYTES,		/* maxsize */
3843 				1,			/* nsegments */
3844 				MJUM9BYTES,		/* maxsegsize */
3845 				0,			/* flags */
3846 				NULL,			/* lockfunc */
3847 				NULL,			/* lockarg */
3848 				&rxr->rxtag);
3849 	if (error) {
3850 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3851 		    __func__, error);
3852 		goto fail;
3853 	}
3854 
3855 	for (int i = 0; i < adapter->num_rx_desc; i++) {
3856 		/* Create a DMA map for each buffer slot. */
3857 		rxbuf = &rxr->rx_buffers[i];
3858 		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3859 		    &rxbuf->map);
3860 		if (error) {
3861 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3862 			    __func__, error);
3863 			goto fail;
3864 		}
3865 	}
3866 
3867 	return (0);
3868 
3869 fail:
3870 	em_free_receive_structures(adapter);
3871 	return (error);
3872 }
3873 
3874 
3875 /*********************************************************************
3876  *
3877  *  Initialize a receive ring and its buffers.
3878  *
3879  **********************************************************************/
3880 static int
3881 em_setup_receive_ring(struct rx_ring *rxr)
3882 {
3883 	struct	adapter 	*adapter = rxr->adapter;
3884 	struct em_buffer	*rxbuf;
3885 	bus_dma_segment_t	seg[1];
3886 	int			rsize, nsegs, error;
3887 
3888 
3889 	/* Clear the ring contents */
3890 	EM_RX_LOCK(rxr);
3891 	rsize = roundup2(adapter->num_rx_desc *
3892 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3893 	bzero((void *)rxr->rx_base, rsize);
3894 
3895 	/*
3896 	** Free current RX buffer structs and their mbufs
3897 	*/
3898 	for (int i = 0; i < adapter->num_rx_desc; i++) {
3899 		rxbuf = &rxr->rx_buffers[i];
3900 		if (rxbuf->m_head != NULL) {
3901 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3902 			    BUS_DMASYNC_POSTREAD);
3903 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3904 			m_freem(rxbuf->m_head);
3905 		}
3906 	}
3907 
3908 	/* Now replenish the mbufs */
3909 	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3910 
3911 		rxbuf = &rxr->rx_buffers[j];
3912 		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
3913 		    M_PKTHDR, adapter->rx_mbuf_sz);
3914 		if (rxbuf->m_head == NULL) {
			/* Don't leak the RX ring lock on failure. */
			EM_RX_UNLOCK(rxr);
3915 			return (ENOBUFS);
		}
3916 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
3917 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3918 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
3919 
3920 		/* Get the memory mapping */
3921 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3922 		    rxbuf->map, rxbuf->m_head, seg,
3923 		    &nsegs, BUS_DMA_NOWAIT);
3924 		if (error != 0) {
3925 			m_freem(rxbuf->m_head);
3926 			rxbuf->m_head = NULL;
			/* Don't leak the RX ring lock on failure. */
			EM_RX_UNLOCK(rxr);
3927 			return (error);
3928 		}
3929 		bus_dmamap_sync(rxr->rxtag,
3930 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3931 
3932 		/* Update descriptor */
3933 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3934 	}
3935 
3936 
3937 	/* Setup our descriptor indices */
3938 	rxr->next_to_check = 0;
3939 	rxr->next_to_refresh = 0;
3940 
3941 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3942 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3943 
3944 	EM_RX_UNLOCK(rxr);
3945 	return (0);
3946 }
3947 
3948 /*********************************************************************
3949  *
3950  *  Initialize all receive rings.
3951  *
3952  **********************************************************************/
3953 static int
3954 em_setup_receive_structures(struct adapter *adapter)
3955 {
3956 	struct rx_ring *rxr = adapter->rx_rings;
3957 	int j;
3958 
3959 	for (j = 0; j < adapter->num_queues; j++, rxr++)
3960 		if (em_setup_receive_ring(rxr))
3961 			goto fail;
3962 
3963 	return (0);
3964 fail:
3965 	/*
3966 	 * Free RX buffers allocated so far; we handle only the
3967 	 * rings that completed, since the failing ring cleaned
3968 	 * up after itself. Ring 'j' failed, so it is the terminus.
3969 	 */
3970 	for (int i = 0; i < j; ++i) {
3971 		rxr = &adapter->rx_rings[i];
3972 		for (int n = 0; n < adapter->num_rx_desc; n++) {
3973 			struct em_buffer *rxbuf;
3974 			rxbuf = &rxr->rx_buffers[n];
3975 			if (rxbuf->m_head != NULL) {
3976 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3977 			  	  BUS_DMASYNC_POSTREAD);
3978 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3979 				m_freem(rxbuf->m_head);
3980 				rxbuf->m_head = NULL;
3981 			}
3982 		}
3983 	}
3984 
3985 	return (ENOBUFS);
3986 }
3987 
3988 /*********************************************************************
3989  *
3990  *  Free all receive rings.
3991  *
3992  **********************************************************************/
3993 static void
3994 em_free_receive_structures(struct adapter *adapter)
3995 {
3996 	struct rx_ring *rxr = adapter->rx_rings;
3997 
3998 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3999 		em_free_receive_buffers(rxr);
4000 		/* Free the ring memory as well */
4001 		em_dma_free(adapter, &rxr->rxdma);
4002 		EM_RX_LOCK_DESTROY(rxr);
4003 	}
4004 
4005 	free(adapter->rx_rings, M_DEVBUF);
4006 }
4007 
4008 
4009 /*********************************************************************
4010  *
4011  *  Free receive ring data structures
4012  *
4013  **********************************************************************/
4014 static void
4015 em_free_receive_buffers(struct rx_ring *rxr)
4016 {
4017 	struct adapter		*adapter = rxr->adapter;
4018 	struct em_buffer	*rxbuf = NULL;
4019 
4020 	INIT_DEBUGOUT("free_receive_buffers: begin");
4021 
4022 	if (rxr->rx_buffers != NULL) {
4023 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4024 			rxbuf = &rxr->rx_buffers[i];
4025 			if (rxbuf->map != NULL) {
4026 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4027 				    BUS_DMASYNC_POSTREAD);
4028 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4029 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4030 			}
4031 			if (rxbuf->m_head != NULL) {
4032 				m_freem(rxbuf->m_head);
4033 				rxbuf->m_head = NULL;
4034 			}
4035 		}
4036 		free(rxr->rx_buffers, M_DEVBUF);
4037 		rxr->rx_buffers = NULL;
4038 	}
4039 
4040 	if (rxr->rxtag != NULL) {
4041 		bus_dma_tag_destroy(rxr->rxtag);
4042 		rxr->rxtag = NULL;
4043 	}
4044 
4045 	return;
4046 }
4047 
4048 
4049 /*********************************************************************
4050  *
4051  *  Enable receive unit.
4052  *
4053  **********************************************************************/
4054 #define MAX_INTS_PER_SEC	8000
4055 #define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
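/* The ITR interval is in 256 ns units: 1e9 / (8000 * 256) ~= 488 -> ~8000 ints/sec. */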
4056 
4057 static void
4058 em_initialize_receive_unit(struct adapter *adapter)
4059 {
4060 	struct rx_ring	*rxr = adapter->rx_rings;
4061 	struct ifnet	*ifp = adapter->ifp;
4062 	struct e1000_hw	*hw = &adapter->hw;
4063 	u64	bus_addr;
4064 	u32	rctl, rxcsum;
4065 
4066 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4067 
4068 	/*
4069 	 * Make sure receives are disabled while setting
4070 	 * up the descriptor ring
4071 	 */
4072 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4073 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4074 
4075 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4076 	    adapter->rx_abs_int_delay.value);
4077 	/*
4078 	 * Set the interrupt throttling rate. Value is calculated
4079 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4080 	 */
4081 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4082 
4083 	/*
4084 	** When using MSIX interrupts we need to throttle
4085 	** using the EITR register (82574 only)
4086 	*/
4087 	if (hw->mac.type == e1000_82574)
4088 		for (int i = 0; i < 4; i++)
4089 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4090 			    DEFAULT_ITR);
4091 
4092 	/* Disable accelerated acknowledge */
4093 	if (adapter->hw.mac.type == e1000_82574)
4094 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4095 
4096 	if (ifp->if_capenable & IFCAP_RXCSUM) {
4097 		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4098 		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4099 		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4100 	}
4101 
4102 	/*
4103 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4104 	** long latencies are observed, like Lenovo X60. This
4105 	** change eliminates the problem, but since having positive
4106 	** values in RDTR is a known source of problems on other
4107 	** platforms another solution is being sought.
4108 	*/
4109 	if (hw->mac.type == e1000_82573)
4110 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4111 
4112 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4113 		/* Setup the Base and Length of the Rx Descriptor Ring */
4114 		bus_addr = rxr->rxdma.dma_paddr;
4115 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4116 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4117 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4118 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4119 		/* Setup the Head and Tail Descriptor Pointers */
4120 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4121 		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4122 	}
4123 
4124 	/* Set early receive threshold on appropriate hw */
4125 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4126 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4127 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4128 	    (ifp->if_mtu > ETHERMTU)) {
4129 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4130 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4131 		E1000_WRITE_REG(hw, E1000_ERT, 0x100 | (1 << 13));
4132 	}
4133 
4134 	if (adapter->hw.mac.type == e1000_pch2lan) {
4135 		if (ifp->if_mtu > ETHERMTU)
4136 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4137 		else
4138 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4139 	}
4140 
4141 	/* Setup the Receive Control Register */
4142 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4143 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4144 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4145 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4146 
4147 	/* Strip the CRC */
4148 	rctl |= E1000_RCTL_SECRC;
4149 
4150 	/* Make sure VLAN Filters are off */
4151 	rctl &= ~E1000_RCTL_VFE;
4152 	rctl &= ~E1000_RCTL_SBP;
4153 
4154 	if (adapter->rx_mbuf_sz == MCLBYTES)
4155 		rctl |= E1000_RCTL_SZ_2048;
4156 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4157 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4158 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4159 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4160 
4161 	if (ifp->if_mtu > ETHERMTU)
4162 		rctl |= E1000_RCTL_LPE;
4163 	else
4164 		rctl &= ~E1000_RCTL_LPE;
4165 
4166 	/* Write out the settings */
4167 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4168 
4169 	return;
4170 }
4171 
4172 
4173 /*********************************************************************
4174  *
4175  *  This routine executes in interrupt context. It replenishes
4176  *  the mbufs in the descriptor ring and sends data which has
4177  *  been dma'ed into host memory to the upper layer.
4178  *
4179  *  We loop at most count times if count is > 0, or until done if
4180  *  count < 0.
4181  *
4182  *  For polling we also now return the number of cleaned packets
4183  *********************************************************************/
4184 static bool
4185 em_rxeof(struct rx_ring *rxr, int count, int *done)
4186 {
4187 	struct adapter		*adapter = rxr->adapter;
4188 	struct ifnet		*ifp = adapter->ifp;
4189 	struct mbuf		*mp, *sendmp;
4190 	u8			status = 0;
4191 	u16 			len;
4192 	int			i, processed, rxdone = 0;
4193 	bool			eop;
4194 	struct e1000_rx_desc	*cur;
4195 
4196 	EM_RX_LOCK(rxr);
4197 
4198 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4199 
4200 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4201 			break;
4202 
4203 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4204 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4205 
4206 		cur = &rxr->rx_base[i];
4207 		status = cur->status;
4208 		mp = sendmp = NULL;
4209 
4210 		if ((status & E1000_RXD_STAT_DD) == 0)
4211 			break;
4212 
4213 		len = le16toh(cur->length);
4214 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4215 
4216 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4217 		    (rxr->discard == TRUE)) {
4218 			ifp->if_ierrors++;
4219 			++rxr->rx_discarded;
4220 			if (!eop) /* Catch subsequent segs */
4221 				rxr->discard = TRUE;
4222 			else
4223 				rxr->discard = FALSE;
4224 			em_rx_discard(rxr, i);
4225 			goto next_desc;
4226 		}
4227 
4228 		/* Assign correct length to the current fragment */
4229 		mp = rxr->rx_buffers[i].m_head;
4230 		mp->m_len = len;
4231 
4232 		/* Trigger for refresh */
4233 		rxr->rx_buffers[i].m_head = NULL;
4234 
4235 		/* First segment? */
4236 		if (rxr->fmp == NULL) {
4237 			mp->m_pkthdr.len = len;
4238 			rxr->fmp = rxr->lmp = mp;
4239 		} else {
4240 			/* Chain mbuf's together */
4241 			mp->m_flags &= ~M_PKTHDR;
4242 			rxr->lmp->m_next = mp;
4243 			rxr->lmp = mp;
4244 			rxr->fmp->m_pkthdr.len += len;
4245 		}
4246 
4247 		if (eop) {
4248 			--count;
4249 			sendmp = rxr->fmp;
4250 			sendmp->m_pkthdr.rcvif = ifp;
4251 			ifp->if_ipackets++;
4252 			em_receive_checksum(cur, sendmp);
4253 #ifndef __NO_STRICT_ALIGNMENT
4254 			if (adapter->max_frame_size >
4255 			    (MCLBYTES - ETHER_ALIGN) &&
4256 			    em_fixup_rx(rxr) != 0)
4257 				goto skip;
4258 #endif
4259 			if (status & E1000_RXD_STAT_VP) {
4260 				sendmp->m_pkthdr.ether_vtag =
4261 				    (le16toh(cur->special) &
4262 				    E1000_RXD_SPC_VLAN_MASK);
4263 				sendmp->m_flags |= M_VLANTAG;
4264 			}
4265 #ifdef EM_MULTIQUEUE
4266 			sendmp->m_pkthdr.flowid = rxr->msix;
4267 			sendmp->m_flags |= M_FLOWID;
4268 #endif
4269 #ifndef __NO_STRICT_ALIGNMENT
4270 skip:
4271 #endif
4272 			rxr->fmp = rxr->lmp = NULL;
4273 		}
4274 next_desc:
4275 		/* Zero out the receive descriptors status. */
4276 		cur->status = 0;
4277 		++rxdone;	/* cumulative for POLL */
4278 		++processed;
4279 
4280 		/* Advance our pointers to the next descriptor. */
4281 		if (++i == adapter->num_rx_desc)
4282 			i = 0;
4283 
4284 		/* Send to the stack */
4285 		if (sendmp != NULL) {
4286 			rxr->next_to_check = i;
4287 			EM_RX_UNLOCK(rxr);
4288 			(*ifp->if_input)(ifp, sendmp);
4289 			EM_RX_LOCK(rxr);
4290 			i = rxr->next_to_check;
4291 		}
4292 
4293 		/* Only refresh mbufs every 8 descriptors; batching the tail updates reduces register writes */
4294 		if (processed == 8) {
4295 			em_refresh_mbufs(rxr, i);
4296 			processed = 0;
4297 		}
4298 	}
4299 
4300 	/* Catch any remaining refresh work */
4301 	em_refresh_mbufs(rxr, i);
4302 
4303 	rxr->next_to_check = i;
4304 	if (done != NULL)
4305 		*done = rxdone;
4306 	EM_RX_UNLOCK(rxr);
4307 
4308 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4309 }
4310 
4311 static __inline void
4312 em_rx_discard(struct rx_ring *rxr, int i)
4313 {
4314 	struct em_buffer	*rbuf;
4315 
4316 	rbuf = &rxr->rx_buffers[i];
4317 	/* Free any previous pieces */
4318 	if (rxr->fmp != NULL) {
4319 		rxr->fmp->m_flags |= M_PKTHDR;
4320 		m_freem(rxr->fmp);
4321 		rxr->fmp = NULL;
4322 		rxr->lmp = NULL;
4323 	}
4324 	/*
4325 	** Free buffer and allow em_refresh_mbufs()
4326 	** to clean up and recharge buffer.
4327 	*/
4328 	if (rbuf->m_head) {
4329 		m_free(rbuf->m_head);
4330 		rbuf->m_head = NULL;
4331 	}
4332 	return;
4333 }
4334 
4335 #ifndef __NO_STRICT_ALIGNMENT
4336 /*
4337  * When jumbo frames are enabled we should realign the entire payload on
4338  * architectures with strict alignment. This is a serious design mistake of the
4339  * 8254x, as it nullifies the benefit of DMA. The 8254x only allows the RX
4340  * buffer size to be 2048/4096/8192/16384; what we really want is 2048 -
4341  * ETHER_ALIGN so that the payload is aligned. On architectures without strict
4342  * alignment restrictions the 8254x still performs unaligned memory accesses,
4343  * which reduce performance as well. To avoid copying an entire frame just to
4344  * align it, we allocate a new mbuf, copy the ethernet header into it, and
4345  * prepend the new mbuf to the existing mbuf chain.
4346  *
4347  * Be aware that the best performance of the 8254x is achieved only when jumbo
4348  * frames are not used at all on architectures with strict alignment.
4349  */
4350 static int
4351 em_fixup_rx(struct rx_ring *rxr)
4352 {
4353 	struct adapter *adapter = rxr->adapter;
4354 	struct mbuf *m, *n;
4355 	int error;
4356 
4357 	error = 0;
4358 	m = rxr->fmp;
4359 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4360 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4361 		m->m_data += ETHER_HDR_LEN;
4362 	} else {
4363 		MGETHDR(n, M_DONTWAIT, MT_DATA);
4364 		if (n != NULL) {
4365 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4366 			m->m_data += ETHER_HDR_LEN;
4367 			m->m_len -= ETHER_HDR_LEN;
4368 			n->m_len = ETHER_HDR_LEN;
4369 			M_MOVE_PKTHDR(n, m);
4370 			n->m_next = m;
4371 			rxr->fmp = n;
4372 		} else {
4373 			adapter->dropped_pkts++;
4374 			m_freem(rxr->fmp);
4375 			rxr->fmp = NULL;
4376 			error = ENOMEM;
4377 		}
4378 	}
4379 
4380 	return (error);
4381 }
4382 #endif
4383 
4384 /*********************************************************************
4385  *
4386  *  Verify that the hardware indicated that the checksum is valid.
4387  *  Inform the stack about the status of the checksum so that the
4388  *  stack doesn't spend time verifying the checksum.
4389  *
4390  *********************************************************************/
4391 static void
4392 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4393 {
4394 	/* Ignore Checksum bit is set */
4395 	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4396 		mp->m_pkthdr.csum_flags = 0;
4397 		return;
4398 	}
4399 
4400 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4401 		/* Did it pass? */
4402 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4403 			/* IP Checksum Good */
4404 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4405 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4406 
4407 		} else {
4408 			mp->m_pkthdr.csum_flags = 0;
4409 		}
4410 	}
4411 
4412 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4413 		/* Did it pass? */
4414 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4415 			mp->m_pkthdr.csum_flags |=
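			/* A csum_data of 0xffff with CSUM_PSEUDO_HDR marks the checksum fully verified. */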
4416 			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4417 			mp->m_pkthdr.csum_data = htons(0xffff);
4418 		}
4419 	}
4420 }
4421 
4422 /*
4423  * This routine is run via a vlan
4424  * config EVENT
4425  */
4426 static void
4427 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4428 {
4429 	struct adapter	*adapter = ifp->if_softc;
4430 	u32		index, bit;
4431 
4432 	if (ifp->if_softc !=  arg)   /* Not our event */
4433 		return;
4434 
4435 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4436 		return;
4437 
4438 	EM_CORE_LOCK(adapter);
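	/*
	 * The 4096-bit VLAN filter table is stored as 128 32-bit
	 * words: bits 5-11 of the tag select the word and bits 0-4
	 * select the bit within it.
	 */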
4439 	index = (vtag >> 5) & 0x7F;
4440 	bit = vtag & 0x1F;
4441 	adapter->shadow_vfta[index] |= (1 << bit);
4442 	++adapter->num_vlans;
4443 	/* Re-init to load the changes */
4444 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4445 		em_init_locked(adapter);
4446 	EM_CORE_UNLOCK(adapter);
4447 }
4448 
4449 /*
4450  * This routine is run via a vlan
4451  * unconfig EVENT
4452  */
4453 static void
4454 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4455 {
4456 	struct adapter	*adapter = ifp->if_softc;
4457 	u32		index, bit;
4458 
4459 	if (ifp->if_softc !=  arg)
4460 		return;
4461 
4462 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4463 		return;
4464 
4465 	EM_CORE_LOCK(adapter);
4466 	index = (vtag >> 5) & 0x7F;
4467 	bit = vtag & 0x1F;
4468 	adapter->shadow_vfta[index] &= ~(1 << bit);
4469 	--adapter->num_vlans;
4470 	/* Re-init to load the changes */
4471 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4472 		em_init_locked(adapter);
4473 	EM_CORE_UNLOCK(adapter);
4474 }
4475 
4476 static void
4477 em_setup_vlan_hw_support(struct adapter *adapter)
4478 {
4479 	struct e1000_hw *hw = &adapter->hw;
4480 	u32             reg;
4481 
4482 	/*
4483 	** We get here via init_locked, meaning a
4484 	** soft reset that has already cleared the
4485 	** VFTA and other state; if no vlans have
4486 	** been registered, do nothing.
4487 	*/
4488 	if (adapter->num_vlans == 0)
4489 		return;
4490 
4491 	/*
4492 	** A soft reset zeroes out the VFTA, so
4493 	** we need to repopulate it now.
4494 	*/
4495 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4496 		if (adapter->shadow_vfta[i] != 0)
4497 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4498 			    i, adapter->shadow_vfta[i]);
4499 
4500 	reg = E1000_READ_REG(hw, E1000_CTRL);
4501 	reg |= E1000_CTRL_VME;
4502 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4503 
4504 	/* Enable the Filter Table */
4505 	reg = E1000_READ_REG(hw, E1000_RCTL);
4506 	reg &= ~E1000_RCTL_CFIEN;
4507 	reg |= E1000_RCTL_VFE;
4508 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4509 }
4510 
4511 static void
4512 em_enable_intr(struct adapter *adapter)
4513 {
4514 	struct e1000_hw *hw = &adapter->hw;
4515 	u32 ims_mask = IMS_ENABLE_MASK;
4516 
4517 	if (hw->mac.type == e1000_82574) {
4518 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4519 		ims_mask |= EM_MSIX_MASK;
4520 	}
4521 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4522 }
4523 
4524 static void
4525 em_disable_intr(struct adapter *adapter)
4526 {
4527 	struct e1000_hw *hw = &adapter->hw;
4528 
4529 	if (hw->mac.type == e1000_82574)
4530 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4531 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4532 }
4533 
4534 /*
4535  * Bit of a misnomer: what this really means is
4536  * to enable OS management of the system, i.e.,
4537  * to disable special hardware management features.
4538  */
4539 static void
4540 em_init_manageability(struct adapter *adapter)
4541 {
4542 	/* A shared code workaround */
4543 #define E1000_82542_MANC2H E1000_MANC2H
4544 	if (adapter->has_manage) {
4545 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4546 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4547 
4548 		/* disable hardware interception of ARP */
4549 		manc &= ~(E1000_MANC_ARP_EN);
4550 
4551 		/* enable receiving management packets to the host */
4552 		manc |= E1000_MANC_EN_MNG2HOST;
4553 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4554 #define E1000_MNG2HOST_PORT_664 (1 << 6)
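		/*
		 * A hedged note: ports 623 (RMCP) and 664 (RMCP-secure) are
		 * the standard management ports forwarded to the host.
		 */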
4555 		manc2h |= E1000_MNG2HOST_PORT_623;
4556 		manc2h |= E1000_MNG2HOST_PORT_664;
4557 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4558 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4559 	}
4560 }
4561 
4562 /*
4563  * Give control back to hardware management
4564  * controller if there is one.
4565  */
4566 static void
4567 em_release_manageability(struct adapter *adapter)
4568 {
4569 	if (adapter->has_manage) {
4570 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4571 
4572 		/* re-enable hardware interception of ARP */
4573 		manc |= E1000_MANC_ARP_EN;
4574 		manc &= ~E1000_MANC_EN_MNG2HOST;
4575 
4576 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4577 	}
4578 }
4579 
4580 /*
4581  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4582  * For ASF and Pass Through versions of f/w this means
4583  * that the driver is loaded. For AMT version type f/w
4584  * this means that the network i/f is open.
4585  */
4586 static void
4587 em_get_hw_control(struct adapter *adapter)
4588 {
4589 	u32 ctrl_ext, swsm;
4590 
4591 	if (adapter->hw.mac.type == e1000_82573) {
4592 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4593 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4594 		    swsm | E1000_SWSM_DRV_LOAD);
4595 		return;
4596 	}
4597 	/* else */
4598 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4599 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4600 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4601 	return;
4602 }
4603 
4604 /*
4605  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4606  * For ASF and Pass Through versions of f/w this means that
4607  * the driver is no longer loaded. For AMT versions of the
4608  * f/w this means that the network i/f is closed.
4609  */
4610 static void
4611 em_release_hw_control(struct adapter *adapter)
4612 {
4613 	u32 ctrl_ext, swsm;
4614 
4615 	if (!adapter->has_manage)
4616 		return;
4617 
4618 	if (adapter->hw.mac.type == e1000_82573) {
4619 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4620 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4621 		    swsm & ~E1000_SWSM_DRV_LOAD);
4622 		return;
4623 	}
4624 	/* else */
4625 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4626 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4627 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4628 	return;
4629 }
4630 
4631 static int
4632 em_is_valid_ether_addr(u8 *addr)
4633 {
4634 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4635 
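	/* Reject multicast addresses (bit 0 of the first octet set) and all zeros. */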
4636 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4637 		return (FALSE);
4638 	}
4639 
4640 	return (TRUE);
4641 }
4642 
4643 /*
4644 ** Parse the interface capabilities with regard
4645 ** to both system management and wake-on-lan for
4646 ** later use.
4647 */
4648 static void
4649 em_get_wakeup(device_t dev)
4650 {
4651 	struct adapter	*adapter = device_get_softc(dev);
4652 	u16		eeprom_data = 0, device_id, apme_mask;
4653 
4654 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4655 	apme_mask = EM_EEPROM_APME;
4656 
4657 	switch (adapter->hw.mac.type) {
4658 	case e1000_82573:
4659 	case e1000_82583:
4660 		adapter->has_amt = TRUE;
4661 		/* Falls thru */
4662 	case e1000_82571:
4663 	case e1000_82572:
4664 	case e1000_80003es2lan:
4665 		if (adapter->hw.bus.func == 1) {
4666 			e1000_read_nvm(&adapter->hw,
4667 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4668 			break;
4669 		} else
4670 			e1000_read_nvm(&adapter->hw,
4671 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4672 		break;
4673 	case e1000_ich8lan:
4674 	case e1000_ich9lan:
4675 	case e1000_ich10lan:
4676 	case e1000_pchlan:
4677 	case e1000_pch2lan:
4678 		apme_mask = E1000_WUC_APME;
4679 		adapter->has_amt = TRUE;
4680 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4681 		break;
4682 	default:
4683 		e1000_read_nvm(&adapter->hw,
4684 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4685 		break;
4686 	}
4687 	if (eeprom_data & apme_mask)
4688 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4689 	/*
4690 	 * We have the eeprom settings; now apply the special cases
4691 	 * where the eeprom may be wrong or the board won't support
4692 	 * wake on lan on a particular port.
4693 	 */
4694 	device_id = pci_get_device(dev);
4695 	switch (device_id) {
4696 	case E1000_DEV_ID_82571EB_FIBER:
4697 		/* Wake events only supported on port A for dual fiber
4698 		 * regardless of eeprom setting */
4699 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4700 		    E1000_STATUS_FUNC_1)
4701 			adapter->wol = 0;
4702 		break;
4703 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4704 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4705 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4706 		/* if quad port adapter, disable WoL on all but port A */
4707 		if (global_quad_port_a != 0)
4708 			adapter->wol = 0;
4709 		/* Reset for multiple quad port adapters */
4710 		if (++global_quad_port_a == 4)
4711 			global_quad_port_a = 0;
4712 		break;
4713 	}
4714 	return;
4715 }
4716 
4717 
4718 /*
4719  * Enable PCI Wake On Lan capability
4720  */
4721 static void
4722 em_enable_wakeup(device_t dev)
4723 {
4724 	struct adapter	*adapter = device_get_softc(dev);
4725 	struct ifnet	*ifp = adapter->ifp;
4726 	u32		pmc, ctrl, ctrl_ext, rctl;
4727 	u16     	status;
4728 
4729 	if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4730 		return;
4731 
4732 	/* Advertise the wakeup capability */
4733 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4734 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4735 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4736 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4737 
4738 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4739 	    (adapter->hw.mac.type == e1000_pchlan) ||
4740 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4741 	    (adapter->hw.mac.type == e1000_ich10lan)) {
4742 		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4743 		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4744 	}
4745 
4746 	/* Keep the laser running on Fiber adapters */
4747 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4748 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4749 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4750 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4751 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4752 	}
4753 
4754 	/*
4755 	** Determine type of Wakeup: note that wol
4756 	** is set with all bits on by default.
4757 	*/
4758 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4759 		adapter->wol &= ~E1000_WUFC_MAG;
4760 
4761 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4762 		adapter->wol &= ~E1000_WUFC_MC;
4763 	else {
4764 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4765 		rctl |= E1000_RCTL_MPE;
4766 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4767 	}
4768 
4769 	if ((adapter->hw.mac.type == e1000_pchlan) ||
4770 	    (adapter->hw.mac.type == e1000_pch2lan)) {
4771 		if (em_enable_phy_wakeup(adapter))
4772 			return;
4773 	} else {
4774 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4775 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4776 	}
4777 
4778 	if (adapter->hw.phy.type == e1000_phy_igp_3)
4779 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4780 
4781 	/* Request PME */
4782 	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4783 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4784 	if (ifp->if_capenable & IFCAP_WOL)
4785 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4786 	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4787 
4788 	return;
4789 }
4790 
4791 /*
4792 ** WOL in the newer chipset interfaces (pchlan)
4793 ** requires settings to be copied into the phy
4794 */
4795 static int
4796 em_enable_phy_wakeup(struct adapter *adapter)
4797 {
4798 	struct e1000_hw *hw = &adapter->hw;
4799 	u32 mreg, ret = 0;
4800 	u16 preg;
4801 
4802 	/* copy MAC RARs to PHY RARs */
4803 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4804 
4805 	/* copy MAC MTA to PHY MTA */
4806 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4807 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4808 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4809 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4810 		    (u16)((mreg >> 16) & 0xFFFF));
4811 	}
4812 
4813 	/* configure PHY Rx Control register */
4814 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4815 	mreg = E1000_READ_REG(hw, E1000_RCTL);
4816 	if (mreg & E1000_RCTL_UPE)
4817 		preg |= BM_RCTL_UPE;
4818 	if (mreg & E1000_RCTL_MPE)
4819 		preg |= BM_RCTL_MPE;
4820 	preg &= ~(BM_RCTL_MO_MASK);
4821 	if (mreg & E1000_RCTL_MO_3)
4822 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4823 				<< BM_RCTL_MO_SHIFT);
4824 	if (mreg & E1000_RCTL_BAM)
4825 		preg |= BM_RCTL_BAM;
4826 	if (mreg & E1000_RCTL_PMCF)
4827 		preg |= BM_RCTL_PMCF;
4828 	mreg = E1000_READ_REG(hw, E1000_CTRL);
4829 	if (mreg & E1000_CTRL_RFCE)
4830 		preg |= BM_RCTL_RFCE;
4831 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4832 
4833 	/* enable PHY wakeup in MAC register */
4834 	E1000_WRITE_REG(hw, E1000_WUC,
4835 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4836 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4837 
4838 	/* configure and enable PHY wakeup in PHY registers */
4839 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4840 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4841 
4842 	/* activate PHY wakeup */
4843 	ret = hw->phy.ops.acquire(hw);
4844 	if (ret) {
4845 		printf("Could not acquire PHY\n");
4846 		return ret;
4847 	}
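	/* Select PHY page 769 (BM_WUC_ENABLE_PAGE) before touching the enable register. */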
4848 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4849 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4850 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4851 	if (ret) {
4852 		printf("Could not read PHY page 769\n");
4853 		goto out;
4854 	}
4855 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4856 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4857 	if (ret)
4858 		printf("Could not set PHY Host Wakeup bit\n");
4859 out:
4860 	hw->phy.ops.release(hw);
4861 
4862 	return ret;
4863 }
4864 
4865 static void
4866 em_led_func(void *arg, int onoff)
4867 {
4868 	struct adapter	*adapter = arg;
4869 
4870 	EM_CORE_LOCK(adapter);
4871 	if (onoff) {
4872 		e1000_setup_led(&adapter->hw);
4873 		e1000_led_on(&adapter->hw);
4874 	} else {
4875 		e1000_led_off(&adapter->hw);
4876 		e1000_cleanup_led(&adapter->hw);
4877 	}
4878 	EM_CORE_UNLOCK(adapter);
4879 }
4880 
4881 /*
4882 ** Disable the L0S and L1 LINK states
4883 */
4884 static void
4885 em_disable_aspm(struct adapter *adapter)
4886 {
4887 	int		base, reg;
4888 	u16		link_cap, link_ctrl;
4889 	device_t	dev = adapter->dev;
4890 
4891 	switch (adapter->hw.mac.type) {
4892 		case e1000_82573:
4893 		case e1000_82574:
4894 		case e1000_82583:
4895 			break;
4896 		default:
4897 			return;
4898 	}
4899 	if (pci_find_extcap(dev, PCIY_EXPRESS, &base) != 0)
4900 		return;
4901 	reg = base + PCIR_EXPRESS_LINK_CAP;
4902 	link_cap = pci_read_config(dev, reg, 2);
4903 	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
4904 		return;
4905 	reg = base + PCIR_EXPRESS_LINK_CTL;
4906 	link_ctrl = pci_read_config(dev, reg, 2);
4907 	link_ctrl &= 0xFFFC; /* clear the ASPM L0s and L1 enables (bits 0-1) */
4908 	pci_write_config(dev, reg, link_ctrl, 2);
4909 	return;
4910 }
4911 
4912 /**********************************************************************
4913  *
4914  *  Update the board statistics counters.
4915  *
4916  **********************************************************************/
4917 static void
4918 em_update_stats_counters(struct adapter *adapter)
4919 {
4920 	struct ifnet   *ifp;
4921 
4922 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4923 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4924 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4925 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4926 	}
4927 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4928 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4929 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4930 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4931 
4932 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4933 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4934 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4935 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4936 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4937 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4938 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4939 	/*
4940 	** For watchdog management we need to know if we have been
4941 	** paused during the last interval, so capture that here.
4942 	*/
4943 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4944 	adapter->stats.xoffrxc += adapter->pause_frames;
4945 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4946 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4947 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4948 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4949 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4950 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4951 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4952 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4953 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4954 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4955 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4956 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4957 
4958 	/* For the 64-bit byte counters the low dword must be read first. */
4959 	/* Both registers clear on the read of the high dword */
4960 
4961 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
4962 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4963 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
4964 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4965 
4966 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4967 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4968 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4969 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4970 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4971 
	/* Total octets: like GORC/GOTC above, read the low dword first. */
4972 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4973 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4974 
4975 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4976 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4977 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4978 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4979 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4980 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4981 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4982 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4983 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4984 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4985 
4986 	/* Interrupt Counts */
4987 
4988 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
4989 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
4990 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
4991 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
4992 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
4993 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
4994 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
4995 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
4996 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
4997 
4998 	if (adapter->hw.mac.type >= e1000_82543) {
4999 		adapter->stats.algnerrc +=
5000 		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5001 		adapter->stats.rxerrc +=
5002 		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5003 		adapter->stats.tncrs +=
5004 		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5005 		adapter->stats.cexterr +=
5006 		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5007 		adapter->stats.tsctc +=
5008 		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5009 		adapter->stats.tsctfc +=
5010 		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5011 	}
5012 	ifp = adapter->ifp;
5013 
5014 	ifp->if_collisions = adapter->stats.colc;
5015 
5016 	/* Rx Errors */
5017 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5018 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5019 	    adapter->stats.ruc + adapter->stats.roc +
5020 	    adapter->stats.mpc + adapter->stats.cexterr;
5021 
5022 	/* Tx Errors */
5023 	ifp->if_oerrors = adapter->stats.ecol +
5024 	    adapter->stats.latecol + adapter->watchdog_events;
5025 }
5026 
5027 /* Export a single 32-bit register via a read-only sysctl. */
5028 static int
5029 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5030 {
5031 	struct adapter *adapter;
5032 	u_int val;
5033 
5034 	adapter = oidp->oid_arg1;
5035 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5036 	return (sysctl_handle_int(oidp, &val, 0, req));
5037 }
5038 
5039 /*
5040  * Add sysctl variables, one per statistic, to the system.
5041  */
5042 static void
5043 em_add_hw_stats(struct adapter *adapter)
5044 {
5045 	device_t dev = adapter->dev;
5046 
5047 	struct tx_ring *txr = adapter->tx_rings;
5048 	struct rx_ring *rxr = adapter->rx_rings;
5049 
5050 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5051 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5052 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5053 	struct e1000_hw_stats *stats = &adapter->stats;
5054 
5055 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5056 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5057 
5058 #define QUEUE_NAME_LEN 32
5059 	char namebuf[QUEUE_NAME_LEN];
5060 
5061 	/* Driver Statistics */
5062 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5063 			CTLFLAG_RD, &adapter->link_irq, 0,
5064 			"Link MSI-X IRQs Handled");
5065 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5066 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5067 			 "Std mbuf allocation failures");
5068 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5069 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5070 			 "Std mbuf cluster allocation failures");
5071 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5072 			CTLFLAG_RD, &adapter->dropped_pkts,
5073 			"Driver dropped packets");
5074 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5075 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5076 			"Driver TX DMA setup failures in xmit");
5077 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5078 			CTLFLAG_RD, &adapter->rx_overruns,
5079 			"RX overruns");
5080 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5081 			CTLFLAG_RD, &adapter->watchdog_events,
5082 			"Watchdog timeouts");
5083 
5084 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5085 			CTLFLAG_RD, adapter, E1000_CTRL,
5086 			em_sysctl_reg_handler, "IU",
5087 			"Device Control Register");
5088 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5089 			CTLFLAG_RD, adapter, E1000_RCTL,
5090 			em_sysctl_reg_handler, "IU",
5091 			"Receiver Control Register");
5092 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5093 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5094 			"Flow Control High Watermark");
5095 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5096 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5097 			"Flow Control Low Watermark");
5098 
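	/*
	 * One sub-node per queue pair: the descriptor head/tail registers
	 * are read live via em_sysctl_reg_handler, the IRQ and descriptor
	 * counters from the ring software state.
	 */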
5099 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5100 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5101 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5102 					    CTLFLAG_RD, NULL, "Queue Name");
5103 		queue_list = SYSCTL_CHILDREN(queue_node);
5104 
5105 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5106 				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5107 				em_sysctl_reg_handler, "IU",
5108 				"Transmit Descriptor Head");
5109 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5110 				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5111 				em_sysctl_reg_handler, "IU",
5112 				"Transmit Descriptor Tail");
5113 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5114 				CTLFLAG_RD, &txr->tx_irq,
5115 				"Queue MSI-X Transmit Interrupts");
5116 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5117 				CTLFLAG_RD, &txr->no_desc_avail,
5118 				"Queue No Descriptor Available");
5119 
5120 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5121 				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5122 				em_sysctl_reg_handler, "IU",
5123 				"Receive Descriptor Head");
5124 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5125 				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5126 				em_sysctl_reg_handler, "IU",
5127 				"Receive Descriptor Tail");
5128 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5129 				CTLFLAG_RD, &rxr->rx_irq,
5130 				"Queue MSI-X Receive Interrupts");
5131 	}
5132 
5133 	/* MAC stats get their own sub node */
5134 
5135 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5136 				    CTLFLAG_RD, NULL, "Statistics");
5137 	stat_list = SYSCTL_CHILDREN(stat_node);
5138 
5139 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5140 			CTLFLAG_RD, &stats->ecol,
5141 			"Excessive collisions");
5142 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5143 			CTLFLAG_RD, &stats->scc,
5144 			"Single collisions");
5145 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5146 			CTLFLAG_RD, &stats->mcc,
5147 			"Multiple collisions");
5148 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5149 			CTLFLAG_RD, &stats->latecol,
5150 			"Late collisions");
5151 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5152 			CTLFLAG_RD, &stats->colc,
5153 			"Collision Count");
5154 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5155 			CTLFLAG_RD, &adapter->stats.symerrs,
5156 			"Symbol Errors");
5157 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5158 			CTLFLAG_RD, &adapter->stats.sec,
5159 			"Sequence Errors");
5160 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5161 			CTLFLAG_RD, &adapter->stats.dc,
5162 			"Defer Count");
5163 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5164 			CTLFLAG_RD, &adapter->stats.mpc,
5165 			"Missed Packets");
5166 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5167 			CTLFLAG_RD, &adapter->stats.rnbc,
5168 			"Receive No Buffers");
5169 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5170 			CTLFLAG_RD, &adapter->stats.ruc,
5171 			"Receive Undersize");
5172 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5173 			CTLFLAG_RD, &adapter->stats.rfc,
5174 			"Fragmented Packets Received");
5175 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5176 			CTLFLAG_RD, &adapter->stats.roc,
5177 			"Oversized Packets Received");
5178 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5179 			CTLFLAG_RD, &adapter->stats.rjc,
5180 			"Received Jabber");
5181 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5182 			CTLFLAG_RD, &adapter->stats.rxerrc,
5183 			"Receive Errors");
5184 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5185 			CTLFLAG_RD, &adapter->stats.crcerrs,
5186 			"CRC errors");
5187 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5188 			CTLFLAG_RD, &adapter->stats.algnerrc,
5189 			"Alignment Errors");
5190 	/* On 82575 these are collision counts */
5191 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5192 			CTLFLAG_RD, &adapter->stats.cexterr,
5193 			"Collision/Carrier extension errors");
5194 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5195 			CTLFLAG_RD, &adapter->stats.xonrxc,
5196 			"XON Received");
5197 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5198 			CTLFLAG_RD, &adapter->stats.xontxc,
5199 			"XON Transmitted");
5200 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5201 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5202 			"XOFF Received");
5203 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5204 			CTLFLAG_RD, &adapter->stats.xofftxc,
5205 			"XOFF Transmitted");
5206 
5207 	/* Packet Reception Stats */
5208 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5209 			CTLFLAG_RD, &adapter->stats.tpr,
5210 			"Total Packets Received");
5211 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5212 			CTLFLAG_RD, &adapter->stats.gprc,
5213 			"Good Packets Received");
5214 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5215 			CTLFLAG_RD, &adapter->stats.bprc,
5216 			"Broadcast Packets Received");
5217 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5218 			CTLFLAG_RD, &adapter->stats.mprc,
5219 			"Multicast Packets Received");
5220 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5221 			CTLFLAG_RD, &adapter->stats.prc64,
5222 			"64 byte frames received");
5223 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5224 			CTLFLAG_RD, &adapter->stats.prc127,
5225 			"65-127 byte frames received");
5226 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5227 			CTLFLAG_RD, &adapter->stats.prc255,
5228 			"128-255 byte frames received");
5229 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5230 			CTLFLAG_RD, &adapter->stats.prc511,
5231 			"256-511 byte frames received");
5232 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5233 			CTLFLAG_RD, &adapter->stats.prc1023,
5234 			"512-1023 byte frames received");
5235 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5236 			CTLFLAG_RD, &adapter->stats.prc1522,
5237 			"1024-1522 byte frames received");
5238 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5239 			CTLFLAG_RD, &adapter->stats.gorc,
5240 			"Good Octets Received");
5241 
5242 	/* Packet Transmission Stats */
5243 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5244 			CTLFLAG_RD, &adapter->stats.gotc,
5245 			"Good Octets Transmitted");
5246 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5247 			CTLFLAG_RD, &adapter->stats.tpt,
5248 			"Total Packets Transmitted");
5249 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5250 			CTLFLAG_RD, &adapter->stats.gptc,
5251 			"Good Packets Transmitted");
5252 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5253 			CTLFLAG_RD, &adapter->stats.bptc,
5254 			"Broadcast Packets Transmitted");
5255 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5256 			CTLFLAG_RD, &adapter->stats.mptc,
5257 			"Multicast Packets Transmitted");
5258 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5259 			CTLFLAG_RD, &adapter->stats.ptc64,
5260 			"64 byte frames transmitted");
5261 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5262 			CTLFLAG_RD, &adapter->stats.ptc127,
5263 			"65-127 byte frames transmitted");
5264 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5265 			CTLFLAG_RD, &adapter->stats.ptc255,
5266 			"128-255 byte frames transmitted");
5267 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5268 			CTLFLAG_RD, &adapter->stats.ptc511,
5269 			"256-511 byte frames transmitted");
5270 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5271 			CTLFLAG_RD, &adapter->stats.ptc1023,
5272 			"512-1023 byte frames transmitted");
5273 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5274 			CTLFLAG_RD, &adapter->stats.ptc1522,
5275 			"1024-1522 byte frames transmitted");
5276 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5277 			CTLFLAG_RD, &adapter->stats.tsctc,
5278 			"TSO Contexts Transmitted");
5279 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5280 			CTLFLAG_RD, &adapter->stats.tsctfc,
5281 			"TSO Contexts Failed");
5282 
5283 
5284 	/* Interrupt Stats */
5285 
5286 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5287 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5288 	int_list = SYSCTL_CHILDREN(int_node);
5289 
5290 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5291 			CTLFLAG_RD, &adapter->stats.iac,
5292 			"Interrupt Assertion Count");
5293 
5294 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5295 			CTLFLAG_RD, &adapter->stats.icrxptc,
5296 			"Interrupt Cause Rx Pkt Timer Expire Count");
5297 
5298 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5299 			CTLFLAG_RD, &adapter->stats.icrxatc,
5300 			"Interrupt Cause Rx Abs Timer Expire Count");
5301 
5302 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5303 			CTLFLAG_RD, &adapter->stats.ictxptc,
5304 			"Interrupt Cause Tx Pkt Timer Expire Count");
5305 
5306 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5307 			CTLFLAG_RD, &adapter->stats.ictxatc,
5308 			"Interrupt Cause Tx Abs Timer Expire Count");
5309 
5310 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5311 			CTLFLAG_RD, &adapter->stats.ictxqec,
5312 			"Interrupt Cause Tx Queue Empty Count");
5313 
5314 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5315 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5316 			"Interrupt Cause Tx Queue Min Thresh Count");
5317 
5318 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5319 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5320 			"Interrupt Cause Rx Desc Min Thresh Count");
5321 
5322 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5323 			CTLFLAG_RD, &adapter->stats.icrxoc,
5324 			"Interrupt Cause Receiver Overrun Count");
5325 }
5326 
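/*
 * Illustrative only (the unit number below is hypothetical): the nodes
 * registered above land under the device's sysctl tree, so on a first
 * adapter they could be read with sysctl(8), e.g.:
 *
 *	sysctl dev.em.0.mac_stats.crc_errs
 *	sysctl dev.em.0.queue0.txd_head
 *	sysctl dev.em.0.interrupts.asserts
 */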
5327 /**********************************************************************
5328  *
5329  *  This routine provides a way to dump out the adapter EEPROM,
5330  *  often a useful debug/service tool. Only the first 32 16-bit
5331  *  words are dumped; everything of interest lives in that range.
5332  *
5333  **********************************************************************/
5334 static int
5335 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5336 {
5337 	struct adapter *adapter;
5338 	int error;
5339 	int result;
5340 
5341 	result = -1;
5342 	error = sysctl_handle_int(oidp, &result, 0, req);
5343 
5344 	if (error || !req->newptr)
5345 		return (error);
5346 
5347 	/*
5348 	 * This value will cause a hex dump of the
5349 	 * first 32 16-bit words of the EEPROM to
5350 	 * the screen.
5351 	 */
5352 	if (result == 1) {
5353 		adapter = (struct adapter *)arg1;
5354 		em_print_nvm_info(adapter);
5355 	}
5356 
5357 	return (error);
5358 }
5359 
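/* Dump the first 32 EEPROM words; invoked from em_sysctl_nvm_info() above. */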
5360 static void
5361 em_print_nvm_info(struct adapter *adapter)
5362 {
5363 	u16	eeprom_data;
5364 	int	i, j, row = 0;
5365 
5366 	/* It's a bit crude, but it gets the job done */
5367 	printf("\nInterface EEPROM Dump:\n");
5368 	printf("Offset\n0x0000  ");
5369 	for (i = 0, j = 0; i < 32; i++, j++) {
5370 		if (j == 8) { /* Start a new row with its offset label */
5371 			j = 0; ++row;
5372 			printf("\n0x00%x0  ", row);
5373 		}
5374 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5375 		printf("%04x ", eeprom_data);
5376 	}
5377 	printf("\n");
5378 }
5379 
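/*
 * Sysctl handler for the interrupt-delay knobs: takes the value in
 * microseconds, converts it to hardware ticks and, under the core lock,
 * writes it into the low 16 bits of the delay register named by
 * info->offset.
 */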
5380 static int
5381 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5382 {
5383 	struct em_int_delay_info *info;
5384 	struct adapter *adapter;
5385 	u32 regval;
5386 	int error, usecs, ticks;
5387 
5388 	info = (struct em_int_delay_info *)arg1;
5389 	usecs = info->value;
5390 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5391 	if (error != 0 || req->newptr == NULL)
5392 		return (error);
5393 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5394 		return (EINVAL);
5395 	info->value = usecs;
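	/* The hardware delay timers tick in ~1.024 us units (if_em.h macros). */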
5396 	ticks = EM_USECS_TO_TICKS(usecs);
5397 
5398 	adapter = info->adapter;
5399 
5400 	EM_CORE_LOCK(adapter);
5401 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5402 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5403 	/* Handle a few special cases. */
5404 	switch (info->offset) {
5405 	case E1000_RDTR:
5406 		break;
5407 	case E1000_TIDV:
5408 		if (ticks == 0) {
5409 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5410 			/* Don't write 0 into the TIDV register. */
5411 			regval++;
5412 		} else
5413 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5414 		break;
5415 	}
5416 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5417 	EM_CORE_UNLOCK(adapter);
5418 	return (0);
5419 }
5420 
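/*
 * Register a read/write interrupt-delay sysctl, seeding the info
 * structure that em_sysctl_int_delay() operates on.
 */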
5421 static void
5422 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5423 	const char *description, struct em_int_delay_info *info,
5424 	int offset, int value)
5425 {
5426 	info->adapter = adapter;
5427 	info->offset = offset;
5428 	info->value = value;
5429 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5430 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5431 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5432 	    info, 0, em_sysctl_int_delay, "I", description);
5433 }
5434 
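/* Set the default RX processing limit and expose it as a r/w sysctl. */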
5435 static void
5436 em_add_rx_process_limit(struct adapter *adapter, const char *name,
5437 	const char *description, int *limit, int value)
5438 {
5439 	*limit = value;
5440 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5441 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5442 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5443 }
5444 
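/*
 * Seed the flow-control setting and expose it as a r/w sysctl;
 * mechanically identical to em_add_rx_process_limit() above.
 */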
5445 static void
5446 em_set_flow_cntrl(struct adapter *adapter, const char *name,
5447 	const char *description, int *limit, int value)
5448 {
5449 	*limit = value;
5450 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5451 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5452 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5453 }
5454 
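/* Writing 1 to this node dumps driver state via em_print_debug_info(). */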
5455 static int
5456 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5457 {
5458 	struct adapter *adapter;
5459 	int error;
5460 	int result;
5461 
5462 	result = -1;
5463 	error = sysctl_handle_int(oidp, &result, 0, req);
5464 
5465 	if (error || !req->newptr)
5466 		return (error);
5467 
5468 	if (result == 1) {
5469 		adapter = (struct adapter *)arg1;
5470 		em_print_debug_info(adapter);
5471 	}
5472 
5473 	return (error);
5474 }
5475 
5476 /*
5477 ** This routine is meant to be fluid, add whatever is
5478 ** needed for debugging a problem.  -jfv
5479 */
5480 static void
5481 em_print_debug_info(struct adapter *adapter)
5482 {
5483 	device_t dev = adapter->dev;
5484 	struct tx_ring *txr = adapter->tx_rings;
5485 	struct rx_ring *rxr = adapter->rx_rings;
5486 
5487 	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5488 		printf("Interface is RUNNING ");
5489 	else
5490 		printf("Interface is NOT RUNNING ");
5491 	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5492 		printf("and ACTIVE\n");
5493 	else
5494 		printf("and INACTIVE\n");
5495 
5496 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5497 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5498 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5499 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5500 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5501 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5502 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5503 	device_printf(dev, "TX descriptors avail = %d\n",
5504 	    txr->tx_avail);
5505 	device_printf(dev, "TX descriptor avail failures = %ld\n",
5506 	    txr->no_desc_avail);
5507 	device_printf(dev, "RX discarded packets = %ld\n",
5508 	    rxr->rx_discarded);
5509 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5510 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5511 }
5512