xref: /freebsd/sys/dev/e1000/if_em.c (revision 884a2a699669ec61e2366e3e358342dbc94be24a)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2011, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #ifdef HAVE_KERNEL_OPTION_HEADERS
36 #include "opt_device_polling.h"
37 #include "opt_inet.h"
38 #endif
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #if __FreeBSD_version >= 800000
43 #include <sys/buf_ring.h>
44 #endif
45 #include <sys/bus.h>
46 #include <sys/endian.h>
47 #include <sys/kernel.h>
48 #include <sys/kthread.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/module.h>
52 #include <sys/rman.h>
53 #include <sys/socket.h>
54 #include <sys/sockio.h>
55 #include <sys/sysctl.h>
56 #include <sys/taskqueue.h>
57 #include <sys/eventhandler.h>
58 #include <machine/bus.h>
59 #include <machine/resource.h>
60 
61 #include <net/bpf.h>
62 #include <net/ethernet.h>
63 #include <net/if.h>
64 #include <net/if_arp.h>
65 #include <net/if_dl.h>
66 #include <net/if_media.h>
67 
68 #include <net/if_types.h>
69 #include <net/if_vlan_var.h>
70 
71 #include <netinet/in_systm.h>
72 #include <netinet/in.h>
73 #include <netinet/if_ether.h>
74 #include <netinet/ip.h>
75 #include <netinet/ip6.h>
76 #include <netinet/tcp.h>
77 #include <netinet/udp.h>
78 
79 #include <machine/in_cksum.h>
80 #include <dev/led/led.h>
81 #include <dev/pci/pcivar.h>
82 #include <dev/pci/pcireg.h>
83 
84 #include "e1000_api.h"
85 #include "e1000_82571.h"
86 #include "if_em.h"
87 
/*********************************************************************
 *  Set this to one to display debug statistics
 *
 *  Non-static, so the symbol is visible outside this file.
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *
 *  em_probe() appends this string to the branding string when it
 *  builds the device description.
 *********************************************************************/
char em_driver_version[] = "7.2.3";
97 
/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s (em_probe stops at vendor_id == 0)
 *
 *  A subvendor/subdevice of PCI_ANY_ID is treated by em_probe as a
 *  wildcard that matches any value.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	/* 82571 / 82572 device IDs */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	/* 82573 / 82583 device IDs */
	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	/* 80003ES2LAN device IDs */
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* ICH8 device IDs */
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* ICH9 device IDs */
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	/* 82574 device IDs */
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	/* ICH10 device IDs */
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* PCH and PCH2 device IDs */
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};
177 
/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *
 *  Indexed by the "String Index" (last) field of the entries in
 *  em_vendor_info_array; every entry currently uses index 0.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};
185 
/*********************************************************************
 *  Function prototypes
 *
 *  Forward declarations for the file-local (static) driver routines.
 *********************************************************************/
/* Device interface entry points (wired up in em_methods) */
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
/* Transmit start paths */
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
/* Interface control, init/stop and resource allocation */
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

/* Transmit ring setup and teardown */
static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

/* Receive ring setup and teardown */
static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

/* Interrupt control, statistics, datapath helpers and sysctl support */
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static bool	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int 	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

/* Legacy/MSI fast interrupt handler */
static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
/* Deferred handlers; the (context, pending) signature is the taskqueue one */
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */
295 
/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *
 *  Binds the device_* methods to this driver's handlers and registers
 *  the "em" driver on the pci bus.
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	/* table terminator */
	{0, 0}
};

/* Driver name, method table, and size of the per-device softc. */
static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
/* Module dependencies: the pci and ether modules. */
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
319 
/*********************************************************************
 *  Tunable default values.
 *
 *  Each TUNABLE_INT below ties a "hw.em.*" tunable name to the static
 *  variable that holds its value.
 *********************************************************************/

/*
 * Convert between interrupt-delay register ticks and microseconds.
 * The arithmetic treats one tick as 1.024 usec and rounds to nearest
 * (the +500 / +512 terms are half of the divisor).
 */
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

/* Default interrupt delays, in usecs, handed to the sysctls in em_attach */
static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

/* Default absolute interrupt delay limits, in usecs */
static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

/*
 * Requested RX/TX descriptor counts.  em_attach validates them and
 * falls back to EM_DEFAULT_RXD / EM_DEFAULT_TXD when out of range or
 * misaligned.
 */
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);

/*
 * Energy efficient ethernet - default to OFF
 *
 * NOTE(review): em_attach passes this value to em_set_sysctl_value()
 * together with &hw->dev_spec.ich8lan.eee_disable.  If that helper
 * stores the value into the field (presumably it does), a default of 0
 * in a field named "eee_disable" reads as "EEE not disabled", which
 * would contradict "default to OFF" -- confirm the intended default.
 */
static int eee_setting = 0;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;
372 
373 /*********************************************************************
374  *  Device identification routine
375  *
376  *  em_probe determines if the driver should be loaded on
377  *  adapter based on PCI vendor/device id of the adapter.
378  *
379  *  return BUS_PROBE_DEFAULT on success, positive on failure
380  *********************************************************************/
381 
382 static int
383 em_probe(device_t dev)
384 {
385 	char		adapter_name[60];
386 	u16		pci_vendor_id = 0;
387 	u16		pci_device_id = 0;
388 	u16		pci_subvendor_id = 0;
389 	u16		pci_subdevice_id = 0;
390 	em_vendor_info_t *ent;
391 
392 	INIT_DEBUGOUT("em_probe: begin");
393 
394 	pci_vendor_id = pci_get_vendor(dev);
395 	if (pci_vendor_id != EM_VENDOR_ID)
396 		return (ENXIO);
397 
398 	pci_device_id = pci_get_device(dev);
399 	pci_subvendor_id = pci_get_subvendor(dev);
400 	pci_subdevice_id = pci_get_subdevice(dev);
401 
402 	ent = em_vendor_info_array;
403 	while (ent->vendor_id != 0) {
404 		if ((pci_vendor_id == ent->vendor_id) &&
405 		    (pci_device_id == ent->device_id) &&
406 
407 		    ((pci_subvendor_id == ent->subvendor_id) ||
408 		    (ent->subvendor_id == PCI_ANY_ID)) &&
409 
410 		    ((pci_subdevice_id == ent->subdevice_id) ||
411 		    (ent->subdevice_id == PCI_ANY_ID))) {
412 			sprintf(adapter_name, "%s %s",
413 				em_strings[ent->index],
414 				em_driver_version);
415 			device_set_desc_copy(dev, adapter_name);
416 			return (BUS_PROBE_DEFAULT);
417 		}
418 		ent++;
419 	}
420 
421 	return (ENXIO);
422 }
423 
424 /*********************************************************************
425  *  Device initialization routine
426  *
427  *  The attach entry point is called when the driver is being loaded.
428  *  This routine identifies the type of hardware, allocates all resources
429  *  and initializes the hardware.
430  *
431  *  return 0 on success, positive on failure
432  *********************************************************************/
433 
434 static int
435 em_attach(device_t dev)
436 {
437 	struct adapter	*adapter;
438 	struct e1000_hw	*hw;
439 	int		error = 0;
440 
441 	INIT_DEBUGOUT("em_attach: begin");
442 
443 	adapter = device_get_softc(dev);
444 	adapter->dev = adapter->osdep.dev = dev;
445 	hw = &adapter->hw;
446 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
447 
448 	/* SYSCTL stuff */
449 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
450 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
451 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
452 	    em_sysctl_nvm_info, "I", "NVM Information");
453 
454 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
455 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
456 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
457 	    em_sysctl_debug_info, "I", "Debug Information");
458 
459 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
460 
461 	/* Determine hardware and mac info */
462 	em_identify_hardware(adapter);
463 
464 	/* Setup PCI resources */
465 	if (em_allocate_pci_resources(adapter)) {
466 		device_printf(dev, "Allocation of PCI resources failed\n");
467 		error = ENXIO;
468 		goto err_pci;
469 	}
470 
471 	/*
472 	** For ICH8 and family we need to
473 	** map the flash memory, and this
474 	** must happen after the MAC is
475 	** identified
476 	*/
477 	if ((hw->mac.type == e1000_ich8lan) ||
478 	    (hw->mac.type == e1000_ich9lan) ||
479 	    (hw->mac.type == e1000_ich10lan) ||
480 	    (hw->mac.type == e1000_pchlan) ||
481 	    (hw->mac.type == e1000_pch2lan)) {
482 		int rid = EM_BAR_TYPE_FLASH;
483 		adapter->flash = bus_alloc_resource_any(dev,
484 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
485 		if (adapter->flash == NULL) {
486 			device_printf(dev, "Mapping of Flash failed\n");
487 			error = ENXIO;
488 			goto err_pci;
489 		}
490 		/* This is used in the shared code */
491 		hw->flash_address = (u8 *)adapter->flash;
492 		adapter->osdep.flash_bus_space_tag =
493 		    rman_get_bustag(adapter->flash);
494 		adapter->osdep.flash_bus_space_handle =
495 		    rman_get_bushandle(adapter->flash);
496 	}
497 
498 	/* Do Shared Code initialization */
499 	if (e1000_setup_init_funcs(hw, TRUE)) {
500 		device_printf(dev, "Setup of Shared code failed\n");
501 		error = ENXIO;
502 		goto err_pci;
503 	}
504 
505 	e1000_get_bus_info(hw);
506 
507 	/* Set up some sysctls for the tunable interrupt delays */
508 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
509 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
510 	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
511 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
512 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
513 	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
514 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
515 	    "receive interrupt delay limit in usecs",
516 	    &adapter->rx_abs_int_delay,
517 	    E1000_REGISTER(hw, E1000_RADV),
518 	    em_rx_abs_int_delay_dflt);
519 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
520 	    "transmit interrupt delay limit in usecs",
521 	    &adapter->tx_abs_int_delay,
522 	    E1000_REGISTER(hw, E1000_TADV),
523 	    em_tx_abs_int_delay_dflt);
524 
525 	/* Sysctl for limiting the amount of work done in the taskqueue */
526 	em_set_sysctl_value(adapter, "rx_processing_limit",
527 	    "max number of rx packets to process", &adapter->rx_process_limit,
528 	    em_rx_process_limit);
529 
530 	/* Sysctl for setting the interface flow control */
531 	em_set_sysctl_value(adapter, "flow_control",
532 	    "configure flow control",
533 	    &adapter->fc_setting, em_fc_setting);
534 
535 	/*
536 	 * Validate number of transmit and receive descriptors. It
537 	 * must not exceed hardware maximum, and must be multiple
538 	 * of E1000_DBA_ALIGN.
539 	 */
540 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
541 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
542 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
543 		    EM_DEFAULT_TXD, em_txd);
544 		adapter->num_tx_desc = EM_DEFAULT_TXD;
545 	} else
546 		adapter->num_tx_desc = em_txd;
547 
548 	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
549 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
550 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
551 		    EM_DEFAULT_RXD, em_rxd);
552 		adapter->num_rx_desc = EM_DEFAULT_RXD;
553 	} else
554 		adapter->num_rx_desc = em_rxd;
555 
556 	hw->mac.autoneg = DO_AUTO_NEG;
557 	hw->phy.autoneg_wait_to_complete = FALSE;
558 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
559 
560 	/* Copper options */
561 	if (hw->phy.media_type == e1000_media_type_copper) {
562 		hw->phy.mdix = AUTO_ALL_MODES;
563 		hw->phy.disable_polarity_correction = FALSE;
564 		hw->phy.ms_type = EM_MASTER_SLAVE;
565 	}
566 
567 	/*
568 	 * Set the frame limits assuming
569 	 * standard ethernet sized frames.
570 	 */
571 	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
572 	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
573 
574 	/*
575 	 * This controls when hardware reports transmit completion
576 	 * status.
577 	 */
578 	hw->mac.report_tx_early = 1;
579 
580 	/*
581 	** Get queue/ring memory
582 	*/
583 	if (em_allocate_queues(adapter)) {
584 		error = ENOMEM;
585 		goto err_pci;
586 	}
587 
588 	/* Allocate multicast array memory. */
589 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
590 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
591 	if (adapter->mta == NULL) {
592 		device_printf(dev, "Can not allocate multicast setup array\n");
593 		error = ENOMEM;
594 		goto err_late;
595 	}
596 
597 	/* Check SOL/IDER usage */
598 	if (e1000_check_reset_block(hw))
599 		device_printf(dev, "PHY reset is blocked"
600 		    " due to SOL/IDER session.\n");
601 
602 	/* Sysctl for setting Energy Efficient Ethernet */
603 	em_set_sysctl_value(adapter, "eee_control",
604 	    "enable Energy Efficient Ethernet",
605 	    &hw->dev_spec.ich8lan.eee_disable, eee_setting);
606 
607 	/*
608 	** Start from a known state, this is
609 	** important in reading the nvm and
610 	** mac from that.
611 	*/
612 	e1000_reset_hw(hw);
613 
614 
615 	/* Make sure we have a good EEPROM before we read from it */
616 	if (e1000_validate_nvm_checksum(hw) < 0) {
617 		/*
618 		** Some PCI-E parts fail the first check due to
619 		** the link being in sleep state, call it again,
620 		** if it fails a second time its a real issue.
621 		*/
622 		if (e1000_validate_nvm_checksum(hw) < 0) {
623 			device_printf(dev,
624 			    "The EEPROM Checksum Is Not Valid\n");
625 			error = EIO;
626 			goto err_late;
627 		}
628 	}
629 
630 	/* Copy the permanent MAC address out of the EEPROM */
631 	if (e1000_read_mac_addr(hw) < 0) {
632 		device_printf(dev, "EEPROM read error while reading MAC"
633 		    " address\n");
634 		error = EIO;
635 		goto err_late;
636 	}
637 
638 	if (!em_is_valid_ether_addr(hw->mac.addr)) {
639 		device_printf(dev, "Invalid MAC address\n");
640 		error = EIO;
641 		goto err_late;
642 	}
643 
644 	/*
645 	**  Do interrupt configuration
646 	*/
647 	if (adapter->msix > 1) /* Do MSIX */
648 		error = em_allocate_msix(adapter);
649 	else  /* MSI or Legacy */
650 		error = em_allocate_legacy(adapter);
651 	if (error)
652 		goto err_late;
653 
654 	/*
655 	 * Get Wake-on-Lan and Management info for later use
656 	 */
657 	em_get_wakeup(dev);
658 
659 	/* Setup OS specific network interface */
660 	if (em_setup_interface(dev, adapter) != 0)
661 		goto err_late;
662 
663 	em_reset(adapter);
664 
665 	/* Initialize statistics */
666 	em_update_stats_counters(adapter);
667 
668 	hw->mac.get_link_status = 1;
669 	em_update_link_status(adapter);
670 
671 	/* Register for VLAN events */
672 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
673 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
674 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
675 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
676 
677 	em_add_hw_stats(adapter);
678 
679 	/* Non-AMT based hardware can now take control from firmware */
680 	if (adapter->has_manage && !adapter->has_amt)
681 		em_get_hw_control(adapter);
682 
683 	/* Tell the stack that the interface is not active */
684 	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
685 
686 	adapter->led_dev = led_create(em_led_func, adapter,
687 	    device_get_nameunit(dev));
688 
689 	INIT_DEBUGOUT("em_attach: end");
690 
691 	return (0);
692 
693 err_late:
694 	em_free_transmit_structures(adapter);
695 	em_free_receive_structures(adapter);
696 	em_release_hw_control(adapter);
697 	if (adapter->ifp != NULL)
698 		if_free(adapter->ifp);
699 err_pci:
700 	em_free_pci_resources(adapter);
701 	free(adapter->mta, M_DEVBUF);
702 	EM_CORE_LOCK_DESTROY(adapter);
703 
704 	return (error);
705 }
706 
707 /*********************************************************************
708  *  Device removal routine
709  *
710  *  The detach entry point is called when the driver is being removed.
711  *  This routine stops the adapter and deallocates all the resources
712  *  that were allocated for driver operation.
713  *
714  *  return 0 on success, positive on failure
715  *********************************************************************/
716 
717 static int
718 em_detach(device_t dev)
719 {
720 	struct adapter	*adapter = device_get_softc(dev);
721 	struct ifnet	*ifp = adapter->ifp;
722 
723 	INIT_DEBUGOUT("em_detach: begin");
724 
725 	/* Make sure VLANS are not using driver */
726 	if (adapter->ifp->if_vlantrunk != NULL) {
727 		device_printf(dev,"Vlan in use, detach first\n");
728 		return (EBUSY);
729 	}
730 
731 #ifdef DEVICE_POLLING
732 	if (ifp->if_capenable & IFCAP_POLLING)
733 		ether_poll_deregister(ifp);
734 #endif
735 
736 	if (adapter->led_dev != NULL)
737 		led_destroy(adapter->led_dev);
738 
739 	EM_CORE_LOCK(adapter);
740 	adapter->in_detach = 1;
741 	em_stop(adapter);
742 	EM_CORE_UNLOCK(adapter);
743 	EM_CORE_LOCK_DESTROY(adapter);
744 
745 	e1000_phy_hw_reset(&adapter->hw);
746 
747 	em_release_manageability(adapter);
748 	em_release_hw_control(adapter);
749 
750 	/* Unregister VLAN events */
751 	if (adapter->vlan_attach != NULL)
752 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
753 	if (adapter->vlan_detach != NULL)
754 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
755 
756 	ether_ifdetach(adapter->ifp);
757 	callout_drain(&adapter->timer);
758 
759 	em_free_pci_resources(adapter);
760 	bus_generic_detach(dev);
761 	if_free(ifp);
762 
763 	em_free_transmit_structures(adapter);
764 	em_free_receive_structures(adapter);
765 
766 	em_release_hw_control(adapter);
767 	free(adapter->mta, M_DEVBUF);
768 
769 	return (0);
770 }
771 
772 /*********************************************************************
773  *
774  *  Shutdown entry point
775  *
776  **********************************************************************/
777 
778 static int
779 em_shutdown(device_t dev)
780 {
781 	return em_suspend(dev);
782 }
783 
784 /*
785  * Suspend/resume device methods.
786  */
787 static int
788 em_suspend(device_t dev)
789 {
790 	struct adapter *adapter = device_get_softc(dev);
791 
792 	EM_CORE_LOCK(adapter);
793 
794         em_release_manageability(adapter);
795 	em_release_hw_control(adapter);
796 	em_enable_wakeup(dev);
797 
798 	EM_CORE_UNLOCK(adapter);
799 
800 	return bus_generic_suspend(dev);
801 }
802 
803 static int
804 em_resume(device_t dev)
805 {
806 	struct adapter *adapter = device_get_softc(dev);
807 	struct ifnet *ifp = adapter->ifp;
808 
809 	EM_CORE_LOCK(adapter);
810 	em_init_locked(adapter);
811 	em_init_manageability(adapter);
812 	EM_CORE_UNLOCK(adapter);
813 	em_start(ifp);
814 
815 	return bus_generic_resume(dev);
816 }
817 
818 
819 /*********************************************************************
820  *  Transmit entry point
821  *
822  *  em_start is called by the stack to initiate a transmit.
823  *  The driver will remain in this routine as long as there are
824  *  packets to transmit and transmit resources are available.
825  *  In case resources are not available stack is notified and
826  *  the packet is requeued.
827  **********************************************************************/
828 
829 #ifdef EM_MULTIQUEUE
830 static int
831 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
832 {
833 	struct adapter  *adapter = txr->adapter;
834         struct mbuf     *next;
835         int             err = 0, enq = 0;
836 
837 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
838 	    IFF_DRV_RUNNING || adapter->link_active == 0) {
839 		if (m != NULL)
840 			err = drbr_enqueue(ifp, txr->br, m);
841 		return (err);
842 	}
843 
844         /* Call cleanup if number of TX descriptors low */
845 	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
846 		em_txeof(txr);
847 
848 	enq = 0;
849 	if (m == NULL) {
850 		next = drbr_dequeue(ifp, txr->br);
851 	} else if (drbr_needs_enqueue(ifp, txr->br)) {
852 		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
853 			return (err);
854 		next = drbr_dequeue(ifp, txr->br);
855 	} else
856 		next = m;
857 
858 	/* Process the queue */
859 	while (next != NULL) {
860 		if ((err = em_xmit(txr, &next)) != 0) {
861                         if (next != NULL)
862                                 err = drbr_enqueue(ifp, txr->br, next);
863                         break;
864 		}
865 		enq++;
866 		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
867 		ETHER_BPF_MTAP(ifp, next);
868 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
869                         break;
870 		if (txr->tx_avail < EM_MAX_SCATTER) {
871 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
872 			break;
873 		}
874 		next = drbr_dequeue(ifp, txr->br);
875 	}
876 
877 	if (enq > 0) {
878                 /* Set the watchdog */
879                 txr->queue_status = EM_QUEUE_WORKING;
880 		txr->watchdog_time = ticks;
881 	}
882 	return (err);
883 }
884 
885 /*
886 ** Multiqueue capable stack interface
887 */
888 static int
889 em_mq_start(struct ifnet *ifp, struct mbuf *m)
890 {
891 	struct adapter	*adapter = ifp->if_softc;
892 	struct tx_ring	*txr = adapter->tx_rings;
893 	int 		error;
894 
895 	if (EM_TX_TRYLOCK(txr)) {
896 		error = em_mq_start_locked(ifp, txr, m);
897 		EM_TX_UNLOCK(txr);
898 	} else
899 		error = drbr_enqueue(ifp, txr->br, m);
900 
901 	return (error);
902 }
903 
904 /*
905 ** Flush all ring buffers
906 */
907 static void
908 em_qflush(struct ifnet *ifp)
909 {
910 	struct adapter  *adapter = ifp->if_softc;
911 	struct tx_ring  *txr = adapter->tx_rings;
912 	struct mbuf     *m;
913 
914 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
915 		EM_TX_LOCK(txr);
916 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
917 			m_freem(m);
918 		EM_TX_UNLOCK(txr);
919 	}
920 	if_qflush(ifp);
921 }
922 
923 #endif /* EM_MULTIQUEUE */
924 
925 static void
926 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
927 {
928 	struct adapter	*adapter = ifp->if_softc;
929 	struct mbuf	*m_head;
930 
931 	EM_TX_LOCK_ASSERT(txr);
932 
933 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
934 	    IFF_DRV_RUNNING)
935 		return;
936 
937 	if (!adapter->link_active)
938 		return;
939 
940 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
941         	/* Call cleanup if number of TX descriptors low */
942 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
943 			em_txeof(txr);
944 		if (txr->tx_avail < EM_MAX_SCATTER) {
945 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
946 			break;
947 		}
948                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
949 		if (m_head == NULL)
950 			break;
951 		/*
952 		 *  Encapsulation can modify our pointer, and or make it
953 		 *  NULL on failure.  In that event, we can't requeue.
954 		 */
955 		if (em_xmit(txr, &m_head)) {
956 			if (m_head == NULL)
957 				break;
958 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
959 			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
960 			break;
961 		}
962 
963 		/* Send a copy of the frame to the BPF listener */
964 		ETHER_BPF_MTAP(ifp, m_head);
965 
966 		/* Set timeout in case hardware has problems transmitting. */
967 		txr->watchdog_time = ticks;
968                 txr->queue_status = EM_QUEUE_WORKING;
969 	}
970 
971 	return;
972 }
973 
974 static void
975 em_start(struct ifnet *ifp)
976 {
977 	struct adapter	*adapter = ifp->if_softc;
978 	struct tx_ring	*txr = adapter->tx_rings;
979 
980 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
981 		EM_TX_LOCK(txr);
982 		em_start_locked(ifp, txr);
983 		EM_TX_UNLOCK(txr);
984 	}
985 	return;
986 }
987 
988 /*********************************************************************
989  *  Ioctl entry point
990  *
991  *  em_ioctl is called when the user wants to configure the
992  *  interface.
993  *
994  *  return 0 on success, positive on failure
995  **********************************************************************/
996 
997 static int
998 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
999 {
1000 	struct adapter	*adapter = ifp->if_softc;
1001 	struct ifreq *ifr = (struct ifreq *)data;
1002 #ifdef INET
1003 	struct ifaddr *ifa = (struct ifaddr *)data;
1004 #endif
1005 	int error = 0;
1006 
1007 	if (adapter->in_detach)
1008 		return (error);
1009 
1010 	switch (command) {
1011 	case SIOCSIFADDR:
1012 #ifdef INET
1013 		if (ifa->ifa_addr->sa_family == AF_INET) {
1014 			/*
1015 			 * XXX
1016 			 * Since resetting hardware takes a very long time
1017 			 * and results in link renegotiation we only
1018 			 * initialize the hardware only when it is absolutely
1019 			 * required.
1020 			 */
1021 			ifp->if_flags |= IFF_UP;
1022 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1023 				EM_CORE_LOCK(adapter);
1024 				em_init_locked(adapter);
1025 				EM_CORE_UNLOCK(adapter);
1026 			}
1027 			arp_ifinit(ifp, ifa);
1028 		} else
1029 #endif
1030 			error = ether_ioctl(ifp, command, data);
1031 		break;
1032 	case SIOCSIFMTU:
1033 	    {
1034 		int max_frame_size;
1035 
1036 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1037 
1038 		EM_CORE_LOCK(adapter);
1039 		switch (adapter->hw.mac.type) {
1040 		case e1000_82571:
1041 		case e1000_82572:
1042 		case e1000_ich9lan:
1043 		case e1000_ich10lan:
1044 		case e1000_pch2lan:
1045 		case e1000_82574:
1046 		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1047 			max_frame_size = 9234;
1048 			break;
1049 		case e1000_pchlan:
1050 			max_frame_size = 4096;
1051 			break;
1052 			/* Adapters that do not support jumbo frames */
1053 		case e1000_82583:
1054 		case e1000_ich8lan:
1055 			max_frame_size = ETHER_MAX_LEN;
1056 			break;
1057 		default:
1058 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1059 		}
1060 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1061 		    ETHER_CRC_LEN) {
1062 			EM_CORE_UNLOCK(adapter);
1063 			error = EINVAL;
1064 			break;
1065 		}
1066 
1067 		ifp->if_mtu = ifr->ifr_mtu;
1068 		adapter->max_frame_size =
1069 		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1070 		em_init_locked(adapter);
1071 		EM_CORE_UNLOCK(adapter);
1072 		break;
1073 	    }
1074 	case SIOCSIFFLAGS:
1075 		IOCTL_DEBUGOUT("ioctl rcv'd:\
1076 		    SIOCSIFFLAGS (Set Interface Flags)");
1077 		EM_CORE_LOCK(adapter);
1078 		if (ifp->if_flags & IFF_UP) {
1079 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1080 				if ((ifp->if_flags ^ adapter->if_flags) &
1081 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1082 					em_disable_promisc(adapter);
1083 					em_set_promisc(adapter);
1084 				}
1085 			} else
1086 				em_init_locked(adapter);
1087 		} else
1088 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1089 				em_stop(adapter);
1090 		adapter->if_flags = ifp->if_flags;
1091 		EM_CORE_UNLOCK(adapter);
1092 		break;
1093 	case SIOCADDMULTI:
1094 	case SIOCDELMULTI:
1095 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1096 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1097 			EM_CORE_LOCK(adapter);
1098 			em_disable_intr(adapter);
1099 			em_set_multi(adapter);
1100 #ifdef DEVICE_POLLING
1101 			if (!(ifp->if_capenable & IFCAP_POLLING))
1102 #endif
1103 				em_enable_intr(adapter);
1104 			EM_CORE_UNLOCK(adapter);
1105 		}
1106 		break;
1107 	case SIOCSIFMEDIA:
1108 		/*
1109 		** As the speed/duplex settings are being
1110 		** changed, we need to reset the PHY.
1111 		*/
1112 		adapter->hw.phy.reset_disable = FALSE;
1113 		/* Check SOL/IDER usage */
1114 		EM_CORE_LOCK(adapter);
1115 		if (e1000_check_reset_block(&adapter->hw)) {
1116 			EM_CORE_UNLOCK(adapter);
1117 			device_printf(adapter->dev, "Media change is"
1118 			    " blocked due to SOL/IDER session.\n");
1119 			break;
1120 		}
1121 		EM_CORE_UNLOCK(adapter);
1122 		/* falls thru */
1123 	case SIOCGIFMEDIA:
1124 		IOCTL_DEBUGOUT("ioctl rcv'd: \
1125 		    SIOCxIFMEDIA (Get/Set Interface Media)");
1126 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1127 		break;
1128 	case SIOCSIFCAP:
1129 	    {
1130 		int mask, reinit;
1131 
1132 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1133 		reinit = 0;
1134 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1135 #ifdef DEVICE_POLLING
1136 		if (mask & IFCAP_POLLING) {
1137 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1138 				error = ether_poll_register(em_poll, ifp);
1139 				if (error)
1140 					return (error);
1141 				EM_CORE_LOCK(adapter);
1142 				em_disable_intr(adapter);
1143 				ifp->if_capenable |= IFCAP_POLLING;
1144 				EM_CORE_UNLOCK(adapter);
1145 			} else {
1146 				error = ether_poll_deregister(ifp);
1147 				/* Enable interrupt even in error case */
1148 				EM_CORE_LOCK(adapter);
1149 				em_enable_intr(adapter);
1150 				ifp->if_capenable &= ~IFCAP_POLLING;
1151 				EM_CORE_UNLOCK(adapter);
1152 			}
1153 		}
1154 #endif
1155 		if (mask & IFCAP_HWCSUM) {
1156 			ifp->if_capenable ^= IFCAP_HWCSUM;
1157 			reinit = 1;
1158 		}
1159 		if (mask & IFCAP_TSO4) {
1160 			ifp->if_capenable ^= IFCAP_TSO4;
1161 			reinit = 1;
1162 		}
1163 		if (mask & IFCAP_VLAN_HWTAGGING) {
1164 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1165 			reinit = 1;
1166 		}
1167 		if (mask & IFCAP_VLAN_HWFILTER) {
1168 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1169 			reinit = 1;
1170 		}
1171 		if ((mask & IFCAP_WOL) &&
1172 		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
1173 			if (mask & IFCAP_WOL_MCAST)
1174 				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1175 			if (mask & IFCAP_WOL_MAGIC)
1176 				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1177 		}
1178 		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1179 			em_init(adapter);
1180 		VLAN_CAPABILITIES(ifp);
1181 		break;
1182 	    }
1183 
1184 	default:
1185 		error = ether_ioctl(ifp, command, data);
1186 		break;
1187 	}
1188 
1189 	return (error);
1190 }
1191 
1192 
1193 /*********************************************************************
1194  *  Init entry point
1195  *
1196  *  This routine is used in two ways. It is used by the stack as
1197  *  init entry point in network interface structure. It is also used
1198  *  by the driver as a hw/sw initialization routine to get to a
1199  *  consistent state.
1200  *
1201  *  return 0 on success, positive on failure
1202  **********************************************************************/
1203 
1204 static void
1205 em_init_locked(struct adapter *adapter)
1206 {
1207 	struct ifnet	*ifp = adapter->ifp;
1208 	device_t	dev = adapter->dev;
1209 	u32		pba;
1210 
1211 	INIT_DEBUGOUT("em_init: begin");
1212 
1213 	EM_CORE_LOCK_ASSERT(adapter);
1214 
1215 	em_disable_intr(adapter);
1216 	callout_stop(&adapter->timer);
1217 
1218 	/*
1219 	 * Packet Buffer Allocation (PBA)
1220 	 * Writing PBA sets the receive portion of the buffer
1221 	 * the remainder is used for the transmit buffer.
1222 	 */
1223 	switch (adapter->hw.mac.type) {
1224 	/* Total Packet Buffer on these is 48K */
1225 	case e1000_82571:
1226 	case e1000_82572:
1227 	case e1000_80003es2lan:
1228 			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
1229 		break;
1230 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
1231 			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
1232 		break;
1233 	case e1000_82574:
1234 	case e1000_82583:
1235 			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
1236 		break;
1237 	case e1000_ich8lan:
1238 		pba = E1000_PBA_8K;
1239 		break;
1240 	case e1000_ich9lan:
1241 	case e1000_ich10lan:
1242 		pba = E1000_PBA_10K;
1243 		break;
1244 	case e1000_pchlan:
1245 	case e1000_pch2lan:
1246 		pba = E1000_PBA_26K;
1247 		break;
1248 	default:
1249 		if (adapter->max_frame_size > 8192)
1250 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
1251 		else
1252 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
1253 	}
1254 
1255 	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
1256 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
1257 
1258 	/* Get the latest mac address, User can use a LAA */
1259         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1260               ETHER_ADDR_LEN);
1261 
1262 	/* Put the address into the Receive Address Array */
1263 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1264 
1265 	/*
1266 	 * With the 82571 adapter, RAR[0] may be overwritten
1267 	 * when the other port is reset, we make a duplicate
1268 	 * in RAR[14] for that eventuality, this assures
1269 	 * the interface continues to function.
1270 	 */
1271 	if (adapter->hw.mac.type == e1000_82571) {
1272 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1273 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1274 		    E1000_RAR_ENTRIES - 1);
1275 	}
1276 
1277 	/* Initialize the hardware */
1278 	em_reset(adapter);
1279 	em_update_link_status(adapter);
1280 
1281 	/* Setup VLAN support, basic and offload if available */
1282 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1283 
1284 	/* Set hardware offload abilities */
1285 	ifp->if_hwassist = 0;
1286 	if (ifp->if_capenable & IFCAP_TXCSUM)
1287 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1288 	if (ifp->if_capenable & IFCAP_TSO4)
1289 		ifp->if_hwassist |= CSUM_TSO;
1290 
1291 	/* Configure for OS presence */
1292 	em_init_manageability(adapter);
1293 
1294 	/* Prepare transmit descriptors and buffers */
1295 	em_setup_transmit_structures(adapter);
1296 	em_initialize_transmit_unit(adapter);
1297 
1298 	/* Setup Multicast table */
1299 	em_set_multi(adapter);
1300 
1301 	/*
1302 	** Figure out the desired mbuf
1303 	** pool for doing jumbos
1304 	*/
1305 	if (adapter->max_frame_size <= 2048)
1306 		adapter->rx_mbuf_sz = MCLBYTES;
1307 	else if (adapter->max_frame_size <= 4096)
1308 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1309 	else
1310 		adapter->rx_mbuf_sz = MJUM9BYTES;
1311 
1312 	/* Prepare receive descriptors and buffers */
1313 	if (em_setup_receive_structures(adapter)) {
1314 		device_printf(dev, "Could not setup receive structures\n");
1315 		em_stop(adapter);
1316 		return;
1317 	}
1318 	em_initialize_receive_unit(adapter);
1319 
1320 	/* Use real VLAN Filter support? */
1321 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1322 		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1323 			/* Use real VLAN Filter support */
1324 			em_setup_vlan_hw_support(adapter);
1325 		else {
1326 			u32 ctrl;
1327 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1328 			ctrl |= E1000_CTRL_VME;
1329 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1330 		}
1331 	}
1332 
1333 	/* Don't lose promiscuous settings */
1334 	em_set_promisc(adapter);
1335 
1336 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1337 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1338 
1339 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1340 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1341 
1342 	/* MSI/X configuration for 82574 */
1343 	if (adapter->hw.mac.type == e1000_82574) {
1344 		int tmp;
1345 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1346 		tmp |= E1000_CTRL_EXT_PBA_CLR;
1347 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1348 		/* Set the IVAR - interrupt vector routing. */
1349 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1350 	}
1351 
1352 #ifdef DEVICE_POLLING
1353 	/*
1354 	 * Only enable interrupts if we are not polling, make sure
1355 	 * they are off otherwise.
1356 	 */
1357 	if (ifp->if_capenable & IFCAP_POLLING)
1358 		em_disable_intr(adapter);
1359 	else
1360 #endif /* DEVICE_POLLING */
1361 		em_enable_intr(adapter);
1362 
1363 	/* AMT based hardware can now take control from firmware */
1364 	if (adapter->has_manage && adapter->has_amt)
1365 		em_get_hw_control(adapter);
1366 
1367 	/* Don't reset the phy next time init gets called */
1368 	adapter->hw.phy.reset_disable = TRUE;
1369 }
1370 
/*
 * Unlocked wrapper around em_init_locked(): 'arg' is the adapter
 * softc, and the core lock is held for the duration of the init.
 */
static void
em_init(void *arg)
{
	struct adapter *sc = (struct adapter *)arg;

	EM_CORE_LOCK(sc);
	em_init_locked(sc);
	EM_CORE_UNLOCK(sc);
}
1380 
1381 
1382 #ifdef DEVICE_POLLING
1383 /*********************************************************************
1384  *
1385  *  Legacy polling routine: note this only works with single queue
1386  *
1387  *********************************************************************/
1388 static int
1389 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1390 {
1391 	struct adapter *adapter = ifp->if_softc;
1392 	struct tx_ring	*txr = adapter->tx_rings;
1393 	struct rx_ring	*rxr = adapter->rx_rings;
1394 	u32		reg_icr;
1395 	int		rx_done;
1396 
1397 	EM_CORE_LOCK(adapter);
1398 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1399 		EM_CORE_UNLOCK(adapter);
1400 		return (0);
1401 	}
1402 
1403 	if (cmd == POLL_AND_CHECK_STATUS) {
1404 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1405 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1406 			callout_stop(&adapter->timer);
1407 			adapter->hw.mac.get_link_status = 1;
1408 			em_update_link_status(adapter);
1409 			callout_reset(&adapter->timer, hz,
1410 			    em_local_timer, adapter);
1411 		}
1412 	}
1413 	EM_CORE_UNLOCK(adapter);
1414 
1415 	em_rxeof(rxr, count, &rx_done);
1416 
1417 	EM_TX_LOCK(txr);
1418 	em_txeof(txr);
1419 #ifdef EM_MULTIQUEUE
1420 	if (!drbr_empty(ifp, txr->br))
1421 		em_mq_start_locked(ifp, txr, NULL);
1422 #else
1423 	em_start_locked(ifp, txr);
1424 #endif
1425 	EM_TX_UNLOCK(txr);
1426 
1427 	return (rx_done);
1428 }
1429 #endif /* DEVICE_POLLING */
1430 
1431 
1432 /*********************************************************************
1433  *
1434  *  Fast Legacy/MSI Combined Interrupt Service routine
1435  *
1436  *********************************************************************/
1437 static int
1438 em_irq_fast(void *arg)
1439 {
1440 	struct adapter	*adapter = arg;
1441 	struct ifnet	*ifp;
1442 	u32		reg_icr;
1443 
1444 	ifp = adapter->ifp;
1445 
1446 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1447 
1448 	/* Hot eject?  */
1449 	if (reg_icr == 0xffffffff)
1450 		return FILTER_STRAY;
1451 
1452 	/* Definitely not our interrupt.  */
1453 	if (reg_icr == 0x0)
1454 		return FILTER_STRAY;
1455 
1456 	/*
1457 	 * Starting with the 82571 chip, bit 31 should be used to
1458 	 * determine whether the interrupt belongs to us.
1459 	 */
1460 	if (adapter->hw.mac.type >= e1000_82571 &&
1461 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1462 		return FILTER_STRAY;
1463 
1464 	em_disable_intr(adapter);
1465 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1466 
1467 	/* Link status change */
1468 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1469 		adapter->hw.mac.get_link_status = 1;
1470 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1471 	}
1472 
1473 	if (reg_icr & E1000_ICR_RXO)
1474 		adapter->rx_overruns++;
1475 	return FILTER_HANDLED;
1476 }
1477 
1478 /* Combined RX/TX handler, used by Legacy and MSI */
1479 static void
1480 em_handle_que(void *context, int pending)
1481 {
1482 	struct adapter	*adapter = context;
1483 	struct ifnet	*ifp = adapter->ifp;
1484 	struct tx_ring	*txr = adapter->tx_rings;
1485 	struct rx_ring	*rxr = adapter->rx_rings;
1486 
1487 
1488 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1489 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1490 		EM_TX_LOCK(txr);
1491 		em_txeof(txr);
1492 #ifdef EM_MULTIQUEUE
1493 		if (!drbr_empty(ifp, txr->br))
1494 			em_mq_start_locked(ifp, txr, NULL);
1495 #else
1496 		em_start_locked(ifp, txr);
1497 #endif
1498 		EM_TX_UNLOCK(txr);
1499 		if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
1500 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1501 			return;
1502 		}
1503 	}
1504 
1505 	em_enable_intr(adapter);
1506 	return;
1507 }
1508 
1509 
1510 /*********************************************************************
1511  *
1512  *  MSIX Interrupt Service Routines
1513  *
1514  **********************************************************************/
1515 static void
1516 em_msix_tx(void *arg)
1517 {
1518 	struct tx_ring *txr = arg;
1519 	struct adapter *adapter = txr->adapter;
1520 	bool		more;
1521 
1522 	++txr->tx_irq;
1523 	EM_TX_LOCK(txr);
1524 	more = em_txeof(txr);
1525 	EM_TX_UNLOCK(txr);
1526 	if (more)
1527 		taskqueue_enqueue(txr->tq, &txr->tx_task);
1528 	else
1529 		/* Reenable this interrupt */
1530 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1531 	return;
1532 }
1533 
1534 /*********************************************************************
1535  *
1536  *  MSIX RX Interrupt Service routine
1537  *
1538  **********************************************************************/
1539 
1540 static void
1541 em_msix_rx(void *arg)
1542 {
1543 	struct rx_ring	*rxr = arg;
1544 	struct adapter	*adapter = rxr->adapter;
1545 	bool		more;
1546 
1547 	++rxr->rx_irq;
1548 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1549 	if (more)
1550 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1551 	else
1552 		/* Reenable this interrupt */
1553 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1554 	return;
1555 }
1556 
1557 /*********************************************************************
1558  *
1559  *  MSIX Link Fast Interrupt Service routine
1560  *
1561  **********************************************************************/
1562 static void
1563 em_msix_link(void *arg)
1564 {
1565 	struct adapter	*adapter = arg;
1566 	u32		reg_icr;
1567 
1568 	++adapter->link_irq;
1569 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1570 
1571 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1572 		adapter->hw.mac.get_link_status = 1;
1573 		em_handle_link(adapter, 0);
1574 	} else
1575 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1576 		    EM_MSIX_LINK | E1000_IMS_LSC);
1577 	return;
1578 }
1579 
1580 static void
1581 em_handle_rx(void *context, int pending)
1582 {
1583 	struct rx_ring	*rxr = context;
1584 	struct adapter	*adapter = rxr->adapter;
1585         bool            more;
1586 
1587 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1588 	if (more)
1589 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1590 	else
1591 		/* Reenable this interrupt */
1592 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1593 }
1594 
1595 static void
1596 em_handle_tx(void *context, int pending)
1597 {
1598 	struct tx_ring	*txr = context;
1599 	struct adapter	*adapter = txr->adapter;
1600 	struct ifnet	*ifp = adapter->ifp;
1601 
1602 	EM_TX_LOCK(txr);
1603 	em_txeof(txr);
1604 #ifdef EM_MULTIQUEUE
1605 	if (!drbr_empty(ifp, txr->br))
1606 		em_mq_start_locked(ifp, txr, NULL);
1607 #else
1608 	em_start_locked(ifp, txr);
1609 #endif
1610 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1611 	EM_TX_UNLOCK(txr);
1612 }
1613 
1614 static void
1615 em_handle_link(void *context, int pending)
1616 {
1617 	struct adapter	*adapter = context;
1618 	struct ifnet *ifp = adapter->ifp;
1619 
1620 	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1621 		return;
1622 
1623 	EM_CORE_LOCK(adapter);
1624 	callout_stop(&adapter->timer);
1625 	em_update_link_status(adapter);
1626 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1627 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1628 	    EM_MSIX_LINK | E1000_IMS_LSC);
1629 	EM_CORE_UNLOCK(adapter);
1630 }
1631 
1632 
1633 /*********************************************************************
1634  *
1635  *  Media Ioctl callback
1636  *
1637  *  This routine is called whenever the user queries the status of
1638  *  the interface using ifconfig.
1639  *
1640  **********************************************************************/
1641 static void
1642 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1643 {
1644 	struct adapter *adapter = ifp->if_softc;
1645 	u_char fiber_type = IFM_1000_SX;
1646 
1647 	INIT_DEBUGOUT("em_media_status: begin");
1648 
1649 	EM_CORE_LOCK(adapter);
1650 	em_update_link_status(adapter);
1651 
1652 	ifmr->ifm_status = IFM_AVALID;
1653 	ifmr->ifm_active = IFM_ETHER;
1654 
1655 	if (!adapter->link_active) {
1656 		EM_CORE_UNLOCK(adapter);
1657 		return;
1658 	}
1659 
1660 	ifmr->ifm_status |= IFM_ACTIVE;
1661 
1662 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1663 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1664 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1665 	} else {
1666 		switch (adapter->link_speed) {
1667 		case 10:
1668 			ifmr->ifm_active |= IFM_10_T;
1669 			break;
1670 		case 100:
1671 			ifmr->ifm_active |= IFM_100_TX;
1672 			break;
1673 		case 1000:
1674 			ifmr->ifm_active |= IFM_1000_T;
1675 			break;
1676 		}
1677 		if (adapter->link_duplex == FULL_DUPLEX)
1678 			ifmr->ifm_active |= IFM_FDX;
1679 		else
1680 			ifmr->ifm_active |= IFM_HDX;
1681 	}
1682 	EM_CORE_UNLOCK(adapter);
1683 }
1684 
1685 /*********************************************************************
1686  *
1687  *  Media Ioctl callback
1688  *
1689  *  This routine is called when the user changes speed/duplex using
1690  *  media/mediopt option with ifconfig.
1691  *
1692  **********************************************************************/
1693 static int
1694 em_media_change(struct ifnet *ifp)
1695 {
1696 	struct adapter *adapter = ifp->if_softc;
1697 	struct ifmedia  *ifm = &adapter->media;
1698 
1699 	INIT_DEBUGOUT("em_media_change: begin");
1700 
1701 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1702 		return (EINVAL);
1703 
1704 	EM_CORE_LOCK(adapter);
1705 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1706 	case IFM_AUTO:
1707 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1708 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1709 		break;
1710 	case IFM_1000_LX:
1711 	case IFM_1000_SX:
1712 	case IFM_1000_T:
1713 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1714 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1715 		break;
1716 	case IFM_100_TX:
1717 		adapter->hw.mac.autoneg = FALSE;
1718 		adapter->hw.phy.autoneg_advertised = 0;
1719 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1720 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1721 		else
1722 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1723 		break;
1724 	case IFM_10_T:
1725 		adapter->hw.mac.autoneg = FALSE;
1726 		adapter->hw.phy.autoneg_advertised = 0;
1727 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1728 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1729 		else
1730 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1731 		break;
1732 	default:
1733 		device_printf(adapter->dev, "Unsupported media type\n");
1734 	}
1735 
1736 	em_init_locked(adapter);
1737 	EM_CORE_UNLOCK(adapter);
1738 
1739 	return (0);
1740 }
1741 
1742 /*********************************************************************
1743  *
1744  *  This routine maps the mbufs to tx descriptors.
1745  *
1746  *  return 0 on success, positive on failure
1747  **********************************************************************/
1748 
1749 static int
1750 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1751 {
1752 	struct adapter		*adapter = txr->adapter;
1753 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1754 	bus_dmamap_t		map;
1755 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1756 	struct e1000_tx_desc	*ctxd = NULL;
1757 	struct mbuf		*m_head;
1758 	struct ether_header	*eh;
1759 	struct ip		*ip = NULL;
1760 	struct tcphdr		*tp = NULL;
1761 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1762 	int			ip_off, poff;
1763 	int			nsegs, i, j, first, last = 0;
1764 	int			error, do_tso, tso_desc = 0, remap = 1;
1765 
1766 retry:
1767 	m_head = *m_headp;
1768 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1769 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1770 	ip_off = poff = 0;
1771 
1772 	/*
1773 	 * Intel recommends entire IP/TCP header length reside in a single
1774 	 * buffer. If multiple descriptors are used to describe the IP and
1775 	 * TCP header, each descriptor should describe one or more
1776 	 * complete headers; descriptors referencing only parts of headers
1777 	 * are not supported. If all layer headers are not coalesced into
1778 	 * a single buffer, each buffer should not cross a 4KB boundary,
1779 	 * or be larger than the maximum read request size.
1780 	 * Controller also requires modifing IP/TCP header to make TSO work
1781 	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1782 	 * IP/TCP header into a single buffer to meet the requirement of
1783 	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1784 	 * which also has similiar restrictions.
1785 	 */
1786 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1787 		if (do_tso || (m_head->m_next != NULL &&
1788 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1789 			if (M_WRITABLE(*m_headp) == 0) {
1790 				m_head = m_dup(*m_headp, M_DONTWAIT);
1791 				m_freem(*m_headp);
1792 				if (m_head == NULL) {
1793 					*m_headp = NULL;
1794 					return (ENOBUFS);
1795 				}
1796 				*m_headp = m_head;
1797 			}
1798 		}
1799 		/*
1800 		 * XXX
1801 		 * Assume IPv4, we don't have TSO/checksum offload support
1802 		 * for IPv6 yet.
1803 		 */
1804 		ip_off = sizeof(struct ether_header);
1805 		m_head = m_pullup(m_head, ip_off);
1806 		if (m_head == NULL) {
1807 			*m_headp = NULL;
1808 			return (ENOBUFS);
1809 		}
1810 		eh = mtod(m_head, struct ether_header *);
1811 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1812 			ip_off = sizeof(struct ether_vlan_header);
1813 			m_head = m_pullup(m_head, ip_off);
1814 			if (m_head == NULL) {
1815 				*m_headp = NULL;
1816 				return (ENOBUFS);
1817 			}
1818 		}
1819 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1820 		if (m_head == NULL) {
1821 			*m_headp = NULL;
1822 			return (ENOBUFS);
1823 		}
1824 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1825 		poff = ip_off + (ip->ip_hl << 2);
1826 		if (do_tso) {
1827 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1828 			if (m_head == NULL) {
1829 				*m_headp = NULL;
1830 				return (ENOBUFS);
1831 			}
1832 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1833 			/*
1834 			 * TSO workaround:
1835 			 *   pull 4 more bytes of data into it.
1836 			 */
1837 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1838 			if (m_head == NULL) {
1839 				*m_headp = NULL;
1840 				return (ENOBUFS);
1841 			}
1842 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1843 			ip->ip_len = 0;
1844 			ip->ip_sum = 0;
1845 			/*
1846 			 * The pseudo TCP checksum does not include TCP payload
1847 			 * length so driver should recompute the checksum here
1848 			 * what hardware expect to see. This is adherence of
1849 			 * Microsoft's Large Send specification.
1850 			 */
1851 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1852 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1853 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1854 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1855 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1856 			if (m_head == NULL) {
1857 				*m_headp = NULL;
1858 				return (ENOBUFS);
1859 			}
1860 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1861 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1862 			if (m_head == NULL) {
1863 				*m_headp = NULL;
1864 				return (ENOBUFS);
1865 			}
1866 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1867 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1868 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1869 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1870 			if (m_head == NULL) {
1871 				*m_headp = NULL;
1872 				return (ENOBUFS);
1873 			}
1874 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1875 		}
1876 		*m_headp = m_head;
1877 	}
1878 
1879 	/*
1880 	 * Map the packet for DMA
1881 	 *
1882 	 * Capture the first descriptor index,
1883 	 * this descriptor will have the index
1884 	 * of the EOP which is the only one that
1885 	 * now gets a DONE bit writeback.
1886 	 */
1887 	first = txr->next_avail_desc;
1888 	tx_buffer = &txr->tx_buffers[first];
1889 	tx_buffer_mapped = tx_buffer;
1890 	map = tx_buffer->map;
1891 
1892 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1893 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1894 
1895 	/*
1896 	 * There are two types of errors we can (try) to handle:
1897 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1898 	 *   out of segments.  Defragment the mbuf chain and try again.
1899 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1900 	 *   at this point in time.  Defer sending and try again later.
1901 	 * All other errors, in particular EINVAL, are fatal and prevent the
1902 	 * mbuf chain from ever going through.  Drop it and report error.
1903 	 */
1904 	if (error == EFBIG && remap) {
1905 		struct mbuf *m;
1906 
1907 		m = m_defrag(*m_headp, M_DONTWAIT);
1908 		if (m == NULL) {
1909 			adapter->mbuf_alloc_failed++;
1910 			m_freem(*m_headp);
1911 			*m_headp = NULL;
1912 			return (ENOBUFS);
1913 		}
1914 		*m_headp = m;
1915 
1916 		/* Try it again, but only once */
1917 		remap = 0;
1918 		goto retry;
1919 	} else if (error == ENOMEM) {
1920 		adapter->no_tx_dma_setup++;
1921 		return (error);
1922 	} else if (error != 0) {
1923 		adapter->no_tx_dma_setup++;
1924 		m_freem(*m_headp);
1925 		*m_headp = NULL;
1926 		return (error);
1927 	}
1928 
1929 	/*
1930 	 * TSO Hardware workaround, if this packet is not
1931 	 * TSO, and is only a single descriptor long, and
1932 	 * it follows a TSO burst, then we need to add a
1933 	 * sentinel descriptor to prevent premature writeback.
1934 	 */
1935 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1936 		if (nsegs == 1)
1937 			tso_desc = TRUE;
1938 		txr->tx_tso = FALSE;
1939 	}
1940 
1941         if (nsegs > (txr->tx_avail - 2)) {
1942                 txr->no_desc_avail++;
1943 		bus_dmamap_unload(txr->txtag, map);
1944 		return (ENOBUFS);
1945         }
1946 	m_head = *m_headp;
1947 
1948 	/* Do hardware assists */
1949 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1950 		em_tso_setup(txr, m_head, ip_off, ip, tp,
1951 		    &txd_upper, &txd_lower);
1952 		/* we need to make a final sentinel transmit desc */
1953 		tso_desc = TRUE;
1954 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1955 		em_transmit_checksum_setup(txr, m_head,
1956 		    ip_off, ip, &txd_upper, &txd_lower);
1957 
1958 	i = txr->next_avail_desc;
1959 
1960 	/* Set up our transmit descriptors */
1961 	for (j = 0; j < nsegs; j++) {
1962 		bus_size_t seg_len;
1963 		bus_addr_t seg_addr;
1964 
1965 		tx_buffer = &txr->tx_buffers[i];
1966 		ctxd = &txr->tx_base[i];
1967 		seg_addr = segs[j].ds_addr;
1968 		seg_len  = segs[j].ds_len;
1969 		/*
1970 		** TSO Workaround:
1971 		** If this is the last descriptor, we want to
1972 		** split it so we have a small final sentinel
1973 		*/
1974 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1975 			seg_len -= 4;
1976 			ctxd->buffer_addr = htole64(seg_addr);
1977 			ctxd->lower.data = htole32(
1978 			adapter->txd_cmd | txd_lower | seg_len);
1979 			ctxd->upper.data =
1980 			    htole32(txd_upper);
1981 			if (++i == adapter->num_tx_desc)
1982 				i = 0;
1983 			/* Now make the sentinel */
1984 			++txd_used; /* using an extra txd */
1985 			ctxd = &txr->tx_base[i];
1986 			tx_buffer = &txr->tx_buffers[i];
1987 			ctxd->buffer_addr =
1988 			    htole64(seg_addr + seg_len);
1989 			ctxd->lower.data = htole32(
1990 			adapter->txd_cmd | txd_lower | 4);
1991 			ctxd->upper.data =
1992 			    htole32(txd_upper);
1993 			last = i;
1994 			if (++i == adapter->num_tx_desc)
1995 				i = 0;
1996 		} else {
1997 			ctxd->buffer_addr = htole64(seg_addr);
1998 			ctxd->lower.data = htole32(
1999 			adapter->txd_cmd | txd_lower | seg_len);
2000 			ctxd->upper.data =
2001 			    htole32(txd_upper);
2002 			last = i;
2003 			if (++i == adapter->num_tx_desc)
2004 				i = 0;
2005 		}
2006 		tx_buffer->m_head = NULL;
2007 		tx_buffer->next_eop = -1;
2008 	}
2009 
2010 	txr->next_avail_desc = i;
2011 	txr->tx_avail -= nsegs;
2012 	if (tso_desc) /* TSO used an extra for sentinel */
2013 		txr->tx_avail -= txd_used;
2014 
2015 	if (m_head->m_flags & M_VLANTAG) {
2016 		/* Set the vlan id. */
2017 		ctxd->upper.fields.special =
2018 		    htole16(m_head->m_pkthdr.ether_vtag);
2019                 /* Tell hardware to add tag */
2020                 ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
2021         }
2022 
2023         tx_buffer->m_head = m_head;
2024 	tx_buffer_mapped->map = tx_buffer->map;
2025 	tx_buffer->map = map;
2026         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2027 
2028         /*
2029          * Last Descriptor of Packet
2030 	 * needs End Of Packet (EOP)
2031 	 * and Report Status (RS)
2032          */
2033         ctxd->lower.data |=
2034 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2035 	/*
2036 	 * Keep track in the first buffer which
2037 	 * descriptor will be written back
2038 	 */
2039 	tx_buffer = &txr->tx_buffers[first];
2040 	tx_buffer->next_eop = last;
2041 	/* Update the watchdog time early and often */
2042 	txr->watchdog_time = ticks;
2043 
2044 	/*
2045 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2046 	 * that this frame is available to transmit.
2047 	 */
2048 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2049 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2050 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2051 
2052 	return (0);
2053 }
2054 
2055 static void
2056 em_set_promisc(struct adapter *adapter)
2057 {
2058 	struct ifnet	*ifp = adapter->ifp;
2059 	u32		reg_rctl;
2060 
2061 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2062 
2063 	if (ifp->if_flags & IFF_PROMISC) {
2064 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2065 		/* Turn this on if you want to see bad packets */
2066 		if (em_debug_sbp)
2067 			reg_rctl |= E1000_RCTL_SBP;
2068 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2069 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2070 		reg_rctl |= E1000_RCTL_MPE;
2071 		reg_rctl &= ~E1000_RCTL_UPE;
2072 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2073 	}
2074 }
2075 
2076 static void
2077 em_disable_promisc(struct adapter *adapter)
2078 {
2079 	u32	reg_rctl;
2080 
2081 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2082 
2083 	reg_rctl &=  (~E1000_RCTL_UPE);
2084 	reg_rctl &=  (~E1000_RCTL_MPE);
2085 	reg_rctl &=  (~E1000_RCTL_SBP);
2086 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2087 }
2088 
2089 
2090 /*********************************************************************
2091  *  Multicast Update
2092  *
2093  *  This routine is called whenever multicast address list is updated.
2094  *
2095  **********************************************************************/
2096 
2097 static void
2098 em_set_multi(struct adapter *adapter)
2099 {
2100 	struct ifnet	*ifp = adapter->ifp;
2101 	struct ifmultiaddr *ifma;
2102 	u32 reg_rctl = 0;
2103 	u8  *mta; /* Multicast array memory */
2104 	int mcnt = 0;
2105 
2106 	IOCTL_DEBUGOUT("em_set_multi: begin");
2107 
2108 	mta = adapter->mta;
2109 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2110 
2111 	if (adapter->hw.mac.type == e1000_82542 &&
2112 	    adapter->hw.revision_id == E1000_REVISION_2) {
2113 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2114 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2115 			e1000_pci_clear_mwi(&adapter->hw);
2116 		reg_rctl |= E1000_RCTL_RST;
2117 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2118 		msec_delay(5);
2119 	}
2120 
2121 #if __FreeBSD_version < 800000
2122 	IF_ADDR_LOCK(ifp);
2123 #else
2124 	if_maddr_rlock(ifp);
2125 #endif
2126 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2127 		if (ifma->ifma_addr->sa_family != AF_LINK)
2128 			continue;
2129 
2130 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2131 			break;
2132 
2133 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2134 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2135 		mcnt++;
2136 	}
2137 #if __FreeBSD_version < 800000
2138 	IF_ADDR_UNLOCK(ifp);
2139 #else
2140 	if_maddr_runlock(ifp);
2141 #endif
2142 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2143 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2144 		reg_rctl |= E1000_RCTL_MPE;
2145 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2146 	} else
2147 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2148 
2149 	if (adapter->hw.mac.type == e1000_82542 &&
2150 	    adapter->hw.revision_id == E1000_REVISION_2) {
2151 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2152 		reg_rctl &= ~E1000_RCTL_RST;
2153 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2154 		msec_delay(5);
2155 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2156 			e1000_pci_set_mwi(&adapter->hw);
2157 	}
2158 }
2159 
2160 
2161 /*********************************************************************
2162  *  Timer routine
2163  *
2164  *  This routine checks for link status and updates statistics.
2165  *
2166  **********************************************************************/
2167 
2168 static void
2169 em_local_timer(void *arg)
2170 {
2171 	struct adapter	*adapter = arg;
2172 	struct ifnet	*ifp = adapter->ifp;
2173 	struct tx_ring	*txr = adapter->tx_rings;
2174 	struct rx_ring	*rxr = adapter->rx_rings;
2175 	u32		trigger;
2176 
2177 	EM_CORE_LOCK_ASSERT(adapter);
2178 
2179 	em_update_link_status(adapter);
2180 	em_update_stats_counters(adapter);
2181 
2182 	/* Reset LAA into RAR[0] on 82571 */
2183 	if ((adapter->hw.mac.type == e1000_82571) &&
2184 	    e1000_get_laa_state_82571(&adapter->hw))
2185 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2186 
2187 	/* Mask to use in the irq trigger */
2188 	if (adapter->msix_mem)
2189 		trigger = rxr->ims; /* RX for 82574 */
2190 	else
2191 		trigger = E1000_ICS_RXDMT0;
2192 
2193 	/*
2194 	** Don't do TX watchdog check if we've been paused
2195 	*/
2196 	if (adapter->pause_frames) {
2197 		adapter->pause_frames = 0;
2198 		goto out;
2199 	}
2200 	/*
2201 	** Check on the state of the TX queue(s), this
2202 	** can be done without the lock because its RO
2203 	** and the HUNG state will be static if set.
2204 	*/
2205 	for (int i = 0; i < adapter->num_queues; i++, txr++)
2206 		if (txr->queue_status == EM_QUEUE_HUNG)
2207 			goto hung;
2208 out:
2209 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2210 #ifndef DEVICE_POLLING
2211 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2212 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2213 #endif
2214 	return;
2215 hung:
2216 	/* Looks like we're hung */
2217 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2218 	device_printf(adapter->dev,
2219 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2220 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2221 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2222 	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2223 	    "Next TX to Clean = %d\n",
2224 	    txr->me, txr->tx_avail, txr->next_to_clean);
2225 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2226 	adapter->watchdog_events++;
2227 	em_init_locked(adapter);
2228 }
2229 
2230 
2231 static void
2232 em_update_link_status(struct adapter *adapter)
2233 {
2234 	struct e1000_hw *hw = &adapter->hw;
2235 	struct ifnet *ifp = adapter->ifp;
2236 	device_t dev = adapter->dev;
2237 	struct tx_ring *txr = adapter->tx_rings;
2238 	u32 link_check = 0;
2239 
2240 	/* Get the cached link value or read phy for real */
2241 	switch (hw->phy.media_type) {
2242 	case e1000_media_type_copper:
2243 		if (hw->mac.get_link_status) {
2244 			/* Do the work to read phy */
2245 			e1000_check_for_link(hw);
2246 			link_check = !hw->mac.get_link_status;
2247 			if (link_check) /* ESB2 fix */
2248 				e1000_cfg_on_link_up(hw);
2249 		} else
2250 			link_check = TRUE;
2251 		break;
2252 	case e1000_media_type_fiber:
2253 		e1000_check_for_link(hw);
2254 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2255                                  E1000_STATUS_LU);
2256 		break;
2257 	case e1000_media_type_internal_serdes:
2258 		e1000_check_for_link(hw);
2259 		link_check = adapter->hw.mac.serdes_has_link;
2260 		break;
2261 	default:
2262 	case e1000_media_type_unknown:
2263 		break;
2264 	}
2265 
2266 	/* Now check for a transition */
2267 	if (link_check && (adapter->link_active == 0)) {
2268 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2269 		    &adapter->link_duplex);
2270 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2271 		if ((adapter->link_speed != SPEED_1000) &&
2272 		    ((hw->mac.type == e1000_82571) ||
2273 		    (hw->mac.type == e1000_82572))) {
2274 			int tarc0;
2275 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2276 			tarc0 &= ~SPEED_MODE_BIT;
2277 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2278 		}
2279 		if (bootverbose)
2280 			device_printf(dev, "Link is up %d Mbps %s\n",
2281 			    adapter->link_speed,
2282 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2283 			    "Full Duplex" : "Half Duplex"));
2284 		adapter->link_active = 1;
2285 		adapter->smartspeed = 0;
2286 		ifp->if_baudrate = adapter->link_speed * 1000000;
2287 		if_link_state_change(ifp, LINK_STATE_UP);
2288 	} else if (!link_check && (adapter->link_active == 1)) {
2289 		ifp->if_baudrate = adapter->link_speed = 0;
2290 		adapter->link_duplex = 0;
2291 		if (bootverbose)
2292 			device_printf(dev, "Link is Down\n");
2293 		adapter->link_active = 0;
2294 		/* Link down, disable watchdog */
2295 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2296 			txr->queue_status = EM_QUEUE_IDLE;
2297 		if_link_state_change(ifp, LINK_STATE_DOWN);
2298 	}
2299 }
2300 
2301 /*********************************************************************
2302  *
2303  *  This routine disables all traffic on the adapter by issuing a
2304  *  global reset on the MAC and deallocates TX/RX buffers.
2305  *
2306  *  This routine should always be called with BOTH the CORE
2307  *  and TX locks.
2308  **********************************************************************/
2309 
2310 static void
2311 em_stop(void *arg)
2312 {
2313 	struct adapter	*adapter = arg;
2314 	struct ifnet	*ifp = adapter->ifp;
2315 	struct tx_ring	*txr = adapter->tx_rings;
2316 
2317 	EM_CORE_LOCK_ASSERT(adapter);
2318 
2319 	INIT_DEBUGOUT("em_stop: begin");
2320 
2321 	em_disable_intr(adapter);
2322 	callout_stop(&adapter->timer);
2323 
2324 	/* Tell the stack that the interface is no longer active */
2325 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2326 
2327         /* Unarm watchdog timer. */
2328 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2329 		EM_TX_LOCK(txr);
2330 		txr->queue_status = EM_QUEUE_IDLE;
2331 		EM_TX_UNLOCK(txr);
2332 	}
2333 
2334 	e1000_reset_hw(&adapter->hw);
2335 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2336 
2337 	e1000_led_off(&adapter->hw);
2338 	e1000_cleanup_led(&adapter->hw);
2339 }
2340 
2341 
2342 /*********************************************************************
2343  *
2344  *  Determine hardware revision.
2345  *
2346  **********************************************************************/
2347 static void
2348 em_identify_hardware(struct adapter *adapter)
2349 {
2350 	device_t dev = adapter->dev;
2351 
2352 	/* Make sure our PCI config space has the necessary stuff set */
2353 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2354 	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2355 	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2356 		device_printf(dev, "Memory Access and/or Bus Master bits "
2357 		    "were not set!\n");
2358 		adapter->hw.bus.pci_cmd_word |=
2359 		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2360 		pci_write_config(dev, PCIR_COMMAND,
2361 		    adapter->hw.bus.pci_cmd_word, 2);
2362 	}
2363 
2364 	/* Save off the information about this board */
2365 	adapter->hw.vendor_id = pci_get_vendor(dev);
2366 	adapter->hw.device_id = pci_get_device(dev);
2367 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2368 	adapter->hw.subsystem_vendor_id =
2369 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2370 	adapter->hw.subsystem_device_id =
2371 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2372 
2373 	/* Do Shared Code Init and Setup */
2374 	if (e1000_set_mac_type(&adapter->hw)) {
2375 		device_printf(dev, "Setup init failure\n");
2376 		return;
2377 	}
2378 }
2379 
2380 static int
2381 em_allocate_pci_resources(struct adapter *adapter)
2382 {
2383 	device_t	dev = adapter->dev;
2384 	int		rid;
2385 
2386 	rid = PCIR_BAR(0);
2387 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2388 	    &rid, RF_ACTIVE);
2389 	if (adapter->memory == NULL) {
2390 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2391 		return (ENXIO);
2392 	}
2393 	adapter->osdep.mem_bus_space_tag =
2394 	    rman_get_bustag(adapter->memory);
2395 	adapter->osdep.mem_bus_space_handle =
2396 	    rman_get_bushandle(adapter->memory);
2397 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2398 
2399 	/* Default to a single queue */
2400 	adapter->num_queues = 1;
2401 
2402 	/*
2403 	 * Setup MSI/X or MSI if PCI Express
2404 	 */
2405 	adapter->msix = em_setup_msix(adapter);
2406 
2407 	adapter->hw.back = &adapter->osdep;
2408 
2409 	return (0);
2410 }
2411 
2412 /*********************************************************************
2413  *
2414  *  Setup the Legacy or MSI Interrupt handler
2415  *
2416  **********************************************************************/
2417 int
2418 em_allocate_legacy(struct adapter *adapter)
2419 {
2420 	device_t dev = adapter->dev;
2421 	int error, rid = 0;
2422 
2423 	/* Manually turn off all interrupts */
2424 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2425 
2426 	if (adapter->msix == 1) /* using MSI */
2427 		rid = 1;
2428 	/* We allocate a single interrupt resource */
2429 	adapter->res = bus_alloc_resource_any(dev,
2430 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2431 	if (adapter->res == NULL) {
2432 		device_printf(dev, "Unable to allocate bus resource: "
2433 		    "interrupt\n");
2434 		return (ENXIO);
2435 	}
2436 
2437 	/*
2438 	 * Allocate a fast interrupt and the associated
2439 	 * deferred processing contexts.
2440 	 */
2441 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2442 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2443 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2444 	    taskqueue_thread_enqueue, &adapter->tq);
2445 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2446 	    device_get_nameunit(adapter->dev));
2447 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2448 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2449 		device_printf(dev, "Failed to register fast interrupt "
2450 			    "handler: %d\n", error);
2451 		taskqueue_free(adapter->tq);
2452 		adapter->tq = NULL;
2453 		return (error);
2454 	}
2455 
2456 	return (0);
2457 }
2458 
2459 /*********************************************************************
2460  *
2461  *  Setup the MSIX Interrupt handlers
2462  *   This is not really Multiqueue, rather
2463  *   its just multiple interrupt vectors.
2464  *
2465  **********************************************************************/
2466 int
2467 em_allocate_msix(struct adapter *adapter)
2468 {
2469 	device_t	dev = adapter->dev;
2470 	struct		tx_ring *txr = adapter->tx_rings;
2471 	struct		rx_ring *rxr = adapter->rx_rings;
2472 	int		error, rid, vector = 0;
2473 
2474 
2475 	/* Make sure all interrupts are disabled */
2476 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2477 
2478 	/* First set up ring resources */
2479 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2480 
2481 		/* RX ring */
2482 		rid = vector + 1;
2483 
2484 		rxr->res = bus_alloc_resource_any(dev,
2485 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2486 		if (rxr->res == NULL) {
2487 			device_printf(dev,
2488 			    "Unable to allocate bus resource: "
2489 			    "RX MSIX Interrupt %d\n", i);
2490 			return (ENXIO);
2491 		}
2492 		if ((error = bus_setup_intr(dev, rxr->res,
2493 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2494 		    rxr, &rxr->tag)) != 0) {
2495 			device_printf(dev, "Failed to register RX handler");
2496 			return (error);
2497 		}
2498 #if __FreeBSD_version >= 800504
2499 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2500 #endif
2501 		rxr->msix = vector++; /* NOTE increment vector for TX */
2502 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2503 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2504 		    taskqueue_thread_enqueue, &rxr->tq);
2505 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2506 		    device_get_nameunit(adapter->dev));
2507 		/*
2508 		** Set the bit to enable interrupt
2509 		** in E1000_IMS -- bits 20 and 21
2510 		** are for RX0 and RX1, note this has
2511 		** NOTHING to do with the MSIX vector
2512 		*/
2513 		rxr->ims = 1 << (20 + i);
2514 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2515 
2516 		/* TX ring */
2517 		rid = vector + 1;
2518 		txr->res = bus_alloc_resource_any(dev,
2519 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2520 		if (txr->res == NULL) {
2521 			device_printf(dev,
2522 			    "Unable to allocate bus resource: "
2523 			    "TX MSIX Interrupt %d\n", i);
2524 			return (ENXIO);
2525 		}
2526 		if ((error = bus_setup_intr(dev, txr->res,
2527 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2528 		    txr, &txr->tag)) != 0) {
2529 			device_printf(dev, "Failed to register TX handler");
2530 			return (error);
2531 		}
2532 #if __FreeBSD_version >= 800504
2533 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2534 #endif
2535 		txr->msix = vector++; /* Increment vector for next pass */
2536 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2537 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2538 		    taskqueue_thread_enqueue, &txr->tq);
2539 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2540 		    device_get_nameunit(adapter->dev));
2541 		/*
2542 		** Set the bit to enable interrupt
2543 		** in E1000_IMS -- bits 22 and 23
2544 		** are for TX0 and TX1, note this has
2545 		** NOTHING to do with the MSIX vector
2546 		*/
2547 		txr->ims = 1 << (22 + i);
2548 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2549 	}
2550 
2551 	/* Link interrupt */
2552 	++rid;
2553 	adapter->res = bus_alloc_resource_any(dev,
2554 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2555 	if (!adapter->res) {
2556 		device_printf(dev,"Unable to allocate "
2557 		    "bus resource: Link interrupt [%d]\n", rid);
2558 		return (ENXIO);
2559         }
2560 	/* Set the link handler function */
2561 	error = bus_setup_intr(dev, adapter->res,
2562 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2563 	    em_msix_link, adapter, &adapter->tag);
2564 	if (error) {
2565 		adapter->res = NULL;
2566 		device_printf(dev, "Failed to register LINK handler");
2567 		return (error);
2568 	}
2569 #if __FreeBSD_version >= 800504
2570 		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2571 #endif
2572 	adapter->linkvec = vector;
2573 	adapter->ivars |=  (8 | vector) << 16;
2574 	adapter->ivars |= 0x80000000;
2575 
2576 	return (0);
2577 }
2578 
2579 
2580 static void
2581 em_free_pci_resources(struct adapter *adapter)
2582 {
2583 	device_t	dev = adapter->dev;
2584 	struct tx_ring	*txr;
2585 	struct rx_ring	*rxr;
2586 	int		rid;
2587 
2588 
2589 	/*
2590 	** Release all the queue interrupt resources:
2591 	*/
2592 	for (int i = 0; i < adapter->num_queues; i++) {
2593 		txr = &adapter->tx_rings[i];
2594 		rxr = &adapter->rx_rings[i];
2595 		/* an early abort? */
2596 		if ((txr == NULL) || (rxr == NULL))
2597 			break;
2598 		rid = txr->msix +1;
2599 		if (txr->tag != NULL) {
2600 			bus_teardown_intr(dev, txr->res, txr->tag);
2601 			txr->tag = NULL;
2602 		}
2603 		if (txr->res != NULL)
2604 			bus_release_resource(dev, SYS_RES_IRQ,
2605 			    rid, txr->res);
2606 		rid = rxr->msix +1;
2607 		if (rxr->tag != NULL) {
2608 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2609 			rxr->tag = NULL;
2610 		}
2611 		if (rxr->res != NULL)
2612 			bus_release_resource(dev, SYS_RES_IRQ,
2613 			    rid, rxr->res);
2614 	}
2615 
2616         if (adapter->linkvec) /* we are doing MSIX */
2617                 rid = adapter->linkvec + 1;
2618         else
2619                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2620 
2621 	if (adapter->tag != NULL) {
2622 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2623 		adapter->tag = NULL;
2624 	}
2625 
2626 	if (adapter->res != NULL)
2627 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2628 
2629 
2630 	if (adapter->msix)
2631 		pci_release_msi(dev);
2632 
2633 	if (adapter->msix_mem != NULL)
2634 		bus_release_resource(dev, SYS_RES_MEMORY,
2635 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2636 
2637 	if (adapter->memory != NULL)
2638 		bus_release_resource(dev, SYS_RES_MEMORY,
2639 		    PCIR_BAR(0), adapter->memory);
2640 
2641 	if (adapter->flash != NULL)
2642 		bus_release_resource(dev, SYS_RES_MEMORY,
2643 		    EM_FLASH, adapter->flash);
2644 }
2645 
2646 /*
2647  * Setup MSI or MSI/X
2648  */
2649 static int
2650 em_setup_msix(struct adapter *adapter)
2651 {
2652 	device_t dev = adapter->dev;
2653 	int val = 0;
2654 
2655 
2656 	/*
2657 	** Setup MSI/X for Hartwell: tests have shown
2658 	** use of two queues to be unstable, and to
2659 	** provide no great gain anyway, so we simply
2660 	** seperate the interrupts and use a single queue.
2661 	*/
2662 	if ((adapter->hw.mac.type == e1000_82574) &&
2663 	    (em_enable_msix == TRUE)) {
2664 		/* Map the MSIX BAR */
2665 		int rid = PCIR_BAR(EM_MSIX_BAR);
2666 		adapter->msix_mem = bus_alloc_resource_any(dev,
2667 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2668        		if (!adapter->msix_mem) {
2669 			/* May not be enabled */
2670                		device_printf(adapter->dev,
2671 			    "Unable to map MSIX table \n");
2672 			goto msi;
2673        		}
2674 		val = pci_msix_count(dev);
2675 		if (val < 3) {
2676 			bus_release_resource(dev, SYS_RES_MEMORY,
2677 			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2678 			adapter->msix_mem = NULL;
2679                		device_printf(adapter->dev,
2680 			    "MSIX: insufficient vectors, using MSI\n");
2681 			goto msi;
2682 		}
2683 		val = 3;
2684 		adapter->num_queues = 1;
2685 		if (pci_alloc_msix(dev, &val) == 0) {
2686 			device_printf(adapter->dev,
2687 			    "Using MSIX interrupts "
2688 			    "with %d vectors\n", val);
2689 		}
2690 
2691 		return (val);
2692 	}
2693 msi:
2694        	val = pci_msi_count(dev);
2695        	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2696                	adapter->msix = 1;
2697                	device_printf(adapter->dev,"Using an MSI interrupt\n");
2698 		return (val);
2699 	}
2700 	/* Should only happen due to manual configuration */
2701 	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2702 	return (0);
2703 }
2704 
2705 
2706 /*********************************************************************
2707  *
2708  *  Initialize the hardware to a configuration
2709  *  as specified by the adapter structure.
2710  *
2711  **********************************************************************/
2712 static void
2713 em_reset(struct adapter *adapter)
2714 {
2715 	device_t	dev = adapter->dev;
2716 	struct ifnet	*ifp = adapter->ifp;
2717 	struct e1000_hw	*hw = &adapter->hw;
2718 	u16		rx_buffer_size;
2719 
2720 	INIT_DEBUGOUT("em_reset: begin");
2721 
2722 	/* Set up smart power down as default off on newer adapters. */
2723 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2724 	    hw->mac.type == e1000_82572)) {
2725 		u16 phy_tmp = 0;
2726 
2727 		/* Speed up time to link by disabling smart power down. */
2728 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2729 		phy_tmp &= ~IGP02E1000_PM_SPD;
2730 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2731 	}
2732 
2733 	/*
2734 	 * These parameters control the automatic generation (Tx) and
2735 	 * response (Rx) to Ethernet PAUSE frames.
2736 	 * - High water mark should allow for at least two frames to be
2737 	 *   received after sending an XOFF.
2738 	 * - Low water mark works best when it is very near the high water mark.
2739 	 *   This allows the receiver to restart by sending XON when it has
2740 	 *   drained a bit. Here we use an arbitary value of 1500 which will
2741 	 *   restart after one full frame is pulled from the buffer. There
2742 	 *   could be several smaller frames in the buffer and if so they will
2743 	 *   not trigger the XON until their total number reduces the buffer
2744 	 *   by 1500.
2745 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2746 	 */
2747 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2748 
2749 	hw->fc.high_water = rx_buffer_size -
2750 	    roundup2(adapter->max_frame_size, 1024);
2751 	hw->fc.low_water = hw->fc.high_water - 1500;
2752 
2753 	if (hw->mac.type == e1000_80003es2lan)
2754 		hw->fc.pause_time = 0xFFFF;
2755 	else
2756 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2757 
2758 	hw->fc.send_xon = TRUE;
2759 
2760         /* Set Flow control, use the tunable location if sane */
2761 	hw->fc.requested_mode = adapter->fc_setting;
2762 
2763 	/* Workaround: no TX flow ctrl for PCH */
2764 	if (hw->mac.type == e1000_pchlan)
2765                 hw->fc.requested_mode = e1000_fc_rx_pause;
2766 
2767 	/* Override - settings for PCH2LAN, ya its magic :) */
2768 	if (hw->mac.type == e1000_pch2lan) {
2769 		hw->fc.high_water = 0x5C20;
2770 		hw->fc.low_water = 0x5048;
2771 		hw->fc.pause_time = 0x0650;
2772 		hw->fc.refresh_time = 0x0400;
2773 		/* Jumbos need adjusted PBA */
2774 		if (ifp->if_mtu > ETHERMTU)
2775 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2776 		else
2777 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2778 	}
2779 
2780 	/* Issue a global reset */
2781 	e1000_reset_hw(hw);
2782 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2783 	em_disable_aspm(adapter);
2784 
2785 	if (e1000_init_hw(hw) < 0) {
2786 		device_printf(dev, "Hardware Initialization Failed\n");
2787 		return;
2788 	}
2789 
2790 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2791 	e1000_get_phy_info(hw);
2792 	e1000_check_for_link(hw);
2793 	return;
2794 }
2795 
2796 /*********************************************************************
2797  *
2798  *  Setup networking device structure and register an interface.
2799  *
2800  **********************************************************************/
2801 static int
2802 em_setup_interface(device_t dev, struct adapter *adapter)
2803 {
2804 	struct ifnet   *ifp;
2805 
2806 	INIT_DEBUGOUT("em_setup_interface: begin");
2807 
2808 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2809 	if (ifp == NULL) {
2810 		device_printf(dev, "can not allocate ifnet structure\n");
2811 		return (-1);
2812 	}
2813 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2814 	ifp->if_mtu = ETHERMTU;
2815 	ifp->if_init =  em_init;
2816 	ifp->if_softc = adapter;
2817 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2818 	ifp->if_ioctl = em_ioctl;
2819 	ifp->if_start = em_start;
2820 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2821 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2822 	IFQ_SET_READY(&ifp->if_snd);
2823 
2824 	ether_ifattach(ifp, adapter->hw.mac.addr);
2825 
2826 	ifp->if_capabilities = ifp->if_capenable = 0;
2827 
2828 #ifdef EM_MULTIQUEUE
2829 	/* Multiqueue tx functions */
2830 	ifp->if_transmit = em_mq_start;
2831 	ifp->if_qflush = em_qflush;
2832 #endif
2833 
2834 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2835 	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2836 
2837 	/* Enable TSO by default, can disable with ifconfig */
2838 	ifp->if_capabilities |= IFCAP_TSO4;
2839 	ifp->if_capenable |= IFCAP_TSO4;
2840 
2841 	/*
2842 	 * Tell the upper layer(s) we
2843 	 * support full VLAN capability
2844 	 */
2845 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2846 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2847 	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2848 
2849 	/*
2850 	** Dont turn this on by default, if vlans are
2851 	** created on another pseudo device (eg. lagg)
2852 	** then vlan events are not passed thru, breaking
2853 	** operation, but with HW FILTER off it works. If
2854 	** using vlans directly on the em driver you can
2855 	** enable this and get full hardware tag filtering.
2856 	*/
2857 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2858 
2859 #ifdef DEVICE_POLLING
2860 	ifp->if_capabilities |= IFCAP_POLLING;
2861 #endif
2862 
2863 	/* Enable only WOL MAGIC by default */
2864 	if (adapter->wol) {
2865 		ifp->if_capabilities |= IFCAP_WOL;
2866 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2867 	}
2868 
2869 	/*
2870 	 * Specify the media types supported by this adapter and register
2871 	 * callbacks to update media and link information
2872 	 */
2873 	ifmedia_init(&adapter->media, IFM_IMASK,
2874 	    em_media_change, em_media_status);
2875 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2876 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2877 		u_char fiber_type = IFM_1000_SX;	/* default type */
2878 
2879 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2880 			    0, NULL);
2881 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2882 	} else {
2883 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2884 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2885 			    0, NULL);
2886 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2887 			    0, NULL);
2888 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2889 			    0, NULL);
2890 		if (adapter->hw.phy.type != e1000_phy_ife) {
2891 			ifmedia_add(&adapter->media,
2892 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2893 			ifmedia_add(&adapter->media,
2894 				IFM_ETHER | IFM_1000_T, 0, NULL);
2895 		}
2896 	}
2897 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2898 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2899 	return (0);
2900 }
2901 
2902 
2903 /*
2904  * Manage DMA'able memory.
2905  */
2906 static void
2907 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2908 {
2909 	if (error)
2910 		return;
2911 	*(bus_addr_t *) arg = segs[0].ds_addr;
2912 }
2913 
2914 static int
2915 em_dma_malloc(struct adapter *adapter, bus_size_t size,
2916         struct em_dma_alloc *dma, int mapflags)
2917 {
2918 	int error;
2919 
2920 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2921 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2922 				BUS_SPACE_MAXADDR,	/* lowaddr */
2923 				BUS_SPACE_MAXADDR,	/* highaddr */
2924 				NULL, NULL,		/* filter, filterarg */
2925 				size,			/* maxsize */
2926 				1,			/* nsegments */
2927 				size,			/* maxsegsize */
2928 				0,			/* flags */
2929 				NULL,			/* lockfunc */
2930 				NULL,			/* lockarg */
2931 				&dma->dma_tag);
2932 	if (error) {
2933 		device_printf(adapter->dev,
2934 		    "%s: bus_dma_tag_create failed: %d\n",
2935 		    __func__, error);
2936 		goto fail_0;
2937 	}
2938 
2939 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2940 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2941 	if (error) {
2942 		device_printf(adapter->dev,
2943 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2944 		    __func__, (uintmax_t)size, error);
2945 		goto fail_2;
2946 	}
2947 
2948 	dma->dma_paddr = 0;
2949 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2950 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2951 	if (error || dma->dma_paddr == 0) {
2952 		device_printf(adapter->dev,
2953 		    "%s: bus_dmamap_load failed: %d\n",
2954 		    __func__, error);
2955 		goto fail_3;
2956 	}
2957 
2958 	return (0);
2959 
2960 fail_3:
2961 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2962 fail_2:
2963 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2964 	bus_dma_tag_destroy(dma->dma_tag);
2965 fail_0:
2966 	dma->dma_map = NULL;
2967 	dma->dma_tag = NULL;
2968 
2969 	return (error);
2970 }
2971 
2972 static void
2973 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2974 {
2975 	if (dma->dma_tag == NULL)
2976 		return;
2977 	if (dma->dma_map != NULL) {
2978 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2979 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2980 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2981 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2982 		dma->dma_map = NULL;
2983 	}
2984 	bus_dma_tag_destroy(dma->dma_tag);
2985 	dma->dma_tag = NULL;
2986 }
2987 
2988 
2989 /*********************************************************************
2990  *
2991  *  Allocate memory for the transmit and receive rings, and then
2992  *  the descriptors associated with each, called only once at attach.
2993  *
2994  **********************************************************************/
2995 static int
2996 em_allocate_queues(struct adapter *adapter)
2997 {
2998 	device_t		dev = adapter->dev;
2999 	struct tx_ring		*txr = NULL;
3000 	struct rx_ring		*rxr = NULL;
3001 	int rsize, tsize, error = E1000_SUCCESS;
3002 	int txconf = 0, rxconf = 0;
3003 
3004 
3005 	/* Allocate the TX ring struct memory */
3006 	if (!(adapter->tx_rings =
3007 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3008 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3009 		device_printf(dev, "Unable to allocate TX ring memory\n");
3010 		error = ENOMEM;
3011 		goto fail;
3012 	}
3013 
3014 	/* Now allocate the RX */
3015 	if (!(adapter->rx_rings =
3016 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3017 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3018 		device_printf(dev, "Unable to allocate RX ring memory\n");
3019 		error = ENOMEM;
3020 		goto rx_fail;
3021 	}
3022 
3023 	tsize = roundup2(adapter->num_tx_desc *
3024 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3025 	/*
3026 	 * Now set up the TX queues, txconf is needed to handle the
3027 	 * possibility that things fail midcourse and we need to
3028 	 * undo memory gracefully
3029 	 */
3030 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3031 		/* Set up some basics */
3032 		txr = &adapter->tx_rings[i];
3033 		txr->adapter = adapter;
3034 		txr->me = i;
3035 
3036 		/* Initialize the TX lock */
3037 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3038 		    device_get_nameunit(dev), txr->me);
3039 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3040 
3041 		if (em_dma_malloc(adapter, tsize,
3042 			&txr->txdma, BUS_DMA_NOWAIT)) {
3043 			device_printf(dev,
3044 			    "Unable to allocate TX Descriptor memory\n");
3045 			error = ENOMEM;
3046 			goto err_tx_desc;
3047 		}
3048 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3049 		bzero((void *)txr->tx_base, tsize);
3050 
3051         	if (em_allocate_transmit_buffers(txr)) {
3052 			device_printf(dev,
3053 			    "Critical Failure setting up transmit buffers\n");
3054 			error = ENOMEM;
3055 			goto err_tx_desc;
3056         	}
3057 #if __FreeBSD_version >= 800000
3058 		/* Allocate a buf ring */
3059 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3060 		    M_WAITOK, &txr->tx_mtx);
3061 #endif
3062 	}
3063 
3064 	/*
3065 	 * Next the RX queues...
3066 	 */
3067 	rsize = roundup2(adapter->num_rx_desc *
3068 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3069 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3070 		rxr = &adapter->rx_rings[i];
3071 		rxr->adapter = adapter;
3072 		rxr->me = i;
3073 
3074 		/* Initialize the RX lock */
3075 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3076 		    device_get_nameunit(dev), txr->me);
3077 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3078 
3079 		if (em_dma_malloc(adapter, rsize,
3080 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3081 			device_printf(dev,
3082 			    "Unable to allocate RxDescriptor memory\n");
3083 			error = ENOMEM;
3084 			goto err_rx_desc;
3085 		}
3086 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3087 		bzero((void *)rxr->rx_base, rsize);
3088 
3089         	/* Allocate receive buffers for the ring*/
3090 		if (em_allocate_receive_buffers(rxr)) {
3091 			device_printf(dev,
3092 			    "Critical Failure setting up receive buffers\n");
3093 			error = ENOMEM;
3094 			goto err_rx_desc;
3095 		}
3096 	}
3097 
3098 	return (0);
3099 
3100 err_rx_desc:
3101 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3102 		em_dma_free(adapter, &rxr->rxdma);
3103 err_tx_desc:
3104 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3105 		em_dma_free(adapter, &txr->txdma);
3106 	free(adapter->rx_rings, M_DEVBUF);
3107 rx_fail:
3108 #if __FreeBSD_version >= 800000
3109 	buf_ring_free(txr->br, M_DEVBUF);
3110 #endif
3111 	free(adapter->tx_rings, M_DEVBUF);
3112 fail:
3113 	return (error);
3114 }
3115 
3116 
3117 /*********************************************************************
3118  *
3119  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3120  *  the information needed to transmit a packet on the wire. This is
3121  *  called only once at attach, setup is done every reset.
3122  *
3123  **********************************************************************/
3124 static int
3125 em_allocate_transmit_buffers(struct tx_ring *txr)
3126 {
3127 	struct adapter *adapter = txr->adapter;
3128 	device_t dev = adapter->dev;
3129 	struct em_buffer *txbuf;
3130 	int error, i;
3131 
3132 	/*
3133 	 * Setup DMA descriptor areas.
3134 	 */
3135 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3136 			       1, 0,			/* alignment, bounds */
3137 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3138 			       BUS_SPACE_MAXADDR,	/* highaddr */
3139 			       NULL, NULL,		/* filter, filterarg */
3140 			       EM_TSO_SIZE,		/* maxsize */
3141 			       EM_MAX_SCATTER,		/* nsegments */
3142 			       PAGE_SIZE,		/* maxsegsize */
3143 			       0,			/* flags */
3144 			       NULL,			/* lockfunc */
3145 			       NULL,			/* lockfuncarg */
3146 			       &txr->txtag))) {
3147 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3148 		goto fail;
3149 	}
3150 
3151 	if (!(txr->tx_buffers =
3152 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3153 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3154 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3155 		error = ENOMEM;
3156 		goto fail;
3157 	}
3158 
3159         /* Create the descriptor buffer dma maps */
3160 	txbuf = txr->tx_buffers;
3161 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3162 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3163 		if (error != 0) {
3164 			device_printf(dev, "Unable to create TX DMA map\n");
3165 			goto fail;
3166 		}
3167 	}
3168 
3169 	return 0;
3170 fail:
3171 	/* We free all, it handles case where we are in the middle */
3172 	em_free_transmit_structures(adapter);
3173 	return (error);
3174 }
3175 
3176 /*********************************************************************
3177  *
3178  *  Initialize a transmit ring.
3179  *
3180  **********************************************************************/
3181 static void
3182 em_setup_transmit_ring(struct tx_ring *txr)
3183 {
3184 	struct adapter *adapter = txr->adapter;
3185 	struct em_buffer *txbuf;
3186 	int i;
3187 
3188 	/* Clear the old descriptor contents */
3189 	EM_TX_LOCK(txr);
3190 	bzero((void *)txr->tx_base,
3191 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3192 	/* Reset indices */
3193 	txr->next_avail_desc = 0;
3194 	txr->next_to_clean = 0;
3195 
3196 	/* Free any existing tx buffers. */
3197         txbuf = txr->tx_buffers;
3198 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3199 		if (txbuf->m_head != NULL) {
3200 			bus_dmamap_sync(txr->txtag, txbuf->map,
3201 			    BUS_DMASYNC_POSTWRITE);
3202 			bus_dmamap_unload(txr->txtag, txbuf->map);
3203 			m_freem(txbuf->m_head);
3204 			txbuf->m_head = NULL;
3205 		}
3206 		/* clear the watch index */
3207 		txbuf->next_eop = -1;
3208         }
3209 
3210 	/* Set number of descriptors available */
3211 	txr->tx_avail = adapter->num_tx_desc;
3212 	txr->queue_status = EM_QUEUE_IDLE;
3213 
3214 	/* Clear checksum offload context. */
3215 	txr->last_hw_offload = 0;
3216 	txr->last_hw_ipcss = 0;
3217 	txr->last_hw_ipcso = 0;
3218 	txr->last_hw_tucss = 0;
3219 	txr->last_hw_tucso = 0;
3220 
3221 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3222 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3223 	EM_TX_UNLOCK(txr);
3224 }
3225 
3226 /*********************************************************************
3227  *
3228  *  Initialize all transmit rings.
3229  *
3230  **********************************************************************/
3231 static void
3232 em_setup_transmit_structures(struct adapter *adapter)
3233 {
3234 	struct tx_ring *txr = adapter->tx_rings;
3235 
3236 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3237 		em_setup_transmit_ring(txr);
3238 
3239 	return;
3240 }
3241 
3242 /*********************************************************************
3243  *
3244  *  Enable transmit unit.
3245  *
3246  **********************************************************************/
3247 static void
3248 em_initialize_transmit_unit(struct adapter *adapter)
3249 {
3250 	struct tx_ring	*txr = adapter->tx_rings;
3251 	struct e1000_hw	*hw = &adapter->hw;
3252 	u32	tctl, tarc, tipg = 0;
3253 
3254 	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3255 
3256 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3257 		u64 bus_addr = txr->txdma.dma_paddr;
3258 		/* Base and Len of TX Ring */
3259 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3260 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3261 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3262 	    	    (u32)(bus_addr >> 32));
3263 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3264 	    	    (u32)bus_addr);
3265 		/* Init the HEAD/TAIL indices */
3266 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3267 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3268 
3269 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3270 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3271 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3272 
3273 		txr->queue_status = EM_QUEUE_IDLE;
3274 	}
3275 
3276 	/* Set the default values for the Tx Inter Packet Gap timer */
3277 	switch (adapter->hw.mac.type) {
3278 	case e1000_82542:
3279 		tipg = DEFAULT_82542_TIPG_IPGT;
3280 		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3281 		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3282 		break;
3283 	case e1000_80003es2lan:
3284 		tipg = DEFAULT_82543_TIPG_IPGR1;
3285 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3286 		    E1000_TIPG_IPGR2_SHIFT;
3287 		break;
3288 	default:
3289 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3290 		    (adapter->hw.phy.media_type ==
3291 		    e1000_media_type_internal_serdes))
3292 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3293 		else
3294 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3295 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3296 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3297 	}
3298 
3299 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3300 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3301 
3302 	if(adapter->hw.mac.type >= e1000_82540)
3303 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3304 		    adapter->tx_abs_int_delay.value);
3305 
3306 	if ((adapter->hw.mac.type == e1000_82571) ||
3307 	    (adapter->hw.mac.type == e1000_82572)) {
3308 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3309 		tarc |= SPEED_MODE_BIT;
3310 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3311 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3312 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3313 		tarc |= 1;
3314 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3315 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3316 		tarc |= 1;
3317 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3318 	}
3319 
3320 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3321 	if (adapter->tx_int_delay.value > 0)
3322 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3323 
3324 	/* Program the Transmit Control Register */
3325 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3326 	tctl &= ~E1000_TCTL_CT;
3327 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3328 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3329 
3330 	if (adapter->hw.mac.type >= e1000_82571)
3331 		tctl |= E1000_TCTL_MULR;
3332 
3333 	/* This write will effectively turn on the transmit unit. */
3334 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3335 
3336 }
3337 
3338 
3339 /*********************************************************************
3340  *
3341  *  Free all transmit rings.
3342  *
3343  **********************************************************************/
3344 static void
3345 em_free_transmit_structures(struct adapter *adapter)
3346 {
3347 	struct tx_ring *txr = adapter->tx_rings;
3348 
3349 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3350 		EM_TX_LOCK(txr);
3351 		em_free_transmit_buffers(txr);
3352 		em_dma_free(adapter, &txr->txdma);
3353 		EM_TX_UNLOCK(txr);
3354 		EM_TX_LOCK_DESTROY(txr);
3355 	}
3356 
3357 	free(adapter->tx_rings, M_DEVBUF);
3358 }
3359 
3360 /*********************************************************************
3361  *
3362  *  Free transmit ring related data structures.
3363  *
3364  **********************************************************************/
3365 static void
3366 em_free_transmit_buffers(struct tx_ring *txr)
3367 {
3368 	struct adapter		*adapter = txr->adapter;
3369 	struct em_buffer	*txbuf;
3370 
3371 	INIT_DEBUGOUT("free_transmit_ring: begin");
3372 
3373 	if (txr->tx_buffers == NULL)
3374 		return;
3375 
3376 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3377 		txbuf = &txr->tx_buffers[i];
3378 		if (txbuf->m_head != NULL) {
3379 			bus_dmamap_sync(txr->txtag, txbuf->map,
3380 			    BUS_DMASYNC_POSTWRITE);
3381 			bus_dmamap_unload(txr->txtag,
3382 			    txbuf->map);
3383 			m_freem(txbuf->m_head);
3384 			txbuf->m_head = NULL;
3385 			if (txbuf->map != NULL) {
3386 				bus_dmamap_destroy(txr->txtag,
3387 				    txbuf->map);
3388 				txbuf->map = NULL;
3389 			}
3390 		} else if (txbuf->map != NULL) {
3391 			bus_dmamap_unload(txr->txtag,
3392 			    txbuf->map);
3393 			bus_dmamap_destroy(txr->txtag,
3394 			    txbuf->map);
3395 			txbuf->map = NULL;
3396 		}
3397 	}
3398 #if __FreeBSD_version >= 800000
3399 	if (txr->br != NULL)
3400 		buf_ring_free(txr->br, M_DEVBUF);
3401 #endif
3402 	if (txr->tx_buffers != NULL) {
3403 		free(txr->tx_buffers, M_DEVBUF);
3404 		txr->tx_buffers = NULL;
3405 	}
3406 	if (txr->txtag != NULL) {
3407 		bus_dma_tag_destroy(txr->txtag);
3408 		txr->txtag = NULL;
3409 	}
3410 	return;
3411 }
3412 
3413 
/*********************************************************************
 *  Set up a checksum offload context descriptor for packet `mp'.
 *
 *  The offload context is protocol specific (TCP/UDP) and thus
 *  only needs to be set when the protocol changes. The occasion
 *  of a context change can be a performance detriment, and
 *  might be better just disabled. The reason arises in the way
 *  in which the controller supports pipelined requests from the
 *  Tx data DMA. Up to four requests can be pipelined, and they may
 *  belong to the same packet or to multiple packets. However all
 *  requests for one packet are issued before a request is issued
 *  for a subsequent packet and if a request for the next packet
 *  requires a context change, that request will be stalled
 *  until the previous request completes. This means setting up
 *  a new context effectively disables pipelined Tx data DMA, which
 *  in turn greatly slows down the transmission of small sized
 *  frames.
 *
 *  txr       - ring the packet is queued on
 *  mp        - packet; only m_pkthdr.csum_flags is examined
 *  ip_off    - offset of the IP header within the frame
 *  ip        - IP header; ip_hl gives the IP header length
 *  txd_upper - option bits for the data descriptors: IXSM and/or
 *              TXSM (shifted left by 8) are OR'ed in
 *  txd_lower - command bits for the data descriptors: overwritten
 *              with DEXT | DTYP_D for TCP and UDP packets
 *
 *  When the TCP/UDP offsets match the ones cached in txr->last_hw_*
 *  the function returns early without consuming a descriptor;
 *  otherwise the descriptor at next_avail_desc becomes the context
 *  descriptor, tx_avail is decremented and next_avail_desc advances.
 **********************************************************************/
static void
em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
{
	struct adapter			*adapter = txr->adapter;
	struct e1000_context_desc	*TXD = NULL;
	struct em_buffer		*tx_buffer;
	int				cur, hdr_len;
	u32				cmd = 0;
	u16				offload = 0;
	u8				ipcso, ipcss, tucso, tucss;

	ipcss = ipcso = tucss = tucso = 0;
	/* Offset of the first byte after the IP header. */
	hdr_len = ip_off + (ip->ip_hl << 2);
	cur = txr->next_avail_desc;

	/* Setup of IP header checksum. */
	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
		offload |= CSUM_IP;
		ipcss = ip_off;
		ipcso = ip_off + offsetof(struct ip, ip_sum);
		/*
		 * Start offset for header checksum calculation.
		 * End offset for header checksum calculation.
		 * Offset of place to put the checksum.
		 *
		 * NOTE(review): em_tso_setup() programs ipcse with the
		 * offset of the last IP header byte (... - 1) while this
		 * uses hdr_len - confirm which one the hardware expects.
		 */
		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
		TXD->lower_setup.ip_fields.ipcss = ipcss;
		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
		TXD->lower_setup.ip_fields.ipcso = ipcso;
		cmd |= E1000_TXD_CMD_IP;
	}

	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
 		offload |= CSUM_TCP;
 		tucss = hdr_len;
 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
 		/*
 		 * Setting up a new checksum offload context for every
 		 * frame takes a lot of processing time for the hardware.
 		 * This also reduces performance a lot for small sized
 		 * frames, so avoid it if the driver can use the
 		 * previously configured checksum offload context.
 		 */
 		if (txr->last_hw_offload == offload) {
 			if (offload & CSUM_IP) {
 				if (txr->last_hw_ipcss == ipcss &&
 				    txr->last_hw_ipcso == ipcso &&
 				    txr->last_hw_tucss == tucss &&
 				    txr->last_hw_tucso == tucso)
 					return;
 			} else {
 				if (txr->last_hw_tucss == tucss &&
 				    txr->last_hw_tucso == tucso)
 					return;
 			}
  		}
 		/* Cache miss: remember what is about to be programmed. */
 		txr->last_hw_offload = offload;
 		txr->last_hw_tucss = tucss;
 		txr->last_hw_tucso = tucso;
 		/*
 		 * Start offset for payload checksum calculation.
 		 * End offset for payload checksum calculation.
 		 * Offset of place to put the checksum.
 		 */
		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
 		TXD->upper_setup.tcp_fields.tucso = tucso;
 		cmd |= E1000_TXD_CMD_TCP;
 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
 		/*
 		 * Unlike the TCP case no protocol bit is added to
 		 * `offload' and E1000_TXD_CMD_TCP is not set in `cmd'.
 		 */
 		tucss = hdr_len;
 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
 		/*
 		 * Setting up a new checksum offload context for every
 		 * frame takes a lot of processing time for the hardware.
 		 * This also reduces performance a lot for small sized
 		 * frames, so avoid it if the driver can use the
 		 * previously configured checksum offload context.
 		 */
 		if (txr->last_hw_offload == offload) {
 			if (offload & CSUM_IP) {
 				if (txr->last_hw_ipcss == ipcss &&
 				    txr->last_hw_ipcso == ipcso &&
 				    txr->last_hw_tucss == tucss &&
 				    txr->last_hw_tucso == tucso)
 					return;
 			} else {
 				if (txr->last_hw_tucss == tucss &&
 				    txr->last_hw_tucso == tucso)
 					return;
 			}
 		}
 		/* Cache miss: remember what is about to be programmed. */
 		txr->last_hw_offload = offload;
 		txr->last_hw_tucss = tucss;
 		txr->last_hw_tucso = tucso;
 		/*
 		 * Start offset for payload checksum calculation.
 		 * End offset for payload checksum calculation.
 		 * Offset of place to put the checksum.
 		 */
		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
 		TXD->upper_setup.tcp_fields.tucss = tucss;
 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
 		TXD->upper_setup.tcp_fields.tucso = tucso;
  	}

 	/* The IP offsets are only cached when IP offload was requested. */
 	if (offload & CSUM_IP) {
 		txr->last_hw_ipcss = ipcss;
 		txr->last_hw_ipcso = ipcso;
  	}

	/*
	 * NOTE(review): TXD is still NULL here when none of CSUM_IP,
	 * CSUM_TCP or CSUM_UDP is set in csum_flags; callers presumably
	 * only get here with at least one of them - confirm.
	 */
	TXD->tcp_seg_setup.data = htole32(0);
	TXD->cmd_and_length =
	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
	/* The context descriptor carries no mbuf and ends no packet. */
	tx_buffer = &txr->tx_buffers[cur];
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	/* Consume the descriptor, wrapping at the end of the ring. */
	if (++cur == adapter->num_tx_desc)
		cur = 0;

	txr->tx_avail--;
	txr->next_avail_desc = cur;
}
3560 
3561 
3562 /**********************************************************************
3563  *
3564  *  Setup work for hardware segmentation offload (TSO)
3565  *
3566  **********************************************************************/
3567 static void
3568 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3569     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3570 {
3571 	struct adapter			*adapter = txr->adapter;
3572 	struct e1000_context_desc	*TXD;
3573 	struct em_buffer		*tx_buffer;
3574 	int cur, hdr_len;
3575 
3576 	/*
3577 	 * In theory we can use the same TSO context if and only if
3578 	 * frame is the same type(IP/TCP) and the same MSS. However
3579 	 * checking whether a frame has the same IP/TCP structure is
3580 	 * hard thing so just ignore that and always restablish a
3581 	 * new TSO context.
3582 	 */
3583 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3584 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3585 		      E1000_TXD_DTYP_D |	/* Data descr type */
3586 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3587 
3588 	/* IP and/or TCP header checksum calculation and insertion. */
3589 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3590 
3591 	cur = txr->next_avail_desc;
3592 	tx_buffer = &txr->tx_buffers[cur];
3593 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3594 
3595 	/*
3596 	 * Start offset for header checksum calculation.
3597 	 * End offset for header checksum calculation.
3598 	 * Offset of place put the checksum.
3599 	 */
3600 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3601 	TXD->lower_setup.ip_fields.ipcse =
3602 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3603 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3604 	/*
3605 	 * Start offset for payload checksum calculation.
3606 	 * End offset for payload checksum calculation.
3607 	 * Offset of place to put the checksum.
3608 	 */
3609 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3610 	TXD->upper_setup.tcp_fields.tucse = 0;
3611 	TXD->upper_setup.tcp_fields.tucso =
3612 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3613 	/*
3614 	 * Payload size per packet w/o any headers.
3615 	 * Length of all headers up to payload.
3616 	 */
3617 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3618 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3619 
3620 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3621 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3622 				E1000_TXD_CMD_TSE |	/* TSE context */
3623 				E1000_TXD_CMD_IP |	/* Do IP csum */
3624 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3625 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3626 
3627 	tx_buffer->m_head = NULL;
3628 	tx_buffer->next_eop = -1;
3629 
3630 	if (++cur == adapter->num_tx_desc)
3631 		cur = 0;
3632 
3633 	txr->tx_avail--;
3634 	txr->next_avail_desc = cur;
3635 	txr->tx_tso = TRUE;
3636 }
3637 
3638 
3639 /**********************************************************************
3640  *
3641  *  Examine each tx_buffer in the used queue. If the hardware is done
3642  *  processing the packet then free associated resources. The
3643  *  tx_buffer is put back on the free queue.
3644  *
3645  **********************************************************************/
3646 static bool
3647 em_txeof(struct tx_ring *txr)
3648 {
3649 	struct adapter	*adapter = txr->adapter;
3650         int first, last, done, processed;
3651         struct em_buffer *tx_buffer;
3652         struct e1000_tx_desc   *tx_desc, *eop_desc;
3653 	struct ifnet   *ifp = adapter->ifp;
3654 
3655 	EM_TX_LOCK_ASSERT(txr);
3656 
3657 	/* No work, make sure watchdog is off */
3658         if (txr->tx_avail == adapter->num_tx_desc) {
3659 		txr->queue_status = EM_QUEUE_IDLE;
3660                 return (FALSE);
3661 	}
3662 
3663 	processed = 0;
3664         first = txr->next_to_clean;
3665         tx_desc = &txr->tx_base[first];
3666         tx_buffer = &txr->tx_buffers[first];
3667 	last = tx_buffer->next_eop;
3668         eop_desc = &txr->tx_base[last];
3669 
3670 	/*
3671 	 * What this does is get the index of the
3672 	 * first descriptor AFTER the EOP of the
3673 	 * first packet, that way we can do the
3674 	 * simple comparison on the inner while loop.
3675 	 */
3676 	if (++last == adapter->num_tx_desc)
3677  		last = 0;
3678 	done = last;
3679 
3680         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3681             BUS_DMASYNC_POSTREAD);
3682 
3683         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3684 		/* We clean the range of the packet */
3685 		while (first != done) {
3686                 	tx_desc->upper.data = 0;
3687                 	tx_desc->lower.data = 0;
3688                 	tx_desc->buffer_addr = 0;
3689                 	++txr->tx_avail;
3690 			++processed;
3691 
3692 			if (tx_buffer->m_head) {
3693 				bus_dmamap_sync(txr->txtag,
3694 				    tx_buffer->map,
3695 				    BUS_DMASYNC_POSTWRITE);
3696 				bus_dmamap_unload(txr->txtag,
3697 				    tx_buffer->map);
3698                         	m_freem(tx_buffer->m_head);
3699                         	tx_buffer->m_head = NULL;
3700                 	}
3701 			tx_buffer->next_eop = -1;
3702 			txr->watchdog_time = ticks;
3703 
3704 	                if (++first == adapter->num_tx_desc)
3705 				first = 0;
3706 
3707 	                tx_buffer = &txr->tx_buffers[first];
3708 			tx_desc = &txr->tx_base[first];
3709 		}
3710 		++ifp->if_opackets;
3711 		/* See if we can continue to the next packet */
3712 		last = tx_buffer->next_eop;
3713 		if (last != -1) {
3714         		eop_desc = &txr->tx_base[last];
3715 			/* Get new done point */
3716 			if (++last == adapter->num_tx_desc) last = 0;
3717 			done = last;
3718 		} else
3719 			break;
3720         }
3721         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3722             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3723 
3724         txr->next_to_clean = first;
3725 
3726 	/*
3727 	** Watchdog calculation, we know there's
3728 	** work outstanding or the first return
3729 	** would have been taken, so none processed
3730 	** for too long indicates a hang. local timer
3731 	** will examine this and do a reset if needed.
3732 	*/
3733 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3734 		txr->queue_status = EM_QUEUE_HUNG;
3735 
3736         /*
3737          * If we have a minimum free, clear IFF_DRV_OACTIVE
3738          * to tell the stack that it is OK to send packets.
3739          */
3740         if (txr->tx_avail > EM_MAX_SCATTER)
3741                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3742 
3743 	/* Disable watchdog if all clean */
3744 	if (txr->tx_avail == adapter->num_tx_desc) {
3745 		txr->queue_status = EM_QUEUE_IDLE;
3746 		return (FALSE);
3747 	}
3748 
3749 	return (TRUE);
3750 }
3751 
3752 
3753 /*********************************************************************
3754  *
3755  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3756  *
3757  **********************************************************************/
3758 static void
3759 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3760 {
3761 	struct adapter		*adapter = rxr->adapter;
3762 	struct mbuf		*m;
3763 	bus_dma_segment_t	segs[1];
3764 	struct em_buffer	*rxbuf;
3765 	int			i, j, error, nsegs;
3766 	bool			cleaned = FALSE;
3767 
3768 	i = j = rxr->next_to_refresh;
3769 	/*
3770 	** Get one descriptor beyond
3771 	** our work mark to control
3772 	** the loop.
3773 	*/
3774 	if (++j == adapter->num_rx_desc)
3775 		j = 0;
3776 
3777 	while (j != limit) {
3778 		rxbuf = &rxr->rx_buffers[i];
3779 		if (rxbuf->m_head == NULL) {
3780 			m = m_getjcl(M_DONTWAIT, MT_DATA,
3781 			    M_PKTHDR, adapter->rx_mbuf_sz);
3782 			/*
3783 			** If we have a temporary resource shortage
3784 			** that causes a failure, just abort refresh
3785 			** for now, we will return to this point when
3786 			** reinvoked from em_rxeof.
3787 			*/
3788 			if (m == NULL)
3789 				goto update;
3790 		} else
3791 			m = rxbuf->m_head;
3792 
3793 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3794 		m->m_flags |= M_PKTHDR;
3795 		m->m_data = m->m_ext.ext_buf;
3796 
3797 		/* Use bus_dma machinery to setup the memory mapping  */
3798 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3799 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3800 		if (error != 0) {
3801 			printf("Refresh mbufs: hdr dmamap load"
3802 			    " failure - %d\n", error);
3803 			m_free(m);
3804 			rxbuf->m_head = NULL;
3805 			goto update;
3806 		}
3807 		rxbuf->m_head = m;
3808 		bus_dmamap_sync(rxr->rxtag,
3809 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3810 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3811 		cleaned = TRUE;
3812 
3813 		i = j; /* Next is precalulated for us */
3814 		rxr->next_to_refresh = i;
3815 		/* Calculate next controlling index */
3816 		if (++j == adapter->num_rx_desc)
3817 			j = 0;
3818 	}
3819 update:
3820 	/*
3821 	** Update the tail pointer only if,
3822 	** and as far as we have refreshed.
3823 	*/
3824 	if (cleaned)
3825 		E1000_WRITE_REG(&adapter->hw,
3826 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3827 
3828 	return;
3829 }
3830 
3831 
3832 /*********************************************************************
3833  *
3834  *  Allocate memory for rx_buffer structures. Since we use one
3835  *  rx_buffer per received packet, the maximum number of rx_buffer's
3836  *  that we'll need is equal to the number of receive descriptors
3837  *  that we've allocated.
3838  *
3839  **********************************************************************/
3840 static int
3841 em_allocate_receive_buffers(struct rx_ring *rxr)
3842 {
3843 	struct adapter		*adapter = rxr->adapter;
3844 	device_t		dev = adapter->dev;
3845 	struct em_buffer	*rxbuf;
3846 	int			error;
3847 
3848 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3849 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3850 	if (rxr->rx_buffers == NULL) {
3851 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3852 		return (ENOMEM);
3853 	}
3854 
3855 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3856 				1, 0,			/* alignment, bounds */
3857 				BUS_SPACE_MAXADDR,	/* lowaddr */
3858 				BUS_SPACE_MAXADDR,	/* highaddr */
3859 				NULL, NULL,		/* filter, filterarg */
3860 				MJUM9BYTES,		/* maxsize */
3861 				1,			/* nsegments */
3862 				MJUM9BYTES,		/* maxsegsize */
3863 				0,			/* flags */
3864 				NULL,			/* lockfunc */
3865 				NULL,			/* lockarg */
3866 				&rxr->rxtag);
3867 	if (error) {
3868 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3869 		    __func__, error);
3870 		goto fail;
3871 	}
3872 
3873 	rxbuf = rxr->rx_buffers;
3874 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3875 		rxbuf = &rxr->rx_buffers[i];
3876 		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3877 		    &rxbuf->map);
3878 		if (error) {
3879 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3880 			    __func__, error);
3881 			goto fail;
3882 		}
3883 	}
3884 
3885 	return (0);
3886 
3887 fail:
3888 	em_free_receive_structures(adapter);
3889 	return (error);
3890 }
3891 
3892 
3893 /*********************************************************************
3894  *
3895  *  Initialize a receive ring and its buffers.
3896  *
3897  **********************************************************************/
3898 static int
3899 em_setup_receive_ring(struct rx_ring *rxr)
3900 {
3901 	struct	adapter 	*adapter = rxr->adapter;
3902 	struct em_buffer	*rxbuf;
3903 	bus_dma_segment_t	seg[1];
3904 	int			i, j, nsegs, error = 0;
3905 
3906 
3907 	/* Clear the ring contents */
3908 	EM_RX_LOCK(rxr);
3909 
3910 	/* Invalidate all descriptors */
3911 	for (i = 0; i < adapter->num_rx_desc; i++) {
3912 		struct e1000_rx_desc* cur;
3913 		cur = &rxr->rx_base[i];
3914 		cur->status = 0;
3915 	}
3916 
3917 	/* Now replenish the mbufs */
3918 	i = j = rxr->next_to_refresh;
3919 	if (++j == adapter->num_rx_desc)
3920 		j = 0;
3921 
3922 	while (j != rxr->next_to_check) {
3923 		rxbuf = &rxr->rx_buffers[i];
3924 		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
3925 		    M_PKTHDR, adapter->rx_mbuf_sz);
3926 		if (rxbuf->m_head == NULL) {
3927 			error = ENOBUFS;
3928 			goto fail;
3929 		}
3930 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
3931 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3932 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
3933 
3934 		/* Get the memory mapping */
3935 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3936 		    rxbuf->map, rxbuf->m_head, seg,
3937 		    &nsegs, BUS_DMA_NOWAIT);
3938 		if (error != 0) {
3939 			m_freem(rxbuf->m_head);
3940 			rxbuf->m_head = NULL;
3941 			goto fail;
3942 		}
3943 		bus_dmamap_sync(rxr->rxtag,
3944 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3945 
3946 		/* Update descriptor */
3947 		rxr->rx_base[i].buffer_addr = htole64(seg[0].ds_addr);
3948 		i = j;
3949 		if (++j == adapter->num_rx_desc)
3950 			j = 0;
3951 	}
3952 
3953 fail:
3954 	rxr->next_to_refresh = i;
3955 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3956 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3957 	EM_RX_UNLOCK(rxr);
3958 	return (error);
3959 }
3960 
3961 /*********************************************************************
3962  *
3963  *  Initialize all receive rings.
3964  *
3965  **********************************************************************/
3966 static int
3967 em_setup_receive_structures(struct adapter *adapter)
3968 {
3969 	struct rx_ring *rxr = adapter->rx_rings;
3970 	int q;
3971 
3972 	for (q = 0; q < adapter->num_queues; q++, rxr++)
3973 		if (em_setup_receive_ring(rxr))
3974 			goto fail;
3975 
3976 	return (0);
3977 fail:
3978 	/*
3979 	 * Free RX buffers allocated so far, we will only handle
3980 	 * the rings that completed, the failing case will have
3981 	 * cleaned up for itself. 'q' failed, so its the terminus.
3982 	 */
3983 	for (int i = 0, n = 0; i < q; ++i) {
3984 		rxr = &adapter->rx_rings[i];
3985 		n = rxr->next_to_check;
3986 		while(n != rxr->next_to_refresh) {
3987 			struct em_buffer *rxbuf;
3988 			rxbuf = &rxr->rx_buffers[n];
3989 			if (rxbuf->m_head != NULL) {
3990 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3991 			  	  BUS_DMASYNC_POSTREAD);
3992 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3993 				m_freem(rxbuf->m_head);
3994 				rxbuf->m_head = NULL;
3995 			}
3996 			if (++n == adapter->num_rx_desc)
3997 				n = 0;
3998 		}
3999 		rxr->next_to_check = 0;
4000 		rxr->next_to_refresh = 0;
4001 	}
4002 
4003 	return (ENOBUFS);
4004 }
4005 
4006 /*********************************************************************
4007  *
4008  *  Free all receive rings.
4009  *
4010  **********************************************************************/
4011 static void
4012 em_free_receive_structures(struct adapter *adapter)
4013 {
4014 	struct rx_ring *rxr = adapter->rx_rings;
4015 
4016 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4017 		em_free_receive_buffers(rxr);
4018 		/* Free the ring memory as well */
4019 		em_dma_free(adapter, &rxr->rxdma);
4020 		EM_RX_LOCK_DESTROY(rxr);
4021 	}
4022 
4023 	free(adapter->rx_rings, M_DEVBUF);
4024 }
4025 
4026 
4027 /*********************************************************************
4028  *
4029  *  Free receive ring data structures
4030  *
4031  **********************************************************************/
4032 static void
4033 em_free_receive_buffers(struct rx_ring *rxr)
4034 {
4035 	struct adapter		*adapter = rxr->adapter;
4036 	struct em_buffer	*rxbuf = NULL;
4037 
4038 	INIT_DEBUGOUT("free_receive_buffers: begin");
4039 
4040 	if (rxr->rx_buffers != NULL) {
4041 		int i = rxr->next_to_check;
4042 		while(i != rxr->next_to_refresh) {
4043 			rxbuf = &rxr->rx_buffers[i];
4044 			if (rxbuf->map != NULL) {
4045 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4046 				    BUS_DMASYNC_POSTREAD);
4047 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4048 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4049 			}
4050 			if (rxbuf->m_head != NULL) {
4051 				m_freem(rxbuf->m_head);
4052 				rxbuf->m_head = NULL;
4053 			}
4054 			if (++i == adapter->num_rx_desc)
4055 				i = 0;
4056 		}
4057 		free(rxr->rx_buffers, M_DEVBUF);
4058 		rxr->rx_buffers = NULL;
4059 		rxr->next_to_check = 0;
4060 		rxr->next_to_refresh = 0;
4061 	}
4062 
4063 	if (rxr->rxtag != NULL) {
4064 		bus_dma_tag_destroy(rxr->rxtag);
4065 		rxr->rxtag = NULL;
4066 	}
4067 
4068 	return;
4069 }
4070 
4071 
4072 /*********************************************************************
4073  *
4074  *  Enable receive unit.
4075  *
4076  **********************************************************************/
4077 #define MAX_INTS_PER_SEC	8000
4078 #define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
4079 
4080 static void
4081 em_initialize_receive_unit(struct adapter *adapter)
4082 {
4083 	struct rx_ring	*rxr = adapter->rx_rings;
4084 	struct ifnet	*ifp = adapter->ifp;
4085 	struct e1000_hw	*hw = &adapter->hw;
4086 	u64	bus_addr;
4087 	u32	rctl, rxcsum;
4088 
4089 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4090 
4091 	/*
4092 	 * Make sure receives are disabled while setting
4093 	 * up the descriptor ring
4094 	 */
4095 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4096 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4097 
4098 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4099 	    adapter->rx_abs_int_delay.value);
4100 	/*
4101 	 * Set the interrupt throttling rate. Value is calculated
4102 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4103 	 */
4104 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4105 
4106 	/*
4107 	** When using MSIX interrupts we need to throttle
4108 	** using the EITR register (82574 only)
4109 	*/
4110 	if (hw->mac.type == e1000_82574)
4111 		for (int i = 0; i < 4; i++)
4112 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4113 			    DEFAULT_ITR);
4114 
4115 	/* Disable accelerated ackknowledge */
4116 	if (adapter->hw.mac.type == e1000_82574)
4117 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4118 
4119 	if (ifp->if_capenable & IFCAP_RXCSUM) {
4120 		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4121 		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4122 		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4123 	}
4124 
4125 	/*
4126 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4127 	** long latencies are observed, like Lenovo X60. This
4128 	** change eliminates the problem, but since having positive
4129 	** values in RDTR is a known source of problems on other
4130 	** platforms another solution is being sought.
4131 	*/
4132 	if (hw->mac.type == e1000_82573)
4133 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4134 
4135 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4136 		/* Setup the Base and Length of the Rx Descriptor Ring */
4137 		bus_addr = rxr->rxdma.dma_paddr;
4138 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4139 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4140 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4141 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4142 		/* Setup the Head and Tail Descriptor Pointers */
4143 		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4144 		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4145 	}
4146 
4147 	/* Set early receive threshold on appropriate hw */
4148 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4149 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4150 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4151 	    (ifp->if_mtu > ETHERMTU)) {
4152 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4153 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4154 		E1000_WRITE_REG(hw, E1000_ERT, 0x100 | (1 << 13));
4155 	}
4156 
4157 	if (adapter->hw.mac.type == e1000_pch2lan) {
4158 		if (ifp->if_mtu > ETHERMTU)
4159 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4160 		else
4161 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4162 	}
4163 
4164 	/* Setup the Receive Control Register */
4165 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4166 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4167 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4168 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4169 
4170         /* Strip the CRC */
4171         rctl |= E1000_RCTL_SECRC;
4172 
4173         /* Make sure VLAN Filters are off */
4174         rctl &= ~E1000_RCTL_VFE;
4175 	rctl &= ~E1000_RCTL_SBP;
4176 
4177 	if (adapter->rx_mbuf_sz == MCLBYTES)
4178 		rctl |= E1000_RCTL_SZ_2048;
4179 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4180 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4181 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4182 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4183 
4184 	if (ifp->if_mtu > ETHERMTU)
4185 		rctl |= E1000_RCTL_LPE;
4186 	else
4187 		rctl &= ~E1000_RCTL_LPE;
4188 
4189 	/* Write out the settings */
4190 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4191 
4192 	return;
4193 }
4194 
4195 
4196 /*********************************************************************
4197  *
4198  *  This routine executes in interrupt context. It replenishes
4199  *  the mbufs in the descriptor and sends data which has been
4200  *  dma'ed into host memory to upper layer.
4201  *
4202  *  We loop at most count times if count is > 0, or until done if
4203  *  count < 0.
4204  *
4205  *  For polling we also now return the number of cleaned packets
4206  *********************************************************************/
4207 static bool
4208 em_rxeof(struct rx_ring *rxr, int count, int *done)
4209 {
4210 	struct adapter		*adapter = rxr->adapter;
4211 	struct ifnet		*ifp = adapter->ifp;
4212 	struct mbuf		*mp, *sendmp;
4213 	u8			status = 0;
4214 	u16 			len;
4215 	int			i, processed, rxdone = 0;
4216 	bool			eop;
4217 	struct e1000_rx_desc	*cur;
4218 
4219 	EM_RX_LOCK(rxr);
4220 
4221 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4222 
4223 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4224 			break;
4225 
4226 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4227 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4228 
4229 		cur = &rxr->rx_base[i];
4230 		status = cur->status;
4231 		mp = sendmp = NULL;
4232 
4233 		if ((status & E1000_RXD_STAT_DD) == 0)
4234 			break;
4235 
4236 		len = le16toh(cur->length);
4237 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4238 
4239 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4240 		    (rxr->discard == TRUE)) {
4241 			ifp->if_ierrors++;
4242 			++rxr->rx_discarded;
4243 			if (!eop) /* Catch subsequent segs */
4244 				rxr->discard = TRUE;
4245 			else
4246 				rxr->discard = FALSE;
4247 			em_rx_discard(rxr, i);
4248 			goto next_desc;
4249 		}
4250 
4251 		/* Assign correct length to the current fragment */
4252 		mp = rxr->rx_buffers[i].m_head;
4253 		mp->m_len = len;
4254 
4255 		/* Trigger for refresh */
4256 		rxr->rx_buffers[i].m_head = NULL;
4257 
4258 		/* First segment? */
4259 		if (rxr->fmp == NULL) {
4260 			mp->m_pkthdr.len = len;
4261 			rxr->fmp = rxr->lmp = mp;
4262 		} else {
4263 			/* Chain mbuf's together */
4264 			mp->m_flags &= ~M_PKTHDR;
4265 			rxr->lmp->m_next = mp;
4266 			rxr->lmp = mp;
4267 			rxr->fmp->m_pkthdr.len += len;
4268 		}
4269 
4270 		if (eop) {
4271 			--count;
4272 			sendmp = rxr->fmp;
4273 			sendmp->m_pkthdr.rcvif = ifp;
4274 			ifp->if_ipackets++;
4275 			em_receive_checksum(cur, sendmp);
4276 #ifndef __NO_STRICT_ALIGNMENT
4277 			if (adapter->max_frame_size >
4278 			    (MCLBYTES - ETHER_ALIGN) &&
4279 			    em_fixup_rx(rxr) != 0)
4280 				goto skip;
4281 #endif
4282 			if (status & E1000_RXD_STAT_VP) {
4283 				sendmp->m_pkthdr.ether_vtag =
4284 				    (le16toh(cur->special) &
4285 				    E1000_RXD_SPC_VLAN_MASK);
4286 				sendmp->m_flags |= M_VLANTAG;
4287 			}
4288 #ifdef EM_MULTIQUEUE
4289 			sendmp->m_pkthdr.flowid = rxr->msix;
4290 			sendmp->m_flags |= M_FLOWID;
4291 #endif
4292 #ifndef __NO_STRICT_ALIGNMENT
4293 skip:
4294 #endif
4295 			rxr->fmp = rxr->lmp = NULL;
4296 		}
4297 next_desc:
4298 		/* Zero out the receive descriptors status. */
4299 		cur->status = 0;
4300 		++rxdone;	/* cumulative for POLL */
4301 		++processed;
4302 
4303 		/* Advance our pointers to the next descriptor. */
4304 		if (++i == adapter->num_rx_desc)
4305 			i = 0;
4306 
4307 		/* Send to the stack */
4308 		if (sendmp != NULL) {
4309 			rxr->next_to_check = i;
4310 			EM_RX_UNLOCK(rxr);
4311 			(*ifp->if_input)(ifp, sendmp);
4312 			EM_RX_LOCK(rxr);
4313 			i = rxr->next_to_check;
4314 		}
4315 
4316 		/* Only refresh mbufs every 8 descriptors */
4317 		if (processed == 8) {
4318 			em_refresh_mbufs(rxr, i);
4319 			processed = 0;
4320 		}
4321 	}
4322 
4323 	/* Catch any remaining refresh work */
4324 	if (e1000_rx_unrefreshed(rxr))
4325 		em_refresh_mbufs(rxr, i);
4326 
4327 	rxr->next_to_check = i;
4328 	if (done != NULL)
4329 		*done = rxdone;
4330 	EM_RX_UNLOCK(rxr);
4331 
4332 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4333 }
4334 
4335 static __inline void
4336 em_rx_discard(struct rx_ring *rxr, int i)
4337 {
4338 	struct em_buffer	*rbuf;
4339 
4340 	rbuf = &rxr->rx_buffers[i];
4341 	/* Free any previous pieces */
4342 	if (rxr->fmp != NULL) {
4343 		rxr->fmp->m_flags |= M_PKTHDR;
4344 		m_freem(rxr->fmp);
4345 		rxr->fmp = NULL;
4346 		rxr->lmp = NULL;
4347 	}
4348 	/*
4349 	** Free buffer and allow em_refresh_mbufs()
4350 	** to clean up and recharge buffer.
4351 	*/
4352 	if (rbuf->m_head) {
4353 		m_free(rbuf->m_head);
4354 		rbuf->m_head = NULL;
4355 	}
4356 	return;
4357 }
4358 
4359 #ifndef __NO_STRICT_ALIGNMENT
4360 /*
4361  * When jumbo frames are enabled we should realign entire payload on
4362  * architecures with strict alignment. This is serious design mistake of 8254x
4363  * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4364  * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4365  * payload. On architecures without strict alignment restrictions 8254x still
4366  * performs unaligned memory access which would reduce the performance too.
4367  * To avoid copying over an entire frame to align, we allocate a new mbuf and
4368  * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4369  * existing mbuf chain.
4370  *
4371  * Be aware, best performance of the 8254x is achived only when jumbo frame is
4372  * not used at all on architectures with strict alignment.
4373  */
4374 static int
4375 em_fixup_rx(struct rx_ring *rxr)
4376 {
4377 	struct adapter *adapter = rxr->adapter;
4378 	struct mbuf *m, *n;
4379 	int error;
4380 
4381 	error = 0;
4382 	m = rxr->fmp;
4383 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4384 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4385 		m->m_data += ETHER_HDR_LEN;
4386 	} else {
4387 		MGETHDR(n, M_DONTWAIT, MT_DATA);
4388 		if (n != NULL) {
4389 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4390 			m->m_data += ETHER_HDR_LEN;
4391 			m->m_len -= ETHER_HDR_LEN;
4392 			n->m_len = ETHER_HDR_LEN;
4393 			M_MOVE_PKTHDR(n, m);
4394 			n->m_next = m;
4395 			rxr->fmp = n;
4396 		} else {
4397 			adapter->dropped_pkts++;
4398 			m_freem(rxr->fmp);
4399 			rxr->fmp = NULL;
4400 			error = ENOMEM;
4401 		}
4402 	}
4403 
4404 	return (error);
4405 }
4406 #endif
4407 
4408 /*********************************************************************
4409  *
4410  *  Verify that the hardware indicated that the checksum is valid.
4411  *  Inform the stack about the status of checksum so that stack
4412  *  doesn't spend time verifying the checksum.
4413  *
4414  *********************************************************************/
4415 static void
4416 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4417 {
4418 	/* Ignore Checksum bit is set */
4419 	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4420 		mp->m_pkthdr.csum_flags = 0;
4421 		return;
4422 	}
4423 
4424 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4425 		/* Did it pass? */
4426 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4427 			/* IP Checksum Good */
4428 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4429 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4430 
4431 		} else {
4432 			mp->m_pkthdr.csum_flags = 0;
4433 		}
4434 	}
4435 
4436 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4437 		/* Did it pass? */
4438 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4439 			mp->m_pkthdr.csum_flags |=
4440 			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4441 			mp->m_pkthdr.csum_data = htons(0xffff);
4442 		}
4443 	}
4444 }
4445 
4446 /*
4447  * This routine is run via an vlan
4448  * config EVENT
4449  */
4450 static void
4451 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4452 {
4453 	struct adapter	*adapter = ifp->if_softc;
4454 	u32		index, bit;
4455 
4456 	if (ifp->if_softc !=  arg)   /* Not our event */
4457 		return;
4458 
4459 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4460                 return;
4461 
4462 	EM_CORE_LOCK(adapter);
4463 	index = (vtag >> 5) & 0x7F;
4464 	bit = vtag & 0x1F;
4465 	adapter->shadow_vfta[index] |= (1 << bit);
4466 	++adapter->num_vlans;
4467 	/* Re-init to load the changes */
4468 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4469 		em_init_locked(adapter);
4470 	EM_CORE_UNLOCK(adapter);
4471 }
4472 
4473 /*
4474  * This routine is run via an vlan
4475  * unconfig EVENT
4476  */
4477 static void
4478 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4479 {
4480 	struct adapter	*adapter = ifp->if_softc;
4481 	u32		index, bit;
4482 
4483 	if (ifp->if_softc !=  arg)
4484 		return;
4485 
4486 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4487                 return;
4488 
4489 	EM_CORE_LOCK(adapter);
4490 	index = (vtag >> 5) & 0x7F;
4491 	bit = vtag & 0x1F;
4492 	adapter->shadow_vfta[index] &= ~(1 << bit);
4493 	--adapter->num_vlans;
4494 	/* Re-init to load the changes */
4495 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4496 		em_init_locked(adapter);
4497 	EM_CORE_UNLOCK(adapter);
4498 }
4499 
4500 static void
4501 em_setup_vlan_hw_support(struct adapter *adapter)
4502 {
4503 	struct e1000_hw *hw = &adapter->hw;
4504 	u32             reg;
4505 
4506 	/*
4507 	** We get here thru init_locked, meaning
4508 	** a soft reset, this has already cleared
4509 	** the VFTA and other state, so if there
4510 	** have been no vlan's registered do nothing.
4511 	*/
4512 	if (adapter->num_vlans == 0)
4513                 return;
4514 
4515 	/*
4516 	** A soft reset zero's out the VFTA, so
4517 	** we need to repopulate it now.
4518 	*/
4519 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4520                 if (adapter->shadow_vfta[i] != 0)
4521 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4522                             i, adapter->shadow_vfta[i]);
4523 
4524 	reg = E1000_READ_REG(hw, E1000_CTRL);
4525 	reg |= E1000_CTRL_VME;
4526 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4527 
4528 	/* Enable the Filter Table */
4529 	reg = E1000_READ_REG(hw, E1000_RCTL);
4530 	reg &= ~E1000_RCTL_CFIEN;
4531 	reg |= E1000_RCTL_VFE;
4532 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4533 }
4534 
4535 static void
4536 em_enable_intr(struct adapter *adapter)
4537 {
4538 	struct e1000_hw *hw = &adapter->hw;
4539 	u32 ims_mask = IMS_ENABLE_MASK;
4540 
4541 	if (hw->mac.type == e1000_82574) {
4542 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4543 		ims_mask |= EM_MSIX_MASK;
4544 	}
4545 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4546 }
4547 
4548 static void
4549 em_disable_intr(struct adapter *adapter)
4550 {
4551 	struct e1000_hw *hw = &adapter->hw;
4552 
4553 	if (hw->mac.type == e1000_82574)
4554 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4555 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4556 }
4557 
4558 /*
4559  * Bit of a misnomer, what this really means is
4560  * to enable OS management of the system... aka
4561  * to disable special hardware management features
4562  */
4563 static void
4564 em_init_manageability(struct adapter *adapter)
4565 {
4566 	/* A shared code workaround */
4567 #define E1000_82542_MANC2H E1000_MANC2H
4568 	if (adapter->has_manage) {
4569 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4570 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4571 
4572 		/* disable hardware interception of ARP */
4573 		manc &= ~(E1000_MANC_ARP_EN);
4574 
4575                 /* enable receiving management packets to the host */
4576 		manc |= E1000_MANC_EN_MNG2HOST;
4577 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4578 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4579 		manc2h |= E1000_MNG2HOST_PORT_623;
4580 		manc2h |= E1000_MNG2HOST_PORT_664;
4581 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4582 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4583 	}
4584 }
4585 
4586 /*
4587  * Give control back to hardware management
4588  * controller if there is one.
4589  */
4590 static void
4591 em_release_manageability(struct adapter *adapter)
4592 {
4593 	if (adapter->has_manage) {
4594 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4595 
4596 		/* re-enable hardware interception of ARP */
4597 		manc |= E1000_MANC_ARP_EN;
4598 		manc &= ~E1000_MANC_EN_MNG2HOST;
4599 
4600 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4601 	}
4602 }
4603 
4604 /*
4605  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4606  * For ASF and Pass Through versions of f/w this means
4607  * that the driver is loaded. For AMT version type f/w
4608  * this means that the network i/f is open.
4609  */
4610 static void
4611 em_get_hw_control(struct adapter *adapter)
4612 {
4613 	u32 ctrl_ext, swsm;
4614 
4615 	if (adapter->hw.mac.type == e1000_82573) {
4616 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4617 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4618 		    swsm | E1000_SWSM_DRV_LOAD);
4619 		return;
4620 	}
4621 	/* else */
4622 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4623 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4624 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4625 	return;
4626 }
4627 
4628 /*
4629  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4630  * For ASF and Pass Through versions of f/w this means that
4631  * the driver is no longer loaded. For AMT versions of the
4632  * f/w this means that the network i/f is closed.
4633  */
4634 static void
4635 em_release_hw_control(struct adapter *adapter)
4636 {
4637 	u32 ctrl_ext, swsm;
4638 
4639 	if (!adapter->has_manage)
4640 		return;
4641 
4642 	if (adapter->hw.mac.type == e1000_82573) {
4643 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4644 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4645 		    swsm & ~E1000_SWSM_DRV_LOAD);
4646 		return;
4647 	}
4648 	/* else */
4649 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4650 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4651 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4652 	return;
4653 }
4654 
4655 static int
4656 em_is_valid_ether_addr(u8 *addr)
4657 {
4658 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4659 
4660 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4661 		return (FALSE);
4662 	}
4663 
4664 	return (TRUE);
4665 }
4666 
4667 /*
4668 ** Parse the interface capabilities with regard
4669 ** to both system management and wake-on-lan for
4670 ** later use.
4671 */
4672 static void
4673 em_get_wakeup(device_t dev)
4674 {
4675 	struct adapter	*adapter = device_get_softc(dev);
4676 	u16		eeprom_data = 0, device_id, apme_mask;
4677 
4678 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4679 	apme_mask = EM_EEPROM_APME;
4680 
4681 	switch (adapter->hw.mac.type) {
4682 	case e1000_82573:
4683 	case e1000_82583:
4684 		adapter->has_amt = TRUE;
4685 		/* Falls thru */
4686 	case e1000_82571:
4687 	case e1000_82572:
4688 	case e1000_80003es2lan:
4689 		if (adapter->hw.bus.func == 1) {
4690 			e1000_read_nvm(&adapter->hw,
4691 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4692 			break;
4693 		} else
4694 			e1000_read_nvm(&adapter->hw,
4695 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4696 		break;
4697 	case e1000_ich8lan:
4698 	case e1000_ich9lan:
4699 	case e1000_ich10lan:
4700 	case e1000_pchlan:
4701 	case e1000_pch2lan:
4702 		apme_mask = E1000_WUC_APME;
4703 		adapter->has_amt = TRUE;
4704 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4705 		break;
4706 	default:
4707 		e1000_read_nvm(&adapter->hw,
4708 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4709 		break;
4710 	}
4711 	if (eeprom_data & apme_mask)
4712 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4713 	/*
4714          * We have the eeprom settings, now apply the special cases
4715          * where the eeprom may be wrong or the board won't support
4716          * wake on lan on a particular port
4717 	 */
4718 	device_id = pci_get_device(dev);
4719         switch (device_id) {
4720 	case E1000_DEV_ID_82571EB_FIBER:
4721 		/* Wake events only supported on port A for dual fiber
4722 		 * regardless of eeprom setting */
4723 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4724 		    E1000_STATUS_FUNC_1)
4725 			adapter->wol = 0;
4726 		break;
4727 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4728 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4729 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4730                 /* if quad port adapter, disable WoL on all but port A */
4731 		if (global_quad_port_a != 0)
4732 			adapter->wol = 0;
4733 		/* Reset for multiple quad port adapters */
4734 		if (++global_quad_port_a == 4)
4735 			global_quad_port_a = 0;
4736                 break;
4737 	}
4738 	return;
4739 }
4740 
4741 
4742 /*
4743  * Enable PCI Wake On Lan capability
4744  */
4745 static void
4746 em_enable_wakeup(device_t dev)
4747 {
4748 	struct adapter	*adapter = device_get_softc(dev);
4749 	struct ifnet	*ifp = adapter->ifp;
4750 	u32		pmc, ctrl, ctrl_ext, rctl;
4751 	u16     	status;
4752 
4753 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4754 		return;
4755 
4756 	/* Advertise the wakeup capability */
4757 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4758 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4759 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4760 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4761 
4762 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4763 	    (adapter->hw.mac.type == e1000_pchlan) ||
4764 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4765 	    (adapter->hw.mac.type == e1000_ich10lan))
4766 		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4767 
4768 	/* Keep the laser running on Fiber adapters */
4769 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4770 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4771 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4772 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4773 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4774 	}
4775 
4776 	/*
4777 	** Determine type of Wakeup: note that wol
4778 	** is set with all bits on by default.
4779 	*/
4780 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4781 		adapter->wol &= ~E1000_WUFC_MAG;
4782 
4783 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4784 		adapter->wol &= ~E1000_WUFC_MC;
4785 	else {
4786 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4787 		rctl |= E1000_RCTL_MPE;
4788 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4789 	}
4790 
4791 	if ((adapter->hw.mac.type == e1000_pchlan) ||
4792 	    (adapter->hw.mac.type == e1000_pch2lan)) {
4793 		if (em_enable_phy_wakeup(adapter))
4794 			return;
4795 	} else {
4796 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4797 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4798 	}
4799 
4800 	if (adapter->hw.phy.type == e1000_phy_igp_3)
4801 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4802 
4803         /* Request PME */
4804         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4805 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4806 	if (ifp->if_capenable & IFCAP_WOL)
4807 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4808         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4809 
4810 	return;
4811 }
4812 
4813 /*
4814 ** WOL in the newer chipset interfaces (pchlan)
4815 ** require thing to be copied into the phy
4816 */
4817 static int
4818 em_enable_phy_wakeup(struct adapter *adapter)
4819 {
4820 	struct e1000_hw *hw = &adapter->hw;
4821 	u32 mreg, ret = 0;
4822 	u16 preg;
4823 
4824 	/* copy MAC RARs to PHY RARs */
4825 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4826 
4827 	/* copy MAC MTA to PHY MTA */
4828 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4829 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4830 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4831 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4832 		    (u16)((mreg >> 16) & 0xFFFF));
4833 	}
4834 
4835 	/* configure PHY Rx Control register */
4836 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4837 	mreg = E1000_READ_REG(hw, E1000_RCTL);
4838 	if (mreg & E1000_RCTL_UPE)
4839 		preg |= BM_RCTL_UPE;
4840 	if (mreg & E1000_RCTL_MPE)
4841 		preg |= BM_RCTL_MPE;
4842 	preg &= ~(BM_RCTL_MO_MASK);
4843 	if (mreg & E1000_RCTL_MO_3)
4844 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4845 				<< BM_RCTL_MO_SHIFT);
4846 	if (mreg & E1000_RCTL_BAM)
4847 		preg |= BM_RCTL_BAM;
4848 	if (mreg & E1000_RCTL_PMCF)
4849 		preg |= BM_RCTL_PMCF;
4850 	mreg = E1000_READ_REG(hw, E1000_CTRL);
4851 	if (mreg & E1000_CTRL_RFCE)
4852 		preg |= BM_RCTL_RFCE;
4853 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4854 
4855 	/* enable PHY wakeup in MAC register */
4856 	E1000_WRITE_REG(hw, E1000_WUC,
4857 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4858 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4859 
4860 	/* configure and enable PHY wakeup in PHY registers */
4861 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4862 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4863 
4864 	/* activate PHY wakeup */
4865 	ret = hw->phy.ops.acquire(hw);
4866 	if (ret) {
4867 		printf("Could not acquire PHY\n");
4868 		return ret;
4869 	}
4870 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4871 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4872 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4873 	if (ret) {
4874 		printf("Could not read PHY page 769\n");
4875 		goto out;
4876 	}
4877 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4878 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4879 	if (ret)
4880 		printf("Could not set PHY Host Wakeup bit\n");
4881 out:
4882 	hw->phy.ops.release(hw);
4883 
4884 	return ret;
4885 }
4886 
4887 static void
4888 em_led_func(void *arg, int onoff)
4889 {
4890 	struct adapter	*adapter = arg;
4891 
4892 	EM_CORE_LOCK(adapter);
4893 	if (onoff) {
4894 		e1000_setup_led(&adapter->hw);
4895 		e1000_led_on(&adapter->hw);
4896 	} else {
4897 		e1000_led_off(&adapter->hw);
4898 		e1000_cleanup_led(&adapter->hw);
4899 	}
4900 	EM_CORE_UNLOCK(adapter);
4901 }
4902 
4903 /*
4904 ** Disable the L0S and L1 LINK states
4905 */
4906 static void
4907 em_disable_aspm(struct adapter *adapter)
4908 {
4909 	int		base, reg;
4910 	u16		link_cap,link_ctrl;
4911 	device_t	dev = adapter->dev;
4912 
4913 	switch (adapter->hw.mac.type) {
4914 		case e1000_82573:
4915 		case e1000_82574:
4916 		case e1000_82583:
4917 			break;
4918 		default:
4919 			return;
4920 	}
4921 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
4922 		return;
4923 	reg = base + PCIR_EXPRESS_LINK_CAP;
4924 	link_cap = pci_read_config(dev, reg, 2);
4925 	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
4926 		return;
4927 	reg = base + PCIR_EXPRESS_LINK_CTL;
4928 	link_ctrl = pci_read_config(dev, reg, 2);
4929 	link_ctrl &= 0xFFFC; /* turn off bit 1 and 2 */
4930 	pci_write_config(dev, reg, link_ctrl, 2);
4931 	return;
4932 }
4933 
4934 /**********************************************************************
4935  *
4936  *  Update the board statistics counters.
4937  *
4938  **********************************************************************/
4939 static void
4940 em_update_stats_counters(struct adapter *adapter)
4941 {
4942 	struct ifnet   *ifp;
4943 
4944 	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4945 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4946 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4947 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4948 	}
4949 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4950 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4951 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4952 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4953 
4954 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4955 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4956 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4957 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4958 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4959 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4960 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4961 	/*
4962 	** For watchdog management we need to know if we have been
4963 	** paused during the last interval, so capture that here.
4964 	*/
4965 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4966 	adapter->stats.xoffrxc += adapter->pause_frames;
4967 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4968 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4969 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4970 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4971 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4972 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4973 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4974 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4975 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4976 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4977 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4978 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4979 
4980 	/* For the 64-bit byte counters the low dword must be read first. */
4981 	/* Both registers clear on the read of the high dword */
4982 
4983 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
4984 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4985 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
4986 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4987 
4988 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4989 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4990 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4991 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4992 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4993 
4994 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4995 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4996 
4997 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4998 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4999 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5000 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5001 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5002 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5003 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5004 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5005 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5006 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5007 
5008 	/* Interrupt Counts */
5009 
5010 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5011 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5012 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5013 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5014 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5015 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5016 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5017 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5018 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5019 
5020 	if (adapter->hw.mac.type >= e1000_82543) {
5021 		adapter->stats.algnerrc +=
5022 		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5023 		adapter->stats.rxerrc +=
5024 		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5025 		adapter->stats.tncrs +=
5026 		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5027 		adapter->stats.cexterr +=
5028 		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5029 		adapter->stats.tsctc +=
5030 		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5031 		adapter->stats.tsctfc +=
5032 		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5033 	}
5034 	ifp = adapter->ifp;
5035 
5036 	ifp->if_collisions = adapter->stats.colc;
5037 
5038 	/* Rx Errors */
5039 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5040 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5041 	    adapter->stats.ruc + adapter->stats.roc +
5042 	    adapter->stats.mpc + adapter->stats.cexterr;
5043 
5044 	/* Tx Errors */
5045 	ifp->if_oerrors = adapter->stats.ecol +
5046 	    adapter->stats.latecol + adapter->watchdog_events;
5047 }
5048 
5049 /* Export a single 32-bit register via a read-only sysctl. */
5050 static int
5051 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5052 {
5053 	struct adapter *adapter;
5054 	u_int val;
5055 
5056 	adapter = oidp->oid_arg1;
5057 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5058 	return (sysctl_handle_int(oidp, &val, 0, req));
5059 }
5060 
5061 /*
5062  * Add sysctl variables, one per statistic, to the system.
5063  */
5064 static void
5065 em_add_hw_stats(struct adapter *adapter)
5066 {
5067 	device_t dev = adapter->dev;
5068 
5069 	struct tx_ring *txr = adapter->tx_rings;
5070 	struct rx_ring *rxr = adapter->rx_rings;
5071 
5072 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5073 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5074 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5075 	struct e1000_hw_stats *stats = &adapter->stats;
5076 
5077 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5078 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5079 
5080 #define QUEUE_NAME_LEN 32
5081 	char namebuf[QUEUE_NAME_LEN];
5082 
5083 	/* Driver Statistics */
5084 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5085 			CTLFLAG_RD, &adapter->link_irq,
5086 			"Link MSIX IRQ Handled");
5087 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5088 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5089 			 "Std mbuf failed");
5090 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5091 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5092 			 "Std mbuf cluster failed");
5093 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5094 			CTLFLAG_RD, &adapter->dropped_pkts,
5095 			"Driver dropped packets");
5096 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5097 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5098 			"Driver tx dma failure in xmit");
5099 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5100 			CTLFLAG_RD, &adapter->rx_overruns,
5101 			"RX overruns");
5102 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5103 			CTLFLAG_RD, &adapter->watchdog_events,
5104 			"Watchdog timeouts");
5105 
5106 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5107 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5108 			em_sysctl_reg_handler, "IU",
5109 			"Device Control Register");
5110 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5111 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5112 			em_sysctl_reg_handler, "IU",
5113 			"Receiver Control Register");
5114 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5115 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5116 			"Flow Control High Watermark");
5117 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5118 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5119 			"Flow Control Low Watermark");
5120 
5121 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5122 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5123 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5124 					    CTLFLAG_RD, NULL, "Queue Name");
5125 		queue_list = SYSCTL_CHILDREN(queue_node);
5126 
5127 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5128 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5129 				E1000_TDH(txr->me),
5130 				em_sysctl_reg_handler, "IU",
5131  				"Transmit Descriptor Head");
5132 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5133 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5134 				E1000_TDT(txr->me),
5135 				em_sysctl_reg_handler, "IU",
5136  				"Transmit Descriptor Tail");
5137 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5138 				CTLFLAG_RD, &txr->tx_irq,
5139 				"Queue MSI-X Transmit Interrupts");
5140 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5141 				CTLFLAG_RD, &txr->no_desc_avail,
5142 				"Queue No Descriptor Available");
5143 
5144 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5145 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5146 				E1000_RDH(rxr->me),
5147 				em_sysctl_reg_handler, "IU",
5148 				"Receive Descriptor Head");
5149 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5150 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5151 				E1000_RDT(rxr->me),
5152 				em_sysctl_reg_handler, "IU",
5153 				"Receive Descriptor Tail");
5154 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5155 				CTLFLAG_RD, &rxr->rx_irq,
5156 				"Queue MSI-X Receive Interrupts");
5157 	}
5158 
5159 	/* MAC stats get their own sub node */
5160 
5161 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5162 				    CTLFLAG_RD, NULL, "Statistics");
5163 	stat_list = SYSCTL_CHILDREN(stat_node);
5164 
5165 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5166 			CTLFLAG_RD, &stats->ecol,
5167 			"Excessive collisions");
5168 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5169 			CTLFLAG_RD, &stats->scc,
5170 			"Single collisions");
5171 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5172 			CTLFLAG_RD, &stats->mcc,
5173 			"Multiple collisions");
5174 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5175 			CTLFLAG_RD, &stats->latecol,
5176 			"Late collisions");
5177 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5178 			CTLFLAG_RD, &stats->colc,
5179 			"Collision Count");
5180 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5181 			CTLFLAG_RD, &adapter->stats.symerrs,
5182 			"Symbol Errors");
5183 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5184 			CTLFLAG_RD, &adapter->stats.sec,
5185 			"Sequence Errors");
5186 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5187 			CTLFLAG_RD, &adapter->stats.dc,
5188 			"Defer Count");
5189 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5190 			CTLFLAG_RD, &adapter->stats.mpc,
5191 			"Missed Packets");
5192 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5193 			CTLFLAG_RD, &adapter->stats.rnbc,
5194 			"Receive No Buffers");
5195 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5196 			CTLFLAG_RD, &adapter->stats.ruc,
5197 			"Receive Undersize");
5198 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5199 			CTLFLAG_RD, &adapter->stats.rfc,
5200 			"Fragmented Packets Received ");
5201 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5202 			CTLFLAG_RD, &adapter->stats.roc,
5203 			"Oversized Packets Received");
5204 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5205 			CTLFLAG_RD, &adapter->stats.rjc,
5206 			"Recevied Jabber");
5207 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5208 			CTLFLAG_RD, &adapter->stats.rxerrc,
5209 			"Receive Errors");
5210 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5211 			CTLFLAG_RD, &adapter->stats.crcerrs,
5212 			"CRC errors");
5213 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5214 			CTLFLAG_RD, &adapter->stats.algnerrc,
5215 			"Alignment Errors");
5216 	/* On 82575 these are collision counts */
5217 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5218 			CTLFLAG_RD, &adapter->stats.cexterr,
5219 			"Collision/Carrier extension errors");
5220 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5221 			CTLFLAG_RD, &adapter->stats.xonrxc,
5222 			"XON Received");
5223 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5224 			CTLFLAG_RD, &adapter->stats.xontxc,
5225 			"XON Transmitted");
5226 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5227 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5228 			"XOFF Received");
5229 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5230 			CTLFLAG_RD, &adapter->stats.xofftxc,
5231 			"XOFF Transmitted");
5232 
5233 	/* Packet Reception Stats */
5234 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5235 			CTLFLAG_RD, &adapter->stats.tpr,
5236 			"Total Packets Received ");
5237 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5238 			CTLFLAG_RD, &adapter->stats.gprc,
5239 			"Good Packets Received");
5240 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5241 			CTLFLAG_RD, &adapter->stats.bprc,
5242 			"Broadcast Packets Received");
5243 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5244 			CTLFLAG_RD, &adapter->stats.mprc,
5245 			"Multicast Packets Received");
5246 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5247 			CTLFLAG_RD, &adapter->stats.prc64,
5248 			"64 byte frames received ");
5249 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5250 			CTLFLAG_RD, &adapter->stats.prc127,
5251 			"65-127 byte frames received");
5252 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5253 			CTLFLAG_RD, &adapter->stats.prc255,
5254 			"128-255 byte frames received");
5255 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5256 			CTLFLAG_RD, &adapter->stats.prc511,
5257 			"256-511 byte frames received");
5258 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5259 			CTLFLAG_RD, &adapter->stats.prc1023,
5260 			"512-1023 byte frames received");
5261 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5262 			CTLFLAG_RD, &adapter->stats.prc1522,
5263 			"1023-1522 byte frames received");
5264  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5265  			CTLFLAG_RD, &adapter->stats.gorc,
5266  			"Good Octets Received");
5267 
5268 	/* Packet Transmission Stats */
5269  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5270  			CTLFLAG_RD, &adapter->stats.gotc,
5271  			"Good Octets Transmitted");
5272 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5273 			CTLFLAG_RD, &adapter->stats.tpt,
5274 			"Total Packets Transmitted");
5275 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5276 			CTLFLAG_RD, &adapter->stats.gptc,
5277 			"Good Packets Transmitted");
5278 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5279 			CTLFLAG_RD, &adapter->stats.bptc,
5280 			"Broadcast Packets Transmitted");
5281 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5282 			CTLFLAG_RD, &adapter->stats.mptc,
5283 			"Multicast Packets Transmitted");
5284 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5285 			CTLFLAG_RD, &adapter->stats.ptc64,
5286 			"64 byte frames transmitted ");
5287 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5288 			CTLFLAG_RD, &adapter->stats.ptc127,
5289 			"65-127 byte frames transmitted");
5290 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5291 			CTLFLAG_RD, &adapter->stats.ptc255,
5292 			"128-255 byte frames transmitted");
5293 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5294 			CTLFLAG_RD, &adapter->stats.ptc511,
5295 			"256-511 byte frames transmitted");
5296 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5297 			CTLFLAG_RD, &adapter->stats.ptc1023,
5298 			"512-1023 byte frames transmitted");
5299 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5300 			CTLFLAG_RD, &adapter->stats.ptc1522,
5301 			"1024-1522 byte frames transmitted");
5302 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5303 			CTLFLAG_RD, &adapter->stats.tsctc,
5304 			"TSO Contexts Transmitted");
5305 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5306 			CTLFLAG_RD, &adapter->stats.tsctfc,
5307 			"TSO Contexts Failed");
5308 
5309 
5310 	/* Interrupt Stats */
5311 
5312 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5313 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5314 	int_list = SYSCTL_CHILDREN(int_node);
5315 
5316 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5317 			CTLFLAG_RD, &adapter->stats.iac,
5318 			"Interrupt Assertion Count");
5319 
5320 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5321 			CTLFLAG_RD, &adapter->stats.icrxptc,
5322 			"Interrupt Cause Rx Pkt Timer Expire Count");
5323 
5324 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5325 			CTLFLAG_RD, &adapter->stats.icrxatc,
5326 			"Interrupt Cause Rx Abs Timer Expire Count");
5327 
5328 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5329 			CTLFLAG_RD, &adapter->stats.ictxptc,
5330 			"Interrupt Cause Tx Pkt Timer Expire Count");
5331 
5332 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5333 			CTLFLAG_RD, &adapter->stats.ictxatc,
5334 			"Interrupt Cause Tx Abs Timer Expire Count");
5335 
5336 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5337 			CTLFLAG_RD, &adapter->stats.ictxqec,
5338 			"Interrupt Cause Tx Queue Empty Count");
5339 
5340 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5341 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5342 			"Interrupt Cause Tx Queue Min Thresh Count");
5343 
5344 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5345 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5346 			"Interrupt Cause Rx Desc Min Thresh Count");
5347 
5348 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5349 			CTLFLAG_RD, &adapter->stats.icrxoc,
5350 			"Interrupt Cause Receiver Overrun Count");
5351 }
5352 
5353 /**********************************************************************
5354  *
5355  *  This routine provides a way to dump out the adapter eeprom,
5356  *  often a useful debug/service tool. This only dumps the first
5357  *  32 words, stuff that matters is in that extent.
5358  *
5359  **********************************************************************/
5360 static int
5361 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5362 {
5363 	struct adapter *adapter;
5364 	int error;
5365 	int result;
5366 
5367 	result = -1;
5368 	error = sysctl_handle_int(oidp, &result, 0, req);
5369 
5370 	if (error || !req->newptr)
5371 		return (error);
5372 
5373 	/*
5374 	 * This value will cause a hex dump of the
5375 	 * first 32 16-bit words of the EEPROM to
5376 	 * the screen.
5377 	 */
5378 	if (result == 1) {
5379 		adapter = (struct adapter *)arg1;
5380 		em_print_nvm_info(adapter);
5381         }
5382 
5383 	return (error);
5384 }
5385 
5386 static void
5387 em_print_nvm_info(struct adapter *adapter)
5388 {
5389 	u16	eeprom_data;
5390 	int	i, j, row = 0;
5391 
5392 	/* Its a bit crude, but it gets the job done */
5393 	printf("\nInterface EEPROM Dump:\n");
5394 	printf("Offset\n0x0000  ");
5395 	for (i = 0, j = 0; i < 32; i++, j++) {
5396 		if (j == 8) { /* Make the offset block */
5397 			j = 0; ++row;
5398 			printf("\n0x00%x0  ",row);
5399 		}
5400 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5401 		printf("%04x ", eeprom_data);
5402 	}
5403 	printf("\n");
5404 }
5405 
5406 static int
5407 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5408 {
5409 	struct em_int_delay_info *info;
5410 	struct adapter *adapter;
5411 	u32 regval;
5412 	int error, usecs, ticks;
5413 
5414 	info = (struct em_int_delay_info *)arg1;
5415 	usecs = info->value;
5416 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5417 	if (error != 0 || req->newptr == NULL)
5418 		return (error);
5419 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5420 		return (EINVAL);
5421 	info->value = usecs;
5422 	ticks = EM_USECS_TO_TICKS(usecs);
5423 
5424 	adapter = info->adapter;
5425 
5426 	EM_CORE_LOCK(adapter);
5427 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5428 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5429 	/* Handle a few special cases. */
5430 	switch (info->offset) {
5431 	case E1000_RDTR:
5432 		break;
5433 	case E1000_TIDV:
5434 		if (ticks == 0) {
5435 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5436 			/* Don't write 0 into the TIDV register. */
5437 			regval++;
5438 		} else
5439 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5440 		break;
5441 	}
5442 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5443 	EM_CORE_UNLOCK(adapter);
5444 	return (0);
5445 }
5446 
5447 static void
5448 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5449 	const char *description, struct em_int_delay_info *info,
5450 	int offset, int value)
5451 {
5452 	info->adapter = adapter;
5453 	info->offset = offset;
5454 	info->value = value;
5455 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5456 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5457 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5458 	    info, 0, em_sysctl_int_delay, "I", description);
5459 }
5460 
5461 static void
5462 em_set_sysctl_value(struct adapter *adapter, const char *name,
5463 	const char *description, int *limit, int value)
5464 {
5465 	*limit = value;
5466 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5467 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5468 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5469 }
5470 
5471 static int
5472 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5473 {
5474 	struct adapter *adapter;
5475 	int error;
5476 	int result;
5477 
5478 	result = -1;
5479 	error = sysctl_handle_int(oidp, &result, 0, req);
5480 
5481 	if (error || !req->newptr)
5482 		return (error);
5483 
5484 	if (result == 1) {
5485 		adapter = (struct adapter *)arg1;
5486 		em_print_debug_info(adapter);
5487         }
5488 
5489 	return (error);
5490 }
5491 
5492 /*
5493 ** This routine is meant to be fluid, add whatever is
5494 ** needed for debugging a problem.  -jfv
5495 */
5496 static void
5497 em_print_debug_info(struct adapter *adapter)
5498 {
5499 	device_t dev = adapter->dev;
5500 	struct tx_ring *txr = adapter->tx_rings;
5501 	struct rx_ring *rxr = adapter->rx_rings;
5502 
5503 	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5504 		printf("Interface is RUNNING ");
5505 	else
5506 		printf("Interface is NOT RUNNING\n");
5507 	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5508 		printf("and ACTIVE\n");
5509 	else
5510 		printf("and INACTIVE\n");
5511 
5512 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5513 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5514 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5515 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5516 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5517 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5518 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5519 	device_printf(dev, "TX descriptors avail = %d\n",
5520 	    txr->tx_avail);
5521 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5522 	    txr->no_desc_avail);
5523 	device_printf(dev, "RX discarded packets = %ld\n",
5524 	    rxr->rx_discarded);
5525 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5526 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5527 }
5528