/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.0.5";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static bool	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *,
		    u32 *, u32 *);
static bool	em_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

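/*
** The hardware interrupt-delay registers tick in 1.024 usec units,
** so these macros convert between microseconds and register ticks,
** rounding to the nearest value.
*/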
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

/* Local controls for MSI/MSIX */
#ifdef EM_MULTIQUEUE
static int em_enable_msix = TRUE;
static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */
#else
static int em_enable_msix = FALSE;
static int em_msix_queues = 0; /* disable */
#endif
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
TUNABLE_INT("hw.em.msix_queues", &em_msix_queues);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);

/*
** Shadow VFTA table: this is needed because the
** real VLAN filter table gets cleared during a
** soft reset, and the driver needs to be able
** to repopulate it.
*/
static u32 em_shadow_vfta[EM_VFTA_SIZE];

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of that adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

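	/* Scan the table; the all-zeros entry marks the end. */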
	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

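	/* Timer used by em_local_timer(); it runs under the core mutex. */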
	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. They
	 * must not exceed the hardware maximum and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}


	/*
	** Start from a known state: this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time, it is a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

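	/* Force the first link-state refresh */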
	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
err_pci:
	em_free_pci_resources(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
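		/* Leave room for a maximally scattered frame */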
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->watchdog_check = TRUE;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Multiqueue-capable stack interface; this is not
** yet truly multiqueue, but that is coming...
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	int		i, error = 0;

	/* Which queue to use: prefer a stack-supplied flowid, else hash on CPU */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter  *adapter = ifp->if_softc;
	struct tx_ring  *txr = adapter->tx_rings;
	struct mbuf     *m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
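		/* Stop when a worst-case sized packet no longer fits */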
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->watchdog_check = TRUE;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we initialize
			 * the hardware only when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_CORE_LOCK(adapter);
				em_init_locked(adapter);
				EM_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_82574:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
		/* Adapters that do not support jumbo frames */
		case e1000_82583:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
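		/* FALLTHROUGH */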
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  an init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
	case e1000_pchlan:
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest MAC address; the user may have set an LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we make a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling;
	 * make sure they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

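	/* On a status poll, also look for link-state changes */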
	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	bool		more;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		if (em_txeof(txr))
			more = TRUE;
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	bool		more;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	more = em_txeof(txr);
	EM_TX_UNLOCK(txr);
	if (more)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

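	/* Only link-related causes are handled here; otherwise just re-arm */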
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (!EM_TX_TRYLOCK(txr))
		return;

	em_txeof(txr);

#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed, we need
	 * to reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}
1715 
1716 /*********************************************************************
1717  *
1718  *  This routine maps the mbufs to tx descriptors.
1719  *
1720  *  return 0 on success, positive on failure
1721  **********************************************************************/
1722 
1723 static int
1724 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1725 {
1726 	struct adapter		*adapter = txr->adapter;
1727 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1728 	bus_dmamap_t		map;
1729 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1730 	struct e1000_tx_desc	*ctxd = NULL;
1731 	struct mbuf		*m_head;
1732 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1733 	int			nsegs, i, j, first, last = 0;
1734 	int			error, do_tso, tso_desc = 0;
1735 
1736 	m_head = *m_headp;
1737 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1738 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1739 
1740 	/*
1741 	** When doing checksum offload, it is critical to
1742 	** make sure the first mbuf has more than the header,
1743 	** because that routine expects data to be present.
1744 	*/
1745 	if ((m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) &&
1746 	    (m_head->m_len < ETHER_HDR_LEN + sizeof(struct ip))) {
1747 		m_head = m_pullup(m_head, ETHER_HDR_LEN + sizeof(struct ip));
1748 		*m_headp = m_head;
1749 		if (m_head == NULL)
1750 			return (ENOBUFS);
1751 	}
1752 
1753 	/*
1754 	 * TSO workaround:
1755 	 *  If an mbuf contains only a header, we need
1756 	 *  to pull 4 bytes of data into it.
1757 	 */
1758 	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1759 		m_head = m_pullup(m_head, M_TSO_LEN + 4);
1760 		*m_headp = m_head;
1761 		if (m_head == NULL)
1762 			return (ENOBUFS);
1763 	}
1764 
1765 	/*
1766 	 * Map the packet for DMA
1767 	 *
1768 	 * Capture the first descriptor index,
1769 	 * this descriptor will have the index
1770 	 * of the EOP which is the only one that
1771 	 * now gets a DONE bit writeback.
1772 	 */
1773 	first = txr->next_avail_desc;
1774 	tx_buffer = &txr->tx_buffers[first];
1775 	tx_buffer_mapped = tx_buffer;
1776 	map = tx_buffer->map;
1777 
1778 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1779 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1780 
1781 	/*
1782 	 * There are two types of errors we can (try) to handle:
1783 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1784 	 *   out of segments.  Defragment the mbuf chain and try again.
1785 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1786 	 *   at this point in time.  Defer sending and try again later.
1787 	 * All other errors, in particular EINVAL, are fatal and prevent the
1788 	 * mbuf chain from ever going through.  Drop it and report error.
1789 	 */
1790 	if (error == EFBIG) {
1791 		struct mbuf *m;
1792 
1793 		m = m_defrag(*m_headp, M_DONTWAIT);
1794 		if (m == NULL) {
1795 			adapter->mbuf_alloc_failed++;
1796 			m_freem(*m_headp);
1797 			*m_headp = NULL;
1798 			return (ENOBUFS);
1799 		}
1800 		*m_headp = m;
1801 
1802 		/* Try it again */
1803 		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1804 		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1805 
1806 		if (error) {
1807 			adapter->no_tx_dma_setup++;
1808 			m_freem(*m_headp);
1809 			*m_headp = NULL;
1810 			return (error);
1811 		}
1812 	} else if (error != 0) {
1813 		adapter->no_tx_dma_setup++;
1814 		return (error);
1815 	}
1816 
1817 	/*
1818 	 * TSO Hardware workaround: if this packet is not
1819 	 * TSO, is only a single descriptor long, and
1820 	 * follows a TSO burst, then we need to add a
1821 	 * sentinel descriptor to prevent premature writeback.
1822 	 */
1823 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1824 		if (nsegs == 1)
1825 			tso_desc = TRUE;
1826 		txr->tx_tso = FALSE;
1827 	}
1828 
1829 	if (nsegs > (txr->tx_avail - 2)) {
1830 		txr->no_desc_avail++;
1831 		bus_dmamap_unload(txr->txtag, map);
1832 		return (ENOBUFS);
1833 	}
1834 	m_head = *m_headp;
1835 
1836 	/* Do hardware assists */
1837 #if __FreeBSD_version >= 700000
1838 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1839 		error = em_tso_setup(txr, m_head, &txd_upper, &txd_lower);
1840 		if (error != TRUE)
1841 			return (ENXIO); /* something foobar */
1842 		/* we need to make a final sentinel transmit desc */
1843 		tso_desc = TRUE;
1844 	} else
1845 #endif
1846 	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1847 		em_transmit_checksum_setup(txr,  m_head,
1848 		    &txd_upper, &txd_lower);
1849 
1850 	i = txr->next_avail_desc;
1851 
1852 	/* Set up our transmit descriptors */
1853 	for (j = 0; j < nsegs; j++) {
1854 		bus_size_t seg_len;
1855 		bus_addr_t seg_addr;
1856 
1857 		tx_buffer = &txr->tx_buffers[i];
1858 		ctxd = &txr->tx_base[i];
1859 		seg_addr = segs[j].ds_addr;
1860 		seg_len  = segs[j].ds_len;
1861 		/*
1862 		** TSO Workaround:
1863 		** If this is the last descriptor, we want to
1864 		** split it so we have a small final sentinel
1865 		*/
1866 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
1867 			seg_len -= 4;
1868 			ctxd->buffer_addr = htole64(seg_addr);
1869 			ctxd->lower.data = htole32(
1870 			    adapter->txd_cmd | txd_lower | seg_len);
1871 			ctxd->upper.data =
1872 			    htole32(txd_upper);
1873 			if (++i == adapter->num_tx_desc)
1874 				i = 0;
1875 			/* Now make the sentinel */
1876 			++txd_used; /* using an extra txd */
1877 			ctxd = &txr->tx_base[i];
1878 			tx_buffer = &txr->tx_buffers[i];
1879 			ctxd->buffer_addr =
1880 			    htole64(seg_addr + seg_len);
1881 			ctxd->lower.data = htole32(
1882 			    adapter->txd_cmd | txd_lower | 4);
1883 			ctxd->upper.data =
1884 			    htole32(txd_upper);
1885 			last = i;
1886 			if (++i == adapter->num_tx_desc)
1887 				i = 0;
1888 		} else {
1889 			ctxd->buffer_addr = htole64(seg_addr);
1890 			ctxd->lower.data = htole32(
1891 			    adapter->txd_cmd | txd_lower | seg_len);
1892 			ctxd->upper.data =
1893 			    htole32(txd_upper);
1894 			last = i;
1895 			if (++i == adapter->num_tx_desc)
1896 				i = 0;
1897 		}
1898 		tx_buffer->m_head = NULL;
1899 		tx_buffer->next_eop = -1;
1900 	}
1901 
1902 	txr->next_avail_desc = i;
1903 	txr->tx_avail -= nsegs;
1904 	if (tso_desc) /* TSO used an extra for sentinel */
1905 		txr->tx_avail -= txd_used;
1906 
1907 	if (m_head->m_flags & M_VLANTAG) {
1908 		/* Set the vlan id. */
1909 		ctxd->upper.fields.special =
1910 		    htole16(m_head->m_pkthdr.ether_vtag);
1911 		/* Tell hardware to add tag */
1912 		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1913 	}
1914 
1915 	tx_buffer->m_head = m_head;
1916 	tx_buffer_mapped->map = tx_buffer->map;
1917 	tx_buffer->map = map;
1918 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1919 
1920 	/*
1921 	 * Last Descriptor of Packet
1922 	 * needs End Of Packet (EOP)
1923 	 * and Report Status (RS)
1924 	 */
1925 	ctxd->lower.data |=
1926 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1927 	/*
1928 	 * Keep track in the first buffer which
1929 	 * descriptor will be written back
1930 	 */
1931 	tx_buffer = &txr->tx_buffers[first];
1932 	tx_buffer->next_eop = last;
1933 
1934 	/*
1935 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1936 	 * that this frame is available to transmit.
1937 	 */
1938 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1939 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1940 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1941 
1942 	return (0);
1943 }
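
/*
 * Descriptor accounting sketch for em_xmit() above: a packet mapped to
 * three DMA segments consumes three descriptors,
 *
 *	tx_base[first]     seg 0
 *	tx_base[first+1]   seg 1
 *	tx_base[first+2]   seg 2   <- EOP|RS set on this one ("last")
 *
 * (plus one extra 4-byte sentinel after a TSO burst).  Only "last" gets
 * a DD writeback, and tx_buffers[first].next_eop records its index so
 * the cleanup path can test one descriptor per packet rather than one
 * per segment.
 */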
1944 
1945 static void
1946 em_set_promisc(struct adapter *adapter)
1947 {
1948 	struct ifnet	*ifp = adapter->ifp;
1949 	u32		reg_rctl;
1950 
1951 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1952 
1953 	if (ifp->if_flags & IFF_PROMISC) {
1954 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1955 		/* Turn this on if you want to see bad packets */
1956 		if (em_debug_sbp)
1957 			reg_rctl |= E1000_RCTL_SBP;
1958 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1959 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1960 		reg_rctl |= E1000_RCTL_MPE;
1961 		reg_rctl &= ~E1000_RCTL_UPE;
1962 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1963 	}
1964 }
1965 
1966 static void
1967 em_disable_promisc(struct adapter *adapter)
1968 {
1969 	u32	reg_rctl;
1970 
1971 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1972 
1973 	reg_rctl &=  (~E1000_RCTL_UPE);
1974 	reg_rctl &=  (~E1000_RCTL_MPE);
1975 	reg_rctl &=  (~E1000_RCTL_SBP);
1976 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1977 }
1978 
1979 
1980 /*********************************************************************
1981  *  Multicast Update
1982  *
1983  *  This routine is called whenever multicast address list is updated.
1984  *
1985  **********************************************************************/
1986 
1987 static void
1988 em_set_multi(struct adapter *adapter)
1989 {
1990 	struct ifnet	*ifp = adapter->ifp;
1991 	struct ifmultiaddr *ifma;
1992 	u32 reg_rctl = 0;
1993 	u8  *mta; /* Multicast array memory */
1994 	int mcnt = 0;
1995 
1996 	IOCTL_DEBUGOUT("em_set_multi: begin");
1997 
1998 	if (adapter->hw.mac.type == e1000_82542 &&
1999 	    adapter->hw.revision_id == E1000_REVISION_2) {
2000 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2001 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2002 			e1000_pci_clear_mwi(&adapter->hw);
2003 		reg_rctl |= E1000_RCTL_RST;
2004 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2005 		msec_delay(5);
2006 	}
2007 
2008 	/* Allocate temporary memory to setup array */
2009 	mta = malloc(sizeof(u8) *
2010 	    (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES),
2011 	    M_DEVBUF, M_NOWAIT | M_ZERO);
2012 	if (mta == NULL)
2013 		panic("em_set_multi memory failure\n");
2014 
2015 #if __FreeBSD_version < 800000
2016 	IF_ADDR_LOCK(ifp);
2017 #else
2018 	if_maddr_rlock(ifp);
2019 #endif
2020 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2021 		if (ifma->ifma_addr->sa_family != AF_LINK)
2022 			continue;
2023 
2024 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2025 			break;
2026 
2027 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2028 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2029 		mcnt++;
2030 	}
2031 #if __FreeBSD_version < 800000
2032 	IF_ADDR_UNLOCK(ifp);
2033 #else
2034 	if_maddr_runlock(ifp);
2035 #endif
2036 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2037 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2038 		reg_rctl |= E1000_RCTL_MPE;
2039 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2040 	} else
2041 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2042 
2043 	if (adapter->hw.mac.type == e1000_82542 &&
2044 	    adapter->hw.revision_id == E1000_REVISION_2) {
2045 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2046 		reg_rctl &= ~E1000_RCTL_RST;
2047 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2048 		msec_delay(5);
2049 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2050 			e1000_pci_set_mwi(&adapter->hw);
2051 	}
2052 	free(mta, M_DEVBUF);
2053 }
2054 
2055 
2056 /*********************************************************************
2057  *  Timer routine
2058  *
2059  *  This routine checks for link status and updates statistics.
2060  *
2061  **********************************************************************/
2062 
2063 static void
2064 em_local_timer(void *arg)
2065 {
2066 	struct adapter	*adapter = arg;
2067 	struct ifnet	*ifp = adapter->ifp;
2068 	struct tx_ring	*txr = adapter->tx_rings;
2069 
2070 	EM_CORE_LOCK_ASSERT(adapter);
2071 
2072 	em_update_link_status(adapter);
2073 	em_update_stats_counters(adapter);
2074 
2075 	/* Reset LAA into RAR[0] on 82571 */
2076 	if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2077 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2078 
2079 	/*
2080 	** Check for time since any descriptor was cleaned
2081 	*/
2082 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2083 		EM_TX_LOCK(txr);
2084 		if (txr->watchdog_check == FALSE) {
2085 			EM_TX_UNLOCK(txr);
2086 			continue;
2087 		}
2088 		if ((ticks - txr->watchdog_time) > EM_WATCHDOG)
2089 			goto hung;
2090 		EM_TX_UNLOCK(txr);
2091 	}
2092 
2093 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2094 	return;
2095 hung:
2096 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2097 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2098 	adapter->watchdog_events++;
2099 	EM_TX_UNLOCK(txr);
2100 	em_init_locked(adapter);
2101 }
2102 
2103 
2104 static void
2105 em_update_link_status(struct adapter *adapter)
2106 {
2107 	struct e1000_hw *hw = &adapter->hw;
2108 	struct ifnet *ifp = adapter->ifp;
2109 	device_t dev = adapter->dev;
2110 	u32 link_check = 0;
2111 
2112 	/* Get the cached link value or read phy for real */
2113 	switch (hw->phy.media_type) {
2114 	case e1000_media_type_copper:
2115 		if (hw->mac.get_link_status) {
2116 			/* Do the work to read phy */
2117 			e1000_check_for_link(hw);
2118 			link_check = !hw->mac.get_link_status;
2119 			if (link_check) /* ESB2 fix */
2120 				e1000_cfg_on_link_up(hw);
2121 		} else
2122 			link_check = TRUE;
2123 		break;
2124 	case e1000_media_type_fiber:
2125 		e1000_check_for_link(hw);
2126 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2127                                  E1000_STATUS_LU);
2128 		break;
2129 	case e1000_media_type_internal_serdes:
2130 		e1000_check_for_link(hw);
2131 		link_check = adapter->hw.mac.serdes_has_link;
2132 		break;
2133 	default:
2134 	case e1000_media_type_unknown:
2135 		break;
2136 	}
2137 
2138 	/* Now check for a transition */
2139 	if (link_check && (adapter->link_active == 0)) {
2140 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2141 		    &adapter->link_duplex);
2142 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2143 		if ((adapter->link_speed != SPEED_1000) &&
2144 		    ((hw->mac.type == e1000_82571) ||
2145 		    (hw->mac.type == e1000_82572))) {
2146 			int tarc0;
2147 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2148 			tarc0 &= ~SPEED_MODE_BIT;
2149 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2150 		}
2151 		if (bootverbose)
2152 			device_printf(dev, "Link is up %d Mbps %s\n",
2153 			    adapter->link_speed,
2154 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2155 			    "Full Duplex" : "Half Duplex"));
2156 		adapter->link_active = 1;
2157 		adapter->smartspeed = 0;
2158 		ifp->if_baudrate = adapter->link_speed * 1000000;
2159 		if_link_state_change(ifp, LINK_STATE_UP);
2160 	} else if (!link_check && (adapter->link_active == 1)) {
2161 		ifp->if_baudrate = adapter->link_speed = 0;
2162 		adapter->link_duplex = 0;
2163 		if (bootverbose)
2164 			device_printf(dev, "Link is Down\n");
2165 		adapter->link_active = 0;
2166 		/* Link down, disable watchdog */
2167 		// JFV change later
2168 		//adapter->watchdog_check = FALSE;
2169 		if_link_state_change(ifp, LINK_STATE_DOWN);
2170 	}
2171 }
2172 
2173 /*********************************************************************
2174  *
2175  *  This routine disables all traffic on the adapter by issuing a
2176  *  global reset on the MAC and deallocates TX/RX buffers.
2177  *
2178  *  This routine should always be called with BOTH the CORE
2179  *  and TX locks.
2180  **********************************************************************/
2181 
2182 static void
2183 em_stop(void *arg)
2184 {
2185 	struct adapter	*adapter = arg;
2186 	struct ifnet	*ifp = adapter->ifp;
2187 	struct tx_ring	*txr = adapter->tx_rings;
2188 
2189 	EM_CORE_LOCK_ASSERT(adapter);
2190 
2191 	INIT_DEBUGOUT("em_stop: begin");
2192 
2193 	em_disable_intr(adapter);
2194 	callout_stop(&adapter->timer);
2195 
2196 	/* Tell the stack that the interface is no longer active */
2197 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2198 
2199 	/* Unarm watchdog timer. */
2200 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2201 		EM_TX_LOCK(txr);
2202 		txr->watchdog_check = FALSE;
2203 		EM_TX_UNLOCK(txr);
2204 	}
2205 
2206 	e1000_reset_hw(&adapter->hw);
2207 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2208 
2209 	e1000_led_off(&adapter->hw);
2210 	e1000_cleanup_led(&adapter->hw);
2211 }
2212 
2213 
2214 /*********************************************************************
2215  *
2216  *  Determine hardware revision.
2217  *
2218  **********************************************************************/
2219 static void
2220 em_identify_hardware(struct adapter *adapter)
2221 {
2222 	device_t dev = adapter->dev;
2223 
2224 	/* Make sure our PCI config space has the necessary stuff set */
2225 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2226 	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2227 	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2228 		device_printf(dev, "Memory Access and/or Bus Master bits "
2229 		    "were not set!\n");
2230 		adapter->hw.bus.pci_cmd_word |=
2231 		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2232 		pci_write_config(dev, PCIR_COMMAND,
2233 		    adapter->hw.bus.pci_cmd_word, 2);
2234 	}
2235 
2236 	/* Save off the information about this board */
2237 	adapter->hw.vendor_id = pci_get_vendor(dev);
2238 	adapter->hw.device_id = pci_get_device(dev);
2239 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2240 	adapter->hw.subsystem_vendor_id =
2241 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2242 	adapter->hw.subsystem_device_id =
2243 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2244 
2245 	/* Do Shared Code Init and Setup */
2246 	if (e1000_set_mac_type(&adapter->hw)) {
2247 		device_printf(dev, "Setup init failure\n");
2248 		return;
2249 	}
2250 }
2251 
2252 static int
2253 em_allocate_pci_resources(struct adapter *adapter)
2254 {
2255 	device_t	dev = adapter->dev;
2256 	int		rid;
2257 
2258 	rid = PCIR_BAR(0);
2259 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2260 	    &rid, RF_ACTIVE);
2261 	if (adapter->memory == NULL) {
2262 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2263 		return (ENXIO);
2264 	}
2265 	adapter->osdep.mem_bus_space_tag =
2266 	    rman_get_bustag(adapter->memory);
2267 	adapter->osdep.mem_bus_space_handle =
2268 	    rman_get_bushandle(adapter->memory);
2269 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2270 
2271 	/* Default to a single queue */
2272 	adapter->num_queues = 1;
2273 
2274 	/*
2275 	 * Setup MSI/X or MSI if PCI Express
2276 	 */
2277 	adapter->msix = em_setup_msix(adapter);
2278 
2279 	adapter->hw.back = &adapter->osdep;
2280 
2281 	return (0);
2282 }
2283 
2284 /*********************************************************************
2285  *
2286  *  Setup the Legacy or MSI Interrupt handler
2287  *
2288  **********************************************************************/
2289 int
2290 em_allocate_legacy(struct adapter *adapter)
2291 {
2292 	device_t dev = adapter->dev;
2293 	int error, rid = 0;
2294 
2295 	/* Manually turn off all interrupts */
2296 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2297 
2298 	if (adapter->msix == 1) /* using MSI */
2299 		rid = 1;
2300 	/* We allocate a single interrupt resource */
2301 	adapter->res = bus_alloc_resource_any(dev,
2302 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2303 	if (adapter->res == NULL) {
2304 		device_printf(dev, "Unable to allocate bus resource: "
2305 		    "interrupt\n");
2306 		return (ENXIO);
2307 	}
2308 
2309 	/*
2310 	 * Allocate a fast interrupt and the associated
2311 	 * deferred processing contexts.
2312 	 */
2313 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2314 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2315 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2316 	    taskqueue_thread_enqueue, &adapter->tq);
2317 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2318 	    device_get_nameunit(adapter->dev));
2319 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2320 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2321 		device_printf(dev, "Failed to register fast interrupt "
2322 			    "handler: %d\n", error);
2323 		taskqueue_free(adapter->tq);
2324 		adapter->tq = NULL;
2325 		return (error);
2326 	}
2327 
2328 	return (0);
2329 }
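
/*
 * Note: this is the filter + taskqueue pattern -- em_irq_fast() runs as
 * an interrupt filter with no process context, and defers the actual
 * RX/TX and link work to que_task/link_task, which execute on the
 * "em_taskq" thread started above.
 */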
2330 
2331 /*********************************************************************
2332  *
2333  *  Setup the MSIX Interrupt handlers
2334  *   This is not really Multiqueue, rather
2335  *   it's just multiple interrupt vectors.
2336  *
2337  **********************************************************************/
2338 int
2339 em_allocate_msix(struct adapter *adapter)
2340 {
2341 	device_t	dev = adapter->dev;
2342 	struct		tx_ring *txr = adapter->tx_rings;
2343 	struct		rx_ring *rxr = adapter->rx_rings;
2344 	int		error, rid, vector = 0;
2345 
2346 
2347 	/* Make sure all interrupts are disabled */
2348 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2349 
2350 	/* First set up ring resources */
2351 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2352 
2353 		/* RX ring */
2354 		rid = vector + 1;
2355 
2356 		rxr->res = bus_alloc_resource_any(dev,
2357 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2358 		if (rxr->res == NULL) {
2359 			device_printf(dev,
2360 			    "Unable to allocate bus resource: "
2361 			    "RX MSIX Interrupt %d\n", i);
2362 			return (ENXIO);
2363 		}
2364 		if ((error = bus_setup_intr(dev, rxr->res,
2365 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2366 		    rxr, &rxr->tag)) != 0) {
2367 			device_printf(dev, "Failed to register RX handler");
2368 			return (error);
2369 		}
2370 		rxr->msix = vector++; /* NOTE increment vector for TX */
2371 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2372 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2373 		    taskqueue_thread_enqueue, &rxr->tq);
2374 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2375 		    device_get_nameunit(adapter->dev));
2376 		/*
2377 		** Set the bit to enable interrupt
2378 		** in E1000_IMS -- bits 20 and 21
2379 		** are for RX0 and RX1, note this has
2380 		** NOTHING to do with the MSIX vector
2381 		*/
2382 		rxr->ims = 1 << (20 + i);
2383 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2384 
2385 		/* TX ring */
2386 		rid = vector + 1;
2387 		txr->res = bus_alloc_resource_any(dev,
2388 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2389 		if (txr->res == NULL) {
2390 			device_printf(dev,
2391 			    "Unable to allocate bus resource: "
2392 			    "TX MSIX Interrupt %d\n", i);
2393 			return (ENXIO);
2394 		}
2395 		if ((error = bus_setup_intr(dev, txr->res,
2396 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2397 		    txr, &txr->tag)) != 0) {
2398 			device_printf(dev, "Failed to register TX handler");
2399 			return (error);
2400 		}
2401 		txr->msix = vector++; /* Increment vector for next pass */
2402 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2403 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2404 		    taskqueue_thread_enqueue, &txr->tq);
2405 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2406 		    device_get_nameunit(adapter->dev));
2407 		/*
2408 		** Set the bit to enable interrupt
2409 		** in E1000_IMS -- bits 22 and 23
2410 		** are for TX0 and TX1, note this has
2411 		** NOTHING to do with the MSIX vector
2412 		*/
2413 		txr->ims = 1 << (22 + i);
2414 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2415 	}
2416 
2417 	/* Link interrupt */
2418 	++rid;
2419 	adapter->res = bus_alloc_resource_any(dev,
2420 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2421 	if (!adapter->res) {
2422 		device_printf(dev, "Unable to allocate "
2423 		    "bus resource: Link interrupt [%d]\n", rid);
2424 		return (ENXIO);
2425 	}
2426 	/* Set the link handler function */
2427 	error = bus_setup_intr(dev, adapter->res,
2428 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2429 	    em_msix_link, adapter, &adapter->tag);
2430 	if (error) {
2431 		adapter->res = NULL;
2432 		device_printf(dev, "Failed to register LINK handler");
2433 		return (error);
2434 	}
2435 	adapter->linkvec = vector;
2436 	adapter->ivars |=  (8 | vector) << 16;
2437 	adapter->ivars |= 0x80000000;
2438 
2439 	return (0);
2440 }
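
/*
 * Sketch of the 82574 IVAR value assembled above (per the shift/mask
 * arithmetic here; the datasheet is authoritative):
 *
 *	bits  3:0   RX queue 0 vector (bit 3 = valid)
 *	bits  7:4   RX queue 1 vector
 *	bits 11:8   TX queue 0 vector
 *	bits 15:12  TX queue 1 vector
 *	bits 19:16  link/other vector
 *	bit  31     0x80000000, the final enable set above
 */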
2441 
2442 
2443 static void
2444 em_free_pci_resources(struct adapter *adapter)
2445 {
2446 	device_t	dev = adapter->dev;
2447 	struct tx_ring	*txr;
2448 	struct rx_ring	*rxr;
2449 	int		rid;
2450 
2451 
2452 	/*
2453 	** Release all the queue interrupt resources:
2454 	*/
2455 	for (int i = 0; i < adapter->num_queues; i++) {
2456 		txr = &adapter->tx_rings[i];
2457 		rxr = &adapter->rx_rings[i];
2458 		rid = txr->msix + 1;
2459 		if (txr->tag != NULL) {
2460 			bus_teardown_intr(dev, txr->res, txr->tag);
2461 			txr->tag = NULL;
2462 		}
2463 		if (txr->res != NULL)
2464 			bus_release_resource(dev, SYS_RES_IRQ,
2465 			    rid, txr->res);
2466 		rid = rxr->msix + 1;
2467 		if (rxr->tag != NULL) {
2468 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2469 			rxr->tag = NULL;
2470 		}
2471 		if (rxr->res != NULL)
2472 			bus_release_resource(dev, SYS_RES_IRQ,
2473 			    rid, rxr->res);
2474 	}
2475 
2476 	if (adapter->linkvec) /* we are doing MSIX */
2477 		rid = adapter->linkvec + 1;
2478 	else
2479 		rid = (adapter->msix != 0) ? 1 : 0;
2480 
2481 	if (adapter->tag != NULL) {
2482 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2483 		adapter->tag = NULL;
2484 	}
2485 
2486 	if (adapter->res != NULL)
2487 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2488 
2489 
2490 	if (adapter->msix)
2491 		pci_release_msi(dev);
2492 
2493 	if (adapter->msix_mem != NULL)
2494 		bus_release_resource(dev, SYS_RES_MEMORY,
2495 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2496 
2497 	if (adapter->memory != NULL)
2498 		bus_release_resource(dev, SYS_RES_MEMORY,
2499 		    PCIR_BAR(0), adapter->memory);
2500 
2501 	if (adapter->flash != NULL)
2502 		bus_release_resource(dev, SYS_RES_MEMORY,
2503 		    EM_FLASH, adapter->flash);
2504 }
2505 
2506 /*
2507  * Setup MSI or MSI/X
2508  */
2509 static int
2510 em_setup_msix(struct adapter *adapter)
2511 {
2512 	device_t dev = adapter->dev;
2513 	int val = 0;
2514 
2515 
2516 	/* Setup MSI/X for Hartwell */
2517 	if ((adapter->hw.mac.type == e1000_82574) &&
2518 	    (em_enable_msix == TRUE)) {
2519 		/* Map the MSIX BAR */
2520 		int rid = PCIR_BAR(EM_MSIX_BAR);
2521 		adapter->msix_mem = bus_alloc_resource_any(dev,
2522 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2523 		if (!adapter->msix_mem) {
2524 			/* May not be enabled */
2525 			device_printf(adapter->dev,
2526 			    "Unable to map MSIX table\n");
2527 			goto msi;
2528 		}
2529 		val = pci_msix_count(dev);
2530 		if (val != 5) {
2531 			bus_release_resource(dev, SYS_RES_MEMORY,
2532 			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2533 			adapter->msix_mem = NULL;
2534 			device_printf(adapter->dev,
2535 			    "MSIX vectors wrong, using MSI\n");
2536 			goto msi;
2537 		}
2538 		if (em_msix_queues == 2) {
2539 			val = 5;
2540 			adapter->num_queues = 2;
2541 		} else {
2542 			val = 3;
2543 			adapter->num_queues = 1;
2544 		}
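		/*
		 * Vector budget (sketch): 5 = 2 RX + 2 TX + 1 link for two
		 * queue pairs, 3 = 1 RX + 1 TX + 1 link for a single pair,
		 * matching the per-ring and link vectors wired up in
		 * em_allocate_msix().
		 */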
2545 		if (pci_alloc_msix(dev, &val) == 0) {
2546 			device_printf(adapter->dev,
2547 			    "Using MSIX interrupts "
2548 			    "with %d vectors\n", val);
2549 		}
2550 
2551 		return (val);
2552 	}
2553 msi:
2554 	val = pci_msi_count(dev);
2555 	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2556 		adapter->msix = 1;
2557 		device_printf(adapter->dev, "Using MSI interrupt\n");
2558 		return (val);
2559 	}
2560 	/* Should only happen due to manual intervention */
2561 	device_printf(adapter->dev, "Setup MSIX failure\n");
2562 	return (0);
2563 }
2564 
2565 
2566 /*********************************************************************
2567  *
2568  *  Initialize the hardware to a configuration
2569  *  as specified by the adapter structure.
2570  *
2571  **********************************************************************/
2572 static void
2573 em_reset(struct adapter *adapter)
2574 {
2575 	device_t	dev = adapter->dev;
2576 	struct e1000_hw	*hw = &adapter->hw;
2577 	u16		rx_buffer_size;
2578 
2579 	INIT_DEBUGOUT("em_reset: begin");
2580 
2581 	/* Set up smart power down as default off on newer adapters. */
2582 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2583 	    hw->mac.type == e1000_82572)) {
2584 		u16 phy_tmp = 0;
2585 
2586 		/* Speed up time to link by disabling smart power down. */
2587 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2588 		phy_tmp &= ~IGP02E1000_PM_SPD;
2589 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2590 	}
2591 
2592 	/*
2593 	 * These parameters control the automatic generation (Tx) and
2594 	 * response (Rx) to Ethernet PAUSE frames.
2595 	 * - High water mark should allow for at least two frames to be
2596 	 *   received after sending an XOFF.
2597 	 * - Low water mark works best when it is very near the high water mark.
2598 	 *   This allows the receiver to restart by sending XON when it has
2599 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2600 	 *   restart after one full frame is pulled from the buffer. There
2601 	 *   could be several smaller frames in the buffer and if so they will
2602 	 *   not trigger the XON until their total number reduces the buffer
2603 	 *   by 1500.
2604 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2605 	 */
2606 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2607 
2608 	hw->fc.high_water = rx_buffer_size -
2609 	    roundup2(adapter->max_frame_size, 1024);
2610 	hw->fc.low_water = hw->fc.high_water - 1500;
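
	/*
	 * Worked example (illustrative): with a 32 KB RX allocation
	 * reported by E1000_PBA and a standard 1518-byte max frame,
	 *
	 *	high_water = 32768 - roundup2(1518, 1024)
	 *	           = 32768 - 2048 = 30720
	 *	low_water  = 30720 - 1500 = 29220
	 *
	 * so XOFF is sent with roughly two frames of headroom left and
	 * XON resumes once about one full frame has drained.
	 */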
2611 
2612 	if (hw->mac.type == e1000_80003es2lan)
2613 		hw->fc.pause_time = 0xFFFF;
2614 	else
2615 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2616 
2617 	hw->fc.send_xon = TRUE;
2618 
2619 	/* Set Flow control, use the tunable location if sane */
2620 	if ((em_fc_setting >= 0) && (em_fc_setting < 4))
2621 		hw->fc.requested_mode = em_fc_setting;
2622 	else
2623 		hw->fc.requested_mode = e1000_fc_none;
2624 
2625 	/* Override - workaround for PCHLAN issue */
2626 	if (hw->mac.type == e1000_pchlan)
2627 		hw->fc.requested_mode = e1000_fc_rx_pause;
2628 
2629 	/* Issue a global reset */
2630 	e1000_reset_hw(hw);
2631 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2632 
2633 	if (e1000_init_hw(hw) < 0) {
2634 		device_printf(dev, "Hardware Initialization Failed\n");
2635 		return;
2636 	}
2637 
2638 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2639 	e1000_get_phy_info(hw);
2640 	e1000_check_for_link(hw);
2641 	return;
2642 }
2643 
2644 /*********************************************************************
2645  *
2646  *  Setup networking device structure and register an interface.
2647  *
2648  **********************************************************************/
2649 static void
2650 em_setup_interface(device_t dev, struct adapter *adapter)
2651 {
2652 	struct ifnet   *ifp;
2653 
2654 	INIT_DEBUGOUT("em_setup_interface: begin");
2655 
2656 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2657 	if (ifp == NULL)
2658 		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2659 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2660 	ifp->if_mtu = ETHERMTU;
2661 	ifp->if_init =  em_init;
2662 	ifp->if_softc = adapter;
2663 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2664 	ifp->if_ioctl = em_ioctl;
2665 	ifp->if_start = em_start;
2666 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2667 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2668 	IFQ_SET_READY(&ifp->if_snd);
2669 
2670 	ether_ifattach(ifp, adapter->hw.mac.addr);
2671 
2672 	ifp->if_capabilities = ifp->if_capenable = 0;
2673 
2674 #ifdef EM_MULTIQUEUE
2675 	/* Multiqueue tx functions */
2676 	ifp->if_transmit = em_mq_start;
2677 	ifp->if_qflush = em_qflush;
2678 #endif
2679 
2680 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2681 	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2682 
2683 	/* Enable TSO by default, can disable with ifconfig */
2684 	ifp->if_capabilities |= IFCAP_TSO4;
2685 	ifp->if_capenable |= IFCAP_TSO4;
2686 
2687 	/*
2688 	 * Tell the upper layer(s) we
2689 	 * support full VLAN capability
2690 	 */
2691 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2692 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2693 	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2694 
2695 	/*
2696 	** Don't turn this on by default: if vlans are
2697 	** created on another pseudo device (e.g. lagg)
2698 	** then vlan events are not passed through, breaking
2699 	** operation, but with HW FILTER off it works. If
2700 	** using vlans directly on the em driver you can
2701 	** enable this and get full hardware tag filtering.
2702 	*/
2703 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2704 
2705 #ifdef DEVICE_POLLING
2706 	ifp->if_capabilities |= IFCAP_POLLING;
2707 #endif
2708 
2709 	/* Enable only WOL MAGIC by default */
2710 	if (adapter->wol) {
2711 		ifp->if_capabilities |= IFCAP_WOL;
2712 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2713 	}
2714 
2715 	/*
2716 	 * Specify the media types supported by this adapter and register
2717 	 * callbacks to update media and link information
2718 	 */
2719 	ifmedia_init(&adapter->media, IFM_IMASK,
2720 	    em_media_change, em_media_status);
2721 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2722 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2723 		u_char fiber_type = IFM_1000_SX;	/* default type */
2724 
2725 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2726 			    0, NULL);
2727 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2728 	} else {
2729 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2730 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2731 			    0, NULL);
2732 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2733 			    0, NULL);
2734 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2735 			    0, NULL);
2736 		if (adapter->hw.phy.type != e1000_phy_ife) {
2737 			ifmedia_add(&adapter->media,
2738 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2739 			ifmedia_add(&adapter->media,
2740 				IFM_ETHER | IFM_1000_T, 0, NULL);
2741 		}
2742 	}
2743 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2744 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2745 }
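
/*
 * Usage note (illustrative): the capabilities enabled above can be
 * toggled from userland, e.g.
 *
 *	ifconfig em0 -tso		# disable TSO
 *	ifconfig em0 -txcsum		# disable TX checksum offload
 *	ifconfig em0 vlanhwfilter	# opt in to HW VLAN tag filtering
 *
 * which flips the corresponding IFCAP_* bits via em_ioctl().
 */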
2746 
2747 
2748 /*
2749  * Manage DMA'able memory.
2750  */
2751 static void
2752 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2753 {
2754 	if (error)
2755 		return;
2756 	*(bus_addr_t *) arg = segs[0].ds_addr;
2757 }
2758 
2759 static int
2760 em_dma_malloc(struct adapter *adapter, bus_size_t size,
2761         struct em_dma_alloc *dma, int mapflags)
2762 {
2763 	int error;
2764 
2765 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2766 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2767 				BUS_SPACE_MAXADDR,	/* lowaddr */
2768 				BUS_SPACE_MAXADDR,	/* highaddr */
2769 				NULL, NULL,		/* filter, filterarg */
2770 				size,			/* maxsize */
2771 				1,			/* nsegments */
2772 				size,			/* maxsegsize */
2773 				0,			/* flags */
2774 				NULL,			/* lockfunc */
2775 				NULL,			/* lockarg */
2776 				&dma->dma_tag);
2777 	if (error) {
2778 		device_printf(adapter->dev,
2779 		    "%s: bus_dma_tag_create failed: %d\n",
2780 		    __func__, error);
2781 		goto fail_0;
2782 	}
2783 
2784 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2785 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2786 	if (error) {
2787 		device_printf(adapter->dev,
2788 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2789 		    __func__, (uintmax_t)size, error);
2790 		goto fail_2;
2791 	}
2792 
2793 	dma->dma_paddr = 0;
2794 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2795 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2796 	if (error || dma->dma_paddr == 0) {
2797 		device_printf(adapter->dev,
2798 		    "%s: bus_dmamap_load failed: %d\n",
2799 		    __func__, error);
2800 		goto fail_3;
2801 	}
2802 
2803 	return (0);
2804 
2805 fail_3:
2806 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2807 fail_2:
2808 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2809 	bus_dma_tag_destroy(dma->dma_tag);
2810 fail_0:
2811 	dma->dma_map = NULL;
2812 	dma->dma_tag = NULL;
2813 
2814 	return (error);
2815 }
2816 
2817 static void
2818 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2819 {
2820 	if (dma->dma_tag == NULL)
2821 		return;
2822 	if (dma->dma_map != NULL) {
2823 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2824 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2825 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2826 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2827 		dma->dma_map = NULL;
2828 	}
2829 	bus_dma_tag_destroy(dma->dma_tag);
2830 	dma->dma_tag = NULL;
2831 }
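
/*
 * Typical usage of the two helpers above (sketch), as done for the
 * descriptor rings in em_allocate_queues() below:
 *
 *	struct em_dma_alloc dma;
 *
 *	if (em_dma_malloc(adapter, size, &dma, BUS_DMA_NOWAIT) == 0) {
 *		... use dma.dma_vaddr (KVA) / dma.dma_paddr (bus address) ...
 *		em_dma_free(adapter, &dma);
 *	}
 */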
2832 
2833 
2834 /*********************************************************************
2835  *
2836  *  Allocate memory for the transmit and receive rings, and then
2837  *  the descriptors associated with each, called only once at attach.
2838  *
2839  **********************************************************************/
2840 static int
2841 em_allocate_queues(struct adapter *adapter)
2842 {
2843 	device_t		dev = adapter->dev;
2844 	struct tx_ring		*txr = NULL;
2845 	struct rx_ring		*rxr = NULL;
2846 	int rsize, tsize, error = E1000_SUCCESS;
2847 	int txconf = 0, rxconf = 0;
2848 
2849 
2850 	/* Allocate the TX ring struct memory */
2851 	if (!(adapter->tx_rings =
2852 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2853 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2854 		device_printf(dev, "Unable to allocate TX ring memory\n");
2855 		error = ENOMEM;
2856 		goto fail;
2857 	}
2858 
2859 	/* Now allocate the RX */
2860 	if (!(adapter->rx_rings =
2861 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2862 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2863 		device_printf(dev, "Unable to allocate RX ring memory\n");
2864 		error = ENOMEM;
2865 		goto rx_fail;
2866 	}
2867 
2868 	tsize = roundup2(adapter->num_tx_desc *
2869 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
2870 	/*
2871 	 * Now set up the TX queues, txconf is needed to handle the
2872 	 * possibility that things fail midcourse and we need to
2873 	 * undo memory gracefully
2874 	 */
2875 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2876 		/* Set up some basics */
2877 		txr = &adapter->tx_rings[i];
2878 		txr->adapter = adapter;
2879 		txr->me = i;
2880 
2881 		/* Initialize the TX lock */
2882 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2883 		    device_get_nameunit(dev), txr->me);
2884 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2885 
2886 		if (em_dma_malloc(adapter, tsize,
2887 			&txr->txdma, BUS_DMA_NOWAIT)) {
2888 			device_printf(dev,
2889 			    "Unable to allocate TX Descriptor memory\n");
2890 			error = ENOMEM;
2891 			goto err_tx_desc;
2892 		}
2893 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2894 		bzero((void *)txr->tx_base, tsize);
2895 
2896 		if (em_allocate_transmit_buffers(txr)) {
2897 			device_printf(dev,
2898 			    "Critical Failure setting up transmit buffers\n");
2899 			error = ENOMEM;
2900 			goto err_tx_desc;
2901 		}
2902 #if __FreeBSD_version >= 800000
2903 		/* Allocate a buf ring */
2904 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
2905 		    M_WAITOK, &txr->tx_mtx);
2906 #endif
2907 	}
2908 
2909 	/*
2910 	 * Next the RX queues...
2911 	 */
2912 	rsize = roundup2(adapter->num_rx_desc *
2913 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
2914 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2915 		rxr = &adapter->rx_rings[i];
2916 		rxr->adapter = adapter;
2917 		rxr->me = i;
2918 
2919 		/* Initialize the RX lock */
2920 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2921 		    device_get_nameunit(dev), rxr->me);
2922 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2923 
2924 		if (em_dma_malloc(adapter, rsize,
2925 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2926 			device_printf(dev,
2927 			    "Unable to allocate RX Descriptor memory\n");
2928 			error = ENOMEM;
2929 			goto err_rx_desc;
2930 		}
2931 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
2932 		bzero((void *)rxr->rx_base, rsize);
2933 
2934 		/* Allocate receive buffers for the ring */
2935 		if (em_allocate_receive_buffers(rxr)) {
2936 			device_printf(dev,
2937 			    "Critical Failure setting up receive buffers\n");
2938 			error = ENOMEM;
2939 			goto err_rx_desc;
2940 		}
2941 	}
2942 
2943 	return (0);
2944 
2945 err_rx_desc:
2946 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2947 		em_dma_free(adapter, &rxr->rxdma);
2948 err_tx_desc:
2949 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2950 		em_dma_free(adapter, &txr->txdma);
2951 	free(adapter->rx_rings, M_DEVBUF);
2952 rx_fail:
2953 #if __FreeBSD_version >= 800000
2954 	buf_ring_free(txr->br, M_DEVBUF);
2955 #endif
2956 	free(adapter->tx_rings, M_DEVBUF);
2957 fail:
2958 	return (error);
2959 }
2960 
2961 
2962 /*********************************************************************
2963  *
2964  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2965  *  the information needed to transmit a packet on the wire. This is
2966  *  called only once at attach, setup is done every reset.
2967  *
2968  **********************************************************************/
2969 static int
2970 em_allocate_transmit_buffers(struct tx_ring *txr)
2971 {
2972 	struct adapter *adapter = txr->adapter;
2973 	device_t dev = adapter->dev;
2974 	struct em_buffer *txbuf;
2975 	int error, i;
2976 
2977 	/*
2978 	 * Setup DMA descriptor areas.
2979 	 */
2980 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2981 			       1, 0,			/* alignment, bounds */
2982 			       BUS_SPACE_MAXADDR,	/* lowaddr */
2983 			       BUS_SPACE_MAXADDR,	/* highaddr */
2984 			       NULL, NULL,		/* filter, filterarg */
2985 			       EM_TSO_SIZE,		/* maxsize */
2986 			       EM_MAX_SCATTER,		/* nsegments */
2987 			       PAGE_SIZE,		/* maxsegsize */
2988 			       0,			/* flags */
2989 			       NULL,			/* lockfunc */
2990 			       NULL,			/* lockfuncarg */
2991 			       &txr->txtag))) {
2992 		device_printf(dev,"Unable to allocate TX DMA tag\n");
2993 		goto fail;
2994 	}
2995 
2996 	if (!(txr->tx_buffers =
2997 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
2998 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2999 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3000 		error = ENOMEM;
3001 		goto fail;
3002 	}
3003 
3004 	/* Create the descriptor buffer dma maps */
3005 	txbuf = txr->tx_buffers;
3006 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3007 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3008 		if (error != 0) {
3009 			device_printf(dev, "Unable to create TX DMA map\n");
3010 			goto fail;
3011 		}
3012 	}
3013 
3014 	return (0);
3015 fail:
3016 	/* We free all; this handles the case where we fail in the middle */
3017 	em_free_transmit_structures(adapter);
3018 	return (error);
3019 }
3020 
3021 /*********************************************************************
3022  *
3023  *  Initialize a transmit ring.
3024  *
3025  **********************************************************************/
3026 static void
3027 em_setup_transmit_ring(struct tx_ring *txr)
3028 {
3029 	struct adapter *adapter = txr->adapter;
3030 	struct em_buffer *txbuf;
3031 	int i;
3032 
3033 	/* Clear the old descriptor contents */
3034 	EM_TX_LOCK(txr);
3035 	bzero((void *)txr->tx_base,
3036 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3037 	/* Reset indices */
3038 	txr->next_avail_desc = 0;
3039 	txr->next_to_clean = 0;
3040 
3041 	/* Free any existing tx buffers. */
3042 	txbuf = txr->tx_buffers;
3043 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3044 		if (txbuf->m_head != NULL) {
3045 			bus_dmamap_sync(txr->txtag, txbuf->map,
3046 			    BUS_DMASYNC_POSTWRITE);
3047 			bus_dmamap_unload(txr->txtag, txbuf->map);
3048 			m_freem(txbuf->m_head);
3049 			txbuf->m_head = NULL;
3050 		}
3051 		/* clear the watch index */
3052 		txbuf->next_eop = -1;
3053 	}
3054 
3055 	/* Set number of descriptors available */
3056 	txr->tx_avail = adapter->num_tx_desc;
3057 
3058 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3059 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3060 	EM_TX_UNLOCK(txr);
3061 }
3062 
3063 /*********************************************************************
3064  *
3065  *  Initialize all transmit rings.
3066  *
3067  **********************************************************************/
3068 static void
3069 em_setup_transmit_structures(struct adapter *adapter)
3070 {
3071 	struct tx_ring *txr = adapter->tx_rings;
3072 
3073 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3074 		em_setup_transmit_ring(txr);
3075 
3076 	return;
3077 }
3078 
3079 /*********************************************************************
3080  *
3081  *  Enable transmit unit.
3082  *
3083  **********************************************************************/
3084 static void
3085 em_initialize_transmit_unit(struct adapter *adapter)
3086 {
3087 	struct tx_ring	*txr = adapter->tx_rings;
3088 	struct e1000_hw	*hw = &adapter->hw;
3089 	u32	tctl, tarc, tipg = 0;
3090 
3091 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3092 
3093 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3094 		u64 bus_addr = txr->txdma.dma_paddr;
3095 		/* Base and Len of TX Ring */
3096 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3097 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3098 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3099 	    	    (u32)(bus_addr >> 32));
3100 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3101 	    	    (u32)bus_addr);
3102 		/* Init the HEAD/TAIL indices */
3103 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3104 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3105 
3106 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3107 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3108 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3109 
3110 		txr->watchdog_check = FALSE;
3111 	}
3112 
3113 	/* Set the default values for the Tx Inter Packet Gap timer */
3114 	switch (adapter->hw.mac.type) {
3115 	case e1000_82542:
3116 		tipg = DEFAULT_82542_TIPG_IPGT;
3117 		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3118 		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3119 		break;
3120 	case e1000_80003es2lan:
3121 		tipg = DEFAULT_82543_TIPG_IPGR1;
3122 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3123 		    E1000_TIPG_IPGR2_SHIFT;
3124 		break;
3125 	default:
3126 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3127 		    (adapter->hw.phy.media_type ==
3128 		    e1000_media_type_internal_serdes))
3129 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3130 		else
3131 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3132 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3133 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3134 	}
3135 
3136 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3137 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3138 
3139 	if (adapter->hw.mac.type >= e1000_82540)
3140 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3141 		    adapter->tx_abs_int_delay.value);
3142 
3143 	if ((adapter->hw.mac.type == e1000_82571) ||
3144 	    (adapter->hw.mac.type == e1000_82572)) {
3145 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3146 		tarc |= SPEED_MODE_BIT;
3147 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3148 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3149 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3150 		tarc |= 1;
3151 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3152 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3153 		tarc |= 1;
3154 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3155 	}
3156 
3157 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3158 	if (adapter->tx_int_delay.value > 0)
3159 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3160 
3161 	/* Program the Transmit Control Register */
3162 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3163 	tctl &= ~E1000_TCTL_CT;
3164 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3165 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3166 
3167 	if (adapter->hw.mac.type >= e1000_82571)
3168 		tctl |= E1000_TCTL_MULR;
3169 
3170 	/* This write will effectively turn on the transmit unit. */
3171 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3172 
3173 }
3174 
3175 
3176 /*********************************************************************
3177  *
3178  *  Free all transmit rings.
3179  *
3180  **********************************************************************/
3181 static void
3182 em_free_transmit_structures(struct adapter *adapter)
3183 {
3184 	struct tx_ring *txr = adapter->tx_rings;
3185 
3186 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3187 		EM_TX_LOCK(txr);
3188 		em_free_transmit_buffers(txr);
3189 		em_dma_free(adapter, &txr->txdma);
3190 		EM_TX_UNLOCK(txr);
3191 		EM_TX_LOCK_DESTROY(txr);
3192 	}
3193 
3194 	free(adapter->tx_rings, M_DEVBUF);
3195 }
3196 
3197 /*********************************************************************
3198  *
3199  *  Free transmit ring related data structures.
3200  *
3201  **********************************************************************/
3202 static void
3203 em_free_transmit_buffers(struct tx_ring *txr)
3204 {
3205 	struct adapter		*adapter = txr->adapter;
3206 	struct em_buffer	*txbuf;
3207 
3208 	INIT_DEBUGOUT("free_transmit_ring: begin");
3209 
3210 	if (txr->tx_buffers == NULL)
3211 		return;
3212 
3213 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3214 		txbuf = &txr->tx_buffers[i];
3215 		if (txbuf->m_head != NULL) {
3216 			bus_dmamap_sync(txr->txtag, txbuf->map,
3217 			    BUS_DMASYNC_POSTWRITE);
3218 			bus_dmamap_unload(txr->txtag,
3219 			    txbuf->map);
3220 			m_freem(txbuf->m_head);
3221 			txbuf->m_head = NULL;
3222 			if (txbuf->map != NULL) {
3223 				bus_dmamap_destroy(txr->txtag,
3224 				    txbuf->map);
3225 				txbuf->map = NULL;
3226 			}
3227 		} else if (txbuf->map != NULL) {
3228 			bus_dmamap_unload(txr->txtag,
3229 			    txbuf->map);
3230 			bus_dmamap_destroy(txr->txtag,
3231 			    txbuf->map);
3232 			txbuf->map = NULL;
3233 		}
3234 	}
3235 #if __FreeBSD_version >= 800000
3236 	if (txr->br != NULL)
3237 		buf_ring_free(txr->br, M_DEVBUF);
3238 #endif
3239 	if (txr->tx_buffers != NULL) {
3240 		free(txr->tx_buffers, M_DEVBUF);
3241 		txr->tx_buffers = NULL;
3242 	}
3243 	if (txr->txtag != NULL) {
3244 		bus_dma_tag_destroy(txr->txtag);
3245 		txr->txtag = NULL;
3246 	}
3247 	return;
3248 }
3249 
3250 
3251 /*********************************************************************
3252  *
3253  *  The offload context needs to be set when we transfer the first
3254  *  packet of a particular protocol (TCP/UDP). This routine has been
3255  *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3256  *
3257  *  Added back the old method of keeping the current context type
3258  *  and not setting if unnecessary, as this is reported to be a
3259  *  big performance win.  -jfv
3260  **********************************************************************/
3261 static void
3262 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp,
3263     u32 *txd_upper, u32 *txd_lower)
3264 {
3265 	struct adapter			*adapter = txr->adapter;
3266 	struct e1000_context_desc	*TXD = NULL;
3267 	struct em_buffer *tx_buffer;
3268 	struct ether_vlan_header *eh;
3269 	struct ip *ip = NULL;
3270 	struct ip6_hdr *ip6;
3271 	int cur, ehdrlen;
3272 	u32 cmd, hdr_len, ip_hlen;
3273 	u16 etype;
3274 	u8 ipproto;
3275 
3276 
3277 	cmd = hdr_len = ipproto = 0;
3278 	*txd_upper = *txd_lower = 0;
3279 	cur = txr->next_avail_desc;
3280 
3281 	/*
3282 	 * Determine where frame payload starts.
3283 	 * Jump over vlan headers if already present,
3284 	 * helpful for QinQ too.
3285 	 */
3286 	eh = mtod(mp, struct ether_vlan_header *);
3287 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3288 		etype = ntohs(eh->evl_proto);
3289 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3290 	} else {
3291 		etype = ntohs(eh->evl_encap_proto);
3292 		ehdrlen = ETHER_HDR_LEN;
3293 	}
3294 
3295 	/*
3296 	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3297 	 * TODO: Support SCTP too when it hits the tree.
3298 	 */
3299 	switch (etype) {
3300 	case ETHERTYPE_IP:
3301 		ip = (struct ip *)(mp->m_data + ehdrlen);
3302 		ip_hlen = ip->ip_hl << 2;
3303 
3304 		/* Setup of IP header checksum. */
3305 		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3306 			/*
3307 			 * Start offset for header checksum calculation.
3308 			 * End offset for header checksum calculation.
3309 			 * Offset of place to put the checksum.
3310 			 */
3311 			TXD = (struct e1000_context_desc *)
3312 			    &txr->tx_base[cur];
3313 			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3314 			TXD->lower_setup.ip_fields.ipcse =
3315 			    htole16(ehdrlen + ip_hlen);
3316 			TXD->lower_setup.ip_fields.ipcso =
3317 			    ehdrlen + offsetof(struct ip, ip_sum);
3318 			cmd |= E1000_TXD_CMD_IP;
3319 			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3320 		}
3321 
3322 		hdr_len = ehdrlen + ip_hlen;
3323 		ipproto = ip->ip_p;
3324 		break;
3325 
3326 	case ETHERTYPE_IPV6:
3327 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3328 		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3329 
3330 		/* IPv6 doesn't have a header checksum. */
3331 
3332 		hdr_len = ehdrlen + ip_hlen;
3333 		ipproto = ip6->ip6_nxt;
3334 		break;
3335 
3336 	default:
3337 		return;
3338 	}
3339 
3340 	switch (ipproto) {
3341 	case IPPROTO_TCP:
3342 		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3343 			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3344 			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3345 			/* no need for context if already set */
3346 			if (txr->last_hw_offload == CSUM_TCP)
3347 				return;
3348 			txr->last_hw_offload = CSUM_TCP;
3349 			/*
3350 			 * Start offset for payload checksum calculation.
3351 			 * End offset for payload checksum calculation.
3352 			 * Offset of place to put the checksum.
3353 			 */
3354 			TXD = (struct e1000_context_desc *)
3355 			    &txr->tx_base[cur];
3356 			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3357 			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3358 			TXD->upper_setup.tcp_fields.tucso =
3359 			    hdr_len + offsetof(struct tcphdr, th_sum);
3360 			cmd |= E1000_TXD_CMD_TCP;
3361 		}
3362 		break;
3363 	case IPPROTO_UDP:
3364 	{
3365 		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3366 			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3367 			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3368 			/* no need for context if already set */
3369 			if (txr->last_hw_offload == CSUM_UDP)
3370 				return;
3371 			txr->last_hw_offload = CSUM_UDP;
3372 			/*
3373 			 * Start offset for header checksum calculation.
3374 			 * End offset for header checksum calculation.
3375 			 * Offset of place to put the checksum.
3376 			 */
3377 			TXD = (struct e1000_context_desc *)
3378 			    &txr->tx_base[cur];
3379 			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3380 			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3381 			TXD->upper_setup.tcp_fields.tucso =
3382 			    hdr_len + offsetof(struct udphdr, uh_sum);
3383 		}
3384 		/* Fall Thru */
3385 	}
3386 	default:
3387 		break;
3388 	}
3389 
3390 	if (TXD == NULL)
3391 		return;
3392 	TXD->tcp_seg_setup.data = htole32(0);
3393 	TXD->cmd_and_length =
3394 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3395 	tx_buffer = &txr->tx_buffers[cur];
3396 	tx_buffer->m_head = NULL;
3397 	tx_buffer->next_eop = -1;
3398 
3399 	if (++cur == adapter->num_tx_desc)
3400 		cur = 0;
3401 
3402 	txr->tx_avail--;
3403 	txr->next_avail_desc = cur;
3404 }
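
/*
 * Worked example (illustrative): for an untagged IPv4/TCP frame with a
 * 20-byte IP header, the context descriptor written above holds
 *
 *	ipcss = 14  (start of the IP header)
 *	ipcse = 34  (ehdrlen + ip_hlen, as computed above)
 *	ipcso = 24  (14 + offsetof(struct ip, ip_sum) = 14 + 10)
 *	tucss = 34  (start of the TCP header)
 *	tucso = 50  (34 + offsetof(struct tcphdr, th_sum) = 34 + 16)
 *	tucse = 0   (checksum runs to the end of the packet)
 */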
3405 
3406 
3407 /**********************************************************************
3408  *
3409  *  Setup work for hardware segmentation offload (TSO)
3410  *
3411  **********************************************************************/
3412 static bool
3413 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper,
3414    u32 *txd_lower)
3415 {
3416 	struct adapter			*adapter = txr->adapter;
3417 	struct e1000_context_desc	*TXD;
3418 	struct em_buffer		*tx_buffer;
3419 	struct ether_vlan_header	*eh;
3420 	struct ip			*ip;
3421 	struct ip6_hdr			*ip6;
3422 	struct tcphdr			*th;
3423 	int cur, ehdrlen, hdr_len, ip_hlen, isip6;
3424 	u16 etype;
3425 
3426 	/*
3427 	 * This function could/should be extended to support IP/IPv6
3428 	 * fragmentation as well.  But as they say, one step at a time.
3429 	 */
3430 
3431 	/*
3432 	 * Determine where frame payload starts.
3433 	 * Jump over vlan headers if already present,
3434 	 * helpful for QinQ too.
3435 	 */
3436 	eh = mtod(mp, struct ether_vlan_header *);
3437 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3438 		etype = ntohs(eh->evl_proto);
3439 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3440 	} else {
3441 		etype = ntohs(eh->evl_encap_proto);
3442 		ehdrlen = ETHER_HDR_LEN;
3443 	}
3444 
3445 	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3446 	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3447 		return FALSE;	/* -1 */
3448 
3449 	/*
3450 	 * For now we only support TCP over IPv4; TCP over IPv6 is not yet handled.
3451 	 * TODO: Support SCTP too when it hits the tree.
3452 	 */
3453 	switch (etype) {
3454 	case ETHERTYPE_IP:
3455 		isip6 = 0;
3456 		ip = (struct ip *)(mp->m_data + ehdrlen);
3457 		if (ip->ip_p != IPPROTO_TCP)
3458 			return FALSE;	/* 0 */
3459 		ip->ip_len = 0;
3460 		ip->ip_sum = 0;
3461 		ip_hlen = ip->ip_hl << 2;
3462 		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3463 			return FALSE;	/* -1 */
3464 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3465 #if 1
3466 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3467 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3468 #else
3469 		th->th_sum = mp->m_pkthdr.csum_data;
3470 #endif
3471 		break;
3472 	case ETHERTYPE_IPV6:
3473 		isip6 = 1;
3474 		return FALSE;			/* Not supported yet. */
3475 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3476 		if (ip6->ip6_nxt != IPPROTO_TCP)
3477 			return FALSE;	/* 0 */
3478 		ip6->ip6_plen = 0;
3479 		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3480 		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3481 			return FALSE;	/* -1 */
3482 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3483 #if 0
3484 		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3485 		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3486 #else
3487 		th->th_sum = mp->m_pkthdr.csum_data;
3488 #endif
3489 		break;
3490 	default:
3491 		return FALSE;
3492 	}
3493 	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3494 
3495 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3496 		      E1000_TXD_DTYP_D |	/* Data descr type */
3497 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3498 
3499 	/* IP and/or TCP header checksum calculation and insertion. */
3500 	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3501 		      E1000_TXD_POPTS_TXSM) << 8;
3502 
3503 	cur = txr->next_avail_desc;
3504 	tx_buffer = &txr->tx_buffers[cur];
3505 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3506 
3507 	/* IPv6 doesn't have a header checksum. */
3508 	if (!isip6) {
3509 		/*
3510 		 * Start offset for header checksum calculation.
3511 		 * End offset for header checksum calculation.
3512 		 * Offset of place to put the checksum.
3513 		 */
3514 		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3515 		TXD->lower_setup.ip_fields.ipcse =
3516 		    htole16(ehdrlen + ip_hlen - 1);
3517 		TXD->lower_setup.ip_fields.ipcso =
3518 		    ehdrlen + offsetof(struct ip, ip_sum);
3519 	}
3520 	/*
3521 	 * Start offset for payload checksum calculation.
3522 	 * End offset for payload checksum calculation.
3523 	 * Offset of place to put the checksum.
3524 	 */
3525 	TXD->upper_setup.tcp_fields.tucss =
3526 	    ehdrlen + ip_hlen;
3527 	TXD->upper_setup.tcp_fields.tucse = 0;
3528 	TXD->upper_setup.tcp_fields.tucso =
3529 	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3530 	/*
3531 	 * Payload size per packet w/o any headers.
3532 	 * Length of all headers up to payload.
3533 	 */
3534 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3535 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3536 
3537 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3538 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3539 				E1000_TXD_CMD_TSE |	/* TSE context */
3540 				(isip6 ? 0 : E1000_TXD_CMD_IP) |
3541 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3542 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3543 
3544 	tx_buffer->m_head = NULL;
3545 	tx_buffer->next_eop = -1;
3546 
3547 	if (++cur == adapter->num_tx_desc)
3548 		cur = 0;
3549 
3550 	txr->tx_avail--;
3551 	txr->next_avail_desc = cur;
3552 	txr->tx_tso = TRUE;
3553 
3554 	return TRUE;
3555 }
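
/*
 * Illustrative example (comment only): with a 14-byte Ethernet header,
 * a 20-byte IP header and a 20-byte TCP header, hdr_len works out to 54.
 * For an mbuf chain with m_pkthdr.len = 7306 and tso_segsz (MSS) = 1448
 * the payload is 7306 - 54 = 7252 bytes, so the hardware emits
 * ceil(7252 / 1448) = 6 frames: five carrying 1448 payload bytes and a
 * final one carrying the remaining 12, each with the headers replicated
 * and the IP/TCP checksums inserted.
 */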
3556 
3557 
3558 /**********************************************************************
3559  *
3560  *  Examine each tx_buffer in the used queue. If the hardware is done
3561  *  processing the packet then free associated resources. The
3562  *  tx_buffer is put back on the free queue.
3563  *
3564  **********************************************************************/
3565 static bool
3566 em_txeof(struct tx_ring *txr)
3567 {
3568 	struct adapter	*adapter = txr->adapter;
3569         int first, last, done, num_avail;
3570         struct em_buffer *tx_buffer;
3571         struct e1000_tx_desc   *tx_desc, *eop_desc;
3572 	struct ifnet   *ifp = adapter->ifp;
3573 
3574 	EM_TX_LOCK_ASSERT(txr);
3575 
3576         if (txr->tx_avail == adapter->num_tx_desc)
3577                 return (FALSE);
3578 
3579         num_avail = txr->tx_avail;
3580         first = txr->next_to_clean;
3581         tx_desc = &txr->tx_base[first];
3582         tx_buffer = &txr->tx_buffers[first];
3583 	last = tx_buffer->next_eop;
3584         eop_desc = &txr->tx_base[last];
3585 
3586 	/*
3587 	 * Get the index of the first descriptor
3588 	 * AFTER the EOP of the first packet, so
3589 	 * that we can do a simple equality test
3590 	 * in the inner while loop below.
3591 	 */
3592 	if (++last == adapter->num_tx_desc)
3593  		last = 0;
3594 	done = last;
3595 
3596         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3597             BUS_DMASYNC_POSTREAD);
3598 
3599         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3600 		/* We clean the range of the packet */
3601 		while (first != done) {
3602                 	tx_desc->upper.data = 0;
3603                 	tx_desc->lower.data = 0;
3604                 	tx_desc->buffer_addr = 0;
3605                 	++num_avail;
3606 
3607 			if (tx_buffer->m_head) {
3608 				ifp->if_opackets++;
3609 				bus_dmamap_sync(txr->txtag,
3610 				    tx_buffer->map,
3611 				    BUS_DMASYNC_POSTWRITE);
3612 				bus_dmamap_unload(txr->txtag,
3613 				    tx_buffer->map);
3614 
3615                         	m_freem(tx_buffer->m_head);
3616                         	tx_buffer->m_head = NULL;
3617                 	}
3618 			tx_buffer->next_eop = -1;
3619 			txr->watchdog_time = ticks;
3620 
3621 	                if (++first == adapter->num_tx_desc)
3622 				first = 0;
3623 
3624 	                tx_buffer = &txr->tx_buffers[first];
3625 			tx_desc = &txr->tx_base[first];
3626 		}
3627 		/* See if we can continue to the next packet */
3628 		last = tx_buffer->next_eop;
3629 		if (last != -1) {
3630         		eop_desc = &txr->tx_base[last];
3631 			/* Get new done point */
3632 			if (++last == adapter->num_tx_desc) last = 0;
3633 			done = last;
3634 		} else
3635 			break;
3636         }
3637         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3638             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3639 
3640         txr->next_to_clean = first;
3641 
3642         /*
3643          * If we have enough room, clear IFF_DRV_OACTIVE to
3644          * tell the stack that it is OK to send packets.
3645          * If there are no pending descriptors, clear the watchdog.
3646          */
3647         if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3648                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3649                 if (num_avail == adapter->num_tx_desc) {
3650 			txr->watchdog_check = FALSE;
3651         		txr->tx_avail = num_avail;
3652 			return (FALSE);
3653 		}
3654         }
3655 
3656         txr->tx_avail = num_avail;
3657 	return (TRUE);
3658 }
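
/*
 * Illustrative sketch (comment only) of the index arithmetic in
 * em_txeof() on an 8-descriptor ring, where a packet occupies
 * descriptors 2..4 with its EOP at 4:
 *
 *	first = 2	(next_to_clean)
 *	last  = 4	(tx_buffer->next_eop)
 *	done  = 5	(one past the EOP, modulo the ring size)
 *
 * The inner loop cleans 2, 3 and 4 and stops when first == done, so a
 * single equality test suffices even across the ring wrap.
 */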
3659 
3660 
3661 /*********************************************************************
3662  *
3663  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3664  *
3665  **********************************************************************/
3666 static void
3667 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3668 {
3669 	struct adapter		*adapter = rxr->adapter;
3670 	struct mbuf		*m;
3671 	bus_dma_segment_t	segs[1];
3672 	bus_dmamap_t		map;
3673 	struct em_buffer	*rxbuf;
3674 	int			i, error, nsegs, cleaned;
3675 
3676 	i = rxr->next_to_refresh;
3677 	cleaned = -1;
3678 	while (i != limit) {
3679 		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3680 		if (m == NULL)
3681 			goto update;
3682 		m->m_len = m->m_pkthdr.len = MCLBYTES;
3683 
3684 		if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3685 			m_adj(m, ETHER_ALIGN);
3686 
3687 		/*
3688 		 * Using memory from the mbuf cluster pool, invoke the
3689 		 * bus_dma machinery to arrange the memory mapping.
3690 		 */
3691 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxr->rx_sparemap,
3692 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3693 		if (error != 0) {
3694 			m_free(m);
3695 			goto update;
3696 		}
3697 
3698 		/* If nsegs is wrong then the stack is corrupt. */
3699 		KASSERT(nsegs == 1, ("Too many segments returned!"));
3700 
3701 		rxbuf = &rxr->rx_buffers[i];
3702 		if (rxbuf->m_head != NULL)
3703 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3704 
3705 		map = rxbuf->map;
3706 		rxbuf->map = rxr->rx_sparemap;
3707 		rxr->rx_sparemap = map;
3708 		bus_dmamap_sync(rxr->rxtag,
3709 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3710 		rxbuf->m_head = m;
3711 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3712 
3713 		cleaned = i;
3714 		/* Calculate next index */
3715 		if (++i == adapter->num_rx_desc)
3716 			i = 0;
3717 		/* This is the work marker for refresh */
3718 		rxr->next_to_refresh = i;
3719 	}
3720 update:
3721 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3722 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3723 	if (cleaned != -1) /* Update tail index */
3724 		E1000_WRITE_REG(&adapter->hw,
3725 		    E1000_RDT(rxr->me), cleaned);
3726 
3727 	return;
3728 }
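
/*
 * A note on the tail update above: RDT is written with the index of the
 * last slot just refreshed, not one past it.  The hardware only fills
 * descriptors up to (but excluding) RDT, so this deliberately keeps one
 * refreshed slot out of its hands, the usual e1000 convention for
 * distinguishing a full ring from an empty one (RDH == RDT means empty).
 */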
3729 
3730 
3731 /*********************************************************************
3732  *
3733  *  Allocate memory for rx_buffer structures. Since we use one
3734  *  rx_buffer per received packet, the maximum number of rx_buffer's
3735  *  that we'll need is equal to the number of receive descriptors
3736  *  that we've allocated.
3737  *
3738  **********************************************************************/
3739 static int
3740 em_allocate_receive_buffers(struct rx_ring *rxr)
3741 {
3742 	struct adapter		*adapter = rxr->adapter;
3743 	device_t		dev = adapter->dev;
3744 	struct em_buffer	*rxbuf;
3745 	int			error;
3746 
3747 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3748 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3749 	if (rxr->rx_buffers == NULL) {
3750 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3751 		return (ENOMEM);
3752 	}
3753 
3754 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3755 				1, 0,			/* alignment, bounds */
3756 				BUS_SPACE_MAXADDR,	/* lowaddr */
3757 				BUS_SPACE_MAXADDR,	/* highaddr */
3758 				NULL, NULL,		/* filter, filterarg */
3759 				MCLBYTES,		/* maxsize */
3760 				1,			/* nsegments */
3761 				MCLBYTES,		/* maxsegsize */
3762 				0,			/* flags */
3763 				NULL,			/* lockfunc */
3764 				NULL,			/* lockarg */
3765 				&rxr->rxtag);
3766 	if (error) {
3767 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3768 		    __func__, error);
3769 		goto fail;
3770 	}
3771 
3772 	/* Create the spare map (used by getbuf) */
3773 	error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3774 	     &rxr->rx_sparemap);
3775 	if (error) {
3776 		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3777 		    __func__, error);
3778 		goto fail;
3779 	}
3780 
3781 	rxbuf = rxr->rx_buffers;
3782 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3784 		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3785 		    &rxbuf->map);
3786 		if (error) {
3787 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3788 			    __func__, error);
3789 			goto fail;
3790 		}
3791 	}
3792 
3793 	return (0);
3794 
3795 fail:
3796 	em_free_receive_structures(adapter);
3797 	return (error);
3798 }
3799 
3800 
3801 /*********************************************************************
3802  *
3803  *  Initialize a receive ring and its buffers.
3804  *
3805  **********************************************************************/
3806 static int
3807 em_setup_receive_ring(struct rx_ring *rxr)
3808 {
3809 	struct	adapter 	*adapter = rxr->adapter;
3810 	struct em_buffer	*rxbuf;
3811 	bus_dma_segment_t	seg[1];
3812 	int			rsize, nsegs, error;
3813 
3814 
3815 	/* Clear the ring contents */
3816 	EM_RX_LOCK(rxr);
3817 	rsize = roundup2(adapter->num_rx_desc *
3818 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3819 	bzero((void *)rxr->rx_base, rsize);
3820 
3821 	/*
3822 	** Free current RX buffer structs and their mbufs
3823 	*/
3824 	for (int i = 0; i < adapter->num_rx_desc; i++) {
3825 		rxbuf = &rxr->rx_buffers[i];
3826 		if (rxbuf->m_head != NULL) {
3827 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3828 			    BUS_DMASYNC_POSTREAD);
3829 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3830 			m_freem(rxbuf->m_head);
3831 		}
3832 	}
3833 
3834 	/* Now replenish the mbufs */
3835 	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3836 
3837 		rxbuf = &rxr->rx_buffers[j];
3838 		rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3839 		if (rxbuf->m_head == NULL)
3840 			panic("RX ring hdr initialization failed!\n");
3841 		rxbuf->m_head->m_len = MCLBYTES;
3842 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3843 		rxbuf->m_head->m_pkthdr.len = MCLBYTES;
3844 
3845 		/* Get the memory mapping */
3846 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3847 		    rxbuf->map, rxbuf->m_head, seg,
3848 		    &nsegs, BUS_DMA_NOWAIT);
3849 		if (error != 0)
3850 			panic("RX ring dma initialization failed!\n");
3851 		bus_dmamap_sync(rxr->rxtag,
3852 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3853 
3854 		/* Update descriptor */
3855 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3856 	}
3857 
3858 
3859 	/* Setup our descriptor indices */
3860 	rxr->next_to_check = 0;
3861 	rxr->next_to_refresh = 0;
3862 
3863 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3864 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3865 
3866 	EM_RX_UNLOCK(rxr);
3867 	return (0);
3868 }
3869 
3870 /*********************************************************************
3871  *
3872  *  Initialize all receive rings.
3873  *
3874  **********************************************************************/
3875 static int
3876 em_setup_receive_structures(struct adapter *adapter)
3877 {
3878 	struct rx_ring *rxr = adapter->rx_rings;
3879 	int j;
3880 
3881 	for (j = 0; j < adapter->num_queues; j++, rxr++)
3882 		if (em_setup_receive_ring(rxr))
3883 			goto fail;
3884 
3885 	return (0);
3886 fail:
3887 	/*
3888 	 * Free RX buffers allocated so far, we will only handle
3889 	 * the rings that completed, the failing case will have
3890 	 * cleaned up for itself. 'j' failed, so it's the terminus.
3891 	 */
3892 	for (int i = 0; i < j; ++i) {
3893 		rxr = &adapter->rx_rings[i];
3894 		for (int n = 0; n < adapter->num_rx_desc; n++) {
3895 			struct em_buffer *rxbuf;
3896 			rxbuf = &rxr->rx_buffers[n];
3897 			if (rxbuf->m_head != NULL) {
3898 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3899 			  	  BUS_DMASYNC_POSTREAD);
3900 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3901 				m_freem(rxbuf->m_head);
3902 				rxbuf->m_head = NULL;
3903 			}
3904 		}
3905 	}
3906 
3907 	return (ENOBUFS);
3908 }
3909 
3910 /*********************************************************************
3911  *
3912  *  Free all receive rings.
3913  *
3914  **********************************************************************/
3915 static void
3916 em_free_receive_structures(struct adapter *adapter)
3917 {
3918 	struct rx_ring *rxr = adapter->rx_rings;
3919 
3920 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3921 		em_free_receive_buffers(rxr);
3922 		/* Free the ring memory as well */
3923 		em_dma_free(adapter, &rxr->rxdma);
3924 		EM_RX_LOCK_DESTROY(rxr);
3925 	}
3926 
3927 	free(adapter->rx_rings, M_DEVBUF);
3928 }
3929 
3930 
3931 /*********************************************************************
3932  *
3933  *  Free receive ring data structures
3934  *
3935  **********************************************************************/
3936 static void
3937 em_free_receive_buffers(struct rx_ring *rxr)
3938 {
3939 	struct adapter		*adapter = rxr->adapter;
3940 	struct em_buffer	*rxbuf = NULL;
3941 
3942 	INIT_DEBUGOUT("free_receive_buffers: begin");
3943 
3944 	if (rxr->rx_sparemap) {
3945 		bus_dmamap_destroy(rxr->rxtag, rxr->rx_sparemap);
3946 		rxr->rx_sparemap = NULL;
3947 	}
3948 
3949 	if (rxr->rx_buffers != NULL) {
3950 		for (int i = 0; i < adapter->num_rx_desc; i++) {
3951 			rxbuf = &rxr->rx_buffers[i];
3952 			if (rxbuf->map != NULL) {
3953 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3954 				    BUS_DMASYNC_POSTREAD);
3955 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3956 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
3957 			}
3958 			if (rxbuf->m_head != NULL) {
3959 				m_freem(rxbuf->m_head);
3960 				rxbuf->m_head = NULL;
3961 			}
3962 		}
3963 		free(rxr->rx_buffers, M_DEVBUF);
3964 		rxr->rx_buffers = NULL;
3965 	}
3966 
3967 	if (rxr->rxtag != NULL) {
3968 		bus_dma_tag_destroy(rxr->rxtag);
3969 		rxr->rxtag = NULL;
3970 	}
3971 
3972 	return;
3973 }
3974 
3975 
3976 /*********************************************************************
3977  *
3978  *  Enable receive unit.
3979  *
3980  **********************************************************************/
3981 #define MAX_INTS_PER_SEC	8000
3982 #define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
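/*
 * Worked out (illustrative): 1000000000 / (8000 * 256) = 488 in integer
 * arithmetic.  The ITR register counts the minimum interval between
 * interrupts in 256ns units, so 488 * 256ns is roughly 125us, capping
 * the device at about 8000 interrupts per second.
 */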
3983 
3984 static void
3985 em_initialize_receive_unit(struct adapter *adapter)
3986 {
3987 	struct rx_ring	*rxr = adapter->rx_rings;
3988 	struct ifnet	*ifp = adapter->ifp;
3989 	struct e1000_hw	*hw = &adapter->hw;
3990 	u64	bus_addr;
3991 	u32	rctl, rxcsum;
3992 
3993 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
3994 
3995 	/*
3996 	 * Make sure receives are disabled while setting
3997 	 * up the descriptor ring
3998 	 */
3999 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4000 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4001 
4002 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4003 	    adapter->rx_abs_int_delay.value);
4004 	/*
4005 	 * Set the interrupt throttling rate. The ITR register counts
4006 	 * in 256ns units, so DEFAULT_ITR = 10^9 / (MAX_INTS_PER_SEC * 256).
4007 	 */
4008 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4009 
4010 	/*
4011 	** When using MSIX interrupts we need to throttle
4012 	** using the EITR register (82574 only)
4013 	*/
4014 	if (hw->mac.type == e1000_82574)
4015 		for (int i = 0; i < 4; i++)
4016 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4017 			    DEFAULT_ITR);
4018 
4019 	/* Disable accelerated acknowledge */
4020 	if (adapter->hw.mac.type == e1000_82574)
4021 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4022 
4023 	if (ifp->if_capenable & IFCAP_RXCSUM) {
4024 		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4025 		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4026 		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4027 	}
4028 
4029 	/*
4030 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4031 	** long latencies are observed, such as on the Lenovo X60. This
4032 	** change eliminates the problem, but since having positive
4033 	** values in RDTR is a known source of problems on other
4034 	** platforms another solution is being sought.
4035 	*/
4036 	if (hw->mac.type == e1000_82573)
4037 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4038 
4039 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4040 		/* Setup the Base and Length of the Rx Descriptor Ring */
4041 		bus_addr = rxr->rxdma.dma_paddr;
4042 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4043 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4044 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4045 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4046 		/* Setup the Head and Tail Descriptor Pointers */
4047 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4048 		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4049 	}
4050 
4051 	/* Setup the Receive Control Register */
4052 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4053 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4054 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4055 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4056 
4057         /* Strip the CRC */
4058         rctl |= E1000_RCTL_SECRC;
4059 
4060         /* Make sure VLAN Filters are off */
4061         rctl &= ~E1000_RCTL_VFE;
4062 	rctl &= ~E1000_RCTL_SBP;
4063 	rctl |= E1000_RCTL_SZ_2048;
4064 	if (ifp->if_mtu > ETHERMTU)
4065 		rctl |= E1000_RCTL_LPE;
4066 	else
4067 		rctl &= ~E1000_RCTL_LPE;
4068 
4069 	/* Write out the settings */
4070 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4071 
4072 	return;
4073 }
4074 
4075 
4076 /*********************************************************************
4077  *
4078  *  This routine executes in interrupt context. It replenishes
4079  *  the mbufs in the descriptor ring and passes data which has been
4080  *  DMA'd into host memory up to the upper layer.
4081  *
4082  *  We loop at most count times if count is > 0, or until done if
4083  *  count < 0.
4084  *
4085  *  For polling we also now return the number of cleaned packets
4086  *********************************************************************/
4087 static bool
4088 em_rxeof(struct rx_ring *rxr, int count, int *done)
4089 {
4090 	struct adapter		*adapter = rxr->adapter;
4091 	struct ifnet		*ifp = adapter->ifp;
4092 	struct mbuf		*mp, *sendmp;
4093 	u8			status = 0;
4094 	u16 			len;
4095 	int			i, processed, rxdone = 0;
4096 	bool			eop;
4097 	struct e1000_rx_desc	*cur;
4098 
4099 	EM_RX_LOCK(rxr);
4100 
4101 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4102 
4103 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4104 			break;
4105 
4106 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4107 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4108 
4109 		cur = &rxr->rx_base[i];
4110 		status = cur->status;
4111 		mp = sendmp = NULL;
4112 
4113 		if ((status & E1000_RXD_STAT_DD) == 0)
4114 			break;
4115 
4116 		len = le16toh(cur->length);
4117 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4118 		count--;
4119 
4120 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) {
4121 
4122 			/* Assign correct length to the current fragment */
4123 			mp = rxr->rx_buffers[i].m_head;
4124 			mp->m_len = len;
4125 
4126 			if (rxr->fmp == NULL) {
4127 				mp->m_pkthdr.len = len;
4128 				rxr->fmp = mp; /* Store the first mbuf */
4129 				rxr->lmp = mp;
4130 			} else {
4131 				/* Chain mbuf's together */
4132 				mp->m_flags &= ~M_PKTHDR;
4133 				rxr->lmp->m_next = mp;
4134 				rxr->lmp = rxr->lmp->m_next;
4135 				rxr->fmp->m_pkthdr.len += len;
4136 			}
4137 
4138 			if (eop) {
4139 				rxr->fmp->m_pkthdr.rcvif = ifp;
4140 				ifp->if_ipackets++;
4141 				em_receive_checksum(cur, rxr->fmp);
4142 #ifndef __NO_STRICT_ALIGNMENT
4143 				if (adapter->max_frame_size >
4144 				    (MCLBYTES - ETHER_ALIGN) &&
4145 				    em_fixup_rx(rxr) != 0)
4146 					goto skip;
4147 #endif
4148 				if (status & E1000_RXD_STAT_VP) {
4149 					rxr->fmp->m_pkthdr.ether_vtag =
4150 					    (le16toh(cur->special) &
4151 					    E1000_RXD_SPC_VLAN_MASK);
4152 					rxr->fmp->m_flags |= M_VLANTAG;
4153 				}
4154 #ifdef EM_MULTIQUEUE
4155 				rxr->fmp->m_pkthdr.flowid = curcpu;
4156 				rxr->fmp->m_flags |= M_FLOWID;
4157 #endif
4158 #ifndef __NO_STRICT_ALIGNMENT
4159 skip:
4160 #endif
4161 				sendmp = rxr->fmp;
4162 				rxr->fmp = NULL;
4163 				rxr->lmp = NULL;
4164 			}
4165 		} else {
4166 			ifp->if_ierrors++;
4167 			/* Reuse loaded DMA map and just update mbuf chain */
4168 			mp = rxr->rx_buffers[i].m_head;
4169 			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4170 			mp->m_data = mp->m_ext.ext_buf;
4171 			mp->m_next = NULL;
4172 			if (adapter->max_frame_size <=
4173 			    (MCLBYTES - ETHER_ALIGN))
4174 				m_adj(mp, ETHER_ALIGN);
4175 			if (rxr->fmp != NULL) {
4176 				m_freem(rxr->fmp);
4177 				rxr->fmp = NULL;
4178 				rxr->lmp = NULL;
4179 			}
4180 			sendmp = NULL;
4181 		}
4182 
4183 		/* Zero out the receive descriptors status. */
4184 		cur->status = 0;
4185 		++rxdone;	/* cumulative for POLL */
4186 		++processed;
4187 
4188 		/* Advance our pointers to the next descriptor. */
4189 		if (++i == adapter->num_rx_desc)
4190 			i = 0;
4191 
4192 		/* Send to the stack */
4193 		if (sendmp != NULL) {
4194 			rxr->next_to_check = i;
4195 			EM_RX_UNLOCK(rxr);
4196 			(*ifp->if_input)(ifp, sendmp);
4197 			EM_RX_LOCK(rxr);
4198 			i = rxr->next_to_check;
4199 		}
4200 
4201 		/* Only refresh mbufs every 8 descriptors */
4202 		if (processed == 8) {
4203 			em_refresh_mbufs(rxr, i);
4204 			processed = 0;
4205 		}
4206 	}
4207 
4208 	/* Catch any remaining refresh work */
4209 	if (processed != 0) {
4210 		em_refresh_mbufs(rxr, i);
4211 		processed = 0;
4212 	}
4213 
4214 	rxr->next_to_check = i;
4215 	if (done != NULL)
4216 		*done = rxdone;
4217 	EM_RX_UNLOCK(rxr);
4218 
4219 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4220 }
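
#if 0
/*
 * Hypothetical caller sketch, not part of the driver: how a polling
 * path might consume the "done" out-parameter.  The names "budget",
 * "rx_done" and em_poll_sketch() are illustrative only.
 */
static void
em_poll_sketch(struct rx_ring *rxr, int budget)
{
	int rx_done;

	/* Clean at most "budget" descriptors; rx_done reports how many. */
	(void) em_rxeof(rxr, budget, &rx_done);
	/* A poll handler would typically report rx_done back upward. */
}
#endif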
4221 
4222 #ifndef __NO_STRICT_ALIGNMENT
4223 /*
4224  * When jumbo frames are enabled we must realign the entire payload on
4225  * architectures with strict alignment. This is a serious design mistake
4226  * of the 8254x, as it nullifies the benefit of DMA. The 8254x only
4227  * allows RX buffer sizes of 2048/4096/8192/16384; what we really want
4228  * is 2048 - ETHER_ALIGN, so that the payload would be aligned. On
4229  * architectures without strict alignment restrictions the 8254x still
4230  * performs unaligned memory accesses, which reduces performance as well.
4231  * To avoid copying an entire frame to realign it, we allocate a new
4232  * mbuf, copy the ethernet header into it, and prepend it to the chain.
4233  *
4234  * Be aware that best performance of the 8254x is achieved only when
4235  * jumbo frames are not used at all on strict-alignment architectures.
4236  */
4237 static int
4238 em_fixup_rx(struct rx_ring *rxr)
4239 {
4240 	struct adapter *adapter = rxr->adapter;
4241 	struct mbuf *m, *n;
4242 	int error;
4243 
4244 	error = 0;
4245 	m = rxr->fmp;
4246 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4247 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4248 		m->m_data += ETHER_HDR_LEN;
4249 	} else {
4250 		MGETHDR(n, M_DONTWAIT, MT_DATA);
4251 		if (n != NULL) {
4252 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4253 			m->m_data += ETHER_HDR_LEN;
4254 			m->m_len -= ETHER_HDR_LEN;
4255 			n->m_len = ETHER_HDR_LEN;
4256 			M_MOVE_PKTHDR(n, m);
4257 			n->m_next = m;
4258 			rxr->fmp = n;
4259 		} else {
4260 			adapter->dropped_pkts++;
4261 			m_freem(rxr->fmp);
4262 			rxr->fmp = NULL;
4263 			error = ENOMEM;
4264 		}
4265 	}
4266 
4267 	return (error);
4268 }
4269 #endif
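
/*
 * Illustrative numbers (comment only): a cluster is MCLBYTES (2048)
 * bytes.  When the maximum frame fits in MCLBYTES - ETHER_ALIGN, the
 * m_adj(m, ETHER_ALIGN) in em_refresh_mbufs() shifts the 14-byte
 * Ethernet header by two bytes so the IP header that follows lands on
 * a 4-byte boundary.  With jumbo frames that 2-byte shim cannot be used
 * (the 8254x accepts only power-of-two buffer sizes), so the IP header
 * would sit misaligned at offset 14 and em_fixup_rx() instead moves the
 * Ethernet header to realign the payload.
 */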
4270 
4271 /*********************************************************************
4272  *
4273  *  Verify that the hardware indicated that the checksum is valid.
4274  *  Inform the stack about the status of checksum so that stack
4275  *  doesn't spend time verifying the checksum.
4276  *
4277  *********************************************************************/
4278 static void
4279 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4280 {
4281 	/* Ignore Checksum bit is set */
4282 	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4283 		mp->m_pkthdr.csum_flags = 0;
4284 		return;
4285 	}
4286 
4287 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4288 		/* Did it pass? */
4289 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4290 			/* IP Checksum Good */
4291 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4292 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4293 
4294 		} else {
4295 			mp->m_pkthdr.csum_flags = 0;
4296 		}
4297 	}
4298 
4299 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4300 		/* Did it pass? */
4301 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4302 			mp->m_pkthdr.csum_flags |=
4303 			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4304 			mp->m_pkthdr.csum_data = htons(0xffff);
4305 		}
4306 	}
4307 }
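
/*
 * Consumer-side sketch (comment only): with CSUM_DATA_VALID |
 * CSUM_PSEUDO_HDR set and csum_data = 0xffff, the TCP input path does,
 * roughly,
 *
 *	th->th_sum = m->m_pkthdr.csum_data ^ 0xffff;	-> 0
 *
 * and a resulting th_sum of zero is accepted without rerunning
 * in_cksum() over the payload.
 */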
4308 
4309 /*
4310  * This routine is run via a vlan
4311  * config EVENT
4312  */
4313 static void
4314 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4315 {
4316 	struct adapter	*adapter = ifp->if_softc;
4317 	u32		index, bit;
4318 
4319 	if (ifp->if_softc !=  arg)   /* Not our event */
4320 		return;
4321 
4322 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4323                 return;
4324 
4325 	index = (vtag >> 5) & 0x7F;
4326 	bit = vtag & 0x1F;
4327 	em_shadow_vfta[index] |= (1 << bit);
4328 	++adapter->num_vlans;
4329 	/* Re-init to load the changes */
4330 	em_init(adapter);
4331 }
4332 
4333 /*
4334  * This routine is run via a vlan
4335  * unconfig EVENT
4336  */
4337 static void
4338 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4339 {
4340 	struct adapter	*adapter = ifp->if_softc;
4341 	u32		index, bit;
4342 
4343 	if (ifp->if_softc !=  arg)
4344 		return;
4345 
4346 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4347                 return;
4348 
4349 	index = (vtag >> 5) & 0x7F;
4350 	bit = vtag & 0x1F;
4351 	em_shadow_vfta[index] &= ~(1 << bit);
4352 	--adapter->num_vlans;
4353 	/* Re-init to load the changes */
4354 	em_init(adapter);
4355 }
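
/*
 * Worked example (comment only): for vtag 1234 (0x4d2),
 *
 *	index = (1234 >> 5) & 0x7F = 38
 *	bit   =  1234 & 0x1F       = 18
 *
 * so the tag lives in bit 18 of em_shadow_vfta[38], mirroring the
 * layout of the hardware VLAN filter table that
 * em_setup_vlan_hw_support() writes back after a reset.
 */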
4356 
4357 static void
4358 em_setup_vlan_hw_support(struct adapter *adapter)
4359 {
4360 	struct e1000_hw *hw = &adapter->hw;
4361 	u32             reg;
4362 
4363 	/*
4364 	** We get here via init_locked, meaning a
4365 	** soft reset, which has already cleared
4366 	** the VFTA and other state; if no vlans
4367 	** have been registered, do nothing.
4368 	*/
4369 	if (adapter->num_vlans == 0)
4370                 return;
4371 
4372 	/*
4373 	** A soft reset zeroes out the VFTA, so
4374 	** we need to repopulate it now.
4375 	*/
4376 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4377                 if (em_shadow_vfta[i] != 0)
4378 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4379                             i, em_shadow_vfta[i]);
4380 
4381 	reg = E1000_READ_REG(hw, E1000_CTRL);
4382 	reg |= E1000_CTRL_VME;
4383 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4384 
4385 	/* Enable the Filter Table */
4386 	reg = E1000_READ_REG(hw, E1000_RCTL);
4387 	reg &= ~E1000_RCTL_CFIEN;
4388 	reg |= E1000_RCTL_VFE;
4389 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4390 
4391 	/* Update the frame size */
4392 	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4393 	    adapter->max_frame_size + VLAN_TAG_SIZE);
4394 }
4395 
4396 static void
4397 em_enable_intr(struct adapter *adapter)
4398 {
4399 	struct e1000_hw *hw = &adapter->hw;
4400 	u32 ims_mask = IMS_ENABLE_MASK;
4401 
4402 	if (hw->mac.type == e1000_82574) {
4403 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4404 		ims_mask |= EM_MSIX_MASK;
4405 	}
4406 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4407 }
4408 
4409 static void
4410 em_disable_intr(struct adapter *adapter)
4411 {
4412 	struct e1000_hw *hw = &adapter->hw;
4413 
4414 	if (hw->mac.type == e1000_82574)
4415 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4416 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4417 }
4418 
4419 /*
4420  * Bit of a misnomer, what this really means is
4421  * to enable OS management of the system... aka
4422  * to disable special hardware management features
4423  */
4424 static void
4425 em_init_manageability(struct adapter *adapter)
4426 {
4427 	/* A shared code workaround */
4428 #define E1000_82542_MANC2H E1000_MANC2H
4429 	if (adapter->has_manage) {
4430 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4431 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4432 
4433 		/* disable hardware interception of ARP */
4434 		manc &= ~(E1000_MANC_ARP_EN);
4435 
4436                 /* enable receiving management packets to the host */
4437 		manc |= E1000_MANC_EN_MNG2HOST;
4438 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4439 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4440 		manc2h |= E1000_MNG2HOST_PORT_623;
4441 		manc2h |= E1000_MNG2HOST_PORT_664;
4442 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4443 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4444 	}
4445 }
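
/*
 * The two port bits above correspond to UDP ports 623 (ASF RMCP) and
 * 664 (ASF secure RMCP): setting them in MANC2H routes management
 * traffic on those ports up to the host stack instead of leaving it to
 * be intercepted by the firmware.
 */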
4446 
4447 /*
4448  * Give control back to hardware management
4449  * controller if there is one.
4450  */
4451 static void
4452 em_release_manageability(struct adapter *adapter)
4453 {
4454 	if (adapter->has_manage) {
4455 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4456 
4457 		/* re-enable hardware interception of ARP */
4458 		manc |= E1000_MANC_ARP_EN;
4459 		manc &= ~E1000_MANC_EN_MNG2HOST;
4460 
4461 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4462 	}
4463 }
4464 
4465 /*
4466  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4467  * For ASF and Pass Through versions of f/w this means
4468  * that the driver is loaded. For AMT versions of the f/w
4469  * this means that the network i/f is open.
4470  */
4471 static void
4472 em_get_hw_control(struct adapter *adapter)
4473 {
4474 	u32 ctrl_ext, swsm;
4475 
4476 	if (adapter->hw.mac.type == e1000_82573) {
4477 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4478 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4479 		    swsm | E1000_SWSM_DRV_LOAD);
4480 		return;
4481 	}
4482 	/* else */
4483 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4484 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4485 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4486 	return;
4487 }
4488 
4489 /*
4490  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4491  * For ASF and Pass Through versions of f/w this means that
4492  * the driver is no longer loaded. For AMT versions of the
4493  * f/w this means that the network i/f is closed.
4494  */
4495 static void
4496 em_release_hw_control(struct adapter *adapter)
4497 {
4498 	u32 ctrl_ext, swsm;
4499 
4500 	if (!adapter->has_manage)
4501 		return;
4502 
4503 	if (adapter->hw.mac.type == e1000_82573) {
4504 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4505 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4506 		    swsm & ~E1000_SWSM_DRV_LOAD);
4507 		return;
4508 	}
4509 	/* else */
4510 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4511 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4512 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4513 	return;
4514 }
4515 
4516 static int
4517 em_is_valid_ether_addr(u8 *addr)
4518 {
4519 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4520 
4521 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4522 		return (FALSE);
4523 	}
4524 
4525 	return (TRUE);
4526 }
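
/*
 * Example (comment only): addr[0] & 1 tests the Ethernet group bit, so
 * a multicast address such as 01:00:5e:00:00:01 is rejected, as is
 * ff:ff:ff:ff:ff:ff; the all-zero address fails the bcmp() test, while
 * an ordinary unicast MAC like 00:1b:21:xx:xx:xx passes.
 */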
4527 
4528 /*
4529 ** Parse the interface capabilities with regard
4530 ** to both system management and wake-on-lan for
4531 ** later use.
4532 */
4533 static void
4534 em_get_wakeup(device_t dev)
4535 {
4536 	struct adapter	*adapter = device_get_softc(dev);
4537 	u16		eeprom_data = 0, device_id, apme_mask;
4538 
4539 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4540 	apme_mask = EM_EEPROM_APME;
4541 
4542 	switch (adapter->hw.mac.type) {
4543 	case e1000_82573:
4544 	case e1000_82583:
4545 		adapter->has_amt = TRUE;
4546 		/* FALLTHROUGH */
4547 	case e1000_82571:
4548 	case e1000_82572:
4549 	case e1000_80003es2lan:
4550 		if (adapter->hw.bus.func == 1) {
4551 			e1000_read_nvm(&adapter->hw,
4552 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4553 			break;
4554 		} else
4555 			e1000_read_nvm(&adapter->hw,
4556 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4557 		break;
4558 	case e1000_ich8lan:
4559 	case e1000_ich9lan:
4560 	case e1000_ich10lan:
4561 	case e1000_pchlan:
4562 		apme_mask = E1000_WUC_APME;
4563 		adapter->has_amt = TRUE;
4564 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4565 		break;
4566 	default:
4567 		e1000_read_nvm(&adapter->hw,
4568 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4569 		break;
4570 	}
4571 	if (eeprom_data & apme_mask)
4572 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4573 	/*
4574          * We have the eeprom settings, now apply the special cases
4575          * where the eeprom may be wrong or the board won't support
4576          * wake on lan on a particular port
4577 	 */
4578 	device_id = pci_get_device(dev);
4579         switch (device_id) {
4580 	case E1000_DEV_ID_82571EB_FIBER:
4581 		/* Wake events only supported on port A for dual fiber
4582 		 * regardless of eeprom setting */
4583 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4584 		    E1000_STATUS_FUNC_1)
4585 			adapter->wol = 0;
4586 		break;
4587 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4588 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4589 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4590                 /* if quad port adapter, disable WoL on all but port A */
4591 		if (global_quad_port_a != 0)
4592 			adapter->wol = 0;
4593 		/* Reset for multiple quad port adapters */
4594 		if (++global_quad_port_a == 4)
4595 			global_quad_port_a = 0;
4596                 break;
4597 	}
4598 	return;
4599 }
4600 
4601 
4602 /*
4603  * Enable PCI Wake On Lan capability
4604  */
4605 static void
4606 em_enable_wakeup(device_t dev)
4607 {
4608 	struct adapter	*adapter = device_get_softc(dev);
4609 	struct ifnet	*ifp = adapter->ifp;
4610 	u32		pmc, ctrl, ctrl_ext, rctl;
4611 	u16     	status;
4612 
4613 	if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4614 		return;
4615 
4616 	/* Advertise the wakeup capability */
4617 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4618 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4619 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4620 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4621 
4622 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4623 	    (adapter->hw.mac.type == e1000_pchlan) ||
4624 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4625 	    (adapter->hw.mac.type == e1000_ich10lan)) {
4626 		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4627 		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4628 	}
4629 
4630 	/* Keep the laser running on Fiber adapters */
4631 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4632 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4633 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4634 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4635 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4636 	}
4637 
4638 	/*
4639 	** Determine type of Wakeup: note that wol
4640 	** is set with all bits on by default.
4641 	*/
4642 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4643 		adapter->wol &= ~E1000_WUFC_MAG;
4644 
4645 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4646 		adapter->wol &= ~E1000_WUFC_MC;
4647 	else {
4648 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4649 		rctl |= E1000_RCTL_MPE;
4650 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4651 	}
4652 
4653 	if (adapter->hw.mac.type == e1000_pchlan) {
4654 		if (em_enable_phy_wakeup(adapter))
4655 			return;
4656 	} else {
4657 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4658 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4659 	}
4660 
4661 	if (adapter->hw.phy.type == e1000_phy_igp_3)
4662 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4663 
4664         /* Request PME */
4665         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4666 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4667 	if (ifp->if_capenable & IFCAP_WOL)
4668 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4669         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4670 
4671 	return;
4672 }
4673 
4674 /*
4675 ** WOL in the newer chipset interfaces (pchlan)
4676 ** requires things to be copied into the PHY
4677 */
4678 static int
4679 em_enable_phy_wakeup(struct adapter *adapter)
4680 {
4681 	struct e1000_hw *hw = &adapter->hw;
4682 	u32 mreg, ret = 0;
4683 	u16 preg;
4684 
4685 	/* copy MAC RARs to PHY RARs */
4686 	for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4687 		mreg = E1000_READ_REG(hw, E1000_RAL(i));
4688 		e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4689 		e1000_write_phy_reg(hw, BM_RAR_M(i),
4690 		    (u16)((mreg >> 16) & 0xFFFF));
4691 		mreg = E1000_READ_REG(hw, E1000_RAH(i));
4692 		e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4693 		e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4694 		    (u16)((mreg >> 16) & 0xFFFF));
4695 	}
4696 
4697 	/* copy MAC MTA to PHY MTA */
4698 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4699 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4700 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4701 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4702 		    (u16)((mreg >> 16) & 0xFFFF));
4703 	}
4704 
4705 	/* configure PHY Rx Control register */
4706 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4707 	mreg = E1000_READ_REG(hw, E1000_RCTL);
4708 	if (mreg & E1000_RCTL_UPE)
4709 		preg |= BM_RCTL_UPE;
4710 	if (mreg & E1000_RCTL_MPE)
4711 		preg |= BM_RCTL_MPE;
4712 	preg &= ~(BM_RCTL_MO_MASK);
4713 	if (mreg & E1000_RCTL_MO_3)
4714 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4715 				<< BM_RCTL_MO_SHIFT);
4716 	if (mreg & E1000_RCTL_BAM)
4717 		preg |= BM_RCTL_BAM;
4718 	if (mreg & E1000_RCTL_PMCF)
4719 		preg |= BM_RCTL_PMCF;
4720 	mreg = E1000_READ_REG(hw, E1000_CTRL);
4721 	if (mreg & E1000_CTRL_RFCE)
4722 		preg |= BM_RCTL_RFCE;
4723 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4724 
4725 	/* enable PHY wakeup in MAC register */
4726 	E1000_WRITE_REG(hw, E1000_WUC,
4727 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4728 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4729 
4730 	/* configure and enable PHY wakeup in PHY registers */
4731 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4732 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4733 
4734 	/* activate PHY wakeup */
4735 	ret = hw->phy.ops.acquire(hw);
4736 	if (ret) {
4737 		printf("Could not acquire PHY\n");
4738 		return ret;
4739 	}
4740 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4741 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4742 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4743 	if (ret) {
4744 		printf("Could not read PHY page 769\n");
4745 		goto out;
4746 	}
4747 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4748 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4749 	if (ret)
4750 		printf("Could not set PHY Host Wakeup bit\n");
4751 out:
4752 	hw->phy.ops.release(hw);
4753 
4754 	return ret;
4755 }
4756 
4757 static void
4758 em_led_func(void *arg, int onoff)
4759 {
4760 	struct adapter	*adapter = arg;
4761 
4762 	EM_CORE_LOCK(adapter);
4763 	if (onoff) {
4764 		e1000_setup_led(&adapter->hw);
4765 		e1000_led_on(&adapter->hw);
4766 	} else {
4767 		e1000_led_off(&adapter->hw);
4768 		e1000_cleanup_led(&adapter->hw);
4769 	}
4770 	EM_CORE_UNLOCK(adapter);
4771 }
4772 
4773 /**********************************************************************
4774  *
4775  *  Update the board statistics counters.
4776  *
4777  **********************************************************************/
4778 static void
4779 em_update_stats_counters(struct adapter *adapter)
4780 {
4781 	struct ifnet   *ifp;
4782 
4783 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4784 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4785 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4786 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4787 	}
4788 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4789 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4790 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4791 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4792 
4793 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4794 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4795 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4796 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4797 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4798 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4799 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4800 	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4801 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4802 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4803 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4804 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4805 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4806 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4807 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4808 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4809 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4810 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4811 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4812 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4813 
4814 	/* For the 64-bit byte counters the low dword must be read first. */
4815 	/* Both registers clear on the read of the high dword */
4816 
4817 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4818 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4819 
4820 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4821 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4822 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4823 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4824 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4825 
4826 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4827 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4828 
4829 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4830 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4831 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4832 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4833 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4834 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4835 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4836 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4837 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4838 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4839 
4840 	if (adapter->hw.mac.type >= e1000_82543) {
4841 		adapter->stats.algnerrc +=
4842 		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4843 		adapter->stats.rxerrc +=
4844 		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4845 		adapter->stats.tncrs +=
4846 		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4847 		adapter->stats.cexterr +=
4848 		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4849 		adapter->stats.tsctc +=
4850 		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4851 		adapter->stats.tsctfc +=
4852 		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4853 	}
4854 	ifp = adapter->ifp;
4855 
4856 	ifp->if_collisions = adapter->stats.colc;
4857 
4858 	/* Rx Errors */
4859 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4860 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4861 	    adapter->stats.ruc + adapter->stats.roc +
4862 	    adapter->stats.mpc + adapter->stats.cexterr;
4863 
4864 	/* Tx Errors */
4865 	ifp->if_oerrors = adapter->stats.ecol +
4866 	    adapter->stats.latecol + adapter->watchdog_events;
4867 }
4868 
4869 
4870 /*
4871  * Add sysctl variables, one per statistic, to the system.
4872  */
4873 static void
4874 em_add_hw_stats(struct adapter *adapter)
4875 {
4876 
4877 	device_t dev = adapter->dev;
4878 
4879 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
4880 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
4881 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
4882 	struct e1000_hw_stats *stats = &adapter->stats;
4883 
4884 	struct sysctl_oid *stat_node, *int_node, *host_node;
4885 	struct sysctl_oid_list *stat_list, *int_list, *host_list;
4886 
4887 	/* Driver Statistics */
4888 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
4889 			CTLFLAG_RD, &adapter->link_irq, 0,
4890 			"Link MSIX IRQ Handled");
4891 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
4892 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
4893 			 "Std mbuf failed");
4894 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
4895 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
4896 			 "Std mbuf cluster failed");
4897 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
4898 			CTLFLAG_RD, &adapter->dropped_pkts,
4899 			"Driver dropped packets");
4900 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
4901 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
4902 			"Driver tx dma failure in xmit");
4903 
4904 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
4905 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
4906 			"Flow Control High Watermark");
4907 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
4908 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
4909 			"Flow Control Low Watermark");
4910 
4911 	/* MAC stats get their own sub node */
4912 
4913 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
4914 				    CTLFLAG_RD, NULL, "Statistics");
4915 	stat_list = SYSCTL_CHILDREN(stat_node);
4916 
4917 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
4918 			CTLFLAG_RD, &stats->ecol,
4919 			"Excessive collisions");
4920 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
4921 			CTLFLAG_RD, &adapter->stats.symerrs,
4922 			"Symbol Errors");
4923 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
4924 			CTLFLAG_RD, &adapter->stats.sec,
4925 			"Sequence Errors");
4926 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
4927 			CTLFLAG_RD, &adapter->stats.dc,
4928 			"Defer Count");
4929 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
4930 			CTLFLAG_RD, &adapter->stats.mpc,
4931 			"Missed Packets");
4932 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
4933 			CTLFLAG_RD, &adapter->stats.rnbc,
4934 			"Receive No Buffers");
4935 	/* RLEC is inaccurate on some hardware, calculate our own. */
4936 /* 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_len_errs", */
4937 /* 			CTLFLAG_RD, adapter->stats.roc + adapter->stats.ruc, */
4938 /* 			"Receive Length Errors"); */
4939 
4940 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
4941 			CTLFLAG_RD, &adapter->stats.rxerrc,
4942 			"Receive Errors");
4943 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
4944 			CTLFLAG_RD, &adapter->stats.crcerrs,
4945 			"CRC errors");
4946 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
4947 			CTLFLAG_RD, &adapter->stats.algnerrc,
4948 			"Alignment Errors");
4949 	/* On 82575 these are collision counts */
4950 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
4951 			CTLFLAG_RD, &adapter->stats.cexterr,
4952 			"Collision/Carrier extension errors");
4953 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_overruns",
4954 			CTLFLAG_RD, &adapter->rx_overruns,
4955 			"RX overruns");
4956 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "watchdog_timeouts",
4957 			CTLFLAG_RD, &adapter->watchdog_events,
4958 			"Watchdog timeouts");
4959 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
4960 			CTLFLAG_RD, &adapter->stats.xonrxc,
4961 			"XON Received");
4962 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
4963 			CTLFLAG_RD, &adapter->stats.xontxc,
4964 			"XON Transmitted");
4965 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
4966 			CTLFLAG_RD, &adapter->stats.xoffrxc,
4967 			"XOFF Received");
4968 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
4969 			CTLFLAG_RD, &adapter->stats.xofftxc,
4970 			"XOFF Transmitted");
4971 
4972 	/* Packet Reception Stats */
4973 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
4974 			CTLFLAG_RD, &adapter->stats.tpr,
4975 			"Total Packets Received ");
4976 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
4977 			CTLFLAG_RD, &adapter->stats.gprc,
4978 			"Good Packets Received");
4979 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
4980 			CTLFLAG_RD, &adapter->stats.bprc,
4981 			"Broadcast Packets Received");
4982 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
4983 			CTLFLAG_RD, &adapter->stats.mprc,
4984 			"Multicast Packets Received");
4985 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
4986 			CTLFLAG_RD, &adapter->stats.prc64,
4987 			"64 byte frames received ");
4988 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
4989 			CTLFLAG_RD, &adapter->stats.prc127,
4990 			"65-127 byte frames received");
4991 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
4992 			CTLFLAG_RD, &adapter->stats.prc255,
4993 			"128-255 byte frames received");
4994 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
4995 			CTLFLAG_RD, &adapter->stats.prc511,
4996 			"256-511 byte frames received");
4997 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
4998 			CTLFLAG_RD, &adapter->stats.prc1023,
4999 			"512-1023 byte frames received");
5000 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5001 			CTLFLAG_RD, &adapter->stats.prc1522,
5002 			"1023-1522 byte frames received");
5003  	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5004  			CTLFLAG_RD, &adapter->stats.gorc,
5005  			"Good Octets Received");
5006 
5007 	/* Packet Transmission Stats */
5008  	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5009  			CTLFLAG_RD, &adapter->stats.gotc,
5010  			"Good Octets Transmitted");
5011 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5012 			CTLFLAG_RD, &adapter->stats.tpt,
5013 			"Total Packets Transmitted");
5014 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5015 			CTLFLAG_RD, &adapter->stats.gptc,
5016 			"Good Packets Transmitted");
5017 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5018 			CTLFLAG_RD, &adapter->stats.bptc,
5019 			"Broadcast Packets Transmitted");
5020 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5021 			CTLFLAG_RD, &adapter->stats.mptc,
5022 			"Multicast Packets Transmitted");
5023 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5024 			CTLFLAG_RD, &adapter->stats.ptc64,
5025 			"64 byte frames transmitted ");
5026 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5027 			CTLFLAG_RD, &adapter->stats.ptc127,
5028 			"65-127 byte frames transmitted");
5029 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5030 			CTLFLAG_RD, &adapter->stats.ptc255,
5031 			"128-255 byte frames transmitted");
5032 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5033 			CTLFLAG_RD, &adapter->stats.ptc511,
5034 			"256-511 byte frames transmitted");
5035 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5036 			CTLFLAG_RD, &adapter->stats.ptc1023,
5037 			"512-1023 byte frames transmitted");
5038 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5039 			CTLFLAG_RD, &adapter->stats.ptc1522,
5040 			"1024-1522 byte frames transmitted");
5041 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5042 			CTLFLAG_RD, &adapter->stats.tsctc,
5043 			"TSO Contexts Transmitted");
5044 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5045 			CTLFLAG_RD, &adapter->stats.tsctfc,
5046 			"TSO Contexts Failed");
5047 
5048 
5049 	/* Interrupt Stats */
5050 
5051 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5052 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5053 	int_list = SYSCTL_CHILDREN(int_node);
5054 
5055 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5056 			CTLFLAG_RD, &adapter->stats.iac,
5057 			"Interrupt Assertion Count");
5058 
5059 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5060 			CTLFLAG_RD, &adapter->stats.icrxptc,
5061 			"Interrupt Cause Rx Pkt Timer Expire Count");
5062 
5063 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5064 			CTLFLAG_RD, &adapter->stats.icrxatc,
5065 			"Interrupt Cause Rx Abs Timer Expire Count");
5066 
5067 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5068 			CTLFLAG_RD, &adapter->stats.ictxptc,
5069 			"Interrupt Cause Tx Pkt Timer Expire Count");
5070 
5071 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5072 			CTLFLAG_RD, &adapter->stats.ictxatc,
5073 			"Interrupt Cause Tx Abs Timer Expire Count");
5074 
5075 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5076 			CTLFLAG_RD, &adapter->stats.ictxqec,
5077 			"Interrupt Cause Tx Queue Empty Count");
5078 
5079 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5080 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5081 			"Interrupt Cause Tx Queue Min Thresh Count");
5082 
5083 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5084 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5085 			"Interrupt Cause Rx Desc Min Thresh Count");
5086 
5087 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5088 			CTLFLAG_RD, &adapter->stats.icrxoc,
5089 			"Interrupt Cause Receiver Overrun Count");
5090 
5091 	/* Host to Card Stats */
5092 
5093 	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5094 				    CTLFLAG_RD, NULL,
5095 				    "Host to Card Statistics");
5096 
5097 	host_list = SYSCTL_CHILDREN(host_node);
5098 
5099 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5100 			CTLFLAG_RD, &adapter->stats.cbtmpc,
5101 			"Circuit Breaker Tx Packet Count");
5102 
5103 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5104 			CTLFLAG_RD, &adapter->stats.htdpmc,
5105 			"Host Transmit Discarded Packets");
5106 
5107 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5108 			CTLFLAG_RD, &adapter->stats.rpthc,
5109 			"Rx Packets To Host");
5110 
5111 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5112 			CTLFLAG_RD, &adapter->stats.cbrmpc,
5113 			"Circuit Breaker Rx Packet Count");
5114 
5115 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5116 			CTLFLAG_RD, &adapter->stats.cbrdpc,
5117 			"Circuit Breaker Rx Dropped Count");
5118 
5119 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5120 			CTLFLAG_RD, &adapter->stats.hgptc,
5121 			"Host Good Packets Tx Count");
5122 
5123 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5124 			CTLFLAG_RD, &adapter->stats.htcbdpc,
5125 			"Host Tx Circuit Breaker Dropped Count");
5126 
5127 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5128 			CTLFLAG_RD, &adapter->stats.hgorc,
5129 			"Host Good Octets Received Count");
5130 
5131 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5132 			CTLFLAG_RD, &adapter->stats.hgotc,
5133 			"Host Good Octets Transmit Count");
5134 
5135 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5136 			CTLFLAG_RD, &adapter->stats.lenerrs,
5137 			"Length Errors");
5138 
5139 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5140 			CTLFLAG_RD, &adapter->stats.scvpc,
5141 			"SerDes/SGMII Code Violation Pkt Count");
5142 
5143 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5144 			CTLFLAG_RD, &adapter->stats.hrmpc,
5145 			"Header Redirection Missed Packet Count");
5146 
5147 
5148 
5149 }
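/*
 * Hypothetical usage sketch: the counters registered above hang off the
 * device's sysctl tree, so (assuming unit 0) they can be read from
 * userland with, e.g.:
 *
 *	# sysctl dev.em.0 | grep interrupts
 *
 * The unit number and exact node names depend on how the parent nodes
 * were created earlier in this function.
 */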
5150 
5151 /**********************************************************************
5152  *
5153  *  This routine provides a way to dump out the adapter eeprom,
5154  *  often a useful debug/service tool. Only the first 32 words
5155  *  are dumped; the fields that matter live within that range.
5156  *
5157  **********************************************************************/
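/*
 * Hypothetical usage (the OID name "nvm" and the unit number are
 * assumptions, registered elsewhere in this driver and not verified
 * here): writing 1 to the node triggers the dump, e.g.:
 *
 *	# sysctl dev.em.0.nvm=1
 */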
5158 
5159 static int
5160 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5161 {
5162 	struct adapter *adapter;
5163 	int error;
5164 	int result;
5165 
5166 	result = -1;
5167 	error = sysctl_handle_int(oidp, &result, 0, req);
5168 
5169 	if (error || !req->newptr)
5170 		return (error);
5171 
5172 	/*
5173 	 * This value will cause a hex dump of the
5174 	 * first 32 16-bit words of the EEPROM to
5175 	 * the screen.
5176 	 */
5177 	if (result == 1) {
5178 		adapter = (struct adapter *)arg1;
5179 		em_print_nvm_info(adapter);
5180 	}
5181 
5182 	return (error);
5183 }
5184 
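/*
 * Dump the first 32 16-bit NVM words to the console, eight words per
 * row; each row label advances by 0x10 since 8 words cover 16 bytes.
 */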
5185 static void
5186 em_print_nvm_info(struct adapter *adapter)
5187 {
5188 	u16	eeprom_data;
5189 	int	i, j, row = 0;
5190 
5191 	/* It's a bit crude, but it gets the job done. */
5192 	printf("\nInterface EEPROM Dump:\n");
5193 	printf("Offset\n0x0000  ");
5194 	for (i = 0, j = 0; i < 32; i++, j++) {
5195 		if (j == 8) { /* Make the offset block */
5196 			j = 0; ++row;
5197 			printf("\n0x00%x0  ", row);
5198 		}
5199 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5200 		printf("%04x ", eeprom_data);
5201 	}
5202 	printf("\n");
5203 }
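/*
 * Sysctl handler for the interrupt delay registers.  The user value is
 * taken in microseconds, converted to device ticks, and merged into the
 * low 16 bits of the register named by info->offset.  TIDV gets special
 * treatment: a delay of 0 would otherwise disable the timer, so the IDE
 * bit is cleared in the descriptor command word and a minimal non-zero
 * tick value is written instead.
 */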
5204 
5205 static int
5206 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5207 {
5208 	struct em_int_delay_info *info;
5209 	struct adapter *adapter;
5210 	u32 regval;
5211 	int error, usecs, ticks;
5212 
5213 	info = (struct em_int_delay_info *)arg1;
5214 	usecs = info->value;
5215 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5216 	if (error != 0 || req->newptr == NULL)
5217 		return (error);
5218 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5219 		return (EINVAL);
5220 	info->value = usecs;
5221 	ticks = EM_USECS_TO_TICKS(usecs);
5222 
5223 	adapter = info->adapter;
5224 
5225 	EM_CORE_LOCK(adapter);
5226 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5227 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5228 	/* Handle a few special cases. */
5229 	switch (info->offset) {
5230 	case E1000_RDTR:
5231 		break;
5232 	case E1000_TIDV:
5233 		if (ticks == 0) {
5234 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5235 			/* Don't write 0 into the TIDV register. */
5236 			regval++;
5237 		} else
5238 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5239 		break;
5240 	}
5241 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5242 	EM_CORE_UNLOCK(adapter);
5243 	return (0);
5244 }
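/*
 * Helper to register one interrupt delay tunable: records the target
 * register offset and initial value, then exposes it as a read/write
 * integer sysctl serviced by em_sysctl_int_delay().
 */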
5245 
5246 static void
5247 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5248 	const char *description, struct em_int_delay_info *info,
5249 	int offset, int value)
5250 {
5251 	info->adapter = adapter;
5252 	info->offset = offset;
5253 	info->value = value;
5254 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5255 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5256 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5257 	    info, 0, em_sysctl_int_delay, "I", description);
5258 }
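/*
 * Expose the RX processing limit as a simple read/write integer sysctl;
 * no handler is needed since the sysctl machinery updates the backing
 * variable directly.
 */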
5259 
5260 static void
5261 em_add_rx_process_limit(struct adapter *adapter, const char *name,
5262 	const char *description, int *limit, int value)
5263 {
5264 	*limit = value;
5265 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5266 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5267 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5268 }
5269 
5270 
5271