/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.0.8";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to load the driver on.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static bool	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
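/*
 * A worked example of the conversion, assuming the 1.024 usec hardware
 * tick these macros imply: EM_USECS_TO_TICKS(100) = (1000 * 100 + 512) /
 * 1024 = 98 ticks, and EM_TICKS_TO_USECS(98) = (1024 * 98 + 500) / 1000
 * = 100 usecs, so a value survives the round trip within rounding.
 */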
#define M_TSO_LEN			66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
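
/*
 * These hw.em.* tunables are read with TUNABLE_INT at module load time,
 * so they can be set from /boot/loader.conf; for example (values are
 * illustrative only):
 *	hw.em.rxd="2048"
 *	hw.em.rx_int_delay="32"
 */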

/*
** Shadow VFTA table: this is needed because the real VLAN filter
** table gets cleared during a soft reset, and the driver needs
** to be able to repopulate it afterwards.
*/
static u32 em_shadow_vfta[EM_VFTA_SIZE];

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an adapter
 *  based on the PCI vendor/device ID of that adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
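	/*
	 * These land under the device's sysctl tree, so at runtime
	 * they can be read or changed with, for example (unit 0,
	 * value illustrative): sysctl dev.em.0.rx_int_delay=32
	 */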

	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard Ethernet-sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/*
	** Start from a known state; this is important
	** for reading the NVM and MAC address from it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->watchdog_check = TRUE;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	int		i = 0, error = 0;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->watchdog_check = TRUE;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting the hardware takes a very long
			 * time and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_CORE_LOCK(adapter);
				em_init_locked(adapter);
				EM_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_82574:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_82583:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
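		/* FALLTHROUGH */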
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways: the stack uses it as the init
 *  entry point in the network interface structure, and the driver
 *  uses it as a hw/sw initialization routine to get the hardware
 *  and software to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
	case e1000_pchlan:
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest MAC address; the user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we make a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	bool		more;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		em_txeof(txr);
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	bool		more;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	more = em_txeof(txr);
	EM_TX_UNLOCK(txr);
	if (more)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	em_txeof(txr);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia	*ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed, we need
	 * to reset the PHY.
	 */
1713 	adapter->hw.phy.reset_disable = FALSE;
1714 
1715 	em_init_locked(adapter);
1716 	EM_CORE_UNLOCK(adapter);
1717 
1718 	return (0);
1719 }
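/*
 * For reference, a typical ifconfig invocation that exercises this
 * path (standard ifconfig syntax, shown only as an example):
 *
 *	ifconfig em0 media 100baseTX mediaopt full-duplex
 *	ifconfig em0 media autoselect
 */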
1720 
1721 /*********************************************************************
1722  *
1723  *  This routine maps the mbufs to tx descriptors.
1724  *
1725  *  return 0 on success, positive on failure
1726  **********************************************************************/
1727 
1728 static int
1729 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1730 {
1731 	struct adapter		*adapter = txr->adapter;
1732 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1733 	bus_dmamap_t		map;
1734 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1735 	struct e1000_tx_desc	*ctxd = NULL;
1736 	struct mbuf		*m_head;
1737 	struct ether_header	*eh;
1738 	struct ip		*ip = NULL;
1739 	struct tcphdr		*tp = NULL;
1740 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1741 	int			ip_off, poff;
1742 	int			nsegs, i, j, first, last = 0;
1743 	int			error, do_tso, tso_desc = 0;
1744 
1745 	m_head = *m_headp;
1746 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1747 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1748 	ip_off = poff = 0;
1749 
1750 	/*
1751 	** When doing checksum offload, it is critical to
1752 	** make sure the first mbuf has more than just the header,
1753 	** because the offload setup routine expects data to be there.
1754 	*/
1755 	if ((m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) &&
1756 	    (m_head->m_len < ETHER_HDR_LEN + sizeof(struct ip))) {
1757 		m_head = m_pullup(m_head, ETHER_HDR_LEN + sizeof(struct ip));
1758 		*m_headp = m_head;
1759 		if (m_head == NULL)
1760 			return (ENOBUFS);
1761 	}
1762 
1763 	/*
1764 	 * Intel recommends that the entire IP/TCP header length reside in a
1765 	 * single buffer. If multiple descriptors are used to describe the IP
1766 	 * and TCP header, each descriptor should describe one or more
1767 	 * complete headers; descriptors referencing only parts of headers
1768 	 * are not supported. If all layer headers are not coalesced into
1769 	 * a single buffer, each buffer should not cross a 4KB boundary,
1770 	 * or be larger than the maximum read request size.
1771 	 * The controller also requires the IP/TCP header to be modified to
1772 	 * make TSO work, so we first get a writable mbuf chain and then
1773 	 * coalesce the ethernet/IP/TCP headers into a single buffer to meet
1774 	 * the controller's requirement. This also simplifies IP/TCP/UDP
1775 	 * checksum offloading, which has similar restrictions.
1776 	 */
1777 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1778 		if (do_tso || (m_head->m_next != NULL &&
1779 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1780 			if (M_WRITABLE(*m_headp) == 0) {
1781 				m_head = m_dup(*m_headp, M_DONTWAIT);
1782 				m_freem(*m_headp);
1783 				if (m_head == NULL) {
1784 					*m_headp = NULL;
1785 					return (ENOBUFS);
1786 				}
1787 				*m_headp = m_head;
1788 			}
1789 		}
1790 		/*
1791 		 * XXX
1792 		 * Assume IPv4, we don't have TSO/checksum offload support
1793 		 * for IPv6 yet.
1794 		 */
1795 		ip_off = sizeof(struct ether_header);
1796 		m_head = m_pullup(m_head, ip_off);
1797 		if (m_head == NULL) {
1798 			*m_headp = NULL;
1799 			return (ENOBUFS);
1800 		}
1801 		eh = mtod(m_head, struct ether_header *);
1802 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1803 			ip_off = sizeof(struct ether_vlan_header);
1804 			m_head = m_pullup(m_head, ip_off);
1805 			if (m_head == NULL) {
1806 				*m_headp = NULL;
1807 				return (ENOBUFS);
1808 			}
1809 		}
1810 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1811 		if (m_head == NULL) {
1812 			*m_headp = NULL;
1813 			return (ENOBUFS);
1814 		}
1815 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1816 		poff = ip_off + (ip->ip_hl << 2);
1817 		m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1818 		if (m_head == NULL) {
1819 			*m_headp = NULL;
1820 			return (ENOBUFS);
1821 		}
1822 		if (do_tso) {
1823 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1824 			/*
1825 			 * TSO workaround:
1826 			 *   pull 4 more bytes of data into the first mbuf.
1827 			 */
1828 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1829 			if (m_head == NULL) {
1830 				*m_headp = NULL;
1831 				return (ENOBUFS);
1832 			}
1833 			ip->ip_len = 0;
1834 			ip->ip_sum = 0;
1835 			/*
1836 			 * The TCP pseudo checksum must not include the TCP
1837 			 * payload length, so the driver recomputes it here to
1838 			 * be what the hardware expects to see, in adherence to
1839 			 * Microsoft's Large Send specification.
1840 			 */
1841 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1842 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
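			/*
			 * Note: in_pseudo() above sums only the source and
			 * destination addresses and the protocol; the TCP
			 * length is deliberately omitted, since the hardware
			 * inserts the per-segment length as it splits the
			 * payload.
			 */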
1843 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1844 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1845 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1846 			if (m_head == NULL) {
1847 				*m_headp = NULL;
1848 				return (ENOBUFS);
1849 			}
1850 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1851 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1852 			if (m_head == NULL) {
1853 				*m_headp = NULL;
1854 				return (ENOBUFS);
1855 			}
1856 		}
1857 		*m_headp = m_head;
1858 	}
1859 
1860 	/*
1861 	 * Map the packet for DMA
1862 	 *
1863 	 * Capture the first descriptor index,
1864 	 * this descriptor will have the index
1865 	 * of the EOP which is the only one that
1866 	 * now gets a DONE bit writeback.
1867 	 */
1868 	first = txr->next_avail_desc;
1869 	tx_buffer = &txr->tx_buffers[first];
1870 	tx_buffer_mapped = tx_buffer;
1871 	map = tx_buffer->map;
1872 
1873 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1874 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1875 
1876 	/*
1877 	 * There are two types of errors we can (try) to handle:
1878 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1879 	 *   out of segments.  Defragment the mbuf chain and try again.
1880 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1881 	 *   at this point in time.  Defer sending and try again later.
1882 	 * All other errors, in particular EINVAL, are fatal and prevent the
1883 	 * mbuf chain from ever going through.  Drop it and report error.
1884 	 */
1885 	if (error == EFBIG) {
1886 		struct mbuf *m;
1887 
1888 		m = m_defrag(*m_headp, M_DONTWAIT);
1889 		if (m == NULL) {
1890 			adapter->mbuf_alloc_failed++;
1891 			m_freem(*m_headp);
1892 			*m_headp = NULL;
1893 			return (ENOBUFS);
1894 		}
1895 		*m_headp = m;
1896 
1897 		/* Try it again */
1898 		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1899 		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1900 
1901 		if (error) {
1902 			adapter->no_tx_dma_setup++;
1903 			m_freem(*m_headp);
1904 			*m_headp = NULL;
1905 			return (error);
1906 		}
1907 	} else if (error != 0) {
1908 		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
1909 		return (error);
1910 	}
1911 
1912 	/*
1913 	 * TSO Hardware workaround, if this packet is not
1914 	 * TSO, and is only a single descriptor long, and
1915 	 * it follows a TSO burst, then we need to add a
1916 	 * sentinel descriptor to prevent premature writeback.
1917 	 */
1918 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1919 		if (nsegs == 1)
1920 			tso_desc = TRUE;
1921 		txr->tx_tso = FALSE;
1922 	}
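	/*
	 * Example: a 1514-byte packet that maps to a single DMA
	 * segment and follows a TSO burst is split below into a
	 * 1510-byte descriptor plus a 4-byte sentinel descriptor,
	 * preventing a premature DD writeback.
	 */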
1923 
1924 	if (nsegs > (txr->tx_avail - 2)) {
1925 		txr->no_desc_avail++;
1926 		bus_dmamap_unload(txr->txtag, map);
1927 		return (ENOBUFS);
1928 	}
1929 	m_head = *m_headp;
1930 
1931 	/* Do hardware assists */
1932 #if __FreeBSD_version >= 700000
1933 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1934 		em_tso_setup(txr, m_head, ip_off, ip, tp, &txd_upper,
1935 		    &txd_lower);
1936 		/* we need to make a final sentinel transmit desc */
1937 		tso_desc = TRUE;
1938 	} else
1939 #endif
1940 	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1941 		em_transmit_checksum_setup(txr, m_head,
1942 		    ip_off, ip, &txd_upper, &txd_lower);
1943 
1944 	i = txr->next_avail_desc;
1945 
1946 	/* Set up our transmit descriptors */
1947 	for (j = 0; j < nsegs; j++) {
1948 		bus_size_t seg_len;
1949 		bus_addr_t seg_addr;
1950 
1951 		tx_buffer = &txr->tx_buffers[i];
1952 		ctxd = &txr->tx_base[i];
1953 		seg_addr = segs[j].ds_addr;
1954 		seg_len  = segs[j].ds_len;
1955 		/*
1956 		** TSO Workaround:
1957 		** If this is the last descriptor, we want to
1958 		** split it so we have a small final sentinel
1959 		*/
1960 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1961 			seg_len -= 4;
1962 			ctxd->buffer_addr = htole64(seg_addr);
1963 			ctxd->lower.data = htole32(
1964 			adapter->txd_cmd | txd_lower | seg_len);
1965 			ctxd->upper.data =
1966 			    htole32(txd_upper);
1967 			if (++i == adapter->num_tx_desc)
1968 				i = 0;
1969 			/* Now make the sentinel */
1970 			++txd_used; /* using an extra txd */
1971 			ctxd = &txr->tx_base[i];
1972 			tx_buffer = &txr->tx_buffers[i];
1973 			ctxd->buffer_addr =
1974 			    htole64(seg_addr + seg_len);
1975 			ctxd->lower.data = htole32(
1976 			adapter->txd_cmd | txd_lower | 4);
1977 			ctxd->upper.data =
1978 			    htole32(txd_upper);
1979 			last = i;
1980 			if (++i == adapter->num_tx_desc)
1981 				i = 0;
1982 		} else {
1983 			ctxd->buffer_addr = htole64(seg_addr);
1984 			ctxd->lower.data = htole32(
1985 			adapter->txd_cmd | txd_lower | seg_len);
1986 			ctxd->upper.data =
1987 			    htole32(txd_upper);
1988 			last = i;
1989 			if (++i == adapter->num_tx_desc)
1990 				i = 0;
1991 		}
1992 		tx_buffer->m_head = NULL;
1993 		tx_buffer->next_eop = -1;
1994 	}
1995 
1996 	txr->next_avail_desc = i;
1997 	txr->tx_avail -= nsegs;
1998 	if (tso_desc) /* TSO used an extra for sentinel */
1999 		txr->tx_avail -= txd_used;
2000 
2001 	if (m_head->m_flags & M_VLANTAG) {
2002 		/* Set the vlan id. */
2003 		ctxd->upper.fields.special =
2004 		    htole16(m_head->m_pkthdr.ether_vtag);
2005 		/* Tell hardware to add tag */
2006 		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
2007 	}
2008 
2009 	tx_buffer->m_head = m_head;
2010 	tx_buffer_mapped->map = tx_buffer->map;
2011 	tx_buffer->map = map;
2012 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2013 
2014 	/*
2015 	 * Last Descriptor of Packet
2016 	 * needs End Of Packet (EOP)
2017 	 * and Report Status (RS)
2018 	 */
2019 	ctxd->lower.data |=
2020 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2021 	/*
2022 	 * Keep track in the first buffer which
2023 	 * descriptor will be written back
2024 	 */
2025 	tx_buffer = &txr->tx_buffers[first];
2026 	tx_buffer->next_eop = last;
2027 	/* Update the watchdog time early and often */
2028 	txr->watchdog_time = ticks;
2029 
2030 	/*
2031 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2032 	 * that this frame is available to transmit.
2033 	 */
2034 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2035 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2036 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2037 
2038 	return (0);
2039 }
2040 
2041 static void
2042 em_set_promisc(struct adapter *adapter)
2043 {
2044 	struct ifnet	*ifp = adapter->ifp;
2045 	u32		reg_rctl;
2046 
2047 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2048 
2049 	if (ifp->if_flags & IFF_PROMISC) {
2050 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2051 		/* Turn this on if you want to see bad packets */
2052 		if (em_debug_sbp)
2053 			reg_rctl |= E1000_RCTL_SBP;
2054 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2055 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2056 		reg_rctl |= E1000_RCTL_MPE;
2057 		reg_rctl &= ~E1000_RCTL_UPE;
2058 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2059 	}
2060 }
2061 
2062 static void
2063 em_disable_promisc(struct adapter *adapter)
2064 {
2065 	u32	reg_rctl;
2066 
2067 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2068 
2069 	reg_rctl &=  (~E1000_RCTL_UPE);
2070 	reg_rctl &=  (~E1000_RCTL_MPE);
2071 	reg_rctl &=  (~E1000_RCTL_SBP);
2072 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2073 }
2074 
2075 
2076 /*********************************************************************
2077  *  Multicast Update
2078  *
2079  *  This routine is called whenever multicast address list is updated.
2080  *
2081  **********************************************************************/
2082 
2083 static void
2084 em_set_multi(struct adapter *adapter)
2085 {
2086 	struct ifnet	*ifp = adapter->ifp;
2087 	struct ifmultiaddr *ifma;
2088 	u32 reg_rctl = 0;
2089 	u8  *mta; /* Multicast array memory */
2090 	int mcnt = 0;
2091 
2092 	IOCTL_DEBUGOUT("em_set_multi: begin");
2093 
2094 	mta = adapter->mta;
2095 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2096 
2097 	if (adapter->hw.mac.type == e1000_82542 &&
2098 	    adapter->hw.revision_id == E1000_REVISION_2) {
2099 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2100 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2101 			e1000_pci_clear_mwi(&adapter->hw);
2102 		reg_rctl |= E1000_RCTL_RST;
2103 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2104 		msec_delay(5);
2105 	}
2106 
2107 #if __FreeBSD_version < 800000
2108 	IF_ADDR_LOCK(ifp);
2109 #else
2110 	if_maddr_rlock(ifp);
2111 #endif
2112 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2113 		if (ifma->ifma_addr->sa_family != AF_LINK)
2114 			continue;
2115 
2116 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2117 			break;
2118 
2119 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2120 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2121 		mcnt++;
2122 	}
2123 #if __FreeBSD_version < 800000
2124 	IF_ADDR_UNLOCK(ifp);
2125 #else
2126 	if_maddr_runlock(ifp);
2127 #endif
2128 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2129 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2130 		reg_rctl |= E1000_RCTL_MPE;
2131 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2132 	} else
2133 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2134 
2135 	if (adapter->hw.mac.type == e1000_82542 &&
2136 	    adapter->hw.revision_id == E1000_REVISION_2) {
2137 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2138 		reg_rctl &= ~E1000_RCTL_RST;
2139 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2140 		msec_delay(5);
2141 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2142 			e1000_pci_set_mwi(&adapter->hw);
2143 	}
2144 }
2145 
2146 
2147 /*********************************************************************
2148  *  Timer routine
2149  *
2150  *  This routine checks for link status and updates statistics.
2151  *
2152  **********************************************************************/
2153 
2154 static void
2155 em_local_timer(void *arg)
2156 {
2157 	struct adapter	*adapter = arg;
2158 	struct ifnet	*ifp = adapter->ifp;
2159 	struct tx_ring	*txr = adapter->tx_rings;
2160 
2161 	EM_CORE_LOCK_ASSERT(adapter);
2162 
2163 	em_update_link_status(adapter);
2164 	em_update_stats_counters(adapter);
2165 
2166 	/* Reset LAA into RAR[0] on 82571 */
2167 	if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2168 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2169 
2170 	/*
2171 	** If flow control has paused us since we last checked,
2172 	** it invalidates the watchdog timing, so don't run it.
2173 	*/
2174 	if (adapter->pause_frames) {
2175 		adapter->pause_frames = 0;
2176 		goto out;
2177 	}
2178 	/*
2179 	** Check for time since any descriptor was cleaned
2180 	*/
2181 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2182 		EM_TX_LOCK(txr);
2183 		if (txr->watchdog_check == FALSE) {
2184 			EM_TX_UNLOCK(txr);
2185 			continue;
2186 		}
2187 		if ((ticks - txr->watchdog_time) > EM_WATCHDOG)
2188 			goto hung;
2189 		EM_TX_UNLOCK(txr);
2190 	}
2191 out:
2192 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2193 	return;
2194 hung:
2195 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2196 	device_printf(adapter->dev,
2197 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2198 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2199 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2200 	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2201 	    "Next TX to Clean = %d\n",
2202 	    txr->me, txr->tx_avail, txr->next_to_clean);
2203 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2204 	adapter->watchdog_events++;
2205 	EM_TX_UNLOCK(txr);
2206 	em_init_locked(adapter);
2207 }
2208 
2209 
2210 static void
2211 em_update_link_status(struct adapter *adapter)
2212 {
2213 	struct e1000_hw *hw = &adapter->hw;
2214 	struct ifnet *ifp = adapter->ifp;
2215 	device_t dev = adapter->dev;
2216 	struct tx_ring *txr = adapter->tx_rings;
2217 	u32 link_check = 0;
2218 
2219 	/* Get the cached link value or read phy for real */
2220 	switch (hw->phy.media_type) {
2221 	case e1000_media_type_copper:
2222 		if (hw->mac.get_link_status) {
2223 			/* Do the work to read phy */
2224 			e1000_check_for_link(hw);
2225 			link_check = !hw->mac.get_link_status;
2226 			if (link_check) /* ESB2 fix */
2227 				e1000_cfg_on_link_up(hw);
2228 		} else
2229 			link_check = TRUE;
2230 		break;
2231 	case e1000_media_type_fiber:
2232 		e1000_check_for_link(hw);
2233 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2234                                  E1000_STATUS_LU);
2235 		break;
2236 	case e1000_media_type_internal_serdes:
2237 		e1000_check_for_link(hw);
2238 		link_check = adapter->hw.mac.serdes_has_link;
2239 		break;
2240 	default:
2241 	case e1000_media_type_unknown:
2242 		break;
2243 	}
2244 
2245 	/* Now check for a transition */
2246 	if (link_check && (adapter->link_active == 0)) {
2247 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2248 		    &adapter->link_duplex);
2249 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2250 		if ((adapter->link_speed != SPEED_1000) &&
2251 		    ((hw->mac.type == e1000_82571) ||
2252 		    (hw->mac.type == e1000_82572))) {
2253 			int tarc0;
2254 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2255 			tarc0 &= ~SPEED_MODE_BIT;
2256 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2257 		}
2258 		if (bootverbose)
2259 			device_printf(dev, "Link is up %d Mbps %s\n",
2260 			    adapter->link_speed,
2261 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2262 			    "Full Duplex" : "Half Duplex"));
2263 		adapter->link_active = 1;
2264 		adapter->smartspeed = 0;
2265 		ifp->if_baudrate = adapter->link_speed * 1000000;
2266 		if_link_state_change(ifp, LINK_STATE_UP);
2267 	} else if (!link_check && (adapter->link_active == 1)) {
2268 		ifp->if_baudrate = adapter->link_speed = 0;
2269 		adapter->link_duplex = 0;
2270 		if (bootverbose)
2271 			device_printf(dev, "Link is Down\n");
2272 		adapter->link_active = 0;
2273 		/* Link down, disable watchdog */
2274 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2275 			txr->watchdog_check = FALSE;
2276 		if_link_state_change(ifp, LINK_STATE_DOWN);
2277 	}
2278 }
2279 
2280 /*********************************************************************
2281  *
2282  *  This routine disables all traffic on the adapter by issuing a
2283  *  global reset on the MAC and deallocates TX/RX buffers.
2284  *
2285  *  This routine should always be called with BOTH the CORE
2286  *  and TX locks.
2287  **********************************************************************/
2288 
2289 static void
2290 em_stop(void *arg)
2291 {
2292 	struct adapter	*adapter = arg;
2293 	struct ifnet	*ifp = adapter->ifp;
2294 	struct tx_ring	*txr = adapter->tx_rings;
2295 
2296 	EM_CORE_LOCK_ASSERT(adapter);
2297 
2298 	INIT_DEBUGOUT("em_stop: begin");
2299 
2300 	em_disable_intr(adapter);
2301 	callout_stop(&adapter->timer);
2302 
2303 	/* Tell the stack that the interface is no longer active */
2304 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2305 
2306         /* Unarm watchdog timer. */
2307 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2308 		EM_TX_LOCK(txr);
2309 		txr->watchdog_check = FALSE;
2310 		EM_TX_UNLOCK(txr);
2311 	}
2312 
2313 	e1000_reset_hw(&adapter->hw);
2314 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2315 
2316 	e1000_led_off(&adapter->hw);
2317 	e1000_cleanup_led(&adapter->hw);
2318 }
2319 
2320 
2321 /*********************************************************************
2322  *
2323  *  Determine hardware revision.
2324  *
2325  **********************************************************************/
2326 static void
2327 em_identify_hardware(struct adapter *adapter)
2328 {
2329 	device_t dev = adapter->dev;
2330 
2331 	/* Make sure our PCI config space has the necessary stuff set */
2332 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2333 	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2334 	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2335 		device_printf(dev, "Memory Access and/or Bus Master bits "
2336 		    "were not set!\n");
2337 		adapter->hw.bus.pci_cmd_word |=
2338 		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2339 		pci_write_config(dev, PCIR_COMMAND,
2340 		    adapter->hw.bus.pci_cmd_word, 2);
2341 	}
2342 
2343 	/* Save off the information about this board */
2344 	adapter->hw.vendor_id = pci_get_vendor(dev);
2345 	adapter->hw.device_id = pci_get_device(dev);
2346 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2347 	adapter->hw.subsystem_vendor_id =
2348 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2349 	adapter->hw.subsystem_device_id =
2350 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2351 
2352 	/* Do Shared Code Init and Setup */
2353 	if (e1000_set_mac_type(&adapter->hw)) {
2354 		device_printf(dev, "Setup init failure\n");
2355 		return;
2356 	}
2357 }
2358 
2359 static int
2360 em_allocate_pci_resources(struct adapter *adapter)
2361 {
2362 	device_t	dev = adapter->dev;
2363 	int		rid;
2364 
2365 	rid = PCIR_BAR(0);
2366 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2367 	    &rid, RF_ACTIVE);
2368 	if (adapter->memory == NULL) {
2369 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2370 		return (ENXIO);
2371 	}
2372 	adapter->osdep.mem_bus_space_tag =
2373 	    rman_get_bustag(adapter->memory);
2374 	adapter->osdep.mem_bus_space_handle =
2375 	    rman_get_bushandle(adapter->memory);
2376 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2377 
2378 	/* Default to a single queue */
2379 	adapter->num_queues = 1;
2380 
2381 	/*
2382 	 * Setup MSI/X or MSI if PCI Express
2383 	 */
2384 	adapter->msix = em_setup_msix(adapter);
2385 
2386 	adapter->hw.back = &adapter->osdep;
2387 
2388 	return (0);
2389 }
2390 
2391 /*********************************************************************
2392  *
2393  *  Setup the Legacy or MSI Interrupt handler
2394  *
2395  **********************************************************************/
2396 int
2397 em_allocate_legacy(struct adapter *adapter)
2398 {
2399 	device_t dev = adapter->dev;
2400 	int error, rid = 0;
2401 
2402 	/* Manually turn off all interrupts */
2403 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2404 
2405 	if (adapter->msix == 1) /* using MSI */
2406 		rid = 1;
2407 	/* We allocate a single interrupt resource */
2408 	adapter->res = bus_alloc_resource_any(dev,
2409 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2410 	if (adapter->res == NULL) {
2411 		device_printf(dev, "Unable to allocate bus resource: "
2412 		    "interrupt\n");
2413 		return (ENXIO);
2414 	}
2415 
2416 	/*
2417 	 * Allocate a fast interrupt and the associated
2418 	 * deferred processing contexts.
2419 	 */
2420 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2421 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2422 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2423 	    taskqueue_thread_enqueue, &adapter->tq);
2424 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2425 	    device_get_nameunit(adapter->dev));
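	/*
	 * Note: em_irq_fast is installed as a filter (the handler
	 * argument to bus_setup_intr() below is NULL), so it runs in
	 * primary interrupt context and defers the real work to the
	 * taskqueue contexts created above.
	 */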
2426 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2427 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2428 		device_printf(dev, "Failed to register fast interrupt "
2429 			    "handler: %d\n", error);
2430 		taskqueue_free(adapter->tq);
2431 		adapter->tq = NULL;
2432 		return (error);
2433 	}
2434 
2435 	return (0);
2436 }
2437 
2438 /*********************************************************************
2439  *
2440  *  Setup the MSIX Interrupt handlers
2441  *   This is not really Multiqueue, rather
2442  *   it's just multiple interrupt vectors.
2443  *
2444  **********************************************************************/
2445 int
2446 em_allocate_msix(struct adapter *adapter)
2447 {
2448 	device_t	dev = adapter->dev;
2449 	struct		tx_ring *txr = adapter->tx_rings;
2450 	struct		rx_ring *rxr = adapter->rx_rings;
2451 	int		error, rid, vector = 0;
2452 
2453 
2454 	/* Make sure all interrupts are disabled */
2455 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2456 
2457 	/* First set up ring resources */
2458 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2459 
2460 		/* RX ring */
2461 		rid = vector + 1;
2462 
2463 		rxr->res = bus_alloc_resource_any(dev,
2464 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2465 		if (rxr->res == NULL) {
2466 			device_printf(dev,
2467 			    "Unable to allocate bus resource: "
2468 			    "RX MSIX Interrupt %d\n", i);
2469 			return (ENXIO);
2470 		}
2471 		if ((error = bus_setup_intr(dev, rxr->res,
2472 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2473 		    rxr, &rxr->tag)) != 0) {
2474 			device_printf(dev, "Failed to register RX handler");
2475 			return (error);
2476 		}
2477 #if __FreeBSD_version >= 800504
2478 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2479 #endif
2480 		rxr->msix = vector++; /* NOTE increment vector for TX */
2481 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2482 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2483 		    taskqueue_thread_enqueue, &rxr->tq);
2484 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2485 		    device_get_nameunit(adapter->dev));
2486 		/*
2487 		** Set the bit to enable interrupt
2488 		** in E1000_IMS -- bits 20 and 21
2489 		** are for RX0 and RX1, note this has
2490 		** NOTHING to do with the MSIX vector
2491 		*/
2492 		rxr->ims = 1 << (20 + i);
2493 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2494 
2495 		/* TX ring */
2496 		rid = vector + 1;
2497 		txr->res = bus_alloc_resource_any(dev,
2498 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2499 		if (txr->res == NULL) {
2500 			device_printf(dev,
2501 			    "Unable to allocate bus resource: "
2502 			    "TX MSIX Interrupt %d\n", i);
2503 			return (ENXIO);
2504 		}
2505 		if ((error = bus_setup_intr(dev, txr->res,
2506 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2507 		    txr, &txr->tag)) != 0) {
2508 			device_printf(dev, "Failed to register TX handler");
2509 			return (error);
2510 		}
2511 #if __FreeBSD_version >= 800504
2512 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2513 #endif
2514 		txr->msix = vector++; /* Increment vector for next pass */
2515 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2516 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2517 		    taskqueue_thread_enqueue, &txr->tq);
2518 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2519 		    device_get_nameunit(adapter->dev));
2520 		/*
2521 		** Set the bit to enable interrupt
2522 		** in E1000_IMS -- bits 22 and 23
2523 		** are for TX0 and TX1, note this has
2524 		** NOTHING to do with the MSIX vector
2525 		*/
2526 		txr->ims = 1 << (22 + i);
2527 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2528 	}
2529 
2530 	/* Link interrupt */
2531 	++rid;
2532 	adapter->res = bus_alloc_resource_any(dev,
2533 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2534 	if (!adapter->res) {
2535 		device_printf(dev, "Unable to allocate "
2536 		    "bus resource: Link interrupt [%d]\n", rid);
2537 		return (ENXIO);
2538 	}
2539 	/* Set the link handler function */
2540 	error = bus_setup_intr(dev, adapter->res,
2541 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2542 	    em_msix_link, adapter, &adapter->tag);
2543 	if (error) {
2544 		adapter->res = NULL;
2545 		device_printf(dev, "Failed to register LINK handler");
2546 		return (error);
2547 	}
2548 #if __FreeBSD_version >= 800504
2549 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2550 #endif
2551 	adapter->linkvec = vector;
2552 	adapter->ivars |=  (8 | vector) << 16;
2553 	adapter->ivars |= 0x80000000;
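	/*
	 * Worked example for the usual single-queue case: RX takes
	 * vector 0, TX vector 1 and link vector 2, so ivars becomes
	 *	(8 | 0) <<  0	RX0 valid, vector 0
	 *	(8 | 1) <<  8	TX0 valid, vector 1
	 *	(8 | 2) << 16	link valid, vector 2
	 *	| 0x80000000	= 0x800a0908
	 */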
2554 
2555 	return (0);
2556 }
2557 
2558 
2559 static void
2560 em_free_pci_resources(struct adapter *adapter)
2561 {
2562 	device_t	dev = adapter->dev;
2563 	struct tx_ring	*txr;
2564 	struct rx_ring	*rxr;
2565 	int		rid;
2566 
2567 
2568 	/*
2569 	** Release all the queue interrupt resources:
2570 	*/
2571 	for (int i = 0; i < adapter->num_queues; i++) {
2572 		txr = &adapter->tx_rings[i];
2573 		rxr = &adapter->rx_rings[i];
2574 		rid = txr->msix +1;
2575 		if (txr->tag != NULL) {
2576 			bus_teardown_intr(dev, txr->res, txr->tag);
2577 			txr->tag = NULL;
2578 		}
2579 		if (txr->res != NULL)
2580 			bus_release_resource(dev, SYS_RES_IRQ,
2581 			    rid, txr->res);
2582 		rid = rxr->msix +1;
2583 		if (rxr->tag != NULL) {
2584 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2585 			rxr->tag = NULL;
2586 		}
2587 		if (rxr->res != NULL)
2588 			bus_release_resource(dev, SYS_RES_IRQ,
2589 			    rid, rxr->res);
2590 	}
2591 
2592 	if (adapter->linkvec) /* we are doing MSIX */
2593 		rid = adapter->linkvec + 1;
2594 	else
2595 		rid = (adapter->msix != 0) ? 1 : 0;
2596 
2597 	if (adapter->tag != NULL) {
2598 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2599 		adapter->tag = NULL;
2600 	}
2601 
2602 	if (adapter->res != NULL)
2603 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2604 
2605 
2606 	if (adapter->msix)
2607 		pci_release_msi(dev);
2608 
2609 	if (adapter->msix_mem != NULL)
2610 		bus_release_resource(dev, SYS_RES_MEMORY,
2611 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2612 
2613 	if (adapter->memory != NULL)
2614 		bus_release_resource(dev, SYS_RES_MEMORY,
2615 		    PCIR_BAR(0), adapter->memory);
2616 
2617 	if (adapter->flash != NULL)
2618 		bus_release_resource(dev, SYS_RES_MEMORY,
2619 		    EM_FLASH, adapter->flash);
2620 }
2621 
2622 /*
2623  * Setup MSI or MSI/X
2624  */
2625 static int
2626 em_setup_msix(struct adapter *adapter)
2627 {
2628 	device_t dev = adapter->dev;
2629 	int val = 0;
2630 
2631 
2632 	/*
2633 	** Setup MSI/X for Hartwell: tests have shown
2634 	** use of two queues to be unstable, and to
2635 	** provide no great gain anyway, so we simply
2636 	** separate the interrupts and use a single queue.
2637 	*/
2638 	if ((adapter->hw.mac.type == e1000_82574) &&
2639 	    (em_enable_msix == TRUE)) {
2640 		/* Map the MSIX BAR */
2641 		int rid = PCIR_BAR(EM_MSIX_BAR);
2642 		adapter->msix_mem = bus_alloc_resource_any(dev,
2643 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2644 		if (!adapter->msix_mem) {
2645 			/* May not be enabled */
2646 			device_printf(adapter->dev,
2647 			    "Unable to map MSIX table\n");
2648 			goto msi;
2649 		}
2650 		val = pci_msix_count(dev);
2651 		if (val < 3) {
2652 			bus_release_resource(dev, SYS_RES_MEMORY,
2653 			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2654 			adapter->msix_mem = NULL;
2655 			device_printf(adapter->dev,
2656 			    "MSIX: insufficient vectors, using MSI\n");
2657 			goto msi;
2658 		}
2659 		val = 3;
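		/* Three vectors: one RX, one TX, and one for link */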
2660 		adapter->num_queues = 1;
2661 		if (pci_alloc_msix(dev, &val) == 0) {
2662 			device_printf(adapter->dev,
2663 			    "Using MSIX interrupts "
2664 			    "with %d vectors\n", val);
2665 		}
2666 
2667 		return (val);
2668 	}
2669 msi:
2670 	val = pci_msi_count(dev);
2671 	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2672 		adapter->msix = 1;
2673 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2674 		return (val);
2675 	}
2676 	/* Should only happen due to manual configuration */
2677 	device_printf(adapter->dev, "No MSI/MSI-X, using a Legacy IRQ\n");
2678 	return (0);
2679 }
2680 
2681 
2682 /*********************************************************************
2683  *
2684  *  Initialize the hardware to a configuration
2685  *  as specified by the adapter structure.
2686  *
2687  **********************************************************************/
2688 static void
2689 em_reset(struct adapter *adapter)
2690 {
2691 	device_t	dev = adapter->dev;
2692 	struct e1000_hw	*hw = &adapter->hw;
2693 	u16		rx_buffer_size;
2694 
2695 	INIT_DEBUGOUT("em_reset: begin");
2696 
2697 	/* Set up smart power down as default off on newer adapters. */
2698 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2699 	    hw->mac.type == e1000_82572)) {
2700 		u16 phy_tmp = 0;
2701 
2702 		/* Speed up time to link by disabling smart power down. */
2703 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2704 		phy_tmp &= ~IGP02E1000_PM_SPD;
2705 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2706 	}
2707 
2708 	/*
2709 	 * These parameters control the automatic generation (Tx) and
2710 	 * response (Rx) to Ethernet PAUSE frames.
2711 	 * - High water mark should allow for at least two frames to be
2712 	 *   received after sending an XOFF.
2713 	 * - Low water mark works best when it is very near the high water mark.
2714 	 *   This allows the receiver to restart by sending XON when it has
2715 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2716 	 *   restart after one full frame is pulled from the buffer. There
2717 	 *   could be several smaller frames in the buffer and if so they will
2718 	 *   not trigger the XON until their total number reduces the buffer
2719 	 *   by 1500.
2720 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2721 	 */
2722 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2723 
2724 	hw->fc.high_water = rx_buffer_size -
2725 	    roundup2(adapter->max_frame_size, 1024);
2726 	hw->fc.low_water = hw->fc.high_water - 1500;
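	/*
	 * Example: with a 20KB RX packet buffer (PBA low word of 20)
	 * and a 1518-byte max frame, high water is 20480 -
	 * roundup2(1518, 1024) = 18432 bytes and low water 16932.
	 */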
2727 
2728 	if (hw->mac.type == e1000_80003es2lan)
2729 		hw->fc.pause_time = 0xFFFF;
2730 	else
2731 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2732 
2733 	hw->fc.send_xon = TRUE;
2734 
2735         /* Set Flow control, use the tunable location if sane */
2736         if ((em_fc_setting >= 0) || (em_fc_setting < 4))
2737 		hw->fc.requested_mode = em_fc_setting;
2738 	else
2739 		hw->fc.requested_mode = e1000_fc_none;
2740 
2741 	/* Override - workaround for PCHLAN issue */
2742 	if (hw->mac.type == e1000_pchlan)
2743 		hw->fc.requested_mode = e1000_fc_rx_pause;
2744 
2745 	/* Issue a global reset */
2746 	e1000_reset_hw(hw);
2747 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2748 
2749 	if (e1000_init_hw(hw) < 0) {
2750 		device_printf(dev, "Hardware Initialization Failed\n");
2751 		return;
2752 	}
2753 
2754 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2755 	e1000_get_phy_info(hw);
2756 	e1000_check_for_link(hw);
2757 	return;
2758 }
2759 
2760 /*********************************************************************
2761  *
2762  *  Setup networking device structure and register an interface.
2763  *
2764  **********************************************************************/
2765 static int
2766 em_setup_interface(device_t dev, struct adapter *adapter)
2767 {
2768 	struct ifnet   *ifp;
2769 
2770 	INIT_DEBUGOUT("em_setup_interface: begin");
2771 
2772 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2773 	if (ifp == NULL) {
2774 		device_printf(dev, "can not allocate ifnet structure\n");
2775 		return (-1);
2776 	}
2777 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2778 	ifp->if_mtu = ETHERMTU;
2779 	ifp->if_init =  em_init;
2780 	ifp->if_softc = adapter;
2781 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2782 	ifp->if_ioctl = em_ioctl;
2783 	ifp->if_start = em_start;
2784 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2785 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2786 	IFQ_SET_READY(&ifp->if_snd);
2787 
2788 	ether_ifattach(ifp, adapter->hw.mac.addr);
2789 
2790 	ifp->if_capabilities = ifp->if_capenable = 0;
2791 
2792 #ifdef EM_MULTIQUEUE
2793 	/* Multiqueue tx functions */
2794 	ifp->if_transmit = em_mq_start;
2795 	ifp->if_qflush = em_qflush;
2796 #endif
2797 
2798 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2799 	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2800 
2801 	/* Enable TSO by default, can disable with ifconfig */
2802 	ifp->if_capabilities |= IFCAP_TSO4;
2803 	ifp->if_capenable |= IFCAP_TSO4;
2804 
2805 	/*
2806 	 * Tell the upper layer(s) we
2807 	 * support full VLAN capability
2808 	 */
2809 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2810 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2811 	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2812 
2813 	/*
2814 	** Don't turn this on by default: if vlans are
2815 	** created on another pseudo device (e.g. lagg),
2816 	** then vlan events are not passed through, breaking
2817 	** operation, but with HW FILTER off it works. If
2818 	** using vlans directly on the em driver you can
2819 	** enable this and get full hardware tag filtering.
2820 	*/
2821 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
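	/* It can be toggled later, e.g.: ifconfig em0 vlanhwfilter */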
2822 
2823 #ifdef DEVICE_POLLING
2824 	ifp->if_capabilities |= IFCAP_POLLING;
2825 #endif
2826 
2827 	/* Enable only WOL MAGIC by default */
2828 	if (adapter->wol) {
2829 		ifp->if_capabilities |= IFCAP_WOL;
2830 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2831 	}
2832 
2833 	/*
2834 	 * Specify the media types supported by this adapter and register
2835 	 * callbacks to update media and link information
2836 	 */
2837 	ifmedia_init(&adapter->media, IFM_IMASK,
2838 	    em_media_change, em_media_status);
2839 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2840 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2841 		u_char fiber_type = IFM_1000_SX;	/* default type */
2842 
2843 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2844 			    0, NULL);
2845 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2846 	} else {
2847 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2848 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2849 			    0, NULL);
2850 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2851 			    0, NULL);
2852 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2853 			    0, NULL);
2854 		if (adapter->hw.phy.type != e1000_phy_ife) {
2855 			ifmedia_add(&adapter->media,
2856 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2857 			ifmedia_add(&adapter->media,
2858 				IFM_ETHER | IFM_1000_T, 0, NULL);
2859 		}
2860 	}
2861 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2862 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2863 	return (0);
2864 }
2865 
2866 
2867 /*
2868  * Manage DMA'able memory.
2869  */
2870 static void
2871 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2872 {
2873 	if (error)
2874 		return;
2875 	*(bus_addr_t *) arg = segs[0].ds_addr;
2876 }
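/*
 * bus_dmamap_load() invokes the callback above with the resolved
 * physical segment list; since the tag created below uses a single
 * segment, segs[0] always describes the whole allocation.
 */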
2877 
2878 static int
2879 em_dma_malloc(struct adapter *adapter, bus_size_t size,
2880         struct em_dma_alloc *dma, int mapflags)
2881 {
2882 	int error;
2883 
2884 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2885 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2886 				BUS_SPACE_MAXADDR,	/* lowaddr */
2887 				BUS_SPACE_MAXADDR,	/* highaddr */
2888 				NULL, NULL,		/* filter, filterarg */
2889 				size,			/* maxsize */
2890 				1,			/* nsegments */
2891 				size,			/* maxsegsize */
2892 				0,			/* flags */
2893 				NULL,			/* lockfunc */
2894 				NULL,			/* lockarg */
2895 				&dma->dma_tag);
2896 	if (error) {
2897 		device_printf(adapter->dev,
2898 		    "%s: bus_dma_tag_create failed: %d\n",
2899 		    __func__, error);
2900 		goto fail_0;
2901 	}
2902 
2903 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2904 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2905 	if (error) {
2906 		device_printf(adapter->dev,
2907 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2908 		    __func__, (uintmax_t)size, error);
2909 		goto fail_2;
2910 	}
2911 
2912 	dma->dma_paddr = 0;
2913 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2914 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2915 	if (error || dma->dma_paddr == 0) {
2916 		device_printf(adapter->dev,
2917 		    "%s: bus_dmamap_load failed: %d\n",
2918 		    __func__, error);
2919 		goto fail_3;
2920 	}
2921 
2922 	return (0);
2923 
2924 fail_3:
2925 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2926 fail_2:
2927 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2928 	bus_dma_tag_destroy(dma->dma_tag);
2929 fail_0:
2930 	dma->dma_map = NULL;
2931 	dma->dma_tag = NULL;
2932 
2933 	return (error);
2934 }
2935 
2936 static void
2937 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2938 {
2939 	if (dma->dma_tag == NULL)
2940 		return;
2941 	if (dma->dma_map != NULL) {
2942 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2943 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2944 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2945 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2946 		dma->dma_map = NULL;
2947 	}
2948 	bus_dma_tag_destroy(dma->dma_tag);
2949 	dma->dma_tag = NULL;
2950 }
2951 
2952 
2953 /*********************************************************************
2954  *
2955  *  Allocate memory for the transmit and receive rings, and then
2956  *  the descriptors associated with each, called only once at attach.
2957  *
2958  **********************************************************************/
2959 static int
2960 em_allocate_queues(struct adapter *adapter)
2961 {
2962 	device_t		dev = adapter->dev;
2963 	struct tx_ring		*txr = NULL;
2964 	struct rx_ring		*rxr = NULL;
2965 	int rsize, tsize, error = E1000_SUCCESS;
2966 	int txconf = 0, rxconf = 0;
2967 
2968 
2969 	/* Allocate the TX ring struct memory */
2970 	if (!(adapter->tx_rings =
2971 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2972 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2973 		device_printf(dev, "Unable to allocate TX ring memory\n");
2974 		error = ENOMEM;
2975 		goto fail;
2976 	}
2977 
2978 	/* Now allocate the RX */
2979 	if (!(adapter->rx_rings =
2980 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2981 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2982 		device_printf(dev, "Unable to allocate RX ring memory\n");
2983 		error = ENOMEM;
2984 		goto rx_fail;
2985 	}
2986 
2987 	tsize = roundup2(adapter->num_tx_desc *
2988 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
2989 	/*
2990 	 * Now set up the TX queues, txconf is needed to handle the
2991 	 * possibility that things fail midcourse and we need to
2992 	 * undo memory gracefully
2993 	 */
2994 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2995 		/* Set up some basics */
2996 		txr = &adapter->tx_rings[i];
2997 		txr->adapter = adapter;
2998 		txr->me = i;
2999 
3000 		/* Initialize the TX lock */
3001 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3002 		    device_get_nameunit(dev), txr->me);
3003 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3004 
3005 		if (em_dma_malloc(adapter, tsize,
3006 			&txr->txdma, BUS_DMA_NOWAIT)) {
3007 			device_printf(dev,
3008 			    "Unable to allocate TX Descriptor memory\n");
3009 			error = ENOMEM;
3010 			goto err_tx_desc;
3011 		}
3012 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3013 		bzero((void *)txr->tx_base, tsize);
3014 
3015         	if (em_allocate_transmit_buffers(txr)) {
3016 			device_printf(dev,
3017 			    "Critical Failure setting up transmit buffers\n");
3018 			error = ENOMEM;
3019 			goto err_tx_desc;
3020         	}
3021 #if __FreeBSD_version >= 800000
3022 		/* Allocate a buf ring */
3023 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3024 		    M_WAITOK, &txr->tx_mtx);
3025 #endif
3026 	}
3027 
3028 	/*
3029 	 * Next the RX queues...
3030 	 */
3031 	rsize = roundup2(adapter->num_rx_desc *
3032 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3033 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3034 		rxr = &adapter->rx_rings[i];
3035 		rxr->adapter = adapter;
3036 		rxr->me = i;
3037 
3038 		/* Initialize the RX lock */
3039 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3040 		    device_get_nameunit(dev), rxr->me);
3041 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3042 
3043 		if (em_dma_malloc(adapter, rsize,
3044 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3045 			device_printf(dev,
3046 			    "Unable to allocate RxDescriptor memory\n");
3047 			error = ENOMEM;
3048 			goto err_rx_desc;
3049 		}
3050 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3051 		bzero((void *)rxr->rx_base, rsize);
3052 
3053         	/* Allocate receive buffers for the ring*/
3054 		if (em_allocate_receive_buffers(rxr)) {
3055 			device_printf(dev,
3056 			    "Critical Failure setting up receive buffers\n");
3057 			error = ENOMEM;
3058 			goto err_rx_desc;
3059 		}
3060 	}
3061 
3062 	return (0);
3063 
3064 err_rx_desc:
3065 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3066 		em_dma_free(adapter, &rxr->rxdma);
3067 err_tx_desc:
3068 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3069 		em_dma_free(adapter, &txr->txdma);
3070 	free(adapter->rx_rings, M_DEVBUF);
3071 rx_fail:
3072 #if __FreeBSD_version >= 800000
3073 	buf_ring_free(txr->br, M_DEVBUF);
3074 #endif
3075 	free(adapter->tx_rings, M_DEVBUF);
3076 fail:
3077 	return (error);
3078 }
3079 
3080 
3081 /*********************************************************************
3082  *
3083  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3084  *  the information needed to transmit a packet on the wire. This is
3085  *  called only once at attach, setup is done every reset.
3086  *
3087  **********************************************************************/
3088 static int
3089 em_allocate_transmit_buffers(struct tx_ring *txr)
3090 {
3091 	struct adapter *adapter = txr->adapter;
3092 	device_t dev = adapter->dev;
3093 	struct em_buffer *txbuf;
3094 	int error, i;
3095 
3096 	/*
3097 	 * Setup DMA descriptor areas.
3098 	 */
3099 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3100 			       1, 0,			/* alignment, bounds */
3101 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3102 			       BUS_SPACE_MAXADDR,	/* highaddr */
3103 			       NULL, NULL,		/* filter, filterarg */
3104 			       EM_TSO_SIZE,		/* maxsize */
3105 			       EM_MAX_SCATTER,		/* nsegments */
3106 			       PAGE_SIZE,		/* maxsegsize */
3107 			       0,			/* flags */
3108 			       NULL,			/* lockfunc */
3109 			       NULL,			/* lockfuncarg */
3110 			       &txr->txtag))) {
3111 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3112 		goto fail;
3113 	}
3114 
3115 	if (!(txr->tx_buffers =
3116 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3117 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3118 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3119 		error = ENOMEM;
3120 		goto fail;
3121 	}
3122 
3123         /* Create the descriptor buffer dma maps */
3124 	txbuf = txr->tx_buffers;
3125 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3126 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3127 		if (error != 0) {
3128 			device_printf(dev, "Unable to create TX DMA map\n");
3129 			goto fail;
3130 		}
3131 	}
3132 
3133 	return 0;
3134 fail:
3135 	/* We free all, it handles case where we are in the middle */
3136 	em_free_transmit_structures(adapter);
3137 	return (error);
3138 }
3139 
3140 /*********************************************************************
3141  *
3142  *  Initialize a transmit ring.
3143  *
3144  **********************************************************************/
3145 static void
3146 em_setup_transmit_ring(struct tx_ring *txr)
3147 {
3148 	struct adapter *adapter = txr->adapter;
3149 	struct em_buffer *txbuf;
3150 	int i;
3151 
3152 	/* Clear the old descriptor contents */
3153 	EM_TX_LOCK(txr);
3154 	bzero((void *)txr->tx_base,
3155 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3156 	/* Reset indices */
3157 	txr->next_avail_desc = 0;
3158 	txr->next_to_clean = 0;
3159 
3160 	/* Free any existing tx buffers. */
3161         txbuf = txr->tx_buffers;
3162 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3163 		if (txbuf->m_head != NULL) {
3164 			bus_dmamap_sync(txr->txtag, txbuf->map,
3165 			    BUS_DMASYNC_POSTWRITE);
3166 			bus_dmamap_unload(txr->txtag, txbuf->map);
3167 			m_freem(txbuf->m_head);
3168 			txbuf->m_head = NULL;
3169 		}
3170 		/* clear the watch index */
3171 		txbuf->next_eop = -1;
3172         }
3173 
3174 	/* Set number of descriptors available */
3175 	txr->tx_avail = adapter->num_tx_desc;
3176 
3177 	/* Clear checksum offload context. */
3178 	txr->last_hw_offload = 0;
3179 	txr->last_hw_ipcss = 0;
3180 	txr->last_hw_ipcso = 0;
3181 	txr->last_hw_tucss = 0;
3182 	txr->last_hw_tucso = 0;
3183 
3184 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3185 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3186 	EM_TX_UNLOCK(txr);
3187 }
3188 
3189 /*********************************************************************
3190  *
3191  *  Initialize all transmit rings.
3192  *
3193  **********************************************************************/
3194 static void
3195 em_setup_transmit_structures(struct adapter *adapter)
3196 {
3197 	struct tx_ring *txr = adapter->tx_rings;
3198 
3199 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3200 		em_setup_transmit_ring(txr);
3201 
3202 	return;
3203 }
3204 
3205 /*********************************************************************
3206  *
3207  *  Enable transmit unit.
3208  *
3209  **********************************************************************/
3210 static void
3211 em_initialize_transmit_unit(struct adapter *adapter)
3212 {
3213 	struct tx_ring	*txr = adapter->tx_rings;
3214 	struct e1000_hw	*hw = &adapter->hw;
3215 	u32	tctl, tarc, tipg = 0;
3216 
3217 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3218 
3219 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3220 		u64 bus_addr = txr->txdma.dma_paddr;
3221 		/* Base and Len of TX Ring */
3222 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3223 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3224 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3225 	    	    (u32)(bus_addr >> 32));
3226 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3227 	    	    (u32)bus_addr);
3228 		/* Init the HEAD/TAIL indices */
3229 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3230 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3231 
3232 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3233 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3234 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3235 
3236 		txr->watchdog_check = FALSE;
3237 	}
3238 
3239 	/* Set the default values for the Tx Inter Packet Gap timer */
3240 	switch (adapter->hw.mac.type) {
3241 	case e1000_82542:
3242 		tipg = DEFAULT_82542_TIPG_IPGT;
3243 		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3244 		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3245 		break;
3246 	case e1000_80003es2lan:
3247 		tipg = DEFAULT_82543_TIPG_IPGR1;
3248 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3249 		    E1000_TIPG_IPGR2_SHIFT;
3250 		break;
3251 	default:
3252 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3253 		    (adapter->hw.phy.media_type ==
3254 		    e1000_media_type_internal_serdes))
3255 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3256 		else
3257 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3258 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3259 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3260 	}
3261 
3262 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3263 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3264 
3265 	if(adapter->hw.mac.type >= e1000_82540)
3266 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3267 		    adapter->tx_abs_int_delay.value);
3268 
3269 	if ((adapter->hw.mac.type == e1000_82571) ||
3270 	    (adapter->hw.mac.type == e1000_82572)) {
3271 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3272 		tarc |= SPEED_MODE_BIT;
3273 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3274 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3275 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3276 		tarc |= 1;
3277 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3278 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3279 		tarc |= 1;
3280 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3281 	}
3282 
3283 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3284 	if (adapter->tx_int_delay.value > 0)
3285 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3286 
3287 	/* Program the Transmit Control Register */
3288 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3289 	tctl &= ~E1000_TCTL_CT;
3290 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3291 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3292 
3293 	if (adapter->hw.mac.type >= e1000_82571)
3294 		tctl |= E1000_TCTL_MULR;
3295 
3296 	/* This write will effectively turn on the transmit unit. */
3297 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3298 
3299 }
3300 
3301 
3302 /*********************************************************************
3303  *
3304  *  Free all transmit rings.
3305  *
3306  **********************************************************************/
3307 static void
3308 em_free_transmit_structures(struct adapter *adapter)
3309 {
3310 	struct tx_ring *txr = adapter->tx_rings;
3311 
3312 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3313 		EM_TX_LOCK(txr);
3314 		em_free_transmit_buffers(txr);
3315 		em_dma_free(adapter, &txr->txdma);
3316 		EM_TX_UNLOCK(txr);
3317 		EM_TX_LOCK_DESTROY(txr);
3318 	}
3319 
3320 	free(adapter->tx_rings, M_DEVBUF);
3321 }
3322 
3323 /*********************************************************************
3324  *
3325  *  Free transmit ring related data structures.
3326  *
3327  **********************************************************************/
3328 static void
3329 em_free_transmit_buffers(struct tx_ring *txr)
3330 {
3331 	struct adapter		*adapter = txr->adapter;
3332 	struct em_buffer	*txbuf;
3333 
3334 	INIT_DEBUGOUT("free_transmit_ring: begin");
3335 
3336 	if (txr->tx_buffers == NULL)
3337 		return;
3338 
3339 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3340 		txbuf = &txr->tx_buffers[i];
3341 		if (txbuf->m_head != NULL) {
3342 			bus_dmamap_sync(txr->txtag, txbuf->map,
3343 			    BUS_DMASYNC_POSTWRITE);
3344 			bus_dmamap_unload(txr->txtag,
3345 			    txbuf->map);
3346 			m_freem(txbuf->m_head);
3347 			txbuf->m_head = NULL;
3348 			if (txbuf->map != NULL) {
3349 				bus_dmamap_destroy(txr->txtag,
3350 				    txbuf->map);
3351 				txbuf->map = NULL;
3352 			}
3353 		} else if (txbuf->map != NULL) {
3354 			bus_dmamap_unload(txr->txtag,
3355 			    txbuf->map);
3356 			bus_dmamap_destroy(txr->txtag,
3357 			    txbuf->map);
3358 			txbuf->map = NULL;
3359 		}
3360 	}
3361 #if __FreeBSD_version >= 800000
3362 	if (txr->br != NULL)
3363 		buf_ring_free(txr->br, M_DEVBUF);
3364 #endif
3365 	if (txr->tx_buffers != NULL) {
3366 		free(txr->tx_buffers, M_DEVBUF);
3367 		txr->tx_buffers = NULL;
3368 	}
3369 	if (txr->txtag != NULL) {
3370 		bus_dma_tag_destroy(txr->txtag);
3371 		txr->txtag = NULL;
3372 	}
3373 	return;
3374 }
3375 
3376 
3377 /*********************************************************************
3378  *  The offload context is protocol specific (TCP/UDP) and thus
3379  *  only needs to be set when the protocol changes. The occasion
3380  *  of a context change can be a performance detriment, and
3381  *  it might be better just disabled. The reason arises in the way
3382  *  the controller supports pipelined requests from the
3383  *  Tx data DMA. Up to four requests can be pipelined, and they may
3384  *  belong to the same packet or to multiple packets. However, all
3385  *  requests for one packet are issued before a request is issued
3386  *  for a subsequent packet, and if a request for the next packet
3387  *  requires a context change that request will be stalled
3388  *  until the previous request completes. This means setting up
3389  *  a new context effectively disables pipelined Tx data DMA, which
3390  *  in turn greatly slows down performance when sending small
3391  *  frames.
3392  **********************************************************************/
3393 static void
3394 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3395     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3396 {
3397 	struct adapter			*adapter = txr->adapter;
3398 	struct e1000_context_desc	*TXD = NULL;
3399 	struct em_buffer		*tx_buffer;
3400 	int				cur, hdr_len;
3401 	u32				cmd = 0;
3402 	u16				offload = 0;
3403 	u8				ipcso, ipcss, tucso, tucss;
3404 
3405 	ipcss = ipcso = tucss = tucso = 0;
3406 	hdr_len = ip_off + (ip->ip_hl << 2);
3407 	cur = txr->next_avail_desc;
3408 
3409 	/* Setup of IP header checksum. */
3410 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3411 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3412 		offload |= CSUM_IP;
3413 		ipcss = ip_off;
3414 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3415 		/*
3416 		 * Start offset for header checksum calculation.
3417 		 * End offset for header checksum calculation.
3418 		 * Offset of place to put the checksum.
3419 		 */
3420 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3421 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3422 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3423 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3424 		cmd |= E1000_TXD_CMD_IP;
3425 	}
3426 
3427 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3428  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3429  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3430  		offload |= CSUM_TCP;
3431  		tucss = hdr_len;
3432  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3433  		/*
 		 * Setting up a new checksum offload context for every
 		 * frame takes a lot of processing time for the hardware.
 		 * This also reduces performance a lot for small frames,
 		 * so avoid it if the driver can reuse the previously
 		 * configured offload context.
3439  		 */
3440  		if (txr->last_hw_offload == offload) {
3441  			if (offload & CSUM_IP) {
3442  				if (txr->last_hw_ipcss == ipcss &&
3443  				    txr->last_hw_ipcso == ipcso &&
3444  				    txr->last_hw_tucss == tucss &&
3445  				    txr->last_hw_tucso == tucso)
3446  					return;
3447  			} else {
3448  				if (txr->last_hw_tucss == tucss &&
3449  				    txr->last_hw_tucso == tucso)
3450  					return;
3451  			}
3452   		}
3453  		txr->last_hw_offload = offload;
3454  		txr->last_hw_tucss = tucss;
3455  		txr->last_hw_tucso = tucso;
3456  		/*
3457  		 * Start offset for payload checksum calculation.
3458  		 * End offset for payload checksum calculation.
3459  		 * Offset of place to put the checksum.
3460  		 */
3461 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3462  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3463  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3464  		TXD->upper_setup.tcp_fields.tucso = tucso;
3465  		cmd |= E1000_TXD_CMD_TCP;
 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
 		offload |= CSUM_UDP;
 		tucss = hdr_len;
 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3471  		/*
 		 * Setting up a new checksum offload context for every
 		 * frame takes a lot of processing time for the hardware.
 		 * This also reduces performance a lot for small frames,
 		 * so avoid it if the driver can reuse the previously
 		 * configured offload context.
3477  		 */
3478  		if (txr->last_hw_offload == offload) {
3479  			if (offload & CSUM_IP) {
3480  				if (txr->last_hw_ipcss == ipcss &&
3481  				    txr->last_hw_ipcso == ipcso &&
3482  				    txr->last_hw_tucss == tucss &&
3483  				    txr->last_hw_tucso == tucso)
3484  					return;
3485  			} else {
3486  				if (txr->last_hw_tucss == tucss &&
3487  				    txr->last_hw_tucso == tucso)
3488  					return;
3489  			}
3490  		}
3491  		txr->last_hw_offload = offload;
3492  		txr->last_hw_tucss = tucss;
3493  		txr->last_hw_tucso = tucso;
3494  		/*
 		 * Start offset for payload checksum calculation.
 		 * End offset for payload checksum calculation.
3497  		 * Offset of place to put the checksum.
3498  		 */
3499 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3500  		TXD->upper_setup.tcp_fields.tucss = tucss;
3501  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3502  		TXD->upper_setup.tcp_fields.tucso = tucso;
3503   	}
3504 
3505  	if (offload & CSUM_IP) {
3506  		txr->last_hw_ipcss = ipcss;
3507  		txr->last_hw_ipcso = ipcso;
3508   	}
3509 
3510 	TXD->tcp_seg_setup.data = htole32(0);
3511 	TXD->cmd_and_length =
3512 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3513 	tx_buffer = &txr->tx_buffers[cur];
3514 	tx_buffer->m_head = NULL;
3515 	tx_buffer->next_eop = -1;
3516 
3517 	if (++cur == adapter->num_tx_desc)
3518 		cur = 0;
3519 
3520 	txr->tx_avail--;
3521 	txr->next_avail_desc = cur;
3522 }
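
/*
 * Editor's worked example for the offsets above, assuming an untagged
 * Ethernet frame (ip_off = 14) with a 20-byte IPv4 header (ip_hl = 5)
 * and TCP:
 *
 *	hdr_len = 14 + (5 << 2)                         = 34
 *	ipcss   = 14                   (IP header start)
 *	ipcso   = 14 + offsetof(struct ip, ip_sum)      = 24
 *	tucss   = 34                   (TCP header start)
 *	tucso   = 34 + offsetof(struct tcphdr, th_sum)  = 50
 *
 * The context descriptor thus tells the hardware exactly where each
 * checksum field lives in the frame it is about to send.
 */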
3523 
3524 
3525 /**********************************************************************
3526  *
3527  *  Setup work for hardware segmentation offload (TSO)
3528  *
3529  **********************************************************************/
3530 static void
3531 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3532     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3533 {
3534 	struct adapter			*adapter = txr->adapter;
3535 	struct e1000_context_desc	*TXD;
3536 	struct em_buffer		*tx_buffer;
3537 	int cur, hdr_len;
3538 
3539 	/*
	 * In theory we could reuse the same TSO context if and only if
	 * the frame is the same type (IP/TCP) and has the same MSS.
	 * However, checking whether a frame has the same IP/TCP
	 * structure is hard, so just ignore that and always establish
	 * a new TSO context.
3545 	 */
3546 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3547 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3548 		      E1000_TXD_DTYP_D |	/* Data descr type */
3549 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3550 
3551 	/* IP and/or TCP header checksum calculation and insertion. */
3552 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3553 
3554 	cur = txr->next_avail_desc;
3555 	tx_buffer = &txr->tx_buffers[cur];
3556 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3557 
3558 	/*
3559 	 * Start offset for header checksum calculation.
3560 	 * End offset for header checksum calculation.
	 * Offset of the place to put the checksum.
3562 	 */
3563 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3564 	TXD->lower_setup.ip_fields.ipcse =
3565 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3566 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3567 	/*
3568 	 * Start offset for payload checksum calculation.
3569 	 * End offset for payload checksum calculation.
3570 	 * Offset of place to put the checksum.
3571 	 */
3572 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3573 	TXD->upper_setup.tcp_fields.tucse = 0;
3574 	TXD->upper_setup.tcp_fields.tucso =
3575 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3576 	/*
3577 	 * Payload size per packet w/o any headers.
3578 	 * Length of all headers up to payload.
3579 	 */
3580 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3581 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3582 
3583 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3584 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3585 				E1000_TXD_CMD_TSE |	/* TSE context */
3586 				E1000_TXD_CMD_IP |	/* Do IP csum */
3587 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3588 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3589 
3590 	tx_buffer->m_head = NULL;
3591 	tx_buffer->next_eop = -1;
3592 
3593 	if (++cur == adapter->num_tx_desc)
3594 		cur = 0;
3595 
3596 	txr->tx_avail--;
3597 	txr->next_avail_desc = cur;
3598 	txr->tx_tso = TRUE;
3599 }
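
/*
 * Editor's worked example, assuming ip_off = 14, ip_hl = 5 and
 * th_off = 5 (so hdr_len = 54) and a 32794-byte TSO packet with
 * tso_segsz = 1448: cmd_and_length carries 32794 - 54 = 32740 bytes
 * of payload, and the hardware replicates the 54-byte header to emit
 * ceil(32740 / 1448) = 23 frames, the last one carrying the
 * remaining 884 bytes.
 */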
3600 
3601 
3602 /**********************************************************************
3603  *
3604  *  Examine each tx_buffer in the used queue. If the hardware is done
3605  *  processing the packet then free associated resources. The
3606  *  tx_buffer is put back on the free queue.
3607  *
3608  **********************************************************************/
3609 static bool
3610 em_txeof(struct tx_ring *txr)
3611 {
3612 	struct adapter	*adapter = txr->adapter;
	int		first, last, done;
	struct em_buffer *tx_buffer;
	struct e1000_tx_desc *tx_desc, *eop_desc;
3616 	struct ifnet   *ifp = adapter->ifp;
3617 
3618 	EM_TX_LOCK_ASSERT(txr);
3619 
3620         if (txr->tx_avail == adapter->num_tx_desc)
3621                 return (FALSE);
3622 
3623         first = txr->next_to_clean;
3624         tx_desc = &txr->tx_base[first];
3625         tx_buffer = &txr->tx_buffers[first];
3626 	last = tx_buffer->next_eop;
3627         eop_desc = &txr->tx_base[last];
3628 
3629 	/*
3630 	 * What this does is get the index of the
3631 	 * first descriptor AFTER the EOP of the
3632 	 * first packet, that way we can do the
3633 	 * simple comparison on the inner while loop.
3634 	 */
3635 	if (++last == adapter->num_tx_desc)
3636  		last = 0;
3637 	done = last;
3638 
3639         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3640             BUS_DMASYNC_POSTREAD);
3641 
3642         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3643 		/* We clean the range of the packet */
3644 		while (first != done) {
3645                 	tx_desc->upper.data = 0;
3646                 	tx_desc->lower.data = 0;
3647                 	tx_desc->buffer_addr = 0;
3648                 	++txr->tx_avail;
3649 
3650 			if (tx_buffer->m_head) {
3651 				bus_dmamap_sync(txr->txtag,
3652 				    tx_buffer->map,
3653 				    BUS_DMASYNC_POSTWRITE);
3654 				bus_dmamap_unload(txr->txtag,
3655 				    tx_buffer->map);
3656                         	m_freem(tx_buffer->m_head);
3657                         	tx_buffer->m_head = NULL;
3658                 	}
3659 			tx_buffer->next_eop = -1;
3660 			txr->watchdog_time = ticks;
3661 
3662 	                if (++first == adapter->num_tx_desc)
3663 				first = 0;
3664 
3665 	                tx_buffer = &txr->tx_buffers[first];
3666 			tx_desc = &txr->tx_base[first];
3667 		}
3668 		++ifp->if_opackets;
3669 		/* See if we can continue to the next packet */
3670 		last = tx_buffer->next_eop;
3671 		if (last != -1) {
3672         		eop_desc = &txr->tx_base[last];
3673 			/* Get new done point */
			if (++last == adapter->num_tx_desc)
				last = 0;
3675 			done = last;
3676 		} else
3677 			break;
3678         }
3679         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3680             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3681 
3682         txr->next_to_clean = first;
3683 
3684         /*
3685          * If we have enough room, clear IFF_DRV_OACTIVE
3686          * to tell the stack that it is OK to send packets.
3687          */
3688         if (txr->tx_avail > EM_TX_CLEANUP_THRESHOLD) {
3689                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3690 		/* Disable watchdog if all clean */
3691                 if (txr->tx_avail == adapter->num_tx_desc) {
3692 			txr->watchdog_check = FALSE;
3693 			return (FALSE);
3694 		}
3695         }
3696 
3697 	return (TRUE);
3698 }
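
/*
 * Editor's illustration of the cleanup walk above, assuming a
 * 1024-descriptor ring: with next_to_clean = 1020 and the first
 * pending packet's EOP in slot 2, done becomes 3 and the inner loop
 * frees slots 1020..1023 and 0..2, wrapping through the
 * "++first == num_tx_desc" test before stopping at done.
 */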
3699 
3700 
3701 /*********************************************************************
3702  *
3703  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3704  *
3705  **********************************************************************/
3706 static void
3707 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3708 {
3709 	struct adapter		*adapter = rxr->adapter;
3710 	struct mbuf		*m;
3711 	bus_dma_segment_t	segs[1];
3712 	struct em_buffer	*rxbuf;
3713 	int			i, error, nsegs, cleaned;
3714 
3715 	i = rxr->next_to_refresh;
3716 	cleaned = -1;
3717 	while (i != limit) {
3718 		rxbuf = &rxr->rx_buffers[i];
3719 		/*
3720 		** Just skip entries with a buffer,
3721 		** they can only be due to an error
3722 		** and are to be reused.
3723 		*/
3724 		if (rxbuf->m_head != NULL)
3725 			goto reuse;
3726 		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3727 		/*
3728 		** If we have a temporary resource shortage
3729 		** that causes a failure, just abort refresh
3730 		** for now, we will return to this point when
3731 		** reinvoked from em_rxeof.
3732 		*/
3733 		if (m == NULL)
3734 			goto update;
3735 		m->m_len = m->m_pkthdr.len = MCLBYTES;
3736 
3737 		if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3738 			m_adj(m, ETHER_ALIGN);
3739 
3740 		/* Use bus_dma machinery to setup the memory mapping  */
3741 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3742 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3743 		if (error != 0) {
3744 			m_free(m);
3745 			goto update;
3746 		}
3747 
3748 		/* If nsegs is wrong then the stack is corrupt. */
3749 		KASSERT(nsegs == 1, ("Too many segments returned!"));
3750 
3751 		bus_dmamap_sync(rxr->rxtag,
3752 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3753 		rxbuf->m_head = m;
3754 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3755 reuse:
3756 		cleaned = i;
3757 		/* Calculate next index */
3758 		if (++i == adapter->num_rx_desc)
3759 			i = 0;
3760 		/* This is the work marker for refresh */
3761 		rxr->next_to_refresh = i;
3762 	}
3763 update:
3764 	/*
3765 	** Update the tail pointer only if,
3766 	** and as far as we have refreshed.
3767 	*/
3768 	if (cleaned != -1) /* Update tail index */
3769 		E1000_WRITE_REG(&adapter->hw,
3770 		    E1000_RDT(rxr->me), cleaned);
3771 
3772 	return;
3773 }
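
/*
 * Editor's illustration: with next_to_refresh = 10 and limit = 14,
 * slots 10..13 are refilled, cleaned ends at 13 and RDT is moved to
 * 13, handing those freshly-mapped descriptors back to the hardware.
 */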
3774 
3775 
3776 /*********************************************************************
3777  *
3778  *  Allocate memory for rx_buffer structures. Since we use one
3779  *  rx_buffer per received packet, the maximum number of rx_buffer's
3780  *  that we'll need is equal to the number of receive descriptors
3781  *  that we've allocated.
3782  *
3783  **********************************************************************/
3784 static int
3785 em_allocate_receive_buffers(struct rx_ring *rxr)
3786 {
3787 	struct adapter		*adapter = rxr->adapter;
3788 	device_t		dev = adapter->dev;
3789 	struct em_buffer	*rxbuf;
3790 	int			error;
3791 
3792 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3793 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3794 	if (rxr->rx_buffers == NULL) {
3795 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3796 		return (ENOMEM);
3797 	}
3798 
3799 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3800 				1, 0,			/* alignment, bounds */
3801 				BUS_SPACE_MAXADDR,	/* lowaddr */
3802 				BUS_SPACE_MAXADDR,	/* highaddr */
3803 				NULL, NULL,		/* filter, filterarg */
3804 				MCLBYTES,		/* maxsize */
3805 				1,			/* nsegments */
3806 				MCLBYTES,		/* maxsegsize */
3807 				0,			/* flags */
3808 				NULL,			/* lockfunc */
3809 				NULL,			/* lockarg */
3810 				&rxr->rxtag);
3811 	if (error) {
3812 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3813 		    __func__, error);
3814 		goto fail;
3815 	}
3816 
	for (int i = 0; i < adapter->num_rx_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
3820 		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3821 		    &rxbuf->map);
3822 		if (error) {
3823 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3824 			    __func__, error);
3825 			goto fail;
3826 		}
3827 	}
3828 
3829 	return (0);
3830 
3831 fail:
3832 	em_free_receive_structures(adapter);
3833 	return (error);
3834 }
3835 
3836 
3837 /*********************************************************************
3838  *
3839  *  Initialize a receive ring and its buffers.
3840  *
3841  **********************************************************************/
3842 static int
3843 em_setup_receive_ring(struct rx_ring *rxr)
3844 {
3845 	struct	adapter 	*adapter = rxr->adapter;
3846 	struct em_buffer	*rxbuf;
3847 	bus_dma_segment_t	seg[1];
3848 	int			rsize, nsegs, error;
3849 
3850 
3851 	/* Clear the ring contents */
3852 	EM_RX_LOCK(rxr);
3853 	rsize = roundup2(adapter->num_rx_desc *
3854 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3855 	bzero((void *)rxr->rx_base, rsize);
3856 
3857 	/*
3858 	** Free current RX buffer structs and their mbufs
3859 	*/
3860 	for (int i = 0; i < adapter->num_rx_desc; i++) {
3861 		rxbuf = &rxr->rx_buffers[i];
3862 		if (rxbuf->m_head != NULL) {
3863 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3864 			    BUS_DMASYNC_POSTREAD);
3865 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3866 			m_freem(rxbuf->m_head);
3867 		}
3868 	}
3869 
3870 	/* Now replenish the mbufs */
3871 	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3872 
3873 		rxbuf = &rxr->rx_buffers[j];
3874 		rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
		if (rxbuf->m_head == NULL) {
			EM_RX_UNLOCK(rxr);
			return (ENOBUFS);
		}
3877 		rxbuf->m_head->m_len = MCLBYTES;
3878 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3879 		rxbuf->m_head->m_pkthdr.len = MCLBYTES;
3880 
3881 		/* Get the memory mapping */
3882 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3883 		    rxbuf->map, rxbuf->m_head, seg,
3884 		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			m_freem(rxbuf->m_head);
			rxbuf->m_head = NULL;
			EM_RX_UNLOCK(rxr);
			return (error);
		}
3890 		bus_dmamap_sync(rxr->rxtag,
3891 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3892 
3893 		/* Update descriptor */
3894 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3895 	}
3896 
3897 
3898 	/* Setup our descriptor indices */
3899 	rxr->next_to_check = 0;
3900 	rxr->next_to_refresh = 0;
3901 
3902 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3903 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3904 
3905 	EM_RX_UNLOCK(rxr);
3906 	return (0);
3907 }
3908 
3909 /*********************************************************************
3910  *
3911  *  Initialize all receive rings.
3912  *
3913  **********************************************************************/
3914 static int
3915 em_setup_receive_structures(struct adapter *adapter)
3916 {
3917 	struct rx_ring *rxr = adapter->rx_rings;
3918 	int j;
3919 
3920 	for (j = 0; j < adapter->num_queues; j++, rxr++)
3921 		if (em_setup_receive_ring(rxr))
3922 			goto fail;
3923 
3924 	return (0);
3925 fail:
3926 	/*
	 * Free RX buffers allocated so far; we only handle the
	 * rings that completed, since the failing case will have
	 * cleaned up after itself. 'j' failed, so it is the terminus.
3930 	 */
3931 	for (int i = 0; i < j; ++i) {
3932 		rxr = &adapter->rx_rings[i];
3933 		for (int n = 0; n < adapter->num_rx_desc; n++) {
3934 			struct em_buffer *rxbuf;
3935 			rxbuf = &rxr->rx_buffers[n];
3936 			if (rxbuf->m_head != NULL) {
3937 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3938 			  	  BUS_DMASYNC_POSTREAD);
3939 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3940 				m_freem(rxbuf->m_head);
3941 				rxbuf->m_head = NULL;
3942 			}
3943 		}
3944 	}
3945 
3946 	return (ENOBUFS);
3947 }
3948 
3949 /*********************************************************************
3950  *
3951  *  Free all receive rings.
3952  *
3953  **********************************************************************/
3954 static void
3955 em_free_receive_structures(struct adapter *adapter)
3956 {
3957 	struct rx_ring *rxr = adapter->rx_rings;
3958 
3959 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3960 		em_free_receive_buffers(rxr);
3961 		/* Free the ring memory as well */
3962 		em_dma_free(adapter, &rxr->rxdma);
3963 		EM_RX_LOCK_DESTROY(rxr);
3964 	}
3965 
3966 	free(adapter->rx_rings, M_DEVBUF);
3967 }
3968 
3969 
3970 /*********************************************************************
3971  *
3972  *  Free receive ring data structures
3973  *
3974  **********************************************************************/
3975 static void
3976 em_free_receive_buffers(struct rx_ring *rxr)
3977 {
3978 	struct adapter		*adapter = rxr->adapter;
3979 	struct em_buffer	*rxbuf = NULL;
3980 
3981 	INIT_DEBUGOUT("free_receive_buffers: begin");
3982 
3983 	if (rxr->rx_buffers != NULL) {
3984 		for (int i = 0; i < adapter->num_rx_desc; i++) {
3985 			rxbuf = &rxr->rx_buffers[i];
3986 			if (rxbuf->map != NULL) {
3987 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3988 				    BUS_DMASYNC_POSTREAD);
3989 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3990 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
3991 			}
3992 			if (rxbuf->m_head != NULL) {
3993 				m_freem(rxbuf->m_head);
3994 				rxbuf->m_head = NULL;
3995 			}
3996 		}
3997 		free(rxr->rx_buffers, M_DEVBUF);
3998 		rxr->rx_buffers = NULL;
3999 	}
4000 
4001 	if (rxr->rxtag != NULL) {
4002 		bus_dma_tag_destroy(rxr->rxtag);
4003 		rxr->rxtag = NULL;
4004 	}
4005 
4006 	return;
4007 }
4008 
4009 
4010 /*********************************************************************
4011  *
4012  *  Enable receive unit.
4013  *
4014  **********************************************************************/
4015 #define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
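
/*
 * Editor's note: with MAX_INTS_PER_SEC = 8000 this works out to
 * 1000000000 / (8000 * 256) = 488.  The ITR register counts in
 * 256 ns units, so 488 * 256 ns is roughly 125 us between
 * interrupts, i.e. at most ~8000 interrupts per second.
 */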
4017 
4018 static void
4019 em_initialize_receive_unit(struct adapter *adapter)
4020 {
4021 	struct rx_ring	*rxr = adapter->rx_rings;
4022 	struct ifnet	*ifp = adapter->ifp;
4023 	struct e1000_hw	*hw = &adapter->hw;
4024 	u64	bus_addr;
4025 	u32	rctl, rxcsum;
4026 
	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4028 
4029 	/*
4030 	 * Make sure receives are disabled while setting
4031 	 * up the descriptor ring
4032 	 */
4033 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4034 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4035 
4036 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4037 	    adapter->rx_abs_int_delay.value);
4038 	/*
4039 	 * Set the interrupt throttling rate. Value is calculated
4040 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4041 	 */
4042 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4043 
4044 	/*
4045 	** When using MSIX interrupts we need to throttle
4046 	** using the EITR register (82574 only)
4047 	*/
4048 	if (hw->mac.type == e1000_82574)
4049 		for (int i = 0; i < 4; i++)
4050 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4051 			    DEFAULT_ITR);
4052 
	/* Disable accelerated acknowledgement */
4054 	if (adapter->hw.mac.type == e1000_82574)
4055 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4056 
4057 	if (ifp->if_capenable & IFCAP_RXCSUM) {
4058 		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4059 		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4060 		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4061 	}
4062 
4063 	/*
4064 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4065 	** long latencies are observed, like Lenovo X60. This
4066 	** change eliminates the problem, but since having positive
4067 	** values in RDTR is a known source of problems on other
4068 	** platforms another solution is being sought.
4069 	*/
4070 	if (hw->mac.type == e1000_82573)
4071 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4072 
4073 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4074 		/* Setup the Base and Length of the Rx Descriptor Ring */
4075 		bus_addr = rxr->rxdma.dma_paddr;
4076 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4077 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4078 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4079 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4080 		/* Setup the Head and Tail Descriptor Pointers */
4081 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4082 		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4083 	}
4084 
4085 	/* Setup the Receive Control Register */
4086 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4087 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4088 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4089 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4090 
4091         /* Strip the CRC */
4092         rctl |= E1000_RCTL_SECRC;
4093 
4094         /* Make sure VLAN Filters are off */
4095         rctl &= ~E1000_RCTL_VFE;
4096 	rctl &= ~E1000_RCTL_SBP;
4097 	rctl |= E1000_RCTL_SZ_2048;
4098 	if (ifp->if_mtu > ETHERMTU)
4099 		rctl |= E1000_RCTL_LPE;
4100 	else
4101 		rctl &= ~E1000_RCTL_LPE;
4102 
4103 	/* Write out the settings */
4104 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4105 
4106 	return;
4107 }
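
/*
 * Editor's note: programming RDH = 0 and RDT = num_rx_desc - 1 hands
 * all but one descriptor to the hardware; keeping one slot back is
 * the usual e1000 convention so that head == tail can unambiguously
 * mean "ring empty".
 */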
4108 
4109 
4110 /*********************************************************************
4111  *
4112  *  This routine executes in interrupt context. It replenishes
4113  *  the mbufs in the descriptor and sends data which has been
4114  *  dma'ed into host memory to upper layer.
4115  *
4116  *  We loop at most count times if count is > 0, or until done if
4117  *  count < 0.
4118  *
4119  *  For polling we also now return the number of cleaned packets
4120  *********************************************************************/
4121 static bool
4122 em_rxeof(struct rx_ring *rxr, int count, int *done)
4123 {
4124 	struct adapter		*adapter = rxr->adapter;
4125 	struct ifnet		*ifp = adapter->ifp;
4126 	struct mbuf		*mp, *sendmp;
4127 	u8			status = 0;
4128 	u16 			len;
4129 	int			i, processed, rxdone = 0;
4130 	bool			eop;
4131 	struct e1000_rx_desc	*cur;
4132 
4133 	EM_RX_LOCK(rxr);
4134 
4135 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4136 
4137 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4138 			break;
4139 
4140 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4141 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4142 
4143 		cur = &rxr->rx_base[i];
4144 		status = cur->status;
4145 		mp = sendmp = NULL;
4146 
4147 		if ((status & E1000_RXD_STAT_DD) == 0)
4148 			break;
4149 
4150 		len = le16toh(cur->length);
4151 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4152 		count--;
4153 
4154 		if (((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) &&
4155 		    (rxr->discard == FALSE)) {
4156 
4157 			/* Assign correct length to the current fragment */
4158 			mp = rxr->rx_buffers[i].m_head;
4159 			mp->m_len = len;
4160 
4161 			/* Trigger for refresh */
4162 			rxr->rx_buffers[i].m_head = NULL;
4163 
4164 			if (rxr->fmp == NULL) {
4165 				mp->m_pkthdr.len = len;
4166 				rxr->fmp = mp; /* Store the first mbuf */
4167 				rxr->lmp = mp;
4168 			} else {
4169 				/* Chain mbuf's together */
4170 				mp->m_flags &= ~M_PKTHDR;
4171 				rxr->lmp->m_next = mp;
4172 				rxr->lmp = rxr->lmp->m_next;
4173 				rxr->fmp->m_pkthdr.len += len;
4174 			}
4175 
4176 			if (eop) {
4177 				rxr->fmp->m_pkthdr.rcvif = ifp;
4178 				ifp->if_ipackets++;
4179 				em_receive_checksum(cur, rxr->fmp);
4180 #ifndef __NO_STRICT_ALIGNMENT
4181 				if (adapter->max_frame_size >
4182 				    (MCLBYTES - ETHER_ALIGN) &&
4183 				    em_fixup_rx(rxr) != 0)
4184 					goto skip;
4185 #endif
4186 				if (status & E1000_RXD_STAT_VP) {
4187 					rxr->fmp->m_pkthdr.ether_vtag =
4188 					    (le16toh(cur->special) &
4189 					    E1000_RXD_SPC_VLAN_MASK);
4190 					rxr->fmp->m_flags |= M_VLANTAG;
4191 				}
4192 #ifdef EM_MULTIQUEUE
4193 				rxr->fmp->m_pkthdr.flowid = curcpu;
4194 				rxr->fmp->m_flags |= M_FLOWID;
4195 #endif
4196 #ifndef __NO_STRICT_ALIGNMENT
4197 skip:
4198 #endif
4199 				sendmp = rxr->fmp;
4200 				rxr->fmp = NULL;
4201 				rxr->lmp = NULL;
4202 			}
4203 		} else {
4204 			ifp->if_ierrors++;
4205 			++rxr->rx_discarded;
4206 			if (!eop) /* Catch subsequent segs */
4207 				rxr->discard = TRUE;
4208 			else
4209 				rxr->discard = FALSE;
4210 			em_rx_discard(rxr, i);
4211 			sendmp = NULL;
4212 		}
4213 
4214 		/* Zero out the receive descriptors status. */
4215 		cur->status = 0;
4216 		++rxdone;	/* cumulative for POLL */
4217 		++processed;
4218 
4219 		/* Advance our pointers to the next descriptor. */
4220 		if (++i == adapter->num_rx_desc)
4221 			i = 0;
4222 
4223 		/* Send to the stack */
4224 		if (sendmp != NULL) {
4225 			rxr->next_to_check = i;
4226 			EM_RX_UNLOCK(rxr);
4227 			(*ifp->if_input)(ifp, sendmp);
4228 			EM_RX_LOCK(rxr);
4229 			i = rxr->next_to_check;
4230 		}
4231 
4232 		/* Only refresh mbufs every 8 descriptors */
4233 		if (processed == 8) {
4234 			em_refresh_mbufs(rxr, i);
4235 			processed = 0;
4236 		}
4237 	}
4238 
4239 	/* Catch any remaining refresh work */
4240 	if (processed != 0) {
4241 		em_refresh_mbufs(rxr, i);
4242 		processed = 0;
4243 	}
4244 
4245 	rxr->next_to_check = i;
4246 	if (done != NULL)
4247 		*done = rxdone;
4248 	EM_RX_UNLOCK(rxr);
4249 
4250 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4251 }
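
/*
 * Editor's note: a polling caller would typically invoke this as
 * em_rxeof(rxr, budget, &ndone) and report ndone back to the polling
 * framework, while an interrupt-path caller passes a negative count
 * so the loop runs until RXD_STAT_DD is no longer set.
 */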
4252 
4253 static __inline void
4254 em_rx_discard(struct rx_ring *rxr, int i)
4255 {
4256 	struct em_buffer	*rbuf;
4257 	struct mbuf		*m;
4258 
4259 	rbuf = &rxr->rx_buffers[i];
4260 	/* Free any previous pieces */
4261 	if (rxr->fmp != NULL) {
4262 		rxr->fmp->m_flags |= M_PKTHDR;
4263 		m_freem(rxr->fmp);
4264 		rxr->fmp = NULL;
4265 		rxr->lmp = NULL;
4266 	}
4267 
4268 	/* Reset state, keep loaded DMA map and reuse */
4269 	m = rbuf->m_head;
4270 	m->m_len = m->m_pkthdr.len = MCLBYTES;
4271 	m->m_flags |= M_PKTHDR;
4272 	m->m_data = m->m_ext.ext_buf;
4273 	m->m_next = NULL;
4274 
4275 	return;
4276 }
4277 
4278 #ifndef __NO_STRICT_ALIGNMENT
4279 /*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake
 * of the 8254x, as it nullifies the benefit of DMA. The 8254x only
 * allows the RX buffer size to be 2048/4096/8192/16384; what we really
 * want is 2048 - ETHER_ALIGN so the payload comes out aligned. On
 * architectures without strict alignment restrictions the 8254x still
 * performs unaligned memory accesses, which reduces performance as well.
 * To avoid copying an entire frame to realign it, we allocate a new mbuf
 * and copy the Ethernet header into it, then prepend the new mbuf to the
 * existing mbuf chain.
 *
 * Be aware, the best performance of the 8254x is achieved only when
 * jumbo frames are not used at all on architectures with strict
 * alignment.
4292  */
4293 static int
4294 em_fixup_rx(struct rx_ring *rxr)
4295 {
4296 	struct adapter *adapter = rxr->adapter;
4297 	struct mbuf *m, *n;
4298 	int error;
4299 
4300 	error = 0;
4301 	m = rxr->fmp;
4302 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4303 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4304 		m->m_data += ETHER_HDR_LEN;
4305 	} else {
4306 		MGETHDR(n, M_DONTWAIT, MT_DATA);
4307 		if (n != NULL) {
4308 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4309 			m->m_data += ETHER_HDR_LEN;
4310 			m->m_len -= ETHER_HDR_LEN;
4311 			n->m_len = ETHER_HDR_LEN;
4312 			M_MOVE_PKTHDR(n, m);
4313 			n->m_next = m;
4314 			rxr->fmp = n;
4315 		} else {
4316 			adapter->dropped_pkts++;
4317 			m_freem(rxr->fmp);
4318 			rxr->fmp = NULL;
4319 			error = ENOMEM;
4320 		}
4321 	}
4322 
4323 	return (error);
4324 }
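
/*
 * Editor's note on the common case above: the frame starts at the
 * (aligned) cluster base, so the IP header sits at offset 14, i.e.
 * 2 mod 4.  Sliding the whole frame up by ETHER_HDR_LEN moves it to
 * offset 28, i.e. 0 mod 4, which is what strict-alignment
 * architectures require.
 */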
4325 #endif
4326 
4327 /*********************************************************************
4328  *
4329  *  Verify that the hardware indicated that the checksum is valid.
4330  *  Inform the stack about the status of checksum so that stack
4331  *  doesn't spend time verifying the checksum.
4332  *
4333  *********************************************************************/
4334 static void
4335 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4336 {
4337 	/* Ignore Checksum bit is set */
4338 	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4339 		mp->m_pkthdr.csum_flags = 0;
4340 		return;
4341 	}
4342 
4343 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4344 		/* Did it pass? */
4345 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4346 			/* IP Checksum Good */
4347 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4348 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4349 
4350 		} else {
4351 			mp->m_pkthdr.csum_flags = 0;
4352 		}
4353 	}
4354 
4355 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4356 		/* Did it pass? */
4357 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4358 			mp->m_pkthdr.csum_flags |=
4359 			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4360 			mp->m_pkthdr.csum_data = htons(0xffff);
4361 		}
4362 	}
4363 }
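
/*
 * Editor's note: setting csum_data to 0xffff together with
 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR is the conventional way to tell
 * the stack the TCP/UDP checksum (pseudo-header included) already
 * verified, so tcp_input/udp_input skip their software checksum pass.
 */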
4364 
4365 /*
 * This routine is run via a vlan
4367  * config EVENT
4368  */
4369 static void
4370 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4371 {
4372 	struct adapter	*adapter = ifp->if_softc;
4373 	u32		index, bit;
4374 
4375 	if (ifp->if_softc !=  arg)   /* Not our event */
4376 		return;
4377 
4378 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4379                 return;
4380 
4381 	index = (vtag >> 5) & 0x7F;
4382 	bit = vtag & 0x1F;
4383 	em_shadow_vfta[index] |= (1 << bit);
4384 	++adapter->num_vlans;
4385 	/* Re-init to load the changes */
4386 	em_init(adapter);
4387 }
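
/*
 * Editor's illustration (hypothetical helper, not called anywhere in
 * this driver): how a 12-bit VLAN ID maps into the 128-word shadow
 * VFTA used above.  E.g. vtag = 100 gives index 3, bit 4.
 */
static __inline int
em_shadow_vfta_is_set(u16 vtag)
{

	return ((em_shadow_vfta[(vtag >> 5) & 0x7F] &
	    (1 << (vtag & 0x1F))) != 0);
}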
4388 
4389 /*
 * This routine is run via a vlan
4391  * unconfig EVENT
4392  */
4393 static void
4394 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4395 {
4396 	struct adapter	*adapter = ifp->if_softc;
4397 	u32		index, bit;
4398 
4399 	if (ifp->if_softc !=  arg)
4400 		return;
4401 
4402 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4403                 return;
4404 
4405 	index = (vtag >> 5) & 0x7F;
4406 	bit = vtag & 0x1F;
4407 	em_shadow_vfta[index] &= ~(1 << bit);
4408 	--adapter->num_vlans;
4409 	/* Re-init to load the changes */
4410 	em_init(adapter);
4411 }
4412 
4413 static void
4414 em_setup_vlan_hw_support(struct adapter *adapter)
4415 {
4416 	struct e1000_hw *hw = &adapter->hw;
4417 	u32             reg;
4418 
4419 	/*
	** We get here through init_locked, meaning
	** a soft reset; this has already cleared
	** the VFTA and other state, so if no vlans
	** have been registered, do nothing.
4424 	*/
4425 	if (adapter->num_vlans == 0)
4426                 return;
4427 
4428 	/*
	** A soft reset zeroes out the VFTA, so
4430 	** we need to repopulate it now.
4431 	*/
4432 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4433                 if (em_shadow_vfta[i] != 0)
4434 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4435                             i, em_shadow_vfta[i]);
4436 
4437 	reg = E1000_READ_REG(hw, E1000_CTRL);
4438 	reg |= E1000_CTRL_VME;
4439 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4440 
4441 	/* Enable the Filter Table */
4442 	reg = E1000_READ_REG(hw, E1000_RCTL);
4443 	reg &= ~E1000_RCTL_CFIEN;
4444 	reg |= E1000_RCTL_VFE;
4445 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4446 
4447 	/* Update the frame size */
4448 	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4449 	    adapter->max_frame_size + VLAN_TAG_SIZE);
4450 }
4451 
4452 static void
4453 em_enable_intr(struct adapter *adapter)
4454 {
4455 	struct e1000_hw *hw = &adapter->hw;
4456 	u32 ims_mask = IMS_ENABLE_MASK;
4457 
4458 	if (hw->mac.type == e1000_82574) {
4459 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4460 		ims_mask |= EM_MSIX_MASK;
4461 	}
4462 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4463 }
4464 
4465 static void
4466 em_disable_intr(struct adapter *adapter)
4467 {
4468 	struct e1000_hw *hw = &adapter->hw;
4469 
4470 	if (hw->mac.type == e1000_82574)
4471 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4472 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4473 }
4474 
4475 /*
4476  * Bit of a misnomer, what this really means is
4477  * to enable OS management of the system... aka
4478  * to disable special hardware management features
4479  */
4480 static void
4481 em_init_manageability(struct adapter *adapter)
4482 {
4483 	/* A shared code workaround */
4484 #define E1000_82542_MANC2H E1000_MANC2H
4485 	if (adapter->has_manage) {
4486 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4487 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4488 
4489 		/* disable hardware interception of ARP */
4490 		manc &= ~(E1000_MANC_ARP_EN);
4491 
4492                 /* enable receiving management packets to the host */
4493 		manc |= E1000_MANC_EN_MNG2HOST;
4494 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4495 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4496 		manc2h |= E1000_MNG2HOST_PORT_623;
4497 		manc2h |= E1000_MNG2HOST_PORT_664;
4498 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4499 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4500 	}
4501 }
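
/*
 * Editor's note: bits 5 and 6 of MANC2H correspond to UDP ports 623
 * and 664, the standard ASF/RMCP management ports; setting them makes
 * such packets visible to the host as well as to the management
 * engine.
 */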
4502 
4503 /*
4504  * Give control back to hardware management
4505  * controller if there is one.
4506  */
4507 static void
4508 em_release_manageability(struct adapter *adapter)
4509 {
4510 	if (adapter->has_manage) {
4511 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4512 
4513 		/* re-enable hardware interception of ARP */
4514 		manc |= E1000_MANC_ARP_EN;
4515 		manc &= ~E1000_MANC_EN_MNG2HOST;
4516 
4517 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4518 	}
4519 }
4520 
4521 /*
4522  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4523  * For ASF and Pass Through versions of f/w this means
4524  * that the driver is loaded. For AMT version type f/w
4525  * this means that the network i/f is open.
4526  */
4527 static void
4528 em_get_hw_control(struct adapter *adapter)
4529 {
4530 	u32 ctrl_ext, swsm;
4531 
4532 	if (adapter->hw.mac.type == e1000_82573) {
4533 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4534 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4535 		    swsm | E1000_SWSM_DRV_LOAD);
4536 		return;
4537 	}
4538 	/* else */
4539 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4540 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4541 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4542 	return;
4543 }
4544 
4545 /*
4546  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4547  * For ASF and Pass Through versions of f/w this means that
4548  * the driver is no longer loaded. For AMT versions of the
4549  * f/w this means that the network i/f is closed.
4550  */
4551 static void
4552 em_release_hw_control(struct adapter *adapter)
4553 {
4554 	u32 ctrl_ext, swsm;
4555 
4556 	if (!adapter->has_manage)
4557 		return;
4558 
4559 	if (adapter->hw.mac.type == e1000_82573) {
4560 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4561 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4562 		    swsm & ~E1000_SWSM_DRV_LOAD);
4563 		return;
4564 	}
4565 	/* else */
4566 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4567 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4568 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4569 	return;
4570 }
4571 
4572 static int
4573 em_is_valid_ether_addr(u8 *addr)
4574 {
4575 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4576 
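	/*
	 * Editor's note: the low-order bit of the first octet is the
	 * IEEE group (multicast/broadcast) flag, which is never set in
	 * a valid unicast station address.
	 */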
4577 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4578 		return (FALSE);
4579 	}
4580 
4581 	return (TRUE);
4582 }
4583 
4584 /*
4585 ** Parse the interface capabilities with regard
4586 ** to both system management and wake-on-lan for
4587 ** later use.
4588 */
4589 static void
4590 em_get_wakeup(device_t dev)
4591 {
4592 	struct adapter	*adapter = device_get_softc(dev);
4593 	u16		eeprom_data = 0, device_id, apme_mask;
4594 
4595 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4596 	apme_mask = EM_EEPROM_APME;
4597 
4598 	switch (adapter->hw.mac.type) {
4599 	case e1000_82573:
4600 	case e1000_82583:
4601 		adapter->has_amt = TRUE;
4602 		/* Falls thru */
4603 	case e1000_82571:
4604 	case e1000_82572:
4605 	case e1000_80003es2lan:
4606 		if (adapter->hw.bus.func == 1) {
4607 			e1000_read_nvm(&adapter->hw,
4608 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4609 			break;
4610 		} else
4611 			e1000_read_nvm(&adapter->hw,
4612 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4613 		break;
4614 	case e1000_ich8lan:
4615 	case e1000_ich9lan:
4616 	case e1000_ich10lan:
4617 	case e1000_pchlan:
4618 		apme_mask = E1000_WUC_APME;
4619 		adapter->has_amt = TRUE;
4620 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4621 		break;
4622 	default:
4623 		e1000_read_nvm(&adapter->hw,
4624 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4625 		break;
4626 	}
4627 	if (eeprom_data & apme_mask)
4628 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
	/*
	 * We have the eeprom settings, now apply the special cases
	 * where the eeprom may be wrong or the board won't support
	 * wake on lan on a particular port.
	 */
4634 	device_id = pci_get_device(dev);
4635         switch (device_id) {
4636 	case E1000_DEV_ID_82571EB_FIBER:
4637 		/* Wake events only supported on port A for dual fiber
4638 		 * regardless of eeprom setting */
4639 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4640 		    E1000_STATUS_FUNC_1)
4641 			adapter->wol = 0;
4642 		break;
4643 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4644 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4645 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4646                 /* if quad port adapter, disable WoL on all but port A */
4647 		if (global_quad_port_a != 0)
4648 			adapter->wol = 0;
4649 		/* Reset for multiple quad port adapters */
4650 		if (++global_quad_port_a == 4)
4651 			global_quad_port_a = 0;
4652                 break;
4653 	}
4654 	return;
4655 }
4656 
4657 
4658 /*
4659  * Enable PCI Wake On Lan capability
4660  */
4661 static void
4662 em_enable_wakeup(device_t dev)
4663 {
4664 	struct adapter	*adapter = device_get_softc(dev);
4665 	struct ifnet	*ifp = adapter->ifp;
4666 	u32		pmc, ctrl, ctrl_ext, rctl;
4667 	u16     	status;
4668 
4669 	if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4670 		return;
4671 
4672 	/* Advertise the wakeup capability */
4673 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4674 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4675 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4676 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4677 
4678 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4679 	    (adapter->hw.mac.type == e1000_pchlan) ||
4680 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4681 	    (adapter->hw.mac.type == e1000_ich10lan)) {
4682 		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4683 		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4684 	}
4685 
4686 	/* Keep the laser running on Fiber adapters */
4687 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4688 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4689 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4690 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4691 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4692 	}
4693 
4694 	/*
4695 	** Determine type of Wakeup: note that wol
4696 	** is set with all bits on by default.
4697 	*/
4698 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4699 		adapter->wol &= ~E1000_WUFC_MAG;
4700 
4701 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4702 		adapter->wol &= ~E1000_WUFC_MC;
4703 	else {
4704 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4705 		rctl |= E1000_RCTL_MPE;
4706 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4707 	}
4708 
4709 	if (adapter->hw.mac.type == e1000_pchlan) {
4710 		if (em_enable_phy_wakeup(adapter))
4711 			return;
4712 	} else {
4713 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4714 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4715 	}
4716 
4717 	if (adapter->hw.phy.type == e1000_phy_igp_3)
4718 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4719 
4720         /* Request PME */
4721         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4722 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4723 	if (ifp->if_capenable & IFCAP_WOL)
4724 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4725         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4726 
4727 	return;
4728 }
4729 
4730 /*
** WOL in the newer chipset interfaces (pchlan)
** requires things to be copied into the PHY
4733 */
4734 static int
4735 em_enable_phy_wakeup(struct adapter *adapter)
4736 {
4737 	struct e1000_hw *hw = &adapter->hw;
4738 	u32 mreg, ret = 0;
4739 	u16 preg;
4740 
4741 	/* copy MAC RARs to PHY RARs */
4742 	for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4743 		mreg = E1000_READ_REG(hw, E1000_RAL(i));
4744 		e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4745 		e1000_write_phy_reg(hw, BM_RAR_M(i),
4746 		    (u16)((mreg >> 16) & 0xFFFF));
4747 		mreg = E1000_READ_REG(hw, E1000_RAH(i));
4748 		e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4749 		e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4750 		    (u16)((mreg >> 16) & 0xFFFF));
4751 	}
4752 
4753 	/* copy MAC MTA to PHY MTA */
4754 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4755 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4756 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4757 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4758 		    (u16)((mreg >> 16) & 0xFFFF));
4759 	}
4760 
4761 	/* configure PHY Rx Control register */
4762 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4763 	mreg = E1000_READ_REG(hw, E1000_RCTL);
4764 	if (mreg & E1000_RCTL_UPE)
4765 		preg |= BM_RCTL_UPE;
4766 	if (mreg & E1000_RCTL_MPE)
4767 		preg |= BM_RCTL_MPE;
4768 	preg &= ~(BM_RCTL_MO_MASK);
4769 	if (mreg & E1000_RCTL_MO_3)
4770 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4771 				<< BM_RCTL_MO_SHIFT);
4772 	if (mreg & E1000_RCTL_BAM)
4773 		preg |= BM_RCTL_BAM;
4774 	if (mreg & E1000_RCTL_PMCF)
4775 		preg |= BM_RCTL_PMCF;
4776 	mreg = E1000_READ_REG(hw, E1000_CTRL);
4777 	if (mreg & E1000_CTRL_RFCE)
4778 		preg |= BM_RCTL_RFCE;
4779 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4780 
4781 	/* enable PHY wakeup in MAC register */
4782 	E1000_WRITE_REG(hw, E1000_WUC,
4783 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4784 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4785 
4786 	/* configure and enable PHY wakeup in PHY registers */
4787 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4788 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4789 
4790 	/* activate PHY wakeup */
4791 	ret = hw->phy.ops.acquire(hw);
4792 	if (ret) {
4793 		printf("Could not acquire PHY\n");
4794 		return ret;
4795 	}
4796 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4797 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4798 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4799 	if (ret) {
4800 		printf("Could not read PHY page 769\n");
4801 		goto out;
4802 	}
4803 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4804 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4805 	if (ret)
4806 		printf("Could not set PHY Host Wakeup bit\n");
4807 out:
4808 	hw->phy.ops.release(hw);
4809 
4810 	return ret;
4811 }
4812 
4813 static void
4814 em_led_func(void *arg, int onoff)
4815 {
4816 	struct adapter	*adapter = arg;
4817 
4818 	EM_CORE_LOCK(adapter);
4819 	if (onoff) {
4820 		e1000_setup_led(&adapter->hw);
4821 		e1000_led_on(&adapter->hw);
4822 	} else {
4823 		e1000_led_off(&adapter->hw);
4824 		e1000_cleanup_led(&adapter->hw);
4825 	}
4826 	EM_CORE_UNLOCK(adapter);
4827 }
4828 
4829 /**********************************************************************
4830  *
4831  *  Update the board statistics counters.
4832  *
4833  **********************************************************************/
4834 static void
4835 em_update_stats_counters(struct adapter *adapter)
4836 {
4837 	struct ifnet   *ifp;
4838 
	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4840 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4841 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4842 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4843 	}
4844 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4845 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4846 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4847 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4848 
4849 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4850 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4851 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4852 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4853 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4854 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4855 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4856 	/*
4857 	** For watchdog management we need to know if we have been
4858 	** paused during the last interval, so capture that here.
4859 	*/
4860 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4861 	adapter->stats.xoffrxc += adapter->pause_frames;
4862 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4863 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4864 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4865 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4866 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4867 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4868 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4869 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4870 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4871 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4872 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4873 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4874 
4875 	/* For the 64-bit byte counters the low dword must be read first. */
4876 	/* Both registers clear on the read of the high dword */
4877 
4878 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
4879 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4880 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
4881 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4882 
4883 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4884 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4885 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4886 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4887 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4888 
4889 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4890 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4891 
4892 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4893 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4894 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4895 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4896 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4897 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4898 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4899 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4900 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4901 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4902 
4903 	/* Interrupt Counts */
4904 
4905 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
4906 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
4907 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
4908 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
4909 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
4910 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
4911 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
4912 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
4913 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
4914 
4915 	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4928 	}
4929 	ifp = adapter->ifp;
4930 
4931 	ifp->if_collisions = adapter->stats.colc;
4932 
4933 	/* Rx Errors */
4934 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4935 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4936 	    adapter->stats.ruc + adapter->stats.roc +
4937 	    adapter->stats.mpc + adapter->stats.cexterr;
4938 
4939 	/* Tx Errors */
4940 	ifp->if_oerrors = adapter->stats.ecol +
4941 	    adapter->stats.latecol + adapter->watchdog_events;
4942 }
4943 
4944 /* Export a single 32-bit register via a read-only sysctl. */
4945 static int
4946 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
4947 {
4948 	struct adapter *adapter;
4949 	u_int val;
4950 
4951 	adapter = oidp->oid_arg1;
4952 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
4953 	return (sysctl_handle_int(oidp, &val, 0, req));
4954 }
4955 
4956 /*
4957  * Add sysctl variables, one per statistic, to the system.
4958  */
4959 static void
4960 em_add_hw_stats(struct adapter *adapter)
4961 {
4962 	device_t dev = adapter->dev;
4963 
4964 	struct tx_ring *txr = adapter->tx_rings;
4965 	struct rx_ring *rxr = adapter->rx_rings;
4966 
4967 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
4968 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
4969 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
4970 	struct e1000_hw_stats *stats = &adapter->stats;
4971 
4972 	struct sysctl_oid *stat_node, *queue_node, *int_node;
4973 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
4974 
4975 #define QUEUE_NAME_LEN 32
4976 	char namebuf[QUEUE_NAME_LEN];
4977 
4978 	/* Driver Statistics */
4979 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
4980 			CTLFLAG_RD, &adapter->link_irq, 0,
4981 			"Link MSIX IRQ Handled");
4982 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
4983 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
4984 			 "Std mbuf failed");
4985 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
4986 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
4987 			 "Std mbuf cluster failed");
4988 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
4989 			CTLFLAG_RD, &adapter->dropped_pkts,
4990 			"Driver dropped packets");
4991 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
4992 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
4993 			"Driver tx dma failure in xmit");
4994 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
4995 			CTLFLAG_RD, &adapter->rx_overruns,
4996 			"RX overruns");
4997 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
4998 			CTLFLAG_RD, &adapter->watchdog_events,
4999 			"Watchdog timeouts");
5000 
5001 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5002 			CTLFLAG_RD, adapter, E1000_CTRL,
5003 			em_sysctl_reg_handler, "IU",
5004 			"Device Control Register");
5005 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5006 			CTLFLAG_RD, adapter, E1000_RCTL,
5007 			em_sysctl_reg_handler, "IU",
5008 			"Receiver Control Register");
5009 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5010 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5011 			"Flow Control High Watermark");
5012 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5013 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5014 			"Flow Control Low Watermark");
5015 
5016 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5017 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5018 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5019 					    CTLFLAG_RD, NULL, "Queue Name");
5020 		queue_list = SYSCTL_CHILDREN(queue_node);
5021 
5022 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5023 				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5024 				em_sysctl_reg_handler, "IU",
5025  				"Transmit Descriptor Head");
5026 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5027 				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5028 				em_sysctl_reg_handler, "IU",
5029  				"Transmit Descriptor Tail");
5030 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5031 				CTLFLAG_RD, &txr->tx_irq,
5032 				"Queue MSI-X Transmit Interrupts");
5033 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5034 				CTLFLAG_RD, &txr->no_desc_avail,
5035 				"Queue No Descriptor Available");
5036 
5037 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5038 				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5039 				em_sysctl_reg_handler, "IU",
5040 				"Receive Descriptor Head");
5041 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5042 				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5043 				em_sysctl_reg_handler, "IU",
5044 				"Receive Descriptor Tail");
5045 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5046 				CTLFLAG_RD, &rxr->rx_irq,
5047 				"Queue MSI-X Receive Interrupts");
5048 	}
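
	/*
	 * Note: each PROC node above passes a register offset as arg2;
	 * em_sysctl_reg_handler is expected to read that register when
	 * the node is queried, so the head/tail values reported are
	 * live rather than cached.
	 */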
5049 
5050 	/* MAC stats get their own sub node */
5051 
5052 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5053 				    CTLFLAG_RD, NULL, "Statistics");
5054 	stat_list = SYSCTL_CHILDREN(stat_node);
5055 
5056 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5057 			CTLFLAG_RD, &stats->ecol,
5058 			"Excessive collisions");
5059 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5060 			CTLFLAG_RD, &stats->scc,
5061 			"Single collisions");
5062 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5063 			CTLFLAG_RD, &stats->mcc,
5064 			"Multiple collisions");
5065 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5066 			CTLFLAG_RD, &stats->latecol,
5067 			"Late collisions");
5068 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5069 			CTLFLAG_RD, &stats->colc,
5070 			"Collision Count");
5071 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5072 			CTLFLAG_RD, &adapter->stats.symerrs,
5073 			"Symbol Errors");
5074 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5075 			CTLFLAG_RD, &adapter->stats.sec,
5076 			"Sequence Errors");
5077 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5078 			CTLFLAG_RD, &adapter->stats.dc,
5079 			"Defer Count");
5080 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5081 			CTLFLAG_RD, &adapter->stats.mpc,
5082 			"Missed Packets");
5083 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5084 			CTLFLAG_RD, &adapter->stats.rnbc,
5085 			"Receive No Buffers");
5086 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5087 			CTLFLAG_RD, &adapter->stats.ruc,
5088 			"Receive Undersize");
5089 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5090 			CTLFLAG_RD, &adapter->stats.rfc,
5091 			"Fragmented Packets Received");
5092 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5093 			CTLFLAG_RD, &adapter->stats.roc,
5094 			"Oversized Packets Received");
5095 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5096 			CTLFLAG_RD, &adapter->stats.rjc,
5097 			"Received Jabber");
5098 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5099 			CTLFLAG_RD, &adapter->stats.rxerrc,
5100 			"Receive Errors");
5101 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5102 			CTLFLAG_RD, &adapter->stats.crcerrs,
5103 			"CRC errors");
5104 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5105 			CTLFLAG_RD, &adapter->stats.algnerrc,
5106 			"Alignment Errors");
5107 	/* On 82575 these are collision counts */
5108 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5109 			CTLFLAG_RD, &adapter->stats.cexterr,
5110 			"Collision/Carrier extension errors");
5111 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5112 			CTLFLAG_RD, &adapter->stats.xonrxc,
5113 			"XON Received");
5114 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5115 			CTLFLAG_RD, &adapter->stats.xontxc,
5116 			"XON Transmitted");
5117 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5118 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5119 			"XOFF Received");
5120 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5121 			CTLFLAG_RD, &adapter->stats.xofftxc,
5122 			"XOFF Transmitted");
5123 
5124 	/* Packet Reception Stats */
5125 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5126 			CTLFLAG_RD, &adapter->stats.tpr,
5127 			"Total Packets Received");
5128 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5129 			CTLFLAG_RD, &adapter->stats.gprc,
5130 			"Good Packets Received");
5131 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5132 			CTLFLAG_RD, &adapter->stats.bprc,
5133 			"Broadcast Packets Received");
5134 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5135 			CTLFLAG_RD, &adapter->stats.mprc,
5136 			"Multicast Packets Received");
5137 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5138 			CTLFLAG_RD, &adapter->stats.prc64,
5139 			"64 byte frames received");
5140 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5141 			CTLFLAG_RD, &adapter->stats.prc127,
5142 			"65-127 byte frames received");
5143 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5144 			CTLFLAG_RD, &adapter->stats.prc255,
5145 			"128-255 byte frames received");
5146 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5147 			CTLFLAG_RD, &adapter->stats.prc511,
5148 			"256-511 byte frames received");
5149 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5150 			CTLFLAG_RD, &adapter->stats.prc1023,
5151 			"512-1023 byte frames received");
5152 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5153 			CTLFLAG_RD, &adapter->stats.prc1522,
5154 			"1024-1522 byte frames received");
5155 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5156 			CTLFLAG_RD, &adapter->stats.gorc,
5157 			"Good Octets Received");
5158 
5159 	/* Packet Transmission Stats */
5160 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5161 			CTLFLAG_RD, &adapter->stats.gotc,
5162 			"Good Octets Transmitted");
5163 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5164 			CTLFLAG_RD, &adapter->stats.tpt,
5165 			"Total Packets Transmitted");
5166 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5167 			CTLFLAG_RD, &adapter->stats.gptc,
5168 			"Good Packets Transmitted");
5169 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5170 			CTLFLAG_RD, &adapter->stats.bptc,
5171 			"Broadcast Packets Transmitted");
5172 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5173 			CTLFLAG_RD, &adapter->stats.mptc,
5174 			"Multicast Packets Transmitted");
5175 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5176 			CTLFLAG_RD, &adapter->stats.ptc64,
5177 			"64 byte frames transmitted");
5178 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5179 			CTLFLAG_RD, &adapter->stats.ptc127,
5180 			"65-127 byte frames transmitted");
5181 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5182 			CTLFLAG_RD, &adapter->stats.ptc255,
5183 			"128-255 byte frames transmitted");
5184 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5185 			CTLFLAG_RD, &adapter->stats.ptc511,
5186 			"256-511 byte frames transmitted");
5187 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5188 			CTLFLAG_RD, &adapter->stats.ptc1023,
5189 			"512-1023 byte frames transmitted");
5190 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5191 			CTLFLAG_RD, &adapter->stats.ptc1522,
5192 			"1024-1522 byte frames transmitted");
5193 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5194 			CTLFLAG_RD, &adapter->stats.tsctc,
5195 			"TSO Contexts Transmitted");
5196 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5197 			CTLFLAG_RD, &adapter->stats.tsctfc,
5198 			"TSO Contexts Failed");
5199 
5200 
5201 	/* Interrupt Stats */
5202 
5203 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5204 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5205 	int_list = SYSCTL_CHILDREN(int_node);
5206 
5207 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5208 			CTLFLAG_RD, &adapter->stats.iac,
5209 			"Interrupt Assertion Count");
5210 
5211 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5212 			CTLFLAG_RD, &adapter->stats.icrxptc,
5213 			"Interrupt Cause Rx Pkt Timer Expire Count");
5214 
5215 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5216 			CTLFLAG_RD, &adapter->stats.icrxatc,
5217 			"Interrupt Cause Rx Abs Timer Expire Count");
5218 
5219 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5220 			CTLFLAG_RD, &adapter->stats.ictxptc,
5221 			"Interrupt Cause Tx Pkt Timer Expire Count");
5222 
5223 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5224 			CTLFLAG_RD, &adapter->stats.ictxatc,
5225 			"Interrupt Cause Tx Abs Timer Expire Count");
5226 
5227 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5228 			CTLFLAG_RD, &adapter->stats.ictxqec,
5229 			"Interrupt Cause Tx Queue Empty Count");
5230 
5231 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5232 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5233 			"Interrupt Cause Tx Queue Min Thresh Count");
5234 
5235 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5236 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5237 			"Interrupt Cause Rx Desc Min Thresh Count");
5238 
5239 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5240 			CTLFLAG_RD, &adapter->stats.icrxoc,
5241 			"Interrupt Cause Receiver Overrun Count");
5242 }
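
/*
 * Usage sketch: after attach, the nodes registered above appear under
 * the device's sysctl tree, e.g. for a hypothetical unit 0:
 *
 *	sysctl dev.em.0.mac_stats.good_pkts_recvd
 *	sysctl dev.em.0.queue0.txd_head
 *	sysctl dev.em.0.interrupts.asserts
 */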
5243 
5244 /**********************************************************************
5245  *
5246  *  This routine provides a way to dump out the adapter EEPROM,
5247  *  often a useful debug/service tool. It dumps only the first
5248  *  32 words; everything that matters lives in that range.
5249  *
5250  **********************************************************************/
5251 static int
5252 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5253 {
5254 	struct adapter *adapter;
5255 	int error;
5256 	int result;
5257 
5258 	result = -1;
5259 	error = sysctl_handle_int(oidp, &result, 0, req);
5260 
5261 	if (error || !req->newptr)
5262 		return (error);
5263 
5264 	/*
5265 	 * This value will cause a hex dump of the
5266 	 * first 32 16-bit words of the EEPROM to
5267 	 * the screen.
5268 	 */
5269 	if (result == 1) {
5270 		adapter = (struct adapter *)arg1;
5271 		em_print_nvm_info(adapter);
5272 	}
5273 
5274 	return (error);
5275 }
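
/*
 * Usage sketch (assuming this handler is attached as a node named
 * "nvm"; the actual name is chosen where the OID is registered):
 * writing 1 triggers the dump, any other value is accepted and
 * ignored, e.g.:
 *
 *	sysctl dev.em.0.nvm=1
 */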
5276 
5277 static void
5278 em_print_nvm_info(struct adapter *adapter)
5279 {
5280 	u16	eeprom_data;
5281 	int	i, j, row = 0;
5282 
5283 	/* It's a bit crude, but it gets the job done */
5284 	printf("\nInterface EEPROM Dump:\n");
5285 	printf("Offset\n0x0000  ");
5286 	for (i = 0, j = 0; i < 32; i++, j++) {
5287 		if (j == 8) { /* Make the offset block */
5288 			j = 0; ++row;
5289 			printf("\n0x00%x0  ", row);
5290 		}
5291 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5292 		printf("%04x ", eeprom_data);
5293 	}
5294 	printf("\n");
5295 }
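
/*
 * Sample of the resulting layout: eight 16-bit words per row, with the
 * left-hand column giving the byte offset of the first word (values
 * below are placeholders):
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 */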
5296 
5297 static int
5298 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5299 {
5300 	struct em_int_delay_info *info;
5301 	struct adapter *adapter;
5302 	u32 regval;
5303 	int error, usecs, ticks;
5304 
5305 	info = (struct em_int_delay_info *)arg1;
5306 	usecs = info->value;
5307 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5308 	if (error != 0 || req->newptr == NULL)
5309 		return (error);
5310 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5311 		return (EINVAL);
5312 	info->value = usecs;
5313 	ticks = EM_USECS_TO_TICKS(usecs);
5314 
5315 	adapter = info->adapter;
5316 
5317 	EM_CORE_LOCK(adapter);
5318 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5319 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5320 	/* Handle a few special cases. */
5321 	switch (info->offset) {
5322 	case E1000_RDTR:
5323 		break;
5324 	case E1000_TIDV:
5325 		if (ticks == 0) {
5326 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5327 			/* Don't write 0 into the TIDV register. */
5328 			regval++;
5329 		} else
5330 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5331 		break;
5332 	}
5333 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5334 	EM_CORE_UNLOCK(adapter);
5335 	return (0);
5336 }
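
/*
 * Worked example (assuming EM_USECS_TO_TICKS() in if_em.h converts to
 * the 1.024 usec units these timer registers use): writing 100 to an
 * int-delay node stores usecs = 100 and programs roughly 98 ticks into
 * the register's low 16 bits.  Writing 0 to the node backing E1000_TIDV
 * also clears E1000_TXD_CMD_IDE and programs 1 tick instead, since the
 * TIDV register must not be written as 0.
 */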
5337 
5338 static void
5339 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5340 	const char *description, struct em_int_delay_info *info,
5341 	int offset, int value)
5342 {
5343 	info->adapter = adapter;
5344 	info->offset = offset;
5345 	info->value = value;
5346 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5347 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5348 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5349 	    info, 0, em_sysctl_int_delay, "I", description);
5350 }
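
/*
 * A typical caller, sketched after the attach-time setup earlier in
 * this file (names as used there):
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *	    E1000_REGISTER(&adapter->hw, E1000_RDTR),
 *	    em_rx_int_delay_dflt);
 */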
5351 
5352 static void
5353 em_add_rx_process_limit(struct adapter *adapter, const char *name,
5354 	const char *description, int *limit, int value)
5355 {
5356 	*limit = value;
5357 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5358 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5359 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5360 }
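
/*
 * A typical caller, again sketched after the attach-time setup earlier
 * in this file:
 *
 *	em_add_rx_process_limit(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, em_rx_process_limit);
 */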
5361 
5362 
5363