/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.2";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static bool	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
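
/*
 * Worked example (editorial, illustrative only): the hardware delay
 * registers count in units of 1.024 usecs, which is where the
 * 1024/1000 ratio above comes from.  EM_TICKS_TO_USECS(64) =
 * (1024 * 64 + 500) / 1000 = 66 usecs, and EM_USECS_TO_TICKS(66) =
 * (1000 * 66 + 512) / 1024 = 64 ticks; the +500 and +512 terms round
 * to the nearest unit rather than truncating.
 */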

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF */
static int eee_setting = 0;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");
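
/*
 * Editorial note: since the knobs above are CTLFLAG_RDTUN, they are
 * read-only at runtime and take effect as loader tunables.  A
 * hypothetical /boot/loader.conf illustrating typical overrides
 * (values are examples, not recommendations):
 *
 *	hw.em.txd="2048"
 *	hw.em.rxd="2048"
 *	hw.em.rx_process_limit="-1"
 *	hw.em.enable_msix="0"
 */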

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of that adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum, and the total ring size
	 * must be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;
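
	/*
	 * Editorial, illustrative only: the legacy descriptors above are
	 * 16 bytes each, so with EM_DBA_ALIGN at its usual value of 128
	 * the alignment test accepts any count that is a multiple of
	 * 8 (128 / 16); e.g. 1024 descriptors occupy 16384 bytes and
	 * pass, while 500 would fail the modulo check.
	 */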

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
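
	/*
	 * For reference (editorial, assuming the standard constants):
	 * this works out to 1500 (ETHERMTU) + 14 (ETHER_HDR_LEN) +
	 * 4 (FCS) = 1518 bytes maximum, and 60 (ETH_ZLEN) + 4 = 64
	 * bytes minimum, the classic Ethernet frame bounds.
	 */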

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	em_set_sysctl_value(adapter, "eee_control",
	    "enable Energy Efficient Ethernet",
	    &hw->dev_spec.ich8lan.eee_disable, eee_setting);

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again.
		** If it fails a second time, it is a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "VLAN in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}

#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the hardware is busy the driver can queue the
 *  request rather than doing an immediate send; that deferral,
 *  more than having multiple TX queues, is where this driver
 *  gains its advantage.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER)
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}
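
/*
** Design note (editorial): EM_TX_TRYLOCK keeps the stack from ever
** blocking here.  If another thread holds the TX lock, the frame is
** simply enqueued on the buf_ring, to be drained by the dequeue loop
** in em_mq_start_locked() on a later pass through the transmit path.
*/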

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	/*
	** If we went inactive, schedule
	** a task to clean up.
	*/
	if (ifp->if_drv_flags & IFF_DRV_OACTIVE)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
		/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
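		/*
		 * For example (illustrative arithmetic): with a 9234 byte
		 * max_frame_size, the largest MTU accepted below is
		 * 9234 - ETHER_HDR_LEN (14) - ETHER_CRC_LEN (4) = 9216.
		 */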
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  an init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 *  This routine returns nothing; errors are reported on the console.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset. We make a duplicate
	 * in the last RAR entry for that eventuality; this
	 * ensures the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
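
	/*
	 * For scale (editorial; typical values, since MJUMPAGESIZE
	 * follows the platform page size): MCLBYTES = 2048,
	 * MJUMPAGESIZE = 4096 on most platforms, and MJUM9BYTES = 9216,
	 * so each step just covers the frame-size threshold above it.
	 */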

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

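	/*
	 * Editorial note: on these devices the ICR is read-to-clear,
	 * so the single read above both samples the pending causes and
	 * acknowledges them; the bits are re-checked from the reg_icr
	 * copy below rather than re-read from hardware.
	 */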
	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}

/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	bool		more;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	more = em_txeof(txr);
	EM_TX_UNLOCK(txr);
	if (more)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

1689 /*********************************************************************
1690  *
1691  *  Media Ioctl callback
1692  *
1693  *  This routine is called when the user changes speed/duplex using
1694  *  media/mediaopt option with ifconfig.
1695  *
1696  **********************************************************************/
1697 static int
1698 em_media_change(struct ifnet *ifp)
1699 {
1700 	struct adapter *adapter = ifp->if_softc;
1701 	struct ifmedia  *ifm = &adapter->media;
1702 
1703 	INIT_DEBUGOUT("em_media_change: begin");
1704 
1705 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1706 		return (EINVAL);
1707 
1708 	EM_CORE_LOCK(adapter);
1709 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1710 	case IFM_AUTO:
1711 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1712 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1713 		break;
1714 	case IFM_1000_LX:
1715 	case IFM_1000_SX:
1716 	case IFM_1000_T:
1717 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1718 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1719 		break;
1720 	case IFM_100_TX:
1721 		adapter->hw.mac.autoneg = FALSE;
1722 		adapter->hw.phy.autoneg_advertised = 0;
1723 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1724 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1725 		else
1726 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1727 		break;
1728 	case IFM_10_T:
1729 		adapter->hw.mac.autoneg = FALSE;
1730 		adapter->hw.phy.autoneg_advertised = 0;
1731 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1732 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1733 		else
1734 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1735 		break;
1736 	default:
1737 		device_printf(adapter->dev, "Unsupported media type\n");
1738 	}
1739 
1740 	em_init_locked(adapter);
1741 	EM_CORE_UNLOCK(adapter);
1742 
1743 	return (0);
1744 }
1745 
1746 /*********************************************************************
1747  *
1748  *  This routine maps the mbufs to tx descriptors.
1749  *
1750  *  return 0 on success, positive on failure
1751  **********************************************************************/
1752 
1753 static int
1754 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1755 {
1756 	struct adapter		*adapter = txr->adapter;
1757 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1758 	bus_dmamap_t		map;
1759 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1760 	struct e1000_tx_desc	*ctxd = NULL;
1761 	struct mbuf		*m_head;
1762 	struct ether_header	*eh;
1763 	struct ip		*ip = NULL;
1764 	struct tcphdr		*tp = NULL;
1765 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1766 	int			ip_off, poff;
1767 	int			nsegs, i, j, first, last = 0;
1768 	int			error, do_tso, tso_desc = 0, remap = 1;
1769 
1770 retry:
1771 	m_head = *m_headp;
1772 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1773 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1774 	ip_off = poff = 0;
1775 
1776 	/*
1777 	 * Intel recommends entire IP/TCP header length reside in a single
1778 	 * buffer. If multiple descriptors are used to describe the IP and
1779 	 * TCP header, each descriptor should describe one or more
1780 	 * complete headers; descriptors referencing only parts of headers
1781 	 * are not supported. If all layer headers are not coalesced into
1782 	 * a single buffer, each buffer should not cross a 4KB boundary,
1783 	 * or be larger than the maximum read request size.
1784 	 * The controller also requires the IP/TCP header to be modified
1785 	 * for TSO to work, so we first obtain a writable mbuf chain and
1786 	 * then coalesce the ethernet/IP/TCP headers into a single buffer
1787 	 * to satisfy the controller. This also simplifies IP/TCP/UDP
1788 	 * checksum offloading, which has similar restrictions.
1789 	 */
1790 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1791 		if (do_tso || (m_head->m_next != NULL &&
1792 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1793 			if (M_WRITABLE(*m_headp) == 0) {
1794 				m_head = m_dup(*m_headp, M_DONTWAIT);
1795 				m_freem(*m_headp);
1796 				if (m_head == NULL) {
1797 					*m_headp = NULL;
1798 					return (ENOBUFS);
1799 				}
1800 				*m_headp = m_head;
1801 			}
1802 		}
1803 		/*
1804 		 * XXX
1805 		 * Assume IPv4, we don't have TSO/checksum offload support
1806 		 * for IPv6 yet.
1807 		 */
1808 		ip_off = sizeof(struct ether_header);
1809 		m_head = m_pullup(m_head, ip_off);
1810 		if (m_head == NULL) {
1811 			*m_headp = NULL;
1812 			return (ENOBUFS);
1813 		}
1814 		eh = mtod(m_head, struct ether_header *);
1815 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1816 			ip_off = sizeof(struct ether_vlan_header);
1817 			m_head = m_pullup(m_head, ip_off);
1818 			if (m_head == NULL) {
1819 				*m_headp = NULL;
1820 				return (ENOBUFS);
1821 			}
1822 		}
1823 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1824 		if (m_head == NULL) {
1825 			*m_headp = NULL;
1826 			return (ENOBUFS);
1827 		}
1828 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1829 		poff = ip_off + (ip->ip_hl << 2);
1830 		if (do_tso) {
1831 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1832 			if (m_head == NULL) {
1833 				*m_headp = NULL;
1834 				return (ENOBUFS);
1835 			}
1836 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1837 			/*
1838 			 * TSO workaround: pull the TCP header and 4
1839 			 * extra bytes of payload into the first mbuf.
1840 			 */
1841 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1842 			if (m_head == NULL) {
1843 				*m_headp = NULL;
1844 				return (ENOBUFS);
1845 			}
1846 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1847 			ip->ip_len = 0;
1848 			ip->ip_sum = 0;
1849 			/*
1850 			 * The TSO pseudo-header checksum must not include the
1851 			 * TCP payload length, so recompute it here in the form
1852 			 * the hardware expects, per Microsoft's Large Send
1853 			 * Offload specification.
1854 			 */
1855 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1856 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1857 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1858 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1859 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1860 			if (m_head == NULL) {
1861 				*m_headp = NULL;
1862 				return (ENOBUFS);
1863 			}
1864 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1865 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1866 			if (m_head == NULL) {
1867 				*m_headp = NULL;
1868 				return (ENOBUFS);
1869 			}
1870 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1871 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1872 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1873 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1874 			if (m_head == NULL) {
1875 				*m_headp = NULL;
1876 				return (ENOBUFS);
1877 			}
1878 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1879 		}
1880 		*m_headp = m_head;
1881 	}
1882 
1883 	/*
1884 	 * Map the packet for DMA
1885 	 *
1886 	 * Capture the first descriptor index;
1887 	 * this descriptor will store the index
1888 	 * of the EOP descriptor, the only one
1889 	 * that receives a DONE bit writeback.
1890 	 */
1891 	first = txr->next_avail_desc;
1892 	tx_buffer = &txr->tx_buffers[first];
1893 	tx_buffer_mapped = tx_buffer;
1894 	map = tx_buffer->map;
1895 
1896 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1897 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1898 
1899 	/*
1900 	 * There are two types of errors we can (try) to handle:
1901 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1902 	 *   out of segments.  Defragment the mbuf chain and try again.
1903 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1904 	 *   at this point in time.  Defer sending and try again later.
1905 	 * All other errors, in particular EINVAL, are fatal and prevent the
1906 	 * mbuf chain from ever going through.  Drop it and report error.
1907 	 */
1908 	if (error == EFBIG && remap) {
1909 		struct mbuf *m;
1910 
1911 		m = m_defrag(*m_headp, M_DONTWAIT);
1912 		if (m == NULL) {
1913 			adapter->mbuf_alloc_failed++;
1914 			m_freem(*m_headp);
1915 			*m_headp = NULL;
1916 			return (ENOBUFS);
1917 		}
1918 		*m_headp = m;
1919 
1920 		/* Try it again, but only once */
1921 		remap = 0;
1922 		goto retry;
1923 	} else if (error == ENOMEM) {
1924 		adapter->no_tx_dma_setup++;
1925 		return (error);
1926 	} else if (error != 0) {
1927 		adapter->no_tx_dma_setup++;
1928 		m_freem(*m_headp);
1929 		*m_headp = NULL;
1930 		return (error);
1931 	}
1932 
1933 	/*
1934 	 * TSO Hardware workaround, if this packet is not
1935 	 * TSO, and is only a single descriptor long, and
1936 	 * it follows a TSO burst, then we need to add a
1937 	 * sentinel descriptor to prevent premature writeback.
1938 	 */
1939 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1940 		if (nsegs == 1)
1941 			tso_desc = TRUE;
1942 		txr->tx_tso = FALSE;
1943 	}
1944 
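	/* Need nsegs descriptors plus slack; a TSO sentinel (below) can consume one extra */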
1945 	if (nsegs > (txr->tx_avail - 2)) {
1946 		txr->no_desc_avail++;
1947 		bus_dmamap_unload(txr->txtag, map);
1948 		return (ENOBUFS);
1949 	}
1950 	m_head = *m_headp;
1951 
1952 	/* Do hardware assists */
1953 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1954 		em_tso_setup(txr, m_head, ip_off, ip, tp,
1955 		    &txd_upper, &txd_lower);
1956 		/* we need to make a final sentinel transmit desc */
1957 		tso_desc = TRUE;
1958 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1959 		em_transmit_checksum_setup(txr, m_head,
1960 		    ip_off, ip, &txd_upper, &txd_lower);
1961 
1962 	if (m_head->m_flags & M_VLANTAG) {
1963 		/* Set the vlan id. */
1964 		txd_upper |=
1965 		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
1966 		/* Tell hardware to add tag */
1967 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
1968 	}
1969 
1970 	i = txr->next_avail_desc;
1971 
1972 	/* Set up our transmit descriptors */
1973 	for (j = 0; j < nsegs; j++) {
1974 		bus_size_t seg_len;
1975 		bus_addr_t seg_addr;
1976 
1977 		tx_buffer = &txr->tx_buffers[i];
1978 		ctxd = &txr->tx_base[i];
1979 		seg_addr = segs[j].ds_addr;
1980 		seg_len  = segs[j].ds_len;
1981 		/*
1982 		** TSO Workaround:
1983 		** If this is the last descriptor, we want to
1984 		** split it so we have a small final sentinel
1985 		*/
1986 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1987 			seg_len -= 4;
1988 			ctxd->buffer_addr = htole64(seg_addr);
1989 			ctxd->lower.data = htole32(
1990 			adapter->txd_cmd | txd_lower | seg_len);
1991 			ctxd->upper.data =
1992 			    htole32(txd_upper);
1993 			if (++i == adapter->num_tx_desc)
1994 				i = 0;
1995 			/* Now make the sentinel */
1996 			++txd_used; /* using an extra txd */
1997 			ctxd = &txr->tx_base[i];
1998 			tx_buffer = &txr->tx_buffers[i];
1999 			ctxd->buffer_addr =
2000 			    htole64(seg_addr + seg_len);
2001 			ctxd->lower.data = htole32(
2002 			adapter->txd_cmd | txd_lower | 4);
2003 			ctxd->upper.data =
2004 			    htole32(txd_upper);
2005 			last = i;
2006 			if (++i == adapter->num_tx_desc)
2007 				i = 0;
2008 		} else {
2009 			ctxd->buffer_addr = htole64(seg_addr);
2010 			ctxd->lower.data = htole32(
2011 			adapter->txd_cmd | txd_lower | seg_len);
2012 			ctxd->upper.data =
2013 			    htole32(txd_upper);
2014 			last = i;
2015 			if (++i == adapter->num_tx_desc)
2016 				i = 0;
2017 		}
2018 		tx_buffer->m_head = NULL;
2019 		tx_buffer->next_eop = -1;
2020 	}
2021 
2022 	txr->next_avail_desc = i;
2023 	txr->tx_avail -= nsegs;
2024 	if (tso_desc) /* TSO used an extra for sentinel */
2025 		txr->tx_avail -= txd_used;
2026 
2027 	tx_buffer->m_head = m_head;
2028 	/*
2029 	** Swap the maps so the last descriptor, which
2030 	** gets the completion interrupt, holds the real
2031 	** map, and the first descriptor takes over the
2032 	** unused map from this last descriptor.
2033 	*/
2034 	tx_buffer_mapped->map = tx_buffer->map;
2035 	tx_buffer->map = map;
2036 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2037 
2038 	/*
2039 	 * The last descriptor of the packet
2040 	 * needs End Of Packet (EOP)
2041 	 * and Report Status (RS).
2042 	 */
2043 	ctxd->lower.data |=
2044 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2045 	/*
2046 	 * Keep track in the first buffer which
2047 	 * descriptor will be written back
2048 	 */
2049 	tx_buffer = &txr->tx_buffers[first];
2050 	tx_buffer->next_eop = last;
2051 	/* Update the watchdog time early and often */
2052 	txr->watchdog_time = ticks;
2053 
2054 	/*
2055 	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2056 	 * that this frame is available to transmit.
2057 	 */
2058 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2059 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2060 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2061 
2062 	return (0);
2063 }
2064 
2065 static void
2066 em_set_promisc(struct adapter *adapter)
2067 {
2068 	struct ifnet	*ifp = adapter->ifp;
2069 	u32		reg_rctl;
2070 
2071 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2072 
2073 	if (ifp->if_flags & IFF_PROMISC) {
2074 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2075 		/* Turn this on if you want to see bad packets */
2076 		if (em_debug_sbp)
2077 			reg_rctl |= E1000_RCTL_SBP;
2078 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2079 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2080 		reg_rctl |= E1000_RCTL_MPE;
2081 		reg_rctl &= ~E1000_RCTL_UPE;
2082 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2083 	}
2084 }
2085 
2086 static void
2087 em_disable_promisc(struct adapter *adapter)
2088 {
2089 	u32	reg_rctl;
2090 
2091 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2092 
2093 	reg_rctl &=  (~E1000_RCTL_UPE);
2094 	reg_rctl &=  (~E1000_RCTL_MPE);
2095 	reg_rctl &=  (~E1000_RCTL_SBP);
2096 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2097 }
2098 
2099 
2100 /*********************************************************************
2101  *  Multicast Update
2102  *
2103  *  This routine is called whenever the multicast address list is updated.
2104  *
2105  **********************************************************************/
2106 
2107 static void
2108 em_set_multi(struct adapter *adapter)
2109 {
2110 	struct ifnet	*ifp = adapter->ifp;
2111 	struct ifmultiaddr *ifma;
2112 	u32 reg_rctl = 0;
2113 	u8  *mta; /* Multicast array memory */
2114 	int mcnt = 0;
2115 
2116 	IOCTL_DEBUGOUT("em_set_multi: begin");
2117 
2118 	mta = adapter->mta;
2119 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2120 
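	/* 82542 rev 2.0: hold the receiver in reset and disable MWI while rewriting the multicast table */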
2121 	if (adapter->hw.mac.type == e1000_82542 &&
2122 	    adapter->hw.revision_id == E1000_REVISION_2) {
2123 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2124 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2125 			e1000_pci_clear_mwi(&adapter->hw);
2126 		reg_rctl |= E1000_RCTL_RST;
2127 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2128 		msec_delay(5);
2129 	}
2130 
2131 #if __FreeBSD_version < 800000
2132 	IF_ADDR_LOCK(ifp);
2133 #else
2134 	if_maddr_rlock(ifp);
2135 #endif
2136 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2137 		if (ifma->ifma_addr->sa_family != AF_LINK)
2138 			continue;
2139 
2140 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2141 			break;
2142 
2143 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2144 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2145 		mcnt++;
2146 	}
2147 #if __FreeBSD_version < 800000
2148 	IF_ADDR_UNLOCK(ifp);
2149 #else
2150 	if_maddr_runlock(ifp);
2151 #endif
2152 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2153 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2154 		reg_rctl |= E1000_RCTL_MPE;
2155 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2156 	} else
2157 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2158 
2159 	if (adapter->hw.mac.type == e1000_82542 &&
2160 	    adapter->hw.revision_id == E1000_REVISION_2) {
2161 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2162 		reg_rctl &= ~E1000_RCTL_RST;
2163 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2164 		msec_delay(5);
2165 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2166 			e1000_pci_set_mwi(&adapter->hw);
2167 	}
2168 }
2169 
2170 
2171 /*********************************************************************
2172  *  Timer routine
2173  *
2174  *  This routine checks for link status and updates statistics.
2175  *
2176  **********************************************************************/
2177 
2178 static void
2179 em_local_timer(void *arg)
2180 {
2181 	struct adapter	*adapter = arg;
2182 	struct ifnet	*ifp = adapter->ifp;
2183 	struct tx_ring	*txr = adapter->tx_rings;
2184 	struct rx_ring	*rxr = adapter->rx_rings;
2185 	u32		trigger;
2186 
2187 	EM_CORE_LOCK_ASSERT(adapter);
2188 
2189 	em_update_link_status(adapter);
2190 	em_update_stats_counters(adapter);
2191 
2192 	/* Reset LAA into RAR[0] on 82571 */
2193 	if ((adapter->hw.mac.type == e1000_82571) &&
2194 	    e1000_get_laa_state_82571(&adapter->hw))
2195 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2196 
2197 	/* Mask to use in the irq trigger */
2198 	if (adapter->msix_mem)
2199 		trigger = rxr->ims; /* RX for 82574 */
2200 	else
2201 		trigger = E1000_ICS_RXDMT0;
2202 
2203 	/*
2204 	** Check on the state of the TX queue(s); this
2205 	** can be done without the lock because it's RO
2206 	** and the HUNG state will be static if set.
2207 	*/
2208 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2209 		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2210 		    (adapter->pause_frames == 0))
2211 			goto hung;
2212 		/* Schedule a TX tasklet if needed */
2213 		if (txr->tx_avail <= EM_MAX_SCATTER)
2214 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2215 	}
2216 
2217 	adapter->pause_frames = 0;
2218 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2219 #ifndef DEVICE_POLLING
2220 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2221 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2222 #endif
2223 	return;
2224 hung:
2225 	/* Looks like we're hung */
2226 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2227 	device_printf(adapter->dev,
2228 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2229 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2230 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2231 	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2232 	    "Next TX to Clean = %d\n",
2233 	    txr->me, txr->tx_avail, txr->next_to_clean);
2234 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2235 	adapter->watchdog_events++;
2236 	adapter->pause_frames = 0;
2237 	em_init_locked(adapter);
2238 }
2239 
2240 
2241 static void
2242 em_update_link_status(struct adapter *adapter)
2243 {
2244 	struct e1000_hw *hw = &adapter->hw;
2245 	struct ifnet *ifp = adapter->ifp;
2246 	device_t dev = adapter->dev;
2247 	struct tx_ring *txr = adapter->tx_rings;
2248 	u32 link_check = 0;
2249 
2250 	/* Get the cached link value or read phy for real */
2251 	switch (hw->phy.media_type) {
2252 	case e1000_media_type_copper:
2253 		if (hw->mac.get_link_status) {
2254 			/* Do the work to read phy */
2255 			e1000_check_for_link(hw);
2256 			link_check = !hw->mac.get_link_status;
2257 			if (link_check) /* ESB2 fix */
2258 				e1000_cfg_on_link_up(hw);
2259 		} else
2260 			link_check = TRUE;
2261 		break;
2262 	case e1000_media_type_fiber:
2263 		e1000_check_for_link(hw);
2264 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2265 		    E1000_STATUS_LU);
2266 		break;
2267 	case e1000_media_type_internal_serdes:
2268 		e1000_check_for_link(hw);
2269 		link_check = adapter->hw.mac.serdes_has_link;
2270 		break;
2271 	default:
2272 	case e1000_media_type_unknown:
2273 		break;
2274 	}
2275 
2276 	/* Now check for a transition */
2277 	if (link_check && (adapter->link_active == 0)) {
2278 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2279 		    &adapter->link_duplex);
2280 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2281 		if ((adapter->link_speed != SPEED_1000) &&
2282 		    ((hw->mac.type == e1000_82571) ||
2283 		    (hw->mac.type == e1000_82572))) {
2284 			int tarc0;
2285 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2286 			tarc0 &= ~SPEED_MODE_BIT;
2287 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2288 		}
2289 		if (bootverbose)
2290 			device_printf(dev, "Link is up %d Mbps %s\n",
2291 			    adapter->link_speed,
2292 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2293 			    "Full Duplex" : "Half Duplex"));
2294 		adapter->link_active = 1;
2295 		adapter->smartspeed = 0;
2296 		ifp->if_baudrate = adapter->link_speed * 1000000;
2297 		if_link_state_change(ifp, LINK_STATE_UP);
2298 	} else if (!link_check && (adapter->link_active == 1)) {
2299 		ifp->if_baudrate = adapter->link_speed = 0;
2300 		adapter->link_duplex = 0;
2301 		if (bootverbose)
2302 			device_printf(dev, "Link is Down\n");
2303 		adapter->link_active = 0;
2304 		/* Link down, disable watchdog */
2305 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2306 			txr->queue_status = EM_QUEUE_IDLE;
2307 		if_link_state_change(ifp, LINK_STATE_DOWN);
2308 	}
2309 }
2310 
2311 /*********************************************************************
2312  *
2313  *  This routine disables all traffic on the adapter by issuing a
2314  *  This routine disables all traffic on the adapter by issuing a
2315  *  global reset on the MAC and deallocating TX/RX buffers.
2316  *
2317  *  This routine should always be called with the CORE lock held;
2318  *  it takes each TX lock in turn.
2319 
2320 static void
2321 em_stop(void *arg)
2322 {
2323 	struct adapter	*adapter = arg;
2324 	struct ifnet	*ifp = adapter->ifp;
2325 	struct tx_ring	*txr = adapter->tx_rings;
2326 
2327 	EM_CORE_LOCK_ASSERT(adapter);
2328 
2329 	INIT_DEBUGOUT("em_stop: begin");
2330 
2331 	em_disable_intr(adapter);
2332 	callout_stop(&adapter->timer);
2333 
2334 	/* Tell the stack that the interface is no longer active */
2335 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2336 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2337 
2338 	/* Disarm watchdog timer. */
2339 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2340 		EM_TX_LOCK(txr);
2341 		txr->queue_status = EM_QUEUE_IDLE;
2342 		EM_TX_UNLOCK(txr);
2343 	}
2344 
2345 	e1000_reset_hw(&adapter->hw);
2346 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2347 
2348 	e1000_led_off(&adapter->hw);
2349 	e1000_cleanup_led(&adapter->hw);
2350 }
2351 
2352 
2353 /*********************************************************************
2354  *
2355  *  Determine hardware revision.
2356  *
2357  **********************************************************************/
2358 static void
2359 em_identify_hardware(struct adapter *adapter)
2360 {
2361 	device_t dev = adapter->dev;
2362 
2363 	/* Make sure our PCI config space has the necessary stuff set */
2364 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2365 	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2366 	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2367 		device_printf(dev, "Memory Access and/or Bus Master bits "
2368 		    "were not set!\n");
2369 		adapter->hw.bus.pci_cmd_word |=
2370 		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2371 		pci_write_config(dev, PCIR_COMMAND,
2372 		    adapter->hw.bus.pci_cmd_word, 2);
2373 	}
2374 
2375 	/* Save off the information about this board */
2376 	adapter->hw.vendor_id = pci_get_vendor(dev);
2377 	adapter->hw.device_id = pci_get_device(dev);
2378 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2379 	adapter->hw.subsystem_vendor_id =
2380 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2381 	adapter->hw.subsystem_device_id =
2382 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2383 
2384 	/* Do Shared Code Init and Setup */
2385 	if (e1000_set_mac_type(&adapter->hw)) {
2386 		device_printf(dev, "Setup init failure\n");
2387 		return;
2388 	}
2389 }
2390 
2391 static int
2392 em_allocate_pci_resources(struct adapter *adapter)
2393 {
2394 	device_t	dev = adapter->dev;
2395 	int		rid;
2396 
2397 	rid = PCIR_BAR(0);
2398 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2399 	    &rid, RF_ACTIVE);
2400 	if (adapter->memory == NULL) {
2401 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2402 		return (ENXIO);
2403 	}
2404 	adapter->osdep.mem_bus_space_tag =
2405 	    rman_get_bustag(adapter->memory);
2406 	adapter->osdep.mem_bus_space_handle =
2407 	    rman_get_bushandle(adapter->memory);
2408 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2409 
2410 	/* Default to a single queue */
2411 	adapter->num_queues = 1;
2412 
2413 	/*
2414 	 * Setup MSI/X or MSI if PCI Express
2415 	 */
2416 	adapter->msix = em_setup_msix(adapter);
2417 
2418 	adapter->hw.back = &adapter->osdep;
2419 
2420 	return (0);
2421 }
2422 
2423 /*********************************************************************
2424  *
2425  *  Setup the Legacy or MSI Interrupt handler
2426  *
2427  **********************************************************************/
2428 int
2429 em_allocate_legacy(struct adapter *adapter)
2430 {
2431 	device_t dev = adapter->dev;
2432 	struct tx_ring	*txr = adapter->tx_rings;
2433 	int error, rid = 0;
2434 
2435 	/* Manually turn off all interrupts */
2436 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2437 
2438 	if (adapter->msix == 1) /* using MSI */
2439 		rid = 1;
2440 	/* We allocate a single interrupt resource */
2441 	adapter->res = bus_alloc_resource_any(dev,
2442 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2443 	if (adapter->res == NULL) {
2444 		device_printf(dev, "Unable to allocate bus resource: "
2445 		    "interrupt\n");
2446 		return (ENXIO);
2447 	}
2448 
2449 	/*
2450 	 * Allocate a fast interrupt and the associated
2451 	 * deferred processing contexts.
2452 	 */
2453 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2454 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2455 	    taskqueue_thread_enqueue, &adapter->tq);
2456 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2457 	    device_get_nameunit(adapter->dev));
2458 	/* Use a TX only tasklet for local timer */
2459 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2460 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2461 	    taskqueue_thread_enqueue, &txr->tq);
2462 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2463 	    device_get_nameunit(adapter->dev));
2464 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2465 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2466 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2467 		device_printf(dev, "Failed to register fast interrupt "
2468 			    "handler: %d\n", error);
2469 		taskqueue_free(adapter->tq);
2470 		adapter->tq = NULL;
2471 		return (error);
2472 	}
2473 
2474 	return (0);
2475 }
2476 
2477 /*********************************************************************
2478  *
2479  *  Setup the MSIX Interrupt handlers
2480  *   This is not really multiqueue; rather,
2481  *   it is just separate interrupt vectors
2482  *   for TX, RX, and Link.
2483  *
2484  **********************************************************************/
2485 int
2486 em_allocate_msix(struct adapter *adapter)
2487 {
2488 	device_t	dev = adapter->dev;
2489 	struct		tx_ring *txr = adapter->tx_rings;
2490 	struct		rx_ring *rxr = adapter->rx_rings;
2491 	int		error, rid, vector = 0;
2492 
2493 
2494 	/* Make sure all interrupts are disabled */
2495 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2496 
2497 	/* First set up ring resources */
2498 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2499 
2500 		/* RX ring */
2501 		rid = vector + 1;
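		/* SYS_RES_IRQ rids for MSI/MSI-X vectors are 1-based, hence vector + 1 */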
2502 
2503 		rxr->res = bus_alloc_resource_any(dev,
2504 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2505 		if (rxr->res == NULL) {
2506 			device_printf(dev,
2507 			    "Unable to allocate bus resource: "
2508 			    "RX MSIX Interrupt %d\n", i);
2509 			return (ENXIO);
2510 		}
2511 		if ((error = bus_setup_intr(dev, rxr->res,
2512 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2513 		    rxr, &rxr->tag)) != 0) {
2514 			device_printf(dev, "Failed to register RX handler");
2515 			return (error);
2516 		}
2517 #if __FreeBSD_version >= 800504
2518 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2519 #endif
2520 		rxr->msix = vector++; /* NOTE increment vector for TX */
2521 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2522 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2523 		    taskqueue_thread_enqueue, &rxr->tq);
2524 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2525 		    device_get_nameunit(adapter->dev));
2526 		/*
2527 		** Set the bit to enable interrupt
2528 		** in E1000_IMS -- bits 20 and 21
2529 		** are for RX0 and RX1; note this has
2530 		** NOTHING to do with the MSIX vector
2531 		*/
2532 		rxr->ims = 1 << (20 + i);
2533 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
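		/* each IVAR cause field is 4 bits: the low 3 select the MSIX vector, the 8 marks it valid */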
2534 
2535 		/* TX ring */
2536 		rid = vector + 1;
2537 		txr->res = bus_alloc_resource_any(dev,
2538 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2539 		if (txr->res == NULL) {
2540 			device_printf(dev,
2541 			    "Unable to allocate bus resource: "
2542 			    "TX MSIX Interrupt %d\n", i);
2543 			return (ENXIO);
2544 		}
2545 		if ((error = bus_setup_intr(dev, txr->res,
2546 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2547 		    txr, &txr->tag)) != 0) {
2548 			device_printf(dev, "Failed to register TX handler");
2549 			return (error);
2550 		}
2551 #if __FreeBSD_version >= 800504
2552 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2553 #endif
2554 		txr->msix = vector++; /* Increment vector for next pass */
2555 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2556 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2557 		    taskqueue_thread_enqueue, &txr->tq);
2558 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2559 		    device_get_nameunit(adapter->dev));
2560 		/*
2561 		** Set the bit to enable interrupt
2562 		** in E1000_IMS -- bits 22 and 23
2563 		** are for TX0 and TX1; note this has
2564 		** NOTHING to do with the MSIX vector
2565 		*/
2566 		txr->ims = 1 << (22 + i);
2567 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2568 	}
2569 
2570 	/* Link interrupt */
2571 	++rid;
2572 	adapter->res = bus_alloc_resource_any(dev,
2573 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2574 	if (!adapter->res) {
2575 		device_printf(dev,"Unable to allocate "
2576 		    "bus resource: Link interrupt [%d]\n", rid);
2577 		return (ENXIO);
2578         }
2579 	/* Set the link handler function */
2580 	error = bus_setup_intr(dev, adapter->res,
2581 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2582 	    em_msix_link, adapter, &adapter->tag);
2583 	if (error) {
2584 		adapter->res = NULL;
2585 		device_printf(dev, "Failed to register LINK handler");
2586 		return (error);
2587 	}
2588 #if __FreeBSD_version >= 800504
2589 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2590 #endif
2591 	adapter->linkvec = vector;
2592 	adapter->ivars |= (8 | vector) << 16;
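	/* the link/other cause lives in the IVAR field at bits 16-19 */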
2593 	adapter->ivars |= 0x80000000;
2594 
2595 	return (0);
2596 }
2597 
2598 
2599 static void
2600 em_free_pci_resources(struct adapter *adapter)
2601 {
2602 	device_t	dev = adapter->dev;
2603 	struct tx_ring	*txr;
2604 	struct rx_ring	*rxr;
2605 	int		rid;
2606 
2607 
2608 	/*
2609 	** Release all the queue interrupt resources:
2610 	*/
2611 	for (int i = 0; i < adapter->num_queues; i++) {
2612 		txr = &adapter->tx_rings[i];
2613 		rxr = &adapter->rx_rings[i];
2614 		/* an early abort? */
2615 		if ((txr == NULL) || (rxr == NULL))
2616 			break;
2617 		rid = txr->msix + 1;
2618 		if (txr->tag != NULL) {
2619 			bus_teardown_intr(dev, txr->res, txr->tag);
2620 			txr->tag = NULL;
2621 		}
2622 		if (txr->res != NULL)
2623 			bus_release_resource(dev, SYS_RES_IRQ,
2624 			    rid, txr->res);
2625 		rid = rxr->msix + 1;
2626 		if (rxr->tag != NULL) {
2627 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2628 			rxr->tag = NULL;
2629 		}
2630 		if (rxr->res != NULL)
2631 			bus_release_resource(dev, SYS_RES_IRQ,
2632 			    rid, rxr->res);
2633 	}
2634 
2635 	if (adapter->linkvec) /* we are doing MSIX */
2636 		rid = adapter->linkvec + 1;
2637 	else
2638 		rid = (adapter->msix != 0) ? 1 : 0;
2639 
2640 	if (adapter->tag != NULL) {
2641 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2642 		adapter->tag = NULL;
2643 	}
2644 
2645 	if (adapter->res != NULL)
2646 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2647 
2648 
2649 	if (adapter->msix)
2650 		pci_release_msi(dev);
2651 
2652 	if (adapter->msix_mem != NULL)
2653 		bus_release_resource(dev, SYS_RES_MEMORY,
2654 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2655 
2656 	if (adapter->memory != NULL)
2657 		bus_release_resource(dev, SYS_RES_MEMORY,
2658 		    PCIR_BAR(0), adapter->memory);
2659 
2660 	if (adapter->flash != NULL)
2661 		bus_release_resource(dev, SYS_RES_MEMORY,
2662 		    EM_FLASH, adapter->flash);
2663 }
2664 
2665 /*
2666  * Setup MSI or MSI/X
2667  */
2668 static int
2669 em_setup_msix(struct adapter *adapter)
2670 {
2671 	device_t dev = adapter->dev;
2672 	int val = 0;
2673 
2674 	/*
2675 	** Setup MSI/X for Hartwell: tests have shown
2676 	** use of two queues to be unstable, and to
2677 	** provide no great gain anyway, so we simply
2678 	** separate the interrupts and use a single queue.
2679 	*/
2680 	if ((adapter->hw.mac.type == e1000_82574) &&
2681 	    (em_enable_msix == TRUE)) {
2682 		/* Map the MSIX BAR */
2683 		int rid = PCIR_BAR(EM_MSIX_BAR);
2684 		adapter->msix_mem = bus_alloc_resource_any(dev,
2685 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2686 		if (!adapter->msix_mem) {
2687 			/* May not be enabled */
2688 			device_printf(adapter->dev,
2689 			    "Unable to map MSIX table\n");
2690 			goto msi;
2691 		}
2692 		val = pci_msix_count(dev);
2693 		/* We only need 3 vectors */
2694 		if (val > 3)
2695 			val = 3;
2696 		if (val != 3) {	/* val was clamped above, so only 3 is valid */
2697 			bus_release_resource(dev, SYS_RES_MEMORY,
2698 			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2699 			adapter->msix_mem = NULL;
2700 			device_printf(adapter->dev,
2701 			    "MSIX: incorrect vectors, using MSI\n");
2702 			goto msi;
2703 		}
2704 
2705 		if (pci_alloc_msix(dev, &val) == 0) {
2706 			device_printf(adapter->dev,
2707 			    "Using MSIX interrupts "
2708 			    "with %d vectors\n", val);
2709 		}
2710 
2711 		return (val);
2712 	}
2713 msi:
2714 	val = pci_msi_count(dev);
2715 	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2716 		adapter->msix = 1;
2717 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2718 		return (val);
2719 	}
2720 	/* Should only happen due to manual configuration */
2721 	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2722 	return (0);
2723 }
2724 
2725 
2726 /*********************************************************************
2727  *
2728  *  Initialize the hardware to a configuration
2729  *  as specified by the adapter structure.
2730  *
2731  **********************************************************************/
2732 static void
2733 em_reset(struct adapter *adapter)
2734 {
2735 	device_t	dev = adapter->dev;
2736 	struct ifnet	*ifp = adapter->ifp;
2737 	struct e1000_hw	*hw = &adapter->hw;
2738 	u16		rx_buffer_size;
2739 	u32		pba;
2740 
2741 	INIT_DEBUGOUT("em_reset: begin");
2742 
2743 	/* Set up smart power down as default off on newer adapters. */
2744 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2745 	    hw->mac.type == e1000_82572)) {
2746 		u16 phy_tmp = 0;
2747 
2748 		/* Speed up time to link by disabling smart power down. */
2749 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2750 		phy_tmp &= ~IGP02E1000_PM_SPD;
2751 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2752 	}
2753 
2754 	/*
2755 	 * Packet Buffer Allocation (PBA)
2756 	 * Writing PBA sets the receive portion of the buffer;
2757 	 * the remainder is used for the transmit buffer.
2758 	 */
2759 	switch (hw->mac.type) {
2760 	/* Total Packet Buffer on these is 48K */
2761 	case e1000_82571:
2762 	case e1000_82572:
2763 	case e1000_80003es2lan:
2764 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2765 		break;
2766 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2767 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2768 		break;
2769 	case e1000_82574:
2770 	case e1000_82583:
2771 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2772 		break;
2773 	case e1000_ich8lan:
2774 		pba = E1000_PBA_8K;
2775 		break;
2776 	case e1000_ich9lan:
2777 	case e1000_ich10lan:
2778 		/* Boost Receive side for jumbo frames */
2779 		if (adapter->max_frame_size > 4096)
2780 			pba = E1000_PBA_14K;
2781 		else
2782 			pba = E1000_PBA_10K;
2783 		break;
2784 	case e1000_pchlan:
2785 	case e1000_pch2lan:
2786 		pba = E1000_PBA_26K;
2787 		break;
2788 	default:
2789 		if (adapter->max_frame_size > 8192)
2790 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2791 		else
2792 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2793 	}
2794 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2795 
2796 	/*
2797 	 * These parameters control the automatic generation (Tx) and
2798 	 * response (Rx) to Ethernet PAUSE frames.
2799 	 * - High water mark should allow for at least two frames to be
2800 	 *   received after sending an XOFF.
2801 	 * - Low water mark works best when it is very near the high water mark.
2802 	 *   This allows the receiver to restart by sending XON when it has
2803 	 *   drained a bit. Here we use an arbitrary value of 1500, which
2804 	 *   restarts after roughly one full frame is pulled from the buffer.
2805 	 *   There could be several smaller frames in the buffer, and if so
2806 	 *   they will not trigger the XON until their total size reduces
2807 	 *   the buffer by 1500.
2808 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2809 	 */
2810 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2811 	hw->fc.high_water = rx_buffer_size -
2812 	    roundup2(adapter->max_frame_size, 1024);
2813 	hw->fc.low_water = hw->fc.high_water - 1500;
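	/*
	 * The low word of PBA holds the Rx allocation in KB, so << 10
	 * converts it to bytes. Example: with 32KB of Rx PBA and a
	 * 1522-byte max frame, high = 32768 - roundup2(1522, 1024) =
	 * 30720 and low = 30720 - 1500 = 29220.
	 */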
2814 
2815 	if (adapter->fc) /* locally set flow control value? */
2816 		hw->fc.requested_mode = adapter->fc;
2817 	else
2818 		hw->fc.requested_mode = e1000_fc_full;
2819 
2820 	if (hw->mac.type == e1000_80003es2lan)
2821 		hw->fc.pause_time = 0xFFFF;
2822 	else
2823 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2824 
2825 	hw->fc.send_xon = TRUE;
2826 
2827 	/* Device specific overrides/settings */
2828 	switch (hw->mac.type) {
2829 	case e1000_pchlan:
2830 		/* Workaround: no TX flow ctrl for PCH */
2831 		hw->fc.requested_mode = e1000_fc_rx_pause;
2832 		hw->fc.pause_time = 0xFFFF; /* override */
2833 		if (ifp->if_mtu > ETHERMTU) {
2834 			hw->fc.high_water = 0x3500;
2835 			hw->fc.low_water = 0x1500;
2836 		} else {
2837 			hw->fc.high_water = 0x5000;
2838 			hw->fc.low_water = 0x3000;
2839 		}
2840 		hw->fc.refresh_time = 0x1000;
2841 		break;
2842 	case e1000_pch2lan:
2843 		hw->fc.high_water = 0x5C20;
2844 		hw->fc.low_water = 0x5048;
2845 		hw->fc.pause_time = 0x0650;
2846 		hw->fc.refresh_time = 0x0400;
2847 		/* Jumbos need adjusted PBA */
2848 		if (ifp->if_mtu > ETHERMTU)
2849 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2850 		else
2851 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2852 		break;
2853 	case e1000_ich9lan:
2854 	case e1000_ich10lan:
2855 		if (ifp->if_mtu > ETHERMTU) {
2856 			hw->fc.high_water = 0x2800;
2857 			hw->fc.low_water = hw->fc.high_water - 8;
2858 			break;
2859 		}
2860 		/* else fall thru */
2861 	default:
2862 		if (hw->mac.type == e1000_80003es2lan)
2863 			hw->fc.pause_time = 0xFFFF;
2864 		break;
2865 	}
2866 
2867 	/* Issue a global reset */
2868 	e1000_reset_hw(hw);
2869 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2870 	em_disable_aspm(adapter);
2871 	/* and a re-init */
2872 	if (e1000_init_hw(hw) < 0) {
2873 		device_printf(dev, "Hardware Initialization Failed\n");
2874 		return;
2875 	}
2876 
2877 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2878 	e1000_get_phy_info(hw);
2879 	e1000_check_for_link(hw);
2880 	return;
2881 }
2882 
2883 /*********************************************************************
2884  *
2885  *  Setup networking device structure and register an interface.
2886  *
2887  **********************************************************************/
2888 static int
2889 em_setup_interface(device_t dev, struct adapter *adapter)
2890 {
2891 	struct ifnet   *ifp;
2892 
2893 	INIT_DEBUGOUT("em_setup_interface: begin");
2894 
2895 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2896 	if (ifp == NULL) {
2897 		device_printf(dev, "can not allocate ifnet structure\n");
2898 		return (-1);
2899 	}
2900 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2901 	ifp->if_init =  em_init;
2902 	ifp->if_softc = adapter;
2903 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2904 	ifp->if_ioctl = em_ioctl;
2905 	ifp->if_start = em_start;
2906 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2907 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2908 	IFQ_SET_READY(&ifp->if_snd);
2909 
2910 	ether_ifattach(ifp, adapter->hw.mac.addr);
2911 
2912 	ifp->if_capabilities = ifp->if_capenable = 0;
2913 
2914 #ifdef EM_MULTIQUEUE
2915 	/* Multiqueue stack interface */
2916 	ifp->if_transmit = em_mq_start;
2917 	ifp->if_qflush = em_qflush;
2918 #endif
2919 
2920 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2921 	ifp->if_capabilities |= IFCAP_TSO4;
2922 	/*
2923 	 * Tell the upper layer(s) we
2924 	 * support full VLAN capability
2925 	 */
2926 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2927 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2928 			     |  IFCAP_VLAN_HWTSO
2929 			     |  IFCAP_VLAN_MTU;
2930 	ifp->if_capenable = ifp->if_capabilities;
2931 
2932 	/*
2933 	** Don't turn this on by default: if vlans are
2934 	** created on another pseudo device (e.g. lagg),
2935 	** vlan events are not passed thru, breaking
2936 	** operation, but with HW FILTER off it works. If
2937 	** you use vlans directly on the em driver you can
2938 	** enable this and get full hardware tag filtering.
2939 	*/
2940 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2941 
2942 #ifdef DEVICE_POLLING
2943 	ifp->if_capabilities |= IFCAP_POLLING;
2944 #endif
2945 
2946 	/* Enable only WOL MAGIC by default */
2947 	if (adapter->wol) {
2948 		ifp->if_capabilities |= IFCAP_WOL;
2949 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2950 	}
2951 
2952 	/*
2953 	 * Specify the media types supported by this adapter and register
2954 	 * callbacks to update media and link information
2955 	 */
2956 	ifmedia_init(&adapter->media, IFM_IMASK,
2957 	    em_media_change, em_media_status);
2958 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2959 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2960 		u_char fiber_type = IFM_1000_SX;	/* default type */
2961 
2962 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2963 			    0, NULL);
2964 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2965 	} else {
2966 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2967 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2968 			    0, NULL);
2969 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2970 			    0, NULL);
2971 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2972 			    0, NULL);
2973 		if (adapter->hw.phy.type != e1000_phy_ife) {
2974 			ifmedia_add(&adapter->media,
2975 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2976 			ifmedia_add(&adapter->media,
2977 				IFM_ETHER | IFM_1000_T, 0, NULL);
2978 		}
2979 	}
2980 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2981 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2982 	return (0);
2983 }
2984 
2985 
2986 /*
2987  * Manage DMA'able memory.
2988  */
2989 static void
2990 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2991 {
2992 	if (error)
2993 		return;
2994 	*(bus_addr_t *) arg = segs[0].ds_addr;
2995 }
2996 
2997 static int
2998 em_dma_malloc(struct adapter *adapter, bus_size_t size,
2999         struct em_dma_alloc *dma, int mapflags)
3000 {
3001 	int error;
3002 
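	/* Descriptor rings must be physically contiguous, so the tag allows exactly one segment */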
3003 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3004 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3005 				BUS_SPACE_MAXADDR,	/* lowaddr */
3006 				BUS_SPACE_MAXADDR,	/* highaddr */
3007 				NULL, NULL,		/* filter, filterarg */
3008 				size,			/* maxsize */
3009 				1,			/* nsegments */
3010 				size,			/* maxsegsize */
3011 				0,			/* flags */
3012 				NULL,			/* lockfunc */
3013 				NULL,			/* lockarg */
3014 				&dma->dma_tag);
3015 	if (error) {
3016 		device_printf(adapter->dev,
3017 		    "%s: bus_dma_tag_create failed: %d\n",
3018 		    __func__, error);
3019 		goto fail_0;
3020 	}
3021 
3022 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3023 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3024 	if (error) {
3025 		device_printf(adapter->dev,
3026 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3027 		    __func__, (uintmax_t)size, error);
3028 		goto fail_1;	/* no DMA memory to free yet */
3029 	}
3030 
3031 	dma->dma_paddr = 0;
3032 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3033 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3034 	if (error || dma->dma_paddr == 0) {
3035 		device_printf(adapter->dev,
3036 		    "%s: bus_dmamap_load failed: %d\n",
3037 		    __func__, error);
3038 		goto fail_3;
3039 	}
3040 
3041 	return (0);
3042 
3043 fail_3:
3044 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3045 fail_2:
3046 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
3047 	bus_dma_tag_destroy(dma->dma_tag);
3048 fail_0:
3049 	dma->dma_map = NULL;
3050 	dma->dma_tag = NULL;
3051 
3052 	return (error);
3053 }
3054 
3055 static void
3056 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3057 {
3058 	if (dma->dma_tag == NULL)
3059 		return;
3060 	if (dma->dma_map != NULL) {
3061 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3062 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3063 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3064 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3065 		dma->dma_map = NULL;
3066 	}
3067 	bus_dma_tag_destroy(dma->dma_tag);
3068 	dma->dma_tag = NULL;
3069 }
3070 
3071 
3072 /*********************************************************************
3073  *
3074  *  Allocate memory for the transmit and receive rings, and then
3075  *  the descriptors associated with each, called only once at attach.
3076  *
3077  **********************************************************************/
3078 static int
3079 em_allocate_queues(struct adapter *adapter)
3080 {
3081 	device_t		dev = adapter->dev;
3082 	struct tx_ring		*txr = NULL;
3083 	struct rx_ring		*rxr = NULL;
3084 	int rsize, tsize, error = E1000_SUCCESS;
3085 	int txconf = 0, rxconf = 0;
3086 
3087 
3088 	/* Allocate the TX ring struct memory */
3089 	if (!(adapter->tx_rings =
3090 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3091 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3092 		device_printf(dev, "Unable to allocate TX ring memory\n");
3093 		error = ENOMEM;
3094 		goto fail;
3095 	}
3096 
3097 	/* Now allocate the RX */
3098 	if (!(adapter->rx_rings =
3099 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3100 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3101 		device_printf(dev, "Unable to allocate RX ring memory\n");
3102 		error = ENOMEM;
3103 		goto rx_fail;
3104 	}
3105 
3106 	tsize = roundup2(adapter->num_tx_desc *
3107 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3108 	/*
3109 	 * Now set up the TX queues, txconf is needed to handle the
3110 	 * possibility that things fail midcourse and we need to
3111 	 * undo memory gracefully
3112 	 */
3113 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3114 		/* Set up some basics */
3115 		txr = &adapter->tx_rings[i];
3116 		txr->adapter = adapter;
3117 		txr->me = i;
3118 
3119 		/* Initialize the TX lock */
3120 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3121 		    device_get_nameunit(dev), txr->me);
3122 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3123 
3124 		if (em_dma_malloc(adapter, tsize,
3125 			&txr->txdma, BUS_DMA_NOWAIT)) {
3126 			device_printf(dev,
3127 			    "Unable to allocate TX Descriptor memory\n");
3128 			error = ENOMEM;
3129 			goto err_tx_desc;
3130 		}
3131 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3132 		bzero((void *)txr->tx_base, tsize);
3133 
3134 		if (em_allocate_transmit_buffers(txr)) {
3135 			device_printf(dev,
3136 			    "Critical Failure setting up transmit buffers\n");
3137 			error = ENOMEM;
3138 			goto err_tx_desc;
3139 		}
3140 #if __FreeBSD_version >= 800000
3141 		/* Allocate a buf ring */
3142 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3143 		    M_WAITOK, &txr->tx_mtx);
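		/* software ring that feeds this TX queue via if_transmit */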
3144 #endif
3145 	}
3146 
3147 	/*
3148 	 * Next the RX queues...
3149 	 */
3150 	rsize = roundup2(adapter->num_rx_desc *
3151 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3152 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3153 		rxr = &adapter->rx_rings[i];
3154 		rxr->adapter = adapter;
3155 		rxr->me = i;
3156 
3157 		/* Initialize the RX lock */
3158 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3159 		    device_get_nameunit(dev), rxr->me);
3160 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3161 
3162 		if (em_dma_malloc(adapter, rsize,
3163 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3164 			device_printf(dev,
3165 			    "Unable to allocate RX Descriptor memory\n");
3166 			error = ENOMEM;
3167 			goto err_rx_desc;
3168 		}
3169 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3170 		bzero((void *)rxr->rx_base, rsize);
3171 
3172 		/* Allocate receive buffers for the ring */
3173 		if (em_allocate_receive_buffers(rxr)) {
3174 			device_printf(dev,
3175 			    "Critical Failure setting up receive buffers\n");
3176 			error = ENOMEM;
3177 			goto err_rx_desc;
3178 		}
3179 	}
3180 
3181 	return (0);
3182 
3183 err_rx_desc:
3184 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3185 		em_dma_free(adapter, &rxr->rxdma);
3186 err_tx_desc:
3187 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3188 		em_dma_free(adapter, &txr->txdma);
3189 	free(adapter->rx_rings, M_DEVBUF);
3190 rx_fail:
3191 #if __FreeBSD_version >= 800000
3192 	buf_ring_free(txr->br, M_DEVBUF);
3193 #endif
3194 	free(adapter->tx_rings, M_DEVBUF);
3195 fail:
3196 	return (error);
3197 }
3198 
3199 
3200 /*********************************************************************
3201  *
3202  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3203  *  the information needed to transmit a packet on the wire. This is
3204  *  called only once at attach, setup is done every reset.
3205  *
3206  **********************************************************************/
3207 static int
3208 em_allocate_transmit_buffers(struct tx_ring *txr)
3209 {
3210 	struct adapter *adapter = txr->adapter;
3211 	device_t dev = adapter->dev;
3212 	struct em_buffer *txbuf;
3213 	int error, i;
3214 
3215 	/*
3216 	 * Setup DMA descriptor areas.
3217 	 */
3218 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3219 			       1, 0,			/* alignment, bounds */
3220 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3221 			       BUS_SPACE_MAXADDR,	/* highaddr */
3222 			       NULL, NULL,		/* filter, filterarg */
3223 			       EM_TSO_SIZE,		/* maxsize */
3224 			       EM_MAX_SCATTER,		/* nsegments */
3225 			       PAGE_SIZE,		/* maxsegsize */
3226 			       0,			/* flags */
3227 			       NULL,			/* lockfunc */
3228 			       NULL,			/* lockfuncarg */
3229 			       &txr->txtag))) {
3230 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3231 		goto fail;
3232 	}
3233 
3234 	if (!(txr->tx_buffers =
3235 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3236 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3237 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3238 		error = ENOMEM;
3239 		goto fail;
3240 	}
3241 	/* Create the descriptor buffer dma maps */
3242         /* Create the descriptor buffer dma maps */
3243 	txbuf = txr->tx_buffers;
3244 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3245 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3246 		if (error != 0) {
3247 			device_printf(dev, "Unable to create TX DMA map\n");
3248 			goto fail;
3249 		}
3250 	}
3251 
3252 	return (0);
3253 fail:
3254 	/* We free all, it handles case where we are in the middle */
3255 	em_free_transmit_structures(adapter);
3256 	return (error);
3257 }
3258 
3259 /*********************************************************************
3260  *
3261  *  Initialize a transmit ring.
3262  *
3263  **********************************************************************/
3264 static void
3265 em_setup_transmit_ring(struct tx_ring *txr)
3266 {
3267 	struct adapter *adapter = txr->adapter;
3268 	struct em_buffer *txbuf;
3269 	int i;
3270 #ifdef DEV_NETMAP
3271 	struct netmap_adapter *na = NA(adapter->ifp);
3272 	struct netmap_slot *slot;
3273 #endif /* DEV_NETMAP */
3274 
3275 	/* Clear the old descriptor contents */
3276 	EM_TX_LOCK(txr);
3277 #ifdef DEV_NETMAP
3278 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3279 #endif /* DEV_NETMAP */
3280 
3281 	bzero((void *)txr->tx_base,
3282 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3283 	/* Reset indices */
3284 	txr->next_avail_desc = 0;
3285 	txr->next_to_clean = 0;
3286 
3287 	/* Free any existing tx buffers. */
3288 	txbuf = txr->tx_buffers;
3289 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3290 		if (txbuf->m_head != NULL) {
3291 			bus_dmamap_sync(txr->txtag, txbuf->map,
3292 			    BUS_DMASYNC_POSTWRITE);
3293 			bus_dmamap_unload(txr->txtag, txbuf->map);
3294 			m_freem(txbuf->m_head);
3295 			txbuf->m_head = NULL;
3296 		}
3297 #ifdef DEV_NETMAP
3298 		if (slot) {
3299 			int si = i + na->tx_rings[txr->me].nkr_hwofs;
3300 			uint64_t paddr;
3301 			void *addr;
3302 
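			/* nkr_hwofs maps NIC ring index i to its netmap slot; wrap just below */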
3303 			if (si >= na->num_tx_desc)
3304 				si -= na->num_tx_desc;
3305 			addr = PNMB(slot + si, &paddr);
3306 			txr->tx_base[i].buffer_addr = htole64(paddr);
3307 			/* reload the map for netmap mode */
3308 			netmap_load_map(txr->txtag, txbuf->map, addr);
3309 		}
3310 #endif /* DEV_NETMAP */
3311 
3312 		/* clear the watch index */
3313 		txbuf->next_eop = -1;
3314 	}
3315 
3316 	/* Set number of descriptors available */
3317 	txr->tx_avail = adapter->num_tx_desc;
3318 	txr->queue_status = EM_QUEUE_IDLE;
3319 
3320 	/* Clear checksum offload context. */
3321 	txr->last_hw_offload = 0;
3322 	txr->last_hw_ipcss = 0;
3323 	txr->last_hw_ipcso = 0;
3324 	txr->last_hw_tucss = 0;
3325 	txr->last_hw_tucso = 0;
3326 
3327 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3328 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3329 	EM_TX_UNLOCK(txr);
3330 }
3331 
3332 /*********************************************************************
3333  *
3334  *  Initialize all transmit rings.
3335  *
3336  **********************************************************************/
3337 static void
3338 em_setup_transmit_structures(struct adapter *adapter)
3339 {
3340 	struct tx_ring *txr = adapter->tx_rings;
3341 
3342 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3343 		em_setup_transmit_ring(txr);
3344 
3345 	return;
3346 }
3347 
3348 /*********************************************************************
3349  *
3350  *  Enable transmit unit.
3351  *
3352  **********************************************************************/
3353 static void
3354 em_initialize_transmit_unit(struct adapter *adapter)
3355 {
3356 	struct tx_ring	*txr = adapter->tx_rings;
3357 	struct e1000_hw	*hw = &adapter->hw;
3358 	u32	tctl, tarc, tipg = 0;
3359 
3360 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3361 
3362 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3363 		u64 bus_addr = txr->txdma.dma_paddr;
3364 		/* Base and Len of TX Ring */
3365 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3366 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3367 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3368 	    	    (u32)(bus_addr >> 32));
3369 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3370 	    	    (u32)bus_addr);
3371 		/* Init the HEAD/TAIL indices */
3372 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3373 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3374 
3375 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3376 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3377 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3378 
3379 		txr->queue_status = EM_QUEUE_IDLE;
3380 	}
3381 
3382 	/* Set the default values for the Tx Inter Packet Gap timer */
3383 	switch (adapter->hw.mac.type) {
3384 	case e1000_80003es2lan:
3385 		tipg = DEFAULT_82543_TIPG_IPGR1;
3386 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3387 		    E1000_TIPG_IPGR2_SHIFT;
3388 		break;
3389 	default:
3390 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3391 		    (adapter->hw.phy.media_type ==
3392 		    e1000_media_type_internal_serdes))
3393 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3394 		else
3395 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3396 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3397 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3398 	}
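	/*
	 * With the usual 82543-class defaults (assumed here for
	 * illustration: IPGT = 8 for copper or 9 for fiber, IPGR1 = 8
	 * at shift 10, IPGR2 = 6 at shift 20), the copper case above
	 * composes tipg = 8 | (8 << 10) | (6 << 20) = 0x00602008.
	 */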
3399 
3400 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3401 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3402 
3403 	if (adapter->hw.mac.type >= e1000_82540)
3404 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3405 		    adapter->tx_abs_int_delay.value);
3406 
3407 	if ((adapter->hw.mac.type == e1000_82571) ||
3408 	    (adapter->hw.mac.type == e1000_82572)) {
3409 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3410 		tarc |= SPEED_MODE_BIT;
3411 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3412 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3413 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3414 		tarc |= 1;
3415 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3416 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3417 		tarc |= 1;
3418 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3419 	}
3420 
3421 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3422 	if (adapter->tx_int_delay.value > 0)
3423 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3424 
3425 	/* Program the Transmit Control Register */
3426 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3427 	tctl &= ~E1000_TCTL_CT;
3428 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3429 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3430 
3431 	if (adapter->hw.mac.type >= e1000_82571)
3432 		tctl |= E1000_TCTL_MULR;
3433 
3434 	/* This write will effectively turn on the transmit unit. */
3435 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3436 
3437 }
3438 
3439 
3440 /*********************************************************************
3441  *
3442  *  Free all transmit rings.
3443  *
3444  **********************************************************************/
3445 static void
3446 em_free_transmit_structures(struct adapter *adapter)
3447 {
3448 	struct tx_ring *txr = adapter->tx_rings;
3449 
3450 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3451 		EM_TX_LOCK(txr);
3452 		em_free_transmit_buffers(txr);
3453 		em_dma_free(adapter, &txr->txdma);
3454 		EM_TX_UNLOCK(txr);
3455 		EM_TX_LOCK_DESTROY(txr);
3456 	}
3457 
3458 	free(adapter->tx_rings, M_DEVBUF);
3459 }
3460 
3461 /*********************************************************************
3462  *
3463  *  Free transmit ring related data structures.
3464  *
3465  **********************************************************************/
3466 static void
3467 em_free_transmit_buffers(struct tx_ring *txr)
3468 {
3469 	struct adapter		*adapter = txr->adapter;
3470 	struct em_buffer	*txbuf;
3471 
3472 	INIT_DEBUGOUT("free_transmit_ring: begin");
3473 
3474 	if (txr->tx_buffers == NULL)
3475 		return;
3476 
3477 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3478 		txbuf = &txr->tx_buffers[i];
3479 		if (txbuf->m_head != NULL) {
3480 			bus_dmamap_sync(txr->txtag, txbuf->map,
3481 			    BUS_DMASYNC_POSTWRITE);
3482 			bus_dmamap_unload(txr->txtag,
3483 			    txbuf->map);
3484 			m_freem(txbuf->m_head);
3485 			txbuf->m_head = NULL;
3486 			if (txbuf->map != NULL) {
3487 				bus_dmamap_destroy(txr->txtag,
3488 				    txbuf->map);
3489 				txbuf->map = NULL;
3490 			}
3491 		} else if (txbuf->map != NULL) {
3492 			bus_dmamap_unload(txr->txtag,
3493 			    txbuf->map);
3494 			bus_dmamap_destroy(txr->txtag,
3495 			    txbuf->map);
3496 			txbuf->map = NULL;
3497 		}
3498 	}
3499 #if __FreeBSD_version >= 800000
3500 	if (txr->br != NULL)
3501 		buf_ring_free(txr->br, M_DEVBUF);
3502 #endif
3503 	if (txr->tx_buffers != NULL) {
3504 		free(txr->tx_buffers, M_DEVBUF);
3505 		txr->tx_buffers = NULL;
3506 	}
3507 	if (txr->txtag != NULL) {
3508 		bus_dma_tag_destroy(txr->txtag);
3509 		txr->txtag = NULL;
3510 	}
3511 	return;
3512 }
3513 
3514 
3515 /*********************************************************************
3516  *  The offload context is protocol specific (TCP/UDP) and thus
3517  *  only needs to be set when the protocol changes. A context
3518  *  change can be a performance detriment, so offload might be
3519  *  better just disabled. The reason lies in the way the
3520  *  controller supports pipelined requests from the Tx data
3521  *  DMA. Up to four requests can be pipelined, and they may
3522  *  belong to the same packet or to multiple packets. However all
3523  *  requests for one packet are issued before a request is issued
3524  *  for a subsequent packet, and if a request for the next packet
3525  *  requires a context change, that request is stalled until the
3526  *  previous request completes. This means setting up a new
3527  *  context effectively disables pipelined Tx data DMA, which in
3528  *  turn greatly slows down performance when sending small-sized
3529  *  frames.
3530  **********************************************************************/
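/*
 * A worked example for the function below (hypothetical frame,
 * assuming untagged Ethernet + IPv4 without options + TCP):
 * ip_off = 14 and ip->ip_hl = 5 give hdr_len = 14 + 20 = 34, so
 *
 *	ipcss = 14			start of the IP header
 *	ipcso = 14 + 10 = 24		offsetof(struct ip, ip_sum)
 *	tucss = 34			start of the TCP header
 *	tucso = 34 + 16 = 50		offsetof(struct tcphdr, th_sum)
 */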
3531 static void
3532 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3533     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3534 {
3535 	struct adapter			*adapter = txr->adapter;
3536 	struct e1000_context_desc	*TXD = NULL;
3537 	struct em_buffer		*tx_buffer;
3538 	int				cur, hdr_len;
3539 	u32				cmd = 0;
3540 	u16				offload = 0;
3541 	u8				ipcso, ipcss, tucso, tucss;
3542 
3543 	ipcss = ipcso = tucss = tucso = 0;
3544 	hdr_len = ip_off + (ip->ip_hl << 2);
3545 	cur = txr->next_avail_desc;
3546 
3547 	/* Setup of IP header checksum. */
3548 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3549 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3550 		offload |= CSUM_IP;
3551 		ipcss = ip_off;
3552 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3553 		/*
3554 		 * Start offset for header checksum calculation.
3555 		 * End offset for header checksum calculation.
3556 		 * Offset of place to put the checksum.
3557 		 */
3558 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3559 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3560 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3561 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3562 		cmd |= E1000_TXD_CMD_IP;
3563 	}
3564 
3565 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3566  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3567  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3568  		offload |= CSUM_TCP;
3569  		tucss = hdr_len;
3570  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3571  		/*
3572  		 * Setting up a new checksum offload context for every
3573  		 * frame takes a lot of processing time in hardware. It
3574  		 * also greatly reduces performance for small-sized
3575  		 * frames, so avoid it if the driver can reuse the
3576  		 * previously configured checksum offload context.
3577  		 */
3578  		if (txr->last_hw_offload == offload) {
3579  			if (offload & CSUM_IP) {
3580  				if (txr->last_hw_ipcss == ipcss &&
3581  				    txr->last_hw_ipcso == ipcso &&
3582  				    txr->last_hw_tucss == tucss &&
3583  				    txr->last_hw_tucso == tucso)
3584  					return;
3585  			} else {
3586  				if (txr->last_hw_tucss == tucss &&
3587  				    txr->last_hw_tucso == tucso)
3588  					return;
3589  			}
3590   		}
3591  		txr->last_hw_offload = offload;
3592  		txr->last_hw_tucss = tucss;
3593  		txr->last_hw_tucso = tucso;
3594  		/*
3595  		 * Start offset for payload checksum calculation.
3596  		 * End offset for payload checksum calculation.
3597  		 * Offset of place to put the checksum.
3598  		 */
3599 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3600  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3601  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3602  		TXD->upper_setup.tcp_fields.tucso = tucso;
3603  		cmd |= E1000_TXD_CMD_TCP;
3604  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3605  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3606  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3607  		tucss = hdr_len;
3608  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
 		offload |= CSUM_UDP;	/* mirror the TCP path above so the
 					 * reuse check below can tell UDP
 					 * and TCP contexts apart */
3609  		/*
3610  		 * Setting up a new checksum offload context for every
3611  		 * frame takes a lot of processing time in hardware. It
3612  		 * also greatly reduces performance for small-sized
3613  		 * frames, so avoid it if the driver can reuse the
3614  		 * previously configured checksum offload context.
3615  		 */
3616  		if (txr->last_hw_offload == offload) {
3617  			if (offload & CSUM_IP) {
3618  				if (txr->last_hw_ipcss == ipcss &&
3619  				    txr->last_hw_ipcso == ipcso &&
3620  				    txr->last_hw_tucss == tucss &&
3621  				    txr->last_hw_tucso == tucso)
3622  					return;
3623  			} else {
3624  				if (txr->last_hw_tucss == tucss &&
3625  				    txr->last_hw_tucso == tucso)
3626  					return;
3627  			}
3628  		}
3629  		txr->last_hw_offload = offload;
3630  		txr->last_hw_tucss = tucss;
3631  		txr->last_hw_tucso = tucso;
3632  		/*
3633  		 * Start offset for payload checksum calculation.
3634  		 * End offset for payload checksum calculation.
3635  		 * Offset of place to put the checksum.
3636  		 */
3637 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3638  		TXD->upper_setup.tcp_fields.tucss = tucss;
3639  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3640  		TXD->upper_setup.tcp_fields.tucso = tucso;
3641   	}
3642 
3643  	if (offload & CSUM_IP) {
3644  		txr->last_hw_ipcss = ipcss;
3645  		txr->last_hw_ipcso = ipcso;
3646   	}
3647 
3648 	TXD->tcp_seg_setup.data = htole32(0);
3649 	TXD->cmd_and_length =
3650 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3651 	tx_buffer = &txr->tx_buffers[cur];
3652 	tx_buffer->m_head = NULL;
3653 	tx_buffer->next_eop = -1;
3654 
3655 	if (++cur == adapter->num_tx_desc)
3656 		cur = 0;
3657 
3658 	txr->tx_avail--;
3659 	txr->next_avail_desc = cur;
3660 }
3661 
3662 
3663 /**********************************************************************
3664  *
3665  *  Setup work for hardware segmentation offload (TSO)
3666  *
3667  **********************************************************************/
3668 static void
3669 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3670     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3671 {
3672 	struct adapter			*adapter = txr->adapter;
3673 	struct e1000_context_desc	*TXD;
3674 	struct em_buffer		*tx_buffer;
3675 	int cur, hdr_len;
3676 
3677 	/*
3678 	 * In theory we could reuse the same TSO context if and only
3679 	 * if the frame is the same type (IP/TCP) and has the same
3680 	 * MSS. However, checking whether a frame has the same IP/TCP
3681 	 * structure is hard, so just ignore that and always establish
3682 	 * a new TSO context.
3683 	 */
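	/*
	 * For example (hypothetical values): ip_off = 14, ip_hl = 5 and
	 * th_off = 8 (a 32-byte TCP header carrying options) give
	 * hdr_len = 14 + 20 + 32 = 66.  The cmd_and_length written
	 * below then carries m_pkthdr.len - 66 as the total payload
	 * length, which the hardware cuts into tso_segsz-sized segments.
	 */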
3684 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3685 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3686 		      E1000_TXD_DTYP_D |	/* Data descr type */
3687 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3688 
3689 	/* IP and/or TCP header checksum calculation and insertion. */
3690 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3691 
3692 	cur = txr->next_avail_desc;
3693 	tx_buffer = &txr->tx_buffers[cur];
3694 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3695 
3696 	/*
3697 	 * Start offset for header checksum calculation.
3698 	 * End offset for header checksum calculation.
3699 	 * Offset of place to put the checksum.
3700 	 */
3701 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3702 	TXD->lower_setup.ip_fields.ipcse =
3703 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3704 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3705 	/*
3706 	 * Start offset for payload checksum calculation.
3707 	 * End offset for payload checksum calculation.
3708 	 * Offset of place to put the checksum.
3709 	 */
3710 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3711 	TXD->upper_setup.tcp_fields.tucse = 0;
3712 	TXD->upper_setup.tcp_fields.tucso =
3713 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3714 	/*
3715 	 * Payload size per packet w/o any headers.
3716 	 * Length of all headers up to payload.
3717 	 */
3718 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3719 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3720 
3721 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3722 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3723 				E1000_TXD_CMD_TSE |	/* TSE context */
3724 				E1000_TXD_CMD_IP |	/* Do IP csum */
3725 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3726 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3727 
3728 	tx_buffer->m_head = NULL;
3729 	tx_buffer->next_eop = -1;
3730 
3731 	if (++cur == adapter->num_tx_desc)
3732 		cur = 0;
3733 
3734 	txr->tx_avail--;
3735 	txr->next_avail_desc = cur;
3736 	txr->tx_tso = TRUE;
3737 }
3738 
3739 
3740 /**********************************************************************
3741  *
3742  *  Examine each tx_buffer in the used queue. If the hardware is done
3743  *  processing the packet then free associated resources. The
3744  *  tx_buffer is put back on the free queue.
3745  *
3746  **********************************************************************/
3747 static bool
3748 em_txeof(struct tx_ring *txr)
3749 {
3750 	struct adapter	*adapter = txr->adapter;
3751         int first, last, done, processed;
3752         struct em_buffer *tx_buffer;
3753         struct e1000_tx_desc   *tx_desc, *eop_desc;
3754 	struct ifnet   *ifp = adapter->ifp;
3755 
3756 	EM_TX_LOCK_ASSERT(txr);
3757 #ifdef DEV_NETMAP
3758 	if (ifp->if_capenable & IFCAP_NETMAP) {
3759 		struct netmap_adapter *na = NA(ifp);
3760 
3761 		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3762 		EM_TX_UNLOCK(txr);
3763 		EM_CORE_LOCK(adapter);
3764 		selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
3765 		EM_CORE_UNLOCK(adapter);
3766 		EM_TX_LOCK(txr);
3767 		return (FALSE);
3768 	}
3769 #endif /* DEV_NETMAP */
3770 
3771 	/* No work, make sure watchdog is off */
3772         if (txr->tx_avail == adapter->num_tx_desc) {
3773 		txr->queue_status = EM_QUEUE_IDLE;
3774                 return (FALSE);
3775 	}
3776 
3777 	processed = 0;
3778         first = txr->next_to_clean;
3779         tx_desc = &txr->tx_base[first];
3780         tx_buffer = &txr->tx_buffers[first];
3781 	last = tx_buffer->next_eop;
3782         eop_desc = &txr->tx_base[last];
3783 
3784 	/*
3785 	 * What this does is get the index of the
3786 	 * first descriptor AFTER the EOP of the
3787 	 * first packet, that way we can do the
3788 	 * simple comparison on the inner while loop.
3789 	 */
3790 	if (++last == adapter->num_tx_desc)
3791  		last = 0;
3792 	done = last;
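	/*
	 * For example (hypothetical values): with num_tx_desc = 1024, a
	 * packet occupying descriptors 1020-1023 has next_eop = 1023, so
	 * done wraps to 0 and the loop below cleans 1020 through 1023,
	 * stopping once first catches up with done.
	 */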
3793 
3794         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3795             BUS_DMASYNC_POSTREAD);
3796 
3797         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3798 		/* We clean the range of the packet */
3799 		while (first != done) {
3800                 	tx_desc->upper.data = 0;
3801                 	tx_desc->lower.data = 0;
3802                 	tx_desc->buffer_addr = 0;
3803                 	++txr->tx_avail;
3804 			++processed;
3805 
3806 			if (tx_buffer->m_head) {
3807 				bus_dmamap_sync(txr->txtag,
3808 				    tx_buffer->map,
3809 				    BUS_DMASYNC_POSTWRITE);
3810 				bus_dmamap_unload(txr->txtag,
3811 				    tx_buffer->map);
3812                         	m_freem(tx_buffer->m_head);
3813                         	tx_buffer->m_head = NULL;
3814                 	}
3815 			tx_buffer->next_eop = -1;
3816 			txr->watchdog_time = ticks;
3817 
3818 	                if (++first == adapter->num_tx_desc)
3819 				first = 0;
3820 
3821 	                tx_buffer = &txr->tx_buffers[first];
3822 			tx_desc = &txr->tx_base[first];
3823 		}
3824 		++ifp->if_opackets;
3825 		/* See if we can continue to the next packet */
3826 		last = tx_buffer->next_eop;
3827 		if (last != -1) {
3828         		eop_desc = &txr->tx_base[last];
3829 			/* Get new done point */
3830 			if (++last == adapter->num_tx_desc) last = 0;
3831 			done = last;
3832 		} else
3833 			break;
3834         }
3835         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3836             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3837 
3838         txr->next_to_clean = first;
3839 
3840 	/*
3841 	** Watchdog calculation: we know there is
3842 	** work outstanding or the first return
3843 	** would have been taken, so no progress
3844 	** for too long indicates a hang. The local
3845 	** timer will examine this and reset if needed.
3846 	*/
3847 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3848 		txr->queue_status = EM_QUEUE_HUNG;
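	/*
	 * (watchdog_time was refreshed in the loop above each time a
	 * descriptor completed, so the difference taken here is the time
	 * since the ring last made progress; EM_WATCHDOG is that stall
	 * threshold in ticks.)
	 */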
3849 
3850         /*
3851          * If we have a minimum free, clear IFF_DRV_OACTIVE
3852          * to tell the stack that it is OK to send packets.
3853 	 * Notice that all writes of OACTIVE happen under the
3854 	 * TX lock which, with a single queue, guarantees
3855 	 * sanity.
3856          */
3857         if (txr->tx_avail >= EM_MAX_SCATTER)
3858 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3859 
3860 	/* Disable watchdog if all clean */
3861 	if (txr->tx_avail == adapter->num_tx_desc) {
3862 		txr->queue_status = EM_QUEUE_IDLE;
3863 		return (FALSE);
3864 	}
3865 
3866 	return (TRUE);
3867 }
3868 
3869 
3870 /*********************************************************************
3871  *
3872  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3873  *
3874  **********************************************************************/
3875 static void
3876 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3877 {
3878 	struct adapter		*adapter = rxr->adapter;
3879 	struct mbuf		*m;
3880 	bus_dma_segment_t	segs[1];
3881 	struct em_buffer	*rxbuf;
3882 	int			i, j, error, nsegs;
3883 	bool			cleaned = FALSE;
3884 
3885 	i = j = rxr->next_to_refresh;
3886 	/*
3887 	** Get one descriptor beyond
3888 	** our work mark to control
3889 	** the loop.
3890 	*/
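	/*
	 * For instance (hypothetical values): next_to_refresh = 5 and
	 * limit = 9 start the loop with j = 6; descriptors 5, 6 and 7
	 * are refilled and next_to_refresh ends at 8, always one behind
	 * limit, which is what lets j control the loop.
	 */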
3891 	if (++j == adapter->num_rx_desc)
3892 		j = 0;
3893 
3894 	while (j != limit) {
3895 		rxbuf = &rxr->rx_buffers[i];
3896 		if (rxbuf->m_head == NULL) {
3897 			m = m_getjcl(M_DONTWAIT, MT_DATA,
3898 			    M_PKTHDR, adapter->rx_mbuf_sz);
3899 			/*
3900 			** If we have a temporary resource shortage
3901 			** that causes a failure, just abort refresh
3902 			** for now, we will return to this point when
3903 			** reinvoked from em_rxeof.
3904 			*/
3905 			if (m == NULL)
3906 				goto update;
3907 		} else
3908 			m = rxbuf->m_head;
3909 
3910 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3911 		m->m_flags |= M_PKTHDR;
3912 		m->m_data = m->m_ext.ext_buf;
3913 
3914 		/* Use bus_dma machinery to setup the memory mapping  */
3915 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3916 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3917 		if (error != 0) {
3918 			printf("Refresh mbufs: hdr dmamap load"
3919 			    " failure - %d\n", error);
3920 			m_free(m);
3921 			rxbuf->m_head = NULL;
3922 			goto update;
3923 		}
3924 		rxbuf->m_head = m;
3925 		bus_dmamap_sync(rxr->rxtag,
3926 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3927 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3928 		cleaned = TRUE;
3929 
3930 		i = j; /* Next is precalculated for us */
3931 		rxr->next_to_refresh = i;
3932 		/* Calculate next controlling index */
3933 		if (++j == adapter->num_rx_desc)
3934 			j = 0;
3935 	}
3936 update:
3937 	/*
3938 	** Update the tail pointer only if, and
3939 	** only as far as, we have refreshed.
3940 	*/
3941 	if (cleaned)
3942 		E1000_WRITE_REG(&adapter->hw,
3943 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3944 
3945 	return;
3946 }
3947 
3948 
3949 /*********************************************************************
3950  *
3951  *  Allocate memory for rx_buffer structures. Since we use one
3952  *  rx_buffer per received packet, the maximum number of rx_buffer's
3953  *  that we'll need is equal to the number of receive descriptors
3954  *  that we've allocated.
3955  *
3956  **********************************************************************/
3957 static int
3958 em_allocate_receive_buffers(struct rx_ring *rxr)
3959 {
3960 	struct adapter		*adapter = rxr->adapter;
3961 	device_t		dev = adapter->dev;
3962 	struct em_buffer	*rxbuf;
3963 	int			error;
3964 
3965 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3966 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3967 	if (rxr->rx_buffers == NULL) {
3968 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3969 		return (ENOMEM);
3970 	}
3971 
3972 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3973 				1, 0,			/* alignment, bounds */
3974 				BUS_SPACE_MAXADDR,	/* lowaddr */
3975 				BUS_SPACE_MAXADDR,	/* highaddr */
3976 				NULL, NULL,		/* filter, filterarg */
3977 				MJUM9BYTES,		/* maxsize */
3978 				1,			/* nsegments */
3979 				MJUM9BYTES,		/* maxsegsize */
3980 				0,			/* flags */
3981 				NULL,			/* lockfunc */
3982 				NULL,			/* lockarg */
3983 				&rxr->rxtag);
3984 	if (error) {
3985 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3986 		    __func__, error);
3987 		goto fail;
3988 	}
3989 
3990 	rxbuf = rxr->rx_buffers;
3991 	for (int i = 0; i < adapter->num_rx_desc; i++) {
3992 		rxbuf = &rxr->rx_buffers[i];
3993 		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3994 		    &rxbuf->map);
3995 		if (error) {
3996 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3997 			    __func__, error);
3998 			goto fail;
3999 		}
4000 	}
4001 
4002 	return (0);
4003 
4004 fail:
4005 	em_free_receive_structures(adapter);
4006 	return (error);
4007 }
4008 
4009 
4010 /*********************************************************************
4011  *
4012  *  Initialize a receive ring and its buffers.
4013  *
4014  **********************************************************************/
4015 static int
4016 em_setup_receive_ring(struct rx_ring *rxr)
4017 {
4018 	struct	adapter 	*adapter = rxr->adapter;
4019 	struct em_buffer	*rxbuf;
4020 	bus_dma_segment_t	seg[1];
4021 	int			rsize, nsegs, error = 0;
4022 
4023 
4024 	/* Clear the ring contents */
4025 	EM_RX_LOCK(rxr);
4026 	rsize = roundup2(adapter->num_rx_desc *
4027 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4028 	bzero((void *)rxr->rx_base, rsize);
4029 
4030 	/*
4031 	** Free current RX buffer structs and their mbufs
4032 	*/
4033 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4034 		rxbuf = &rxr->rx_buffers[i];
4035 		if (rxbuf->m_head != NULL) {
4036 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4037 			    BUS_DMASYNC_POSTREAD);
4038 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4039 			m_freem(rxbuf->m_head);
4040 		}
4041 	}
4042 
4043 	/* Now replenish the mbufs */
4044         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4045 		rxbuf = &rxr->rx_buffers[j];
4046 		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
4047 		    M_PKTHDR, adapter->rx_mbuf_sz);
4048 		if (rxbuf->m_head == NULL) {
4049 			error = ENOBUFS;
4050 			goto fail;
4051 		}
4052 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4053 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4054 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4055 
4056 		/* Get the memory mapping */
4057 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4058 		    rxbuf->map, rxbuf->m_head, seg,
4059 		    &nsegs, BUS_DMA_NOWAIT);
4060 		if (error != 0) {
4061 			m_freem(rxbuf->m_head);
4062 			rxbuf->m_head = NULL;
4063 			goto fail;
4064 		}
4065 		bus_dmamap_sync(rxr->rxtag,
4066 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4067 
4068 		/* Update descriptor */
4069 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4070 	}
4071 	rxr->next_to_check = 0;
4072 	rxr->next_to_refresh = 0;
4073 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4074 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4075 
4076 #ifdef DEV_NETMAP
4077     {
4078 	/*
4079 	 * This driver is slightly different from the standard:
4080 	 * it refills the rings in blocks of 8, so the replenish loop
4081 	 * above completes any leftover work. Also, after if_init()
4082 	 * the ring starts at rxr->next_to_check instead of 0.
4083 	 *
4084 	 * Currently: we leave the mbufs allocated even in netmap
4085 	 * mode, and simply make the NIC ring point to the
4086 	 * correct buffer (netmap_buf or mbuf) depending on
4087 	 * the mode. To avoid mbuf leaks, when in netmap mode we
4088 	 * must make sure that next_to_refresh == next_to_check - 1
4089 	 * so that the replenish loop above is never run on init.
4090 	 *
4091 	 * A better way would be to free the mbufs when entering
4092 	 * netmap mode, and set next_to_refresh/check in
4093 	 * a way that the mbufs are completely reallocated
4094 	 * when going back to standard mode.
4095 	 */
4096 	struct netmap_adapter *na = NA(adapter->ifp);
4097 	struct netmap_slot *slot = netmap_reset(na,
4098 		NR_RX, rxr->me, rxr->next_to_check);
4099 	int sj = slot ? na->rx_rings[rxr->me].nkr_hwofs : 0;
4100 
4101 	/* slot sj corresponds to entry j in the NIC ring */
4102 	if (sj < 0)
4103 		sj += adapter->num_rx_desc;
4104 
4105 	for (int j = 0; j != adapter->num_rx_desc; j++, sj++) {
4106 		rxbuf = &rxr->rx_buffers[j];
4107 		/* no mbuf and regular mode -> skip this entry */
4108 		if (rxbuf->m_head == NULL && !slot)
4109 			continue;
4110 		/* Handle wrap. Cannot use "na" here, could be NULL */
4111 		if (sj >= adapter->num_rx_desc)
4112 			sj -= adapter->num_rx_desc;
4113 		/* see comment, set slot addr and map */
4114 		if (slot) {
4115 			uint64_t paddr;
4116 			void *addr = PNMB(slot + sj, &paddr);
4117 			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4118 			/* Update descriptor */
4119 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4120 		} else {
4121 			/* Get the memory mapping */
4122 			bus_dmamap_load_mbuf_sg(rxr->rxtag,
4123 			    rxbuf->map, rxbuf->m_head, seg,
4124 			    &nsegs, BUS_DMA_NOWAIT);
4125 			/* Update descriptor */
4126 			rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4127 		}
4128 		bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD);
4129 	}
4130     }
4131 #endif /* DEV_NETMAP */
4132 
4133 fail:
4134 	EM_RX_UNLOCK(rxr);
4135 	return (error);
4136 }
4137 
4138 /*********************************************************************
4139  *
4140  *  Initialize all receive rings.
4141  *
4142  **********************************************************************/
4143 static int
4144 em_setup_receive_structures(struct adapter *adapter)
4145 {
4146 	struct rx_ring *rxr = adapter->rx_rings;
4147 	int q;
4148 
4149 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4150 		if (em_setup_receive_ring(rxr))
4151 			goto fail;
4152 
4153 	return (0);
4154 fail:
4155 	/*
4156 	 * Free RX buffers allocated so far; we will only handle
4157 	 * the rings that completed, as the failing case will have
4158 	 * cleaned up for itself. 'q' failed, so it is the terminus.
4159 	 */
4160 	for (int i = 0; i < q; ++i) {
4161 		rxr = &adapter->rx_rings[i];
4162 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4163 			struct em_buffer *rxbuf;
4164 			rxbuf = &rxr->rx_buffers[n];
4165 			if (rxbuf->m_head != NULL) {
4166 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4167 			  	  BUS_DMASYNC_POSTREAD);
4168 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4169 				m_freem(rxbuf->m_head);
4170 				rxbuf->m_head = NULL;
4171 			}
4172 		}
4173 		rxr->next_to_check = 0;
4174 		rxr->next_to_refresh = 0;
4175 	}
4176 
4177 	return (ENOBUFS);
4178 }
4179 
4180 /*********************************************************************
4181  *
4182  *  Free all receive rings.
4183  *
4184  **********************************************************************/
4185 static void
4186 em_free_receive_structures(struct adapter *adapter)
4187 {
4188 	struct rx_ring *rxr = adapter->rx_rings;
4189 
4190 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4191 		em_free_receive_buffers(rxr);
4192 		/* Free the ring memory as well */
4193 		em_dma_free(adapter, &rxr->rxdma);
4194 		EM_RX_LOCK_DESTROY(rxr);
4195 	}
4196 
4197 	free(adapter->rx_rings, M_DEVBUF);
4198 }
4199 
4200 
4201 /*********************************************************************
4202  *
4203  *  Free receive ring data structures
4204  *
4205  **********************************************************************/
4206 static void
4207 em_free_receive_buffers(struct rx_ring *rxr)
4208 {
4209 	struct adapter		*adapter = rxr->adapter;
4210 	struct em_buffer	*rxbuf = NULL;
4211 
4212 	INIT_DEBUGOUT("free_receive_buffers: begin");
4213 
4214 	if (rxr->rx_buffers != NULL) {
4215 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4216 			rxbuf = &rxr->rx_buffers[i];
4217 			if (rxbuf->map != NULL) {
4218 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4219 				    BUS_DMASYNC_POSTREAD);
4220 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4221 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4222 			}
4223 			if (rxbuf->m_head != NULL) {
4224 				m_freem(rxbuf->m_head);
4225 				rxbuf->m_head = NULL;
4226 			}
4227 		}
4228 		free(rxr->rx_buffers, M_DEVBUF);
4229 		rxr->rx_buffers = NULL;
4230 		rxr->next_to_check = 0;
4231 		rxr->next_to_refresh = 0;
4232 	}
4233 
4234 	if (rxr->rxtag != NULL) {
4235 		bus_dma_tag_destroy(rxr->rxtag);
4236 		rxr->rxtag = NULL;
4237 	}
4238 
4239 	return;
4240 }
4241 
4242 
4243 /*********************************************************************
4244  *
4245  *  Enable receive unit.
4246  *
4247  **********************************************************************/
4248 #define MAX_INTS_PER_SEC	8000
4249 #define DEFAULT_ITR	     (1000000000/(MAX_INTS_PER_SEC * 256))
4250 
4251 static void
4252 em_initialize_receive_unit(struct adapter *adapter)
4253 {
4254 	struct rx_ring	*rxr = adapter->rx_rings;
4255 	struct ifnet	*ifp = adapter->ifp;
4256 	struct e1000_hw	*hw = &adapter->hw;
4257 	u64	bus_addr;
4258 	u32	rctl, rxcsum;
4259 
4260 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4261 
4262 	/*
4263 	 * Make sure receives are disabled while setting
4264 	 * up the descriptor ring
4265 	 */
4266 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4267 	/* Do not disable if ever enabled on this hardware */
4268 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4269 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4270 
4271 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4272 	    adapter->rx_abs_int_delay.value);
4273 	/*
4274 	 * Set the interrupt throttling rate. Value is calculated
4275 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4276 	 */
4277 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
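	/*
	 * Worked out: 1000000000 / (8000 * 256) = 488 in integer
	 * arithmetic, and 488 * 256ns is roughly a 125us minimum
	 * interval between interrupts, i.e. about 8000 ints/sec.
	 */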
4278 
4279 	/*
4280 	** When using MSIX interrupts we need to throttle
4281 	** using the EITR register (82574 only)
4282 	*/
4283 	if (hw->mac.type == e1000_82574) {
4284 		for (int i = 0; i < 4; i++)
4285 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4286 			    DEFAULT_ITR);
4287 		/* Disable accelerated acknowledge */
4288 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4289 	}
4290 
4291 	if (ifp->if_capenable & IFCAP_RXCSUM) {
4292 		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4293 		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4294 		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4295 	}
4296 
4297 	/*
4298 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4299 	** long latencies are observed, like Lenovo X60. This
4300 	** change eliminates the problem, but since having positive
4301 	** values in RDTR is a known source of problems on other
4302 	** platforms another solution is being sought.
4303 	*/
4304 	if (hw->mac.type == e1000_82573)
4305 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4306 
4307 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4308 		/* Setup the Base and Length of the Rx Descriptor Ring */
4309 		bus_addr = rxr->rxdma.dma_paddr;
4310 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4311 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4312 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4313 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4314 		/* Setup the Head and Tail Descriptor Pointers */
4315 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4316 		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4317 #ifdef DEV_NETMAP
4318 		/*
4319 		 * an init() while a netmap client is active must
4320 		 * preserve the rx buffers passed to userspace.
4321 		 * In this driver it means we adjust RDT to
4322 		 * something different from next_to_refresh.
4323 		 */
4324 		if (ifp->if_capenable & IFCAP_NETMAP) {
4325 			struct netmap_adapter *na = NA(adapter->ifp);
4326 			struct netmap_kring *kring = &na->rx_rings[i];
4327 			int t = rxr->next_to_refresh - kring->nr_hwavail;
4328 
4329 			if (t < 0)
4330 				t += na->num_rx_desc;
4331 			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4332 		} else
4333 #endif /* DEV_NETMAP */
4334 		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4335 	}
4336 
4337 	/* Set PTHRESH for improved jumbo performance */
4338 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4339 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4340 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4341 	    (ifp->if_mtu > ETHERMTU)) {
4342 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4343 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4344 	}
4345 
4346 	if (adapter->hw.mac.type == e1000_pch2lan) {
4347 		if (ifp->if_mtu > ETHERMTU)
4348 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4349 		else
4350 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4351 	}
4352 
4353 	/* Setup the Receive Control Register */
4354 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4355 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4356 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4357 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4358 
4359         /* Strip the CRC */
4360         rctl |= E1000_RCTL_SECRC;
4361 
4362         /* Make sure VLAN Filters are off */
4363         rctl &= ~E1000_RCTL_VFE;
4364 	rctl &= ~E1000_RCTL_SBP;
4365 
4366 	if (adapter->rx_mbuf_sz == MCLBYTES)
4367 		rctl |= E1000_RCTL_SZ_2048;
4368 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4369 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4370 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4371 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4372 
4373 	if (ifp->if_mtu > ETHERMTU)
4374 		rctl |= E1000_RCTL_LPE;
4375 	else
4376 		rctl &= ~E1000_RCTL_LPE;
4377 
4378 	/* Write out the settings */
4379 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4380 
4381 	return;
4382 }
4383 
4384 
4385 /*********************************************************************
4386  *
4387  *  This routine executes in interrupt context. It replenishes
4388  *  the mbufs in the descriptor and sends data which has been
4389  *  dma'ed into host memory to upper layer.
4390  *
4391  *  We loop at most count times if count is > 0, or until done if
4392  *  count < 0.
4393  *
4394  *  For polling we also now return the number of cleaned packets
4395  *********************************************************************/
4396 static bool
4397 em_rxeof(struct rx_ring *rxr, int count, int *done)
4398 {
4399 	struct adapter		*adapter = rxr->adapter;
4400 	struct ifnet		*ifp = adapter->ifp;
4401 	struct mbuf		*mp, *sendmp;
4402 	u8			status = 0;
4403 	u16 			len;
4404 	int			i, processed, rxdone = 0;
4405 	bool			eop;
4406 	struct e1000_rx_desc	*cur;
4407 
4408 	EM_RX_LOCK(rxr);
4409 
4410 #ifdef DEV_NETMAP
4411 	if (ifp->if_capenable & IFCAP_NETMAP) {
4412 		struct netmap_adapter *na = NA(ifp);
4413 
4414 		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4415 		EM_RX_UNLOCK(rxr);
4416 		EM_CORE_LOCK(adapter);
4417 		selwakeuppri(&na->rx_rings[na->num_queues + 1].si, PI_NET);
4418 		EM_CORE_UNLOCK(adapter);
4419 		return (0);
4420 	}
4421 #endif /* DEV_NETMAP */
4422 
4423 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4424 
4425 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4426 			break;
4427 
4428 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4429 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4430 
4431 		cur = &rxr->rx_base[i];
4432 		status = cur->status;
4433 		mp = sendmp = NULL;
4434 
4435 		if ((status & E1000_RXD_STAT_DD) == 0)
4436 			break;
4437 
4438 		len = le16toh(cur->length);
4439 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4440 
4441 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4442 		    (rxr->discard == TRUE)) {
4443 			ifp->if_ierrors++;
4444 			++rxr->rx_discarded;
4445 			if (!eop) /* Catch subsequent segs */
4446 				rxr->discard = TRUE;
4447 			else
4448 				rxr->discard = FALSE;
4449 			em_rx_discard(rxr, i);
4450 			goto next_desc;
4451 		}
4452 
4453 		/* Assign correct length to the current fragment */
4454 		mp = rxr->rx_buffers[i].m_head;
4455 		mp->m_len = len;
4456 
4457 		/* Trigger for refresh */
4458 		rxr->rx_buffers[i].m_head = NULL;
4459 
4460 		/* First segment? */
4461 		if (rxr->fmp == NULL) {
4462 			mp->m_pkthdr.len = len;
4463 			rxr->fmp = rxr->lmp = mp;
4464 		} else {
4465 			/* Chain mbuf's together */
4466 			mp->m_flags &= ~M_PKTHDR;
4467 			rxr->lmp->m_next = mp;
4468 			rxr->lmp = mp;
4469 			rxr->fmp->m_pkthdr.len += len;
4470 		}
4471 
4472 		if (eop) {
4473 			--count;
4474 			sendmp = rxr->fmp;
4475 			sendmp->m_pkthdr.rcvif = ifp;
4476 			ifp->if_ipackets++;
4477 			em_receive_checksum(cur, sendmp);
4478 #ifndef __NO_STRICT_ALIGNMENT
4479 			if (adapter->max_frame_size >
4480 			    (MCLBYTES - ETHER_ALIGN) &&
4481 			    em_fixup_rx(rxr) != 0)
4482 				goto skip;
4483 #endif
4484 			if (status & E1000_RXD_STAT_VP) {
4485 				sendmp->m_pkthdr.ether_vtag =
4486 				    le16toh(cur->special);
4487 				sendmp->m_flags |= M_VLANTAG;
4488 			}
4489 #ifndef __NO_STRICT_ALIGNMENT
4490 skip:
4491 #endif
4492 			rxr->fmp = rxr->lmp = NULL;
4493 		}
4494 next_desc:
4495 		/* Zero out the receive descriptors status. */
4496 		cur->status = 0;
4497 		++rxdone;	/* cumulative for POLL */
4498 		++processed;
4499 
4500 		/* Advance our pointers to the next descriptor. */
4501 		if (++i == adapter->num_rx_desc)
4502 			i = 0;
4503 
4504 		/* Send to the stack */
4505 		if (sendmp != NULL) {
4506 			rxr->next_to_check = i;
4507 			EM_RX_UNLOCK(rxr);
4508 			(*ifp->if_input)(ifp, sendmp);
4509 			EM_RX_LOCK(rxr);
4510 			i = rxr->next_to_check;
4511 		}
4512 
4513 		/* Only refresh mbufs every 8 descriptors */
4514 		if (processed == 8) {
4515 			em_refresh_mbufs(rxr, i);
4516 			processed = 0;
4517 		}
4518 	}
4519 
4520 	/* Catch any remaining refresh work */
4521 	if (e1000_rx_unrefreshed(rxr))
4522 		em_refresh_mbufs(rxr, i);
4523 
4524 	rxr->next_to_check = i;
4525 	if (done != NULL)
4526 		*done = rxdone;
4527 	EM_RX_UNLOCK(rxr);
4528 
4529 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4530 }
4531 
4532 static __inline void
4533 em_rx_discard(struct rx_ring *rxr, int i)
4534 {
4535 	struct em_buffer	*rbuf;
4536 
4537 	rbuf = &rxr->rx_buffers[i];
4538 	/* Free any previous pieces */
4539 	if (rxr->fmp != NULL) {
4540 		rxr->fmp->m_flags |= M_PKTHDR;
4541 		m_freem(rxr->fmp);
4542 		rxr->fmp = NULL;
4543 		rxr->lmp = NULL;
4544 	}
4545 	/*
4546 	** Free buffer and allow em_refresh_mbufs()
4547 	** to clean up and recharge buffer.
4548 	*/
4549 	if (rbuf->m_head) {
4550 		m_free(rbuf->m_head);
4551 		rbuf->m_head = NULL;
4552 	}
4553 	return;
4554 }
4555 
4556 #ifndef __NO_STRICT_ALIGNMENT
4557 /*
4558  * When jumbo frames are enabled we should realign the entire payload on
4559  * architectures with strict alignment. This is a serious design mistake of
4560  * the 8254x as it nullifies the benefit of DMA. The 8254x only allows RX
4561  * buffer sizes of 2048/4096/8192/16384; what we really want is
4562  * 2048 - ETHER_ALIGN to align the payload. On architectures without strict
4563  * alignment restrictions the 8254x still performs unaligned accesses, which
4564  * reduce performance too. To avoid copying an entire frame to realign it,
4565  * we allocate a new mbuf, copy the ethernet header into it, and prepend
4566  * the new mbuf to the existing mbuf chain.
4567  *
4568  * Be aware, best performance of the 8254x is achieved only when jumbo
4569  * frames are not used at all on architectures with strict alignment.
4570  */
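/*
 * Two cases below: if the leading fragment is small enough to leave
 * ETHER_HDR_LEN bytes of room in its cluster, the data is simply
 * shifted up in place (bcopy handles the overlap); otherwise a fresh
 * header mbuf is allocated, the Ethernet header is copied into it,
 * and it is prepended so the payload behind it lands 4-byte aligned.
 */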
4571 static int
4572 em_fixup_rx(struct rx_ring *rxr)
4573 {
4574 	struct adapter *adapter = rxr->adapter;
4575 	struct mbuf *m, *n;
4576 	int error;
4577 
4578 	error = 0;
4579 	m = rxr->fmp;
4580 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4581 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4582 		m->m_data += ETHER_HDR_LEN;
4583 	} else {
4584 		MGETHDR(n, M_DONTWAIT, MT_DATA);
4585 		if (n != NULL) {
4586 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4587 			m->m_data += ETHER_HDR_LEN;
4588 			m->m_len -= ETHER_HDR_LEN;
4589 			n->m_len = ETHER_HDR_LEN;
4590 			M_MOVE_PKTHDR(n, m);
4591 			n->m_next = m;
4592 			rxr->fmp = n;
4593 		} else {
4594 			adapter->dropped_pkts++;
4595 			m_freem(rxr->fmp);
4596 			rxr->fmp = NULL;
4597 			error = ENOMEM;
4598 		}
4599 	}
4600 
4601 	return (error);
4602 }
4603 #endif
4604 
4605 /*********************************************************************
4606  *
4607  *  Verify that the hardware indicated that the checksum is valid.
4608  *  Inform the stack about the status of checksum so that stack
4609  *  doesn't spend time verifying the checksum.
4610  *
4611  *********************************************************************/
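/*
 * For example, a frame that passes both the IP and the TCP/UDP checks
 * below leaves here with csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID |
 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR and csum_data = 0xffff, which lets
 * the stack skip software checksum verification entirely.
 */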
4612 static void
4613 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4614 {
4615 	/* Ignore Checksum bit is set */
4616 	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4617 		mp->m_pkthdr.csum_flags = 0;
4618 		return;
4619 	}
4620 
4621 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4622 		/* Did it pass? */
4623 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4624 			/* IP Checksum Good */
4625 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4626 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4627 
4628 		} else {
4629 			mp->m_pkthdr.csum_flags = 0;
4630 		}
4631 	}
4632 
4633 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4634 		/* Did it pass? */
4635 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4636 			mp->m_pkthdr.csum_flags |=
4637 			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4638 			mp->m_pkthdr.csum_data = htons(0xffff);
4639 		}
4640 	}
4641 }
4642 
4643 /*
4644  * This routine is run via a vlan
4645  * config EVENT
4646  */
4647 static void
4648 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4649 {
4650 	struct adapter	*adapter = ifp->if_softc;
4651 	u32		index, bit;
4652 
4653 	if (ifp->if_softc !=  arg)   /* Not our event */
4654 		return;
4655 
4656 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4657                 return;
4658 
4659 	EM_CORE_LOCK(adapter);
4660 	index = (vtag >> 5) & 0x7F;
4661 	bit = vtag & 0x1F;
4662 	adapter->shadow_vfta[index] |= (1 << bit);
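	/*
	 * e.g. for a hypothetical vtag of 1234: index = (1234 >> 5) &
	 * 0x7F = 38 and bit = 1234 & 0x1F = 18, so this sets bit 18 of
	 * shadow_vfta[38]; em_unregister_vlan() below clears the same bit.
	 */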
4663 	++adapter->num_vlans;
4664 	/* Re-init to load the changes */
4665 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4666 		em_init_locked(adapter);
4667 	EM_CORE_UNLOCK(adapter);
4668 }
4669 
4670 /*
4671  * This routine is run via a vlan
4672  * unconfig EVENT
4673  */
4674 static void
4675 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4676 {
4677 	struct adapter	*adapter = ifp->if_softc;
4678 	u32		index, bit;
4679 
4680 	if (ifp->if_softc !=  arg)
4681 		return;
4682 
4683 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4684                 return;
4685 
4686 	EM_CORE_LOCK(adapter);
4687 	index = (vtag >> 5) & 0x7F;
4688 	bit = vtag & 0x1F;
4689 	adapter->shadow_vfta[index] &= ~(1 << bit);
4690 	--adapter->num_vlans;
4691 	/* Re-init to load the changes */
4692 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4693 		em_init_locked(adapter);
4694 	EM_CORE_UNLOCK(adapter);
4695 }
4696 
4697 static void
4698 em_setup_vlan_hw_support(struct adapter *adapter)
4699 {
4700 	struct e1000_hw *hw = &adapter->hw;
4701 	u32             reg;
4702 
4703 	/*
4704 	** We get here thru init_locked, meaning
4705 	** a soft reset; this has already cleared
4706 	** the VFTA and other state, so if no
4707 	** vlans have been registered, do nothing.
4708 	*/
4709 	if (adapter->num_vlans == 0)
4710                 return;
4711 
4712 	/*
4713 	** A soft reset zeroes out the VFTA, so
4714 	** we need to repopulate it now.
4715 	*/
4716 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4717                 if (adapter->shadow_vfta[i] != 0)
4718 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4719                             i, adapter->shadow_vfta[i]);
4720 
4721 	reg = E1000_READ_REG(hw, E1000_CTRL);
4722 	reg |= E1000_CTRL_VME;
4723 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4724 
4725 	/* Enable the Filter Table */
4726 	reg = E1000_READ_REG(hw, E1000_RCTL);
4727 	reg &= ~E1000_RCTL_CFIEN;
4728 	reg |= E1000_RCTL_VFE;
4729 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4730 }
4731 
4732 static void
4733 em_enable_intr(struct adapter *adapter)
4734 {
4735 	struct e1000_hw *hw = &adapter->hw;
4736 	u32 ims_mask = IMS_ENABLE_MASK;
4737 
4738 	if (hw->mac.type == e1000_82574) {
4739 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4740 		ims_mask |= EM_MSIX_MASK;
4741 	}
4742 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4743 }
4744 
4745 static void
4746 em_disable_intr(struct adapter *adapter)
4747 {
4748 	struct e1000_hw *hw = &adapter->hw;
4749 
4750 	if (hw->mac.type == e1000_82574)
4751 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4752 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4753 }
4754 
4755 /*
4756  * Bit of a misnomer: what this really means is
4757  * to enable OS management of the system, i.e.
4758  * to disable special hardware management features.
4759  */
4760 static void
4761 em_init_manageability(struct adapter *adapter)
4762 {
4763 	/* A shared code workaround */
4764 #define E1000_82542_MANC2H E1000_MANC2H
4765 	if (adapter->has_manage) {
4766 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4767 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4768 
4769 		/* disable hardware interception of ARP */
4770 		manc &= ~(E1000_MANC_ARP_EN);
4771 
4772                 /* enable receiving management packets to the host */
4773 		manc |= E1000_MANC_EN_MNG2HOST;
4774 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4775 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4776 		manc2h |= E1000_MNG2HOST_PORT_623;
4777 		manc2h |= E1000_MNG2HOST_PORT_664;
4778 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4779 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4780 	}
4781 }
4782 
4783 /*
4784  * Give control back to hardware management
4785  * controller if there is one.
4786  */
4787 static void
4788 em_release_manageability(struct adapter *adapter)
4789 {
4790 	if (adapter->has_manage) {
4791 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4792 
4793 		/* re-enable hardware interception of ARP */
4794 		manc |= E1000_MANC_ARP_EN;
4795 		manc &= ~E1000_MANC_EN_MNG2HOST;
4796 
4797 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4798 	}
4799 }
4800 
4801 /*
4802  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4803  * For ASF and Pass Through versions of f/w this means
4804  * that the driver is loaded. For AMT version type f/w
4805  * this means that the network i/f is open.
4806  */
4807 static void
4808 em_get_hw_control(struct adapter *adapter)
4809 {
4810 	u32 ctrl_ext, swsm;
4811 
4812 	if (adapter->hw.mac.type == e1000_82573) {
4813 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4814 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4815 		    swsm | E1000_SWSM_DRV_LOAD);
4816 		return;
4817 	}
4818 	/* else */
4819 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4820 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4821 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4822 	return;
4823 }
4824 
4825 /*
4826  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4827  * For ASF and Pass Through versions of f/w this means that
4828  * the driver is no longer loaded. For AMT versions of the
4829  * f/w this means that the network i/f is closed.
4830  */
4831 static void
4832 em_release_hw_control(struct adapter *adapter)
4833 {
4834 	u32 ctrl_ext, swsm;
4835 
4836 	if (!adapter->has_manage)
4837 		return;
4838 
4839 	if (adapter->hw.mac.type == e1000_82573) {
4840 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4841 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4842 		    swsm & ~E1000_SWSM_DRV_LOAD);
4843 		return;
4844 	}
4845 	/* else */
4846 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4847 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4848 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4849 	return;
4850 }
4851 
4852 static int
4853 em_is_valid_ether_addr(u8 *addr)
4854 {
4855 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4856 
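	/*
	 * Reject group/multicast addresses (low bit of the first octet
	 * set) and the all-zero address.
	 */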
4857 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4858 		return (FALSE);
4859 	}
4860 
4861 	return (TRUE);
4862 }
4863 
4864 /*
4865 ** Parse the interface capabilities with regard
4866 ** to both system management and wake-on-lan for
4867 ** later use.
4868 */
4869 static void
4870 em_get_wakeup(device_t dev)
4871 {
4872 	struct adapter	*adapter = device_get_softc(dev);
4873 	u16		eeprom_data = 0, device_id, apme_mask;
4874 
4875 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4876 	apme_mask = EM_EEPROM_APME;
4877 
4878 	switch (adapter->hw.mac.type) {
4879 	case e1000_82573:
4880 	case e1000_82583:
4881 		adapter->has_amt = TRUE;
4882 		/* Falls thru */
4883 	case e1000_82571:
4884 	case e1000_82572:
4885 	case e1000_80003es2lan:
4886 		if (adapter->hw.bus.func == 1) {
4887 			e1000_read_nvm(&adapter->hw,
4888 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4889 			break;
4890 		} else
4891 			e1000_read_nvm(&adapter->hw,
4892 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4893 		break;
4894 	case e1000_ich8lan:
4895 	case e1000_ich9lan:
4896 	case e1000_ich10lan:
4897 	case e1000_pchlan:
4898 	case e1000_pch2lan:
4899 		apme_mask = E1000_WUC_APME;
4900 		adapter->has_amt = TRUE;
4901 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4902 		break;
4903 	default:
4904 		e1000_read_nvm(&adapter->hw,
4905 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4906 		break;
4907 	}
4908 	if (eeprom_data & apme_mask)
4909 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4910 	/*
4911          * We have the eeprom settings, now apply the special cases
4912          * where the eeprom may be wrong or the board won't support
4913          * wake on lan on a particular port
4914 	 */
4915 	device_id = pci_get_device(dev);
4916         switch (device_id) {
4917 	case E1000_DEV_ID_82571EB_FIBER:
4918 		/* Wake events only supported on port A for dual fiber
4919 		 * regardless of eeprom setting */
4920 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4921 		    E1000_STATUS_FUNC_1)
4922 			adapter->wol = 0;
4923 		break;
4924 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4925 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4926 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4927                 /* if quad port adapter, disable WoL on all but port A */
4928 		if (global_quad_port_a != 0)
4929 			adapter->wol = 0;
4930 		/* Reset for multiple quad port adapters */
4931 		if (++global_quad_port_a == 4)
4932 			global_quad_port_a = 0;
4933                 break;
4934 	}
4935 	return;
4936 }
4937 
4938 
4939 /*
4940  * Enable PCI Wake On Lan capability
4941  */
4942 static void
4943 em_enable_wakeup(device_t dev)
4944 {
4945 	struct adapter	*adapter = device_get_softc(dev);
4946 	struct ifnet	*ifp = adapter->ifp;
4947 	u32		pmc, ctrl, ctrl_ext, rctl;
4948 	u16     	status;
4949 
4950 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4951 		return;
4952 
4953 	/* Advertise the wakeup capability */
4954 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4955 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4956 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4957 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4958 
4959 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4960 	    (adapter->hw.mac.type == e1000_pchlan) ||
4961 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4962 	    (adapter->hw.mac.type == e1000_ich10lan))
4963 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4964 
4965 	/* Keep the laser running on Fiber adapters */
4966 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4967 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4968 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4969 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4970 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4971 	}
4972 
4973 	/*
4974 	** Determine type of Wakeup: note that wol
4975 	** is set with all bits on by default.
4976 	*/
4977 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4978 		adapter->wol &= ~E1000_WUFC_MAG;
4979 
4980 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4981 		adapter->wol &= ~E1000_WUFC_MC;
4982 	else {
4983 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4984 		rctl |= E1000_RCTL_MPE;
4985 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4986 	}
4987 
4988 	if ((adapter->hw.mac.type == e1000_pchlan) ||
4989 	    (adapter->hw.mac.type == e1000_pch2lan)) {
4990 		if (em_enable_phy_wakeup(adapter))
4991 			return;
4992 	} else {
4993 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4994 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4995 	}
4996 
4997 	if (adapter->hw.phy.type == e1000_phy_igp_3)
4998 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4999 
5000         /* Request PME */
5001         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5002 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5003 	if (ifp->if_capenable & IFCAP_WOL)
5004 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5005         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5006 
5007 	return;
5008 }
5009 
5010 /*
5011 ** WOL in the newer chipset interfaces (pchlan)
5012 ** requires things to be copied into the phy
5013 */
5014 static int
5015 em_enable_phy_wakeup(struct adapter *adapter)
5016 {
5017 	struct e1000_hw *hw = &adapter->hw;
5018 	u32 mreg, ret = 0;
5019 	u16 preg;
5020 
5021 	/* copy MAC RARs to PHY RARs */
5022 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5023 
5024 	/* copy MAC MTA to PHY MTA */
5025 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5026 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5027 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5028 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5029 		    (u16)((mreg >> 16) & 0xFFFF));
5030 	}
5031 
5032 	/* configure PHY Rx Control register */
5033 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5034 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5035 	if (mreg & E1000_RCTL_UPE)
5036 		preg |= BM_RCTL_UPE;
5037 	if (mreg & E1000_RCTL_MPE)
5038 		preg |= BM_RCTL_MPE;
5039 	preg &= ~(BM_RCTL_MO_MASK);
5040 	if (mreg & E1000_RCTL_MO_3)
5041 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5042 				<< BM_RCTL_MO_SHIFT);
5043 	if (mreg & E1000_RCTL_BAM)
5044 		preg |= BM_RCTL_BAM;
5045 	if (mreg & E1000_RCTL_PMCF)
5046 		preg |= BM_RCTL_PMCF;
5047 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5048 	if (mreg & E1000_CTRL_RFCE)
5049 		preg |= BM_RCTL_RFCE;
5050 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5051 
5052 	/* enable PHY wakeup in MAC register */
5053 	E1000_WRITE_REG(hw, E1000_WUC,
5054 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5055 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5056 
5057 	/* configure and enable PHY wakeup in PHY registers */
5058 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5059 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5060 
5061 	/* activate PHY wakeup */
5062 	ret = hw->phy.ops.acquire(hw);
5063 	if (ret) {
5064 		printf("Could not acquire PHY\n");
5065 		return ret;
5066 	}
5067 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5068 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5069 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5070 	if (ret) {
5071 		printf("Could not read PHY page 769\n");
5072 		goto out;
5073 	}
5074 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5075 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5076 	if (ret)
5077 		printf("Could not set PHY Host Wakeup bit\n");
5078 out:
5079 	hw->phy.ops.release(hw);
5080 
5081 	return ret;
5082 }
5083 
5084 static void
5085 em_led_func(void *arg, int onoff)
5086 {
5087 	struct adapter	*adapter = arg;
5088 
5089 	EM_CORE_LOCK(adapter);
5090 	if (onoff) {
5091 		e1000_setup_led(&adapter->hw);
5092 		e1000_led_on(&adapter->hw);
5093 	} else {
5094 		e1000_led_off(&adapter->hw);
5095 		e1000_cleanup_led(&adapter->hw);
5096 	}
5097 	EM_CORE_UNLOCK(adapter);
5098 }
5099 
5100 /*
** Disable the ASPM L0s and L1 link states
5102 */
5103 static void
5104 em_disable_aspm(struct adapter *adapter)
5105 {
5106 	int		base, reg;
	u16		link_cap, link_ctrl;
5108 	device_t	dev = adapter->dev;
5109 
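	/* Only the 82573/82574/82583 devices need ASPM disabled. */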
5110 	switch (adapter->hw.mac.type) {
5111 		case e1000_82573:
5112 		case e1000_82574:
5113 		case e1000_82583:
5114 			break;
5115 		default:
5116 			return;
5117 	}
5118 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5119 		return;
5120 	reg = base + PCIR_EXPRESS_LINK_CAP;
5121 	link_cap = pci_read_config(dev, reg, 2);
5122 	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
5123 		return;
5124 	reg = base + PCIR_EXPRESS_LINK_CTL;
5125 	link_ctrl = pci_read_config(dev, reg, 2);
	link_ctrl &= 0xFFFC; /* clear bits 0 and 1, the ASPM control field */
5127 	pci_write_config(dev, reg, link_ctrl, 2);
5128 	return;
5129 }
5130 
5131 /**********************************************************************
5132  *
5133  *  Update the board statistics counters.
5134  *
5135  **********************************************************************/
5136 static void
5137 em_update_stats_counters(struct adapter *adapter)
5138 {
5139 	struct ifnet   *ifp;
5140 
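	/*
	 * Symbol and sequence errors are only counted on copper
	 * links, or while the link is up.
	 */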
	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5142 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5143 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5144 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5145 	}
5146 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5147 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5148 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5149 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5150 
5151 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5152 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5153 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5154 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5155 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5156 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5157 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5158 	/*
5159 	** For watchdog management we need to know if we have been
5160 	** paused during the last interval, so capture that here.
5161 	*/
5162 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5163 	adapter->stats.xoffrxc += adapter->pause_frames;
5164 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5165 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5166 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5167 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5168 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5169 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5170 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5171 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5172 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5173 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5174 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5175 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5176 
5177 	/* For the 64-bit byte counters the low dword must be read first. */
5178 	/* Both registers clear on the read of the high dword */
5179 
5180 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5181 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5182 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5183 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5184 
5185 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5186 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5187 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5188 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5189 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5190 
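	/* Note: only the high dwords of TOR/TOT are read here. */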
5191 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5192 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5193 
5194 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5195 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5196 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5197 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5198 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5199 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5200 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5201 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5202 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5203 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5204 
5205 	/* Interrupt Counts */
5206 
5207 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5208 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5209 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5210 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5211 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5212 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5213 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5214 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5215 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5216 
5217 	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5230 	}
5231 	ifp = adapter->ifp;
5232 
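	/* Fold the hardware counters into the interface statistics. */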
5233 	ifp->if_collisions = adapter->stats.colc;
5234 
5235 	/* Rx Errors */
5236 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5237 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5238 	    adapter->stats.ruc + adapter->stats.roc +
5239 	    adapter->stats.mpc + adapter->stats.cexterr;
5240 
5241 	/* Tx Errors */
5242 	ifp->if_oerrors = adapter->stats.ecol +
5243 	    adapter->stats.latecol + adapter->watchdog_events;
5244 }
5245 
5246 /* Export a single 32-bit register via a read-only sysctl. */
5247 static int
5248 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5249 {
5250 	struct adapter *adapter;
5251 	u_int val;
5252 
5253 	adapter = oidp->oid_arg1;
5254 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5255 	return (sysctl_handle_int(oidp, &val, 0, req));
5256 }
5257 
5258 /*
5259  * Add sysctl variables, one per statistic, to the system.
5260  */
5261 static void
5262 em_add_hw_stats(struct adapter *adapter)
5263 {
5264 	device_t dev = adapter->dev;
5265 
5266 	struct tx_ring *txr = adapter->tx_rings;
5267 	struct rx_ring *rxr = adapter->rx_rings;
5268 
5269 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5270 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5271 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5272 	struct e1000_hw_stats *stats = &adapter->stats;
5273 
5274 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5275 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5276 
5277 #define QUEUE_NAME_LEN 32
5278 	char namebuf[QUEUE_NAME_LEN];
5279 
5280 	/* Driver Statistics */
5281 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5282 			CTLFLAG_RD, &adapter->link_irq,
5283 			"Link MSIX IRQ Handled");
5284 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5285 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5286 			 "Std mbuf failed");
5287 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5288 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5289 			 "Std mbuf cluster failed");
5290 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5291 			CTLFLAG_RD, &adapter->dropped_pkts,
5292 			"Driver dropped packets");
5293 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5294 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5295 			"Driver tx dma failure in xmit");
5296 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5297 			CTLFLAG_RD, &adapter->rx_overruns,
5298 			"RX overruns");
5299 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5300 			CTLFLAG_RD, &adapter->watchdog_events,
5301 			"Watchdog timeouts");
5302 
5303 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5304 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5305 			em_sysctl_reg_handler, "IU",
5306 			"Device Control Register");
5307 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5308 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5309 			em_sysctl_reg_handler, "IU",
5310 			"Receiver Control Register");
5311 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5312 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5313 			"Flow Control High Watermark");
5314 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5315 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5316 			"Flow Control Low Watermark");
5317 
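	/* Per-queue statistics: one "queueN" node per TX/RX ring pair. */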
5318 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5319 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5320 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5321 					    CTLFLAG_RD, NULL, "Queue Name");
5322 		queue_list = SYSCTL_CHILDREN(queue_node);
5323 
5324 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5325 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5326 				E1000_TDH(txr->me),
5327 				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Head");
5329 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5330 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5331 				E1000_TDT(txr->me),
5332 				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Tail");
5334 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5335 				CTLFLAG_RD, &txr->tx_irq,
5336 				"Queue MSI-X Transmit Interrupts");
5337 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5338 				CTLFLAG_RD, &txr->no_desc_avail,
5339 				"Queue No Descriptor Available");
5340 
5341 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5342 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5343 				E1000_RDH(rxr->me),
5344 				em_sysctl_reg_handler, "IU",
5345 				"Receive Descriptor Head");
5346 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5347 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5348 				E1000_RDT(rxr->me),
5349 				em_sysctl_reg_handler, "IU",
5350 				"Receive Descriptor Tail");
5351 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5352 				CTLFLAG_RD, &rxr->rx_irq,
5353 				"Queue MSI-X Receive Interrupts");
5354 	}
5355 
5356 	/* MAC stats get their own sub node */
5357 
5358 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5359 				    CTLFLAG_RD, NULL, "Statistics");
5360 	stat_list = SYSCTL_CHILDREN(stat_node);
5361 
5362 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5363 			CTLFLAG_RD, &stats->ecol,
5364 			"Excessive collisions");
5365 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5366 			CTLFLAG_RD, &stats->scc,
5367 			"Single collisions");
5368 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5369 			CTLFLAG_RD, &stats->mcc,
5370 			"Multiple collisions");
5371 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5372 			CTLFLAG_RD, &stats->latecol,
5373 			"Late collisions");
5374 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5375 			CTLFLAG_RD, &stats->colc,
5376 			"Collision Count");
5377 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5378 			CTLFLAG_RD, &adapter->stats.symerrs,
5379 			"Symbol Errors");
5380 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5381 			CTLFLAG_RD, &adapter->stats.sec,
5382 			"Sequence Errors");
5383 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5384 			CTLFLAG_RD, &adapter->stats.dc,
5385 			"Defer Count");
5386 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5387 			CTLFLAG_RD, &adapter->stats.mpc,
5388 			"Missed Packets");
5389 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5390 			CTLFLAG_RD, &adapter->stats.rnbc,
5391 			"Receive No Buffers");
5392 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5393 			CTLFLAG_RD, &adapter->stats.ruc,
5394 			"Receive Undersize");
5395 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5396 			CTLFLAG_RD, &adapter->stats.rfc,
			"Fragmented Packets Received");
5398 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5399 			CTLFLAG_RD, &adapter->stats.roc,
5400 			"Oversized Packets Received");
5401 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5402 			CTLFLAG_RD, &adapter->stats.rjc,
			"Received Jabber");
5404 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5405 			CTLFLAG_RD, &adapter->stats.rxerrc,
5406 			"Receive Errors");
5407 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5408 			CTLFLAG_RD, &adapter->stats.crcerrs,
5409 			"CRC errors");
5410 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5411 			CTLFLAG_RD, &adapter->stats.algnerrc,
5412 			"Alignment Errors");
5413 	/* On 82575 these are collision counts */
5414 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5415 			CTLFLAG_RD, &adapter->stats.cexterr,
5416 			"Collision/Carrier extension errors");
5417 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5418 			CTLFLAG_RD, &adapter->stats.xonrxc,
5419 			"XON Received");
5420 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5421 			CTLFLAG_RD, &adapter->stats.xontxc,
5422 			"XON Transmitted");
5423 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5424 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5425 			"XOFF Received");
5426 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5427 			CTLFLAG_RD, &adapter->stats.xofftxc,
5428 			"XOFF Transmitted");
5429 
5430 	/* Packet Reception Stats */
5431 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5432 			CTLFLAG_RD, &adapter->stats.tpr,
			"Total Packets Received");
5434 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5435 			CTLFLAG_RD, &adapter->stats.gprc,
5436 			"Good Packets Received");
5437 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5438 			CTLFLAG_RD, &adapter->stats.bprc,
5439 			"Broadcast Packets Received");
5440 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5441 			CTLFLAG_RD, &adapter->stats.mprc,
5442 			"Multicast Packets Received");
5443 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5444 			CTLFLAG_RD, &adapter->stats.prc64,
			"64 byte frames received");
5446 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5447 			CTLFLAG_RD, &adapter->stats.prc127,
5448 			"65-127 byte frames received");
5449 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5450 			CTLFLAG_RD, &adapter->stats.prc255,
5451 			"128-255 byte frames received");
5452 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5453 			CTLFLAG_RD, &adapter->stats.prc511,
5454 			"256-511 byte frames received");
5455 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5456 			CTLFLAG_RD, &adapter->stats.prc1023,
5457 			"512-1023 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
			CTLFLAG_RD, &adapter->stats.prc1522,
			"1024-1522 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
			CTLFLAG_RD, &adapter->stats.gorc,
			"Good Octets Received");
5464 
5465 	/* Packet Transmission Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
			CTLFLAG_RD, &adapter->stats.gotc,
			"Good Octets Transmitted");
5469 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5470 			CTLFLAG_RD, &adapter->stats.tpt,
5471 			"Total Packets Transmitted");
5472 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5473 			CTLFLAG_RD, &adapter->stats.gptc,
5474 			"Good Packets Transmitted");
5475 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5476 			CTLFLAG_RD, &adapter->stats.bptc,
5477 			"Broadcast Packets Transmitted");
5478 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5479 			CTLFLAG_RD, &adapter->stats.mptc,
5480 			"Multicast Packets Transmitted");
5481 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5482 			CTLFLAG_RD, &adapter->stats.ptc64,
			"64 byte frames transmitted");
5484 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5485 			CTLFLAG_RD, &adapter->stats.ptc127,
5486 			"65-127 byte frames transmitted");
5487 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5488 			CTLFLAG_RD, &adapter->stats.ptc255,
5489 			"128-255 byte frames transmitted");
5490 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5491 			CTLFLAG_RD, &adapter->stats.ptc511,
5492 			"256-511 byte frames transmitted");
5493 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5494 			CTLFLAG_RD, &adapter->stats.ptc1023,
5495 			"512-1023 byte frames transmitted");
5496 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5497 			CTLFLAG_RD, &adapter->stats.ptc1522,
5498 			"1024-1522 byte frames transmitted");
5499 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5500 			CTLFLAG_RD, &adapter->stats.tsctc,
5501 			"TSO Contexts Transmitted");
5502 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5503 			CTLFLAG_RD, &adapter->stats.tsctfc,
5504 			"TSO Contexts Failed");
5505 
5506 
5507 	/* Interrupt Stats */
5508 
5509 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5510 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5511 	int_list = SYSCTL_CHILDREN(int_node);
5512 
5513 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5514 			CTLFLAG_RD, &adapter->stats.iac,
5515 			"Interrupt Assertion Count");
5516 
5517 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5518 			CTLFLAG_RD, &adapter->stats.icrxptc,
5519 			"Interrupt Cause Rx Pkt Timer Expire Count");
5520 
5521 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5522 			CTLFLAG_RD, &adapter->stats.icrxatc,
5523 			"Interrupt Cause Rx Abs Timer Expire Count");
5524 
5525 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5526 			CTLFLAG_RD, &adapter->stats.ictxptc,
5527 			"Interrupt Cause Tx Pkt Timer Expire Count");
5528 
5529 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5530 			CTLFLAG_RD, &adapter->stats.ictxatc,
5531 			"Interrupt Cause Tx Abs Timer Expire Count");
5532 
5533 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5534 			CTLFLAG_RD, &adapter->stats.ictxqec,
5535 			"Interrupt Cause Tx Queue Empty Count");
5536 
5537 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5538 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5539 			"Interrupt Cause Tx Queue Min Thresh Count");
5540 
5541 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5542 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5543 			"Interrupt Cause Rx Desc Min Thresh Count");
5544 
5545 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5546 			CTLFLAG_RD, &adapter->stats.icrxoc,
5547 			"Interrupt Cause Receiver Overrun Count");
5548 }
5549 
5550 /**********************************************************************
5551  *
5552  *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool.  Only the first 32 words
 *  are dumped; everything of interest lives in that range.
5555  *
5556  **********************************************************************/
5557 static int
5558 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5559 {
5560 	struct adapter *adapter = (struct adapter *)arg1;
5561 	int error;
5562 	int result;
5563 
5564 	result = -1;
5565 	error = sysctl_handle_int(oidp, &result, 0, req);
5566 
5567 	if (error || !req->newptr)
5568 		return (error);
5569 
5570 	/*
5571 	 * This value will cause a hex dump of the
5572 	 * first 32 16-bit words of the EEPROM to
5573 	 * the screen.
5574 	 */
5575 	if (result == 1)
5576 		em_print_nvm_info(adapter);
5577 
5578 	return (error);
5579 }
5580 
5581 static void
5582 em_print_nvm_info(struct adapter *adapter)
5583 {
5584 	u16	eeprom_data;
5585 	int	i, j, row = 0;
5586 
	/* It's a bit crude, but it gets the job done */
5588 	printf("\nInterface EEPROM Dump:\n");
5589 	printf("Offset\n0x0000  ");
5590 	for (i = 0, j = 0; i < 32; i++, j++) {
5591 		if (j == 8) { /* Make the offset block */
5592 			j = 0; ++row;
			printf("\n0x00%x0  ", row);
5594 		}
5595 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5596 		printf("%04x ", eeprom_data);
5597 	}
5598 	printf("\n");
5599 }
5600 
5601 static int
5602 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5603 {
5604 	struct em_int_delay_info *info;
5605 	struct adapter *adapter;
5606 	u32 regval;
5607 	int error, usecs, ticks;
5608 
5609 	info = (struct em_int_delay_info *)arg1;
5610 	usecs = info->value;
5611 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5612 	if (error != 0 || req->newptr == NULL)
5613 		return (error);
5614 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5615 		return (EINVAL);
5616 	info->value = usecs;
5617 	ticks = EM_USECS_TO_TICKS(usecs);
5618 
5619 	adapter = info->adapter;
5620 
5621 	EM_CORE_LOCK(adapter);
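	/* The delay value occupies the low 16 bits; preserve the rest. */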
5622 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5623 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5624 	/* Handle a few special cases. */
5625 	switch (info->offset) {
5626 	case E1000_RDTR:
5627 		break;
5628 	case E1000_TIDV:
5629 		if (ticks == 0) {
5630 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5631 			/* Don't write 0 into the TIDV register. */
5632 			regval++;
5633 		} else
5634 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5635 		break;
5636 	}
5637 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5638 	EM_CORE_UNLOCK(adapter);
5639 	return (0);
5640 }
5641 
5642 static void
5643 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5644 	const char *description, struct em_int_delay_info *info,
5645 	int offset, int value)
5646 {
5647 	info->adapter = adapter;
5648 	info->offset = offset;
5649 	info->value = value;
5650 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5651 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5652 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5653 	    info, 0, em_sysctl_int_delay, "I", description);
5654 }
5655 
5656 static void
5657 em_set_sysctl_value(struct adapter *adapter, const char *name,
5658 	const char *description, int *limit, int value)
5659 {
5660 	*limit = value;
5661 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5662 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5663 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5664 }
5665 
5666 
5667 /*
5668 ** Set flow control using sysctl:
5669 ** Flow control values:
5670 **      0 - off
5671 **      1 - rx pause
5672 **      2 - tx pause
5673 **      3 - full
5674 */
5675 static int
5676 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5677 {
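	/*
	 * The input values map directly onto the e1000_fc enum:
	 * 0 = none, 1 = rx pause, 2 = tx pause, 3 = full.
	 */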
	int		error;
	static int	input = 3; /* default is full; static, so shared by all adapters */
	struct adapter	*adapter = (struct adapter *) arg1;

	error = sysctl_handle_int(oidp, &input, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	if (input == adapter->fc) /* no change? */
		return (error);

	switch (input) {
	case e1000_fc_rx_pause:
	case e1000_fc_tx_pause:
	case e1000_fc_full:
	case e1000_fc_none:
		adapter->hw.fc.requested_mode = input;
		adapter->fc = input;
		break;
	default:
		/* Do nothing */
		return (error);
	}

	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
	e1000_force_mac_fc(&adapter->hw);
	return (error);
5706 }
5707 
5708 
5709 static int
5710 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5711 {
5712 	struct adapter *adapter;
5713 	int error;
5714 	int result;
5715 
5716 	result = -1;
5717 	error = sysctl_handle_int(oidp, &result, 0, req);
5718 
5719 	if (error || !req->newptr)
5720 		return (error);
5721 
5722 	if (result == 1) {
5723 		adapter = (struct adapter *)arg1;
5724 		em_print_debug_info(adapter);
	}
5726 
5727 	return (error);
5728 }
5729 
5730 /*
5731 ** This routine is meant to be fluid, add whatever is
5732 ** needed for debugging a problem.  -jfv
5733 */
5734 static void
5735 em_print_debug_info(struct adapter *adapter)
5736 {
5737 	device_t dev = adapter->dev;
5738 	struct tx_ring *txr = adapter->tx_rings;
5739 	struct rx_ring *rxr = adapter->rx_rings;
5740 
	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
		printf("Interface is RUNNING ");
	else
		printf("Interface is NOT RUNNING ");

	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
		printf("and INACTIVE\n");
	else
		printf("and ACTIVE\n");
5750 
5751 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5752 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5753 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5754 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5755 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5756 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5757 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5758 	device_printf(dev, "TX descriptors avail = %d\n",
5759 	    txr->tx_avail);
5760 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5761 	    txr->no_desc_avail);
5762 	device_printf(dev, "RX discarded packets = %ld\n",
5763 	    rxr->rx_discarded);
5764 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5765 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5766 }
5767