/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.8";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#else
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

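/*
 * The adapter's interrupt delay registers count in units of 1.024
 * usecs, so the two helpers below convert between register ticks and
 * microseconds, rounding to the nearest value in each direction.
 */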
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

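/*
 * The ITR register is programmed in 256 ns increments, so the
 * default below (1000000000 / (8000 * 256), i.e. ~488) limits the
 * adapter to roughly MAX_INTS_PER_SEC interrupts per second.
 */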
#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous mode also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy Efficient Ethernet - default to OFF (1 disables EEE) */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on
 *  the adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");
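	/*
	 * A usage sketch (the accepted values live in em_set_flowcntl()):
	 * "sysctl dev.em.0.fc=3" would request full flow control,
	 * assuming the usual e1000 mapping of 0=off, 1=rx pause,
	 * 2=tx pause and 3=full.
	 */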

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum, and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "VLAN in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}

#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than doing an immediate send. This buffering, rather than
 *  having multiple hardware TX queues, is the advantage in this
 *  driver.
 *********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m != NULL) {
		err = drbr_enqueue(ifp, txr->br, m);
		if (err)
			return (err);
	}

	/* Process the queue */
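	/*
	 * drbr_peek() leaves the mbuf at the head of the ring: on a
	 * successful em_xmit() we drbr_advance() past it, while on
	 * failure we either advance (em_xmit consumed the mbuf and
	 * set it to NULL) or put it back for a later retry.
	 */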
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		ifp->if_obytes += next->m_pkthdr.len;
		if (next->m_flags & M_MCAST)
			ifp->if_omcasts++;
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER)
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	return (err);
}

/*
** Multiqueue capable stack interface
*/
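/*
** A design note: if the TX lock is contended we simply enqueue the
** mbuf in the buf_ring and return; the expectation is that the
** current lock holder will drain the ring before releasing the lock.
*/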
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		/* Call cleanup if the number of free TX descriptors is low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.mac.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  to a consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	      ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset; we make a duplicate
	 * in the last RAR entry (RAR[14]) for that eventuality,
	 * which ensures the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
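	/*
	** (MCLBYTES is a 2k cluster, MJUMPAGESIZE is one page, and
	** MJUM9BYTES is a 9k cluster, matching the three frame-size
	** ranges below.)
	*/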
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
1424  *
1425  *********************************************************************/
1426 static int
1427 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1428 {
1429 	struct adapter *adapter = ifp->if_softc;
1430 	struct tx_ring	*txr = adapter->tx_rings;
1431 	struct rx_ring	*rxr = adapter->rx_rings;
1432 	u32		reg_icr;
1433 	int		rx_done;
1434 
1435 	EM_CORE_LOCK(adapter);
1436 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1437 		EM_CORE_UNLOCK(adapter);
1438 		return (0);
1439 	}
1440 
1441 	if (cmd == POLL_AND_CHECK_STATUS) {
1442 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1443 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1444 			callout_stop(&adapter->timer);
1445 			adapter->hw.mac.get_link_status = 1;
1446 			em_update_link_status(adapter);
1447 			callout_reset(&adapter->timer, hz,
1448 			    em_local_timer, adapter);
1449 		}
1450 	}
1451 	EM_CORE_UNLOCK(adapter);
1452 
1453 	em_rxeof(rxr, count, &rx_done);
1454 
1455 	EM_TX_LOCK(txr);
1456 	em_txeof(txr);
1457 #ifdef EM_MULTIQUEUE
1458 	if (!drbr_empty(ifp, txr->br))
1459 		em_mq_start_locked(ifp, txr, NULL);
1460 #else
1461 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1462 		em_start_locked(ifp, txr);
1463 #endif
1464 	EM_TX_UNLOCK(txr);
1465 
1466 	return (rx_done);
1467 }
1468 #endif /* DEVICE_POLLING */
1469 
1470 
1471 /*********************************************************************
1472  *
1473  *  Fast Legacy/MSI Combined Interrupt Service routine
1474  *
1475  *********************************************************************/
1476 static int
1477 em_irq_fast(void *arg)
1478 {
1479 	struct adapter	*adapter = arg;
1480 	struct ifnet	*ifp;
1481 	u32		reg_icr;
1482 
1483 	ifp = adapter->ifp;
1484 
1485 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1486 
1487 	/* Hot eject?  */
1488 	if (reg_icr == 0xffffffff)
1489 		return FILTER_STRAY;
1490 
1491 	/* Definitely not our interrupt.  */
1492 	if (reg_icr == 0x0)
1493 		return FILTER_STRAY;
1494 
1495 	/*
1496 	 * Starting with the 82571 chip, bit 31 should be used to
1497 	 * determine whether the interrupt belongs to us.
1498 	 */
1499 	if (adapter->hw.mac.type >= e1000_82571 &&
1500 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1501 		return FILTER_STRAY;
1502 
1503 	em_disable_intr(adapter);
1504 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1505 
1506 	/* Link status change */
1507 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1508 		adapter->hw.mac.get_link_status = 1;
1509 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1510 	}
1511 
1512 	if (reg_icr & E1000_ICR_RXO)
1513 		adapter->rx_overruns++;
1514 	return FILTER_HANDLED;
1515 }
1516 
1517 /* Combined RX/TX handler, used by Legacy and MSI */
1518 static void
1519 em_handle_que(void *context, int pending)
1520 {
1521 	struct adapter	*adapter = context;
1522 	struct ifnet	*ifp = adapter->ifp;
1523 	struct tx_ring	*txr = adapter->tx_rings;
1524 	struct rx_ring	*rxr = adapter->rx_rings;
1525 
1526 
1527 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1528 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1529 		EM_TX_LOCK(txr);
1530 		em_txeof(txr);
1531 #ifdef EM_MULTIQUEUE
1532 		if (!drbr_empty(ifp, txr->br))
1533 			em_mq_start_locked(ifp, txr, NULL);
1534 #else
1535 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1536 			em_start_locked(ifp, txr);
1537 #endif
1538 		EM_TX_UNLOCK(txr);
1539 		if (more) {
1540 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1541 			return;
1542 		}
1543 	}
1544 
1545 	em_enable_intr(adapter);
1546 	return;
1547 }
1548 
1549 
1550 /*********************************************************************
1551  *
1552  *  MSIX Interrupt Service Routines
1553  *
1554  **********************************************************************/
1555 static void
1556 em_msix_tx(void *arg)
1557 {
1558 	struct tx_ring *txr = arg;
1559 	struct adapter *adapter = txr->adapter;
1560 	struct ifnet	*ifp = adapter->ifp;
1561 
1562 	++txr->tx_irq;
1563 	EM_TX_LOCK(txr);
1564 	em_txeof(txr);
1565 #ifdef EM_MULTIQUEUE
1566 	if (!drbr_empty(ifp, txr->br))
1567 		em_mq_start_locked(ifp, txr, NULL);
1568 #else
1569 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1570 		em_start_locked(ifp, txr);
1571 #endif
1572 	/* Reenable this interrupt */
1573 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1574 	EM_TX_UNLOCK(txr);
1575 	return;
1576 }
1577 
1578 /*********************************************************************
1579  *
1580  *  MSIX RX Interrupt Service routine
1581  *
1582  **********************************************************************/
1583 
1584 static void
1585 em_msix_rx(void *arg)
1586 {
1587 	struct rx_ring	*rxr = arg;
1588 	struct adapter	*adapter = rxr->adapter;
1589 	bool		more;
1590 
1591 	++rxr->rx_irq;
1592 	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1593 		return;
1594 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1595 	if (more)
1596 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1597 	else
1598 		/* Reenable this interrupt */
1599 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1600 	return;
1601 }
1602 
1603 /*********************************************************************
1604  *
1605  *  MSIX Link Fast Interrupt Service routine
1606  *
1607  **********************************************************************/
1608 static void
1609 em_msix_link(void *arg)
1610 {
1611 	struct adapter	*adapter = arg;
1612 	u32		reg_icr;
1613 
1614 	++adapter->link_irq;
1615 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1616 
1617 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1618 		adapter->hw.mac.get_link_status = 1;
1619 		em_handle_link(adapter, 0);
1620 	} else
1621 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1622 		    EM_MSIX_LINK | E1000_IMS_LSC);
1623 	return;
1624 }
1625 
1626 static void
1627 em_handle_rx(void *context, int pending)
1628 {
1629 	struct rx_ring	*rxr = context;
1630 	struct adapter	*adapter = rxr->adapter;
1631         bool            more;
1632 
1633 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1634 	if (more)
1635 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1636 	else
1637 		/* Reenable this interrupt */
1638 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1639 }
1640 
1641 static void
1642 em_handle_tx(void *context, int pending)
1643 {
1644 	struct tx_ring	*txr = context;
1645 	struct adapter	*adapter = txr->adapter;
1646 	struct ifnet	*ifp = adapter->ifp;
1647 
1648 	EM_TX_LOCK(txr);
1649 	em_txeof(txr);
1650 #ifdef EM_MULTIQUEUE
1651 	if (!drbr_empty(ifp, txr->br))
1652 		em_mq_start_locked(ifp, txr, NULL);
1653 #else
1654 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1655 		em_start_locked(ifp, txr);
1656 #endif
1657 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1658 	EM_TX_UNLOCK(txr);
1659 }
1660 
1661 static void
1662 em_handle_link(void *context, int pending)
1663 {
1664 	struct adapter	*adapter = context;
1665 	struct tx_ring	*txr = adapter->tx_rings;
1666 	struct ifnet *ifp = adapter->ifp;
1667 
1668 	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1669 		return;
1670 
1671 	EM_CORE_LOCK(adapter);
1672 	callout_stop(&adapter->timer);
1673 	em_update_link_status(adapter);
1674 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1675 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1676 	    EM_MSIX_LINK | E1000_IMS_LSC);
1677 	if (adapter->link_active) {
1678 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1679 			EM_TX_LOCK(txr);
1680 #ifdef EM_MULTIQUEUE
1681 			if (!drbr_empty(ifp, txr->br))
1682 				em_mq_start_locked(ifp, txr, NULL);
1683 #else
1684 			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1685 				em_start_locked(ifp, txr);
1686 #endif
1687 			EM_TX_UNLOCK(txr);
1688 		}
1689 	}
1690 	EM_CORE_UNLOCK(adapter);
1691 }
1692 
1693 
1694 /*********************************************************************
1695  *
1696  *  Media Ioctl callback
1697  *
1698  *  This routine is called whenever the user queries the status of
1699  *  the interface using ifconfig.
1700  *
1701  **********************************************************************/
1702 static void
1703 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1704 {
1705 	struct adapter *adapter = ifp->if_softc;
1706 	u_char fiber_type = IFM_1000_SX;
1707 
1708 	INIT_DEBUGOUT("em_media_status: begin");
1709 
1710 	EM_CORE_LOCK(adapter);
1711 	em_update_link_status(adapter);
1712 
1713 	ifmr->ifm_status = IFM_AVALID;
1714 	ifmr->ifm_active = IFM_ETHER;
1715 
1716 	if (!adapter->link_active) {
1717 		EM_CORE_UNLOCK(adapter);
1718 		return;
1719 	}
1720 
1721 	ifmr->ifm_status |= IFM_ACTIVE;
1722 
1723 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1724 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1725 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1726 	} else {
1727 		switch (adapter->link_speed) {
1728 		case 10:
1729 			ifmr->ifm_active |= IFM_10_T;
1730 			break;
1731 		case 100:
1732 			ifmr->ifm_active |= IFM_100_TX;
1733 			break;
1734 		case 1000:
1735 			ifmr->ifm_active |= IFM_1000_T;
1736 			break;
1737 		}
1738 		if (adapter->link_duplex == FULL_DUPLEX)
1739 			ifmr->ifm_active |= IFM_FDX;
1740 		else
1741 			ifmr->ifm_active |= IFM_HDX;
1742 	}
1743 	EM_CORE_UNLOCK(adapter);
1744 }
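
/*
 * For example, a copper link negotiated to gigabit full duplex leaves
 * ifmr->ifm_active == (IFM_ETHER | IFM_1000_T | IFM_FDX), which
 * ifconfig renders as something like
 * "media: Ethernet 1000baseT <full-duplex>".
 */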
1745 
1746 /*********************************************************************
1747  *
1748  *  Media Ioctl callback
1749  *
1750  *  This routine is called when the user changes speed/duplex using
1751  *  media/mediopt option with ifconfig.
1752  *
1753  **********************************************************************/
1754 static int
1755 em_media_change(struct ifnet *ifp)
1756 {
1757 	struct adapter *adapter = ifp->if_softc;
1758 	struct ifmedia  *ifm = &adapter->media;
1759 
1760 	INIT_DEBUGOUT("em_media_change: begin");
1761 
1762 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1763 		return (EINVAL);
1764 
1765 	EM_CORE_LOCK(adapter);
1766 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1767 	case IFM_AUTO:
1768 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1769 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1770 		break;
1771 	case IFM_1000_LX:
1772 	case IFM_1000_SX:
1773 	case IFM_1000_T:
1774 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1775 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1776 		break;
1777 	case IFM_100_TX:
1778 		adapter->hw.mac.autoneg = FALSE;
1779 		adapter->hw.phy.autoneg_advertised = 0;
1780 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1781 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1782 		else
1783 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1784 		break;
1785 	case IFM_10_T:
1786 		adapter->hw.mac.autoneg = FALSE;
1787 		adapter->hw.phy.autoneg_advertised = 0;
1788 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1789 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1790 		else
1791 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1792 		break;
1793 	default:
1794 		device_printf(adapter->dev, "Unsupported media type\n");
1795 	}
1796 
1797 	em_init_locked(adapter);
1798 	EM_CORE_UNLOCK(adapter);
1799 
1800 	return (0);
1801 }
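
/*
 * For example, "ifconfig em0 media 100baseTX mediaopt full-duplex"
 * arrives here with IFM_SUBTYPE() == IFM_100_TX and IFM_FDX set, so
 * autonegotiation is disabled and the PHY is forced to
 * ADVERTISE_100_FULL before em_init_locked() applies the change.
 */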
1802 
1803 /*********************************************************************
1804  *
1805  *  This routine maps the mbufs to tx descriptors.
1806  *
1807  *  return 0 on success, positive on failure
1808  **********************************************************************/
1809 
1810 static int
1811 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1812 {
1813 	struct adapter		*adapter = txr->adapter;
1814 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1815 	bus_dmamap_t		map;
1816 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1817 	struct e1000_tx_desc	*ctxd = NULL;
1818 	struct mbuf		*m_head;
1819 	struct ether_header	*eh;
1820 	struct ip		*ip = NULL;
1821 	struct tcphdr		*tp = NULL;
1822 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1823 	int			ip_off, poff;
1824 	int			nsegs, i, j, first, last = 0;
1825 	int			error, do_tso, tso_desc = 0, remap = 1;
1826 
1827 retry:
1828 	m_head = *m_headp;
1829 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1830 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1831 	ip_off = poff = 0;
1832 
1833 	/*
1834 	 * Intel recommends entire IP/TCP header length reside in a single
1835 	 * buffer. If multiple descriptors are used to describe the IP and
1836 	 * TCP header, each descriptor should describe one or more
1837 	 * complete headers; descriptors referencing only parts of headers
1838 	 * are not supported. If all layer headers are not coalesced into
1839 	 * a single buffer, each buffer should not cross a 4KB boundary,
1840 	 * or be larger than the maximum read request size.
1841 	 * The controller also requires modifying the IP/TCP header to
1842 	 * make TSO work, so we first get a writable mbuf chain and then
1843 	 * coalesce the ethernet/IP/TCP headers into a single buffer to
1844 	 * meet the controller's requirement. This also simplifies
1845 	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1846 	 */
1847 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1848 		if (do_tso || (m_head->m_next != NULL &&
1849 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1850 			if (M_WRITABLE(*m_headp) == 0) {
1851 				m_head = m_dup(*m_headp, M_NOWAIT);
1852 				m_freem(*m_headp);
1853 				if (m_head == NULL) {
1854 					*m_headp = NULL;
1855 					return (ENOBUFS);
1856 				}
1857 				*m_headp = m_head;
1858 			}
1859 		}
1860 		/*
1861 		 * XXX
1862 		 * Assume IPv4, we don't have TSO/checksum offload support
1863 		 * for IPv6 yet.
1864 		 */
1865 		ip_off = sizeof(struct ether_header);
1866 		m_head = m_pullup(m_head, ip_off);
1867 		if (m_head == NULL) {
1868 			*m_headp = NULL;
1869 			return (ENOBUFS);
1870 		}
1871 		eh = mtod(m_head, struct ether_header *);
1872 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1873 			ip_off = sizeof(struct ether_vlan_header);
1874 			m_head = m_pullup(m_head, ip_off);
1875 			if (m_head == NULL) {
1876 				*m_headp = NULL;
1877 				return (ENOBUFS);
1878 			}
1879 		}
1880 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1881 		if (m_head == NULL) {
1882 			*m_headp = NULL;
1883 			return (ENOBUFS);
1884 		}
1885 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1886 		poff = ip_off + (ip->ip_hl << 2);
1887 		if (do_tso) {
1888 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1889 			if (m_head == NULL) {
1890 				*m_headp = NULL;
1891 				return (ENOBUFS);
1892 			}
1893 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1894 			/*
1895 			 * TSO workaround:
1896 			 *   pull 4 more bytes of data into it.
1897 			 */
1898 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1899 			if (m_head == NULL) {
1900 				*m_headp = NULL;
1901 				return (ENOBUFS);
1902 			}
1903 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1904 			ip->ip_len = 0;
1905 			ip->ip_sum = 0;
1906 			/*
1907 			 * The pseudo TCP checksum does not include the TCP
1908 			 * payload length, so the driver must recompute it
1909 			 * here as the hardware expects to see it, per
1910 			 * Microsoft's Large Send specification.
1911 			 */
1912 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1913 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1914 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1915 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1916 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1917 			if (m_head == NULL) {
1918 				*m_headp = NULL;
1919 				return (ENOBUFS);
1920 			}
1921 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1922 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1923 			if (m_head == NULL) {
1924 				*m_headp = NULL;
1925 				return (ENOBUFS);
1926 			}
1927 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1928 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1929 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1930 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1931 			if (m_head == NULL) {
1932 				*m_headp = NULL;
1933 				return (ENOBUFS);
1934 			}
1935 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1936 		}
1937 		*m_headp = m_head;
1938 	}
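
	/*
	 * Worked example of the offsets computed above: for an untagged
	 * IPv4/TCP frame with no IP options, ip_off is 14 (the ethernet
	 * header) and poff is 14 + (ip_hl << 2) = 34.  With an 802.1Q
	 * tag, ip_off becomes 18 and poff 38.  The TSO path then pulls
	 * up poff + (th_off << 2) + 4 bytes, i.e. the full
	 * ethernet/IP/TCP header plus the 4 extra payload bytes for the
	 * workaround noted above.
	 */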
1939 
1940 	/*
1941 	 * Map the packet for DMA
1942 	 *
1943 	 * Capture the first descriptor index,
1944 	 * this descriptor will have the index
1945 	 * of the EOP which is the only one that
1946 	 * now gets a DONE bit writeback.
1947 	 */
1948 	first = txr->next_avail_desc;
1949 	tx_buffer = &txr->tx_buffers[first];
1950 	tx_buffer_mapped = tx_buffer;
1951 	map = tx_buffer->map;
1952 
1953 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1954 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1955 
1956 	/*
1957 	 * There are two types of errors we can (try) to handle:
1958 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1959 	 *   out of segments.  Defragment the mbuf chain and try again.
1960 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1961 	 *   at this point in time.  Defer sending and try again later.
1962 	 * All other errors, in particular EINVAL, are fatal and prevent the
1963 	 * mbuf chain from ever going through.  Drop it and report error.
1964 	 */
1965 	if (error == EFBIG && remap) {
1966 		struct mbuf *m;
1967 
1968 		m = m_defrag(*m_headp, M_NOWAIT);
1969 		if (m == NULL) {
1970 			adapter->mbuf_alloc_failed++;
1971 			m_freem(*m_headp);
1972 			*m_headp = NULL;
1973 			return (ENOBUFS);
1974 		}
1975 		*m_headp = m;
1976 
1977 		/* Try it again, but only once */
1978 		remap = 0;
1979 		goto retry;
1980 	} else if (error == ENOMEM) {
1981 		adapter->no_tx_dma_setup++;
1982 		return (error);
1983 	} else if (error != 0) {
1984 		adapter->no_tx_dma_setup++;
1985 		m_freem(*m_headp);
1986 		*m_headp = NULL;
1987 		return (error);
1988 	}
1989 
1990 	/*
1991 	 * TSO Hardware workaround, if this packet is not
1992 	 * TSO, and is only a single descriptor long, and
1993 	 * it follows a TSO burst, then we need to add a
1994 	 * sentinel descriptor to prevent premature writeback.
1995 	 */
1996 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1997 		if (nsegs == 1)
1998 			tso_desc = TRUE;
1999 		txr->tx_tso = FALSE;
2000 	}
2001 
2002 	if (nsegs > (txr->tx_avail - 2)) {
2003 		txr->no_desc_avail++;
2004 		bus_dmamap_unload(txr->txtag, map);
2005 		return (ENOBUFS);
2006 	}
2007 	m_head = *m_headp;
2008 
2009 	/* Do hardware assists */
2010 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2011 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2012 		    &txd_upper, &txd_lower);
2013 		/* we need to make a final sentinel transmit desc */
2014 		tso_desc = TRUE;
2015 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2016 		em_transmit_checksum_setup(txr, m_head,
2017 		    ip_off, ip, &txd_upper, &txd_lower);
2018 
2019 	if (m_head->m_flags & M_VLANTAG) {
2020 		/* Set the vlan id. */
2021 		txd_upper |=
2022 		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2023 		/* Tell hardware to add tag */
2024 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2025 	}
2026 
2027 	i = txr->next_avail_desc;
2028 
2029 	/* Set up our transmit descriptors */
2030 	for (j = 0; j < nsegs; j++) {
2031 		bus_size_t seg_len;
2032 		bus_addr_t seg_addr;
2033 
2034 		tx_buffer = &txr->tx_buffers[i];
2035 		ctxd = &txr->tx_base[i];
2036 		seg_addr = segs[j].ds_addr;
2037 		seg_len  = segs[j].ds_len;
2038 		/*
2039 		** TSO Workaround:
2040 		** If this is the last descriptor, we want to
2041 		** split it so we have a small final sentinel
2042 		*/
2043 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2044 			seg_len -= 4;
2045 			ctxd->buffer_addr = htole64(seg_addr);
2046 			ctxd->lower.data = htole32(
2047 			adapter->txd_cmd | txd_lower | seg_len);
2048 			ctxd->upper.data =
2049 			    htole32(txd_upper);
2050 			if (++i == adapter->num_tx_desc)
2051 				i = 0;
2052 			/* Now make the sentinel */
2053 			++txd_used; /* using an extra txd */
2054 			ctxd = &txr->tx_base[i];
2055 			tx_buffer = &txr->tx_buffers[i];
2056 			ctxd->buffer_addr =
2057 			    htole64(seg_addr + seg_len);
2058 			ctxd->lower.data = htole32(
2059 			adapter->txd_cmd | txd_lower | 4);
2060 			ctxd->upper.data =
2061 			    htole32(txd_upper);
2062 			last = i;
2063 			if (++i == adapter->num_tx_desc)
2064 				i = 0;
2065 		} else {
2066 			ctxd->buffer_addr = htole64(seg_addr);
2067 			ctxd->lower.data = htole32(
2068 			adapter->txd_cmd | txd_lower | seg_len);
2069 			ctxd->upper.data =
2070 			    htole32(txd_upper);
2071 			last = i;
2072 			if (++i == adapter->num_tx_desc)
2073 				i = 0;
2074 		}
2075 		tx_buffer->m_head = NULL;
2076 		tx_buffer->next_eop = -1;
2077 	}
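
	/*
	 * Worked example of the sentinel split above: if the final
	 * segment of a TSO packet is 60 bytes, it is emitted as a
	 * 56-byte descriptor followed by a 4-byte sentinel descriptor,
	 * so the hardware cannot write back DONE status before the
	 * whole packet has been fetched.
	 */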
2078 
2079 	txr->next_avail_desc = i;
2080 	txr->tx_avail -= nsegs;
2081 	if (tso_desc) /* TSO used an extra for sentinel */
2082 		txr->tx_avail -= txd_used;
2083 
2084 	tx_buffer->m_head = m_head;
2085 	/*
2086 	** Here we swap the map so the last descriptor,
2087 	** which gets the completion interrupt has the
2088 	** real map, and the first descriptor gets the
2089 	** unused map from this descriptor.
2090 	*/
2091 	tx_buffer_mapped->map = tx_buffer->map;
2092 	tx_buffer->map = map;
2093 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2094 
2095 	/*
2096 	 * The last descriptor of the packet needs
2097 	 * End Of Packet (EOP) and Report Status (RS).
2098 	 */
2099 	ctxd->lower.data |=
2100 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2102 	/*
2103 	 * Keep track in the first buffer which
2104 	 * descriptor will be written back
2105 	 */
2106 	tx_buffer = &txr->tx_buffers[first];
2107 	tx_buffer->next_eop = last;
2108 	/* Update the watchdog time early and often */
2109 	txr->watchdog_time = ticks;
2110 
2111 	/*
2112 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2113 	 * that this frame is available to transmit.
2114 	 */
2115 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2116 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2117 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2118 
2119 	return (0);
2120 }
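
/*
 * A minimal sketch of how a start routine drives em_xmit() (for
 * illustration only; the real em_start_locked()/em_mq_start_locked()
 * also manage OACTIVE, the watchdog state, and link checks):
 */
#if 0	/* illustrative sketch, not compiled */
static void
example_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct mbuf *m_head;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/* em_xmit() may defrag and replace the chain. */
		if (em_xmit(txr, &m_head) != 0) {
			if (m_head != NULL)
				/* No descriptors: requeue and stop. */
				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}
		ETHER_BPF_MTAP(ifp, m_head);	/* tap the sent frame */
	}
}
#endif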
2121 
2122 static void
2123 em_set_promisc(struct adapter *adapter)
2124 {
2125 	struct ifnet	*ifp = adapter->ifp;
2126 	u32		reg_rctl;
2127 
2128 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2129 
2130 	if (ifp->if_flags & IFF_PROMISC) {
2131 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2132 		/* Turn this on if you want to see bad packets */
2133 		if (em_debug_sbp)
2134 			reg_rctl |= E1000_RCTL_SBP;
2135 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2136 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2137 		reg_rctl |= E1000_RCTL_MPE;
2138 		reg_rctl &= ~E1000_RCTL_UPE;
2139 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2140 	}
2141 }
2142 
2143 static void
2144 em_disable_promisc(struct adapter *adapter)
2145 {
2146 	struct ifnet	*ifp = adapter->ifp;
2147 	u32		reg_rctl;
2148 	int		mcnt = 0;
2149 
2150 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2151 	reg_rctl &=  (~E1000_RCTL_UPE);
2152 	if (ifp->if_flags & IFF_ALLMULTI)
2153 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2154 	else {
2155 		struct  ifmultiaddr *ifma;
2156 #if __FreeBSD_version < 800000
2157 		IF_ADDR_LOCK(ifp);
2158 #else
2159 		if_maddr_rlock(ifp);
2160 #endif
2161 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2162 			if (ifma->ifma_addr->sa_family != AF_LINK)
2163 				continue;
2164 			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2165 				break;
2166 			mcnt++;
2167 		}
2168 #if __FreeBSD_version < 800000
2169 		IF_ADDR_UNLOCK(ifp);
2170 #else
2171 		if_maddr_runlock(ifp);
2172 #endif
2173 	}
2174 	/* Don't disable if in MAX groups */
2175 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2176 		reg_rctl &=  (~E1000_RCTL_MPE);
2177 	reg_rctl &=  (~E1000_RCTL_SBP);
2178 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2179 }
2180 
2181 
2182 /*********************************************************************
2183  *  Multicast Update
2184  *
2185  *  This routine is called whenever multicast address list is updated.
2186  *
2187  **********************************************************************/
2188 
2189 static void
2190 em_set_multi(struct adapter *adapter)
2191 {
2192 	struct ifnet	*ifp = adapter->ifp;
2193 	struct ifmultiaddr *ifma;
2194 	u32 reg_rctl = 0;
2195 	u8  *mta; /* Multicast array memory */
2196 	int mcnt = 0;
2197 
2198 	IOCTL_DEBUGOUT("em_set_multi: begin");
2199 
2200 	mta = adapter->mta;
2201 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2202 
2203 	if (adapter->hw.mac.type == e1000_82542 &&
2204 	    adapter->hw.revision_id == E1000_REVISION_2) {
2205 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2206 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2207 			e1000_pci_clear_mwi(&adapter->hw);
2208 		reg_rctl |= E1000_RCTL_RST;
2209 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2210 		msec_delay(5);
2211 	}
2212 
2213 #if __FreeBSD_version < 800000
2214 	IF_ADDR_LOCK(ifp);
2215 #else
2216 	if_maddr_rlock(ifp);
2217 #endif
2218 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2219 		if (ifma->ifma_addr->sa_family != AF_LINK)
2220 			continue;
2221 
2222 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2223 			break;
2224 
2225 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2226 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2227 		mcnt++;
2228 	}
2229 #if __FreeBSD_version < 800000
2230 	IF_ADDR_UNLOCK(ifp);
2231 #else
2232 	if_maddr_runlock(ifp);
2233 #endif
2234 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2235 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2236 		reg_rctl |= E1000_RCTL_MPE;
2237 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2238 	} else
2239 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2240 
2241 	if (adapter->hw.mac.type == e1000_82542 &&
2242 	    adapter->hw.revision_id == E1000_REVISION_2) {
2243 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2244 		reg_rctl &= ~E1000_RCTL_RST;
2245 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2246 		msec_delay(5);
2247 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2248 			e1000_pci_set_mwi(&adapter->hw);
2249 	}
2250 }
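
/*
 * The mta array built above is a flat, packed table: multicast address
 * i occupies bytes [i * ETH_ADDR_LEN, i * ETH_ADDR_LEN + 5], so
 * e1000_update_mc_addr_list() receives mcnt back-to-back 6-byte
 * entries with no padding between them.
 */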
2251 
2252 
2253 /*********************************************************************
2254  *  Timer routine
2255  *
2256  *  This routine checks for link status and updates statistics.
2257  *
2258  **********************************************************************/
2259 
2260 static void
2261 em_local_timer(void *arg)
2262 {
2263 	struct adapter	*adapter = arg;
2264 	struct ifnet	*ifp = adapter->ifp;
2265 	struct tx_ring	*txr = adapter->tx_rings;
2266 	struct rx_ring	*rxr = adapter->rx_rings;
2267 	u32		trigger;
2268 
2269 	EM_CORE_LOCK_ASSERT(adapter);
2270 
2271 	em_update_link_status(adapter);
2272 	em_update_stats_counters(adapter);
2273 
2274 	/* Reset LAA into RAR[0] on 82571 */
2275 	if ((adapter->hw.mac.type == e1000_82571) &&
2276 	    e1000_get_laa_state_82571(&adapter->hw))
2277 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2278 
2279 	/* Mask to use in the irq trigger */
2280 	if (adapter->msix_mem)
2281 		trigger = rxr->ims;
2282 	else
2283 		trigger = E1000_ICS_RXDMT0;
2284 
2285 	/*
2286 	** Check on the state of the TX queue(s); this
2287 	** can be done without the lock because it is RO
2288 	** and the HUNG state will be static once set.
2289 	*/
2290 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2291 		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2292 		    (adapter->pause_frames == 0))
2293 			goto hung;
2294 		/* Schedule a TX tasklet if needed */
2295 		if (txr->tx_avail <= EM_MAX_SCATTER)
2296 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2297 	}
2298 
2299 	adapter->pause_frames = 0;
2300 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2301 #ifndef DEVICE_POLLING
2302 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2303 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2304 #endif
2305 	return;
2306 hung:
2307 	/* Looks like we're hung */
2308 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2309 	device_printf(adapter->dev,
2310 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2311 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2312 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2313 	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2314 	    "Next TX to Clean = %d\n",
2315 	    txr->me, txr->tx_avail, txr->next_to_clean);
2316 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2317 	adapter->watchdog_events++;
2318 	adapter->pause_frames = 0;
2319 	em_init_locked(adapter);
2320 }
2321 
2322 
2323 static void
2324 em_update_link_status(struct adapter *adapter)
2325 {
2326 	struct e1000_hw *hw = &adapter->hw;
2327 	struct ifnet *ifp = adapter->ifp;
2328 	device_t dev = adapter->dev;
2329 	struct tx_ring *txr = adapter->tx_rings;
2330 	u32 link_check = 0;
2331 
2332 	/* Get the cached link value or read phy for real */
2333 	switch (hw->phy.media_type) {
2334 	case e1000_media_type_copper:
2335 		if (hw->mac.get_link_status) {
2336 			/* Do the work to read phy */
2337 			e1000_check_for_link(hw);
2338 			link_check = !hw->mac.get_link_status;
2339 			if (link_check) /* ESB2 fix */
2340 				e1000_cfg_on_link_up(hw);
2341 		} else
2342 			link_check = TRUE;
2343 		break;
2344 	case e1000_media_type_fiber:
2345 		e1000_check_for_link(hw);
2346 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2347                                  E1000_STATUS_LU);
2348 		break;
2349 	case e1000_media_type_internal_serdes:
2350 		e1000_check_for_link(hw);
2351 		link_check = adapter->hw.mac.serdes_has_link;
2352 		break;
2353 	default:
2354 	case e1000_media_type_unknown:
2355 		break;
2356 	}
2357 
2358 	/* Now check for a transition */
2359 	if (link_check && (adapter->link_active == 0)) {
2360 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2361 		    &adapter->link_duplex);
2362 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2363 		if ((adapter->link_speed != SPEED_1000) &&
2364 		    ((hw->mac.type == e1000_82571) ||
2365 		    (hw->mac.type == e1000_82572))) {
2366 			int tarc0;
2367 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2368 			tarc0 &= ~SPEED_MODE_BIT;
2369 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2370 		}
2371 		if (bootverbose)
2372 			device_printf(dev, "Link is up %d Mbps %s\n",
2373 			    adapter->link_speed,
2374 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2375 			    "Full Duplex" : "Half Duplex"));
2376 		adapter->link_active = 1;
2377 		adapter->smartspeed = 0;
2378 		ifp->if_baudrate = adapter->link_speed * 1000000;
2379 		if_link_state_change(ifp, LINK_STATE_UP);
2380 	} else if (!link_check && (adapter->link_active == 1)) {
2381 		ifp->if_baudrate = adapter->link_speed = 0;
2382 		adapter->link_duplex = 0;
2383 		if (bootverbose)
2384 			device_printf(dev, "Link is Down\n");
2385 		adapter->link_active = 0;
2386 		/* Link down, disable watchdog */
2387 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2388 			txr->queue_status = EM_QUEUE_IDLE;
2389 		if_link_state_change(ifp, LINK_STATE_DOWN);
2390 	}
2391 }
2392 
2393 /*********************************************************************
2394  *
2395  *  This routine disables all traffic on the adapter by issuing a
2396  *  global reset on the MAC and deallocates TX/RX buffers.
2397  *
2398  *  This routine should always be called with BOTH the CORE
2399  *  and TX locks.
2400  **********************************************************************/
2401 
2402 static void
2403 em_stop(void *arg)
2404 {
2405 	struct adapter	*adapter = arg;
2406 	struct ifnet	*ifp = adapter->ifp;
2407 	struct tx_ring	*txr = adapter->tx_rings;
2408 
2409 	EM_CORE_LOCK_ASSERT(adapter);
2410 
2411 	INIT_DEBUGOUT("em_stop: begin");
2412 
2413 	em_disable_intr(adapter);
2414 	callout_stop(&adapter->timer);
2415 
2416 	/* Tell the stack that the interface is no longer active */
2417 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2418 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2419 
2420         /* Unarm watchdog timer. */
2421 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2422 		EM_TX_LOCK(txr);
2423 		txr->queue_status = EM_QUEUE_IDLE;
2424 		EM_TX_UNLOCK(txr);
2425 	}
2426 
2427 	e1000_reset_hw(&adapter->hw);
2428 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2429 
2430 	e1000_led_off(&adapter->hw);
2431 	e1000_cleanup_led(&adapter->hw);
2432 }
2433 
2434 
2435 /*********************************************************************
2436  *
2437  *  Determine hardware revision.
2438  *
2439  **********************************************************************/
2440 static void
2441 em_identify_hardware(struct adapter *adapter)
2442 {
2443 	device_t dev = adapter->dev;
2444 
2445 	/* Make sure our PCI config space has the necessary stuff set */
2446 	pci_enable_busmaster(dev);
2447 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2448 
2449 	/* Save off the information about this board */
2450 	adapter->hw.vendor_id = pci_get_vendor(dev);
2451 	adapter->hw.device_id = pci_get_device(dev);
2452 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2453 	adapter->hw.subsystem_vendor_id =
2454 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2455 	adapter->hw.subsystem_device_id =
2456 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2457 
2458 	/* Do Shared Code Init and Setup */
2459 	if (e1000_set_mac_type(&adapter->hw)) {
2460 		device_printf(dev, "Setup init failure\n");
2461 		return;
2462 	}
2463 }
2464 
2465 static int
2466 em_allocate_pci_resources(struct adapter *adapter)
2467 {
2468 	device_t	dev = adapter->dev;
2469 	int		rid;
2470 
2471 	rid = PCIR_BAR(0);
2472 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2473 	    &rid, RF_ACTIVE);
2474 	if (adapter->memory == NULL) {
2475 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2476 		return (ENXIO);
2477 	}
2478 	adapter->osdep.mem_bus_space_tag =
2479 	    rman_get_bustag(adapter->memory);
2480 	adapter->osdep.mem_bus_space_handle =
2481 	    rman_get_bushandle(adapter->memory);
2482 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2483 
2484 	/* Default to a single queue */
2485 	adapter->num_queues = 1;
2486 
2487 	/*
2488 	 * Setup MSI/X or MSI if PCI Express
2489 	 */
2490 	adapter->msix = em_setup_msix(adapter);
2491 
2492 	adapter->hw.back = &adapter->osdep;
2493 
2494 	return (0);
2495 }
2496 
2497 /*********************************************************************
2498  *
2499  *  Setup the Legacy or MSI Interrupt handler
2500  *
2501  **********************************************************************/
2502 int
2503 em_allocate_legacy(struct adapter *adapter)
2504 {
2505 	device_t dev = adapter->dev;
2506 	struct tx_ring	*txr = adapter->tx_rings;
2507 	int error, rid = 0;
2508 
2509 	/* Manually turn off all interrupts */
2510 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2511 
2512 	if (adapter->msix == 1) /* using MSI */
2513 		rid = 1;
2514 	/* We allocate a single interrupt resource */
2515 	adapter->res = bus_alloc_resource_any(dev,
2516 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2517 	if (adapter->res == NULL) {
2518 		device_printf(dev, "Unable to allocate bus resource: "
2519 		    "interrupt\n");
2520 		return (ENXIO);
2521 	}
2522 
2523 	/*
2524 	 * Allocate a fast interrupt and the associated
2525 	 * deferred processing contexts.
2526 	 */
2527 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2528 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2529 	    taskqueue_thread_enqueue, &adapter->tq);
2530 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2531 	    device_get_nameunit(adapter->dev));
2532 	/* Use a TX only tasklet for local timer */
2533 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2534 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2535 	    taskqueue_thread_enqueue, &txr->tq);
2536 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2537 	    device_get_nameunit(adapter->dev));
2538 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2539 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2540 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2541 		device_printf(dev, "Failed to register fast interrupt "
2542 			    "handler: %d\n", error);
2543 		taskqueue_free(adapter->tq);
2544 		adapter->tq = NULL;
2545 		return (error);
2546 	}
2547 
2548 	return (0);
2549 }
2550 
2551 /*********************************************************************
2552  *
2553  *  Setup the MSIX Interrupt handlers
2554  *   This is not really Multiqueue, rather
2555  *   it is just separate interrupt vectors
2556  *   for TX, RX, and Link.
2557  *
2558  **********************************************************************/
2559 int
2560 em_allocate_msix(struct adapter *adapter)
2561 {
2562 	device_t	dev = adapter->dev;
2563 	struct		tx_ring *txr = adapter->tx_rings;
2564 	struct		rx_ring *rxr = adapter->rx_rings;
2565 	int		error, rid, vector = 0;
2566 
2567 
2568 	/* Make sure all interrupts are disabled */
2569 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2570 
2571 	/* First set up ring resources */
2572 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2573 
2574 		/* RX ring */
2575 		rid = vector + 1;
2576 
2577 		rxr->res = bus_alloc_resource_any(dev,
2578 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2579 		if (rxr->res == NULL) {
2580 			device_printf(dev,
2581 			    "Unable to allocate bus resource: "
2582 			    "RX MSIX Interrupt %d\n", i);
2583 			return (ENXIO);
2584 		}
2585 		if ((error = bus_setup_intr(dev, rxr->res,
2586 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2587 		    rxr, &rxr->tag)) != 0) {
2588 			device_printf(dev, "Failed to register RX handler");
2589 			return (error);
2590 		}
2591 #if __FreeBSD_version >= 800504
2592 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2593 #endif
2594 		rxr->msix = vector++; /* NOTE increment vector for TX */
2595 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2596 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2597 		    taskqueue_thread_enqueue, &rxr->tq);
2598 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2599 		    device_get_nameunit(adapter->dev));
2600 		/*
2601 		** Set the bit to enable interrupt
2602 		** in E1000_IMS -- bits 20 and 21
2603 		** are for RX0 and RX1, note this has
2604 		** NOTHING to do with the MSIX vector
2605 		*/
2606 		rxr->ims = 1 << (20 + i);
2607 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2608 
2609 		/* TX ring */
2610 		rid = vector + 1;
2611 		txr->res = bus_alloc_resource_any(dev,
2612 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2613 		if (txr->res == NULL) {
2614 			device_printf(dev,
2615 			    "Unable to allocate bus resource: "
2616 			    "TX MSIX Interrupt %d\n", i);
2617 			return (ENXIO);
2618 		}
2619 		if ((error = bus_setup_intr(dev, txr->res,
2620 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2621 		    txr, &txr->tag)) != 0) {
2622 			device_printf(dev, "Failed to register TX handler");
2623 			return (error);
2624 		}
2625 #if __FreeBSD_version >= 800504
2626 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2627 #endif
2628 		txr->msix = vector++; /* Increment vector for next pass */
2629 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2630 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2631 		    taskqueue_thread_enqueue, &txr->tq);
2632 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2633 		    device_get_nameunit(adapter->dev));
2634 		/*
2635 		** Set the bit to enable interrupt
2636 		** in E1000_IMS -- bits 22 and 23
2637 		** are for TX0 and TX1, note this has
2638 		** NOTHING to do with the MSIX vector
2639 		*/
2640 		txr->ims = 1 << (22 + i);
2641 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2642 	}
2643 
2644 	/* Link interrupt */
2645 	++rid;
2646 	adapter->res = bus_alloc_resource_any(dev,
2647 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2648 	if (!adapter->res) {
2649 		device_printf(dev, "Unable to allocate "
2650 		    "bus resource: Link interrupt [%d]\n", rid);
2651 		return (ENXIO);
2652 	}
2653 	/* Set the link handler function */
2654 	error = bus_setup_intr(dev, adapter->res,
2655 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2656 	    em_msix_link, adapter, &adapter->tag);
2657 	if (error) {
2658 		adapter->res = NULL;
2659 		device_printf(dev, "Failed to register LINK handler");
2660 		return (error);
2661 	}
2662 #if __FreeBSD_version >= 800504
2663 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2664 #endif
2665 	adapter->linkvec = vector;
2666 	adapter->ivars |=  (8 | vector) << 16;
2667 	adapter->ivars |= 0x80000000;
2668 
2669 	return (0);
2670 }
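
/*
 * Worked example of the IVAR value built above for the single-queue
 * 82574 layout (rx vector 0, tx vector 1, link vector 2):
 *   (8 | 0) << 0  = 0x00000008   RX0 -> vector 0, valid bit set
 *   (8 | 1) << 8  = 0x00000900   TX0 -> vector 1, valid bit set
 *   (8 | 2) << 16 = 0x000a0000   link (other) -> vector 2
 *   0x80000000                   control bit the driver always sets
 * giving adapter->ivars == 0x800a0908, later programmed into the
 * E1000_IVAR register during initialization.
 */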
2671 
2672 
2673 static void
2674 em_free_pci_resources(struct adapter *adapter)
2675 {
2676 	device_t	dev = adapter->dev;
2677 	struct tx_ring	*txr;
2678 	struct rx_ring	*rxr;
2679 	int		rid;
2680 
2681 
2682 	/*
2683 	** Release all the queue interrupt resources:
2684 	*/
2685 	for (int i = 0; i < adapter->num_queues; i++) {
2686 		txr = &adapter->tx_rings[i];
2687 		rxr = &adapter->rx_rings[i];
2688 		/* an early abort? */
2689 		if ((txr == NULL) || (rxr == NULL))
2690 			break;
2691 		rid = txr->msix + 1;
2692 		if (txr->tag != NULL) {
2693 			bus_teardown_intr(dev, txr->res, txr->tag);
2694 			txr->tag = NULL;
2695 		}
2696 		if (txr->res != NULL)
2697 			bus_release_resource(dev, SYS_RES_IRQ,
2698 			    rid, txr->res);
2699 		rid = rxr->msix + 1;
2700 		if (rxr->tag != NULL) {
2701 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2702 			rxr->tag = NULL;
2703 		}
2704 		if (rxr->res != NULL)
2705 			bus_release_resource(dev, SYS_RES_IRQ,
2706 			    rid, rxr->res);
2707 	}
2708 
2709 	if (adapter->linkvec) /* we are doing MSIX */
2710 		rid = adapter->linkvec + 1;
2711 	else
2712 		rid = (adapter->msix != 0) ? 1 : 0;
2713 
2714 	if (adapter->tag != NULL) {
2715 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2716 		adapter->tag = NULL;
2717 	}
2718 
2719 	if (adapter->res != NULL)
2720 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2721 
2722 
2723 	if (adapter->msix)
2724 		pci_release_msi(dev);
2725 
2726 	if (adapter->msix_mem != NULL)
2727 		bus_release_resource(dev, SYS_RES_MEMORY,
2728 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2729 
2730 	if (adapter->memory != NULL)
2731 		bus_release_resource(dev, SYS_RES_MEMORY,
2732 		    PCIR_BAR(0), adapter->memory);
2733 
2734 	if (adapter->flash != NULL)
2735 		bus_release_resource(dev, SYS_RES_MEMORY,
2736 		    EM_FLASH, adapter->flash);
2737 }
2738 
2739 /*
2740  * Setup MSI or MSI/X
2741  */
2742 static int
2743 em_setup_msix(struct adapter *adapter)
2744 {
2745 	device_t dev = adapter->dev;
2746 	int val;
2747 
2748 	/*
2749 	** Setup MSI/X for Hartwell: tests have shown
2750 	** use of two queues to be unstable, and to
2751 	** provide no great gain anyway, so we simply
2752 	** separate the interrupts and use a single queue.
2753 	*/
2754 	if ((adapter->hw.mac.type == e1000_82574) &&
2755 	    (em_enable_msix == TRUE)) {
2756 		/* Map the MSIX BAR */
2757 		int rid = PCIR_BAR(EM_MSIX_BAR);
2758 		adapter->msix_mem = bus_alloc_resource_any(dev,
2759 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2760 		if (adapter->msix_mem == NULL) {
2761 			/* May not be enabled */
2762 			device_printf(adapter->dev,
2763 			    "Unable to map MSIX table\n");
2764 			goto msi;
2765 		}
2766 		val = pci_msix_count(dev);
2767 		/* We only need/want 3 vectors */
2768 		if (val >= 3)
2769 			val = 3;
2770 		else {
2771 			device_printf(adapter->dev,
2772 			    "MSIX: insufficient vectors, using MSI\n");
2773 			goto msi;
2774 		}
2775 
2776 		if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
2777 			device_printf(adapter->dev,
2778 			    "Using MSIX interrupts "
2779 			    "with %d vectors\n", val);
2780 			return (val);
2781 		}
2782 
2783 		/*
2784 		** If MSIX alloc failed or provided us with
2785 		** less than needed, free and fall through to MSI
2786 		*/
2787 		pci_release_msi(dev);
2788 	}
2789 msi:
2790 	if (adapter->msix_mem != NULL) {
2791 		bus_release_resource(dev, SYS_RES_MEMORY,
2792 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2793 		adapter->msix_mem = NULL;
2794 	}
2795 	val = 1;
2796 	if (pci_alloc_msi(dev, &val) == 0) {
2797 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2798 		return (val);
2799 	}
2800 	/* Should only happen due to manual configuration */
2801 	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2802 	return (0);
2803 }
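
/*
 * The fallback ladder above is: MSIX (82574 only, 3 vectors) -> MSI ->
 * legacy INTx.  Administrators can force the lower rungs, e.g. by
 * setting hw.em.enable_msix=0 in loader.conf, which skips the 82574
 * MSIX block entirely and lands in the MSI/legacy path.
 */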
2804 
2805 
2806 /*********************************************************************
2807  *
2808  *  Initialize the hardware to a configuration
2809  *  as specified by the adapter structure.
2810  *
2811  **********************************************************************/
2812 static void
2813 em_reset(struct adapter *adapter)
2814 {
2815 	device_t	dev = adapter->dev;
2816 	struct ifnet	*ifp = adapter->ifp;
2817 	struct e1000_hw	*hw = &adapter->hw;
2818 	u16		rx_buffer_size;
2819 	u32		pba;
2820 
2821 	INIT_DEBUGOUT("em_reset: begin");
2822 
2823 	/* Set up smart power down as default off on newer adapters. */
2824 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2825 	    hw->mac.type == e1000_82572)) {
2826 		u16 phy_tmp = 0;
2827 
2828 		/* Speed up time to link by disabling smart power down. */
2829 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2830 		phy_tmp &= ~IGP02E1000_PM_SPD;
2831 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2832 	}
2833 
2834 	/*
2835 	 * Packet Buffer Allocation (PBA)
2836 	 * Writing PBA sets the receive portion of the buffer
2837 	 * the remainder is used for the transmit buffer.
2838 	 */
2839 	switch (hw->mac.type) {
2840 	/* Total Packet Buffer on these is 48K */
2841 	case e1000_82571:
2842 	case e1000_82572:
2843 	case e1000_80003es2lan:
2844 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2845 		break;
2846 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2847 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2848 		break;
2849 	case e1000_82574:
2850 	case e1000_82583:
2851 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2852 		break;
2853 	case e1000_ich8lan:
2854 		pba = E1000_PBA_8K;
2855 		break;
2856 	case e1000_ich9lan:
2857 	case e1000_ich10lan:
2858 		/* Boost Receive side for jumbo frames */
2859 		if (adapter->hw.mac.max_frame_size > 4096)
2860 			pba = E1000_PBA_14K;
2861 		else
2862 			pba = E1000_PBA_10K;
2863 		break;
2864 	case e1000_pchlan:
2865 	case e1000_pch2lan:
2866 	case e1000_pch_lpt:
2867 		pba = E1000_PBA_26K;
2868 		break;
2869 	default:
2870 		if (adapter->hw.mac.max_frame_size > 8192)
2871 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2872 		else
2873 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2874 	}
2875 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2876 
2877 	/*
2878 	 * These parameters control the automatic generation (Tx) and
2879 	 * response (Rx) to Ethernet PAUSE frames.
2880 	 * - High water mark should allow for at least two frames to be
2881 	 *   received after sending an XOFF.
2882 	 * - Low water mark works best when it is very near the high water mark.
2883 	 *   This allows the receiver to restart by sending XON when it has
2884 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2885 	 *   restart after one full frame is pulled from the buffer. There
2886 	 *   could be several smaller frames in the buffer and if so they will
2887 	 *   not trigger the XON until their total number reduces the buffer
2888 	 *   by 1500.
2889 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2890 	 */
2891 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2892 	hw->fc.high_water = rx_buffer_size -
2893 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2894 	hw->fc.low_water = hw->fc.high_water - 1500;
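
	/*
	 * Worked example of the water marks above: on an 82571 the RX
	 * portion of the PBA is 32 KB, so rx_buffer_size is
	 * 32 << 10 = 32768 bytes.  Assuming the standard 1518-byte max
	 * frame, roundup2(1518, 1024) = 2048, giving
	 * high_water = 32768 - 2048 = 30720 and low_water = 29220.
	 */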
2895 
2896 	if (adapter->fc) /* locally set flow control value? */
2897 		hw->fc.requested_mode = adapter->fc;
2898 	else
2899 		hw->fc.requested_mode = e1000_fc_full;
2900 
2901 	if (hw->mac.type == e1000_80003es2lan)
2902 		hw->fc.pause_time = 0xFFFF;
2903 	else
2904 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2905 
2906 	hw->fc.send_xon = TRUE;
2907 
2908 	/* Device specific overrides/settings */
2909 	switch (hw->mac.type) {
2910 	case e1000_pchlan:
2911 		/* Workaround: no TX flow ctrl for PCH */
2912 		hw->fc.requested_mode = e1000_fc_rx_pause;
2913 		hw->fc.pause_time = 0xFFFF; /* override */
2914 		if (ifp->if_mtu > ETHERMTU) {
2915 			hw->fc.high_water = 0x3500;
2916 			hw->fc.low_water = 0x1500;
2917 		} else {
2918 			hw->fc.high_water = 0x5000;
2919 			hw->fc.low_water = 0x3000;
2920 		}
2921 		hw->fc.refresh_time = 0x1000;
2922 		break;
2923 	case e1000_pch2lan:
2924 	case e1000_pch_lpt:
2925 		hw->fc.high_water = 0x5C20;
2926 		hw->fc.low_water = 0x5048;
2927 		hw->fc.pause_time = 0x0650;
2928 		hw->fc.refresh_time = 0x0400;
2929 		/* Jumbos need adjusted PBA */
2930 		if (ifp->if_mtu > ETHERMTU)
2931 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2932 		else
2933 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2934 		break;
2935 	case e1000_ich9lan:
2936 	case e1000_ich10lan:
2937 		if (ifp->if_mtu > ETHERMTU) {
2938 			hw->fc.high_water = 0x2800;
2939 			hw->fc.low_water = hw->fc.high_water - 8;
2940 			break;
2941 		}
2942 		/* else fall thru */
2943 	default:
2944 		if (hw->mac.type == e1000_80003es2lan)
2945 			hw->fc.pause_time = 0xFFFF;
2946 		break;
2947 	}
2948 
2949 	/* Issue a global reset */
2950 	e1000_reset_hw(hw);
2951 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2952 	em_disable_aspm(adapter);
2953 	/* and a re-init */
2954 	if (e1000_init_hw(hw) < 0) {
2955 		device_printf(dev, "Hardware Initialization Failed\n");
2956 		return;
2957 	}
2958 
2959 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2960 	e1000_get_phy_info(hw);
2961 	e1000_check_for_link(hw);
2962 	return;
2963 }
2964 
2965 /*********************************************************************
2966  *
2967  *  Setup networking device structure and register an interface.
2968  *
2969  **********************************************************************/
2970 static int
2971 em_setup_interface(device_t dev, struct adapter *adapter)
2972 {
2973 	struct ifnet   *ifp;
2974 
2975 	INIT_DEBUGOUT("em_setup_interface: begin");
2976 
2977 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2978 	if (ifp == NULL) {
2979 		device_printf(dev, "can not allocate ifnet structure\n");
2980 		return (-1);
2981 	}
2982 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2983 	ifp->if_init =  em_init;
2984 	ifp->if_softc = adapter;
2985 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2986 	ifp->if_ioctl = em_ioctl;
2987 #ifdef EM_MULTIQUEUE
2988 	/* Multiqueue stack interface */
2989 	ifp->if_transmit = em_mq_start;
2990 	ifp->if_qflush = em_qflush;
2991 #else
2992 	ifp->if_start = em_start;
2993 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2994 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2995 	IFQ_SET_READY(&ifp->if_snd);
2996 #endif
2997 
2998 	ether_ifattach(ifp, adapter->hw.mac.addr);
2999 
3000 	ifp->if_capabilities = ifp->if_capenable = 0;
3001 
3002 
3003 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3004 	ifp->if_capabilities |= IFCAP_TSO4;
3005 	/*
3006 	 * Tell the upper layer(s) we
3007 	 * support full VLAN capability
3008 	 */
3009 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3010 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3011 			     |  IFCAP_VLAN_HWTSO
3012 			     |  IFCAP_VLAN_MTU;
3013 	ifp->if_capenable = ifp->if_capabilities;
3014 
3015 	/*
3016 	** Don't turn this on by default, if vlans are
3017 	** created on another pseudo device (eg. lagg)
3018 	** then vlan events are not passed thru, breaking
3019 	** operation, but with HW FILTER off it works. If
3020 	** using vlans directly on the em driver you can
3021 	** enable this and get full hardware tag filtering.
3022 	*/
3023 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3024 
3025 #ifdef DEVICE_POLLING
3026 	ifp->if_capabilities |= IFCAP_POLLING;
3027 #endif
3028 
3029 	/* Enable only WOL MAGIC by default */
3030 	if (adapter->wol) {
3031 		ifp->if_capabilities |= IFCAP_WOL;
3032 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3033 	}
3034 
3035 	/*
3036 	 * Specify the media types supported by this adapter and register
3037 	 * callbacks to update media and link information
3038 	 */
3039 	ifmedia_init(&adapter->media, IFM_IMASK,
3040 	    em_media_change, em_media_status);
3041 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3042 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3043 		u_char fiber_type = IFM_1000_SX;	/* default type */
3044 
3045 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3046 			    0, NULL);
3047 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3048 	} else {
3049 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3050 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3051 			    0, NULL);
3052 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3053 			    0, NULL);
3054 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3055 			    0, NULL);
3056 		if (adapter->hw.phy.type != e1000_phy_ife) {
3057 			ifmedia_add(&adapter->media,
3058 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3059 			ifmedia_add(&adapter->media,
3060 				IFM_ETHER | IFM_1000_T, 0, NULL);
3061 		}
3062 	}
3063 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3064 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3065 	return (0);
3066 }
3067 
3068 
3069 /*
3070  * Manage DMA'able memory.
3071  */
3072 static void
3073 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3074 {
3075 	if (error)
3076 		return;
3077 	*(bus_addr_t *) arg = segs[0].ds_addr;
3078 }
3079 
3080 static int
3081 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3082         struct em_dma_alloc *dma, int mapflags)
3083 {
3084 	int error;
3085 
3086 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3087 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3088 				BUS_SPACE_MAXADDR,	/* lowaddr */
3089 				BUS_SPACE_MAXADDR,	/* highaddr */
3090 				NULL, NULL,		/* filter, filterarg */
3091 				size,			/* maxsize */
3092 				1,			/* nsegments */
3093 				size,			/* maxsegsize */
3094 				0,			/* flags */
3095 				NULL,			/* lockfunc */
3096 				NULL,			/* lockarg */
3097 				&dma->dma_tag);
3098 	if (error) {
3099 		device_printf(adapter->dev,
3100 		    "%s: bus_dma_tag_create failed: %d\n",
3101 		    __func__, error);
3102 		goto fail_0;
3103 	}
3104 
3105 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3106 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3107 	if (error) {
3108 		device_printf(adapter->dev,
3109 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3110 		    __func__, (uintmax_t)size, error);
3111 		goto fail_1;
3112 	}
3113 
3114 	dma->dma_paddr = 0;
3115 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3116 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3117 	if (error || dma->dma_paddr == 0) {
3118 		device_printf(adapter->dev,
3119 		    "%s: bus_dmamap_load failed: %d\n",
3120 		    __func__, error);
3121 		goto fail_3;
3122 	}
3123 
3124 	return (0);
3125 
3126 fail_3:
3127 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3128 fail_2:
3129 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:	/* the tag exists, but the memory was never allocated */
3130 	bus_dma_tag_destroy(dma->dma_tag);
3131 fail_0:
3132 	dma->dma_map = NULL;
3133 	dma->dma_tag = NULL;
3134 
3135 	return (error);
3136 }
3137 
3138 static void
3139 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3140 {
3141 	if (dma->dma_tag == NULL)
3142 		return;
3143 	if (dma->dma_map != NULL) {
3144 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3145 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3146 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3147 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3148 		dma->dma_map = NULL;
3149 	}
3150 	bus_dma_tag_destroy(dma->dma_tag);
3151 	dma->dma_tag = NULL;
3152 }
3153 
3154 
3155 /*********************************************************************
3156  *
3157  *  Allocate memory for the transmit and receive rings, and then
3158  *  the descriptors associated with each, called only once at attach.
3159  *
3160  **********************************************************************/
3161 static int
3162 em_allocate_queues(struct adapter *adapter)
3163 {
3164 	device_t		dev = adapter->dev;
3165 	struct tx_ring		*txr = NULL;
3166 	struct rx_ring		*rxr = NULL;
3167 	int rsize, tsize, error = E1000_SUCCESS;
3168 	int txconf = 0, rxconf = 0;
3169 
3170 
3171 	/* Allocate the TX ring struct memory */
3172 	if (!(adapter->tx_rings =
3173 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3174 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3175 		device_printf(dev, "Unable to allocate TX ring memory\n");
3176 		error = ENOMEM;
3177 		goto fail;
3178 	}
3179 
3180 	/* Now allocate the RX */
3181 	if (!(adapter->rx_rings =
3182 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3183 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3184 		device_printf(dev, "Unable to allocate RX ring memory\n");
3185 		error = ENOMEM;
3186 		goto rx_fail;
3187 	}
3188 
3189 	tsize = roundup2(adapter->num_tx_desc *
3190 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
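
	/*
	 * For example, with the default 1024 TX descriptors at 16 bytes
	 * each (sizeof(struct e1000_tx_desc)), tsize is 16384, which is
	 * already a multiple of EM_DBA_ALIGN (128), so roundup2()
	 * leaves it unchanged.
	 */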
3191 	/*
3192 	 * Now set up the TX queues, txconf is needed to handle the
3193 	 * possibility that things fail midcourse and we need to
3194 	 * undo memory gracefully
3195 	 */
3196 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3197 		/* Set up some basics */
3198 		txr = &adapter->tx_rings[i];
3199 		txr->adapter = adapter;
3200 		txr->me = i;
3201 
3202 		/* Initialize the TX lock */
3203 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3204 		    device_get_nameunit(dev), txr->me);
3205 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3206 
3207 		if (em_dma_malloc(adapter, tsize,
3208 			&txr->txdma, BUS_DMA_NOWAIT)) {
3209 			device_printf(dev,
3210 			    "Unable to allocate TX Descriptor memory\n");
3211 			error = ENOMEM;
3212 			goto err_tx_desc;
3213 		}
3214 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3215 		bzero((void *)txr->tx_base, tsize);
3216 
3217         	if (em_allocate_transmit_buffers(txr)) {
3218 			device_printf(dev,
3219 			    "Critical Failure setting up transmit buffers\n");
3220 			error = ENOMEM;
3221 			goto err_tx_desc;
3222         	}
3223 #if __FreeBSD_version >= 800000
3224 		/* Allocate a buf ring */
3225 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3226 		    M_WAITOK, &txr->tx_mtx);
3227 #endif
3228 	}
3229 
3230 	/*
3231 	 * Next the RX queues...
3232 	 */
3233 	rsize = roundup2(adapter->num_rx_desc *
3234 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3235 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3236 		rxr = &adapter->rx_rings[i];
3237 		rxr->adapter = adapter;
3238 		rxr->me = i;
3239 
3240 		/* Initialize the RX lock */
3241 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3242 		    device_get_nameunit(dev), rxr->me);
3243 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3244 
3245 		if (em_dma_malloc(adapter, rsize,
3246 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3247 			device_printf(dev,
3248 			    "Unable to allocate RxDescriptor memory\n");
3249 			error = ENOMEM;
3250 			goto err_rx_desc;
3251 		}
3252 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3253 		bzero((void *)rxr->rx_base, rsize);
3254 
3255 		/* Allocate receive buffers for the ring */
3256 		if (em_allocate_receive_buffers(rxr)) {
3257 			device_printf(dev,
3258 			    "Critical Failure setting up receive buffers\n");
3259 			error = ENOMEM;
3260 			goto err_rx_desc;
3261 		}
3262 	}
3263 
3264 	return (0);
3265 
3266 err_rx_desc:
3267 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3268 		em_dma_free(adapter, &rxr->rxdma);
3269 err_tx_desc:
3270 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3271 		em_dma_free(adapter, &txr->txdma);
3272 	free(adapter->rx_rings, M_DEVBUF);
3273 rx_fail:
3274 #if __FreeBSD_version >= 800000
3275 	buf_ring_free(txr->br, M_DEVBUF);
3276 #endif
3277 	free(adapter->tx_rings, M_DEVBUF);
3278 fail:
3279 	return (error);
3280 }
3281 
3282 
3283 /*********************************************************************
3284  *
3285  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3286  *  the information needed to transmit a packet on the wire. This is
3287  *  called only once at attach, setup is done every reset.
3288  *
3289  **********************************************************************/
3290 static int
3291 em_allocate_transmit_buffers(struct tx_ring *txr)
3292 {
3293 	struct adapter *adapter = txr->adapter;
3294 	device_t dev = adapter->dev;
3295 	struct em_buffer *txbuf;
3296 	int error, i;
3297 
3298 	/*
3299 	 * Setup DMA descriptor areas.
3300 	 */
3301 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3302 			       1, 0,			/* alignment, bounds */
3303 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3304 			       BUS_SPACE_MAXADDR,	/* highaddr */
3305 			       NULL, NULL,		/* filter, filterarg */
3306 			       EM_TSO_SIZE,		/* maxsize */
3307 			       EM_MAX_SCATTER,		/* nsegments */
3308 			       PAGE_SIZE,		/* maxsegsize */
3309 			       0,			/* flags */
3310 			       NULL,			/* lockfunc */
3311 			       NULL,			/* lockfuncarg */
3312 			       &txr->txtag))) {
3313 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3314 		goto fail;
3315 	}
3316 
3317 	if (!(txr->tx_buffers =
3318 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3319 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3320 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3321 		error = ENOMEM;
3322 		goto fail;
3323 	}
3324 
3325 	/* Create the descriptor buffer dma maps */
3326 	txbuf = txr->tx_buffers;
3327 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3328 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3329 		if (error != 0) {
3330 			device_printf(dev, "Unable to create TX DMA map\n");
3331 			goto fail;
3332 		}
3333 	}
3334 
3335 	return 0;
3336 fail:
3337 	/* We free all, it handles case where we are in the middle */
3338 	em_free_transmit_structures(adapter);
3339 	return (error);
3340 }
3341 
3342 /*********************************************************************
3343  *
3344  *  Initialize a transmit ring.
3345  *
3346  **********************************************************************/
3347 static void
3348 em_setup_transmit_ring(struct tx_ring *txr)
3349 {
3350 	struct adapter *adapter = txr->adapter;
3351 	struct em_buffer *txbuf;
3352 	int i;
3353 #ifdef DEV_NETMAP
3354 	struct netmap_adapter *na = NA(adapter->ifp);
3355 	struct netmap_slot *slot;
3356 #endif /* DEV_NETMAP */
3357 
3358 	/* Clear the old descriptor contents */
3359 	EM_TX_LOCK(txr);
3360 #ifdef DEV_NETMAP
3361 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3362 #endif /* DEV_NETMAP */
3363 
3364 	bzero((void *)txr->tx_base,
3365 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3366 	/* Reset indices */
3367 	txr->next_avail_desc = 0;
3368 	txr->next_to_clean = 0;
3369 
3370 	/* Free any existing tx buffers. */
3371 	txbuf = txr->tx_buffers;
3372 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3373 		if (txbuf->m_head != NULL) {
3374 			bus_dmamap_sync(txr->txtag, txbuf->map,
3375 			    BUS_DMASYNC_POSTWRITE);
3376 			bus_dmamap_unload(txr->txtag, txbuf->map);
3377 			m_freem(txbuf->m_head);
3378 			txbuf->m_head = NULL;
3379 		}
3380 #ifdef DEV_NETMAP
3381 		if (slot) {
3382 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3383 			uint64_t paddr;
3384 			void *addr;
3385 
3386 			addr = PNMB(slot + si, &paddr);
3387 			txr->tx_base[i].buffer_addr = htole64(paddr);
3388 			/* reload the map for netmap mode */
3389 			netmap_load_map(txr->txtag, txbuf->map, addr);
3390 		}
3391 #endif /* DEV_NETMAP */
3392 
3393 		/* clear the watch index */
3394 		txbuf->next_eop = -1;
3395         }
3396 
3397 	/* Set number of descriptors available */
3398 	txr->tx_avail = adapter->num_tx_desc;
3399 	txr->queue_status = EM_QUEUE_IDLE;
3400 
3401 	/* Clear checksum offload context. */
3402 	txr->last_hw_offload = 0;
3403 	txr->last_hw_ipcss = 0;
3404 	txr->last_hw_ipcso = 0;
3405 	txr->last_hw_tucss = 0;
3406 	txr->last_hw_tucso = 0;
3407 
3408 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3409 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3410 	EM_TX_UNLOCK(txr);
3411 }
3412 
3413 /*********************************************************************
3414  *
3415  *  Initialize all transmit rings.
3416  *
3417  **********************************************************************/
3418 static void
3419 em_setup_transmit_structures(struct adapter *adapter)
3420 {
3421 	struct tx_ring *txr = adapter->tx_rings;
3422 
3423 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3424 		em_setup_transmit_ring(txr);
3425 
3426 	return;
3427 }
3428 
3429 /*********************************************************************
3430  *
3431  *  Enable transmit unit.
3432  *
3433  **********************************************************************/
3434 static void
3435 em_initialize_transmit_unit(struct adapter *adapter)
3436 {
3437 	struct tx_ring	*txr = adapter->tx_rings;
3438 	struct e1000_hw	*hw = &adapter->hw;
3439 	u32	tctl, tarc, tipg = 0;
3440 
3441 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3442 
3443 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3444 		u64 bus_addr = txr->txdma.dma_paddr;
3445 		/* Base and Len of TX Ring */
3446 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3447 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3448 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3449 	    	    (u32)(bus_addr >> 32));
3450 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3451 	    	    (u32)bus_addr);
3452 		/* Init the HEAD/TAIL indices */
3453 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3454 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3455 
3456 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3457 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3458 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3459 
3460 		txr->queue_status = EM_QUEUE_IDLE;
3461 	}
3462 
3463 	/* Set the default values for the Tx Inter Packet Gap timer */
3464 	switch (adapter->hw.mac.type) {
3465 	case e1000_80003es2lan:
3466 		tipg = DEFAULT_82543_TIPG_IPGR1;
3467 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3468 		    E1000_TIPG_IPGR2_SHIFT;
3469 		break;
3470 	default:
3471 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3472 		    (adapter->hw.phy.media_type ==
3473 		    e1000_media_type_internal_serdes))
3474 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3475 		else
3476 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3477 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3478 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3479 	}
3480 
3481 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3482 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3483 
3484 	if (adapter->hw.mac.type >= e1000_82540)
3485 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3486 		    adapter->tx_abs_int_delay.value);
3487 
3488 	if ((adapter->hw.mac.type == e1000_82571) ||
3489 	    (adapter->hw.mac.type == e1000_82572)) {
3490 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3491 		tarc |= SPEED_MODE_BIT;
3492 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3493 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3494 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3495 		tarc |= 1;
3496 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3497 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3498 		tarc |= 1;
3499 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3500 	}
3501 
3502 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3503 	if (adapter->tx_int_delay.value > 0)
3504 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3505 
3506 	/* Program the Transmit Control Register */
3507 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3508 	tctl &= ~E1000_TCTL_CT;
3509 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3510 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3511 
3512 	if (adapter->hw.mac.type >= e1000_82571)
3513 		tctl |= E1000_TCTL_MULR;
3514 
3515 	/* This write will effectively turn on the transmit unit. */
3516 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3517 
3518 }
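
/*
 * TIPG field-packing sketch (illustrative; the exact values come from
 * the shared-code defines, so treat the numbers as assumptions): IPGT
 * occupies bits 9:0, IPGR1 bits 19:10 and IPGR2 bits 29:20, so with
 * IPGT = 8, IPGR1 = 8 and IPGR2 = 6 the copper default packs as
 *
 *	tipg = 8 | (8 << 10) | (6 << 20) = 0x00602008
 */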
3519 
3520 
3521 /*********************************************************************
3522  *
3523  *  Free all transmit rings.
3524  *
3525  **********************************************************************/
3526 static void
3527 em_free_transmit_structures(struct adapter *adapter)
3528 {
3529 	struct tx_ring *txr = adapter->tx_rings;
3530 
3531 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3532 		EM_TX_LOCK(txr);
3533 		em_free_transmit_buffers(txr);
3534 		em_dma_free(adapter, &txr->txdma);
3535 		EM_TX_UNLOCK(txr);
3536 		EM_TX_LOCK_DESTROY(txr);
3537 	}
3538 
3539 	free(adapter->tx_rings, M_DEVBUF);
3540 }
3541 
3542 /*********************************************************************
3543  *
3544  *  Free transmit ring related data structures.
3545  *
3546  **********************************************************************/
3547 static void
3548 em_free_transmit_buffers(struct tx_ring *txr)
3549 {
3550 	struct adapter		*adapter = txr->adapter;
3551 	struct em_buffer	*txbuf;
3552 
3553 	INIT_DEBUGOUT("free_transmit_ring: begin");
3554 
3555 	if (txr->tx_buffers == NULL)
3556 		return;
3557 
3558 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3559 		txbuf = &txr->tx_buffers[i];
3560 		if (txbuf->m_head != NULL) {
3561 			bus_dmamap_sync(txr->txtag, txbuf->map,
3562 			    BUS_DMASYNC_POSTWRITE);
3563 			bus_dmamap_unload(txr->txtag,
3564 			    txbuf->map);
3565 			m_freem(txbuf->m_head);
3566 			txbuf->m_head = NULL;
3567 			if (txbuf->map != NULL) {
3568 				bus_dmamap_destroy(txr->txtag,
3569 				    txbuf->map);
3570 				txbuf->map = NULL;
3571 			}
3572 		} else if (txbuf->map != NULL) {
3573 			bus_dmamap_unload(txr->txtag,
3574 			    txbuf->map);
3575 			bus_dmamap_destroy(txr->txtag,
3576 			    txbuf->map);
3577 			txbuf->map = NULL;
3578 		}
3579 	}
3580 #if __FreeBSD_version >= 800000
3581 	if (txr->br != NULL)
3582 		buf_ring_free(txr->br, M_DEVBUF);
3583 #endif
3584 	if (txr->tx_buffers != NULL) {
3585 		free(txr->tx_buffers, M_DEVBUF);
3586 		txr->tx_buffers = NULL;
3587 	}
3588 	if (txr->txtag != NULL) {
3589 		bus_dma_tag_destroy(txr->txtag);
3590 		txr->txtag = NULL;
3591 	}
3592 	return;
3593 }
3594 
3595 
3596 /*********************************************************************
3597  *  The offload context is protocol specific (TCP/UDP) and thus
3598  *  only needs to be set when the protocol changes. A context
3599  *  change can be a performance detriment, and it might be
3600  *  better just disabled. The reason arises in the way
3601  *  in which the controller supports pipelined requests from the
3602  *  Tx data DMA. Up to four requests can be pipelined, and they may
3603  *  belong to the same packet or to multiple packets. However all
3604  *  requests for one packet are issued before a request is issued
3605  *  for a subsequent packet and if a request for the next packet
3606  *  requires a context change, that request will be stalled
3607  *  until the previous request completes. This means setting up
3608  *  a new context effectively disables pipelined Tx data DMA which
3609  *  in turn greatly slows down performance when sending small-sized
3610  *  frames.
3611  **********************************************************************/
3612 static void
3613 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3614     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3615 {
3616 	struct adapter			*adapter = txr->adapter;
3617 	struct e1000_context_desc	*TXD = NULL;
3618 	struct em_buffer		*tx_buffer;
3619 	int				cur, hdr_len;
3620 	u32				cmd = 0;
3621 	u16				offload = 0;
3622 	u8				ipcso, ipcss, tucso, tucss;
3623 
3624 	ipcss = ipcso = tucss = tucso = 0;
3625 	hdr_len = ip_off + (ip->ip_hl << 2);
3626 	cur = txr->next_avail_desc;
3627 
3628 	/* Setup of IP header checksum. */
3629 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3630 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3631 		offload |= CSUM_IP;
3632 		ipcss = ip_off;
3633 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3634 		/*
3635 		 * Start offset for header checksum calculation.
3636 		 * End offset for header checksum calculation.
3637 		 * Offset of place to put the checksum.
3638 		 */
3639 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3640 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3641 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3642 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3643 		cmd |= E1000_TXD_CMD_IP;
3644 	}
3645 
3646 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3647  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3648  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3649  		offload |= CSUM_TCP;
3650  		tucss = hdr_len;
3651  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3652  		/*
3653  		 * Setting up a new checksum offload context for every
3654  		 * frame takes a lot of processing time in hardware. It
3655  		 * also hurts performance badly for small frames, so avoid
3656  		 * it if the driver can reuse the previously configured
3657  		 * checksum offload context.
3658  		 */
3659  		if (txr->last_hw_offload == offload) {
3660  			if (offload & CSUM_IP) {
3661  				if (txr->last_hw_ipcss == ipcss &&
3662  				    txr->last_hw_ipcso == ipcso &&
3663  				    txr->last_hw_tucss == tucss &&
3664  				    txr->last_hw_tucso == tucso)
3665  					return;
3666  			} else {
3667  				if (txr->last_hw_tucss == tucss &&
3668  				    txr->last_hw_tucso == tucso)
3669  					return;
3670  			}
3671   		}
3672  		txr->last_hw_offload = offload;
3673  		txr->last_hw_tucss = tucss;
3674  		txr->last_hw_tucso = tucso;
3675  		/*
3676  		 * Start offset for payload checksum calculation.
3677  		 * End offset for payload checksum calculation.
3678  		 * Offset of place to put the checksum.
3679  		 */
3680 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3681  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3682  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3683  		TXD->upper_setup.tcp_fields.tucso = tucso;
3684  		cmd |= E1000_TXD_CMD_TCP;
3685  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3686  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3687  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3688  		tucss = hdr_len;
3689  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
 		offload |= CSUM_UDP;
3690  		/*
3691  		 * Setting up a new checksum offload context for every
3692  		 * frame takes a lot of processing time in hardware. It
3693  		 * also hurts performance badly for small frames, so avoid
3694  		 * it if the driver can reuse the previously configured
3695  		 * checksum offload context.
3696  		 */
3697  		if (txr->last_hw_offload == offload) {
3698  			if (offload & CSUM_IP) {
3699  				if (txr->last_hw_ipcss == ipcss &&
3700  				    txr->last_hw_ipcso == ipcso &&
3701  				    txr->last_hw_tucss == tucss &&
3702  				    txr->last_hw_tucso == tucso)
3703  					return;
3704  			} else {
3705  				if (txr->last_hw_tucss == tucss &&
3706  				    txr->last_hw_tucso == tucso)
3707  					return;
3708  			}
3709  		}
3710  		txr->last_hw_offload = offload;
3711  		txr->last_hw_tucss = tucss;
3712  		txr->last_hw_tucso = tucso;
3713  		/*
3714  		 * Start offset for payload checksum calculation.
3715  		 * End offset for payload checksum calculation.
3716  		 * Offset of place to put the checksum.
3717  		 */
3718 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3719  		TXD->upper_setup.tcp_fields.tucss = tucss;
3720  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3721  		TXD->upper_setup.tcp_fields.tucso = tucso;
3722   	}
3723 
3724  	if (offload & CSUM_IP) {
3725  		txr->last_hw_ipcss = ipcss;
3726  		txr->last_hw_ipcso = ipcso;
3727   	}
3728 
3729 	TXD->tcp_seg_setup.data = htole32(0);
3730 	TXD->cmd_and_length =
3731 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3732 	tx_buffer = &txr->tx_buffers[cur];
3733 	tx_buffer->m_head = NULL;
3734 	tx_buffer->next_eop = -1;
3735 
3736 	if (++cur == adapter->num_tx_desc)
3737 		cur = 0;
3738 
3739 	txr->tx_avail--;
3740 	txr->next_avail_desc = cur;
3741 }
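
/*
 * Context-reuse example (illustrative): two back-to-back TCP/IPv4
 * frames with a 14-byte ethernet header and a 20-byte IP header yield
 * identical ipcss/ipcso/tucss/tucso values (14, 24, 34 and 50, taking
 * offsetof(struct ip, ip_sum) == 10 and
 * offsetof(struct tcphdr, th_sum) == 16), so the second frame returns
 * early above without consuming a descriptor for a new context.
 */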
3742 
3743 
3744 /**********************************************************************
3745  *
3746  *  Setup work for hardware segmentation offload (TSO)
3747  *
3748  **********************************************************************/
3749 static void
3750 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3751     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3752 {
3753 	struct adapter			*adapter = txr->adapter;
3754 	struct e1000_context_desc	*TXD;
3755 	struct em_buffer		*tx_buffer;
3756 	int cur, hdr_len;
3757 
3758 	/*
3759 	 * In theory we can use the same TSO context if and only if
3760 	 * the frame is the same type (IP/TCP) and has the same MSS.
3761 	 * However, checking whether a frame has the same IP/TCP
3762 	 * structure is a hard thing, so just ignore that and always
3763 	 * re-establish a new TSO context.
3764 	 */
3765 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3766 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3767 		      E1000_TXD_DTYP_D |	/* Data descr type */
3768 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3769 
3770 	/* IP and/or TCP header checksum calculation and insertion. */
3771 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3772 
3773 	cur = txr->next_avail_desc;
3774 	tx_buffer = &txr->tx_buffers[cur];
3775 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3776 
3777 	/*
3778 	 * Start offset for header checksum calculation.
3779 	 * End offset for header checksum calculation.
3780 	 * Offset of place to put the checksum.
3781 	 */
3782 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3783 	TXD->lower_setup.ip_fields.ipcse =
3784 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3785 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3786 	/*
3787 	 * Start offset for payload checksum calculation.
3788 	 * End offset for payload checksum calculation.
3789 	 * Offset of place to put the checksum.
3790 	 */
3791 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3792 	TXD->upper_setup.tcp_fields.tucse = 0;
3793 	TXD->upper_setup.tcp_fields.tucso =
3794 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3795 	/*
3796 	 * Payload size per packet w/o any headers.
3797 	 * Length of all headers up to payload.
3798 	 */
3799 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3800 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3801 
3802 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3803 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3804 				E1000_TXD_CMD_TSE |	/* TSE context */
3805 				E1000_TXD_CMD_IP |	/* Do IP csum */
3806 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3807 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3808 
3809 	tx_buffer->m_head = NULL;
3810 	tx_buffer->next_eop = -1;
3811 
3812 	if (++cur == adapter->num_tx_desc)
3813 		cur = 0;
3814 
3815 	txr->tx_avail--;
3816 	txr->next_avail_desc = cur;
3817 	txr->tx_tso = TRUE;
3818 }
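
/*
 * Worked example (illustrative, hypothetical sizes): with ip_off = 14,
 * ip_hl = 5 and th_off = 5, hdr_len = 14 + 20 + 20 = 54.  For
 * m_pkthdr.len = 65535 and tso_segsz (MSS) = 1448, the context above
 * asks the hardware to carve the 65481-byte payload into
 * ceil(65481 / 1448) = 46 segments, each sent behind a fixed-up
 * 54-byte header.
 */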
3819 
3820 
3821 /**********************************************************************
3822  *
3823  *  Examine each tx_buffer in the used queue. If the hardware is done
3824  *  processing the packet then free associated resources. The
3825  *  tx_buffer is put back on the free queue.
3826  *
3827  **********************************************************************/
3828 static void
3829 em_txeof(struct tx_ring *txr)
3830 {
3831 	struct adapter	*adapter = txr->adapter;
3832         int first, last, done, processed;
3833         struct em_buffer *tx_buffer;
3834         struct e1000_tx_desc   *tx_desc, *eop_desc;
3835 	struct ifnet   *ifp = adapter->ifp;
3836 
3837 	EM_TX_LOCK_ASSERT(txr);
3838 #ifdef DEV_NETMAP
3839 	if (netmap_tx_irq(ifp, txr->me))
3840 		return;
3841 #endif /* DEV_NETMAP */
3842 
3843 	/* No work, make sure watchdog is off */
3844         if (txr->tx_avail == adapter->num_tx_desc) {
3845 		txr->queue_status = EM_QUEUE_IDLE;
3846                 return;
3847 	}
3848 
3849 	processed = 0;
3850         first = txr->next_to_clean;
3851         tx_desc = &txr->tx_base[first];
3852         tx_buffer = &txr->tx_buffers[first];
3853 	last = tx_buffer->next_eop;
3854         eop_desc = &txr->tx_base[last];
3855 
3856 	/*
3857 	 * What this does is get the index of the
3858 	 * first descriptor AFTER the EOP of the
3859 	 * first packet, that way we can do the
3860 	 * simple comparison on the inner while loop.
3861 	 */
3862 	if (++last == adapter->num_tx_desc)
3863  		last = 0;
3864 	done = last;
3865 
3866         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3867             BUS_DMASYNC_POSTREAD);
3868 
3869         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3870 		/* We clean the range of the packet */
3871 		while (first != done) {
3872                 	tx_desc->upper.data = 0;
3873                 	tx_desc->lower.data = 0;
3874                 	tx_desc->buffer_addr = 0;
3875                 	++txr->tx_avail;
3876 			++processed;
3877 
3878 			if (tx_buffer->m_head) {
3879 				bus_dmamap_sync(txr->txtag,
3880 				    tx_buffer->map,
3881 				    BUS_DMASYNC_POSTWRITE);
3882 				bus_dmamap_unload(txr->txtag,
3883 				    tx_buffer->map);
3884                         	m_freem(tx_buffer->m_head);
3885                         	tx_buffer->m_head = NULL;
3886                 	}
3887 			tx_buffer->next_eop = -1;
3888 			txr->watchdog_time = ticks;
3889 
3890 	                if (++first == adapter->num_tx_desc)
3891 				first = 0;
3892 
3893 	                tx_buffer = &txr->tx_buffers[first];
3894 			tx_desc = &txr->tx_base[first];
3895 		}
3896 		++ifp->if_opackets;
3897 		/* See if we can continue to the next packet */
3898 		last = tx_buffer->next_eop;
3899 		if (last != -1) {
3900         		eop_desc = &txr->tx_base[last];
3901 			/* Get new done point */
3902 			if (++last == adapter->num_tx_desc) last = 0;
3903 			done = last;
3904 		} else
3905 			break;
3906         }
3907         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3908             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3909 
3910         txr->next_to_clean = first;
3911 
3912 	/*
3913 	** Watchdog calculation: we know there's
3914 	** work outstanding or the first return
3915 	** would have been taken, so no progress
3916 	** for too long indicates a hang. The local
3917 	** timer will examine this and reset if needed.
3918 	*/
3919 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3920 		txr->queue_status = EM_QUEUE_HUNG;
3921 
3922         /*
3923          * If we have a minimum free, clear IFF_DRV_OACTIVE
3924          * to tell the stack that it is OK to send packets.
3925 	 * Notice that all writes of OACTIVE happen under the
3926 	 * TX lock which, with a single queue, guarantees
3927 	 * sanity.
3928          */
3929         if (txr->tx_avail >= EM_MAX_SCATTER)
3930 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3931 
3932 	/* Disable watchdog if all clean */
3933 	if (txr->tx_avail == adapter->num_tx_desc) {
3934 		txr->queue_status = EM_QUEUE_IDLE;
3935 	}
3936 }
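
/*
 * Ring-index sketch (illustrative, hypothetical 8-descriptor ring): if
 * a packet occupies slots 2..4 (next_eop == 4), "done" above becomes
 * 5, so the inner loop cleans slots 2, 3 and 4 and stops with
 * first == 5, the slot just past the EOP.  The same increment-and-wrap
 * arithmetic covers the end of the ring:
 *
 *	if (++first == adapter->num_tx_desc)
 *		first = 0;
 */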
3937 
3938 
3939 /*********************************************************************
3940  *
3941  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3942  *
3943  **********************************************************************/
3944 static void
3945 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3946 {
3947 	struct adapter		*adapter = rxr->adapter;
3948 	struct mbuf		*m;
3949 	bus_dma_segment_t	segs[1];
3950 	struct em_buffer	*rxbuf;
3951 	int			i, j, error, nsegs;
3952 	bool			cleaned = FALSE;
3953 
3954 	i = j = rxr->next_to_refresh;
3955 	/*
3956 	** Get one descriptor beyond
3957 	** our work mark to control
3958 	** the loop.
3959 	*/
3960 	if (++j == adapter->num_rx_desc)
3961 		j = 0;
3962 
3963 	while (j != limit) {
3964 		rxbuf = &rxr->rx_buffers[i];
3965 		if (rxbuf->m_head == NULL) {
3966 			m = m_getjcl(M_NOWAIT, MT_DATA,
3967 			    M_PKTHDR, adapter->rx_mbuf_sz);
3968 			/*
3969 			** If we have a temporary resource shortage
3970 			** that causes a failure, just abort refresh
3971 			** for now, we will return to this point when
3972 			** reinvoked from em_rxeof.
3973 			*/
3974 			if (m == NULL)
3975 				goto update;
3976 		} else
3977 			m = rxbuf->m_head;
3978 
3979 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3980 		m->m_flags |= M_PKTHDR;
3981 		m->m_data = m->m_ext.ext_buf;
3982 
3983 		/* Use bus_dma machinery to setup the memory mapping  */
3984 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3985 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3986 		if (error != 0) {
3987 			printf("Refresh mbufs: hdr dmamap load"
3988 			    " failure - %d\n", error);
3989 			m_free(m);
3990 			rxbuf->m_head = NULL;
3991 			goto update;
3992 		}
3993 		rxbuf->m_head = m;
3994 		bus_dmamap_sync(rxr->rxtag,
3995 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3996 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3997 		cleaned = TRUE;
3998 
3999 		i = j; /* Next is precalculated for us */
4000 		rxr->next_to_refresh = i;
4001 		/* Calculate next controlling index */
4002 		if (++j == adapter->num_rx_desc)
4003 			j = 0;
4004 	}
4005 update:
4006 	/*
4007 	** Update the tail pointer only if,
4008 	** and only as far as, we have refreshed.
4009 	*/
4010 	if (cleaned)
4011 		E1000_WRITE_REG(&adapter->hw,
4012 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4013 
4014 	return;
4015 }
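
/*
 * Loop-control sketch (illustrative, hypothetical 4-descriptor ring):
 * with next_to_refresh == 2 and limit == 1, slot 2 is refreshed (j
 * advances to 0), then slot 3 (j advances to 1 == limit, stop),
 * leaving next_to_refresh == 0.  Keeping j one slot ahead of i
 * guarantees the refresh never runs into the limit slot, i.e. never
 * catches up with the hardware's position.
 */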
4016 
4017 
4018 /*********************************************************************
4019  *
4020  *  Allocate memory for rx_buffer structures. Since we use one
4021  *  rx_buffer per received packet, the maximum number of rx_buffer's
4022  *  that we'll need is equal to the number of receive descriptors
4023  *  that we've allocated.
4024  *
4025  **********************************************************************/
4026 static int
4027 em_allocate_receive_buffers(struct rx_ring *rxr)
4028 {
4029 	struct adapter		*adapter = rxr->adapter;
4030 	device_t		dev = adapter->dev;
4031 	struct em_buffer	*rxbuf;
4032 	int			error;
4033 
4034 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4035 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4036 	if (rxr->rx_buffers == NULL) {
4037 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4038 		return (ENOMEM);
4039 	}
4040 
4041 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4042 				1, 0,			/* alignment, bounds */
4043 				BUS_SPACE_MAXADDR,	/* lowaddr */
4044 				BUS_SPACE_MAXADDR,	/* highaddr */
4045 				NULL, NULL,		/* filter, filterarg */
4046 				MJUM9BYTES,		/* maxsize */
4047 				1,			/* nsegments */
4048 				MJUM9BYTES,		/* maxsegsize */
4049 				0,			/* flags */
4050 				NULL,			/* lockfunc */
4051 				NULL,			/* lockarg */
4052 				&rxr->rxtag);
4053 	if (error) {
4054 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4055 		    __func__, error);
4056 		goto fail;
4057 	}
4058 
4059 	rxbuf = rxr->rx_buffers;
4060 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4062 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4063 		if (error) {
4064 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4065 			    __func__, error);
4066 			goto fail;
4067 		}
4068 	}
4069 
4070 	return (0);
4071 
4072 fail:
4073 	em_free_receive_structures(adapter);
4074 	return (error);
4075 }
4076 
4077 
4078 /*********************************************************************
4079  *
4080  *  Initialize a receive ring and its buffers.
4081  *
4082  **********************************************************************/
4083 static int
4084 em_setup_receive_ring(struct rx_ring *rxr)
4085 {
4086 	struct	adapter 	*adapter = rxr->adapter;
4087 	struct em_buffer	*rxbuf;
4088 	bus_dma_segment_t	seg[1];
4089 	int			rsize, nsegs, error = 0;
4090 #ifdef DEV_NETMAP
4091 	struct netmap_adapter *na = NA(adapter->ifp);
4092 	struct netmap_slot *slot;
4093 #endif
4094 
4095 
4096 	/* Clear the ring contents */
4097 	EM_RX_LOCK(rxr);
4098 	rsize = roundup2(adapter->num_rx_desc *
4099 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4100 	bzero((void *)rxr->rx_base, rsize);
4101 #ifdef DEV_NETMAP
4102 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4103 #endif
4104 
4105 	/*
4106 	** Free current RX buffer structs and their mbufs
4107 	*/
4108 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4109 		rxbuf = &rxr->rx_buffers[i];
4110 		if (rxbuf->m_head != NULL) {
4111 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4112 			    BUS_DMASYNC_POSTREAD);
4113 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4114 			m_freem(rxbuf->m_head);
4115 			rxbuf->m_head = NULL; /* mark as freed */
4116 		}
4117 	}
4118 
4119 	/* Now replenish the mbufs */
4120         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4121 		rxbuf = &rxr->rx_buffers[j];
4122 #ifdef DEV_NETMAP
4123 		if (slot) {
4124 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4125 			uint64_t paddr;
4126 			void *addr;
4127 
4128 			addr = PNMB(slot + si, &paddr);
4129 			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4130 			/* Update descriptor */
4131 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4132 			continue;
4133 		}
4134 #endif /* DEV_NETMAP */
4135 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4136 		    M_PKTHDR, adapter->rx_mbuf_sz);
4137 		if (rxbuf->m_head == NULL) {
4138 			error = ENOBUFS;
4139 			goto fail;
4140 		}
4141 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4142 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4143 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4144 
4145 		/* Get the memory mapping */
4146 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4147 		    rxbuf->map, rxbuf->m_head, seg,
4148 		    &nsegs, BUS_DMA_NOWAIT);
4149 		if (error != 0) {
4150 			m_freem(rxbuf->m_head);
4151 			rxbuf->m_head = NULL;
4152 			goto fail;
4153 		}
4154 		bus_dmamap_sync(rxr->rxtag,
4155 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4156 
4157 		/* Update descriptor */
4158 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4159 	}
4160 	rxr->next_to_check = 0;
4161 	rxr->next_to_refresh = 0;
4162 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4163 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4164 
4165 fail:
4166 	EM_RX_UNLOCK(rxr);
4167 	return (error);
4168 }
4169 
4170 /*********************************************************************
4171  *
4172  *  Initialize all receive rings.
4173  *
4174  **********************************************************************/
4175 static int
4176 em_setup_receive_structures(struct adapter *adapter)
4177 {
4178 	struct rx_ring *rxr = adapter->rx_rings;
4179 	int q;
4180 
4181 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4182 		if (em_setup_receive_ring(rxr))
4183 			goto fail;
4184 
4185 	return (0);
4186 fail:
4187 	/*
4188 	 * Free RX buffers allocated so far; we only handle
4189 	 * the rings that completed, since the failing case will have
4190 	 * cleaned up for itself. 'q' failed, so it's the terminus.
4191 	 */
4192 	for (int i = 0; i < q; ++i) {
4193 		rxr = &adapter->rx_rings[i];
4194 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4195 			struct em_buffer *rxbuf;
4196 			rxbuf = &rxr->rx_buffers[n];
4197 			if (rxbuf->m_head != NULL) {
4198 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4199 			  	  BUS_DMASYNC_POSTREAD);
4200 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4201 				m_freem(rxbuf->m_head);
4202 				rxbuf->m_head = NULL;
4203 			}
4204 		}
4205 		rxr->next_to_check = 0;
4206 		rxr->next_to_refresh = 0;
4207 	}
4208 
4209 	return (ENOBUFS);
4210 }
4211 
4212 /*********************************************************************
4213  *
4214  *  Free all receive rings.
4215  *
4216  **********************************************************************/
4217 static void
4218 em_free_receive_structures(struct adapter *adapter)
4219 {
4220 	struct rx_ring *rxr = adapter->rx_rings;
4221 
4222 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4223 		em_free_receive_buffers(rxr);
4224 		/* Free the ring memory as well */
4225 		em_dma_free(adapter, &rxr->rxdma);
4226 		EM_RX_LOCK_DESTROY(rxr);
4227 	}
4228 
4229 	free(adapter->rx_rings, M_DEVBUF);
4230 }
4231 
4232 
4233 /*********************************************************************
4234  *
4235  *  Free receive ring data structures
4236  *
4237  **********************************************************************/
4238 static void
4239 em_free_receive_buffers(struct rx_ring *rxr)
4240 {
4241 	struct adapter		*adapter = rxr->adapter;
4242 	struct em_buffer	*rxbuf = NULL;
4243 
4244 	INIT_DEBUGOUT("free_receive_buffers: begin");
4245 
4246 	if (rxr->rx_buffers != NULL) {
4247 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4248 			rxbuf = &rxr->rx_buffers[i];
4249 			if (rxbuf->map != NULL) {
4250 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4251 				    BUS_DMASYNC_POSTREAD);
4252 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4253 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4254 			}
4255 			if (rxbuf->m_head != NULL) {
4256 				m_freem(rxbuf->m_head);
4257 				rxbuf->m_head = NULL;
4258 			}
4259 		}
4260 		free(rxr->rx_buffers, M_DEVBUF);
4261 		rxr->rx_buffers = NULL;
4262 		rxr->next_to_check = 0;
4263 		rxr->next_to_refresh = 0;
4264 	}
4265 
4266 	if (rxr->rxtag != NULL) {
4267 		bus_dma_tag_destroy(rxr->rxtag);
4268 		rxr->rxtag = NULL;
4269 	}
4270 
4271 	return;
4272 }
4273 
4274 
4275 /*********************************************************************
4276  *
4277  *  Enable receive unit.
4278  *
4279  **********************************************************************/
4280 
4281 static void
4282 em_initialize_receive_unit(struct adapter *adapter)
4283 {
4284 	struct rx_ring	*rxr = adapter->rx_rings;
4285 	struct ifnet	*ifp = adapter->ifp;
4286 	struct e1000_hw	*hw = &adapter->hw;
4287 	u64	bus_addr;
4288 	u32	rctl, rxcsum;
4289 
4290 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4291 
4292 	/*
4293 	 * Make sure receives are disabled while setting
4294 	 * up the descriptor ring
4295 	 */
4296 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4297 	/* Do not disable if ever enabled on this hardware */
4298 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4299 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4300 
4301 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4302 	    adapter->rx_abs_int_delay.value);
4303 	/*
4304 	 * Set the interrupt throttling rate. Value is calculated
4305 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4306 	 */
4307 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4308 
4309 	/*
4310 	** When using MSIX interrupts we need to throttle
4311 	** using the EITR register (82574 only)
4312 	*/
4313 	if (hw->mac.type == e1000_82574) {
4314 		for (int i = 0; i < 4; i++)
4315 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4316 			    DEFAULT_ITR);
4317 		/* Disable accelerated acknowledge */
4318 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4319 	}
4320 
4321 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4322 	if (ifp->if_capenable & IFCAP_RXCSUM)
4323 		rxcsum |= E1000_RXCSUM_TUOFL;
4324 	else
4325 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4326 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4327 
4328 	/*
4329 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4330 	** long latencies are observed, like Lenovo X60. This
4331 	** change eliminates the problem, but since having positive
4332 	** values in RDTR is a known source of problems on other
4333 	** platforms another solution is being sought.
4334 	*/
4335 	if (hw->mac.type == e1000_82573)
4336 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4337 
4338 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4339 		/* Setup the Base and Length of the Rx Descriptor Ring */
4340 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4341 
4342 		bus_addr = rxr->rxdma.dma_paddr;
4343 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4344 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4345 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4346 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4347 		/* Setup the Head and Tail Descriptor Pointers */
4348 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4349 #ifdef DEV_NETMAP
4350 		/*
4351 		 * an init() while a netmap client is active must
4352 		 * preserve the rx buffers passed to userspace.
4353 		 */
4354 		if (ifp->if_capenable & IFCAP_NETMAP)
4355 			rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail;
4356 #endif /* DEV_NETMAP */
4357 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4358 	}
4359 
4360 	/* Set PTHRESH for improved jumbo performance */
4361 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4362 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4363 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4364 	    (ifp->if_mtu > ETHERMTU)) {
4365 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4366 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4367 	}
4368 
4369 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4370 		if (ifp->if_mtu > ETHERMTU)
4371 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4372 		else
4373 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4374 	}
4375 
4376 	/* Setup the Receive Control Register */
4377 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4378 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4379 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4380 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4381 
4382         /* Strip the CRC */
4383         rctl |= E1000_RCTL_SECRC;
4384 
4385         /* Make sure VLAN Filters are off */
4386         rctl &= ~E1000_RCTL_VFE;
4387 	rctl &= ~E1000_RCTL_SBP;
4388 
4389 	if (adapter->rx_mbuf_sz == MCLBYTES)
4390 		rctl |= E1000_RCTL_SZ_2048;
4391 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4392 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4393 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4394 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4395 
4396 	if (ifp->if_mtu > ETHERMTU)
4397 		rctl |= E1000_RCTL_LPE;
4398 	else
4399 		rctl &= ~E1000_RCTL_LPE;
4400 
4401 	/* Write out the settings */
4402 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4403 
4404 	return;
4405 }
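
/*
 * ITR arithmetic (illustrative; MAX_INTS_PER_SEC and DEFAULT_ITR live
 * in if_em.h, so the 8000 below is an assumption): the ITR register
 * counts 256ns units between interrupts, giving
 *
 *	DEFAULT_ITR = 1000000000 / (8000 * 256) ~= 488
 *
 * i.e. a minimum inter-interrupt gap of ~125us.
 */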
4406 
4407 
4408 /*********************************************************************
4409  *
4410  *  This routine executes in interrupt context. It replenishes
4411  *  the mbufs in the descriptor ring and sends data which has
4412  *  been dma'ed into host memory to the upper layer.
4413  *
4414  *  We loop at most count times if count is > 0, or until done if
4415  *  count < 0.
4416  *
4417  *  For polling we also now return the number of cleaned packets
4418  *********************************************************************/
4419 static bool
4420 em_rxeof(struct rx_ring *rxr, int count, int *done)
4421 {
4422 	struct adapter		*adapter = rxr->adapter;
4423 	struct ifnet		*ifp = adapter->ifp;
4424 	struct mbuf		*mp, *sendmp;
4425 	u8			status = 0;
4426 	u16 			len;
4427 	int			i, processed, rxdone = 0;
4428 	bool			eop;
4429 	struct e1000_rx_desc	*cur;
4430 
4431 	EM_RX_LOCK(rxr);
4432 
4433 #ifdef DEV_NETMAP
4434 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4435 		EM_RX_UNLOCK(rxr);
4436 		return (FALSE);
4437 	}
4438 #endif /* DEV_NETMAP */
4439 
4440 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4441 
4442 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4443 			break;
4444 
4445 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4446 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4447 
4448 		cur = &rxr->rx_base[i];
4449 		status = cur->status;
4450 		mp = sendmp = NULL;
4451 
4452 		if ((status & E1000_RXD_STAT_DD) == 0)
4453 			break;
4454 
4455 		len = le16toh(cur->length);
4456 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4457 
4458 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4459 		    (rxr->discard == TRUE)) {
4460 			adapter->dropped_pkts++;
4461 			++rxr->rx_discarded;
4462 			if (!eop) /* Catch subsequent segs */
4463 				rxr->discard = TRUE;
4464 			else
4465 				rxr->discard = FALSE;
4466 			em_rx_discard(rxr, i);
4467 			goto next_desc;
4468 		}
4469 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4470 
4471 		/* Assign correct length to the current fragment */
4472 		mp = rxr->rx_buffers[i].m_head;
4473 		mp->m_len = len;
4474 
4475 		/* Trigger for refresh */
4476 		rxr->rx_buffers[i].m_head = NULL;
4477 
4478 		/* First segment? */
4479 		if (rxr->fmp == NULL) {
4480 			mp->m_pkthdr.len = len;
4481 			rxr->fmp = rxr->lmp = mp;
4482 		} else {
4483 			/* Chain mbuf's together */
4484 			mp->m_flags &= ~M_PKTHDR;
4485 			rxr->lmp->m_next = mp;
4486 			rxr->lmp = mp;
4487 			rxr->fmp->m_pkthdr.len += len;
4488 		}
4489 
4490 		if (eop) {
4491 			--count;
4492 			sendmp = rxr->fmp;
4493 			sendmp->m_pkthdr.rcvif = ifp;
4494 			ifp->if_ipackets++;
4495 			em_receive_checksum(cur, sendmp);
4496 #ifndef __NO_STRICT_ALIGNMENT
4497 			if (adapter->hw.mac.max_frame_size >
4498 			    (MCLBYTES - ETHER_ALIGN) &&
4499 			    em_fixup_rx(rxr) != 0)
4500 				goto skip;
4501 #endif
4502 			if (status & E1000_RXD_STAT_VP) {
4503 				sendmp->m_pkthdr.ether_vtag =
4504 				    le16toh(cur->special);
4505 				sendmp->m_flags |= M_VLANTAG;
4506 			}
4507 #ifndef __NO_STRICT_ALIGNMENT
4508 skip:
4509 #endif
4510 			rxr->fmp = rxr->lmp = NULL;
4511 		}
4512 next_desc:
4513 		/* Zero out the receive descriptors status. */
4514 		cur->status = 0;
4515 		++rxdone;	/* cumulative for POLL */
4516 		++processed;
4517 
4518 		/* Advance our pointers to the next descriptor. */
4519 		if (++i == adapter->num_rx_desc)
4520 			i = 0;
4521 
4522 		/* Send to the stack */
4523 		if (sendmp != NULL) {
4524 			rxr->next_to_check = i;
4525 			EM_RX_UNLOCK(rxr);
4526 			(*ifp->if_input)(ifp, sendmp);
4527 			EM_RX_LOCK(rxr);
4528 			i = rxr->next_to_check;
4529 		}
4530 
4531 		/* Only refresh mbufs every 8 descriptors */
4532 		if (processed == 8) {
4533 			em_refresh_mbufs(rxr, i);
4534 			processed = 0;
4535 		}
4536 	}
4537 
4538 	/* Catch any remaining refresh work */
4539 	if (e1000_rx_unrefreshed(rxr))
4540 		em_refresh_mbufs(rxr, i);
4541 
4542 	rxr->next_to_check = i;
4543 	if (done != NULL)
4544 		*done = rxdone;
4545 	EM_RX_UNLOCK(rxr);
4546 
4547 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4548 }
4549 
4550 static __inline void
4551 em_rx_discard(struct rx_ring *rxr, int i)
4552 {
4553 	struct em_buffer	*rbuf;
4554 
4555 	rbuf = &rxr->rx_buffers[i];
4556 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4557 
4558 	/* Free any previous pieces */
4559 	if (rxr->fmp != NULL) {
4560 		rxr->fmp->m_flags |= M_PKTHDR;
4561 		m_freem(rxr->fmp);
4562 		rxr->fmp = NULL;
4563 		rxr->lmp = NULL;
4564 	}
4565 	/*
4566 	** Free buffer and allow em_refresh_mbufs()
4567 	** to clean up and recharge buffer.
4568 	*/
4569 	if (rbuf->m_head) {
4570 		m_free(rbuf->m_head);
4571 		rbuf->m_head = NULL;
4572 	}
4573 	return;
4574 }
4575 
4576 #ifndef __NO_STRICT_ALIGNMENT
4577 /*
4578  * When jumbo frames are enabled we should realign the entire payload on
4579  * architectures with strict alignment. This is a serious design mistake of
4580  * the 8254x, as it nullifies the benefit of DMA. The 8254x only allows the
4581  * RX buffer size to be 2048/4096/8192/16384; what we really want is
4582  * 2048 - ETHER_ALIGN so that the payload is aligned. On architectures
4583  * without strict alignment restrictions the 8254x still performs unaligned
4584  * memory accesses, which reduces performance as well. To avoid copying an
4585  * entire frame just to align it, we allocate a new mbuf and copy the
4586  * ethernet header into it. The new mbuf is then prepended to the existing
4587  * mbuf chain.
4588  *
4589  * Be aware, the best performance of the 8254x is achieved only when jumbo
4590  * frames are not used at all on architectures with strict alignment.
4591 static int
4592 em_fixup_rx(struct rx_ring *rxr)
4593 {
4594 	struct adapter *adapter = rxr->adapter;
4595 	struct mbuf *m, *n;
4596 	int error;
4597 
4598 	error = 0;
4599 	m = rxr->fmp;
4600 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4601 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4602 		m->m_data += ETHER_HDR_LEN;
4603 	} else {
4604 		MGETHDR(n, M_NOWAIT, MT_DATA);
4605 		if (n != NULL) {
4606 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4607 			m->m_data += ETHER_HDR_LEN;
4608 			m->m_len -= ETHER_HDR_LEN;
4609 			n->m_len = ETHER_HDR_LEN;
4610 			M_MOVE_PKTHDR(n, m);
4611 			n->m_next = m;
4612 			rxr->fmp = n;
4613 		} else {
4614 			adapter->dropped_pkts++;
4615 			m_freem(rxr->fmp);
4616 			rxr->fmp = NULL;
4617 			error = ENOMEM;
4618 		}
4619 	}
4620 
4621 	return (error);
4622 }
4623 #endif
4624 
4625 /*********************************************************************
4626  *
4627  *  Verify that the hardware indicated that the checksum is valid.
4628  *  Inform the stack about the status of checksum so that stack
4629  *  doesn't spend time verifying the checksum.
4630  *
4631  *********************************************************************/
4632 static void
4633 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4634 {
4635 	mp->m_pkthdr.csum_flags = 0;
4636 
4637 	/* Ignore Checksum bit is set */
4638 	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4639 		return;
4640 
4641 	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4642 		return;
4643 
4644 	/* IP Checksum Good? */
4645 	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4646 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4647 
4648 	/* TCP or UDP checksum */
4649 	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4650 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4651 		mp->m_pkthdr.csum_data = htons(0xffff);
4652 	}
4653 }
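
/*
 * Example (illustrative): csum_data is set to 0xffff because, with
 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR, the stack takes csum_data as the
 * already-computed sum and XORs it with 0xffff; 0xffff yields 0, a
 * passing checksum, so TCP/UDP input skips software verification.
 */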
4654 
4655 /*
4656  * This routine is run via a vlan
4657  * config EVENT
4658  */
4659 static void
4660 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4661 {
4662 	struct adapter	*adapter = ifp->if_softc;
4663 	u32		index, bit;
4664 
4665 	if (ifp->if_softc !=  arg)   /* Not our event */
4666 		return;
4667 
4668 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4669                 return;
4670 
4671 	EM_CORE_LOCK(adapter);
4672 	index = (vtag >> 5) & 0x7F;
4673 	bit = vtag & 0x1F;
4674 	adapter->shadow_vfta[index] |= (1 << bit);
4675 	++adapter->num_vlans;
4676 	/* Re-init to load the changes */
4677 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4678 		em_init_locked(adapter);
4679 	EM_CORE_UNLOCK(adapter);
4680 }
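
/*
 * VFTA indexing example (illustrative): for vtag 100,
 * index = (100 >> 5) & 0x7F = 3 and bit = 100 & 0x1F = 4, so
 * registering VLAN 100 sets bit 4 of shadow_vfta[3].  The 128 32-bit
 * words cover all 4096 possible VLAN IDs.
 */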
4681 
4682 /*
4683  * This routine is run via a vlan
4684  * unconfig EVENT
4685  */
4686 static void
4687 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4688 {
4689 	struct adapter	*adapter = ifp->if_softc;
4690 	u32		index, bit;
4691 
4692 	if (ifp->if_softc !=  arg)
4693 		return;
4694 
4695 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4696                 return;
4697 
4698 	EM_CORE_LOCK(adapter);
4699 	index = (vtag >> 5) & 0x7F;
4700 	bit = vtag & 0x1F;
4701 	adapter->shadow_vfta[index] &= ~(1 << bit);
4702 	--adapter->num_vlans;
4703 	/* Re-init to load the changes */
4704 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4705 		em_init_locked(adapter);
4706 	EM_CORE_UNLOCK(adapter);
4707 }
4708 
4709 static void
4710 em_setup_vlan_hw_support(struct adapter *adapter)
4711 {
4712 	struct e1000_hw *hw = &adapter->hw;
4713 	u32             reg;
4714 
4715 	/*
4716 	** We get here thru init_locked, meaning
4717 	** a soft reset; this has already cleared
4718 	** the VFTA and other state, so if no
4719 	** vlans have been registered, do nothing.
4720 	*/
4721 	if (adapter->num_vlans == 0)
4722                 return;
4723 
4724 	/*
4725 	** A soft reset zeroes out the VFTA, so
4726 	** we need to repopulate it now.
4727 	*/
4728 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4729                 if (adapter->shadow_vfta[i] != 0)
4730 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4731                             i, adapter->shadow_vfta[i]);
4732 
4733 	reg = E1000_READ_REG(hw, E1000_CTRL);
4734 	reg |= E1000_CTRL_VME;
4735 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4736 
4737 	/* Enable the Filter Table */
4738 	reg = E1000_READ_REG(hw, E1000_RCTL);
4739 	reg &= ~E1000_RCTL_CFIEN;
4740 	reg |= E1000_RCTL_VFE;
4741 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4742 }
4743 
4744 static void
4745 em_enable_intr(struct adapter *adapter)
4746 {
4747 	struct e1000_hw *hw = &adapter->hw;
4748 	u32 ims_mask = IMS_ENABLE_MASK;
4749 
4750 	if (hw->mac.type == e1000_82574) {
4751 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4752 		ims_mask |= EM_MSIX_MASK;
4753 	}
4754 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4755 }
4756 
4757 static void
4758 em_disable_intr(struct adapter *adapter)
4759 {
4760 	struct e1000_hw *hw = &adapter->hw;
4761 
4762 	if (hw->mac.type == e1000_82574)
4763 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4764 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4765 }
4766 
4767 /*
4768  * Bit of a misnomer, what this really means is
4769  * to enable OS management of the system... aka
4770  * to disable special hardware management features
4771  */
4772 static void
4773 em_init_manageability(struct adapter *adapter)
4774 {
4775 	/* A shared code workaround */
4776 #define E1000_82542_MANC2H E1000_MANC2H
4777 	if (adapter->has_manage) {
4778 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4779 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4780 
4781 		/* disable hardware interception of ARP */
4782 		manc &= ~(E1000_MANC_ARP_EN);
4783 
4784                 /* enable receiving management packets to the host */
4785 		manc |= E1000_MANC_EN_MNG2HOST;
4786 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4787 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4788 		manc2h |= E1000_MNG2HOST_PORT_623;
4789 		manc2h |= E1000_MNG2HOST_PORT_664;
4790 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4791 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4792 	}
4793 }
4794 
4795 /*
4796  * Give control back to hardware management
4797  * controller if there is one.
4798  */
4799 static void
4800 em_release_manageability(struct adapter *adapter)
4801 {
4802 	if (adapter->has_manage) {
4803 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4804 
4805 		/* re-enable hardware interception of ARP */
4806 		manc |= E1000_MANC_ARP_EN;
4807 		manc &= ~E1000_MANC_EN_MNG2HOST;
4808 
4809 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4810 	}
4811 }
4812 
4813 /*
4814  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4815  * For ASF and Pass Through versions of f/w this means
4816  * that the driver is loaded. For AMT version type f/w
4817  * this means that the network i/f is open.
4818  */
4819 static void
4820 em_get_hw_control(struct adapter *adapter)
4821 {
4822 	u32 ctrl_ext, swsm;
4823 
4824 	if (adapter->hw.mac.type == e1000_82573) {
4825 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4826 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4827 		    swsm | E1000_SWSM_DRV_LOAD);
4828 		return;
4829 	}
4830 	/* else */
4831 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4832 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4833 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4834 	return;
4835 }
4836 
4837 /*
4838  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4839  * For ASF and Pass Through versions of f/w this means that
4840  * the driver is no longer loaded. For AMT versions of the
4841  * f/w this means that the network i/f is closed.
4842  */
4843 static void
4844 em_release_hw_control(struct adapter *adapter)
4845 {
4846 	u32 ctrl_ext, swsm;
4847 
4848 	if (!adapter->has_manage)
4849 		return;
4850 
4851 	if (adapter->hw.mac.type == e1000_82573) {
4852 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4853 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4854 		    swsm & ~E1000_SWSM_DRV_LOAD);
4855 		return;
4856 	}
4857 	/* else */
4858 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4859 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4860 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4861 	return;
4862 }
4863 
4864 static int
4865 em_is_valid_ether_addr(u8 *addr)
4866 {
4867 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4868 
4869 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4870 		return (FALSE);
4871 	}
4872 
4873 	return (TRUE);
4874 }
4875 
4876 /*
4877 ** Parse the interface capabilities with regard
4878 ** to both system management and wake-on-lan for
4879 ** later use.
4880 */
4881 static void
4882 em_get_wakeup(device_t dev)
4883 {
4884 	struct adapter	*adapter = device_get_softc(dev);
4885 	u16		eeprom_data = 0, device_id, apme_mask;
4886 
4887 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4888 	apme_mask = EM_EEPROM_APME;
4889 
4890 	switch (adapter->hw.mac.type) {
4891 	case e1000_82573:
4892 	case e1000_82583:
4893 		adapter->has_amt = TRUE;
4894 		/* Falls thru */
4895 	case e1000_82571:
4896 	case e1000_82572:
4897 	case e1000_80003es2lan:
4898 		if (adapter->hw.bus.func == 1) {
4899 			e1000_read_nvm(&adapter->hw,
4900 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4901 			break;
4902 		} else
4903 			e1000_read_nvm(&adapter->hw,
4904 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4905 		break;
4906 	case e1000_ich8lan:
4907 	case e1000_ich9lan:
4908 	case e1000_ich10lan:
4909 	case e1000_pchlan:
4910 	case e1000_pch2lan:
4911 		apme_mask = E1000_WUC_APME;
4912 		adapter->has_amt = TRUE;
4913 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4914 		break;
4915 	default:
4916 		e1000_read_nvm(&adapter->hw,
4917 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4918 		break;
4919 	}
4920 	if (eeprom_data & apme_mask)
4921 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4922 	/*
4923 	 * We have the eeprom settings, now apply the special cases
4924 	 * where the eeprom may be wrong or the board won't support
4925 	 * wake on lan on a particular port.
4926 	 */
4927 	device_id = pci_get_device(dev);
4928         switch (device_id) {
4929 	case E1000_DEV_ID_82571EB_FIBER:
4930 		/* Wake events only supported on port A for dual fiber
4931 		 * regardless of eeprom setting */
4932 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4933 		    E1000_STATUS_FUNC_1)
4934 			adapter->wol = 0;
4935 		break;
4936 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4937 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4938 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4939                 /* if quad port adapter, disable WoL on all but port A */
4940 		if (global_quad_port_a != 0)
4941 			adapter->wol = 0;
4942 		/* Reset for multiple quad port adapters */
4943 		if (++global_quad_port_a == 4)
4944 			global_quad_port_a = 0;
4945                 break;
4946 	}
4947 	return;
4948 }
4949 
4950 
4951 /*
4952  * Enable PCI Wake On Lan capability
4953  */
4954 static void
4955 em_enable_wakeup(device_t dev)
4956 {
4957 	struct adapter	*adapter = device_get_softc(dev);
4958 	struct ifnet	*ifp = adapter->ifp;
4959 	u32		pmc, ctrl, ctrl_ext, rctl;
4960 	u16     	status;
4961 
4962 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4963 		return;
4964 
4965 	/* Advertise the wakeup capability */
4966 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4967 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4968 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4969 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4970 
4971 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4972 	    (adapter->hw.mac.type == e1000_pchlan) ||
4973 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4974 	    (adapter->hw.mac.type == e1000_ich10lan))
4975 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4976 
4977 	/* Keep the laser running on Fiber adapters */
4978 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4979 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4980 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4981 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4982 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4983 	}
4984 
4985 	/*
4986 	** Determine type of Wakeup: note that wol
4987 	** is set with all bits on by default.
4988 	*/
4989 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4990 		adapter->wol &= ~E1000_WUFC_MAG;
4991 
4992 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4993 		adapter->wol &= ~E1000_WUFC_MC;
4994 	else {
4995 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4996 		rctl |= E1000_RCTL_MPE;
4997 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4998 	}
4999 
5000 	if ((adapter->hw.mac.type == e1000_pchlan) ||
5001 	    (adapter->hw.mac.type == e1000_pch2lan)) {
5002 		if (em_enable_phy_wakeup(adapter))
5003 			return;
5004 	} else {
5005 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5006 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5007 	}
5008 
5009 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5010 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5011 
5012         /* Request PME */
5013         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5014 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5015 	if (ifp->if_capenable & IFCAP_WOL)
5016 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5017         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5018 
5019 	return;
5020 }
5021 
5022 /*
5023 ** WOL in the newer chipset interfaces (pchlan)
5024 ** requires things to be copied into the PHY
5025 */
5026 static int
5027 em_enable_phy_wakeup(struct adapter *adapter)
5028 {
5029 	struct e1000_hw *hw = &adapter->hw;
5030 	u32 mreg, ret = 0;
5031 	u16 preg;
5032 
5033 	/* copy MAC RARs to PHY RARs */
5034 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5035 
5036 	/* copy MAC MTA to PHY MTA */
5037 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5038 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5039 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5040 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5041 		    (u16)((mreg >> 16) & 0xFFFF));
5042 	}
5043 
5044 	/* configure PHY Rx Control register */
5045 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5046 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5047 	if (mreg & E1000_RCTL_UPE)
5048 		preg |= BM_RCTL_UPE;
5049 	if (mreg & E1000_RCTL_MPE)
5050 		preg |= BM_RCTL_MPE;
5051 	preg &= ~(BM_RCTL_MO_MASK);
5052 	if (mreg & E1000_RCTL_MO_3)
5053 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5054 				<< BM_RCTL_MO_SHIFT);
5055 	if (mreg & E1000_RCTL_BAM)
5056 		preg |= BM_RCTL_BAM;
5057 	if (mreg & E1000_RCTL_PMCF)
5058 		preg |= BM_RCTL_PMCF;
5059 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5060 	if (mreg & E1000_CTRL_RFCE)
5061 		preg |= BM_RCTL_RFCE;
5062 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5063 
5064 	/* enable PHY wakeup in MAC register */
5065 	E1000_WRITE_REG(hw, E1000_WUC,
5066 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5067 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5068 
5069 	/* configure and enable PHY wakeup in PHY registers */
5070 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5071 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5072 
5073 	/* activate PHY wakeup */
5074 	ret = hw->phy.ops.acquire(hw);
5075 	if (ret) {
5076 		printf("Could not acquire PHY\n");
5077 		return ret;
5078 	}
5079 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5080 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5081 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5082 	if (ret) {
5083 		printf("Could not read PHY page 769\n");
5084 		goto out;
5085 	}
5086 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5087 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5088 	if (ret)
5089 		printf("Could not set PHY Host Wakeup bit\n");
5090 out:
5091 	hw->phy.ops.release(hw);
5092 
5093 	return ret;
5094 }
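
/*
 * Example (illustrative): BM PHY registers are 16 bits wide, which is
 * why each 32-bit MTA word above is written as two halves, e.g.
 * mreg = 0xDEADBEEF becomes BM_MTA(i) = 0xBEEF and
 * BM_MTA(i) + 1 = 0xDEAD.
 */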
5095 
5096 static void
5097 em_led_func(void *arg, int onoff)
5098 {
5099 	struct adapter	*adapter = arg;
5100 
5101 	EM_CORE_LOCK(adapter);
5102 	if (onoff) {
5103 		e1000_setup_led(&adapter->hw);
5104 		e1000_led_on(&adapter->hw);
5105 	} else {
5106 		e1000_led_off(&adapter->hw);
5107 		e1000_cleanup_led(&adapter->hw);
5108 	}
5109 	EM_CORE_UNLOCK(adapter);
5110 }
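
/*
 * Sketch of the hookup, assuming the usual led(4) registration performed
 * in em_attach (illustrative only):
 *
 *	adapter->led_dev = led_create(em_led_func, adapter,
 *	    device_get_nameunit(dev));
 *
 * The LED can then be driven from userland, e.g.:
 *
 *	echo 1 > /dev/led/em0
 */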
5111 
5112 /*
5113 ** Disable the ASPM L0s and L1 link states
5114 */
5115 static void
5116 em_disable_aspm(struct adapter *adapter)
5117 {
5118 	int		base, reg;
5119 	u16		link_cap, link_ctrl;
5120 	device_t	dev = adapter->dev;
5121 
5122 	switch (adapter->hw.mac.type) {
5123 		case e1000_82573:
5124 		case e1000_82574:
5125 		case e1000_82583:
5126 			break;
5127 		default:
5128 			return;
5129 	}
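	/*
	 * Only the 82573/82574/82583 MACs fall through to the disable
	 * below; ASPM is left untouched on everything else.  (These
	 * parts are known to misbehave with L0s/L1 enabled, which is
	 * presumably why they are singled out here.)
	 */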
5130 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5131 		return;
5132 	reg = base + PCIER_LINK_CAP;
5133 	link_cap = pci_read_config(dev, reg, 2);
5134 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5135 		return;
5136 	reg = base + PCIER_LINK_CTL;
5137 	link_ctrl = pci_read_config(dev, reg, 2);
5138 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5139 	pci_write_config(dev, reg, link_ctrl, 2);
5140 	return;
5141 }
5142 
5143 /**********************************************************************
5144  *
5145  *  Update the board statistics counters.
5146  *
5147  **********************************************************************/
5148 static void
5149 em_update_stats_counters(struct adapter *adapter)
5150 {
5151 	struct ifnet   *ifp;
5152 
5153 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5154 	    (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5155 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5156 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5157 	}
5158 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5159 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5160 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5161 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5162 
5163 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5164 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5165 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5166 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5167 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5168 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5169 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5170 	/*
5171 	** For watchdog management we need to know if we have been
5172 	** paused during the last interval, so capture that here.
5173 	*/
5174 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5175 	adapter->stats.xoffrxc += adapter->pause_frames;
5176 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5177 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5178 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5179 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5180 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5181 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5182 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5183 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5184 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5185 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5186 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5187 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5188 
5189 	/* For the 64-bit byte counters the low dword must be read first. */
5190 	/* Both registers clear on the read of the high dword */
5191 
5192 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5193 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5194 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5195 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5196 
5197 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5198 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5199 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5200 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5201 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5202 
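	/*
	 * Note that, unlike GORC/GOTC above, only the high dwords of
	 * TOR/TOT are read here, so the low dwords are never accumulated;
	 * this mirrors the driver's long-standing behavior rather than
	 * the low-then-high sequence the comment above describes.
	 */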
5203 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5204 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5205 
5206 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5207 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5208 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5209 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5210 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5211 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5212 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5213 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5214 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5215 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5216 
5217 	/* Interrupt Counts */
5218 
5219 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5220 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5221 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5222 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5223 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5224 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5225 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5226 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5227 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5228 
5229 	if (adapter->hw.mac.type >= e1000_82543) {
5230 		adapter->stats.algnerrc +=
5231 		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5232 		adapter->stats.rxerrc +=
5233 		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5234 		adapter->stats.tncrs +=
5235 		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5236 		adapter->stats.cexterr +=
5237 		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5238 		adapter->stats.tsctc +=
5239 		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5240 		adapter->stats.tsctfc +=
5241 		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5242 	}
5243 	ifp = adapter->ifp;
5244 
5245 	ifp->if_collisions = adapter->stats.colc;
5246 
5247 	/* Rx Errors */
5248 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5249 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5250 	    adapter->stats.ruc + adapter->stats.roc +
5251 	    adapter->stats.mpc + adapter->stats.cexterr;
5252 
5253 	/* Tx Errors */
5254 	ifp->if_oerrors = adapter->stats.ecol +
5255 	    adapter->stats.latecol + adapter->watchdog_events;
5256 }
5257 
5258 /* Export a single 32-bit register via a read-only sysctl. */
5259 static int
5260 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5261 {
5262 	struct adapter *adapter;
5263 	u_int val;
5264 
5265 	adapter = oidp->oid_arg1;
5266 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5267 	return (sysctl_handle_int(oidp, &val, 0, req));
5268 }
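
/*
 * oid_arg2 carries the register offset, wired up when the oid is created
 * (see em_add_hw_stats() below).  Illustrative read, assuming unit 0:
 *
 *	sysctl dev.em.0.device_control
 */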
5269 
5270 /*
5271  * Add sysctl variables, one per statistic, to the system.
5272  */
5273 static void
5274 em_add_hw_stats(struct adapter *adapter)
5275 {
5276 	device_t dev = adapter->dev;
5277 
5278 	struct tx_ring *txr = adapter->tx_rings;
5279 	struct rx_ring *rxr = adapter->rx_rings;
5280 
5281 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5282 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5283 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5284 	struct e1000_hw_stats *stats = &adapter->stats;
5285 
5286 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5287 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5288 
5289 #define QUEUE_NAME_LEN 32
5290 	char namebuf[QUEUE_NAME_LEN];
5291 
5292 	/* Driver Statistics */
5293 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5294 			CTLFLAG_RD, &adapter->link_irq,
5295 			"Link MSI-X IRQ Handled");
5296 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5297 			CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5298 			"Standard mbuf allocation failed");
5299 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5300 			CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5301 			"Standard mbuf cluster allocation failed");
5302 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5303 			CTLFLAG_RD, &adapter->dropped_pkts,
5304 			"Driver dropped packets");
5305 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5306 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5307 			"Driver Tx DMA setup failures in xmit");
5308 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5309 			CTLFLAG_RD, &adapter->rx_overruns,
5310 			"RX overruns");
5311 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5312 			CTLFLAG_RD, &adapter->watchdog_events,
5313 			"Watchdog timeouts");
5314 
5315 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5316 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5317 			em_sysctl_reg_handler, "IU",
5318 			"Device Control Register");
5319 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5320 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5321 			em_sysctl_reg_handler, "IU",
5322 			"Receiver Control Register");
5323 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5324 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5325 			"Flow Control High Watermark");
5326 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5327 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5328 			"Flow Control Low Watermark");
5329 
5330 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5331 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5332 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5333 					    CTLFLAG_RD, NULL, "Queue statistics");
5334 		queue_list = SYSCTL_CHILDREN(queue_node);
5335 
5336 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5337 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5338 				E1000_TDH(txr->me),
5339 				em_sysctl_reg_handler, "IU",
5340 				"Transmit Descriptor Head");
5341 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5342 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5343 				E1000_TDT(txr->me),
5344 				em_sysctl_reg_handler, "IU",
5345 				"Transmit Descriptor Tail");
5346 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5347 				CTLFLAG_RD, &txr->tx_irq,
5348 				"Queue MSI-X Transmit Interrupts");
5349 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5350 				CTLFLAG_RD, &txr->no_desc_avail,
5351 				"Queue No Descriptor Available");
5352 
5353 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5354 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5355 				E1000_RDH(rxr->me),
5356 				em_sysctl_reg_handler, "IU",
5357 				"Receive Descriptor Head");
5358 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5359 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5360 				E1000_RDT(rxr->me),
5361 				em_sysctl_reg_handler, "IU",
5362 				"Receive Descriptor Tail");
5363 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5364 				CTLFLAG_RD, &rxr->rx_irq,
5365 				"Queue MSI-X Receive Interrupts");
5366 	}
5367 
5368 	/* MAC stats get their own sub node */
5369 
5370 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5371 				    CTLFLAG_RD, NULL, "Statistics");
5372 	stat_list = SYSCTL_CHILDREN(stat_node);
5373 
5374 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5375 			CTLFLAG_RD, &stats->ecol,
5376 			"Excessive collisions");
5377 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5378 			CTLFLAG_RD, &stats->scc,
5379 			"Single collisions");
5380 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5381 			CTLFLAG_RD, &stats->mcc,
5382 			"Multiple collisions");
5383 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5384 			CTLFLAG_RD, &stats->latecol,
5385 			"Late collisions");
5386 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5387 			CTLFLAG_RD, &stats->colc,
5388 			"Collision Count");
5389 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5390 			CTLFLAG_RD, &adapter->stats.symerrs,
5391 			"Symbol Errors");
5392 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5393 			CTLFLAG_RD, &adapter->stats.sec,
5394 			"Sequence Errors");
5395 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5396 			CTLFLAG_RD, &adapter->stats.dc,
5397 			"Defer Count");
5398 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5399 			CTLFLAG_RD, &adapter->stats.mpc,
5400 			"Missed Packets");
5401 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5402 			CTLFLAG_RD, &adapter->stats.rnbc,
5403 			"Receive No Buffers");
5404 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5405 			CTLFLAG_RD, &adapter->stats.ruc,
5406 			"Receive Undersize");
5407 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5408 			CTLFLAG_RD, &adapter->stats.rfc,
5409 			"Fragmented Packets Received");
5410 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5411 			CTLFLAG_RD, &adapter->stats.roc,
5412 			"Oversized Packets Received");
5413 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5414 			CTLFLAG_RD, &adapter->stats.rjc,
5415 			"Received Jabber");
5416 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5417 			CTLFLAG_RD, &adapter->stats.rxerrc,
5418 			"Receive Errors");
5419 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5420 			CTLFLAG_RD, &adapter->stats.crcerrs,
5421 			"CRC errors");
5422 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5423 			CTLFLAG_RD, &adapter->stats.algnerrc,
5424 			"Alignment Errors");
5425 	/* On 82575 these are collision counts */
5426 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5427 			CTLFLAG_RD, &adapter->stats.cexterr,
5428 			"Collision/Carrier extension errors");
5429 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5430 			CTLFLAG_RD, &adapter->stats.xonrxc,
5431 			"XON Received");
5432 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5433 			CTLFLAG_RD, &adapter->stats.xontxc,
5434 			"XON Transmitted");
5435 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5436 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5437 			"XOFF Received");
5438 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5439 			CTLFLAG_RD, &adapter->stats.xofftxc,
5440 			"XOFF Transmitted");
5441 
5442 	/* Packet Reception Stats */
5443 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5444 			CTLFLAG_RD, &adapter->stats.tpr,
5445 			"Total Packets Received");
5446 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5447 			CTLFLAG_RD, &adapter->stats.gprc,
5448 			"Good Packets Received");
5449 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5450 			CTLFLAG_RD, &adapter->stats.bprc,
5451 			"Broadcast Packets Received");
5452 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5453 			CTLFLAG_RD, &adapter->stats.mprc,
5454 			"Multicast Packets Received");
5455 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5456 			CTLFLAG_RD, &adapter->stats.prc64,
5457 			"64 byte frames received");
5458 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5459 			CTLFLAG_RD, &adapter->stats.prc127,
5460 			"65-127 byte frames received");
5461 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5462 			CTLFLAG_RD, &adapter->stats.prc255,
5463 			"128-255 byte frames received");
5464 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5465 			CTLFLAG_RD, &adapter->stats.prc511,
5466 			"256-511 byte frames received");
5467 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5468 			CTLFLAG_RD, &adapter->stats.prc1023,
5469 			"512-1023 byte frames received");
5470 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5471 			CTLFLAG_RD, &adapter->stats.prc1522,
5472 			"1024-1522 byte frames received");
5473 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5474 			CTLFLAG_RD, &adapter->stats.gorc,
5475 			"Good Octets Received");
5476 
5477 	/* Packet Transmission Stats */
5478 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5479 			CTLFLAG_RD, &adapter->stats.gotc,
5480 			"Good Octets Transmitted");
5481 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5482 			CTLFLAG_RD, &adapter->stats.tpt,
5483 			"Total Packets Transmitted");
5484 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5485 			CTLFLAG_RD, &adapter->stats.gptc,
5486 			"Good Packets Transmitted");
5487 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5488 			CTLFLAG_RD, &adapter->stats.bptc,
5489 			"Broadcast Packets Transmitted");
5490 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5491 			CTLFLAG_RD, &adapter->stats.mptc,
5492 			"Multicast Packets Transmitted");
5493 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5494 			CTLFLAG_RD, &adapter->stats.ptc64,
5495 			"64 byte frames transmitted");
5496 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5497 			CTLFLAG_RD, &adapter->stats.ptc127,
5498 			"65-127 byte frames transmitted");
5499 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5500 			CTLFLAG_RD, &adapter->stats.ptc255,
5501 			"128-255 byte frames transmitted");
5502 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5503 			CTLFLAG_RD, &adapter->stats.ptc511,
5504 			"256-511 byte frames transmitted");
5505 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5506 			CTLFLAG_RD, &adapter->stats.ptc1023,
5507 			"512-1023 byte frames transmitted");
5508 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5509 			CTLFLAG_RD, &adapter->stats.ptc1522,
5510 			"1024-1522 byte frames transmitted");
5511 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5512 			CTLFLAG_RD, &adapter->stats.tsctc,
5513 			"TSO Contexts Transmitted");
5514 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5515 			CTLFLAG_RD, &adapter->stats.tsctfc,
5516 			"TSO Contexts Failed");
5517 
5518 
5519 	/* Interrupt Stats */
5520 
5521 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5522 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5523 	int_list = SYSCTL_CHILDREN(int_node);
5524 
5525 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5526 			CTLFLAG_RD, &adapter->stats.iac,
5527 			"Interrupt Assertion Count");
5528 
5529 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5530 			CTLFLAG_RD, &adapter->stats.icrxptc,
5531 			"Interrupt Cause Rx Pkt Timer Expire Count");
5532 
5533 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5534 			CTLFLAG_RD, &adapter->stats.icrxatc,
5535 			"Interrupt Cause Rx Abs Timer Expire Count");
5536 
5537 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5538 			CTLFLAG_RD, &adapter->stats.ictxptc,
5539 			"Interrupt Cause Tx Pkt Timer Expire Count");
5540 
5541 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5542 			CTLFLAG_RD, &adapter->stats.ictxatc,
5543 			"Interrupt Cause Tx Abs Timer Expire Count");
5544 
5545 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5546 			CTLFLAG_RD, &adapter->stats.ictxqec,
5547 			"Interrupt Cause Tx Queue Empty Count");
5548 
5549 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5550 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5551 			"Interrupt Cause Tx Queue Min Thresh Count");
5552 
5553 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5554 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5555 			"Interrupt Cause Rx Desc Min Thresh Count");
5556 
5557 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5558 			CTLFLAG_RD, &adapter->stats.icrxoc,
5559 			"Interrupt Cause Receiver Overrun Count");
5560 }
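
/*
 * The resulting tree, assuming unit 0, looks roughly like:
 *
 *	dev.em.0.dropped
 *	dev.em.0.queue0.txd_head
 *	dev.em.0.mac_stats.good_pkts_recvd
 *	dev.em.0.interrupts.asserts
 */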
5561 
5562 /**********************************************************************
5563  *
5564  *  This routine provides a way to dump out the adapter eeprom,
5565  *  often a useful debug/service tool.  Only the first 32 words
5566  *  are dumped; the fields that matter live within that range.
5567  *
5568  **********************************************************************/
5569 static int
5570 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5571 {
5572 	struct adapter *adapter = (struct adapter *)arg1;
5573 	int error;
5574 	int result;
5575 
5576 	result = -1;
5577 	error = sysctl_handle_int(oidp, &result, 0, req);
5578 
5579 	if (error || !req->newptr)
5580 		return (error);
5581 
5582 	/*
5583 	 * This value will cause a hex dump of the
5584 	 * first 32 16-bit words of the EEPROM to
5585 	 * the screen.
5586 	 */
5587 	if (result == 1)
5588 		em_print_nvm_info(adapter);
5589 
5590 	return (error);
5591 }
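
/*
 * Writing 1 triggers the dump.  Assuming the oid registered in em_attach
 * is "nvm", usage would be:
 *
 *	sysctl dev.em.0.nvm=1
 */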
5592 
5593 static void
5594 em_print_nvm_info(struct adapter *adapter)
5595 {
5596 	u16	eeprom_data;
5597 	int	i, j, row = 0;
5598 
5599 	/* It's a bit crude, but it gets the job done */
5600 	printf("\nInterface EEPROM Dump:\n");
5601 	printf("Offset\n0x0000  ");
5602 	for (i = 0, j = 0; i < 32; i++, j++) {
5603 		if (j == 8) { /* Make the offset block */
5604 			j = 0; ++row;
5605 			printf("\n0x00%x0  ", row);
5606 		}
5607 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5608 		printf("%04x ", eeprom_data);
5609 	}
5610 	printf("\n");
5611 }
5612 
5613 static int
5614 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5615 {
5616 	struct em_int_delay_info *info;
5617 	struct adapter *adapter;
5618 	u32 regval;
5619 	int error, usecs, ticks;
5620 
5621 	info = (struct em_int_delay_info *)arg1;
5622 	usecs = info->value;
5623 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5624 	if (error != 0 || req->newptr == NULL)
5625 		return (error);
5626 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5627 		return (EINVAL);
5628 	info->value = usecs;
5629 	ticks = EM_USECS_TO_TICKS(usecs);
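	/*
	 * EM_USECS_TO_TICKS() converts to the ~1.024us ticks used by the
	 * delay registers (RDTR, TIDV and friends); ITR instead counts
	 * 256ns units, i.e. four per 1.024us tick, hence the scaling below.
	 */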
5630 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5631 		ticks *= 4;
5632 
5633 	adapter = info->adapter;
5634 
5635 	EM_CORE_LOCK(adapter);
5636 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5637 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5638 	/* Handle a few special cases. */
5639 	switch (info->offset) {
5640 	case E1000_RDTR:
5641 		break;
5642 	case E1000_TIDV:
5643 		if (ticks == 0) {
5644 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5645 			/* Don't write 0 into the TIDV register. */
5646 			regval++;
5647 		} else
5648 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5649 		break;
5650 	}
5651 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5652 	EM_CORE_UNLOCK(adapter);
5653 	return (0);
5654 }
5655 
5656 static void
5657 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5658 	const char *description, struct em_int_delay_info *info,
5659 	int offset, int value)
5660 {
5661 	info->adapter = adapter;
5662 	info->offset = offset;
5663 	info->value = value;
5664 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5665 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5666 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5667 	    info, 0, em_sysctl_int_delay, "I", description);
5668 }
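
/*
 * Illustrative call, patterned on what em_attach does (the names shown
 * here are a sketch, not a definitive reference):
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs",
 *	    &adapter->rx_int_delay,
 *	    E1000_REGISTER(&adapter->hw, E1000_RDTR),
 *	    em_rx_int_delay_dflt);
 */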
5669 
5670 static void
5671 em_set_sysctl_value(struct adapter *adapter, const char *name,
5672 	const char *description, int *limit, int value)
5673 {
5674 	*limit = value;
5675 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5676 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5677 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5678 }
5679 
5680 
5681 /*
5682 ** Set flow control using sysctl:
5683 ** Flow control values:
5684 **      0 - off
5685 **      1 - rx pause
5686 **      2 - tx pause
5687 **      3 - full
5688 */
5689 static int
5690 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5691 {
5692 	int		error;
5693 	static int	input = 3; /* default is full */
5694 	struct adapter	*adapter = (struct adapter *) arg1;
5695 
5696 	error = sysctl_handle_int(oidp, &input, 0, req);
5697 
5698 	if ((error) || (req->newptr == NULL))
5699 		return (error);
5700 
5701 	if (input == adapter->fc) /* no change? */
5702 		return (error);
5703 
5704 	switch (input) {
5705 	case e1000_fc_rx_pause:
5706 	case e1000_fc_tx_pause:
5707 	case e1000_fc_full:
5708 	case e1000_fc_none:
5709 		adapter->hw.fc.requested_mode = input;
5710 		adapter->fc = input;
5711 		break;
5712 	default:
5713 		/* Do nothing */
5714 		return (error);
5715 	}
5716 
5717 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5718 	e1000_force_mac_fc(&adapter->hw);
5719 	return (error);
5720 }
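
/*
 * Note that "input" above is static and therefore shared by every em(4)
 * instance, so a read may report the value last written on a different
 * adapter.  Assuming the oid registered in em_attach is "fc":
 *
 *	sysctl dev.em.0.fc=3	(request full flow control)
 */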
5721 
5722 /*
5723 ** Manage Energy Efficient Ethernet:
5724 ** Control values:
5725 **     0 - EEE enabled, 1 - EEE disabled
5726 */
5727 static int
5728 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5729 {
5730 	struct adapter	*adapter = (struct adapter *) arg1;
5731 	int		error, value;
5732 
5733 	value = adapter->hw.dev_spec.ich8lan.eee_disable;
5734 	error = sysctl_handle_int(oidp, &value, 0, req);
5735 	if (error || req->newptr == NULL)
5736 		return (error);
5737 	EM_CORE_LOCK(adapter);
5738 	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5739 	em_init_locked(adapter);
5740 	EM_CORE_UNLOCK(adapter);
5741 	return (0);
5742 }
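
/*
 * The value lands in eee_disable, so writing 1 turns EEE off and 0 turns
 * it back on; the interface is reinitialized either way.  Assuming the
 * oid registered in em_attach is "eee_control":
 *
 *	sysctl dev.em.0.eee_control=1
 */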
5743 
5744 static int
5745 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5746 {
5747 	struct adapter *adapter;
5748 	int error;
5749 	int result;
5750 
5751 	result = -1;
5752 	error = sysctl_handle_int(oidp, &result, 0, req);
5753 
5754 	if (error || !req->newptr)
5755 		return (error);
5756 
5757 	if (result == 1) {
5758 		adapter = (struct adapter *)arg1;
5759 		em_print_debug_info(adapter);
5760 	}
5761 
5762 	return (error);
5763 }
5764 
5765 /*
5766 ** This routine is meant to be fluid, add whatever is
5767 ** needed for debugging a problem.  -jfv
5768 */
5769 static void
5770 em_print_debug_info(struct adapter *adapter)
5771 {
5772 	device_t dev = adapter->dev;
5773 	struct tx_ring *txr = adapter->tx_rings;
5774 	struct rx_ring *rxr = adapter->rx_rings;
5775 
5776 	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5777 		printf("Interface is RUNNING ");
5778 	else
5779 		printf("Interface is NOT RUNNING\n");
5780 
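	/*
	 * IFF_DRV_OACTIVE set means the transmit queue is stalled; that
	 * state is what the "INACTIVE" message below refers to.
	 */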
5781 	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5782 		printf("and INACTIVE\n");
5783 	else
5784 		printf("and ACTIVE\n");
5785 
5786 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5787 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5788 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5789 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5790 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5791 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5792 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5793 	device_printf(dev, "TX descriptors avail = %d\n",
5794 	    txr->tx_avail);
5795 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5796 	    txr->no_desc_avail);
5797 	device_printf(dev, "RX discarded packets = %ld\n",
5798 	    rxr->rx_discarded);
5799 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5800 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5801 }
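
/*
 * This dump is reached by writing 1 to the sysctl served by
 * em_sysctl_debug_info() above; assuming that oid is named "debug":
 *
 *	sysctl dev.em.0.debug=1
 */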
5802