xref: /freebsd/sys/dev/e1000/if_em.c (revision 88f578841fd49ff796b62a8d3531bd73afd1a88e)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2015, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #include "opt_em.h"
36 #include "opt_ddb.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
42 #endif
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #ifdef DDB
47 #include <sys/types.h>
48 #include <ddb/ddb.h>
49 #endif
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
52 #endif
53 #include <sys/bus.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/module.h>
60 #include <sys/rman.h>
61 #include <sys/smp.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
69 
70 #include <net/bpf.h>
71 #include <net/ethernet.h>
72 #include <net/if.h>
73 #include <net/if_var.h>
74 #include <net/if_arp.h>
75 #include <net/if_dl.h>
76 #include <net/if_media.h>
77 
78 #include <net/if_types.h>
79 #include <net/if_vlan_var.h>
80 
81 #include <netinet/in_systm.h>
82 #include <netinet/in.h>
83 #include <netinet/if_ether.h>
84 #include <netinet/ip.h>
85 #include <netinet/ip6.h>
86 #include <netinet/tcp.h>
87 #include <netinet/udp.h>
88 
89 #include <machine/in_cksum.h>
90 #include <dev/led/led.h>
91 #include <dev/pci/pcivar.h>
92 #include <dev/pci/pcireg.h>
93 
94 #include "e1000_api.h"
95 #include "e1000_82571.h"
96 #include "if_em.h"
97 
98 /*********************************************************************
99  *  Set this to one to display debug statistics
100  *********************************************************************/
101 int	em_display_debug_stats = 0;
102 
103 /*********************************************************************
104  *  Driver version:
105  *********************************************************************/
106 char em_driver_version[] = "7.4.2";
107 
108 /*********************************************************************
109  *  PCI Device ID Table
110  *
111  *  Used by probe to select devices to load on
112  *  Last field stores an index into e1000_strings
113  *  Last entry must be all 0s
114  *
115  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
116  *********************************************************************/
117 
118 static em_vendor_info_t em_vendor_info_array[] =
119 {
120 	/* Intel(R) PRO/1000 Network Connection */
121 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
122 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
123 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
124 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
125 						PCI_ANY_ID, PCI_ANY_ID, 0},
126 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
127 						PCI_ANY_ID, PCI_ANY_ID, 0},
128 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
129 						PCI_ANY_ID, PCI_ANY_ID, 0},
130 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
131 						PCI_ANY_ID, PCI_ANY_ID, 0},
132 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
133 						PCI_ANY_ID, PCI_ANY_ID, 0},
134 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
135 						PCI_ANY_ID, PCI_ANY_ID, 0},
136 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
138 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
139 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
140 
141 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
142 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
143 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
144 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
145 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
146 						PCI_ANY_ID, PCI_ANY_ID, 0},
147 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
148 						PCI_ANY_ID, PCI_ANY_ID, 0},
149 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
150 						PCI_ANY_ID, PCI_ANY_ID, 0},
151 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
152 						PCI_ANY_ID, PCI_ANY_ID, 0},
153 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
154 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
155 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
156 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
157 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
158 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
159 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
160 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
161 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
162 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
163 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
164 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
165 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
166 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
167 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
168 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
169 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
170 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
171 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
172 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
173 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
174 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
175 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
176 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
177 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
178 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
179 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
180 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
181 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
182 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
183 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
184 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
185 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
186 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
187 						PCI_ANY_ID, PCI_ANY_ID, 0},
188 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
189 						PCI_ANY_ID, PCI_ANY_ID, 0},
190 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
191 	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
192 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
193 	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
194 	/* required last entry */
195 	{ 0, 0, 0, 0, 0}
196 };
197 
198 /*********************************************************************
199  *  Table of branding strings for all supported NICs.
200  *********************************************************************/
201 
202 static char *em_strings[] = {
203 	"Intel(R) PRO/1000 Network Connection"
204 };
205 
206 /*********************************************************************
207  *  Function prototypes
208  *********************************************************************/
209 static int	em_probe(device_t);
210 static int	em_attach(device_t);
211 static int	em_detach(device_t);
212 static int	em_shutdown(device_t);
213 static int	em_suspend(device_t);
214 static int	em_resume(device_t);
215 #ifdef EM_MULTIQUEUE
216 static int	em_mq_start(if_t, struct mbuf *);
217 static int	em_mq_start_locked(if_t,
218 		    struct tx_ring *);
219 static void	em_qflush(if_t);
220 #else
221 static void	em_start(if_t);
222 static void	em_start_locked(if_t, struct tx_ring *);
223 #endif
224 static int	em_ioctl(if_t, u_long, caddr_t);
225 static uint64_t	em_get_counter(if_t, ift_counter);
226 static void	em_init(void *);
227 static void	em_init_locked(struct adapter *);
228 static void	em_stop(void *);
229 static void	em_media_status(if_t, struct ifmediareq *);
230 static int	em_media_change(if_t);
231 static void	em_identify_hardware(struct adapter *);
232 static int	em_allocate_pci_resources(struct adapter *);
233 static int	em_allocate_legacy(struct adapter *);
234 static int	em_allocate_msix(struct adapter *);
235 static int	em_allocate_queues(struct adapter *);
236 static int	em_setup_msix(struct adapter *);
237 static void	em_free_pci_resources(struct adapter *);
238 static void	em_local_timer(void *);
239 static void	em_reset(struct adapter *);
240 static int	em_setup_interface(device_t, struct adapter *);
241 
242 static void	em_setup_transmit_structures(struct adapter *);
243 static void	em_initialize_transmit_unit(struct adapter *);
244 static int	em_allocate_transmit_buffers(struct tx_ring *);
245 static void	em_free_transmit_structures(struct adapter *);
246 static void	em_free_transmit_buffers(struct tx_ring *);
247 
248 static int	em_setup_receive_structures(struct adapter *);
249 static int	em_allocate_receive_buffers(struct rx_ring *);
250 static void	em_initialize_receive_unit(struct adapter *);
251 static void	em_free_receive_structures(struct adapter *);
252 static void	em_free_receive_buffers(struct rx_ring *);
253 
254 static void	em_enable_intr(struct adapter *);
255 static void	em_disable_intr(struct adapter *);
256 static void	em_update_stats_counters(struct adapter *);
257 static void	em_add_hw_stats(struct adapter *adapter);
258 static void	em_txeof(struct tx_ring *);
259 static bool	em_rxeof(struct rx_ring *, int, int *);
260 #ifndef __NO_STRICT_ALIGNMENT
261 static int	em_fixup_rx(struct rx_ring *);
262 #endif
263 static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
264 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
265 		    struct ip *, u32 *, u32 *);
266 static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
267 		    struct tcphdr *, u32 *, u32 *);
268 static void	em_set_promisc(struct adapter *);
269 static void	em_disable_promisc(struct adapter *);
270 static void	em_set_multi(struct adapter *);
271 static void	em_update_link_status(struct adapter *);
272 static void	em_refresh_mbufs(struct rx_ring *, int);
273 static void	em_register_vlan(void *, if_t, u16);
274 static void	em_unregister_vlan(void *, if_t, u16);
275 static void	em_setup_vlan_hw_support(struct adapter *);
276 static int	em_xmit(struct tx_ring *, struct mbuf **);
277 static int	em_dma_malloc(struct adapter *, bus_size_t,
278 		    struct em_dma_alloc *, int);
279 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
280 static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
281 static void	em_print_nvm_info(struct adapter *);
282 static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
283 static void	em_print_debug_info(struct adapter *);
284 static int 	em_is_valid_ether_addr(u8 *);
285 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
286 static void	em_add_int_delay_sysctl(struct adapter *, const char *,
287 		    const char *, struct em_int_delay_info *, int, int);
288 /* Management and WOL Support */
289 static void	em_init_manageability(struct adapter *);
290 static void	em_release_manageability(struct adapter *);
291 static void     em_get_hw_control(struct adapter *);
292 static void     em_release_hw_control(struct adapter *);
293 static void	em_get_wakeup(device_t);
294 static void     em_enable_wakeup(device_t);
295 static int	em_enable_phy_wakeup(struct adapter *);
296 static void	em_led_func(void *, int);
297 static void	em_disable_aspm(struct adapter *);
298 
299 static int	em_irq_fast(void *);
300 
301 /* MSIX handlers */
302 static void	em_msix_tx(void *);
303 static void	em_msix_rx(void *);
304 static void	em_msix_link(void *);
305 static void	em_handle_tx(void *context, int pending);
306 static void	em_handle_rx(void *context, int pending);
307 static void	em_handle_link(void *context, int pending);
308 
309 #ifdef EM_MULTIQUEUE
310 static void	em_enable_vectors_82574(struct adapter *);
311 #endif
312 
313 static void	em_set_sysctl_value(struct adapter *, const char *,
314 		    const char *, int *, int);
315 static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
316 static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
317 
318 static __inline void em_rx_discard(struct rx_ring *, int);
319 
320 #ifdef DEVICE_POLLING
321 static poll_handler_t em_poll;
322 #endif /* POLLING */
323 
324 /*********************************************************************
325  *  FreeBSD Device Interface Entry Points
326  *********************************************************************/
327 
328 static device_method_t em_methods[] = {
329 	/* Device interface */
330 	DEVMETHOD(device_probe, em_probe),
331 	DEVMETHOD(device_attach, em_attach),
332 	DEVMETHOD(device_detach, em_detach),
333 	DEVMETHOD(device_shutdown, em_shutdown),
334 	DEVMETHOD(device_suspend, em_suspend),
335 	DEVMETHOD(device_resume, em_resume),
336 	DEVMETHOD_END
337 };
338 
339 static driver_t em_driver = {
340 	"em", em_methods, sizeof(struct adapter),
341 };
342 
343 devclass_t em_devclass;
344 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
345 MODULE_DEPEND(em, pci, 1, 1, 1);
346 MODULE_DEPEND(em, ether, 1, 1, 1);
347 #ifdef DEV_NETMAP
348 MODULE_DEPEND(em, netmap, 1, 1, 1);
349 #endif /* DEV_NETMAP */
350 
351 /*********************************************************************
352  *  Tunable default values.
353  *********************************************************************/
354 
355 #define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
356 #define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
357 #define M_TSO_LEN			66
358 
359 #define MAX_INTS_PER_SEC	8000
360 #define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
361 
362 /* Allow common code without TSO */
363 #ifndef CSUM_TSO
364 #define CSUM_TSO	0
365 #endif
366 
367 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
368 
369 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
370 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
371 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
372     0, "Default transmit interrupt delay in usecs");
373 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
374     0, "Default receive interrupt delay in usecs");
375 
376 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
377 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
378 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
379     &em_tx_abs_int_delay_dflt, 0,
380     "Default transmit interrupt delay limit in usecs");
381 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
382     &em_rx_abs_int_delay_dflt, 0,
383     "Default receive interrupt delay limit in usecs");
384 
385 static int em_rxd = EM_DEFAULT_RXD;
386 static int em_txd = EM_DEFAULT_TXD;
387 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
388     "Number of receive descriptors per queue");
389 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
390     "Number of transmit descriptors per queue");
391 
392 static int em_smart_pwr_down = FALSE;
393 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
394     0, "Set to true to leave smart power down enabled on newer adapters");
395 
396 /* Controls whether promiscuous also shows bad packets */
397 static int em_debug_sbp = FALSE;
398 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
399     "Show bad packets in promiscuous mode");
400 
401 static int em_enable_msix = TRUE;
402 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
403     "Enable MSI-X interrupts");
404 
405 #ifdef EM_MULTIQUEUE
406 static int em_num_queues = 1;
407 SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
408     "82574 only: Number of queues to configure, 0 indicates autoconfigure");
409 #endif
410 
411 /*
412 ** Global variable to store last used CPU when binding queues
413 ** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
414 ** queue is bound to a cpu.
415 */
416 static int em_last_bind_cpu = -1;
417 
418 /* How many packets rxeof tries to clean at a time */
419 static int em_rx_process_limit = 100;
420 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
421     &em_rx_process_limit, 0,
422     "Maximum number of received packets to process "
423     "at a time, -1 means unlimited");
424 
425 /* Energy efficient ethernet - default to OFF */
426 static int eee_setting = 1;
427 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
428     "Enable Energy Efficient Ethernet");
429 
430 /* Global used in WOL setup with multiport cards */
431 static int global_quad_port_a = 0;
432 
433 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
434 #include <dev/netmap/if_em_netmap.h>
435 #endif /* DEV_NETMAP */
436 
437 /*********************************************************************
438  *  Device identification routine
439  *
440  *  em_probe determines if the driver should be loaded on
441  *  adapter based on PCI vendor/device id of the adapter.
442  *
443  *  return BUS_PROBE_DEFAULT on success, positive on failure
444  *********************************************************************/
445 
446 static int
447 em_probe(device_t dev)
448 {
449 	char		adapter_name[60];
450 	uint16_t	pci_vendor_id = 0;
451 	uint16_t	pci_device_id = 0;
452 	uint16_t	pci_subvendor_id = 0;
453 	uint16_t	pci_subdevice_id = 0;
454 	em_vendor_info_t *ent;
455 
456 	INIT_DEBUGOUT("em_probe: begin");
457 
458 	pci_vendor_id = pci_get_vendor(dev);
459 	if (pci_vendor_id != EM_VENDOR_ID)
460 		return (ENXIO);
461 
462 	pci_device_id = pci_get_device(dev);
463 	pci_subvendor_id = pci_get_subvendor(dev);
464 	pci_subdevice_id = pci_get_subdevice(dev);
465 
466 	ent = em_vendor_info_array;
467 	while (ent->vendor_id != 0) {
468 		if ((pci_vendor_id == ent->vendor_id) &&
469 		    (pci_device_id == ent->device_id) &&
470 
471 		    ((pci_subvendor_id == ent->subvendor_id) ||
472 		    (ent->subvendor_id == PCI_ANY_ID)) &&
473 
474 		    ((pci_subdevice_id == ent->subdevice_id) ||
475 		    (ent->subdevice_id == PCI_ANY_ID))) {
476 			sprintf(adapter_name, "%s %s",
477 				em_strings[ent->index],
478 				em_driver_version);
479 			device_set_desc_copy(dev, adapter_name);
480 			return (BUS_PROBE_DEFAULT);
481 		}
482 		ent++;
483 	}
484 
485 	return (ENXIO);
486 }
487 
488 /*********************************************************************
489  *  Device initialization routine
490  *
491  *  The attach entry point is called when the driver is being loaded.
492  *  This routine identifies the type of hardware, allocates all resources
493  *  and initializes the hardware.
494  *
495  *  return 0 on success, positive on failure
496  *********************************************************************/
497 
498 static int
499 em_attach(device_t dev)
500 {
501 	struct adapter	*adapter;
502 	struct e1000_hw	*hw;
503 	int		error = 0;
504 
505 	INIT_DEBUGOUT("em_attach: begin");
506 
507 	if (resource_disabled("em", device_get_unit(dev))) {
508 		device_printf(dev, "Disabled by device hint\n");
509 		return (ENXIO);
510 	}
511 
512 	adapter = device_get_softc(dev);
513 	adapter->dev = adapter->osdep.dev = dev;
514 	hw = &adapter->hw;
515 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
516 
517 	/* SYSCTL stuff */
518 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
519 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
520 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
521 	    em_sysctl_nvm_info, "I", "NVM Information");
522 
523 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
524 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
525 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
526 	    em_sysctl_debug_info, "I", "Debug Information");
527 
528 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
529 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
530 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
531 	    em_set_flowcntl, "I", "Flow Control");
532 
533 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
534 
535 	/* Determine hardware and mac info */
536 	em_identify_hardware(adapter);
537 
538 	/* Setup PCI resources */
539 	if (em_allocate_pci_resources(adapter)) {
540 		device_printf(dev, "Allocation of PCI resources failed\n");
541 		error = ENXIO;
542 		goto err_pci;
543 	}
544 
545 	/*
546 	** For ICH8 and family we need to
547 	** map the flash memory, and this
548 	** must happen after the MAC is
549 	** identified
550 	*/
551 	if ((hw->mac.type == e1000_ich8lan) ||
552 	    (hw->mac.type == e1000_ich9lan) ||
553 	    (hw->mac.type == e1000_ich10lan) ||
554 	    (hw->mac.type == e1000_pchlan) ||
555 	    (hw->mac.type == e1000_pch2lan) ||
556 	    (hw->mac.type == e1000_pch_lpt)) {
557 		int rid = EM_BAR_TYPE_FLASH;
558 		adapter->flash = bus_alloc_resource_any(dev,
559 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
560 		if (adapter->flash == NULL) {
561 			device_printf(dev, "Mapping of Flash failed\n");
562 			error = ENXIO;
563 			goto err_pci;
564 		}
565 		/* This is used in the shared code */
566 		hw->flash_address = (u8 *)adapter->flash;
567 		adapter->osdep.flash_bus_space_tag =
568 		    rman_get_bustag(adapter->flash);
569 		adapter->osdep.flash_bus_space_handle =
570 		    rman_get_bushandle(adapter->flash);
571 	}
572 
573 	/* Do Shared Code initialization */
574 	if (e1000_setup_init_funcs(hw, TRUE)) {
575 		device_printf(dev, "Setup of Shared code failed\n");
576 		error = ENXIO;
577 		goto err_pci;
578 	}
579 
580 	/*
581 	 * Setup MSI/X or MSI if PCI Express
582 	 */
583 	adapter->msix = em_setup_msix(adapter);
584 
585 	e1000_get_bus_info(hw);
586 
587 	/* Set up some sysctls for the tunable interrupt delays */
588 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
589 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
590 	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
591 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
592 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
593 	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
594 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
595 	    "receive interrupt delay limit in usecs",
596 	    &adapter->rx_abs_int_delay,
597 	    E1000_REGISTER(hw, E1000_RADV),
598 	    em_rx_abs_int_delay_dflt);
599 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
600 	    "transmit interrupt delay limit in usecs",
601 	    &adapter->tx_abs_int_delay,
602 	    E1000_REGISTER(hw, E1000_TADV),
603 	    em_tx_abs_int_delay_dflt);
604 	em_add_int_delay_sysctl(adapter, "itr",
605 	    "interrupt delay limit in usecs/4",
606 	    &adapter->tx_itr,
607 	    E1000_REGISTER(hw, E1000_ITR),
608 	    DEFAULT_ITR);
609 
610 	/* Sysctl for limiting the amount of work done in the taskqueue */
611 	em_set_sysctl_value(adapter, "rx_processing_limit",
612 	    "max number of rx packets to process", &adapter->rx_process_limit,
613 	    em_rx_process_limit);
614 
615 	/*
616 	 * Validate number of transmit and receive descriptors. It
617 	 * must not exceed hardware maximum, and must be multiple
618 	 * of E1000_DBA_ALIGN.
619 	 */
620 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
621 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
622 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
623 		    EM_DEFAULT_TXD, em_txd);
624 		adapter->num_tx_desc = EM_DEFAULT_TXD;
625 	} else
626 		adapter->num_tx_desc = em_txd;
627 
628 	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
629 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
630 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
631 		    EM_DEFAULT_RXD, em_rxd);
632 		adapter->num_rx_desc = EM_DEFAULT_RXD;
633 	} else
634 		adapter->num_rx_desc = em_rxd;
635 
636 	hw->mac.autoneg = DO_AUTO_NEG;
637 	hw->phy.autoneg_wait_to_complete = FALSE;
638 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
639 
640 	/* Copper options */
641 	if (hw->phy.media_type == e1000_media_type_copper) {
642 		hw->phy.mdix = AUTO_ALL_MODES;
643 		hw->phy.disable_polarity_correction = FALSE;
644 		hw->phy.ms_type = EM_MASTER_SLAVE;
645 	}
646 
647 	/*
648 	 * Set the frame limits assuming
649 	 * standard ethernet sized frames.
650 	 */
651 	adapter->hw.mac.max_frame_size =
652 	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
653 
654 	/*
655 	 * This controls when hardware reports transmit completion
656 	 * status.
657 	 */
658 	hw->mac.report_tx_early = 1;
659 
660 	/*
661 	** Get queue/ring memory
662 	*/
663 	if (em_allocate_queues(adapter)) {
664 		error = ENOMEM;
665 		goto err_pci;
666 	}
667 
668 	/* Allocate multicast array memory. */
669 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
670 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
671 	if (adapter->mta == NULL) {
672 		device_printf(dev, "Can not allocate multicast setup array\n");
673 		error = ENOMEM;
674 		goto err_late;
675 	}
676 
677 	/* Check SOL/IDER usage */
678 	if (e1000_check_reset_block(hw))
679 		device_printf(dev, "PHY reset is blocked"
680 		    " due to SOL/IDER session.\n");
681 
682 	/* Sysctl for setting Energy Efficient Ethernet */
683 	hw->dev_spec.ich8lan.eee_disable = eee_setting;
684 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
685 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
686 	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
687 	    adapter, 0, em_sysctl_eee, "I",
688 	    "Disable Energy Efficient Ethernet");
689 
690 	/*
691 	** Start from a known state, this is
692 	** important in reading the nvm and
693 	** mac from that.
694 	*/
695 	e1000_reset_hw(hw);
696 
697 
698 	/* Make sure we have a good EEPROM before we read from it */
699 	if (e1000_validate_nvm_checksum(hw) < 0) {
700 		/*
701 		** Some PCI-E parts fail the first check due to
702 		** the link being in sleep state, call it again,
703 		** if it fails a second time its a real issue.
704 		*/
705 		if (e1000_validate_nvm_checksum(hw) < 0) {
706 			device_printf(dev,
707 			    "The EEPROM Checksum Is Not Valid\n");
708 			error = EIO;
709 			goto err_late;
710 		}
711 	}
712 
713 	/* Copy the permanent MAC address out of the EEPROM */
714 	if (e1000_read_mac_addr(hw) < 0) {
715 		device_printf(dev, "EEPROM read error while reading MAC"
716 		    " address\n");
717 		error = EIO;
718 		goto err_late;
719 	}
720 
721 	if (!em_is_valid_ether_addr(hw->mac.addr)) {
722 		device_printf(dev, "Invalid MAC address\n");
723 		error = EIO;
724 		goto err_late;
725 	}
726 
727 	/* Disable ULP support */
728 	e1000_disable_ulp_lpt_lp(hw, TRUE);
729 
730 	/*
731 	**  Do interrupt configuration
732 	*/
733 	if (adapter->msix > 1) /* Do MSIX */
734 		error = em_allocate_msix(adapter);
735 	else  /* MSI or Legacy */
736 		error = em_allocate_legacy(adapter);
737 	if (error)
738 		goto err_late;
739 
740 	/*
741 	 * Get Wake-on-Lan and Management info for later use
742 	 */
743 	em_get_wakeup(dev);
744 
745 	/* Setup OS specific network interface */
746 	if (em_setup_interface(dev, adapter) != 0)
747 		goto err_late;
748 
749 	em_reset(adapter);
750 
751 	/* Initialize statistics */
752 	em_update_stats_counters(adapter);
753 
754 	hw->mac.get_link_status = 1;
755 	em_update_link_status(adapter);
756 
757 	/* Register for VLAN events */
758 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
759 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
760 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
761 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
762 
763 	em_add_hw_stats(adapter);
764 
765 	/* Non-AMT based hardware can now take control from firmware */
766 	if (adapter->has_manage && !adapter->has_amt)
767 		em_get_hw_control(adapter);
768 
769 	/* Tell the stack that the interface is not active */
770 	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
771 
772 	adapter->led_dev = led_create(em_led_func, adapter,
773 	    device_get_nameunit(dev));
774 #ifdef DEV_NETMAP
775 	em_netmap_attach(adapter);
776 #endif /* DEV_NETMAP */
777 
778 	INIT_DEBUGOUT("em_attach: end");
779 
780 	return (0);
781 
782 err_late:
783 	em_free_transmit_structures(adapter);
784 	em_free_receive_structures(adapter);
785 	em_release_hw_control(adapter);
786 	if (adapter->ifp != (void *)NULL)
787 		if_free(adapter->ifp);
788 err_pci:
789 	em_free_pci_resources(adapter);
790 	free(adapter->mta, M_DEVBUF);
791 	EM_CORE_LOCK_DESTROY(adapter);
792 
793 	return (error);
794 }
795 
796 /*********************************************************************
797  *  Device removal routine
798  *
799  *  The detach entry point is called when the driver is being removed.
800  *  This routine stops the adapter and deallocates all the resources
801  *  that were allocated for driver operation.
802  *
803  *  return 0 on success, positive on failure
804  *********************************************************************/
805 
806 static int
807 em_detach(device_t dev)
808 {
809 	struct adapter	*adapter = device_get_softc(dev);
810 	if_t ifp = adapter->ifp;
811 
812 	INIT_DEBUGOUT("em_detach: begin");
813 
814 	/* Make sure VLANS are not using driver */
815 	if (if_vlantrunkinuse(ifp)) {
816 		device_printf(dev,"Vlan in use, detach first\n");
817 		return (EBUSY);
818 	}
819 
820 #ifdef DEVICE_POLLING
821 	if (if_getcapenable(ifp) & IFCAP_POLLING)
822 		ether_poll_deregister(ifp);
823 #endif
824 
825 	if (adapter->led_dev != NULL)
826 		led_destroy(adapter->led_dev);
827 
828 	EM_CORE_LOCK(adapter);
829 	adapter->in_detach = 1;
830 	em_stop(adapter);
831 	EM_CORE_UNLOCK(adapter);
832 	EM_CORE_LOCK_DESTROY(adapter);
833 
834 	e1000_phy_hw_reset(&adapter->hw);
835 
836 	em_release_manageability(adapter);
837 	em_release_hw_control(adapter);
838 
839 	/* Unregister VLAN events */
840 	if (adapter->vlan_attach != NULL)
841 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
842 	if (adapter->vlan_detach != NULL)
843 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
844 
845 	ether_ifdetach(adapter->ifp);
846 	callout_drain(&adapter->timer);
847 
848 #ifdef DEV_NETMAP
849 	netmap_detach(ifp);
850 #endif /* DEV_NETMAP */
851 
852 	em_free_pci_resources(adapter);
853 	bus_generic_detach(dev);
854 	if_free(ifp);
855 
856 	em_free_transmit_structures(adapter);
857 	em_free_receive_structures(adapter);
858 
859 	em_release_hw_control(adapter);
860 	free(adapter->mta, M_DEVBUF);
861 
862 	return (0);
863 }
864 
865 /*********************************************************************
866  *
867  *  Shutdown entry point
868  *
869  **********************************************************************/
870 
871 static int
872 em_shutdown(device_t dev)
873 {
874 	return em_suspend(dev);
875 }
876 
877 /*
878  * Suspend/resume device methods.
879  */
880 static int
881 em_suspend(device_t dev)
882 {
883 	struct adapter *adapter = device_get_softc(dev);
884 
885 	EM_CORE_LOCK(adapter);
886 
887         em_release_manageability(adapter);
888 	em_release_hw_control(adapter);
889 	em_enable_wakeup(dev);
890 
891 	EM_CORE_UNLOCK(adapter);
892 
893 	return bus_generic_suspend(dev);
894 }
895 
896 static int
897 em_resume(device_t dev)
898 {
899 	struct adapter *adapter = device_get_softc(dev);
900 	struct tx_ring	*txr = adapter->tx_rings;
901 	if_t ifp = adapter->ifp;
902 
903 	EM_CORE_LOCK(adapter);
904 	if (adapter->hw.mac.type == e1000_pch2lan)
905 		e1000_resume_workarounds_pchlan(&adapter->hw);
906 	em_init_locked(adapter);
907 	em_init_manageability(adapter);
908 
909 	if ((if_getflags(ifp) & IFF_UP) &&
910 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
911 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
912 			EM_TX_LOCK(txr);
913 #ifdef EM_MULTIQUEUE
914 			if (!drbr_empty(ifp, txr->br))
915 				em_mq_start_locked(ifp, txr);
916 #else
917 			if (!if_sendq_empty(ifp))
918 				em_start_locked(ifp, txr);
919 #endif
920 			EM_TX_UNLOCK(txr);
921 		}
922 	}
923 	EM_CORE_UNLOCK(adapter);
924 
925 	return bus_generic_resume(dev);
926 }
927 
928 
929 #ifndef EM_MULTIQUEUE
930 static void
931 em_start_locked(if_t ifp, struct tx_ring *txr)
932 {
933 	struct adapter	*adapter = if_getsoftc(ifp);
934 	struct mbuf	*m_head;
935 
936 	EM_TX_LOCK_ASSERT(txr);
937 
938 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
939 	    IFF_DRV_RUNNING)
940 		return;
941 
942 	if (!adapter->link_active)
943 		return;
944 
945 	while (!if_sendq_empty(ifp)) {
946         	/* Call cleanup if number of TX descriptors low */
947 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
948 			em_txeof(txr);
949 		if (txr->tx_avail < EM_MAX_SCATTER) {
950 			if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
951 			break;
952 		}
953 		m_head = if_dequeue(ifp);
954 		if (m_head == NULL)
955 			break;
956 		/*
957 		 *  Encapsulation can modify our pointer, and or make it
958 		 *  NULL on failure.  In that event, we can't requeue.
959 		 */
960 		if (em_xmit(txr, &m_head)) {
961 			if (m_head == NULL)
962 				break;
963 			if_sendq_prepend(ifp, m_head);
964 			break;
965 		}
966 
967 		/* Mark the queue as having work */
968 		if (txr->busy == EM_TX_IDLE)
969 			txr->busy = EM_TX_BUSY;
970 
971 		/* Send a copy of the frame to the BPF listener */
972 		ETHER_BPF_MTAP(ifp, m_head);
973 
974 	}
975 
976 	return;
977 }
978 
979 static void
980 em_start(if_t ifp)
981 {
982 	struct adapter	*adapter = if_getsoftc(ifp);
983 	struct tx_ring	*txr = adapter->tx_rings;
984 
985 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
986 		EM_TX_LOCK(txr);
987 		em_start_locked(ifp, txr);
988 		EM_TX_UNLOCK(txr);
989 	}
990 	return;
991 }
992 #else /* EM_MULTIQUEUE */
993 /*********************************************************************
994  *  Multiqueue Transmit routines
995  *
996  *  em_mq_start is called by the stack to initiate a transmit.
997  *  however, if busy the driver can queue the request rather
998  *  than do an immediate send. It is this that is an advantage
999  *  in this driver, rather than also having multiple tx queues.
1000  **********************************************************************/
1001 /*
1002 ** Multiqueue capable stack interface
1003 */
1004 static int
1005 em_mq_start(if_t ifp, struct mbuf *m)
1006 {
1007 	struct adapter	*adapter = if_getsoftc(ifp);
1008 	struct tx_ring	*txr = adapter->tx_rings;
1009 	unsigned int	i, error;
1010 
1011 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1012 		i = m->m_pkthdr.flowid % adapter->num_queues;
1013 	else
1014 		i = curcpu % adapter->num_queues;
1015 
1016 	txr = &adapter->tx_rings[i];
1017 
1018 	error = drbr_enqueue(ifp, txr->br, m);
1019 	if (error)
1020 		return (error);
1021 
1022 	if (EM_TX_TRYLOCK(txr)) {
1023 		em_mq_start_locked(ifp, txr);
1024 		EM_TX_UNLOCK(txr);
1025 	} else
1026 		taskqueue_enqueue(txr->tq, &txr->tx_task);
1027 
1028 	return (0);
1029 }
1030 
1031 static int
1032 em_mq_start_locked(if_t ifp, struct tx_ring *txr)
1033 {
1034 	struct adapter  *adapter = txr->adapter;
1035         struct mbuf     *next;
1036         int             err = 0, enq = 0;
1037 
1038 	EM_TX_LOCK_ASSERT(txr);
1039 
1040 	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
1041 	    adapter->link_active == 0) {
1042 		return (ENETDOWN);
1043 	}
1044 
1045 	/* Process the queue */
1046 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1047 		if ((err = em_xmit(txr, &next)) != 0) {
1048 			if (next == NULL) {
1049 				/* It was freed, move forward */
1050 				drbr_advance(ifp, txr->br);
1051 			} else {
1052 				/*
1053 				 * Still have one left, it may not be
1054 				 * the same since the transmit function
1055 				 * may have changed it.
1056 				 */
1057 				drbr_putback(ifp, txr->br, next);
1058 			}
1059 			break;
1060 		}
1061 		drbr_advance(ifp, txr->br);
1062 		enq++;
1063 		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
1064 		if (next->m_flags & M_MCAST)
1065 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
1066 		ETHER_BPF_MTAP(ifp, next);
1067 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
1068                         break;
1069 	}
1070 
1071 	/* Mark the queue as having work */
1072 	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1073 		txr->busy = EM_TX_BUSY;
1074 
1075 	if (txr->tx_avail < EM_MAX_SCATTER)
1076 		em_txeof(txr);
1077 	if (txr->tx_avail < EM_MAX_SCATTER) {
1078 		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
1079 	}
1080 	return (err);
1081 }
1082 
1083 /*
1084 ** Flush all ring buffers
1085 */
1086 static void
1087 em_qflush(if_t ifp)
1088 {
1089 	struct adapter  *adapter = if_getsoftc(ifp);
1090 	struct tx_ring  *txr = adapter->tx_rings;
1091 	struct mbuf     *m;
1092 
1093 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1094 		EM_TX_LOCK(txr);
1095 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1096 			m_freem(m);
1097 		EM_TX_UNLOCK(txr);
1098 	}
1099 	if_qflush(ifp);
1100 }
1101 #endif /* EM_MULTIQUEUE */
1102 
1103 /*********************************************************************
1104  *  Ioctl entry point
1105  *
1106  *  em_ioctl is called when the user wants to configure the
1107  *  interface.
1108  *
1109  *  return 0 on success, positive on failure
1110  **********************************************************************/
1111 
1112 static int
1113 em_ioctl(if_t ifp, u_long command, caddr_t data)
1114 {
1115 	struct adapter	*adapter = if_getsoftc(ifp);
1116 	struct ifreq	*ifr = (struct ifreq *)data;
1117 #if defined(INET) || defined(INET6)
1118 	struct ifaddr	*ifa = (struct ifaddr *)data;
1119 #endif
1120 	bool		avoid_reset = FALSE;
1121 	int		error = 0;
1122 
1123 	if (adapter->in_detach)
1124 		return (error);
1125 
1126 	switch (command) {
1127 	case SIOCSIFADDR:
1128 #ifdef INET
1129 		if (ifa->ifa_addr->sa_family == AF_INET)
1130 			avoid_reset = TRUE;
1131 #endif
1132 #ifdef INET6
1133 		if (ifa->ifa_addr->sa_family == AF_INET6)
1134 			avoid_reset = TRUE;
1135 #endif
1136 		/*
1137 		** Calling init results in link renegotiation,
1138 		** so we avoid doing it when possible.
1139 		*/
1140 		if (avoid_reset) {
1141 			if_setflagbits(ifp,IFF_UP,0);
1142 			if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
1143 				em_init(adapter);
1144 #ifdef INET
1145 			if (!(if_getflags(ifp) & IFF_NOARP))
1146 				arp_ifinit(ifp, ifa);
1147 #endif
1148 		} else
1149 			error = ether_ioctl(ifp, command, data);
1150 		break;
1151 	case SIOCSIFMTU:
1152 	    {
1153 		int max_frame_size;
1154 
1155 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1156 
1157 		EM_CORE_LOCK(adapter);
1158 		switch (adapter->hw.mac.type) {
1159 		case e1000_82571:
1160 		case e1000_82572:
1161 		case e1000_ich9lan:
1162 		case e1000_ich10lan:
1163 		case e1000_pch2lan:
1164 		case e1000_pch_lpt:
1165 		case e1000_82574:
1166 		case e1000_82583:
1167 		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1168 			max_frame_size = 9234;
1169 			break;
1170 		case e1000_pchlan:
1171 			max_frame_size = 4096;
1172 			break;
1173 			/* Adapters that do not support jumbo frames */
1174 		case e1000_ich8lan:
1175 			max_frame_size = ETHER_MAX_LEN;
1176 			break;
1177 		default:
1178 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1179 		}
1180 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1181 		    ETHER_CRC_LEN) {
1182 			EM_CORE_UNLOCK(adapter);
1183 			error = EINVAL;
1184 			break;
1185 		}
1186 
1187 		if_setmtu(ifp, ifr->ifr_mtu);
1188 		adapter->hw.mac.max_frame_size =
1189 		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
1190 		em_init_locked(adapter);
1191 		EM_CORE_UNLOCK(adapter);
1192 		break;
1193 	    }
1194 	case SIOCSIFFLAGS:
1195 		IOCTL_DEBUGOUT("ioctl rcv'd:\
1196 		    SIOCSIFFLAGS (Set Interface Flags)");
1197 		EM_CORE_LOCK(adapter);
1198 		if (if_getflags(ifp) & IFF_UP) {
1199 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1200 				if ((if_getflags(ifp) ^ adapter->if_flags) &
1201 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1202 					em_disable_promisc(adapter);
1203 					em_set_promisc(adapter);
1204 				}
1205 			} else
1206 				em_init_locked(adapter);
1207 		} else
1208 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1209 				em_stop(adapter);
1210 		adapter->if_flags = if_getflags(ifp);
1211 		EM_CORE_UNLOCK(adapter);
1212 		break;
1213 	case SIOCADDMULTI:
1214 	case SIOCDELMULTI:
1215 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1216 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1217 			EM_CORE_LOCK(adapter);
1218 			em_disable_intr(adapter);
1219 			em_set_multi(adapter);
1220 #ifdef DEVICE_POLLING
1221 			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
1222 #endif
1223 				em_enable_intr(adapter);
1224 			EM_CORE_UNLOCK(adapter);
1225 		}
1226 		break;
1227 	case SIOCSIFMEDIA:
1228 		/* Check SOL/IDER usage */
1229 		EM_CORE_LOCK(adapter);
1230 		if (e1000_check_reset_block(&adapter->hw)) {
1231 			EM_CORE_UNLOCK(adapter);
1232 			device_printf(adapter->dev, "Media change is"
1233 			    " blocked due to SOL/IDER session.\n");
1234 			break;
1235 		}
1236 		EM_CORE_UNLOCK(adapter);
1237 		/* falls thru */
1238 	case SIOCGIFMEDIA:
1239 		IOCTL_DEBUGOUT("ioctl rcv'd: \
1240 		    SIOCxIFMEDIA (Get/Set Interface Media)");
1241 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1242 		break;
1243 	case SIOCSIFCAP:
1244 	    {
1245 		int mask, reinit;
1246 
1247 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1248 		reinit = 0;
1249 		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
1250 #ifdef DEVICE_POLLING
1251 		if (mask & IFCAP_POLLING) {
1252 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1253 				error = ether_poll_register(em_poll, ifp);
1254 				if (error)
1255 					return (error);
1256 				EM_CORE_LOCK(adapter);
1257 				em_disable_intr(adapter);
1258 				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
1259 				EM_CORE_UNLOCK(adapter);
1260 			} else {
1261 				error = ether_poll_deregister(ifp);
1262 				/* Enable interrupt even in error case */
1263 				EM_CORE_LOCK(adapter);
1264 				em_enable_intr(adapter);
1265 				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
1266 				EM_CORE_UNLOCK(adapter);
1267 			}
1268 		}
1269 #endif
1270 		if (mask & IFCAP_HWCSUM) {
1271 			if_togglecapenable(ifp,IFCAP_HWCSUM);
1272 			reinit = 1;
1273 		}
1274 		if (mask & IFCAP_TSO4) {
1275 			if_togglecapenable(ifp,IFCAP_TSO4);
1276 			reinit = 1;
1277 		}
1278 		if (mask & IFCAP_VLAN_HWTAGGING) {
1279 			if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
1280 			reinit = 1;
1281 		}
1282 		if (mask & IFCAP_VLAN_HWFILTER) {
1283 			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1284 			reinit = 1;
1285 		}
1286 		if (mask & IFCAP_VLAN_HWTSO) {
1287 			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1288 			reinit = 1;
1289 		}
1290 		if ((mask & IFCAP_WOL) &&
1291 		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
1292 			if (mask & IFCAP_WOL_MCAST)
1293 				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
1294 			if (mask & IFCAP_WOL_MAGIC)
1295 				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
1296 		}
1297 		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1298 			em_init(adapter);
1299 		if_vlancap(ifp);
1300 		break;
1301 	    }
1302 
1303 	default:
1304 		error = ether_ioctl(ifp, command, data);
1305 		break;
1306 	}
1307 
1308 	return (error);
1309 }
1310 
1311 
1312 /*********************************************************************
1313  *  Init entry point
1314  *
1315  *  This routine is used in two ways. It is used by the stack as
1316  *  init entry point in network interface structure. It is also used
1317  *  by the driver as a hw/sw initialization routine to get to a
1318  *  consistent state.
1319  *
1320  *  return 0 on success, positive on failure
1321  **********************************************************************/
1322 
1323 static void
1324 em_init_locked(struct adapter *adapter)
1325 {
1326 	if_t ifp = adapter->ifp;
1327 	device_t	dev = adapter->dev;
1328 
1329 	INIT_DEBUGOUT("em_init: begin");
1330 
1331 	EM_CORE_LOCK_ASSERT(adapter);
1332 
1333 	em_disable_intr(adapter);
1334 	callout_stop(&adapter->timer);
1335 
1336 	/* Get the latest mac address, User can use a LAA */
1337         bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
1338               ETHER_ADDR_LEN);
1339 
1340 	/* Put the address into the Receive Address Array */
1341 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1342 
1343 	/*
1344 	 * With the 82571 adapter, RAR[0] may be overwritten
1345 	 * when the other port is reset, we make a duplicate
1346 	 * in RAR[14] for that eventuality, this assures
1347 	 * the interface continues to function.
1348 	 */
1349 	if (adapter->hw.mac.type == e1000_82571) {
1350 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1351 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1352 		    E1000_RAR_ENTRIES - 1);
1353 	}
1354 
1355 	/* Initialize the hardware */
1356 	em_reset(adapter);
1357 	em_update_link_status(adapter);
1358 
1359 	/* Setup VLAN support, basic and offload if available */
1360 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1361 
1362 	/* Set hardware offload abilities */
1363 	if_clearhwassist(ifp);
1364 	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
1365 		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
1366 	if (if_getcapenable(ifp) & IFCAP_TSO4)
1367 		if_sethwassistbits(ifp, CSUM_TSO, 0);
1368 
1369 	/* Configure for OS presence */
1370 	em_init_manageability(adapter);
1371 
1372 	/* Prepare transmit descriptors and buffers */
1373 	em_setup_transmit_structures(adapter);
1374 	em_initialize_transmit_unit(adapter);
1375 
1376 	/* Setup Multicast table */
1377 	em_set_multi(adapter);
1378 
1379 	/*
1380 	** Figure out the desired mbuf
1381 	** pool for doing jumbos
1382 	*/
1383 	if (adapter->hw.mac.max_frame_size <= 2048)
1384 		adapter->rx_mbuf_sz = MCLBYTES;
1385 	else if (adapter->hw.mac.max_frame_size <= 4096)
1386 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1387 	else
1388 		adapter->rx_mbuf_sz = MJUM9BYTES;
1389 
1390 	/* Prepare receive descriptors and buffers */
1391 	if (em_setup_receive_structures(adapter)) {
1392 		device_printf(dev, "Could not setup receive structures\n");
1393 		em_stop(adapter);
1394 		return;
1395 	}
1396 	em_initialize_receive_unit(adapter);
1397 
1398 	/* Use real VLAN Filter support? */
1399 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
1400 		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1401 			/* Use real VLAN Filter support */
1402 			em_setup_vlan_hw_support(adapter);
1403 		else {
1404 			u32 ctrl;
1405 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1406 			ctrl |= E1000_CTRL_VME;
1407 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1408 		}
1409 	}
1410 
1411 	/* Don't lose promiscuous settings */
1412 	em_set_promisc(adapter);
1413 
1414 	/* Set the interface as ACTIVE */
1415 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1416 
1417 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1418 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1419 
1420 	/* MSI/X configuration for 82574 */
1421 	if (adapter->hw.mac.type == e1000_82574) {
1422 		int tmp;
1423 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1424 		tmp |= E1000_CTRL_EXT_PBA_CLR;
1425 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1426 		/* Set the IVAR - interrupt vector routing. */
1427 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1428 	}
1429 
1430 #ifdef DEVICE_POLLING
1431 	/*
1432 	 * Only enable interrupts if we are not polling, make sure
1433 	 * they are off otherwise.
1434 	 */
1435 	if (if_getcapenable(ifp) & IFCAP_POLLING)
1436 		em_disable_intr(adapter);
1437 	else
1438 #endif /* DEVICE_POLLING */
1439 		em_enable_intr(adapter);
1440 
1441 	/* AMT based hardware can now take control from firmware */
1442 	if (adapter->has_manage && adapter->has_amt)
1443 		em_get_hw_control(adapter);
1444 }
1445 
1446 static void
1447 em_init(void *arg)
1448 {
1449 	struct adapter *adapter = arg;
1450 
1451 	EM_CORE_LOCK(adapter);
1452 	em_init_locked(adapter);
1453 	EM_CORE_UNLOCK(adapter);
1454 }
1455 
1456 
1457 #ifdef DEVICE_POLLING
1458 /*********************************************************************
1459  *
1460  *  Legacy polling routine: note this only works with single queue
1461  *
1462  *********************************************************************/
1463 static int
1464 em_poll(if_t ifp, enum poll_cmd cmd, int count)
1465 {
1466 	struct adapter *adapter = if_getsoftc(ifp);
1467 	struct tx_ring	*txr = adapter->tx_rings;
1468 	struct rx_ring	*rxr = adapter->rx_rings;
1469 	u32		reg_icr;
1470 	int		rx_done;
1471 
1472 	EM_CORE_LOCK(adapter);
1473 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
1474 		EM_CORE_UNLOCK(adapter);
1475 		return (0);
1476 	}
1477 
1478 	if (cmd == POLL_AND_CHECK_STATUS) {
1479 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1480 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1481 			callout_stop(&adapter->timer);
1482 			adapter->hw.mac.get_link_status = 1;
1483 			em_update_link_status(adapter);
1484 			callout_reset(&adapter->timer, hz,
1485 			    em_local_timer, adapter);
1486 		}
1487 	}
1488 	EM_CORE_UNLOCK(adapter);
1489 
1490 	em_rxeof(rxr, count, &rx_done);
1491 
1492 	EM_TX_LOCK(txr);
1493 	em_txeof(txr);
1494 #ifdef EM_MULTIQUEUE
1495 	if (!drbr_empty(ifp, txr->br))
1496 		em_mq_start_locked(ifp, txr);
1497 #else
1498 	if (!if_sendq_empty(ifp))
1499 		em_start_locked(ifp, txr);
1500 #endif
1501 	EM_TX_UNLOCK(txr);
1502 
1503 	return (rx_done);
1504 }
1505 #endif /* DEVICE_POLLING */
1506 
1507 
1508 /*********************************************************************
1509  *
1510  *  Fast Legacy/MSI Combined Interrupt Service routine
1511  *
1512  *********************************************************************/
1513 static int
1514 em_irq_fast(void *arg)
1515 {
1516 	struct adapter	*adapter = arg;
1517 	if_t ifp;
1518 	u32		reg_icr;
1519 
1520 	ifp = adapter->ifp;
1521 
1522 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1523 
1524 	/* Hot eject?  */
1525 	if (reg_icr == 0xffffffff)
1526 		return FILTER_STRAY;
1527 
1528 	/* Definitely not our interrupt.  */
1529 	if (reg_icr == 0x0)
1530 		return FILTER_STRAY;
1531 
1532 	/*
1533 	 * Starting with the 82571 chip, bit 31 should be used to
1534 	 * determine whether the interrupt belongs to us.
1535 	 */
1536 	if (adapter->hw.mac.type >= e1000_82571 &&
1537 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1538 		return FILTER_STRAY;
1539 
1540 	em_disable_intr(adapter);
1541 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1542 
1543 	/* Link status change */
1544 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1545 		adapter->hw.mac.get_link_status = 1;
1546 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1547 	}
1548 
1549 	if (reg_icr & E1000_ICR_RXO)
1550 		adapter->rx_overruns++;
1551 	return FILTER_HANDLED;
1552 }
1553 
1554 /* Combined RX/TX handler, used by Legacy and MSI */
1555 static void
1556 em_handle_que(void *context, int pending)
1557 {
1558 	struct adapter	*adapter = context;
1559 	if_t ifp = adapter->ifp;
1560 	struct tx_ring	*txr = adapter->tx_rings;
1561 	struct rx_ring	*rxr = adapter->rx_rings;
1562 
1563 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1564 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1565 
1566 		EM_TX_LOCK(txr);
1567 		em_txeof(txr);
1568 #ifdef EM_MULTIQUEUE
1569 		if (!drbr_empty(ifp, txr->br))
1570 			em_mq_start_locked(ifp, txr);
1571 #else
1572 		if (!if_sendq_empty(ifp))
1573 			em_start_locked(ifp, txr);
1574 #endif
1575 		EM_TX_UNLOCK(txr);
1576 		if (more) {
1577 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1578 			return;
1579 		}
1580 	}
1581 
1582 	em_enable_intr(adapter);
1583 	return;
1584 }
1585 
1586 
1587 /*********************************************************************
1588  *
1589  *  MSIX Interrupt Service Routines
1590  *
1591  **********************************************************************/
1592 static void
1593 em_msix_tx(void *arg)
1594 {
1595 	struct tx_ring *txr = arg;
1596 	struct adapter *adapter = txr->adapter;
1597 	if_t ifp = adapter->ifp;
1598 
1599 	++txr->tx_irq;
1600 	EM_TX_LOCK(txr);
1601 	em_txeof(txr);
1602 #ifdef EM_MULTIQUEUE
1603 	if (!drbr_empty(ifp, txr->br))
1604 		em_mq_start_locked(ifp, txr);
1605 #else
1606 	if (!if_sendq_empty(ifp))
1607 		em_start_locked(ifp, txr);
1608 #endif
1609 
1610 	/* Reenable this interrupt */
1611 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1612 	EM_TX_UNLOCK(txr);
1613 	return;
1614 }
1615 
1616 /*********************************************************************
1617  *
1618  *  MSIX RX Interrupt Service routine
1619  *
1620  **********************************************************************/
1621 
1622 static void
1623 em_msix_rx(void *arg)
1624 {
1625 	struct rx_ring	*rxr = arg;
1626 	struct adapter	*adapter = rxr->adapter;
1627 	bool		more;
1628 
1629 	++rxr->rx_irq;
1630 	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1631 		return;
1632 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1633 	if (more)
1634 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1635 	else {
1636 		/* Reenable this interrupt */
1637 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1638 	}
1639 	return;
1640 }
1641 
1642 /*********************************************************************
1643  *
1644  *  MSIX Link Fast Interrupt Service routine
1645  *
1646  **********************************************************************/
1647 static void
1648 em_msix_link(void *arg)
1649 {
1650 	struct adapter	*adapter = arg;
1651 	u32		reg_icr;
1652 
1653 	++adapter->link_irq;
1654 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1655 
1656 	if (reg_icr & E1000_ICR_RXO)
1657 		adapter->rx_overruns++;
1658 
1659 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1660 		adapter->hw.mac.get_link_status = 1;
1661 		em_handle_link(adapter, 0);
1662 	} else
1663 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1664 		    EM_MSIX_LINK | E1000_IMS_LSC);
1665 	/*
1666  	** Because we must read the ICR for this interrupt
1667  	** it may clear other causes using autoclear, for
1668  	** this reason we simply create a soft interrupt
1669  	** for all these vectors.
1670  	*/
1671 	if (reg_icr) {
1672 		E1000_WRITE_REG(&adapter->hw,
1673 			E1000_ICS, adapter->ims);
1674 	}
1675 	return;
1676 }
1677 
1678 static void
1679 em_handle_rx(void *context, int pending)
1680 {
1681 	struct rx_ring	*rxr = context;
1682 	struct adapter	*adapter = rxr->adapter;
1683         bool            more;
1684 
1685 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1686 	if (more)
1687 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1688 	else {
1689 		/* Reenable this interrupt */
1690 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1691 	}
1692 }
1693 
1694 static void
1695 em_handle_tx(void *context, int pending)
1696 {
1697 	struct tx_ring	*txr = context;
1698 	struct adapter	*adapter = txr->adapter;
1699 	if_t ifp = adapter->ifp;
1700 
1701 	EM_TX_LOCK(txr);
1702 	em_txeof(txr);
1703 #ifdef EM_MULTIQUEUE
1704 	if (!drbr_empty(ifp, txr->br))
1705 		em_mq_start_locked(ifp, txr);
1706 #else
1707 	if (!if_sendq_empty(ifp))
1708 		em_start_locked(ifp, txr);
1709 #endif
1710 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1711 	EM_TX_UNLOCK(txr);
1712 }
1713 
1714 static void
1715 em_handle_link(void *context, int pending)
1716 {
1717 	struct adapter	*adapter = context;
1718 	struct tx_ring	*txr = adapter->tx_rings;
1719 	if_t ifp = adapter->ifp;
1720 
1721 	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1722 		return;
1723 
1724 	EM_CORE_LOCK(adapter);
1725 	callout_stop(&adapter->timer);
1726 	em_update_link_status(adapter);
1727 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1728 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1729 	    EM_MSIX_LINK | E1000_IMS_LSC);
1730 	if (adapter->link_active) {
1731 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1732 			EM_TX_LOCK(txr);
1733 #ifdef EM_MULTIQUEUE
1734 			if (!drbr_empty(ifp, txr->br))
1735 				em_mq_start_locked(ifp, txr);
1736 #else
1737 			if (if_sendq_empty(ifp))
1738 				em_start_locked(ifp, txr);
1739 #endif
1740 			EM_TX_UNLOCK(txr);
1741 		}
1742 	}
1743 	EM_CORE_UNLOCK(adapter);
1744 }
1745 
1746 
1747 /*********************************************************************
1748  *
1749  *  Media Ioctl callback
1750  *
1751  *  This routine is called whenever the user queries the status of
1752  *  the interface using ifconfig.
1753  *
1754  **********************************************************************/
1755 static void
1756 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1757 {
1758 	struct adapter *adapter = if_getsoftc(ifp);
1759 	u_char fiber_type = IFM_1000_SX;
1760 
1761 	INIT_DEBUGOUT("em_media_status: begin");
1762 
1763 	EM_CORE_LOCK(adapter);
1764 	em_update_link_status(adapter);
1765 
1766 	ifmr->ifm_status = IFM_AVALID;
1767 	ifmr->ifm_active = IFM_ETHER;
1768 
1769 	if (!adapter->link_active) {
1770 		EM_CORE_UNLOCK(adapter);
1771 		return;
1772 	}
1773 
1774 	ifmr->ifm_status |= IFM_ACTIVE;
1775 
1776 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1777 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1778 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1779 	} else {
1780 		switch (adapter->link_speed) {
1781 		case 10:
1782 			ifmr->ifm_active |= IFM_10_T;
1783 			break;
1784 		case 100:
1785 			ifmr->ifm_active |= IFM_100_TX;
1786 			break;
1787 		case 1000:
1788 			ifmr->ifm_active |= IFM_1000_T;
1789 			break;
1790 		}
1791 		if (adapter->link_duplex == FULL_DUPLEX)
1792 			ifmr->ifm_active |= IFM_FDX;
1793 		else
1794 			ifmr->ifm_active |= IFM_HDX;
1795 	}
1796 	EM_CORE_UNLOCK(adapter);
1797 }
1798 
1799 /*********************************************************************
1800  *
1801  *  Media Ioctl callback
1802  *
1803  *  This routine is called when the user changes speed/duplex using
1804  *  media/mediopt option with ifconfig.
1805  *
1806  **********************************************************************/
1807 static int
1808 em_media_change(if_t ifp)
1809 {
1810 	struct adapter *adapter = if_getsoftc(ifp);
1811 	struct ifmedia  *ifm = &adapter->media;
1812 
1813 	INIT_DEBUGOUT("em_media_change: begin");
1814 
1815 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1816 		return (EINVAL);
1817 
1818 	EM_CORE_LOCK(adapter);
1819 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1820 	case IFM_AUTO:
1821 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1822 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1823 		break;
1824 	case IFM_1000_LX:
1825 	case IFM_1000_SX:
1826 	case IFM_1000_T:
1827 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1828 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1829 		break;
1830 	case IFM_100_TX:
1831 		adapter->hw.mac.autoneg = FALSE;
1832 		adapter->hw.phy.autoneg_advertised = 0;
1833 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1834 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1835 		else
1836 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1837 		break;
1838 	case IFM_10_T:
1839 		adapter->hw.mac.autoneg = FALSE;
1840 		adapter->hw.phy.autoneg_advertised = 0;
1841 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1842 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1843 		else
1844 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1845 		break;
1846 	default:
1847 		device_printf(adapter->dev, "Unsupported media type\n");
1848 	}
1849 
1850 	em_init_locked(adapter);
1851 	EM_CORE_UNLOCK(adapter);
1852 
1853 	return (0);
1854 }
1855 
1856 /*********************************************************************
1857  *
1858  *  This routine maps the mbufs to tx descriptors.
1859  *
1860  *  return 0 on success, positive on failure
1861  **********************************************************************/
1862 
1863 static int
1864 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1865 {
1866 	struct adapter		*adapter = txr->adapter;
1867 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1868 	bus_dmamap_t		map;
1869 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1870 	struct e1000_tx_desc	*ctxd = NULL;
1871 	struct mbuf		*m_head;
1872 	struct ether_header	*eh;
1873 	struct ip		*ip = NULL;
1874 	struct tcphdr		*tp = NULL;
1875 	u32			txd_upper = 0, txd_lower = 0, txd_used = 0;
1876 	int			ip_off, poff;
1877 	int			nsegs, i, j, first, last = 0;
1878 	int			error, do_tso, tso_desc = 0, remap = 1;
1879 
1880 	m_head = *m_headp;
1881 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1882 	ip_off = poff = 0;
1883 
1884 	/*
1885 	 * Intel recommends entire IP/TCP header length reside in a single
1886 	 * buffer. If multiple descriptors are used to describe the IP and
1887 	 * TCP header, each descriptor should describe one or more
1888 	 * complete headers; descriptors referencing only parts of headers
1889 	 * are not supported. If all layer headers are not coalesced into
1890 	 * a single buffer, each buffer should not cross a 4KB boundary,
1891 	 * or be larger than the maximum read request size.
1892 	 * Controller also requires modifing IP/TCP header to make TSO work
1893 	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1894 	 * IP/TCP header into a single buffer to meet the requirement of
1895 	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1896 	 * which also has similiar restrictions.
1897 	 */
1898 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1899 		if (do_tso || (m_head->m_next != NULL &&
1900 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1901 			if (M_WRITABLE(*m_headp) == 0) {
1902 				m_head = m_dup(*m_headp, M_NOWAIT);
1903 				m_freem(*m_headp);
1904 				if (m_head == NULL) {
1905 					*m_headp = NULL;
1906 					return (ENOBUFS);
1907 				}
1908 				*m_headp = m_head;
1909 			}
1910 		}
1911 		/*
1912 		 * XXX
1913 		 * Assume IPv4, we don't have TSO/checksum offload support
1914 		 * for IPv6 yet.
1915 		 */
1916 		ip_off = sizeof(struct ether_header);
1917 		m_head = m_pullup(m_head, ip_off);
1918 		if (m_head == NULL) {
1919 			*m_headp = NULL;
1920 			return (ENOBUFS);
1921 		}
1922 		eh = mtod(m_head, struct ether_header *);
1923 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1924 			ip_off = sizeof(struct ether_vlan_header);
1925 			m_head = m_pullup(m_head, ip_off);
1926 			if (m_head == NULL) {
1927 				*m_headp = NULL;
1928 				return (ENOBUFS);
1929 			}
1930 		}
1931 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1932 		if (m_head == NULL) {
1933 			*m_headp = NULL;
1934 			return (ENOBUFS);
1935 		}
1936 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1937 		poff = ip_off + (ip->ip_hl << 2);
1938 		if (do_tso) {
1939 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1940 			if (m_head == NULL) {
1941 				*m_headp = NULL;
1942 				return (ENOBUFS);
1943 			}
1944 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1945 			/*
1946 			 * TSO workaround:
1947 			 *   pull 4 more bytes of data into it.
1948 			 */
1949 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1950 			if (m_head == NULL) {
1951 				*m_headp = NULL;
1952 				return (ENOBUFS);
1953 			}
1954 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1955 			ip->ip_len = 0;
1956 			ip->ip_sum = 0;
1957 			/*
1958 			 * The pseudo TCP checksum does not include TCP payload
1959 			 * length so driver should recompute the checksum here
1960 			 * what hardware expect to see. This is adherence of
1961 			 * Microsoft's Large Send specification.
1962 			 */
1963 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1964 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1965 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1966 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1967 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1968 			if (m_head == NULL) {
1969 				*m_headp = NULL;
1970 				return (ENOBUFS);
1971 			}
1972 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1973 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1974 			if (m_head == NULL) {
1975 				*m_headp = NULL;
1976 				return (ENOBUFS);
1977 			}
1978 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1979 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1980 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1981 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1982 			if (m_head == NULL) {
1983 				*m_headp = NULL;
1984 				return (ENOBUFS);
1985 			}
1986 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1987 		}
1988 		*m_headp = m_head;
1989 	}
1990 
1991 	/*
1992 	 * Map the packet for DMA
1993 	 *
1994 	 * Capture the first descriptor index,
1995 	 * this descriptor will have the index
1996 	 * of the EOP which is the only one that
1997 	 * now gets a DONE bit writeback.
1998 	 */
1999 	first = txr->next_avail_desc;
2000 	tx_buffer = &txr->tx_buffers[first];
2001 	tx_buffer_mapped = tx_buffer;
2002 	map = tx_buffer->map;
2003 
2004 retry:
2005 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2006 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2007 
2008 	/*
2009 	 * There are two types of errors we can (try) to handle:
2010 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
2011 	 *   out of segments.  Defragment the mbuf chain and try again.
2012 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2013 	 *   at this point in time.  Defer sending and try again later.
2014 	 * All other errors, in particular EINVAL, are fatal and prevent the
2015 	 * mbuf chain from ever going through.  Drop it and report error.
2016 	 */
2017 	if (error == EFBIG && remap) {
2018 		struct mbuf *m;
2019 
2020 		m = m_defrag(*m_headp, M_NOWAIT);
2021 		if (m == NULL) {
2022 			adapter->mbuf_alloc_failed++;
2023 			m_freem(*m_headp);
2024 			*m_headp = NULL;
2025 			return (ENOBUFS);
2026 		}
2027 		*m_headp = m;
2028 
2029 		/* Try it again, but only once */
2030 		remap = 0;
2031 		goto retry;
2032 	} else if (error != 0) {
2033 		adapter->no_tx_dma_setup++;
2034 		m_freem(*m_headp);
2035 		*m_headp = NULL;
2036 		return (error);
2037 	}
2038 
2039 	/*
2040 	 * TSO Hardware workaround, if this packet is not
2041 	 * TSO, and is only a single descriptor long, and
2042 	 * it follows a TSO burst, then we need to add a
2043 	 * sentinel descriptor to prevent premature writeback.
2044 	 */
2045 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
2046 		if (nsegs == 1)
2047 			tso_desc = TRUE;
2048 		txr->tx_tso = FALSE;
2049 	}
2050 
2051         if (nsegs > (txr->tx_avail - 2)) {
2052                 txr->no_desc_avail++;
2053 		bus_dmamap_unload(txr->txtag, map);
2054 		return (ENOBUFS);
2055         }
2056 	m_head = *m_headp;
2057 
2058 	/* Do hardware assists */
2059 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2060 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2061 		    &txd_upper, &txd_lower);
2062 		/* we need to make a final sentinel transmit desc */
2063 		tso_desc = TRUE;
2064 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2065 		em_transmit_checksum_setup(txr, m_head,
2066 		    ip_off, ip, &txd_upper, &txd_lower);
2067 
2068 	if (m_head->m_flags & M_VLANTAG) {
2069 		/* Set the vlan id. */
2070 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2071                 /* Tell hardware to add tag */
2072                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2073         }
2074 
2075 	i = txr->next_avail_desc;
2076 
2077 	/* Set up our transmit descriptors */
2078 	for (j = 0; j < nsegs; j++) {
2079 		bus_size_t seg_len;
2080 		bus_addr_t seg_addr;
2081 
2082 		tx_buffer = &txr->tx_buffers[i];
2083 		ctxd = &txr->tx_base[i];
2084 		seg_addr = segs[j].ds_addr;
2085 		seg_len  = segs[j].ds_len;
2086 		/*
2087 		** TSO Workaround:
2088 		** If this is the last descriptor, we want to
2089 		** split it so we have a small final sentinel
2090 		*/
2091 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2092 			seg_len -= 4;
2093 			ctxd->buffer_addr = htole64(seg_addr);
2094 			ctxd->lower.data = htole32(
2095 			adapter->txd_cmd | txd_lower | seg_len);
2096 			ctxd->upper.data =
2097 			    htole32(txd_upper);
2098 			if (++i == adapter->num_tx_desc)
2099 				i = 0;
2100 			/* Now make the sentinel */
2101 			++txd_used; /* using an extra txd */
2102 			ctxd = &txr->tx_base[i];
2103 			tx_buffer = &txr->tx_buffers[i];
2104 			ctxd->buffer_addr =
2105 			    htole64(seg_addr + seg_len);
2106 			ctxd->lower.data = htole32(
2107 			adapter->txd_cmd | txd_lower | 4);
2108 			ctxd->upper.data =
2109 			    htole32(txd_upper);
2110 			last = i;
2111 			if (++i == adapter->num_tx_desc)
2112 				i = 0;
2113 		} else {
2114 			ctxd->buffer_addr = htole64(seg_addr);
2115 			ctxd->lower.data = htole32(
2116 			adapter->txd_cmd | txd_lower | seg_len);
2117 			ctxd->upper.data =
2118 			    htole32(txd_upper);
2119 			last = i;
2120 			if (++i == adapter->num_tx_desc)
2121 				i = 0;
2122 		}
2123 		tx_buffer->m_head = NULL;
2124 		tx_buffer->next_eop = -1;
2125 	}
2126 
2127 	txr->next_avail_desc = i;
2128 	txr->tx_avail -= nsegs;
2129 	if (tso_desc) /* TSO used an extra for sentinel */
2130 		txr->tx_avail -= txd_used;
2131 
2132         tx_buffer->m_head = m_head;
2133 	/*
2134 	** Here we swap the map so the last descriptor,
2135 	** which gets the completion interrupt has the
2136 	** real map, and the first descriptor gets the
2137 	** unused map from this descriptor.
2138 	*/
2139 	tx_buffer_mapped->map = tx_buffer->map;
2140 	tx_buffer->map = map;
2141         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2142 
2143         /*
2144          * Last Descriptor of Packet
2145 	 * needs End Of Packet (EOP)
2146 	 * and Report Status (RS)
2147          */
2148         ctxd->lower.data |=
2149 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2150 	/*
2151 	 * Keep track in the first buffer which
2152 	 * descriptor will be written back
2153 	 */
2154 	tx_buffer = &txr->tx_buffers[first];
2155 	tx_buffer->next_eop = last;
2156 
2157 	/*
2158 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2159 	 * that this frame is available to transmit.
2160 	 */
2161 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2162 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2163 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2164 
2165 	return (0);
2166 }
2167 
2168 static void
2169 em_set_promisc(struct adapter *adapter)
2170 {
2171 	if_t ifp = adapter->ifp;
2172 	u32		reg_rctl;
2173 
2174 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2175 
2176 	if (if_getflags(ifp) & IFF_PROMISC) {
2177 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2178 		/* Turn this on if you want to see bad packets */
2179 		if (em_debug_sbp)
2180 			reg_rctl |= E1000_RCTL_SBP;
2181 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2182 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2183 		reg_rctl |= E1000_RCTL_MPE;
2184 		reg_rctl &= ~E1000_RCTL_UPE;
2185 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2186 	}
2187 }
2188 
2189 static void
2190 em_disable_promisc(struct adapter *adapter)
2191 {
2192 	if_t		ifp = adapter->ifp;
2193 	u32		reg_rctl;
2194 	int		mcnt = 0;
2195 
2196 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2197 	reg_rctl &=  (~E1000_RCTL_UPE);
2198 	if (if_getflags(ifp) & IFF_ALLMULTI)
2199 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2200 	else
2201 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2202 	/* Don't disable if in MAX groups */
2203 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2204 		reg_rctl &=  (~E1000_RCTL_MPE);
2205 	reg_rctl &=  (~E1000_RCTL_SBP);
2206 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2207 }
2208 
2209 
2210 /*********************************************************************
2211  *  Multicast Update
2212  *
2213  *  This routine is called whenever multicast address list is updated.
2214  *
2215  **********************************************************************/
2216 
2217 static void
2218 em_set_multi(struct adapter *adapter)
2219 {
2220 	if_t ifp = adapter->ifp;
2221 	u32 reg_rctl = 0;
2222 	u8  *mta; /* Multicast array memory */
2223 	int mcnt = 0;
2224 
2225 	IOCTL_DEBUGOUT("em_set_multi: begin");
2226 
2227 	mta = adapter->mta;
2228 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2229 
2230 	if (adapter->hw.mac.type == e1000_82542 &&
2231 	    adapter->hw.revision_id == E1000_REVISION_2) {
2232 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2233 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2234 			e1000_pci_clear_mwi(&adapter->hw);
2235 		reg_rctl |= E1000_RCTL_RST;
2236 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2237 		msec_delay(5);
2238 	}
2239 
2240 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2241 
2242 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2243 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2244 		reg_rctl |= E1000_RCTL_MPE;
2245 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2246 	} else
2247 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2248 
2249 	if (adapter->hw.mac.type == e1000_82542 &&
2250 	    adapter->hw.revision_id == E1000_REVISION_2) {
2251 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2252 		reg_rctl &= ~E1000_RCTL_RST;
2253 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2254 		msec_delay(5);
2255 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2256 			e1000_pci_set_mwi(&adapter->hw);
2257 	}
2258 }
2259 
2260 
2261 /*********************************************************************
2262  *  Timer routine
2263  *
2264  *  This routine checks for link status and updates statistics.
2265  *
2266  **********************************************************************/
2267 
2268 static void
2269 em_local_timer(void *arg)
2270 {
2271 	struct adapter	*adapter = arg;
2272 	if_t ifp = adapter->ifp;
2273 	struct tx_ring	*txr = adapter->tx_rings;
2274 	struct rx_ring	*rxr = adapter->rx_rings;
2275 	u32		trigger = 0;
2276 
2277 	EM_CORE_LOCK_ASSERT(adapter);
2278 
2279 	em_update_link_status(adapter);
2280 	em_update_stats_counters(adapter);
2281 
2282 	/* Reset LAA into RAR[0] on 82571 */
2283 	if ((adapter->hw.mac.type == e1000_82571) &&
2284 	    e1000_get_laa_state_82571(&adapter->hw))
2285 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2286 
2287 	/* Mask to use in the irq trigger */
2288 	if (adapter->msix_mem) {
2289 		for (int i = 0; i < adapter->num_queues; i++, rxr++)
2290 			trigger |= rxr->ims;
2291 		rxr = adapter->rx_rings;
2292 	} else
2293 		trigger = E1000_ICS_RXDMT0;
2294 
2295 	/*
2296 	** Check on the state of the TX queue(s), this
2297 	** can be done without the lock because its RO
2298 	** and the HUNG state will be static if set.
2299 	*/
2300 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2301 		if (txr->busy == EM_TX_HUNG)
2302 			goto hung;
2303 		if (txr->busy >= EM_TX_MAXTRIES)
2304 			txr->busy = EM_TX_HUNG;
2305 		/* Schedule a TX tasklet if needed */
2306 		if (txr->tx_avail <= EM_MAX_SCATTER)
2307 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2308 	}
2309 
2310 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2311 #ifndef DEVICE_POLLING
2312 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2313 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2314 #endif
2315 	return;
2316 hung:
2317 	/* Looks like we're hung */
2318 	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2319 			txr->me);
2320 	em_print_debug_info(adapter);
2321 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2322 	adapter->watchdog_events++;
2323 	em_init_locked(adapter);
2324 }
2325 
2326 
2327 static void
2328 em_update_link_status(struct adapter *adapter)
2329 {
2330 	struct e1000_hw *hw = &adapter->hw;
2331 	if_t ifp = adapter->ifp;
2332 	device_t dev = adapter->dev;
2333 	struct tx_ring *txr = adapter->tx_rings;
2334 	u32 link_check = 0;
2335 
2336 	/* Get the cached link value or read phy for real */
2337 	switch (hw->phy.media_type) {
2338 	case e1000_media_type_copper:
2339 		if (hw->mac.get_link_status) {
2340 			/* Do the work to read phy */
2341 			e1000_check_for_link(hw);
2342 			link_check = !hw->mac.get_link_status;
2343 			if (link_check) /* ESB2 fix */
2344 				e1000_cfg_on_link_up(hw);
2345 		} else
2346 			link_check = TRUE;
2347 		break;
2348 	case e1000_media_type_fiber:
2349 		e1000_check_for_link(hw);
2350 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2351                                  E1000_STATUS_LU);
2352 		break;
2353 	case e1000_media_type_internal_serdes:
2354 		e1000_check_for_link(hw);
2355 		link_check = adapter->hw.mac.serdes_has_link;
2356 		break;
2357 	default:
2358 	case e1000_media_type_unknown:
2359 		break;
2360 	}
2361 
2362 	/* Now check for a transition */
2363 	if (link_check && (adapter->link_active == 0)) {
2364 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2365 		    &adapter->link_duplex);
2366 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2367 		if ((adapter->link_speed != SPEED_1000) &&
2368 		    ((hw->mac.type == e1000_82571) ||
2369 		    (hw->mac.type == e1000_82572))) {
2370 			int tarc0;
2371 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2372 			tarc0 &= ~TARC_SPEED_MODE_BIT;
2373 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2374 		}
2375 		if (bootverbose)
2376 			device_printf(dev, "Link is up %d Mbps %s\n",
2377 			    adapter->link_speed,
2378 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2379 			    "Full Duplex" : "Half Duplex"));
2380 		adapter->link_active = 1;
2381 		adapter->smartspeed = 0;
2382 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2383 		if_link_state_change(ifp, LINK_STATE_UP);
2384 	} else if (!link_check && (adapter->link_active == 1)) {
2385 		if_setbaudrate(ifp, 0);
2386 		adapter->link_speed = 0;
2387 		adapter->link_duplex = 0;
2388 		if (bootverbose)
2389 			device_printf(dev, "Link is Down\n");
2390 		adapter->link_active = 0;
2391 		/* Link down, disable hang detection */
2392 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2393 			txr->busy = EM_TX_IDLE;
2394 		if_link_state_change(ifp, LINK_STATE_DOWN);
2395 	}
2396 }
2397 
2398 /*********************************************************************
2399  *
2400  *  This routine disables all traffic on the adapter by issuing a
2401  *  global reset on the MAC and deallocates TX/RX buffers.
2402  *
2403  *  This routine should always be called with BOTH the CORE
2404  *  and TX locks.
2405  **********************************************************************/
2406 
2407 static void
2408 em_stop(void *arg)
2409 {
2410 	struct adapter	*adapter = arg;
2411 	if_t ifp = adapter->ifp;
2412 	struct tx_ring	*txr = adapter->tx_rings;
2413 
2414 	EM_CORE_LOCK_ASSERT(adapter);
2415 
2416 	INIT_DEBUGOUT("em_stop: begin");
2417 
2418 	em_disable_intr(adapter);
2419 	callout_stop(&adapter->timer);
2420 
2421 	/* Tell the stack that the interface is no longer active */
2422 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2423 
2424         /* Disarm Hang Detection. */
2425 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2426 		EM_TX_LOCK(txr);
2427 		txr->busy = EM_TX_IDLE;
2428 		EM_TX_UNLOCK(txr);
2429 	}
2430 
2431 	e1000_reset_hw(&adapter->hw);
2432 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2433 
2434 	e1000_led_off(&adapter->hw);
2435 	e1000_cleanup_led(&adapter->hw);
2436 }
2437 
2438 
2439 /*********************************************************************
2440  *
2441  *  Determine hardware revision.
2442  *
2443  **********************************************************************/
2444 static void
2445 em_identify_hardware(struct adapter *adapter)
2446 {
2447 	device_t dev = adapter->dev;
2448 
2449 	/* Make sure our PCI config space has the necessary stuff set */
2450 	pci_enable_busmaster(dev);
2451 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2452 
2453 	/* Save off the information about this board */
2454 	adapter->hw.vendor_id = pci_get_vendor(dev);
2455 	adapter->hw.device_id = pci_get_device(dev);
2456 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2457 	adapter->hw.subsystem_vendor_id =
2458 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2459 	adapter->hw.subsystem_device_id =
2460 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2461 
2462 	/* Do Shared Code Init and Setup */
2463 	if (e1000_set_mac_type(&adapter->hw)) {
2464 		device_printf(dev, "Setup init failure\n");
2465 		return;
2466 	}
2467 }
2468 
2469 static int
2470 em_allocate_pci_resources(struct adapter *adapter)
2471 {
2472 	device_t	dev = adapter->dev;
2473 	int		rid;
2474 
2475 	rid = PCIR_BAR(0);
2476 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2477 	    &rid, RF_ACTIVE);
2478 	if (adapter->memory == NULL) {
2479 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2480 		return (ENXIO);
2481 	}
2482 	adapter->osdep.mem_bus_space_tag =
2483 	    rman_get_bustag(adapter->memory);
2484 	adapter->osdep.mem_bus_space_handle =
2485 	    rman_get_bushandle(adapter->memory);
2486 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2487 
2488 	adapter->hw.back = &adapter->osdep;
2489 
2490 	return (0);
2491 }
2492 
2493 /*********************************************************************
2494  *
2495  *  Setup the Legacy or MSI Interrupt handler
2496  *
2497  **********************************************************************/
2498 int
2499 em_allocate_legacy(struct adapter *adapter)
2500 {
2501 	device_t dev = adapter->dev;
2502 	struct tx_ring	*txr = adapter->tx_rings;
2503 	int error, rid = 0;
2504 
2505 	/* Manually turn off all interrupts */
2506 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2507 
2508 	if (adapter->msix == 1) /* using MSI */
2509 		rid = 1;
2510 	/* We allocate a single interrupt resource */
2511 	adapter->res = bus_alloc_resource_any(dev,
2512 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2513 	if (adapter->res == NULL) {
2514 		device_printf(dev, "Unable to allocate bus resource: "
2515 		    "interrupt\n");
2516 		return (ENXIO);
2517 	}
2518 
2519 	/*
2520 	 * Allocate a fast interrupt and the associated
2521 	 * deferred processing contexts.
2522 	 */
2523 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2524 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2525 	    taskqueue_thread_enqueue, &adapter->tq);
2526 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2527 	    device_get_nameunit(adapter->dev));
2528 	/* Use a TX only tasklet for local timer */
2529 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2530 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2531 	    taskqueue_thread_enqueue, &txr->tq);
2532 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2533 	    device_get_nameunit(adapter->dev));
2534 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2535 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2536 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2537 		device_printf(dev, "Failed to register fast interrupt "
2538 			    "handler: %d\n", error);
2539 		taskqueue_free(adapter->tq);
2540 		adapter->tq = NULL;
2541 		return (error);
2542 	}
2543 
2544 	return (0);
2545 }
2546 
2547 /*********************************************************************
2548  *
2549  *  Setup the MSIX Interrupt handlers
2550  *   This is not really Multiqueue, rather
2551  *   its just seperate interrupt vectors
2552  *   for TX, RX, and Link.
2553  *
2554  **********************************************************************/
2555 int
2556 em_allocate_msix(struct adapter *adapter)
2557 {
2558 	device_t	dev = adapter->dev;
2559 	struct		tx_ring *txr = adapter->tx_rings;
2560 	struct		rx_ring *rxr = adapter->rx_rings;
2561 	int		error, rid, vector = 0;
2562 	int		cpu_id = 0;
2563 
2564 
2565 	/* Make sure all interrupts are disabled */
2566 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2567 
2568 	/* First set up ring resources */
2569 	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2570 
2571 		/* RX ring */
2572 		rid = vector + 1;
2573 
2574 		rxr->res = bus_alloc_resource_any(dev,
2575 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2576 		if (rxr->res == NULL) {
2577 			device_printf(dev,
2578 			    "Unable to allocate bus resource: "
2579 			    "RX MSIX Interrupt %d\n", i);
2580 			return (ENXIO);
2581 		}
2582 		if ((error = bus_setup_intr(dev, rxr->res,
2583 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2584 		    rxr, &rxr->tag)) != 0) {
2585 			device_printf(dev, "Failed to register RX handler");
2586 			return (error);
2587 		}
2588 #if __FreeBSD_version >= 800504
2589 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2590 #endif
2591 		rxr->msix = vector;
2592 
2593 		if (em_last_bind_cpu < 0)
2594 			em_last_bind_cpu = CPU_FIRST();
2595 		cpu_id = em_last_bind_cpu;
2596 		bus_bind_intr(dev, rxr->res, cpu_id);
2597 
2598 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2599 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2600 		    taskqueue_thread_enqueue, &rxr->tq);
2601 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2602 		    device_get_nameunit(adapter->dev), cpu_id);
2603 		/*
2604 		** Set the bit to enable interrupt
2605 		** in E1000_IMS -- bits 20 and 21
2606 		** are for RX0 and RX1, note this has
2607 		** NOTHING to do with the MSIX vector
2608 		*/
2609 		rxr->ims = 1 << (20 + i);
2610 		adapter->ims |= rxr->ims;
2611 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2612 
2613 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2614 	}
2615 
2616 	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2617 		/* TX ring */
2618 		rid = vector + 1;
2619 		txr->res = bus_alloc_resource_any(dev,
2620 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2621 		if (txr->res == NULL) {
2622 			device_printf(dev,
2623 			    "Unable to allocate bus resource: "
2624 			    "TX MSIX Interrupt %d\n", i);
2625 			return (ENXIO);
2626 		}
2627 		if ((error = bus_setup_intr(dev, txr->res,
2628 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2629 		    txr, &txr->tag)) != 0) {
2630 			device_printf(dev, "Failed to register TX handler");
2631 			return (error);
2632 		}
2633 #if __FreeBSD_version >= 800504
2634 		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2635 #endif
2636 		txr->msix = vector;
2637 
2638                 if (em_last_bind_cpu < 0)
2639                         em_last_bind_cpu = CPU_FIRST();
2640                 cpu_id = em_last_bind_cpu;
2641                 bus_bind_intr(dev, txr->res, cpu_id);
2642 
2643 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2644 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2645 		    taskqueue_thread_enqueue, &txr->tq);
2646 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2647 		    device_get_nameunit(adapter->dev), cpu_id);
2648 		/*
2649 		** Set the bit to enable interrupt
2650 		** in E1000_IMS -- bits 22 and 23
2651 		** are for TX0 and TX1, note this has
2652 		** NOTHING to do with the MSIX vector
2653 		*/
2654 		txr->ims = 1 << (22 + i);
2655 		adapter->ims |= txr->ims;
2656 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2657 
2658 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2659 	}
2660 
2661 	/* Link interrupt */
2662 	rid = vector + 1;
2663 	adapter->res = bus_alloc_resource_any(dev,
2664 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2665 	if (!adapter->res) {
2666 		device_printf(dev,"Unable to allocate "
2667 		    "bus resource: Link interrupt [%d]\n", rid);
2668 		return (ENXIO);
2669         }
2670 	/* Set the link handler function */
2671 	error = bus_setup_intr(dev, adapter->res,
2672 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2673 	    em_msix_link, adapter, &adapter->tag);
2674 	if (error) {
2675 		adapter->res = NULL;
2676 		device_printf(dev, "Failed to register LINK handler");
2677 		return (error);
2678 	}
2679 #if __FreeBSD_version >= 800504
2680 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2681 #endif
2682 	adapter->linkvec = vector;
2683 	adapter->ivars |=  (8 | vector) << 16;
2684 	adapter->ivars |= 0x80000000;
2685 
2686 	return (0);
2687 }
2688 
2689 
2690 static void
2691 em_free_pci_resources(struct adapter *adapter)
2692 {
2693 	device_t	dev = adapter->dev;
2694 	struct tx_ring	*txr;
2695 	struct rx_ring	*rxr;
2696 	int		rid;
2697 
2698 
2699 	/*
2700 	** Release all the queue interrupt resources:
2701 	*/
2702 	for (int i = 0; i < adapter->num_queues; i++) {
2703 		txr = &adapter->tx_rings[i];
2704 		/* an early abort? */
2705 		if (txr == NULL)
2706 			break;
2707 		rid = txr->msix +1;
2708 		if (txr->tag != NULL) {
2709 			bus_teardown_intr(dev, txr->res, txr->tag);
2710 			txr->tag = NULL;
2711 		}
2712 		if (txr->res != NULL)
2713 			bus_release_resource(dev, SYS_RES_IRQ,
2714 			    rid, txr->res);
2715 
2716 		rxr = &adapter->rx_rings[i];
2717 		/* an early abort? */
2718 		if (rxr == NULL)
2719 			break;
2720 		rid = rxr->msix +1;
2721 		if (rxr->tag != NULL) {
2722 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2723 			rxr->tag = NULL;
2724 		}
2725 		if (rxr->res != NULL)
2726 			bus_release_resource(dev, SYS_RES_IRQ,
2727 			    rid, rxr->res);
2728 	}
2729 
2730         if (adapter->linkvec) /* we are doing MSIX */
2731                 rid = adapter->linkvec + 1;
2732         else
2733                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2734 
2735 	if (adapter->tag != NULL) {
2736 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2737 		adapter->tag = NULL;
2738 	}
2739 
2740 	if (adapter->res != NULL)
2741 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2742 
2743 
2744 	if (adapter->msix)
2745 		pci_release_msi(dev);
2746 
2747 	if (adapter->msix_mem != NULL)
2748 		bus_release_resource(dev, SYS_RES_MEMORY,
2749 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2750 
2751 	if (adapter->memory != NULL)
2752 		bus_release_resource(dev, SYS_RES_MEMORY,
2753 		    PCIR_BAR(0), adapter->memory);
2754 
2755 	if (adapter->flash != NULL)
2756 		bus_release_resource(dev, SYS_RES_MEMORY,
2757 		    EM_FLASH, adapter->flash);
2758 }
2759 
2760 /*
2761  * Setup MSI or MSI/X
2762  */
2763 static int
2764 em_setup_msix(struct adapter *adapter)
2765 {
2766 	device_t dev = adapter->dev;
2767 	int val;
2768 
2769 	/* Nearly always going to use one queue */
2770 	adapter->num_queues = 1;
2771 
2772 	/*
2773 	** Try using MSI-X for Hartwell adapters
2774 	*/
2775 	if ((adapter->hw.mac.type == e1000_82574) &&
2776 	    (em_enable_msix == TRUE)) {
2777 #ifdef EM_MULTIQUEUE
2778 		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2779 		if (adapter->num_queues > 1)
2780 			em_enable_vectors_82574(adapter);
2781 #endif
2782 		/* Map the MSIX BAR */
2783 		int rid = PCIR_BAR(EM_MSIX_BAR);
2784 		adapter->msix_mem = bus_alloc_resource_any(dev,
2785 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2786        		if (adapter->msix_mem == NULL) {
2787 			/* May not be enabled */
2788                		device_printf(adapter->dev,
2789 			    "Unable to map MSIX table \n");
2790 			goto msi;
2791        		}
2792 		val = pci_msix_count(dev);
2793 
2794 #ifdef EM_MULTIQUEUE
2795 		/* We need 5 vectors in the multiqueue case */
2796 		if (adapter->num_queues > 1 ) {
2797 			if (val >= 5)
2798 				val = 5;
2799 			else {
2800 				adapter->num_queues = 1;
2801 				device_printf(adapter->dev,
2802 				    "Insufficient MSIX vectors for >1 queue, "
2803 				    "using single queue...\n");
2804 				goto msix_one;
2805 			}
2806 		} else {
2807 msix_one:
2808 #endif
2809 			if (val >= 3)
2810 				val = 3;
2811 			else {
2812 				device_printf(adapter->dev,
2813 			    	"Insufficient MSIX vectors, using MSI\n");
2814 				goto msi;
2815 			}
2816 #ifdef EM_MULTIQUEUE
2817 		}
2818 #endif
2819 
2820 		if ((pci_alloc_msix(dev, &val) == 0)) {
2821 			device_printf(adapter->dev,
2822 			    "Using MSIX interrupts "
2823 			    "with %d vectors\n", val);
2824 			return (val);
2825 		}
2826 
2827 		/*
2828 		** If MSIX alloc failed or provided us with
2829 		** less than needed, free and fall through to MSI
2830 		*/
2831 		pci_release_msi(dev);
2832 	}
2833 msi:
2834 	if (adapter->msix_mem != NULL) {
2835 		bus_release_resource(dev, SYS_RES_MEMORY,
2836 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2837 		adapter->msix_mem = NULL;
2838 	}
2839        	val = 1;
2840        	if (pci_alloc_msi(dev, &val) == 0) {
2841                	device_printf(adapter->dev, "Using an MSI interrupt\n");
2842 		return (val);
2843 	}
2844 	/* Should only happen due to manual configuration */
2845 	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2846 	return (0);
2847 }
2848 
2849 
2850 /*********************************************************************
2851  *
2852  *  Initialize the hardware to a configuration
2853  *  as specified by the adapter structure.
2854  *
2855  **********************************************************************/
2856 static void
2857 em_reset(struct adapter *adapter)
2858 {
2859 	device_t	dev = adapter->dev;
2860 	if_t ifp = adapter->ifp;
2861 	struct e1000_hw	*hw = &adapter->hw;
2862 	u16		rx_buffer_size;
2863 	u32		pba;
2864 
2865 	INIT_DEBUGOUT("em_reset: begin");
2866 
2867 	/* Set up smart power down as default off on newer adapters. */
2868 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2869 	    hw->mac.type == e1000_82572)) {
2870 		u16 phy_tmp = 0;
2871 
2872 		/* Speed up time to link by disabling smart power down. */
2873 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2874 		phy_tmp &= ~IGP02E1000_PM_SPD;
2875 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2876 	}
2877 
2878 	/*
2879 	 * Packet Buffer Allocation (PBA)
2880 	 * Writing PBA sets the receive portion of the buffer
2881 	 * the remainder is used for the transmit buffer.
2882 	 */
2883 	switch (hw->mac.type) {
2884 	/* Total Packet Buffer on these is 48K */
2885 	case e1000_82571:
2886 	case e1000_82572:
2887 	case e1000_80003es2lan:
2888 			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2889 		break;
2890 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2891 			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2892 		break;
2893 	case e1000_82574:
2894 	case e1000_82583:
2895 			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2896 		break;
2897 	case e1000_ich8lan:
2898 		pba = E1000_PBA_8K;
2899 		break;
2900 	case e1000_ich9lan:
2901 	case e1000_ich10lan:
2902 		/* Boost Receive side for jumbo frames */
2903 		if (adapter->hw.mac.max_frame_size > 4096)
2904 			pba = E1000_PBA_14K;
2905 		else
2906 			pba = E1000_PBA_10K;
2907 		break;
2908 	case e1000_pchlan:
2909 	case e1000_pch2lan:
2910 	case e1000_pch_lpt:
2911 		pba = E1000_PBA_26K;
2912 		break;
2913 	default:
2914 		if (adapter->hw.mac.max_frame_size > 8192)
2915 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2916 		else
2917 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2918 	}
2919 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2920 
2921 	/*
2922 	 * These parameters control the automatic generation (Tx) and
2923 	 * response (Rx) to Ethernet PAUSE frames.
2924 	 * - High water mark should allow for at least two frames to be
2925 	 *   received after sending an XOFF.
2926 	 * - Low water mark works best when it is very near the high water mark.
2927 	 *   This allows the receiver to restart by sending XON when it has
2928 	 *   drained a bit. Here we use an arbitary value of 1500 which will
2929 	 *   restart after one full frame is pulled from the buffer. There
2930 	 *   could be several smaller frames in the buffer and if so they will
2931 	 *   not trigger the XON until their total number reduces the buffer
2932 	 *   by 1500.
2933 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2934 	 */
2935 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2936 	hw->fc.high_water = rx_buffer_size -
2937 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2938 	hw->fc.low_water = hw->fc.high_water - 1500;
2939 
2940 	if (adapter->fc) /* locally set flow control value? */
2941 		hw->fc.requested_mode = adapter->fc;
2942 	else
2943 		hw->fc.requested_mode = e1000_fc_full;
2944 
2945 	if (hw->mac.type == e1000_80003es2lan)
2946 		hw->fc.pause_time = 0xFFFF;
2947 	else
2948 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2949 
2950 	hw->fc.send_xon = TRUE;
2951 
2952 	/* Device specific overrides/settings */
2953 	switch (hw->mac.type) {
2954 	case e1000_pchlan:
2955 		/* Workaround: no TX flow ctrl for PCH */
2956                 hw->fc.requested_mode = e1000_fc_rx_pause;
2957 		hw->fc.pause_time = 0xFFFF; /* override */
2958 		if (if_getmtu(ifp) > ETHERMTU) {
2959 			hw->fc.high_water = 0x3500;
2960 			hw->fc.low_water = 0x1500;
2961 		} else {
2962 			hw->fc.high_water = 0x5000;
2963 			hw->fc.low_water = 0x3000;
2964 		}
2965 		hw->fc.refresh_time = 0x1000;
2966 		break;
2967 	case e1000_pch2lan:
2968 	case e1000_pch_lpt:
2969 		hw->fc.high_water = 0x5C20;
2970 		hw->fc.low_water = 0x5048;
2971 		hw->fc.pause_time = 0x0650;
2972 		hw->fc.refresh_time = 0x0400;
2973 		/* Jumbos need adjusted PBA */
2974 		if (if_getmtu(ifp) > ETHERMTU)
2975 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2976 		else
2977 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2978 		break;
2979         case e1000_ich9lan:
2980         case e1000_ich10lan:
2981 		if (if_getmtu(ifp) > ETHERMTU) {
2982 			hw->fc.high_water = 0x2800;
2983 			hw->fc.low_water = hw->fc.high_water - 8;
2984 			break;
2985 		}
2986 		/* else fall thru */
2987 	default:
2988 		if (hw->mac.type == e1000_80003es2lan)
2989 			hw->fc.pause_time = 0xFFFF;
2990 		break;
2991 	}
2992 
2993 	/* Issue a global reset */
2994 	e1000_reset_hw(hw);
2995 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2996 	em_disable_aspm(adapter);
2997 	/* and a re-init */
2998 	if (e1000_init_hw(hw) < 0) {
2999 		device_printf(dev, "Hardware Initialization Failed\n");
3000 		return;
3001 	}
3002 
3003 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3004 	e1000_get_phy_info(hw);
3005 	e1000_check_for_link(hw);
3006 	return;
3007 }
3008 
3009 /*********************************************************************
3010  *
3011  *  Setup networking device structure and register an interface.
3012  *
3013  **********************************************************************/
3014 static int
3015 em_setup_interface(device_t dev, struct adapter *adapter)
3016 {
3017 	if_t ifp;
3018 
3019 	INIT_DEBUGOUT("em_setup_interface: begin");
3020 
3021 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3022 	if (ifp == 0) {
3023 		device_printf(dev, "can not allocate ifnet structure\n");
3024 		return (-1);
3025 	}
3026 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3027 	if_setdev(ifp, dev);
3028 	if_setinitfn(ifp, em_init);
3029 	if_setsoftc(ifp, adapter);
3030 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3031 	if_setioctlfn(ifp, em_ioctl);
3032 	if_setgetcounterfn(ifp, em_get_counter);
3033 #ifdef EM_MULTIQUEUE
3034 	/* Multiqueue stack interface */
3035 	if_settransmitfn(ifp, em_mq_start);
3036 	if_setqflushfn(ifp, em_qflush);
3037 #else
3038 	if_setstartfn(ifp, em_start);
3039 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3040 	if_setsendqready(ifp);
3041 #endif
3042 
3043 	ether_ifattach(ifp, adapter->hw.mac.addr);
3044 
3045 	if_setcapabilities(ifp, 0);
3046 	if_setcapenable(ifp, 0);
3047 
3048 
3049 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3050 	    IFCAP_TSO4, 0);
3051 	/*
3052 	 * Tell the upper layer(s) we
3053 	 * support full VLAN capability
3054 	 */
3055 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3056 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3057 	    IFCAP_VLAN_MTU, 0);
3058 	if_setcapenable(ifp, if_getcapabilities(ifp));
3059 
3060 	/*
3061 	** Don't turn this on by default, if vlans are
3062 	** created on another pseudo device (eg. lagg)
3063 	** then vlan events are not passed thru, breaking
3064 	** operation, but with HW FILTER off it works. If
3065 	** using vlans directly on the em driver you can
3066 	** enable this and get full hardware tag filtering.
3067 	*/
3068 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
3069 
3070 #ifdef DEVICE_POLLING
3071 	if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
3072 #endif
3073 
3074 	/* Enable only WOL MAGIC by default */
3075 	if (adapter->wol) {
3076 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3077 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3078 	}
3079 
3080 	/*
3081 	 * Specify the media types supported by this adapter and register
3082 	 * callbacks to update media and link information
3083 	 */
3084 	ifmedia_init(&adapter->media, IFM_IMASK,
3085 	    em_media_change, em_media_status);
3086 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3087 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3088 		u_char fiber_type = IFM_1000_SX;	/* default type */
3089 
3090 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3091 			    0, NULL);
3092 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3093 	} else {
3094 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3095 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3096 			    0, NULL);
3097 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3098 			    0, NULL);
3099 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3100 			    0, NULL);
3101 		if (adapter->hw.phy.type != e1000_phy_ife) {
3102 			ifmedia_add(&adapter->media,
3103 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3104 			ifmedia_add(&adapter->media,
3105 				IFM_ETHER | IFM_1000_T, 0, NULL);
3106 		}
3107 	}
3108 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3109 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3110 	return (0);
3111 }
3112 
3113 
3114 /*
3115  * Manage DMA'able memory.
3116  */
3117 static void
3118 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3119 {
3120 	if (error)
3121 		return;
3122 	*(bus_addr_t *) arg = segs[0].ds_addr;
3123 }
3124 
3125 static int
3126 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3127         struct em_dma_alloc *dma, int mapflags)
3128 {
3129 	int error;
3130 
3131 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3132 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3133 				BUS_SPACE_MAXADDR,	/* lowaddr */
3134 				BUS_SPACE_MAXADDR,	/* highaddr */
3135 				NULL, NULL,		/* filter, filterarg */
3136 				size,			/* maxsize */
3137 				1,			/* nsegments */
3138 				size,			/* maxsegsize */
3139 				0,			/* flags */
3140 				NULL,			/* lockfunc */
3141 				NULL,			/* lockarg */
3142 				&dma->dma_tag);
3143 	if (error) {
3144 		device_printf(adapter->dev,
3145 		    "%s: bus_dma_tag_create failed: %d\n",
3146 		    __func__, error);
3147 		goto fail_0;
3148 	}
3149 
3150 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3151 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3152 	if (error) {
3153 		device_printf(adapter->dev,
3154 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3155 		    __func__, (uintmax_t)size, error);
3156 		goto fail_2;
3157 	}
3158 
3159 	dma->dma_paddr = 0;
3160 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3161 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3162 	if (error || dma->dma_paddr == 0) {
3163 		device_printf(adapter->dev,
3164 		    "%s: bus_dmamap_load failed: %d\n",
3165 		    __func__, error);
3166 		goto fail_3;
3167 	}
3168 
3169 	return (0);
3170 
3171 fail_3:
3172 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3173 fail_2:
3174 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3175 	bus_dma_tag_destroy(dma->dma_tag);
3176 fail_0:
3177 	dma->dma_tag = NULL;
3178 
3179 	return (error);
3180 }
3181 
3182 static void
3183 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3184 {
3185 	if (dma->dma_tag == NULL)
3186 		return;
3187 	if (dma->dma_paddr != 0) {
3188 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3189 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3190 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3191 		dma->dma_paddr = 0;
3192 	}
3193 	if (dma->dma_vaddr != NULL) {
3194 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3195 		dma->dma_vaddr = NULL;
3196 	}
3197 	bus_dma_tag_destroy(dma->dma_tag);
3198 	dma->dma_tag = NULL;
3199 }
3200 
3201 
3202 /*********************************************************************
3203  *
3204  *  Allocate memory for the transmit and receive rings, and then
3205  *  the descriptors associated with each, called only once at attach.
3206  *
3207  **********************************************************************/
3208 static int
3209 em_allocate_queues(struct adapter *adapter)
3210 {
3211 	device_t		dev = adapter->dev;
3212 	struct tx_ring		*txr = NULL;
3213 	struct rx_ring		*rxr = NULL;
3214 	int rsize, tsize, error = E1000_SUCCESS;
3215 	int txconf = 0, rxconf = 0;
3216 
3217 
3218 	/* Allocate the TX ring struct memory */
3219 	if (!(adapter->tx_rings =
3220 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3221 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3222 		device_printf(dev, "Unable to allocate TX ring memory\n");
3223 		error = ENOMEM;
3224 		goto fail;
3225 	}
3226 
3227 	/* Now allocate the RX */
3228 	if (!(adapter->rx_rings =
3229 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3230 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3231 		device_printf(dev, "Unable to allocate RX ring memory\n");
3232 		error = ENOMEM;
3233 		goto rx_fail;
3234 	}
3235 
3236 	tsize = roundup2(adapter->num_tx_desc *
3237 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3238 	/*
3239 	 * Now set up the TX queues, txconf is needed to handle the
3240 	 * possibility that things fail midcourse and we need to
3241 	 * undo memory gracefully
3242 	 */
3243 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3244 		/* Set up some basics */
3245 		txr = &adapter->tx_rings[i];
3246 		txr->adapter = adapter;
3247 		txr->me = i;
3248 
3249 		/* Initialize the TX lock */
3250 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3251 		    device_get_nameunit(dev), txr->me);
3252 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3253 
3254 		if (em_dma_malloc(adapter, tsize,
3255 			&txr->txdma, BUS_DMA_NOWAIT)) {
3256 			device_printf(dev,
3257 			    "Unable to allocate TX Descriptor memory\n");
3258 			error = ENOMEM;
3259 			goto err_tx_desc;
3260 		}
3261 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3262 		bzero((void *)txr->tx_base, tsize);
3263 
3264         	if (em_allocate_transmit_buffers(txr)) {
3265 			device_printf(dev,
3266 			    "Critical Failure setting up transmit buffers\n");
3267 			error = ENOMEM;
3268 			goto err_tx_desc;
3269         	}
3270 #if __FreeBSD_version >= 800000
3271 		/* Allocate a buf ring */
3272 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3273 		    M_WAITOK, &txr->tx_mtx);
3274 #endif
3275 	}
3276 
3277 	/*
3278 	 * Next the RX queues...
3279 	 */
3280 	rsize = roundup2(adapter->num_rx_desc *
3281 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3282 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3283 		rxr = &adapter->rx_rings[i];
3284 		rxr->adapter = adapter;
3285 		rxr->me = i;
3286 
3287 		/* Initialize the RX lock */
3288 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3289 		    device_get_nameunit(dev), txr->me);
3290 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3291 
3292 		if (em_dma_malloc(adapter, rsize,
3293 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3294 			device_printf(dev,
3295 			    "Unable to allocate RxDescriptor memory\n");
3296 			error = ENOMEM;
3297 			goto err_rx_desc;
3298 		}
3299 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3300 		bzero((void *)rxr->rx_base, rsize);
3301 
3302         	/* Allocate receive buffers for the ring*/
3303 		if (em_allocate_receive_buffers(rxr)) {
3304 			device_printf(dev,
3305 			    "Critical Failure setting up receive buffers\n");
3306 			error = ENOMEM;
3307 			goto err_rx_desc;
3308 		}
3309 	}
3310 
3311 	return (0);
3312 
3313 err_rx_desc:
3314 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3315 		em_dma_free(adapter, &rxr->rxdma);
3316 err_tx_desc:
3317 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3318 		em_dma_free(adapter, &txr->txdma);
3319 	free(adapter->rx_rings, M_DEVBUF);
3320 rx_fail:
3321 #if __FreeBSD_version >= 800000
3322 	buf_ring_free(txr->br, M_DEVBUF);
3323 #endif
3324 	free(adapter->tx_rings, M_DEVBUF);
3325 fail:
3326 	return (error);
3327 }
3328 
3329 
3330 /*********************************************************************
3331  *
3332  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3333  *  the information needed to transmit a packet on the wire. This is
3334  *  called only once at attach, setup is done every reset.
3335  *
3336  **********************************************************************/
3337 static int
3338 em_allocate_transmit_buffers(struct tx_ring *txr)
3339 {
3340 	struct adapter *adapter = txr->adapter;
3341 	device_t dev = adapter->dev;
3342 	struct em_buffer *txbuf;
3343 	int error, i;
3344 
3345 	/*
3346 	 * Setup DMA descriptor areas.
3347 	 */
3348 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3349 			       1, 0,			/* alignment, bounds */
3350 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3351 			       BUS_SPACE_MAXADDR,	/* highaddr */
3352 			       NULL, NULL,		/* filter, filterarg */
3353 			       EM_TSO_SIZE,		/* maxsize */
3354 			       EM_MAX_SCATTER,		/* nsegments */
3355 			       PAGE_SIZE,		/* maxsegsize */
3356 			       0,			/* flags */
3357 			       NULL,			/* lockfunc */
3358 			       NULL,			/* lockfuncarg */
3359 			       &txr->txtag))) {
3360 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3361 		goto fail;
3362 	}
3363 
3364 	if (!(txr->tx_buffers =
3365 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3366 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3367 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3368 		error = ENOMEM;
3369 		goto fail;
3370 	}
3371 
3372         /* Create the descriptor buffer dma maps */
3373 	txbuf = txr->tx_buffers;
3374 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3375 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3376 		if (error != 0) {
3377 			device_printf(dev, "Unable to create TX DMA map\n");
3378 			goto fail;
3379 		}
3380 	}
3381 
3382 	return 0;
3383 fail:
3384 	/* We free all, it handles case where we are in the middle */
3385 	em_free_transmit_structures(adapter);
3386 	return (error);
3387 }
3388 
3389 /*********************************************************************
3390  *
3391  *  Initialize a transmit ring.
3392  *
3393  **********************************************************************/
3394 static void
3395 em_setup_transmit_ring(struct tx_ring *txr)
3396 {
3397 	struct adapter *adapter = txr->adapter;
3398 	struct em_buffer *txbuf;
3399 	int i;
3400 #ifdef DEV_NETMAP
3401 	struct netmap_slot *slot;
3402 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3403 #endif /* DEV_NETMAP */
3404 
3405 	/* Clear the old descriptor contents */
3406 	EM_TX_LOCK(txr);
3407 #ifdef DEV_NETMAP
3408 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3409 #endif /* DEV_NETMAP */
3410 
3411 	bzero((void *)txr->tx_base,
3412 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3413 	/* Reset indices */
3414 	txr->next_avail_desc = 0;
3415 	txr->next_to_clean = 0;
3416 
3417 	/* Free any existing tx buffers. */
3418         txbuf = txr->tx_buffers;
3419 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3420 		if (txbuf->m_head != NULL) {
3421 			bus_dmamap_sync(txr->txtag, txbuf->map,
3422 			    BUS_DMASYNC_POSTWRITE);
3423 			bus_dmamap_unload(txr->txtag, txbuf->map);
3424 			m_freem(txbuf->m_head);
3425 			txbuf->m_head = NULL;
3426 		}
3427 #ifdef DEV_NETMAP
3428 		if (slot) {
3429 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3430 			uint64_t paddr;
3431 			void *addr;
3432 
3433 			addr = PNMB(na, slot + si, &paddr);
3434 			txr->tx_base[i].buffer_addr = htole64(paddr);
3435 			/* reload the map for netmap mode */
3436 			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3437 		}
3438 #endif /* DEV_NETMAP */
3439 
3440 		/* clear the watch index */
3441 		txbuf->next_eop = -1;
3442         }
3443 
3444 	/* Set number of descriptors available */
3445 	txr->tx_avail = adapter->num_tx_desc;
3446 	txr->busy = EM_TX_IDLE;
3447 
3448 	/* Clear checksum offload context. */
3449 	txr->last_hw_offload = 0;
3450 	txr->last_hw_ipcss = 0;
3451 	txr->last_hw_ipcso = 0;
3452 	txr->last_hw_tucss = 0;
3453 	txr->last_hw_tucso = 0;
3454 
3455 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3456 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3457 	EM_TX_UNLOCK(txr);
3458 }
3459 
3460 /*********************************************************************
3461  *
3462  *  Initialize all transmit rings.
3463  *
3464  **********************************************************************/
3465 static void
3466 em_setup_transmit_structures(struct adapter *adapter)
3467 {
3468 	struct tx_ring *txr = adapter->tx_rings;
3469 
3470 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3471 		em_setup_transmit_ring(txr);
3472 
3473 	return;
3474 }
3475 
3476 /*********************************************************************
3477  *
3478  *  Enable transmit unit.
3479  *
3480  **********************************************************************/
3481 static void
3482 em_initialize_transmit_unit(struct adapter *adapter)
3483 {
3484 	struct tx_ring	*txr = adapter->tx_rings;
3485 	struct e1000_hw	*hw = &adapter->hw;
3486 	u32	tctl, txdctl = 0, tarc, tipg = 0;
3487 
3488 	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3489 
3490 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3491 		u64 bus_addr = txr->txdma.dma_paddr;
3492 		/* Base and Len of TX Ring */
3493 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3494 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3495 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3496 	    	    (u32)(bus_addr >> 32));
3497 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3498 	    	    (u32)bus_addr);
3499 		/* Init the HEAD/TAIL indices */
3500 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3501 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3502 
3503 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3504 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3505 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3506 
3507 		txr->busy = EM_TX_IDLE;
3508 		txdctl = 0; /* clear txdctl */
3509                 txdctl |= 0x1f; /* PTHRESH */
3510                 txdctl |= 1 << 8; /* HTHRESH */
3511                 txdctl |= 1 << 16;/* WTHRESH */
3512 		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3513 		txdctl |= E1000_TXDCTL_GRAN;
3514                 txdctl |= 1 << 25; /* LWTHRESH */
3515 
3516                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3517 	}
3518 
3519 	/* Set the default values for the Tx Inter Packet Gap timer */
3520 	switch (adapter->hw.mac.type) {
3521 	case e1000_80003es2lan:
3522 		tipg = DEFAULT_82543_TIPG_IPGR1;
3523 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3524 		    E1000_TIPG_IPGR2_SHIFT;
3525 		break;
3526 	default:
3527 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3528 		    (adapter->hw.phy.media_type ==
3529 		    e1000_media_type_internal_serdes))
3530 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3531 		else
3532 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3533 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3534 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3535 	}
3536 
3537 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3538 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3539 
3540 	if(adapter->hw.mac.type >= e1000_82540)
3541 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3542 		    adapter->tx_abs_int_delay.value);
3543 
3544 	if ((adapter->hw.mac.type == e1000_82571) ||
3545 	    (adapter->hw.mac.type == e1000_82572)) {
3546 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3547 		tarc |= TARC_SPEED_MODE_BIT;
3548 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3549 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3550 		/* errata: program both queues to unweighted RR */
3551 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3552 		tarc |= 1;
3553 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3554 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3555 		tarc |= 1;
3556 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3557 	} else if (adapter->hw.mac.type == e1000_82574) {
3558 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3559 		tarc |= TARC_ERRATA_BIT;
3560 		if ( adapter->num_queues > 1) {
3561 			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3562 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3563 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3564 		} else
3565 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3566 	}
3567 
3568 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3569 	if (adapter->tx_int_delay.value > 0)
3570 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3571 
3572 	/* Program the Transmit Control Register */
3573 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3574 	tctl &= ~E1000_TCTL_CT;
3575 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3576 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3577 
3578 	if (adapter->hw.mac.type >= e1000_82571)
3579 		tctl |= E1000_TCTL_MULR;
3580 
3581 	/* This write will effectively turn on the transmit unit. */
3582 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3583 
3584 }
3585 
3586 
3587 /*********************************************************************
3588  *
3589  *  Free all transmit rings.
3590  *
3591  **********************************************************************/
3592 static void
3593 em_free_transmit_structures(struct adapter *adapter)
3594 {
3595 	struct tx_ring *txr = adapter->tx_rings;
3596 
3597 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3598 		EM_TX_LOCK(txr);
3599 		em_free_transmit_buffers(txr);
3600 		em_dma_free(adapter, &txr->txdma);
3601 		EM_TX_UNLOCK(txr);
3602 		EM_TX_LOCK_DESTROY(txr);
3603 	}
3604 
3605 	free(adapter->tx_rings, M_DEVBUF);
3606 }
3607 
3608 /*********************************************************************
3609  *
3610  *  Free transmit ring related data structures.
3611  *
3612  **********************************************************************/
3613 static void
3614 em_free_transmit_buffers(struct tx_ring *txr)
3615 {
3616 	struct adapter		*adapter = txr->adapter;
3617 	struct em_buffer	*txbuf;
3618 
3619 	INIT_DEBUGOUT("free_transmit_ring: begin");
3620 
3621 	if (txr->tx_buffers == NULL)
3622 		return;
3623 
3624 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3625 		txbuf = &txr->tx_buffers[i];
3626 		if (txbuf->m_head != NULL) {
3627 			bus_dmamap_sync(txr->txtag, txbuf->map,
3628 			    BUS_DMASYNC_POSTWRITE);
3629 			bus_dmamap_unload(txr->txtag,
3630 			    txbuf->map);
3631 			m_freem(txbuf->m_head);
3632 			txbuf->m_head = NULL;
3633 			if (txbuf->map != NULL) {
3634 				bus_dmamap_destroy(txr->txtag,
3635 				    txbuf->map);
3636 				txbuf->map = NULL;
3637 			}
3638 		} else if (txbuf->map != NULL) {
3639 			bus_dmamap_unload(txr->txtag,
3640 			    txbuf->map);
3641 			bus_dmamap_destroy(txr->txtag,
3642 			    txbuf->map);
3643 			txbuf->map = NULL;
3644 		}
3645 	}
3646 #if __FreeBSD_version >= 800000
3647 	if (txr->br != NULL)
3648 		buf_ring_free(txr->br, M_DEVBUF);
3649 #endif
3650 	if (txr->tx_buffers != NULL) {
3651 		free(txr->tx_buffers, M_DEVBUF);
3652 		txr->tx_buffers = NULL;
3653 	}
3654 	if (txr->txtag != NULL) {
3655 		bus_dma_tag_destroy(txr->txtag);
3656 		txr->txtag = NULL;
3657 	}
3658 	return;
3659 }
3660 
3661 
3662 /*********************************************************************
3663  *  The offload context is protocol specific (TCP/UDP) and thus
3664  *  only needs to be set when the protocol changes. The occasion
3665  *  of a context change can be a performance detriment, and
3666  *  might be better just disabled. The reason arises in the way
3667  *  in which the controller supports pipelined requests from the
3668  *  Tx data DMA. Up to four requests can be pipelined, and they may
3669  *  belong to the same packet or to multiple packets. However all
3670  *  requests for one packet are issued before a request is issued
3671  *  for a subsequent packet and if a request for the next packet
3672  *  requires a context change, that request will be stalled
3673  *  until the previous request completes. This means setting up
3674  *  a new context effectively disables pipelined Tx data DMA which
3675  *  in turn greatly slow down performance to send small sized
3676  *  frames.
3677  **********************************************************************/
3678 static void
3679 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3680     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3681 {
3682 	struct adapter			*adapter = txr->adapter;
3683 	struct e1000_context_desc	*TXD = NULL;
3684 	struct em_buffer		*tx_buffer;
3685 	int				cur, hdr_len;
3686 	u32				cmd = 0;
3687 	u16				offload = 0;
3688 	u8				ipcso, ipcss, tucso, tucss;
3689 
3690 	ipcss = ipcso = tucss = tucso = 0;
3691 	hdr_len = ip_off + (ip->ip_hl << 2);
3692 	cur = txr->next_avail_desc;
3693 
3694 	/* Setup of IP header checksum. */
3695 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3696 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3697 		offload |= CSUM_IP;
3698 		ipcss = ip_off;
3699 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3700 		/*
3701 		 * Start offset for header checksum calculation.
3702 		 * End offset for header checksum calculation.
3703 		 * Offset of place to put the checksum.
3704 		 */
3705 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3706 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3707 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3708 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3709 		cmd |= E1000_TXD_CMD_IP;
3710 	}
3711 
3712 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3713  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3714  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3715  		offload |= CSUM_TCP;
3716  		tucss = hdr_len;
3717  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3718  		/*
3719  		 * Setting up new checksum offload context for every frames
3720  		 * takes a lot of processing time for hardware. This also
3721  		 * reduces performance a lot for small sized frames so avoid
3722  		 * it if driver can use previously configured checksum
3723  		 * offload context.
3724  		 */
3725  		if (txr->last_hw_offload == offload) {
3726  			if (offload & CSUM_IP) {
3727  				if (txr->last_hw_ipcss == ipcss &&
3728  				    txr->last_hw_ipcso == ipcso &&
3729  				    txr->last_hw_tucss == tucss &&
3730  				    txr->last_hw_tucso == tucso)
3731  					return;
3732  			} else {
3733  				if (txr->last_hw_tucss == tucss &&
3734  				    txr->last_hw_tucso == tucso)
3735  					return;
3736  			}
3737   		}
3738  		txr->last_hw_offload = offload;
3739  		txr->last_hw_tucss = tucss;
3740  		txr->last_hw_tucso = tucso;
3741  		/*
3742  		 * Start offset for payload checksum calculation.
3743  		 * End offset for payload checksum calculation.
3744  		 * Offset of place to put the checksum.
3745  		 */
3746 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3747  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3748  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3749  		TXD->upper_setup.tcp_fields.tucso = tucso;
3750  		cmd |= E1000_TXD_CMD_TCP;
3751  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3752  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3753  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3754  		tucss = hdr_len;
3755  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3756  		/*
3757  		 * Setting up new checksum offload context for every frames
3758  		 * takes a lot of processing time for hardware. This also
3759  		 * reduces performance a lot for small sized frames so avoid
3760  		 * it if driver can use previously configured checksum
3761  		 * offload context.
3762  		 */
3763  		if (txr->last_hw_offload == offload) {
3764  			if (offload & CSUM_IP) {
3765  				if (txr->last_hw_ipcss == ipcss &&
3766  				    txr->last_hw_ipcso == ipcso &&
3767  				    txr->last_hw_tucss == tucss &&
3768  				    txr->last_hw_tucso == tucso)
3769  					return;
3770  			} else {
3771  				if (txr->last_hw_tucss == tucss &&
3772  				    txr->last_hw_tucso == tucso)
3773  					return;
3774  			}
3775  		}
3776  		txr->last_hw_offload = offload;
3777  		txr->last_hw_tucss = tucss;
3778  		txr->last_hw_tucso = tucso;
3779  		/*
3780  		 * Start offset for header checksum calculation.
3781  		 * End offset for header checksum calculation.
3782  		 * Offset of place to put the checksum.
3783  		 */
3784 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3785  		TXD->upper_setup.tcp_fields.tucss = tucss;
3786  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3787  		TXD->upper_setup.tcp_fields.tucso = tucso;
3788   	}
3789 
3790  	if (offload & CSUM_IP) {
3791  		txr->last_hw_ipcss = ipcss;
3792  		txr->last_hw_ipcso = ipcso;
3793   	}
3794 
3795 	TXD->tcp_seg_setup.data = htole32(0);
3796 	TXD->cmd_and_length =
3797 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3798 	tx_buffer = &txr->tx_buffers[cur];
3799 	tx_buffer->m_head = NULL;
3800 	tx_buffer->next_eop = -1;
3801 
3802 	if (++cur == adapter->num_tx_desc)
3803 		cur = 0;
3804 
3805 	txr->tx_avail--;
3806 	txr->next_avail_desc = cur;
3807 }
3808 
3809 
3810 /**********************************************************************
3811  *
3812  *  Setup work for hardware segmentation offload (TSO)
3813  *
3814  **********************************************************************/
3815 static void
3816 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3817     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3818 {
3819 	struct adapter			*adapter = txr->adapter;
3820 	struct e1000_context_desc	*TXD;
3821 	struct em_buffer		*tx_buffer;
3822 	int cur, hdr_len;
3823 
3824 	/*
3825 	 * In theory we can use the same TSO context if and only if
3826 	 * frame is the same type(IP/TCP) and the same MSS. However
3827 	 * checking whether a frame has the same IP/TCP structure is
3828 	 * hard thing so just ignore that and always restablish a
3829 	 * new TSO context.
3830 	 */
3831 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3832 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3833 		      E1000_TXD_DTYP_D |	/* Data descr type */
3834 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3835 
3836 	/* IP and/or TCP header checksum calculation and insertion. */
3837 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3838 
3839 	cur = txr->next_avail_desc;
3840 	tx_buffer = &txr->tx_buffers[cur];
3841 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3842 
3843 	/*
3844 	 * Start offset for header checksum calculation.
3845 	 * End offset for header checksum calculation.
3846 	 * Offset of place put the checksum.
3847 	 */
3848 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3849 	TXD->lower_setup.ip_fields.ipcse =
3850 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3851 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3852 	/*
3853 	 * Start offset for payload checksum calculation.
3854 	 * End offset for payload checksum calculation.
3855 	 * Offset of place to put the checksum.
3856 	 */
3857 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3858 	TXD->upper_setup.tcp_fields.tucse = 0;
3859 	TXD->upper_setup.tcp_fields.tucso =
3860 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3861 	/*
3862 	 * Payload size per packet w/o any headers.
3863 	 * Length of all headers up to payload.
3864 	 */
3865 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3866 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3867 
3868 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3869 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3870 				E1000_TXD_CMD_TSE |	/* TSE context */
3871 				E1000_TXD_CMD_IP |	/* Do IP csum */
3872 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3873 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3874 
3875 	tx_buffer->m_head = NULL;
3876 	tx_buffer->next_eop = -1;
3877 
3878 	if (++cur == adapter->num_tx_desc)
3879 		cur = 0;
3880 
3881 	txr->tx_avail--;
3882 	txr->next_avail_desc = cur;
3883 	txr->tx_tso = TRUE;
3884 }
3885 
3886 
3887 /**********************************************************************
3888  *
3889  *  Examine each tx_buffer in the used queue. If the hardware is done
3890  *  processing the packet then free associated resources. The
3891  *  tx_buffer is put back on the free queue.
3892  *
3893  **********************************************************************/
3894 static void
3895 em_txeof(struct tx_ring *txr)
3896 {
3897 	struct adapter	*adapter = txr->adapter;
3898         int first, last, done, processed;
3899         struct em_buffer *tx_buffer;
3900         struct e1000_tx_desc   *tx_desc, *eop_desc;
3901 	if_t ifp = adapter->ifp;
3902 
3903 	EM_TX_LOCK_ASSERT(txr);
3904 #ifdef DEV_NETMAP
3905 	if (netmap_tx_irq(ifp, txr->me))
3906 		return;
3907 #endif /* DEV_NETMAP */
3908 
3909 	/* No work, make sure hang detection is disabled */
3910         if (txr->tx_avail == adapter->num_tx_desc) {
3911 		txr->busy = EM_TX_IDLE;
3912                 return;
3913 	}
3914 
3915 	processed = 0;
3916         first = txr->next_to_clean;
3917         tx_desc = &txr->tx_base[first];
3918         tx_buffer = &txr->tx_buffers[first];
3919 	last = tx_buffer->next_eop;
3920         eop_desc = &txr->tx_base[last];
3921 
3922 	/*
3923 	 * What this does is get the index of the
3924 	 * first descriptor AFTER the EOP of the
3925 	 * first packet, that way we can do the
3926 	 * simple comparison on the inner while loop.
3927 	 */
3928 	if (++last == adapter->num_tx_desc)
3929  		last = 0;
3930 	done = last;
3931 
3932         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3933             BUS_DMASYNC_POSTREAD);
3934 
3935         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3936 		/* We clean the range of the packet */
3937 		while (first != done) {
3938                 	tx_desc->upper.data = 0;
3939                 	tx_desc->lower.data = 0;
3940                 	tx_desc->buffer_addr = 0;
3941                 	++txr->tx_avail;
3942 			++processed;
3943 
3944 			if (tx_buffer->m_head) {
3945 				bus_dmamap_sync(txr->txtag,
3946 				    tx_buffer->map,
3947 				    BUS_DMASYNC_POSTWRITE);
3948 				bus_dmamap_unload(txr->txtag,
3949 				    tx_buffer->map);
3950                         	m_freem(tx_buffer->m_head);
3951                         	tx_buffer->m_head = NULL;
3952                 	}
3953 			tx_buffer->next_eop = -1;
3954 
3955 	                if (++first == adapter->num_tx_desc)
3956 				first = 0;
3957 
3958 	                tx_buffer = &txr->tx_buffers[first];
3959 			tx_desc = &txr->tx_base[first];
3960 		}
3961 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
3962 		/* See if we can continue to the next packet */
3963 		last = tx_buffer->next_eop;
3964 		if (last != -1) {
3965         		eop_desc = &txr->tx_base[last];
3966 			/* Get new done point */
3967 			if (++last == adapter->num_tx_desc) last = 0;
3968 			done = last;
3969 		} else
3970 			break;
3971         }
3972         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3973             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3974 
3975         txr->next_to_clean = first;
3976 
3977 	/*
3978 	** Hang detection: we know there's work outstanding
3979 	** or the entry return would have been taken, so no
3980 	** descriptor processed here indicates a potential hang.
3981 	** The local timer will examine this and do a reset if needed.
3982 	*/
3983 	if (processed == 0) {
3984 		if (txr->busy != EM_TX_HUNG)
3985 			++txr->busy;
3986 	} else /* At least one descriptor was cleaned */
3987 		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
3988 
3989         /*
3990          * If we have a minimum free, clear IFF_DRV_OACTIVE
3991          * to tell the stack that it is OK to send packets.
3992 	 * Notice that all writes of OACTIVE happen under the
3993 	 * TX lock which, with a single queue, guarantees
3994 	 * sanity.
3995          */
3996         if (txr->tx_avail >= EM_MAX_SCATTER) {
3997 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
3998 	}
3999 
4000 	/* Disable hang detection if all clean */
4001 	if (txr->tx_avail == adapter->num_tx_desc)
4002 		txr->busy = EM_TX_IDLE;
4003 }
4004 
4005 
4006 /*********************************************************************
4007  *
4008  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4009  *
4010  **********************************************************************/
4011 static void
4012 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4013 {
4014 	struct adapter		*adapter = rxr->adapter;
4015 	struct mbuf		*m;
4016 	bus_dma_segment_t	segs[1];
4017 	struct em_buffer	*rxbuf;
4018 	int			i, j, error, nsegs;
4019 	bool			cleaned = FALSE;
4020 
4021 	i = j = rxr->next_to_refresh;
4022 	/*
4023 	** Get one descriptor beyond
4024 	** our work mark to control
4025 	** the loop.
4026 	*/
4027 	if (++j == adapter->num_rx_desc)
4028 		j = 0;
4029 
4030 	while (j != limit) {
4031 		rxbuf = &rxr->rx_buffers[i];
4032 		if (rxbuf->m_head == NULL) {
4033 			m = m_getjcl(M_NOWAIT, MT_DATA,
4034 			    M_PKTHDR, adapter->rx_mbuf_sz);
4035 			/*
4036 			** If we have a temporary resource shortage
4037 			** that causes a failure, just abort refresh
4038 			** for now, we will return to this point when
4039 			** reinvoked from em_rxeof.
4040 			*/
4041 			if (m == NULL)
4042 				goto update;
4043 		} else
4044 			m = rxbuf->m_head;
4045 
4046 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4047 		m->m_flags |= M_PKTHDR;
4048 		m->m_data = m->m_ext.ext_buf;
4049 
4050 		/* Use bus_dma machinery to setup the memory mapping  */
4051 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4052 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
4053 		if (error != 0) {
4054 			printf("Refresh mbufs: hdr dmamap load"
4055 			    " failure - %d\n", error);
4056 			m_free(m);
4057 			rxbuf->m_head = NULL;
4058 			goto update;
4059 		}
4060 		rxbuf->m_head = m;
4061 		bus_dmamap_sync(rxr->rxtag,
4062 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4063 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
4064 		cleaned = TRUE;
4065 
4066 		i = j; /* Next is precalulated for us */
4067 		rxr->next_to_refresh = i;
4068 		/* Calculate next controlling index */
4069 		if (++j == adapter->num_rx_desc)
4070 			j = 0;
4071 	}
4072 update:
4073 	/*
4074 	** Update the tail pointer only if,
4075 	** and as far as we have refreshed.
4076 	*/
4077 	if (cleaned)
4078 		E1000_WRITE_REG(&adapter->hw,
4079 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4080 
4081 	return;
4082 }
4083 
4084 
4085 /*********************************************************************
4086  *
4087  *  Allocate memory for rx_buffer structures. Since we use one
4088  *  rx_buffer per received packet, the maximum number of rx_buffer's
4089  *  that we'll need is equal to the number of receive descriptors
4090  *  that we've allocated.
4091  *
4092  **********************************************************************/
4093 static int
4094 em_allocate_receive_buffers(struct rx_ring *rxr)
4095 {
4096 	struct adapter		*adapter = rxr->adapter;
4097 	device_t		dev = adapter->dev;
4098 	struct em_buffer	*rxbuf;
4099 	int			error;
4100 
4101 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4102 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4103 	if (rxr->rx_buffers == NULL) {
4104 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4105 		return (ENOMEM);
4106 	}
4107 
4108 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4109 				1, 0,			/* alignment, bounds */
4110 				BUS_SPACE_MAXADDR,	/* lowaddr */
4111 				BUS_SPACE_MAXADDR,	/* highaddr */
4112 				NULL, NULL,		/* filter, filterarg */
4113 				MJUM9BYTES,		/* maxsize */
4114 				1,			/* nsegments */
4115 				MJUM9BYTES,		/* maxsegsize */
4116 				0,			/* flags */
4117 				NULL,			/* lockfunc */
4118 				NULL,			/* lockarg */
4119 				&rxr->rxtag);
4120 	if (error) {
4121 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4122 		    __func__, error);
4123 		goto fail;
4124 	}
4125 
4126 	rxbuf = rxr->rx_buffers;
4127 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4128 		rxbuf = &rxr->rx_buffers[i];
4129 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4130 		if (error) {
4131 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4132 			    __func__, error);
4133 			goto fail;
4134 		}
4135 	}
4136 
4137 	return (0);
4138 
4139 fail:
4140 	em_free_receive_structures(adapter);
4141 	return (error);
4142 }
4143 
4144 
4145 /*********************************************************************
4146  *
4147  *  Initialize a receive ring and its buffers.
4148  *
4149  **********************************************************************/
4150 static int
4151 em_setup_receive_ring(struct rx_ring *rxr)
4152 {
4153 	struct	adapter 	*adapter = rxr->adapter;
4154 	struct em_buffer	*rxbuf;
4155 	bus_dma_segment_t	seg[1];
4156 	int			rsize, nsegs, error = 0;
4157 #ifdef DEV_NETMAP
4158 	struct netmap_slot *slot;
4159 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4160 #endif
4161 
4162 
4163 	/* Clear the ring contents */
4164 	EM_RX_LOCK(rxr);
4165 	rsize = roundup2(adapter->num_rx_desc *
4166 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4167 	bzero((void *)rxr->rx_base, rsize);
4168 #ifdef DEV_NETMAP
4169 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4170 #endif
4171 
4172 	/*
4173 	** Free current RX buffer structs and their mbufs
4174 	*/
4175 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4176 		rxbuf = &rxr->rx_buffers[i];
4177 		if (rxbuf->m_head != NULL) {
4178 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4179 			    BUS_DMASYNC_POSTREAD);
4180 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4181 			m_freem(rxbuf->m_head);
4182 			rxbuf->m_head = NULL; /* mark as freed */
4183 		}
4184 	}
4185 
4186 	/* Now replenish the mbufs */
4187         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4188 		rxbuf = &rxr->rx_buffers[j];
4189 #ifdef DEV_NETMAP
4190 		if (slot) {
4191 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4192 			uint64_t paddr;
4193 			void *addr;
4194 
4195 			addr = PNMB(na, slot + si, &paddr);
4196 			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4197 			/* Update descriptor */
4198 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4199 			continue;
4200 		}
4201 #endif /* DEV_NETMAP */
4202 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4203 		    M_PKTHDR, adapter->rx_mbuf_sz);
4204 		if (rxbuf->m_head == NULL) {
4205 			error = ENOBUFS;
4206 			goto fail;
4207 		}
4208 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4209 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4210 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4211 
4212 		/* Get the memory mapping */
4213 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4214 		    rxbuf->map, rxbuf->m_head, seg,
4215 		    &nsegs, BUS_DMA_NOWAIT);
4216 		if (error != 0) {
4217 			m_freem(rxbuf->m_head);
4218 			rxbuf->m_head = NULL;
4219 			goto fail;
4220 		}
4221 		bus_dmamap_sync(rxr->rxtag,
4222 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4223 
4224 		/* Update descriptor */
4225 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4226 	}
4227 	rxr->next_to_check = 0;
4228 	rxr->next_to_refresh = 0;
4229 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4230 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4231 
4232 fail:
4233 	EM_RX_UNLOCK(rxr);
4234 	return (error);
4235 }
4236 
4237 /*********************************************************************
4238  *
4239  *  Initialize all receive rings.
4240  *
4241  **********************************************************************/
4242 static int
4243 em_setup_receive_structures(struct adapter *adapter)
4244 {
4245 	struct rx_ring *rxr = adapter->rx_rings;
4246 	int q;
4247 
4248 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4249 		if (em_setup_receive_ring(rxr))
4250 			goto fail;
4251 
4252 	return (0);
4253 fail:
4254 	/*
4255 	 * Free RX buffers allocated so far, we will only handle
4256 	 * the rings that completed, the failing case will have
4257 	 * cleaned up for itself. 'q' failed, so its the terminus.
4258 	 */
4259 	for (int i = 0; i < q; ++i) {
4260 		rxr = &adapter->rx_rings[i];
4261 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4262 			struct em_buffer *rxbuf;
4263 			rxbuf = &rxr->rx_buffers[n];
4264 			if (rxbuf->m_head != NULL) {
4265 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4266 			  	  BUS_DMASYNC_POSTREAD);
4267 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4268 				m_freem(rxbuf->m_head);
4269 				rxbuf->m_head = NULL;
4270 			}
4271 		}
4272 		rxr->next_to_check = 0;
4273 		rxr->next_to_refresh = 0;
4274 	}
4275 
4276 	return (ENOBUFS);
4277 }
4278 
4279 /*********************************************************************
4280  *
4281  *  Free all receive rings.
4282  *
4283  **********************************************************************/
4284 static void
4285 em_free_receive_structures(struct adapter *adapter)
4286 {
4287 	struct rx_ring *rxr = adapter->rx_rings;
4288 
4289 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4290 		em_free_receive_buffers(rxr);
4291 		/* Free the ring memory as well */
4292 		em_dma_free(adapter, &rxr->rxdma);
4293 		EM_RX_LOCK_DESTROY(rxr);
4294 	}
4295 
4296 	free(adapter->rx_rings, M_DEVBUF);
4297 }
4298 
4299 
4300 /*********************************************************************
4301  *
4302  *  Free receive ring data structures
4303  *
4304  **********************************************************************/
4305 static void
4306 em_free_receive_buffers(struct rx_ring *rxr)
4307 {
4308 	struct adapter		*adapter = rxr->adapter;
4309 	struct em_buffer	*rxbuf = NULL;
4310 
4311 	INIT_DEBUGOUT("free_receive_buffers: begin");
4312 
4313 	if (rxr->rx_buffers != NULL) {
4314 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4315 			rxbuf = &rxr->rx_buffers[i];
4316 			if (rxbuf->map != NULL) {
4317 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4318 				    BUS_DMASYNC_POSTREAD);
4319 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4320 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4321 			}
4322 			if (rxbuf->m_head != NULL) {
4323 				m_freem(rxbuf->m_head);
4324 				rxbuf->m_head = NULL;
4325 			}
4326 		}
4327 		free(rxr->rx_buffers, M_DEVBUF);
4328 		rxr->rx_buffers = NULL;
4329 		rxr->next_to_check = 0;
4330 		rxr->next_to_refresh = 0;
4331 	}
4332 
4333 	if (rxr->rxtag != NULL) {
4334 		bus_dma_tag_destroy(rxr->rxtag);
4335 		rxr->rxtag = NULL;
4336 	}
4337 
4338 	return;
4339 }
4340 
4341 
4342 /*********************************************************************
4343  *
4344  *  Enable receive unit.
4345  *
4346  **********************************************************************/
4347 
4348 static void
4349 em_initialize_receive_unit(struct adapter *adapter)
4350 {
4351 	struct rx_ring	*rxr = adapter->rx_rings;
4352 	if_t ifp = adapter->ifp;
4353 	struct e1000_hw	*hw = &adapter->hw;
4354 	u64	bus_addr;
4355 	u32	rctl, rxcsum;
4356 
4357 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4358 
4359 	/*
4360 	 * Make sure receives are disabled while setting
4361 	 * up the descriptor ring
4362 	 */
4363 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4364 	/* Do not disable if ever enabled on this hardware */
4365 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4366 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4367 
4368 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4369 	    adapter->rx_abs_int_delay.value);
4370 
4371 	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4372 	    adapter->rx_int_delay.value);
4373 	/*
4374 	 * Set the interrupt throttling rate. Value is calculated
4375 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4376 	 */
4377 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4378 
4379 	/*
4380 	** When using MSIX interrupts we need to throttle
4381 	** using the EITR register (82574 only)
4382 	*/
4383 	if (hw->mac.type == e1000_82574) {
4384 		u32 rfctl;
4385 		for (int i = 0; i < 4; i++)
4386 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4387 			    DEFAULT_ITR);
4388 		/* Disable accelerated acknowledge */
4389 		rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4390 		rfctl |= E1000_RFCTL_ACK_DIS;
4391 		E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4392 	}
4393 
4394 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4395 	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4396 #ifdef EM_MULTIQUEUE
4397 		rxcsum |= E1000_RXCSUM_TUOFL |
4398 			  E1000_RXCSUM_IPOFL |
4399 			  E1000_RXCSUM_PCSD;
4400 #else
4401 		rxcsum |= E1000_RXCSUM_TUOFL;
4402 #endif
4403 	} else
4404 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4405 
4406 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4407 
4408 #ifdef EM_MULTIQUEUE
4409 	if (adapter->num_queues > 1) {
4410 		uint32_t rss_key[10];
4411 		uint32_t reta;
4412 		int i;
4413 
4414 		/*
4415 		* Configure RSS key
4416 		*/
4417 		arc4rand(rss_key, sizeof(rss_key), 0);
4418 		for (i = 0; i < 10; ++i)
4419 			E1000_WRITE_REG_ARRAY(hw,E1000_RSSRK(0), i, rss_key[i]);
4420 
4421 		/*
4422 		* Configure RSS redirect table in following fashion:
4423 		* (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4424 		*/
4425 		reta = 0;
4426 		for (i = 0; i < 4; ++i) {
4427 			uint32_t q;
4428 			q = (i % adapter->num_queues) << 7;
4429 			reta |= q << (8 * i);
4430 		}
4431 		for (i = 0; i < 32; ++i)
4432 			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4433 
4434 		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4435 				E1000_MRQC_RSS_FIELD_IPV4_TCP |
4436 				E1000_MRQC_RSS_FIELD_IPV4 |
4437 				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4438 				E1000_MRQC_RSS_FIELD_IPV6_EX |
4439 				E1000_MRQC_RSS_FIELD_IPV6 |
4440 				E1000_MRQC_RSS_FIELD_IPV6_TCP);
4441 	}
4442 #endif
4443 	/*
4444 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4445 	** long latencies are observed, like Lenovo X60. This
4446 	** change eliminates the problem, but since having positive
4447 	** values in RDTR is a known source of problems on other
4448 	** platforms another solution is being sought.
4449 	*/
4450 	if (hw->mac.type == e1000_82573)
4451 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4452 
4453 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4454 		/* Setup the Base and Length of the Rx Descriptor Ring */
4455 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4456 
4457 		bus_addr = rxr->rxdma.dma_paddr;
4458 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4459 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4460 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4461 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4462 		/* Setup the Head and Tail Descriptor Pointers */
4463 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4464 #ifdef DEV_NETMAP
4465 		/*
4466 		 * an init() while a netmap client is active must
4467 		 * preserve the rx buffers passed to userspace.
4468 		 */
4469 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4470 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4471 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4472 		}
4473 #endif /* DEV_NETMAP */
4474 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4475 	}
4476 
4477 	/*
4478 	 * Set PTHRESH for improved jumbo performance
4479 	 * According to 10.2.5.11 of Intel 82574 Datasheet,
4480 	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4481 	 * Only write to RXDCTL(1) if there is a need for different
4482 	 * settings.
4483 	 */
4484 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4485 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4486 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4487 	    (if_getmtu(ifp) > ETHERMTU)) {
4488 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4489 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4490 	} else if ((adapter->hw.mac.type == e1000_82574) &&
4491 		  (if_getmtu(ifp) > ETHERMTU)) {
4492 		for (int i = 0; i < adapter->num_queues; i++) {
4493 			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4494 
4495                 	rxdctl |= 0x20; /* PTHRESH */
4496                 	rxdctl |= 4 << 8; /* HTHRESH */
4497                 	rxdctl |= 4 << 16;/* WTHRESH */
4498 			rxdctl |= 1 << 24; /* Switch to granularity */
4499 			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4500 		}
4501 	}
4502 
4503 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4504 		if (if_getmtu(ifp) > ETHERMTU)
4505 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4506 		else
4507 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4508 	}
4509 
4510 	/* Setup the Receive Control Register */
4511 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4512 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4513 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4514 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4515 
4516         /* Strip the CRC */
4517         rctl |= E1000_RCTL_SECRC;
4518 
4519         /* Make sure VLAN Filters are off */
4520         rctl &= ~E1000_RCTL_VFE;
4521 	rctl &= ~E1000_RCTL_SBP;
4522 
4523 	if (adapter->rx_mbuf_sz == MCLBYTES)
4524 		rctl |= E1000_RCTL_SZ_2048;
4525 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4526 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4527 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4528 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4529 
4530 	if (if_getmtu(ifp) > ETHERMTU)
4531 		rctl |= E1000_RCTL_LPE;
4532 	else
4533 		rctl &= ~E1000_RCTL_LPE;
4534 
4535 	/* Write out the settings */
4536 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4537 
4538 	return;
4539 }
4540 
4541 
4542 /*********************************************************************
4543  *
4544  *  This routine executes in interrupt context. It replenishes
4545  *  the mbufs in the descriptor and sends data which has been
4546  *  dma'ed into host memory to upper layer.
4547  *
4548  *  We loop at most count times if count is > 0, or until done if
4549  *  count < 0.
4550  *
4551  *  For polling we also now return the number of cleaned packets
4552  *********************************************************************/
4553 static bool
4554 em_rxeof(struct rx_ring *rxr, int count, int *done)
4555 {
4556 	struct adapter		*adapter = rxr->adapter;
4557 	if_t ifp = adapter->ifp;
4558 	struct mbuf		*mp, *sendmp;
4559 	u8			status = 0;
4560 	u16 			len;
4561 	int			i, processed, rxdone = 0;
4562 	bool			eop;
4563 	struct e1000_rx_desc	*cur;
4564 
4565 	EM_RX_LOCK(rxr);
4566 
4567 	/* Sync the ring */
4568 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4569 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4570 
4571 
4572 #ifdef DEV_NETMAP
4573 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4574 		EM_RX_UNLOCK(rxr);
4575 		return (FALSE);
4576 	}
4577 #endif /* DEV_NETMAP */
4578 
4579 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4580 
4581 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4582 			break;
4583 
4584 		cur = &rxr->rx_base[i];
4585 		status = cur->status;
4586 		mp = sendmp = NULL;
4587 
4588 		if ((status & E1000_RXD_STAT_DD) == 0)
4589 			break;
4590 
4591 		len = le16toh(cur->length);
4592 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4593 
4594 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4595 		    (rxr->discard == TRUE)) {
4596 			adapter->dropped_pkts++;
4597 			++rxr->rx_discarded;
4598 			if (!eop) /* Catch subsequent segs */
4599 				rxr->discard = TRUE;
4600 			else
4601 				rxr->discard = FALSE;
4602 			em_rx_discard(rxr, i);
4603 			goto next_desc;
4604 		}
4605 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4606 
4607 		/* Assign correct length to the current fragment */
4608 		mp = rxr->rx_buffers[i].m_head;
4609 		mp->m_len = len;
4610 
4611 		/* Trigger for refresh */
4612 		rxr->rx_buffers[i].m_head = NULL;
4613 
4614 		/* First segment? */
4615 		if (rxr->fmp == NULL) {
4616 			mp->m_pkthdr.len = len;
4617 			rxr->fmp = rxr->lmp = mp;
4618 		} else {
4619 			/* Chain mbuf's together */
4620 			mp->m_flags &= ~M_PKTHDR;
4621 			rxr->lmp->m_next = mp;
4622 			rxr->lmp = mp;
4623 			rxr->fmp->m_pkthdr.len += len;
4624 		}
4625 
4626 		if (eop) {
4627 			--count;
4628 			sendmp = rxr->fmp;
4629 			if_setrcvif(sendmp, ifp);
4630 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4631 			em_receive_checksum(cur, sendmp);
4632 #ifndef __NO_STRICT_ALIGNMENT
4633 			if (adapter->hw.mac.max_frame_size >
4634 			    (MCLBYTES - ETHER_ALIGN) &&
4635 			    em_fixup_rx(rxr) != 0)
4636 				goto skip;
4637 #endif
4638 			if (status & E1000_RXD_STAT_VP) {
4639 				if_setvtag(sendmp,
4640 				    le16toh(cur->special));
4641 				sendmp->m_flags |= M_VLANTAG;
4642 			}
4643 #ifndef __NO_STRICT_ALIGNMENT
4644 skip:
4645 #endif
4646 			rxr->fmp = rxr->lmp = NULL;
4647 		}
4648 next_desc:
4649 		/* Sync the ring */
4650 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4651 	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4652 
4653 		/* Zero out the receive descriptors status. */
4654 		cur->status = 0;
4655 		++rxdone;	/* cumulative for POLL */
4656 		++processed;
4657 
4658 		/* Advance our pointers to the next descriptor. */
4659 		if (++i == adapter->num_rx_desc)
4660 			i = 0;
4661 
4662 		/* Send to the stack */
4663 		if (sendmp != NULL) {
4664 			rxr->next_to_check = i;
4665 			EM_RX_UNLOCK(rxr);
4666 			if_input(ifp, sendmp);
4667 			EM_RX_LOCK(rxr);
4668 			i = rxr->next_to_check;
4669 		}
4670 
4671 		/* Only refresh mbufs every 8 descriptors */
4672 		if (processed == 8) {
4673 			em_refresh_mbufs(rxr, i);
4674 			processed = 0;
4675 		}
4676 	}
4677 
4678 	/* Catch any remaining refresh work */
4679 	if (e1000_rx_unrefreshed(rxr))
4680 		em_refresh_mbufs(rxr, i);
4681 
4682 	rxr->next_to_check = i;
4683 	if (done != NULL)
4684 		*done = rxdone;
4685 	EM_RX_UNLOCK(rxr);
4686 
4687 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4688 }
4689 
4690 static __inline void
4691 em_rx_discard(struct rx_ring *rxr, int i)
4692 {
4693 	struct em_buffer	*rbuf;
4694 
4695 	rbuf = &rxr->rx_buffers[i];
4696 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4697 
4698 	/* Free any previous pieces */
4699 	if (rxr->fmp != NULL) {
4700 		rxr->fmp->m_flags |= M_PKTHDR;
4701 		m_freem(rxr->fmp);
4702 		rxr->fmp = NULL;
4703 		rxr->lmp = NULL;
4704 	}
4705 	/*
4706 	** Free buffer and allow em_refresh_mbufs()
4707 	** to clean up and recharge buffer.
4708 	*/
4709 	if (rbuf->m_head) {
4710 		m_free(rbuf->m_head);
4711 		rbuf->m_head = NULL;
4712 	}
4713 	return;
4714 }
4715 
4716 #ifndef __NO_STRICT_ALIGNMENT
4717 /*
4718  * When jumbo frames are enabled we should realign entire payload on
4719  * architecures with strict alignment. This is serious design mistake of 8254x
4720  * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4721  * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4722  * payload. On architecures without strict alignment restrictions 8254x still
4723  * performs unaligned memory access which would reduce the performance too.
4724  * To avoid copying over an entire frame to align, we allocate a new mbuf and
4725  * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4726  * existing mbuf chain.
4727  *
4728  * Be aware, best performance of the 8254x is achived only when jumbo frame is
4729  * not used at all on architectures with strict alignment.
4730  */
4731 static int
4732 em_fixup_rx(struct rx_ring *rxr)
4733 {
4734 	struct adapter *adapter = rxr->adapter;
4735 	struct mbuf *m, *n;
4736 	int error;
4737 
4738 	error = 0;
4739 	m = rxr->fmp;
4740 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4741 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4742 		m->m_data += ETHER_HDR_LEN;
4743 	} else {
4744 		MGETHDR(n, M_NOWAIT, MT_DATA);
4745 		if (n != NULL) {
4746 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4747 			m->m_data += ETHER_HDR_LEN;
4748 			m->m_len -= ETHER_HDR_LEN;
4749 			n->m_len = ETHER_HDR_LEN;
4750 			M_MOVE_PKTHDR(n, m);
4751 			n->m_next = m;
4752 			rxr->fmp = n;
4753 		} else {
4754 			adapter->dropped_pkts++;
4755 			m_freem(rxr->fmp);
4756 			rxr->fmp = NULL;
4757 			error = ENOMEM;
4758 		}
4759 	}
4760 
4761 	return (error);
4762 }
4763 #endif
4764 
4765 /*********************************************************************
4766  *
4767  *  Verify that the hardware indicated that the checksum is valid.
4768  *  Inform the stack about the status of checksum so that stack
4769  *  doesn't spend time verifying the checksum.
4770  *
4771  *********************************************************************/
4772 static void
4773 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4774 {
4775 	mp->m_pkthdr.csum_flags = 0;
4776 
4777 	/* Ignore Checksum bit is set */
4778 	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4779 		return;
4780 
4781 	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4782 		return;
4783 
4784 	/* IP Checksum Good? */
4785 	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4786 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4787 
4788 	/* TCP or UDP checksum */
4789 	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4790 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4791 		mp->m_pkthdr.csum_data = htons(0xffff);
4792 	}
4793 }
4794 
4795 /*
4796  * This routine is run via an vlan
4797  * config EVENT
4798  */
4799 static void
4800 em_register_vlan(void *arg, if_t ifp, u16 vtag)
4801 {
4802 	struct adapter	*adapter = if_getsoftc(ifp);
4803 	u32		index, bit;
4804 
4805 	if ((void*)adapter !=  arg)   /* Not our event */
4806 		return;
4807 
4808 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4809                 return;
4810 
4811 	EM_CORE_LOCK(adapter);
4812 	index = (vtag >> 5) & 0x7F;
4813 	bit = vtag & 0x1F;
4814 	adapter->shadow_vfta[index] |= (1 << bit);
4815 	++adapter->num_vlans;
4816 	/* Re-init to load the changes */
4817 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4818 		em_init_locked(adapter);
4819 	EM_CORE_UNLOCK(adapter);
4820 }
4821 
4822 /*
4823  * This routine is run via an vlan
4824  * unconfig EVENT
4825  */
4826 static void
4827 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
4828 {
4829 	struct adapter	*adapter = if_getsoftc(ifp);
4830 	u32		index, bit;
4831 
4832 	if (adapter != arg)
4833 		return;
4834 
4835 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4836                 return;
4837 
4838 	EM_CORE_LOCK(adapter);
4839 	index = (vtag >> 5) & 0x7F;
4840 	bit = vtag & 0x1F;
4841 	adapter->shadow_vfta[index] &= ~(1 << bit);
4842 	--adapter->num_vlans;
4843 	/* Re-init to load the changes */
4844 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4845 		em_init_locked(adapter);
4846 	EM_CORE_UNLOCK(adapter);
4847 }
4848 
4849 static void
4850 em_setup_vlan_hw_support(struct adapter *adapter)
4851 {
4852 	struct e1000_hw *hw = &adapter->hw;
4853 	u32             reg;
4854 
4855 	/*
4856 	** We get here thru init_locked, meaning
4857 	** a soft reset, this has already cleared
4858 	** the VFTA and other state, so if there
4859 	** have been no vlan's registered do nothing.
4860 	*/
4861 	if (adapter->num_vlans == 0)
4862                 return;
4863 
4864 	/*
4865 	** A soft reset zero's out the VFTA, so
4866 	** we need to repopulate it now.
4867 	*/
4868 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4869                 if (adapter->shadow_vfta[i] != 0)
4870 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4871                             i, adapter->shadow_vfta[i]);
4872 
4873 	reg = E1000_READ_REG(hw, E1000_CTRL);
4874 	reg |= E1000_CTRL_VME;
4875 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4876 
4877 	/* Enable the Filter Table */
4878 	reg = E1000_READ_REG(hw, E1000_RCTL);
4879 	reg &= ~E1000_RCTL_CFIEN;
4880 	reg |= E1000_RCTL_VFE;
4881 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4882 }
4883 
4884 static void
4885 em_enable_intr(struct adapter *adapter)
4886 {
4887 	struct e1000_hw *hw = &adapter->hw;
4888 	u32 ims_mask = IMS_ENABLE_MASK;
4889 
4890 	if (hw->mac.type == e1000_82574) {
4891 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4892 		ims_mask |= EM_MSIX_MASK;
4893 	}
4894 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4895 }
4896 
4897 static void
4898 em_disable_intr(struct adapter *adapter)
4899 {
4900 	struct e1000_hw *hw = &adapter->hw;
4901 
4902 	if (hw->mac.type == e1000_82574)
4903 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4904 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4905 }
4906 
4907 /*
4908  * Bit of a misnomer, what this really means is
4909  * to enable OS management of the system... aka
4910  * to disable special hardware management features
4911  */
4912 static void
4913 em_init_manageability(struct adapter *adapter)
4914 {
4915 	/* A shared code workaround */
4916 #define E1000_82542_MANC2H E1000_MANC2H
4917 	if (adapter->has_manage) {
4918 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4919 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4920 
4921 		/* disable hardware interception of ARP */
4922 		manc &= ~(E1000_MANC_ARP_EN);
4923 
4924                 /* enable receiving management packets to the host */
4925 		manc |= E1000_MANC_EN_MNG2HOST;
4926 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4927 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4928 		manc2h |= E1000_MNG2HOST_PORT_623;
4929 		manc2h |= E1000_MNG2HOST_PORT_664;
4930 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4931 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4932 	}
4933 }
4934 
4935 /*
4936  * Give control back to hardware management
4937  * controller if there is one.
4938  */
4939 static void
4940 em_release_manageability(struct adapter *adapter)
4941 {
4942 	if (adapter->has_manage) {
4943 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4944 
4945 		/* re-enable hardware interception of ARP */
4946 		manc |= E1000_MANC_ARP_EN;
4947 		manc &= ~E1000_MANC_EN_MNG2HOST;
4948 
4949 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4950 	}
4951 }
4952 
4953 /*
4954  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4955  * For ASF and Pass Through versions of f/w this means
4956  * that the driver is loaded. For AMT version type f/w
4957  * this means that the network i/f is open.
4958  */
4959 static void
4960 em_get_hw_control(struct adapter *adapter)
4961 {
4962 	u32 ctrl_ext, swsm;
4963 
4964 	if (adapter->hw.mac.type == e1000_82573) {
4965 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4966 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4967 		    swsm | E1000_SWSM_DRV_LOAD);
4968 		return;
4969 	}
4970 	/* else */
4971 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4972 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4973 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4974 	return;
4975 }
4976 
4977 /*
4978  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4979  * For ASF and Pass Through versions of f/w this means that
4980  * the driver is no longer loaded. For AMT versions of the
4981  * f/w this means that the network i/f is closed.
4982  */
4983 static void
4984 em_release_hw_control(struct adapter *adapter)
4985 {
4986 	u32 ctrl_ext, swsm;
4987 
4988 	if (!adapter->has_manage)
4989 		return;
4990 
4991 	if (adapter->hw.mac.type == e1000_82573) {
4992 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4993 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4994 		    swsm & ~E1000_SWSM_DRV_LOAD);
4995 		return;
4996 	}
4997 	/* else */
4998 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4999 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5000 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5001 	return;
5002 }
5003 
5004 static int
5005 em_is_valid_ether_addr(u8 *addr)
5006 {
5007 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5008 
5009 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5010 		return (FALSE);
5011 	}
5012 
5013 	return (TRUE);
5014 }
5015 
5016 /*
5017 ** Parse the interface capabilities with regard
5018 ** to both system management and wake-on-lan for
5019 ** later use.
5020 */
5021 static void
5022 em_get_wakeup(device_t dev)
5023 {
5024 	struct adapter	*adapter = device_get_softc(dev);
5025 	u16		eeprom_data = 0, device_id, apme_mask;
5026 
5027 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5028 	apme_mask = EM_EEPROM_APME;
5029 
5030 	switch (adapter->hw.mac.type) {
5031 	case e1000_82573:
5032 	case e1000_82583:
5033 		adapter->has_amt = TRUE;
5034 		/* Falls thru */
5035 	case e1000_82571:
5036 	case e1000_82572:
5037 	case e1000_80003es2lan:
5038 		if (adapter->hw.bus.func == 1) {
5039 			e1000_read_nvm(&adapter->hw,
5040 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5041 			break;
5042 		} else
5043 			e1000_read_nvm(&adapter->hw,
5044 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5045 		break;
5046 	case e1000_ich8lan:
5047 	case e1000_ich9lan:
5048 	case e1000_ich10lan:
5049 	case e1000_pchlan:
5050 	case e1000_pch2lan:
5051 		apme_mask = E1000_WUC_APME;
5052 		adapter->has_amt = TRUE;
5053 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5054 		break;
5055 	default:
5056 		e1000_read_nvm(&adapter->hw,
5057 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5058 		break;
5059 	}
5060 	if (eeprom_data & apme_mask)
5061 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5062 	/*
5063          * We have the eeprom settings, now apply the special cases
5064          * where the eeprom may be wrong or the board won't support
5065          * wake on lan on a particular port
5066 	 */
5067 	device_id = pci_get_device(dev);
5068         switch (device_id) {
5069 	case E1000_DEV_ID_82571EB_FIBER:
5070 		/* Wake events only supported on port A for dual fiber
5071 		 * regardless of eeprom setting */
5072 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5073 		    E1000_STATUS_FUNC_1)
5074 			adapter->wol = 0;
5075 		break;
5076 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
5077 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
5078 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5079                 /* if quad port adapter, disable WoL on all but port A */
5080 		if (global_quad_port_a != 0)
5081 			adapter->wol = 0;
5082 		/* Reset for multiple quad port adapters */
5083 		if (++global_quad_port_a == 4)
5084 			global_quad_port_a = 0;
5085                 break;
5086 	}
5087 	return;
5088 }
5089 
5090 
5091 /*
5092  * Enable PCI Wake On Lan capability
5093  */
5094 static void
5095 em_enable_wakeup(device_t dev)
5096 {
5097 	struct adapter	*adapter = device_get_softc(dev);
5098 	if_t ifp = adapter->ifp;
5099 	u32		pmc, ctrl, ctrl_ext, rctl;
5100 	u16     	status;
5101 
5102 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
5103 		return;
5104 
5105 	/* Advertise the wakeup capability */
5106 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5107 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5108 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5109 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5110 
5111 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
5112 	    (adapter->hw.mac.type == e1000_pchlan) ||
5113 	    (adapter->hw.mac.type == e1000_ich9lan) ||
5114 	    (adapter->hw.mac.type == e1000_ich10lan))
5115 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
5116 
5117 	/* Keep the laser running on Fiber adapters */
5118 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5119 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5120 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5121 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5122 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5123 	}
5124 
5125 	/*
5126 	** Determine type of Wakeup: note that wol
5127 	** is set with all bits on by default.
5128 	*/
5129 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5130 		adapter->wol &= ~E1000_WUFC_MAG;
5131 
5132 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5133 		adapter->wol &= ~E1000_WUFC_MC;
5134 	else {
5135 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5136 		rctl |= E1000_RCTL_MPE;
5137 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5138 	}
5139 
5140 	if ((adapter->hw.mac.type == e1000_pchlan) ||
5141 	    (adapter->hw.mac.type == e1000_pch2lan)) {
5142 		if (em_enable_phy_wakeup(adapter))
5143 			return;
5144 	} else {
5145 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5146 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5147 	}
5148 
5149 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5150 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5151 
5152         /* Request PME */
5153         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5154 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5155 	if (if_getcapenable(ifp) & IFCAP_WOL)
5156 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5157         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5158 
5159 	return;
5160 }
5161 
5162 /*
5163 ** WOL in the newer chipset interfaces (pchlan)
5164 ** require thing to be copied into the phy
5165 */
5166 static int
5167 em_enable_phy_wakeup(struct adapter *adapter)
5168 {
5169 	struct e1000_hw *hw = &adapter->hw;
5170 	u32 mreg, ret = 0;
5171 	u16 preg;
5172 
5173 	/* copy MAC RARs to PHY RARs */
5174 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5175 
5176 	/* copy MAC MTA to PHY MTA */
5177 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5178 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5179 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5180 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5181 		    (u16)((mreg >> 16) & 0xFFFF));
5182 	}
5183 
5184 	/* configure PHY Rx Control register */
5185 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5186 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5187 	if (mreg & E1000_RCTL_UPE)
5188 		preg |= BM_RCTL_UPE;
5189 	if (mreg & E1000_RCTL_MPE)
5190 		preg |= BM_RCTL_MPE;
5191 	preg &= ~(BM_RCTL_MO_MASK);
5192 	if (mreg & E1000_RCTL_MO_3)
5193 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5194 				<< BM_RCTL_MO_SHIFT);
5195 	if (mreg & E1000_RCTL_BAM)
5196 		preg |= BM_RCTL_BAM;
5197 	if (mreg & E1000_RCTL_PMCF)
5198 		preg |= BM_RCTL_PMCF;
5199 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5200 	if (mreg & E1000_CTRL_RFCE)
5201 		preg |= BM_RCTL_RFCE;
5202 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5203 
5204 	/* enable PHY wakeup in MAC register */
5205 	E1000_WRITE_REG(hw, E1000_WUC,
5206 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5207 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5208 
5209 	/* configure and enable PHY wakeup in PHY registers */
5210 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5211 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5212 
5213 	/* activate PHY wakeup */
5214 	ret = hw->phy.ops.acquire(hw);
5215 	if (ret) {
5216 		printf("Could not acquire PHY\n");
5217 		return ret;
5218 	}
5219 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5220 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5221 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5222 	if (ret) {
5223 		printf("Could not read PHY page 769\n");
5224 		goto out;
5225 	}
5226 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5227 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5228 	if (ret)
5229 		printf("Could not set PHY Host Wakeup bit\n");
5230 out:
5231 	hw->phy.ops.release(hw);
5232 
5233 	return ret;
5234 }
5235 
5236 static void
5237 em_led_func(void *arg, int onoff)
5238 {
5239 	struct adapter	*adapter = arg;
5240 
5241 	EM_CORE_LOCK(adapter);
5242 	if (onoff) {
5243 		e1000_setup_led(&adapter->hw);
5244 		e1000_led_on(&adapter->hw);
5245 	} else {
5246 		e1000_led_off(&adapter->hw);
5247 		e1000_cleanup_led(&adapter->hw);
5248 	}
5249 	EM_CORE_UNLOCK(adapter);
5250 }
5251 
5252 /*
5253 ** Disable the L0S and L1 LINK states
5254 */
5255 static void
5256 em_disable_aspm(struct adapter *adapter)
5257 {
5258 	int		base, reg;
5259 	u16		link_cap,link_ctrl;
5260 	device_t	dev = adapter->dev;
5261 
5262 	switch (adapter->hw.mac.type) {
5263 		case e1000_82573:
5264 		case e1000_82574:
5265 		case e1000_82583:
5266 			break;
5267 		default:
5268 			return;
5269 	}
5270 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5271 		return;
5272 	reg = base + PCIER_LINK_CAP;
5273 	link_cap = pci_read_config(dev, reg, 2);
5274 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5275 		return;
5276 	reg = base + PCIER_LINK_CTL;
5277 	link_ctrl = pci_read_config(dev, reg, 2);
5278 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5279 	pci_write_config(dev, reg, link_ctrl, 2);
5280 	return;
5281 }
5282 
5283 /**********************************************************************
5284  *
5285  *  Update the board statistics counters.
5286  *
5287  **********************************************************************/
5288 static void
5289 em_update_stats_counters(struct adapter *adapter)
5290 {
5291 
5292 	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5293 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5294 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5295 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5296 	}
5297 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5298 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5299 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5300 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5301 
5302 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5303 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5304 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5305 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5306 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5307 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5308 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5309 	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5310 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5311 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5312 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5313 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5314 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5315 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5316 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5317 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5318 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5319 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5320 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5321 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5322 
5323 	/* For the 64-bit byte counters the low dword must be read first. */
5324 	/* Both registers clear on the read of the high dword */
5325 
5326 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5327 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5328 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5329 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5330 
5331 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5332 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5333 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5334 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5335 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5336 
5337 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5338 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5339 
5340 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5341 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5342 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5343 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5344 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5345 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5346 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5347 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5348 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5349 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5350 
5351 	/* Interrupt Counts */
5352 
5353 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5354 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5355 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5356 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5357 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5358 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5359 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5360 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5361 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5362 
5363 	if (adapter->hw.mac.type >= e1000_82543) {
5364 		adapter->stats.algnerrc +=
5365 		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5366 		adapter->stats.rxerrc +=
5367 		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5368 		adapter->stats.tncrs +=
5369 		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5370 		adapter->stats.cexterr +=
5371 		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5372 		adapter->stats.tsctc +=
5373 		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5374 		adapter->stats.tsctfc +=
5375 		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5376 	}
5377 }
5378 
5379 static uint64_t
5380 em_get_counter(if_t ifp, ift_counter cnt)
5381 {
5382 	struct adapter *adapter;
5383 
5384 	adapter = if_getsoftc(ifp);
5385 
5386 	switch (cnt) {
5387 	case IFCOUNTER_COLLISIONS:
5388 		return (adapter->stats.colc);
5389 	case IFCOUNTER_IERRORS:
5390 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5391 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5392 		    adapter->stats.ruc + adapter->stats.roc +
5393 		    adapter->stats.mpc + adapter->stats.cexterr);
5394 	case IFCOUNTER_OERRORS:
5395 		return (adapter->stats.ecol + adapter->stats.latecol +
5396 		    adapter->watchdog_events);
5397 	default:
5398 		return (if_get_counter_default(ifp, cnt));
5399 	}
5400 }
5401 
5402 /* Export a single 32-bit register via a read-only sysctl. */
5403 static int
5404 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5405 {
5406 	struct adapter *adapter;
5407 	u_int val;
5408 
5409 	adapter = oidp->oid_arg1;
5410 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5411 	return (sysctl_handle_int(oidp, &val, 0, req));
5412 }
5413 
5414 /*
5415  * Add sysctl variables, one per statistic, to the system.
5416  */
5417 static void
5418 em_add_hw_stats(struct adapter *adapter)
5419 {
5420 	device_t dev = adapter->dev;
5421 
5422 	struct tx_ring *txr = adapter->tx_rings;
5423 	struct rx_ring *rxr = adapter->rx_rings;
5424 
5425 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5426 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5427 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5428 	struct e1000_hw_stats *stats = &adapter->stats;
5429 
5430 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5431 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5432 
5433 #define QUEUE_NAME_LEN 32
5434 	char namebuf[QUEUE_NAME_LEN];
5435 
5436 	/* Driver Statistics */
5437 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5438 			CTLFLAG_RD, &adapter->link_irq,
5439 			"Link MSIX IRQ Handled");
5440 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5441 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5442 			 "Std mbuf failed");
5443 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5444 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5445 			 "Std mbuf cluster failed");
5446 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5447 			CTLFLAG_RD, &adapter->dropped_pkts,
5448 			"Driver dropped packets");
5449 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5450 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5451 			"Driver tx dma failure in xmit");
5452 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5453 			CTLFLAG_RD, &adapter->rx_overruns,
5454 			"RX overruns");
5455 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5456 			CTLFLAG_RD, &adapter->watchdog_events,
5457 			"Watchdog timeouts");
5458 
5459 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5460 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5461 			em_sysctl_reg_handler, "IU",
5462 			"Device Control Register");
5463 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5464 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5465 			em_sysctl_reg_handler, "IU",
5466 			"Receiver Control Register");
5467 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5468 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5469 			"Flow Control High Watermark");
5470 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5471 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5472 			"Flow Control Low Watermark");
5473 
5474 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5475 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5476 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5477 					    CTLFLAG_RD, NULL, "TX Queue Name");
5478 		queue_list = SYSCTL_CHILDREN(queue_node);
5479 
5480 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5481 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5482 				E1000_TDH(txr->me),
5483 				em_sysctl_reg_handler, "IU",
5484  				"Transmit Descriptor Head");
5485 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5486 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5487 				E1000_TDT(txr->me),
5488 				em_sysctl_reg_handler, "IU",
5489  				"Transmit Descriptor Tail");
5490 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5491 				CTLFLAG_RD, &txr->tx_irq,
5492 				"Queue MSI-X Transmit Interrupts");
5493 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5494 				CTLFLAG_RD, &txr->no_desc_avail,
5495 				"Queue No Descriptor Available");
5496 
5497 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5498 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5499 					    CTLFLAG_RD, NULL, "RX Queue Name");
5500 		queue_list = SYSCTL_CHILDREN(queue_node);
5501 
5502 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5503 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5504 				E1000_RDH(rxr->me),
5505 				em_sysctl_reg_handler, "IU",
5506 				"Receive Descriptor Head");
5507 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5508 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5509 				E1000_RDT(rxr->me),
5510 				em_sysctl_reg_handler, "IU",
5511 				"Receive Descriptor Tail");
5512 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5513 				CTLFLAG_RD, &rxr->rx_irq,
5514 				"Queue MSI-X Receive Interrupts");
5515 	}
5516 
5517 	/* MAC stats get their own sub node */
5518 
5519 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5520 				    CTLFLAG_RD, NULL, "Statistics");
5521 	stat_list = SYSCTL_CHILDREN(stat_node);
5522 
5523 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5524 			CTLFLAG_RD, &stats->ecol,
5525 			"Excessive collisions");
5526 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5527 			CTLFLAG_RD, &stats->scc,
5528 			"Single collisions");
5529 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5530 			CTLFLAG_RD, &stats->mcc,
5531 			"Multiple collisions");
5532 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5533 			CTLFLAG_RD, &stats->latecol,
5534 			"Late collisions");
5535 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5536 			CTLFLAG_RD, &stats->colc,
5537 			"Collision Count");
5538 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5539 			CTLFLAG_RD, &adapter->stats.symerrs,
5540 			"Symbol Errors");
5541 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5542 			CTLFLAG_RD, &adapter->stats.sec,
5543 			"Sequence Errors");
5544 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5545 			CTLFLAG_RD, &adapter->stats.dc,
5546 			"Defer Count");
5547 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5548 			CTLFLAG_RD, &adapter->stats.mpc,
5549 			"Missed Packets");
5550 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5551 			CTLFLAG_RD, &adapter->stats.rnbc,
5552 			"Receive No Buffers");
5553 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5554 			CTLFLAG_RD, &adapter->stats.ruc,
5555 			"Receive Undersize");
5556 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5557 			CTLFLAG_RD, &adapter->stats.rfc,
5558 			"Fragmented Packets Received ");
5559 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5560 			CTLFLAG_RD, &adapter->stats.roc,
5561 			"Oversized Packets Received");
5562 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5563 			CTLFLAG_RD, &adapter->stats.rjc,
5564 			"Recevied Jabber");
5565 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5566 			CTLFLAG_RD, &adapter->stats.rxerrc,
5567 			"Receive Errors");
5568 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5569 			CTLFLAG_RD, &adapter->stats.crcerrs,
5570 			"CRC errors");
5571 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5572 			CTLFLAG_RD, &adapter->stats.algnerrc,
5573 			"Alignment Errors");
5574 	/* On 82575 these are collision counts */
5575 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5576 			CTLFLAG_RD, &adapter->stats.cexterr,
5577 			"Collision/Carrier extension errors");
5578 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5579 			CTLFLAG_RD, &adapter->stats.xonrxc,
5580 			"XON Received");
5581 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5582 			CTLFLAG_RD, &adapter->stats.xontxc,
5583 			"XON Transmitted");
5584 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5585 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5586 			"XOFF Received");
5587 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5588 			CTLFLAG_RD, &adapter->stats.xofftxc,
5589 			"XOFF Transmitted");
5590 
5591 	/* Packet Reception Stats */
5592 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5593 			CTLFLAG_RD, &adapter->stats.tpr,
5594 			"Total Packets Received ");
5595 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5596 			CTLFLAG_RD, &adapter->stats.gprc,
5597 			"Good Packets Received");
5598 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5599 			CTLFLAG_RD, &adapter->stats.bprc,
5600 			"Broadcast Packets Received");
5601 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5602 			CTLFLAG_RD, &adapter->stats.mprc,
5603 			"Multicast Packets Received");
5604 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5605 			CTLFLAG_RD, &adapter->stats.prc64,
5606 			"64 byte frames received ");
5607 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5608 			CTLFLAG_RD, &adapter->stats.prc127,
5609 			"65-127 byte frames received");
5610 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5611 			CTLFLAG_RD, &adapter->stats.prc255,
5612 			"128-255 byte frames received");
5613 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5614 			CTLFLAG_RD, &adapter->stats.prc511,
5615 			"256-511 byte frames received");
5616 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5617 			CTLFLAG_RD, &adapter->stats.prc1023,
5618 			"512-1023 byte frames received");
5619 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5620 			CTLFLAG_RD, &adapter->stats.prc1522,
5621 			"1023-1522 byte frames received");
5622  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5623  			CTLFLAG_RD, &adapter->stats.gorc,
5624  			"Good Octets Received");
5625 
5626 	/* Packet Transmission Stats */
5627  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5628  			CTLFLAG_RD, &adapter->stats.gotc,
5629  			"Good Octets Transmitted");
5630 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5631 			CTLFLAG_RD, &adapter->stats.tpt,
5632 			"Total Packets Transmitted");
5633 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5634 			CTLFLAG_RD, &adapter->stats.gptc,
5635 			"Good Packets Transmitted");
5636 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5637 			CTLFLAG_RD, &adapter->stats.bptc,
5638 			"Broadcast Packets Transmitted");
5639 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5640 			CTLFLAG_RD, &adapter->stats.mptc,
5641 			"Multicast Packets Transmitted");
5642 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5643 			CTLFLAG_RD, &adapter->stats.ptc64,
5644 			"64 byte frames transmitted ");
5645 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5646 			CTLFLAG_RD, &adapter->stats.ptc127,
5647 			"65-127 byte frames transmitted");
5648 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5649 			CTLFLAG_RD, &adapter->stats.ptc255,
5650 			"128-255 byte frames transmitted");
5651 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5652 			CTLFLAG_RD, &adapter->stats.ptc511,
5653 			"256-511 byte frames transmitted");
5654 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5655 			CTLFLAG_RD, &adapter->stats.ptc1023,
5656 			"512-1023 byte frames transmitted");
5657 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5658 			CTLFLAG_RD, &adapter->stats.ptc1522,
5659 			"1024-1522 byte frames transmitted");
5660 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5661 			CTLFLAG_RD, &adapter->stats.tsctc,
5662 			"TSO Contexts Transmitted");
5663 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5664 			CTLFLAG_RD, &adapter->stats.tsctfc,
5665 			"TSO Contexts Failed");
5666 
5667 
5668 	/* Interrupt Stats */
5669 
5670 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5671 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5672 	int_list = SYSCTL_CHILDREN(int_node);
5673 
5674 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5675 			CTLFLAG_RD, &adapter->stats.iac,
5676 			"Interrupt Assertion Count");
5677 
5678 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5679 			CTLFLAG_RD, &adapter->stats.icrxptc,
5680 			"Interrupt Cause Rx Pkt Timer Expire Count");
5681 
5682 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5683 			CTLFLAG_RD, &adapter->stats.icrxatc,
5684 			"Interrupt Cause Rx Abs Timer Expire Count");
5685 
5686 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5687 			CTLFLAG_RD, &adapter->stats.ictxptc,
5688 			"Interrupt Cause Tx Pkt Timer Expire Count");
5689 
5690 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5691 			CTLFLAG_RD, &adapter->stats.ictxatc,
5692 			"Interrupt Cause Tx Abs Timer Expire Count");
5693 
5694 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5695 			CTLFLAG_RD, &adapter->stats.ictxqec,
5696 			"Interrupt Cause Tx Queue Empty Count");
5697 
5698 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5699 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5700 			"Interrupt Cause Tx Queue Min Thresh Count");
5701 
5702 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5703 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5704 			"Interrupt Cause Rx Desc Min Thresh Count");
5705 
5706 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5707 			CTLFLAG_RD, &adapter->stats.icrxoc,
5708 			"Interrupt Cause Receiver Overrun Count");
5709 }
5710 
5711 /**********************************************************************
5712  *
5713  *  This routine provides a way to dump out the adapter eeprom,
5714  *  often a useful debug/service tool. This only dumps the first
5715  *  32 words, stuff that matters is in that extent.
5716  *
5717  **********************************************************************/
5718 static int
5719 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5720 {
5721 	struct adapter *adapter = (struct adapter *)arg1;
5722 	int error;
5723 	int result;
5724 
5725 	result = -1;
5726 	error = sysctl_handle_int(oidp, &result, 0, req);
5727 
5728 	if (error || !req->newptr)
5729 		return (error);
5730 
5731 	/*
5732 	 * This value will cause a hex dump of the
5733 	 * first 32 16-bit words of the EEPROM to
5734 	 * the screen.
5735 	 */
5736 	if (result == 1)
5737 		em_print_nvm_info(adapter);
5738 
5739 	return (error);
5740 }
5741 
5742 static void
5743 em_print_nvm_info(struct adapter *adapter)
5744 {
5745 	u16	eeprom_data;
5746 	int	i, j, row = 0;
5747 
5748 	/* Its a bit crude, but it gets the job done */
5749 	printf("\nInterface EEPROM Dump:\n");
5750 	printf("Offset\n0x0000  ");
5751 	for (i = 0, j = 0; i < 32; i++, j++) {
5752 		if (j == 8) { /* Make the offset block */
5753 			j = 0; ++row;
5754 			printf("\n0x00%x0  ",row);
5755 		}
5756 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5757 		printf("%04x ", eeprom_data);
5758 	}
5759 	printf("\n");
5760 }
5761 
5762 static int
5763 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5764 {
5765 	struct em_int_delay_info *info;
5766 	struct adapter *adapter;
5767 	u32 regval;
5768 	int error, usecs, ticks;
5769 
5770 	info = (struct em_int_delay_info *)arg1;
5771 	usecs = info->value;
5772 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5773 	if (error != 0 || req->newptr == NULL)
5774 		return (error);
5775 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5776 		return (EINVAL);
5777 	info->value = usecs;
5778 	ticks = EM_USECS_TO_TICKS(usecs);
5779 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5780 		ticks *= 4;
5781 
5782 	adapter = info->adapter;
5783 
5784 	EM_CORE_LOCK(adapter);
5785 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5786 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5787 	/* Handle a few special cases. */
5788 	switch (info->offset) {
5789 	case E1000_RDTR:
5790 		break;
5791 	case E1000_TIDV:
5792 		if (ticks == 0) {
5793 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5794 			/* Don't write 0 into the TIDV register. */
5795 			regval++;
5796 		} else
5797 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5798 		break;
5799 	}
5800 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5801 	EM_CORE_UNLOCK(adapter);
5802 	return (0);
5803 }
5804 
5805 static void
5806 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5807 	const char *description, struct em_int_delay_info *info,
5808 	int offset, int value)
5809 {
5810 	info->adapter = adapter;
5811 	info->offset = offset;
5812 	info->value = value;
5813 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5814 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5815 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5816 	    info, 0, em_sysctl_int_delay, "I", description);
5817 }
5818 
5819 static void
5820 em_set_sysctl_value(struct adapter *adapter, const char *name,
5821 	const char *description, int *limit, int value)
5822 {
5823 	*limit = value;
5824 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5825 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5826 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5827 }
5828 
5829 
5830 /*
5831 ** Set flow control using sysctl:
5832 ** Flow control values:
5833 **      0 - off
5834 **      1 - rx pause
5835 **      2 - tx pause
5836 **      3 - full
5837 */
5838 static int
5839 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5840 {
5841         int		error;
5842 	static int	input = 3; /* default is full */
5843         struct adapter	*adapter = (struct adapter *) arg1;
5844 
5845         error = sysctl_handle_int(oidp, &input, 0, req);
5846 
5847         if ((error) || (req->newptr == NULL))
5848                 return (error);
5849 
5850 	if (input == adapter->fc) /* no change? */
5851 		return (error);
5852 
5853         switch (input) {
5854                 case e1000_fc_rx_pause:
5855                 case e1000_fc_tx_pause:
5856                 case e1000_fc_full:
5857                 case e1000_fc_none:
5858                         adapter->hw.fc.requested_mode = input;
5859 			adapter->fc = input;
5860                         break;
5861                 default:
5862 			/* Do nothing */
5863 			return (error);
5864         }
5865 
5866         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5867         e1000_force_mac_fc(&adapter->hw);
5868         return (error);
5869 }
5870 
5871 /*
5872 ** Manage Energy Efficient Ethernet:
5873 ** Control values:
5874 **     0/1 - enabled/disabled
5875 */
5876 static int
5877 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5878 {
5879        struct adapter *adapter = (struct adapter *) arg1;
5880        int             error, value;
5881 
5882        value = adapter->hw.dev_spec.ich8lan.eee_disable;
5883        error = sysctl_handle_int(oidp, &value, 0, req);
5884        if (error || req->newptr == NULL)
5885                return (error);
5886        EM_CORE_LOCK(adapter);
5887        adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5888        em_init_locked(adapter);
5889        EM_CORE_UNLOCK(adapter);
5890        return (0);
5891 }
5892 
5893 static int
5894 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5895 {
5896 	struct adapter *adapter;
5897 	int error;
5898 	int result;
5899 
5900 	result = -1;
5901 	error = sysctl_handle_int(oidp, &result, 0, req);
5902 
5903 	if (error || !req->newptr)
5904 		return (error);
5905 
5906 	if (result == 1) {
5907 		adapter = (struct adapter *)arg1;
5908 		em_print_debug_info(adapter);
5909         }
5910 
5911 	return (error);
5912 }
5913 
5914 /*
5915 ** This routine is meant to be fluid, add whatever is
5916 ** needed for debugging a problem.  -jfv
5917 */
5918 static void
5919 em_print_debug_info(struct adapter *adapter)
5920 {
5921 	device_t dev = adapter->dev;
5922 	struct tx_ring *txr = adapter->tx_rings;
5923 	struct rx_ring *rxr = adapter->rx_rings;
5924 
5925 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
5926 		printf("Interface is RUNNING ");
5927 	else
5928 		printf("Interface is NOT RUNNING\n");
5929 
5930 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
5931 		printf("and INACTIVE\n");
5932 	else
5933 		printf("and ACTIVE\n");
5934 
5935 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5936 		device_printf(dev, "TX Queue %d ------\n", i);
5937 		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5938 	    		E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
5939 	    		E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
5940 		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
5941 		device_printf(dev, "TX descriptors avail = %d\n",
5942 	    		txr->tx_avail);
5943 		device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5944 	    		txr->no_desc_avail);
5945 		device_printf(dev, "RX Queue %d ------\n", i);
5946 		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5947 	    		E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
5948 	    		E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
5949 		device_printf(dev, "RX discarded packets = %ld\n",
5950 	    		rxr->rx_discarded);
5951 		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5952 		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5953 	}
5954 }
5955 
5956 #ifdef EM_MULTIQUEUE
5957 /*
5958  * 82574 only:
5959  * Write a new value to the EEPROM increasing the number of MSIX
5960  * vectors from 3 to 5, for proper multiqueue support.
5961  */
5962 static void
5963 em_enable_vectors_82574(struct adapter *adapter)
5964 {
5965 	struct e1000_hw *hw = &adapter->hw;
5966 	device_t dev = adapter->dev;
5967 	u16 edata;
5968 
5969 	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
5970 	printf("Current cap: %#06x\n", edata);
5971 	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
5972 		device_printf(dev, "Writing to eeprom: increasing "
5973 		    "reported MSIX vectors from 3 to 5...\n");
5974 		edata &= ~(EM_NVM_MSIX_N_MASK);
5975 		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
5976 		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
5977 		e1000_update_nvm_checksum(hw);
5978 		device_printf(dev, "Writing to eeprom: done\n");
5979 	}
5980 }
5981 #endif
5982 
5983 #ifdef DDB
5984 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
5985 {
5986 	devclass_t	dc;
5987 	int max_em;
5988 
5989 	dc = devclass_find("em");
5990 	max_em = devclass_get_maxunit(dc);
5991 
5992 	for (int index = 0; index < (max_em - 1); index++) {
5993 		device_t dev;
5994 		dev = devclass_get_device(dc, index);
5995 		if (device_get_driver(dev) == &em_driver) {
5996 			struct adapter *adapter = device_get_softc(dev);
5997 			EM_CORE_LOCK(adapter);
5998 			em_init_locked(adapter);
5999 			EM_CORE_UNLOCK(adapter);
6000 		}
6001 	}
6002 }
6003 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6004 {
6005 	devclass_t	dc;
6006 	int max_em;
6007 
6008 	dc = devclass_find("em");
6009 	max_em = devclass_get_maxunit(dc);
6010 
6011 	for (int index = 0; index < (max_em - 1); index++) {
6012 		device_t dev;
6013 		dev = devclass_get_device(dc, index);
6014 		if (device_get_driver(dev) == &em_driver)
6015 			em_print_debug_info(device_get_softc(dev));
6016 	}
6017 
6018 }
6019 #endif
6020