xref: /freebsd/sys/dev/e1000/if_em.c (revision 39ee7a7a6bdd1557b1c3532abf60d139798ac88b)
/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_em.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifdef DDB
#include <sys/types.h>
#include <ddb/ddb.h>
#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.4.2";
/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices this driver will attach to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(if_t, struct mbuf *);
static int	em_mq_start_locked(if_t,
		    struct tx_ring *);
static void	em_qflush(if_t);
#else
static void	em_start(if_t);
static void	em_start_locked(if_t, struct tx_ring *);
#endif
static int	em_ioctl(if_t, u_long, caddr_t);
static uint64_t	em_get_counter(if_t, ift_counter);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(if_t, struct ifmediareq *);
static int	em_media_change(if_t);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, if_t, u16);
static void	em_unregister_vlan(void *, if_t, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_que(void *context, int pending);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void	em_enable_vectors_82574(struct adapter *);
#endif

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(em, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
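
/*
 * Worked example (illustrative, not from the original source): the delay
 * timers count in units of 1.024 us and the ITR register counts in units
 * of 256 ns, so the macros above just convert with rounding.  Assuming
 * EM_TIDV is 64 ticks, EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000
 * = 66 us, which is where em_tx_int_delay_dflt below comes from.  And
 * with MAX_INTS_PER_SEC = 8000, DEFAULT_ITR = 1000000000 / (8000 * 256)
 * = 488 units of 256 ns, i.e. roughly a 125 us minimum interrupt spacing.
 */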

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

#define TSO_WORKAROUND	4

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous mode also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store the last used CPU when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments
** when a queue is bound to a CPU.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy Efficient Ethernet - default to OFF (nonzero disables EEE) */
static int eee_setting = 1;
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded for an
 *  adapter based on the PCI vendor/device ID of that adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	 * Setup MSI/X or MSI if PCI Express
	 */
	adapter->msix = em_setup_msix(adapter);

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors.  They
	 * must not exceed the hardware maximum, and the ring size must be
	 * a multiple of EM_DBA_ALIGN.
	 */
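	/*
	 * Worked example (illustrative): assuming EM_DBA_ALIGN is 128 and
	 * the legacy descriptors are 16 bytes each, the descriptor counts
	 * must be multiples of 128 / 16 = 8; a request for, say, 900 TX
	 * descriptors would be rejected here and EM_DEFAULT_TXD used instead.
	 */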
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state; this is
	** important when reading the NVM
	** and MAC address from it.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Disable ULP support */
	e1000_disable_ulp_lpt_lp(hw, TRUE);

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* On non-AMT hardware the driver can take control from firmware now */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using driver */
	if (if_vlantrunkinuse(ifp)) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((if_getflags(ifp) & IFF_UP) &&
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr);
#else
			if (!if_sendq_empty(ifp))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#ifndef EM_MULTIQUEUE
static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!if_sendq_empty(ifp)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
			break;
		}
		m_head = if_dequeue(ifp);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			if_sendq_prepend(ifp, m_head);
			break;
		}

		/* Mark the queue as having work */
		if (txr->busy == EM_TX_IDLE)
			txr->busy = EM_TX_BUSY;

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);
	}

	return;
}

static void
em_start(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the driver is busy it can queue the request rather
 *  than do an immediate send.  That queueing, in addition to having
 *  multiple TX queues, is the advantage of this path.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
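/*
 * Illustrative note (not from the original source): with two queues
 * configured, a packet carrying flowid 5 maps to ring 5 % 2 = 1 in the
 * selection below, while unhashed traffic (M_HASHTYPE_NONE) falls back
 * to curcpu % num_queues, spreading untagged work across the rings.
 */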
static int
em_mq_start(if_t ifp, struct mbuf *m)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	unsigned int	i, error;

	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	error = drbr_enqueue(ifp, txr->br, m);
	if (error)
		return (error);

	if (EM_TX_TRYLOCK(txr)) {
		em_mq_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(txr->tq, &txr->tx_task);

	return (0);
}

static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	EM_TX_LOCK_ASSERT(txr);

	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0) {
		return (ENETDOWN);
	}

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				/* It was freed, move forward */
				drbr_advance(ifp, txr->br);
			} else {
				/*
				 * Still have one left, it may not be
				 * the same since the transmit function
				 * may have changed it.
				 */
				drbr_putback(ifp, txr->br, next);
			}
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
		if (next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
		ETHER_BPF_MTAP(ifp, next);
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
			break;
	}

	/* Mark the queue as having work */
	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
		txr->busy = EM_TX_BUSY;

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER) {
		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
	}
	return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			if_setflagbits(ifp, IFF_UP, 0);
			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(if_getflags(ifp) & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
		/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		adapter->hw.mac.max_frame_size =
		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (if_getflags(ifp) & IFF_UP) {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				if ((if_getflags(ifp) ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = if_getflags(ifp);
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			if_togglecapenable(ifp, IFCAP_HWCSUM);
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			if_togglecapenable(ifp, IFCAP_TSO4);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
			if (mask & IFCAP_WOL_MAGIC)
				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
		}
		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
			em_init(adapter);
		if_vlancap(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as the
 *  init entry point in the network interface structure.  It is also
 *  used by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset.  We make a duplicate
	 * in RAR[14] for that eventuality, which ensures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	if_clearhwassist(ifp);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	if (if_getcapenable(ifp) & IFCAP_TSO4)
		if_sethwassistbits(ifp, CSUM_TSO, 0);

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
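	/*
	 * Worked example (illustrative): at the standard 1500-byte MTU,
	 * max_frame_size is 1500 + 14 (header) + 4 (CRC) = 1518, so 2k
	 * clusters (MCLBYTES) suffice; a 9000-byte jumbo MTU gives 9018,
	 * which falls through to the 9k (MJUM9BYTES) pool.
	 */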

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* On AMT-capable hardware the driver can take control from firmware now */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr);
#else
		if (!if_sendq_empty(ifp))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif

	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else {
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	}
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	/*
	** Because we must read the ICR for this interrupt,
	** autoclear may clear other pending causes as well;
	** for that reason we simply trigger a soft interrupt
	** for all these vectors.
	*/
1677 	if (reg_icr) {
1678 		E1000_WRITE_REG(&adapter->hw,
1679 			E1000_ICS, adapter->ims);
1680 	}
1681 	return;
1682 }
1683 
1684 static void
1685 em_handle_rx(void *context, int pending)
1686 {
1687 	struct rx_ring	*rxr = context;
1688 	struct adapter	*adapter = rxr->adapter;
1689 	bool		more;
1690 
1691 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1692 	if (more)
1693 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1694 	else {
1695 		/* Reenable this interrupt */
1696 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1697 	}
1698 }
1699 
1700 static void
1701 em_handle_tx(void *context, int pending)
1702 {
1703 	struct tx_ring	*txr = context;
1704 	struct adapter	*adapter = txr->adapter;
1705 	if_t ifp = adapter->ifp;
1706 
1707 	EM_TX_LOCK(txr);
1708 	em_txeof(txr);
1709 #ifdef EM_MULTIQUEUE
1710 	if (!drbr_empty(ifp, txr->br))
1711 		em_mq_start_locked(ifp, txr);
1712 #else
1713 	if (!if_sendq_empty(ifp))
1714 		em_start_locked(ifp, txr);
1715 #endif
1716 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1717 	EM_TX_UNLOCK(txr);
1718 }
1719 
1720 static void
1721 em_handle_link(void *context, int pending)
1722 {
1723 	struct adapter	*adapter = context;
1724 	struct tx_ring	*txr = adapter->tx_rings;
1725 	if_t ifp = adapter->ifp;
1726 
1727 	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1728 		return;
1729 
1730 	EM_CORE_LOCK(adapter);
1731 	callout_stop(&adapter->timer);
1732 	em_update_link_status(adapter);
1733 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1734 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1735 	    EM_MSIX_LINK | E1000_IMS_LSC);
1736 	if (adapter->link_active) {
1737 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1738 			EM_TX_LOCK(txr);
1739 #ifdef EM_MULTIQUEUE
1740 			if (!drbr_empty(ifp, txr->br))
1741 				em_mq_start_locked(ifp, txr);
1742 #else
1743 			if (!if_sendq_empty(ifp))
1744 				em_start_locked(ifp, txr);
1745 #endif
1746 			EM_TX_UNLOCK(txr);
1747 		}
1748 	}
1749 	EM_CORE_UNLOCK(adapter);
1750 }
1751 
1752 
1753 /*********************************************************************
1754  *
1755  *  Media Ioctl callback
1756  *
1757  *  This routine is called whenever the user queries the status of
1758  *  the interface using ifconfig.
1759  *
1760  **********************************************************************/
1761 static void
1762 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1763 {
1764 	struct adapter *adapter = if_getsoftc(ifp);
1765 	u_char fiber_type = IFM_1000_SX;
1766 
1767 	INIT_DEBUGOUT("em_media_status: begin");
1768 
1769 	EM_CORE_LOCK(adapter);
1770 	em_update_link_status(adapter);
1771 
1772 	ifmr->ifm_status = IFM_AVALID;
1773 	ifmr->ifm_active = IFM_ETHER;
1774 
1775 	if (!adapter->link_active) {
1776 		EM_CORE_UNLOCK(adapter);
1777 		return;
1778 	}
1779 
1780 	ifmr->ifm_status |= IFM_ACTIVE;
1781 
1782 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1783 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1784 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1785 	} else {
1786 		switch (adapter->link_speed) {
1787 		case 10:
1788 			ifmr->ifm_active |= IFM_10_T;
1789 			break;
1790 		case 100:
1791 			ifmr->ifm_active |= IFM_100_TX;
1792 			break;
1793 		case 1000:
1794 			ifmr->ifm_active |= IFM_1000_T;
1795 			break;
1796 		}
1797 		if (adapter->link_duplex == FULL_DUPLEX)
1798 			ifmr->ifm_active |= IFM_FDX;
1799 		else
1800 			ifmr->ifm_active |= IFM_HDX;
1801 	}
1802 	EM_CORE_UNLOCK(adapter);
1803 }
1804 
1805 /*********************************************************************
1806  *
1807  *  Media Ioctl callback
1808  *
1809  *  This routine is called when the user changes speed/duplex using
1810  *  media/mediaopt options with ifconfig.
1811  *
1812  **********************************************************************/
1813 static int
1814 em_media_change(if_t ifp)
1815 {
1816 	struct adapter *adapter = if_getsoftc(ifp);
1817 	struct ifmedia  *ifm = &adapter->media;
1818 
1819 	INIT_DEBUGOUT("em_media_change: begin");
1820 
1821 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1822 		return (EINVAL);
1823 
1824 	EM_CORE_LOCK(adapter);
1825 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1826 	case IFM_AUTO:
1827 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1828 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1829 		break;
1830 	case IFM_1000_LX:
1831 	case IFM_1000_SX:
1832 	case IFM_1000_T:
1833 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1834 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1835 		break;
1836 	case IFM_100_TX:
1837 		adapter->hw.mac.autoneg = FALSE;
1838 		adapter->hw.phy.autoneg_advertised = 0;
1839 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1840 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1841 		else
1842 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1843 		break;
1844 	case IFM_10_T:
1845 		adapter->hw.mac.autoneg = FALSE;
1846 		adapter->hw.phy.autoneg_advertised = 0;
1847 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1848 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1849 		else
1850 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1851 		break;
1852 	default:
1853 		device_printf(adapter->dev, "Unsupported media type\n");
1854 	}
1855 
1856 	em_init_locked(adapter);
1857 	EM_CORE_UNLOCK(adapter);
1858 
1859 	return (0);
1860 }
1861 
1862 /*********************************************************************
1863  *
1864  *  This routine maps the mbufs to tx descriptors.
1865  *
1866  *  return 0 on success, positive on failure
1867  **********************************************************************/
1868 
1869 static int
1870 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1871 {
1872 	struct adapter		*adapter = txr->adapter;
1873 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1874 	bus_dmamap_t		map;
1875 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1876 	struct e1000_tx_desc	*ctxd = NULL;
1877 	struct mbuf		*m_head;
1878 	struct ether_header	*eh;
1879 	struct ip		*ip = NULL;
1880 	struct tcphdr		*tp = NULL;
1881 	u32			txd_upper = 0, txd_lower = 0;
1882 	int			ip_off, poff;
1883 	int			nsegs, i, j, first, last = 0;
1884 	int			error;
1885 	bool			do_tso, tso_desc, remap = TRUE;
1886 
1887 	m_head = *m_headp;
1888 	do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1889 	tso_desc = FALSE;
1890 	ip_off = poff = 0;
1891 
1892 	/*
1893 	 * Intel recommends entire IP/TCP header length reside in a single
1894 	 * buffer. If multiple descriptors are used to describe the IP and
1895 	 * TCP header, each descriptor should describe one or more
1896 	 * complete headers; descriptors referencing only parts of headers
1897 	 * are not supported. If all layer headers are not coalesced into
1898 	 * a single buffer, each buffer should not cross a 4KB boundary,
1899 	 * or be larger than the maximum read request size.
1900 	 * The controller also requires modifying the IP/TCP header to
1901 	 * make TSO work, so we first obtain a writable mbuf chain, then
1902 	 * coalesce the Ethernet/IP/TCP headers into a single buffer to
1903 	 * satisfy the controller. This also simplifies IP/TCP/UDP
1904 	 * checksum offloading, which has similar restrictions.
1905 	 */
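	/*
	 * For example, on a plain TCP/IPv4 frame without IP options the
	 * offsets computed below work out to ip_off = 14 (18 with a
	 * VLAN tag) and poff = ip_off + 20, since ip_hl is 5 32-bit words.
	 */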
1906 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1907 		if (do_tso || (m_head->m_next != NULL &&
1908 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1909 			if (M_WRITABLE(*m_headp) == 0) {
1910 				m_head = m_dup(*m_headp, M_NOWAIT);
1911 				m_freem(*m_headp);
1912 				if (m_head == NULL) {
1913 					*m_headp = NULL;
1914 					return (ENOBUFS);
1915 				}
1916 				*m_headp = m_head;
1917 			}
1918 		}
1919 		/*
1920 		 * XXX
1921 		 * Assume IPv4, we don't have TSO/checksum offload support
1922 		 * for IPv6 yet.
1923 		 */
1924 		ip_off = sizeof(struct ether_header);
1925 		if (m_head->m_len < ip_off) {
1926 			m_head = m_pullup(m_head, ip_off);
1927 			if (m_head == NULL) {
1928 				*m_headp = NULL;
1929 				return (ENOBUFS);
1930 			}
1931 		}
1932 		eh = mtod(m_head, struct ether_header *);
1933 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1934 			ip_off = sizeof(struct ether_vlan_header);
1935 			if (m_head->m_len < ip_off) {
1936 				m_head = m_pullup(m_head, ip_off);
1937 				if (m_head == NULL) {
1938 					*m_headp = NULL;
1939 					return (ENOBUFS);
1940 				}
1941 			}
1942 		}
1943 		if (m_head->m_len < ip_off + sizeof(struct ip)) {
1944 			m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1945 			if (m_head == NULL) {
1946 				*m_headp = NULL;
1947 				return (ENOBUFS);
1948 			}
1949 		}
1950 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1951 		poff = ip_off + (ip->ip_hl << 2);
1952 
1953 		if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1954 			if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1955 				m_head = m_pullup(m_head, poff +
1956 				    sizeof(struct tcphdr));
1957 				if (m_head == NULL) {
1958 					*m_headp = NULL;
1959 					return (ENOBUFS);
1960 				}
1961 			}
1962 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1963 			/*
1964 			 * TSO workaround:
1965 			 *   pull TSO_WORKAROUND (4) extra bytes of data in as well.
1966 			 */
1967 			if (m_head->m_len < poff + (tp->th_off << 2)) {
1968 				m_head = m_pullup(m_head, poff +
1969 				                 (tp->th_off << 2) +
1970 				                 TSO_WORKAROUND);
1971 				if (m_head == NULL) {
1972 					*m_headp = NULL;
1973 					return (ENOBUFS);
1974 				}
1975 			}
1976 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1977 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1978 			if (do_tso) {
1979 				ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
1980 				                  (ip->ip_hl << 2) +
1981 				                  (tp->th_off << 2));
1982 				ip->ip_sum = 0;
1983 				/*
1984 				 * The pseudo TCP checksum must not include the
1985 				 * TCP payload length, so the driver recomputes
1986 				 * it here to match what the hardware expects to
1987 				 * see, as required by Microsoft's Large Send
1988 				 * specification.
1989 				 */
1990 				tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1991 				    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1992 			}
1993 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1994 			if (m_head->m_len < poff + sizeof(struct udphdr)) {
1995 				m_head = m_pullup(m_head, poff +
1996 				    sizeof(struct udphdr));
1997 				if (m_head == NULL) {
1998 					*m_headp = NULL;
1999 					return (ENOBUFS);
2000 				}
2001 			}
2002 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2003 		}
2004 		*m_headp = m_head;
2005 	}
2006 
2007 	/*
2008 	 * Map the packet for DMA
2009 	 *
2010 	 * Capture the first descriptor index,
2011 	 * this descriptor will have the index
2012 	 * of the EOP which is the only one that
2013 	 * now gets a DONE bit writeback.
2014 	 */
2015 	first = txr->next_avail_desc;
2016 	tx_buffer = &txr->tx_buffers[first];
2017 	tx_buffer_mapped = tx_buffer;
2018 	map = tx_buffer->map;
2019 
2020 retry:
2021 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2022 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2023 
2024 	/*
2025 	 * There are two types of errors we can (try) to handle:
2026 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
2027 	 *   out of segments.  Defragment the mbuf chain and try again.
2028 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2029 	 *   at this point in time.  Defer sending and try again later.
2030 	 * All other errors, in particular EINVAL, are fatal and prevent the
2031 	 * mbuf chain from ever going through.  Drop it and report error.
2032 	 */
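	/*
	 * For instance, a chain with more segments than this tag's
	 * EM_MAX_SCATTER limit fails with EFBIG and is defragmented
	 * once before the frame is given up on.
	 */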
2033 	if (error == EFBIG && remap) {
2034 		struct mbuf *m;
2035 
2036 		m = m_defrag(*m_headp, M_NOWAIT);
2037 		if (m == NULL) {
2038 			adapter->mbuf_alloc_failed++;
2039 			m_freem(*m_headp);
2040 			*m_headp = NULL;
2041 			return (ENOBUFS);
2042 		}
2043 		*m_headp = m;
2044 
2045 		/* Try it again, but only once */
2046 		remap = FALSE;
2047 		goto retry;
2048 	} else if (error != 0) {
2049 		adapter->no_tx_dma_setup++;
2050 		m_freem(*m_headp);
2051 		*m_headp = NULL;
2052 		return (error);
2053 	}
2054 
2055 	/*
2056 	 * TSO Hardware workaround, if this packet is not
2057 	 * TSO, and is only a single descriptor long, and
2058 	 * it follows a TSO burst, then we need to add a
2059 	 * sentinel descriptor to prevent premature writeback.
2060 	 */
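	/*
	 * For example, a small single-segment frame (such as a bare
	 * ACK) mapped immediately after a TSO burst takes the
	 * sentinel path in the descriptor loop below.
	 */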
2061 	if ((!do_tso) && (txr->tx_tso == TRUE)) {
2062 		if (nsegs == 1)
2063 			tso_desc = TRUE;
2064 		txr->tx_tso = FALSE;
2065 	}
2066 
2067 	if (nsegs > (txr->tx_avail - EM_MAX_SCATTER)) {
2068 		txr->no_desc_avail++;
2069 		bus_dmamap_unload(txr->txtag, map);
2070 		return (ENOBUFS);
2071 	}
2072 	m_head = *m_headp;
2073 
2074 	/* Do hardware assists */
2075 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2076 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2077 		    &txd_upper, &txd_lower);
2078 		/* we need to make a final sentinel transmit desc */
2079 		tso_desc = TRUE;
2080 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2081 		em_transmit_checksum_setup(txr, m_head,
2082 		    ip_off, ip, &txd_upper, &txd_lower);
2083 
2084 	if (m_head->m_flags & M_VLANTAG) {
2085 		/* Set the vlan id. */
2086 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2087 		/* Tell hardware to add tag */
2088 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2089 	}
2090 
2091 	i = txr->next_avail_desc;
2092 
2093 	/* Set up our transmit descriptors */
2094 	for (j = 0; j < nsegs; j++) {
2095 		bus_size_t seg_len;
2096 		bus_addr_t seg_addr;
2097 
2098 		tx_buffer = &txr->tx_buffers[i];
2099 		ctxd = &txr->tx_base[i];
2100 		seg_addr = segs[j].ds_addr;
2101 		seg_len  = segs[j].ds_len;
2102 		/*
2103 		** TSO Workaround:
2104 		** If this is the last descriptor, we want to
2105 		** split it so we have a small final sentinel
2106 		*/
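		/*
		 * e.g., assuming TSO_WORKAROUND is 4 bytes (as the
		 * workaround comment earlier suggests), a final 1448-byte
		 * segment becomes a 1444-byte descriptor plus a 4-byte
		 * sentinel descriptor.
		 */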
2107 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2108 			seg_len -= TSO_WORKAROUND;
2109 			ctxd->buffer_addr = htole64(seg_addr);
2110 			ctxd->lower.data = htole32(
2111 				adapter->txd_cmd | txd_lower | seg_len);
2112 			ctxd->upper.data = htole32(txd_upper);
2113 			if (++i == adapter->num_tx_desc)
2114 				i = 0;
2115 
2116 			/* Now make the sentinel */
2117 			txr->tx_avail--;
2118 			ctxd = &txr->tx_base[i];
2119 			tx_buffer = &txr->tx_buffers[i];
2120 			ctxd->buffer_addr =
2121 			    htole64(seg_addr + seg_len);
2122 			ctxd->lower.data = htole32(
2123 			    adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2124 			ctxd->upper.data =
2125 			    htole32(txd_upper);
2126 			last = i;
2127 			if (++i == adapter->num_tx_desc)
2128 				i = 0;
2129 		} else {
2130 			ctxd->buffer_addr = htole64(seg_addr);
2131 			ctxd->lower.data = htole32(
2132 			    adapter->txd_cmd | txd_lower | seg_len);
2133 			ctxd->upper.data = htole32(txd_upper);
2134 			last = i;
2135 			if (++i == adapter->num_tx_desc)
2136 				i = 0;
2137 		}
2138 		tx_buffer->m_head = NULL;
2139 		tx_buffer->next_eop = -1;
2140 	}
2141 
2142 	txr->next_avail_desc = i;
2143 	txr->tx_avail -= nsegs;
2144 
2145 	tx_buffer->m_head = m_head;
2146 	/*
2147 	** Here we swap the maps so the last descriptor,
2148 	** which gets the completion interrupt, has the
2149 	** real map, and the first descriptor gets the
2150 	** unused map from this descriptor.
2151 	*/
2152 	tx_buffer_mapped->map = tx_buffer->map;
2153 	tx_buffer->map = map;
2154 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2155 
2156 	/*
2157 	 * The last descriptor of the packet
2158 	 * needs End Of Packet (EOP)
2159 	 * and Report Status (RS) set.
2160 	 */
2161 	ctxd->lower.data |=
2162 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2163 	/*
2164 	 * Keep track in the first buffer which
2165 	 * descriptor will be written back
2166 	 */
2167 	tx_buffer = &txr->tx_buffers[first];
2168 	tx_buffer->next_eop = last;
2169 
2170 	/*
2171 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2172 	 * that this frame is available to transmit.
2173 	 */
2174 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2175 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2176 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2177 
2178 	return (0);
2179 }
2180 
2181 static void
2182 em_set_promisc(struct adapter *adapter)
2183 {
2184 	if_t ifp = adapter->ifp;
2185 	u32		reg_rctl;
2186 
2187 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2188 
2189 	if (if_getflags(ifp) & IFF_PROMISC) {
2190 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2191 		/* Turn this on if you want to see bad packets */
2192 		if (em_debug_sbp)
2193 			reg_rctl |= E1000_RCTL_SBP;
2194 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2195 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2196 		reg_rctl |= E1000_RCTL_MPE;
2197 		reg_rctl &= ~E1000_RCTL_UPE;
2198 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2199 	}
2200 }
2201 
2202 static void
2203 em_disable_promisc(struct adapter *adapter)
2204 {
2205 	if_t		ifp = adapter->ifp;
2206 	u32		reg_rctl;
2207 	int		mcnt = 0;
2208 
2209 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2210 	reg_rctl &= ~E1000_RCTL_UPE;
2211 	if (if_getflags(ifp) & IFF_ALLMULTI)
2212 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2213 	else
2214 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2215 	/* Don't disable if in MAX groups */
2216 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2217 		reg_rctl &= ~E1000_RCTL_MPE;
2218 	reg_rctl &= ~E1000_RCTL_SBP;
2219 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2220 }
2221 
2222 
2223 /*********************************************************************
2224  *  Multicast Update
2225  *
2226  *  This routine is called whenever multicast address list is updated.
2227  *
2228  **********************************************************************/
2229 
2230 static void
2231 em_set_multi(struct adapter *adapter)
2232 {
2233 	if_t ifp = adapter->ifp;
2234 	u32 reg_rctl = 0;
2235 	u8  *mta; /* Multicast array memory */
2236 	int mcnt = 0;
2237 
2238 	IOCTL_DEBUGOUT("em_set_multi: begin");
2239 
2240 	mta = adapter->mta;
2241 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2242 
2243 	if (adapter->hw.mac.type == e1000_82542 &&
2244 	    adapter->hw.revision_id == E1000_REVISION_2) {
2245 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2246 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2247 			e1000_pci_clear_mwi(&adapter->hw);
2248 		reg_rctl |= E1000_RCTL_RST;
2249 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2250 		msec_delay(5);
2251 	}
2252 
2253 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2254 
2255 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2256 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2257 		reg_rctl |= E1000_RCTL_MPE;
2258 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2259 	} else
2260 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2261 
2262 	if (adapter->hw.mac.type == e1000_82542 &&
2263 	    adapter->hw.revision_id == E1000_REVISION_2) {
2264 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2265 		reg_rctl &= ~E1000_RCTL_RST;
2266 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2267 		msec_delay(5);
2268 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2269 			e1000_pci_set_mwi(&adapter->hw);
2270 	}
2271 }
2272 
2273 
2274 /*********************************************************************
2275  *  Timer routine
2276  *
2277  *  This routine checks for link status and updates statistics.
2278  *
2279  **********************************************************************/
2280 
2281 static void
2282 em_local_timer(void *arg)
2283 {
2284 	struct adapter	*adapter = arg;
2285 	if_t ifp = adapter->ifp;
2286 	struct tx_ring	*txr = adapter->tx_rings;
2287 	struct rx_ring	*rxr = adapter->rx_rings;
2288 	u32		trigger = 0;
2289 
2290 	EM_CORE_LOCK_ASSERT(adapter);
2291 
2292 	em_update_link_status(adapter);
2293 	em_update_stats_counters(adapter);
2294 
2295 	/* Reset LAA into RAR[0] on 82571 */
2296 	if ((adapter->hw.mac.type == e1000_82571) &&
2297 	    e1000_get_laa_state_82571(&adapter->hw))
2298 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2299 
2300 	/* Mask to use in the irq trigger */
2301 	if (adapter->msix_mem) {
2302 		for (int i = 0; i < adapter->num_queues; i++, rxr++)
2303 			trigger |= rxr->ims;
2304 		rxr = adapter->rx_rings;
2305 	} else
2306 		trigger = E1000_ICS_RXDMT0;
2307 
2308 	/*
2309 	** Check the state of the TX queue(s); this can
2310 	** be done without the lock because it's read-only
2311 	** and the HUNG state is static once set.
2312 	*/
2313 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2314 		if (txr->busy == EM_TX_HUNG)
2315 			goto hung;
2316 		if (txr->busy >= EM_TX_MAXTRIES)
2317 			txr->busy = EM_TX_HUNG;
2318 		/* Schedule a TX tasklet if needed */
2319 		if (txr->tx_avail <= EM_MAX_SCATTER)
2320 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2321 	}
2322 
2323 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2324 #ifndef DEVICE_POLLING
2325 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2326 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2327 #endif
2328 	return;
2329 hung:
2330 	/* Looks like we're hung */
2331 	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2332 			txr->me);
2333 	em_print_debug_info(adapter);
2334 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2335 	adapter->watchdog_events++;
2336 	em_init_locked(adapter);
2337 }
2338 
2339 
2340 static void
2341 em_update_link_status(struct adapter *adapter)
2342 {
2343 	struct e1000_hw *hw = &adapter->hw;
2344 	if_t ifp = adapter->ifp;
2345 	device_t dev = adapter->dev;
2346 	struct tx_ring *txr = adapter->tx_rings;
2347 	u32 link_check = 0;
2348 
2349 	/* Get the cached link value or read phy for real */
2350 	switch (hw->phy.media_type) {
2351 	case e1000_media_type_copper:
2352 		if (hw->mac.get_link_status) {
2353 			/* Do the work to read phy */
2354 			e1000_check_for_link(hw);
2355 			link_check = !hw->mac.get_link_status;
2356 			if (link_check) /* ESB2 fix */
2357 				e1000_cfg_on_link_up(hw);
2358 		} else
2359 			link_check = TRUE;
2360 		break;
2361 	case e1000_media_type_fiber:
2362 		e1000_check_for_link(hw);
2363 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2364 		    E1000_STATUS_LU);
2365 		break;
2366 	case e1000_media_type_internal_serdes:
2367 		e1000_check_for_link(hw);
2368 		link_check = adapter->hw.mac.serdes_has_link;
2369 		break;
2370 	default:
2371 	case e1000_media_type_unknown:
2372 		break;
2373 	}
2374 
2375 	/* Now check for a transition */
2376 	if (link_check && (adapter->link_active == 0)) {
2377 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2378 		    &adapter->link_duplex);
2379 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2380 		if ((adapter->link_speed != SPEED_1000) &&
2381 		    ((hw->mac.type == e1000_82571) ||
2382 		    (hw->mac.type == e1000_82572))) {
2383 			int tarc0;
2384 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2385 			tarc0 &= ~TARC_SPEED_MODE_BIT;
2386 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2387 		}
2388 		if (bootverbose)
2389 			device_printf(dev, "Link is up %d Mbps %s\n",
2390 			    adapter->link_speed,
2391 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2392 			    "Full Duplex" : "Half Duplex"));
2393 		adapter->link_active = 1;
2394 		adapter->smartspeed = 0;
2395 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2396 		if_link_state_change(ifp, LINK_STATE_UP);
2397 	} else if (!link_check && (adapter->link_active == 1)) {
2398 		if_setbaudrate(ifp, 0);
2399 		adapter->link_speed = 0;
2400 		adapter->link_duplex = 0;
2401 		if (bootverbose)
2402 			device_printf(dev, "Link is Down\n");
2403 		adapter->link_active = 0;
2404 		/* Link down, disable hang detection */
2405 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2406 			txr->busy = EM_TX_IDLE;
2407 		if_link_state_change(ifp, LINK_STATE_DOWN);
2408 	}
2409 }
2410 
2411 /*********************************************************************
2412  *
2413  *  This routine disables all traffic on the adapter by issuing a
2414  *  global reset on the MAC and deallocates TX/RX buffers.
2415  *
2416  *  This routine should always be called with BOTH the CORE
2417  *  and TX locks.
2418  **********************************************************************/
2419 
2420 static void
2421 em_stop(void *arg)
2422 {
2423 	struct adapter	*adapter = arg;
2424 	if_t ifp = adapter->ifp;
2425 	struct tx_ring	*txr = adapter->tx_rings;
2426 
2427 	EM_CORE_LOCK_ASSERT(adapter);
2428 
2429 	INIT_DEBUGOUT("em_stop: begin");
2430 
2431 	em_disable_intr(adapter);
2432 	callout_stop(&adapter->timer);
2433 
2434 	/* Tell the stack that the interface is no longer active */
2435 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2436 
2437 	/* Disarm Hang Detection. */
2438 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2439 		EM_TX_LOCK(txr);
2440 		txr->busy = EM_TX_IDLE;
2441 		EM_TX_UNLOCK(txr);
2442 	}
2443 
2444 	e1000_reset_hw(&adapter->hw);
2445 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2446 
2447 	e1000_led_off(&adapter->hw);
2448 	e1000_cleanup_led(&adapter->hw);
2449 }
2450 
2451 
2452 /*********************************************************************
2453  *
2454  *  Determine hardware revision.
2455  *
2456  **********************************************************************/
2457 static void
2458 em_identify_hardware(struct adapter *adapter)
2459 {
2460 	device_t dev = adapter->dev;
2461 
2462 	/* Make sure our PCI config space has the necessary stuff set */
2463 	pci_enable_busmaster(dev);
2464 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2465 
2466 	/* Save off the information about this board */
2467 	adapter->hw.vendor_id = pci_get_vendor(dev);
2468 	adapter->hw.device_id = pci_get_device(dev);
2469 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2470 	adapter->hw.subsystem_vendor_id =
2471 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2472 	adapter->hw.subsystem_device_id =
2473 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2474 
2475 	/* Do Shared Code Init and Setup */
2476 	if (e1000_set_mac_type(&adapter->hw)) {
2477 		device_printf(dev, "Setup init failure\n");
2478 		return;
2479 	}
2480 }
2481 
2482 static int
2483 em_allocate_pci_resources(struct adapter *adapter)
2484 {
2485 	device_t	dev = adapter->dev;
2486 	int		rid;
2487 
2488 	rid = PCIR_BAR(0);
2489 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2490 	    &rid, RF_ACTIVE);
2491 	if (adapter->memory == NULL) {
2492 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2493 		return (ENXIO);
2494 	}
2495 	adapter->osdep.mem_bus_space_tag =
2496 	    rman_get_bustag(adapter->memory);
2497 	adapter->osdep.mem_bus_space_handle =
2498 	    rman_get_bushandle(adapter->memory);
2499 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2500 
2501 	adapter->hw.back = &adapter->osdep;
2502 
2503 	return (0);
2504 }
2505 
2506 /*********************************************************************
2507  *
2508  *  Setup the Legacy or MSI Interrupt handler
2509  *
2510  **********************************************************************/
2511 int
2512 em_allocate_legacy(struct adapter *adapter)
2513 {
2514 	device_t dev = adapter->dev;
2515 	struct tx_ring	*txr = adapter->tx_rings;
2516 	int error, rid = 0;
2517 
2518 	/* Manually turn off all interrupts */
2519 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2520 
2521 	if (adapter->msix == 1) /* using MSI */
2522 		rid = 1;
2523 	/* We allocate a single interrupt resource */
2524 	adapter->res = bus_alloc_resource_any(dev,
2525 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2526 	if (adapter->res == NULL) {
2527 		device_printf(dev, "Unable to allocate bus resource: "
2528 		    "interrupt\n");
2529 		return (ENXIO);
2530 	}
2531 
2532 	/*
2533 	 * Allocate a fast interrupt and the associated
2534 	 * deferred processing contexts.
2535 	 */
2536 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2537 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2538 	    taskqueue_thread_enqueue, &adapter->tq);
2539 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2540 	    device_get_nameunit(adapter->dev));
2541 	/* Use a TX only tasklet for local timer */
2542 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2543 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2544 	    taskqueue_thread_enqueue, &txr->tq);
2545 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2546 	    device_get_nameunit(adapter->dev));
2547 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2548 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2549 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2550 		device_printf(dev, "Failed to register fast interrupt "
2551 			    "handler: %d\n", error);
2552 		taskqueue_free(adapter->tq);
2553 		adapter->tq = NULL;
2554 		return (error);
2555 	}
2556 
2557 	return (0);
2558 }
2559 
2560 /*********************************************************************
2561  *
2562  *  Setup the MSIX Interrupt handlers
2563  *   This is not really multiqueue; rather,
2564  *   it's just separate interrupt vectors
2565  *   for TX, RX, and Link.
2566  *
2567  **********************************************************************/
2568 int
2569 em_allocate_msix(struct adapter *adapter)
2570 {
2571 	device_t	dev = adapter->dev;
2572 	struct		tx_ring *txr = adapter->tx_rings;
2573 	struct		rx_ring *rxr = adapter->rx_rings;
2574 	int		error, rid, vector = 0;
2575 	int		cpu_id = 0;
2576 
2577 
2578 	/* Make sure all interrupts are disabled */
2579 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2580 
2581 	/* First set up ring resources */
2582 	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2583 
2584 		/* RX ring */
2585 		rid = vector + 1;
2586 
2587 		rxr->res = bus_alloc_resource_any(dev,
2588 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2589 		if (rxr->res == NULL) {
2590 			device_printf(dev,
2591 			    "Unable to allocate bus resource: "
2592 			    "RX MSIX Interrupt %d\n", i);
2593 			return (ENXIO);
2594 		}
2595 		if ((error = bus_setup_intr(dev, rxr->res,
2596 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2597 		    rxr, &rxr->tag)) != 0) {
2598 			device_printf(dev, "Failed to register RX handler");
2599 			return (error);
2600 		}
2601 #if __FreeBSD_version >= 800504
2602 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2603 #endif
2604 		rxr->msix = vector;
2605 
2606 		if (em_last_bind_cpu < 0)
2607 			em_last_bind_cpu = CPU_FIRST();
2608 		cpu_id = em_last_bind_cpu;
2609 		bus_bind_intr(dev, rxr->res, cpu_id);
2610 
2611 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2612 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2613 		    taskqueue_thread_enqueue, &rxr->tq);
2614 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2615 		    device_get_nameunit(adapter->dev), cpu_id);
2616 		/*
2617 		** Set the bit to enable interrupt
2618 		** in E1000_IMS -- bits 20 and 21
2619 		** are for RX0 and RX1, note this has
2620 		** NOTHING to do with the MSIX vector
2621 		*/
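		/*
		 * Illustrative example (assuming queue 0 gets vector 0):
		 * rxr->ims = 1 << 20, and ivars gets (8 | 0) packed into
		 * its low nibble, where the 8 is presumably the entry's
		 * enable bit in the 82574 IVAR layout.
		 */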
2622 		rxr->ims = 1 << (20 + i);
2623 		adapter->ims |= rxr->ims;
2624 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2625 
2626 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2627 	}
2628 
2629 	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2630 		/* TX ring */
2631 		rid = vector + 1;
2632 		txr->res = bus_alloc_resource_any(dev,
2633 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2634 		if (txr->res == NULL) {
2635 			device_printf(dev,
2636 			    "Unable to allocate bus resource: "
2637 			    "TX MSIX Interrupt %d\n", i);
2638 			return (ENXIO);
2639 		}
2640 		if ((error = bus_setup_intr(dev, txr->res,
2641 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2642 		    txr, &txr->tag)) != 0) {
2643 			device_printf(dev, "Failed to register TX handler");
2644 			return (error);
2645 		}
2646 #if __FreeBSD_version >= 800504
2647 		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2648 #endif
2649 		txr->msix = vector;
2650 
2651 		if (em_last_bind_cpu < 0)
2652 			em_last_bind_cpu = CPU_FIRST();
2653 		cpu_id = em_last_bind_cpu;
2654 		bus_bind_intr(dev, txr->res, cpu_id);
2655 
2656 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2657 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2658 		    taskqueue_thread_enqueue, &txr->tq);
2659 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2660 		    device_get_nameunit(adapter->dev), cpu_id);
2661 		/*
2662 		** Set the bit to enable interrupt
2663 		** in E1000_IMS -- bits 22 and 23
2664 		** are for TX0 and TX1, note this has
2665 		** NOTHING to do with the MSIX vector
2666 		*/
2667 		txr->ims = 1 << (22 + i);
2668 		adapter->ims |= txr->ims;
2669 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2670 
2671 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2672 	}
2673 
2674 	/* Link interrupt */
2675 	rid = vector + 1;
2676 	adapter->res = bus_alloc_resource_any(dev,
2677 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2678 	if (!adapter->res) {
2679 		device_printf(dev,"Unable to allocate "
2680 		    "bus resource: Link interrupt [%d]\n", rid);
2681 		return (ENXIO);
2682         }
2683 	/* Set the link handler function */
2684 	error = bus_setup_intr(dev, adapter->res,
2685 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2686 	    em_msix_link, adapter, &adapter->tag);
2687 	if (error) {
2688 		adapter->res = NULL;
2689 		device_printf(dev, "Failed to register LINK handler");
2690 		return (error);
2691 	}
2692 #if __FreeBSD_version >= 800504
2693 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2694 #endif
2695 	adapter->linkvec = vector;
2696 	adapter->ivars |=  (8 | vector) << 16;
2697 	adapter->ivars |= 0x80000000;
2698 
2699 	return (0);
2700 }
2701 
2702 
2703 static void
2704 em_free_pci_resources(struct adapter *adapter)
2705 {
2706 	device_t	dev = adapter->dev;
2707 	struct tx_ring	*txr;
2708 	struct rx_ring	*rxr;
2709 	int		rid;
2710 
2711 
2712 	/*
2713 	** Release all the queue interrupt resources:
2714 	*/
2715 	for (int i = 0; i < adapter->num_queues; i++) {
2716 		txr = &adapter->tx_rings[i];
2717 		/* an early abort? */
2718 		if (txr == NULL)
2719 			break;
2720 		rid = txr->msix + 1;
2721 		if (txr->tag != NULL) {
2722 			bus_teardown_intr(dev, txr->res, txr->tag);
2723 			txr->tag = NULL;
2724 		}
2725 		if (txr->res != NULL)
2726 			bus_release_resource(dev, SYS_RES_IRQ,
2727 			    rid, txr->res);
2728 
2729 		rxr = &adapter->rx_rings[i];
2730 		/* an early abort? */
2731 		if (rxr == NULL)
2732 			break;
2733 		rid = rxr->msix + 1;
2734 		if (rxr->tag != NULL) {
2735 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2736 			rxr->tag = NULL;
2737 		}
2738 		if (rxr->res != NULL)
2739 			bus_release_resource(dev, SYS_RES_IRQ,
2740 			    rid, rxr->res);
2741 	}
2742 
2743 	if (adapter->linkvec) /* we are doing MSIX */
2744 		rid = adapter->linkvec + 1;
2745 	else
2746 		rid = (adapter->msix != 0) ? 1 : 0;
2747 
2748 	if (adapter->tag != NULL) {
2749 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2750 		adapter->tag = NULL;
2751 	}
2752 
2753 	if (adapter->res != NULL)
2754 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2755 
2756 
2757 	if (adapter->msix)
2758 		pci_release_msi(dev);
2759 
2760 	if (adapter->msix_mem != NULL)
2761 		bus_release_resource(dev, SYS_RES_MEMORY,
2762 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2763 
2764 	if (adapter->memory != NULL)
2765 		bus_release_resource(dev, SYS_RES_MEMORY,
2766 		    PCIR_BAR(0), adapter->memory);
2767 
2768 	if (adapter->flash != NULL)
2769 		bus_release_resource(dev, SYS_RES_MEMORY,
2770 		    EM_FLASH, adapter->flash);
2771 }
2772 
2773 /*
2774  * Setup MSI or MSI/X
2775  */
2776 static int
2777 em_setup_msix(struct adapter *adapter)
2778 {
2779 	device_t dev = adapter->dev;
2780 	int val;
2781 
2782 	/* Nearly always going to use one queue */
2783 	adapter->num_queues = 1;
2784 
2785 	/*
2786 	** Try using MSI-X for Hartwell adapters
2787 	*/
2788 	if ((adapter->hw.mac.type == e1000_82574) &&
2789 	    (em_enable_msix == TRUE)) {
2790 #ifdef EM_MULTIQUEUE
2791 		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2792 		if (adapter->num_queues > 1)
2793 			em_enable_vectors_82574(adapter);
2794 #endif
2795 		/* Map the MSIX BAR */
2796 		int rid = PCIR_BAR(EM_MSIX_BAR);
2797 		adapter->msix_mem = bus_alloc_resource_any(dev,
2798 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2799 		if (adapter->msix_mem == NULL) {
2800 			/* May not be enabled */
2801 			device_printf(adapter->dev,
2802 			    "Unable to map MSIX table\n");
2803 			goto msi;
2804 		}
2805 		val = pci_msix_count(dev);
2806 
2807 #ifdef EM_MULTIQUEUE
2808 		/* We need 5 vectors in the multiqueue case */
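		/* With two queues that is 2 RX + 2 TX + 1 link vector. */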
2809 		if (adapter->num_queues > 1) {
2810 			if (val >= 5)
2811 				val = 5;
2812 			else {
2813 				adapter->num_queues = 1;
2814 				device_printf(adapter->dev,
2815 				    "Insufficient MSIX vectors for >1 queue, "
2816 				    "using single queue...\n");
2817 				goto msix_one;
2818 			}
2819 		} else {
2820 msix_one:
2821 #endif
2822 			if (val >= 3)
2823 				val = 3;
2824 			else {
2825 				device_printf(adapter->dev,
2826 			    	"Insufficient MSIX vectors, using MSI\n");
2827 				goto msi;
2828 			}
2829 #ifdef EM_MULTIQUEUE
2830 		}
2831 #endif
2832 
2833 		if (pci_alloc_msix(dev, &val) == 0) {
2834 			device_printf(adapter->dev,
2835 			    "Using MSIX interrupts "
2836 			    "with %d vectors\n", val);
2837 			return (val);
2838 		}
2839 
2840 		/*
2841 		** If MSIX alloc failed or provided us with
2842 		** less than needed, free and fall through to MSI
2843 		*/
2844 		pci_release_msi(dev);
2845 	}
2846 msi:
2847 	if (adapter->msix_mem != NULL) {
2848 		bus_release_resource(dev, SYS_RES_MEMORY,
2849 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2850 		adapter->msix_mem = NULL;
2851 	}
2852 	val = 1;
2853 	if (pci_alloc_msi(dev, &val) == 0) {
2854 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2855 		return (val);
2856 	}
2857 	/* Should only happen due to manual configuration */
2858 	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2859 	return (0);
2860 }
2861 
2862 
2863 /*********************************************************************
2864  *
2865  *  Initialize the hardware to a configuration
2866  *  as specified by the adapter structure.
2867  *
2868  **********************************************************************/
2869 static void
2870 em_reset(struct adapter *adapter)
2871 {
2872 	device_t	dev = adapter->dev;
2873 	if_t ifp = adapter->ifp;
2874 	struct e1000_hw	*hw = &adapter->hw;
2875 	u16		rx_buffer_size;
2876 	u32		pba;
2877 
2878 	INIT_DEBUGOUT("em_reset: begin");
2879 
2880 	/* Set up smart power down as default off on newer adapters. */
2881 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2882 	    hw->mac.type == e1000_82572)) {
2883 		u16 phy_tmp = 0;
2884 
2885 		/* Speed up time to link by disabling smart power down. */
2886 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2887 		phy_tmp &= ~IGP02E1000_PM_SPD;
2888 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2889 	}
2890 
2891 	/*
2892 	 * Packet Buffer Allocation (PBA)
2893 	 * Writing PBA sets the receive portion of the buffer
2894 	 * the remainder is used for the transmit buffer.
2895 	 */
2896 	switch (hw->mac.type) {
2897 	/* Total Packet Buffer on these is 48K */
2898 	case e1000_82571:
2899 	case e1000_82572:
2900 	case e1000_80003es2lan:
2901 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2902 		break;
2903 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2904 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2905 		break;
2906 	case e1000_82574:
2907 	case e1000_82583:
2908 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2909 		break;
2910 	case e1000_ich8lan:
2911 		pba = E1000_PBA_8K;
2912 		break;
2913 	case e1000_ich9lan:
2914 	case e1000_ich10lan:
2915 		/* Boost Receive side for jumbo frames */
2916 		if (adapter->hw.mac.max_frame_size > 4096)
2917 			pba = E1000_PBA_14K;
2918 		else
2919 			pba = E1000_PBA_10K;
2920 		break;
2921 	case e1000_pchlan:
2922 	case e1000_pch2lan:
2923 	case e1000_pch_lpt:
2924 		pba = E1000_PBA_26K;
2925 		break;
2926 	default:
2927 		if (adapter->hw.mac.max_frame_size > 8192)
2928 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2929 		else
2930 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2931 	}
2932 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2933 
2934 	/*
2935 	 * These parameters control the automatic generation (Tx) and
2936 	 * response (Rx) to Ethernet PAUSE frames.
2937 	 * - High water mark should allow for at least two frames to be
2938 	 *   received after sending an XOFF.
2939 	 * - Low water mark works best when it is very near the high water mark.
2940 	 *   This allows the receiver to restart by sending XON when it has
2941 	 *   drained a bit. Here we use an arbitrary value of 1500, which will
2942 	 *   restart after one full frame is pulled from the buffer. There
2943 	 *   could be several smaller frames in the buffer and if so they will
2944 	 *   not trigger the XON until their total number reduces the buffer
2945 	 *   by 1500.
2946 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2947 	 */
2948 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2949 	hw->fc.high_water = rx_buffer_size -
2950 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2951 	hw->fc.low_water = hw->fc.high_water - 1500;
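	/*
	 * Illustrative arithmetic: with a 32KB Rx PBA and a 1522-byte
	 * max frame, rx_buffer_size = 32 << 10 = 32768, high_water =
	 * 32768 - roundup2(1522, 1024) = 30720, and low_water =
	 * 30720 - 1500 = 29220.
	 */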
2952 
2953 	if (adapter->fc) /* locally set flow control value? */
2954 		hw->fc.requested_mode = adapter->fc;
2955 	else
2956 		hw->fc.requested_mode = e1000_fc_full;
2957 
2958 	if (hw->mac.type == e1000_80003es2lan)
2959 		hw->fc.pause_time = 0xFFFF;
2960 	else
2961 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2962 
2963 	hw->fc.send_xon = TRUE;
2964 
2965 	/* Device specific overrides/settings */
2966 	switch (hw->mac.type) {
2967 	case e1000_pchlan:
2968 		/* Workaround: no TX flow ctrl for PCH */
2969 		hw->fc.requested_mode = e1000_fc_rx_pause;
2970 		hw->fc.pause_time = 0xFFFF; /* override */
2971 		if (if_getmtu(ifp) > ETHERMTU) {
2972 			hw->fc.high_water = 0x3500;
2973 			hw->fc.low_water = 0x1500;
2974 		} else {
2975 			hw->fc.high_water = 0x5000;
2976 			hw->fc.low_water = 0x3000;
2977 		}
2978 		hw->fc.refresh_time = 0x1000;
2979 		break;
2980 	case e1000_pch2lan:
2981 	case e1000_pch_lpt:
2982 		hw->fc.high_water = 0x5C20;
2983 		hw->fc.low_water = 0x5048;
2984 		hw->fc.pause_time = 0x0650;
2985 		hw->fc.refresh_time = 0x0400;
2986 		/* Jumbos need adjusted PBA */
2987 		if (if_getmtu(ifp) > ETHERMTU)
2988 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2989 		else
2990 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2991 		break;
2992 	case e1000_ich9lan:
2993 	case e1000_ich10lan:
2994 		if (if_getmtu(ifp) > ETHERMTU) {
2995 			hw->fc.high_water = 0x2800;
2996 			hw->fc.low_water = hw->fc.high_water - 8;
2997 			break;
2998 		}
2999 		/* else fall thru */
3000 	default:
3001 		if (hw->mac.type == e1000_80003es2lan)
3002 			hw->fc.pause_time = 0xFFFF;
3003 		break;
3004 	}
3005 
3006 	/* Issue a global reset */
3007 	e1000_reset_hw(hw);
3008 	E1000_WRITE_REG(hw, E1000_WUC, 0);
3009 	em_disable_aspm(adapter);
3010 	/* and a re-init */
3011 	if (e1000_init_hw(hw) < 0) {
3012 		device_printf(dev, "Hardware Initialization Failed\n");
3013 		return;
3014 	}
3015 
3016 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3017 	e1000_get_phy_info(hw);
3018 	e1000_check_for_link(hw);
3019 	return;
3020 }
3021 
3022 /*********************************************************************
3023  *
3024  *  Setup networking device structure and register an interface.
3025  *
3026  **********************************************************************/
3027 static int
3028 em_setup_interface(device_t dev, struct adapter *adapter)
3029 {
3030 	if_t ifp;
3031 
3032 	INIT_DEBUGOUT("em_setup_interface: begin");
3033 
3034 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3035 	if (ifp == NULL) {
3036 		device_printf(dev, "can not allocate ifnet structure\n");
3037 		return (-1);
3038 	}
3039 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3040 	if_setdev(ifp, dev);
3041 	if_setinitfn(ifp, em_init);
3042 	if_setsoftc(ifp, adapter);
3043 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3044 	if_setioctlfn(ifp, em_ioctl);
3045 	if_setgetcounterfn(ifp, em_get_counter);
3046 	/* TSO parameters */
3047 	ifp->if_hw_tsomax = IP_MAXPACKET;
3048 	ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER;
3049 	ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3050 
3051 #ifdef EM_MULTIQUEUE
3052 	/* Multiqueue stack interface */
3053 	if_settransmitfn(ifp, em_mq_start);
3054 	if_setqflushfn(ifp, em_qflush);
3055 #else
3056 	if_setstartfn(ifp, em_start);
3057 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3058 	if_setsendqready(ifp);
3059 #endif
3060 
3061 	ether_ifattach(ifp, adapter->hw.mac.addr);
3062 
3063 	if_setcapabilities(ifp, 0);
3064 	if_setcapenable(ifp, 0);
3065 
3066 
3067 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3068 	    IFCAP_TSO4, 0);
3069 	/*
3070 	 * Tell the upper layer(s) we
3071 	 * support full VLAN capability
3072 	 */
3073 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3074 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3075 	    IFCAP_VLAN_MTU, 0);
3076 	if_setcapenable(ifp, if_getcapabilities(ifp));
3077 
3078 	/*
3079 	** Don't turn this on by default: if VLANs are
3080 	** created on another pseudo device (e.g. lagg),
3081 	** VLAN events are not passed through, breaking
3082 	** operation, but with HW FILTER off it works. If
3083 	** you use VLANs directly on the em driver you can
3084 	** enable this to get full hardware tag filtering.
3085 	*/
3086 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
3087 
3088 #ifdef DEVICE_POLLING
3089 	if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
3090 #endif
3091 
3092 	/* Enable only WOL MAGIC by default */
3093 	if (adapter->wol) {
3094 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3095 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3096 	}
3097 
3098 	/*
3099 	 * Specify the media types supported by this adapter and register
3100 	 * callbacks to update media and link information
3101 	 */
3102 	ifmedia_init(&adapter->media, IFM_IMASK,
3103 	    em_media_change, em_media_status);
3104 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3105 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3106 		u_char fiber_type = IFM_1000_SX;	/* default type */
3107 
3108 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3109 			    0, NULL);
3110 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3111 	} else {
3112 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3113 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3114 			    0, NULL);
3115 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3116 			    0, NULL);
3117 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3118 			    0, NULL);
3119 		if (adapter->hw.phy.type != e1000_phy_ife) {
3120 			ifmedia_add(&adapter->media,
3121 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3122 			ifmedia_add(&adapter->media,
3123 				IFM_ETHER | IFM_1000_T, 0, NULL);
3124 		}
3125 	}
3126 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3127 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3128 	return (0);
3129 }
3130 
3131 
3132 /*
3133  * Manage DMA'able memory.
3134  */
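/*
 * Typical usage, mirroring the ring setup code later in this file:
 *
 *	if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
 *		goto err_tx_desc;
 *	...
 *	em_dma_free(adapter, &txr->txdma);
 */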
3135 static void
3136 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3137 {
3138 	if (error)
3139 		return;
3140 	*(bus_addr_t *) arg = segs[0].ds_addr;
3141 }
3142 
3143 static int
3144 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3145         struct em_dma_alloc *dma, int mapflags)
3146 {
3147 	int error;
3148 
3149 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3150 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3151 				BUS_SPACE_MAXADDR,	/* lowaddr */
3152 				BUS_SPACE_MAXADDR,	/* highaddr */
3153 				NULL, NULL,		/* filter, filterarg */
3154 				size,			/* maxsize */
3155 				1,			/* nsegments */
3156 				size,			/* maxsegsize */
3157 				0,			/* flags */
3158 				NULL,			/* lockfunc */
3159 				NULL,			/* lockarg */
3160 				&dma->dma_tag);
3161 	if (error) {
3162 		device_printf(adapter->dev,
3163 		    "%s: bus_dma_tag_create failed: %d\n",
3164 		    __func__, error);
3165 		goto fail_0;
3166 	}
3167 
3168 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3169 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3170 	if (error) {
3171 		device_printf(adapter->dev,
3172 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3173 		    __func__, (uintmax_t)size, error);
3174 		goto fail_2;
3175 	}
3176 
3177 	dma->dma_paddr = 0;
3178 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3179 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3180 	if (error || dma->dma_paddr == 0) {
3181 		device_printf(adapter->dev,
3182 		    "%s: bus_dmamap_load failed: %d\n",
3183 		    __func__, error);
3184 		goto fail_3;
3185 	}
3186 
3187 	return (0);
3188 
3189 fail_3:
3190 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3191 fail_2:
3192 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3193 	bus_dma_tag_destroy(dma->dma_tag);
3194 fail_0:
3195 	dma->dma_tag = NULL;
3196 
3197 	return (error);
3198 }
3199 
3200 static void
3201 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3202 {
3203 	if (dma->dma_tag == NULL)
3204 		return;
3205 	if (dma->dma_paddr != 0) {
3206 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3207 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3208 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3209 		dma->dma_paddr = 0;
3210 	}
3211 	if (dma->dma_vaddr != NULL) {
3212 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3213 		dma->dma_vaddr = NULL;
3214 	}
3215 	bus_dma_tag_destroy(dma->dma_tag);
3216 	dma->dma_tag = NULL;
3217 }
3218 
3219 
3220 /*********************************************************************
3221  *
3222  *  Allocate memory for the transmit and receive rings, and then
3223  *  the descriptors associated with each, called only once at attach.
3224  *
3225  **********************************************************************/
3226 static int
3227 em_allocate_queues(struct adapter *adapter)
3228 {
3229 	device_t		dev = adapter->dev;
3230 	struct tx_ring		*txr = NULL;
3231 	struct rx_ring		*rxr = NULL;
3232 	int rsize, tsize, error = E1000_SUCCESS;
3233 	int txconf = 0, rxconf = 0;
3234 
3235 
3236 	/* Allocate the TX ring struct memory */
3237 	if (!(adapter->tx_rings =
3238 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3239 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3240 		device_printf(dev, "Unable to allocate TX ring memory\n");
3241 		error = ENOMEM;
3242 		goto fail;
3243 	}
3244 
3245 	/* Now allocate the RX */
3246 	if (!(adapter->rx_rings =
3247 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3248 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3249 		device_printf(dev, "Unable to allocate RX ring memory\n");
3250 		error = ENOMEM;
3251 		goto rx_fail;
3252 	}
3253 
3254 	tsize = roundup2(adapter->num_tx_desc *
3255 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3256 	/*
3257 	 * Now set up the TX queues, txconf is needed to handle the
3258 	 * possibility that things fail midcourse and we need to
3259 	 * undo memory gracefully
3260 	 */
3261 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3262 		/* Set up some basics */
3263 		txr = &adapter->tx_rings[i];
3264 		txr->adapter = adapter;
3265 		txr->me = i;
3266 
3267 		/* Initialize the TX lock */
3268 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3269 		    device_get_nameunit(dev), txr->me);
3270 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3271 
3272 		if (em_dma_malloc(adapter, tsize,
3273 			&txr->txdma, BUS_DMA_NOWAIT)) {
3274 			device_printf(dev,
3275 			    "Unable to allocate TX Descriptor memory\n");
3276 			error = ENOMEM;
3277 			goto err_tx_desc;
3278 		}
3279 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3280 		bzero((void *)txr->tx_base, tsize);
3281 
3282 		if (em_allocate_transmit_buffers(txr)) {
3283 			device_printf(dev,
3284 			    "Critical Failure setting up transmit buffers\n");
3285 			error = ENOMEM;
3286 			goto err_tx_desc;
3287 		}
3288 #if __FreeBSD_version >= 800000
3289 		/* Allocate a buf ring */
3290 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3291 		    M_WAITOK, &txr->tx_mtx);
3292 #endif
3293 	}
3294 
3295 	/*
3296 	 * Next the RX queues...
3297 	 */
3298 	rsize = roundup2(adapter->num_rx_desc *
3299 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3300 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3301 		rxr = &adapter->rx_rings[i];
3302 		rxr->adapter = adapter;
3303 		rxr->me = i;
3304 
3305 		/* Initialize the RX lock */
3306 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3307 		    device_get_nameunit(dev), rxr->me);
3308 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3309 
3310 		if (em_dma_malloc(adapter, rsize,
3311 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3312 			device_printf(dev,
3313 			    "Unable to allocate RxDescriptor memory\n");
3314 			error = ENOMEM;
3315 			goto err_rx_desc;
3316 		}
3317 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3318 		bzero((void *)rxr->rx_base, rsize);
3319 
3320 		/* Allocate receive buffers for the ring */
3321 		if (em_allocate_receive_buffers(rxr)) {
3322 			device_printf(dev,
3323 			    "Critical Failure setting up receive buffers\n");
3324 			error = ENOMEM;
3325 			goto err_rx_desc;
3326 		}
3327 	}
3328 
3329 	return (0);
3330 
3331 err_rx_desc:
3332 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3333 		em_dma_free(adapter, &rxr->rxdma);
3334 err_tx_desc:
3335 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3336 		em_dma_free(adapter, &txr->txdma);
3337 	free(adapter->rx_rings, M_DEVBUF);
3338 rx_fail:
3339 #if __FreeBSD_version >= 800000
3340 	buf_ring_free(txr->br, M_DEVBUF);
3341 #endif
3342 	free(adapter->tx_rings, M_DEVBUF);
3343 fail:
3344 	return (error);
3345 }
3346 
3347 
3348 /*********************************************************************
3349  *
3350  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3351  *  the information needed to transmit a packet on the wire. This is
3352  *  called only once at attach, setup is done every reset.
3353  *
3354  **********************************************************************/
3355 static int
3356 em_allocate_transmit_buffers(struct tx_ring *txr)
3357 {
3358 	struct adapter *adapter = txr->adapter;
3359 	device_t dev = adapter->dev;
3360 	struct em_buffer *txbuf;
3361 	int error, i;
3362 
3363 	/*
3364 	 * Setup DMA descriptor areas.
3365 	 */
3366 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3367 			       1, 0,			/* alignment, bounds */
3368 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3369 			       BUS_SPACE_MAXADDR,	/* highaddr */
3370 			       NULL, NULL,		/* filter, filterarg */
3371 			       EM_TSO_SIZE,		/* maxsize */
3372 			       EM_MAX_SCATTER,		/* nsegments */
3373 			       PAGE_SIZE,		/* maxsegsize */
3374 			       0,			/* flags */
3375 			       NULL,			/* lockfunc */
3376 			       NULL,			/* lockfuncarg */
3377 			       &txr->txtag))) {
3378 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3379 		goto fail;
3380 	}
3381 
3382 	if (!(txr->tx_buffers =
3383 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3384 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3385 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3386 		error = ENOMEM;
3387 		goto fail;
3388 	}
3389 
3390         /* Create the descriptor buffer dma maps */
3391 	txbuf = txr->tx_buffers;
3392 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3393 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3394 		if (error != 0) {
3395 			device_printf(dev, "Unable to create TX DMA map\n");
3396 			goto fail;
3397 		}
3398 	}
3399 
3400 	return (0);
3401 fail:
3402 	/* We free all, it handles case where we are in the middle */
3403 	em_free_transmit_structures(adapter);
3404 	return (error);
3405 }
3406 
3407 /*********************************************************************
3408  *
3409  *  Initialize a transmit ring.
3410  *
3411  **********************************************************************/
3412 static void
3413 em_setup_transmit_ring(struct tx_ring *txr)
3414 {
3415 	struct adapter *adapter = txr->adapter;
3416 	struct em_buffer *txbuf;
3417 	int i;
3418 #ifdef DEV_NETMAP
3419 	struct netmap_slot *slot;
3420 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3421 #endif /* DEV_NETMAP */
3422 
3423 	/* Clear the old descriptor contents */
3424 	EM_TX_LOCK(txr);
3425 #ifdef DEV_NETMAP
3426 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3427 #endif /* DEV_NETMAP */
3428 
3429 	bzero((void *)txr->tx_base,
3430 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3431 	/* Reset indices */
3432 	txr->next_avail_desc = 0;
3433 	txr->next_to_clean = 0;
3434 
3435 	/* Free any existing tx buffers. */
3436 	txbuf = txr->tx_buffers;
3437 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3438 		if (txbuf->m_head != NULL) {
3439 			bus_dmamap_sync(txr->txtag, txbuf->map,
3440 			    BUS_DMASYNC_POSTWRITE);
3441 			bus_dmamap_unload(txr->txtag, txbuf->map);
3442 			m_freem(txbuf->m_head);
3443 			txbuf->m_head = NULL;
3444 		}
3445 #ifdef DEV_NETMAP
3446 		if (slot) {
3447 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3448 			uint64_t paddr;
3449 			void *addr;
3450 
3451 			addr = PNMB(na, slot + si, &paddr);
3452 			txr->tx_base[i].buffer_addr = htole64(paddr);
3453 			/* reload the map for netmap mode */
3454 			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3455 		}
3456 #endif /* DEV_NETMAP */
3457 
3458 		/* clear the watch index */
3459 		txbuf->next_eop = -1;
3460 	}
3461 
3462 	/* Set number of descriptors available */
3463 	txr->tx_avail = adapter->num_tx_desc;
3464 	txr->busy = EM_TX_IDLE;
3465 
3466 	/* Clear checksum offload context. */
3467 	txr->last_hw_offload = 0;
3468 	txr->last_hw_ipcss = 0;
3469 	txr->last_hw_ipcso = 0;
3470 	txr->last_hw_tucss = 0;
3471 	txr->last_hw_tucso = 0;
3472 
3473 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3474 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3475 	EM_TX_UNLOCK(txr);
3476 }
3477 
3478 /*********************************************************************
3479  *
3480  *  Initialize all transmit rings.
3481  *
3482  **********************************************************************/
3483 static void
3484 em_setup_transmit_structures(struct adapter *adapter)
3485 {
3486 	struct tx_ring *txr = adapter->tx_rings;
3487 
3488 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3489 		em_setup_transmit_ring(txr);
3490 
3491 	return;
3492 }
3493 
3494 /*********************************************************************
3495  *
3496  *  Enable transmit unit.
3497  *
3498  **********************************************************************/
3499 static void
3500 em_initialize_transmit_unit(struct adapter *adapter)
3501 {
3502 	struct tx_ring	*txr = adapter->tx_rings;
3503 	struct e1000_hw	*hw = &adapter->hw;
3504 	u32	tctl, txdctl = 0, tarc, tipg = 0;
3505 
3506 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3507 
3508 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3509 		u64 bus_addr = txr->txdma.dma_paddr;
3510 		/* Base and Len of TX Ring */
3511 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3512 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3513 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3514 	    	    (u32)(bus_addr >> 32));
3515 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3516 	    	    (u32)bus_addr);
3517 		/* Init the HEAD/TAIL indices */
3518 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3519 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3520 
3521 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3522 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3523 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3524 
3525 		txr->busy = EM_TX_IDLE;
3526 		txdctl = 0; /* clear txdctl */
3527 		txdctl |= 0x1f;    /* PTHRESH */
3528 		txdctl |= 1 << 8;  /* HTHRESH */
3529 		txdctl |= 1 << 16; /* WTHRESH */
3530 		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3531 		txdctl |= E1000_TXDCTL_GRAN;
3532 		txdctl |= 1 << 25; /* LWTHRESH */
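		/*
		 * Sketch of the result, assuming E1000_TXDCTL_GRAN is bit
		 * 24: PTHRESH = 0x1f, HTHRESH = 1 and WTHRESH = 1 combine
		 * with the reserved, granularity and LWTHRESH bits above
		 * to give txdctl == 0x0341011f.
		 */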
3533 
3534 		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3535 	}
3536 
3537 	/* Set the default values for the Tx Inter Packet Gap timer */
3538 	switch (adapter->hw.mac.type) {
3539 	case e1000_80003es2lan:
3540 		tipg = DEFAULT_82543_TIPG_IPGR1;
3541 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3542 		    E1000_TIPG_IPGR2_SHIFT;
3543 		break;
3544 	default:
3545 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3546 		    (adapter->hw.phy.media_type ==
3547 		    e1000_media_type_internal_serdes))
3548 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3549 		else
3550 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3551 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3552 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3553 	}
3554 
3555 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3556 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3557 
3558 	if (adapter->hw.mac.type >= e1000_82540)
3559 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3560 		    adapter->tx_abs_int_delay.value);
3561 
3562 	if ((adapter->hw.mac.type == e1000_82571) ||
3563 	    (adapter->hw.mac.type == e1000_82572)) {
3564 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3565 		tarc |= TARC_SPEED_MODE_BIT;
3566 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3567 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3568 		/* errata: program both queues to unweighted RR */
3569 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3570 		tarc |= 1;
3571 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3572 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3573 		tarc |= 1;
3574 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3575 	} else if (adapter->hw.mac.type == e1000_82574) {
3576 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3577 		tarc |= TARC_ERRATA_BIT;
3578 		if (adapter->num_queues > 1) {
3579 			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3580 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3581 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3582 		} else
3583 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3584 	}
3585 
3586 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3587 	if (adapter->tx_int_delay.value > 0)
3588 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3589 
3590 	/* Program the Transmit Control Register */
3591 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3592 	tctl &= ~E1000_TCTL_CT;
3593 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3594 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3595 
3596 	if (adapter->hw.mac.type >= e1000_82571)
3597 		tctl |= E1000_TCTL_MULR;
3598 
3599 	/* This write will effectively turn on the transmit unit. */
3600 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3601 
3602 }
3603 
3604 
3605 /*********************************************************************
3606  *
3607  *  Free all transmit rings.
3608  *
3609  **********************************************************************/
3610 static void
3611 em_free_transmit_structures(struct adapter *adapter)
3612 {
3613 	struct tx_ring *txr = adapter->tx_rings;
3614 
3615 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3616 		EM_TX_LOCK(txr);
3617 		em_free_transmit_buffers(txr);
3618 		em_dma_free(adapter, &txr->txdma);
3619 		EM_TX_UNLOCK(txr);
3620 		EM_TX_LOCK_DESTROY(txr);
3621 	}
3622 
3623 	free(adapter->tx_rings, M_DEVBUF);
3624 }
3625 
3626 /*********************************************************************
3627  *
3628  *  Free transmit ring related data structures.
3629  *
3630  **********************************************************************/
3631 static void
3632 em_free_transmit_buffers(struct tx_ring *txr)
3633 {
3634 	struct adapter		*adapter = txr->adapter;
3635 	struct em_buffer	*txbuf;
3636 
3637 	INIT_DEBUGOUT("free_transmit_ring: begin");
3638 
3639 	if (txr->tx_buffers == NULL)
3640 		return;
3641 
3642 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3643 		txbuf = &txr->tx_buffers[i];
3644 		if (txbuf->m_head != NULL) {
3645 			bus_dmamap_sync(txr->txtag, txbuf->map,
3646 			    BUS_DMASYNC_POSTWRITE);
3647 			bus_dmamap_unload(txr->txtag,
3648 			    txbuf->map);
3649 			m_freem(txbuf->m_head);
3650 			txbuf->m_head = NULL;
3651 			if (txbuf->map != NULL) {
3652 				bus_dmamap_destroy(txr->txtag,
3653 				    txbuf->map);
3654 				txbuf->map = NULL;
3655 			}
3656 		} else if (txbuf->map != NULL) {
3657 			bus_dmamap_unload(txr->txtag,
3658 			    txbuf->map);
3659 			bus_dmamap_destroy(txr->txtag,
3660 			    txbuf->map);
3661 			txbuf->map = NULL;
3662 		}
3663 	}
3664 #if __FreeBSD_version >= 800000
3665 	if (txr->br != NULL)
3666 		buf_ring_free(txr->br, M_DEVBUF);
3667 #endif
3668 	if (txr->tx_buffers != NULL) {
3669 		free(txr->tx_buffers, M_DEVBUF);
3670 		txr->tx_buffers = NULL;
3671 	}
3672 	if (txr->txtag != NULL) {
3673 		bus_dma_tag_destroy(txr->txtag);
3674 		txr->txtag = NULL;
3675 	}
3676 	return;
3677 }
3678 
3679 
3680 /*********************************************************************
3681  *  The offload context is protocol specific (TCP/UDP) and thus
3682  *  only needs to be set when the protocol changes. A context
3683  *  change can be a performance detriment, however, and might be
3684  *  better just disabled. The reason arises in the way in which
3685  *  the controller supports pipelined requests from the
3686  *  Tx data DMA. Up to four requests can be pipelined, and they may
3687  *  belong to the same packet or to multiple packets. However, all
3688  *  requests for one packet are issued before a request is issued
3689  *  for a subsequent packet, and if a request for the next packet
3690  *  requires a context change, that request will be stalled
3691  *  until the previous request completes. This means setting up
3692  *  a new context effectively disables pipelined Tx data DMA,
3693  *  which in turn greatly slows down the sending of small
3694  *  frames.
3695  **********************************************************************/
3696 static void
3697 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3698     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3699 {
3700 	struct adapter			*adapter = txr->adapter;
3701 	struct e1000_context_desc	*TXD = NULL;
3702 	struct em_buffer		*tx_buffer;
3703 	int				cur, hdr_len;
3704 	u32				cmd = 0;
3705 	u16				offload = 0;
3706 	u8				ipcso, ipcss, tucso, tucss;
3707 
3708 	ipcss = ipcso = tucss = tucso = 0;
3709 	hdr_len = ip_off + (ip->ip_hl << 2);
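	/*
	 * Worked example (sketch): for an untagged Ethernet frame ip_off
	 * is 14; with a 20-byte IP header (ip_hl == 5), hdr_len is
	 * 14 + 20 = 34.  The IP checksum field then sits at
	 * ip_off + offsetof(struct ip, ip_sum) = 14 + 10 = 24, and a TCP
	 * checksum at hdr_len + offsetof(struct tcphdr, th_sum) =
	 * 34 + 16 = 50 bytes into the frame.
	 */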
3710 	cur = txr->next_avail_desc;
3711 
3712 	/* Setup of IP header checksum. */
3713 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3714 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3715 		offload |= CSUM_IP;
3716 		ipcss = ip_off;
3717 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3718 		/*
3719 		 * Start offset for header checksum calculation.
3720 		 * End offset for header checksum calculation.
3721 		 * Offset of place to put the checksum.
3722 		 */
3723 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3724 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3725 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3726 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3727 		cmd |= E1000_TXD_CMD_IP;
3728 	}
3729 
3730 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3731  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3732  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3733  		offload |= CSUM_TCP;
3734  		tucss = hdr_len;
3735  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3736  		/*
3737  		 * Setting up a new checksum offload context for every
3738  		 * frame takes a lot of hardware processing time, and it
3739  		 * hurts performance badly for small frames, so avoid it
3740  		 * if the driver can reuse the previously configured
3741  		 * checksum offload context.
3742  		 */
3743  		if (txr->last_hw_offload == offload) {
3744  			if (offload & CSUM_IP) {
3745  				if (txr->last_hw_ipcss == ipcss &&
3746  				    txr->last_hw_ipcso == ipcso &&
3747  				    txr->last_hw_tucss == tucss &&
3748  				    txr->last_hw_tucso == tucso)
3749  					return;
3750  			} else {
3751  				if (txr->last_hw_tucss == tucss &&
3752  				    txr->last_hw_tucso == tucso)
3753  					return;
3754  			}
3755   		}
3756  		txr->last_hw_offload = offload;
3757  		txr->last_hw_tucss = tucss;
3758  		txr->last_hw_tucso = tucso;
3759  		/*
3760  		 * Start offset for payload checksum calculation.
3761  		 * End offset for payload checksum calculation.
3762  		 * Offset of place to put the checksum.
3763  		 */
3764 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3765  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3766  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3767  		TXD->upper_setup.tcp_fields.tucso = tucso;
3768  		cmd |= E1000_TXD_CMD_TCP;
3769  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3770  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3771  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
 		offload |= CSUM_UDP;
3772  		tucss = hdr_len;
3773  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3774  		/*
3775  		 * Setting up a new checksum offload context for every
3776  		 * frame takes a lot of hardware processing time, and it
3777  		 * hurts performance badly for small frames, so avoid it
3778  		 * if the driver can reuse the previously configured
3779  		 * checksum offload context.
3780  		 */
3781  		if (txr->last_hw_offload == offload) {
3782  			if (offload & CSUM_IP) {
3783  				if (txr->last_hw_ipcss == ipcss &&
3784  				    txr->last_hw_ipcso == ipcso &&
3785  				    txr->last_hw_tucss == tucss &&
3786  				    txr->last_hw_tucso == tucso)
3787  					return;
3788  			} else {
3789  				if (txr->last_hw_tucss == tucss &&
3790  				    txr->last_hw_tucso == tucso)
3791  					return;
3792  			}
3793  		}
3794  		txr->last_hw_offload = offload;
3795  		txr->last_hw_tucss = tucss;
3796  		txr->last_hw_tucso = tucso;
3797  		/*
3798  		 * Start offset for payload checksum calculation.
3799  		 * End offset for payload checksum calculation.
3800  		 * Offset of place to put the checksum.
3801  		 */
3802 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3803  		TXD->upper_setup.tcp_fields.tucss = tucss;
3804  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3805  		TXD->upper_setup.tcp_fields.tucso = tucso;
3806   	}
3807 
3808  	if (offload & CSUM_IP) {
3809  		txr->last_hw_ipcss = ipcss;
3810  		txr->last_hw_ipcso = ipcso;
3811   	}
3812 
3813 	TXD->tcp_seg_setup.data = htole32(0);
3814 	TXD->cmd_and_length =
3815 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3816 	tx_buffer = &txr->tx_buffers[cur];
3817 	tx_buffer->m_head = NULL;
3818 	tx_buffer->next_eop = -1;
3819 
3820 	if (++cur == adapter->num_tx_desc)
3821 		cur = 0;
3822 
3823 	txr->tx_avail--;
3824 	txr->next_avail_desc = cur;
3825 }
3826 
3827 
3828 /**********************************************************************
3829  *
3830  *  Setup work for hardware segmentation offload (TSO)
3831  *
3832  **********************************************************************/
3833 static void
3834 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3835     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3836 {
3837 	struct adapter			*adapter = txr->adapter;
3838 	struct e1000_context_desc	*TXD;
3839 	struct em_buffer		*tx_buffer;
3840 	int cur, hdr_len;
3841 
3842 	/*
3843 	 * In theory we can use the same TSO context if and only if
3844 	 * In theory we can reuse the same TSO context if and only if
3845 	 * the frame is the same type (IP/TCP) and has the same MSS.
3846 	 * However, checking whether a frame has the same IP/TCP
3847 	 * structure is hard, so just ignore that and always
3848 	 * re-establish a new TSO context.
3849 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3850 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
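	/*
	 * Example (sketch): with a 14-byte Ethernet header, a 20-byte IP
	 * header (ip_hl == 5) and a 20-byte TCP header (th_off == 5),
	 * hdr_len = 14 + 20 + 20 = 54; everything past that is payload
	 * that the hardware slices into tso_segsz-sized segments.
	 */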
3851 		      E1000_TXD_DTYP_D |	/* Data descr type */
3852 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3853 
3854 	/* IP and/or TCP header checksum calculation and insertion. */
3855 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3856 
3857 	cur = txr->next_avail_desc;
3858 	tx_buffer = &txr->tx_buffers[cur];
3859 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3860 
3861 	/*
3862 	 * Start offset for header checksum calculation.
3863 	 * End offset for header checksum calculation.
3864 	 * Offset of the place to put the checksum.
3865 	 */
3866 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3867 	TXD->lower_setup.ip_fields.ipcse =
3868 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3869 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3870 	/*
3871 	 * Start offset for payload checksum calculation.
3872 	 * End offset for payload checksum calculation.
3873 	 * Offset of place to put the checksum.
3874 	 */
3875 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3876 	TXD->upper_setup.tcp_fields.tucse = 0;
3877 	TXD->upper_setup.tcp_fields.tucso =
3878 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3879 	/*
3880 	 * Payload size per packet w/o any headers.
3881 	 * Length of all headers up to payload.
3882 	 */
3883 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3884 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3885 
3886 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3887 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3888 				E1000_TXD_CMD_TSE |	/* TSE context */
3889 				E1000_TXD_CMD_IP |	/* Do IP csum */
3890 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3891 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3892 
3893 	tx_buffer->m_head = NULL;
3894 	tx_buffer->next_eop = -1;
3895 
3896 	if (++cur == adapter->num_tx_desc)
3897 		cur = 0;
3898 
3899 	txr->tx_avail--;
3900 	txr->next_avail_desc = cur;
3901 	txr->tx_tso = TRUE;
3902 }
3903 
3904 
3905 /**********************************************************************
3906  *
3907  *  Examine each tx_buffer in the used queue. If the hardware is done
3908  *  processing the packet then free associated resources. The
3909  *  tx_buffer is put back on the free queue.
3910  *
3911  **********************************************************************/
3912 static void
3913 em_txeof(struct tx_ring *txr)
3914 {
3915 	struct adapter	*adapter = txr->adapter;
3916 	int first, last, done, processed;
3917 	struct em_buffer *tx_buffer;
3918 	struct e1000_tx_desc *tx_desc, *eop_desc;
3919 	if_t ifp = adapter->ifp;
3920 
3921 	EM_TX_LOCK_ASSERT(txr);
3922 #ifdef DEV_NETMAP
3923 	if (netmap_tx_irq(ifp, txr->me))
3924 		return;
3925 #endif /* DEV_NETMAP */
3926 
3927 	/* No work, make sure hang detection is disabled */
3928 	if (txr->tx_avail == adapter->num_tx_desc) {
3929 		txr->busy = EM_TX_IDLE;
3930 		return;
3931 	}
3932 
3933 	processed = 0;
3934 	first = txr->next_to_clean;
3935 	tx_desc = &txr->tx_base[first];
3936 	tx_buffer = &txr->tx_buffers[first];
3937 	last = tx_buffer->next_eop;
3938 	eop_desc = &txr->tx_base[last];
3939 
3940 	/*
3941 	 * What this does is get the index of the
3942 	 * first descriptor AFTER the EOP of the
3943 	 * first packet, that way we can do the
3944 	 * simple comparison on the inner while loop.
3945 	 */
3946 	if (++last == adapter->num_tx_desc)
3947  		last = 0;
3948 	done = last;
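	/*
	 * Example (sketch): if first == 10 and the packet's EOP is
	 * descriptor 12, then done == 13 and the inner loop below cleans
	 * descriptors 10, 11 and 12 before re-testing the DD bit.
	 */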
3949 
3950 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3951 	    BUS_DMASYNC_POSTREAD);
3952 
3953 	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3954 		/* We clean the range of the packet */
3955 		while (first != done) {
3956 			tx_desc->upper.data = 0;
3957 			tx_desc->lower.data = 0;
3958 			tx_desc->buffer_addr = 0;
3959 			++txr->tx_avail;
3960 			++processed;
3961 
3962 			if (tx_buffer->m_head) {
3963 				bus_dmamap_sync(txr->txtag,
3964 				    tx_buffer->map,
3965 				    BUS_DMASYNC_POSTWRITE);
3966 				bus_dmamap_unload(txr->txtag,
3967 				    tx_buffer->map);
3968 				m_freem(tx_buffer->m_head);
3969 				tx_buffer->m_head = NULL;
3970 			}
3971 			tx_buffer->next_eop = -1;
3972 
3973 			if (++first == adapter->num_tx_desc)
3974 				first = 0;
3975 
3976 			tx_buffer = &txr->tx_buffers[first];
3977 			tx_desc = &txr->tx_base[first];
3978 		}
3979 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
3980 		/* See if we can continue to the next packet */
3981 		last = tx_buffer->next_eop;
3982 		if (last != -1) {
3983         		eop_desc = &txr->tx_base[last];
3984 			/* Get new done point */
3985 			if (++last == adapter->num_tx_desc) last = 0;
3986 			done = last;
3987 		} else
3988 			break;
3989 	}
3990 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3991 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3992 
3993 	txr->next_to_clean = first;
3994 
3995 	/*
3996 	** Hang detection: we know there's work outstanding
3997 	** or the entry return would have been taken, so no
3998 	** descriptor processed here indicates a potential hang.
3999 	** The local timer will examine this and do a reset if needed.
4000 	*/
4001 	if (processed == 0) {
4002 		if (txr->busy != EM_TX_HUNG)
4003 			++txr->busy;
4004 	} else /* At least one descriptor was cleaned */
4005 		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4006 
4007 	/*
4008 	 * If we have a minimum free, clear IFF_DRV_OACTIVE
4009 	 * to tell the stack that it is OK to send packets.
4010 	 * Notice that all writes of OACTIVE happen under the
4011 	 * TX lock which, with a single queue, guarantees
4012 	 * sanity.
4013 	 */
4014 	if (txr->tx_avail >= EM_MAX_SCATTER) {
4015 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
4016 	}
4017 
4018 	/* Disable hang detection if all clean */
4019 	if (txr->tx_avail == adapter->num_tx_desc)
4020 		txr->busy = EM_TX_IDLE;
4021 }
4022 
4023 
4024 /*********************************************************************
4025  *
4026  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4027  *
4028  **********************************************************************/
4029 static void
4030 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4031 {
4032 	struct adapter		*adapter = rxr->adapter;
4033 	struct mbuf		*m;
4034 	bus_dma_segment_t	segs[1];
4035 	struct em_buffer	*rxbuf;
4036 	int			i, j, error, nsegs;
4037 	bool			cleaned = FALSE;
4038 
4039 	i = j = rxr->next_to_refresh;
4040 	/*
4041 	** Get one descriptor beyond
4042 	** our work mark to control
4043 	** the loop.
4044 	*/
4045 	if (++j == adapter->num_rx_desc)
4046 		j = 0;
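	/*
	 * Example (sketch): with 256 RX descriptors and next_to_refresh
	 * == 255, j wraps to 0 here; the loop below then refreshes slot
	 * i and advances until j reaches 'limit', so the refresh never
	 * overtakes the descriptor currently being cleaned.
	 */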
4047 
4048 	while (j != limit) {
4049 		rxbuf = &rxr->rx_buffers[i];
4050 		if (rxbuf->m_head == NULL) {
4051 			m = m_getjcl(M_NOWAIT, MT_DATA,
4052 			    M_PKTHDR, adapter->rx_mbuf_sz);
4053 			/*
4054 			** If we have a temporary resource shortage
4055 			** that causes a failure, just abort refresh
4056 			** for now, we will return to this point when
4057 			** reinvoked from em_rxeof.
4058 			*/
4059 			if (m == NULL)
4060 				goto update;
4061 		} else
4062 			m = rxbuf->m_head;
4063 
4064 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4065 		m->m_flags |= M_PKTHDR;
4066 		m->m_data = m->m_ext.ext_buf;
4067 
4068 		/* Use bus_dma machinery to setup the memory mapping  */
4069 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4070 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
4071 		if (error != 0) {
4072 			printf("Refresh mbufs: dmamap load"
4073 			    " failure - %d\n", error);
4074 			m_free(m);
4075 			rxbuf->m_head = NULL;
4076 			goto update;
4077 		}
4078 		rxbuf->m_head = m;
4079 		bus_dmamap_sync(rxr->rxtag,
4080 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4081 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
4082 		cleaned = TRUE;
4083 
4084 		i = j; /* Next is precalculated for us */
4085 		rxr->next_to_refresh = i;
4086 		/* Calculate next controlling index */
4087 		if (++j == adapter->num_rx_desc)
4088 			j = 0;
4089 	}
4090 update:
4091 	/*
4092 	** Update the tail pointer only if,
4093 	** and as far as we have refreshed.
4094 	*/
4095 	if (cleaned)
4096 		E1000_WRITE_REG(&adapter->hw,
4097 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4098 
4099 	return;
4100 }
4101 
4102 
4103 /*********************************************************************
4104  *
4105  *  Allocate memory for rx_buffer structures. Since we use one
4106  *  rx_buffer per received packet, the maximum number of rx_buffer's
4107  *  that we'll need is equal to the number of receive descriptors
4108  *  that we've allocated.
4109  *
4110  **********************************************************************/
4111 static int
4112 em_allocate_receive_buffers(struct rx_ring *rxr)
4113 {
4114 	struct adapter		*adapter = rxr->adapter;
4115 	device_t		dev = adapter->dev;
4116 	struct em_buffer	*rxbuf;
4117 	int			error;
4118 
4119 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4120 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4121 	if (rxr->rx_buffers == NULL) {
4122 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4123 		return (ENOMEM);
4124 	}
4125 
4126 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4127 				1, 0,			/* alignment, bounds */
4128 				BUS_SPACE_MAXADDR,	/* lowaddr */
4129 				BUS_SPACE_MAXADDR,	/* highaddr */
4130 				NULL, NULL,		/* filter, filterarg */
4131 				MJUM9BYTES,		/* maxsize */
4132 				1,			/* nsegments */
4133 				MJUM9BYTES,		/* maxsegsize */
4134 				0,			/* flags */
4135 				NULL,			/* lockfunc */
4136 				NULL,			/* lockarg */
4137 				&rxr->rxtag);
4138 	if (error) {
4139 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4140 		    __func__, error);
4141 		goto fail;
4142 	}
4143 
4144 	rxbuf = rxr->rx_buffers;
4145 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4147 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4148 		if (error) {
4149 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4150 			    __func__, error);
4151 			goto fail;
4152 		}
4153 	}
4154 
4155 	return (0);
4156 
4157 fail:
4158 	em_free_receive_structures(adapter);
4159 	return (error);
4160 }
4161 
4162 
4163 /*********************************************************************
4164  *
4165  *  Initialize a receive ring and its buffers.
4166  *
4167  **********************************************************************/
4168 static int
4169 em_setup_receive_ring(struct rx_ring *rxr)
4170 {
4171 	struct	adapter 	*adapter = rxr->adapter;
4172 	struct em_buffer	*rxbuf;
4173 	bus_dma_segment_t	seg[1];
4174 	int			rsize, nsegs, error = 0;
4175 #ifdef DEV_NETMAP
4176 	struct netmap_slot *slot;
4177 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4178 #endif
4179 
4180 
4181 	/* Clear the ring contents */
4182 	EM_RX_LOCK(rxr);
4183 	rsize = roundup2(adapter->num_rx_desc *
4184 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4185 	bzero((void *)rxr->rx_base, rsize);
4186 #ifdef DEV_NETMAP
4187 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4188 #endif
4189 
4190 	/*
4191 	** Free current RX buffer structs and their mbufs
4192 	*/
4193 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4194 		rxbuf = &rxr->rx_buffers[i];
4195 		if (rxbuf->m_head != NULL) {
4196 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4197 			    BUS_DMASYNC_POSTREAD);
4198 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4199 			m_freem(rxbuf->m_head);
4200 			rxbuf->m_head = NULL; /* mark as freed */
4201 		}
4202 	}
4203 
4204 	/* Now replenish the mbufs */
4205 	for (int j = 0; j != adapter->num_rx_desc; ++j) {
4206 		rxbuf = &rxr->rx_buffers[j];
4207 #ifdef DEV_NETMAP
4208 		if (slot) {
4209 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4210 			uint64_t paddr;
4211 			void *addr;
4212 
4213 			addr = PNMB(na, slot + si, &paddr);
4214 			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4215 			/* Update descriptor */
4216 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4217 			continue;
4218 		}
4219 #endif /* DEV_NETMAP */
4220 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4221 		    M_PKTHDR, adapter->rx_mbuf_sz);
4222 		if (rxbuf->m_head == NULL) {
4223 			error = ENOBUFS;
4224 			goto fail;
4225 		}
4226 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4227 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4228 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4229 
4230 		/* Get the memory mapping */
4231 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4232 		    rxbuf->map, rxbuf->m_head, seg,
4233 		    &nsegs, BUS_DMA_NOWAIT);
4234 		if (error != 0) {
4235 			m_freem(rxbuf->m_head);
4236 			rxbuf->m_head = NULL;
4237 			goto fail;
4238 		}
4239 		bus_dmamap_sync(rxr->rxtag,
4240 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4241 
4242 		/* Update descriptor */
4243 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4244 	}
4245 	rxr->next_to_check = 0;
4246 	rxr->next_to_refresh = 0;
4247 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4248 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4249 
4250 fail:
4251 	EM_RX_UNLOCK(rxr);
4252 	return (error);
4253 }
4254 
4255 /*********************************************************************
4256  *
4257  *  Initialize all receive rings.
4258  *
4259  **********************************************************************/
4260 static int
4261 em_setup_receive_structures(struct adapter *adapter)
4262 {
4263 	struct rx_ring *rxr = adapter->rx_rings;
4264 	int q;
4265 
4266 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4267 		if (em_setup_receive_ring(rxr))
4268 			goto fail;
4269 
4270 	return (0);
4271 fail:
4272 	/*
4273 	 * Free the RX buffers allocated so far; we only handle
4274 	 * the rings that completed, since the failing case will have
4275 	 * cleaned up after itself. 'q' failed, so it is the terminus.
4276 	 */
4277 	for (int i = 0; i < q; ++i) {
4278 		rxr = &adapter->rx_rings[i];
4279 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4280 			struct em_buffer *rxbuf;
4281 			rxbuf = &rxr->rx_buffers[n];
4282 			if (rxbuf->m_head != NULL) {
4283 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4284 			  	  BUS_DMASYNC_POSTREAD);
4285 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4286 				m_freem(rxbuf->m_head);
4287 				rxbuf->m_head = NULL;
4288 			}
4289 		}
4290 		rxr->next_to_check = 0;
4291 		rxr->next_to_refresh = 0;
4292 	}
4293 
4294 	return (ENOBUFS);
4295 }
4296 
4297 /*********************************************************************
4298  *
4299  *  Free all receive rings.
4300  *
4301  **********************************************************************/
4302 static void
4303 em_free_receive_structures(struct adapter *adapter)
4304 {
4305 	struct rx_ring *rxr = adapter->rx_rings;
4306 
4307 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4308 		em_free_receive_buffers(rxr);
4309 		/* Free the ring memory as well */
4310 		em_dma_free(adapter, &rxr->rxdma);
4311 		EM_RX_LOCK_DESTROY(rxr);
4312 	}
4313 
4314 	free(adapter->rx_rings, M_DEVBUF);
4315 }
4316 
4317 
4318 /*********************************************************************
4319  *
4320  *  Free receive ring data structures
4321  *
4322  **********************************************************************/
4323 static void
4324 em_free_receive_buffers(struct rx_ring *rxr)
4325 {
4326 	struct adapter		*adapter = rxr->adapter;
4327 	struct em_buffer	*rxbuf = NULL;
4328 
4329 	INIT_DEBUGOUT("free_receive_buffers: begin");
4330 
4331 	if (rxr->rx_buffers != NULL) {
4332 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4333 			rxbuf = &rxr->rx_buffers[i];
4334 			if (rxbuf->map != NULL) {
4335 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4336 				    BUS_DMASYNC_POSTREAD);
4337 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4338 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4339 			}
4340 			if (rxbuf->m_head != NULL) {
4341 				m_freem(rxbuf->m_head);
4342 				rxbuf->m_head = NULL;
4343 			}
4344 		}
4345 		free(rxr->rx_buffers, M_DEVBUF);
4346 		rxr->rx_buffers = NULL;
4347 		rxr->next_to_check = 0;
4348 		rxr->next_to_refresh = 0;
4349 	}
4350 
4351 	if (rxr->rxtag != NULL) {
4352 		bus_dma_tag_destroy(rxr->rxtag);
4353 		rxr->rxtag = NULL;
4354 	}
4355 
4356 	return;
4357 }
4358 
4359 
4360 /*********************************************************************
4361  *
4362  *  Enable receive unit.
4363  *
4364  **********************************************************************/
4365 
4366 static void
4367 em_initialize_receive_unit(struct adapter *adapter)
4368 {
4369 	struct rx_ring	*rxr = adapter->rx_rings;
4370 	if_t ifp = adapter->ifp;
4371 	struct e1000_hw	*hw = &adapter->hw;
4372 	u64	bus_addr;
4373 	u32	rctl, rxcsum;
4374 
4375 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4376 
4377 	/*
4378 	 * Make sure receives are disabled while setting
4379 	 * up the descriptor ring
4380 	 */
4381 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4382 	/* Do not disable if ever enabled on this hardware */
4383 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4384 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4385 
4386 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4387 	    adapter->rx_abs_int_delay.value);
4388 
4389 	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4390 	    adapter->rx_int_delay.value);
4391 	/*
4392 	 * Set the interrupt throttling rate. Value is calculated
4393 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4394 	 */
4395 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
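	/*
	 * Worked example (sketch): with MAX_INTS_PER_SEC at 8000 (a
	 * common default), the interrupt interval is 1/8000 s =
	 * 125000 ns, which in 256 ns units gives an ITR value of about
	 * 125000 / 256 ~= 488.
	 */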
4396 
4397 	/*
4398 	** When using MSIX interrupts we need to throttle
4399 	** using the EITR register (82574 only)
4400 	*/
4401 	if (hw->mac.type == e1000_82574) {
4402 		u32 rfctl;
4403 		for (int i = 0; i < 4; i++)
4404 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4405 			    DEFAULT_ITR);
4406 		/* Disable accelerated acknowledge */
4407 		rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4408 		rfctl |= E1000_RFCTL_ACK_DIS;
4409 		E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4410 	}
4411 
4412 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4413 	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4414 #ifdef EM_MULTIQUEUE
4415 		rxcsum |= E1000_RXCSUM_TUOFL |
4416 			  E1000_RXCSUM_IPOFL |
4417 			  E1000_RXCSUM_PCSD;
4418 #else
4419 		rxcsum |= E1000_RXCSUM_TUOFL;
4420 #endif
4421 	} else
4422 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4423 
4424 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4425 
4426 #ifdef EM_MULTIQUEUE
4427 	if (adapter->num_queues > 1) {
4428 		uint32_t rss_key[10];
4429 		uint32_t reta;
4430 		int i;
4431 
4432 		/*
4433 		 * Configure RSS key
4434 		 */
4435 		arc4rand(rss_key, sizeof(rss_key), 0);
4436 		for (i = 0; i < 10; ++i)
4437 			E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key[i]);
4438 
4439 		/*
4440 		 * Configure the RSS redirect table in the following fashion:
4441 		 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4442 		 */
4443 		reta = 0;
4444 		for (i = 0; i < 4; ++i) {
4445 			uint32_t q;
4446 			q = (i % adapter->num_queues) << 7;
4447 			reta |= q << (8 * i);
4448 		}
4449 		for (i = 0; i < 32; ++i)
4450 			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
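		/*
		 * Worked example (sketch): with num_queues == 2 the loop
		 * above builds reta from the bytes 0x00, 0x80, 0x00, 0x80
		 * (bit 7 of each byte selects the queue), i.e. reta ==
		 * 0x80008000; replicating it into all 32 RETA registers
		 * yields 128 entries alternating between queue 0 and
		 * queue 1.
		 */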
4451 
4452 		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4453 				E1000_MRQC_RSS_FIELD_IPV4_TCP |
4454 				E1000_MRQC_RSS_FIELD_IPV4 |
4455 				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4456 				E1000_MRQC_RSS_FIELD_IPV6_EX |
4457 				E1000_MRQC_RSS_FIELD_IPV6 |
4458 				E1000_MRQC_RSS_FIELD_IPV6_TCP);
4459 	}
4460 #endif
4461 	/*
4462 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4463 	** long latencies are observed, like Lenovo X60. This
4464 	** change eliminates the problem, but since having positive
4465 	** values in RDTR is a known source of problems on other
4466 	** platforms another solution is being sought.
4467 	*/
4468 	if (hw->mac.type == e1000_82573)
4469 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4470 
4471 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4472 		/* Setup the Base and Length of the Rx Descriptor Ring */
4473 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4474 
4475 		bus_addr = rxr->rxdma.dma_paddr;
4476 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4477 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4478 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4479 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4480 		/* Setup the Head and Tail Descriptor Pointers */
4481 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4482 #ifdef DEV_NETMAP
4483 		/*
4484 		 * an init() while a netmap client is active must
4485 		 * preserve the rx buffers passed to userspace.
4486 		 */
4487 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4488 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4489 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4490 		}
4491 #endif /* DEV_NETMAP */
4492 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4493 	}
4494 
4495 	/*
4496 	 * Set PTHRESH for improved jumbo performance
4497 	 * According to 10.2.5.11 of Intel 82574 Datasheet,
4498 	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4499 	 * Only write to RXDCTL(1) if there is a need for different
4500 	 * settings.
4501 	 */
4502 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4503 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4504 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4505 	    (if_getmtu(ifp) > ETHERMTU)) {
4506 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4507 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4508 	} else if ((adapter->hw.mac.type == e1000_82574) &&
4509 		  (if_getmtu(ifp) > ETHERMTU)) {
4510 		for (int i = 0; i < adapter->num_queues; i++) {
4511 			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4512 
4513 			rxdctl |= 0x20;    /* PTHRESH */
4514 			rxdctl |= 4 << 8;  /* HTHRESH */
4515 			rxdctl |= 4 << 16; /* WTHRESH */
4516 			rxdctl |= 1 << 24; /* Switch to granularity */
4517 			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4518 		}
4519 	}
4520 
4521 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4522 		if (if_getmtu(ifp) > ETHERMTU)
4523 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4524 		else
4525 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4526 	}
4527 
4528 	/* Setup the Receive Control Register */
4529 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4530 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4531 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4532 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4533 
4534 	/* Strip the CRC */
4535 	if (!em_disable_crc_stripping)
4536 		rctl |= E1000_RCTL_SECRC;
4537 
4538 	/* Make sure VLAN Filters are off */
4539 	rctl &= ~E1000_RCTL_VFE;
4540 	rctl &= ~E1000_RCTL_SBP;
4541 
4542 	if (adapter->rx_mbuf_sz == MCLBYTES)
4543 		rctl |= E1000_RCTL_SZ_2048;
4544 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4545 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4546 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4547 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
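	/*
	 * Note (sketch): MCLBYTES is 2048, MJUMPAGESIZE is one page
	 * (4096 on most platforms) and the next cluster size up is
	 * MJUM9BYTES (9 KB), so the three cases above map the mbuf
	 * cluster size onto the 2048/4096/8192 receive buffer sizes the
	 * hardware supports.
	 */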
4548 
4549 	if (if_getmtu(ifp) > ETHERMTU)
4550 		rctl |= E1000_RCTL_LPE;
4551 	else
4552 		rctl &= ~E1000_RCTL_LPE;
4553 
4554 	/* Write out the settings */
4555 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4556 
4557 	return;
4558 }
4559 
4560 
4561 /*********************************************************************
4562  *
4563  *  This routine executes in interrupt context. It replenishes
4564  *  the mbufs in the descriptor ring and sends data which has been
4565  *  dma'ed into host memory up to the stack.
4566  *
4567  *  We loop at most count times if count is > 0, or until done if
4568  *  count < 0.
4569  *
4570  *  For polling we also now return the number of cleaned packets
4571  *********************************************************************/
4572 static bool
4573 em_rxeof(struct rx_ring *rxr, int count, int *done)
4574 {
4575 	struct adapter		*adapter = rxr->adapter;
4576 	if_t ifp = adapter->ifp;
4577 	struct mbuf		*mp, *sendmp;
4578 	u8			status = 0;
4579 	u16 			len;
4580 	int			i, processed, rxdone = 0;
4581 	bool			eop;
4582 	struct e1000_rx_desc	*cur;
4583 
4584 	EM_RX_LOCK(rxr);
4585 
4586 	/* Sync the ring */
4587 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4588 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4589 
4590 
4591 #ifdef DEV_NETMAP
4592 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4593 		EM_RX_UNLOCK(rxr);
4594 		return (FALSE);
4595 	}
4596 #endif /* DEV_NETMAP */
4597 
4598 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4599 
4600 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4601 			break;
4602 
4603 		cur = &rxr->rx_base[i];
4604 		status = cur->status;
4605 		mp = sendmp = NULL;
4606 
4607 		if ((status & E1000_RXD_STAT_DD) == 0)
4608 			break;
4609 
4610 		len = le16toh(cur->length);
4611 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4612 
4613 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4614 		    (rxr->discard == TRUE)) {
4615 			adapter->dropped_pkts++;
4616 			++rxr->rx_discarded;
4617 			if (!eop) /* Catch subsequent segs */
4618 				rxr->discard = TRUE;
4619 			else
4620 				rxr->discard = FALSE;
4621 			em_rx_discard(rxr, i);
4622 			goto next_desc;
4623 		}
4624 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4625 
4626 		/* Assign correct length to the current fragment */
4627 		mp = rxr->rx_buffers[i].m_head;
4628 		mp->m_len = len;
4629 
4630 		/* Trigger for refresh */
4631 		rxr->rx_buffers[i].m_head = NULL;
4632 
4633 		/* First segment? */
4634 		if (rxr->fmp == NULL) {
4635 			mp->m_pkthdr.len = len;
4636 			rxr->fmp = rxr->lmp = mp;
4637 		} else {
4638 			/* Chain mbuf's together */
4639 			mp->m_flags &= ~M_PKTHDR;
4640 			rxr->lmp->m_next = mp;
4641 			rxr->lmp = mp;
4642 			rxr->fmp->m_pkthdr.len += len;
4643 		}
4644 
4645 		if (eop) {
4646 			--count;
4647 			sendmp = rxr->fmp;
4648 			if_setrcvif(sendmp, ifp);
4649 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4650 			em_receive_checksum(cur, sendmp);
4651 #ifndef __NO_STRICT_ALIGNMENT
4652 			if (adapter->hw.mac.max_frame_size >
4653 			    (MCLBYTES - ETHER_ALIGN) &&
4654 			    em_fixup_rx(rxr) != 0)
4655 				goto skip;
4656 #endif
4657 			if (status & E1000_RXD_STAT_VP) {
4658 				if_setvtag(sendmp,
4659 				    le16toh(cur->special));
4660 				sendmp->m_flags |= M_VLANTAG;
4661 			}
4662 #ifndef __NO_STRICT_ALIGNMENT
4663 skip:
4664 #endif
4665 			rxr->fmp = rxr->lmp = NULL;
4666 		}
4667 next_desc:
4668 		/* Sync the ring */
4669 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4670 	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4671 
4672 		/* Zero out the receive descriptors status. */
4673 		cur->status = 0;
4674 		++rxdone;	/* cumulative for POLL */
4675 		++processed;
4676 
4677 		/* Advance our pointers to the next descriptor. */
4678 		if (++i == adapter->num_rx_desc)
4679 			i = 0;
4680 
4681 		/* Send to the stack */
4682 		if (sendmp != NULL) {
4683 			rxr->next_to_check = i;
4684 			EM_RX_UNLOCK(rxr);
4685 			if_input(ifp, sendmp);
4686 			EM_RX_LOCK(rxr);
4687 			i = rxr->next_to_check;
4688 		}
4689 
4690 		/* Only refresh mbufs every 8 descriptors */
4691 		if (processed == 8) {
4692 			em_refresh_mbufs(rxr, i);
4693 			processed = 0;
4694 		}
4695 	}
4696 
4697 	/* Catch any remaining refresh work */
4698 	if (e1000_rx_unrefreshed(rxr))
4699 		em_refresh_mbufs(rxr, i);
4700 
4701 	rxr->next_to_check = i;
4702 	if (done != NULL)
4703 		*done = rxdone;
4704 	EM_RX_UNLOCK(rxr);
4705 
4706 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4707 }
4708 
4709 static __inline void
4710 em_rx_discard(struct rx_ring *rxr, int i)
4711 {
4712 	struct em_buffer	*rbuf;
4713 
4714 	rbuf = &rxr->rx_buffers[i];
4715 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4716 
4717 	/* Free any previous pieces */
4718 	if (rxr->fmp != NULL) {
4719 		rxr->fmp->m_flags |= M_PKTHDR;
4720 		m_freem(rxr->fmp);
4721 		rxr->fmp = NULL;
4722 		rxr->lmp = NULL;
4723 	}
4724 	/*
4725 	** Free buffer and allow em_refresh_mbufs()
4726 	** to clean up and recharge buffer.
4727 	*/
4728 	if (rbuf->m_head) {
4729 		m_free(rbuf->m_head);
4730 		rbuf->m_head = NULL;
4731 	}
4732 	return;
4733 }
4734 
4735 #ifndef __NO_STRICT_ALIGNMENT
4736 /*
4737  * When jumbo frames are enabled we should realign the entire payload on
4738  * architectures with strict alignment. This is a serious design mistake of the
4739  * 8254x, as it nullifies DMA operations: the 8254x only allows RX buffer sizes
4740  * of 2048/4096/8192/16384, while what we really want is 2048 - ETHER_ALIGN to
4741  * align the payload. On architectures without strict alignment restrictions the
4742  * 8254x still performs unaligned memory accesses, which reduce performance too.
4743  * To avoid copying over an entire frame to realign it, we allocate a new mbuf
4744  * and copy the Ethernet header into it. The new mbuf is prepended onto the
4745  * existing mbuf chain.
4746  *
4747  * Be aware that the best performance of the 8254x is achieved only when jumbo
4748  * frames are not used at all on architectures with strict alignment.
4749  */
4750 static int
4751 em_fixup_rx(struct rx_ring *rxr)
4752 {
4753 	struct adapter *adapter = rxr->adapter;
4754 	struct mbuf *m, *n;
4755 	int error;
4756 
4757 	error = 0;
4758 	m = rxr->fmp;
4759 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4760 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4761 		m->m_data += ETHER_HDR_LEN;
4762 	} else {
4763 		MGETHDR(n, M_NOWAIT, MT_DATA);
4764 		if (n != NULL) {
4765 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4766 			m->m_data += ETHER_HDR_LEN;
4767 			m->m_len -= ETHER_HDR_LEN;
4768 			n->m_len = ETHER_HDR_LEN;
4769 			M_MOVE_PKTHDR(n, m);
4770 			n->m_next = m;
4771 			rxr->fmp = n;
4772 		} else {
4773 			adapter->dropped_pkts++;
4774 			m_freem(rxr->fmp);
4775 			rxr->fmp = NULL;
4776 			error = ENOMEM;
4777 		}
4778 	}
4779 
4780 	return (error);
4781 }
4782 #endif
4783 
4784 /*********************************************************************
4785  *
4786  *  Verify that the hardware indicated that the checksum is valid.
4787  *  Inform the stack about the status of checksum so that stack
4788  *  doesn't spend time verifying the checksum.
4789  *
4790  *********************************************************************/
4791 static void
4792 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4793 {
4794 	mp->m_pkthdr.csum_flags = 0;
4795 
4796 	/* Ignore Checksum bit is set */
4797 	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4798 		return;
4799 
4800 	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4801 		return;
4802 
4803 	/* IP Checksum Good? */
4804 	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4805 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4806 
4807 	/* TCP or UDP checksum */
4808 	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4809 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4810 		mp->m_pkthdr.csum_data = htons(0xffff);
4811 	}
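	/*
	 * Note: CSUM_DATA_VALID together with CSUM_PSEUDO_HDR and a
	 * csum_data of 0xffff tells the stack that the payload checksum,
	 * pseudo-header included, has already been verified, so no
	 * software checksumming is performed.
	 */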
4812 }
4813 
4814 /*
4815  * This routine is run via a vlan
4816  * config EVENT
4817  */
4818 static void
4819 em_register_vlan(void *arg, if_t ifp, u16 vtag)
4820 {
4821 	struct adapter	*adapter = if_getsoftc(ifp);
4822 	u32		index, bit;
4823 
4824 	if ((void *)adapter != arg)	/* Not our event */
4825 		return;
4826 
4827 	if ((vtag == 0) || (vtag > 4095))	/* Invalid ID */
4828 		return;
4829 
4830 	EM_CORE_LOCK(adapter);
4831 	index = (vtag >> 5) & 0x7F;
4832 	bit = vtag & 0x1F;
4833 	adapter->shadow_vfta[index] |= (1 << bit);
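	/*
	 * Worked example (sketch): vtag 1000 gives index = (1000 >> 5) &
	 * 0x7F = 31 and bit = 1000 & 0x1F = 8, i.e. bit 8 of
	 * shadow_vfta[31] marks VLAN 1000 as registered.
	 */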
4834 	++adapter->num_vlans;
4835 	/* Re-init to load the changes */
4836 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4837 		em_init_locked(adapter);
4838 	EM_CORE_UNLOCK(adapter);
4839 }
4840 
4841 /*
4842  * This routine is run via an vlan
4843  * This routine is run via a vlan
4844  */
4845 static void
4846 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
4847 {
4848 	struct adapter	*adapter = if_getsoftc(ifp);
4849 	u32		index, bit;
4850 
4851 	if (adapter != arg)
4852 		return;
4853 
4854 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
4855 		return;
4856 
4857 	EM_CORE_LOCK(adapter);
4858 	index = (vtag >> 5) & 0x7F;
4859 	bit = vtag & 0x1F;
4860 	adapter->shadow_vfta[index] &= ~(1 << bit);
4861 	--adapter->num_vlans;
4862 	/* Re-init to load the changes */
4863 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4864 		em_init_locked(adapter);
4865 	EM_CORE_UNLOCK(adapter);
4866 }
4867 
4868 static void
4869 em_setup_vlan_hw_support(struct adapter *adapter)
4870 {
4871 	struct e1000_hw *hw = &adapter->hw;
4872 	u32             reg;
4873 
4874 	/*
4875 	** We get here through init_locked, meaning
4876 	** a soft reset; this has already cleared
4877 	** the VFTA and other state, so if no
4878 	** vlans have been registered, do nothing.
4879 	*/
4880 	if (adapter->num_vlans == 0)
4881 		return;
4882 
4883 	/*
4884 	** A soft reset zeroes out the VFTA, so
4885 	** we need to repopulate it now.
4886 	*/
4887 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4888 		if (adapter->shadow_vfta[i] != 0)
4889 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4890 			    i, adapter->shadow_vfta[i]);
4891 
4892 	reg = E1000_READ_REG(hw, E1000_CTRL);
4893 	reg |= E1000_CTRL_VME;
4894 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4895 
4896 	/* Enable the Filter Table */
4897 	reg = E1000_READ_REG(hw, E1000_RCTL);
4898 	reg &= ~E1000_RCTL_CFIEN;
4899 	reg |= E1000_RCTL_VFE;
4900 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4901 }
4902 
4903 static void
4904 em_enable_intr(struct adapter *adapter)
4905 {
4906 	struct e1000_hw *hw = &adapter->hw;
4907 	u32 ims_mask = IMS_ENABLE_MASK;
4908 
4909 	if (hw->mac.type == e1000_82574) {
4910 		E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
4911 		ims_mask |= adapter->ims;
4912 	}
4913 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4914 }
4915 
4916 static void
4917 em_disable_intr(struct adapter *adapter)
4918 {
4919 	struct e1000_hw *hw = &adapter->hw;
4920 
4921 	if (hw->mac.type == e1000_82574)
4922 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4923 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4924 }
4925 
4926 /*
4927  * Bit of a misnomer, what this really means is
4928  * to enable OS management of the system... aka
4929  * to disable special hardware management features
4930  */
4931 static void
4932 em_init_manageability(struct adapter *adapter)
4933 {
4934 	/* A shared code workaround */
4935 #define E1000_82542_MANC2H E1000_MANC2H
4936 	if (adapter->has_manage) {
4937 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4938 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4939 
4940 		/* disable hardware interception of ARP */
4941 		manc &= ~(E1000_MANC_ARP_EN);
4942 
4943 		/* enable receiving management packets to the host */
4944 		manc |= E1000_MANC_EN_MNG2HOST;
4945 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4946 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4947 		manc2h |= E1000_MNG2HOST_PORT_623;
4948 		manc2h |= E1000_MNG2HOST_PORT_664;
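		/*
		 * Note: 623 and 664 are the standard RMCP/ASF management
		 * ports (664 being the secure variant), so this routes
		 * management traffic on those ports up to the host.
		 */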
4949 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4950 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4951 	}
4952 }
4953 
4954 /*
4955  * Give control back to hardware management
4956  * controller if there is one.
4957  */
4958 static void
4959 em_release_manageability(struct adapter *adapter)
4960 {
4961 	if (adapter->has_manage) {
4962 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4963 
4964 		/* re-enable hardware interception of ARP */
4965 		manc |= E1000_MANC_ARP_EN;
4966 		manc &= ~E1000_MANC_EN_MNG2HOST;
4967 
4968 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4969 	}
4970 }
4971 
4972 /*
4973  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4974  * For ASF and Pass Through versions of f/w this means
4975  * that the driver is loaded. For AMT versions of f/w
4976  * this means that the network i/f is open.
4977  */
4978 static void
4979 em_get_hw_control(struct adapter *adapter)
4980 {
4981 	u32 ctrl_ext, swsm;
4982 
4983 	if (adapter->hw.mac.type == e1000_82573) {
4984 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4985 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4986 		    swsm | E1000_SWSM_DRV_LOAD);
4987 		return;
4988 	}
4989 	/* else */
4990 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4991 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4992 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4993 	return;
4994 }
4995 
4996 /*
4997  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4998  * For ASF and Pass Through versions of f/w this means that
4999  * the driver is no longer loaded. For AMT versions of the
5000  * f/w this means that the network i/f is closed.
5001  */
5002 static void
5003 em_release_hw_control(struct adapter *adapter)
5004 {
5005 	u32 ctrl_ext, swsm;
5006 
5007 	if (!adapter->has_manage)
5008 		return;
5009 
5010 	if (adapter->hw.mac.type == e1000_82573) {
5011 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5012 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5013 		    swsm & ~E1000_SWSM_DRV_LOAD);
5014 		return;
5015 	}
5016 	/* else */
5017 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5018 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5019 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5020 	return;
5021 }
5022 
5023 static int
5024 em_is_valid_ether_addr(u8 *addr)
5025 {
5026 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5027 
5028 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5029 		return (FALSE);
5030 	}
5031 
5032 	return (TRUE);
5033 }
5034 
5035 /*
5036 ** Parse the interface capabilities with regard
5037 ** to both system management and wake-on-lan for
5038 ** later use.
5039 */
5040 static void
5041 em_get_wakeup(device_t dev)
5042 {
5043 	struct adapter	*adapter = device_get_softc(dev);
5044 	u16		eeprom_data = 0, device_id, apme_mask;
5045 
5046 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5047 	apme_mask = EM_EEPROM_APME;
5048 
5049 	switch (adapter->hw.mac.type) {
5050 	case e1000_82573:
5051 	case e1000_82583:
5052 		adapter->has_amt = TRUE;
5053 		/* FALLTHROUGH */
5054 	case e1000_82571:
5055 	case e1000_82572:
5056 	case e1000_80003es2lan:
5057 		if (adapter->hw.bus.func == 1)
5058 			e1000_read_nvm(&adapter->hw,
5059 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5060 		else
5061 			e1000_read_nvm(&adapter->hw,
5062 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5063 		break;
5065 	case e1000_ich8lan:
5066 	case e1000_ich9lan:
5067 	case e1000_ich10lan:
5068 	case e1000_pchlan:
5069 	case e1000_pch2lan:
5070 		apme_mask = E1000_WUC_APME;
5071 		adapter->has_amt = TRUE;
5072 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5073 		break;
5074 	default:
5075 		e1000_read_nvm(&adapter->hw,
5076 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5077 		break;
5078 	}
5079 	if (eeprom_data & apme_mask)
5080 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5081 	/*
5082 	 * We have the eeprom settings; now apply the special cases
5083 	 * where the eeprom may be wrong or the board won't support
5084 	 * wake on lan on a particular port.
5085 	 */
5086 	device_id = pci_get_device(dev);
5087 	switch (device_id) {
5088 	case E1000_DEV_ID_82571EB_FIBER:
5089 		/* Wake events only supported on port A for dual fiber
5090 		 * regardless of eeprom setting */
5091 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5092 		    E1000_STATUS_FUNC_1)
5093 			adapter->wol = 0;
5094 		break;
5095 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
5096 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
5097 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5098 		/* if quad port adapter, disable WoL on all but port A */
5099 		if (global_quad_port_a != 0)
5100 			adapter->wol = 0;
5101 		/* Reset for multiple quad port adapters */
5102 		if (++global_quad_port_a == 4)
5103 			global_quad_port_a = 0;
5104 		break;
5105 	}
5106 	return;
5107 }
5108 
5109 
5110 /*
5111  * Enable PCI Wake On Lan capability
5112  */
5113 static void
5114 em_enable_wakeup(device_t dev)
5115 {
5116 	struct adapter	*adapter = device_get_softc(dev);
5117 	if_t ifp = adapter->ifp;
5118 	u32		pmc, ctrl, ctrl_ext, rctl;
5119 	u16     	status;
5120 
5121 	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5122 		return;
5123 
5124 	/* Advertise the wakeup capability */
5125 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5126 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5127 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5128 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5129 
5130 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
5131 	    (adapter->hw.mac.type == e1000_pchlan) ||
5132 	    (adapter->hw.mac.type == e1000_ich9lan) ||
5133 	    (adapter->hw.mac.type == e1000_ich10lan))
5134 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
5135 
5136 	/* Keep the laser running on Fiber adapters */
5137 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5138 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5139 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5140 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5141 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5142 	}
5143 
5144 	/*
5145 	** Determine type of Wakeup: note that wol
5146 	** is set with all bits on by default.
5147 	*/
5148 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5149 		adapter->wol &= ~E1000_WUFC_MAG;
5150 
5151 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5152 		adapter->wol &= ~E1000_WUFC_MC;
5153 	else {
5154 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5155 		rctl |= E1000_RCTL_MPE;
5156 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5157 	}
5158 
5159 	if ((adapter->hw.mac.type == e1000_pchlan) ||
5160 	    (adapter->hw.mac.type == e1000_pch2lan)) {
5161 		if (em_enable_phy_wakeup(adapter))
5162 			return;
5163 	} else {
5164 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5165 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5166 	}
5167 
5168 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5169 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5170 
5171 	/* Request PME */
5172 	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5173 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5174 	if (if_getcapenable(ifp) & IFCAP_WOL)
5175 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5176 	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5177 
5178 	return;
5179 }
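
/*
 * Usage sketch: the IFCAP_WOL_* tests above mirror what an
 * administrator sets from userland before suspend, e.g.
 * "ifconfig em0 wol_magic" to arm magic-packet wake (the unit
 * number is illustrative).
 */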
5180 
5181 /*
5182 ** WOL in the newer chipset interfaces (pchlan)
5183 ** requires settings to be copied into the PHY
5184 */
5185 static int
5186 em_enable_phy_wakeup(struct adapter *adapter)
5187 {
5188 	struct e1000_hw *hw = &adapter->hw;
5189 	u32 mreg, ret = 0;
5190 	u16 preg;
5191 
5192 	/* copy MAC RARs to PHY RARs */
5193 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5194 
5195 	/* copy MAC MTA to PHY MTA */
5196 	for (int i = 0; i < hw->mac.mta_reg_count; i++) {
5197 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5198 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5199 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5200 		    (u16)((mreg >> 16) & 0xFFFF));
5201 	}
5202 
5203 	/* configure PHY Rx Control register */
5204 	e1000_read_phy_reg(hw, BM_RCTL, &preg);
5205 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5206 	if (mreg & E1000_RCTL_UPE)
5207 		preg |= BM_RCTL_UPE;
5208 	if (mreg & E1000_RCTL_MPE)
5209 		preg |= BM_RCTL_MPE;
5210 	preg &= ~(BM_RCTL_MO_MASK);
5211 	if (mreg & E1000_RCTL_MO_3)
5212 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5213 				<< BM_RCTL_MO_SHIFT);
5214 	if (mreg & E1000_RCTL_BAM)
5215 		preg |= BM_RCTL_BAM;
5216 	if (mreg & E1000_RCTL_PMCF)
5217 		preg |= BM_RCTL_PMCF;
5218 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5219 	if (mreg & E1000_CTRL_RFCE)
5220 		preg |= BM_RCTL_RFCE;
5221 	e1000_write_phy_reg(hw, BM_RCTL, preg);
5222 
5223 	/* enable PHY wakeup in MAC register */
5224 	E1000_WRITE_REG(hw, E1000_WUC,
5225 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5226 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5227 
5228 	/* configure and enable PHY wakeup in PHY registers */
5229 	e1000_write_phy_reg(hw, BM_WUFC, adapter->wol);
5230 	e1000_write_phy_reg(hw, BM_WUC, E1000_WUC_PME_EN);
5231 
5232 	/* activate PHY wakeup */
5233 	ret = hw->phy.ops.acquire(hw);
5234 	if (ret) {
5235 		device_printf(adapter->dev, "Could not acquire PHY\n");
5236 		return (ret);
5237 	}
5238 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5239 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5240 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5241 	if (ret) {
5242 		device_printf(adapter->dev, "Could not read PHY page 769\n");
5243 		goto out;
5244 	}
5245 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5246 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5247 	if (ret)
5248 		device_printf(adapter->dev, "Could not set PHY Host Wakeup bit\n");
5249 out:
5250 	hw->phy.ops.release(hw);
5251 
5252 	return (ret);
5253 }
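
/*
 * Note on the sequence above: the BM wakeup registers live on PHY page
 * 769 (BM_WUC_ENABLE_PAGE), so with the PHY semaphore already held the
 * code selects the page and uses the raw MDIC accessors directly
 * instead of the generic e1000_read/write_phy_reg() wrappers, which
 * would try to take the semaphore themselves.
 */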
5254 
5255 static void
5256 em_led_func(void *arg, int onoff)
5257 {
5258 	struct adapter	*adapter = arg;
5259 
5260 	EM_CORE_LOCK(adapter);
5261 	if (onoff) {
5262 		e1000_setup_led(&adapter->hw);
5263 		e1000_led_on(&adapter->hw);
5264 	} else {
5265 		e1000_led_off(&adapter->hw);
5266 		e1000_cleanup_led(&adapter->hw);
5267 	}
5268 	EM_CORE_UNLOCK(adapter);
5269 }
5270 
5271 /*
5272 ** Disable the L0S and L1 LINK states
5273 */
5274 static void
5275 em_disable_aspm(struct adapter *adapter)
5276 {
5277 	int		base, reg;
5278 	u16		link_cap, link_ctrl;
5279 	device_t	dev = adapter->dev;
5280 
5281 	switch (adapter->hw.mac.type) {
5282 	case e1000_82573:
5283 	case e1000_82574:
5284 	case e1000_82583:
5285 		break;
5286 	default:
5287 		return;
5288 	}
5289 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5290 		return;
5291 	reg = base + PCIER_LINK_CAP;
5292 	link_cap = pci_read_config(dev, reg, 2);
5293 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5294 		return;
5295 	reg = base + PCIER_LINK_CTL;
5296 	link_ctrl = pci_read_config(dev, reg, 2);
5297 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5298 	pci_write_config(dev, reg, link_ctrl, 2);
5299 	return;
5300 }
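
/*
 * Worked example, assuming the standard PCIe Link Control layout where
 * PCIEM_LINK_CTL_ASPMC covers bits 1:0 (01 = L0s, 10 = L1, 11 = both):
 * a link_ctrl of 0x0043 becomes 0x0040 after the mask above, disabling
 * ASPM while leaving the other link settings intact.
 */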
5301 
5302 /**********************************************************************
5303  *
5304  *  Update the board statistics counters.
5305  *
5306  **********************************************************************/
5307 static void
5308 em_update_stats_counters(struct adapter *adapter)
5309 {
5310 
5311 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5312 	    (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5313 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5314 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5315 	}
5316 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5317 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5318 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5319 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5320 
5321 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5322 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5323 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5324 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5325 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5326 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5327 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5328 	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5329 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5330 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5331 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5332 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5333 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5334 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5335 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5336 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5337 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5338 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5339 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5340 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5341 
5342 	/* For the 64-bit byte counters the low dword must be read first. */
5343 	/* Both registers clear on the read of the high dword */
5344 
5345 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5346 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5347 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5348 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5349 
5350 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5351 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5352 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5353 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5354 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5355 
5356 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5357 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5358 
5359 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5360 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5361 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5362 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5363 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5364 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5365 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5366 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5367 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5368 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5369 
5370 	/* Interrupt Counts */
5371 
5372 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5373 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5374 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5375 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5376 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5377 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5378 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5379 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5380 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5381 
5382 	if (adapter->hw.mac.type >= e1000_82543) {
5383 		adapter->stats.algnerrc +=
5384 		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5385 		adapter->stats.rxerrc +=
5386 		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5387 		adapter->stats.tncrs +=
5388 		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5389 		adapter->stats.cexterr +=
5390 		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5391 		adapter->stats.tsctc +=
5392 		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5393 		adapter->stats.tsctfc +=
5394 		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5395 	}
5396 }
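
/*
 * Example of the 64-bit read ordering used above: the hardware latches
 * the pair on the low-dword read and clears both on the high-dword
 * read, so if E1000_GORCL returns 0x1000 and E1000_GORCH returns 0x2,
 * the code accumulates 0x1000 + (0x2ULL << 32) good octets for this
 * polling interval.
 */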
5397 
5398 static uint64_t
5399 em_get_counter(if_t ifp, ift_counter cnt)
5400 {
5401 	struct adapter *adapter;
5402 
5403 	adapter = if_getsoftc(ifp);
5404 
5405 	switch (cnt) {
5406 	case IFCOUNTER_COLLISIONS:
5407 		return (adapter->stats.colc);
5408 	case IFCOUNTER_IERRORS:
5409 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5410 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5411 		    adapter->stats.ruc + adapter->stats.roc +
5412 		    adapter->stats.mpc + adapter->stats.cexterr);
5413 	case IFCOUNTER_OERRORS:
5414 		return (adapter->stats.ecol + adapter->stats.latecol +
5415 		    adapter->watchdog_events);
5416 	default:
5417 		return (if_get_counter_default(ifp, cnt));
5418 	}
5419 }
5420 
5421 /* Export a single 32-bit register via a read-only sysctl. */
5422 static int
5423 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5424 {
5425 	struct adapter *adapter;
5426 	u_int val;
5427 
5428 	adapter = oidp->oid_arg1;
5429 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5430 	return (sysctl_handle_int(oidp, &val, 0, req));
5431 }
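
/*
 * Usage sketch: each register exported through this handler shows up
 * as a read-only OID under the device's sysctl tree, e.g.
 * "sysctl dev.em.0.device_control" for E1000_CTRL as wired up in
 * em_add_hw_stats() below (the unit number is illustrative).
 */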
5432 
5433 /*
5434  * Add sysctl variables, one per statistic, to the system.
5435  */
5436 static void
5437 em_add_hw_stats(struct adapter *adapter)
5438 {
5439 	device_t dev = adapter->dev;
5440 
5441 	struct tx_ring *txr = adapter->tx_rings;
5442 	struct rx_ring *rxr = adapter->rx_rings;
5443 
5444 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5445 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5446 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5447 	struct e1000_hw_stats *stats = &adapter->stats;
5448 
5449 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5450 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5451 
5452 #define QUEUE_NAME_LEN 32
5453 	char namebuf[QUEUE_NAME_LEN];
5454 
5455 	/* Driver Statistics */
5456 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5457 			CTLFLAG_RD, &adapter->link_irq,
5458 			"Link MSIX IRQ Handled");
5459 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5460 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5461 			 "Std mbuf failed");
5462 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5463 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5464 			 "Std mbuf cluster failed");
5465 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5466 			CTLFLAG_RD, &adapter->dropped_pkts,
5467 			"Driver dropped packets");
5468 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5469 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5470 			"Driver tx dma failure in xmit");
5471 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5472 			CTLFLAG_RD, &adapter->rx_overruns,
5473 			"RX overruns");
5474 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5475 			CTLFLAG_RD, &adapter->watchdog_events,
5476 			"Watchdog timeouts");
5477 
5478 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5479 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5480 			em_sysctl_reg_handler, "IU",
5481 			"Device Control Register");
5482 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5483 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5484 			em_sysctl_reg_handler, "IU",
5485 			"Receiver Control Register");
5486 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5487 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5488 			"Flow Control High Watermark");
5489 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5490 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5491 			"Flow Control Low Watermark");
5492 
5493 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5494 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5495 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5496 					    CTLFLAG_RD, NULL, "TX Queue Name");
5497 		queue_list = SYSCTL_CHILDREN(queue_node);
5498 
5499 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5500 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5501 				E1000_TDH(txr->me),
5502 				em_sysctl_reg_handler, "IU",
5503 				"Transmit Descriptor Head");
5504 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5505 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5506 				E1000_TDT(txr->me),
5507 				em_sysctl_reg_handler, "IU",
5508 				"Transmit Descriptor Tail");
5509 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5510 				CTLFLAG_RD, &txr->tx_irq,
5511 				"Queue MSI-X Transmit Interrupts");
5512 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5513 				CTLFLAG_RD, &txr->no_desc_avail,
5514 				"Queue No Descriptor Available");
5515 
5516 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5517 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5518 					    CTLFLAG_RD, NULL, "RX Queue Name");
5519 		queue_list = SYSCTL_CHILDREN(queue_node);
5520 
5521 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5522 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5523 				E1000_RDH(rxr->me),
5524 				em_sysctl_reg_handler, "IU",
5525 				"Receive Descriptor Head");
5526 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5527 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5528 				E1000_RDT(rxr->me),
5529 				em_sysctl_reg_handler, "IU",
5530 				"Receive Descriptor Tail");
5531 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5532 				CTLFLAG_RD, &rxr->rx_irq,
5533 				"Queue MSI-X Receive Interrupts");
5534 	}
5535 
5536 	/* MAC stats get their own sub node */
5537 
5538 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5539 				    CTLFLAG_RD, NULL, "Statistics");
5540 	stat_list = SYSCTL_CHILDREN(stat_node);
5541 
5542 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5543 			CTLFLAG_RD, &stats->ecol,
5544 			"Excessive collisions");
5545 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5546 			CTLFLAG_RD, &stats->scc,
5547 			"Single collisions");
5548 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5549 			CTLFLAG_RD, &stats->mcc,
5550 			"Multiple collisions");
5551 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5552 			CTLFLAG_RD, &stats->latecol,
5553 			"Late collisions");
5554 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5555 			CTLFLAG_RD, &stats->colc,
5556 			"Collision Count");
5557 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5558 			CTLFLAG_RD, &adapter->stats.symerrs,
5559 			"Symbol Errors");
5560 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5561 			CTLFLAG_RD, &adapter->stats.sec,
5562 			"Sequence Errors");
5563 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5564 			CTLFLAG_RD, &adapter->stats.dc,
5565 			"Defer Count");
5566 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5567 			CTLFLAG_RD, &adapter->stats.mpc,
5568 			"Missed Packets");
5569 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5570 			CTLFLAG_RD, &adapter->stats.rnbc,
5571 			"Receive No Buffers");
5572 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5573 			CTLFLAG_RD, &adapter->stats.ruc,
5574 			"Receive Undersize");
5575 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5576 			CTLFLAG_RD, &adapter->stats.rfc,
5577 			"Fragmented Packets Received");
5578 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5579 			CTLFLAG_RD, &adapter->stats.roc,
5580 			"Oversized Packets Received");
5581 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5582 			CTLFLAG_RD, &adapter->stats.rjc,
5583 			"Received Jabber");
5584 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5585 			CTLFLAG_RD, &adapter->stats.rxerrc,
5586 			"Receive Errors");
5587 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5588 			CTLFLAG_RD, &adapter->stats.crcerrs,
5589 			"CRC errors");
5590 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5591 			CTLFLAG_RD, &adapter->stats.algnerrc,
5592 			"Alignment Errors");
5593 	/* On 82575 these are collision counts */
5594 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5595 			CTLFLAG_RD, &adapter->stats.cexterr,
5596 			"Collision/Carrier extension errors");
5597 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5598 			CTLFLAG_RD, &adapter->stats.xonrxc,
5599 			"XON Received");
5600 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5601 			CTLFLAG_RD, &adapter->stats.xontxc,
5602 			"XON Transmitted");
5603 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5604 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5605 			"XOFF Received");
5606 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5607 			CTLFLAG_RD, &adapter->stats.xofftxc,
5608 			"XOFF Transmitted");
5609 
5610 	/* Packet Reception Stats */
5611 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5612 			CTLFLAG_RD, &adapter->stats.tpr,
5613 			"Total Packets Received");
5614 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5615 			CTLFLAG_RD, &adapter->stats.gprc,
5616 			"Good Packets Received");
5617 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5618 			CTLFLAG_RD, &adapter->stats.bprc,
5619 			"Broadcast Packets Received");
5620 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5621 			CTLFLAG_RD, &adapter->stats.mprc,
5622 			"Multicast Packets Received");
5623 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5624 			CTLFLAG_RD, &adapter->stats.prc64,
5625 			"64 byte frames received");
5626 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5627 			CTLFLAG_RD, &adapter->stats.prc127,
5628 			"65-127 byte frames received");
5629 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5630 			CTLFLAG_RD, &adapter->stats.prc255,
5631 			"128-255 byte frames received");
5632 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5633 			CTLFLAG_RD, &adapter->stats.prc511,
5634 			"256-511 byte frames received");
5635 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5636 			CTLFLAG_RD, &adapter->stats.prc1023,
5637 			"512-1023 byte frames received");
5638 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5639 			CTLFLAG_RD, &adapter->stats.prc1522,
5640 			"1024-1522 byte frames received");
5641 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5642 			CTLFLAG_RD, &adapter->stats.gorc,
5643 			"Good Octets Received");
5644 
5645 	/* Packet Transmission Stats */
5646 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5647 			CTLFLAG_RD, &adapter->stats.gotc,
5648 			"Good Octets Transmitted");
5649 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5650 			CTLFLAG_RD, &adapter->stats.tpt,
5651 			"Total Packets Transmitted");
5652 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5653 			CTLFLAG_RD, &adapter->stats.gptc,
5654 			"Good Packets Transmitted");
5655 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5656 			CTLFLAG_RD, &adapter->stats.bptc,
5657 			"Broadcast Packets Transmitted");
5658 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5659 			CTLFLAG_RD, &adapter->stats.mptc,
5660 			"Multicast Packets Transmitted");
5661 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5662 			CTLFLAG_RD, &adapter->stats.ptc64,
5663 			"64 byte frames transmitted");
5664 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5665 			CTLFLAG_RD, &adapter->stats.ptc127,
5666 			"65-127 byte frames transmitted");
5667 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5668 			CTLFLAG_RD, &adapter->stats.ptc255,
5669 			"128-255 byte frames transmitted");
5670 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5671 			CTLFLAG_RD, &adapter->stats.ptc511,
5672 			"256-511 byte frames transmitted");
5673 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5674 			CTLFLAG_RD, &adapter->stats.ptc1023,
5675 			"512-1023 byte frames transmitted");
5676 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5677 			CTLFLAG_RD, &adapter->stats.ptc1522,
5678 			"1024-1522 byte frames transmitted");
5679 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5680 			CTLFLAG_RD, &adapter->stats.tsctc,
5681 			"TSO Contexts Transmitted");
5682 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5683 			CTLFLAG_RD, &adapter->stats.tsctfc,
5684 			"TSO Contexts Failed");
5685 
5686 
5687 	/* Interrupt Stats */
5688 
5689 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5690 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5691 	int_list = SYSCTL_CHILDREN(int_node);
5692 
5693 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5694 			CTLFLAG_RD, &adapter->stats.iac,
5695 			"Interrupt Assertion Count");
5696 
5697 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5698 			CTLFLAG_RD, &adapter->stats.icrxptc,
5699 			"Interrupt Cause Rx Pkt Timer Expire Count");
5700 
5701 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5702 			CTLFLAG_RD, &adapter->stats.icrxatc,
5703 			"Interrupt Cause Rx Abs Timer Expire Count");
5704 
5705 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5706 			CTLFLAG_RD, &adapter->stats.ictxptc,
5707 			"Interrupt Cause Tx Pkt Timer Expire Count");
5708 
5709 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5710 			CTLFLAG_RD, &adapter->stats.ictxatc,
5711 			"Interrupt Cause Tx Abs Timer Expire Count");
5712 
5713 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5714 			CTLFLAG_RD, &adapter->stats.ictxqec,
5715 			"Interrupt Cause Tx Queue Empty Count");
5716 
5717 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5718 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5719 			"Interrupt Cause Tx Queue Min Thresh Count");
5720 
5721 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5722 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5723 			"Interrupt Cause Rx Desc Min Thresh Count");
5724 
5725 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5726 			CTLFLAG_RD, &adapter->stats.icrxoc,
5727 			"Interrupt Cause Receiver Overrun Count");
5728 }
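
/*
 * Usage sketch: the nodes added above land under dev.em.<unit>, e.g.
 * "sysctl dev.em.0.mac_stats" dumps the MAC counters and
 * "sysctl dev.em.0.queue_tx_0.txd_head" reads the first TX queue's
 * head register via em_sysctl_reg_handler().
 */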
5729 
5730 /**********************************************************************
5731  *
5732  *  This routine provides a way to dump out the adapter eeprom,
5733  *  often a useful debug/service tool. It dumps only the first
5734  *  32 words, which is where the fields that matter live.
5735  *
5736  **********************************************************************/
5737 static int
5738 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5739 {
5740 	struct adapter *adapter = (struct adapter *)arg1;
5741 	int error;
5742 	int result;
5743 
5744 	result = -1;
5745 	error = sysctl_handle_int(oidp, &result, 0, req);
5746 
5747 	if (error || !req->newptr)
5748 		return (error);
5749 
5750 	/*
5751 	 * This value will cause a hex dump of the
5752 	 * first 32 16-bit words of the EEPROM to
5753 	 * the screen.
5754 	 */
5755 	if (result == 1)
5756 		em_print_nvm_info(adapter);
5757 
5758 	return (error);
5759 }
5760 
5761 static void
5762 em_print_nvm_info(struct adapter *adapter)
5763 {
5764 	u16	eeprom_data;
5765 	int	i, j, row = 0;
5766 
5767 	/* It's a bit crude, but it gets the job done */
5768 	printf("\nInterface EEPROM Dump:\n");
5769 	printf("Offset\n0x0000  ");
5770 	for (i = 0, j = 0; i < 32; i++, j++) {
5771 		if (j == 8) { /* Make the offset block */
5772 			j = 0; ++row;
5773 			printf("\n0x00%x0  ", row);
5774 		}
5775 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5776 		printf("%04x ", eeprom_data);
5777 	}
5778 	printf("\n");
5779 }
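
/*
 * Usage sketch: writing 1 to the OID bound to em_sysctl_nvm_info()
 * triggers the dump above, e.g. "sysctl dev.em.0.nvm=1" if the OID is
 * registered as "nvm" (the OID name and unit are illustrative).
 */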
5780 
5781 static int
5782 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5783 {
5784 	struct em_int_delay_info *info;
5785 	struct adapter *adapter;
5786 	u32 regval;
5787 	int error, usecs, ticks;
5788 
5789 	info = (struct em_int_delay_info *)arg1;
5790 	usecs = info->value;
5791 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5792 	if (error != 0 || req->newptr == NULL)
5793 		return (error);
5794 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5795 		return (EINVAL);
5796 	info->value = usecs;
5797 	ticks = EM_USECS_TO_TICKS(usecs);
5798 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5799 		ticks *= 4;
5800 
5801 	adapter = info->adapter;
5802 
5803 	EM_CORE_LOCK(adapter);
5804 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5805 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5806 	/* Handle a few special cases. */
5807 	switch (info->offset) {
5808 	case E1000_RDTR:
5809 		break;
5810 	case E1000_TIDV:
5811 		if (ticks == 0) {
5812 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5813 			/* Don't write 0 into the TIDV register. */
5814 			regval++;
5815 		} else
5816 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5817 		break;
5818 	}
5819 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5820 	EM_CORE_UNLOCK(adapter);
5821 	return (0);
5822 }
5823 
5824 static void
5825 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5826 	const char *description, struct em_int_delay_info *info,
5827 	int offset, int value)
5828 {
5829 	info->adapter = adapter;
5830 	info->offset = offset;
5831 	info->value = value;
5832 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5833 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5834 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5835 	    info, 0, em_sysctl_int_delay, "I", description);
5836 }
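
/*
 * Worked example, assuming EM_USECS_TO_TICKS() converts to the chip's
 * 1.024us delay-timer granularity: a request of 128 usecs becomes 125
 * ticks, and for E1000_ITR those ticks are multiplied by 4 in
 * em_sysctl_int_delay() because ITR counts 256ns units (1024/256 == 4).
 */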
5837 
5838 static void
5839 em_set_sysctl_value(struct adapter *adapter, const char *name,
5840 	const char *description, int *limit, int value)
5841 {
5842 	*limit = value;
5843 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5844 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5845 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5846 }
5847 
5848 
5849 /*
5850 ** Set flow control using sysctl:
5851 ** Flow control values:
5852 **      0 - off
5853 **      1 - rx pause
5854 **      2 - tx pause
5855 **      3 - full
5856 */
5857 static int
5858 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5859 {
5860 	int		error, input;
5861 	struct adapter	*adapter = (struct adapter *) arg1;
5862 
5863 	input = adapter->fc;	/* per-adapter, not a shared static */
5864 	error = sysctl_handle_int(oidp, &input, 0, req);
5865 
5866 	if ((error) || (req->newptr == NULL))
5867 		return (error);
5868 
5869 	if (input == adapter->fc) /* no change? */
5870 		return (error);
5871 
5872 	switch (input) {
5873 	case e1000_fc_rx_pause:
5874 	case e1000_fc_tx_pause:
5875 	case e1000_fc_full:
5876 	case e1000_fc_none:
5877 		adapter->hw.fc.requested_mode = input;
5878 		adapter->fc = input;
5879 		break;
5880 	default:
5881 		/* Do nothing */
5882 		return (error);
5883 	}
5884 
5885 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5886 	e1000_force_mac_fc(&adapter->hw);
5887 	return (error);
5888 }
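
/*
 * Usage sketch: with the value map documented above, full flow control
 * is selected with "sysctl dev.em.0.fc=3" and disabled with
 * "sysctl dev.em.0.fc=0", assuming the handler is registered under an
 * OID named "fc" (the OID name and unit are illustrative).
 */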
5889 
5890 /*
5891 ** Manage Energy Efficient Ethernet:
5892 ** Control values:
5893 **     0 - EEE enabled, 1 - EEE disabled
5894 */
5895 static int
5896 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5897 {
5898 	struct adapter *adapter = (struct adapter *) arg1;
5899 	int		error, value;
5900 
5901 	value = adapter->hw.dev_spec.ich8lan.eee_disable;
5902 	error = sysctl_handle_int(oidp, &value, 0, req);
5903 	if (error || req->newptr == NULL)
5904 		return (error);
5905 	EM_CORE_LOCK(adapter);
5906 	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5907 	em_init_locked(adapter);
5908 	EM_CORE_UNLOCK(adapter);
5909 	return (0);
5910 }
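
/*
 * Usage sketch: writing 1 sets eee_disable and reinitializes the
 * interface with EEE off, e.g. "sysctl dev.em.0.eee_control=1",
 * assuming the handler is registered as "eee_control" (the OID name
 * and unit are illustrative).
 */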
5911 
5912 static int
5913 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5914 {
5915 	struct adapter *adapter;
5916 	int error;
5917 	int result;
5918 
5919 	result = -1;
5920 	error = sysctl_handle_int(oidp, &result, 0, req);
5921 
5922 	if (error || !req->newptr)
5923 		return (error);
5924 
5925 	if (result == 1) {
5926 		adapter = (struct adapter *)arg1;
5927 		em_print_debug_info(adapter);
5928 	}
5929 
5930 	return (error);
5931 }
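
/*
 * Usage sketch: writing 1 to the OID bound to this handler prints the
 * queue state from em_print_debug_info() below, e.g.
 * "sysctl dev.em.0.debug=1" if it is registered as "debug" (the OID
 * name and unit are illustrative).
 */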
5932 
5933 /*
5934 ** This routine is meant to be fluid, add whatever is
5935 ** needed for debugging a problem.  -jfv
5936 */
5937 static void
5938 em_print_debug_info(struct adapter *adapter)
5939 {
5940 	device_t dev = adapter->dev;
5941 	struct tx_ring *txr = adapter->tx_rings;
5942 	struct rx_ring *rxr = adapter->rx_rings;
5943 
5944 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
5945 		printf("Interface is RUNNING ");
5946 	else
5947 		printf("Interface is NOT RUNNING ");
5948 
5949 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
5950 		printf("and INACTIVE\n");
5951 	else
5952 		printf("and ACTIVE\n");
5953 
5954 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5955 		device_printf(dev, "TX Queue %d ------\n", i);
5956 		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5957 	    		E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
5958 	    		E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
5959 		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
5960 		device_printf(dev, "TX descriptors avail = %d\n",
5961 	    		txr->tx_avail);
5962 		device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5963 	    		txr->no_desc_avail);
5964 		device_printf(dev, "RX Queue %d ------\n", i);
5965 		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5966 	    		E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
5967 	    		E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
5968 		device_printf(dev, "RX discarded packets = %ld\n",
5969 	    		rxr->rx_discarded);
5970 		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5971 		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5972 	}
5973 }
5974 
5975 #ifdef EM_MULTIQUEUE
5976 /*
5977  * 82574 only:
5978  * Write a new value to the EEPROM increasing the number of MSIX
5979  * vectors from 3 to 5, for proper multiqueue support.
5980  */
5981 static void
5982 em_enable_vectors_82574(struct adapter *adapter)
5983 {
5984 	struct e1000_hw *hw = &adapter->hw;
5985 	device_t dev = adapter->dev;
5986 	u16 edata;
5987 
5988 	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
5989 	device_printf(dev, "Current cap: %#06x\n", edata);
5990 	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
5991 		device_printf(dev, "Writing to eeprom: increasing "
5992 		    "reported MSIX vectors from 3 to 5...\n");
5993 		edata &= ~(EM_NVM_MSIX_N_MASK);
5994 		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
5995 		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
5996 		e1000_update_nvm_checksum(hw);
5997 		device_printf(dev, "Writing to eeprom: done\n");
5998 	}
5999 }
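
/*
 * Encoding note: the EM_NVM_MSIX_N field stores the vector count minus
 * one, so the value 4 written above advertises the 5 MSIX vectors a
 * two-queue setup typically needs (2 TX + 2 RX + 1 link).
 */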
6000 #endif
6001 
6002 #ifdef DDB
6003 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6004 {
6005 	devclass_t	dc;
6006 	int max_em;
6007 
6008 	dc = devclass_find("em");
6009 	max_em = devclass_get_maxunit(dc);
6010 
6011 	for (int index = 0; index < max_em; index++) {
6012 		device_t dev;
6013 		dev = devclass_get_device(dc, index);
6014 		if (dev != NULL && device_get_driver(dev) == &em_driver) {
6015 			struct adapter *adapter = device_get_softc(dev);
6016 			EM_CORE_LOCK(adapter);
6017 			em_init_locked(adapter);
6018 			EM_CORE_UNLOCK(adapter);
6019 		}
6020 	}
6021 }
6022 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6023 {
6024 	devclass_t	dc;
6025 	int max_em;
6026 
6027 	dc = devclass_find("em");
6028 	max_em = devclass_get_maxunit(dc);
6029 
6030 	for (int index = 0; index < max_em; index++) {
6031 		device_t dev;
6032 		dev = devclass_get_device(dc, index);
6033 		if (dev != NULL && device_get_driver(dev) == &em_driver)
6034 			em_print_debug_info(device_get_softc(dev));
6035 	}
6036 
6037 }
6038 #endif
6039