/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_em.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifdef DDB
#include <sys/types.h>
#include <ddb/ddb.h>
#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.4.2";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(if_t, struct mbuf *);
static int	em_mq_start_locked(if_t,
		    struct tx_ring *);
static void	em_qflush(if_t);
#else
static void	em_start(if_t);
static void	em_start_locked(if_t, struct tx_ring *);
#endif
static int	em_ioctl(if_t, u_long, caddr_t);
static uint64_t	em_get_counter(if_t, ift_counter);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(if_t, struct ifmediareq *);
static int	em_media_change(if_t);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, if_t, u16);
static void	em_unregister_vlan(void *, if_t, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void	em_enable_vectors_82574(struct adapter *);
#endif

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(em, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
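/*
 * EM_TICKS_TO_USECS/EM_USECS_TO_TICKS convert between microseconds and
 * the 1.024 usec units used by the interrupt delay registers; the +500
 * and +512 bias terms round to the nearest unit instead of truncating.
 */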
#define M_TSO_LEN			66

#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
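/*
 * The ITR register counts in 256 ns units, so DEFAULT_ITR works out to
 * 1000000000 / (8000 * 256) = 488, i.e. a ceiling of roughly 8000
 * interrupts per second.
 */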

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");
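/*
 * All of the above are CTLFLAG_RDTUN, i.e. boot-time tunables.  A
 * hypothetical /boot/loader.conf sizing both rings to 2048 descriptors
 * would contain:
 *
 *   hw.em.rxd="2048"
 *   hw.em.txd="2048"
 */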

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store last used CPU when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF */
static int eee_setting = 1;
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");
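/*
 * The value is copied into the shared code's eee_disable flag at
 * attach time, so the default of 1 leaves EEE off; setting
 * hw.em.eee_setting=0 from loader.conf enables it.
 */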

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on
 *  adapter based on PCI vendor/device id of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");
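	/*
	 * A sketch of runtime use (assuming unit 0): the fc handler
	 * takes the usual e1000 flow control values, 0 = off,
	 * 1 = rx pause, 2 = tx pause, 3 = full, e.g.:
	 *
	 *   sysctl dev.em.0.fc=3
	 */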

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	 * Setup MSI/X or MSI if PCI Express
	 */
	adapter->msix = em_setup_msix(adapter);

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of E1000_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state, this is
	** important in reading the nvm and
	** mac from that.
	*/
	e1000_reset_hw(hw);


	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again,
		** if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Disable ULP support */
	e1000_disable_ulp_lpt_lp(hw, TRUE);

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != (void *)NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (if_vlantrunkinuse(ifp)) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((if_getflags(ifp) & IFF_UP) &&
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr);
#else
			if (!if_sendq_empty(ifp))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#ifndef EM_MULTIQUEUE
static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!if_sendq_empty(ifp)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
			break;
		}
		m_head = if_dequeue(ifp);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			if_sendq_prepend(ifp, m_head);
			break;
		}

		/* Mark the queue as having work */
		if (txr->busy == EM_TX_IDLE)
			txr->busy = EM_TX_BUSY;

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

	}

	return;
}

static void
em_start(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than do an immediate send.  It is this buffering, rather
 *  than the multiple tx queues themselves, that is the main
 *  advantage of this path.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(if_t ifp, struct mbuf *m)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	unsigned int	i, error;

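	/*
	 * Select a ring: honor the RSS flow id when the stack has
	 * assigned one, otherwise spread work by the current CPU.
	 */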
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	error = drbr_enqueue(ifp, txr->br, m);
	if (error)
		return (error);

	if (EM_TX_TRYLOCK(txr)) {
		em_mq_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(txr->tq, &txr->tx_task);

	return (0);
}

static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	EM_TX_LOCK_ASSERT(txr);

	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0) {
		return (ENETDOWN);
	}

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				/* It was freed, move forward */
				drbr_advance(ifp, txr->br);
			} else {
				/*
				 * Still have one left, it may not be
				 * the same since the transmit function
				 * may have changed it.
				 */
				drbr_putback(ifp, txr->br, next);
			}
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
		if (next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
		ETHER_BPF_MTAP(ifp, next);
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
			break;
	}

	/* Mark the queue as having work */
	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
		txr->busy = EM_TX_BUSY;

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER) {
		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
	}
	return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			if_setflagbits(ifp, IFF_UP, 0);
			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(if_getflags(ifp) & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
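		/*
		 * Note that max_frame_size counts the Ethernet header
		 * and CRC, so e.g. 9234 allows an MTU of up to 9216
		 * octets (9234 - 14 header - 4 FCS).
		 */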
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		adapter->hw.mac.max_frame_size =
		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (if_getflags(ifp) & IFF_UP) {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				if ((if_getflags(ifp) ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = if_getflags(ifp);
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			if_togglecapenable(ifp, IFCAP_HWCSUM);
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			if_togglecapenable(ifp, IFCAP_TSO4);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
			if (mask & IFCAP_WOL_MAGIC)
				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
		}
		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
			em_init(adapter);
		if_vlancap(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, we make a duplicate
	 * in RAR[14] for that eventuality, this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	if_clearhwassist(ifp);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	if (if_getcapenable(ifp) & IFCAP_TSO4)
		if_sethwassistbits(ifp, CSUM_TSO, 0);

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr);
#else
		if (!if_sendq_empty(ifp))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif

	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else {
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	}
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	/*
	** Because we must read the ICR for this interrupt
	** it may clear other causes using autoclear, for
	** this reason we simply create a soft interrupt
	** for all these vectors.
	*/
	if (reg_icr) {
		E1000_WRITE_REG(&adapter->hw,
			E1000_ICS, adapter->ims);
	}
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else {
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	}
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	if (adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr);
#else
1737 			if (!if_sendq_empty(ifp))
1738 				em_start_locked(ifp, txr);
1739 #endif
1740 			EM_TX_UNLOCK(txr);
1741 		}
1742 	}
1743 	EM_CORE_UNLOCK(adapter);
1744 }
1745 
1746 
1747 /*********************************************************************
1748  *
1749  *  Media Ioctl callback
1750  *
1751  *  This routine is called whenever the user queries the status of
1752  *  the interface using ifconfig.
1753  *
1754  **********************************************************************/
1755 static void
1756 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1757 {
1758 	struct adapter *adapter = if_getsoftc(ifp);
1759 	u_char fiber_type = IFM_1000_SX;
1760 
1761 	INIT_DEBUGOUT("em_media_status: begin");
1762 
1763 	EM_CORE_LOCK(adapter);
1764 	em_update_link_status(adapter);
1765 
1766 	ifmr->ifm_status = IFM_AVALID;
1767 	ifmr->ifm_active = IFM_ETHER;
1768 
1769 	if (!adapter->link_active) {
1770 		EM_CORE_UNLOCK(adapter);
1771 		return;
1772 	}
1773 
1774 	ifmr->ifm_status |= IFM_ACTIVE;
1775 
1776 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1777 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1778 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1779 	} else {
1780 		switch (adapter->link_speed) {
1781 		case 10:
1782 			ifmr->ifm_active |= IFM_10_T;
1783 			break;
1784 		case 100:
1785 			ifmr->ifm_active |= IFM_100_TX;
1786 			break;
1787 		case 1000:
1788 			ifmr->ifm_active |= IFM_1000_T;
1789 			break;
1790 		}
1791 		if (adapter->link_duplex == FULL_DUPLEX)
1792 			ifmr->ifm_active |= IFM_FDX;
1793 		else
1794 			ifmr->ifm_active |= IFM_HDX;
1795 	}
1796 	EM_CORE_UNLOCK(adapter);
1797 }
1798 
1799 /*********************************************************************
1800  *
1801  *  Media Ioctl callback
1802  *
1803  *  This routine is called when the user changes speed/duplex using
1804  *  media/mediaopt options with ifconfig.
1805  *
1806  **********************************************************************/
1807 static int
1808 em_media_change(if_t ifp)
1809 {
1810 	struct adapter *adapter = if_getsoftc(ifp);
1811 	struct ifmedia  *ifm = &adapter->media;
1812 
1813 	INIT_DEBUGOUT("em_media_change: begin");
1814 
1815 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1816 		return (EINVAL);
1817 
1818 	EM_CORE_LOCK(adapter);
1819 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1820 	case IFM_AUTO:
1821 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1822 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1823 		break;
1824 	case IFM_1000_LX:
1825 	case IFM_1000_SX:
1826 	case IFM_1000_T:
1827 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1828 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1829 		break;
1830 	case IFM_100_TX:
1831 		adapter->hw.mac.autoneg = FALSE;
1832 		adapter->hw.phy.autoneg_advertised = 0;
1833 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1834 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1835 		else
1836 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1837 		break;
1838 	case IFM_10_T:
1839 		adapter->hw.mac.autoneg = FALSE;
1840 		adapter->hw.phy.autoneg_advertised = 0;
1841 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1842 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1843 		else
1844 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1845 		break;
1846 	default:
1847 		device_printf(adapter->dev, "Unsupported media type\n");
1848 	}
1849 
1850 	em_init_locked(adapter);
1851 	EM_CORE_UNLOCK(adapter);
1852 
1853 	return (0);
1854 }
1855 
1856 /*********************************************************************
1857  *
1858  *  This routine maps the mbufs to tx descriptors.
1859  *
1860  *  return 0 on success, positive on failure
1861  **********************************************************************/
1862 
1863 static int
1864 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1865 {
1866 	struct adapter		*adapter = txr->adapter;
1867 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1868 	bus_dmamap_t		map;
1869 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1870 	struct e1000_tx_desc	*ctxd = NULL;
1871 	struct mbuf		*m_head;
1872 	struct ether_header	*eh;
1873 	struct ip		*ip = NULL;
1874 	struct tcphdr		*tp = NULL;
1875 	u32			txd_upper, txd_lower, txd_used, txd_saved;
1876 	int			ip_off, poff;
1877 	int			nsegs, i, j, first, last = 0;
1878 	int			error, do_tso, tso_desc = 0, remap = 1;
1879 
1880 	m_head = *m_headp;
1881 	txd_upper = txd_lower = txd_used = txd_saved = 0;
1882 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1883 	ip_off = poff = 0;
1884 
1885 	/*
1886 	 * Intel recommends entire IP/TCP header length reside in a single
1887 	 * buffer. If multiple descriptors are used to describe the IP and
1888 	 * TCP header, each descriptor should describe one or more
1889 	 * complete headers; descriptors referencing only parts of headers
1890 	 * are not supported. If all layer headers are not coalesced into
1891 	 * a single buffer, each buffer should not cross a 4KB boundary,
1892 	 * or be larger than the maximum read request size.
1893 	 * The controller also requires modifying the IP/TCP header to
1894 	 * make TSO work, so we first get a writable mbuf chain and then
1895 	 * coalesce the ethernet/IP/TCP header into a single buffer to
1896 	 * meet the controller's requirement. This also simplifies
1897 	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1898 	 */
1899 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1900 		if (do_tso || (m_head->m_next != NULL &&
1901 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1902 			if (M_WRITABLE(*m_headp) == 0) {
1903 				m_head = m_dup(*m_headp, M_NOWAIT);
1904 				m_freem(*m_headp);
1905 				if (m_head == NULL) {
1906 					*m_headp = NULL;
1907 					return (ENOBUFS);
1908 				}
1909 				*m_headp = m_head;
1910 			}
1911 		}
1912 		/*
1913 		 * XXX
1914 		 * Assume IPv4, we don't have TSO/checksum offload support
1915 		 * for IPv6 yet.
1916 		 */
1917 		ip_off = sizeof(struct ether_header);
1918 		m_head = m_pullup(m_head, ip_off);
1919 		if (m_head == NULL) {
1920 			*m_headp = NULL;
1921 			return (ENOBUFS);
1922 		}
1923 		eh = mtod(m_head, struct ether_header *);
1924 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1925 			ip_off = sizeof(struct ether_vlan_header);
1926 			m_head = m_pullup(m_head, ip_off);
1927 			if (m_head == NULL) {
1928 				*m_headp = NULL;
1929 				return (ENOBUFS);
1930 			}
1931 		}
1932 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1933 		if (m_head == NULL) {
1934 			*m_headp = NULL;
1935 			return (ENOBUFS);
1936 		}
1937 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1938 		poff = ip_off + (ip->ip_hl << 2);
1939 		if (do_tso) {
1940 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1941 			if (m_head == NULL) {
1942 				*m_headp = NULL;
1943 				return (ENOBUFS);
1944 			}
1945 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1946 			/*
1947 			 * TSO workaround:
1948 			 *   pull 4 more bytes of data into it.
1949 			 */
1950 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1951 			if (m_head == NULL) {
1952 				*m_headp = NULL;
1953 				return (ENOBUFS);
1954 			}
1955 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1956 			ip->ip_len = 0;
1957 			ip->ip_sum = 0;
1958 			/*
1959 			 * The pseudo TCP checksum does not include the TCP
1960 			 * payload length, so the driver must recompute it here
1961 			 * to match what the hardware expects to see, per
1962 			 * Microsoft's Large Send specification.
1963 			 */
1964 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1965 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1966 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1967 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1968 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1969 			if (m_head == NULL) {
1970 				*m_headp = NULL;
1971 				return (ENOBUFS);
1972 			}
1973 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1974 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1975 			if (m_head == NULL) {
1976 				*m_headp = NULL;
1977 				return (ENOBUFS);
1978 			}
1979 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1980 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1981 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1982 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1983 			if (m_head == NULL) {
1984 				*m_headp = NULL;
1985 				return (ENOBUFS);
1986 			}
1987 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1988 		}
1989 		*m_headp = m_head;
1990 	}
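	/*
	 * At this point, for an offloaded untagged IPv4 frame, the
	 * first mbuf is contiguous through all layer headers:
	 *
	 *   0       ip_off            poff              poff + hdr len
	 *   | ether | IP (ip_hl << 2) | TCP/UDP header  | payload ...
	 *
	 * so every header can be covered by a single DMA segment.
	 */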
1991 
1992 	/*
1993 	 * Map the packet for DMA
1994 	 *
1995 	 * Capture the first descriptor index,
1996 	 * this descriptor will have the index
1997 	 * of the EOP which is the only one that
1998 	 * now gets a DONE bit writeback.
1999 	 */
2000 	first = txr->next_avail_desc;
2001 	tx_buffer = &txr->tx_buffers[first];
2002 	tx_buffer_mapped = tx_buffer;
2003 	map = tx_buffer->map;
2004 
2005 retry:
2006 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2007 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2008 
2009 	/*
2010 	 * There are two types of errors we can (try) to handle:
2011 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
2012 	 *   out of segments.  Defragment the mbuf chain and try again.
2013 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2014 	 *   at this point in time.  Defer sending and try again later.
2015 	 * All other errors, in particular EINVAL, are fatal and prevent the
2016 	 * mbuf chain from ever going through.  Drop it and report error.
2017 	 */
2018 	if (error == EFBIG && remap) {
2019 		struct mbuf *m;
2020 
2021 		m = m_defrag(*m_headp, M_NOWAIT);
2022 		if (m == NULL) {
2023 			adapter->mbuf_alloc_failed++;
2024 			m_freem(*m_headp);
2025 			*m_headp = NULL;
2026 			return (ENOBUFS);
2027 		}
2028 		*m_headp = m;
2029 
2030 		/* Try it again, but only once */
2031 		remap = 0;
2032 		goto retry;
2033 	} else if (error == ENOMEM) {
2034 		adapter->no_tx_dma_setup++;
2035 		return (error);
2036 	} else if (error != 0) {
2037 		adapter->no_tx_dma_setup++;
2038 		m_freem(*m_headp);
2039 		*m_headp = NULL;
2040 		return (error);
2041 	}
2042 
2043 	/*
2044 	 * TSO Hardware workaround, if this packet is not
2045 	 * TSO, and is only a single descriptor long, and
2046 	 * it follows a TSO burst, then we need to add a
2047 	 * sentinel descriptor to prevent premature writeback.
2048 	 */
2049 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
2050 		if (nsegs == 1)
2051 			tso_desc = TRUE;
2052 		txr->tx_tso = FALSE;
2053 	}
2054 
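	/*
	 * The "- 2" below keeps headroom in the ring: one descriptor
	 * for a possible TSO sentinel split and one so the ring is
	 * never filled completely.
	 */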
2055 	if (nsegs > (txr->tx_avail - 2)) {
2056 		txr->no_desc_avail++;
2057 		bus_dmamap_unload(txr->txtag, map);
2058 		return (ENOBUFS);
2059 	}
2060 	m_head = *m_headp;
2061 
2062 	/* Do hardware assists */
2063 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2064 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2065 		    &txd_upper, &txd_lower);
2066 		/* we need to make a final sentinel transmit desc */
2067 		tso_desc = TRUE;
2068 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2069 		em_transmit_checksum_setup(txr, m_head,
2070 		    ip_off, ip, &txd_upper, &txd_lower);
2071 
2072 	if (m_head->m_flags & M_VLANTAG) {
2073 		/* Set the vlan id. */
2074 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2075 		/* Tell hardware to add tag */
2076 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2077 	}
2078 
2079 	i = txr->next_avail_desc;
2080 
2081 	/* Set up our transmit descriptors */
2082 	for (j = 0; j < nsegs; j++) {
2083 		bus_size_t seg_len;
2084 		bus_addr_t seg_addr;
2085 
2086 		tx_buffer = &txr->tx_buffers[i];
2087 		ctxd = &txr->tx_base[i];
2088 		seg_addr = segs[j].ds_addr;
2089 		seg_len  = segs[j].ds_len;
2090 		/*
2091 		** TSO Workaround:
2092 		** If this is the last descriptor, we want to
2093 		** split it so we have a small final sentinel
2094 		*/
2095 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2096 			seg_len -= 4;
2097 			ctxd->buffer_addr = htole64(seg_addr);
2098 			ctxd->lower.data = htole32(
2099 			adapter->txd_cmd | txd_lower | seg_len);
2100 			ctxd->upper.data =
2101 			    htole32(txd_upper);
2102 			if (++i == adapter->num_tx_desc)
2103 				i = 0;
2104 			/* Now make the sentinel */
2105 			++txd_used; /* using an extra txd */
2106 			ctxd = &txr->tx_base[i];
2107 			tx_buffer = &txr->tx_buffers[i];
2108 			ctxd->buffer_addr =
2109 			    htole64(seg_addr + seg_len);
2110 			ctxd->lower.data = htole32(
2111 			adapter->txd_cmd | txd_lower | 4);
2112 			ctxd->upper.data =
2113 			    htole32(txd_upper);
2114 			last = i;
2115 			if (++i == adapter->num_tx_desc)
2116 				i = 0;
2117 		} else {
2118 			ctxd->buffer_addr = htole64(seg_addr);
2119 			ctxd->lower.data = htole32(
2120 			adapter->txd_cmd | txd_lower | seg_len);
2121 			ctxd->upper.data =
2122 			    htole32(txd_upper);
2123 			last = i;
2124 			if (++i == adapter->num_tx_desc)
2125 				i = 0;
2126 		}
2127 		tx_buffer->m_head = NULL;
2128 		tx_buffer->next_eop = -1;
2129 	}
2130 
2131 	txr->next_avail_desc = i;
2132 	txr->tx_avail -= nsegs;
2133 	if (tso_desc) /* TSO used an extra for sentinel */
2134 		txr->tx_avail -= txd_used;
2135 
2136 	tx_buffer->m_head = m_head;
2137 	/*
2138 	** Here we swap the map so the last descriptor,
2139 	** which gets the completion interrupt, has the
2140 	** real map, and the first descriptor gets the
2141 	** unused map from this descriptor.
2142 	*/
2143 	tx_buffer_mapped->map = tx_buffer->map;
2144 	tx_buffer->map = map;
2145 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2146 
2147 	/*
2148 	 * Last Descriptor of Packet
2149 	 * needs End Of Packet (EOP)
2150 	 * and Report Status (RS)
2151 	 */
2152 	ctxd->lower.data |=
2153 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2154 	/*
2155 	 * Keep track in the first buffer which
2156 	 * descriptor will be written back
2157 	 */
2158 	tx_buffer = &txr->tx_buffers[first];
2159 	tx_buffer->next_eop = last;
2160 
2161 	/*
2162 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2163 	 * that this frame is available to transmit.
2164 	 */
2165 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2166 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2167 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2168 
2169 	return (0);
2170 }
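
/*
 * Illustrative caller sketch (not part of the driver): em_xmit() may
 * defragment, replace, or free the chain, so a caller must reload its
 * mbuf pointer afterwards; a surviving chain (m != NULL) may be
 * requeued, a NULL one was consumed.
 */
#if 0
static void
em_xmit_caller_sketch(if_t ifp, struct tx_ring *txr, struct mbuf *m)
{
	if (em_xmit(txr, &m) != 0 && m != NULL)
		if_sendq_prepend(ifp, m);	/* chain intact: retry later */
}
#endif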
2171 
2172 static void
2173 em_set_promisc(struct adapter *adapter)
2174 {
2175 	if_t ifp = adapter->ifp;
2176 	u32		reg_rctl;
2177 
2178 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2179 
2180 	if (if_getflags(ifp) & IFF_PROMISC) {
2181 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2182 		/* Turn this on if you want to see bad packets */
2183 		if (em_debug_sbp)
2184 			reg_rctl |= E1000_RCTL_SBP;
2185 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2186 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2187 		reg_rctl |= E1000_RCTL_MPE;
2188 		reg_rctl &= ~E1000_RCTL_UPE;
2189 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2190 	}
2191 }
2192 
2193 static void
2194 em_disable_promisc(struct adapter *adapter)
2195 {
2196 	if_t		ifp = adapter->ifp;
2197 	u32		reg_rctl;
2198 	int		mcnt = 0;
2199 
2200 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2201 	reg_rctl &= (~E1000_RCTL_UPE);
2202 	if (if_getflags(ifp) & IFF_ALLMULTI)
2203 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2204 	else
2205 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2206 	/* Don't disable if in MAX groups */
2207 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2208 		reg_rctl &= (~E1000_RCTL_MPE);
2209 	reg_rctl &= (~E1000_RCTL_SBP);
2210 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2211 }
2212 
2213 
2214 /*********************************************************************
2215  *  Multicast Update
2216  *
2217  *  This routine is called whenever multicast address list is updated.
2218  *
2219  **********************************************************************/
2220 
2221 static void
2222 em_set_multi(struct adapter *adapter)
2223 {
2224 	if_t ifp = adapter->ifp;
2225 	u32 reg_rctl = 0;
2226 	u8  *mta; /* Multicast array memory */
2227 	int mcnt = 0;
2228 
2229 	IOCTL_DEBUGOUT("em_set_multi: begin");
2230 
2231 	mta = adapter->mta;
2232 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2233 
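	/*
	 * On the 82542 rev. 2 the receiver is held in reset (and
	 * memory-write-invalidate turned off) while the multicast
	 * table array is rewritten; both are restored further below.
	 */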
2234 	if (adapter->hw.mac.type == e1000_82542 &&
2235 	    adapter->hw.revision_id == E1000_REVISION_2) {
2236 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2237 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2238 			e1000_pci_clear_mwi(&adapter->hw);
2239 		reg_rctl |= E1000_RCTL_RST;
2240 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2241 		msec_delay(5);
2242 	}
2243 
2244 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2245 
2246 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2247 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2248 		reg_rctl |= E1000_RCTL_MPE;
2249 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2250 	} else
2251 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2252 
2253 	if (adapter->hw.mac.type == e1000_82542 &&
2254 	    adapter->hw.revision_id == E1000_REVISION_2) {
2255 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2256 		reg_rctl &= ~E1000_RCTL_RST;
2257 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2258 		msec_delay(5);
2259 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2260 			e1000_pci_set_mwi(&adapter->hw);
2261 	}
2262 }
2263 
2264 
2265 /*********************************************************************
2266  *  Timer routine
2267  *
2268  *  This routine checks for link status and updates statistics.
2269  *
2270  **********************************************************************/
2271 
2272 static void
2273 em_local_timer(void *arg)
2274 {
2275 	struct adapter	*adapter = arg;
2276 	if_t ifp = adapter->ifp;
2277 	struct tx_ring	*txr = adapter->tx_rings;
2278 	struct rx_ring	*rxr = adapter->rx_rings;
2279 	u32		trigger = 0;
2280 
2281 	EM_CORE_LOCK_ASSERT(adapter);
2282 
2283 	em_update_link_status(adapter);
2284 	em_update_stats_counters(adapter);
2285 
2286 	/* Reset LAA into RAR[0] on 82571 */
2287 	if ((adapter->hw.mac.type == e1000_82571) &&
2288 	    e1000_get_laa_state_82571(&adapter->hw))
2289 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2290 
2291 	/* Mask to use in the irq trigger */
2292 	if (adapter->msix_mem) {
2293 		for (int i = 0; i < adapter->num_queues; i++, rxr++)
2294 			trigger |= rxr->ims;
2295 		rxr = adapter->rx_rings;
2296 	} else
2297 		trigger = E1000_ICS_RXDMT0;
2298 
2299 	/*
2300 	** Check on the state of the TX queue(s); this
2301 	** can be done without the lock because it's read-only
2302 	** and the HUNG state will be static if set.
2303 	*/
2304 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2305 		if (txr->busy == EM_TX_HUNG)
2306 			goto hung;
2307 		if (txr->busy >= EM_TX_MAXTRIES)
2308 			txr->busy = EM_TX_HUNG;
2309 		/* Schedule a TX tasklet if needed */
2310 		if (txr->tx_avail <= EM_MAX_SCATTER)
2311 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2312 	}
2313 
2314 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2315 #ifndef DEVICE_POLLING
2316 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2317 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2318 #endif
2319 	return;
2320 hung:
2321 	/* Looks like we're hung */
2322 	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2323 			txr->me);
2324 	em_print_debug_info(adapter);
2325 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2326 	adapter->watchdog_events++;
2327 	em_init_locked(adapter);
2328 }
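
/*
 * Note on the watchdog above: a queue must be seen busy for
 * EM_TX_MAXTRIES consecutive ticks before it is marked EM_TX_HUNG,
 * and the reset fires on the tick after that, so a stalled queue
 * always gets at least one full timer period to recover.
 */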
2329 
2330 
2331 static void
2332 em_update_link_status(struct adapter *adapter)
2333 {
2334 	struct e1000_hw *hw = &adapter->hw;
2335 	if_t ifp = adapter->ifp;
2336 	device_t dev = adapter->dev;
2337 	struct tx_ring *txr = adapter->tx_rings;
2338 	u32 link_check = 0;
2339 
2340 	/* Get the cached link value or read phy for real */
2341 	switch (hw->phy.media_type) {
2342 	case e1000_media_type_copper:
2343 		if (hw->mac.get_link_status) {
2344 			/* Do the work to read phy */
2345 			e1000_check_for_link(hw);
2346 			link_check = !hw->mac.get_link_status;
2347 			if (link_check) /* ESB2 fix */
2348 				e1000_cfg_on_link_up(hw);
2349 		} else
2350 			link_check = TRUE;
2351 		break;
2352 	case e1000_media_type_fiber:
2353 		e1000_check_for_link(hw);
2354 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2355 		    E1000_STATUS_LU);
2356 		break;
2357 	case e1000_media_type_internal_serdes:
2358 		e1000_check_for_link(hw);
2359 		link_check = adapter->hw.mac.serdes_has_link;
2360 		break;
2361 	default:
2362 	case e1000_media_type_unknown:
2363 		break;
2364 	}
2365 
2366 	/* Now check for a transition */
2367 	if (link_check && (adapter->link_active == 0)) {
2368 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2369 		    &adapter->link_duplex);
2370 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2371 		if ((adapter->link_speed != SPEED_1000) &&
2372 		    ((hw->mac.type == e1000_82571) ||
2373 		    (hw->mac.type == e1000_82572))) {
2374 			int tarc0;
2375 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2376 			tarc0 &= ~TARC_SPEED_MODE_BIT;
2377 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2378 		}
2379 		if (bootverbose)
2380 			device_printf(dev, "Link is up %d Mbps %s\n",
2381 			    adapter->link_speed,
2382 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2383 			    "Full Duplex" : "Half Duplex"));
2384 		adapter->link_active = 1;
2385 		adapter->smartspeed = 0;
2386 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2387 		if_link_state_change(ifp, LINK_STATE_UP);
2388 	} else if (!link_check && (adapter->link_active == 1)) {
2389 		if_setbaudrate(ifp, 0);
2390 		adapter->link_speed = 0;
2391 		adapter->link_duplex = 0;
2392 		if (bootverbose)
2393 			device_printf(dev, "Link is Down\n");
2394 		adapter->link_active = 0;
2395 		/* Link down, disable hang detection */
2396 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2397 			txr->busy = EM_TX_IDLE;
2398 		if_link_state_change(ifp, LINK_STATE_DOWN);
2399 	}
2400 }
2401 
2402 /*********************************************************************
2403  *
2404  *  This routine disables all traffic on the adapter by issuing a
2405  *  global reset on the MAC and deallocates TX/RX buffers.
2406  *
2407  *  This routine should always be called with BOTH the CORE
2408  *  and TX locks.
2409  **********************************************************************/
2410 
2411 static void
2412 em_stop(void *arg)
2413 {
2414 	struct adapter	*adapter = arg;
2415 	if_t ifp = adapter->ifp;
2416 	struct tx_ring	*txr = adapter->tx_rings;
2417 
2418 	EM_CORE_LOCK_ASSERT(adapter);
2419 
2420 	INIT_DEBUGOUT("em_stop: begin");
2421 
2422 	em_disable_intr(adapter);
2423 	callout_stop(&adapter->timer);
2424 
2425 	/* Tell the stack that the interface is no longer active */
2426 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2427 
2428 	/* Disarm Hang Detection. */
2429 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2430 		EM_TX_LOCK(txr);
2431 		txr->busy = EM_TX_IDLE;
2432 		EM_TX_UNLOCK(txr);
2433 	}
2434 
2435 	e1000_reset_hw(&adapter->hw);
2436 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2437 
2438 	e1000_led_off(&adapter->hw);
2439 	e1000_cleanup_led(&adapter->hw);
2440 }
2441 
2442 
2443 /*********************************************************************
2444  *
2445  *  Determine hardware revision.
2446  *
2447  **********************************************************************/
2448 static void
2449 em_identify_hardware(struct adapter *adapter)
2450 {
2451 	device_t dev = adapter->dev;
2452 
2453 	/* Make sure our PCI config space has the necessary stuff set */
2454 	pci_enable_busmaster(dev);
2455 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2456 
2457 	/* Save off the information about this board */
2458 	adapter->hw.vendor_id = pci_get_vendor(dev);
2459 	adapter->hw.device_id = pci_get_device(dev);
2460 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2461 	adapter->hw.subsystem_vendor_id =
2462 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2463 	adapter->hw.subsystem_device_id =
2464 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2465 
2466 	/* Do Shared Code Init and Setup */
2467 	if (e1000_set_mac_type(&adapter->hw)) {
2468 		device_printf(dev, "Setup init failure\n");
2469 		return;
2470 	}
2471 }
2472 
2473 static int
2474 em_allocate_pci_resources(struct adapter *adapter)
2475 {
2476 	device_t	dev = adapter->dev;
2477 	int		rid;
2478 
2479 	rid = PCIR_BAR(0);
2480 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2481 	    &rid, RF_ACTIVE);
2482 	if (adapter->memory == NULL) {
2483 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2484 		return (ENXIO);
2485 	}
2486 	adapter->osdep.mem_bus_space_tag =
2487 	    rman_get_bustag(adapter->memory);
2488 	adapter->osdep.mem_bus_space_handle =
2489 	    rman_get_bushandle(adapter->memory);
2490 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2491 
2492 	adapter->hw.back = &adapter->osdep;
2493 
2494 	return (0);
2495 }
2496 
2497 /*********************************************************************
2498  *
2499  *  Setup the Legacy or MSI Interrupt handler
2500  *
2501  **********************************************************************/
2502 int
2503 em_allocate_legacy(struct adapter *adapter)
2504 {
2505 	device_t dev = adapter->dev;
2506 	struct tx_ring	*txr = adapter->tx_rings;
2507 	int error, rid = 0;
2508 
2509 	/* Manually turn off all interrupts */
2510 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2511 
2512 	if (adapter->msix == 1) /* using MSI */
2513 		rid = 1;
2514 	/* We allocate a single interrupt resource */
2515 	adapter->res = bus_alloc_resource_any(dev,
2516 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2517 	if (adapter->res == NULL) {
2518 		device_printf(dev, "Unable to allocate bus resource: "
2519 		    "interrupt\n");
2520 		return (ENXIO);
2521 	}
2522 
2523 	/*
2524 	 * Allocate a fast interrupt and the associated
2525 	 * deferred processing contexts.
2526 	 */
2527 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2528 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2529 	    taskqueue_thread_enqueue, &adapter->tq);
2530 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2531 	    device_get_nameunit(adapter->dev));
2532 	/* Use a TX only tasklet for local timer */
2533 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2534 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2535 	    taskqueue_thread_enqueue, &txr->tq);
2536 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2537 	    device_get_nameunit(adapter->dev));
2538 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
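	/*
	 * Note: em_irq_fast is registered as a filter with no ithread
	 * handler (the NULL argument below), so it runs in primary
	 * interrupt context and defers all real work to the taskqueues
	 * set up above.
	 */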
2539 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2540 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2541 		device_printf(dev, "Failed to register fast interrupt "
2542 			    "handler: %d\n", error);
2543 		taskqueue_free(adapter->tq);
2544 		adapter->tq = NULL;
2545 		return (error);
2546 	}
2547 
2548 	return (0);
2549 }
2550 
2551 /*********************************************************************
2552  *
2553  *  Setup the MSIX Interrupt handlers
2554  *   This is not really Multiqueue, rather
2555  *   it's just separate interrupt vectors
2556  *   for TX, RX, and Link.
2557  *
2558  **********************************************************************/
2559 int
2560 em_allocate_msix(struct adapter *adapter)
2561 {
2562 	device_t	dev = adapter->dev;
2563 	struct		tx_ring *txr = adapter->tx_rings;
2564 	struct		rx_ring *rxr = adapter->rx_rings;
2565 	int		error, rid, vector = 0;
2566 	int		cpu_id = 0;
2567 
2568 
2569 	/* Make sure all interrupts are disabled */
2570 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2571 
2572 	/* First set up ring resources */
2573 	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2574 
2575 		/* RX ring */
2576 		rid = vector + 1;
2577 
2578 		rxr->res = bus_alloc_resource_any(dev,
2579 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2580 		if (rxr->res == NULL) {
2581 			device_printf(dev,
2582 			    "Unable to allocate bus resource: "
2583 			    "RX MSIX Interrupt %d\n", i);
2584 			return (ENXIO);
2585 		}
2586 		if ((error = bus_setup_intr(dev, rxr->res,
2587 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2588 		    rxr, &rxr->tag)) != 0) {
2589 			device_printf(dev, "Failed to register RX handler");
2590 			return (error);
2591 		}
2592 #if __FreeBSD_version >= 800504
2593 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2594 #endif
2595 		rxr->msix = vector;
2596 
2597 		if (em_last_bind_cpu < 0)
2598 			em_last_bind_cpu = CPU_FIRST();
2599 		cpu_id = em_last_bind_cpu;
2600 		bus_bind_intr(dev, rxr->res, cpu_id);
2601 
2602 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2603 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2604 		    taskqueue_thread_enqueue, &rxr->tq);
2605 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2606 		    device_get_nameunit(adapter->dev), cpu_id);
2607 		/*
2608 		** Set the bit to enable interrupt
2609 		** in E1000_IMS -- bits 20 and 21
2610 		** are for RX0 and RX1, note this has
2611 		** NOTHING to do with the MSIX vector
2612 		*/
2613 		rxr->ims = 1 << (20 + i);
2614 		adapter->ims |= rxr->ims;
2615 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2616 
2617 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2618 	}
2619 
2620 	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2621 		/* TX ring */
2622 		rid = vector + 1;
2623 		txr->res = bus_alloc_resource_any(dev,
2624 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2625 		if (txr->res == NULL) {
2626 			device_printf(dev,
2627 			    "Unable to allocate bus resource: "
2628 			    "TX MSIX Interrupt %d\n", i);
2629 			return (ENXIO);
2630 		}
2631 		if ((error = bus_setup_intr(dev, txr->res,
2632 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2633 		    txr, &txr->tag)) != 0) {
2634 			device_printf(dev, "Failed to register TX handler");
2635 			return (error);
2636 		}
2637 #if __FreeBSD_version >= 800504
2638 		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2639 #endif
2640 		txr->msix = vector;
2641 
2642 		if (em_last_bind_cpu < 0)
2643 			em_last_bind_cpu = CPU_FIRST();
2644 		cpu_id = em_last_bind_cpu;
2645 		bus_bind_intr(dev, txr->res, cpu_id);
2646 
2647 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2648 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2649 		    taskqueue_thread_enqueue, &txr->tq);
2650 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2651 		    device_get_nameunit(adapter->dev), cpu_id);
2652 		/*
2653 		** Set the bit to enable interrupt
2654 		** in E1000_IMS -- bits 22 and 23
2655 		** are for TX0 and TX1, note this has
2656 		** NOTHING to do with the MSIX vector
2657 		*/
2658 		txr->ims = 1 << (22 + i);
2659 		adapter->ims |= txr->ims;
2660 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2661 
2662 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2663 	}
2664 
2665 	/* Link interrupt */
2666 	rid = vector + 1;
2667 	adapter->res = bus_alloc_resource_any(dev,
2668 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2669 	if (!adapter->res) {
2670 		device_printf(dev, "Unable to allocate "
2671 		    "bus resource: Link interrupt [%d]\n", rid);
2672 		return (ENXIO);
2673 	}
2674 	/* Set the link handler function */
2675 	error = bus_setup_intr(dev, adapter->res,
2676 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2677 	    em_msix_link, adapter, &adapter->tag);
2678 	if (error) {
2679 		adapter->res = NULL;
2680 		device_printf(dev, "Failed to register LINK handler");
2681 		return (error);
2682 	}
2683 #if __FreeBSD_version >= 800504
2684 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2685 #endif
2686 	adapter->linkvec = vector;
2687 	adapter->ivars |=  (8 | vector) << 16;
2688 	adapter->ivars |= 0x80000000;
2689 
2690 	return (0);
2691 }
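
/*
 * How the ivars value assembled above packs into the 82574 IVAR
 * register: one 4-bit entry per interrupt cause, the low three bits
 * selecting the MSI-X vector and bit 3 (the "8 |") marking the entry
 * valid:
 *
 *   bits  3:0  RX queue 0        bits 11:8   TX queue 0
 *   bits  7:4  RX queue 1        bits 15:12  TX queue 1
 *   bits 19:16 Link/Other        bit  31     TX int. on every writeback
 *
 * The assembled value is later written to E1000_IVAR when the
 * adapter is initialized.
 */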
2692 
2693 
2694 static void
2695 em_free_pci_resources(struct adapter *adapter)
2696 {
2697 	device_t	dev = adapter->dev;
2698 	struct tx_ring	*txr;
2699 	struct rx_ring	*rxr;
2700 	int		rid;
2701 
2702 
2703 	/*
2704 	** Release all the queue interrupt resources:
2705 	*/
2706 	for (int i = 0; i < adapter->num_queues; i++) {
2707 		txr = &adapter->tx_rings[i];
2708 		/* an early abort? */
2709 		if (txr == NULL)
2710 			break;
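		/*
		 * SYS_RES_IRQ rid 0 is the legacy INTx line; MSI-X
		 * vectors use rids 1..n, hence the "+ 1" below.
		 */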
2711 		rid = txr->msix + 1;
2712 		if (txr->tag != NULL) {
2713 			bus_teardown_intr(dev, txr->res, txr->tag);
2714 			txr->tag = NULL;
2715 		}
2716 		if (txr->res != NULL)
2717 			bus_release_resource(dev, SYS_RES_IRQ,
2718 			    rid, txr->res);
2719 
2720 		rxr = &adapter->rx_rings[i];
2721 		/* an early abort? */
2722 		if (rxr == NULL)
2723 			break;
2724 		rid = rxr->msix + 1;
2725 		if (rxr->tag != NULL) {
2726 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2727 			rxr->tag = NULL;
2728 		}
2729 		if (rxr->res != NULL)
2730 			bus_release_resource(dev, SYS_RES_IRQ,
2731 			    rid, rxr->res);
2732 	}
2733 
2734 	if (adapter->linkvec) /* we are doing MSIX */
2735 		rid = adapter->linkvec + 1;
2736 	else
2737 		rid = (adapter->msix != 0) ? 1 : 0;
2738 
2739 	if (adapter->tag != NULL) {
2740 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2741 		adapter->tag = NULL;
2742 	}
2743 
2744 	if (adapter->res != NULL)
2745 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2746 
2747 
2748 	if (adapter->msix)
2749 		pci_release_msi(dev);
2750 
2751 	if (adapter->msix_mem != NULL)
2752 		bus_release_resource(dev, SYS_RES_MEMORY,
2753 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2754 
2755 	if (adapter->memory != NULL)
2756 		bus_release_resource(dev, SYS_RES_MEMORY,
2757 		    PCIR_BAR(0), adapter->memory);
2758 
2759 	if (adapter->flash != NULL)
2760 		bus_release_resource(dev, SYS_RES_MEMORY,
2761 		    EM_FLASH, adapter->flash);
2762 }
2763 
2764 /*
2765  * Setup MSI or MSI/X
2766  */
2767 static int
2768 em_setup_msix(struct adapter *adapter)
2769 {
2770 	device_t dev = adapter->dev;
2771 	int val;
2772 
2773 	/* Nearly always going to use one queue */
2774 	adapter->num_queues = 1;
2775 
2776 	/*
2777 	** Try using MSI-X for Hartwell adapters
2778 	*/
2779 	if ((adapter->hw.mac.type == e1000_82574) &&
2780 	    (em_enable_msix == TRUE)) {
2781 #ifdef EM_MULTIQUEUE
2782 		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2783 		if (adapter->num_queues > 1)
2784 			em_enable_vectors_82574(adapter);
2785 #endif
2786 		/* Map the MSIX BAR */
2787 		int rid = PCIR_BAR(EM_MSIX_BAR);
2788 		adapter->msix_mem = bus_alloc_resource_any(dev,
2789 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2790 		if (adapter->msix_mem == NULL) {
2791 			/* May not be enabled */
2792 			device_printf(adapter->dev,
2793 			    "Unable to map MSIX table\n");
2794 			goto msi;
2795 		}
2796 		val = pci_msix_count(dev);
2797 
2798 #ifdef EM_MULTIQUEUE
2799 		/* We need 5 vectors in the multiqueue case */
2800 		if (adapter->num_queues > 1) {
2801 			if (val >= 5)
2802 				val = 5;
2803 			else {
2804 				adapter->num_queues = 1;
2805 				device_printf(adapter->dev,
2806 				    "Insufficient MSIX vectors for >1 queue, "
2807 				    "using single queue...\n");
2808 				goto msix_one;
2809 			}
2810 		} else {
2811 msix_one:
2812 #endif
2813 			if (val >= 3)
2814 				val = 3;
2815 			else {
2816 				device_printf(adapter->dev,
2817 				    "Insufficient MSIX vectors, using MSI\n");
2818 				goto msi;
2819 			}
2820 #ifdef EM_MULTIQUEUE
2821 		}
2822 #endif
2823 
2824 		if (pci_alloc_msix(dev, &val) == 0) {
2825 			device_printf(adapter->dev,
2826 			    "Using MSIX interrupts "
2827 			    "with %d vectors\n", val);
2828 			return (val);
2829 		}
2830 
2831 		/*
2832 		** If MSIX alloc failed or provided us with
2833 		** less than needed, free and fall through to MSI
2834 		*/
2835 		pci_release_msi(dev);
2836 	}
2837 msi:
2838 	if (adapter->msix_mem != NULL) {
2839 		bus_release_resource(dev, SYS_RES_MEMORY,
2840 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2841 		adapter->msix_mem = NULL;
2842 	}
2843 	val = 1;
2844 	if (pci_alloc_msi(dev, &val) == 0) {
2845 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2846 		return (val);
2847 	}
2848 	/* Should only happen due to manual configuration */
2849 	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2850 	return (0);
2851 }
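
/*
 * Note on the returns above: em_setup_msix() reports the number of
 * vectors obtained -- 5 or 3 for MSI-X, 1 for MSI, and 0 meaning fall
 * back to a legacy INTx interrupt.
 */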
2852 
2853 
2854 /*********************************************************************
2855  *
2856  *  Initialize the hardware to a configuration
2857  *  as specified by the adapter structure.
2858  *
2859  **********************************************************************/
2860 static void
2861 em_reset(struct adapter *adapter)
2862 {
2863 	device_t	dev = adapter->dev;
2864 	if_t ifp = adapter->ifp;
2865 	struct e1000_hw	*hw = &adapter->hw;
2866 	u16		rx_buffer_size;
2867 	u32		pba;
2868 
2869 	INIT_DEBUGOUT("em_reset: begin");
2870 
2871 	/* Set up smart power down as default off on newer adapters. */
2872 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2873 	    hw->mac.type == e1000_82572)) {
2874 		u16 phy_tmp = 0;
2875 
2876 		/* Speed up time to link by disabling smart power down. */
2877 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2878 		phy_tmp &= ~IGP02E1000_PM_SPD;
2879 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2880 	}
2881 
2882 	/*
2883 	 * Packet Buffer Allocation (PBA)
2884 	 * Writing PBA sets the receive portion of the buffer;
2885 	 * the remainder is used for the transmit buffer.
2886 	 */
2887 	switch (hw->mac.type) {
2888 	/* Total Packet Buffer on these is 48K */
2889 	case e1000_82571:
2890 	case e1000_82572:
2891 	case e1000_80003es2lan:
2892 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2893 		break;
2894 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2895 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2896 		break;
2897 	case e1000_82574:
2898 	case e1000_82583:
2899 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2900 		break;
2901 	case e1000_ich8lan:
2902 		pba = E1000_PBA_8K;
2903 		break;
2904 	case e1000_ich9lan:
2905 	case e1000_ich10lan:
2906 		/* Boost Receive side for jumbo frames */
2907 		if (adapter->hw.mac.max_frame_size > 4096)
2908 			pba = E1000_PBA_14K;
2909 		else
2910 			pba = E1000_PBA_10K;
2911 		break;
2912 	case e1000_pchlan:
2913 	case e1000_pch2lan:
2914 	case e1000_pch_lpt:
2915 		pba = E1000_PBA_26K;
2916 		break;
2917 	default:
2918 		if (adapter->hw.mac.max_frame_size > 8192)
2919 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2920 		else
2921 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2922 	}
2923 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2924 
2925 	/*
2926 	 * These parameters control the automatic generation (Tx) and
2927 	 * response (Rx) to Ethernet PAUSE frames.
2928 	 * - High water mark should allow for at least two frames to be
2929 	 *   received after sending an XOFF.
2930 	 * - Low water mark works best when it is very near the high water mark.
2931 	 *   This allows the receiver to restart by sending XON when it has
2932 	 *   drained a bit. Here we use an arbitrary value of 1500, which will
2933 	 *   restart after one full frame is pulled from the buffer. There
2934 	 *   could be several smaller frames in the buffer and if so they will
2935 	 *   not trigger the XON until their total number reduces the buffer
2936 	 *   by 1500.
2937 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2938 	 */
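	/*
	 * Worked example, assuming E1000_PBA_48K (48 KB of receive
	 * buffer) and a standard 1500-byte MTU: max_frame_size is 1518,
	 * roundup2(1518, 1024) = 2048, so high_water = 49152 - 2048 =
	 * 47104 and low_water = 47104 - 1500 = 45604.
	 */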
2939 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2940 	hw->fc.high_water = rx_buffer_size -
2941 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2942 	hw->fc.low_water = hw->fc.high_water - 1500;
2943 
2944 	if (adapter->fc) /* locally set flow control value? */
2945 		hw->fc.requested_mode = adapter->fc;
2946 	else
2947 		hw->fc.requested_mode = e1000_fc_full;
2948 
2949 	if (hw->mac.type == e1000_80003es2lan)
2950 		hw->fc.pause_time = 0xFFFF;
2951 	else
2952 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2953 
2954 	hw->fc.send_xon = TRUE;
2955 
2956 	/* Device specific overrides/settings */
2957 	switch (hw->mac.type) {
2958 	case e1000_pchlan:
2959 		/* Workaround: no TX flow ctrl for PCH */
2960 		hw->fc.requested_mode = e1000_fc_rx_pause;
2961 		hw->fc.pause_time = 0xFFFF; /* override */
2962 		if (if_getmtu(ifp) > ETHERMTU) {
2963 			hw->fc.high_water = 0x3500;
2964 			hw->fc.low_water = 0x1500;
2965 		} else {
2966 			hw->fc.high_water = 0x5000;
2967 			hw->fc.low_water = 0x3000;
2968 		}
2969 		hw->fc.refresh_time = 0x1000;
2970 		break;
2971 	case e1000_pch2lan:
2972 	case e1000_pch_lpt:
2973 		hw->fc.high_water = 0x5C20;
2974 		hw->fc.low_water = 0x5048;
2975 		hw->fc.pause_time = 0x0650;
2976 		hw->fc.refresh_time = 0x0400;
2977 		/* Jumbos need adjusted PBA */
2978 		if (if_getmtu(ifp) > ETHERMTU)
2979 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2980 		else
2981 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2982 		break;
2983 	case e1000_ich9lan:
2984 	case e1000_ich10lan:
2985 		if (if_getmtu(ifp) > ETHERMTU) {
2986 			hw->fc.high_water = 0x2800;
2987 			hw->fc.low_water = hw->fc.high_water - 8;
2988 			break;
2989 		}
2990 		/* else fall thru */
2991 	default:
2992 		if (hw->mac.type == e1000_80003es2lan)
2993 			hw->fc.pause_time = 0xFFFF;
2994 		break;
2995 	}
2996 
2997 	/* Issue a global reset */
2998 	e1000_reset_hw(hw);
2999 	E1000_WRITE_REG(hw, E1000_WUC, 0);
3000 	em_disable_aspm(adapter);
3001 	/* and a re-init */
3002 	if (e1000_init_hw(hw) < 0) {
3003 		device_printf(dev, "Hardware Initialization Failed\n");
3004 		return;
3005 	}
3006 
3007 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3008 	e1000_get_phy_info(hw);
3009 	e1000_check_for_link(hw);
3010 	return;
3011 }
3012 
3013 /*********************************************************************
3014  *
3015  *  Setup networking device structure and register an interface.
3016  *
3017  **********************************************************************/
3018 static int
3019 em_setup_interface(device_t dev, struct adapter *adapter)
3020 {
3021 	if_t ifp;
3022 
3023 	INIT_DEBUGOUT("em_setup_interface: begin");
3024 
3025 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3026 	if (ifp == NULL) {
3027 		device_printf(dev, "can not allocate ifnet structure\n");
3028 		return (-1);
3029 	}
3030 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3031 	if_setdev(ifp, dev);
3032 	if_setinitfn(ifp, em_init);
3033 	if_setsoftc(ifp, adapter);
3034 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3035 	if_setioctlfn(ifp, em_ioctl);
3036 	if_setgetcounterfn(ifp, em_get_counter);
3037 #ifdef EM_MULTIQUEUE
3038 	/* Multiqueue stack interface */
3039 	if_settransmitfn(ifp, em_mq_start);
3040 	if_setqflushfn(ifp, em_qflush);
3041 #else
3042 	if_setstartfn(ifp, em_start);
3043 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3044 	if_setsendqready(ifp);
3045 #endif
3046 
3047 	ether_ifattach(ifp, adapter->hw.mac.addr);
3048 
3049 	if_setcapabilities(ifp, 0);
3050 	if_setcapenable(ifp, 0);
3051 
3052 
3053 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3054 	    IFCAP_TSO4, 0);
3055 	/*
3056 	 * Tell the upper layer(s) we
3057 	 * support full VLAN capability
3058 	 */
3059 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3060 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3061 	    IFCAP_VLAN_MTU, 0);
3062 	if_setcapenable(ifp, if_getcapabilities(ifp));
3063 
3064 	/*
3065 	** Don't turn this on by default: if vlans are
3066 	** created on another pseudo device (e.g. lagg)
3067 	** then vlan events are not passed through, breaking
3068 	** operation, whereas with HW FILTER off it works. If
3069 	** you use vlans directly on the em driver you can
3070 	** enable this and get full hardware tag filtering.
3071 	*/
3072 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
3073 
3074 #ifdef DEVICE_POLLING
3075 	if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
3076 #endif
3077 
3078 	/* Enable only WOL MAGIC by default */
3079 	if (adapter->wol) {
3080 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3081 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3082 	}
3083 
3084 	/*
3085 	 * Specify the media types supported by this adapter and register
3086 	 * callbacks to update media and link information
3087 	 */
3088 	ifmedia_init(&adapter->media, IFM_IMASK,
3089 	    em_media_change, em_media_status);
3090 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3091 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3092 		u_char fiber_type = IFM_1000_SX;	/* default type */
3093 
3094 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3095 			    0, NULL);
3096 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3097 	} else {
3098 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3099 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3100 			    0, NULL);
3101 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3102 			    0, NULL);
3103 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3104 			    0, NULL);
3105 		if (adapter->hw.phy.type != e1000_phy_ife) {
3106 			ifmedia_add(&adapter->media,
3107 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3108 			ifmedia_add(&adapter->media,
3109 				IFM_ETHER | IFM_1000_T, 0, NULL);
3110 		}
3111 	}
3112 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3113 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3114 	return (0);
3115 }
3116 
3117 
3118 /*
3119  * Manage DMA'able memory.
3120  */
3121 static void
3122 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3123 {
3124 	if (error)
3125 		return;
3126 	*(bus_addr_t *) arg = segs[0].ds_addr;
3127 }
3128 
3129 static int
3130 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3131         struct em_dma_alloc *dma, int mapflags)
3132 {
3133 	int error;
3134 
3135 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3136 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3137 				BUS_SPACE_MAXADDR,	/* lowaddr */
3138 				BUS_SPACE_MAXADDR,	/* highaddr */
3139 				NULL, NULL,		/* filter, filterarg */
3140 				size,			/* maxsize */
3141 				1,			/* nsegments */
3142 				size,			/* maxsegsize */
3143 				0,			/* flags */
3144 				NULL,			/* lockfunc */
3145 				NULL,			/* lockarg */
3146 				&dma->dma_tag);
3147 	if (error) {
3148 		device_printf(adapter->dev,
3149 		    "%s: bus_dma_tag_create failed: %d\n",
3150 		    __func__, error);
3151 		goto fail_0;
3152 	}
3153 
3154 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3155 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3156 	if (error) {
3157 		device_printf(adapter->dev,
3158 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3159 		    __func__, (uintmax_t)size, error);
3160 		goto fail_2;
3161 	}
3162 
3163 	dma->dma_paddr = 0;
3164 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3165 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3166 	if (error || dma->dma_paddr == 0) {
3167 		device_printf(adapter->dev,
3168 		    "%s: bus_dmamap_load failed: %d\n",
3169 		    __func__, error);
3170 		goto fail_3;
3171 	}
3172 
3173 	return (0);
3174 
3175 fail_3:
3176 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3177 fail_2:
3178 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3179 	bus_dma_tag_destroy(dma->dma_tag);
3180 fail_0:
3181 	dma->dma_tag = NULL;
3182 
3183 	return (error);
3184 }
3185 
3186 static void
3187 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3188 {
3189 	if (dma->dma_tag == NULL)
3190 		return;
3191 	if (dma->dma_paddr != 0) {
3192 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3193 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3194 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3195 		dma->dma_paddr = 0;
3196 	}
3197 	if (dma->dma_vaddr != NULL) {
3198 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3199 		dma->dma_vaddr = NULL;
3200 	}
3201 	bus_dma_tag_destroy(dma->dma_tag);
3202 	dma->dma_tag = NULL;
3203 }
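
/*
 * Illustrative usage sketch (not part of the driver) for the two
 * helpers above; the descriptor rings are allocated this way in
 * em_allocate_queues().
 */
#if 0
	struct em_dma_alloc dma;

	if (em_dma_malloc(adapter, 4096, &dma, BUS_DMA_NOWAIT) == 0) {
		/*
		 * dma.dma_vaddr is the CPU mapping, dma.dma_paddr the
		 * bus address to program into the hardware.
		 */
		em_dma_free(adapter, &dma);
	}
#endif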
3204 
3205 
3206 /*********************************************************************
3207  *
3208  *  Allocate memory for the transmit and receive rings, and then
3209  *  the descriptors associated with each, called only once at attach.
3210  *
3211  **********************************************************************/
3212 static int
3213 em_allocate_queues(struct adapter *adapter)
3214 {
3215 	device_t		dev = adapter->dev;
3216 	struct tx_ring		*txr = NULL;
3217 	struct rx_ring		*rxr = NULL;
3218 	int rsize, tsize, error = E1000_SUCCESS;
3219 	int txconf = 0, rxconf = 0;
3220 
3221 
3222 	/* Allocate the TX ring struct memory */
3223 	if (!(adapter->tx_rings =
3224 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3225 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3226 		device_printf(dev, "Unable to allocate TX ring memory\n");
3227 		error = ENOMEM;
3228 		goto fail;
3229 	}
3230 
3231 	/* Now allocate the RX */
3232 	if (!(adapter->rx_rings =
3233 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3234 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3235 		device_printf(dev, "Unable to allocate RX ring memory\n");
3236 		error = ENOMEM;
3237 		goto rx_fail;
3238 	}
3239 
3240 	tsize = roundup2(adapter->num_tx_desc *
3241 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3242 	/*
3243 	 * Now set up the TX queues, txconf is needed to handle the
3244 	 * possibility that things fail midcourse and we need to
3245 	 * undo memory gracefully
3246 	 */
3247 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3248 		/* Set up some basics */
3249 		txr = &adapter->tx_rings[i];
3250 		txr->adapter = adapter;
3251 		txr->me = i;
3252 
3253 		/* Initialize the TX lock */
3254 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3255 		    device_get_nameunit(dev), txr->me);
3256 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3257 
3258 		if (em_dma_malloc(adapter, tsize,
3259 			&txr->txdma, BUS_DMA_NOWAIT)) {
3260 			device_printf(dev,
3261 			    "Unable to allocate TX Descriptor memory\n");
3262 			error = ENOMEM;
3263 			goto err_tx_desc;
3264 		}
3265 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3266 		bzero((void *)txr->tx_base, tsize);
3267 
3268 		if (em_allocate_transmit_buffers(txr)) {
3269 			device_printf(dev,
3270 			    "Critical Failure setting up transmit buffers\n");
3271 			error = ENOMEM;
3272 			goto err_tx_desc;
3273 		}
3274 #if __FreeBSD_version >= 800000
3275 		/* Allocate a buf ring */
3276 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3277 		    M_WAITOK, &txr->tx_mtx);
3278 #endif
3279 	}
3280 
3281 	/*
3282 	 * Next the RX queues...
3283 	 */
3284 	rsize = roundup2(adapter->num_rx_desc *
3285 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3286 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3287 		rxr = &adapter->rx_rings[i];
3288 		rxr->adapter = adapter;
3289 		rxr->me = i;
3290 
3291 		/* Initialize the RX lock */
3292 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3293 		    device_get_nameunit(dev), rxr->me);
3294 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3295 
3296 		if (em_dma_malloc(adapter, rsize,
3297 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3298 			device_printf(dev,
3299 			    "Unable to allocate RxDescriptor memory\n");
3300 			error = ENOMEM;
3301 			goto err_rx_desc;
3302 		}
3303 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3304 		bzero((void *)rxr->rx_base, rsize);
3305 
3306 		/* Allocate receive buffers for the ring */
3307 		if (em_allocate_receive_buffers(rxr)) {
3308 			device_printf(dev,
3309 			    "Critical Failure setting up receive buffers\n");
3310 			error = ENOMEM;
3311 			goto err_rx_desc;
3312 		}
3313 	}
3314 
3315 	return (0);
3316 
3317 err_rx_desc:
3318 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3319 		em_dma_free(adapter, &rxr->rxdma);
3320 err_tx_desc:
3321 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3322 		em_dma_free(adapter, &txr->txdma);
3323 	free(adapter->rx_rings, M_DEVBUF);
3324 rx_fail:
3325 #if __FreeBSD_version >= 800000
3326 	buf_ring_free(txr->br, M_DEVBUF);
3327 #endif
3328 	free(adapter->tx_rings, M_DEVBUF);
3329 fail:
3330 	return (error);
3331 }
3332 
3333 
3334 /*********************************************************************
3335  *
3336  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3337  *  the information needed to transmit a packet on the wire. This is
3338  *  called only once at attach, setup is done every reset.
3339  *
3340  **********************************************************************/
3341 static int
3342 em_allocate_transmit_buffers(struct tx_ring *txr)
3343 {
3344 	struct adapter *adapter = txr->adapter;
3345 	device_t dev = adapter->dev;
3346 	struct em_buffer *txbuf;
3347 	int error, i;
3348 
3349 	/*
3350 	 * Setup DMA descriptor areas.
3351 	 */
3352 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3353 			       1, 0,			/* alignment, bounds */
3354 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3355 			       BUS_SPACE_MAXADDR,	/* highaddr */
3356 			       NULL, NULL,		/* filter, filterarg */
3357 			       EM_TSO_SIZE,		/* maxsize */
3358 			       EM_MAX_SCATTER,		/* nsegments */
3359 			       PAGE_SIZE,		/* maxsegsize */
3360 			       0,			/* flags */
3361 			       NULL,			/* lockfunc */
3362 			       NULL,			/* lockfuncarg */
3363 			       &txr->txtag))) {
3364 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3365 		goto fail;
3366 	}
3367 
3368 	if (!(txr->tx_buffers =
3369 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3370 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3371 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3372 		error = ENOMEM;
3373 		goto fail;
3374 	}
3375 
3376 	/* Create the descriptor buffer dma maps */
3377 	txbuf = txr->tx_buffers;
3378 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3379 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3380 		if (error != 0) {
3381 			device_printf(dev, "Unable to create TX DMA map\n");
3382 			goto fail;
3383 		}
3384 	}
3385 
3386 	return 0;
3387 fail:
3388 	/* Free everything; this handles the case where we failed midway */
3389 	em_free_transmit_structures(adapter);
3390 	return (error);
3391 }
3392 
3393 /*********************************************************************
3394  *
3395  *  Initialize a transmit ring.
3396  *
3397  **********************************************************************/
3398 static void
3399 em_setup_transmit_ring(struct tx_ring *txr)
3400 {
3401 	struct adapter *adapter = txr->adapter;
3402 	struct em_buffer *txbuf;
3403 	int i;
3404 #ifdef DEV_NETMAP
3405 	struct netmap_slot *slot;
3406 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3407 #endif /* DEV_NETMAP */
3408 
3409 	/* Clear the old descriptor contents */
3410 	EM_TX_LOCK(txr);
3411 #ifdef DEV_NETMAP
3412 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3413 #endif /* DEV_NETMAP */
3414 
3415 	bzero((void *)txr->tx_base,
3416 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3417 	/* Reset indices */
3418 	txr->next_avail_desc = 0;
3419 	txr->next_to_clean = 0;
3420 
3421 	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
3423 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3424 		if (txbuf->m_head != NULL) {
3425 			bus_dmamap_sync(txr->txtag, txbuf->map,
3426 			    BUS_DMASYNC_POSTWRITE);
3427 			bus_dmamap_unload(txr->txtag, txbuf->map);
3428 			m_freem(txbuf->m_head);
3429 			txbuf->m_head = NULL;
3430 		}
3431 #ifdef DEV_NETMAP
3432 		if (slot) {
3433 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3434 			uint64_t paddr;
3435 			void *addr;
3436 
3437 			addr = PNMB(na, slot + si, &paddr);
3438 			txr->tx_base[i].buffer_addr = htole64(paddr);
3439 			/* reload the map for netmap mode */
3440 			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3441 		}
3442 #endif /* DEV_NETMAP */
3443 
3444 		/* clear the watch index */
3445 		txbuf->next_eop = -1;
	}
3447 
3448 	/* Set number of descriptors available */
3449 	txr->tx_avail = adapter->num_tx_desc;
3450 	txr->busy = EM_TX_IDLE;
3451 
3452 	/* Clear checksum offload context. */
3453 	txr->last_hw_offload = 0;
3454 	txr->last_hw_ipcss = 0;
3455 	txr->last_hw_ipcso = 0;
3456 	txr->last_hw_tucss = 0;
3457 	txr->last_hw_tucso = 0;
3458 
3459 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3460 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3461 	EM_TX_UNLOCK(txr);
3462 }
3463 
3464 /*********************************************************************
3465  *
3466  *  Initialize all transmit rings.
3467  *
3468  **********************************************************************/
3469 static void
3470 em_setup_transmit_structures(struct adapter *adapter)
3471 {
3472 	struct tx_ring *txr = adapter->tx_rings;
3473 
3474 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3475 		em_setup_transmit_ring(txr);
3476 
3477 	return;
3478 }
3479 
3480 /*********************************************************************
3481  *
3482  *  Enable transmit unit.
3483  *
3484  **********************************************************************/
3485 static void
3486 em_initialize_transmit_unit(struct adapter *adapter)
3487 {
3488 	struct tx_ring	*txr = adapter->tx_rings;
3489 	struct e1000_hw	*hw = &adapter->hw;
3490 	u32	tctl, txdctl = 0, tarc, tipg = 0;
3491 
	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3493 
3494 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3495 		u64 bus_addr = txr->txdma.dma_paddr;
3496 		/* Base and Len of TX Ring */
3497 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3498 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3499 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3500 	    	    (u32)(bus_addr >> 32));
3501 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3502 	    	    (u32)bus_addr);
3503 		/* Init the HEAD/TAIL indices */
3504 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3505 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3506 
3507 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3508 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3509 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3510 
3511 		txr->busy = EM_TX_IDLE;
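		/*
		 * For reference, the threshold block below composes
		 * txdctl to 0x1f | 0x100 | 0x10000 | 0x400000 |
		 * 0x1000000 | 0x2000000 == 0x0341011f, assuming
		 * E1000_TXDCTL_GRAN is bit 24 (0x01000000) as in the
		 * shared e1000 headers.
		 */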
		txdctl = 0;		/* clear txdctl */
		txdctl |= 0x1f;		/* PTHRESH */
		txdctl |= 1 << 8;	/* HTHRESH */
		txdctl |= 1 << 16;	/* WTHRESH */
		txdctl |= 1 << 22;	/* Reserved bit 22 must always be 1 */
		txdctl |= E1000_TXDCTL_GRAN;
		txdctl |= 1 << 25;	/* LWTHRESH */

		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3521 	}
3522 
3523 	/* Set the default values for the Tx Inter Packet Gap timer */
3524 	switch (adapter->hw.mac.type) {
3525 	case e1000_80003es2lan:
3526 		tipg = DEFAULT_82543_TIPG_IPGR1;
3527 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3528 		    E1000_TIPG_IPGR2_SHIFT;
3529 		break;
3530 	default:
3531 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3532 		    (adapter->hw.phy.media_type ==
3533 		    e1000_media_type_internal_serdes))
3534 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3535 		else
3536 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3537 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3538 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3539 	}
3540 
3541 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3542 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3543 
	if (adapter->hw.mac.type >= e1000_82540)
3545 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3546 		    adapter->tx_abs_int_delay.value);
3547 
3548 	if ((adapter->hw.mac.type == e1000_82571) ||
3549 	    (adapter->hw.mac.type == e1000_82572)) {
3550 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3551 		tarc |= TARC_SPEED_MODE_BIT;
3552 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3553 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3554 		/* errata: program both queues to unweighted RR */
3555 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3556 		tarc |= 1;
3557 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3558 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3559 		tarc |= 1;
3560 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3561 	} else if (adapter->hw.mac.type == e1000_82574) {
3562 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3563 		tarc |= TARC_ERRATA_BIT;
		if (adapter->num_queues > 1) {
3565 			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3566 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3567 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3568 		} else
3569 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3570 	}
3571 
3572 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3573 	if (adapter->tx_int_delay.value > 0)
3574 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3575 
3576 	/* Program the Transmit Control Register */
3577 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3578 	tctl &= ~E1000_TCTL_CT;
3579 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3580 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3581 
3582 	if (adapter->hw.mac.type >= e1000_82571)
3583 		tctl |= E1000_TCTL_MULR;
3584 
3585 	/* This write will effectively turn on the transmit unit. */
3586 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3587 
3588 }
3589 
3590 
3591 /*********************************************************************
3592  *
3593  *  Free all transmit rings.
3594  *
3595  **********************************************************************/
3596 static void
3597 em_free_transmit_structures(struct adapter *adapter)
3598 {
3599 	struct tx_ring *txr = adapter->tx_rings;
3600 
3601 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3602 		EM_TX_LOCK(txr);
3603 		em_free_transmit_buffers(txr);
3604 		em_dma_free(adapter, &txr->txdma);
3605 		EM_TX_UNLOCK(txr);
3606 		EM_TX_LOCK_DESTROY(txr);
3607 	}
3608 
3609 	free(adapter->tx_rings, M_DEVBUF);
3610 }
3611 
3612 /*********************************************************************
3613  *
3614  *  Free transmit ring related data structures.
3615  *
3616  **********************************************************************/
3617 static void
3618 em_free_transmit_buffers(struct tx_ring *txr)
3619 {
3620 	struct adapter		*adapter = txr->adapter;
3621 	struct em_buffer	*txbuf;
3622 
3623 	INIT_DEBUGOUT("free_transmit_ring: begin");
3624 
3625 	if (txr->tx_buffers == NULL)
3626 		return;
3627 
3628 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3629 		txbuf = &txr->tx_buffers[i];
3630 		if (txbuf->m_head != NULL) {
3631 			bus_dmamap_sync(txr->txtag, txbuf->map,
3632 			    BUS_DMASYNC_POSTWRITE);
3633 			bus_dmamap_unload(txr->txtag,
3634 			    txbuf->map);
3635 			m_freem(txbuf->m_head);
3636 			txbuf->m_head = NULL;
3637 			if (txbuf->map != NULL) {
3638 				bus_dmamap_destroy(txr->txtag,
3639 				    txbuf->map);
3640 				txbuf->map = NULL;
3641 			}
3642 		} else if (txbuf->map != NULL) {
3643 			bus_dmamap_unload(txr->txtag,
3644 			    txbuf->map);
3645 			bus_dmamap_destroy(txr->txtag,
3646 			    txbuf->map);
3647 			txbuf->map = NULL;
3648 		}
3649 	}
3650 #if __FreeBSD_version >= 800000
3651 	if (txr->br != NULL)
3652 		buf_ring_free(txr->br, M_DEVBUF);
3653 #endif
3654 	if (txr->tx_buffers != NULL) {
3655 		free(txr->tx_buffers, M_DEVBUF);
3656 		txr->tx_buffers = NULL;
3657 	}
3658 	if (txr->txtag != NULL) {
3659 		bus_dma_tag_destroy(txr->txtag);
3660 		txr->txtag = NULL;
3661 	}
3662 	return;
3663 }
3664 
3665 
3666 /*********************************************************************
 *  The offload context is protocol specific (TCP/UDP) and thus
 *  only needs to be set when the protocol changes. A context
 *  change can be a performance detriment, and it might be better
 *  just disabled. The reason arises in the way in which the
 *  controller supports pipelined requests from the Tx data DMA.
 *  Up to four requests can be pipelined, and they may belong to
 *  the same packet or to multiple packets. However all requests
 *  for one packet are issued before a request is issued for a
 *  subsequent packet, and if a request for the next packet
 *  requires a context change, that request will be stalled until
 *  the previous request completes. This means setting up a new
 *  context effectively disables pipelined Tx data DMA, which in
 *  turn greatly slows down performance when sending small frames.
3681  **********************************************************************/
3682 static void
3683 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3684     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3685 {
3686 	struct adapter			*adapter = txr->adapter;
3687 	struct e1000_context_desc	*TXD = NULL;
3688 	struct em_buffer		*tx_buffer;
3689 	int				cur, hdr_len;
3690 	u32				cmd = 0;
3691 	u16				offload = 0;
3692 	u8				ipcso, ipcss, tucso, tucss;
3693 
3694 	ipcss = ipcso = tucss = tucso = 0;
3695 	hdr_len = ip_off + (ip->ip_hl << 2);
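	/*
	 * e.g. an untagged frame (ip_off 14) with a 20-byte IP header
	 * gives hdr_len 34.
	 */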
3696 	cur = txr->next_avail_desc;
3697 
3698 	/* Setup of IP header checksum. */
3699 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3700 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3701 		offload |= CSUM_IP;
3702 		ipcss = ip_off;
3703 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3704 		/*
3705 		 * Start offset for header checksum calculation.
3706 		 * End offset for header checksum calculation.
3707 		 * Offset of place to put the checksum.
3708 		 */
3709 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3710 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3711 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3712 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3713 		cmd |= E1000_TXD_CMD_IP;
3714 	}
3715 
3716 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3717  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3718  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3719  		offload |= CSUM_TCP;
3720  		tucss = hdr_len;
3721  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
		/*
		 * Setting up a new checksum offload context for every
		 * frame takes a lot of processing time in hardware. It
		 * also hurts performance for small frames, so avoid it
		 * if the driver can reuse the previously configured
		 * checksum offload context.
		 */
3729  		if (txr->last_hw_offload == offload) {
3730  			if (offload & CSUM_IP) {
3731  				if (txr->last_hw_ipcss == ipcss &&
3732  				    txr->last_hw_ipcso == ipcso &&
3733  				    txr->last_hw_tucss == tucss &&
3734  				    txr->last_hw_tucso == tucso)
3735  					return;
3736  			} else {
3737  				if (txr->last_hw_tucss == tucss &&
3738  				    txr->last_hw_tucso == tucso)
3739  					return;
3740  			}
3741   		}
3742  		txr->last_hw_offload = offload;
3743  		txr->last_hw_tucss = tucss;
3744  		txr->last_hw_tucso = tucso;
3745  		/*
3746  		 * Start offset for payload checksum calculation.
3747  		 * End offset for payload checksum calculation.
3748  		 * Offset of place to put the checksum.
3749  		 */
3750 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3751  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3752  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3753  		TXD->upper_setup.tcp_fields.tucso = tucso;
3754  		cmd |= E1000_TXD_CMD_TCP;
3755  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3756  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3757  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
		offload |= CSUM_UDP;
		tucss = hdr_len;
		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
		/*
		 * Setting up a new checksum offload context for every
		 * frame takes a lot of processing time in hardware. It
		 * also hurts performance for small frames, so avoid it
		 * if the driver can reuse the previously configured
		 * checksum offload context.
		 */
3767  		if (txr->last_hw_offload == offload) {
3768  			if (offload & CSUM_IP) {
3769  				if (txr->last_hw_ipcss == ipcss &&
3770  				    txr->last_hw_ipcso == ipcso &&
3771  				    txr->last_hw_tucss == tucss &&
3772  				    txr->last_hw_tucso == tucso)
3773  					return;
3774  			} else {
3775  				if (txr->last_hw_tucss == tucss &&
3776  				    txr->last_hw_tucso == tucso)
3777  					return;
3778  			}
3779  		}
3780  		txr->last_hw_offload = offload;
3781  		txr->last_hw_tucss = tucss;
3782  		txr->last_hw_tucso = tucso;
3783  		/*
3784  		 * Start offset for header checksum calculation.
3785  		 * End offset for header checksum calculation.
3786  		 * Offset of place to put the checksum.
3787  		 */
3788 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3789  		TXD->upper_setup.tcp_fields.tucss = tucss;
3790  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3791  		TXD->upper_setup.tcp_fields.tucso = tucso;
3792   	}
3793 
3794  	if (offload & CSUM_IP) {
3795  		txr->last_hw_ipcss = ipcss;
3796  		txr->last_hw_ipcso = ipcso;
3797   	}
3798 
3799 	TXD->tcp_seg_setup.data = htole32(0);
3800 	TXD->cmd_and_length =
3801 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3802 	tx_buffer = &txr->tx_buffers[cur];
3803 	tx_buffer->m_head = NULL;
3804 	tx_buffer->next_eop = -1;
3805 
3806 	if (++cur == adapter->num_tx_desc)
3807 		cur = 0;
3808 
3809 	txr->tx_avail--;
3810 	txr->next_avail_desc = cur;
3811 }
3812 
3813 
3814 /**********************************************************************
3815  *
3816  *  Setup work for hardware segmentation offload (TSO)
3817  *
3818  **********************************************************************/
3819 static void
3820 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3821     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3822 {
3823 	struct adapter			*adapter = txr->adapter;
3824 	struct e1000_context_desc	*TXD;
3825 	struct em_buffer		*tx_buffer;
3826 	int cur, hdr_len;
3827 
3828 	/*
	 * In theory we can use the same TSO context if and only if the
	 * frame is the same type (IP/TCP) and has the same MSS. However,
	 * checking whether a frame has the same IP/TCP header structure
	 * is hard, so just ignore that and always establish a new TSO
	 * context.
3834 	 */
3835 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
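	/*
	 * e.g. an untagged frame (ip_off 14) with 20-byte IP and TCP
	 * headers gives hdr_len 54.
	 */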
3836 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3837 		      E1000_TXD_DTYP_D |	/* Data descr type */
3838 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3839 
3840 	/* IP and/or TCP header checksum calculation and insertion. */
3841 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3842 
3843 	cur = txr->next_avail_desc;
3844 	tx_buffer = &txr->tx_buffers[cur];
3845 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3846 
3847 	/*
3848 	 * Start offset for header checksum calculation.
3849 	 * End offset for header checksum calculation.
	 * Offset of place to put the checksum.
3851 	 */
3852 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3853 	TXD->lower_setup.ip_fields.ipcse =
3854 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3855 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3856 	/*
3857 	 * Start offset for payload checksum calculation.
3858 	 * End offset for payload checksum calculation.
3859 	 * Offset of place to put the checksum.
3860 	 */
3861 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3862 	TXD->upper_setup.tcp_fields.tucse = 0;
3863 	TXD->upper_setup.tcp_fields.tucso =
3864 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3865 	/*
3866 	 * Payload size per packet w/o any headers.
3867 	 * Length of all headers up to payload.
3868 	 */
3869 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3870 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3871 
3872 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3873 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3874 				E1000_TXD_CMD_TSE |	/* TSE context */
3875 				E1000_TXD_CMD_IP |	/* Do IP csum */
3876 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3877 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3878 
3879 	tx_buffer->m_head = NULL;
3880 	tx_buffer->next_eop = -1;
3881 
3882 	if (++cur == adapter->num_tx_desc)
3883 		cur = 0;
3884 
3885 	txr->tx_avail--;
3886 	txr->next_avail_desc = cur;
3887 	txr->tx_tso = TRUE;
3888 }
3889 
3890 
3891 /**********************************************************************
3892  *
3893  *  Examine each tx_buffer in the used queue. If the hardware is done
3894  *  processing the packet then free associated resources. The
3895  *  tx_buffer is put back on the free queue.
3896  *
3897  **********************************************************************/
3898 static void
3899 em_txeof(struct tx_ring *txr)
3900 {
3901 	struct adapter	*adapter = txr->adapter;
	int first, last, done, processed;
	struct em_buffer *tx_buffer;
	struct e1000_tx_desc *tx_desc, *eop_desc;
3905 	if_t ifp = adapter->ifp;
3906 
3907 	EM_TX_LOCK_ASSERT(txr);
3908 #ifdef DEV_NETMAP
3909 	if (netmap_tx_irq(ifp, txr->me))
3910 		return;
3911 #endif /* DEV_NETMAP */
3912 
3913 	/* No work, make sure hang detection is disabled */
	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->busy = EM_TX_IDLE;
		return;
	}
3918 
3919 	processed = 0;
	first = txr->next_to_clean;
	tx_desc = &txr->tx_base[first];
	tx_buffer = &txr->tx_buffers[first];
	last = tx_buffer->next_eop;
	eop_desc = &txr->tx_base[last];
3925 
3926 	/*
	 * Get the index of the first descriptor AFTER the EOP of the
	 * first packet, so that we can do the simple comparison in the
	 * inner while loop below.
3931 	 */
3932 	if (++last == adapter->num_tx_desc)
3933  		last = 0;
3934 	done = last;
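	/*
	 * e.g. with first == 5 and the EOP of that packet at index 7,
	 * done becomes 8 and the inner loop below cleans indices 5-7.
	 */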
3935 
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);
3938 
	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			++txr->tx_avail;
			++processed;

			if (tx_buffer->m_head) {
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
			tx_buffer->next_eop = -1;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
			tx_desc = &txr->tx_base[first];
		}
3965 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
3966 		/* See if we can continue to the next packet */
3967 		last = tx_buffer->next_eop;
3968 		if (last != -1) {
3969         		eop_desc = &txr->tx_base[last];
3970 			/* Get new done point */
			if (++last == adapter->num_tx_desc)
				last = 0;
3972 			done = last;
3973 		} else
3974 			break;
	}
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;
3980 
3981 	/*
	** Hang detection: we know there's work outstanding
	** or we would have returned at the entry check above,
	** so if no descriptor was processed here it indicates
	** a potential hang. The local timer will examine this
	** and do a reset if needed.
3986 	*/
3987 	if (processed == 0) {
3988 		if (txr->busy != EM_TX_HUNG)
3989 			++txr->busy;
3990 	} else /* At least one descriptor was cleaned */
3991 		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
3992 
	/*
	 * If we have a minimum free, clear IFF_DRV_OACTIVE
	 * to tell the stack that it is OK to send packets.
	 * Notice that all writes of OACTIVE happen under the
	 * TX lock which, with a single queue, guarantees
	 * sanity.
	 */
	if (txr->tx_avail >= EM_MAX_SCATTER) {
		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
	}
4003 
4004 	/* Disable hang detection if all clean */
4005 	if (txr->tx_avail == adapter->num_tx_desc)
4006 		txr->busy = EM_TX_IDLE;
4007 }
4008 
4009 
4010 /*********************************************************************
4011  *
4012  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4013  *
4014  **********************************************************************/
4015 static void
4016 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4017 {
4018 	struct adapter		*adapter = rxr->adapter;
4019 	struct mbuf		*m;
4020 	bus_dma_segment_t	segs[1];
4021 	struct em_buffer	*rxbuf;
4022 	int			i, j, error, nsegs;
4023 	bool			cleaned = FALSE;
4024 
4025 	i = j = rxr->next_to_refresh;
4026 	/*
4027 	** Get one descriptor beyond
4028 	** our work mark to control
4029 	** the loop.
4030 	*/
4031 	if (++j == adapter->num_rx_desc)
4032 		j = 0;
4033 
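	/*
	 * e.g. with num_rx_desc 1024 and next_to_refresh 1023, i starts
	 * at 1023 and j wraps to 0; the loop then refreshes 1023, 0, 1,
	 * ... until the look-ahead index j reaches limit.
	 */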
4034 	while (j != limit) {
4035 		rxbuf = &rxr->rx_buffers[i];
4036 		if (rxbuf->m_head == NULL) {
4037 			m = m_getjcl(M_NOWAIT, MT_DATA,
4038 			    M_PKTHDR, adapter->rx_mbuf_sz);
4039 			/*
			** If we have a temporary resource shortage
			** that causes a failure, just abort the refresh;
			** we will return to this point when reinvoked
			** from em_rxeof.
4044 			*/
4045 			if (m == NULL)
4046 				goto update;
4047 		} else
4048 			m = rxbuf->m_head;
4049 
4050 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4051 		m->m_flags |= M_PKTHDR;
4052 		m->m_data = m->m_ext.ext_buf;
4053 
4054 		/* Use bus_dma machinery to setup the memory mapping  */
4055 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4056 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
4057 		if (error != 0) {
4058 			printf("Refresh mbufs: hdr dmamap load"
4059 			    " failure - %d\n", error);
4060 			m_free(m);
4061 			rxbuf->m_head = NULL;
4062 			goto update;
4063 		}
4064 		rxbuf->m_head = m;
4065 		bus_dmamap_sync(rxr->rxtag,
4066 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4067 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
4068 		cleaned = TRUE;
4069 
		i = j; /* Next is precalculated for us */
4071 		rxr->next_to_refresh = i;
4072 		/* Calculate next controlling index */
4073 		if (++j == adapter->num_rx_desc)
4074 			j = 0;
4075 	}
4076 update:
4077 	/*
	** Update the tail pointer only if, and only
	** as far as, we have refreshed.
4080 	*/
4081 	if (cleaned)
4082 		E1000_WRITE_REG(&adapter->hw,
4083 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4084 
4085 	return;
4086 }
4087 
4088 
4089 /*********************************************************************
4090  *
4091  *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffers
4093  *  that we'll need is equal to the number of receive descriptors
4094  *  that we've allocated.
4095  *
4096  **********************************************************************/
4097 static int
4098 em_allocate_receive_buffers(struct rx_ring *rxr)
4099 {
4100 	struct adapter		*adapter = rxr->adapter;
4101 	device_t		dev = adapter->dev;
4102 	struct em_buffer	*rxbuf;
4103 	int			error;
4104 
4105 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4106 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4107 	if (rxr->rx_buffers == NULL) {
4108 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4109 		return (ENOMEM);
4110 	}
4111 
4112 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4113 				1, 0,			/* alignment, bounds */
4114 				BUS_SPACE_MAXADDR,	/* lowaddr */
4115 				BUS_SPACE_MAXADDR,	/* highaddr */
4116 				NULL, NULL,		/* filter, filterarg */
4117 				MJUM9BYTES,		/* maxsize */
4118 				1,			/* nsegments */
4119 				MJUM9BYTES,		/* maxsegsize */
4120 				0,			/* flags */
4121 				NULL,			/* lockfunc */
4122 				NULL,			/* lockarg */
4123 				&rxr->rxtag);
4124 	if (error) {
4125 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4126 		    __func__, error);
4127 		goto fail;
4128 	}
4129 
4130 	rxbuf = rxr->rx_buffers;
4131 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4133 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4134 		if (error) {
4135 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4136 			    __func__, error);
4137 			goto fail;
4138 		}
4139 	}
4140 
4141 	return (0);
4142 
4143 fail:
4144 	em_free_receive_structures(adapter);
4145 	return (error);
4146 }
4147 
4148 
4149 /*********************************************************************
4150  *
4151  *  Initialize a receive ring and its buffers.
4152  *
4153  **********************************************************************/
4154 static int
4155 em_setup_receive_ring(struct rx_ring *rxr)
4156 {
4157 	struct	adapter 	*adapter = rxr->adapter;
4158 	struct em_buffer	*rxbuf;
4159 	bus_dma_segment_t	seg[1];
4160 	int			rsize, nsegs, error = 0;
4161 #ifdef DEV_NETMAP
4162 	struct netmap_slot *slot;
4163 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4164 #endif
4165 
4166 
4167 	/* Clear the ring contents */
4168 	EM_RX_LOCK(rxr);
4169 	rsize = roundup2(adapter->num_rx_desc *
4170 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4171 	bzero((void *)rxr->rx_base, rsize);
4172 #ifdef DEV_NETMAP
4173 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4174 #endif
4175 
4176 	/*
4177 	** Free current RX buffer structs and their mbufs
4178 	*/
4179 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4180 		rxbuf = &rxr->rx_buffers[i];
4181 		if (rxbuf->m_head != NULL) {
4182 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4183 			    BUS_DMASYNC_POSTREAD);
4184 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4185 			m_freem(rxbuf->m_head);
4186 			rxbuf->m_head = NULL; /* mark as freed */
4187 		}
4188 	}
4189 
4190 	/* Now replenish the mbufs */
	for (int j = 0; j != adapter->num_rx_desc; ++j) {
4192 		rxbuf = &rxr->rx_buffers[j];
4193 #ifdef DEV_NETMAP
4194 		if (slot) {
4195 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4196 			uint64_t paddr;
4197 			void *addr;
4198 
4199 			addr = PNMB(na, slot + si, &paddr);
4200 			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4201 			/* Update descriptor */
4202 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4203 			continue;
4204 		}
4205 #endif /* DEV_NETMAP */
4206 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4207 		    M_PKTHDR, adapter->rx_mbuf_sz);
4208 		if (rxbuf->m_head == NULL) {
4209 			error = ENOBUFS;
4210 			goto fail;
4211 		}
4212 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4213 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4214 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4215 
4216 		/* Get the memory mapping */
4217 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4218 		    rxbuf->map, rxbuf->m_head, seg,
4219 		    &nsegs, BUS_DMA_NOWAIT);
4220 		if (error != 0) {
4221 			m_freem(rxbuf->m_head);
4222 			rxbuf->m_head = NULL;
4223 			goto fail;
4224 		}
4225 		bus_dmamap_sync(rxr->rxtag,
4226 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4227 
4228 		/* Update descriptor */
4229 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4230 	}
4231 	rxr->next_to_check = 0;
4232 	rxr->next_to_refresh = 0;
4233 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4234 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4235 
4236 fail:
4237 	EM_RX_UNLOCK(rxr);
4238 	return (error);
4239 }
4240 
4241 /*********************************************************************
4242  *
4243  *  Initialize all receive rings.
4244  *
4245  **********************************************************************/
4246 static int
4247 em_setup_receive_structures(struct adapter *adapter)
4248 {
4249 	struct rx_ring *rxr = adapter->rx_rings;
4250 	int q;
4251 
4252 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4253 		if (em_setup_receive_ring(rxr))
4254 			goto fail;
4255 
4256 	return (0);
4257 fail:
4258 	/*
	 * Free RX buffers allocated so far; we only handle the rings
	 * that completed, since the failing ring will have cleaned up
	 * after itself. Ring 'q' failed, so it is the terminus.
4262 	 */
4263 	for (int i = 0; i < q; ++i) {
4264 		rxr = &adapter->rx_rings[i];
4265 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4266 			struct em_buffer *rxbuf;
4267 			rxbuf = &rxr->rx_buffers[n];
4268 			if (rxbuf->m_head != NULL) {
4269 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4270 			  	  BUS_DMASYNC_POSTREAD);
4271 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4272 				m_freem(rxbuf->m_head);
4273 				rxbuf->m_head = NULL;
4274 			}
4275 		}
4276 		rxr->next_to_check = 0;
4277 		rxr->next_to_refresh = 0;
4278 	}
4279 
4280 	return (ENOBUFS);
4281 }
4282 
4283 /*********************************************************************
4284  *
4285  *  Free all receive rings.
4286  *
4287  **********************************************************************/
4288 static void
4289 em_free_receive_structures(struct adapter *adapter)
4290 {
4291 	struct rx_ring *rxr = adapter->rx_rings;
4292 
4293 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4294 		em_free_receive_buffers(rxr);
4295 		/* Free the ring memory as well */
4296 		em_dma_free(adapter, &rxr->rxdma);
4297 		EM_RX_LOCK_DESTROY(rxr);
4298 	}
4299 
4300 	free(adapter->rx_rings, M_DEVBUF);
4301 }
4302 
4303 
4304 /*********************************************************************
4305  *
4306  *  Free receive ring data structures
4307  *
4308  **********************************************************************/
4309 static void
4310 em_free_receive_buffers(struct rx_ring *rxr)
4311 {
4312 	struct adapter		*adapter = rxr->adapter;
4313 	struct em_buffer	*rxbuf = NULL;
4314 
4315 	INIT_DEBUGOUT("free_receive_buffers: begin");
4316 
4317 	if (rxr->rx_buffers != NULL) {
4318 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4319 			rxbuf = &rxr->rx_buffers[i];
4320 			if (rxbuf->map != NULL) {
4321 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4322 				    BUS_DMASYNC_POSTREAD);
4323 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4324 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4325 			}
4326 			if (rxbuf->m_head != NULL) {
4327 				m_freem(rxbuf->m_head);
4328 				rxbuf->m_head = NULL;
4329 			}
4330 		}
4331 		free(rxr->rx_buffers, M_DEVBUF);
4332 		rxr->rx_buffers = NULL;
4333 		rxr->next_to_check = 0;
4334 		rxr->next_to_refresh = 0;
4335 	}
4336 
4337 	if (rxr->rxtag != NULL) {
4338 		bus_dma_tag_destroy(rxr->rxtag);
4339 		rxr->rxtag = NULL;
4340 	}
4341 
4342 	return;
4343 }
4344 
4345 
4346 /*********************************************************************
4347  *
4348  *  Enable receive unit.
4349  *
4350  **********************************************************************/
4351 
4352 static void
4353 em_initialize_receive_unit(struct adapter *adapter)
4354 {
4355 	struct rx_ring	*rxr = adapter->rx_rings;
4356 	if_t ifp = adapter->ifp;
4357 	struct e1000_hw	*hw = &adapter->hw;
4358 	u64	bus_addr;
4359 	u32	rctl, rxcsum;
4360 
	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4362 
4363 	/*
4364 	 * Make sure receives are disabled while setting
4365 	 * up the descriptor ring
4366 	 */
4367 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4368 	/* Do not disable if ever enabled on this hardware */
4369 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4370 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4371 
4372 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4373 	    adapter->rx_abs_int_delay.value);
4374 
4375 	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4376 	    adapter->rx_int_delay.value);
4377 	/*
4378 	 * Set the interrupt throttling rate. Value is calculated
4379 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4380 	 */
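	/*
	 * Worked example, assuming the if_em.h default of 8000 for
	 * MAX_INTS_PER_SEC: 10^9 / (8000 * 256) ~= 488 units of 256ns,
	 * i.e. an interrupt interval of roughly 125us.
	 */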
4381 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4382 
4383 	/*
4384 	** When using MSIX interrupts we need to throttle
4385 	** using the EITR register (82574 only)
4386 	*/
4387 	if (hw->mac.type == e1000_82574) {
4388 		u32 rfctl;
4389 		for (int i = 0; i < 4; i++)
4390 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4391 			    DEFAULT_ITR);
4392 		/* Disable accelerated acknowledge */
4393 		rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4394 		rfctl |= E1000_RFCTL_ACK_DIS;
4395 		E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4396 	}
4397 
4398 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4399 	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4400 #ifdef EM_MULTIQUEUE
4401 		rxcsum |= E1000_RXCSUM_TUOFL |
4402 			  E1000_RXCSUM_IPOFL |
4403 			  E1000_RXCSUM_PCSD;
4404 #else
4405 		rxcsum |= E1000_RXCSUM_TUOFL;
4406 #endif
4407 	} else
4408 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4409 
4410 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4411 
4412 #ifdef EM_MULTIQUEUE
4413 	if (adapter->num_queues > 1) {
4414 		uint32_t rss_key[10];
4415 		uint32_t reta;
4416 		int i;
4417 
		/*
		 * Configure RSS key
		 */
4421 		arc4rand(rss_key, sizeof(rss_key), 0);
4422 		for (i = 0; i < 10; ++i)
4423 			E1000_WRITE_REG_ARRAY(hw,E1000_RSSRK(0), i, rss_key[i]);
4424 
		/*
		 * Configure RSS redirect table in following fashion:
		 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
		 */
4429 		reta = 0;
4430 		for (i = 0; i < 4; ++i) {
4431 			uint32_t q;
4432 			q = (i % adapter->num_queues) << 7;
4433 			reta |= q << (8 * i);
4434 		}
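		/*
		 * e.g. with num_queues == 2 this composes reta to
		 * 0x80008000: bytes 0 and 2 stay 0 (queue 0) while
		 * bytes 1 and 3 get bit 7 set (queue 1), so the
		 * redirection entries alternate between the two queues.
		 */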
4435 		for (i = 0; i < 32; ++i)
4436 			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4437 
4438 		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4439 				E1000_MRQC_RSS_FIELD_IPV4_TCP |
4440 				E1000_MRQC_RSS_FIELD_IPV4 |
4441 				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4442 				E1000_MRQC_RSS_FIELD_IPV6_EX |
4443 				E1000_MRQC_RSS_FIELD_IPV6 |
4444 				E1000_MRQC_RSS_FIELD_IPV6_TCP);
4445 	}
4446 #endif
4447 	/*
	** XXX TEMPORARY WORKAROUND: on some systems with 82573,
	** long latencies are observed, e.g. on the Lenovo X60. This
	** change eliminates the problem, but since having positive
	** values in RDTR is a known source of problems on other
	** platforms another solution is being sought.
4453 	*/
4454 	if (hw->mac.type == e1000_82573)
4455 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4456 
4457 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4458 		/* Setup the Base and Length of the Rx Descriptor Ring */
4459 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4460 
4461 		bus_addr = rxr->rxdma.dma_paddr;
4462 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4463 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4464 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4465 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4466 		/* Setup the Head and Tail Descriptor Pointers */
4467 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4468 #ifdef DEV_NETMAP
4469 		/*
4470 		 * an init() while a netmap client is active must
4471 		 * preserve the rx buffers passed to userspace.
4472 		 */
4473 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4474 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4475 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4476 		}
4477 #endif /* DEV_NETMAP */
4478 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4479 	}
4480 
4481 	/*
4482 	 * Set PTHRESH for improved jumbo performance
4483 	 * According to 10.2.5.11 of Intel 82574 Datasheet,
4484 	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4485 	 * Only write to RXDCTL(1) if there is a need for different
4486 	 * settings.
4487 	 */
4488 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4489 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4490 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4491 	    (if_getmtu(ifp) > ETHERMTU)) {
4492 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4493 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4494 	} else if ((adapter->hw.mac.type == e1000_82574) &&
4495 		  (if_getmtu(ifp) > ETHERMTU)) {
4496 		for (int i = 0; i < adapter->num_queues; i++) {
4497 			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4498 
			rxdctl |= 0x20;		/* PTHRESH */
			rxdctl |= 4 << 8;	/* HTHRESH */
			rxdctl |= 4 << 16;	/* WTHRESH */
			rxdctl |= 1 << 24;	/* Switch to granularity */
4503 			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
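			/*
			 * For reference, the thresholds written above
			 * compose to 0x20 | 0x400 | 0x40000 | 0x1000000
			 * == 0x01040420.
			 */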
4504 		}
4505 	}
4506 
4507 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4508 		if (if_getmtu(ifp) > ETHERMTU)
4509 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4510 		else
4511 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4512 	}
4513 
4514 	/* Setup the Receive Control Register */
4515 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4516 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4517 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4518 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4519 
	/* Strip the CRC */
	rctl |= E1000_RCTL_SECRC;

	/* Make sure VLAN Filters are off */
	rctl &= ~E1000_RCTL_VFE;
	rctl &= ~E1000_RCTL_SBP;
4526 
4527 	if (adapter->rx_mbuf_sz == MCLBYTES)
4528 		rctl |= E1000_RCTL_SZ_2048;
4529 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4530 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4531 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4532 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
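	/*
	 * e.g. the default MCLBYTES (2048) cluster selects SZ_2048,
	 * while a page-sized (typically 4096-byte) cluster selects
	 * SZ_4096 with the buffer size extension (BSEX) bit set.
	 */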
4533 
4534 	if (if_getmtu(ifp) > ETHERMTU)
4535 		rctl |= E1000_RCTL_LPE;
4536 	else
4537 		rctl &= ~E1000_RCTL_LPE;
4538 
4539 	/* Write out the settings */
4540 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4541 
4542 	return;
4543 }
4544 
4545 
4546 /*********************************************************************
4547  *
4548  *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and sends data which has been
 *  dma'ed into host memory to the upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  For polling we also return the number of cleaned packets.
4556  *********************************************************************/
4557 static bool
4558 em_rxeof(struct rx_ring *rxr, int count, int *done)
4559 {
4560 	struct adapter		*adapter = rxr->adapter;
4561 	if_t ifp = adapter->ifp;
4562 	struct mbuf		*mp, *sendmp;
4563 	u8			status = 0;
4564 	u16 			len;
4565 	int			i, processed, rxdone = 0;
4566 	bool			eop;
4567 	struct e1000_rx_desc	*cur;
4568 
4569 	EM_RX_LOCK(rxr);
4570 
4571 	/* Sync the ring */
4572 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4573 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4574 
4575 
4576 #ifdef DEV_NETMAP
4577 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4578 		EM_RX_UNLOCK(rxr);
4579 		return (FALSE);
4580 	}
4581 #endif /* DEV_NETMAP */
4582 
4583 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4584 
4585 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4586 			break;
4587 
4588 		cur = &rxr->rx_base[i];
4589 		status = cur->status;
4590 		mp = sendmp = NULL;
4591 
4592 		if ((status & E1000_RXD_STAT_DD) == 0)
4593 			break;
4594 
4595 		len = le16toh(cur->length);
4596 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4597 
4598 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4599 		    (rxr->discard == TRUE)) {
4600 			adapter->dropped_pkts++;
4601 			++rxr->rx_discarded;
4602 			if (!eop) /* Catch subsequent segs */
4603 				rxr->discard = TRUE;
4604 			else
4605 				rxr->discard = FALSE;
4606 			em_rx_discard(rxr, i);
4607 			goto next_desc;
4608 		}
4609 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4610 
4611 		/* Assign correct length to the current fragment */
4612 		mp = rxr->rx_buffers[i].m_head;
4613 		mp->m_len = len;
4614 
4615 		/* Trigger for refresh */
4616 		rxr->rx_buffers[i].m_head = NULL;
4617 
4618 		/* First segment? */
4619 		if (rxr->fmp == NULL) {
4620 			mp->m_pkthdr.len = len;
4621 			rxr->fmp = rxr->lmp = mp;
4622 		} else {
4623 			/* Chain mbuf's together */
4624 			mp->m_flags &= ~M_PKTHDR;
4625 			rxr->lmp->m_next = mp;
4626 			rxr->lmp = mp;
4627 			rxr->fmp->m_pkthdr.len += len;
4628 		}
4629 
4630 		if (eop) {
4631 			--count;
4632 			sendmp = rxr->fmp;
4633 			if_setrcvif(sendmp, ifp);
4634 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4635 			em_receive_checksum(cur, sendmp);
4636 #ifndef __NO_STRICT_ALIGNMENT
4637 			if (adapter->hw.mac.max_frame_size >
4638 			    (MCLBYTES - ETHER_ALIGN) &&
4639 			    em_fixup_rx(rxr) != 0)
4640 				goto skip;
4641 #endif
4642 			if (status & E1000_RXD_STAT_VP) {
4643 				if_setvtag(sendmp,
4644 				    le16toh(cur->special));
4645 				sendmp->m_flags |= M_VLANTAG;
4646 			}
4647 #ifndef __NO_STRICT_ALIGNMENT
4648 skip:
4649 #endif
4650 			rxr->fmp = rxr->lmp = NULL;
4651 		}
4652 next_desc:
4653 		/* Sync the ring */
4654 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4655 	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4656 
4657 		/* Zero out the receive descriptors status. */
4658 		cur->status = 0;
4659 		++rxdone;	/* cumulative for POLL */
4660 		++processed;
4661 
4662 		/* Advance our pointers to the next descriptor. */
4663 		if (++i == adapter->num_rx_desc)
4664 			i = 0;
4665 
4666 		/* Send to the stack */
4667 		if (sendmp != NULL) {
4668 			rxr->next_to_check = i;
4669 			EM_RX_UNLOCK(rxr);
4670 			if_input(ifp, sendmp);
4671 			EM_RX_LOCK(rxr);
4672 			i = rxr->next_to_check;
4673 		}
4674 
4675 		/* Only refresh mbufs every 8 descriptors */
4676 		if (processed == 8) {
4677 			em_refresh_mbufs(rxr, i);
4678 			processed = 0;
4679 		}
4680 	}
4681 
4682 	/* Catch any remaining refresh work */
4683 	if (e1000_rx_unrefreshed(rxr))
4684 		em_refresh_mbufs(rxr, i);
4685 
4686 	rxr->next_to_check = i;
4687 	if (done != NULL)
4688 		*done = rxdone;
4689 	EM_RX_UNLOCK(rxr);
4690 
4691 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4692 }
4693 
4694 static __inline void
4695 em_rx_discard(struct rx_ring *rxr, int i)
4696 {
4697 	struct em_buffer	*rbuf;
4698 
4699 	rbuf = &rxr->rx_buffers[i];
4700 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4701 
4702 	/* Free any previous pieces */
4703 	if (rxr->fmp != NULL) {
4704 		rxr->fmp->m_flags |= M_PKTHDR;
4705 		m_freem(rxr->fmp);
4706 		rxr->fmp = NULL;
4707 		rxr->lmp = NULL;
4708 	}
4709 	/*
4710 	** Free buffer and allow em_refresh_mbufs()
4711 	** to clean up and recharge buffer.
4712 	*/
4713 	if (rbuf->m_head) {
4714 		m_free(rbuf->m_head);
4715 		rbuf->m_head = NULL;
4716 	}
4717 	return;
4718 }
4719 
4720 #ifndef __NO_STRICT_ALIGNMENT
4721 /*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake of
 * the 8254x, as it nullifies DMA operations: the 8254x only allows the RX
 * buffer size to be 2048/4096/8192/16384. What we really want is 2048 -
 * ETHER_ALIGN to align its payload. On architectures without strict
 * alignment restrictions the 8254x still performs unaligned memory accesses,
 * which reduce performance as well. To avoid copying over an entire frame to
 * align, we allocate a new mbuf and copy the ethernet header into the new
 * mbuf. The new mbuf is prepended to the existing mbuf chain.
 *
 * Be aware, best performance of the 8254x is achieved only when jumbo
 * frames are not used at all on architectures with strict alignment.
4734  */
4735 static int
4736 em_fixup_rx(struct rx_ring *rxr)
4737 {
4738 	struct adapter *adapter = rxr->adapter;
4739 	struct mbuf *m, *n;
4740 	int error;
4741 
4742 	error = 0;
4743 	m = rxr->fmp;
4744 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4745 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4746 		m->m_data += ETHER_HDR_LEN;
4747 	} else {
4748 		MGETHDR(n, M_NOWAIT, MT_DATA);
4749 		if (n != NULL) {
4750 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4751 			m->m_data += ETHER_HDR_LEN;
4752 			m->m_len -= ETHER_HDR_LEN;
4753 			n->m_len = ETHER_HDR_LEN;
4754 			M_MOVE_PKTHDR(n, m);
4755 			n->m_next = m;
4756 			rxr->fmp = n;
4757 		} else {
4758 			adapter->dropped_pkts++;
4759 			m_freem(rxr->fmp);
4760 			rxr->fmp = NULL;
4761 			error = ENOMEM;
4762 		}
4763 	}
4764 
4765 	return (error);
4766 }
4767 #endif
4768 
4769 /*********************************************************************
4770  *
4771  *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that the
 *  stack doesn't spend time verifying it.
4774  *
4775  *********************************************************************/
4776 static void
4777 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4778 {
4779 	mp->m_pkthdr.csum_flags = 0;
4780 
4781 	/* Ignore Checksum bit is set */
4782 	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4783 		return;
4784 
4785 	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4786 		return;
4787 
4788 	/* IP Checksum Good? */
4789 	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4790 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4791 
4792 	/* TCP or UDP checksum */
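	/*
	 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data 0xffff tells
	 * the stack that the payload checksum was verified in hardware,
	 * so it does not need to be recomputed.
	 */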
4793 	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4794 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4795 		mp->m_pkthdr.csum_data = htons(0xffff);
4796 	}
4797 }
4798 
4799 /*
 * This routine is run via a vlan
4801  * config EVENT
4802  */
4803 static void
4804 em_register_vlan(void *arg, if_t ifp, u16 vtag)
4805 {
4806 	struct adapter	*adapter = if_getsoftc(ifp);
4807 	u32		index, bit;
4808 
	if ((void *)adapter != arg)	/* Not our event */
4810 		return;
4811 
4812 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4813                 return;
4814 
4815 	EM_CORE_LOCK(adapter);
4816 	index = (vtag >> 5) & 0x7F;
4817 	bit = vtag & 0x1F;
4818 	adapter->shadow_vfta[index] |= (1 << bit);
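	/* e.g. vtag 100: index = 100 >> 5 = 3, bit = 100 & 0x1f = 4 */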
4819 	++adapter->num_vlans;
4820 	/* Re-init to load the changes */
4821 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4822 		em_init_locked(adapter);
4823 	EM_CORE_UNLOCK(adapter);
4824 }
4825 
4826 /*
 * This routine is run via a vlan
4828  * unconfig EVENT
4829  */
4830 static void
4831 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
4832 {
4833 	struct adapter	*adapter = if_getsoftc(ifp);
4834 	u32		index, bit;
4835 
4836 	if (adapter != arg)
4837 		return;
4838 
4839 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4840                 return;
4841 
4842 	EM_CORE_LOCK(adapter);
4843 	index = (vtag >> 5) & 0x7F;
4844 	bit = vtag & 0x1F;
4845 	adapter->shadow_vfta[index] &= ~(1 << bit);
4846 	--adapter->num_vlans;
4847 	/* Re-init to load the changes */
4848 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4849 		em_init_locked(adapter);
4850 	EM_CORE_UNLOCK(adapter);
4851 }
4852 
4853 static void
4854 em_setup_vlan_hw_support(struct adapter *adapter)
4855 {
4856 	struct e1000_hw *hw = &adapter->hw;
4857 	u32             reg;
4858 
4859 	/*
4860 	** We get here thru init_locked, meaning
	** We get here via init_locked, meaning a soft
	** reset; this has already cleared the VFTA and
	** other state, so if no vlans have been
	** registered, do nothing.
4865 	if (adapter->num_vlans == 0)
4866                 return;
4867 
4868 	/*
	** A soft reset zeroes out the VFTA, so
4870 	** we need to repopulate it now.
4871 	*/
4872 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4873                 if (adapter->shadow_vfta[i] != 0)
4874 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4875                             i, adapter->shadow_vfta[i]);
4876 
4877 	reg = E1000_READ_REG(hw, E1000_CTRL);
4878 	reg |= E1000_CTRL_VME;
4879 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4880 
4881 	/* Enable the Filter Table */
4882 	reg = E1000_READ_REG(hw, E1000_RCTL);
4883 	reg &= ~E1000_RCTL_CFIEN;
4884 	reg |= E1000_RCTL_VFE;
4885 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4886 }
4887 
4888 static void
4889 em_enable_intr(struct adapter *adapter)
4890 {
4891 	struct e1000_hw *hw = &adapter->hw;
4892 	u32 ims_mask = IMS_ENABLE_MASK;
4893 
4894 	if (hw->mac.type == e1000_82574) {
4895 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4896 		ims_mask |= EM_MSIX_MASK;
4897 	}
4898 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4899 }
4900 
4901 static void
4902 em_disable_intr(struct adapter *adapter)
4903 {
4904 	struct e1000_hw *hw = &adapter->hw;
4905 
4906 	if (hw->mac.type == e1000_82574)
4907 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4908 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4909 }
4910 
4911 /*
 * Bit of a misnomer: what this really means is
 * to enable OS management of the system, i.e.
 * to disable special hardware management features.
4915  */
4916 static void
4917 em_init_manageability(struct adapter *adapter)
4918 {
4919 	/* A shared code workaround */
4920 #define E1000_82542_MANC2H E1000_MANC2H
4921 	if (adapter->has_manage) {
4922 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4923 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4924 
4925 		/* disable hardware interception of ARP */
4926 		manc &= ~(E1000_MANC_ARP_EN);
4927 
4928                 /* enable receiving management packets to the host */
4929 		manc |= E1000_MANC_EN_MNG2HOST;
4930 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4931 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4932 		manc2h |= E1000_MNG2HOST_PORT_623;
4933 		manc2h |= E1000_MNG2HOST_PORT_664;
4934 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4935 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4936 	}
4937 }
4938 
4939 /*
4940  * Give control back to hardware management
4941  * controller if there is one.
4942  */
4943 static void
4944 em_release_manageability(struct adapter *adapter)
4945 {
4946 	if (adapter->has_manage) {
4947 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4948 
4949 		/* re-enable hardware interception of ARP */
4950 		manc |= E1000_MANC_ARP_EN;
4951 		manc &= ~E1000_MANC_EN_MNG2HOST;
4952 
4953 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4954 	}
4955 }
4956 
4957 /*
4958  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4959  * For ASF and Pass Through versions of f/w this means
 * that the driver is loaded. For AMT versions of the f/w
4961  * this means that the network i/f is open.
4962  */
4963 static void
4964 em_get_hw_control(struct adapter *adapter)
4965 {
4966 	u32 ctrl_ext, swsm;
4967 
4968 	if (adapter->hw.mac.type == e1000_82573) {
4969 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4970 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4971 		    swsm | E1000_SWSM_DRV_LOAD);
4972 		return;
4973 	}
4974 	/* else */
4975 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4976 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4977 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4978 	return;
4979 }
4980 
4981 /*
4982  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4983  * For ASF and Pass Through versions of f/w this means that
4984  * the driver is no longer loaded. For AMT versions of the
4985  * f/w this means that the network i/f is closed.
4986  */
4987 static void
4988 em_release_hw_control(struct adapter *adapter)
4989 {
4990 	u32 ctrl_ext, swsm;
4991 
4992 	if (!adapter->has_manage)
4993 		return;
4994 
4995 	if (adapter->hw.mac.type == e1000_82573) {
4996 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4997 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4998 		    swsm & ~E1000_SWSM_DRV_LOAD);
4999 		return;
5000 	}
5001 	/* else */
5002 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5003 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5004 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5005 	return;
5006 }
5007 
5008 static int
5009 em_is_valid_ether_addr(u8 *addr)
5010 {
5011 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5012 
5013 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5014 		return (FALSE);
5015 	}
5016 
5017 	return (TRUE);
5018 }
5019 
5020 /*
5021 ** Parse the interface capabilities with regard
5022 ** to both system management and wake-on-lan for
5023 ** later use.
5024 */
5025 static void
5026 em_get_wakeup(device_t dev)
5027 {
5028 	struct adapter	*adapter = device_get_softc(dev);
5029 	u16		eeprom_data = 0, device_id, apme_mask;
5030 
5031 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5032 	apme_mask = EM_EEPROM_APME;
5033 
5034 	switch (adapter->hw.mac.type) {
5035 	case e1000_82573:
5036 	case e1000_82583:
5037 		adapter->has_amt = TRUE;
5038 		/* Falls thru */
5039 	case e1000_82571:
5040 	case e1000_82572:
5041 	case e1000_80003es2lan:
5042 		if (adapter->hw.bus.func == 1) {
5043 			e1000_read_nvm(&adapter->hw,
5044 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5045 			break;
5046 		} else
5047 			e1000_read_nvm(&adapter->hw,
5048 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5049 		break;
5050 	case e1000_ich8lan:
5051 	case e1000_ich9lan:
5052 	case e1000_ich10lan:
5053 	case e1000_pchlan:
5054 	case e1000_pch2lan:
5055 		apme_mask = E1000_WUC_APME;
5056 		adapter->has_amt = TRUE;
5057 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5058 		break;
5059 	default:
5060 		e1000_read_nvm(&adapter->hw,
5061 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5062 		break;
5063 	}
5064 	if (eeprom_data & apme_mask)
5065 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5066 	/*
	 * We have the eeprom settings, now apply the special cases
	 * where the eeprom may be wrong or the board won't support
	 * wake on lan on a particular port.
5070 	 */
5071 	device_id = pci_get_device(dev);
5072         switch (device_id) {
5073 	case E1000_DEV_ID_82571EB_FIBER:
5074 		/* Wake events only supported on port A for dual fiber
5075 		 * regardless of eeprom setting */
5076 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5077 		    E1000_STATUS_FUNC_1)
5078 			adapter->wol = 0;
5079 		break;
5080 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
5081 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
5082 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5083                 /* if quad port adapter, disable WoL on all but port A */
5084 		if (global_quad_port_a != 0)
5085 			adapter->wol = 0;
5086 		/* Reset for multiple quad port adapters */
5087 		if (++global_quad_port_a == 4)
5088 			global_quad_port_a = 0;
5089                 break;
5090 	}
5091 	return;
5092 }
5093 
5094 
5095 /*
5096  * Enable PCI Wake On Lan capability
5097  */
5098 static void
5099 em_enable_wakeup(device_t dev)
5100 {
5101 	struct adapter	*adapter = device_get_softc(dev);
5102 	if_t ifp = adapter->ifp;
5103 	u32		pmc, ctrl, ctrl_ext, rctl;
5104 	u16     	status;
5105 
5106 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
5107 		return;
5108 
5109 	/* Advertise the wakeup capability */
5110 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5111 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5112 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5113 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5114 
5115 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
5116 	    (adapter->hw.mac.type == e1000_pchlan) ||
5117 	    (adapter->hw.mac.type == e1000_ich9lan) ||
5118 	    (adapter->hw.mac.type == e1000_ich10lan))
5119 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
5120 
5121 	/* Keep the laser running on Fiber adapters */
5122 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5123 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5124 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5125 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5126 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5127 	}
5128 
5129 	/*
5130 	** Determine the type of Wakeup: wol was seeded in
5131 	** em_get_wakeup(); strip bits the user has not enabled.
5132 	*/
5133 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5134 		adapter->wol &= ~E1000_WUFC_MAG;
5135 
5136 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5137 		adapter->wol &= ~E1000_WUFC_MC;
5138 	else {
5139 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5140 		rctl |= E1000_RCTL_MPE;
5141 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5142 	}
5143 
5144 	if ((adapter->hw.mac.type == e1000_pchlan) ||
5145 	    (adapter->hw.mac.type == e1000_pch2lan)) {
5146 		if (em_enable_phy_wakeup(adapter))
5147 			return;
5148 	} else {
5149 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5150 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5151 	}
5152 
5153 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5154 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5155 
5156 	/* Request PME */
5157 	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5158 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5159 	if (if_getcapenable(ifp) & IFCAP_WOL)
5160 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5161 	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5162 
5163 	return;
5164 }
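
/*
 * Illustrative note (not part of the driver): the IFCAP_WOL_* bits
 * tested above are the ones toggled from userland with ifconfig(8),
 * e.g. "ifconfig em0 wol_magic" or "ifconfig em0 wol_mcast"; only
 * capabilities the driver advertises can be enabled that way.
 */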
5165 
5166 /*
5167 ** WOL on the newer chipset interfaces (pchlan) requires
5168 ** the wake configuration to be copied into the PHY
5169 */
5170 static int
5171 em_enable_phy_wakeup(struct adapter *adapter)
5172 {
5173 	struct e1000_hw *hw = &adapter->hw;
5174 	u32 mreg, ret = 0;
5175 	u16 preg;
5176 
5177 	/* copy MAC RARs to PHY RARs */
5178 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5179 
5180 	/* copy MAC MTA to PHY MTA */
5181 	for (int i = 0; i < hw->mac.mta_reg_count; i++) {
5182 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5183 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5184 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5185 		    (u16)((mreg >> 16) & 0xFFFF));
5186 	}
5187 
5188 	/* configure PHY Rx Control register */
5189 	e1000_read_phy_reg(hw, BM_RCTL, &preg);
5190 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5191 	if (mreg & E1000_RCTL_UPE)
5192 		preg |= BM_RCTL_UPE;
5193 	if (mreg & E1000_RCTL_MPE)
5194 		preg |= BM_RCTL_MPE;
5195 	preg &= ~(BM_RCTL_MO_MASK);
5196 	if (mreg & E1000_RCTL_MO_3)
5197 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5198 				<< BM_RCTL_MO_SHIFT);
5199 	if (mreg & E1000_RCTL_BAM)
5200 		preg |= BM_RCTL_BAM;
5201 	if (mreg & E1000_RCTL_PMCF)
5202 		preg |= BM_RCTL_PMCF;
5203 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5204 	if (mreg & E1000_CTRL_RFCE)
5205 		preg |= BM_RCTL_RFCE;
5206 	e1000_write_phy_reg(hw, BM_RCTL, preg);
5207 
5208 	/* enable PHY wakeup in MAC register */
5209 	E1000_WRITE_REG(hw, E1000_WUC,
5210 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5211 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5212 
5213 	/* configure and enable PHY wakeup in PHY registers */
5214 	e1000_write_phy_reg(hw, BM_WUFC, adapter->wol);
5215 	e1000_write_phy_reg(hw, BM_WUC, E1000_WUC_PME_EN);
5216 
5217 	/* activate PHY wakeup */
5218 	ret = hw->phy.ops.acquire(hw);
5219 	if (ret) {
5220 		printf("Could not acquire PHY\n");
5221 		return (ret);
5222 	}
5223 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5224 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5225 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5226 	if (ret) {
5227 		printf("Could not read PHY page 769\n");
5228 		goto out;
5229 	}
5230 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5231 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5232 	if (ret)
5233 		printf("Could not set PHY Host Wakeup bit\n");
5234 out:
5235 	hw->phy.ops.release(hw);
5236 
5237 	return (ret);
5238 }
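
/*
 * Note on the raw MDIC accesses above: BM_WUC_ENABLE_REG lives on
 * PHY page 769 (BM_WUC_ENABLE_PAGE), which is why the page is
 * selected by hand through IGP01E1000_PHY_PAGE_SELECT instead of
 * going through the generic e1000_read/write_phy_reg() paths.
 */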
5239 
5240 static void
5241 em_led_func(void *arg, int onoff)
5242 {
5243 	struct adapter	*adapter = arg;
5244 
5245 	EM_CORE_LOCK(adapter);
5246 	if (onoff) {
5247 		e1000_setup_led(&adapter->hw);
5248 		e1000_led_on(&adapter->hw);
5249 	} else {
5250 		e1000_led_off(&adapter->hw);
5251 		e1000_cleanup_led(&adapter->hw);
5252 	}
5253 	EM_CORE_UNLOCK(adapter);
5254 }
5255 
5256 /*
5257 ** Disable the L0S and L1 LINK states
5258 */
5259 static void
5260 em_disable_aspm(struct adapter *adapter)
5261 {
5262 	int		base, reg;
5263 	u16		link_cap, link_ctrl;
5264 	device_t	dev = adapter->dev;
5265 
5266 	switch (adapter->hw.mac.type) {
5267 		case e1000_82573:
5268 		case e1000_82574:
5269 		case e1000_82583:
5270 			break;
5271 		default:
5272 			return;
5273 	}
5274 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5275 		return;
5276 	reg = base + PCIER_LINK_CAP;
5277 	link_cap = pci_read_config(dev, reg, 2);
5278 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5279 		return;
5280 	reg = base + PCIER_LINK_CTL;
5281 	link_ctrl = pci_read_config(dev, reg, 2);
5282 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5283 	pci_write_config(dev, reg, link_ctrl, 2);
5284 	return;
5285 }
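
/*
 * Background: PCIEM_LINK_CTL_ASPMC is the two-bit ASPM Control field
 * of the PCIe Link Control register (bit 0 = L0s entry, bit 1 = L1
 * entry), so clearing the whole field above disables both low-power
 * link states on these parts.
 */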
5286 
5287 /**********************************************************************
5288  *
5289  *  Update the board statistics counters.
5290  *
5291  **********************************************************************/
5292 static void
5293 em_update_stats_counters(struct adapter *adapter)
5294 {
5295 
5296 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5297 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5298 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5299 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5300 	}
5301 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5302 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5303 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5304 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5305 
5306 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5307 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5308 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5309 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5310 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5311 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5312 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5313 	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5314 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5315 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5316 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5317 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5318 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5319 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5320 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5321 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5322 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5323 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5324 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5325 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5326 
5327 	/* For the 64-bit byte counters the low dword must be read first. */
5328 	/* Both registers clear on the read of the high dword */
5329 
5330 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5331 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5332 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5333 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5334 
5335 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5336 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5337 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5338 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5339 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5340 
5341 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5342 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5343 
5344 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5345 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5346 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5347 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5348 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5349 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5350 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5351 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5352 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5353 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5354 
5355 	/* Interrupt Counts */
5356 
5357 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5358 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5359 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5360 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5361 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5362 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5363 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5364 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5365 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5366 
5367 	if (adapter->hw.mac.type >= e1000_82543) {
5368 		adapter->stats.algnerrc +=
5369 		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5370 		adapter->stats.rxerrc +=
5371 		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5372 		adapter->stats.tncrs +=
5373 		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5374 		adapter->stats.cexterr +=
5375 		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5376 		adapter->stats.tsctc +=
5377 		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5378 		adapter->stats.tsctfc +=
5379 		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5380 	}
5381 }
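
/*
 * Note: most of the statistics registers read above are clear-on-read,
 * which is why every value is accumulated into the softc with "+="
 * rather than stored; the softc copy is the only running total.
 */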
5382 
5383 static uint64_t
5384 em_get_counter(if_t ifp, ift_counter cnt)
5385 {
5386 	struct adapter *adapter;
5387 
5388 	adapter = if_getsoftc(ifp);
5389 
5390 	switch (cnt) {
5391 	case IFCOUNTER_COLLISIONS:
5392 		return (adapter->stats.colc);
5393 	case IFCOUNTER_IERRORS:
5394 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5395 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5396 		    adapter->stats.ruc + adapter->stats.roc +
5397 		    adapter->stats.mpc + adapter->stats.cexterr);
5398 	case IFCOUNTER_OERRORS:
5399 		return (adapter->stats.ecol + adapter->stats.latecol +
5400 		    adapter->watchdog_events);
5401 	default:
5402 		return (if_get_counter_default(ifp, cnt));
5403 	}
5404 }
5405 
5406 /* Export a single 32-bit register via a read-only sysctl. */
5407 static int
5408 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5409 {
5410 	struct adapter *adapter;
5411 	u_int val;
5412 
5413 	adapter = oidp->oid_arg1;
5414 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5415 	return (sysctl_handle_int(oidp, &val, 0, req));
5416 }
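
/*
 * The register offset travels in oid_arg2, so this single handler
 * backs every read-only register sysctl registered below in
 * em_add_hw_stats().
 */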
5417 
5418 /*
5419  * Add sysctl variables, one per statistic, to the system.
5420  */
5421 static void
5422 em_add_hw_stats(struct adapter *adapter)
5423 {
5424 	device_t dev = adapter->dev;
5425 
5426 	struct tx_ring *txr = adapter->tx_rings;
5427 	struct rx_ring *rxr = adapter->rx_rings;
5428 
5429 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5430 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5431 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5432 	struct e1000_hw_stats *stats = &adapter->stats;
5433 
5434 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5435 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5436 
5437 #define QUEUE_NAME_LEN 32
5438 	char namebuf[QUEUE_NAME_LEN];
5439 
5440 	/* Driver Statistics */
5441 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5442 			CTLFLAG_RD, &adapter->link_irq,
5443 			"Link MSIX IRQ Handled");
5444 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5445 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5446 			 "Std mbuf failed");
5447 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5448 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5449 			 "Std mbuf cluster failed");
5450 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5451 			CTLFLAG_RD, &adapter->dropped_pkts,
5452 			"Driver dropped packets");
5453 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5454 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5455 			"Driver tx dma failure in xmit");
5456 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5457 			CTLFLAG_RD, &adapter->rx_overruns,
5458 			"RX overruns");
5459 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5460 			CTLFLAG_RD, &adapter->watchdog_events,
5461 			"Watchdog timeouts");
5462 
5463 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5464 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5465 			em_sysctl_reg_handler, "IU",
5466 			"Device Control Register");
5467 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5468 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5469 			em_sysctl_reg_handler, "IU",
5470 			"Receiver Control Register");
5471 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5472 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5473 			"Flow Control High Watermark");
5474 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5475 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5476 			"Flow Control Low Watermark");
5477 
5478 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5479 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5480 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5481 					    CTLFLAG_RD, NULL, "TX Queue Name");
5482 		queue_list = SYSCTL_CHILDREN(queue_node);
5483 
5484 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5485 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5486 				E1000_TDH(txr->me),
5487 				em_sysctl_reg_handler, "IU",
5488 				"Transmit Descriptor Head");
5489 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5490 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5491 				E1000_TDT(txr->me),
5492 				em_sysctl_reg_handler, "IU",
5493 				"Transmit Descriptor Tail");
5494 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5495 				CTLFLAG_RD, &txr->tx_irq,
5496 				"Queue MSI-X Transmit Interrupts");
5497 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5498 				CTLFLAG_RD, &txr->no_desc_avail,
5499 				"Queue No Descriptor Available");
5500 
5501 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5502 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5503 					    CTLFLAG_RD, NULL, "RX Queue Name");
5504 		queue_list = SYSCTL_CHILDREN(queue_node);
5505 
5506 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5507 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5508 				E1000_RDH(rxr->me),
5509 				em_sysctl_reg_handler, "IU",
5510 				"Receive Descriptor Head");
5511 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5512 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5513 				E1000_RDT(rxr->me),
5514 				em_sysctl_reg_handler, "IU",
5515 				"Receive Descriptor Tail");
5516 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5517 				CTLFLAG_RD, &rxr->rx_irq,
5518 				"Queue MSI-X Receive Interrupts");
5519 	}
5520 
5521 	/* MAC stats get their own sub node */
5522 
5523 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5524 				    CTLFLAG_RD, NULL, "Statistics");
5525 	stat_list = SYSCTL_CHILDREN(stat_node);
5526 
5527 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5528 			CTLFLAG_RD, &stats->ecol,
5529 			"Excessive collisions");
5530 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5531 			CTLFLAG_RD, &stats->scc,
5532 			"Single collisions");
5533 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5534 			CTLFLAG_RD, &stats->mcc,
5535 			"Multiple collisions");
5536 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5537 			CTLFLAG_RD, &stats->latecol,
5538 			"Late collisions");
5539 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5540 			CTLFLAG_RD, &stats->colc,
5541 			"Collision Count");
5542 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5543 			CTLFLAG_RD, &adapter->stats.symerrs,
5544 			"Symbol Errors");
5545 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5546 			CTLFLAG_RD, &adapter->stats.sec,
5547 			"Sequence Errors");
5548 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5549 			CTLFLAG_RD, &adapter->stats.dc,
5550 			"Defer Count");
5551 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5552 			CTLFLAG_RD, &adapter->stats.mpc,
5553 			"Missed Packets");
5554 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5555 			CTLFLAG_RD, &adapter->stats.rnbc,
5556 			"Receive No Buffers");
5557 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5558 			CTLFLAG_RD, &adapter->stats.ruc,
5559 			"Receive Undersize");
5560 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5561 			CTLFLAG_RD, &adapter->stats.rfc,
5562 			"Fragmented Packets Received");
5563 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5564 			CTLFLAG_RD, &adapter->stats.roc,
5565 			"Oversized Packets Received");
5566 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5567 			CTLFLAG_RD, &adapter->stats.rjc,
5568 			"Received Jabber");
5569 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5570 			CTLFLAG_RD, &adapter->stats.rxerrc,
5571 			"Receive Errors");
5572 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5573 			CTLFLAG_RD, &adapter->stats.crcerrs,
5574 			"CRC errors");
5575 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5576 			CTLFLAG_RD, &adapter->stats.algnerrc,
5577 			"Alignment Errors");
5578 	/* On 82575 these are collision counts */
5579 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5580 			CTLFLAG_RD, &adapter->stats.cexterr,
5581 			"Collision/Carrier extension errors");
5582 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5583 			CTLFLAG_RD, &adapter->stats.xonrxc,
5584 			"XON Received");
5585 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5586 			CTLFLAG_RD, &adapter->stats.xontxc,
5587 			"XON Transmitted");
5588 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5589 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5590 			"XOFF Received");
5591 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5592 			CTLFLAG_RD, &adapter->stats.xofftxc,
5593 			"XOFF Transmitted");
5594 
5595 	/* Packet Reception Stats */
5596 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5597 			CTLFLAG_RD, &adapter->stats.tpr,
5598 			"Total Packets Received");
5599 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5600 			CTLFLAG_RD, &adapter->stats.gprc,
5601 			"Good Packets Received");
5602 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5603 			CTLFLAG_RD, &adapter->stats.bprc,
5604 			"Broadcast Packets Received");
5605 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5606 			CTLFLAG_RD, &adapter->stats.mprc,
5607 			"Multicast Packets Received");
5608 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5609 			CTLFLAG_RD, &adapter->stats.prc64,
5610 			"64 byte frames received");
5611 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5612 			CTLFLAG_RD, &adapter->stats.prc127,
5613 			"65-127 byte frames received");
5614 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5615 			CTLFLAG_RD, &adapter->stats.prc255,
5616 			"128-255 byte frames received");
5617 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5618 			CTLFLAG_RD, &adapter->stats.prc511,
5619 			"256-511 byte frames received");
5620 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5621 			CTLFLAG_RD, &adapter->stats.prc1023,
5622 			"512-1023 byte frames received");
5623 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5624 			CTLFLAG_RD, &adapter->stats.prc1522,
5625 			"1024-1522 byte frames received");
5626 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5627 			CTLFLAG_RD, &adapter->stats.gorc,
5628 			"Good Octets Received");
5629 
5630 	/* Packet Transmission Stats */
5631 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5632 			CTLFLAG_RD, &adapter->stats.gotc,
5633 			"Good Octets Transmitted");
5634 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5635 			CTLFLAG_RD, &adapter->stats.tpt,
5636 			"Total Packets Transmitted");
5637 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5638 			CTLFLAG_RD, &adapter->stats.gptc,
5639 			"Good Packets Transmitted");
5640 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5641 			CTLFLAG_RD, &adapter->stats.bptc,
5642 			"Broadcast Packets Transmitted");
5643 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5644 			CTLFLAG_RD, &adapter->stats.mptc,
5645 			"Multicast Packets Transmitted");
5646 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5647 			CTLFLAG_RD, &adapter->stats.ptc64,
5648 			"64 byte frames transmitted");
5649 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5650 			CTLFLAG_RD, &adapter->stats.ptc127,
5651 			"65-127 byte frames transmitted");
5652 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5653 			CTLFLAG_RD, &adapter->stats.ptc255,
5654 			"128-255 byte frames transmitted");
5655 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5656 			CTLFLAG_RD, &adapter->stats.ptc511,
5657 			"256-511 byte frames transmitted");
5658 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5659 			CTLFLAG_RD, &adapter->stats.ptc1023,
5660 			"512-1023 byte frames transmitted");
5661 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5662 			CTLFLAG_RD, &adapter->stats.ptc1522,
5663 			"1024-1522 byte frames transmitted");
5664 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5665 			CTLFLAG_RD, &adapter->stats.tsctc,
5666 			"TSO Contexts Transmitted");
5667 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5668 			CTLFLAG_RD, &adapter->stats.tsctfc,
5669 			"TSO Contexts Failed");
5670 
5671 
5672 	/* Interrupt Stats */
5673 
5674 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5675 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5676 	int_list = SYSCTL_CHILDREN(int_node);
5677 
5678 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5679 			CTLFLAG_RD, &adapter->stats.iac,
5680 			"Interrupt Assertion Count");
5681 
5682 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5683 			CTLFLAG_RD, &adapter->stats.icrxptc,
5684 			"Interrupt Cause Rx Pkt Timer Expire Count");
5685 
5686 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5687 			CTLFLAG_RD, &adapter->stats.icrxatc,
5688 			"Interrupt Cause Rx Abs Timer Expire Count");
5689 
5690 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5691 			CTLFLAG_RD, &adapter->stats.ictxptc,
5692 			"Interrupt Cause Tx Pkt Timer Expire Count");
5693 
5694 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5695 			CTLFLAG_RD, &adapter->stats.ictxatc,
5696 			"Interrupt Cause Tx Abs Timer Expire Count");
5697 
5698 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5699 			CTLFLAG_RD, &adapter->stats.ictxqec,
5700 			"Interrupt Cause Tx Queue Empty Count");
5701 
5702 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5703 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5704 			"Interrupt Cause Tx Queue Min Thresh Count");
5705 
5706 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5707 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5708 			"Interrupt Cause Rx Desc Min Thresh Count");
5709 
5710 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5711 			CTLFLAG_RD, &adapter->stats.icrxoc,
5712 			"Interrupt Cause Receiver Overrun Count");
5713 }
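
/*
 * Illustrative userland sketch (not part of the driver): the UQUAD
 * statistics registered above can be read with sysctlbyname(3).  The
 * node name below assumes unit 0; adjust "dev.em.0" for other units.
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		uint64_t val;
 *		size_t len = sizeof(val);
 *
 *		if (sysctlbyname("dev.em.0.mac_stats.good_pkts_recvd",
 *		    &val, &len, NULL, 0) == -1) {
 *			perror("sysctlbyname");
 *			return (1);
 *		}
 *		printf("good packets received: %ju\n", (uintmax_t)val);
 *		return (0);
 *	}
 */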
5714 
5715 /**********************************************************************
5716  *
5717  *  This routine provides a way to dump out the adapter eeprom,
5718  *  often a useful debug/service tool. This dumps only the first
5719  *  32 words; the data that matters lies within that range.
5720  *
5721  **********************************************************************/
5722 static int
5723 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5724 {
5725 	struct adapter *adapter = (struct adapter *)arg1;
5726 	int error;
5727 	int result;
5728 
5729 	result = -1;
5730 	error = sysctl_handle_int(oidp, &result, 0, req);
5731 
5732 	if (error || !req->newptr)
5733 		return (error);
5734 
5735 	/*
5736 	 * This value will cause a hex dump of the
5737 	 * first 32 16-bit words of the EEPROM to
5738 	 * the screen.
5739 	 */
5740 	if (result == 1)
5741 		em_print_nvm_info(adapter);
5742 
5743 	return (error);
5744 }
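
/*
 * Illustrative usage (the sysctl node is attached elsewhere in this
 * file; "nvm" is the name assumed here): writing 1 triggers the hex
 * dump produced by em_print_nvm_info() below:
 *
 *	sysctl dev.em.0.nvm=1
 */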
5745 
5746 static void
5747 em_print_nvm_info(struct adapter *adapter)
5748 {
5749 	u16	eeprom_data;
5750 	int	i, j, row = 0;
5751 
5752 	/* It's a bit crude, but it gets the job done */
5753 	printf("\nInterface EEPROM Dump:\n");
5754 	printf("Offset\n0x0000  ");
5755 	for (i = 0, j = 0; i < 32; i++, j++) {
5756 		if (j == 8) { /* Make the offset block */
5757 			j = 0; ++row;
5758 			printf("\n0x00%x0  ", row);
5759 		}
5760 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5761 		printf("%04x ", eeprom_data);
5762 	}
5763 	printf("\n");
5764 }
5765 
5766 static int
5767 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5768 {
5769 	struct em_int_delay_info *info;
5770 	struct adapter *adapter;
5771 	u32 regval;
5772 	int error, usecs, ticks;
5773 
5774 	info = (struct em_int_delay_info *)arg1;
5775 	usecs = info->value;
5776 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5777 	if (error != 0 || req->newptr == NULL)
5778 		return (error);
5779 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5780 		return (EINVAL);
5781 	info->value = usecs;
5782 	ticks = EM_USECS_TO_TICKS(usecs);
5783 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5784 		ticks *= 4;
5785 
5786 	adapter = info->adapter;
5787 
5788 	EM_CORE_LOCK(adapter);
5789 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5790 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5791 	/* Handle a few special cases. */
5792 	switch (info->offset) {
5793 	case E1000_RDTR:
5794 		break;
5795 	case E1000_TIDV:
5796 		if (ticks == 0) {
5797 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5798 			/* Don't write 0 into the TIDV register. */
5799 			regval++;
5800 		} else
5801 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5802 		break;
5803 	}
5804 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5805 	EM_CORE_UNLOCK(adapter);
5806 	return (0);
5807 }
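
/*
 * Worked example of the conversion above, assuming the usual if_em.h
 * definition EM_USECS_TO_TICKS(us) = ((1000 * (us) + 512) / 1024),
 * i.e. a 1.024 us timer granularity: a request of 100 us becomes
 * (100 * 1000 + 512) / 1024 = 98 ticks.  E1000_ITR counts in 256 ns
 * units (a quarter of 1.024 us), hence the "ticks *= 4" above,
 * giving 392 (392 * 256 ns is roughly 100 us).
 */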
5808 
5809 static void
5810 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5811 	const char *description, struct em_int_delay_info *info,
5812 	int offset, int value)
5813 {
5814 	info->adapter = adapter;
5815 	info->offset = offset;
5816 	info->value = value;
5817 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5818 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5819 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5820 	    info, 0, em_sysctl_int_delay, "I", description);
5821 }
5822 
5823 static void
5824 em_set_sysctl_value(struct adapter *adapter, const char *name,
5825 	const char *description, int *limit, int value)
5826 {
5827 	*limit = value;
5828 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5829 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5830 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5831 }
5832 
5833 
5834 /*
5835 ** Set flow control using sysctl:
5836 ** Flow control values:
5837 **      0 - off
5838 **      1 - rx pause
5839 **      2 - tx pause
5840 **      3 - full
5841 */
5842 static int
5843 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5844 {
5845 	int		error;
5846 	static int	input = 3; /* default is full */
5847 	struct adapter	*adapter = (struct adapter *) arg1;
5848 
5849 	error = sysctl_handle_int(oidp, &input, 0, req);
5850 
5851 	if (error || req->newptr == NULL)
5852 		return (error);
5853 
5854 	if (input == adapter->fc) /* no change? */
5855 		return (error);
5856 
5857 	switch (input) {
5858 	case e1000_fc_rx_pause:
5859 	case e1000_fc_tx_pause:
5860 	case e1000_fc_full:
5861 	case e1000_fc_none:
5862 		adapter->hw.fc.requested_mode = input;
5863 		adapter->fc = input;
5864 		break;
5865 	default:
5866 		/* Do nothing */
5867 		return (error);
5868 	}
5869 
5870 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5871 	e1000_force_mac_fc(&adapter->hw);
5872 	return (error);
5873 }
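
/*
 * Illustrative usage (the sysctl node is attached elsewhere in this
 * file; "fc" is the name assumed here), selecting full flow control:
 *
 *	sysctl dev.em.0.fc=3
 */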
5874 
5875 /*
5876 ** Manage Energy Efficient Ethernet:
5877 ** Control values:
5878 **     0 - EEE enabled, 1 - EEE disabled
5879 */
5880 static int
5881 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5882 {
5883 	struct adapter *adapter = (struct adapter *) arg1;
5884 	int		error, value;
5885 
5886 	value = adapter->hw.dev_spec.ich8lan.eee_disable;
5887 	error = sysctl_handle_int(oidp, &value, 0, req);
5888 	if (error || req->newptr == NULL)
5889 		return (error);
5890 	EM_CORE_LOCK(adapter);
5891 	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5892 	em_init_locked(adapter);
5893 	EM_CORE_UNLOCK(adapter);
5894 	return (0);
5895 }
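
/*
 * Illustrative usage (node name assumed): because the value is stored
 * as eee_disable, writing 1 turns EEE off and 0 turns it back on:
 *
 *	sysctl dev.em.0.eee_control=1
 */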
5896 
5897 static int
5898 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5899 {
5900 	struct adapter *adapter;
5901 	int error;
5902 	int result;
5903 
5904 	result = -1;
5905 	error = sysctl_handle_int(oidp, &result, 0, req);
5906 
5907 	if (error || !req->newptr)
5908 		return (error);
5909 
5910 	if (result == 1) {
5911 		adapter = (struct adapter *)arg1;
5912 		em_print_debug_info(adapter);
5913 	}
5914 
5915 	return (error);
5916 }
5917 
5918 /*
5919 ** This routine is meant to be fluid, add whatever is
5920 ** needed for debugging a problem.  -jfv
5921 */
5922 static void
5923 em_print_debug_info(struct adapter *adapter)
5924 {
5925 	device_t dev = adapter->dev;
5926 	struct tx_ring *txr = adapter->tx_rings;
5927 	struct rx_ring *rxr = adapter->rx_rings;
5928 
5929 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
5930 		printf("Interface is RUNNING ");
5931 	else
5932 		printf("Interface is NOT RUNNING ");
5933 
5934 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
5935 		printf("and INACTIVE\n");
5936 	else
5937 		printf("and ACTIVE\n");
5938 
5939 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5940 		device_printf(dev, "TX Queue %d ------\n", i);
5941 		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5942 	    		E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
5943 	    		E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
5944 		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
5945 		device_printf(dev, "TX descriptors avail = %d\n",
5946 	    		txr->tx_avail);
5947 		device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5948 	    		txr->no_desc_avail);
5949 		device_printf(dev, "RX Queue %d ------\n", i);
5950 		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5951 	    		E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
5952 	    		E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
5953 		device_printf(dev, "RX discarded packets = %ld\n",
5954 	    		rxr->rx_discarded);
5955 		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5956 		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5957 	}
5958 }
5959 
5960 #ifdef EM_MULTIQUEUE
5961 /*
5962  * 82574 only:
5963  * Write a new value to the EEPROM increasing the number of MSIX
5964  * vectors from 3 to 5, for proper multiqueue support.
5965  */
5966 static void
5967 em_enable_vectors_82574(struct adapter *adapter)
5968 {
5969 	struct e1000_hw *hw = &adapter->hw;
5970 	device_t dev = adapter->dev;
5971 	u16 edata;
5972 
5973 	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
5974 	device_printf(dev, "Current cap: %#06x\n", edata);
5975 	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
5976 		device_printf(dev, "Writing to eeprom: increasing "
5977 		    "reported MSIX vectors from 3 to 5...\n");
5978 		edata &= ~(EM_NVM_MSIX_N_MASK);
5979 		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
5980 		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
5981 		e1000_update_nvm_checksum(hw);
5982 		device_printf(dev, "Writing to eeprom: done\n");
5983 	}
5984 }
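
/*
 * Note on the encoding above: the MSIX field in EM_NVM_PCIE_CTRL
 * appears to hold (vectors - 1), so writing 4 makes the device
 * report 5 vectors once the checksum is updated and the adapter is
 * reset.
 */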
5985 #endif
5986 
5987 #ifdef DDB
5988 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
5989 {
5990 	devclass_t	dc;
5991 	int max_em;
5992 
5993 	dc = devclass_find("em");
5994 	max_em = devclass_get_maxunit(dc);
5995 
5996 	for (int index = 0; index < max_em; index++) {
5997 		device_t dev;
5998 		dev = devclass_get_device(dc, index);
5999 		if (dev != NULL && device_get_driver(dev) == &em_driver) {
6000 			struct adapter *adapter = device_get_softc(dev);
6001 			EM_CORE_LOCK(adapter);
6002 			em_init_locked(adapter);
6003 			EM_CORE_UNLOCK(adapter);
6004 		}
6005 	}
6006 }
6007 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6008 {
6009 	devclass_t	dc;
6010 	int max_em;
6011 
6012 	dc = devclass_find("em");
6013 	max_em = devclass_get_maxunit(dc);
6014 
6015 	for (int index = 0; index < max_em; index++) {
6016 		device_t dev;
6017 		dev = devclass_get_device(dc, index);
6018 		if (dev != NULL && device_get_driver(dev) == &em_driver)
6019 			em_print_debug_info(device_get_softc(dev));
6020 	}
6022 }
6023 #endif
6024