xref: /freebsd/sys/dev/e1000/if_em.c (revision 3fe401a500cdfc73d8c066da3c577c4b9f0aa953)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2015, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 #include "opt_em.h"
36 #include "opt_ddb.h"
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
42 #endif
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #ifdef DDB
47 #include <sys/types.h>
48 #include <ddb/ddb.h>
49 #endif
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
52 #endif
53 #include <sys/bus.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/module.h>
60 #include <sys/rman.h>
61 #include <sys/smp.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
69 
70 #include <net/bpf.h>
71 #include <net/ethernet.h>
72 #include <net/if.h>
73 #include <net/if_var.h>
74 #include <net/if_arp.h>
75 #include <net/if_dl.h>
76 #include <net/if_media.h>
77 
78 #include <net/if_types.h>
79 #include <net/if_vlan_var.h>
80 
81 #include <netinet/in_systm.h>
82 #include <netinet/in.h>
83 #include <netinet/if_ether.h>
84 #include <netinet/ip.h>
85 #include <netinet/ip6.h>
86 #include <netinet/tcp.h>
87 #include <netinet/udp.h>
88 
89 #include <machine/in_cksum.h>
90 #include <dev/led/led.h>
91 #include <dev/pci/pcivar.h>
92 #include <dev/pci/pcireg.h>
93 
94 #include "e1000_api.h"
95 #include "e1000_82571.h"
96 #include "if_em.h"
97 
98 /*********************************************************************
99  *  Set this to one to display debug statistics
100  *********************************************************************/
101 int	em_display_debug_stats = 0;
102 
103 /*********************************************************************
104  *  Driver version:
105  *********************************************************************/
106 char em_driver_version[] = "7.4.2";
107 
108 /*********************************************************************
109  *  PCI Device ID Table
110  *
111  *  Used by probe to select devices to load on
112  *  Last field stores an index into em_strings
113  *  Last entry must be all 0s
114  *
115  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
116  *********************************************************************/
117 
118 static em_vendor_info_t em_vendor_info_array[] =
119 {
120 	/* Intel(R) PRO/1000 Network Connection */
121 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
122 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
123 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
124 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
125 						PCI_ANY_ID, PCI_ANY_ID, 0},
126 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
127 						PCI_ANY_ID, PCI_ANY_ID, 0},
128 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
129 						PCI_ANY_ID, PCI_ANY_ID, 0},
130 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
131 						PCI_ANY_ID, PCI_ANY_ID, 0},
132 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
133 						PCI_ANY_ID, PCI_ANY_ID, 0},
134 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
135 						PCI_ANY_ID, PCI_ANY_ID, 0},
136 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
138 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
139 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
140 
141 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
142 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
143 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
144 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
145 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
146 						PCI_ANY_ID, PCI_ANY_ID, 0},
147 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
148 						PCI_ANY_ID, PCI_ANY_ID, 0},
149 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
150 						PCI_ANY_ID, PCI_ANY_ID, 0},
151 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
152 						PCI_ANY_ID, PCI_ANY_ID, 0},
153 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
154 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
155 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
156 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
157 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
158 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
159 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
160 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
161 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
162 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
163 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
164 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
165 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
166 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
167 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
168 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
169 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
170 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
171 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
172 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
173 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
174 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
175 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
176 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
177 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
178 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
179 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
180 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
181 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
182 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
183 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
184 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
185 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
186 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
187 						PCI_ANY_ID, PCI_ANY_ID, 0},
188 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
189 						PCI_ANY_ID, PCI_ANY_ID, 0},
190 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
191 	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
192 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
193 	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
194 	/* required last entry */
195 	{ 0, 0, 0, 0, 0}
196 };
197 
198 /*********************************************************************
199  *  Table of branding strings for all supported NICs.
200  *********************************************************************/
201 
202 static char *em_strings[] = {
203 	"Intel(R) PRO/1000 Network Connection"
204 };
205 
206 /*********************************************************************
207  *  Function prototypes
208  *********************************************************************/
209 static int	em_probe(device_t);
210 static int	em_attach(device_t);
211 static int	em_detach(device_t);
212 static int	em_shutdown(device_t);
213 static int	em_suspend(device_t);
214 static int	em_resume(device_t);
215 #ifdef EM_MULTIQUEUE
216 static int	em_mq_start(if_t, struct mbuf *);
217 static int	em_mq_start_locked(if_t,
218 		    struct tx_ring *);
219 static void	em_qflush(if_t);
220 #else
221 static void	em_start(if_t);
222 static void	em_start_locked(if_t, struct tx_ring *);
223 #endif
224 static int	em_ioctl(if_t, u_long, caddr_t);
225 static uint64_t	em_get_counter(if_t, ift_counter);
226 static void	em_init(void *);
227 static void	em_init_locked(struct adapter *);
228 static void	em_stop(void *);
229 static void	em_media_status(if_t, struct ifmediareq *);
230 static int	em_media_change(if_t);
231 static void	em_identify_hardware(struct adapter *);
232 static int	em_allocate_pci_resources(struct adapter *);
233 static int	em_allocate_legacy(struct adapter *);
234 static int	em_allocate_msix(struct adapter *);
235 static int	em_allocate_queues(struct adapter *);
236 static int	em_setup_msix(struct adapter *);
237 static void	em_free_pci_resources(struct adapter *);
238 static void	em_local_timer(void *);
239 static void	em_reset(struct adapter *);
240 static int	em_setup_interface(device_t, struct adapter *);
241 
242 static void	em_setup_transmit_structures(struct adapter *);
243 static void	em_initialize_transmit_unit(struct adapter *);
244 static int	em_allocate_transmit_buffers(struct tx_ring *);
245 static void	em_free_transmit_structures(struct adapter *);
246 static void	em_free_transmit_buffers(struct tx_ring *);
247 
248 static int	em_setup_receive_structures(struct adapter *);
249 static int	em_allocate_receive_buffers(struct rx_ring *);
250 static void	em_initialize_receive_unit(struct adapter *);
251 static void	em_free_receive_structures(struct adapter *);
252 static void	em_free_receive_buffers(struct rx_ring *);
253 
254 static void	em_enable_intr(struct adapter *);
255 static void	em_disable_intr(struct adapter *);
256 static void	em_update_stats_counters(struct adapter *);
257 static void	em_add_hw_stats(struct adapter *adapter);
258 static void	em_txeof(struct tx_ring *);
259 static bool	em_rxeof(struct rx_ring *, int, int *);
260 #ifndef __NO_STRICT_ALIGNMENT
261 static int	em_fixup_rx(struct rx_ring *);
262 #endif
263 static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
264 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
265 		    struct ip *, u32 *, u32 *);
266 static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
267 		    struct tcphdr *, u32 *, u32 *);
268 static void	em_set_promisc(struct adapter *);
269 static void	em_disable_promisc(struct adapter *);
270 static void	em_set_multi(struct adapter *);
271 static void	em_update_link_status(struct adapter *);
272 static void	em_refresh_mbufs(struct rx_ring *, int);
273 static void	em_register_vlan(void *, if_t, u16);
274 static void	em_unregister_vlan(void *, if_t, u16);
275 static void	em_setup_vlan_hw_support(struct adapter *);
276 static int	em_xmit(struct tx_ring *, struct mbuf **);
277 static int	em_dma_malloc(struct adapter *, bus_size_t,
278 		    struct em_dma_alloc *, int);
279 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
280 static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
281 static void	em_print_nvm_info(struct adapter *);
282 static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
283 static void	em_print_debug_info(struct adapter *);
284 static int 	em_is_valid_ether_addr(u8 *);
285 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
286 static void	em_add_int_delay_sysctl(struct adapter *, const char *,
287 		    const char *, struct em_int_delay_info *, int, int);
288 /* Management and WOL Support */
289 static void	em_init_manageability(struct adapter *);
290 static void	em_release_manageability(struct adapter *);
291 static void     em_get_hw_control(struct adapter *);
292 static void     em_release_hw_control(struct adapter *);
293 static void	em_get_wakeup(device_t);
294 static void     em_enable_wakeup(device_t);
295 static int	em_enable_phy_wakeup(struct adapter *);
296 static void	em_led_func(void *, int);
297 static void	em_disable_aspm(struct adapter *);
298 
299 static int	em_irq_fast(void *);
300 
301 /* MSIX handlers */
302 static void	em_msix_tx(void *);
303 static void	em_msix_rx(void *);
304 static void	em_msix_link(void *);
305 static void	em_handle_tx(void *context, int pending);
306 static void	em_handle_rx(void *context, int pending);
307 static void	em_handle_link(void *context, int pending);
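/* Combined RX/TX handler for the legacy/MSI taskqueue (see em_irq_fast()) */
static void	em_handle_que(void *context, int pending);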
308 
309 #ifdef EM_MULTIQUEUE
310 static void	em_enable_vectors_82574(struct adapter *);
311 #endif
312 
313 static void	em_set_sysctl_value(struct adapter *, const char *,
314 		    const char *, int *, int);
315 static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
316 static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
317 
318 static __inline void em_rx_discard(struct rx_ring *, int);
319 
320 #ifdef DEVICE_POLLING
321 static poll_handler_t em_poll;
322 #endif /* DEVICE_POLLING */
323 
324 /*********************************************************************
325  *  FreeBSD Device Interface Entry Points
326  *********************************************************************/
327 
328 static device_method_t em_methods[] = {
329 	/* Device interface */
330 	DEVMETHOD(device_probe, em_probe),
331 	DEVMETHOD(device_attach, em_attach),
332 	DEVMETHOD(device_detach, em_detach),
333 	DEVMETHOD(device_shutdown, em_shutdown),
334 	DEVMETHOD(device_suspend, em_suspend),
335 	DEVMETHOD(device_resume, em_resume),
336 	DEVMETHOD_END
337 };
338 
339 static driver_t em_driver = {
340 	"em", em_methods, sizeof(struct adapter),
341 };
342 
343 devclass_t em_devclass;
344 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
345 MODULE_DEPEND(em, pci, 1, 1, 1);
346 MODULE_DEPEND(em, ether, 1, 1, 1);
347 #ifdef DEV_NETMAP
348 MODULE_DEPEND(em, netmap, 1, 1, 1);
349 #endif /* DEV_NETMAP */
350 
351 /*********************************************************************
352  *  Tunable default values.
353  *********************************************************************/
354 
355 #define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
356 #define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
357 #define M_TSO_LEN			66
358 
359 #define MAX_INTS_PER_SEC	8000
360 #define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
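/*
 * A sketch of the arithmetic above, assuming the ITR register counts in
 * 256-nanosecond units as on other e1000-class parts:
 *
 *	DEFAULT_ITR = 1000000000 / (8000 * 256) = 488
 *
 * i.e. roughly one interrupt every 488 * 256ns ~= 125us, capping the rate
 * near MAX_INTS_PER_SEC.  EM_TICKS_TO_USECS/EM_USECS_TO_TICKS likewise
 * convert between microseconds and the 1.024us "ticks" the delay
 * registers use; the +500/+512 terms round to nearest in integer math.
 */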
361 
362 /* Allow common code without TSO */
363 #ifndef CSUM_TSO
364 #define CSUM_TSO	0
365 #endif
366 
367 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
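/*
 * The CTLFLAG_RDTUN sysctls below are also loader tunables; purely as an
 * illustration, a /boot/loader.conf might contain:
 *
 *	hw.em.txd="2048"
 *	hw.em.rxd="2048"
 *	hw.em.rx_int_delay="0"
 *
 * (values are examples only; descriptor counts are validated against the
 * EM_MIN_TXD/EM_MAX_TXD limits and friends in em_attach()).
 */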
368 
369 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
370 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
371 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
372     0, "Default transmit interrupt delay in usecs");
373 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
374     0, "Default receive interrupt delay in usecs");
375 
376 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
377 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
378 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
379     &em_tx_abs_int_delay_dflt, 0,
380     "Default transmit interrupt delay limit in usecs");
381 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
382     &em_rx_abs_int_delay_dflt, 0,
383     "Default receive interrupt delay limit in usecs");
384 
385 static int em_rxd = EM_DEFAULT_RXD;
386 static int em_txd = EM_DEFAULT_TXD;
387 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
388     "Number of receive descriptors per queue");
389 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
390     "Number of transmit descriptors per queue");
391 
392 static int em_smart_pwr_down = FALSE;
393 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
394     0, "Set to true to leave smart power down enabled on newer adapters");
395 
396 /* Controls whether promiscuous mode also shows bad packets */
397 static int em_debug_sbp = FALSE;
398 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
399     "Show bad packets in promiscuous mode");
400 
401 static int em_enable_msix = TRUE;
402 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
403     "Enable MSI-X interrupts");
404 
405 #ifdef EM_MULTIQUEUE
406 static int em_num_queues = 1;
407 SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
408     "82574 only: Number of queues to configure, 0 indicates autoconfigure");
409 #endif
410 
411 /*
412 ** Global variable to store last used CPU when binding queues
413 ** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments when a
414 ** queue is bound to a cpu.
415 */
416 static int em_last_bind_cpu = -1;
417 
418 /* How many packets rxeof tries to clean at a time */
419 static int em_rx_process_limit = 100;
420 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
421     &em_rx_process_limit, 0,
422     "Maximum number of received packets to process "
423     "at a time, -1 means unlimited");
424 
425 /* Energy Efficient Ethernet - default to OFF */
426 static int eee_setting = 1;
427 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
428     "Enable Energy Efficient Ethernet");
429 
430 /* Global used in WOL setup with multiport cards */
431 static int global_quad_port_a = 0;
432 
433 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
434 #include <dev/netmap/if_em_netmap.h>
435 #endif /* DEV_NETMAP */
436 
437 /*********************************************************************
438  *  Device identification routine
439  *
440  *  em_probe determines if the driver should be loaded on
441  *  an adapter based on the PCI vendor/device ID of the adapter.
442  *
443  *  return BUS_PROBE_DEFAULT on success, positive on failure
444  *********************************************************************/
445 
446 static int
447 em_probe(device_t dev)
448 {
449 	char		adapter_name[60];
450 	uint16_t	pci_vendor_id = 0;
451 	uint16_t	pci_device_id = 0;
452 	uint16_t	pci_subvendor_id = 0;
453 	uint16_t	pci_subdevice_id = 0;
454 	em_vendor_info_t *ent;
455 
456 	INIT_DEBUGOUT("em_probe: begin");
457 
458 	pci_vendor_id = pci_get_vendor(dev);
459 	if (pci_vendor_id != EM_VENDOR_ID)
460 		return (ENXIO);
461 
462 	pci_device_id = pci_get_device(dev);
463 	pci_subvendor_id = pci_get_subvendor(dev);
464 	pci_subdevice_id = pci_get_subdevice(dev);
465 
466 	ent = em_vendor_info_array;
467 	while (ent->vendor_id != 0) {
468 		if ((pci_vendor_id == ent->vendor_id) &&
469 		    (pci_device_id == ent->device_id) &&
470 
471 		    ((pci_subvendor_id == ent->subvendor_id) ||
472 		    (ent->subvendor_id == PCI_ANY_ID)) &&
473 
474 		    ((pci_subdevice_id == ent->subdevice_id) ||
475 		    (ent->subdevice_id == PCI_ANY_ID))) {
476 			snprintf(adapter_name, sizeof(adapter_name), "%s %s",
477 				em_strings[ent->index],
478 				em_driver_version);
479 			device_set_desc_copy(dev, adapter_name);
480 			return (BUS_PROBE_DEFAULT);
481 		}
482 		ent++;
483 	}
484 
485 	return (ENXIO);
486 }
487 
488 /*********************************************************************
489  *  Device initialization routine
490  *
491  *  The attach entry point is called when the driver is being loaded.
492  *  This routine identifies the type of hardware, allocates all resources
493  *  and initializes the hardware.
494  *
495  *  return 0 on success, positive on failure
496  *********************************************************************/
497 
498 static int
499 em_attach(device_t dev)
500 {
501 	struct adapter	*adapter;
502 	struct e1000_hw	*hw;
503 	int		error = 0;
504 
505 	INIT_DEBUGOUT("em_attach: begin");
506 
507 	if (resource_disabled("em", device_get_unit(dev))) {
508 		device_printf(dev, "Disabled by device hint\n");
509 		return (ENXIO);
510 	}
511 
512 	adapter = device_get_softc(dev);
513 	adapter->dev = adapter->osdep.dev = dev;
514 	hw = &adapter->hw;
515 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
516 
517 	/* SYSCTL stuff */
518 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
519 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
520 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
521 	    em_sysctl_nvm_info, "I", "NVM Information");
522 
523 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
524 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
525 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
526 	    em_sysctl_debug_info, "I", "Debug Information");
527 
528 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
529 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
530 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
531 	    em_set_flowcntl, "I", "Flow Control");
532 
533 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
534 
535 	/* Determine hardware and mac info */
536 	em_identify_hardware(adapter);
537 
538 	/* Setup PCI resources */
539 	if (em_allocate_pci_resources(adapter)) {
540 		device_printf(dev, "Allocation of PCI resources failed\n");
541 		error = ENXIO;
542 		goto err_pci;
543 	}
544 
545 	/*
546 	** For ICH8 and family we need to
547 	** map the flash memory, and this
548 	** must happen after the MAC is
549 	** identified
550 	*/
551 	if ((hw->mac.type == e1000_ich8lan) ||
552 	    (hw->mac.type == e1000_ich9lan) ||
553 	    (hw->mac.type == e1000_ich10lan) ||
554 	    (hw->mac.type == e1000_pchlan) ||
555 	    (hw->mac.type == e1000_pch2lan) ||
556 	    (hw->mac.type == e1000_pch_lpt)) {
557 		int rid = EM_BAR_TYPE_FLASH;
558 		adapter->flash = bus_alloc_resource_any(dev,
559 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
560 		if (adapter->flash == NULL) {
561 			device_printf(dev, "Mapping of Flash failed\n");
562 			error = ENXIO;
563 			goto err_pci;
564 		}
565 		/* This is used in the shared code */
566 		hw->flash_address = (u8 *)adapter->flash;
567 		adapter->osdep.flash_bus_space_tag =
568 		    rman_get_bustag(adapter->flash);
569 		adapter->osdep.flash_bus_space_handle =
570 		    rman_get_bushandle(adapter->flash);
571 	}
572 
573 	/* Do Shared Code initialization */
574 	if (e1000_setup_init_funcs(hw, TRUE)) {
575 		device_printf(dev, "Setup of Shared code failed\n");
576 		error = ENXIO;
577 		goto err_pci;
578 	}
579 
580 	/*
581 	 * Setup MSI/X or MSI if PCI Express
582 	 */
583 	adapter->msix = em_setup_msix(adapter);
584 
585 	e1000_get_bus_info(hw);
586 
587 	/* Set up some sysctls for the tunable interrupt delays */
588 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
589 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
590 	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
591 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
592 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
593 	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
594 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
595 	    "receive interrupt delay limit in usecs",
596 	    &adapter->rx_abs_int_delay,
597 	    E1000_REGISTER(hw, E1000_RADV),
598 	    em_rx_abs_int_delay_dflt);
599 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
600 	    "transmit interrupt delay limit in usecs",
601 	    &adapter->tx_abs_int_delay,
602 	    E1000_REGISTER(hw, E1000_TADV),
603 	    em_tx_abs_int_delay_dflt);
604 	em_add_int_delay_sysctl(adapter, "itr",
605 	    "interrupt delay limit in usecs/4",
606 	    &adapter->tx_itr,
607 	    E1000_REGISTER(hw, E1000_ITR),
608 	    DEFAULT_ITR);
609 
610 	/* Sysctl for limiting the amount of work done in the taskqueue */
611 	em_set_sysctl_value(adapter, "rx_processing_limit",
612 	    "max number of rx packets to process", &adapter->rx_process_limit,
613 	    em_rx_process_limit);
614 
615 	/*
616 	 * Validate the number of transmit and receive descriptors.  They
617 	 * must not exceed the hardware maximum, and the ring size in
618 	 * bytes must be a multiple of EM_DBA_ALIGN.
619 	 */
620 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
621 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
622 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
623 		    EM_DEFAULT_TXD, em_txd);
624 		adapter->num_tx_desc = EM_DEFAULT_TXD;
625 	} else
626 		adapter->num_tx_desc = em_txd;
627 
628 	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
629 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
630 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
631 		    EM_DEFAULT_RXD, em_rxd);
632 		adapter->num_rx_desc = EM_DEFAULT_RXD;
633 	} else
634 		adapter->num_rx_desc = em_rxd;
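	/*
	 * Worked example of the checks above, assuming the 16-byte
	 * legacy descriptor and an EM_DBA_ALIGN of 128 from if_em.h:
	 * the ring must fill a whole number of 128-byte aligned blocks,
	 * so the descriptor count must be a multiple of 128 / 16 = 8.
	 */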
635 
636 	hw->mac.autoneg = DO_AUTO_NEG;
637 	hw->phy.autoneg_wait_to_complete = FALSE;
638 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
639 
640 	/* Copper options */
641 	if (hw->phy.media_type == e1000_media_type_copper) {
642 		hw->phy.mdix = AUTO_ALL_MODES;
643 		hw->phy.disable_polarity_correction = FALSE;
644 		hw->phy.ms_type = EM_MASTER_SLAVE;
645 	}
646 
647 	/*
648 	 * Set the frame limits assuming
649 	 * standard ethernet sized frames.
650 	 */
651 	adapter->hw.mac.max_frame_size =
652 	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
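	/*
	 * With the standard 1500-byte MTU this is the classic 1518-byte
	 * maximum frame: 1500 + 14 (Ethernet header) + 4 (FCS).
	 */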
653 
654 	/*
655 	 * This controls when hardware reports transmit completion
656 	 * status.
657 	 */
658 	hw->mac.report_tx_early = 1;
659 
660 	/*
661 	** Get queue/ring memory
662 	*/
663 	if (em_allocate_queues(adapter)) {
664 		error = ENOMEM;
665 		goto err_pci;
666 	}
667 
668 	/* Allocate multicast array memory. */
669 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
670 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
671 	if (adapter->mta == NULL) {
672 		device_printf(dev, "Can not allocate multicast setup array\n");
673 		error = ENOMEM;
674 		goto err_late;
675 	}
676 
677 	/* Check SOL/IDER usage */
678 	if (e1000_check_reset_block(hw))
679 		device_printf(dev, "PHY reset is blocked"
680 		    " due to SOL/IDER session.\n");
681 
682 	/* Sysctl for setting Energy Efficient Ethernet */
683 	hw->dev_spec.ich8lan.eee_disable = eee_setting;
684 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
685 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
686 	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
687 	    adapter, 0, em_sysctl_eee, "I",
688 	    "Disable Energy Efficient Ethernet");
689 
690 	/*
691 	** Start from a known state; this is
692 	** important for reliably reading the
693 	** NVM and MAC address.
694 	*/
695 	e1000_reset_hw(hw);
696 
697 
698 	/* Make sure we have a good EEPROM before we read from it */
699 	if (e1000_validate_nvm_checksum(hw) < 0) {
700 		/*
701 		** Some PCI-E parts fail the first check due to
702 		** the link being in a sleep state; call it again.
703 		** If it fails a second time, it is a real issue.
704 		*/
705 		if (e1000_validate_nvm_checksum(hw) < 0) {
706 			device_printf(dev,
707 			    "The EEPROM Checksum Is Not Valid\n");
708 			error = EIO;
709 			goto err_late;
710 		}
711 	}
712 
713 	/* Copy the permanent MAC address out of the EEPROM */
714 	if (e1000_read_mac_addr(hw) < 0) {
715 		device_printf(dev, "EEPROM read error while reading MAC"
716 		    " address\n");
717 		error = EIO;
718 		goto err_late;
719 	}
720 
721 	if (!em_is_valid_ether_addr(hw->mac.addr)) {
722 		device_printf(dev, "Invalid MAC address\n");
723 		error = EIO;
724 		goto err_late;
725 	}
726 
727 	/* Disable ULP support */
728 	e1000_disable_ulp_lpt_lp(hw, TRUE);
729 
730 	/*
731 	**  Do interrupt configuration
732 	*/
733 	if (adapter->msix > 1) /* Do MSIX */
734 		error = em_allocate_msix(adapter);
735 	else  /* MSI or Legacy */
736 		error = em_allocate_legacy(adapter);
737 	if (error)
738 		goto err_late;
739 
740 	/*
741 	 * Get Wake-on-Lan and Management info for later use
742 	 */
743 	em_get_wakeup(dev);
744 
745 	/* Setup OS specific network interface */
746 	if (em_setup_interface(dev, adapter) != 0)
747 		goto err_late;
748 
749 	em_reset(adapter);
750 
751 	/* Initialize statistics */
752 	em_update_stats_counters(adapter);
753 
754 	hw->mac.get_link_status = 1;
755 	em_update_link_status(adapter);
756 
757 	/* Register for VLAN events */
758 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
759 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
760 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
761 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
762 
763 	em_add_hw_stats(adapter);
764 
765 	/* On non-AMT hardware the driver can now take control from firmware */
766 	if (adapter->has_manage && !adapter->has_amt)
767 		em_get_hw_control(adapter);
768 
769 	/* Tell the stack that the interface is not active */
770 	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
771 
772 	adapter->led_dev = led_create(em_led_func, adapter,
773 	    device_get_nameunit(dev));
774 #ifdef DEV_NETMAP
775 	em_netmap_attach(adapter);
776 #endif /* DEV_NETMAP */
777 
778 	INIT_DEBUGOUT("em_attach: end");
779 
780 	return (0);
781 
782 err_late:
783 	em_free_transmit_structures(adapter);
784 	em_free_receive_structures(adapter);
785 	em_release_hw_control(adapter);
786 	if (adapter->ifp != (void *)NULL)
787 		if_free(adapter->ifp);
788 err_pci:
789 	em_free_pci_resources(adapter);
790 	free(adapter->mta, M_DEVBUF);
791 	EM_CORE_LOCK_DESTROY(adapter);
792 
793 	return (error);
794 }
795 
796 /*********************************************************************
797  *  Device removal routine
798  *
799  *  The detach entry point is called when the driver is being removed.
800  *  This routine stops the adapter and deallocates all the resources
801  *  that were allocated for driver operation.
802  *
803  *  return 0 on success, positive on failure
804  *********************************************************************/
805 
806 static int
807 em_detach(device_t dev)
808 {
809 	struct adapter	*adapter = device_get_softc(dev);
810 	if_t ifp = adapter->ifp;
811 
812 	INIT_DEBUGOUT("em_detach: begin");
813 
814 	/* Make sure VLANS are not using driver */
815 	if (if_vlantrunkinuse(ifp)) {
816 		device_printf(dev,"Vlan in use, detach first\n");
817 		return (EBUSY);
818 	}
819 
820 #ifdef DEVICE_POLLING
821 	if (if_getcapenable(ifp) & IFCAP_POLLING)
822 		ether_poll_deregister(ifp);
823 #endif
824 
825 	if (adapter->led_dev != NULL)
826 		led_destroy(adapter->led_dev);
827 
828 	EM_CORE_LOCK(adapter);
829 	adapter->in_detach = 1;
830 	em_stop(adapter);
831 	EM_CORE_UNLOCK(adapter);
832 	EM_CORE_LOCK_DESTROY(adapter);
833 
834 	e1000_phy_hw_reset(&adapter->hw);
835 
836 	em_release_manageability(adapter);
837 	em_release_hw_control(adapter);
838 
839 	/* Unregister VLAN events */
840 	if (adapter->vlan_attach != NULL)
841 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
842 	if (adapter->vlan_detach != NULL)
843 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
844 
845 	ether_ifdetach(adapter->ifp);
846 	callout_drain(&adapter->timer);
847 
848 #ifdef DEV_NETMAP
849 	netmap_detach(ifp);
850 #endif /* DEV_NETMAP */
851 
852 	em_free_pci_resources(adapter);
853 	bus_generic_detach(dev);
854 	if_free(ifp);
855 
856 	em_free_transmit_structures(adapter);
857 	em_free_receive_structures(adapter);
858 
859 	em_release_hw_control(adapter);
860 	free(adapter->mta, M_DEVBUF);
861 
862 	return (0);
863 }
864 
865 /*********************************************************************
866  *
867  *  Shutdown entry point
868  *
869  **********************************************************************/
870 
871 static int
872 em_shutdown(device_t dev)
873 {
874 	return em_suspend(dev);
875 }
876 
877 /*
878  * Suspend/resume device methods.
879  */
880 static int
881 em_suspend(device_t dev)
882 {
883 	struct adapter *adapter = device_get_softc(dev);
884 
885 	EM_CORE_LOCK(adapter);
886 
887 	em_release_manageability(adapter);
888 	em_release_hw_control(adapter);
889 	em_enable_wakeup(dev);
890 
891 	EM_CORE_UNLOCK(adapter);
892 
893 	return bus_generic_suspend(dev);
894 }
895 
896 static int
897 em_resume(device_t dev)
898 {
899 	struct adapter *adapter = device_get_softc(dev);
900 	struct tx_ring	*txr = adapter->tx_rings;
901 	if_t ifp = adapter->ifp;
902 
903 	EM_CORE_LOCK(adapter);
904 	if (adapter->hw.mac.type == e1000_pch2lan)
905 		e1000_resume_workarounds_pchlan(&adapter->hw);
906 	em_init_locked(adapter);
907 	em_init_manageability(adapter);
908 
909 	if ((if_getflags(ifp) & IFF_UP) &&
910 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
911 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
912 			EM_TX_LOCK(txr);
913 #ifdef EM_MULTIQUEUE
914 			if (!drbr_empty(ifp, txr->br))
915 				em_mq_start_locked(ifp, txr);
916 #else
917 			if (!if_sendq_empty(ifp))
918 				em_start_locked(ifp, txr);
919 #endif
920 			EM_TX_UNLOCK(txr);
921 		}
922 	}
923 	EM_CORE_UNLOCK(adapter);
924 
925 	return bus_generic_resume(dev);
926 }
927 
928 
929 #ifndef EM_MULTIQUEUE
930 static void
931 em_start_locked(if_t ifp, struct tx_ring *txr)
932 {
933 	struct adapter	*adapter = if_getsoftc(ifp);
934 	struct mbuf	*m_head;
935 
936 	EM_TX_LOCK_ASSERT(txr);
937 
938 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
939 	    IFF_DRV_RUNNING)
940 		return;
941 
942 	if (!adapter->link_active)
943 		return;
944 
945 	while (!if_sendq_empty(ifp)) {
946 		/* Call cleanup if the number of TX descriptors is low */
947 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
948 			em_txeof(txr);
949 		if (txr->tx_avail < EM_MAX_SCATTER) {
950 			if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
951 			break;
952 		}
953 		m_head = if_dequeue(ifp);
954 		if (m_head == NULL)
955 			break;
956 		/*
957 		 *  Encapsulation can modify our pointer, and/or make it
958 		 *  NULL on failure.  In that event, we can't requeue.
959 		 */
960 		if (em_xmit(txr, &m_head)) {
961 			if (m_head == NULL)
962 				break;
963 			if_sendq_prepend(ifp, m_head);
964 			break;
965 		}
966 
967 		/* Mark the queue as having work */
968 		if (txr->busy == EM_TX_IDLE)
969 			txr->busy = EM_TX_BUSY;
970 
971 		/* Send a copy of the frame to the BPF listener */
972 		ETHER_BPF_MTAP(ifp, m_head);
973 
974 	}
975 
976 	return;
977 }
978 
979 static void
980 em_start(if_t ifp)
981 {
982 	struct adapter	*adapter = if_getsoftc(ifp);
983 	struct tx_ring	*txr = adapter->tx_rings;
984 
985 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
986 		EM_TX_LOCK(txr);
987 		em_start_locked(ifp, txr);
988 		EM_TX_UNLOCK(txr);
989 	}
990 	return;
991 }
992 #else /* EM_MULTIQUEUE */
993 /*********************************************************************
994  *  Multiqueue Transmit routines
995  *
996  *  em_mq_start is called by the stack to initiate a transmit.
997  *  However, if the ring is busy the driver can queue the request
998  *  rather than do an immediate send; that deferral is the advantage
999  *  of this scheme, in addition to having multiple TX queues.
1000  **********************************************************************/
1001 /*
1002 ** Multiqueue capable stack interface
1003 */
1004 static int
1005 em_mq_start(if_t ifp, struct mbuf *m)
1006 {
1007 	struct adapter	*adapter = if_getsoftc(ifp);
1008 	struct tx_ring	*txr = adapter->tx_rings;
1009 	unsigned int	i, error;
1010 
1011 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1012 		i = m->m_pkthdr.flowid % adapter->num_queues;
1013 	else
1014 		i = curcpu % adapter->num_queues;
1015 
1016 	txr = &adapter->tx_rings[i];
1017 
1018 	error = drbr_enqueue(ifp, txr->br, m);
1019 	if (error)
1020 		return (error);
1021 
1022 	if (EM_TX_TRYLOCK(txr)) {
1023 		em_mq_start_locked(ifp, txr);
1024 		EM_TX_UNLOCK(txr);
1025 	} else
1026 		taskqueue_enqueue(txr->tq, &txr->tx_task);
1027 
1028 	return (0);
1029 }
1030 
1031 static int
1032 em_mq_start_locked(if_t ifp, struct tx_ring *txr)
1033 {
1034 	struct adapter  *adapter = txr->adapter;
1035 	struct mbuf	*next;
1036 	int		err = 0, enq = 0;
1037 
1038 	EM_TX_LOCK_ASSERT(txr);
1039 
1040 	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
1041 	    adapter->link_active == 0) {
1042 		return (ENETDOWN);
1043 	}
1044 
1045 	/* Process the queue */
1046 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1047 		if ((err = em_xmit(txr, &next)) != 0) {
1048 			if (next == NULL) {
1049 				/* It was freed, move forward */
1050 				drbr_advance(ifp, txr->br);
1051 			} else {
1052 				/*
1053 				 * Still have one left, it may not be
1054 				 * the same since the transmit function
1055 				 * may have changed it.
1056 				 */
1057 				drbr_putback(ifp, txr->br, next);
1058 			}
1059 			break;
1060 		}
1061 		drbr_advance(ifp, txr->br);
1062 		enq++;
1063 		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
1064 		if (next->m_flags & M_MCAST)
1065 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
1066 		ETHER_BPF_MTAP(ifp, next);
1067 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
1068 			break;
1069 	}
1070 
1071 	/* Mark the queue as having work */
1072 	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1073 		txr->busy = EM_TX_BUSY;
1074 
1075 	if (txr->tx_avail < EM_MAX_SCATTER)
1076 		em_txeof(txr);
1077 	if (txr->tx_avail < EM_MAX_SCATTER) {
1078 		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
1079 	}
1080 	return (err);
1081 }
1082 
1083 /*
1084 ** Flush all ring buffers
1085 */
1086 static void
1087 em_qflush(if_t ifp)
1088 {
1089 	struct adapter  *adapter = if_getsoftc(ifp);
1090 	struct tx_ring  *txr = adapter->tx_rings;
1091 	struct mbuf     *m;
1092 
1093 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1094 		EM_TX_LOCK(txr);
1095 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1096 			m_freem(m);
1097 		EM_TX_UNLOCK(txr);
1098 	}
1099 	if_qflush(ifp);
1100 }
1101 #endif /* EM_MULTIQUEUE */
1102 
1103 /*********************************************************************
1104  *  Ioctl entry point
1105  *
1106  *  em_ioctl is called when the user wants to configure the
1107  *  interface.
1108  *
1109  *  return 0 on success, positive on failure
1110  **********************************************************************/
1111 
1112 static int
1113 em_ioctl(if_t ifp, u_long command, caddr_t data)
1114 {
1115 	struct adapter	*adapter = if_getsoftc(ifp);
1116 	struct ifreq	*ifr = (struct ifreq *)data;
1117 #if defined(INET) || defined(INET6)
1118 	struct ifaddr	*ifa = (struct ifaddr *)data;
1119 #endif
1120 	bool		avoid_reset = FALSE;
1121 	int		error = 0;
1122 
1123 	if (adapter->in_detach)
1124 		return (error);
1125 
1126 	switch (command) {
1127 	case SIOCSIFADDR:
1128 #ifdef INET
1129 		if (ifa->ifa_addr->sa_family == AF_INET)
1130 			avoid_reset = TRUE;
1131 #endif
1132 #ifdef INET6
1133 		if (ifa->ifa_addr->sa_family == AF_INET6)
1134 			avoid_reset = TRUE;
1135 #endif
1136 		/*
1137 		** Calling init results in link renegotiation,
1138 		** so we avoid doing it when possible.
1139 		*/
1140 		if (avoid_reset) {
1141 			if_setflagbits(ifp,IFF_UP,0);
1142 			if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
1143 				em_init(adapter);
1144 #ifdef INET
1145 			if (!(if_getflags(ifp) & IFF_NOARP))
1146 				arp_ifinit(ifp, ifa);
1147 #endif
1148 		} else
1149 			error = ether_ioctl(ifp, command, data);
1150 		break;
1151 	case SIOCSIFMTU:
1152 	    {
1153 		int max_frame_size;
1154 
1155 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1156 
1157 		EM_CORE_LOCK(adapter);
1158 		switch (adapter->hw.mac.type) {
1159 		case e1000_82571:
1160 		case e1000_82572:
1161 		case e1000_ich9lan:
1162 		case e1000_ich10lan:
1163 		case e1000_pch2lan:
1164 		case e1000_pch_lpt:
1165 		case e1000_82574:
1166 		case e1000_82583:
1167 		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1168 			max_frame_size = 9234;
1169 			break;
1170 		case e1000_pchlan:
1171 			max_frame_size = 4096;
1172 			break;
1173 			/* Adapters that do not support jumbo frames */
1174 		case e1000_ich8lan:
1175 			max_frame_size = ETHER_MAX_LEN;
1176 			break;
1177 		default:
1178 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1179 		}
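		/*
		 * Example: for the 9234-byte parts above, the largest
		 * accepted MTU is 9234 - ETHER_HDR_LEN (14) -
		 * ETHER_CRC_LEN (4) = 9216 bytes.
		 */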
1180 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1181 		    ETHER_CRC_LEN) {
1182 			EM_CORE_UNLOCK(adapter);
1183 			error = EINVAL;
1184 			break;
1185 		}
1186 
1187 		if_setmtu(ifp, ifr->ifr_mtu);
1188 		adapter->hw.mac.max_frame_size =
1189 		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
1190 		em_init_locked(adapter);
1191 		EM_CORE_UNLOCK(adapter);
1192 		break;
1193 	    }
1194 	case SIOCSIFFLAGS:
1195 		IOCTL_DEBUGOUT(
1196 		    "ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
1197 		EM_CORE_LOCK(adapter);
1198 		if (if_getflags(ifp) & IFF_UP) {
1199 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1200 				if ((if_getflags(ifp) ^ adapter->if_flags) &
1201 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1202 					em_disable_promisc(adapter);
1203 					em_set_promisc(adapter);
1204 				}
1205 			} else
1206 				em_init_locked(adapter);
1207 		} else
1208 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1209 				em_stop(adapter);
1210 		adapter->if_flags = if_getflags(ifp);
1211 		EM_CORE_UNLOCK(adapter);
1212 		break;
1213 	case SIOCADDMULTI:
1214 	case SIOCDELMULTI:
1215 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1216 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1217 			EM_CORE_LOCK(adapter);
1218 			em_disable_intr(adapter);
1219 			em_set_multi(adapter);
1220 #ifdef DEVICE_POLLING
1221 			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
1222 #endif
1223 				em_enable_intr(adapter);
1224 			EM_CORE_UNLOCK(adapter);
1225 		}
1226 		break;
1227 	case SIOCSIFMEDIA:
1228 		/* Check SOL/IDER usage */
1229 		EM_CORE_LOCK(adapter);
1230 		if (e1000_check_reset_block(&adapter->hw)) {
1231 			EM_CORE_UNLOCK(adapter);
1232 			device_printf(adapter->dev, "Media change is"
1233 			    " blocked due to SOL/IDER session.\n");
1234 			break;
1235 		}
1236 		EM_CORE_UNLOCK(adapter);
1237 		/* FALLTHROUGH */
1238 	case SIOCGIFMEDIA:
1239 		IOCTL_DEBUGOUT(
1240 		    "ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
1241 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1242 		break;
1243 	case SIOCSIFCAP:
1244 	    {
1245 		int mask, reinit;
1246 
1247 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1248 		reinit = 0;
1249 		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
1250 #ifdef DEVICE_POLLING
1251 		if (mask & IFCAP_POLLING) {
1252 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1253 				error = ether_poll_register(em_poll, ifp);
1254 				if (error)
1255 					return (error);
1256 				EM_CORE_LOCK(adapter);
1257 				em_disable_intr(adapter);
1258 				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
1259 				EM_CORE_UNLOCK(adapter);
1260 			} else {
1261 				error = ether_poll_deregister(ifp);
1262 				/* Enable interrupt even in error case */
1263 				EM_CORE_LOCK(adapter);
1264 				em_enable_intr(adapter);
1265 				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
1266 				EM_CORE_UNLOCK(adapter);
1267 			}
1268 		}
1269 #endif
1270 		if (mask & IFCAP_HWCSUM) {
1271 			if_togglecapenable(ifp,IFCAP_HWCSUM);
1272 			reinit = 1;
1273 		}
1274 		if (mask & IFCAP_TSO4) {
1275 			if_togglecapenable(ifp,IFCAP_TSO4);
1276 			reinit = 1;
1277 		}
1278 		if (mask & IFCAP_VLAN_HWTAGGING) {
1279 			if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
1280 			reinit = 1;
1281 		}
1282 		if (mask & IFCAP_VLAN_HWFILTER) {
1283 			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1284 			reinit = 1;
1285 		}
1286 		if (mask & IFCAP_VLAN_HWTSO) {
1287 			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1288 			reinit = 1;
1289 		}
1290 		if ((mask & IFCAP_WOL) &&
1291 		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
1292 			if (mask & IFCAP_WOL_MCAST)
1293 				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
1294 			if (mask & IFCAP_WOL_MAGIC)
1295 				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
1296 		}
1297 		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1298 			em_init(adapter);
1299 		if_vlancap(ifp);
1300 		break;
1301 	    }
1302 
1303 	default:
1304 		error = ether_ioctl(ifp, command, data);
1305 		break;
1306 	}
1307 
1308 	return (error);
1309 }
1310 
1311 
1312 /*********************************************************************
1313  *  Init entry point
1314  *
1315  *  This routine is used in two ways.  It is used by the stack as
1316  *  the init entry point in the network interface structure.  It is
1317  *  also used by the driver as a hw/sw initialization routine to get
1318  *  to a consistent state.
1321  **********************************************************************/
1322 
1323 static void
1324 em_init_locked(struct adapter *adapter)
1325 {
1326 	if_t ifp = adapter->ifp;
1327 	device_t	dev = adapter->dev;
1328 
1329 	INIT_DEBUGOUT("em_init: begin");
1330 
1331 	EM_CORE_LOCK_ASSERT(adapter);
1332 
1333 	em_disable_intr(adapter);
1334 	callout_stop(&adapter->timer);
1335 
1336 	/* Get the latest mac address, User can use a LAA */
1337 	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
1338 	    ETHER_ADDR_LEN);
1339 
1340 	/* Put the address into the Receive Address Array */
1341 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1342 
1343 	/*
1344 	 * With the 82571 adapter, RAR[0] may be overwritten
1345 	 * when the other port is reset.  We keep a duplicate
1346 	 * in RAR[14] for that eventuality, which assures
1347 	 * the interface continues to function.
1348 	 */
1349 	if (adapter->hw.mac.type == e1000_82571) {
1350 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1351 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1352 		    E1000_RAR_ENTRIES - 1);
1353 	}
1354 
1355 	/* Initialize the hardware */
1356 	em_reset(adapter);
1357 	em_update_link_status(adapter);
1358 
1359 	/* Setup VLAN support, basic and offload if available */
1360 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1361 
1362 	/* Set hardware offload abilities */
1363 	if_clearhwassist(ifp);
1364 	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
1365 		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
1366 	if (if_getcapenable(ifp) & IFCAP_TSO4)
1367 		if_sethwassistbits(ifp, CSUM_TSO, 0);
1368 
1369 	/* Configure for OS presence */
1370 	em_init_manageability(adapter);
1371 
1372 	/* Prepare transmit descriptors and buffers */
1373 	em_setup_transmit_structures(adapter);
1374 	em_initialize_transmit_unit(adapter);
1375 
1376 	/* Setup Multicast table */
1377 	em_set_multi(adapter);
1378 
1379 	/*
1380 	** Figure out the desired mbuf
1381 	** pool for doing jumbos
1382 	*/
1383 	if (adapter->hw.mac.max_frame_size <= 2048)
1384 		adapter->rx_mbuf_sz = MCLBYTES;
1385 	else if (adapter->hw.mac.max_frame_size <= 4096)
1386 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1387 	else
1388 		adapter->rx_mbuf_sz = MJUM9BYTES;
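	/*
	 * Cluster-size sketch (typical values: MCLBYTES 2048,
	 * MJUMPAGESIZE = PAGE_SIZE, MJUM9BYTES 9216): standard frames
	 * fit a 2K cluster, mid-size jumbos fit a page, and full 9K
	 * jumbos need the 9K pool.
	 */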
1389 
1390 	/* Prepare receive descriptors and buffers */
1391 	if (em_setup_receive_structures(adapter)) {
1392 		device_printf(dev, "Could not setup receive structures\n");
1393 		em_stop(adapter);
1394 		return;
1395 	}
1396 	em_initialize_receive_unit(adapter);
1397 
1398 	/* Use real VLAN Filter support? */
1399 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
1400 		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1401 			/* Use real VLAN Filter support */
1402 			em_setup_vlan_hw_support(adapter);
1403 		else {
1404 			u32 ctrl;
1405 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1406 			ctrl |= E1000_CTRL_VME;
1407 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1408 		}
1409 	}
1410 
1411 	/* Don't lose promiscuous settings */
1412 	em_set_promisc(adapter);
1413 
1414 	/* Set the interface as ACTIVE */
1415 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1416 
1417 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1418 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1419 
1420 	/* MSI/X configuration for 82574 */
1421 	if (adapter->hw.mac.type == e1000_82574) {
1422 		int tmp;
1423 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1424 		tmp |= E1000_CTRL_EXT_PBA_CLR;
1425 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1426 		/* Set the IVAR - interrupt vector routing. */
1427 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1428 	}
1429 
1430 #ifdef DEVICE_POLLING
1431 	/*
1432 	 * Only enable interrupts if we are not polling, make sure
1433 	 * they are off otherwise.
1434 	 */
1435 	if (if_getcapenable(ifp) & IFCAP_POLLING)
1436 		em_disable_intr(adapter);
1437 	else
1438 #endif /* DEVICE_POLLING */
1439 		em_enable_intr(adapter);
1440 
1441 	/* On AMT hardware the driver can now take control from firmware */
1442 	if (adapter->has_manage && adapter->has_amt)
1443 		em_get_hw_control(adapter);
1444 }
1445 
1446 static void
1447 em_init(void *arg)
1448 {
1449 	struct adapter *adapter = arg;
1450 
1451 	EM_CORE_LOCK(adapter);
1452 	em_init_locked(adapter);
1453 	EM_CORE_UNLOCK(adapter);
1454 }
1455 
1456 
1457 #ifdef DEVICE_POLLING
1458 /*********************************************************************
1459  *
1460  *  Legacy polling routine: note this only works with a single queue
1461  *
1462  *********************************************************************/
1463 static int
1464 em_poll(if_t ifp, enum poll_cmd cmd, int count)
1465 {
1466 	struct adapter *adapter = if_getsoftc(ifp);
1467 	struct tx_ring	*txr = adapter->tx_rings;
1468 	struct rx_ring	*rxr = adapter->rx_rings;
1469 	u32		reg_icr;
1470 	int		rx_done;
1471 
1472 	EM_CORE_LOCK(adapter);
1473 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
1474 		EM_CORE_UNLOCK(adapter);
1475 		return (0);
1476 	}
1477 
1478 	if (cmd == POLL_AND_CHECK_STATUS) {
1479 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1480 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1481 			callout_stop(&adapter->timer);
1482 			adapter->hw.mac.get_link_status = 1;
1483 			em_update_link_status(adapter);
1484 			callout_reset(&adapter->timer, hz,
1485 			    em_local_timer, adapter);
1486 		}
1487 	}
1488 	EM_CORE_UNLOCK(adapter);
1489 
1490 	em_rxeof(rxr, count, &rx_done);
1491 
1492 	EM_TX_LOCK(txr);
1493 	em_txeof(txr);
1494 #ifdef EM_MULTIQUEUE
1495 	if (!drbr_empty(ifp, txr->br))
1496 		em_mq_start_locked(ifp, txr);
1497 #else
1498 	if (!if_sendq_empty(ifp))
1499 		em_start_locked(ifp, txr);
1500 #endif
1501 	EM_TX_UNLOCK(txr);
1502 
1503 	return (rx_done);
1504 }
1505 #endif /* DEVICE_POLLING */
1506 
1507 
1508 /*********************************************************************
1509  *
1510  *  Fast Legacy/MSI Combined Interrupt Service routine
1511  *
1512  *********************************************************************/
1513 static int
1514 em_irq_fast(void *arg)
1515 {
1516 	struct adapter	*adapter = arg;
1517 	if_t ifp;
1518 	u32		reg_icr;
1519 
1520 	ifp = adapter->ifp;
1521 
1522 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1523 
1524 	/* Hot eject?  */
1525 	if (reg_icr == 0xffffffff)
1526 		return FILTER_STRAY;
1527 
1528 	/* Definitely not our interrupt.  */
1529 	if (reg_icr == 0x0)
1530 		return FILTER_STRAY;
1531 
1532 	/*
1533 	 * Starting with the 82571 chip, bit 31 should be used to
1534 	 * determine whether the interrupt belongs to us.
1535 	 */
1536 	if (adapter->hw.mac.type >= e1000_82571 &&
1537 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1538 		return FILTER_STRAY;
1539 
1540 	em_disable_intr(adapter);
1541 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1542 
1543 	/* Link status change */
1544 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1545 		adapter->hw.mac.get_link_status = 1;
1546 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1547 	}
1548 
1549 	if (reg_icr & E1000_ICR_RXO)
1550 		adapter->rx_overruns++;
1551 	return FILTER_HANDLED;
1552 }
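/*
 * em_irq_fast() runs as an interrupt filter: it only reads and
 * classifies ICR, then defers the actual RX/TX cleanup to
 * em_handle_que() through the taskqueue, minimizing the time spent
 * in hard-interrupt context.
 */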
1553 
1554 /* Combined RX/TX handler, used by Legacy and MSI */
1555 static void
1556 em_handle_que(void *context, int pending)
1557 {
1558 	struct adapter	*adapter = context;
1559 	if_t ifp = adapter->ifp;
1560 	struct tx_ring	*txr = adapter->tx_rings;
1561 	struct rx_ring	*rxr = adapter->rx_rings;
1562 
1563 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1564 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1565 
1566 		EM_TX_LOCK(txr);
1567 		em_txeof(txr);
1568 #ifdef EM_MULTIQUEUE
1569 		if (!drbr_empty(ifp, txr->br))
1570 			em_mq_start_locked(ifp, txr);
1571 #else
1572 		if (!if_sendq_empty(ifp))
1573 			em_start_locked(ifp, txr);
1574 #endif
1575 		EM_TX_UNLOCK(txr);
1576 		if (more) {
1577 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1578 			return;
1579 		}
1580 	}
1581 
1582 	em_enable_intr(adapter);
1583 	return;
1584 }
1585 
1586 
1587 /*********************************************************************
1588  *
1589  *  MSIX Interrupt Service Routines
1590  *
1591  **********************************************************************/
1592 static void
1593 em_msix_tx(void *arg)
1594 {
1595 	struct tx_ring *txr = arg;
1596 	struct adapter *adapter = txr->adapter;
1597 	if_t ifp = adapter->ifp;
1598 
1599 	++txr->tx_irq;
1600 	EM_TX_LOCK(txr);
1601 	em_txeof(txr);
1602 #ifdef EM_MULTIQUEUE
1603 	if (!drbr_empty(ifp, txr->br))
1604 		em_mq_start_locked(ifp, txr);
1605 #else
1606 	if (!if_sendq_empty(ifp))
1607 		em_start_locked(ifp, txr);
1608 #endif
1609 
1610 	/* Reenable this interrupt */
1611 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1612 	EM_TX_UNLOCK(txr);
1613 	return;
1614 }
1615 
1616 /*********************************************************************
1617  *
1618  *  MSIX RX Interrupt Service routine
1619  *
1620  **********************************************************************/
1621 
1622 static void
1623 em_msix_rx(void *arg)
1624 {
1625 	struct rx_ring	*rxr = arg;
1626 	struct adapter	*adapter = rxr->adapter;
1627 	bool		more;
1628 
1629 	++rxr->rx_irq;
1630 	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1631 		return;
1632 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1633 	if (more)
1634 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1635 	else {
1636 		/* Reenable this interrupt */
1637 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1638 	}
1639 	return;
1640 }
1641 
1642 /*********************************************************************
1643  *
1644  *  MSIX Link Fast Interrupt Service routine
1645  *
1646  **********************************************************************/
1647 static void
1648 em_msix_link(void *arg)
1649 {
1650 	struct adapter	*adapter = arg;
1651 	u32		reg_icr;
1652 
1653 	++adapter->link_irq;
1654 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1655 
1656 	if (reg_icr & E1000_ICR_RXO)
1657 		adapter->rx_overruns++;
1658 
1659 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1660 		adapter->hw.mac.get_link_status = 1;
1661 		em_handle_link(adapter, 0);
1662 	} else
1663 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1664 		    EM_MSIX_LINK | E1000_IMS_LSC);
1665 	/*
1666 	** Because we must read the ICR for this interrupt,
1667 	** it may clear other causes using autoclear; for
1668 	** this reason we simply create a soft interrupt
1669 	** for all these vectors.
1670  	*/
1671 	if (reg_icr) {
1672 		E1000_WRITE_REG(&adapter->hw,
1673 			E1000_ICS, adapter->ims);
1674 	}
1675 	return;
1676 }
1677 
1678 static void
1679 em_handle_rx(void *context, int pending)
1680 {
1681 	struct rx_ring	*rxr = context;
1682 	struct adapter	*adapter = rxr->adapter;
1683 	bool		more;
1684 
1685 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1686 	if (more)
1687 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1688 	else {
1689 		/* Reenable this interrupt */
1690 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1691 	}
1692 }
1693 
1694 static void
1695 em_handle_tx(void *context, int pending)
1696 {
1697 	struct tx_ring	*txr = context;
1698 	struct adapter	*adapter = txr->adapter;
1699 	if_t ifp = adapter->ifp;
1700 
1701 	EM_TX_LOCK(txr);
1702 	em_txeof(txr);
1703 #ifdef EM_MULTIQUEUE
1704 	if (!drbr_empty(ifp, txr->br))
1705 		em_mq_start_locked(ifp, txr);
1706 #else
1707 	if (!if_sendq_empty(ifp))
1708 		em_start_locked(ifp, txr);
1709 #endif
1710 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1711 	EM_TX_UNLOCK(txr);
1712 }
1713 
1714 static void
1715 em_handle_link(void *context, int pending)
1716 {
1717 	struct adapter	*adapter = context;
1718 	struct tx_ring	*txr = adapter->tx_rings;
1719 	if_t ifp = adapter->ifp;
1720 
1721 	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1722 		return;
1723 
1724 	EM_CORE_LOCK(adapter);
1725 	callout_stop(&adapter->timer);
1726 	em_update_link_status(adapter);
1727 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1728 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1729 	    EM_MSIX_LINK | E1000_IMS_LSC);
1730 	if (adapter->link_active) {
1731 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1732 			EM_TX_LOCK(txr);
1733 #ifdef EM_MULTIQUEUE
1734 			if (!drbr_empty(ifp, txr->br))
1735 				em_mq_start_locked(ifp, txr);
1736 #else
1737 			if (!if_sendq_empty(ifp))
1738 				em_start_locked(ifp, txr);
1739 #endif
1740 			EM_TX_UNLOCK(txr);
1741 		}
1742 	}
1743 	EM_CORE_UNLOCK(adapter);
1744 }
1745 
1746 
1747 /*********************************************************************
1748  *
1749  *  Media Ioctl callback
1750  *
1751  *  This routine is called whenever the user queries the status of
1752  *  the interface using ifconfig.
1753  *
1754  **********************************************************************/
1755 static void
1756 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1757 {
1758 	struct adapter *adapter = if_getsoftc(ifp);
1759 	u_char fiber_type = IFM_1000_SX;
1760 
1761 	INIT_DEBUGOUT("em_media_status: begin");
1762 
1763 	EM_CORE_LOCK(adapter);
1764 	em_update_link_status(adapter);
1765 
1766 	ifmr->ifm_status = IFM_AVALID;
1767 	ifmr->ifm_active = IFM_ETHER;
1768 
1769 	if (!adapter->link_active) {
1770 		EM_CORE_UNLOCK(adapter);
1771 		return;
1772 	}
1773 
1774 	ifmr->ifm_status |= IFM_ACTIVE;
1775 
1776 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1777 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1778 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1779 	} else {
1780 		switch (adapter->link_speed) {
1781 		case 10:
1782 			ifmr->ifm_active |= IFM_10_T;
1783 			break;
1784 		case 100:
1785 			ifmr->ifm_active |= IFM_100_TX;
1786 			break;
1787 		case 1000:
1788 			ifmr->ifm_active |= IFM_1000_T;
1789 			break;
1790 		}
1791 		if (adapter->link_duplex == FULL_DUPLEX)
1792 			ifmr->ifm_active |= IFM_FDX;
1793 		else
1794 			ifmr->ifm_active |= IFM_HDX;
1795 	}
1796 	EM_CORE_UNLOCK(adapter);
1797 }
1798 
1799 /*********************************************************************
1800  *
1801  *  Media Ioctl callback
1802  *
1803  *  This routine is called when the user changes speed/duplex using
1804  *  the media/mediaopt options of ifconfig.
1805  *
1806  **********************************************************************/
1807 static int
1808 em_media_change(if_t ifp)
1809 {
1810 	struct adapter *adapter = if_getsoftc(ifp);
1811 	struct ifmedia  *ifm = &adapter->media;
1812 
1813 	INIT_DEBUGOUT("em_media_change: begin");
1814 
1815 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1816 		return (EINVAL);
1817 
1818 	EM_CORE_LOCK(adapter);
1819 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1820 	case IFM_AUTO:
1821 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1822 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1823 		break;
1824 	case IFM_1000_LX:
1825 	case IFM_1000_SX:
1826 	case IFM_1000_T:
1827 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1828 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1829 		break;
1830 	case IFM_100_TX:
1831 		adapter->hw.mac.autoneg = FALSE;
1832 		adapter->hw.phy.autoneg_advertised = 0;
1833 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1834 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1835 		else
1836 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1837 		break;
1838 	case IFM_10_T:
1839 		adapter->hw.mac.autoneg = FALSE;
1840 		adapter->hw.phy.autoneg_advertised = 0;
1841 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1842 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1843 		else
1844 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1845 		break;
1846 	default:
1847 		device_printf(adapter->dev, "Unsupported media type\n");
1848 	}
1849 
1850 	em_init_locked(adapter);
1851 	EM_CORE_UNLOCK(adapter);
1852 
1853 	return (0);
1854 }
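
/*
** Illustrative usage sketch: the cases above map onto the standard
** ifconfig media syntax, e.g. forcing 100 Mb full duplex, or
** returning to autonegotiation:
**
**   # ifconfig em0 media 100baseTX mediaopt full-duplex
**   # ifconfig em0 media autoselect
*/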
1855 
1856 /*********************************************************************
1857  *
1858  *  This routine maps the mbufs to tx descriptors.
1859  *
1860  *  return 0 on success, positive on failure
1861  **********************************************************************/
1862 
1863 static int
1864 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1865 {
1866 	struct adapter		*adapter = txr->adapter;
1867 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1868 	bus_dmamap_t		map;
1869 	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1870 	struct e1000_tx_desc	*ctxd = NULL;
1871 	struct mbuf		*m_head;
1872 	struct ether_header	*eh;
1873 	struct ip		*ip = NULL;
1874 	struct tcphdr		*tp = NULL;
1875 	u32			txd_upper = 0, txd_lower = 0, txd_used = 0;
1876 	int			ip_off, poff;
1877 	int			nsegs, i, j, first, last = 0;
1878 	int			error, do_tso, tso_desc = 0, remap = 1;
1879 
1880 	m_head = *m_headp;
1881 	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1882 	ip_off = poff = 0;
1883 
1884 	/*
1885 	 * Intel recommends entire IP/TCP header length reside in a single
1886 	 * buffer. If multiple descriptors are used to describe the IP and
1887 	 * TCP header, each descriptor should describe one or more
1888 	 * complete headers; descriptors referencing only parts of headers
1889 	 * are not supported. If all layer headers are not coalesced into
1890 	 * a single buffer, each buffer should not cross a 4KB boundary,
1891 	 * or be larger than the maximum read request size.
1892 	 * The controller also requires modifying the IP/TCP header to make
1893 	 * TSO work, so we first get a writable mbuf chain and then coalesce
1894 	 * the ethernet/IP/TCP headers into a single buffer to meet the
1895 	 * controller's requirement. This also simplifies IP/TCP/UDP checksum
1896 	 * offloading, which has similar restrictions.
1897 	 */
1898 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1899 		if (do_tso || (m_head->m_next != NULL &&
1900 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1901 			if (M_WRITABLE(*m_headp) == 0) {
1902 				m_head = m_dup(*m_headp, M_NOWAIT);
1903 				m_freem(*m_headp);
1904 				if (m_head == NULL) {
1905 					*m_headp = NULL;
1906 					return (ENOBUFS);
1907 				}
1908 				*m_headp = m_head;
1909 			}
1910 		}
1911 		/*
1912 		 * XXX
1913 		 * Assume IPv4, we don't have TSO/checksum offload support
1914 		 * for IPv6 yet.
1915 		 */
1916 		ip_off = sizeof(struct ether_header);
1917 		m_head = m_pullup(m_head, ip_off);
1918 		if (m_head == NULL) {
1919 			*m_headp = NULL;
1920 			return (ENOBUFS);
1921 		}
1922 		eh = mtod(m_head, struct ether_header *);
1923 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1924 			ip_off = sizeof(struct ether_vlan_header);
1925 			m_head = m_pullup(m_head, ip_off);
1926 			if (m_head == NULL) {
1927 				*m_headp = NULL;
1928 				return (ENOBUFS);
1929 			}
1930 		}
1931 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1932 		if (m_head == NULL) {
1933 			*m_headp = NULL;
1934 			return (ENOBUFS);
1935 		}
1936 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1937 		poff = ip_off + (ip->ip_hl << 2);
1938 		if (do_tso) {
1939 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1940 			if (m_head == NULL) {
1941 				*m_headp = NULL;
1942 				return (ENOBUFS);
1943 			}
1944 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1945 			/*
1946 			 * TSO workaround:
1947 			 *   pull 4 more bytes of data into it.
1948 			 */
1949 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1950 			if (m_head == NULL) {
1951 				*m_headp = NULL;
1952 				return (ENOBUFS);
1953 			}
1954 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1955 			ip->ip_len = 0;
1956 			ip->ip_sum = 0;
1957 			/*
1958 			 * The pseudo TCP checksum does not include the TCP
1959 			 * payload length, so the driver must recompute the
1960 			 * checksum here to match what the hardware expects
1961 			 * to see, per Microsoft's Large Send specification.
1962 			 */
1963 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1964 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1965 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1966 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1967 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1968 			if (m_head == NULL) {
1969 				*m_headp = NULL;
1970 				return (ENOBUFS);
1971 			}
1972 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1973 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1974 			if (m_head == NULL) {
1975 				*m_headp = NULL;
1976 				return (ENOBUFS);
1977 			}
1978 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1979 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1980 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1981 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1982 			if (m_head == NULL) {
1983 				*m_headp = NULL;
1984 				return (ENOBUFS);
1985 			}
1986 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1987 		}
1988 		*m_headp = m_head;
1989 	}
1990 
1991 	/*
1992 	 * Map the packet for DMA
1993 	 *
1994 	 * Capture the first descriptor index,
1995 	 * this descriptor will have the index
1996 	 * of the EOP which is the only one that
1997 	 * now gets a DONE bit writeback.
1998 	 */
1999 	first = txr->next_avail_desc;
2000 	tx_buffer = &txr->tx_buffers[first];
2001 	tx_buffer_mapped = tx_buffer;
2002 	map = tx_buffer->map;
2003 
2004 retry:
2005 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2006 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2007 
2008 	/*
2009 	 * There are two types of errors we can (try) to handle:
2010 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
2011 	 *   out of segments.  Defragment the mbuf chain and try again.
2012 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2013 	 *   at this point in time.  Defer sending and try again later.
2014 	 * All other errors, in particular EINVAL, are fatal and prevent the
2015 	 * mbuf chain from ever going through.  Drop it and report error.
2016 	 */
2017 	if (error == EFBIG && remap) {
2018 		struct mbuf *m;
2019 
2020 		m = m_defrag(*m_headp, M_NOWAIT);
2021 		if (m == NULL) {
2022 			adapter->mbuf_alloc_failed++;
2023 			m_freem(*m_headp);
2024 			*m_headp = NULL;
2025 			return (ENOBUFS);
2026 		}
2027 		*m_headp = m;
2028 
2029 		/* Try it again, but only once */
2030 		remap = 0;
2031 		goto retry;
2032 	} else if (error == ENOMEM) {
2033 		adapter->no_tx_dma_setup++;
2034 		return (error);
2035 	} else if (error != 0) {
2036 		adapter->no_tx_dma_setup++;
2037 		m_freem(*m_headp);
2038 		*m_headp = NULL;
2039 		return (error);
2040 	}
2041 
2042 	/*
2043 	 * TSO hardware workaround: if this packet is not
2044 	 * TSO, is only a single descriptor long, and
2045 	 * follows a TSO burst, then we need to add a
2046 	 * sentinel descriptor to prevent premature writeback.
2047 	 */
2048 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
2049 		if (nsegs == 1)
2050 			tso_desc = TRUE;
2051 		txr->tx_tso = FALSE;
2052 	}
2053 
2054 	if (nsegs > (txr->tx_avail - 2)) {
2055 		txr->no_desc_avail++;
2056 		bus_dmamap_unload(txr->txtag, map);
2057 		return (ENOBUFS);
2058 	}
2059 	m_head = *m_headp;
2060 
2061 	/* Do hardware assists */
2062 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2063 		em_tso_setup(txr, m_head, ip_off, ip, tp,
2064 		    &txd_upper, &txd_lower);
2065 		/* we need to make a final sentinel transmit desc */
2066 		tso_desc = TRUE;
2067 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2068 		em_transmit_checksum_setup(txr, m_head,
2069 		    ip_off, ip, &txd_upper, &txd_lower);
2070 
2071 	if (m_head->m_flags & M_VLANTAG) {
2072 		/* Set the vlan id. */
2073 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2074 		/* Tell hardware to add tag */
2075 		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2076 	}
2077 
2078 	i = txr->next_avail_desc;
2079 
2080 	/* Set up our transmit descriptors */
2081 	for (j = 0; j < nsegs; j++) {
2082 		bus_size_t seg_len;
2083 		bus_addr_t seg_addr;
2084 
2085 		tx_buffer = &txr->tx_buffers[i];
2086 		ctxd = &txr->tx_base[i];
2087 		seg_addr = segs[j].ds_addr;
2088 		seg_len  = segs[j].ds_len;
2089 		/*
2090 		** TSO Workaround:
2091 		** If this is the last descriptor, we want to
2092 		** split it so we have a small final sentinel
2093 		*/
2094 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2095 			seg_len -= 4;
2096 			ctxd->buffer_addr = htole64(seg_addr);
2097 			ctxd->lower.data = htole32(
2098 			    adapter->txd_cmd | txd_lower | seg_len);
2099 			ctxd->upper.data =
2100 			    htole32(txd_upper);
2101 			if (++i == adapter->num_tx_desc)
2102 				i = 0;
2103 			/* Now make the sentinel */
2104 			++txd_used; /* using an extra txd */
2105 			ctxd = &txr->tx_base[i];
2106 			tx_buffer = &txr->tx_buffers[i];
2107 			ctxd->buffer_addr =
2108 			    htole64(seg_addr + seg_len);
2109 			ctxd->lower.data = htole32(
2110 			    adapter->txd_cmd | txd_lower | 4);
2111 			ctxd->upper.data =
2112 			    htole32(txd_upper);
2113 			last = i;
2114 			if (++i == adapter->num_tx_desc)
2115 				i = 0;
2116 		} else {
2117 			ctxd->buffer_addr = htole64(seg_addr);
2118 			ctxd->lower.data = htole32(
2119 			    adapter->txd_cmd | txd_lower | seg_len);
2120 			ctxd->upper.data =
2121 			    htole32(txd_upper);
2122 			last = i;
2123 			if (++i == adapter->num_tx_desc)
2124 				i = 0;
2125 		}
2126 		tx_buffer->m_head = NULL;
2127 		tx_buffer->next_eop = -1;
2128 	}
2129 
2130 	txr->next_avail_desc = i;
2131 	txr->tx_avail -= nsegs;
2132 	if (tso_desc) /* TSO used an extra for sentinel */
2133 		txr->tx_avail -= txd_used;
2134 
2135 	tx_buffer->m_head = m_head;
2136 	/*
2137 	** Here we swap the map so the last descriptor,
2138 	** which gets the completion interrupt, has the
2139 	** real map, and the first descriptor gets the
2140 	** unused map from this descriptor.
2141 	*/
2142 	tx_buffer_mapped->map = tx_buffer->map;
2143 	tx_buffer->map = map;
2144 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2145 
2146 	/*
2147 	 * Last Descriptor of Packet
2148 	 * needs End Of Packet (EOP)
2149 	 * and Report Status (RS)
2150 	 */
2151 	ctxd->lower.data |=
2152 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2153 	/*
2154 	 * Keep track in the first buffer which
2155 	 * descriptor will be written back
2156 	 */
2157 	tx_buffer = &txr->tx_buffers[first];
2158 	tx_buffer->next_eop = last;
2159 
2160 	/*
2161 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2162 	 * that this frame is available to transmit.
2163 	 */
2164 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2165 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2166 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2167 
2168 	return (0);
2169 }
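
/*
** Illustrative caller sketch (hypothetical, not part of this file):
** em_xmit() takes a struct mbuf ** because it may defragment, pull
** up, or free the chain, so callers must reload their pointer.  Some
** failure paths free the chain and NULL out *m_headp, while others
** (ENOMEM from bus_dma, or a full ring) leave it intact for a retry,
** so testing *m_headp afterwards is the reliable check.
*/
#if 0
static void
example_xmit_once(struct tx_ring *txr, struct mbuf *m)
{
	struct mbuf *m_head = m;

	EM_TX_LOCK(txr);
	if (em_xmit(txr, &m_head) != 0 && m_head != NULL) {
		/* Chain still valid (e.g. ENOMEM): requeue for later */
		example_requeue(m_head);	/* hypothetical helper */
	}
	EM_TX_UNLOCK(txr);
}
#endif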
2170 
2171 static void
2172 em_set_promisc(struct adapter *adapter)
2173 {
2174 	if_t ifp = adapter->ifp;
2175 	u32		reg_rctl;
2176 
2177 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2178 
2179 	if (if_getflags(ifp) & IFF_PROMISC) {
2180 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2181 		/* Turn this on if you want to see bad packets */
2182 		if (em_debug_sbp)
2183 			reg_rctl |= E1000_RCTL_SBP;
2184 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2185 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2186 		reg_rctl |= E1000_RCTL_MPE;
2187 		reg_rctl &= ~E1000_RCTL_UPE;
2188 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2189 	}
2190 }
2191 
2192 static void
2193 em_disable_promisc(struct adapter *adapter)
2194 {
2195 	if_t		ifp = adapter->ifp;
2196 	u32		reg_rctl;
2197 	int		mcnt = 0;
2198 
2199 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2200 	reg_rctl &= (~E1000_RCTL_UPE);
2201 	if (if_getflags(ifp) & IFF_ALLMULTI)
2202 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2203 	else
2204 		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2205 	/* Don't disable if in MAX groups */
2206 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2207 		reg_rctl &= (~E1000_RCTL_MPE);
2208 	reg_rctl &= (~E1000_RCTL_SBP);
2209 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2210 }
2211 
2212 
2213 /*********************************************************************
2214  *  Multicast Update
2215  *
2216  *  This routine is called whenever multicast address list is updated.
2217  *
2218  **********************************************************************/
2219 
2220 static void
2221 em_set_multi(struct adapter *adapter)
2222 {
2223 	if_t ifp = adapter->ifp;
2224 	u32 reg_rctl = 0;
2225 	u8  *mta; /* Multicast array memory */
2226 	int mcnt = 0;
2227 
2228 	IOCTL_DEBUGOUT("em_set_multi: begin");
2229 
2230 	mta = adapter->mta;
2231 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2232 
2233 	if (adapter->hw.mac.type == e1000_82542 &&
2234 	    adapter->hw.revision_id == E1000_REVISION_2) {
2235 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2236 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2237 			e1000_pci_clear_mwi(&adapter->hw);
2238 		reg_rctl |= E1000_RCTL_RST;
2239 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2240 		msec_delay(5);
2241 	}
2242 
2243 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2244 
2245 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2246 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2247 		reg_rctl |= E1000_RCTL_MPE;
2248 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2249 	} else
2250 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2251 
2252 	if (adapter->hw.mac.type == e1000_82542 &&
2253 	    adapter->hw.revision_id == E1000_REVISION_2) {
2254 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2255 		reg_rctl &= ~E1000_RCTL_RST;
2256 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2257 		msec_delay(5);
2258 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2259 			e1000_pci_set_mwi(&adapter->hw);
2260 	}
2261 }
2262 
2263 
2264 /*********************************************************************
2265  *  Timer routine
2266  *
2267  *  This routine checks for link status and updates statistics.
2268  *
2269  **********************************************************************/
2270 
2271 static void
2272 em_local_timer(void *arg)
2273 {
2274 	struct adapter	*adapter = arg;
2275 	if_t ifp = adapter->ifp;
2276 	struct tx_ring	*txr = adapter->tx_rings;
2277 	struct rx_ring	*rxr = adapter->rx_rings;
2278 	u32		trigger = 0;
2279 
2280 	EM_CORE_LOCK_ASSERT(adapter);
2281 
2282 	em_update_link_status(adapter);
2283 	em_update_stats_counters(adapter);
2284 
2285 	/* Reset LAA into RAR[0] on 82571 */
2286 	if ((adapter->hw.mac.type == e1000_82571) &&
2287 	    e1000_get_laa_state_82571(&adapter->hw))
2288 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2289 
2290 	/* Mask to use in the irq trigger */
2291 	if (adapter->msix_mem) {
2292 		for (int i = 0; i < adapter->num_queues; i++, rxr++)
2293 			trigger |= rxr->ims;
2294 		rxr = adapter->rx_rings;
2295 	} else
2296 		trigger = E1000_ICS_RXDMT0;
2297 
2298 	/*
2299 	** Check on the state of the TX queue(s); this
2300 	** can be done without the lock because it's read-only
2301 	** and the HUNG state will be static once set.
2302 	*/
2303 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2304 		if (txr->busy == EM_TX_HUNG)
2305 			goto hung;
2306 		if (txr->busy >= EM_TX_MAXTRIES)
2307 			txr->busy = EM_TX_HUNG;
2308 		/* Schedule a TX tasklet if needed */
2309 		if (txr->tx_avail <= EM_MAX_SCATTER)
2310 			taskqueue_enqueue(txr->tq, &txr->tx_task);
2311 	}
2312 
2313 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2314 #ifndef DEVICE_POLLING
2315 	/* Trigger an RX interrupt to guarantee mbuf refresh */
2316 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2317 #endif
2318 	return;
2319 hung:
2320 	/* Looks like we're hung */
2321 	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2322 			txr->me);
2323 	em_print_debug_info(adapter);
2324 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2325 	adapter->watchdog_events++;
2326 	em_init_locked(adapter);
2327 }
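
/*
** Worked example of the trigger mask above: with two MSIX RX queues
** rxr->ims is (1 << 20) and (1 << 21), so 0x00300000 is written to
** E1000_ICS; in legacy/MSI mode the single E1000_ICS_RXDMT0 bit is
** used instead.  Forcing an RX interrupt this way guarantees the
** rings get a periodic mbuf refresh even when otherwise idle.
*/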
2328 
2329 
2330 static void
2331 em_update_link_status(struct adapter *adapter)
2332 {
2333 	struct e1000_hw *hw = &adapter->hw;
2334 	if_t ifp = adapter->ifp;
2335 	device_t dev = adapter->dev;
2336 	struct tx_ring *txr = adapter->tx_rings;
2337 	u32 link_check = 0;
2338 
2339 	/* Get the cached link value or read phy for real */
2340 	switch (hw->phy.media_type) {
2341 	case e1000_media_type_copper:
2342 		if (hw->mac.get_link_status) {
2343 			/* Do the work to read phy */
2344 			e1000_check_for_link(hw);
2345 			link_check = !hw->mac.get_link_status;
2346 			if (link_check) /* ESB2 fix */
2347 				e1000_cfg_on_link_up(hw);
2348 		} else
2349 			link_check = TRUE;
2350 		break;
2351 	case e1000_media_type_fiber:
2352 		e1000_check_for_link(hw);
2353 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2354                                  E1000_STATUS_LU);
2355 		break;
2356 	case e1000_media_type_internal_serdes:
2357 		e1000_check_for_link(hw);
2358 		link_check = adapter->hw.mac.serdes_has_link;
2359 		break;
2360 	default:
2361 	case e1000_media_type_unknown:
2362 		break;
2363 	}
2364 
2365 	/* Now check for a transition */
2366 	if (link_check && (adapter->link_active == 0)) {
2367 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2368 		    &adapter->link_duplex);
2369 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2370 		if ((adapter->link_speed != SPEED_1000) &&
2371 		    ((hw->mac.type == e1000_82571) ||
2372 		    (hw->mac.type == e1000_82572))) {
2373 			int tarc0;
2374 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2375 			tarc0 &= ~TARC_SPEED_MODE_BIT;
2376 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2377 		}
2378 		if (bootverbose)
2379 			device_printf(dev, "Link is up %d Mbps %s\n",
2380 			    adapter->link_speed,
2381 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2382 			    "Full Duplex" : "Half Duplex"));
2383 		adapter->link_active = 1;
2384 		adapter->smartspeed = 0;
2385 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2386 		if_link_state_change(ifp, LINK_STATE_UP);
2387 	} else if (!link_check && (adapter->link_active == 1)) {
2388 		if_setbaudrate(ifp, 0);
2389 		adapter->link_speed = 0;
2390 		adapter->link_duplex = 0;
2391 		if (bootverbose)
2392 			device_printf(dev, "Link is Down\n");
2393 		adapter->link_active = 0;
2394 		/* Link down, disable hang detection */
2395 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2396 			txr->busy = EM_TX_IDLE;
2397 		if_link_state_change(ifp, LINK_STATE_DOWN);
2398 	}
2399 }
2400 
2401 /*********************************************************************
2402  *
2403  *  This routine disables all traffic on the adapter by issuing a
2404  *  global reset on the MAC and deallocates TX/RX buffers.
2405  *
2406  *  This routine should always be called with BOTH the CORE
2407  *  and TX locks.
2408  **********************************************************************/
2409 
2410 static void
2411 em_stop(void *arg)
2412 {
2413 	struct adapter	*adapter = arg;
2414 	if_t ifp = adapter->ifp;
2415 	struct tx_ring	*txr = adapter->tx_rings;
2416 
2417 	EM_CORE_LOCK_ASSERT(adapter);
2418 
2419 	INIT_DEBUGOUT("em_stop: begin");
2420 
2421 	em_disable_intr(adapter);
2422 	callout_stop(&adapter->timer);
2423 
2424 	/* Tell the stack that the interface is no longer active */
2425 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2426 
2427 	/* Disarm Hang Detection. */
2428 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2429 		EM_TX_LOCK(txr);
2430 		txr->busy = EM_TX_IDLE;
2431 		EM_TX_UNLOCK(txr);
2432 	}
2433 
2434 	e1000_reset_hw(&adapter->hw);
2435 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2436 
2437 	e1000_led_off(&adapter->hw);
2438 	e1000_cleanup_led(&adapter->hw);
2439 }
2440 
2441 
2442 /*********************************************************************
2443  *
2444  *  Determine hardware revision.
2445  *
2446  **********************************************************************/
2447 static void
2448 em_identify_hardware(struct adapter *adapter)
2449 {
2450 	device_t dev = adapter->dev;
2451 
2452 	/* Make sure our PCI config space has the necessary stuff set */
2453 	pci_enable_busmaster(dev);
2454 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2455 
2456 	/* Save off the information about this board */
2457 	adapter->hw.vendor_id = pci_get_vendor(dev);
2458 	adapter->hw.device_id = pci_get_device(dev);
2459 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2460 	adapter->hw.subsystem_vendor_id =
2461 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2462 	adapter->hw.subsystem_device_id =
2463 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2464 
2465 	/* Do Shared Code Init and Setup */
2466 	if (e1000_set_mac_type(&adapter->hw)) {
2467 		device_printf(dev, "Setup init failure\n");
2468 		return;
2469 	}
2470 }
2471 
2472 static int
2473 em_allocate_pci_resources(struct adapter *adapter)
2474 {
2475 	device_t	dev = adapter->dev;
2476 	int		rid;
2477 
2478 	rid = PCIR_BAR(0);
2479 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2480 	    &rid, RF_ACTIVE);
2481 	if (adapter->memory == NULL) {
2482 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2483 		return (ENXIO);
2484 	}
2485 	adapter->osdep.mem_bus_space_tag =
2486 	    rman_get_bustag(adapter->memory);
2487 	adapter->osdep.mem_bus_space_handle =
2488 	    rman_get_bushandle(adapter->memory);
2489 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2490 
2491 	adapter->hw.back = &adapter->osdep;
2492 
2493 	return (0);
2494 }
2495 
2496 /*********************************************************************
2497  *
2498  *  Setup the Legacy or MSI Interrupt handler
2499  *
2500  **********************************************************************/
2501 int
2502 em_allocate_legacy(struct adapter *adapter)
2503 {
2504 	device_t dev = adapter->dev;
2505 	struct tx_ring	*txr = adapter->tx_rings;
2506 	int error, rid = 0;
2507 
2508 	/* Manually turn off all interrupts */
2509 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2510 
2511 	if (adapter->msix == 1) /* using MSI */
2512 		rid = 1;
2513 	/* We allocate a single interrupt resource */
2514 	adapter->res = bus_alloc_resource_any(dev,
2515 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2516 	if (adapter->res == NULL) {
2517 		device_printf(dev, "Unable to allocate bus resource: "
2518 		    "interrupt\n");
2519 		return (ENXIO);
2520 	}
2521 
2522 	/*
2523 	 * Allocate a fast interrupt and the associated
2524 	 * deferred processing contexts.
2525 	 */
2526 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2527 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2528 	    taskqueue_thread_enqueue, &adapter->tq);
2529 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2530 	    device_get_nameunit(adapter->dev));
2531 	/* Use a TX only tasklet for local timer */
2532 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2533 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2534 	    taskqueue_thread_enqueue, &txr->tq);
2535 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2536 	    device_get_nameunit(adapter->dev));
2537 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2538 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2539 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2540 		device_printf(dev, "Failed to register fast interrupt "
2541 			    "handler: %d\n", error);
2542 		taskqueue_free(adapter->tq);
2543 		adapter->tq = NULL;
2544 		return (error);
2545 	}
2546 
2547 	return (0);
2548 }
2549 
2550 /*********************************************************************
2551  *
2552  *  Setup the MSIX Interrupt handlers
2553  *   This is not really Multiqueue, rather
2554  *   it's just separate interrupt vectors
2555  *   for TX, RX, and Link.
2556  *
2557  **********************************************************************/
2558 int
2559 em_allocate_msix(struct adapter *adapter)
2560 {
2561 	device_t	dev = adapter->dev;
2562 	struct		tx_ring *txr = adapter->tx_rings;
2563 	struct		rx_ring *rxr = adapter->rx_rings;
2564 	int		error, rid, vector = 0;
2565 	int		cpu_id = 0;
2566 
2567 
2568 	/* Make sure all interrupts are disabled */
2569 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2570 
2571 	/* First set up ring resources */
2572 	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2573 
2574 		/* RX ring */
2575 		rid = vector + 1;
2576 
2577 		rxr->res = bus_alloc_resource_any(dev,
2578 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2579 		if (rxr->res == NULL) {
2580 			device_printf(dev,
2581 			    "Unable to allocate bus resource: "
2582 			    "RX MSIX Interrupt %d\n", i);
2583 			return (ENXIO);
2584 		}
2585 		if ((error = bus_setup_intr(dev, rxr->res,
2586 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2587 		    rxr, &rxr->tag)) != 0) {
2588 			device_printf(dev, "Failed to register RX handler");
2589 			return (error);
2590 		}
2591 #if __FreeBSD_version >= 800504
2592 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2593 #endif
2594 		rxr->msix = vector;
2595 
2596 		if (em_last_bind_cpu < 0)
2597 			em_last_bind_cpu = CPU_FIRST();
2598 		cpu_id = em_last_bind_cpu;
2599 		bus_bind_intr(dev, rxr->res, cpu_id);
2600 
2601 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2602 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2603 		    taskqueue_thread_enqueue, &rxr->tq);
2604 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2605 		    device_get_nameunit(adapter->dev), cpu_id);
2606 		/*
2607 		** Set the bit to enable interrupt
2608 		** in E1000_IMS -- bits 20 and 21
2609 		** are for RX0 and RX1, note this has
2610 		** NOTHING to do with the MSIX vector
2611 		*/
2612 		rxr->ims = 1 << (20 + i);
2613 		adapter->ims |= rxr->ims;
2614 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2615 
2616 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2617 	}
2618 
2619 	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2620 		/* TX ring */
2621 		rid = vector + 1;
2622 		txr->res = bus_alloc_resource_any(dev,
2623 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2624 		if (txr->res == NULL) {
2625 			device_printf(dev,
2626 			    "Unable to allocate bus resource: "
2627 			    "TX MSIX Interrupt %d\n", i);
2628 			return (ENXIO);
2629 		}
2630 		if ((error = bus_setup_intr(dev, txr->res,
2631 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2632 		    txr, &txr->tag)) != 0) {
2633 			device_printf(dev, "Failed to register TX handler");
2634 			return (error);
2635 		}
2636 #if __FreeBSD_version >= 800504
2637 		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2638 #endif
2639 		txr->msix = vector;
2640 
2641 		if (em_last_bind_cpu < 0)
2642 			em_last_bind_cpu = CPU_FIRST();
2643 		cpu_id = em_last_bind_cpu;
2644 		bus_bind_intr(dev, txr->res, cpu_id);
2645 
2646 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2647 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2648 		    taskqueue_thread_enqueue, &txr->tq);
2649 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2650 		    device_get_nameunit(adapter->dev), cpu_id);
2651 		/*
2652 		** Set the bit to enable interrupt
2653 		** in E1000_IMS -- bits 22 and 23
2654 		** are for TX0 and TX1, note this has
2655 		** NOTHING to do with the MSIX vector
2656 		*/
2657 		txr->ims = 1 << (22 + i);
2658 		adapter->ims |= txr->ims;
2659 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2660 
2661 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2662 	}
2663 
2664 	/* Link interrupt */
2665 	rid = vector + 1;
2666 	adapter->res = bus_alloc_resource_any(dev,
2667 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2668 	if (!adapter->res) {
2669 		device_printf(dev,"Unable to allocate "
2670 		    "bus resource: Link interrupt [%d]\n", rid);
2671 		return (ENXIO);
2672         }
2673 	/* Set the link handler function */
2674 	error = bus_setup_intr(dev, adapter->res,
2675 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2676 	    em_msix_link, adapter, &adapter->tag);
2677 	if (error) {
2678 		adapter->res = NULL;
2679 		device_printf(dev, "Failed to register LINK handler");
2680 		return (error);
2681 	}
2682 #if __FreeBSD_version >= 800504
2683 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2684 #endif
2685 	adapter->linkvec = vector;
2686 	adapter->ivars |=  (8 | vector) << 16;
2687 	adapter->ivars |= 0x80000000;
2688 
2689 	return (0);
2690 }
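
/*
** Worked example of the 82574 IVAR/IMS encoding above, for the
** single-queue case with vectors rx=0, tx=1, link=2:
**
**   rxr->ims = 1 << 20 = 0x00100000
**   txr->ims = 1 << 22 = 0x00400000
**   ivars    = ((8|0) << 0) | ((8|1) << 8) | ((8|2) << 16)
**            | 0x80000000 (enable bit) = 0x800A0908
*/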
2691 
2692 
2693 static void
2694 em_free_pci_resources(struct adapter *adapter)
2695 {
2696 	device_t	dev = adapter->dev;
2697 	struct tx_ring	*txr;
2698 	struct rx_ring	*rxr;
2699 	int		rid;
2700 
2701 
2702 	/*
2703 	** Release all the queue interrupt resources:
2704 	*/
2705 	for (int i = 0; i < adapter->num_queues; i++) {
2706 		txr = &adapter->tx_rings[i];
2707 		/* an early abort? */
2708 		if (txr == NULL)
2709 			break;
2710 		rid = txr->msix + 1;
2711 		if (txr->tag != NULL) {
2712 			bus_teardown_intr(dev, txr->res, txr->tag);
2713 			txr->tag = NULL;
2714 		}
2715 		if (txr->res != NULL)
2716 			bus_release_resource(dev, SYS_RES_IRQ,
2717 			    rid, txr->res);
2718 
2719 		rxr = &adapter->rx_rings[i];
2720 		/* an early abort? */
2721 		if (rxr == NULL)
2722 			break;
2723 		rid = rxr->msix + 1;
2724 		if (rxr->tag != NULL) {
2725 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2726 			rxr->tag = NULL;
2727 		}
2728 		if (rxr->res != NULL)
2729 			bus_release_resource(dev, SYS_RES_IRQ,
2730 			    rid, rxr->res);
2731 	}
2732 
2733 	if (adapter->linkvec) /* we are doing MSIX */
2734 		rid = adapter->linkvec + 1;
2735 	else
2736 		rid = (adapter->msix != 0) ? 1 : 0;
2737 
2738 	if (adapter->tag != NULL) {
2739 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2740 		adapter->tag = NULL;
2741 	}
2742 
2743 	if (adapter->res != NULL)
2744 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2745 
2746 
2747 	if (adapter->msix)
2748 		pci_release_msi(dev);
2749 
2750 	if (adapter->msix_mem != NULL)
2751 		bus_release_resource(dev, SYS_RES_MEMORY,
2752 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2753 
2754 	if (adapter->memory != NULL)
2755 		bus_release_resource(dev, SYS_RES_MEMORY,
2756 		    PCIR_BAR(0), adapter->memory);
2757 
2758 	if (adapter->flash != NULL)
2759 		bus_release_resource(dev, SYS_RES_MEMORY,
2760 		    EM_FLASH, adapter->flash);
2761 }
2762 
2763 /*
2764  * Setup MSI or MSI/X
2765  */
2766 static int
2767 em_setup_msix(struct adapter *adapter)
2768 {
2769 	device_t dev = adapter->dev;
2770 	int val;
2771 
2772 	/* Nearly always going to use one queue */
2773 	adapter->num_queues = 1;
2774 
2775 	/*
2776 	** Try using MSI-X for Hartwell adapters
2777 	*/
2778 	if ((adapter->hw.mac.type == e1000_82574) &&
2779 	    (em_enable_msix == TRUE)) {
2780 #ifdef EM_MULTIQUEUE
2781 		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2782 		if (adapter->num_queues > 1)
2783 			em_enable_vectors_82574(adapter);
2784 #endif
2785 		/* Map the MSIX BAR */
2786 		int rid = PCIR_BAR(EM_MSIX_BAR);
2787 		adapter->msix_mem = bus_alloc_resource_any(dev,
2788 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2789 		if (adapter->msix_mem == NULL) {
2790 			/* May not be enabled */
2791 			device_printf(adapter->dev,
2792 			    "Unable to map MSIX table\n");
2793 			goto msi;
2794 		}
2795 		val = pci_msix_count(dev);
2796 
2797 #ifdef EM_MULTIQUEUE
2798 		/* We need 5 vectors in the multiqueue case */
2799 		if (adapter->num_queues > 1) {
2800 			if (val >= 5)
2801 				val = 5;
2802 			else {
2803 				adapter->num_queues = 1;
2804 				device_printf(adapter->dev,
2805 				    "Insufficient MSIX vectors for >1 queue, "
2806 				    "using single queue...\n");
2807 				goto msix_one;
2808 			}
2809 		} else {
2810 msix_one:
2811 #endif
2812 			if (val >= 3)
2813 				val = 3;
2814 			else {
2815 				device_printf(adapter->dev,
2816 			    	"Insufficient MSIX vectors, using MSI\n");
2817 				goto msi;
2818 			}
2819 #ifdef EM_MULTIQUEUE
2820 		}
2821 #endif
2822 
2823 		if (pci_alloc_msix(dev, &val) == 0) {
2824 			device_printf(adapter->dev,
2825 			    "Using MSIX interrupts "
2826 			    "with %d vectors\n", val);
2827 			return (val);
2828 		}
2829 
2830 		/*
2831 		** If MSIX alloc failed or provided us with
2832 		** less than needed, free and fall through to MSI
2833 		*/
2834 		pci_release_msi(dev);
2835 	}
2836 msi:
2837 	if (adapter->msix_mem != NULL) {
2838 		bus_release_resource(dev, SYS_RES_MEMORY,
2839 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2840 		adapter->msix_mem = NULL;
2841 	}
2842 	val = 1;
2843 	if (pci_alloc_msi(dev, &val) == 0) {
2844 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2845 		return (val);
2846 	}
2847 	/* Should only happen due to manual configuration */
2848 	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2849 	return (0);
2850 }
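
/*
** Worked example of the vector budget above: with EM_MULTIQUEUE and
** two queues the 82574 needs 5 vectors (2 RX + 2 TX + 1 link), while
** a single queue needs 3 (RX + TX + link).  Anything less falls back
** to MSI, and failing that to a legacy shared IRQ.
*/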
2851 
2852 
2853 /*********************************************************************
2854  *
2855  *  Initialize the hardware to a configuration
2856  *  as specified by the adapter structure.
2857  *
2858  **********************************************************************/
2859 static void
2860 em_reset(struct adapter *adapter)
2861 {
2862 	device_t	dev = adapter->dev;
2863 	if_t ifp = adapter->ifp;
2864 	struct e1000_hw	*hw = &adapter->hw;
2865 	u16		rx_buffer_size;
2866 	u32		pba;
2867 
2868 	INIT_DEBUGOUT("em_reset: begin");
2869 
2870 	/* Set up smart power down as default off on newer adapters. */
2871 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2872 	    hw->mac.type == e1000_82572)) {
2873 		u16 phy_tmp = 0;
2874 
2875 		/* Speed up time to link by disabling smart power down. */
2876 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2877 		phy_tmp &= ~IGP02E1000_PM_SPD;
2878 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2879 	}
2880 
2881 	/*
2882 	 * Packet Buffer Allocation (PBA)
2883 	 * Writing PBA sets the receive portion of the buffer
2884 	 * the remainder is used for the transmit buffer.
2885 	 */
2886 	switch (hw->mac.type) {
2887 	/* Total Packet Buffer on these is 48K */
2888 	case e1000_82571:
2889 	case e1000_82572:
2890 	case e1000_80003es2lan:
2891 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2892 		break;
2893 	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2894 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2895 		break;
2896 	case e1000_82574:
2897 	case e1000_82583:
2898 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2899 		break;
2900 	case e1000_ich8lan:
2901 		pba = E1000_PBA_8K;
2902 		break;
2903 	case e1000_ich9lan:
2904 	case e1000_ich10lan:
2905 		/* Boost Receive side for jumbo frames */
2906 		if (adapter->hw.mac.max_frame_size > 4096)
2907 			pba = E1000_PBA_14K;
2908 		else
2909 			pba = E1000_PBA_10K;
2910 		break;
2911 	case e1000_pchlan:
2912 	case e1000_pch2lan:
2913 	case e1000_pch_lpt:
2914 		pba = E1000_PBA_26K;
2915 		break;
2916 	default:
2917 		if (adapter->hw.mac.max_frame_size > 8192)
2918 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2919 		else
2920 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2921 	}
2922 	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2923 
2924 	/*
2925 	 * These parameters control the automatic generation (Tx) and
2926 	 * response (Rx) to Ethernet PAUSE frames.
2927 	 * - High water mark should allow for at least two frames to be
2928 	 *   received after sending an XOFF.
2929 	 * - Low water mark works best when it is very near the high water mark.
2930 	 *   This allows the receiver to restart by sending XON when it has
2931 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2932 	 *   restart after one full frame is pulled from the buffer. There
2933 	 *   could be several smaller frames in the buffer and if so they will
2934 	 *   not trigger the XON until their total number reduces the buffer
2935 	 *   by 1500.
2936 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2937 	 */
2938 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2939 	hw->fc.high_water = rx_buffer_size -
2940 	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2941 	hw->fc.low_water = hw->fc.high_water - 1500;
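
	/*
	 * Worked example of the watermark math above, assuming a 48K
	 * PBA and a standard 1518-byte max frame: rx_buffer_size =
	 * 48 * 1024 = 49152, roundup2(1518, 1024) = 2048, so
	 * high_water = 47104 and low_water = 45604; XON resumes once
	 * roughly one full frame has drained below the high mark.
	 */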
2942 
2943 	if (adapter->fc) /* locally set flow control value? */
2944 		hw->fc.requested_mode = adapter->fc;
2945 	else
2946 		hw->fc.requested_mode = e1000_fc_full;
2947 
2948 	if (hw->mac.type == e1000_80003es2lan)
2949 		hw->fc.pause_time = 0xFFFF;
2950 	else
2951 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2952 
2953 	hw->fc.send_xon = TRUE;
2954 
2955 	/* Device specific overrides/settings */
2956 	switch (hw->mac.type) {
2957 	case e1000_pchlan:
2958 		/* Workaround: no TX flow ctrl for PCH */
2959 		hw->fc.requested_mode = e1000_fc_rx_pause;
2960 		hw->fc.pause_time = 0xFFFF; /* override */
2961 		if (if_getmtu(ifp) > ETHERMTU) {
2962 			hw->fc.high_water = 0x3500;
2963 			hw->fc.low_water = 0x1500;
2964 		} else {
2965 			hw->fc.high_water = 0x5000;
2966 			hw->fc.low_water = 0x3000;
2967 		}
2968 		hw->fc.refresh_time = 0x1000;
2969 		break;
2970 	case e1000_pch2lan:
2971 	case e1000_pch_lpt:
2972 		hw->fc.high_water = 0x5C20;
2973 		hw->fc.low_water = 0x5048;
2974 		hw->fc.pause_time = 0x0650;
2975 		hw->fc.refresh_time = 0x0400;
2976 		/* Jumbos need adjusted PBA */
2977 		if (if_getmtu(ifp) > ETHERMTU)
2978 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2979 		else
2980 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2981 		break;
2982 	case e1000_ich9lan:
2983 	case e1000_ich10lan:
2984 		if (if_getmtu(ifp) > ETHERMTU) {
2985 			hw->fc.high_water = 0x2800;
2986 			hw->fc.low_water = hw->fc.high_water - 8;
2987 			break;
2988 		}
2989 		/* else fall thru */
2990 	default:
2991 		if (hw->mac.type == e1000_80003es2lan)
2992 			hw->fc.pause_time = 0xFFFF;
2993 		break;
2994 	}
2995 
2996 	/* Issue a global reset */
2997 	e1000_reset_hw(hw);
2998 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2999 	em_disable_aspm(adapter);
3000 	/* and a re-init */
3001 	if (e1000_init_hw(hw) < 0) {
3002 		device_printf(dev, "Hardware Initialization Failed\n");
3003 		return;
3004 	}
3005 
3006 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3007 	e1000_get_phy_info(hw);
3008 	e1000_check_for_link(hw);
3009 	return;
3010 }
3011 
3012 /*********************************************************************
3013  *
3014  *  Setup networking device structure and register an interface.
3015  *
3016  **********************************************************************/
3017 static int
3018 em_setup_interface(device_t dev, struct adapter *adapter)
3019 {
3020 	if_t ifp;
3021 
3022 	INIT_DEBUGOUT("em_setup_interface: begin");
3023 
3024 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3025 	if (ifp == NULL) {
3026 		device_printf(dev, "can not allocate ifnet structure\n");
3027 		return (-1);
3028 	}
3029 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3030 	if_setdev(ifp, dev);
3031 	if_setinitfn(ifp, em_init);
3032 	if_setsoftc(ifp, adapter);
3033 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3034 	if_setioctlfn(ifp, em_ioctl);
3035 	if_setgetcounterfn(ifp, em_get_counter);
3036 #ifdef EM_MULTIQUEUE
3037 	/* Multiqueue stack interface */
3038 	if_settransmitfn(ifp, em_mq_start);
3039 	if_setqflushfn(ifp, em_qflush);
3040 #else
3041 	if_setstartfn(ifp, em_start);
3042 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3043 	if_setsendqready(ifp);
3044 #endif
3045 
3046 	ether_ifattach(ifp, adapter->hw.mac.addr);
3047 
3048 	if_setcapabilities(ifp, 0);
3049 	if_setcapenable(ifp, 0);
3050 
3051 
3052 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3053 	    IFCAP_TSO4, 0);
3054 	/*
3055 	 * Tell the upper layer(s) we
3056 	 * support full VLAN capability
3057 	 */
3058 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3059 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3060 	    IFCAP_VLAN_MTU, 0);
3061 	if_setcapenable(ifp, if_getcapabilities(ifp));
3062 
3063 	/*
3064 	** Don't turn this on by default. If vlans are
3065 	** created on another pseudo device (e.g. lagg)
3066 	** then vlan events are not passed through, breaking
3067 	** operation, but with HW FILTER off it works. If
3068 	** using vlans directly on the em driver you can
3069 	** enable this and get full hardware tag filtering.
3070 	*/
3071 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
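	/*
	 * Illustrative usage sketch: when vlans live directly on em,
	 * hardware tag filtering can be toggled from userland with
	 * the standard ifconfig capability flags, e.g.:
	 *
	 *   # ifconfig em0 vlanhwfilter
	 *   # ifconfig em0 -vlanhwfilter
	 */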
3072 
3073 #ifdef DEVICE_POLLING
3074 	if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
3075 #endif
3076 
3077 	/* Enable only WOL MAGIC by default */
3078 	if (adapter->wol) {
3079 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3080 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3081 	}
3082 
3083 	/*
3084 	 * Specify the media types supported by this adapter and register
3085 	 * callbacks to update media and link information
3086 	 */
3087 	ifmedia_init(&adapter->media, IFM_IMASK,
3088 	    em_media_change, em_media_status);
3089 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3090 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3091 		u_char fiber_type = IFM_1000_SX;	/* default type */
3092 
3093 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3094 			    0, NULL);
3095 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3096 	} else {
3097 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3098 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3099 			    0, NULL);
3100 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3101 			    0, NULL);
3102 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3103 			    0, NULL);
3104 		if (adapter->hw.phy.type != e1000_phy_ife) {
3105 			ifmedia_add(&adapter->media,
3106 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3107 			ifmedia_add(&adapter->media,
3108 				IFM_ETHER | IFM_1000_T, 0, NULL);
3109 		}
3110 	}
3111 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3112 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3113 	return (0);
3114 }
3115 
3116 
3117 /*
3118  * Manage DMA'able memory.
3119  */
3120 static void
3121 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3122 {
3123 	if (error)
3124 		return;
3125 	*(bus_addr_t *) arg = segs[0].ds_addr;
3126 }
3127 
3128 static int
3129 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3130         struct em_dma_alloc *dma, int mapflags)
3131 {
3132 	int error;
3133 
3134 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3135 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3136 				BUS_SPACE_MAXADDR,	/* lowaddr */
3137 				BUS_SPACE_MAXADDR,	/* highaddr */
3138 				NULL, NULL,		/* filter, filterarg */
3139 				size,			/* maxsize */
3140 				1,			/* nsegments */
3141 				size,			/* maxsegsize */
3142 				0,			/* flags */
3143 				NULL,			/* lockfunc */
3144 				NULL,			/* lockarg */
3145 				&dma->dma_tag);
3146 	if (error) {
3147 		device_printf(adapter->dev,
3148 		    "%s: bus_dma_tag_create failed: %d\n",
3149 		    __func__, error);
3150 		goto fail_0;
3151 	}
3152 
3153 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3154 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3155 	if (error) {
3156 		device_printf(adapter->dev,
3157 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3158 		    __func__, (uintmax_t)size, error);
3159 		goto fail_2;
3160 	}
3161 
3162 	dma->dma_paddr = 0;
3163 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3164 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3165 	if (error || dma->dma_paddr == 0) {
3166 		device_printf(adapter->dev,
3167 		    "%s: bus_dmamap_load failed: %d\n",
3168 		    __func__, error);
3169 		goto fail_3;
3170 	}
3171 
3172 	return (0);
3173 
3174 fail_3:
3175 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3176 fail_2:
3177 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3178 	bus_dma_tag_destroy(dma->dma_tag);
3179 fail_0:
3180 	dma->dma_tag = NULL;
3181 
3182 	return (error);
3183 }
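
/*
** Illustrative usage sketch (hypothetical helper, mirroring how
** em_allocate_queues() below uses this pair): em_dma_malloc() hands
** back the same region through dma_vaddr (kernel virtual) and
** dma_paddr (bus address), and every allocation is paired with
** em_dma_free().
*/
#if 0
static int
example_alloc_ring(struct adapter *adapter, struct em_dma_alloc *dma)
{
	/* One page of descriptor space, contiguous and aligned */
	if (em_dma_malloc(adapter, PAGE_SIZE, dma, BUS_DMA_NOWAIT) != 0)
		return (ENOMEM);
	bzero(dma->dma_vaddr, PAGE_SIZE);	/* CPU view of the ring */
	/* ... program dma->dma_paddr into the device ... */
	em_dma_free(adapter, dma);		/* teardown path */
	return (0);
}
#endif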
3184 
3185 static void
3186 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3187 {
3188 	if (dma->dma_tag == NULL)
3189 		return;
3190 	if (dma->dma_paddr != 0) {
3191 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3192 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3193 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3194 		dma->dma_paddr = 0;
3195 	}
3196 	if (dma->dma_vaddr != NULL) {
3197 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3198 		dma->dma_vaddr = NULL;
3199 	}
3200 	bus_dma_tag_destroy(dma->dma_tag);
3201 	dma->dma_tag = NULL;
3202 }
3203 
3204 
3205 /*********************************************************************
3206  *
3207  *  Allocate memory for the transmit and receive rings, and then
3208  *  the descriptors associated with each, called only once at attach.
3209  *
3210  **********************************************************************/
3211 static int
3212 em_allocate_queues(struct adapter *adapter)
3213 {
3214 	device_t		dev = adapter->dev;
3215 	struct tx_ring		*txr = NULL;
3216 	struct rx_ring		*rxr = NULL;
3217 	int rsize, tsize, error = E1000_SUCCESS;
3218 	int txconf = 0, rxconf = 0;
3219 
3220 
3221 	/* Allocate the TX ring struct memory */
3222 	if (!(adapter->tx_rings =
3223 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3224 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3225 		device_printf(dev, "Unable to allocate TX ring memory\n");
3226 		error = ENOMEM;
3227 		goto fail;
3228 	}
3229 
3230 	/* Now allocate the RX */
3231 	if (!(adapter->rx_rings =
3232 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3233 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3234 		device_printf(dev, "Unable to allocate RX ring memory\n");
3235 		error = ENOMEM;
3236 		goto rx_fail;
3237 	}
3238 
3239 	tsize = roundup2(adapter->num_tx_desc *
3240 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3241 	/*
3242 	 * Now set up the TX queues, txconf is needed to handle the
3243 	 * possibility that things fail midcourse and we need to
3244 	 * undo memory gracefully
3245 	 */
3246 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3247 		/* Set up some basics */
3248 		txr = &adapter->tx_rings[i];
3249 		txr->adapter = adapter;
3250 		txr->me = i;
3251 
3252 		/* Initialize the TX lock */
3253 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3254 		    device_get_nameunit(dev), txr->me);
3255 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3256 
3257 		if (em_dma_malloc(adapter, tsize,
3258 			&txr->txdma, BUS_DMA_NOWAIT)) {
3259 			device_printf(dev,
3260 			    "Unable to allocate TX Descriptor memory\n");
3261 			error = ENOMEM;
3262 			goto err_tx_desc;
3263 		}
3264 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3265 		bzero((void *)txr->tx_base, tsize);
3266 
3267 		if (em_allocate_transmit_buffers(txr)) {
3268 			device_printf(dev,
3269 			    "Critical Failure setting up transmit buffers\n");
3270 			error = ENOMEM;
3271 			goto err_tx_desc;
3272 		}
3273 #if __FreeBSD_version >= 800000
3274 		/* Allocate a buf ring */
3275 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3276 		    M_WAITOK, &txr->tx_mtx);
3277 #endif
3278 	}
3279 
3280 	/*
3281 	 * Next the RX queues...
3282 	 */
3283 	rsize = roundup2(adapter->num_rx_desc *
3284 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3285 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3286 		rxr = &adapter->rx_rings[i];
3287 		rxr->adapter = adapter;
3288 		rxr->me = i;
3289 
3290 		/* Initialize the RX lock */
3291 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3292 		    device_get_nameunit(dev), rxr->me);
3293 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3294 
3295 		if (em_dma_malloc(adapter, rsize,
3296 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3297 			device_printf(dev,
3298 			    "Unable to allocate RxDescriptor memory\n");
3299 			error = ENOMEM;
3300 			goto err_rx_desc;
3301 		}
3302 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3303 		bzero((void *)rxr->rx_base, rsize);
3304 
3305 		/* Allocate receive buffers for the ring */
3306 		if (em_allocate_receive_buffers(rxr)) {
3307 			device_printf(dev,
3308 			    "Critical Failure setting up receive buffers\n");
3309 			error = ENOMEM;
3310 			goto err_rx_desc;
3311 		}
3312 	}
3313 
3314 	return (0);
3315 
3316 err_rx_desc:
3317 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3318 		em_dma_free(adapter, &rxr->rxdma);
3319 err_tx_desc:
3320 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3321 		em_dma_free(adapter, &txr->txdma);
3322 	free(adapter->rx_rings, M_DEVBUF);
3323 rx_fail:
3324 #if __FreeBSD_version >= 800000
3325 	if (txr != NULL) buf_ring_free(txr->br, M_DEVBUF);
3326 #endif
3327 	free(adapter->tx_rings, M_DEVBUF);
3328 fail:
3329 	return (error);
3330 }
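
/*
** Note on the unwind idiom above: txconf/rxconf count how many rings
** were fully set up, so the error labels free exactly that many
** descriptor areas in reverse order before releasing the ring
** arrays; a failure on ring N leaves rings 0..N-1 to be torn down.
*/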
3331 
3332 
3333 /*********************************************************************
3334  *
3335  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3336  *  the information needed to transmit a packet on the wire. This is
3337  *  called only once at attach, setup is done every reset.
3338  *
3339  **********************************************************************/
3340 static int
3341 em_allocate_transmit_buffers(struct tx_ring *txr)
3342 {
3343 	struct adapter *adapter = txr->adapter;
3344 	device_t dev = adapter->dev;
3345 	struct em_buffer *txbuf;
3346 	int error, i;
3347 
3348 	/*
3349 	 * Setup DMA descriptor areas.
3350 	 */
3351 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3352 			       1, 0,			/* alignment, bounds */
3353 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3354 			       BUS_SPACE_MAXADDR,	/* highaddr */
3355 			       NULL, NULL,		/* filter, filterarg */
3356 			       EM_TSO_SIZE,		/* maxsize */
3357 			       EM_MAX_SCATTER,		/* nsegments */
3358 			       PAGE_SIZE,		/* maxsegsize */
3359 			       0,			/* flags */
3360 			       NULL,			/* lockfunc */
3361 			       NULL,			/* lockfuncarg */
3362 			       &txr->txtag))) {
3363 		device_printf(dev,"Unable to allocate TX DMA tag\n");
3364 		goto fail;
3365 	}
3366 
3367 	if (!(txr->tx_buffers =
3368 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3369 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3370 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3371 		error = ENOMEM;
3372 		goto fail;
3373 	}
3374 
3375         /* Create the descriptor buffer dma maps */
3376 	txbuf = txr->tx_buffers;
3377 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3378 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3379 		if (error != 0) {
3380 			device_printf(dev, "Unable to create TX DMA map\n");
3381 			goto fail;
3382 		}
3383 	}
3384 
3385 	return 0;
3386 fail:
3387 	/* We free all, it handles case where we are in the middle */
3388 	em_free_transmit_structures(adapter);
3389 	return (error);
3390 }
3391 
3392 /*********************************************************************
3393  *
3394  *  Initialize a transmit ring.
3395  *
3396  **********************************************************************/
3397 static void
3398 em_setup_transmit_ring(struct tx_ring *txr)
3399 {
3400 	struct adapter *adapter = txr->adapter;
3401 	struct em_buffer *txbuf;
3402 	int i;
3403 #ifdef DEV_NETMAP
3404 	struct netmap_slot *slot;
3405 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3406 #endif /* DEV_NETMAP */
3407 
3408 	/* Clear the old descriptor contents */
3409 	EM_TX_LOCK(txr);
3410 #ifdef DEV_NETMAP
3411 	slot = netmap_reset(na, NR_TX, txr->me, 0);
3412 #endif /* DEV_NETMAP */
3413 
3414 	bzero((void *)txr->tx_base,
3415 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3416 	/* Reset indices */
3417 	txr->next_avail_desc = 0;
3418 	txr->next_to_clean = 0;
3419 
3420 	/* Free any existing tx buffers. */
3421 	txbuf = txr->tx_buffers;
3422 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3423 		if (txbuf->m_head != NULL) {
3424 			bus_dmamap_sync(txr->txtag, txbuf->map,
3425 			    BUS_DMASYNC_POSTWRITE);
3426 			bus_dmamap_unload(txr->txtag, txbuf->map);
3427 			m_freem(txbuf->m_head);
3428 			txbuf->m_head = NULL;
3429 		}
3430 #ifdef DEV_NETMAP
3431 		if (slot) {
3432 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3433 			uint64_t paddr;
3434 			void *addr;
3435 
3436 			addr = PNMB(na, slot + si, &paddr);
3437 			txr->tx_base[i].buffer_addr = htole64(paddr);
3438 			/* reload the map for netmap mode */
3439 			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3440 		}
3441 #endif /* DEV_NETMAP */
3442 
3443 		/* clear the watch index */
3444 		txbuf->next_eop = -1;
3445 	}
3446 
3447 	/* Set number of descriptors available */
3448 	txr->tx_avail = adapter->num_tx_desc;
3449 	txr->busy = EM_TX_IDLE;
3450 
3451 	/* Clear checksum offload context. */
3452 	txr->last_hw_offload = 0;
3453 	txr->last_hw_ipcss = 0;
3454 	txr->last_hw_ipcso = 0;
3455 	txr->last_hw_tucss = 0;
3456 	txr->last_hw_tucso = 0;
3457 
3458 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3459 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3460 	EM_TX_UNLOCK(txr);
3461 }
3462 
3463 /*********************************************************************
3464  *
3465  *  Initialize all transmit rings.
3466  *
3467  **********************************************************************/
3468 static void
3469 em_setup_transmit_structures(struct adapter *adapter)
3470 {
3471 	struct tx_ring *txr = adapter->tx_rings;
3472 
3473 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3474 		em_setup_transmit_ring(txr);
3475 
3476 	return;
3477 }
3478 
3479 /*********************************************************************
3480  *
3481  *  Enable transmit unit.
3482  *
3483  **********************************************************************/
3484 static void
3485 em_initialize_transmit_unit(struct adapter *adapter)
3486 {
3487 	struct tx_ring	*txr = adapter->tx_rings;
3488 	struct e1000_hw	*hw = &adapter->hw;
3489 	u32	tctl, txdctl = 0, tarc, tipg = 0;
3490 
3491 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3492 
3493 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3494 		u64 bus_addr = txr->txdma.dma_paddr;
3495 		/* Base and Len of TX Ring */
3496 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3497 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3498 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3499 	    	    (u32)(bus_addr >> 32));
3500 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3501 	    	    (u32)bus_addr);
3502 		/* Init the HEAD/TAIL indices */
3503 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3504 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3505 
3506 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3507 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3508 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3509 
3510 		txr->busy = EM_TX_IDLE;
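		/*
		 * Assemble TXDCTL: the low bits hold the prefetch
		 * threshold (PTHRESH), bits 8+ the host threshold
		 * (HTHRESH), bits 16+ the write-back threshold
		 * (WTHRESH) and the top bits the low-watermark
		 * threshold (LWTHRESH); GRAN selects descriptor
		 * rather than cache-line granularity.
		 */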
3511 		txdctl = 0; /* clear txdctl */
3512 		txdctl |= 0x1f; /* PTHRESH */
3513 		txdctl |= 1 << 8; /* HTHRESH */
3514 		txdctl |= 1 << 16; /* WTHRESH */
3515 		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3516 		txdctl |= E1000_TXDCTL_GRAN;
3517 		txdctl |= 1 << 25; /* LWTHRESH */
3518 
3519 		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3520 	}
3521 
3522 	/* Set the default values for the Tx Inter Packet Gap timer */
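	/*
	 * TIPG packs three fields: the back-to-back transmit gap
	 * (IPGT) in the low bits, and the receive-to-transmit gaps
	 * IPGR1/IPGR2 at their respective shifts.
	 */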
3523 	switch (adapter->hw.mac.type) {
3524 	case e1000_80003es2lan:
3525 		tipg = DEFAULT_82543_TIPG_IPGR1;
3526 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3527 		    E1000_TIPG_IPGR2_SHIFT;
3528 		break;
3529 	default:
3530 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3531 		    (adapter->hw.phy.media_type ==
3532 		    e1000_media_type_internal_serdes))
3533 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3534 		else
3535 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3536 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3537 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3538 	}
3539 
3540 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3541 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3542 
3543 	if (adapter->hw.mac.type >= e1000_82540)
3544 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3545 		    adapter->tx_abs_int_delay.value);
3546 
3547 	if ((adapter->hw.mac.type == e1000_82571) ||
3548 	    (adapter->hw.mac.type == e1000_82572)) {
3549 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3550 		tarc |= TARC_SPEED_MODE_BIT;
3551 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3552 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3553 		/* errata: program both queues to unweighted RR */
3554 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3555 		tarc |= 1;
3556 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3557 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3558 		tarc |= 1;
3559 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3560 	} else if (adapter->hw.mac.type == e1000_82574) {
3561 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3562 		tarc |= TARC_ERRATA_BIT;
3563 		if (adapter->num_queues > 1) {
3564 			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3565 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3566 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3567 		} else
3568 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3569 	}
3570 
3571 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3572 	if (adapter->tx_int_delay.value > 0)
3573 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3574 
3575 	/* Program the Transmit Control Register */
3576 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3577 	tctl &= ~E1000_TCTL_CT;
3578 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3579 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3580 
3581 	if (adapter->hw.mac.type >= e1000_82571)
3582 		tctl |= E1000_TCTL_MULR;
3583 
3584 	/* This write will effectively turn on the transmit unit. */
3585 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3586 
3587 }
3588 
3589 
3590 /*********************************************************************
3591  *
3592  *  Free all transmit rings.
3593  *
3594  **********************************************************************/
3595 static void
3596 em_free_transmit_structures(struct adapter *adapter)
3597 {
3598 	struct tx_ring *txr = adapter->tx_rings;
3599 
3600 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3601 		EM_TX_LOCK(txr);
3602 		em_free_transmit_buffers(txr);
3603 		em_dma_free(adapter, &txr->txdma);
3604 		EM_TX_UNLOCK(txr);
3605 		EM_TX_LOCK_DESTROY(txr);
3606 	}
3607 
3608 	free(adapter->tx_rings, M_DEVBUF);
3609 }
3610 
3611 /*********************************************************************
3612  *
3613  *  Free transmit ring related data structures.
3614  *
3615  **********************************************************************/
3616 static void
3617 em_free_transmit_buffers(struct tx_ring *txr)
3618 {
3619 	struct adapter		*adapter = txr->adapter;
3620 	struct em_buffer	*txbuf;
3621 
3622 	INIT_DEBUGOUT("free_transmit_ring: begin");
3623 
3624 	if (txr->tx_buffers == NULL)
3625 		return;
3626 
3627 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3628 		txbuf = &txr->tx_buffers[i];
3629 		if (txbuf->m_head != NULL) {
3630 			bus_dmamap_sync(txr->txtag, txbuf->map,
3631 			    BUS_DMASYNC_POSTWRITE);
3632 			bus_dmamap_unload(txr->txtag,
3633 			    txbuf->map);
3634 			m_freem(txbuf->m_head);
3635 			txbuf->m_head = NULL;
3636 			if (txbuf->map != NULL) {
3637 				bus_dmamap_destroy(txr->txtag,
3638 				    txbuf->map);
3639 				txbuf->map = NULL;
3640 			}
3641 		} else if (txbuf->map != NULL) {
3642 			bus_dmamap_unload(txr->txtag,
3643 			    txbuf->map);
3644 			bus_dmamap_destroy(txr->txtag,
3645 			    txbuf->map);
3646 			txbuf->map = NULL;
3647 		}
3648 	}
3649 #if __FreeBSD_version >= 800000
3650 	if (txr->br != NULL)
3651 		buf_ring_free(txr->br, M_DEVBUF);
3652 #endif
3653 	if (txr->tx_buffers != NULL) {
3654 		free(txr->tx_buffers, M_DEVBUF);
3655 		txr->tx_buffers = NULL;
3656 	}
3657 	if (txr->txtag != NULL) {
3658 		bus_dma_tag_destroy(txr->txtag);
3659 		txr->txtag = NULL;
3660 	}
3661 	return;
3662 }
3663 
3664 
3665 /*********************************************************************
3666  *  The offload context is protocol specific (TCP/UDP) and thus
3667  *  only needs to be set when the protocol changes. A context
3668  *  change can be a performance detriment, however, and it might
3669  *  be better to just disable the feature. The reason lies in the
3670  *  way the controller pipelines requests from the Tx data DMA.
3671  *  Up to four requests can be pipelined, and they may belong to
3672  *  the same packet or to multiple packets. However, all requests
3673  *  for one packet are issued before any request is issued for a
3674  *  subsequent packet, and if a request for the next packet
3675  *  requires a context change, that request is stalled until the
3676  *  previous request completes. Setting up a new context thus
3677  *  effectively disables pipelined Tx data DMA, which in turn
3678  *  greatly slows down performance when sending small
3679  *  frames.
3680  **********************************************************************/
3681 static void
3682 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3683     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3684 {
3685 	struct adapter			*adapter = txr->adapter;
3686 	struct e1000_context_desc	*TXD = NULL;
3687 	struct em_buffer		*tx_buffer;
3688 	int				cur, hdr_len;
3689 	u32				cmd = 0;
3690 	u16				offload = 0;
3691 	u8				ipcso, ipcss, tucso, tucss;
3692 
3693 	ipcss = ipcso = tucss = tucso = 0;
3694 	hdr_len = ip_off + (ip->ip_hl << 2);
3695 	cur = txr->next_avail_desc;
3696 
3697 	/* Setup of IP header checksum. */
3698 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3699 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3700 		offload |= CSUM_IP;
3701 		ipcss = ip_off;
3702 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3703 		/*
3704 		 * Start offset for header checksum calculation.
3705 		 * End offset for header checksum calculation.
3706 		 * Offset of place to put the checksum.
3707 		 */
3708 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3709 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3710 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3711 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3712 		cmd |= E1000_TXD_CMD_IP;
3713 	}
3714 
3715 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3716  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3717  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3718  		offload |= CSUM_TCP;
3719  		tucss = hdr_len;
3720  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3721  		/*
3722  		 * Setting up a new checksum offload context for every frame
3723  		 * takes a lot of processing time for the hardware. This also
3724  		 * hurts performance a lot for small frames, so avoid it if
3725  		 * the driver can reuse the previously configured checksum
3726  		 * offload context.
3727  		 */
3728  		if (txr->last_hw_offload == offload) {
3729  			if (offload & CSUM_IP) {
3730  				if (txr->last_hw_ipcss == ipcss &&
3731  				    txr->last_hw_ipcso == ipcso &&
3732  				    txr->last_hw_tucss == tucss &&
3733  				    txr->last_hw_tucso == tucso)
3734  					return;
3735  			} else {
3736  				if (txr->last_hw_tucss == tucss &&
3737  				    txr->last_hw_tucso == tucso)
3738  					return;
3739  			}
3740   		}
3741  		txr->last_hw_offload = offload;
3742  		txr->last_hw_tucss = tucss;
3743  		txr->last_hw_tucso = tucso;
3744  		/*
3745  		 * Start offset for payload checksum calculation.
3746  		 * End offset for payload checksum calculation.
3747  		 * Offset of place to put the checksum.
3748  		 */
3749 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3750  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3751  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3752  		TXD->upper_setup.tcp_fields.tucso = tucso;
3753  		cmd |= E1000_TXD_CMD_TCP;
3754  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3755  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3756  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3757  		tucss = hdr_len;
3758  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
 		offload |= CSUM_UDP;	/* record the protocol, mirroring the TCP path */
3759  		/*
3760  		 * Setting up a new checksum offload context for every frame
3761  		 * takes a lot of processing time for the hardware. This also
3762  		 * hurts performance a lot for small frames, so avoid it if
3763  		 * the driver can reuse the previously configured checksum
3764  		 * offload context.
3765  		 */
3766  		if (txr->last_hw_offload == offload) {
3767  			if (offload & CSUM_IP) {
3768  				if (txr->last_hw_ipcss == ipcss &&
3769  				    txr->last_hw_ipcso == ipcso &&
3770  				    txr->last_hw_tucss == tucss &&
3771  				    txr->last_hw_tucso == tucso)
3772  					return;
3773  			} else {
3774  				if (txr->last_hw_tucss == tucss &&
3775  				    txr->last_hw_tucso == tucso)
3776  					return;
3777  			}
3778  		}
3779  		txr->last_hw_offload = offload;
3780  		txr->last_hw_tucss = tucss;
3781  		txr->last_hw_tucso = tucso;
3782  		/*
3783  		 * Start offset for payload checksum calculation.
3784  		 * End offset for payload checksum calculation.
3785  		 * Offset of place to put the checksum.
3786  		 */
3787 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3788  		TXD->upper_setup.tcp_fields.tucss = tucss;
3789  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3790  		TXD->upper_setup.tcp_fields.tucso = tucso;
3791   	}
3792 
3793  	if (offload & CSUM_IP) {
3794  		txr->last_hw_ipcss = ipcss;
3795  		txr->last_hw_ipcso = ipcso;
3796   	}
3797 
3798 	TXD->tcp_seg_setup.data = htole32(0);
3799 	TXD->cmd_and_length =
3800 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3801 	tx_buffer = &txr->tx_buffers[cur];
3802 	tx_buffer->m_head = NULL;
3803 	tx_buffer->next_eop = -1;
3804 
3805 	if (++cur == adapter->num_tx_desc)
3806 		cur = 0;
3807 
3808 	txr->tx_avail--;
3809 	txr->next_avail_desc = cur;
3810 }
3811 
3812 
3813 /**********************************************************************
3814  *
3815  *  Setup work for hardware segmentation offload (TSO)
3816  *
3817  **********************************************************************/
3818 static void
3819 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3820     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3821 {
3822 	struct adapter			*adapter = txr->adapter;
3823 	struct e1000_context_desc	*TXD;
3824 	struct em_buffer		*tx_buffer;
3825 	int cur, hdr_len;
3826 
3827 	/*
3828 	 * In theory we can reuse the same TSO context if and only if
3829 	 * the frame is the same type (IP/TCP) and has the same MSS.
3830 	 * However, checking whether a frame has the same IP/TCP
3831 	 * structure is a hard thing, so just ignore that and always
3832 	 * re-establish a new TSO context.
3833 	 */
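	/*
	 * e.g. a typical IPv4/TCP frame with no options has
	 * ip_off = 14 (ethernet header), ip_hl = 5 and th_off = 5,
	 * giving hdr_len = 14 + 20 + 20 = 54 bytes of headers.
	 */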
3834 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3835 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3836 		      E1000_TXD_DTYP_D |	/* Data descr type */
3837 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3838 
3839 	/* IP and/or TCP header checksum calculation and insertion. */
3840 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3841 
3842 	cur = txr->next_avail_desc;
3843 	tx_buffer = &txr->tx_buffers[cur];
3844 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3845 
3846 	/*
3847 	 * Start offset for header checksum calculation.
3848 	 * End offset for header checksum calculation.
3849 	 * Offset of place to put the checksum.
3850 	 */
3851 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3852 	TXD->lower_setup.ip_fields.ipcse =
3853 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3854 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3855 	/*
3856 	 * Start offset for payload checksum calculation.
3857 	 * End offset for payload checksum calculation.
3858 	 * Offset of place to put the checksum.
3859 	 */
3860 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3861 	TXD->upper_setup.tcp_fields.tucse = 0;
3862 	TXD->upper_setup.tcp_fields.tucso =
3863 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3864 	/*
3865 	 * Payload size per packet w/o any headers.
3866 	 * Length of all headers up to payload.
3867 	 */
3868 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3869 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3870 
3871 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3872 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3873 				E1000_TXD_CMD_TSE |	/* TSE context */
3874 				E1000_TXD_CMD_IP |	/* Do IP csum */
3875 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3876 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3877 
3878 	tx_buffer->m_head = NULL;
3879 	tx_buffer->next_eop = -1;
3880 
3881 	if (++cur == adapter->num_tx_desc)
3882 		cur = 0;
3883 
3884 	txr->tx_avail--;
3885 	txr->next_avail_desc = cur;
3886 	txr->tx_tso = TRUE;
3887 }
3888 
3889 
3890 /**********************************************************************
3891  *
3892  *  Examine each tx_buffer in the used queue. If the hardware is done
3893  *  processing the packet then free associated resources. The
3894  *  tx_buffer is put back on the free queue.
3895  *
3896  **********************************************************************/
3897 static void
3898 em_txeof(struct tx_ring *txr)
3899 {
3900 	struct adapter	*adapter = txr->adapter;
3901 	int first, last, done, processed;
3902 	struct em_buffer *tx_buffer;
3903 	struct e1000_tx_desc *tx_desc, *eop_desc;
3904 	if_t ifp = adapter->ifp;
3905 
3906 	EM_TX_LOCK_ASSERT(txr);
3907 #ifdef DEV_NETMAP
3908 	if (netmap_tx_irq(ifp, txr->me))
3909 		return;
3910 #endif /* DEV_NETMAP */
3911 
3912 	/* No work, make sure hang detection is disabled */
3913 	if (txr->tx_avail == adapter->num_tx_desc) {
3914 		txr->busy = EM_TX_IDLE;
3915 		return;
3916 	}
3917 
3918 	processed = 0;
3919 	first = txr->next_to_clean;
3920 	tx_desc = &txr->tx_base[first];
3921 	tx_buffer = &txr->tx_buffers[first];
3922 	last = tx_buffer->next_eop;
3923 	eop_desc = &txr->tx_base[last];
3924 
3925 	/*
3926 	 * Get the index of the first descriptor
3927 	 * AFTER the EOP of the first packet, so
3928 	 * that the inner while loop can use a
3929 	 * simple comparison.
3930 	 */
3931 	if (++last == adapter->num_tx_desc)
3932  		last = 0;
3933 	done = last;
3934 
3935 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3936 	    BUS_DMASYNC_POSTREAD);
3937 
3938 	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3939 		/* We clean the range of the packet */
3940 		while (first != done) {
3941 			tx_desc->upper.data = 0;
3942 			tx_desc->lower.data = 0;
3943 			tx_desc->buffer_addr = 0;
3944 			++txr->tx_avail;
3945 			++processed;
3946 
3947 			if (tx_buffer->m_head) {
3948 				bus_dmamap_sync(txr->txtag,
3949 				    tx_buffer->map,
3950 				    BUS_DMASYNC_POSTWRITE);
3951 				bus_dmamap_unload(txr->txtag,
3952 				    tx_buffer->map);
3953 				m_freem(tx_buffer->m_head);
3954 				tx_buffer->m_head = NULL;
3955 			}
3956 			tx_buffer->next_eop = -1;
3957 
3958 			if (++first == adapter->num_tx_desc)
3959 				first = 0;
3960 
3961 			tx_buffer = &txr->tx_buffers[first];
3962 			tx_desc = &txr->tx_base[first];
3963 		}
3964 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
3965 		/* See if we can continue to the next packet */
3966 		last = tx_buffer->next_eop;
3967 		if (last != -1) {
3968 			eop_desc = &txr->tx_base[last];
3969 			/* Get new done point */
3970 			if (++last == adapter->num_tx_desc) last = 0;
3971 			done = last;
3972 		} else
3973 			break;
3974 	}
3975 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3976 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3977 
3978 	txr->next_to_clean = first;
3980 	/*
3981 	** Hang detection: we know there's work outstanding
3982 	** or the early return above would have been taken, so if no
3983 	** descriptor was processed here it indicates a potential hang.
3984 	** The local timer will examine this and do a reset if needed.
3985 	*/
3986 	if (processed == 0) {
3987 		if (txr->busy != EM_TX_HUNG)
3988 			++txr->busy;
3989 	} else /* At least one descriptor was cleaned */
3990 		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
3991 
3992 	/*
3993 	 * Once a minimum number of descriptors are free, clear
3994 	 * IFF_DRV_OACTIVE to tell the stack that it is OK to send
3995 	 * packets again. Note that all writes of OACTIVE happen
3996 	 * under the TX lock which, with a single queue, guarantees
3997 	 * sanity.
3998 	 */
3999 	if (txr->tx_avail >= EM_MAX_SCATTER) {
4000 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
4001 	}
4002 
4003 	/* Disable hang detection if all clean */
4004 	if (txr->tx_avail == adapter->num_tx_desc)
4005 		txr->busy = EM_TX_IDLE;
4006 }
4007 
4008 
4009 /*********************************************************************
4010  *
4011  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4012  *
4013  **********************************************************************/
4014 static void
4015 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4016 {
4017 	struct adapter		*adapter = rxr->adapter;
4018 	struct mbuf		*m;
4019 	bus_dma_segment_t	segs[1];
4020 	struct em_buffer	*rxbuf;
4021 	int			i, j, error, nsegs;
4022 	bool			cleaned = FALSE;
4023 
4024 	i = j = rxr->next_to_refresh;
4025 	/*
4026 	** Get one descriptor beyond
4027 	** our work mark to control
4028 	** the loop.
4029 	*/
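	/*
	 * 'i' is the slot being refreshed while 'j' runs one slot
	 * ahead; stopping when 'j' reaches 'limit' leaves a gap so
	 * the tail never catches up with the cleanup point.
	 */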
4030 	if (++j == adapter->num_rx_desc)
4031 		j = 0;
4032 
4033 	while (j != limit) {
4034 		rxbuf = &rxr->rx_buffers[i];
4035 		if (rxbuf->m_head == NULL) {
4036 			m = m_getjcl(M_NOWAIT, MT_DATA,
4037 			    M_PKTHDR, adapter->rx_mbuf_sz);
4038 			/*
4039 			** If we have a temporary resource shortage
4040 			** that causes a failure, just abort the refresh
4041 			** for now; we will return to this point when
4042 			** reinvoked from em_rxeof.
4043 			*/
4044 			if (m == NULL)
4045 				goto update;
4046 		} else
4047 			m = rxbuf->m_head;
4048 
4049 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4050 		m->m_flags |= M_PKTHDR;
4051 		m->m_data = m->m_ext.ext_buf;
4052 
4053 		/* Use bus_dma machinery to setup the memory mapping  */
4054 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4055 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
4056 		if (error != 0) {
4057 			printf("Refresh mbufs: hdr dmamap load"
4058 			    " failure - %d\n", error);
4059 			m_free(m);
4060 			rxbuf->m_head = NULL;
4061 			goto update;
4062 		}
4063 		rxbuf->m_head = m;
4064 		bus_dmamap_sync(rxr->rxtag,
4065 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4066 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
4067 		cleaned = TRUE;
4068 
4069 		i = j; /* Next is precalculated for us */
4070 		rxr->next_to_refresh = i;
4071 		/* Calculate next controlling index */
4072 		if (++j == adapter->num_rx_desc)
4073 			j = 0;
4074 	}
4075 update:
4076 	/*
4077 	** Update the tail pointer only if, and only
4078 	** as far as, we have refreshed descriptors.
4079 	*/
4080 	if (cleaned)
4081 		E1000_WRITE_REG(&adapter->hw,
4082 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4083 
4084 	return;
4085 }
4086 
4087 
4088 /*********************************************************************
4089  *
4090  *  Allocate memory for rx_buffer structures. Since we use one
4091  *  rx_buffer per received packet, the maximum number of rx_buffer's
4092  *  that we'll need is equal to the number of receive descriptors
4093  *  that we've allocated.
4094  *
4095  **********************************************************************/
4096 static int
4097 em_allocate_receive_buffers(struct rx_ring *rxr)
4098 {
4099 	struct adapter		*adapter = rxr->adapter;
4100 	device_t		dev = adapter->dev;
4101 	struct em_buffer	*rxbuf;
4102 	int			error;
4103 
4104 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4105 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4106 	if (rxr->rx_buffers == NULL) {
4107 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4108 		return (ENOMEM);
4109 	}
4110 
4111 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4112 				1, 0,			/* alignment, bounds */
4113 				BUS_SPACE_MAXADDR,	/* lowaddr */
4114 				BUS_SPACE_MAXADDR,	/* highaddr */
4115 				NULL, NULL,		/* filter, filterarg */
4116 				MJUM9BYTES,		/* maxsize */
4117 				1,			/* nsegments */
4118 				MJUM9BYTES,		/* maxsegsize */
4119 				0,			/* flags */
4120 				NULL,			/* lockfunc */
4121 				NULL,			/* lockarg */
4122 				&rxr->rxtag);
4123 	if (error) {
4124 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4125 		    __func__, error);
4126 		goto fail;
4127 	}
4128 
4129 	rxbuf = rxr->rx_buffers;
4130 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4131 		rxbuf = &rxr->rx_buffers[i];
4132 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4133 		if (error) {
4134 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4135 			    __func__, error);
4136 			goto fail;
4137 		}
4138 	}
4139 
4140 	return (0);
4141 
4142 fail:
4143 	em_free_receive_structures(adapter);
4144 	return (error);
4145 }
4146 
4147 
4148 /*********************************************************************
4149  *
4150  *  Initialize a receive ring and its buffers.
4151  *
4152  **********************************************************************/
4153 static int
4154 em_setup_receive_ring(struct rx_ring *rxr)
4155 {
4156 	struct	adapter 	*adapter = rxr->adapter;
4157 	struct em_buffer	*rxbuf;
4158 	bus_dma_segment_t	seg[1];
4159 	int			rsize, nsegs, error = 0;
4160 #ifdef DEV_NETMAP
4161 	struct netmap_slot *slot;
4162 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4163 #endif
4164 
4165 
4166 	/* Clear the ring contents */
4167 	EM_RX_LOCK(rxr);
4168 	rsize = roundup2(adapter->num_rx_desc *
4169 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4170 	bzero((void *)rxr->rx_base, rsize);
4171 #ifdef DEV_NETMAP
4172 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4173 #endif
4174 
4175 	/*
4176 	** Free current RX buffer structs and their mbufs
4177 	*/
4178 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4179 		rxbuf = &rxr->rx_buffers[i];
4180 		if (rxbuf->m_head != NULL) {
4181 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4182 			    BUS_DMASYNC_POSTREAD);
4183 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4184 			m_freem(rxbuf->m_head);
4185 			rxbuf->m_head = NULL; /* mark as freed */
4186 		}
4187 	}
4188 
4189 	/* Now replenish the mbufs */
4190 	for (int j = 0; j != adapter->num_rx_desc; ++j) {
4191 		rxbuf = &rxr->rx_buffers[j];
4192 #ifdef DEV_NETMAP
4193 		if (slot) {
4194 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4195 			uint64_t paddr;
4196 			void *addr;
4197 
4198 			addr = PNMB(na, slot + si, &paddr);
4199 			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4200 			/* Update descriptor */
4201 			rxr->rx_base[j].buffer_addr = htole64(paddr);
4202 			continue;
4203 		}
4204 #endif /* DEV_NETMAP */
4205 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4206 		    M_PKTHDR, adapter->rx_mbuf_sz);
4207 		if (rxbuf->m_head == NULL) {
4208 			error = ENOBUFS;
4209 			goto fail;
4210 		}
4211 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4212 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4213 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4214 
4215 		/* Get the memory mapping */
4216 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4217 		    rxbuf->map, rxbuf->m_head, seg,
4218 		    &nsegs, BUS_DMA_NOWAIT);
4219 		if (error != 0) {
4220 			m_freem(rxbuf->m_head);
4221 			rxbuf->m_head = NULL;
4222 			goto fail;
4223 		}
4224 		bus_dmamap_sync(rxr->rxtag,
4225 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4226 
4227 		/* Update descriptor */
4228 		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4229 	}
4230 	rxr->next_to_check = 0;
4231 	rxr->next_to_refresh = 0;
4232 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4233 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4234 
4235 fail:
4236 	EM_RX_UNLOCK(rxr);
4237 	return (error);
4238 }
4239 
4240 /*********************************************************************
4241  *
4242  *  Initialize all receive rings.
4243  *
4244  **********************************************************************/
4245 static int
4246 em_setup_receive_structures(struct adapter *adapter)
4247 {
4248 	struct rx_ring *rxr = adapter->rx_rings;
4249 	int q;
4250 
4251 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4252 		if (em_setup_receive_ring(rxr))
4253 			goto fail;
4254 
4255 	return (0);
4256 fail:
4257 	/*
4258 	 * Free the RX buffers allocated so far; we only handle
4259 	 * the rings that completed, since the failing case has
4260 	 * cleaned up after itself. 'q' failed, so it's the terminus.
4261 	 */
4262 	for (int i = 0; i < q; ++i) {
4263 		rxr = &adapter->rx_rings[i];
4264 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4265 			struct em_buffer *rxbuf;
4266 			rxbuf = &rxr->rx_buffers[n];
4267 			if (rxbuf->m_head != NULL) {
4268 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4269 			  	  BUS_DMASYNC_POSTREAD);
4270 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4271 				m_freem(rxbuf->m_head);
4272 				rxbuf->m_head = NULL;
4273 			}
4274 		}
4275 		rxr->next_to_check = 0;
4276 		rxr->next_to_refresh = 0;
4277 	}
4278 
4279 	return (ENOBUFS);
4280 }
4281 
4282 /*********************************************************************
4283  *
4284  *  Free all receive rings.
4285  *
4286  **********************************************************************/
4287 static void
4288 em_free_receive_structures(struct adapter *adapter)
4289 {
4290 	struct rx_ring *rxr = adapter->rx_rings;
4291 
4292 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4293 		em_free_receive_buffers(rxr);
4294 		/* Free the ring memory as well */
4295 		em_dma_free(adapter, &rxr->rxdma);
4296 		EM_RX_LOCK_DESTROY(rxr);
4297 	}
4298 
4299 	free(adapter->rx_rings, M_DEVBUF);
4300 }
4301 
4302 
4303 /*********************************************************************
4304  *
4305  *  Free receive ring data structures
4306  *
4307  **********************************************************************/
4308 static void
4309 em_free_receive_buffers(struct rx_ring *rxr)
4310 {
4311 	struct adapter		*adapter = rxr->adapter;
4312 	struct em_buffer	*rxbuf = NULL;
4313 
4314 	INIT_DEBUGOUT("free_receive_buffers: begin");
4315 
4316 	if (rxr->rx_buffers != NULL) {
4317 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4318 			rxbuf = &rxr->rx_buffers[i];
4319 			if (rxbuf->map != NULL) {
4320 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4321 				    BUS_DMASYNC_POSTREAD);
4322 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4323 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4324 			}
4325 			if (rxbuf->m_head != NULL) {
4326 				m_freem(rxbuf->m_head);
4327 				rxbuf->m_head = NULL;
4328 			}
4329 		}
4330 		free(rxr->rx_buffers, M_DEVBUF);
4331 		rxr->rx_buffers = NULL;
4332 		rxr->next_to_check = 0;
4333 		rxr->next_to_refresh = 0;
4334 	}
4335 
4336 	if (rxr->rxtag != NULL) {
4337 		bus_dma_tag_destroy(rxr->rxtag);
4338 		rxr->rxtag = NULL;
4339 	}
4340 
4341 	return;
4342 }
4343 
4344 
4345 /*********************************************************************
4346  *
4347  *  Enable receive unit.
4348  *
4349  **********************************************************************/
4350 
4351 static void
4352 em_initialize_receive_unit(struct adapter *adapter)
4353 {
4354 	struct rx_ring	*rxr = adapter->rx_rings;
4355 	if_t ifp = adapter->ifp;
4356 	struct e1000_hw	*hw = &adapter->hw;
4357 	u64	bus_addr;
4358 	u32	rctl, rxcsum;
4359 
4360 	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4361 
4362 	/*
4363 	 * Make sure receives are disabled while setting
4364 	 * up the descriptor ring
4365 	 */
4366 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4367 	/* 82574/82583: never disable the receiver once it has been enabled */
4368 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4369 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4370 
4371 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4372 	    adapter->rx_abs_int_delay.value);
4373 
4374 	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4375 	    adapter->rx_int_delay.value);
4376 	/*
4377 	 * Set the interrupt throttling rate. Value is calculated
4378 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4379 	 */
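	/*
	 * e.g. with the driver default of 8000 interrupts/sec this
	 * works out to about 488 in 256-nanosecond units, i.e. a
	 * minimum of roughly 125us between interrupts.
	 */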
4380 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4381 
4382 	/*
4383 	** When using MSIX interrupts we need to throttle
4384 	** using the EITR register (82574 only)
4385 	*/
4386 	if (hw->mac.type == e1000_82574) {
4387 		u32 rfctl;
4388 		for (int i = 0; i < 4; i++)
4389 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4390 			    DEFAULT_ITR);
4391 		/* Disable accelerated acknowledge */
4392 		rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4393 		rfctl |= E1000_RFCTL_ACK_DIS;
4394 		E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4395 	}
4396 
4397 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4398 	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4399 #ifdef EM_MULTIQUEUE
4400 		rxcsum |= E1000_RXCSUM_TUOFL |
4401 			  E1000_RXCSUM_IPOFL |
4402 			  E1000_RXCSUM_PCSD;
4403 #else
4404 		rxcsum |= E1000_RXCSUM_TUOFL;
4405 #endif
4406 	} else
4407 		rxcsum &= ~E1000_RXCSUM_TUOFL;
4408 
4409 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4410 
4411 #ifdef EM_MULTIQUEUE
4412 	if (adapter->num_queues > 1) {
4413 		uint32_t rss_key[10];
4414 		uint32_t reta;
4415 		int i;
4416 
4417 		/*
4418 		* Configure RSS key
4419 		*/
4420 		arc4rand(rss_key, sizeof(rss_key), 0);
4421 		for (i = 0; i < 10; ++i)
4422 			E1000_WRITE_REG_ARRAY(hw,E1000_RSSRK(0), i, rss_key[i]);
4423 
4424 		/*
4425 		* Configure the RSS redirect table in the following fashion:
4426 		* (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4427 		*/
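		/*
		 * Each 32-bit RETA register holds four byte-wide entries
		 * in which bit 7 selects the queue. With two queues the
		 * loop below yields 0x80008000, i.e. entries alternating
		 * between queue 0 and queue 1, replicated across all 32
		 * RETA registers for 128 table entries in total.
		 */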
4428 		reta = 0;
4429 		for (i = 0; i < 4; ++i) {
4430 			uint32_t q;
4431 			q = (i % adapter->num_queues) << 7;
4432 			reta |= q << (8 * i);
4433 		}
4434 		for (i = 0; i < 32; ++i)
4435 			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4436 
4437 		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4438 				E1000_MRQC_RSS_FIELD_IPV4_TCP |
4439 				E1000_MRQC_RSS_FIELD_IPV4 |
4440 				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4441 				E1000_MRQC_RSS_FIELD_IPV6_EX |
4442 				E1000_MRQC_RSS_FIELD_IPV6 |
4443 				E1000_MRQC_RSS_FIELD_IPV6_TCP);
4444 	}
4445 #endif
4446 	/*
4447 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4448 	** long latencies are observed, like Lenovo X60. This
4449 	** change eliminates the problem, but since having positive
4450 	** values in RDTR is a known source of problems on other
4451 	** platforms another solution is being sought.
4452 	*/
4453 	if (hw->mac.type == e1000_82573)
4454 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4455 
4456 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4457 		/* Setup the Base and Length of the Rx Descriptor Ring */
4458 		u32 rdt = adapter->num_rx_desc - 1; /* default */
4459 
4460 		bus_addr = rxr->rxdma.dma_paddr;
4461 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4462 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4463 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4464 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4465 		/* Setup the Head and Tail Descriptor Pointers */
4466 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4467 #ifdef DEV_NETMAP
4468 		/*
4469 		 * an init() while a netmap client is active must
4470 		 * preserve the rx buffers passed to userspace.
4471 		 */
4472 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4473 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4474 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4475 		}
4476 #endif /* DEV_NETMAP */
4477 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4478 	}
4479 
4480 	/*
4481 	 * Set PTHRESH for improved jumbo performance
4482 	 * According to 10.2.5.11 of Intel 82574 Datasheet,
4483 	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4484 	 * Only write to RXDCTL(1) if there is a need for different
4485 	 * settings.
4486 	 */
4487 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4488 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4489 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4490 	    (if_getmtu(ifp) > ETHERMTU)) {
4491 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4492 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4493 	} else if ((adapter->hw.mac.type == e1000_82574) &&
4494 		  (if_getmtu(ifp) > ETHERMTU)) {
4495 		for (int i = 0; i < adapter->num_queues; i++) {
4496 			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4497 
4498 			rxdctl |= 0x20; /* PTHRESH */
4499 			rxdctl |= 4 << 8; /* HTHRESH */
4500 			rxdctl |= 4 << 16; /* WTHRESH */
4501 			rxdctl |= 1 << 24; /* Switch to granularity */
4502 			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4503 		}
4504 	}
4505 
4506 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4507 		if (if_getmtu(ifp) > ETHERMTU)
4508 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4509 		else
4510 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4511 	}
4512 
4513 	/* Setup the Receive Control Register */
4514 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4515 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4516 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4517 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4518 
4519 	/* Strip the CRC */
4520 	rctl |= E1000_RCTL_SECRC;
4521 
4522 	/* Make sure VLAN filters are off */
4523 	rctl &= ~E1000_RCTL_VFE;
4524 	rctl &= ~E1000_RCTL_SBP;
4525 
4526 	if (adapter->rx_mbuf_sz == MCLBYTES)
4527 		rctl |= E1000_RCTL_SZ_2048;
4528 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4529 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4530 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4531 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4532 
4533 	if (if_getmtu(ifp) > ETHERMTU)
4534 		rctl |= E1000_RCTL_LPE;
4535 	else
4536 		rctl &= ~E1000_RCTL_LPE;
4537 
4538 	/* Write out the settings */
4539 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4540 
4541 	return;
4542 }
4543 
4544 
4545 /*********************************************************************
4546  *
4547  *  This routine executes in interrupt context. It replenishes
4548  *  the mbufs in the descriptor ring and sends data which has
4549  *  been DMA'ed into host memory to the upper layer.
4550  *
4551  *  We loop at most count times if count is > 0, or until done if
4552  *  count < 0.
4553  *
4554  *  For polling we also return the number of cleaned packets.
4555  *********************************************************************/
4556 static bool
4557 em_rxeof(struct rx_ring *rxr, int count, int *done)
4558 {
4559 	struct adapter		*adapter = rxr->adapter;
4560 	if_t ifp = adapter->ifp;
4561 	struct mbuf		*mp, *sendmp;
4562 	u8			status = 0;
4563 	u16 			len;
4564 	int			i, processed, rxdone = 0;
4565 	bool			eop;
4566 	struct e1000_rx_desc	*cur;
4567 
4568 	EM_RX_LOCK(rxr);
4569 
4570 	/* Sync the ring */
4571 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4572 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4573 
4574 
4575 #ifdef DEV_NETMAP
4576 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4577 		EM_RX_UNLOCK(rxr);
4578 		return (FALSE);
4579 	}
4580 #endif /* DEV_NETMAP */
4581 
4582 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4583 
4584 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4585 			break;
4586 
4587 		cur = &rxr->rx_base[i];
4588 		status = cur->status;
4589 		mp = sendmp = NULL;
4590 
4591 		if ((status & E1000_RXD_STAT_DD) == 0)
4592 			break;
4593 
4594 		len = le16toh(cur->length);
4595 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4596 
4597 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4598 		    (rxr->discard == TRUE)) {
4599 			adapter->dropped_pkts++;
4600 			++rxr->rx_discarded;
4601 			if (!eop) /* Catch subsequent segs */
4602 				rxr->discard = TRUE;
4603 			else
4604 				rxr->discard = FALSE;
4605 			em_rx_discard(rxr, i);
4606 			goto next_desc;
4607 		}
4608 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4609 
4610 		/* Assign correct length to the current fragment */
4611 		mp = rxr->rx_buffers[i].m_head;
4612 		mp->m_len = len;
4613 
4614 		/* Trigger for refresh */
4615 		rxr->rx_buffers[i].m_head = NULL;
4616 
4617 		/* First segment? */
4618 		if (rxr->fmp == NULL) {
4619 			mp->m_pkthdr.len = len;
4620 			rxr->fmp = rxr->lmp = mp;
4621 		} else {
4622 			/* Chain mbufs together */
4623 			mp->m_flags &= ~M_PKTHDR;
4624 			rxr->lmp->m_next = mp;
4625 			rxr->lmp = mp;
4626 			rxr->fmp->m_pkthdr.len += len;
4627 		}
4628 
4629 		if (eop) {
4630 			--count;
4631 			sendmp = rxr->fmp;
4632 			if_setrcvif(sendmp, ifp);
4633 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4634 			em_receive_checksum(cur, sendmp);
4635 #ifndef __NO_STRICT_ALIGNMENT
4636 			if (adapter->hw.mac.max_frame_size >
4637 			    (MCLBYTES - ETHER_ALIGN) &&
4638 			    em_fixup_rx(rxr) != 0)
4639 				goto skip;
4640 #endif
4641 			if (status & E1000_RXD_STAT_VP) {
4642 				if_setvtag(sendmp,
4643 				    le16toh(cur->special));
4644 				sendmp->m_flags |= M_VLANTAG;
4645 			}
4646 #ifndef __NO_STRICT_ALIGNMENT
4647 skip:
4648 #endif
4649 			rxr->fmp = rxr->lmp = NULL;
4650 		}
4651 next_desc:
4652 		/* Sync the ring */
4653 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4654 	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4655 
4656 		/* Zero out the receive descriptors status. */
4657 		cur->status = 0;
4658 		++rxdone;	/* cumulative for POLL */
4659 		++processed;
4660 
4661 		/* Advance our pointers to the next descriptor. */
4662 		if (++i == adapter->num_rx_desc)
4663 			i = 0;
4664 
4665 		/* Send to the stack */
4666 		if (sendmp != NULL) {
4667 			rxr->next_to_check = i;
4668 			EM_RX_UNLOCK(rxr);
4669 			if_input(ifp, sendmp);
4670 			EM_RX_LOCK(rxr);
4671 			i = rxr->next_to_check;
4672 		}
4673 
4674 		/* Only refresh mbufs every 8 descriptors */
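		/* (batching amortizes the RDT tail writes in em_refresh_mbufs) */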
4675 		if (processed == 8) {
4676 			em_refresh_mbufs(rxr, i);
4677 			processed = 0;
4678 		}
4679 	}
4680 
4681 	/* Catch any remaining refresh work */
4682 	if (e1000_rx_unrefreshed(rxr))
4683 		em_refresh_mbufs(rxr, i);
4684 
4685 	rxr->next_to_check = i;
4686 	if (done != NULL)
4687 		*done = rxdone;
4688 	EM_RX_UNLOCK(rxr);
4689 
4690 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4691 }
4692 
4693 static __inline void
4694 em_rx_discard(struct rx_ring *rxr, int i)
4695 {
4696 	struct em_buffer	*rbuf;
4697 
4698 	rbuf = &rxr->rx_buffers[i];
4699 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4700 
4701 	/* Free any previous pieces */
4702 	if (rxr->fmp != NULL) {
4703 		rxr->fmp->m_flags |= M_PKTHDR;
4704 		m_freem(rxr->fmp);
4705 		rxr->fmp = NULL;
4706 		rxr->lmp = NULL;
4707 	}
4708 	/*
4709 	** Free the buffer and allow em_refresh_mbufs()
4710 	** to clean up and recharge it.
4711 	*/
4712 	if (rbuf->m_head) {
4713 		m_free(rbuf->m_head);
4714 		rbuf->m_head = NULL;
4715 	}
4716 	return;
4717 }
4718 
4719 #ifndef __NO_STRICT_ALIGNMENT
4720 /*
4721  * When jumbo frames are enabled we should realign the entire payload on
4722  * architectures with strict alignment. This is a serious design mistake
4723  * of the 8254x, as it nullifies the benefit of DMA: the 8254x only
4724  * allows the RX buffer size to be 2048/4096/8192/16384, while what we
4725  * really want is 2048 - ETHER_ALIGN so the payload ends up aligned. On
4726  * architectures without strict alignment restrictions the 8254x still
4727  * performs unaligned memory accesses, which reduce performance as well.
4728  * To avoid copying an entire frame just to align it, we allocate a new mbuf,
4729  * copy the ethernet header into it, and prepend it to the existing mbuf chain.
4730  *
4731  * Be aware, the best performance of the 8254x is achieved only when jumbo
4732  * frames are not used at all on architectures with strict alignment.
4733  */
4734 static int
4735 em_fixup_rx(struct rx_ring *rxr)
4736 {
4737 	struct adapter *adapter = rxr->adapter;
4738 	struct mbuf *m, *n;
4739 	int error;
4740 
4741 	error = 0;
4742 	m = rxr->fmp;
4743 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4744 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4745 		m->m_data += ETHER_HDR_LEN;
4746 	} else {
4747 		MGETHDR(n, M_NOWAIT, MT_DATA);
4748 		if (n != NULL) {
4749 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4750 			m->m_data += ETHER_HDR_LEN;
4751 			m->m_len -= ETHER_HDR_LEN;
4752 			n->m_len = ETHER_HDR_LEN;
4753 			M_MOVE_PKTHDR(n, m);
4754 			n->m_next = m;
4755 			rxr->fmp = n;
4756 		} else {
4757 			adapter->dropped_pkts++;
4758 			m_freem(rxr->fmp);
4759 			rxr->fmp = NULL;
4760 			error = ENOMEM;
4761 		}
4762 	}
4763 
4764 	return (error);
4765 }
4766 #endif
4767 
4768 /*********************************************************************
4769  *
4770  *  Verify that the hardware indicated that the checksum is valid.
4771  *  Inform the stack about the status of checksum so that stack
4772  *  doesn't spend time verifying the checksum.
4773  *
4774  *********************************************************************/
4775 static void
4776 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4777 {
4778 	mp->m_pkthdr.csum_flags = 0;
4779 
4780 	/* Ignore Checksum bit is set */
4781 	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4782 		return;
4783 
4784 	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4785 		return;
4786 
4787 	/* IP Checksum Good? */
4788 	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4789 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4790 
4791 	/* TCP or UDP checksum */
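	/*
	 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data of 0xffff
	 * tells the stack the L4 checksum, including the pseudo-header,
	 * has already been verified in full.
	 */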
4792 	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4793 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4794 		mp->m_pkthdr.csum_data = htons(0xffff);
4795 	}
4796 }
4797 
4798 /*
4799  * This routine is run via a vlan
4800  * config EVENT.
4801  */
4802 static void
4803 em_register_vlan(void *arg, if_t ifp, u16 vtag)
4804 {
4805 	struct adapter	*adapter = if_getsoftc(ifp);
4806 	u32		index, bit;
4807 
4808 	if ((void *)adapter != arg)	/* Not our event */
4809 		return;
4810 
4811 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4812 		return;
4813 
4814 	EM_CORE_LOCK(adapter);
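	/*
	 * The VFTA is 128 32-bit words covering all 4096 VLAN IDs:
	 * vtag >> 5 selects the word and vtag & 0x1F the bit within
	 * it; e.g. vtag 100 maps to bit 4 of shadow_vfta[3].
	 */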
4815 	index = (vtag >> 5) & 0x7F;
4816 	bit = vtag & 0x1F;
4817 	adapter->shadow_vfta[index] |= (1 << bit);
4818 	++adapter->num_vlans;
4819 	/* Re-init to load the changes */
4820 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4821 		em_init_locked(adapter);
4822 	EM_CORE_UNLOCK(adapter);
4823 }
4824 
4825 /*
4826  * This routine is run via a vlan
4827  * unconfig EVENT.
4828  */
4829 static void
4830 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
4831 {
4832 	struct adapter	*adapter = if_getsoftc(ifp);
4833 	u32		index, bit;
4834 
4835 	if (adapter != arg)
4836 		return;
4837 
4838 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4839 		return;
4840 
4841 	EM_CORE_LOCK(adapter);
4842 	index = (vtag >> 5) & 0x7F;
4843 	bit = vtag & 0x1F;
4844 	adapter->shadow_vfta[index] &= ~(1 << bit);
4845 	--adapter->num_vlans;
4846 	/* Re-init to load the changes */
4847 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4848 		em_init_locked(adapter);
4849 	EM_CORE_UNLOCK(adapter);
4850 }
4851 
4852 static void
4853 em_setup_vlan_hw_support(struct adapter *adapter)
4854 {
4855 	struct e1000_hw *hw = &adapter->hw;
4856 	u32             reg;
4857 
4858 	/*
4859 	** We get here through init_locked, meaning
4860 	** a soft reset, which has already cleared
4861 	** the VFTA and other state; so if no vlans
4862 	** have been registered, do nothing.
4863 	*/
4864 	if (adapter->num_vlans == 0)
4865 		return;
4866 
4867 	/*
4868 	** A soft reset zeroes out the VFTA, so
4869 	** we need to repopulate it now.
4870 	*/
4871 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4872 		if (adapter->shadow_vfta[i] != 0)
4873 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4874 			    i, adapter->shadow_vfta[i]);
4875 
4876 	reg = E1000_READ_REG(hw, E1000_CTRL);
4877 	reg |= E1000_CTRL_VME;
4878 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4879 
4880 	/* Enable the Filter Table */
4881 	reg = E1000_READ_REG(hw, E1000_RCTL);
4882 	reg &= ~E1000_RCTL_CFIEN;
4883 	reg |= E1000_RCTL_VFE;
4884 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4885 }
4886 
4887 static void
4888 em_enable_intr(struct adapter *adapter)
4889 {
4890 	struct e1000_hw *hw = &adapter->hw;
4891 	u32 ims_mask = IMS_ENABLE_MASK;
4892 
4893 	if (hw->mac.type == e1000_82574) {
4894 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4895 		ims_mask |= EM_MSIX_MASK;
4896 	}
4897 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4898 }
4899 
4900 static void
4901 em_disable_intr(struct adapter *adapter)
4902 {
4903 	struct e1000_hw *hw = &adapter->hw;
4904 
4905 	if (hw->mac.type == e1000_82574)
4906 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4907 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4908 }
4909 
4910 /*
4911  * Bit of a misnomer: what this really means is
4912  * to enable OS management of the system, i.e.
4913  * to disable the special hardware management features.
4914  */
4915 static void
4916 em_init_manageability(struct adapter *adapter)
4917 {
4918 	/* A shared code workaround */
4919 #define E1000_82542_MANC2H E1000_MANC2H
4920 	if (adapter->has_manage) {
4921 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4922 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4923 
4924 		/* disable hardware interception of ARP */
4925 		manc &= ~(E1000_MANC_ARP_EN);
4926 
4927 		/* enable receiving management packets to the host */
4928 		manc |= E1000_MANC_EN_MNG2HOST;
4929 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4930 #define E1000_MNG2HOST_PORT_664 (1 << 6)
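		/* 623 and 664 are the standard RMCP management ports */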
4931 		manc2h |= E1000_MNG2HOST_PORT_623;
4932 		manc2h |= E1000_MNG2HOST_PORT_664;
4933 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4934 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4935 	}
4936 }
4937 
4938 /*
4939  * Give control back to hardware management
4940  * controller if there is one.
4941  */
4942 static void
4943 em_release_manageability(struct adapter *adapter)
4944 {
4945 	if (adapter->has_manage) {
4946 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4947 
4948 		/* re-enable hardware interception of ARP */
4949 		manc |= E1000_MANC_ARP_EN;
4950 		manc &= ~E1000_MANC_EN_MNG2HOST;
4951 
4952 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4953 	}
4954 }
4955 
4956 /*
4957  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4958  * For ASF and Pass Through versions of f/w this means
4959  * that the driver is loaded. For AMT version type f/w
4960  * this means that the network i/f is open.
4961  */
4962 static void
4963 em_get_hw_control(struct adapter *adapter)
4964 {
4965 	u32 ctrl_ext, swsm;
4966 
4967 	if (adapter->hw.mac.type == e1000_82573) {
4968 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4969 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4970 		    swsm | E1000_SWSM_DRV_LOAD);
4971 		return;
4972 	}
4973 	/* else */
4974 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4975 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4976 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4977 	return;
4978 }
4979 
4980 /*
4981  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4982  * For ASF and Pass Through versions of f/w this means that
4983  * the driver is no longer loaded. For AMT versions of the
4984  * f/w this means that the network i/f is closed.
4985  */
4986 static void
4987 em_release_hw_control(struct adapter *adapter)
4988 {
4989 	u32 ctrl_ext, swsm;
4990 
4991 	if (!adapter->has_manage)
4992 		return;
4993 
4994 	if (adapter->hw.mac.type == e1000_82573) {
4995 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4996 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4997 		    swsm & ~E1000_SWSM_DRV_LOAD);
4998 		return;
4999 	}
5000 	/* else */
5001 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5002 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5003 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5004 	return;
5005 }
5006 
5007 static int
5008 em_is_valid_ether_addr(u8 *addr)
5009 {
5010 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5011 
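	/* Reject multicast/broadcast (I/G bit set) and the all-zero address */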
5012 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5013 		return (FALSE);
5014 	}
5015 
5016 	return (TRUE);
5017 }
5018 
5019 /*
5020 ** Parse the interface capabilities with regard
5021 ** to both system management and wake-on-lan for
5022 ** later use.
5023 */
5024 static void
5025 em_get_wakeup(device_t dev)
5026 {
5027 	struct adapter	*adapter = device_get_softc(dev);
5028 	u16		eeprom_data = 0, device_id, apme_mask;
5029 
5030 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5031 	apme_mask = EM_EEPROM_APME;
5032 
5033 	switch (adapter->hw.mac.type) {
5034 	case e1000_82573:
5035 	case e1000_82583:
5036 		adapter->has_amt = TRUE;
5037 		/* Falls thru */
5038 	case e1000_82571:
5039 	case e1000_82572:
5040 	case e1000_80003es2lan:
5041 		if (adapter->hw.bus.func == 1) {
5042 			e1000_read_nvm(&adapter->hw,
5043 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5044 			break;
5045 		} else
5046 			e1000_read_nvm(&adapter->hw,
5047 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5048 		break;
5049 	case e1000_ich8lan:
5050 	case e1000_ich9lan:
5051 	case e1000_ich10lan:
5052 	case e1000_pchlan:
5053 	case e1000_pch2lan:
5054 		apme_mask = E1000_WUC_APME;
5055 		adapter->has_amt = TRUE;
5056 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5057 		break;
5058 	default:
5059 		e1000_read_nvm(&adapter->hw,
5060 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5061 		break;
5062 	}
5063 	if (eeprom_data & apme_mask)
5064 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5065 	/*
5066 	 * We have the eeprom settings; now apply the special cases
5067 	 * where the eeprom may be wrong or the board won't support
5068 	 * wake on lan on a particular port.
5069 	 */
5070 	device_id = pci_get_device(dev);
5071 	switch (device_id) {
5072 	case E1000_DEV_ID_82571EB_FIBER:
5073 		/* Wake events only supported on port A for dual fiber
5074 		 * regardless of eeprom setting */
5075 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5076 		    E1000_STATUS_FUNC_1)
5077 			adapter->wol = 0;
5078 		break;
5079 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
5080 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
5081 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5082 		/* if quad port adapter, disable WoL on all but port A */
5083 		if (global_quad_port_a != 0)
5084 			adapter->wol = 0;
5085 		/* Reset for multiple quad port adapters */
5086 		if (++global_quad_port_a == 4)
5087 			global_quad_port_a = 0;
5088 		break;
5089 	}
5090 	return;
5091 }
5092 
5093 
5094 /*
5095  * Enable PCI Wake On Lan capability
5096  */
5097 static void
5098 em_enable_wakeup(device_t dev)
5099 {
5100 	struct adapter	*adapter = device_get_softc(dev);
5101 	if_t ifp = adapter->ifp;
5102 	u32		pmc, ctrl, ctrl_ext, rctl;
5103 	u16     	status;
5104 
5105 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
5106 		return;
5107 
5108 	/* Advertise the wakeup capability */
5109 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5110 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5111 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5112 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5113 
5114 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
5115 	    (adapter->hw.mac.type == e1000_pchlan) ||
5116 	    (adapter->hw.mac.type == e1000_ich9lan) ||
5117 	    (adapter->hw.mac.type == e1000_ich10lan))
5118 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
5119 
5120 	/* Keep the laser running on Fiber adapters */
5121 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5122 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5123 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5124 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5125 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5126 	}
5127 
	/*
	** Determine type of Wakeup: em_get_wakeup() seeded wol with
	** the magic-packet and multicast bits; clear whichever ones
	** the interface capabilities do not request.
	*/
5132 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5133 		adapter->wol &= ~E1000_WUFC_MAG;
5134 
5135 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5136 		adapter->wol &= ~E1000_WUFC_MC;
5137 	else {
5138 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5139 		rctl |= E1000_RCTL_MPE;
5140 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5141 	}
5142 
5143 	if ((adapter->hw.mac.type == e1000_pchlan) ||
5144 	    (adapter->hw.mac.type == e1000_pch2lan)) {
5145 		if (em_enable_phy_wakeup(adapter))
5146 			return;
5147 	} else {
5148 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5149 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5150 	}
5151 
5152 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5153 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5154 
	/* Request PME */
	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
	if (if_getcapenable(ifp) & IFCAP_WOL)
		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5163 }
5164 
5165 /*
5166 ** WOL in the newer chipset interfaces (pchlan)
** requires things to be copied into the PHY
5168 */
5169 static int
5170 em_enable_phy_wakeup(struct adapter *adapter)
5171 {
5172 	struct e1000_hw *hw = &adapter->hw;
5173 	u32 mreg, ret = 0;
5174 	u16 preg;
5175 
5176 	/* copy MAC RARs to PHY RARs */
5177 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5178 
5179 	/* copy MAC MTA to PHY MTA */
	for (int i = 0; i < hw->mac.mta_reg_count; i++) {
5181 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5182 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5183 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5184 		    (u16)((mreg >> 16) & 0xFFFF));
5185 	}
5186 
5187 	/* configure PHY Rx Control register */
	e1000_read_phy_reg(hw, BM_RCTL, &preg);
5189 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5190 	if (mreg & E1000_RCTL_UPE)
5191 		preg |= BM_RCTL_UPE;
5192 	if (mreg & E1000_RCTL_MPE)
5193 		preg |= BM_RCTL_MPE;
5194 	preg &= ~(BM_RCTL_MO_MASK);
5195 	if (mreg & E1000_RCTL_MO_3)
5196 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5197 				<< BM_RCTL_MO_SHIFT);
5198 	if (mreg & E1000_RCTL_BAM)
5199 		preg |= BM_RCTL_BAM;
5200 	if (mreg & E1000_RCTL_PMCF)
5201 		preg |= BM_RCTL_PMCF;
5202 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5203 	if (mreg & E1000_CTRL_RFCE)
5204 		preg |= BM_RCTL_RFCE;
	e1000_write_phy_reg(hw, BM_RCTL, preg);
5206 
5207 	/* enable PHY wakeup in MAC register */
5208 	E1000_WRITE_REG(hw, E1000_WUC,
5209 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5210 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5211 
5212 	/* configure and enable PHY wakeup in PHY registers */
	e1000_write_phy_reg(hw, BM_WUFC, adapter->wol);
	e1000_write_phy_reg(hw, BM_WUC, E1000_WUC_PME_EN);
5215 
5216 	/* activate PHY wakeup */
5217 	ret = hw->phy.ops.acquire(hw);
	if (ret) {
		device_printf(adapter->dev, "Could not acquire PHY\n");
		return (ret);
	}
5222 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5223 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5224 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
	if (ret) {
		device_printf(adapter->dev,
		    "Could not read PHY page 769\n");
		goto out;
	}
5229 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5230 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
	if (ret)
		device_printf(adapter->dev,
		    "Could not set PHY Host Wakeup bit\n");
5233 out:
5234 	hw->phy.ops.release(hw);
5235 
	return (ret);
5237 }
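
/*
 * Worked example for the MTA copy above (the value is illustrative):
 * a 32-bit MAC multicast-table word of 0xDEADBEEF is split into two
 * 16-bit PHY writes, 0xBEEF to BM_MTA(i) and 0xDEAD to BM_MTA(i) + 1,
 * because the BM PHY wakeup registers are only 16 bits wide.
 */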
5238 
5239 static void
5240 em_led_func(void *arg, int onoff)
5241 {
5242 	struct adapter	*adapter = arg;
5243 
5244 	EM_CORE_LOCK(adapter);
5245 	if (onoff) {
5246 		e1000_setup_led(&adapter->hw);
5247 		e1000_led_on(&adapter->hw);
5248 	} else {
5249 		e1000_led_off(&adapter->hw);
5250 		e1000_cleanup_led(&adapter->hw);
5251 	}
5252 	EM_CORE_UNLOCK(adapter);
5253 }
5254 
5255 /*
** Disable the L0s and L1 LINK states
5257 */
5258 static void
5259 em_disable_aspm(struct adapter *adapter)
5260 {
5261 	int		base, reg;
	u16		link_cap, link_ctrl;
5263 	device_t	dev = adapter->dev;
5264 
	switch (adapter->hw.mac.type) {
	case e1000_82573:
	case e1000_82574:
	case e1000_82583:
		break;
	default:
		return;
	}
5273 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5274 		return;
5275 	reg = base + PCIER_LINK_CAP;
5276 	link_cap = pci_read_config(dev, reg, 2);
5277 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5278 		return;
5279 	reg = base + PCIER_LINK_CTL;
5280 	link_ctrl = pci_read_config(dev, reg, 2);
5281 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5282 	pci_write_config(dev, reg, link_ctrl, 2);
5284 }
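
/*
 * PCIEM_LINK_CTL_ASPMC covers the two low bits of the PCIe Link Control
 * register (bit 0 gates L0s entry, bit 1 gates L1 entry); clearing both,
 * as done above, turns ASPM off entirely for the affected MAC types.
 */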
5285 
5286 /**********************************************************************
5287  *
5288  *  Update the board statistics counters.
5289  *
5290  **********************************************************************/
5291 static void
5292 em_update_stats_counters(struct adapter *adapter)
5293 {
5294 
	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5296 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5297 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5298 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5299 	}
5300 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5301 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5302 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5303 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5304 
5305 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5306 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5307 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5308 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5309 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5310 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5311 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5312 	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5313 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5314 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5315 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5316 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5317 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5318 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5319 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5320 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5321 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5322 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5323 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5324 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5325 
	/*
	 * For the 64-bit byte counters the low dword must be read first;
	 * both registers clear on the read of the high dword.
	 */
5328 
5329 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5330 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5331 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5332 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5333 
5334 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5335 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5336 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5337 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5338 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5339 
	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5342 
5343 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5344 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5345 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5346 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5347 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5348 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5349 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5350 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5351 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5352 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5353 
5354 	/* Interrupt Counts */
5355 
5356 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5357 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5358 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5359 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5360 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5361 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5362 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5363 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5364 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5365 
5366 	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5379 	}
5380 }
5381 
5382 static uint64_t
5383 em_get_counter(if_t ifp, ift_counter cnt)
5384 {
5385 	struct adapter *adapter;
5386 
5387 	adapter = if_getsoftc(ifp);
5388 
5389 	switch (cnt) {
5390 	case IFCOUNTER_COLLISIONS:
5391 		return (adapter->stats.colc);
5392 	case IFCOUNTER_IERRORS:
5393 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5394 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5395 		    adapter->stats.ruc + adapter->stats.roc +
5396 		    adapter->stats.mpc + adapter->stats.cexterr);
5397 	case IFCOUNTER_OERRORS:
5398 		return (adapter->stats.ecol + adapter->stats.latecol +
5399 		    adapter->watchdog_events);
5400 	default:
5401 		return (if_get_counter_default(ifp, cnt));
5402 	}
5403 }
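
/*
 * A sketch of how em_get_counter() is wired up as the interface counter
 * callback; the actual call lives in the interface-setup path of this
 * driver:
 *
 *	if_setgetcounterfn(ifp, em_get_counter);
 *
 * Counters without a case above fall through to the stack's defaults.
 */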
5404 
5405 /* Export a single 32-bit register via a read-only sysctl. */
5406 static int
5407 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5408 {
5409 	struct adapter *adapter;
5410 	u_int val;
5411 
5412 	adapter = oidp->oid_arg1;
5413 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5414 	return (sysctl_handle_int(oidp, &val, 0, req));
5415 }
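
/*
 * The register offset arrives in oid_arg2, so this one handler serves
 * every register-backed node registered in em_add_hw_stats() below;
 * with a hypothetical unit 0, for example:
 *
 *	# sysctl dev.em.0.device_control
 *	# sysctl dev.em.0.rx_control
 */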
5416 
5417 /*
5418  * Add sysctl variables, one per statistic, to the system.
5419  */
5420 static void
5421 em_add_hw_stats(struct adapter *adapter)
5422 {
5423 	device_t dev = adapter->dev;
5424 
5425 	struct tx_ring *txr = adapter->tx_rings;
5426 	struct rx_ring *rxr = adapter->rx_rings;
5427 
5428 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5429 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5430 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5431 	struct e1000_hw_stats *stats = &adapter->stats;
5432 
5433 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5434 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5435 
5436 #define QUEUE_NAME_LEN 32
5437 	char namebuf[QUEUE_NAME_LEN];
5438 
5439 	/* Driver Statistics */
5440 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5441 			CTLFLAG_RD, &adapter->link_irq,
			"Link MSI-X IRQ Handled");
5443 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5444 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5445 			 "Std mbuf failed");
5446 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5447 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5448 			 "Std mbuf cluster failed");
5449 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5450 			CTLFLAG_RD, &adapter->dropped_pkts,
5451 			"Driver dropped packets");
5452 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5453 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5454 			"Driver tx dma failure in xmit");
5455 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5456 			CTLFLAG_RD, &adapter->rx_overruns,
5457 			"RX overruns");
5458 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5459 			CTLFLAG_RD, &adapter->watchdog_events,
5460 			"Watchdog timeouts");
5461 
5462 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5463 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5464 			em_sysctl_reg_handler, "IU",
5465 			"Device Control Register");
5466 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5467 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5468 			em_sysctl_reg_handler, "IU",
5469 			"Receiver Control Register");
5470 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5471 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5472 			"Flow Control High Watermark");
5473 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5474 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5475 			"Flow Control Low Watermark");
5476 
5477 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5478 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5479 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5480 					    CTLFLAG_RD, NULL, "TX Queue Name");
5481 		queue_list = SYSCTL_CHILDREN(queue_node);
5482 
5483 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5484 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5485 				E1000_TDH(txr->me),
5486 				em_sysctl_reg_handler, "IU",
5487  				"Transmit Descriptor Head");
5488 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5489 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5490 				E1000_TDT(txr->me),
5491 				em_sysctl_reg_handler, "IU",
5492  				"Transmit Descriptor Tail");
5493 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5494 				CTLFLAG_RD, &txr->tx_irq,
5495 				"Queue MSI-X Transmit Interrupts");
5496 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5497 				CTLFLAG_RD, &txr->no_desc_avail,
5498 				"Queue No Descriptor Available");
5499 
5500 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5501 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5502 					    CTLFLAG_RD, NULL, "RX Queue Name");
5503 		queue_list = SYSCTL_CHILDREN(queue_node);
5504 
5505 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5506 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5507 				E1000_RDH(rxr->me),
5508 				em_sysctl_reg_handler, "IU",
5509 				"Receive Descriptor Head");
5510 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5511 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5512 				E1000_RDT(rxr->me),
5513 				em_sysctl_reg_handler, "IU",
5514 				"Receive Descriptor Tail");
5515 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5516 				CTLFLAG_RD, &rxr->rx_irq,
5517 				"Queue MSI-X Receive Interrupts");
5518 	}
5519 
5520 	/* MAC stats get their own sub node */
5521 
5522 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5523 				    CTLFLAG_RD, NULL, "Statistics");
5524 	stat_list = SYSCTL_CHILDREN(stat_node);
5525 
5526 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5527 			CTLFLAG_RD, &stats->ecol,
5528 			"Excessive collisions");
5529 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5530 			CTLFLAG_RD, &stats->scc,
5531 			"Single collisions");
5532 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5533 			CTLFLAG_RD, &stats->mcc,
5534 			"Multiple collisions");
5535 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5536 			CTLFLAG_RD, &stats->latecol,
5537 			"Late collisions");
5538 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5539 			CTLFLAG_RD, &stats->colc,
5540 			"Collision Count");
5541 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5542 			CTLFLAG_RD, &adapter->stats.symerrs,
5543 			"Symbol Errors");
5544 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5545 			CTLFLAG_RD, &adapter->stats.sec,
5546 			"Sequence Errors");
5547 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5548 			CTLFLAG_RD, &adapter->stats.dc,
5549 			"Defer Count");
5550 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5551 			CTLFLAG_RD, &adapter->stats.mpc,
5552 			"Missed Packets");
5553 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5554 			CTLFLAG_RD, &adapter->stats.rnbc,
5555 			"Receive No Buffers");
5556 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5557 			CTLFLAG_RD, &adapter->stats.ruc,
5558 			"Receive Undersize");
5559 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5560 			CTLFLAG_RD, &adapter->stats.rfc,
			"Fragmented Packets Received");
5562 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5563 			CTLFLAG_RD, &adapter->stats.roc,
5564 			"Oversized Packets Received");
5565 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5566 			CTLFLAG_RD, &adapter->stats.rjc,
			"Received Jabber");
5568 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5569 			CTLFLAG_RD, &adapter->stats.rxerrc,
5570 			"Receive Errors");
5571 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5572 			CTLFLAG_RD, &adapter->stats.crcerrs,
5573 			"CRC errors");
5574 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5575 			CTLFLAG_RD, &adapter->stats.algnerrc,
5576 			"Alignment Errors");
5577 	/* On 82575 these are collision counts */
5578 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5579 			CTLFLAG_RD, &adapter->stats.cexterr,
5580 			"Collision/Carrier extension errors");
5581 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5582 			CTLFLAG_RD, &adapter->stats.xonrxc,
5583 			"XON Received");
5584 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5585 			CTLFLAG_RD, &adapter->stats.xontxc,
5586 			"XON Transmitted");
5587 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5588 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5589 			"XOFF Received");
5590 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5591 			CTLFLAG_RD, &adapter->stats.xofftxc,
5592 			"XOFF Transmitted");
5593 
5594 	/* Packet Reception Stats */
5595 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5596 			CTLFLAG_RD, &adapter->stats.tpr,
5597 			"Total Packets Received ");
5598 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5599 			CTLFLAG_RD, &adapter->stats.gprc,
5600 			"Good Packets Received");
5601 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5602 			CTLFLAG_RD, &adapter->stats.bprc,
5603 			"Broadcast Packets Received");
5604 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5605 			CTLFLAG_RD, &adapter->stats.mprc,
5606 			"Multicast Packets Received");
5607 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5608 			CTLFLAG_RD, &adapter->stats.prc64,
5609 			"64 byte frames received ");
5610 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5611 			CTLFLAG_RD, &adapter->stats.prc127,
5612 			"65-127 byte frames received");
5613 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5614 			CTLFLAG_RD, &adapter->stats.prc255,
5615 			"128-255 byte frames received");
5616 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5617 			CTLFLAG_RD, &adapter->stats.prc511,
5618 			"256-511 byte frames received");
5619 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5620 			CTLFLAG_RD, &adapter->stats.prc1023,
5621 			"512-1023 byte frames received");
5622 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5623 			CTLFLAG_RD, &adapter->stats.prc1522,
			"1024-1522 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
			CTLFLAG_RD, &adapter->stats.gorc,
			"Good Octets Received");
5628 
5629 	/* Packet Transmission Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
			CTLFLAG_RD, &adapter->stats.gotc,
			"Good Octets Transmitted");
5633 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5634 			CTLFLAG_RD, &adapter->stats.tpt,
5635 			"Total Packets Transmitted");
5636 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5637 			CTLFLAG_RD, &adapter->stats.gptc,
5638 			"Good Packets Transmitted");
5639 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5640 			CTLFLAG_RD, &adapter->stats.bptc,
5641 			"Broadcast Packets Transmitted");
5642 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5643 			CTLFLAG_RD, &adapter->stats.mptc,
5644 			"Multicast Packets Transmitted");
5645 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5646 			CTLFLAG_RD, &adapter->stats.ptc64,
5647 			"64 byte frames transmitted ");
5648 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5649 			CTLFLAG_RD, &adapter->stats.ptc127,
5650 			"65-127 byte frames transmitted");
5651 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5652 			CTLFLAG_RD, &adapter->stats.ptc255,
5653 			"128-255 byte frames transmitted");
5654 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5655 			CTLFLAG_RD, &adapter->stats.ptc511,
5656 			"256-511 byte frames transmitted");
5657 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5658 			CTLFLAG_RD, &adapter->stats.ptc1023,
5659 			"512-1023 byte frames transmitted");
5660 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5661 			CTLFLAG_RD, &adapter->stats.ptc1522,
5662 			"1024-1522 byte frames transmitted");
5663 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5664 			CTLFLAG_RD, &adapter->stats.tsctc,
5665 			"TSO Contexts Transmitted");
5666 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5667 			CTLFLAG_RD, &adapter->stats.tsctfc,
5668 			"TSO Contexts Failed");
5669 
5671 	/* Interrupt Stats */
5672 
5673 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5674 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5675 	int_list = SYSCTL_CHILDREN(int_node);
5676 
5677 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5678 			CTLFLAG_RD, &adapter->stats.iac,
5679 			"Interrupt Assertion Count");
5680 
5681 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5682 			CTLFLAG_RD, &adapter->stats.icrxptc,
5683 			"Interrupt Cause Rx Pkt Timer Expire Count");
5684 
5685 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5686 			CTLFLAG_RD, &adapter->stats.icrxatc,
5687 			"Interrupt Cause Rx Abs Timer Expire Count");
5688 
5689 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5690 			CTLFLAG_RD, &adapter->stats.ictxptc,
5691 			"Interrupt Cause Tx Pkt Timer Expire Count");
5692 
5693 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5694 			CTLFLAG_RD, &adapter->stats.ictxatc,
5695 			"Interrupt Cause Tx Abs Timer Expire Count");
5696 
5697 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5698 			CTLFLAG_RD, &adapter->stats.ictxqec,
5699 			"Interrupt Cause Tx Queue Empty Count");
5700 
5701 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5702 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5703 			"Interrupt Cause Tx Queue Min Thresh Count");
5704 
5705 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5706 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5707 			"Interrupt Cause Rx Desc Min Thresh Count");
5708 
5709 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5710 			CTLFLAG_RD, &adapter->stats.icrxoc,
5711 			"Interrupt Cause Receiver Overrun Count");
5712 }
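
/*
 * The resulting tree can be browsed with sysctl(8); the unit number
 * below is illustrative:
 *
 *	# sysctl dev.em.0.mac_stats
 *	# sysctl dev.em.0.queue_tx_0
 *	# sysctl dev.em.0.interrupts
 */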
5713 
5714 /**********************************************************************
5715  *
 *  This routine provides a way to dump out the adapter EEPROM,
 *  often a useful debug/service tool. Only the first 32 words are
 *  dumped; the fields that matter fall within that range.
5719  *
5720  **********************************************************************/
5721 static int
5722 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5723 {
5724 	struct adapter *adapter = (struct adapter *)arg1;
5725 	int error;
5726 	int result;
5727 
5728 	result = -1;
5729 	error = sysctl_handle_int(oidp, &result, 0, req);
5730 
5731 	if (error || !req->newptr)
5732 		return (error);
5733 
5734 	/*
5735 	 * This value will cause a hex dump of the
5736 	 * first 32 16-bit words of the EEPROM to
5737 	 * the screen.
5738 	 */
5739 	if (result == 1)
5740 		em_print_nvm_info(adapter);
5741 
5742 	return (error);
5743 }
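
/*
 * Usage sketch (the node name is registered in the attach path; "nvm"
 * here is an assumption): writing 1 dumps the EEPROM to the console.
 *
 *	# sysctl dev.em.0.nvm=1
 */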
5744 
5745 static void
5746 em_print_nvm_info(struct adapter *adapter)
5747 {
5748 	u16	eeprom_data;
5749 	int	i, j, row = 0;
5750 
	/* It's a bit crude, but it gets the job done */
5752 	printf("\nInterface EEPROM Dump:\n");
5753 	printf("Offset\n0x0000  ");
5754 	for (i = 0, j = 0; i < 32; i++, j++) {
5755 		if (j == 8) { /* Make the offset block */
5756 			j = 0; ++row;
			printf("\n0x00%x0  ", row);
5758 		}
5759 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5760 		printf("%04x ", eeprom_data);
5761 	}
5762 	printf("\n");
5763 }
5764 
5765 static int
5766 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5767 {
5768 	struct em_int_delay_info *info;
5769 	struct adapter *adapter;
5770 	u32 regval;
5771 	int error, usecs, ticks;
5772 
5773 	info = (struct em_int_delay_info *)arg1;
5774 	usecs = info->value;
5775 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5776 	if (error != 0 || req->newptr == NULL)
5777 		return (error);
5778 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5779 		return (EINVAL);
5780 	info->value = usecs;
5781 	ticks = EM_USECS_TO_TICKS(usecs);
5782 	if (info->offset == E1000_ITR)	/* units are 256ns here */
5783 		ticks *= 4;
5784 
5785 	adapter = info->adapter;
5786 
5787 	EM_CORE_LOCK(adapter);
5788 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5789 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5790 	/* Handle a few special cases. */
5791 	switch (info->offset) {
5792 	case E1000_RDTR:
5793 		break;
5794 	case E1000_TIDV:
5795 		if (ticks == 0) {
5796 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5797 			/* Don't write 0 into the TIDV register. */
5798 			regval++;
5799 		} else
5800 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5801 		break;
5802 	}
5803 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5804 	EM_CORE_UNLOCK(adapter);
5805 	return (0);
5806 }
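
/*
 * Worked example of the conversion above, assuming the
 * (1000 * usecs + 512) / 1024 rounding that EM_USECS_TO_TICKS() uses
 * (a 1.024 usec register granularity): a request of 128 usec becomes
 * 125 ticks; for E1000_ITR, whose units are 256 ns, that is scaled by
 * 4 to 500, and 500 * 256 ns = 128 usec exactly.
 */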
5807 
5808 static void
5809 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5810 	const char *description, struct em_int_delay_info *info,
5811 	int offset, int value)
5812 {
5813 	info->adapter = adapter;
5814 	info->offset = offset;
5815 	info->value = value;
5816 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5817 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
5819 	    info, 0, em_sysctl_int_delay, "I", description);
5820 }
5821 
5822 static void
5823 em_set_sysctl_value(struct adapter *adapter, const char *name,
5824 	const char *description, int *limit, int value)
5825 {
5826 	*limit = value;
5827 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5828 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5829 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5830 }
5831 
5832 
5833 /*
5834 ** Set flow control using sysctl:
5835 ** Flow control values:
5836 **      0 - off
5837 **      1 - rx pause
5838 **      2 - tx pause
5839 **      3 - full
5840 */
5841 static int
5842 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5843 {
	struct adapter	*adapter = (struct adapter *) arg1;
	int		error;
	int		input = adapter->fc; /* default to current setting */

	error = sysctl_handle_int(oidp, &input, 0, req);

	if (error || req->newptr == NULL)
		return (error);

	if (input == adapter->fc) /* no change? */
		return (error);

	switch (input) {
	case e1000_fc_rx_pause:
	case e1000_fc_tx_pause:
	case e1000_fc_full:
	case e1000_fc_none:
		adapter->hw.fc.requested_mode = input;
		adapter->fc = input;
		break;
	default:
		/* Do nothing */
		return (error);
	}

	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
	e1000_force_mac_fc(&adapter->hw);
	return (error);
5872 }
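
/*
 * Usage sketch ("fc" as the node name is an assumption; it is
 * registered in the attach path): request full flow control on a
 * hypothetical unit 0 with:
 *
 *	# sysctl dev.em.0.fc=3
 *
 * The e1000_fc_* enum values line up with the 0-3 scheme above.
 */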
5873 
5874 /*
5875 ** Manage Energy Efficient Ethernet:
5876 ** Control values:
**     0 - EEE enabled, 1 - EEE disabled
5878 */
5879 static int
5880 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5881 {
	struct adapter	*adapter = (struct adapter *) arg1;
	int		error, value;

	value = adapter->hw.dev_spec.ich8lan.eee_disable;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	EM_CORE_LOCK(adapter);
	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
	return (0);
5894 }
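
/*
 * Usage sketch (the node name is an assumption): the value tracks the
 * hardware eee_disable flag, so writing 1 disables EEE and 0 re-enables
 * it, reinitializing the interface either way:
 *
 *	# sysctl dev.em.0.eee_control=1
 */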
5895 
5896 static int
5897 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5898 {
5899 	struct adapter *adapter;
5900 	int error;
5901 	int result;
5902 
5903 	result = -1;
5904 	error = sysctl_handle_int(oidp, &result, 0, req);
5905 
5906 	if (error || !req->newptr)
5907 		return (error);
5908 
5909 	if (result == 1) {
5910 		adapter = (struct adapter *)arg1;
5911 		em_print_debug_info(adapter);
	}
5913 
5914 	return (error);
5915 }
5916 
5917 /*
** This routine is meant to be fluid; add whatever is
** needed for debugging a problem.  -jfv
5920 */
5921 static void
5922 em_print_debug_info(struct adapter *adapter)
5923 {
5924 	device_t dev = adapter->dev;
5925 	struct tx_ring *txr = adapter->tx_rings;
5926 	struct rx_ring *rxr = adapter->rx_rings;
5927 
	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) {
		printf("Interface is RUNNING ");
		if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
			printf("and INACTIVE\n");
		else
			printf("and ACTIVE\n");
	} else
		printf("Interface is NOT RUNNING\n");
5937 
5938 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5939 		device_printf(dev, "TX Queue %d ------\n", i);
5940 		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5941 	    		E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
5942 	    		E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
5943 		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
5944 		device_printf(dev, "TX descriptors avail = %d\n",
5945 	    		txr->tx_avail);
		device_printf(dev, "Tx Descriptors avail failure = %lu\n",
		    txr->no_desc_avail);
5948 		device_printf(dev, "RX Queue %d ------\n", i);
5949 		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5950 	    		E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
5951 	    		E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
		device_printf(dev, "RX discarded packets = %lu\n",
		    rxr->rx_discarded);
5954 		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5955 		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5956 	}
5957 }
5958 
5959 #ifdef EM_MULTIQUEUE
5960 /*
5961  * 82574 only:
5962  * Write a new value to the EEPROM increasing the number of MSIX
5963  * vectors from 3 to 5, for proper multiqueue support.
5964  */
5965 static void
5966 em_enable_vectors_82574(struct adapter *adapter)
5967 {
5968 	struct e1000_hw *hw = &adapter->hw;
5969 	device_t dev = adapter->dev;
5970 	u16 edata;
5971 
5972 	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
	device_printf(dev, "Current cap: %#06x\n", edata);
5974 	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
5975 		device_printf(dev, "Writing to eeprom: increasing "
5976 		    "reported MSIX vectors from 3 to 5...\n");
5977 		edata &= ~(EM_NVM_MSIX_N_MASK);
5978 		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
5979 		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
5980 		e1000_update_nvm_checksum(hw);
5981 		device_printf(dev, "Writing to eeprom: done\n");
5982 	}
5983 }
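
/*
 * The NVM field appears to encode the vector count minus one: the
 * factory default of 2 yields the 3 vectors the 82574 normally reports,
 * and the 4 written above yields 5 (one link vector plus two TX/RX
 * pairs), which is what the multiqueue setup expects.
 */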
5984 #endif
5985 
5986 #ifdef DDB
5987 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
5988 {
5989 	devclass_t	dc;
5990 	int max_em;
5991 
5992 	dc = devclass_find("em");
5993 	max_em = devclass_get_maxunit(dc);
5994 
	for (int index = 0; index < max_em; index++) {
		device_t dev;

		dev = devclass_get_device(dc, index);
		if (dev != NULL && device_get_driver(dev) == &em_driver) {
5999 			struct adapter *adapter = device_get_softc(dev);
6000 			EM_CORE_LOCK(adapter);
6001 			em_init_locked(adapter);
6002 			EM_CORE_UNLOCK(adapter);
6003 		}
6004 	}
6005 }
6006 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6007 {
6008 	devclass_t	dc;
6009 	int max_em;
6010 
6011 	dc = devclass_find("em");
6012 	max_em = devclass_get_maxunit(dc);
6013 
	for (int index = 0; index < max_em; index++) {
		device_t dev;

		dev = devclass_get_device(dc, index);
		if (dev != NULL && device_get_driver(dev) == &em_driver)
6018 			em_print_debug_info(device_get_softc(dev));
6019 	}
6021 }
6022 #endif
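
/*
 * From the in-kernel debugger the commands registered above can be
 * invoked directly, e.g.:
 *
 *	db> em_reset_dev
 *	db> em_dump_queue
 */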
6023